def evaluate_forward(net, length=20):
    """Step the summing LSTM forward on a constant input and collect its outputs.

    Every step feeds the value 0.5 through the same set of layer names
    ('value', 'lstm_concat', 'lstm', 'ip'). Recurrent state is carried from
    one step to the next by copying the LSTM's 'next_hidden' / 'next_mem'
    blobs into 'prev_hidden' / 'prev_mem'.

    Relies on the module-level names ``hyper``, ``layers`` and ``np``.

    Args:
        net: network object providing ``forward_layer``, ``tops`` and
            ``reset_forward``.
        length: number of steps to run. Defaults to 20, the value that was
            previously hard-coded.

    Returns:
        A list with one float per step: the first element of the 'ip' blob
        after that step's forward pass.
    """
    state_shape = (1, hyper['mem_cells'], 1, 1)
    # Both recurrent inputs start out as all zeros.
    net.forward_layer(layers.NumpyData(name='prev_hidden',
                                       data=np.zeros(state_shape)))
    net.forward_layer(layers.NumpyData(name='prev_mem',
                                       data=np.zeros(state_shape)))
    filler = layers.Filler(type='uniform', min=-hyper['init_range'],
                           max=hyper['init_range'])

    predictions = []
    for _ in range(length):
        # Constant input sample; a fresh array is built on every step.
        net.forward_layer(layers.NumpyData(
            name='value', data=np.array(0.5).reshape((1, 1, 1, 1))))
        net.forward_layer(layers.Concat(name='lstm_concat',
                                        bottoms=['prev_hidden', 'value']))
        net.forward_layer(layers.Lstm(
            name='lstm',
            bottoms=['lstm_concat', 'prev_mem'],
            param_names=['input_value', 'input_gate',
                         'forget_gate', 'output_gate'],
            weight_filler=filler,
            tops=['next_hidden', 'next_mem'],
            num_cells=hyper['mem_cells']))
        net.forward_layer(layers.InnerProduct(name='ip',
                                              bottoms=['next_hidden'],
                                              num_output=1))
        predictions.append(float(net.tops['ip'].data.flatten()[0]))

        # Set up for the next prediction by copying LSTM outputs back to inputs.
        net.tops['prev_hidden'].data_tensor.copy_from(
            net.tops['next_hidden'].data_tensor)
        net.tops['prev_mem'].data_tensor.copy_from(
            net.tops['next_mem'].data_tensor)
        # NOTE(review): the flattened source does not show whether this call
        # belongs inside the loop or after it; it is kept per-step because the
        # same layer names are re-run on every iteration -- confirm.
        net.reset_forward()
    return predictions
def forward(net, hyper, sentence_batches):
    """Unroll the language-model LSTM over the next batch and return the mean loss.

    The next item of ``sentence_batches`` is padded with ``pad_batch`` and
    unrolled for at most 100 steps. At step ``t`` the network input is the
    symbol at column ``t - 1`` (all zeros at step 0) and the label is the
    symbol at column ``t``.

    Args:
        net: network object providing ``forward_layer`` and ``tops``.
        hyper: dict read for 'init_range', 'batch_size', 'mem_cells',
            'vocab_size' and 'zero_symbol'.
        sentence_batches: iterator yielding batches accepted by ``pad_batch``.

    Returns:
        ``np.mean`` of the per-step SoftmaxWithLoss forward values.
    """
    raw_batch = next(sentence_batches)
    sentence_batch = np.array(pad_batch(raw_batch, hyper))
    length = min(sentence_batch.shape[1], 100)
    assert length > 0

    filler = layers.Filler(type='uniform', max=hyper['init_range'],
                           min=(-hyper['init_range']))
    batch_size = hyper['batch_size']
    mem_cells = hyper['mem_cells']

    # A single zero blob seeds both the hidden state and the memory cells.
    net.forward_layer(layers.NumpyData(
        name='lstm_seed', data=np.zeros((batch_size, mem_cells, 1, 1))))
    net.forward_layer(layers.NumpyData(
        name='label', data=np.zeros((batch_size * length, 1, 1, 1))))

    step_losses = []
    for step in range(length):
        word_blob = 'word%d' % step
        wordvec_blob = 'wordvec%d' % step
        concat_blob = 'lstm_concat%d' % step
        hidden_blob = 'lstm%d_hidden' % step
        mem_blob = 'lstm%d_mem' % step
        dropout_blob = 'dropout%d' % step
        label_blob = 'label%d' % step
        ip_blob = 'ip%d' % step

        net.forward_layer(layers.DummyData(name=word_blob,
                                           shape=[batch_size, 1, 1, 1]))
        if step != 0:
            # Later steps consume the previous step's state and symbol.
            prev_hidden = 'lstm%d_hidden' % (step - 1)
            prev_mem = 'lstm%d_mem' % (step - 1)
            word = sentence_batch[:, step - 1]
        else:
            # The first step starts from the seed and an all-zero input.
            prev_hidden = 'lstm_seed'
            prev_mem = 'lstm_seed'
            word = np.zeros(sentence_batch[:, 0].shape)
        net.tops[word_blob].data[:, 0, 0, 0] = word

        net.forward_layer(layers.Wordvec(
            name=wordvec_blob,
            bottoms=[word_blob],
            dimension=mem_cells,
            vocab_size=hyper['vocab_size'],
            param_names=['wordvec_param'],
            weight_filler=filler))
        net.forward_layer(layers.Concat(name=concat_blob,
                                        bottoms=[prev_hidden, wordvec_blob]))
        net.forward_layer(layers.Lstm(
            name='lstm%d' % step,
            bottoms=[concat_blob, prev_mem],
            param_names=['lstm_input_value', 'lstm_input_gate',
                         'lstm_forget_gate', 'lstm_output_gate'],
            tops=[hidden_blob, mem_blob],
            num_cells=mem_cells,
            weight_filler=filler))
        net.forward_layer(layers.Dropout(name=dropout_blob,
                                         bottoms=[hidden_blob],
                                         dropout_ratio=0.16))

        label = np.reshape(sentence_batch[:, step], (batch_size, 1, 1, 1))
        net.forward_layer(layers.NumpyData(name=label_blob, data=label))
        net.forward_layer(layers.InnerProduct(
            name=ip_blob,
            bottoms=[dropout_blob],
            param_names=['softmax_ip_weights', 'softmax_ip_bias'],
            num_output=hyper['vocab_size'],
            weight_filler=filler))
        step_loss = net.forward_layer(layers.SoftmaxWithLoss(
            name='softmax_loss%d' % step,
            ignore_label=hyper['zero_symbol'],
            bottoms=[ip_blob, label_blob]))
        step_losses.append(step_loss)

    return np.mean(step_losses)
def forward(net):
    """Run the summing LSTM on one random-length batch and return its loss.

    A sequence length is drawn with ``random.randrange(5, 15)``; at every
    step a batch of ``random.random()`` values is fed to the LSTM and added
    to a running per-example sum. After the last step an InnerProduct layer
    on the final hidden blob is compared against that sum with a Euclidean
    loss.

    Relies on the module-level names ``hyper``, ``layers``, ``np`` and
    ``random``.

    Args:
        net: network object providing ``forward_layer`` and ``tops``.

    Returns:
        The value returned by ``net.forward_layer`` for the EuclideanLoss
        layer.
    """
    # randrange excludes its upper bound, so sequences have 5 to 14 inputs.
    length = random.randrange(5, 15)
    # All weights are drawn uniformly from [-init_range, init_range].
    filler = layers.Filler(type='uniform', min=-hyper['init_range'],
                           max=hyper['init_range'])
    batch_size = hyper['batch_size']

    # The LSTM hidden state and memory both start from the same zero blob.
    net.forward_layer(layers.NumpyData(
        name='lstm_seed',
        data=np.zeros((batch_size, hyper['mem_cells'], 1, 1))))
    accum = np.zeros((batch_size,))

    # Names of the blobs holding the recurrent state entering the next step.
    prev_hidden = 'lstm_seed'
    prev_mem = 'lstm_seed'
    for step in range(length):
        value_blob = 'value%d' % step
        concat_blob = 'lstm_concat%d' % step
        hidden_blob = 'lstm%d_hidden' % step
        mem_blob = 'lstm%d_mem' % step

        # Allocate the input blob, then fill it with a batch of random numbers.
        net.forward_layer(layers.DummyData(name=value_blob,
                                           shape=[batch_size, 1, 1, 1]))
        value = np.array([random.random() for _ in range(batch_size)])
        accum += value
        net.tops[value_blob].data[:, 0, 0, 0] = value

        # Join the incoming hidden state with this step's input value.
        net.forward_layer(layers.Concat(name=concat_blob,
                                        bottoms=[prev_hidden, value_blob]))
        # Advance the LSTM by one step.
        net.forward_layer(layers.Lstm(
            name='lstm%d' % step,
            bottoms=[concat_blob, prev_mem],
            param_names=['input_value', 'input_gate',
                         'forget_gate', 'output_gate'],
            tops=[hidden_blob, mem_blob],
            num_cells=hyper['mem_cells'],
            weight_filler=filler))

        prev_hidden = hidden_blob
        prev_mem = mem_blob

    # The fully connected layer reads the hidden blob of the last LSTM step,
    # so the network structure depends on the sampled length.
    net.forward_layer(layers.InnerProduct(
        name='ip',
        bottoms=['lstm%d_hidden' % (length - 1)],
        num_output=1,
        weight_filler=filler))
    # The label is the per-example sum of all inputs fed so far.
    net.forward_layer(layers.NumpyData(
        name='label', data=np.reshape(accum, (batch_size, 1, 1, 1))))
    # Euclidean loss between the prediction and the label, used for backprop.
    return net.forward_layer(layers.EuclideanLoss(name='euclidean',
                                                  bottoms=['ip', 'label']))
def eval_forward(net, hyper):
    """Sample a sequence from the model one symbol at a time and print it.

    The first symbol is ``vocab[' ']``; every later symbol is chosen by
    ``softmax_choice`` from the 'softmax' blob produced on the previous
    step. Recurrent state is carried by copying 'lstm_hidden_next' /
    'lstm_mem_next' into 'lstm_hidden_prev' / 'lstm_mem_prev' after each
    step. The chosen symbols are mapped through ``ivocab``, UTF-8 encoded,
    joined and printed.

    Relies on the module-level names ``layers``, ``np``, ``vocab``,
    ``ivocab`` and ``softmax_choice``.

    Args:
        net: network object providing ``forward_layer``, ``tops`` and
            ``reset_forward``.
        hyper: dict read for 'init_range', 'mem_cells', 'length',
            'vocab_size', 'i_temperature' and 'zero_symbol'.

    Returns:
        Always ``0.``.
    """
    output_words = []
    filler = layers.Filler(type='uniform', max=hyper['init_range'],
                           min=(-hyper['init_range']))
    state_shape = (1, hyper['mem_cells'], 1, 1)
    net.forward_layer(layers.NumpyData(name='lstm_hidden_prev',
                                       data=np.zeros(state_shape)))
    net.forward_layer(layers.NumpyData(name='lstm_mem_prev',
                                       data=np.zeros(state_shape)))

    for step in range(hyper['length']):
        net.forward_layer(layers.NumpyData(name='word',
                                           data=np.zeros((1, 1, 1, 1))))
        if step == 0:
            # Seed the sequence with the symbol for a single space.
            output = vocab[' ']
        else:
            # The 'softmax' blob still holds the previous step's result.
            output = softmax_choice(net.tops['softmax'].data)
        output_words.append(output)
        net.tops['word'].data[0, 0, 0, 0] = output

        net.forward_layer(layers.Wordvec(
            name='wordvec',
            bottoms=['word'],
            dimension=hyper['mem_cells'],
            vocab_size=hyper['vocab_size'],
            param_names=['wordvec_param'],
            weight_filler=filler))
        net.forward_layer(layers.Concat(
            name='lstm_concat', bottoms=['lstm_hidden_prev', 'wordvec']))
        net.forward_layer(layers.Lstm(
            name='lstm',
            bottoms=['lstm_concat', 'lstm_mem_prev'],
            param_names=['lstm_input_value', 'lstm_input_gate',
                         'lstm_forget_gate', 'lstm_output_gate'],
            tops=['lstm_hidden_next', 'lstm_mem_next'],
            num_cells=hyper['mem_cells'],
            weight_filler=filler))
        # NOTE(review): the dropout layer is still part of the sampling path;
        # whether it is a pass-through here depends on the net's phase --
        # confirm.
        net.forward_layer(layers.Dropout(name='dropout',
                                         bottoms=['lstm_hidden_next'],
                                         dropout_ratio=0.16))
        net.forward_layer(layers.InnerProduct(
            name='ip',
            bottoms=['dropout'],
            param_names=['softmax_ip_weights', 'softmax_ip_bias'],
            num_output=hyper['vocab_size'],
            weight_filler=filler))
        # Scale the scores in place before the softmax is applied.
        net.tops['ip'].data[:] *= hyper['i_temperature']
        net.forward_layer(layers.Softmax(name='softmax',
                                         ignore_label=hyper['zero_symbol'],
                                         bottoms=['ip']))

        # Copy the LSTM outputs back to its inputs for the next step.
        net.tops['lstm_hidden_prev'].data_tensor.copy_from(
            net.tops['lstm_hidden_next'].data_tensor)
        net.tops['lstm_mem_prev'].data_tensor.copy_from(
            net.tops['lstm_mem_next'].data_tensor)
        # NOTE(review): the flattened source does not show whether this call
        # belongs inside the loop or after it; it is kept per-step because the
        # same layer names are re-run on every iteration -- confirm.
        net.reset_forward()

    # Single-argument parenthesised form prints the same text under Python 2.
    print(''.join([ivocab[x].encode('utf8') for x in output_words]))
    return 0.
def forward(net, sentence_batches):
    """Encode a source batch, decode the target batch, and return the mean loss.

    The source batch is unrolled through one LSTM ('source_lstm%d'); the
    per-step hidden and memory blobs are passed to two CapSequence layers
    together with per-example lengths to produce 'hidden_seed' and
    'mem_seed'. A second LSTM ('lstm%d') is then unrolled over the target
    batch starting from those seeds: at step ``t`` its input is target
    column ``t - 1`` (all zeros at step 0) and its label is target
    column ``t``.

    Relies on the module-level names ``hyper``, ``layers`` and ``np``.

    Args:
        net: network object providing ``forward_layer`` and ``tops``.
        sentence_batches: iterator yielding ``(source_batch, target_batch)``
            pairs; both are indexed as 2-D arrays (``[:, step]``,
            ``.shape[1]``).

    Returns:
        ``np.mean`` of the per-step SoftmaxWithLoss forward values.
    """
    source_batch, target_batch = next(sentence_batches)
    filler = layers.Filler(type='uniform', max=hyper['init_range'],
                           min=(-hyper['init_range']))
    batch_size = hyper['batch_size']
    mem_cells = hyper['mem_cells']

    net.forward_layer(layers.NumpyData(
        name='source_lstm_seed',
        data=np.zeros((batch_size, mem_cells, 1, 1))))
    hidden_bottoms = ['source_lstm_seed']
    mem_bottoms = ['source_lstm_seed']

    # Per-example count of non-pad source tokens, capped at hyper['max_len'].
    pad_symbol = hyper['pad_symbol']
    max_len = hyper['max_len']
    lengths = [
        min(sum(1 for token in row if token != pad_symbol), max_len)
        for row in source_batch
    ]

    # ---- Encoder: unroll the source LSTM over every source column. ----
    for step in range(source_batch.shape[1]):
        word_blob = 'source_word%d' % step
        wordvec_blob = 'source_wordvec%d' % step
        concat_blob = 'source_lstm_concat%d' % step
        next_hidden = 'source_lstm%d_hidden' % step
        next_mem = 'source_lstm%d_mem' % step

        net.forward_layer(layers.DummyData(name=word_blob,
                                           shape=[batch_size, 1, 1, 1]))
        if step == 0:
            prev_hidden = 'source_lstm_seed'
            prev_mem = 'source_lstm_seed'
        else:
            prev_hidden = 'source_lstm%d_hidden' % (step - 1)
            prev_mem = 'source_lstm%d_mem' % (step - 1)
        hidden_bottoms.append(next_hidden)
        mem_bottoms.append(next_mem)

        net.tops[word_blob].data[:, 0, 0, 0] = source_batch[:, step]

        net.forward_layer(layers.Wordvec(
            name=wordvec_blob,
            bottoms=[word_blob],
            dimension=mem_cells,
            vocab_size=hyper['vocab_size'],
            param_names=['source_wordvec_param'],
            weight_filler=filler))
        net.forward_layer(layers.Concat(name=concat_blob,
                                        bottoms=[prev_hidden, wordvec_blob]))
        net.forward_layer(layers.Lstm(
            name='source_lstm%d' % step,
            bottoms=[concat_blob, prev_mem],
            param_names=['source_lstm_input_value', 'source_lstm_input_gate',
                         'source_lstm_forget_gate', 'source_lstm_output_gate'],
            tops=[next_hidden, next_mem],
            num_cells=mem_cells,
            weight_filler=filler))

    # Derive the decoder's initial state from the encoder's per-step blobs.
    net.forward_layer(layers.CapSequence(name='hidden_seed',
                                         sequence_lengths=lengths,
                                         bottoms=hidden_bottoms))
    net.forward_layer(layers.CapSequence(name='mem_seed',
                                         sequence_lengths=lengths,
                                         bottoms=mem_bottoms))

    # ---- Decoder: unroll the target LSTM over every target column. ----
    loss = []
    for step in range(target_batch.shape[1]):
        word_blob = 'word%d' % step
        wordvec_blob = 'wordvec%d' % step
        concat_blob = 'lstm_concat%d' % step
        hidden_blob = 'lstm%d_hidden' % step
        mem_blob = 'lstm%d_mem' % step
        dropout_blob = 'dropout%d' % step
        label_blob = 'label%d' % step
        ip_blob = 'ip%d' % step

        if step == 0:
            prev_hidden = 'hidden_seed'
            prev_mem = 'mem_seed'
            word = np.zeros(target_batch[:, 0].shape)
        else:
            prev_hidden = 'lstm%d_hidden' % (step - 1)
            prev_mem = 'lstm%d_mem' % (step - 1)
            word = target_batch[:, step - 1]

        net.forward_layer(layers.NumpyData(
            name=word_blob,
            data=np.reshape(word, (batch_size, 1, 1, 1))))
        # The decoder looks words up under the same parameter name
        # ('source_wordvec_param') as the encoder.
        net.forward_layer(layers.Wordvec(
            name=wordvec_blob,
            bottoms=[word_blob],
            dimension=mem_cells,
            vocab_size=hyper['vocab_size'],
            param_names=['source_wordvec_param'],
            weight_filler=filler))
        net.forward_layer(layers.Concat(name=concat_blob,
                                        bottoms=[prev_hidden, wordvec_blob]))
        net.forward_layer(layers.Lstm(
            name='lstm%d' % step,
            bottoms=[concat_blob, prev_mem],
            param_names=['lstm_input_value', 'lstm_input_gate',
                         'lstm_forget_gate', 'lstm_output_gate'],
            tops=[hidden_blob, mem_blob],
            num_cells=mem_cells,
            weight_filler=filler))
        net.forward_layer(layers.Dropout(name=dropout_blob,
                                         bottoms=[hidden_blob],
                                         dropout_ratio=0.16))
        net.forward_layer(layers.NumpyData(
            name=label_blob,
            data=np.reshape(target_batch[:, step], (batch_size, 1, 1, 1))))
        net.forward_layer(layers.InnerProduct(
            name=ip_blob,
            bottoms=[dropout_blob],
            param_names=['ip_weight', 'ip_bias'],
            num_output=hyper['vocab_size'],
            weight_filler=filler))
        loss.append(net.forward_layer(layers.SoftmaxWithLoss(
            name='softmax_loss%d' % step,
            ignore_label=hyper['pad_symbol'],
            bottoms=[ip_blob, label_blob])))
        # The plain Softmax layer is run so its 'softmax%d' top is populated,
        # but its return value is deliberately not added to `loss`: it is not
        # a loss layer, and mixing its value in made the mean average over
        # two entries per step instead of one.
        net.forward_layer(layers.Softmax(
            name='softmax%d' % step,
            ignore_label=hyper['pad_symbol'],
            bottoms=[ip_blob]))

    return np.mean(loss)