def test_inputTensor_batched_list(self):
    """Checks dy.inputTensor built from a list of per-batch arrays:
    batch dimensions, round-tripped values, and the batched squared norm."""
    for i in range(4):
        dy.renew_cg()
        reshaped = self.input_vals.reshape(self.shapes[i])
        # One array per batch element; transpose puts the batch axis first.
        batch_list = [np.asarray(elem).transpose() for elem in reshaped.transpose()]
        xb = dy.inputTensor(batch_list)
        expected_shape = self.shapes[i][:-1] if i > 0 else (1,)
        self.assertEqual(xb.dim()[0], expected_shape, msg="Dimension mismatch")
        self.assertEqual(xb.dim()[1], self.shapes[i][-1], msg="Dimension mismatch")
        self.assertTrue(np.allclose(xb.npvalue(), reshaped),
                        msg="Expression value different from initial value")
        self.assertEqual(dy.sum_batches(dy.squared_norm(xb)).scalar_value(),
                         self.squared_norm, msg="Value mismatch")
def eval_dict_dataset(dataset, net, shortlist, proj, parsed):
    """Evaluate a reverse-dictionary model on `dataset`.

    For every definition, projects the network output through `proj`,
    L2-normalizes it, and ranks the target word embedding against the
    `shortlist` embeddings by dot-product similarity.

    Returns:
        (median_rank, accuracy@10, accuracy@100)

    Fixes vs. original: removed the unused `num_batches` local and the
    unused `batch_num` enumeration index; accuracy sums use generators.
    """
    ranks = []
    if parsed:
        # Parsed data is processed one tree at a time.
        dim = dataset[0][0].shape[0]
        batch_size = 1
    else:
        dim, batch_size = dataset[0][0].shape
    for data in dataset:
        if parsed:
            words, definitions, _ = data
        else:
            words, definitions = data
        words = np.reshape(np.transpose(words), (batch_size, dim))
        dy.renew_cg()
        P = dy.parameter(proj)
        if parsed:
            outputs = net.do_parse_tree(definitions)
        else:
            outputs, _ = net(definitions)
        outputs = P * outputs
        # Unit-normalize; cdiv against a 1-element tensor works around the
        # lack of expression-by-scalar division.
        normalised_outputs = outputs * dy.cdiv(dy.inputTensor([1]),
                                               dy.sqrt(dy.squared_norm(outputs)))
        normalised_outputs = np.reshape(np.transpose(normalised_outputs.npvalue()),
                                        (batch_size, dim))
        for output, word in zip(normalised_outputs, words):
            target_similarity = np.dot(word, output)
            similarities = np.dot(shortlist, output)
            # Rank = number of shortlist entries scoring above the target.
            rank = (similarities > target_similarity).sum()
            ranks.append(rank)
    total = len(ranks)
    accuracy10 = float(sum(int(r <= 10) for r in ranks)) / total
    accuracy100 = float(sum(int(r <= 100) for r in ranks)) / total
    return np.median(ranks), accuracy10, accuracy100
def decode(self, vectors_array, output_array, input_array, end_token):
    """Unrolls the decoder LSTM over the target sequence and accumulates a
    squared-error loss against each target vector.

    Returns (total_loss_expression, number_of_steps_taken).

    NOTE(review): `end_token`, `w2`, `v`, `out_vectors_array` and the loop
    variable `inp` are currently unused — attention is disabled below.
    Indentation was reconstructed from a collapsed source; verify nesting.
    """
    # Preprocess the batch
    #print output_array
    out_vectors_array = []
    isents = output_array #[1:]
    # transposes
    inps = input_array[1:]
    # Declare all your stuff
    input_mat_array = dynet.concatenate_cols(vectors_array)
    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)
    w1 = dynet.parameter(self.attention_w1)
    w2 = dynet.parameter(self.attention_w2)
    v = dynet.parameter(self.attention_v)
    w1dt = None
    # w1dt is always None at this point, so the `or` simply computes
    # w1 * input matrix once, up front.
    w1dt = w1dt or w1 * input_mat_array
    last_output_embeddings = lookup(self.output_lookup, 1)
    s = self.dec_lstm.initial_state( ) #.add_input(dynet.concatenate([dynet.vecInput(self.state_size *2), last_output_embeddings]))
    # This can be argued to be some form of cheating I think
    first_flag = 0
    errs = []
    # Okay Go on
    words = 0
    for (curr_vec, inp) in zip(isents, inps):
        #print "Mapping: ", curr_vec , inp
        '''
        if first_flag == 0:
            first_flag = 1
            last_output_embeddings = lookup(self.output_lookup, 1)
            a = dynet.vecInput(1024)
            bb = last_output_embeddings
        else:
            bb = last_output_embeddings
            #a = self.attend(input_mat_array,s, w1dt)
            a = dynet.vecInput(1024)
        '''
        # `a` and `bb` are assigned only on the first iteration, so every
        # step feeds the same (zero vector, initial embedding) pair to the
        # LSTM. NOTE(review): presumably intentional while attention is
        # disabled — confirm against the commented-out variant above.
        if first_flag == 0:
            first_flag = 1
            a = dynet.vecInput(1024)
            bb = last_output_embeddings
        x_t = dynet.concatenate([a, bb])
        s = s.add_input(x_t)
        #print "Added input"
        y = s.output()
        #print y.value()
        output_vector = w * y + b
        #err = dynet.pickneglogsoftmax(dynet.softmax(output_vector), curr_vec)
        #rr = dynet.pickneglogsoftmax(output_vector, curr_vec)
        # Regression loss: squared L2 distance prediction vs. target vector.
        err = dynet.squared_norm(output_vector - curr_vec)
        last_output_embeddings = output_vector
        words += 1
        errs.append(err)
    print errs
    err_v = dynet.esum(errs)
    print "Returning", err_v
    return err_v, words
def test(sqnorm_original_value, assert_equal):
    """Re-runs the BiLSTM forward pass on a fresh graph and compares the
    squared norm of the averaged outputs against a reference value."""
    dy.renew_cg()
    fresh_inputs = make_inputs()
    averaged = dy.average(common.get_bilstm_all(fresh_inputs, flstm, blstm))
    current_value = dy.squared_norm(averaged).value()
    if assert_equal:
        self.assertAlmostEqual(sqnorm_original_value, current_value, places=10)
    else:
        self.assertNotAlmostEqual(sqnorm_original_value, current_value, places=10)
def test_get_bilstm_all_update(self):
    """Verifies get_bilstm_all only lets the trainer update the LSTM
    parameters when updates are enabled (its default)."""
    pc = dy.ParameterCollection()
    trainer = dy.AdamTrainer(pc, 0.1)
    flstm = dy.LSTMBuilder(1, 1, 1, pc)
    blstm = dy.LSTMBuilder(1, 1, 1, pc)
    model = Model()
    common = CommonArchitecture(model)

    def make_inputs():
        return [dy.inputTensor([v]) for v in (1.0, 2.0, 3.0, 4.0)]

    def check(reference_value, expect_unchanged):
        # Fresh graph + fresh inputs, then compare against the reference.
        dy.renew_cg()
        averaged = dy.average(common.get_bilstm_all(make_inputs(), flstm, blstm))
        current = dy.squared_norm(averaged).value()
        if expect_unchanged:
            self.assertAlmostEqual(reference_value, current, places=10)
        else:
            self.assertNotAlmostEqual(reference_value, current, places=10)

    # Backprop with update=False: the trainer step must not touch the LSTMs.
    averaged = dy.average(common.get_bilstm_all(make_inputs(), flstm, blstm, False))
    sqnorm = dy.squared_norm(averaged)
    reference_value = sqnorm.value()
    sqnorm.backward()
    trainer.update()  # Shouldn't update LSTMs.
    check(reference_value, True)

    # Backprop with updates enabled: the LSTM parameters must change.
    dy.renew_cg()
    averaged = dy.average(common.get_bilstm_all(make_inputs(), flstm, blstm))
    dy.squared_norm(averaged).backward()
    trainer.update()  # Should update LSTMs.
    check(reference_value, False)
def test_inputTensor_not_batched(self):
    """Checks unbatched dy.inputTensor: shape, unit batch dim, values,
    and the squared norm."""
    for i in range(4):
        dy.renew_cg()
        tensor = self.input_vals.reshape(self.shapes[i])
        expr = dy.inputTensor(tensor)
        self.assertEqual(expr.dim()[0], self.shapes[i], msg="Dimension mismatch")
        self.assertEqual(expr.dim()[1], 1, msg="Dimension mismatch")
        self.assertTrue(np.allclose(expr.npvalue(), tensor),
                        msg="Expression value different from initial value")
        self.assertEqual(dy.squared_norm(expr).scalar_value(),
                         self.squared_norm, msg="Value mismatch")
def test_inputTensor_not_batched(self):
    """For each test shape, builds an unbatched input tensor and asserts
    its dimensions, contents, and squared norm match expectations."""
    for shape_idx in range(4):
        dy.renew_cg()
        shape = self.shapes[shape_idx]
        data = self.input_vals.reshape(shape)
        x = dy.inputTensor(data)
        dims, batch = x.dim()
        self.assertEqual(dims, shape, msg="Dimension mismatch")
        self.assertEqual(batch, 1, msg="Dimension mismatch")
        self.assertTrue(np.allclose(x.npvalue(), data),
                        msg="Expression value different from initial value")
        self.assertEqual(dy.squared_norm(x).scalar_value(),
                         self.squared_norm, msg="Value mismatch")
def _batch_forward(self, training_vectors):
    """Runs the forward pass over a batch of (input, one-hot target) pairs.

    Each example's loss is negative log-likelihood of the target class plus
    an L2 penalty over all model parameters.

    Returns (softmax predictions as numpy arrays, summed loss expression,
    mean per-example loss value).
    """
    per_example_losses = []
    predictions = []
    for features, one_hot_target in training_vectors:
        probs = dy.softmax(self._forward(features))
        l2_penalty = self._l2_param * sum(dy.squared_norm(p)
                                          for p in self._model.parameters_list())
        nll = -dy.log(dy.pick(probs, one_hot_target.index(1)))
        per_example_losses.append(nll + l2_penalty)
        predictions.append(probs.npvalue())
    mean_loss = np.mean([loss.npvalue() for loss in per_example_losses])
    return predictions, dy.esum(per_example_losses), mean_loss
def test_inputTensor_batched_list(self):
    """Builds a batched input tensor from a list of arrays and asserts its
    dimensions, contents, and total (batch-summed) squared norm."""
    for i in range(4):
        dy.renew_cg()
        data = self.input_vals.reshape(self.shapes[i])
        per_batch = [np.asarray(a).transpose() for a in data.transpose()]
        xb = dy.inputTensor(per_batch)
        dims, batch = xb.dim()
        self.assertEqual(dims, self.shapes[i][:-1] if i > 0 else (1,),
                         msg="Dimension mismatch")
        self.assertEqual(batch, self.shapes[i][-1], msg="Dimension mismatch")
        self.assertTrue(np.allclose(xb.npvalue(), data),
                        msg="Expression value different from initial value")
        total_sq_norm = dy.sum_batches(dy.squared_norm(xb)).scalar_value()
        self.assertEqual(total_sq_norm, self.squared_norm, msg="Value mismatch")
def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    """Applies an affine transform to the sequence tensor and L2-normalizes
    the result, returning it as a new ExpressionSequence."""
    tensor = src.as_tensor()
    height = tensor.dim()[0][0]
    width = 1
    batch_size = tensor.dim()[1]
    W = dy.parameter(self.pW)
    b = dy.parameter(self.pb)
    # ((276, 80, 3), 1)
    reshaped = dy.reshape(tensor, (height, width), batch_size=batch_size)
    # convolution and pooling layers
    affine = (W * reshaped) + b
    # Scale to unit L2 norm.
    normed = dy.cdiv(affine, dy.sqrt(dy.squared_norm(affine)))
    return ExpressionSequence(expr_tensor=normed)
def transduce(self, src):
    """Three ReLU-conv + max-pool stages, k-max pooling, then L2
    normalization; returns a flat per-batch feature vector sequence."""
    tensor = src.as_tensor()
    height = tensor.dim()[0][0]
    width = tensor.dim()[0][1]
    channels = 1
    batch_size = tensor.dim()[1]
    # ((276, 80, 3), 1)
    tensor = dy.reshape(tensor, (height, width, channels), batch_size=batch_size)
    # convolution and pooling layers
    conv1 = dy.rectify(dy.conv2d(tensor, dy.parameter(self.filters1),
                                 stride=[self.stride[0], self.stride[0]],
                                 is_valid=True))
    pool1 = dy.maxpooling2d(conv1, (1, 4), (1, 2), is_valid=True)
    conv2 = dy.rectify(dy.conv2d(pool1, dy.parameter(self.filters2),
                                 stride=[self.stride[1], self.stride[1]],
                                 is_valid=True))
    pool2 = dy.maxpooling2d(conv2, (1, 4), (1, 2), is_valid=True)
    conv3 = dy.rectify(dy.conv2d(pool2, dy.parameter(self.filters3),
                                 stride=[self.stride[2], self.stride[2]],
                                 is_valid=True))
    # Keep only the strongest activation along the time axis.
    pool3 = dy.kmax_pooling(conv3, 1, d=1)
    # Unit-normalize, then drop the singleton spatial dims.
    normed = dy.cdiv(pool3, dy.sqrt(dy.squared_norm(pool3)))
    normed = dy.reshape(normed, (self.num_filters[2],), batch_size=batch_size)
    return ExpressionSequence(expr_tensor=normed)
def cosine_similarity(a, b):
    """Return the cosine similarity of expressions `a` and `b`.

    Fix: the previous version divided the dot product only by ||a||, which
    is asymmetric and only equals cosine similarity when `b` happens to be
    unit-length; we now divide by ||a||*||b|| per the definition.
    """
    # FIXME do I really need to do this for scalar division? :(
    inv_norms = dy.cdiv(dy.inputTensor([1]),
                        dy.sqrt(dy.squared_norm(a)) * dy.sqrt(dy.squared_norm(b)))
    return dy.dot_product(a, b) * inv_norms
def score_constituent(self, constituent):
    """Scores a constituent as the cosine of its representation with the
    learned scoring vector, scaled by the inverse temperature."""
    weight = dy.parameter(self.w_score)
    unit_weight = dy.cdiv(weight, dy.sqrt(dy.squared_norm(weight)))
    unit_repr = dy.cdiv(constituent.h, dy.sqrt(dy.squared_norm(constituent.h)))
    return dy.dot_product(unit_weight, unit_repr) * self.inv_temp
for char in t: s.add_input(dy.lookup(lookup, int(char))) # Decode the feat sequence dec_init_state = decoder_lstm.initial_state() losses = [] last_output_embedding = dy.lookup(lookup, 0) s = dec_init_state.add_input(last_output_embedding) #s = dec_init_state.add_input(dy.concatenate([dy.vecInput(64), last_output_embedding])) idx = 0 W = dy.parameter(decoder_weight) b = dy.parameter(decoder_bias) idx = 0 while True: #print idx idx += 1 if idx > 10000: break for feat in f: #print idx, feat last_output_embedding = s.output() pred = dy.affine_transform([b, W, last_output_embedding]) losses.append(dy.squared_norm(pred - dy.inputTensor(feat))) if len(losses) > 50: break loss = dy.esum(losses) train_loss += loss.value() loss.backward() trainer.update() print "Train loss : ", train_loss
def delta_norm(self): """Return the (average) L2 norm of Δ.""" # We average over vocabulary, otherwise we are # not consistent accross varying vocab-sizes. return dy.sum_elems(dy.squared_norm(self.delta.embedding)) / self.size
def transduce(self, x):
    """Unrolls the decoder RNN over `x` and returns the per-step hidden
    states as an ExpressionSequence.

    During training in "teacher"/"split" mode the unrolling is guided by
    the reference targets (accumulating transducer losses); otherwise the
    model's own argmax predictions are fed back until termination or the
    maximum length. In "split" mode a second RNN pass is then run over
    attention-derived context inputs, and those states replace the first
    pass's outputs.

    NOTE(review): indentation reconstructed from a collapsed source — the
    nesting below should be verified against the original file.
    """
    # some preparations
    output_states = []
    current_state = self._encode_src(x, apply_emb=False)
    if self.mode_transduce == "split":
        # Remember the post-encoding state so the second (split) pass can
        # restart from it.
        first_state = SymmetricDecoderState(rnn_state=current_state.rnn_state,
                                            context=current_state.context)
    batch_size = x.dim()[1]
    done = [False] * batch_size
    out_mask = batchers.Mask(np_arr=np.zeros((batch_size, self.max_dec_len)))
    out_mask.np_arr.flags.writeable = True
    # teacher / split mode: unfold guided by reference targets
    # -> feed everything up unto (except) the last token back into the LSTM
    # other modes: unfold until EOS is output or max len is reached
    max_dec_len = self.cur_src.batches[1].sent_len() \
        if self.mode_transduce in ["teacher", "split"] else self.max_dec_len
    atts_list = []
    generated_word_ids = []
    for pos in range(max_dec_len):
        if self.train and self.mode_transduce in ["teacher", "split"]:
            # unroll RNN guided by reference
            prev_ref_action, ref_action = None, None
            if pos > 0:
                prev_ref_action = self._batch_ref_action(pos - 1)
            if self.transducer_loss:
                ref_action = self._batch_ref_action(pos)
            step_loss = self.calc_loss_one_step(
                dec_state=current_state,
                batch_size=batch_size,
                mode=self.mode_transduce,
                ref_action=ref_action,
                prev_ref_action=prev_ref_action)
            self.transducer_losses.append(step_loss)
        else:  # inference
            # unroll RNN guided by model predictions
            if self.mode_transduce in ["teacher", "split"]:
                prev_ref_action = self._batch_max_action(
                    batch_size, current_state, pos)
            else:
                prev_ref_action = None
            out_scores = self.generate_one_step(
                dec_state=current_state,
                mask=out_mask,
                cur_step=pos,
                batch_size=batch_size,
                mode=self.mode_transduce,
                prev_ref_action=prev_ref_action)
            word_id = np.argmax(out_scores.npvalue(), axis=0)
            word_id = word_id.reshape((word_id.size, ))
            generated_word_ids.append(word_id[0])
            for batch_i in range(batch_size):
                if self._terminate_rnn(batch_i=batch_i, pos=pos,
                                       batched_word_id=word_id):
                    done[batch_i] = True
                    # Mask out everything after the terminating token.
                    out_mask.np_arr[batch_i, pos + 1:] = 1.0
            if pos > 0 and all(done):
                # Every batch element finished: record this final step
                # and stop unrolling.
                atts_list.append(self.attender.get_last_attention())
                output_states.append(current_state.rnn_state.h()[-1])
                break
        # Collect the top-layer hidden state and attention for this step.
        output_states.append(current_state.rnn_state.h()[-1])
        atts_list.append(self.attender.get_last_attention())
    if self.mode_transduce == "split":
        # split mode: use attentions to compute context, then run RNNs over these context inputs
        if self.split_regularizer:
            assert len(atts_list) == len(self._chosen_rnn_inputs), \
                f"{len(atts_list)} != {len(self._chosen_rnn_inputs)}"
        split_output_states = []
        split_rnn_state = first_state.rnn_state
        for pos, att in enumerate(atts_list):
            lstm_input_context = self.attender.curr_sent.as_tensor() * att  # TODO: better reuse the already computed context vecs
            lstm_input_context = dy.reshape(
                lstm_input_context,
                (lstm_input_context.dim()[0][0], ),
                batch_size=batch_size)
            if self.split_dual:
                lstm_input_label = self._chosen_rnn_inputs[pos]
                if self.split_dual[0] > 0.0 and self.train:
                    lstm_input_context = dy.dropout_batch(
                        lstm_input_context, self.split_dual[0])
                if self.split_dual[1] > 0.0 and self.train:
                    lstm_input_label = dy.dropout_batch(
                        lstm_input_label, self.split_dual[1])
                if self.split_context_transform:
                    lstm_input_context = self.split_context_transform.transform(
                        lstm_input_context)
                lstm_input_context = self.split_dual_proj.transform(
                    dy.concatenate([lstm_input_context, lstm_input_label]))
            if self.split_regularizer and pos < len(self._chosen_rnn_inputs):
                # _chosen_rnn_inputs does not contain first (empty) input, so this is in fact like comparing to pos-1:
                penalty = dy.squared_norm(
                    lstm_input_context - self._chosen_rnn_inputs[pos])
                if self.split_regularizer != 1:
                    penalty = self.split_regularizer * penalty
                self.split_reg_penalty_expr = penalty
            split_rnn_state = split_rnn_state.add_input(lstm_input_context)
            split_output_states.append(split_rnn_state.h()[-1])
        assert len(output_states) == len(split_output_states)
        output_states = split_output_states
    # Trim the mask to the number of steps actually produced.
    out_mask.np_arr = out_mask.np_arr[:, :len(output_states)]
    self._final_states = []
    if self.compute_report:
        # for symmetric reporter (this can only be run at inference time)
        assert batch_size == 1
        atts_matrix = np.asarray([att.npvalue() for att in atts_list]).reshape(
            len(atts_list), atts_list[0].dim()[0][0]).T
        self.report_sent_info({
            "symm_att": atts_matrix,
            "symm_out": sent.SimpleSentence(
                words=generated_word_ids,
                idx=self.cur_src.batches[0][0].idx,
                vocab=self.cur_src.batches[1][0].vocab,
                output_procs=self.cur_src.batches[1][0].output_procs),
            "symm_ref": self.cur_src.batches[1][0] if isinstance(
                self.cur_src, batchers.CompoundBatch) else None
        })
    # prepare final outputs
    for layer_i in range(len(current_state.rnn_state.h())):
        self._final_states.append(
            transducers.FinalTransducerState(
                main_expr=current_state.rnn_state.h()[layer_i],
                cell_expr=current_state.rnn_state._c[layer_i]))
    out_mask.np_arr.flags.writeable = False
    return expression_seqs.ExpressionSequence(expr_list=output_states,
                                              mask=out_mask)
def test_duration(self, state, idx):
    """Squared-error duration loss: affine(state.output()) vs. target `idx`.

    Fix: cast `idx` to float before subtracting — DyNet expression
    arithmetic expects a float scalar, and the companion implementation
    of this method already uses float(idx).
    """
    dw = dy.parameter(self.duration_weight)
    db = dy.parameter(self.duration_bias)
    dur = dw * state.output() + db
    return dy.squared_norm(dur - float(idx))
def test_duration(self, state, idx):
    """Squared-error duration loss: ReLU of an affine transform of the
    decoder state, compared against the target index."""
    weight = dy.parameter(self.duration_weight)
    bias = dy.parameter(self.duration_bias)
    predicted_duration = dy.rectify(weight * state.output() + bias)
    return dy.squared_norm(predicted_duration - float(idx))
import numpy as np

# Sanity checks for dy.inputTensor over several reshapes of the same data:
# unbatched, batched (last dim = batch size), and batched-from-list.
input_vals = np.arange(81)
squared_norm = (input_vals**2).sum()  # reference: sum of squares of 0..80
shapes = [(81, ), (3, 27), (3, 3, 9), (3, 3, 3, 3)]
for i in range(4):
    # Not batched
    dy.renew_cg()
    input_tensor = input_vals.reshape(shapes[i])
    x = dy.inputTensor(input_tensor)
    assert (x.dim()[0] == shapes[i]
            and x.dim()[1] == 1), "Dimension mismatch : {} : ({}, {})".format(
                x.dim(), shapes[i], 1)
    assert (x.npvalue() == input_tensor
            ).all(), "Expression value different from initial value"
    assert dy.squared_norm(x).scalar_value() == squared_norm, "Value mismatch"
    # Batched
    dy.renew_cg()
    xb = dy.inputTensor(input_tensor, batched=True)
    assert (xb.dim()[0] == (shapes[i][:-1] if i > 0 else (1, ))
            and xb.dim()[1] == shapes[i][-1]
            ), "Dimension mismatch with batch size : {} : ({}, {})".format(
                xb.dim(), (shapes[i][:-1] if i > 0 else 1), shapes[i][-1])
    assert (xb.npvalue() == input_tensor
            ).all(), "Batched expression value different from initial value"
    assert dy.sum_batches(
        dy.squared_norm(xb)).scalar_value() == squared_norm, "Value mismatch"
    # Batched with list
    dy.renew_cg()
    xb = dy.inputTensor(
        [np.asarray(x).transpose() for x in input_tensor.transpose()])
    # NOTE(review): this chunk appears truncated — the assertions for the
    # batched-with-list case are missing from this view.
def L2_req_term(self):
    """Orthogonality regularizer: half the squared Frobenius deviation of
    W*W^T from the identity matrix."""
    W = dy.parameter(self.W)
    gram = W * dy.transpose(W)
    identity = dy.inputTensor(np.eye(self.output))
    return dy.squared_norm(gram - identity) / 2