コード例 #1
0
ファイル: test.py プロジェクト: danielhers/cnn
 def test_inputTensor_batched_list(self):
     """Build a batched expression from a list of per-batch arrays and
     check its dimensions, values, and batch-summed squared norm."""
     for shape_idx in range(4):
         dy.renew_cg()
         arr = self.input_vals.reshape(self.shapes[shape_idx])
         # Transposing splits the last axis into one array per batch element.
         per_batch = [np.asarray(elem).transpose()
                      for elem in arr.transpose()]
         batched = dy.inputTensor(per_batch)
         expected_shape = (self.shapes[shape_idx][:-1]
                           if shape_idx > 0 else (1,))
         self.assertEqual(batched.dim()[0], expected_shape,
                          msg="Dimension mismatch")
         self.assertEqual(batched.dim()[1], self.shapes[shape_idx][-1],
                          msg="Dimension mismatch")
         self.assertTrue(np.allclose(batched.npvalue(), arr),
                         msg="Expression value different from initial value")
         self.assertEqual(
             dy.sum_batches(dy.squared_norm(batched)).scalar_value(),
             self.squared_norm,
             msg="Value mismatch")
コード例 #2
0
def eval_dict_dataset(dataset, net, shortlist, proj, parsed):
    """Evaluate a definitions-to-embedding model on a reverse-dictionary task.

    For every batch, project and L2-normalise the network outputs, then rank
    each gold word vector against all `shortlist` vectors by dot product.
    Returns (median rank, accuracy@10, accuracy@100).
    """
    all_ranks = []
    n_batches = len(dataset)  # kept for parity with the original (unused)
    first_words = dataset[0][0]
    if parsed:
        dim = first_words.shape[0]
        batch_size = 1
    else:
        dim, batch_size = first_words.shape
    for batch_idx, batch in enumerate(dataset):
        if parsed:
            gold_words, definitions, _ = batch
        else:
            gold_words, definitions = batch
        gold_words = np.reshape(np.transpose(gold_words), (batch_size, dim))
        dy.renew_cg()
        proj_expr = dy.parameter(proj)
        if parsed:
            preds = net.do_parse_tree(definitions)
        else:
            preds, _ = net(definitions)
        preds = proj_expr * preds
        # Scale predictions to unit L2 norm before comparing dot products.
        inv_norm = dy.cdiv(dy.inputTensor([1]), dy.sqrt(dy.squared_norm(preds)))
        unit_preds = preds * inv_norm
        unit_preds = np.reshape(np.transpose(unit_preds.npvalue()),
                                (batch_size, dim))
        for pred_vec, gold_vec in zip(unit_preds, gold_words):
            gold_similarity = np.dot(gold_vec, pred_vec)
            similarities = np.dot(shortlist, pred_vec)
            # Rank = number of shortlist entries scoring above the gold word.
            all_ranks.append((similarities > gold_similarity).sum())
    total = len(all_ranks)
    accuracy10 = float(sum(int(r <= 10) for r in all_ranks)) / total
    accuracy100 = float(sum(int(r <= 100) for r in all_ranks)) / total
    return np.median(all_ranks), accuracy10, accuracy100
    def decode(self, vectors_array, output_array, input_array, end_token):
        """Unroll the decoder LSTM over the target sequence and accumulate
        squared-error losses between predicted and target vectors.

        Returns (summed loss expression, number of words processed).
        NOTE(review): this snippet is Python 2 (bare `print` statements).
        """
        # Preprocess the batch
        #print output_array
        out_vectors_array = []
        isents = output_array  #[1:] # transposes
        inps = input_array[1:]
        # Declare all your stuff
        # NOTE(review): w2, v, out_vectors_array and end_token are declared
        # but never used below — confirm whether attention was meant to be on.
        input_mat_array = dynet.concatenate_cols(vectors_array)
        w = dynet.parameter(self.decoder_w)
        b = dynet.parameter(self.decoder_b)
        w1 = dynet.parameter(self.attention_w1)
        w2 = dynet.parameter(self.attention_w2)
        v = dynet.parameter(self.attention_v)
        w1dt = None
        # w1dt is always None here, so this always computes w1 * input_mat_array.
        w1dt = w1dt or w1 * input_mat_array
        last_output_embeddings = lookup(self.output_lookup, 1)
        s = self.dec_lstm.initial_state(
        )  #.add_input(dynet.concatenate([dynet.vecInput(self.state_size *2), last_output_embeddings])) # This can be argued to be some form of cheating I think
        first_flag = 0
        errs = []

        # Okay Go on
        words = 0
        for (curr_vec, inp) in zip(isents, inps):
            #print "Mapping: ",  curr_vec 	, inp
            '''            
             if first_flag == 0:
                 first_flag = 1
                 last_output_embeddings = lookup(self.output_lookup, 1)
                 a = dynet.vecInput(1024)
                 bb = last_output_embeddings
             else:
                 bb = last_output_embeddings
                 #a = self.attend(input_mat_array,s, w1dt)
                 a = dynet.vecInput(1024)
             '''
            # NOTE(review): x_t is only assigned on the first iteration, so
            # every subsequent add_input re-feeds the same x_t. The
            # commented-out block above suggests x_t was meant to be rebuilt
            # (with attention) per step — confirm intent before changing.
            if first_flag == 0:
                first_flag = 1
                a = dynet.vecInput(1024)
                bb = last_output_embeddings
                x_t = dynet.concatenate([a, bb])
            s = s.add_input(x_t)
            #print "Added input"
            y = s.output()
            #print y.value()
            output_vector = w * y + b
            #err = dynet.pickneglogsoftmax(dynet.softmax(output_vector), curr_vec)
            #rr = dynet.pickneglogsoftmax(output_vector, curr_vec)
            # Regression loss: squared L2 distance to the target vector.
            err = dynet.squared_norm(output_vector - curr_vec)
            last_output_embeddings = output_vector
            words += 1
            errs.append(err)
        print errs
        err_v = dynet.esum(errs)
        print "Returning", err_v
        return err_v, words
コード例 #4
0
 def test(sqnorm_original_value, assert_equal):
     """Rebuild the BiLSTM average on a fresh graph and compare its squared
     norm to a previously recorded value (equal or not, per assert_equal)."""
     dy.renew_cg()
     fresh_inputs = make_inputs()
     mean_vec = dy.average(common.get_bilstm_all(fresh_inputs, flstm, blstm))
     observed = dy.squared_norm(mean_vec).value()
     if assert_equal:
         self.assertAlmostEqual(sqnorm_original_value, observed, places=10)
     else:
         self.assertNotAlmostEqual(sqnorm_original_value, observed, places=10)
コード例 #5
0
    def test_get_bilstm_all_update(self):
        """LSTM parameters must stay frozen when get_bilstm_all is called
        with update=False, and must change under the default update=True."""
        pc = dy.ParameterCollection()
        optimizer = dy.AdamTrainer(pc, 0.1)
        fwd_lstm = dy.LSTMBuilder(1, 1, 1, pc)
        bwd_lstm = dy.LSTMBuilder(1, 1, 1, pc)
        arch = CommonArchitecture(Model())

        def build_inputs():
            # Four fixed scalar inputs.
            return [dy.inputTensor([v]) for v in (1.0, 2.0, 3.0, 4.0)]

        def check(reference_value, expect_equal):
            # Recompute the squared norm on a fresh graph and compare.
            dy.renew_cg()
            avg_vec = dy.average(
                arch.get_bilstm_all(build_inputs(), fwd_lstm, bwd_lstm))
            observed = dy.squared_norm(avg_vec).value()
            if expect_equal:
                self.assertAlmostEqual(reference_value, observed, places=10)
            else:
                self.assertNotAlmostEqual(reference_value, observed,
                                          places=10)

        # Pass 1: update=False — the optimizer step must not touch the LSTMs.
        avg_vec = dy.average(
            arch.get_bilstm_all(build_inputs(), fwd_lstm, bwd_lstm, False))
        sqnorm_expr = dy.squared_norm(avg_vec)
        reference_value = sqnorm_expr.value()
        sqnorm_expr.backward()
        optimizer.update()  # Shouldn't update LSTMs.

        check(reference_value, True)

        # Pass 2: default update=True — the optimizer step changes the LSTMs.
        dy.renew_cg()
        avg_vec = dy.average(
            arch.get_bilstm_all(build_inputs(), fwd_lstm, bwd_lstm))
        dy.squared_norm(avg_vec).backward()
        optimizer.update()  # Should update LSTMs.

        check(reference_value, False)
コード例 #6
0
ファイル: test.py プロジェクト: jayantk/cnn
 def test_inputTensor_not_batched(self):
     """Unbatched inputTensor: numpy shape preserved, batch dimension 1,
     values round-trip, and squared norm matches the precomputed value."""
     for idx in range(4):
         dy.renew_cg()
         arr = self.input_vals.reshape(self.shapes[idx])
         expr = dy.inputTensor(arr)
         self.assertEqual(expr.dim()[0], self.shapes[idx],
                          msg="Dimension mismatch")
         self.assertEqual(expr.dim()[1], 1,
                          msg="Dimension mismatch")
         self.assertTrue(np.allclose(expr.npvalue(), arr),
                         msg="Expression value different from initial value")
         self.assertEqual(dy.squared_norm(expr).scalar_value(),
                          self.squared_norm,
                          msg="Value mismatch")
コード例 #7
0
 def test_inputTensor_not_batched(self):
     """Round-trip each test shape through dy.inputTensor without batching."""
     for case in range(4):
         dy.renew_cg()
         source = self.input_vals.reshape(self.shapes[case])
         tensor_expr = dy.inputTensor(source)
         dims, batch = tensor_expr.dim()
         self.assertEqual(dims, self.shapes[case], msg="Dimension mismatch")
         self.assertEqual(batch, 1, msg="Dimension mismatch")
         self.assertTrue(np.allclose(tensor_expr.npvalue(), source),
                         msg="Expression value different from initial value")
         self.assertEqual(dy.squared_norm(tensor_expr).scalar_value(),
                          self.squared_norm, msg="Value mismatch")
コード例 #8
0
    def _batch_forward(self, training_vectors):
        """Forward a batch of (input, one-hot target) pairs.

        Returns (list of prediction arrays, summed loss expression,
        mean per-example loss value).
        """
        losses = []
        preds = []
        for features, target in training_vectors:
            probs = dy.softmax(self._forward(features))
            # Negative log-likelihood of the gold class plus an L2 penalty
            # over every model parameter.
            nll = -dy.log(dy.pick(probs, target.index(1)))
            l2_term = self._l2_param * sum(
                dy.squared_norm(p) for p in self._model.parameters_list())
            losses.append(nll + l2_term)
            preds.append(probs.npvalue())

        mean_loss = np.mean([l.npvalue() for l in losses])
        return preds, dy.esum(losses), mean_loss
コード例 #9
0
 def test_inputTensor_batched_list(self):
     """dy.inputTensor over a list of arrays must yield a batched expression
     whose shape, values, and batch-summed squared norm all match."""
     for k in range(4):
         dy.renew_cg()
         reshaped = self.input_vals.reshape(self.shapes[k])
         slices = [np.asarray(piece).transpose()
                   for piece in reshaped.transpose()]
         batched = dy.inputTensor(slices)
         if k > 0:
             want_shape = self.shapes[k][:-1]
         else:
             want_shape = (1,)
         self.assertEqual(batched.dim()[0], want_shape,
                          msg="Dimension mismatch")
         self.assertEqual(batched.dim()[1], self.shapes[k][-1],
                          msg="Dimension mismatch")
         self.assertTrue(np.allclose(batched.npvalue(), reshaped),
                         msg="Expression value different from initial value")
         self.assertEqual(
             dy.sum_batches(dy.squared_norm(batched)).scalar_value(),
             self.squared_norm, msg="Value mismatch")
コード例 #10
0
  def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    """Apply an affine projection to the source tensor and L2-normalise it."""
    tensor = src.as_tensor()

    height = tensor.dim()[0][0]
    width = 1
    n_batch = tensor.dim()[1]

    weight = dy.parameter(self.pW)
    bias = dy.parameter(self.pb)

    # Flatten to a (height, 1) column per batch element before projecting.
    tensor = dy.reshape(tensor, (height, width), batch_size=n_batch)
    projected = (weight * tensor) + bias
    # Scale the projection to unit L2 norm.
    normalised = dy.cdiv(projected, dy.sqrt(dy.squared_norm(projected)))
    return ExpressionSequence(expr_tensor=normalised)
コード例 #11
0
    def transduce(self, src):
        """Run three conv/ReLU/pool stages over the source tensor and return
        a unit-L2-norm feature vector of size num_filters[2] per batch."""
        tensor = src.as_tensor()

        height = tensor.dim()[0][0]
        width = tensor.dim()[0][1]
        channels = 1
        n_batch = tensor.dim()[1]

        # Add an explicit single-channel axis for conv2d.
        tensor = dy.reshape(tensor, (height, width, channels),
                            batch_size=n_batch)

        # Stage 1: convolution + ReLU + max pooling.
        conv1 = dy.conv2d(tensor,
                          dy.parameter(self.filters1),
                          stride=[self.stride[0], self.stride[0]],
                          is_valid=True)
        pooled1 = dy.maxpooling2d(dy.rectify(conv1), (1, 4), (1, 2),
                                  is_valid=True)

        # Stage 2: convolution + ReLU + max pooling.
        conv2 = dy.conv2d(pooled1,
                          dy.parameter(self.filters2),
                          stride=[self.stride[1], self.stride[1]],
                          is_valid=True)
        pooled2 = dy.maxpooling2d(dy.rectify(conv2), (1, 4), (1, 2),
                                  is_valid=True)

        # Stage 3: convolution + ReLU, then k-max pooling with k=1.
        conv3 = dy.conv2d(pooled2,
                          dy.parameter(self.filters3),
                          stride=[self.stride[2], self.stride[2]],
                          is_valid=True)
        pooled3 = dy.kmax_pooling(dy.rectify(conv3), 1, d=1)

        # L2-normalise and flatten to a (num_filters,) vector per batch.
        unit = dy.cdiv(pooled3, dy.sqrt(dy.squared_norm(pooled3)))
        unit = dy.reshape(unit, (self.num_filters[2], ),
                          batch_size=n_batch)

        return ExpressionSequence(expr_tensor=unit)
コード例 #12
0
def cosine_similarity(a, b):
    """Return dot(a, b) / ||a|| as a DyNet scalar expression.

    NOTE(review): only `a` is normalised here, so this equals true cosine
    similarity only when `b` is already unit-length — confirm at call sites.
    """
    # Resolves the original FIXME: dy.cdiv divides elementwise and accepts
    # two scalar expressions directly, so the dy.inputTensor([1]) reciprocal
    # trick is unnecessary.
    return dy.cdiv(dy.dot_product(a, b), dy.sqrt(dy.squared_norm(a)))
コード例 #13
0
 def score_constituent(self, constituent):
     """Cosine similarity between the scoring vector and the constituent's
     hidden state, sharpened by the inverse temperature."""
     scorer = dy.parameter(self.w_score)
     scorer = dy.cdiv(scorer, dy.sqrt(dy.squared_norm(scorer)))
     hidden = dy.cdiv(constituent.h, dy.sqrt(dy.squared_norm(constituent.h)))
     # Both vectors are unit-norm, so the dot product is the cosine.
     return dy.dot_product(scorer, hidden) * self.inv_temp
コード例 #14
0
        # Encode the character sequence into the encoder state `s`.
        # NOTE(review): this is a fragment — the enclosing loop over training
        # examples (defining t, f, s, lookup, trainer, train_loss, ...) is
        # outside this view. Snippet uses Python 2 `print` syntax.
        for char in t:
            s.add_input(dy.lookup(lookup, int(char)))

        # Decode the feat sequence
        dec_init_state = decoder_lstm.initial_state()
        losses = []
        last_output_embedding = dy.lookup(lookup, 0)
        s = dec_init_state.add_input(last_output_embedding)
        #s = dec_init_state.add_input(dy.concatenate([dy.vecInput(64), last_output_embedding]))
        idx = 0
        W = dy.parameter(decoder_weight)
        b = dy.parameter(decoder_bias)
        # NOTE(review): idx is initialised twice; the second assignment is
        # redundant.
        idx = 0
        while True:
            #print idx
            idx += 1
            # Hard cap on decoding iterations.
            if idx > 10000:
                break
            for feat in f:
                #print idx, feat
                last_output_embedding = s.output()
                pred = dy.affine_transform([b, W, last_output_embedding])
                # Regression loss against the target feature vector.
                losses.append(dy.squared_norm(pred - dy.inputTensor(feat)))
                # NOTE(review): this break only exits the inner for-loop; the
                # while keeps spinning until idx > 10000 without adding more
                # losses once 51 have accumulated — confirm this is intended.
                if len(losses) > 50:
                    break
        loss = dy.esum(losses)
        train_loss += loss.value()
        loss.backward()
        trainer.update()
    print "Train loss : ", train_loss
コード例 #15
0
 def delta_norm(self):
     """Return the average *squared* L2 norm of Δ over the vocabulary.

     NOTE(review): despite the name, dy.squared_norm yields ||Δ||², not
     ||Δ||, and sum_elems over that scalar is a pass-through. Dividing by
     self.size keeps the value comparable across varying vocab sizes.
     """
     # We average over vocabulary, otherwise we are
     # not consistent across varying vocab-sizes.
     return dy.sum_elems(dy.squared_norm(self.delta.embedding)) / self.size
コード例 #16
0
 def transduce(self, x):
     """Unroll the decoder RNN over `x` and return the per-step hidden states.

     In "teacher"/"split" training mode the RNN is guided by reference
     targets; otherwise it unrolls on its own predictions until EOS or the
     maximum length. In "split" mode a second RNN pass is run over
     attention-derived context inputs, optionally with a regularisation
     penalty against the originally chosen inputs.

     Returns an ExpressionSequence of output states with a mask trimmed to
     the realised output length. Side effects: appends to
     self.transducer_losses, sets self._final_states and (in split mode)
     self.split_reg_penalty_expr; reports sentence info when
     self.compute_report is set.
     """
     # some preparations
     output_states = []
     current_state = self._encode_src(x, apply_emb=False)
     if self.mode_transduce == "split":
         # Remember the initial state so the second (split) pass can restart
         # from it.
         first_state = SymmetricDecoderState(
             rnn_state=current_state.rnn_state,
             context=current_state.context)
     batch_size = x.dim()[1]
     done = [False] * batch_size
     out_mask = batchers.Mask(np_arr=np.zeros((batch_size,
                                               self.max_dec_len)))
     out_mask.np_arr.flags.writeable = True
     # teacher / split mode: unfold guided by reference targets
     #  -> feed everything up unto (except) the last token back into the LSTM
     # other modes: unfold until EOS is output or max len is reached
     max_dec_len = self.cur_src.batches[1].sent_len(
     ) if self.mode_transduce in ["teacher", "split"] else self.max_dec_len
     atts_list = []
     generated_word_ids = []
     for pos in range(max_dec_len):
         if self.train and self.mode_transduce in ["teacher", "split"]:
             # unroll RNN guided by reference
             prev_ref_action, ref_action = None, None
             if pos > 0:
                 prev_ref_action = self._batch_ref_action(pos - 1)
             if self.transducer_loss:
                 ref_action = self._batch_ref_action(pos)
             step_loss = self.calc_loss_one_step(
                 dec_state=current_state,
                 batch_size=batch_size,
                 mode=self.mode_transduce,
                 ref_action=ref_action,
                 prev_ref_action=prev_ref_action)
             self.transducer_losses.append(step_loss)
         else:  # inference
             # unroll RNN guided by model predictions
             if self.mode_transduce in ["teacher", "split"]:
                 prev_ref_action = self._batch_max_action(
                     batch_size, current_state, pos)
             else:
                 prev_ref_action = None
             out_scores = self.generate_one_step(
                 dec_state=current_state,
                 mask=out_mask,
                 cur_step=pos,
                 batch_size=batch_size,
                 mode=self.mode_transduce,
                 prev_ref_action=prev_ref_action)
             # Greedy decoding: take the argmax word per batch element.
             word_id = np.argmax(out_scores.npvalue(), axis=0)
             word_id = word_id.reshape((word_id.size, ))
             generated_word_ids.append(word_id[0])
             for batch_i in range(batch_size):
                 if self._terminate_rnn(batch_i=batch_i,
                                        pos=pos,
                                        batched_word_id=word_id):
                     done[batch_i] = True
                     # Mask out all positions after termination.
                     out_mask.np_arr[batch_i, pos + 1:] = 1.0
             if pos > 0 and all(done):
                 # All batch elements finished: record the final step, stop.
                 atts_list.append(self.attender.get_last_attention())
                 output_states.append(current_state.rnn_state.h()[-1])
                 break
         output_states.append(current_state.rnn_state.h()[-1])
         atts_list.append(self.attender.get_last_attention())
     if self.mode_transduce == "split":
         # split mode: use attentions to compute context, then run RNNs over these context inputs
         if self.split_regularizer:
             assert len(atts_list) == len(
                 self._chosen_rnn_inputs
             ), f"{len(atts_list)} != {len(self._chosen_rnn_inputs)}"
         split_output_states = []
         split_rnn_state = first_state.rnn_state
         for pos, att in enumerate(atts_list):
             lstm_input_context = self.attender.curr_sent.as_tensor(
             ) * att  # TODO: better reuse the already computed context vecs
             lstm_input_context = dy.reshape(
                 lstm_input_context, (lstm_input_context.dim()[0][0], ),
                 batch_size=batch_size)
             if self.split_dual:
                 # Mix the attention context with the originally chosen label
                 # input, with optional dropout on either component.
                 lstm_input_label = self._chosen_rnn_inputs[pos]
                 if self.split_dual[0] > 0.0 and self.train:
                     lstm_input_context = dy.dropout_batch(
                         lstm_input_context, self.split_dual[0])
                 if self.split_dual[1] > 0.0 and self.train:
                     lstm_input_label = dy.dropout_batch(
                         lstm_input_label, self.split_dual[1])
                 if self.split_context_transform:
                     lstm_input_context = self.split_context_transform.transform(
                         lstm_input_context)
                 lstm_input_context = self.split_dual_proj.transform(
                     dy.concatenate([lstm_input_context, lstm_input_label]))
             if self.split_regularizer and pos < len(
                     self._chosen_rnn_inputs):
                 # _chosen_rnn_inputs does not contain first (empty) input, so this is in fact like comparing to pos-1:
                 penalty = dy.squared_norm(lstm_input_context -
                                           self._chosen_rnn_inputs[pos])
                 if self.split_regularizer != 1:
                     penalty = self.split_regularizer * penalty
                 self.split_reg_penalty_expr = penalty
             split_rnn_state = split_rnn_state.add_input(lstm_input_context)
             split_output_states.append(split_rnn_state.h()[-1])
         assert len(output_states) == len(split_output_states)
         output_states = split_output_states
     # Trim the mask to the realised output length.
     out_mask.np_arr = out_mask.np_arr[:, :len(output_states)]
     self._final_states = []
     if self.compute_report:
         # for symmetric reporter (this can only be run at inference time)
         assert batch_size == 1
         atts_matrix = np.asarray([att.npvalue() for att in atts_list
                                   ]).reshape(len(atts_list),
                                              atts_list[0].dim()[0][0]).T
         self.report_sent_info({
             "symm_att":
             atts_matrix,
             "symm_out":
             sent.SimpleSentence(
                 words=generated_word_ids,
                 idx=self.cur_src.batches[0][0].idx,
                 vocab=self.cur_src.batches[1][0].vocab,
                 output_procs=self.cur_src.batches[1][0].output_procs),
             "symm_ref":
             self.cur_src.batches[1][0] if isinstance(
                 self.cur_src, batchers.CompoundBatch) else None
         })
     # prepare final outputs
     for layer_i in range(len(current_state.rnn_state.h())):
         self._final_states.append(
             transducers.FinalTransducerState(
                 main_expr=current_state.rnn_state.h()[layer_i],
                 cell_expr=current_state.rnn_state._c[layer_i]))
     out_mask.np_arr.flags.writeable = False
     return expression_seqs.ExpressionSequence(expr_list=output_states,
                                               mask=out_mask)
コード例 #17
0
 def test_duration(self, state, idx):
     """Squared error between the affine duration prediction and idx."""
     weight = dy.parameter(self.duration_weight)
     bias = dy.parameter(self.duration_bias)
     predicted = weight * state.output() + bias
     return dy.squared_norm(predicted - idx)
コード例 #18
0
 def test_duration(self, state, idx):
     """Squared error of the ReLU-clamped duration prediction vs. idx."""
     weight = dy.parameter(self.duration_weight)
     bias = dy.parameter(self.duration_bias)
     # rectify keeps the predicted duration non-negative.
     predicted = dy.rectify(weight * state.output() + bias)
     return dy.squared_norm(predicted - float(idx))
コード例 #19
0
import numpy as np

# Smoke tests for dy.inputTensor over several reshapes of 0..80.
# NOTE(review): `dy` (DyNet) is used below but imported outside this view;
# the snippet is also truncated — the batched-list case at the end builds
# `xb` but its assertions are cut off.
input_vals = np.arange(81)
squared_norm = (input_vals**2).sum()
shapes = [(81, ), (3, 27), (3, 3, 9), (3, 3, 3, 3)]
for i in range(4):
    # Not batched
    dy.renew_cg()
    input_tensor = input_vals.reshape(shapes[i])
    x = dy.inputTensor(input_tensor)
    assert (x.dim()[0] == shapes[i]
            and x.dim()[1] == 1), "Dimension mismatch : {} : ({}, {})".format(
                x.dim(), shapes[i], 1)
    assert (x.npvalue() == input_tensor
            ).all(), "Expression value different from initial value"
    assert dy.squared_norm(x).scalar_value() == squared_norm, "Value mismatch"
    # Batched
    dy.renew_cg()
    xb = dy.inputTensor(input_tensor, batched=True)
    # Batched shape drops the last axis (scalar batches report shape (1,)).
    assert (xb.dim()[0] == (shapes[i][:-1] if i > 0 else
                            (1, )) and xb.dim()[1] == shapes[i][-1]
            ), "Dimension mismatch with batch size : {} : ({}, {})".format(
                xb.dim(), (shapes[i][:-1] if i > 0 else 1), shapes[i][-1])
    assert (xb.npvalue() == input_tensor
            ).all(), "Batched expression value different from initial value"
    assert dy.sum_batches(
        dy.squared_norm(xb)).scalar_value() == squared_norm, "Value mismatch"
    # Batched with list
    dy.renew_cg()
    xb = dy.inputTensor(
        [np.asarray(x).transpose() for x in input_tensor.transpose()])
コード例 #20
0
ファイル: NN.py プロジェクト: makyr90/DL_Syntax_Models
    def L2_req_term(self):
        """Soft orthogonality penalty: ||W @ W.T - I||^2 / 2."""
        weight = dy.parameter(self.W)
        gram = weight * dy.transpose(weight)
        identity = dy.inputTensor(np.eye(self.output))
        return dy.squared_norm(gram - identity) / 2