Example #1
0
    def forward(self, words, labels=None):
        """Compute the language-model NLL over `words`; when `labels` is
        given, also add a per-position label-prediction loss (multitask).

        Returns a DyNet expression: the summed negative log-likelihood.
        """
        multitask = labels is not None

        if self.training:
            # Replace rare words with UNK symbols during training only.
            words = self.word_vocab.unkify(words)

        rnn = self.rnn_builder.initial_state()

        word_ids = [self.word_vocab.index_or_unk(word)
            for word in [START] + words + [STOP]]

        # Predict token t from the RNN state after reading tokens < t.
        prev_embeddings = [self.embeddings[word_id] for word_id in word_ids[:-1]]
        lstm_outputs = rnn.transduce(prev_embeddings)
        logits = self.out(dy.concatenate_to_batch(lstm_outputs))
        nlls = dy.pickneglogsoftmax_batch(logits, word_ids[1:])
        word_nll = dy.sum_batches(nlls)

        if multitask:

            label_ids = [self.label_vocab.index(label) for label in labels]

            # NOTE(review): assumes len(labels) == len(lstm_outputs) - 1,
            # i.e. one label per non-START position — confirm with caller.
            logits = self.f_label(dy.concatenate_to_batch(lstm_outputs[1:]))
            nlls = dy.pickneglogsoftmax_batch(logits, label_ids)
            label_nll = dy.sum_batches(nlls)

            # easy proxy to track progress on this task
            self.correct += np.sum(np.argmax(logits.npvalue(), axis=0) == label_ids)
            self.predicted += len(label_ids)

            nll = word_nll + label_nll

        else:
            nll = word_nll

        return nll
Example #2
0
    def __evaluate(self, lstm_output):
        """Score every (head, modifier) token pair with the fusion layer.

        Builds a batch of length*length pairs where batch element
        k = i * length + j holds (head i, modifier j).

        Returns:
            scores: length x length numpy array of pair scores
            exprs:  matching grid of DyNet expressions, exprs[i][j]
        """
        length = len(lstm_output)

        # (i, j) -> (i * length + j,)
        # i = k / length, j = k % length
        # 1 1 2 2 3 3 4 4 ..
        heads = [
            dn.transpose(self.activation(self.head_dense_layer(
                lstm_output[i]))) for i in range(length)
        ]
        mods = [
            self.activation(self.dep_dense_layer(lstm_output[i]))
            for i in range(length)
        ]
        # Each head repeated `length` times, in index order.
        head_part = dn.concatenate_to_batch(
            [heads[i // len(lstm_output)] for i in range(length * length)])
        # 1 2 3 4 .. 1 2 3 4 ...
        mod_part = dn.concatenate_to_batch([mods[i]
                                            for i in range(length)] * length)

        output = self.fusion_layer(head_part, mod_part)

        # Un-flatten the batch back into an (i, j) grid of expressions.
        exprs = [[
            dn.pick_batch_elem(output, i * length + j) for j in range(length)
        ] for i in range(length)]
        scores = output.npvalue()
        scores = scores.reshape((len(lstm_output), len(lstm_output)))

        return scores, exprs
Example #3
0
    def forward(self, words, spans=None):
        """Language-model NLL over `words`; with `spans` given, also adds a
        span-labelling loss (multitask), either over all possible spans or
        only the gold ones depending on `self.predict_all_spans`.

        :param spans: iterable of (left, right, label) gold spans, or None
        :return: DyNet expression, the summed negative log-likelihood
        """
        multitask = spans is not None

        if self.training:
            # Replace rare words with UNK symbols during training only.
            words = self.word_vocab.unkify(words)

        rnn = self.rnn_builder.initial_state()

        word_ids = [self.word_vocab.index_or_unk(word)
            for word in [START] + words + [STOP]]

        # Predict token t from the RNN state after reading tokens < t.
        prev_embeddings = [self.embeddings[word_id] for word_id in word_ids[:-1]]
        lstm_outputs = rnn.transduce(prev_embeddings)
        logits = self.out(dy.concatenate_to_batch(lstm_outputs))
        nlls = dy.pickneglogsoftmax_batch(logits, word_ids[1:])
        word_nll = dy.sum_batches(nlls)

        if multitask:

            # predict label for each possible span (null for nonexistent spans)
            if self.predict_all_spans:
                gold_spans = {(left, right): self.label_vocab.index(label)
                    for left, right, label in spans}

                # Every contiguous (left, right) span of the sentence.
                all_spans = [(left, left + length)
                    for length in range(1, len(words) + 1)
                    for left in range(0, len(words) + 1 - length)]

                label_ids = [gold_spans.get((left, right), self.label_vocab.size)  # last index is for null label
                    for left, right in all_spans]

                # 'lstm minus' features, same as those of the crf parser
                span_encodings = [lstm_outputs[right] - lstm_outputs[left]
                    for left, right in all_spans]

            # only predict labels for existing spans
            else:
                label_ids = [self.label_vocab.index(label) for _, _, label in spans]

                # 'lstm minus' features, same as those of the crf parser
                span_encodings = [lstm_outputs[right] - lstm_outputs[left]
                    for left, right, label in spans]

            logits = self.f_label(dy.concatenate_to_batch(span_encodings))
            nlls = dy.pickneglogsoftmax_batch(logits, label_ids)
            label_nll = dy.sum_batches(nlls)

            # easy proxy to track progress on this task
            self.correct += np.sum(np.argmax(logits.npvalue(), axis=0) == label_ids)
            self.predicted += len(label_ids)

            nll = word_nll + label_nll

        else:
            nll = word_nll

        return nll
Example #4
0
 def __call__(self, encoder_output, hsz, beam_width=1):
     """Build the initial decoder state from the encoder output.

     With beam search (beam_width > 1) the context and hidden state are
     tiled along the batch dimension so every beam has its own copy.

     :return: (hidden state, zero vector of size hsz, context)
     """
     state = self.get_state(encoder_output)
     ctx = encoder_output.output
     if beam_width > 1:
         # Expand along the batch dimension, one copy per beam.
         tile = lambda e: dy.concatenate_to_batch([e] * beam_width)
         ctx = [tile(c) for c in ctx]
         state = [tile(h) for h in state]
     batchsz = ctx[0].dim()[1]
     return state, dy.zeros((hsz,), batch_size=batchsz), ctx
Example #5
0
    def transduce(self, inputs, masks, predict=False):
        """Run a manually-implemented batched LSTM over `inputs`.

        Each step applies one fused affine transform for all four gates,
        then, per batch element, keeps the new state only where the mask
        is 1 and carries the previous state (emitting zeros) elsewhere.

        :param inputs: list of batched input expressions, one per step
        :param masks: masks[idx][jdx] == 1 iff step idx of batch element
            jdx is a real (unpadded) position
        :param predict: True disables the recurrent/input dropout masks
        :return: list of batched hidden-state outputs, or None if no
            initial state was set
        """

        if not self.init:
            print("No Initial state provided")
            return

        outputs = []
        batch_size = inputs[0].dim()[1]

        for idx, input_tensor in enumerate(inputs):
            recur_s = []
            cell_s = []
            out = []

            hidden = self.hidden_previous
            cell = self.cell_previous
            if not predict:
                # Variational dropout: same masks reused at every step.
                input_tensor = dy.cmult(input_tensor, self.input_drop_mask)
                hidden = dy.cmult(hidden, self.recur_drop_mask)

            # One affine transform computes all four gate pre-activations.
            gates = dy.affine_transform([
                self.b.expr(),
                self.WXH.expr(),
                dy.concatenate([input_tensor, hidden])
            ])
            iga = dy.pickrange(gates, 0, self.recur_size)
            fga = dy.pickrange(gates, self.recur_size, 2 * self.recur_size)
            oga = dy.pickrange(gates, 2 * self.recur_size, 3 * self.recur_size)
            cga = dy.pickrange(gates, 3 * self.recur_size, 4 * self.recur_size)

            ig = dy.logistic(iga)
            fg = dy.logistic(fga)  # +self.forget_bias
            og = dy.logistic(oga)
            c_tilda = dy.tanh(cga)
            new_cell = dy.cmult(cell, fg) + dy.cmult(c_tilda, ig)
            new_hidden = dy.cmult(dy.tanh(new_cell), og)

            # Mask per batch element: padded positions keep the old state
            # and contribute zeros to the output.
            for jdx in range(batch_size):
                if masks[idx][jdx] == 1:
                    h_t = dy.pick_batch_elem(new_hidden, jdx)
                    recur_s.append(h_t)
                    cell_s.append(dy.pick_batch_elem(new_cell, jdx))
                    out.append(h_t)
                else:
                    recur_s.append(dy.pick_batch_elem(hidden, jdx))
                    cell_s.append(dy.pick_batch_elem(cell, jdx))
                    out.append(dy.zeros(self.recur_size))

            new_cell = dy.concatenate_to_batch(cell_s)
            new_hidden = dy.concatenate_to_batch(recur_s)
            self.cell_previous = new_cell
            self.hidden_previous = new_hidden
            outputs.append(dy.concatenate_to_batch(out))

        return outputs
Example #6
0
  def __call__(self, x, z=None, mask=None):
    """Multi-head scaled dot-product attention.

    Self-attention when `z` is None (queries, keys and values all come
    from `x`); otherwise `x` supplies the queries and `z` the keys and
    values.

    :param x: query input, dim ((n_units, n_querys), batch)
    :param z: optional key/value input, dim ((n_units, n_keys), batch)
    :param mask: numpy mask, tiled over the h heads (1 = attend)
    :return: attended output after the finishing linear layer
    """
    h = self.h
    if z is None:  # was `z == None`; identity comparison is the idiom
      Q = self.W_Q(x)
      K = self.W_K(x)
      V = self.W_V(x)
    else:
      Q = self.W_Q(x)
      K = self.W_K(z)
      V = self.W_V(z)

    (n_units, n_querys), batch = Q.dim()
    (_, n_keys), _ = K.dim()

    # Split each projection into h heads, stacked along the batch dim.
    batch_Q = dy.concatenate_to_batch(self.split_rows(Q, h))
    batch_K = dy.concatenate_to_batch(self.split_rows(K, h))
    batch_V = dy.concatenate_to_batch(self.split_rows(V, h))

    # BUG FIX: the originals were `assert(cond, msg)` — asserting a
    # non-empty tuple, which is always true and never fires. Compare the
    # full ((rows, cols), batch) value that .dim() returns instead.
    assert batch_Q.dim() == ((n_units // h, n_querys), batch * h)
    assert batch_K.dim() == ((n_units // h, n_keys), batch * h)
    assert batch_V.dim() == ((n_units // h, n_keys), batch * h)

    mask = np.concatenate([mask] * h, axis=0)
    mask = np.moveaxis(mask, [1, 0, 2], [0, 2, 1])
    mask = dy.inputTensor(mask, batched=True)
    # Scaled dot-product scores; masked-out positions pushed to MIN_VALUE
    # so they vanish under softmax.
    batch_A = (dy.transpose(batch_Q) * batch_K) * self.scale_score
    batch_A = dy.cmult(batch_A, mask) + (1 - mask) * MIN_VALUE

    # dy.softmax(e, d=1) needs at least 2 columns; fall back for len 1.
    sent_len = batch_A.dim()[0][0]
    if sent_len == 1:
      batch_A = dy.softmax(batch_A)
    else:
      batch_A = dy.softmax(batch_A, d=1)

    batch_A = dy.cmult(batch_A, mask)
    assert batch_A.dim() == ((n_querys, n_keys), batch * h)

    if self.attn_dropout and self.dropout != 0.0:
      batch_A = dy.dropout(batch_A, self.dropout)

    # Weighted sum of the values; heads re-joined along the feature axis.
    batch_C = dy.transpose(batch_A * dy.transpose(batch_V))
    assert batch_C.dim() == ((n_units // h, n_querys), batch * h)

    C = dy.concatenate(self.split_batch(batch_C, h), d=0)
    assert C.dim() == ((n_units, n_querys), batch)
    C = self.finishing_linear_layer(C)
    return C
Example #7
0
File: test.py Project: jayantk/cnn
 def test_concatenate_to_batch(self):
     """Splitting a batched lookup and re-concatenating must round-trip."""
     dy.renew_cg()
     batched = dy.lookup_batch(self.p, [0, 1])
     elems = [dy.pick_batch_elem(batched, idx) for idx in (0, 1)]
     rejoined = dy.concatenate_to_batch(elems)
     self.assertTrue(np.allclose(rejoined.npvalue(), self.pval.T))
Example #8
0
 def test_concatenate_to_batch(self):
     """concatenate_to_batch should be the inverse of pick_batch_elem."""
     dy.renew_cg()
     looked_up = dy.lookup_batch(self.p, [0, 1])
     first = dy.pick_batch_elem(looked_up, 0)
     second = dy.pick_batch_elem(looked_up, 1)
     restored = dy.concatenate_to_batch([first, second])
     self.assertTrue(np.allclose(restored.npvalue(), self.pval.T))
Example #9
0
 def produce_parse_forest(self, sentence, required_probability_mass):
     """For each span, collect the most probable labels until their joint
     probability reaches `required_probability_mass`.

     :return: (span_to_labels, forest_prob_mass) where span_to_labels maps
         (start, end) to its retained label values and forest_prob_mass is
         the product of the per-span captured probability mass.
     """
     lstm_outputs = self._featurize_sentence(sentence, is_train=False)
     encodings = []
     spans = []
     for start in range(0, len(sentence)):
         for end in range(start + 1, len(sentence) + 1):
             spans.append((start, end))
             encodings.append(self._get_span_encoding(start, end, lstm_outputs))
     label_scores = self.f_label(dy.concatenate_to_batch(encodings))
     label_scores_reshaped = dy.reshape(label_scores,
                                        (self.label_vocab.size, len(encodings)))
     label_probabilities_np = dy.softmax(label_scores_reshaped).npvalue()
     span_to_labels = {}
     forest_prob_mass = 1
     for index, span in enumerate(spans):
         distribution = list(enumerate(label_probabilities_np[:, index]))
         # BUG FIX: the original sorted descending and then .pop()'d from
         # the tail, which gathered the *least* probable labels first.
         # Sort ascending so .pop() yields the most probable label first.
         distribution.sort(key=lambda x: x[1])
         total_probability = 0
         labels = []
         while total_probability < required_probability_mass:
             (label_index, probability) = distribution.pop()
             labels.append(self.label_vocab.values[label_index])
             total_probability += probability
         forest_prob_mass *= total_probability
         span_to_labels[span] = labels
     return span_to_labels, forest_prob_mass
Example #10
0
 def embed(self, x):
   """Look up embeddings for `x` (a single word id, or a batch of ids),
   applying word dropout, norm fixing and weight noise as configured.

   :param x: word id (int) or batched word ids
   :return: dy.Expression embedding (batched in minibatch mode)
   """
   if self.word_dropout > 0.0 and self.word_id_mask is None:
     batch_size = len(x) if xnmt.batcher.is_batched(x) else 1
     # Sample, per batch element, the vocab ids dropped for this step.
     self.word_id_mask = [set(np.random.choice(self.vocab_size, int(self.vocab_size * self.word_dropout), replace=False)) for _ in range(batch_size)]
   emb_e = dy.parameter(self.embeddings)
   # single mode
   if not xnmt.batcher.is_batched(x):
     if self.train and self.word_id_mask and x in self.word_id_mask[0]:
       ret = dy.zeros((self.emb_dim,))
     else:
       ret = dy.pick(emb_e, index=x)
       if self.fix_norm is not None:  # was `!= None`; use identity test
         # Rescale the embedding to a fixed L2 norm.
         ret = dy.cdiv(ret, dy.l2_norm(ret))
         if self.fix_norm != 1:
           ret *= self.fix_norm
   # minibatch mode
   else:
     ret = dy.concatenate_to_batch([dy.pick(emb_e, index=xi) for xi in x])
     if self.fix_norm is not None:  # was `!= None`; use identity test
       ret = dy.cdiv(ret, dy.l2_norm(ret))
       if self.fix_norm != 1:
         ret *= self.fix_norm
     if self.train and self.word_id_mask and any(x[i] in self.word_id_mask[i] for i in range(len(x))):
       # Zero out embeddings of the dropped word ids, per batch element.
       dropout_mask = dy.inputTensor(np.transpose([[0.0]*self.emb_dim if x[i] in self.word_id_mask[i] else [1.0]*self.emb_dim for i in range(len(x))]), batched=True)
       ret = dy.cmult(ret, dropout_mask)
   if self.train and self.weight_noise > 0.0:
     ret = dy.noise(ret, self.weight_noise)
   return ret
Example #11
0
	def beam_decode(self, encodings, input_len=10, beam_size=1):
		"""Beam-search decode from `encodings`; returns the best word sequence.

		Finished beams (ending in EOS) are carried along unchanged so the
		beam stays comparable in length with the live hypotheses.
		NOTE(review): uses `xrange`, i.e. this snippet is Python-2 code.
		"""
		batch_size = 1
		self.__dec.init_params(encodings, batch_size, self.__train_flag)
		context = dy.zeros((self.__enc.output_dim, ))
		beams = [Beam(self.__dec.dec_state, context, [self.__trg_sos], 0.0)]

		for i in xrange(int(min(self.__max_len, input_len * 1.5))):
			new_beams = []
			p_list = []
			for b in beams:
				if b.words[-1] == self.__trg_eos:
					# Finished beam: placeholder distribution, skipped below.
					p_list.append(dy.ones((self.__trg_vsize, )))
					continue
				hidden, embs, b.state = self.__dec.next([b.words[-1]], b.context, self.__train_flag, b.state)
				b.context, _ = self.attend(encodings, hidden)
				score = self.__dec.score(hidden, b.context, embs, self.__train_flag)
				p_list.append(dy.softmax(score))
			# One npvalue() call evaluates all beams' distributions at once.
			p_list = dy.concatenate_to_batch(p_list).npvalue().T.reshape(-1, self.__trg_vsize)
			for p, b in zip(p_list, beams):
				p = p.flatten() / p.sum()
				kbest = np.argsort(p)
				if b.words[-1] == self.__trg_eos:
					new_beams.append(Beam(b.state, b.context, b.words, b.log_prob))
				else:
					# Expand with the beam_size most probable next words.
					for next_word in kbest[-beam_size:]:
						new_beams.append(Beam(b.state, b.context, b.words + [next_word], b.log_prob + np.log(p[next_word])))
			beams = sorted(new_beams, key=lambda b: b.log_prob)[-beam_size:]
			if beams[-1].words[-1] == self.__trg_eos:
				break
		return beams[-1].words
    def predict_sequence_batched(self,
                                 inputs,
                                 mask_array,
                                 wlen,
                                 predictFlag=False):
        """Character-LSTM word encoder over a batch of words.

        Runs the char LSTM, attends over the masked hidden states, picks
        each word's final cell state, then projects [attention; cell]
        through a linear layer (with dropout unless predicting).

        :param inputs: list of batched char embeddings, one per position
        :param mask_array: per-position batch masks (1 = real char)
        :param wlen: per-batch-element word lengths
        :param predictFlag: True disables all dropout (inference mode)
        :return: batched projected character embedding
        """

        batch_size = inputs[0].dim()[1]
        src_len = len(inputs)

        if not predictFlag:
            # Refresh the LSTM dropout masks for this batch (training only).
            self.charlstm.set_dropouts(self.dropout, self.dropout)
            self.charlstm.set_dropout_masks(batch_size)

        char_fwd = self.charlstm.initial_state(batch_size)
        recur_states, cells = char_fwd.add_inputs(inputs, mask_array,
                                                  predictFlag)

        hidden_states = []
        for idx in range(src_len):
            # Zero out hidden states at padded positions.
            mask = dy.inputVector(mask_array[idx])
            mask_expr = dy.reshape(mask, (1, ), batch_size)
            hidden_states.append(recur_states[idx] * mask_expr)

        H = dy.concatenate_cols(hidden_states)

        if (predictFlag):
            a = dy.softmax(dy.transpose(self.W_atten.expr()) * H)
        else:
            #dropout attention connections(keep the same dim across the sequence)
            a = dy.softmax(
                dy.transpose(self.W_atten.expr()) *
                dy.dropout_dim(H, 1, self.dropout))

        # Final LSTM cell state of each word (zeros for empty words).
        cell_states = []
        for idx in range(batch_size):
            if (wlen[idx] > 0):
                cell = dy.pick_batch_elem(cells[wlen[idx] - 1], idx)
            else:
                cell = dy.zeros(self.ldims)

            cell_states.append(cell)

        C = dy.concatenate_to_batch(cell_states)

        # Attention-weighted sum of hidden states, joined with cell state.
        H_atten = H * dy.transpose(a)
        char_emb = dy.concatenate([H_atten, C])

        if predictFlag:
            proj_char_emb = dy.affine_transform(
                [self.b_linear.expr(),
                 self.W_linear.expr(), char_emb])
        else:
            proj_char_emb = dy.affine_transform([
                self.b_linear.expr(),
                self.W_linear.expr(),
                dy.dropout(char_emb, self.dropout)
            ])

        return proj_char_emb
Example #13
0
 def disc_ll(self):
     """Return the accumulated log-likelihoods as one batched expression,
     then invalidate this state so any later use fails loudly.
     """
     try:
         return dy.concatenate_to_batch(self.ll_buffer)
     finally:
         # Make sure that the state is not used again after the log likelihood is requested
         del self.ll_buffer, self.batch_size, self.counter
Example #14
0
    def _encodings_to_label_log_probabilities(self, encodings, lmbd=None):
        """Score span encodings and return per-label log-probabilities
        (label_vocab.size x len(encodings)). `lmbd`, when given, scales
        the raw scores elementwise before the log-softmax.
        """
        scores = self.f_label(dy.concatenate_to_batch(encodings))
        scores = dy.reshape(scores, (self.label_vocab.size, len(encodings)))
        if lmbd is not None:
            scores = dy.cmult(scores, lmbd)
        return dy.log_softmax(scores)
Example #15
0
 def evaluate_all_states(self, states):
     """Batch-score parser states.

     Returns (action_outputs, relation_outputs); relation scores are
     None unless the relation classifier is enabled.
     """
     batched_inputs = dn.concatenate_to_batch(
         [s.get_input_tensor(self.k, self.empty) for s in states])
     action_outputs = self.action_classifier(batched_inputs)
     relation_outputs = (self.relation_classifier(batched_inputs)
                         if self.relation else None)
     return action_outputs, relation_outputs
Example #16
0
    def aggressive_annotation(self,
                              sentence,
                              sentence_number,
                              span_to_gold_label,
                              low_conf_cutoff,
                              seen):
        """Select spans whose predicted label distribution is uncertain
        enough to request annotation for (active learning).

        Mutates `span_to_gold_label`: spans already in `seen`, or spans
        confidently predicted as empty, are removed. Overlapping
        confidently-on spans are forced into the low-confidence list with
        an inflated entropy of 10 so they get re-annotated.

        :return: list of annotation-request dicts
        """
        if len(span_to_gold_label) == 0:
            return []  # , []
        lstm_outputs = self._featurize_sentence(sentence, is_train=False)
        encodings = []
        spans = span_to_gold_label.keys()
        for (start, end) in spans:
            encodings.append(self._get_span_encoding(start, end, lstm_outputs))
        # One batched pass scores every candidate span.
        label_scores = self.f_label(dy.concatenate_to_batch(encodings))
        label_scores_reshaped = dy.reshape(label_scores,
                                           (self.label_vocab.size, len(encodings)))
        label_probabilities_np = dy.softmax(label_scores_reshaped).npvalue()
        low_confidence_labels = []
        # high_confidence_labels = []
        on_labels = []
        # list(...) materializes before the loop deletes dict entries.
        for index, (start, end) in list(enumerate(spans)):
            distribution = label_probabilities_np[:, index]
            entropy = stats.entropy(distribution)
            oracle_label = span_to_gold_label[(start, end)]
            annotation_request = dict(
                sentence_number=sentence_number,
                left=start,
                right=end,
                entropy=entropy,
                non_constituent_probability=distribution[0],
                label=oracle_label
            )
            if (start, end) in seen:
                del span_to_gold_label[(start, end)]
                continue
            # Uncertain and plausibly a constituent: ask for annotation.
            if low_conf_cutoff < entropy and distribution[self.empty_label_index] < 0.5:
                # annotation_request['label'] = oracle_label
                low_confidence_labels.append(annotation_request)
            # Near-certainly empty: drop from further consideration.
            elif entropy < 10 ** -5 and distribution[self.empty_label_index] > 0.99:
                del span_to_gold_label[(start, end)]
                # if entropy > 10 ** -7:
                #     high_confidence_labels.append(annotation_request)
            if np.max(distribution) > distribution[self.empty_label_index]:
                on_labels.append(annotation_request)

        # Conflicting (overlapping) confident spans both get re-queued.
        for index, label_a in enumerate(on_labels):
            span_a = (label_a['left'], label_a['right'])
            for label_b in on_labels[index + 1:]:
                span_b = (label_b['left'], label_b['right'])
                if check_overlap(span_a, span_b):
                    label_a['entropy'] = 10
                    low_confidence_labels.append(label_a)
                    label_b['entropy'] = 10
                    low_confidence_labels.append(label_b)

        return low_confidence_labels  # , high_confidence_labels
Example #17
0
    def _encodings_to_label_log_probabilities(self, encodings, lmbd=None, alpha=None):
        """Score span encodings and return per-label log-probabilities
        (label_vocab.size x len(encodings)).

        NOTE: `lmbd` and `alpha` are accepted for interface compatibility
        but are not applied here (a score-calibration variant was
        disabled in the original).
        """
        raw_scores = self.f_label(dy.concatenate_to_batch(encodings))
        reshaped = dy.reshape(raw_scores,
                              (self.label_vocab.size, len(encodings)))
        return dy.log_softmax(reshaped)
Example #18
0
    def cal_scores(self, src_encodings, masks, train):
        """Score every (head, modifier, label) triple for a batch.

        For each modifier row (excluding the root at index 0) builds a
        concatenated score vector over all candidate heads and labels;
        masked-out positions are filled with -inf so they can never win.

        :param src_encodings: list of batched encoder outputs, per token
        :param masks: per-token batch masks (1 = real token)
        :param train: apply dropout to head/mod features when True
        :return: list (one per modifier row) of batched score expressions
        """

        src_len = len(src_encodings)
        batch_size = src_encodings[0].dim()[1]
        heads_LRlayer = []
        mods_LRlayer = []
        for encoding in src_encodings:
            heads_LRlayer.append(
                self.leaky_ReLu(self.b_head.expr() +
                                self.W_head.expr() * encoding))
            mods_LRlayer.append(
                self.leaky_ReLu(self.b_mod.expr() +
                                self.W_mod.expr() * encoding))

        heads_labels = []
        # NOTE(review): `heads` and `labels` are never used below.
        heads = []
        labels = []
        neg_inf = dy.constant(1, -float("inf"))
        for row in range(
                1, src_len
        ):  #exclude root @ index=0 since roots do not have heads

            scores_idx = []
            for col in range(src_len):

                # Bucketed signed head-modifier distance, embedded via
                # self.dlookup (negative distances map below mdist).
                dist = col - row
                mdist = self.dist_max
                dist_i = (min(dist, mdist - 1) + mdist if dist >= 0 else int(
                    min(-1.0 * dist, mdist - 1)))
                dist_vec = dy.lookup_batch(self.dlookup, [dist_i] * batch_size)
                if train:
                    input_vec = dy.concatenate([
                        dy.esum([
                            dy.dropout(heads_LRlayer[col], self.dropout),
                            dy.dropout(mods_LRlayer[row], self.dropout)
                        ]), dist_vec
                    ])
                else:
                    input_vec = dy.concatenate([
                        dy.esum([heads_LRlayer[col], mods_LRlayer[row]]),
                        dist_vec
                    ])
                score = self.scoreHeadModLabel(input_vec, train)
                # Pair is valid only if both tokens are unmasked.
                mask = masks[row] and masks[col]
                join_scores = []
                for bdx in range(batch_size):
                    if (mask[bdx] == 1):
                        join_scores.append(dy.pick_batch_elem(score, bdx))
                    else:
                        join_scores.append(
                            dy.concatenate([neg_inf] * self.n_labels))
                scores_idx.append(dy.concatenate_to_batch(join_scores))
            heads_labels.append(dy.concatenate(scores_idx))

        return heads_labels
Example #19
0
 def transduce(self,
               embed_sent: ExpressionSequence) -> List[ExpressionSequence]:
     """Sample segmentations of the character sequence, compose each
     segment into a word, and run the final transducer over the padded
     composed sequences.

     The finally-block stashes per-call state (segment sizes, compose
     output, actions) on `self` for the loss/report machinery, whether
     or not composition succeeds.
     """
     batch_size = embed_sent[0].dim()[1]
     actions = self.sample_segmentation(embed_sent, batch_size)
     sample_size = len(actions)
     embeddings = dy.concatenate(embed_sent.expr_list, d=1)
     embeddings.value()
     #
     composed_words = []
     for i in range(batch_size):
         sequence = dy.pick_batch_elem(embeddings, i)
         # For each sampled segmentations
         for j, sample in enumerate(actions):
             lower_bound = 0
             # Read every 'segment' decision
             for k, upper_bound in enumerate(sample[i]):
                 # NOTE(review): char_sequence is unused; the tuple below
                 # recomputes the same pick_range.
                 char_sequence = dy.pick_range(sequence, lower_bound,
                                               upper_bound + 1, 1)
                 composed_words.append(
                     (dy.pick_range(sequence, lower_bound, upper_bound + 1,
                                    1), j, i, k, lower_bound,
                      upper_bound + 1))
                 #self.segment_composer.set_word_boundary(lower_bound, upper_bound, self.src_sent[i])
                 #composed = self.segment_composer.transduce(char_sequence)
                 #outputs[j][i].append(composed)
                 lower_bound = upper_bound + 1
     outputs = self.segment_composer.compose(composed_words, sample_size,
                                             batch_size)
     # Padding + return
     try:
         if self.length_prior:
             seg_size_unpadded = [[
                 len(outputs[i][j]) for j in range(batch_size)
             ] for i in range(sample_size)]
         enc_outputs = []
         for batched_sampled_sentence in outputs:
             sampled_sentence, segment_mask = self.pad(
                 batched_sampled_sentence)
             expr_seq = ExpressionSequence(
                 expr_tensor=dy.concatenate_to_batch(sampled_sentence),
                 mask=segment_mask)
             sent_context = self.final_transducer.transduce(expr_seq)
             self.final_states.append(
                 self.final_transducer.get_final_states())
             enc_outputs.append(sent_context)
         return CompoundSeqExpression(enc_outputs)
     finally:
         # State capture runs even if composition/transduction raised.
         if self.length_prior:
             self.seg_size_unpadded = seg_size_unpadded
         self.compose_output = outputs
         self.segment_actions = actions
         if not self.train and self.compute_report:
             self.add_sent_for_report({"segment_actions": actions})
Example #20
0
    def fit_partial(self, instances):
        """Run one epoch of minibatched training over `instances`.

        Accumulates per-instance losses; every `self.batch_size` instances
        they are summed into one batched loss, backpropagated and applied.
        A trailing partial batch is flushed the same way, then the
        learning rate is decayed.
        """
        random.shuffle(instances)
        self.iter += 1

        losses = []
        dy.renew_cg()

        total_loss, total_size = 0., 0
        prog = tqdm(desc="Epoch {}".format(self.iter), ncols=80, total=len(instances) + 1)
        for i, ins in enumerate(instances, 1):
            losses.extend(list(self.model.loss(*ins)))
            if i % self.batch_size == 0:
                loss = dy.sum_batches(dy.concatenate_to_batch(losses))
                total_loss += loss.value()
                total_size += len(losses)
                prog.set_postfix(loss=loss.value()/len(losses))

                loss.backward()
                self.opt.update()
                # Fresh computation graph per update keeps memory bounded.
                dy.renew_cg()
                losses = []

            prog.update()

        # Flush the remaining (incomplete) batch, if any.
        # NOTE(review): self.loss is only refreshed when a partial batch
        # remains — confirm this is intended.
        if losses:
            loss = dy.sum_batches(dy.concatenate_to_batch(losses))
            total_loss += loss.value()
            total_size += len(losses)
            self.loss = total_loss / total_size
            prog.set_postfix(loss=self.loss)

            loss.backward()
            self.opt.update()
            dy.renew_cg()

            prog.update()

        self.opt.learning_rate *= self.lr_decay
        prog.close()
Example #21
0
    def get_complete_raw_exprs(self, lstm_output):
        """Raw bilinear scores for every (head, mod) token pair, computed
        as one batch of length*length elements where element
        k = head * length + mod. Applies structural dropout when training.
        """
        length = len(lstm_output)

        as_batch = dn.concatenate_to_batch(lstm_output)
        headfov = self.bilinear_layer.w1.expr() * as_batch
        modfov = self.bilinear_layer.w2.expr() * as_batch

        # Heads repeat as 1 1 2 2 ..; mods cycle as 1 2 .. 1 2 ..
        head_exprs = [dn.pick_batch_elem(headfov, idx) for idx in range(length)]
        mod_exprs = [dn.pick_batch_elem(modfov, idx) for idx in range(length)]
        head_part = dn.concatenate_to_batch(
            [head_exprs[k // length] for k in range(length * length)])
        mod_part = dn.concatenate_to_batch(mod_exprs * length)

        hidden = self.activation(
            head_part + mod_part + self.bilinear_layer.bias.expr())
        struct_dropout = getattr(self.options, "struct_dropout", 0.0)
        if self.options.is_train and struct_dropout > 0:
            hidden = dn.dropout(hidden, struct_dropout)
        return self.dense_layer(hidden)
Example #22
0
    def get_complete_raw_exprs(self, lstm_output):
        """Raw bilinear scores for every (head, mod) token pair.

        Produces one batch of length*length elements, with element
        k = head * length + mod scoring that pair.
        """
        length = len(lstm_output)

        stacked = dn.concatenate_to_batch(lstm_output)
        headfov = self.bilinear_layer.w1.expr() * stacked
        modfov = self.bilinear_layer.w2.expr() * stacked

        # Heads repeat as 1 1 2 2 ..; mods cycle as 1 2 .. 1 2 ..
        per_head = [dn.pick_batch_elem(headfov, t) for t in range(length)]
        per_mod = [dn.pick_batch_elem(modfov, t) for t in range(length)]
        head_part = dn.concatenate_to_batch(
            [per_head[k // length] for k in range(length * length)])
        mod_part = dn.concatenate_to_batch(per_mod * length)

        combined = head_part + mod_part + self.bilinear_layer.bias.expr()
        return self.dense_layer(self.activation(combined))
Example #23
0
def rnn_encode(rnn, input_, lengths):
    """Return the final output for each batch based on lengths.

    :param rnn: dy.RNNBuilder or dy.BiRNNBuilder
    :param input_: List[dy.Expression]
    :param lengths: List[int]

    Returns:
        dy.Expression
    """
    outputs = rnn_forward(rnn, input_)
    # Step lengths[i] - 1 is the last real (unpadded) step of element i.
    last_steps = [
        dy.pick_batch_elem(outputs[length - 1], batch_idx)
        for batch_idx, length in enumerate(lengths)
    ]
    return dy.concatenate_to_batch(last_steps)
Example #24
0
def rnn_encode(rnn, input_, lengths):
    """Return the final output for each batch based on lengths.

    :param rnn: dy.RNNBuilder or dy.BiRNNBuilder
    :param input_: List[dy.Expression]
    :param lengths: List[int]

    Returns:
        dy.Expression
    """
    states = rnn_forward(rnn, input_)
    finals = []
    for batch_idx, seq_len in enumerate(lengths):
        # The last real (unpadded) step for this batch element.
        finals.append(dy.pick_batch_elem(states[seq_len - 1], batch_idx))
    return dy.concatenate_to_batch(finals)
Example #25
0
 def return_spans_and_uncertainties(self,
                                    sentence,
                                    sentence_number,
                                    gold,
                                    use_oracle,
                                    low_conf_cutoff,
                                    pseudo_label_cutoff,
                                    seen):
     """Classify unseen gold spans into annotation requests.

     With `use_oracle`, spans where the oracle label disagrees with the
     prediction (but is still plausible) become low-confidence requests.
     Otherwise, very confident non-empty predictions become
     high-confidence pseudo-labels and high-entropy spans become
     low-confidence requests carrying the oracle label.

     :return: (low_confidence_labels, high_confidence_labels)
     """
     spans = [span for span in get_all_spans(gold).keys() if
              (span, sentence_number) not in seen]
     if len(spans) == 0:
         return []
     lstm_outputs = self._featurize_sentence(sentence, is_train=False)
     encodings = []
     for (start, end) in spans:
         encodings.append(self._get_span_encoding(start, end, lstm_outputs))
     # One batched pass scores every candidate span.
     label_scores = self.f_label(dy.concatenate_to_batch(encodings))
     label_scores_reshaped = dy.reshape(label_scores,
                                        (self.label_vocab.size, len(encodings)))
     label_probabilities_np = dy.softmax(label_scores_reshaped).npvalue()
     low_confidence_labels = []
     high_confidence_labels = []
     for index, (start, end) in enumerate(spans):
         distribution = label_probabilities_np[:, index]
         entropy = stats.entropy(distribution)
         oracle_label = gold.oracle_label(start, end)
         predicted_label_index = distribution.argmax()
         predicted_label = self.label_vocab.value(predicted_label_index)
         annotation_request = dict(
             sentence_number=sentence_number,
             left=start,
             right=end,
             entropy=entropy,
             non_constituent_probability=distribution[0]
         )
         if use_oracle:
             # Disagreement with a still-plausible oracle label: ask.
             oracle_label_index = self.label_vocab.index(oracle_label)
             if oracle_label_index != predicted_label_index and distribution[
                 oracle_label_index] > 0.01:
                 annotation_request['label'] = oracle_label
                 low_confidence_labels.append(annotation_request)
         elif max(distribution) > pseudo_label_cutoff and (
                         distribution[
                             self.empty_label_index] < 0.001 or random.random() < 0.001):
             # Confident prediction: keep as a pseudo-label.
             annotation_request['label'] = predicted_label
             high_confidence_labels.append(annotation_request)
         elif low_conf_cutoff < entropy:
             annotation_request['label'] = oracle_label
             low_confidence_labels.append(annotation_request)
     return low_confidence_labels, high_confidence_labels
Example #26
0
def rnn_forward_with_state(rnn,
                           input_,
                           lengths=None,
                           state=None,
                           batched=True,
                           backward=False):
    """Return the output of the final layers and the final state of the RNN.

    :param rnn: dy.RNNBuilder
    :param input_: List[dy.Expression]
    :param lengths: List[int] Per-batch-element valid lengths; when given,
        the final state is gathered at each element's last valid timestep.
    :param state: List[np.ndarray] The previous state (used in TBPTT)
    :param batched: bool Is the state batched?
    :param backward: bool Is this a backward rnn in a bRNN?

    Returns:
        List[dy.Expression] (Seq_len): The outputs
        List[dy.Expression] (2 * layers if lstm): The state
    """
    if state is not None:
        # Rehydrate the numpy state from the previous chunk into expressions.
        state = [dy.inputTensor(s, batched) for s in state]
    lstm_state = rnn.initial_state(state)
    if backward:
        # Feed the sequence reversed, then un-reverse the outputs.
        states = lstm_state.add_inputs(reversed(input_))
        outputs = list(reversed([s.h()[-1] for s in states]))
        # When going backwards (we pad right) the final state of the rnn
        # is always the last one.
        final_state = states[-1].s()
        return outputs, final_state
    states = lstm_state.add_inputs(input_)
    outputs = [s.h()[-1] for s in states]
    if lengths is None:
        # NOTE: the `backward` case already returned above, so the original
        # re-reversal branches here and below were unreachable dead code.
        return outputs, states[-1].s()
    # Gather, for each batch element, the state at its last valid timestep.
    final_states = [states[l - 1].s() for l in lengths]
    final_state_by_batch = []
    # `per_len_state` avoids shadowing the `state` parameter (original bug-bait).
    for i, per_len_state in enumerate(final_states):
        batch_state = [dy.pick_batch_elem(s, i) for s in per_len_state]
        final_state_by_batch.append(batch_state)
    # Re-batch: one expression per state component, batched over elements.
    final_state = []
    for i in range(len(final_state_by_batch[0])):
        col = dy.concatenate_to_batch([
            final_state_by_batch[j][i]
            for j in range(len(final_state_by_batch))
        ])
        final_state.append(col)
    return outputs, final_state
Example #27
0
    def forward(self, words):
        """Compute the total LM negative log-likelihood of a word sequence."""
        if self.training:
            # Replace rare words with UNK tokens during training.
            words = self.word_vocab.unkify(words)

        word_ids = [self.word_vocab.index_or_unk(w)
                    for w in [START] + words + [STOP]]

        # Predict each token from the prefix ending at the previous token.
        inputs = [self.embeddings[wid] for wid in word_ids[:-1]]
        hidden = self.rnn_builder.initial_state().transduce(inputs)

        scores = self.out(dy.concatenate_to_batch(hidden))
        token_nlls = dy.pickneglogsoftmax_batch(scores, word_ids[1:])
        return dy.sum_batches(token_nlls)
Example #28
0
def pad_embedding(embeddings) -> expression_seqs.ExpressionSequence:
    """Right-pad variable-length embedding sequences into one batched tensor.

    Shorter sequences are extended with zero vectors; a Mask marks the padded
    positions (1 = padding), or is None when no padding was needed.
    """
    target_len = max(len(seq) for seq in embeddings)
    zero_vec = dy.zeros(embeddings[0][0].dim()[0][0])
    mask_array = np.zeros((len(embeddings), target_len), dtype=int)
    any_padded = False
    columns = []
    for seq, row in zip(embeddings, mask_array):
        pad_count = target_len - len(seq)
        if pad_count > 0:
            # Build a padded copy; never mutate the caller's lists.
            seq = seq + [zero_vec] * pad_count
            row[-pad_count:] = 1
            any_padded = True
        columns.append(dy.concatenate_cols(seq))
    return expression_seqs.ExpressionSequence(
        expr_tensor=dy.concatenate_to_batch(columns),
        mask=Mask(mask_array) if any_padded else None)
Example #29
0
 def pad(self, outputs):
     """Right-pad variable-length output sequences to a common length.

     :param outputs: list of lists of dy.Expression (one list per element).
     :return: (batched expression of padded columns, Mask with 1 marking
         padded positions — or None when nothing was padded).
     """
     # Padding
     max_col = max(len(xs) for xs in outputs)
     P0 = dy.vecInput(outputs[0][0].dim()[0][0])
     masks = numpy.zeros((len(outputs), max_col), dtype=int)
     ret = []
     modified = False
     for xs, mask in zip(outputs, masks):
         deficit = max_col - len(xs)
         if deficit > 0:
             # Build a padded copy instead of `xs.extend(...)`: the original
             # mutated the caller's lists inside `outputs` as a side effect.
             xs = xs + [P0] * deficit
             mask[-deficit:] = 1
             modified = True
         ret.append(dy.concatenate_cols(xs))
     mask = Mask(masks) if modified else None
     return dy.concatenate_to_batch(ret), mask
Example #30
0
    def calc_output(self, sents, train_mode):
        """Encode a batch of sentences with char- and word-level BiLSTMs.

        :param sents: iterable of (text, _) pairs; each text is whitespace-split.
        :param train_mode: bool; when True, dropout is applied to embeddings.
        :return: (fw, bw) — forward and backward word-LSTM output sequences.
        """
        # Per-word embedding cache so repeated tokens are encoded once.
        cache = {}

        cf_init, cb_init = [b.initial_state() for b in self.char_lstms]
        wf_init, wb_init = [b.initial_state() for b in self.word_lstms]

        #get input/output for T1
        #get list of tokens
        xs = [['<SOS>'] + x.split() + ['<EOS>'] for (x, _) in sents]

        #fill the word embedding cache
        for x in xs:
            for w in x:
                if w not in cache:
                    # Char-level encoding: iterate UTF-8 *bytes* of the word
                    # (w.encode() yields ints), run char LSTMs both ways.
                    t = [dy.lookup(self.char_lookup, c) for c in w.encode()]
                    fw = [x.output() for x in cf_init.add_inputs(t)]
                    bw = [x.output() for x in cb_init.add_inputs(reversed(t))]
                    # Unknown words fall back to word id 0.
                    wid = 0
                    if w in self.word_to_idx:
                        wid = self.word_to_idx[w]
                    # Combine per the configured representation level.
                    if self.level == Level.HYBRID:
                        cache[w] = dy.lookup(self.word_lookup,
                                             wid) + fw[-1] + bw[-1]
                    if self.level == Level.CHAR:
                        cache[w] = fw[-1] + bw[-1]
                    if self.level == Level.WORD:
                        cache[w] = dy.lookup(self.word_lookup, wid)

        src_len = [len(x) for x in xs]
        max_src_len = np.max(src_len)
        num_words = 0  # NOTE(review): appears unused in this method.

        #build the batch. Be careful!
        # NOTE(review): `x[i]` assumes every sentence has max_src_len tokens;
        # a shorter sentence would raise IndexError here. Presumably callers
        # batch equal-length sentences — confirm.
        src_cws = []
        for i in range(max_src_len):
            src_cws.append(
                dy.concatenate_to_batch([
                    dy.dropout(cache[x[i]], self.dropout_rate)
                    if train_mode else cache[x[i]] for x in xs
                ]))

        # Word-level BiLSTM over the batched timesteps.
        fw = [x.output() for x in wf_init.add_inputs(src_cws)]
        bw = [x.output() for x in wb_init.add_inputs(reversed(src_cws))]

        return (fw, bw)
Example #31
0
def rnn_forward_with_state(rnn, input_, lengths=None, state=None, batched=True, backward=False):
    """Return the output of the final layers and the final state of the RNN.

    :param rnn: dy.RNNBuilder
    :param input_: List[dy.Expression]
    :param lengths: List[int] Per-batch-element valid lengths; when given,
        the final state is gathered at each element's last valid timestep.
    :param state: List[np.ndarray] The previous state (used in TBPTT)
    :param batched: bool Is the state batched?
    :param backward: bool Is this a backward rnn in a bRNN?

    Returns:
        List[dy.Expression] (Seq_len): The outputs
        List[dy.Expression] (2 * layers if lstm): The state
    """
    if state is not None:
        # Rehydrate the numpy state from the previous chunk into expressions.
        state = [dy.inputTensor(s, batched) for s in state]
    lstm_state = rnn.initial_state(state)
    if backward:
        # Feed the sequence reversed, then un-reverse the outputs.
        states = lstm_state.add_inputs(reversed(input_))
        outputs = list(reversed([s.h()[-1] for s in states]))
        # When going backwards (we pad right) the final state of the rnn
        # is always the last one.
        final_state = states[-1].s()
        return outputs, final_state
    states = lstm_state.add_inputs(input_)
    outputs = [s.h()[-1] for s in states]
    if lengths is None:
        # NOTE: the `backward` case already returned above, so the original
        # re-reversal branches here and below were unreachable dead code.
        return outputs, states[-1].s()
    # Gather, for each batch element, the state at its last valid timestep.
    final_states = [states[l - 1].s() for l in lengths]
    final_state_by_batch = []
    # `per_len_state` avoids shadowing the `state` parameter (original bug-bait).
    for i, per_len_state in enumerate(final_states):
        batch_state = [dy.pick_batch_elem(s, i) for s in per_len_state]
        final_state_by_batch.append(batch_state)
    # Re-batch: one expression per state component, batched over elements.
    final_state = []
    for i in range(len(final_state_by_batch[0])):
        col = dy.concatenate_to_batch([final_state_by_batch[j][i] for j in range(len(final_state_by_batch))])
        final_state.append(col)
    return outputs, final_state
Example #32
0
 def transduce(self,
               embed_sent: ExpressionSequence) -> List[ExpressionSequence]:
     """Segment a batch of character embeddings and transduce the segments.

     Samples segmentation boundaries, composes each segment into a word
     embedding, pads them to a common length, and feeds the result to the
     final transducer. Side effects (stored on self for loss/reporting):
     seg_size_unpadded, compose_output, segment_actions.
     """
     batch_size = embed_sent[0].dim()[1]
     # actions[i] is a list of (inclusive) segment end positions for element i.
     actions = self.sample_segmentation(embed_sent, batch_size)
     embeddings = dy.concatenate(embed_sent.expr_list, d=1)
     # Force evaluation before pick_batch_elem slicing below.
     embeddings.value()
     #
     composed_words = []
     for i in range(batch_size):
         sequence = dy.pick_batch_elem(embeddings, i)
         # For each sampled segmentations
         lower_bound = 0
         for j, upper_bound in enumerate(actions[i]):
             if self.no_char_embed:
                 char_sequence = []
             else:
                 # Slice chars [lower_bound, upper_bound] along dim 1.
                 char_sequence = dy.pick_range(sequence, lower_bound,
                                               upper_bound + 1, 1)
             composed_words.append(
                 (char_sequence, i, j, lower_bound, upper_bound + 1))
             lower_bound = upper_bound + 1
     outputs = self.segment_composer.compose(composed_words, batch_size)
     # Padding + return
     # try/finally so the bookkeeping below runs even if transduction fails.
     try:
         if self.length_prior:
             # Number of segments per element, before padding.
             seg_size_unpadded = [
                 len(outputs[i]) for i in range(batch_size)
             ]
         sampled_sentence, segment_mask = self.pad(outputs)
         expr_seq = ExpressionSequence(
             expr_tensor=dy.concatenate_to_batch(sampled_sentence),
             mask=segment_mask)
         return self.final_transducer.transduce(expr_seq)
     finally:
         if self.length_prior:
             self.seg_size_unpadded = seg_size_unpadded
         self.compose_output = outputs
         self.segment_actions = actions
         if not self.train and self.is_reporting():
             if len(actions) == 1:  # Support only AccuracyEvalTask
                 self.report_sent_info({"segment_actions": actions})
Example #33
0
 def calc_loss(self, rewards):
     """Assemble the REINFORCE loss, with optional z-normalization,
     learned baseline, and confidence penalty.

     :param rewards: List[dy.Expression], one (batched) reward per sample.
     :return: FactoredLossExpr with "rl_reinf" and optional
         "rl_baseline" / "rl_confpen" components.
     """
     loss = FactoredLossExpr()
     ## Z-Normalization
     if self.z_normalization:
         reward_batches = dy.concatenate_to_batch(rewards)
         mean_batches = dy.mean_batches(reward_batches)
         std_batches = dy.std_batches(reward_batches)
         rewards = [
             dy.cdiv(reward - mean_batches, std_batches)
             for reward in rewards
         ]
     ## Calculate baseline
     if self.baseline is not None:
         pred_reward, baseline_loss = self.calc_baseline_loss(rewards)
         loss.add_loss("rl_baseline", baseline_loss)
     ## Calculate Confidence Penalty
     if self.confidence_penalty:
         loss.add_loss("rl_confpen",
                       self.confidence_penalty.calc_loss(self.policy_lls))
     ## Calculate Reinforce Loss
     reinf_loss = []
     # Loop through all action in one sequence
     for i, (policy,
             action_sample) in enumerate(zip(self.policy_lls,
                                             self.actions)):
         # Discount the reward if we use baseline.
         # BUG FIX: use a per-step local instead of rebinding `rewards`;
         # the original subtracted pred_reward[i] from the *already
         # discounted* rewards of earlier iterations, so baselines
         # compounded across steps.
         if self.baseline is not None:
             step_rewards = [reward - pred_reward[i] for reward in rewards]
         else:
             step_rewards = rewards
         # Main Reinforce calculation
         sample_loss = []
         for action, reward in zip(action_sample, step_rewards):
             ll = dy.pick_batch(policy, action)
             if self.valid_pos is not None:
                 ll = dy.pick_batch_elems(ll, self.valid_pos[i])
                 reward = dy.pick_batch_elems(reward, self.valid_pos[i])
             sample_loss.append(dy.sum_batches(ll * reward))
         # Take the average of the losses accross multiple samples
         reinf_loss.append(dy.esum(sample_loss) / len(sample_loss))
     loss.add_loss("rl_reinf", self.weight * -dy.esum(reinf_loss))
     ## the composed losses
     return loss
Example #34
0
    def predict_one(self, src, encoder_outputs, **kwargs):
        """Beam-search decode one source from encoder outputs.

        :param src: source input (unused directly here; context comes from
            encoder_outputs).
        :param encoder_outputs: encoder result carrying context and src_mask.
        :param kwargs: 'beam' (width K, default 5), 'mxlen' (max steps, 100).
        :return: (paths, scores) — K decoded id sequences (GO stripped) and
            their running log-scores.
        """
        K = int(kwargs.get('beam', 5))
        mxlen = int(kwargs.get('mxlen', 100))

        # One hypothesis per beam, each starting with GO.
        paths = [[Offsets.GO] for _ in range(K)]
        # Which beams are done?
        done = np.array([False] * K)
        scores = np.array([0.0]*K)
        hidden, output_i, context = self.arc_policy(encoder_outputs, self.hsz, beam_width=K)
        num_states = len(hidden)
        rnn_state = self.decoder_rnn.initial_state(hidden)
        self.attn_cache(context)
        src_mask = encoder_outputs.src_mask

        for i in range(mxlen):
            # Embed the previous token of every beam, batched.
            dst_last = np.array([path[-1] for path in paths]).reshape(1, K)
            embed_i = self.tgt_embeddings.encode(dst_last)[-1]
            embed_i = self.input_i(embed_i, output_i)
            rnn_state = rnn_state.add_input(embed_i)
            rnn_output_i = rnn_state.output()
            output_i = self.attn(rnn_output_i, src_mask)
            wll = self.prediction([output_i])[-1].npvalue()  # (V,) K
            V = wll.shape[0]
            if i > 0:
                # expanded_history = np.expand_dims(scores, -1)
                # done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
                # sll = np.multiply(wll.T, done_mask) + expanded_history

                # Zero out scores of finished beams, push them to -1e4 so
                # they are not expanded, then add the running history.
                wll = wll.T
                expanded_history = np.expand_dims(scores, -1)
                done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
                done_mask_inv = (done_mask != 1).astype(np.uint8)
                # NOTE(review): eos_mask is all zeros, so `done_mask & eos_mask`
                # is always 0 and `mask` is all ones — the removed_eos step
                # looks like a no-op. Confirm intended EOS-column handling.
                eos_mask = np.zeros((1, V)).astype(np.uint8)
                mask = ((done_mask & eos_mask) != 1).astype(np.uint8)
                masked_wll = np.multiply(done_mask, wll)
                negged_wll = masked_wll + (done_mask_inv * -1e4)
                removed_eos = np.multiply(mask, negged_wll)
                sll = removed_eos + expanded_history
            else:
                # First step: all beams share the same state, use raw scores.
                sll = wll.T

            # Flatten (K, V) so top-k runs over every (beam, word) pair.
            flat_sll = sll.reshape(-1)

            bests = topk(K, flat_sll)
            best_idx_flat = np.array(list(bests.keys()))
            best_beams = best_idx_flat // V
            best_idx = best_idx_flat % V

            new_paths = []
            new_done = []

            hidden = rnn_state.s()
            new_hidden = [[] for _ in range(num_states)]
            for j, best_flat in enumerate(best_idx_flat):
                beam_id = best_beams[j]
                best_word = best_idx[j]
                # NOTE(review): `done[j]` / `done[j] = True` index by the new
                # slot j while the source beam is beam_id; mixing the two
                # looks like an index bug — verify against done[beam_id].
                if done[j]:
                    new_paths.append(paths[beam_id] + [Offsets.EOS])
                else:
                    new_paths.append(paths[beam_id] + [best_word])
                if best_word == Offsets.EOS:
                    done[j] = True
                new_done.append(done[beam_id])
                scores[j] = bests[best_flat]
                # For each path, we need to pick that out and add it to the hiddens
                # This will be (c1, c2, ..., h1, h2, ...)
                for h_i, h in enumerate(hidden):
                    new_hidden[h_i].append(dy.pick_batch_elem(h, beam_id))

            # Re-batch the selected beams' states and continue decoding.
            done = np.array(new_done)
            new_hidden = [dy.concatenate_to_batch(new_h) for new_h in new_hidden]
            paths = new_paths
            rnn_state = self.decoder_rnn.initial_state(new_hidden)

        # Strip the GO symbol from every path.
        paths = np.stack([p[1:] for p in paths])
        return paths, scores