Esempio n. 1
0
 def generate(self, h_a, trg, maxlen=100):
     """Greedily decode with attention over the encoder states ``h_a``.

     Args:
         h_a: list of encoder state expressions. It is padded with zero
             vectors up to ``self.max_len`` entries on a copy, so the
             caller's list is not modified.
         trg: not used by this method; kept for interface compatibility.
         maxlen: maximum number of decoding steps.

     Returns:
         A flat list holding, for every step, the score expression
         (``self.pred * decoder_output``) followed by the argmax index of
         those scores. Decoding stops early once the argmax index is 1.
     """
     # Pad on a copy: ``h_a += ...`` would extend the caller's list in place.
     padding = [dy.zeros(self.hdim)] * (self.max_len - len(h_a))
     h_ak = dy.concatenate(list(h_a) + padding, 1)
     # The parameters do not change between steps, so bind them once.
     pre_attend = dy.parameter(self.pre_attend)
     V = dy.parameter(self.v)
     U = dy.parameter(self.u)
     pred = dy.parameter(self.pred)
     context = h_ak * pre_attend
     prev_out = dy.zeros((self.hdim))
     outputs = []
     s = self.decoder_rnn.initial_state()
     for _ in range(maxlen):
         # Attention weights over the (padded) encoder states.
         tmp = dy.tanh(dy.colwise_add(context, V * prev_out))
         attention_weights = dy.softmax(dy.transpose(U * tmp))
         emb = dy.concatenate([h_ak * attention_weights, prev_out])
         s = s.add_input(emb)
         prev_out = s.output()
         # Build the score expression once and reuse it for both outputs.
         scores = pred * prev_out
         outputs.append(scores)
         act_value = np.argmax(scores.value())
         outputs.append(act_value)
         if act_value == 1:
             break
     return outputs
Esempio n. 2
0
 def next_action(self, state, src_len, enc_len):
     """Choose the next READ/WRITE action for ``state``.

     Without a policy (``self.policy_learning is None``) the rule is
     fixed: READ while ``state.has_been_read < src_len``, WRITE
     otherwise. With a policy, a single action is obtained from
     ``self.policy_learning.sample_action``; READ is forced when
     ``enc_len == 0`` and WRITE when ``enc_len == src_len``.
     """
     if self.policy_learning is None:
         still_reading = state.has_been_read < src_len
         return self.Action.READ if still_reading else self.Action.WRITE

     # Boundary constraints. The end-of-source rule is applied last, so
     # it wins when both conditions hold.
     force_action = None
     if enc_len == 0:
         force_action = [self.Action.READ.value]  # No writing at the beginning.
     if enc_len == src_len:
         force_action = [self.Action.WRITE.value]  # No reading at the end.

     # Policy input: encoder, context and output states. Missing pieces
     # are replaced by zeros with the encoder state's dimensions.
     encoder_state = state.encoder_state.output()
     enc_dim = encoder_state.dim()
     if state.context_state:
         context_state = state.context_state.as_vector()
     else:
         context_state = dy.zeros(*enc_dim)
     if state.output_embed:
         output_embed = state.output_embed
     else:
         output_embed = dy.zeros(*enc_dim)
     input_state = dy.concatenate([encoder_state, context_state, output_embed])

     # Sample when training, otherwise take the argmax; keep one action.
     sampled = self.policy_learning.sample_action(
         input_state,
         predefined_actions=force_action,
         argmax=not self.train)
     return self.Action(sampled[0])
Esempio n. 3
0
def extract_features(stack: List[int], buffer: List[int], bilstm_repr):
    """Build the feature expression for a parser configuration.

    The last ``NO_STACK_FEATURES`` positions of ``stack`` and the first
    ``NO_BUFFER_FEATURES`` positions of ``buffer`` are looked up in
    ``bilstm_repr``. Missing stack slots are filled with zero vectors on
    the left, missing buffer slots with zero vectors on the right.

    Returns:
        The concatenation [stack features..., buffer features...].
    """
    # BiLSTM vectors of the tokens that are actually available.
    stack_vectors = [bilstm_repr[pos] for pos in stack[-NO_STACK_FEATURES:]]
    buffer_vectors = [bilstm_repr[pos] for pos in buffer[:NO_BUFFER_FEATURES]]

    # A non-positive count multiplies to an empty list, i.e. no padding.
    stack_padding = [dy.zeros(BILSTM_STATE_SIZE)] * (
        NO_STACK_FEATURES - len(stack_vectors))
    buffer_padding = [dy.zeros(BILSTM_STATE_SIZE)] * (
        NO_BUFFER_FEATURES - len(buffer))

    return dy.concatenate(
        stack_padding + stack_vectors + buffer_vectors + buffer_padding)
Esempio n. 4
0
def _vaswani_model_init(e):
    """Encode example ``e`` and build the initial model-state dictionary.

    Args:
        e: mapping with ``"tk_words"`` (word indices) and, when
            ``cfg["use_postags"]`` is set, ``"tk_postags"`` (POS indices).

    Returns:
        A dict with the projected encoder output (``"aux_c2"``), the
        initial LM state list (``"beam_lm_states"``), a zero LM hidden
        matrix (``"beam_lm_hs"``), the position counter ``"idx"`` and,
        when ``cfg["accumulate_scores"]`` is set, a zero ``"acc_scores"``.
    """
    w_embs = [w2e[idx] for idx in e["tk_words"]]
    if cfg["use_postags"]:
        pos_embs = [pos2e[idx] for idx in e["tk_postags"]]
        # enumerate replaces the Python-2-only ``xrange`` index loop and
        # still raises IndexError if there are fewer POS tags than words.
        i_embs = [
            dy.concatenate([w_emb, pos_embs[i]])
            for i, w_emb in enumerate(w_embs)
        ]
    else:
        i_embs = w_embs

    f_init = fwd.initial_state()
    b_init = bwd.initial_state()
    lm_init = lm.initial_state()

    f_hs = dy.concatenate_cols(f_init.transduce(i_embs))
    # Feed the backward RNN a real list (not a ``reversed`` iterator), then
    # flip its outputs back into sentence order.
    b_hs = dy.concatenate_cols(b_init.transduce(i_embs[::-1])[::-1])
    out_c1 = dy.rectify(c1_Wf * f_hs + c1_Wb * b_hs)
    aux_c2 = c2_Wc * out_c1

    m = {
        "aux_c2": aux_c2,
        "beam_lm_states": [lm_init],
        "beam_lm_hs": dy.zeros((cfg["lm_h_dim"], 1)),
        "idx": 0,
    }
    if cfg["accumulate_scores"]:
        m["acc_scores"] = dy.zeros((1, 1))

    return m
Esempio n. 5
0
    def _initialize_discourse_states(self):
        """Return the initial discourse state and fresh discourse LSTM states.

        Every LSTM in ``self.discourse_lstms`` is started from two zero
        vectors of size ``lstm.spec[2]``.
        """
        discourse_state = self.initial_discourse_state

        lstm_states = []
        for lstm in self.discourse_lstms:
            state_size = lstm.spec[2]
            zero_pair = [dy.zeros((state_size,)), dy.zeros((state_size,))]
            lstm_states.append(lstm.initial_state(zero_pair))
        return discourse_state, lstm_states
Esempio n. 6
0
        def helper():
            """Score the tokens and return ``(tags, total_loss)``.

            With ``use_crf`` the tags come from Viterbi decoding and the
            loss is the Viterbi score minus the gold score (zero when not
            training or when the prediction already equals ``gold``).
            Without it, training sums the per-token negative
            log-softmax losses (no tags are produced), and prediction takes
            the argmax tag per token with a zero loss.
            """
            # The first and last LSTM outputs are not scored.
            label_scores = [self.f_label(x) for x in lstm_outputs[1:-1]]

            if use_crf:
                tags, viterbi_scores = viterbi_decoding(label_scores, gold)
                if is_train and tags != gold:
                    gold_scores = forced_decoding(label_scores, gold)
                    return tags, viterbi_scores - gold_scores
                return tags, dy.zeros(1)

            if is_train:
                losses = []
                for label_score, tag in zip(label_scores, gold):
                    tag_index = self.tag_vocab.index(tag)
                    losses.append(dy.pickneglogsoftmax(label_score, tag_index))
                return [], dy.esum(losses)

            total_loss = dy.zeros(1)
            distributions = [dy.softmax(ls) for ls in label_scores]
            probs = [dist.npvalue() for dist in distributions]
            tags = [self.tag_vocab.value(np.argmax(prob)) for prob in probs]
            return tags, total_loss
Esempio n. 7
0
 def initial_state(self):
     """Create the starting ``LSTMState`` with zero hidden and memory vectors.

     As a side effect, ``self.Wi``, ``self.Wf``, ``self.Wo`` and
     ``self.Wu`` are set to the expressions of the corresponding
     ``W_*`` parameters.
     """
     zero_hidden = dy.zeros((self.hidden_dim, ))
     zero_memory = dy.zeros((self.hidden_dim, ))
     # Bind the gate parameters as expressions.
     self.Wi = self.W_i.expr()
     self.Wf = self.W_f.expr()
     self.Wo = self.W_o.expr()
     self.Wu = self.W_u.expr()
     return LSTMState(self, -1, hidden=zero_hidden, memory=zero_memory)
Esempio n. 8
0
    def transduce(self, input, hx=None, cx=None):
        """Run ``self.step`` over ``input`` and collect every state.

        ``hx`` and ``cx`` default to zero vectors of size ``self.n_hidden``.

        Returns:
            ``(hidden_states, cell_states)``, one entry per input element.
        """
        if hx is None:
            hx = dy.zeros((self.n_hidden))
        if cx is None:
            cx = dy.zeros((self.n_hidden))

        hidden_states, cell_states = [], []
        for step_input in input:
            hx, cx = self.step(step_input, hx, cx)
            hidden_states.append(hx)
            cell_states.append(cx)
        return hidden_states, cell_states
 def gru_step(self, word, h_prev):
     """Apply one GRU step for ``word`` and return the new hidden state.

     A missing previous state (``None``) and a word absent from
     ``self.vocabs`` are both replaced by zero vectors.
     """
     h_prev = dy.zeros(HIDDEN_SIZE) if h_prev is None else h_prev
     if word in self.vocabs:
         x = dy.lookup(self.embedding, self.vocabs[word])
     else:
         x = dy.zeros(EMBEDDING_DIMENSION)

     reset_gate = dy.logistic(self.w_xr * x + self.w_hr * h_prev + self.b_r)
     update_gate = dy.logistic(self.w_xz * x + self.w_hz * h_prev + self.b_z)
     gated_prev = dy.cmult(reset_gate, h_prev)
     candidate = dy.tanh(self.w_xh * x + self.w_hh * gated_prev + self.b_h)
     # Interpolate between the previous state and the candidate.
     return dy.cmult(1 - update_gate, h_prev) + dy.cmult(update_gate, candidate)
Esempio n. 10
0
    def evaluate(self, input_sentences, labels):
        """Score a document with dropout disabled and compare with ``labels``.

        Each sentence is embedded as the average of its word-level RNN
        outputs; the sentence-level RNN then runs over those embeddings and
        a probability is computed per sentence.

        Returns:
            ``(loss_value, pred_labels, correct, total)`` where the loss is
            the summed negative log-likelihood of ``labels``.
        """
        dy.renew_cg()

        for rnn in (self.word_rnn, self.sent_rnn):
            rnn.disable_dropout()

        # Sentence embeddings: mean of the word-level RNN outputs.
        embed_sents = []
        for input_sentence in input_sentences:
            word_ids = self._preprocess_input(input_sentence, self.word_to_ix)
            embed_words = self._embed_sentence(word_ids)
            word_rnn_outputs = self._run_rnn(self.word_rnn, embed_words)
            embed_sents.append(dy.average(word_rnn_outputs))

        rnn_outputs = self._run_rnn(self.sent_rnn, embed_sents)

        # Document representation from the averaged sentence states.
        doc_output_w = dy.parameter(self.doc_output_w)
        doc_output_b = dy.parameter(self.doc_output_b)
        doc_output = dy.tanh(
            doc_output_w * dy.average(rnn_outputs) + doc_output_b)

        sum_output = dy.zeros(self.args.sent_hidden_dim)
        loss = dy.zeros(1)
        pred_labels = []
        correct = 0
        for i, rnn_output in enumerate(rnn_outputs):
            abspos_embed = dy.lookup(self.abspos_embeddings, self.abspos_ix[i])
            relpos_embed = dy.lookup(self.relpos_embeddings, self.relpos_ix[i])

            prob = self._get_probs(rnn_output, doc_output, sum_output,
                                   abspos_embed, relpos_embed)
            # Running probability-weighted sum of the sentence states.
            sum_output += dy.cmult(prob, rnn_output)

            pred_label = self._predict(prob)
            pred_labels.append(pred_label)
            if pred_label == labels[i]:
                correct += 1

            # Likelihood of the reference label under ``prob``.
            if labels[i] == 1:
                likelihood = prob
            else:
                likelihood = dy.scalarInput(1) - prob
            loss -= dy.log(likelihood)

        total = len(pred_labels)
        return loss.value(), pred_labels, correct, total
Esempio n. 11
0
 def embed(self, x):
   """Return the embedding expression for a word id or a batch of ids.

   While training, ids contained in ``self.word_id_mask`` are replaced by
   zero vectors and, when ``self.weight_noise`` is positive, ``dy.noise``
   is applied to the result. When ``self.fix_norm`` is set, looked-up
   embeddings are divided by their L2 norm and scaled by ``fix_norm``.
   """
   batched = xnmt.batcher.is_batched(x)

   # Lazily draw one set of dropped word ids per batch element.
   if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
     n_masks = x.batch_size() if batched else 1
     n_dropped = int(self.vocab_size * self.word_dropout)
     self.word_id_mask = [
       set(np.random.choice(self.vocab_size, n_dropped, replace=False))
       for _ in range(n_masks)
     ]

   def rescale(vec):
     # Normalise to unit L2 norm, then scale to the requested norm.
     if self.fix_norm is None:
       return vec
     vec = dy.cdiv(vec, dy.l2_norm(vec))
     if self.fix_norm != 1:
       vec *= self.fix_norm
     return vec

   use_mask = self.train and self.word_id_mask
   if not batched:
     # single mode
     if use_mask and x in self.word_id_mask[0]:
       ret = dy.zeros((self.emb_dim,))
     else:
       ret = rescale(self.embeddings[x])
   else:
     # minibatch mode
     ret = rescale(self.embeddings.batch(x))
     if use_mask:
       dropped = [x[i] in self.word_id_mask[i] for i in range(x.batch_size())]
       if any(dropped):
         # One all-zero or all-one column per batch element.
         columns = [[0.0]*self.emb_dim if is_dropped else [1.0]*self.emb_dim
                    for is_dropped in dropped]
         dropout_mask = dy.inputTensor(np.transpose(columns), batched=True)
         ret = dy.cmult(ret, dropout_mask)

   if self.train and self.weight_noise > 0.0:
     ret = dy.noise(ret, self.weight_noise)
   return ret
Esempio n. 12
0
 def embed(self, x: Union[batchers.Batch, numbers.Integral]) -> dy.Expression:
   """Pick the embedding(s) for ``x`` from the embedding parameter matrix.

   ``x`` is either a single word id or a batch of ids. While training,
   ids contained in ``self.word_id_mask`` yield zero vectors and, when
   ``self.weight_noise`` is positive, ``dy.noise`` is applied. When
   ``self.fix_norm`` is set, picked embeddings are divided by their L2
   norm and scaled by ``fix_norm``.
   """
   batched = batchers.is_batched(x)

   # Lazily draw one set of dropped word ids per batch element.
   if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
     n_masks = x.batch_size() if batched else 1
     n_dropped = int(self.vocab_size * self.word_dropout)
     self.word_id_mask = [
       set(np.random.choice(self.vocab_size, n_dropped, replace=False))
       for _ in range(n_masks)
     ]

   emb_e = dy.parameter(self.embeddings)
   use_mask = self.train and self.word_id_mask

   if not batched and use_mask and x in self.word_id_mask[0]:
     # single mode, dropped word: no lookup and no normalisation
     ret = dy.zeros((self.emb_dim,))
   else:
     ret = dy.pick_batch(emb_e, x) if batched else dy.pick(emb_e, index=x)
     if self.fix_norm is not None:
       ret = dy.cdiv(ret, dy.l2_norm(ret))
       if self.fix_norm != 1:
         ret *= self.fix_norm
     if batched and use_mask:
       # minibatch mode: zero the columns of dropped words
       dropped = [x[i] in self.word_id_mask[i] for i in range(x.batch_size())]
       if any(dropped):
         columns = [[0.0]*self.emb_dim if is_dropped else [1.0]*self.emb_dim
                    for is_dropped in dropped]
         dropout_mask = dy.inputTensor(np.transpose(columns), batched=True)
         ret = dy.cmult(ret, dropout_mask)

   if self.train and self.weight_noise > 0.0:
     ret = dy.noise(ret, self.weight_noise)
   return ret
Esempio n. 13
0
    def _next_action(self, state, src_len, force_action=None) -> PolicyAction:
        """Decide the next action, honouring forced actions.

        When no ``force_action`` is given, READ is forced if nothing has
        been read yet and WRITE if the whole source has been read. With
        ``self.read_before_write`` the action is always forced: READ until
        the source is exhausted, then WRITE. The action comes from
        ``self.policy_network`` when one is set, otherwise it is the
        forced action wrapped in a ``PolicyAction``.
        """
        already_read = state.has_been_read

        # Sanity check on the sequence boundaries.
        if force_action is None:
            if already_read == 0:
                force_action = self.Action.READ.value  # No writing at the beginning.
            if already_read == src_len:
                force_action = self.Action.WRITE.value  # No reading at the end.

        if self.read_before_write:
            if already_read < src_len:
                force_action = self.Action.READ.value
            else:
                force_action = self.Action.WRITE.value

        if self.policy_network is None:
            policy_action = PolicyAction(force_action)
        else:
            # Policy input: encoder and decoder states, zeros when missing.
            enc_dim = self.src_encoding[0].dim()
            if state.encoder_state is not None:
                encoder_state = state.encoder_state
            else:
                encoder_state = dy.zeros(*enc_dim)
            if state.decoder_state is not None:
                decoder_state = state.decoder_state.as_vector()
            else:
                decoder_state = dy.zeros(*enc_dim)
            policy_input = dy.nobackprop(
                dy.concatenate([encoder_state, decoder_state]))

            predefined_action = None if force_action is None else [force_action]
            # Sample / Calculate a single action
            policy_action = self.policy_network.sample_action(
                policy_input,
                predefined_actions=predefined_action,
                argmax=not (self.train and self.policy_sample))
            policy_action.single_action()

        # TODO(philip30): Update this value when you add more actions
        if policy_action.content > 2:
            import random
            policy_action.content = random.randint(0, 1)

        return policy_action
Esempio n. 14
0
	def beam_decode(self, encodings, input_len=10, beam_size=1):
		"""Beam-search decode one input and return the best word id sequence.

		At most ``min(max_len, input_len * 1.5)`` steps are taken. The
		search stops as soon as the highest-scoring hypothesis ends with
		the end-of-sentence id.
		"""
		batch_size = 1
		self.__dec.init_params(encodings, batch_size, self.__train_flag)
		context = dy.zeros((self.__enc.output_dim, ))
		beams = [Beam(self.__dec.dec_state, context, [self.__trg_sos], 0.0)]

		max_steps = int(min(self.__max_len, input_len * 1.5))
		for _step in xrange(max_steps):
			# Next-word distribution per hypothesis; finished hypotheses get
			# a placeholder so that rows stay aligned with ``beams``.
			distributions = []
			for beam in beams:
				if beam.words[-1] == self.__trg_eos:
					distributions.append(dy.ones((self.__trg_vsize, )))
				else:
					hidden, embs, beam.state = self.__dec.next([beam.words[-1]], beam.context, self.__train_flag, beam.state)
					beam.context, _ = self.attend(encodings, hidden)
					score = self.__dec.score(hidden, beam.context, embs, self.__train_flag)
					distributions.append(dy.softmax(score))
			prob_rows = dy.concatenate_to_batch(distributions).npvalue().T.reshape(-1, self.__trg_vsize)

			# Expand every open hypothesis with its ``beam_size`` best words.
			candidates = []
			for row, beam in zip(prob_rows, beams):
				if beam.words[-1] == self.__trg_eos:
					candidates.append(Beam(beam.state, beam.context, beam.words, beam.log_prob))
					continue
				probs = row.flatten() / row.sum()
				for next_word in np.argsort(probs)[-beam_size:]:
					extended_words = beam.words + [next_word]
					extended_score = beam.log_prob + np.log(probs[next_word])
					candidates.append(Beam(beam.state, beam.context, extended_words, extended_score))

			# Keep the best ``beam_size`` candidates, best one last.
			beams = sorted(candidates, key=lambda cand: cand.log_prob)[-beam_size:]
			if beams[-1].words[-1] == self.__trg_eos:
				break
		return beams[-1].words
Esempio n. 15
0
 def sentence_block_embed(self, embed, x, mask):
   """Look up embeddings for the id matrix ``x``, zeroing masked slots.

   ``x`` and ``mask`` are flattened to ``batch * length`` entries; a slot
   whose mask value equals 1 gets a zero vector instead of a lookup. The
   columns are concatenated and reshaped to ``(units, length)`` with
   ``batch`` as batch size.
   """
   batch, length = x.shape
   flat_mask = mask.reshape((batch * length,))
   _, units = embed.shape()  # According to updated Dynet
   flat_ids = x.reshape((batch * length,))

   columns = []
   for id_, mask_value in zip(flat_ids, flat_mask):
     if mask_value == 1:
       columns.append(dy.zeros(units))
     else:
       columns.append(dy.lookup(embed, id_))

   block = dy.concatenate_cols(columns)
   return dy.reshape(block, (units, length), batch_size=batch)
def adapt(s2s, trainer, X, Y, n_epochs, check_train_error_every):
    """Adapt ``s2s`` on the pairs ``(X, Y)`` with one update per epoch.

    Every epoch sums ``s2s.calculate_user_loss`` over all pairs, runs the
    backward pass and a trainer update. Every ``check_train_error_every``
    epochs (and only when ``X`` is non-empty) the per-token loss,
    perplexity and throughput are logged.
    """
    timer = utils.Timer()
    log = utils.Logger(True)
    n_train = len(X)
    # Total target length minus one token per target sequence.
    n_tokens = sum(len(y) for y in Y) - len(Y)
    s2s.set_train_mode()
    s2s.reset_usr_vec()

    for epoch in range(n_epochs):
        dy.renew_cg()
        loss = dy.zeros((1, ))
        timer.restart()
        # Accumulate the loss of every sample in a single graph.
        for x, y in zip(X, Y):
            loss += s2s.calculate_user_loss([x], [y])
        # Backward + update
        loss.backward()
        trainer.update()

        # Only report on the requested epochs.
        if n_train <= 0 or epoch % check_train_error_every != 0:
            continue
        train_loss = loss.value() / n_tokens
        train_ppl = np.exp(train_loss)
        trainer.status()
        elapsed = timer.tick()
        log.info(" Training_loss=%f, ppl=%f, time=%f s, tok/s=%.1f" %
                 (train_loss, train_ppl, elapsed, n_tokens / elapsed))
Esempio n. 17
0
    def expr_for_tree(self,xt,tree,node,is_train):
        """Recursively compute the hidden expression for ``node``.

        A leaf (or ``None``) yields ``tanh(WC * xt + bc)``. An inner node
        sums its children's expressions, each gated by
        ``logistic(WR * xt + UR * child + br)``, and yields
        ``tanh(WP * xt + bp + UP * children_sum)``.

        :param xt: input expression shared by all nodes
        :param tree: tree object providing ``children(identifier)``
        :param node: current node, or ``None``
        :param is_train: when true, dropout with ``self.dropout_rate`` is
            applied to the weight matrices used at this node
        :return: hidden expression for ``node``
        """
        def maybe_dropout(weight):
            # In the training phase, perform dropout on the weight matrix.
            if is_train:
                return dy.dropout(weight, self.dropout_rate)
            return weight

        if node is None or node.is_leaf():
            # Use the dropped-out WC: the dropout expression used to be
            # built but the raw matrix was applied instead.
            return dy.tanh(dy.affine_transform([self.bc, maybe_dropout(self.WC), xt]))

        W_dropout = maybe_dropout(self.WP)
        # One WR mask per node, shared by all of its children.
        WR_dropout = maybe_dropout(self.WR)

        children_sum = dy.zeros((self.n_out))
        for child in tree.children(node.identifier):
            hc = self.expr_for_tree(xt=xt, tree=tree, node=child, is_train=is_train)
            rt = dy.logistic(WR_dropout * xt + self.UR * hc + self.br)
            children_sum = children_sum + dy.cmult(rt, hc)

        return dy.tanh(W_dropout * xt + self.bp + self.UP * children_sum)
Esempio n. 18
0
    def _policy_shape_probs(self,
                            prob_dist):
        """Reweight ``prob_dist`` per action type and renormalise it.

        A weighting vector is accumulated from masked copies of
        ``prob_dist`` (halved for EOS, with a share of the EOS probability
        added for "push", unchanged for "pop"), multiplied element-wise with
        ``prob_dist`` and passed through a softmax. With
        ``self.args.syntax_restricted`` the softmax is restricted to
        ``self._valid_action_indices``.
        """
        # TODO: this is specific to Alchemy
        # Vocabulary sizes minus one entry each.
        # NOTE(review): presumably this excludes a special symbol — confirm.
        num_actions = len(self.output_action_vocabulary) - 1
        num_locations = len(self.output_location_vocabulary) - 1
        num_arguments = len(self.output_argument_vocabulary) - 1
        new_probdist = dy.zeros(prob_dist.dim()[0])
        # Mask building blocks: one slot per (location, argument) pair.
        zeroes = numpy.zeros(num_locations * num_arguments)
        ones = numpy.ones(num_locations * num_arguments)
        eos_prob = prob_dist[self._all_output_vocabulary.lookup_index((EOS, NO_ARG, NO_ARG))]
        # NOTE(review): action_idx is never incremented in the loop below, so
        # every action gets the same mask (ones in the first slot block).
        # Confirm whether it was meant to advance with each action.
        action_idx = 0
        for action in self.output_action_vocabulary:
            masks = numpy.concatenate(
                        (numpy.repeat(zeroes, action_idx),
                         ones,
                         numpy.repeat(zeroes, num_actions - action_idx - 1)))
            actions_masks = dy.reshape(dy.inputTensor(masks),
                                       (num_actions * num_locations * num_arguments, 1))
            if action == EOS:
                new_probdist += dy.cmult(actions_masks, prob_dist) / 2.
            elif action == "push":
                # NOTE(review): 56. is a hard-coded constant; its meaning is
                # not visible here — confirm before reuse.
                new_probdist += dy.cmult(actions_masks, prob_dist) + eos_prob / (2. * 56.)
            elif action == "pop":
                new_probdist += dy.cmult(actions_masks, prob_dist)

        if self.args.syntax_restricted:
            # exp(log_softmax(...)) gives a softmax over the restricted indices.
            return dy.exp(dy.log_softmax(dy.cmult(new_probdist, prob_dist),
                                         restrict = self._valid_action_indices))
        else:
            return dy.softmax(dy.cmult(new_probdist, prob_dist))
Esempio n. 19
0
def hinge_loss(exprs, target, margin=1.0):
    """Margin loss between the target score and the best competing score.

    Returns ``exprs[best_wrong] - exprs[target] + margin`` when the target
    does not beat the highest-scoring other index by at least ``margin``,
    and a zero expression otherwise.
    """
    scores = exprs.value()
    # Highest-scoring index other than the target (first one on ties).
    wrong_indices = [i for i, _ in enumerate(scores) if i != target]
    best_wrong = max(wrong_indices, key=lambda i: scores[i])

    margin_violated = scores[target] < scores[best_wrong] + margin
    if margin_violated:
        return exprs[best_wrong] - exprs[target] + margin
    return dy.zeros(1)
Esempio n. 20
0
 def generate(self, context, trg, decorate=False, maxpossible=100):
     """Greedily generate output ids with a hand-written gated cell.

     Args:
         context: expression concatenated with the previous cell state to
             form the input of every step.
         trg: reference output; only printed next to the generated ids.
         decorate: when true, the hidden state is stored on ``tree._e``.
         maxpossible: maximum number of generation steps.

     Returns:
         The list of generated ids. Generation stops early once id 1 is
         produced.
     """
     # The parameters do not change between steps, so bind them once.
     Ui, Uo, Uu = [dy.parameter(u) for u in self.US]
     Uf1 = dy.parameter(self.UFS[0])
     bi, bo, bu, bf = [dy.parameter(b) for b in self.BS]
     pre2 = dy.parameter(self.pred)

     prev_out = dy.zeros((self.hdim))
     outputs = []
     for _ in range(maxpossible):
         emb = dy.concatenate([context, prev_out])
         i = dy.logistic(bi + Ui * emb)
         # The output gate has its own bias ``bo``; ``bi`` belongs to the
         # input gate.
         o = dy.logistic(bo + Uo * emb)
         f = dy.logistic(bf + Uf1 * emb)
         u = dy.tanh(bu + Uu * emb)
         c = dy.cmult(i, u) + dy.cmult(f, prev_out)
         h = dy.cmult(o, dy.tanh(c))
         # NOTE(review): ``tree`` is not defined in this method; unless it is
         # a module-level name, decorate=True raises NameError — confirm.
         if decorate: tree._e = h
         # The cell state (not the hidden state) is fed to the next step.
         prev_out = c
         out = dy.log_softmax(pre2 * h)
         # argmax over the numeric values, not over the Expression object.
         out = np.argmax(out.npvalue())
         outputs.append(out)
         if out == 1:
             print(outputs)
             print("-----")
             print(trg)
             return outputs
     print(outputs)
     print("---")
     print(trg)
     return outputs
Esempio n. 21
0
    def __call__(self, inputs, is_train=True):
        """
        Run ``self.recurrence`` over the input sequence from zero states.

        :param inputs: input word embeddings
        :param is_train: train flag, used for dropout
        :return: list with the hidden state after every time step
        """
        hidden = dy.zeros((self.n_out,))
        cell = dy.zeros((self.n_out,))
        hidden_states = []
        for t in range(len(inputs)):
            hidden, cell = self.recurrence(inputs[t], hidden, cell,
                                           train_flag=is_train)
            hidden_states.append(hidden)
        return hidden_states
Esempio n. 22
0
    def span_parser(self, sentence, is_train, elmo_embeddings, cur_word_index, gold=None):
        """Score every span of ``sentence`` and either train or parse.

        Training returns ``(None, total_loss)`` where the loss is the
        summed negative log-probability of the oracle label of every span.
        Otherwise ``optimal_parser`` is run and
        ``(tree, additional_info, label_probabilities)`` is returned.
        """
        if gold is not None:
            assert isinstance(gold, ParseNode)

        lstm_outputs = self._featurize_sentence(sentence, is_train=is_train,
                                                elmo_embeddings=elmo_embeddings,
                                                cur_word_index=cur_word_index)

        # All spans (start, end) with start < end, in row-major order.
        n_words = len(sentence)
        spans = [(start, end)
                 for start in range(0, n_words)
                 for end in range(start + 1, n_words + 1)]
        span_to_index = {span: position for position, span in enumerate(spans)}
        encodings = [self._get_span_encoding(start, end, lstm_outputs)
                     for start, end in spans]
        label_log_probabilities = self._encodings_to_label_log_probabilities(encodings)

        if not is_train:
            label_log_probabilities_np = label_log_probabilities.npvalue()
            tree, additional_info = optimal_parser(label_log_probabilities_np,
                                                   span_to_index,
                                                   sentence,
                                                   self.empty_label_index,
                                                   self.label_vocab,
                                                   gold)
            return tree, additional_info, dy.exp(label_log_probabilities).npvalue()

        total_loss = dy.zeros(1)
        for start, end in spans:
            gold_label = gold.oracle_label(start, end)
            gold_label_index = self.label_vocab.index(gold_label)
            index = span_to_index[(start, end)]
            total_loss -= label_log_probabilities[gold_label_index][index]
        return None, total_loss
Esempio n. 23
0
    def getWordEmbeddings(self, sentence, train, options, test_embeddings=None):
        """Attach the input vector ``vec`` to every entry of ``sentence``.

        For each entry the enabled embeddings (word, ELMo, POS, character,
        treebank) are stored in ``entry.vecs`` and concatenated into
        ``entry.vec``. Afterwards every BiLSTM in ``self.bilstms`` is run
        over the sentence via ``set_token_vecs``.

        :param sentence: sequence of token entries, including the root entry
        :param train: training flag; enables frequency-based word dropout
        :param options: configuration carrying the ``*_emb_size`` settings
        :param test_embeddings: optional mapping with ``"words"`` and
            ``"chars"`` sub-dicts of external embeddings used at prediction
            time; an empty one is created per call when omitted
        """
        if test_embeddings is None:
            # Create the container per call: a defaultdict in the signature
            # would be built once and shared (and grown) across all calls.
            test_embeddings = defaultdict(lambda: {})

        if self.elmo:
            # Get full text of sentence - excluding root, which is loaded differently
            # for transition and graph-based parsers.
            if options.graph_based:
                sentence_text = " ".join([entry.form for entry in sentence[1:]])
            else:
                sentence_text = " ".join([entry.form for entry in sentence[:-1]])

            elmo_sentence_representation = \
                self.elmo.get_sentence_representation(sentence_text)

        for i, root in enumerate(sentence):
            root.vecs = defaultdict(lambda: None) # all vecs are None by default (possibly a little risky?)
            if options.word_emb_size > 0:
                if train:
                    # Rare words are more likely to be replaced by index 0.
                    word_count = float(self.word_counts.get(root.norm, 0))
                    dropFlag = random.random() > word_count/(0.25+word_count)
                    root.vecs["word"] = self.word_lookup[self.words.get(root.norm, 0) if not dropFlag else 0]
                else: # need to check in test_embeddings at prediction time
                    if root.norm in self.words:
                        root.vecs["word"] = self.word_lookup[self.words[root.norm]]
                    elif root.norm in test_embeddings["words"]:
                        root.vecs["word"] = dy.inputVector(test_embeddings["words"][root.norm])
                    else:
                        root.vecs["word"] = self.word_lookup[0]
            if options.pos_emb_size > 0:
                root.vecs["pos"] = self.pos_lookup[self.pos.get(root.cpos,0)]
            if options.char_emb_size > 0:
                root.vecs["char"] = self.get_char_vector(root,train,test_embeddings["chars"])
            if options.tbank_emb_size > 0:
                if options.forced_tbank_emb:
                    treebank_id = options.forced_tbank_emb
                elif root.proxy_tbank:
                    treebank_id = root.proxy_tbank
                else:
                    treebank_id = root.treebank_id
                # this is a bit of a hack for models trained on an old version of the code
                # that used treebank name rather than id as the lookup
                if treebank_id not in self.treebanks and treebank_id in utils.reverse_iso_dict and \
                    utils.reverse_iso_dict[treebank_id] in self.treebanks:
                    treebank_id = utils.reverse_iso_dict[treebank_id]
                root.vecs["treebank"] = self.treebank_lookup[self.treebanks[treebank_id]]
            if self.elmo:
                # NOTE(review): for graph-based parsing the ELMo text is built
                # from sentence[1:], yet the lookup below indexes by i and
                # treats the last entry as root — confirm the alignment.
                if i < len(sentence) - 1:
                    # Don't look up the 'root' word
                    root.vecs["elmo"] = elmo_sentence_representation[i]
                else:
                    # TODO
                    root.vecs["elmo"] = dy.zeros(self.elmo.emb_dim)

            # Concatenate whichever vectors were set; unset ones are None.
            root.vec = dy.concatenate(list(filter(None, [root.vecs["word"],
                                                         root.vecs["elmo"],
                                                         root.vecs["pos"],
                                                         root.vecs["char"],
                                                         root.vecs["treebank"]])))

        for bilstm in self.bilstms:
            bilstm.set_token_vecs(sentence,train)
Esempio n. 24
0
    def predict_sequence_batched(self,
                                 inputs,
                                 mask_array,
                                 wlen,
                                 predictFlag=False):
        """Encode a batch of character sequences into projected embeddings.

        The character LSTM is run over ``inputs``; its masked hidden states
        are combined with attention weights, concatenated with one cell
        state per batch element (picked at ``wlen[idx] - 1``, or zeros for
        empty items) and passed through a linear projection. Dropout is
        applied unless ``predictFlag`` is set.
        """
        batch_size = inputs[0].dim()[1]
        src_len = len(inputs)
        training = not predictFlag

        if training:
            self.charlstm.set_dropouts(self.dropout, self.dropout)
            self.charlstm.set_dropout_masks(batch_size)

        char_fwd = self.charlstm.initial_state(batch_size)
        recur_states, cells = char_fwd.add_inputs(inputs, mask_array,
                                                  predictFlag)

        # Multiply every recurrent state by its per-batch-element mask entry.
        hidden_states = []
        for idx in range(src_len):
            mask_expr = dy.reshape(dy.inputVector(mask_array[idx]), (1, ),
                                   batch_size)
            hidden_states.append(recur_states[idx] * mask_expr)
        H = dy.concatenate_cols(hidden_states)

        atten_w = dy.transpose(self.W_atten.expr())
        # dropout attention connections(keep the same dim across the sequence)
        atten_input = dy.dropout_dim(H, 1, self.dropout) if training else H
        a = dy.softmax(atten_w * atten_input)

        # Cell state at each item's last position, or zeros for empty items.
        cell_states = [
            dy.pick_batch_elem(cells[wlen[idx] - 1], idx)
            if wlen[idx] > 0 else dy.zeros(self.ldims)
            for idx in range(batch_size)
        ]
        C = dy.concatenate_to_batch(cell_states)

        H_atten = H * dy.transpose(a)
        char_emb = dy.concatenate([H_atten, C])

        b_linear = self.b_linear.expr()
        W_linear = self.W_linear.expr()
        proj_input = dy.dropout(char_emb, self.dropout) if training else char_emb
        return dy.affine_transform([b_linear, W_linear, proj_input])
Esempio n. 25
0
    def __call__(self, input_expr):
        """Apply the affine map ``W1 * input_expr + b1``.

        When ``self.bias`` is falsy, a zero vector of size
        ``self.output_dim`` is used as bias.
        """
        W1 = dy.parameter(self.W1)
        b1 = dy.parameter(self.b1) if self.bias else dy.zeros(self.output_dim)
        return dy.affine_transform([b1, W1, input_expr])
Esempio n. 26
0
    def push(self, input, idx):
        '''
        Advance the cell by one step on ``input`` and record the result.

        The step starts from the most recent stored state, or from zero
        vectors of size ``self.hidden_size`` when nothing is stored yet.

        :param input: input expression for the cell step
        :param idx: word idx in buffer or action_id in vocab
        :return: None
        '''
        if self.states:
            prev_h, prev_c = self.states[-1]
        else:
            prev_h = dy.zeros((self.hidden_size))
            prev_c = dy.zeros((self.hidden_size))

        hx, cx = self.cell.step(input, prev_h, prev_c)
        self.states.append((hx, cx))
        self.indices.append(idx)
Esempio n. 27
0
  def transduce(self, es: expression_seqs.ExpressionSequence) -> expression_seqs.ExpressionSequence:
    """Run a forward and a backward pass of a convolutional LSTM over ``es``.

    The gate pre-activations come from a 2-D convolution of the input plus,
    after the first step, a 2-D convolution of the previous hidden state.
    The per-position outputs of both directions are flattened and
    concatenated.

    Args:
      es: input sequence; its mask (if any) is passed on to the output.

    Returns:
      An expression sequence with one expression per input position.
    """
    mask = es.mask
    sent_len = len(es)
    es_expr = es.as_transposed_tensor()
    batch_size = es_expr.dim()[1]

    # View the input as (sent_len, freq_dim, chn_dim) per batch element.
    es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)

    h_out = {}
    for direction in ["fwd", "bwd"]:
      # input convolutions
      gates_xt_bias = dy.conv2d_bias(es_chn, dy.parameter(self.params["x2all_" + direction]),
                                     dy.parameter(self.params["b_" + direction]), stride=(1, 1), is_valid=False)
      # One slice of the convolved input per position.
      gates_xt_bias_list = [dy.pick_range(gates_xt_bias, i, i + 1) for i in range(sent_len)]

      h = []
      c = []
      for input_pos in range(sent_len):
        # The backward direction reads the positions from the end.
        directional_pos = input_pos if direction == "fwd" else sent_len - input_pos - 1
        gates_t = gates_xt_bias_list[directional_pos]
        if input_pos > 0:
          # recurrent convolutions
          gates_h_t = dy.conv2d(h[-1], dy.parameter(self.params["h2all_" + direction]), stride=(1, 1), is_valid=False)
          gates_t += gates_h_t

        # standard LSTM logic
        if len(c) == 0:
          c_tm1 = dy.zeros((self.freq_dim * self.num_filters,), batch_size=batch_size)
        else:
          c_tm1 = c[-1]
        gates_t_reshaped = dy.reshape(gates_t, (4 * self.freq_dim * self.num_filters,), batch_size=batch_size)
        c_t = dy.reshape(dy.vanilla_lstm_c(c_tm1, gates_t_reshaped), (self.freq_dim * self.num_filters,),
                         batch_size=batch_size)
        h_t = dy.vanilla_lstm_h(c_t, gates_t_reshaped)
        h_t = dy.reshape(h_t, (1, self.freq_dim, self.num_filters,), batch_size=batch_size)

        # Without a mask, or when the mask column at this step sums to ~0,
        # the new states are stored as they are.
        # NOTE(review): the mask is indexed by input_pos in both directions,
        # although the backward pass reads directional_pos — confirm.
        if mask is None or np.isclose(np.sum(mask.np_arr[:, input_pos:input_pos + 1]), 0.0):
          c.append(c_t)
          h.append(h_t)
        else:
          # Otherwise the new and previous states are combined through the mask.
          # NOTE(review): at input_pos == 0 the lists are still empty, so
          # c[-1] / h[-1] would raise IndexError if this branch is reached.
          c.append(
            mask.cmult_by_timestep_expr(c_t, input_pos, True) + mask.cmult_by_timestep_expr(c[-1], input_pos, False))
          h.append(
            mask.cmult_by_timestep_expr(h_t, input_pos, True) + mask.cmult_by_timestep_expr(h[-1], input_pos, False))

      h_out[direction] = h
    ret_expr = []
    for state_i in range(len(h_out["fwd"])):
      state_fwd = h_out["fwd"][state_i]
      # The backward states were collected in reverse position order.
      state_bwd = h_out["bwd"][-1 - state_i]
      # Flatten the last two dimensions into a single one.
      output_dim = (state_fwd.dim()[0][1] * state_fwd.dim()[0][2],)
      fwd_reshape = dy.reshape(state_fwd, output_dim, batch_size=batch_size)
      bwd_reshape = dy.reshape(state_bwd, output_dim, batch_size=batch_size)
      ret_expr.append(dy.concatenate([fwd_reshape, bwd_reshape], d=0 if self.reshape_output else 2))
    return expression_seqs.ExpressionSequence(expr_list=ret_expr, mask=mask)

  # TODO: implement get_final_states()
Esempio n. 28
0
    def train_on_partial_annotation(self, sentence, annotations, elmo_vecs, cur_word_index):
        """Return the loss over the annotated spans of ``sentence``.

        The loss is the summed negative log-probability of each
        annotation's oracle label; it is a zero expression when there are
        no annotations.
        """
        if len(annotations) == 0:
            return dy.zeros(1)

        lstm_outputs = self._featurize_sentence(sentence, is_train=True, elmo_embeddings=elmo_vecs,
                                                cur_word_index=cur_word_index)

        # One span encoding per annotation, in annotation order.
        encodings = []
        for annotation in annotations:
            assert 0 <= annotation.left < annotation.right <= len(sentence), \
                (0, annotation.left, annotation.right, len(sentence))
            encodings.append(
                self._get_span_encoding(annotation.left, annotation.right, lstm_outputs))

        label_log_probabilities = self._encodings_to_label_log_probabilities(encodings)

        # Accumulate from the last annotation to the first.
        total_loss = dy.zeros(1)
        indexed_annotations = list(enumerate(annotations))
        for index, annotation in indexed_annotations[::-1]:
            log_prob = label_log_probabilities[annotation.oracle_label_index][index]
            total_loss = total_loss + (-log_prob)
        return total_loss
Esempio n. 29
0
 def __call__(self, encoder_output, hsz, beam_width=1):
     """Build the initial decoder inputs from ``encoder_output``.

     With ``beam_width > 1`` the context and the initial state are
     repeated ``beam_width`` times along the batch dimension.

     Returns:
         ``(h_i, init_zeros, context)`` where ``init_zeros`` is a zero
         vector of size ``hsz`` with the context's batch size.
     """
     h_i = self.get_state(encoder_output)
     context = encoder_output.output
     if beam_width > 1:
         # To vectorize, expand along the batch dimension, K times.
         def tile(expr):
             return dy.concatenate_to_batch([expr] * beam_width)

         context = [tile(c) for c in context]
         h_i = [tile(h) for h in h_i]
     _, batchsz = context[0].dim()
     return h_i, dy.zeros((hsz,), batch_size=batchsz), context
Esempio n. 30
0
 def __call__(self, inputs, is_train=True):
     """
     Forward pass: run ``self.recurrence`` over the inputs from zero states.

     :param inputs: input word embeddings
     :param is_train: passed to the recurrence as ``dropout_flag``
     :return: list with the last row of ``hm`` after every step
     """
     # hm0 and h_tilde_0
     memory = dy.zeros((self.n_steps+1, self.n_out))
     history = dy.zeros((self.n_out,))
     hidden_states = []
     for position in range(len(inputs)):
         memory, history = self.recurrence(
             inputs[position], memory, history, dropout_flag=is_train)
         hidden_states.append(memory[-1])
     return hidden_states
Esempio n. 31
0
 def __call__(self, inputs, is_train=True):
     """
     Run ``self.recurrence`` over the input word embeddings from zero states.

     :param inputs: input word embeddings
     :param is_train: passed to the recurrence as ``dropout_flag``
     :return: a list of hidden states for aspect predictions
     """
     # hm0, cm0 and h_tilde_0
     hidden_memory = dy.zeros((self.n_steps, self.n_out))
     cell_memory = dy.zeros((self.n_steps, self.n_out))
     h_tilde = dy.zeros((self.n_out,))
     hidden_states = []
     for position in range(len(inputs)):
         hidden_memory, cell_memory, h_tilde = self.recurrence(
             inputs[position], hidden_memory, cell_memory, h_tilde,
             dropout_flag=is_train)
         hidden_states.append(hidden_memory[-1])
     return hidden_states
Esempio n. 32
0
 def get_state(self, encoder_outputs):
     """Return one zero state per final encoder state.

     Each zero expression has the shape and batch size of the first entry
     of ``encoder_outputs.hidden``.
     """
     final_state = encoder_outputs.hidden
     shape, batchsz = final_state[0].dim()
     # Iterate over the states themselves: ``for _ in len(...)`` raises
     # TypeError because an int is not iterable.
     return [dy.zeros(shape, batch_size=batchsz) for _ in final_state]