Example #1
    def __call__(self, H, is_train=True):
        """

        :param H: a list of hidden-state vectors, one per time step
        :return: attention-weighted hidden states and the raw attention scores
        """
        seq_len = len(H)
        if is_train:
            # in the training phase, perform dropout
            W1 = dy.dropout(self.W1, self.dropout_rate)
            W2 = dy.dropout(self.W2, self.dropout_rate)
        else:
            W1 = self.W1
            W2 = self.W2

        pool = dy.average(H)

        aspect_attentions = []
        weights = []
        for t in range(seq_len):
            ht = H[t]
            scores = dy.tanh(dy.transpose(ht) * W1 * pool + self.bd)
            weights.append(scores.value())
            ht_hat = dy.cmult(dy.softmax(scores), ht)
            aspect_attentions.append(ht_hat)

        return aspect_attentions, weights
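
The pattern above (and in most of the examples that follow) switches dropout on only during training. A minimal, self-contained sketch of that switch, with hypothetical layer sizes, assuming DyNet 2.1+ where Parameters can be used directly as expressions (older versions need dy.parameter):

import dynet as dy

pc = dy.ParameterCollection()
W = pc.add_parameters((4, 8))
b = pc.add_parameters((4,))

def layer(x, is_train, rate=0.3):
    # dy.dropout returns a new expression; the result must be kept and used
    # (assumes DyNet >= 2.1, where W and b act as expressions directly)
    h = dy.tanh(W * x + b)
    return dy.dropout(h, rate) if is_train else h

dy.renew_cg()
x = dy.inputVector([0.5] * 8)
train_out = layer(x, is_train=True)   # dropout on
test_out = layer(x, is_train=False)   # dropout off
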
Example #2
 def enable_dropout(self):
     self.fwdRNN.set_dropout(0.3)
     self.bwdRNN.set_dropout(0.3)
     self.cfwdRNN.set_dropout(0.3)
     self.cbwdRNN.set_dropout(0.3)
     self.w1 = dy.dropout(self.w1, 0.3)
     self.b1 = dy.dropout(self.b1, 0.3)
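
The RNN builders above use set_dropout() instead of wrapping expressions in dy.dropout; the builder then applies dropout internally while transducing, and disable_dropout() turns it off again for evaluation. A small illustrative sketch with made-up dimensions:

import dynet as dy

pc = dy.ParameterCollection()
lstm = dy.LSTMBuilder(1, 64, 128, pc)  # layers, input dim, hidden dim

def set_train_mode(train):
    if train:
        lstm.set_dropout(0.3)    # dropout applied inside the recurrence
    else:
        lstm.disable_dropout()   # evaluation: no dropout

set_train_mode(True)
dy.renew_cg()
outputs = lstm.initial_state().transduce([dy.inputVector([0.1] * 64) for _ in range(5)])
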
Example #3
 def init_sequence(self, test=False):
     self.test = test
     if not test:
         self.dropout_mask_x = dy.dropout(dy.ones((self.n_in, )),
                                          self.dropout_x)
         self.dropout_mask_h = dy.dropout(dy.ones((self.n_hidden, )),
                                          self.dropout_h)
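
Here dy.dropout is applied to a vector of ones to precompute a dropout mask whose entries are 0 or 1/(1-p). Reusing one mask across all time steps gives variational ("locked") dropout rather than sampling a fresh mask per step. A hedged sketch of how such a mask is typically applied (illustrative sizes):

import dynet as dy

n_in, p_x = 8, 0.3

dy.renew_cg()
# dy.dropout over a vector of ones yields a mask with entries 0 or 1/(1-p)
mask_x = dy.dropout(dy.ones((n_in,)), p_x)
inputs = [dy.inputVector([0.1] * n_in) for _ in range(5)]
dropped = [dy.cmult(mask_x, x) for x in inputs]  # the same mask is reused at every step
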
Example #4
    def expr_for_tree(self, xt, tree, node, is_train):
        if is_train:
            # in the training phase, perform dropout on the weight matrices
            W_dropout = dy.dropout(self.WP, self.dropout_rate)
            WR_dropout = dy.dropout(self.WR, self.dropout_rate)
            WC_dropout = dy.dropout(self.WC, self.dropout_rate)
        else:
            W_dropout = self.WP
            WR_dropout = self.WR
            WC_dropout = self.WC

        if node is None or node.is_leaf():
            # leaf node: a single affine transform of the input
            h = dy.tanh(dy.affine_transform([self.bc, WC_dropout, xt]))
            return h

        # recurse over the child nodes and accumulate their gated hidden states
        children = tree.children(node.identifier)
        children_sum = dy.zeros((self.n_out,))
        for child in children:
            hc = self.expr_for_tree(xt=xt, tree=tree, node=child, is_train=is_train)
            rt = dy.logistic(WR_dropout * xt + self.UR * hc + self.br)
            children_sum = children_sum + dy.cmult(rt, hc)

        h = dy.tanh(W_dropout * xt + self.bp + self.UP * children_sum)
        return h
Example #5
    def build_tagging_graph(self, sentence):
        dy.renew_cg()

        embeddings = [self.word_rep(w) for w in sentence]

        lstm_out = self.bi_lstm.transduce(embeddings)

        H = dy.parameter(self.lstm_to_tags_params)
        Hb = dy.parameter(self.lstm_to_tags_bias)
        O = dy.parameter(self.mlp_out)
        Ob = dy.parameter(self.mlp_out_bias)
        scores = []
        if options.bigram:
            for rep, word in zip(lstm_out, sentence):
                bi1 = dy.lookup(self.bigram_lookup,
                                word[0],
                                update=self.we_update)
                bi2 = dy.lookup(self.bigram_lookup,
                                word[1],
                                update=self.we_update)
                if self.dropout is not None:
                    bi1 = dy.dropout(bi1, self.dropout)
                    bi2 = dy.dropout(bi2, self.dropout)
                score_t = O * dy.tanh(H * dy.concatenate([bi1, rep, bi2]) +
                                      Hb) + Ob
                scores.append(score_t)
        else:
            for rep in lstm_out:
                score_t = O * dy.tanh(H * rep + Hb) + Ob
                scores.append(score_t)

        return scores
Example #6
    def build_tagging_graph(self, batch):
        self.initialize_paramerets()
        # get the word vectors.
        batch_embs = self.word_rep(batch)

        # feed word vectors into biLSTM
        fw_exps = self.f_init.transduce(batch_embs)
        bw_exps = self.b_init.transduce(reversed(batch_embs))

        # biLSTM states
        bi_exps = [
            dy.concatenate([f, b]) for f, b in zip(fw_exps, reversed(bw_exps))
        ]

        # 2nd biLSTM
        fw_exps = self.f2_init.transduce(bi_exps)
        bw_exps = self.b2_init.transduce(reversed(bi_exps))

        # biLSTM states
        bi_exps = dy.concatenate([
            dy.concatenate([f, b]) for f, b in zip(fw_exps, reversed(bw_exps))
        ],
                                 d=1)
        aT = self.meta.activation(self.aw * bi_exps + self.ab)
        alpha = self.av * aT
        attn = dy.softmax(alpha, 1)
        weighted_sum = dy.reshape(bi_exps * dy.transpose(attn),
                                  (self.meta.lstm_word_dim * 2, ))
        if not self.eval:
            weighted_sum = dy.dropout(weighted_sum, 0.3)
        xh = self.meta.activation(self.w1 * weighted_sum + self.b1)
        if not self.eval:
            xh = dy.dropout(xh, 0.3)
        xo = self.w2 * xh + self.b2
        return xo
Example #7
    def apply(self, sent1, sent2):
        eL = dy.parameter(self.linear)
        sent1 = dy.inputTensor(self.embedding.all_embeds_from_ix(sent1)) * eL
        sent2 = dy.inputTensor(self.embedding.all_embeds_from_ix(sent2)) * eL

        out1, out2 = self.feed_F(sent1, sent2)
        e_out = out1 * dy.transpose(out2)
        prob_f_1 = dy.softmax(e_out)
        score = dy.transpose(e_out)
        prob_f_2 = dy.softmax(score)

        sent1_allign = dy.concatenate_cols([sent1, prob_f_1 * sent2])
        sent2_allign = dy.concatenate_cols([sent2, prob_f_2 * sent1])

        out_g_1, out_g_2 = self.feed_G(sent1_allign, sent2_allign)

        sent1_out_g = dy.sum_dim(out_g_1, [0])
        sent2_out_g = dy.sum_dim(out_g_2, [0])

        concat = dy.transpose(dy.concatenate([sent1_out_g, sent2_out_g]))

        h_step_1 = dy.parameter(self.h_step_1)
        sent_h = dy.rectify(dy.dropout(concat, 0.2) * h_step_1)
        h_step_2 = dy.parameter(self.h_step_2)
        sent_h = dy.rectify(dy.dropout(sent_h, 0.2) * h_step_2)

        final = dy.parameter(self.linear2)
        final = dy.transpose(sent_h * final)
        return final
Example #8
    def recurrence(self, xt, hmtm1, h_history_tm1, dropout_flag):
        """

        :param xt: input vector at the time step t
        :param hmtm1: hidden memories in previous n_steps steps
        :param h_tilde_tm1: previous hidden summary
        :param dropout_flag: make a decision for conducting partial dropout
        :return:
        """
        score = dy.concatenate([dy.dot_product(self.u, dy.tanh( \
            self.W_h * hmtm1[i] + self.W_x * xt + self.W_htilde * h_history_tm1)) for i in range(self.n_steps)])
        # normalize the attention score
        score = dy.softmax(score)
        # shape: (1, n_out), history of [h[t-n_steps-1], ..., h[t-2]]
        h_history_t = dy.reshape(dy.transpose(score) * hmtm1[:-1], d=(self.n_out,))
        htm1 = hmtm1[-1]
        #h_tilde_t = dy.concatenate([h_history_t, htm1])
        h_tilde_t = htm1 + dy.rectify(h_history_t)
        if dropout_flag:
            # perform partial dropout, i.e., add dropout over the matrices W_x*
            rt = dy.logistic(dy.dropout(self.W_xr, self.dropout_rate) * xt + self.W_hr * h_tilde_t + self.br)
            zt = dy.logistic(dy.dropout(self.W_xz, self.dropout_rate) * xt + self.W_hz * h_tilde_t + self.bz)
            ht_hat = dy.tanh(dy.dropout(self.W_xh, self.dropout_rate) * xt + self.W_hh * dy.cmult(rt, h_tilde_t) \
                             + self.bh)
            ht = dy.cmult(zt, h_tilde_t) + dy.cmult((1.0 - zt), ht_hat)
        else:
            rt = dy.logistic(self.W_xr * xt + self.W_hr * h_tilde_t + self.br)
            zt = dy.logistic(self.W_xz * xt + self.W_hz * h_tilde_t + self.bz)
            ht_hat = dy.tanh(self.W_xh * xt + self.W_hh * dy.cmult(rt, h_tilde_t) + self.bh)
            ht = dy.cmult(zt, h_tilde_t) + dy.cmult((1.0 - zt), ht_hat)
        hmt = dy.concatenate([hmtm1[1:], dy.reshape(ht, (1, self.n_out))])
        return hmt, h_history_t
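
The training branch above drops entries of the weight matrices themselves ("partial" dropout, DropConnect-style) rather than of the activations. A small illustrative comparison of the two placements, with hypothetical shapes and assuming DyNet 2.1+ Parameters-as-expressions:

import dynet as dy

pc = dy.ParameterCollection()
W = pc.add_parameters((16, 16))
b = pc.add_parameters((16,))
rate = 0.3

dy.renew_cg()
x = dy.inputVector([0.1] * 16)

# "partial" dropout: zero out entries of the weight matrix itself (DropConnect-style)
h_dropconnect = dy.tanh(dy.dropout(W, rate) * x + b)
# conventional dropout: zero out entries of the activation vector
h_standard = dy.dropout(dy.tanh(W * x + b), rate)
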
Example #9
 def word_assoc_score(self, source_idx, target_idx, relation):
     """
     NOTE THAT DROPOUT IS BEING APPLIED HERE
     :param source_idx: embedding index of source atom
     :param target_idx: embedding index of target atom
     :param relation: relation type
     :return: score
     """
     # prepare
     s = self.embeddings[source_idx]
     if self.no_assoc:
         A = dy.const_parameter(self.word_assoc_weights[relation])
     else:
         A = dy.parameter(self.word_assoc_weights[relation])
     A = dy.dropout(A, self.dropout)
     t = self.embeddings[target_idx]
     
     # compute
     if self.mode == BILINEAR_MODE:
         return dy.transpose(s) * A * t
     elif self.mode == DIAG_RANK1_MODE:
         diag_A = dyagonalize(A[0])
         rank1_BC = A[1] * dy.transpose(A[2])
         ABC = diag_A + rank1_BC
         return dy.transpose(s) * ABC * t
     elif self.mode == TRANSLATIONAL_EMBED_MODE:
         return -dy.l2_norm(s - t + A)
     elif self.mode == DISTMULT:
         return dy.sum_elems(dy.cmult(dy.cmult(s, A), t))
Example #10
    def calc_loss(self, enc_sen, y_adv, vec_drop, train):
        """
        The attacker's core functionality:
        an MLP with possibly multiple layers (at least one).
        :param enc_sen:
        :param y_adv:
        :param vec_drop:
        :param train:
        :return:
        """

        w = dy.parameter(self._params["adv_w0"])
        b = dy.parameter(self._params["adv_b0"])

        if train:
            drop = self._dropout
            out = dy.dropout(enc_sen, vec_drop)
        else:
            drop = 0
            out = enc_sen

        out = dy.tanh(dy.dropout(dy.affine_transform([b, w, out]), drop))
        if self._mlp_layers > 2:
            for i in range(self._mlp_layers - 2):
                w = dy.parameter(self._params["adv_w" + str(i + 1)])
                b = dy.parameter(self._params["adv_b" + str(i + 1)])
                out = dy.tanh(
                    dy.dropout(dy.affine_transform([b, w, out]), drop))
        w = dy.parameter(self._params["adv_w" + str(self._mlp_layers - 1)])
        b = dy.parameter(self._params["adv_b" + str(self._mlp_layers - 1)])
        out = dy.affine_transform([b, w, out])

        task_probs = dy.softmax(out)
        adv_loss = dy.pickneglogsoftmax(out, y_adv)
        return adv_loss, np.argmax(task_probs.npvalue())
Example #11
    def __call__(self, H1, H2, H3, is_train=True):
        """

        :param H1, H2, H3: three lists of hidden-state vectors, one per time step
        :return: the fused hidden states
        """
        seq_len = len(H1)

        if is_train:
            # in the training phase, perform dropout
            W1 = dy.dropout(self.W1, self.dropout_rate)
            W2 = dy.dropout(self.W2, self.dropout_rate)
            W3 = dy.dropout(self.W3, self.dropout_rate)
        else:
            W1 = self.W1
            W2 = self.W2
            W3 = self.W3

        H = []
        for t in range(seq_len):
            ht_hat = dy.tanh(W1 * H1[t] + W2 * H2[t] + W3 * H3[t] + self.bd)
            H.append(ht_hat)

        return H
Example #12
    def word_assoc_score(self, source_idx, target_idx, relation):
        """
        NOTE THAT DROPOUT IS BEING APPLIED HERE
        :param source_idx: embedding index of source atom
        :param target_idx: embedding index of target atom
        :param relation: relation type
        :return: score
        """
        # prepare
        s = self.embeddings[source_idx]
        if self.no_assoc:
            A = dy.const_parameter(self.word_assoc_weights[relation])
        else:
            A = dy.parameter(self.word_assoc_weights[relation])
        A = dy.dropout(A, self.dropout)
        t = self.embeddings[target_idx]

        # compute
        if self.mode == BILINEAR_MODE:
            return dy.transpose(s) * A * t
        elif self.mode == DIAG_RANK1_MODE:
            diag_A = dyagonalize(A[0])
            rank1_BC = A[1] * dy.transpose(A[2])
            ABC = diag_A + rank1_BC
            return dy.transpose(s) * ABC * t
        elif self.mode == TRANSLATIONAL_EMBED_MODE:
            return -dy.l2_norm(s - t + A)
        elif self.mode == DISTMULT:
            return dy.sum_elems(dy.cmult(dy.cmult(s, A), t))
Example #13
    def adv_mlp(self, vec_sen, adv_ind, train, vec_drop):
        """
        calculating the adversarial mlp over the sentence representation vector.
        more than a single adversarial mlp is supported
        """

        if train:
            drop = self._dropout
            out = dy.dropout(vec_sen, vec_drop)
        else:
            drop = 0
            out = vec_sen

        for i in range(self._adv_depth):
            w = dy.parameter(self._params["adv_" + str(adv_ind) + "_w" +
                                          str(i + 1)])
            b = dy.parameter(self._params["adv_" + str(adv_ind) + "_b" +
                                          str(i + 1)])
            out = dy.tanh(dy.dropout(dy.affine_transform([b, w, out]), drop))

        w = dy.parameter(self._params["adv_" + str(adv_ind) + "_w" +
                                      str(self._adv_depth + 1)])
        b = dy.parameter(self._params["adv_" + str(adv_ind) + "_b" +
                                      str(self._adv_depth + 1)])
        out = dy.affine_transform([b, w, out])

        return out
Example #14
    def forward(self, features, dropout=False):
        # extract ids for word, pos and label
        word_ids = [self.vocab.word2id(w) for w in features[:20]]
        pos_ids = [self.vocab.pos2id(p) for p in features[20:40]]
        label_ids = [self.vocab.label2id(l) for l in features[40:52]]

        # extract embedding from features
        word_embeds = [self.word_embedding[wid] for wid in word_ids]
        pos_embeds = [self.pos_embedding[pid] for pid in pos_ids]
        label_embeds = [self.label_embedding[lid] for lid in label_ids]

        # concatenating all features
        embedding_layer = dynet.concatenate(word_embeds + pos_embeds +
                                            label_embeds)

        # calculating the hidden layers
        hidden_1 = self.transfer(self.hidden_layer_1.expr() * embedding_layer +
                                 self.hidden_layer_bias_1.expr())
        if dropout:
            hidden_1 = dynet.dropout(hidden_1, self.properties.dropout)
        hidden_2 = self.transfer(self.hidden_layer_2.expr() * hidden_1 +
                                 self.hidden_layer_bias_2.expr())
        if dropout:
            hidden_2 = dynet.dropout(hidden_2, self.properties.dropout)
        # calculating the output layer
        output = self.output_layer.expr() * hidden_2 + self.output_bias.expr()

        return output
Example #15
    def _build_computation_graph(self, words, train_mode=True):
        """
        Builds the computational graph.
        """
        dy.renew_cg()
        # turn parameters into expressions
        softmax_weight_exp = dy.parameter(self.softmax_weight)
        softmax_bias_exp = dy.parameter(self.softmax_bias)

        word_reps = [self._word_rep(word) for word in words]
        embs = dy.concatenate(word_reps, d=1)

        if self.pooling_method == "average":
            average_emb = dy.mean_dim(embs, d=1)
        elif self.pooling_method == "max":
            average_emb = dy.max_dim(embs, d=1)
        else:
            raise NotImplementedError

        average_emb = dy.reshape(average_emb, (self.word_embedding_size,))

        if self.average_dropout is not None:
            average_emb = dy.dropout(average_emb, self.average_dropout)

        return softmax_weight_exp * average_emb + softmax_bias_exp
Example #16
    def evaluate_recurrent(self, fwd_bigrams, unigrams, test=False):
        fwd1 = self.fwd_lstm1.initial_state()
        back1 = self.back_lstm1.initial_state()

        fwd2 = self.fwd_lstm2.initial_state()
        back2 = self.back_lstm2.initial_state()

        fwd_input = []
        for i in range(len(unigrams)):
            bivec = dynet.lookup(self.bigram_embed, fwd_bigrams[i])
            univec = dynet.lookup(self.unigram_embed, unigrams[i])
            vec = dynet.concatenate([bivec, univec])
            #   fwd_input.append(dynet.tanh(self.embed2lstm_W*vec))
            fwd_input.append(vec)

        back_input = []
        for i in range(len(unigrams)):
            bivec = dynet.lookup(self.bigram_embed, fwd_bigrams[i + 1])
            univec = dynet.lookup(self.unigram_embed, unigrams[i])
            vec = dynet.concatenate([bivec, univec])
            # back_input.append(dynet.tanh(self.embed2lstm_W*vec))
            back_input.append(vec)

        fwd1_out = []
        for vec in fwd_input:
            fwd1 = fwd1.add_input(vec)
            fwd_vec = fwd1.output()
            fwd1_out.append(fwd_vec)

        back1_out = []
        for vec in reversed(back_input):
            back1 = back1.add_input(vec)
            back1_vec = back1.output()
            back1_out.append(back1_vec)

        lstm2_input = []
        for (f, b) in zip(fwd1_out, reversed(back1_out)):
            lstm2_input.append(dynet.concatenate([f, b]))

        fwd2_out = []
        for vec in lstm2_input:
            if self.droprate > 0 and not test:
                vec = dynet.dropout(vec, self.droprate)
            fwd2 = fwd2.add_input(vec)
            fwd_vec = fwd2.output()
            fwd2_out.append(fwd_vec)

        back2_out = []
        for vec in reversed(lstm2_input):
            if self.droprate > 0 and not test:
                vec = dynet.dropout(vec, self.droprate)
            back2 = back2.add_input(vec)
            back_vec = back2.output()
            back2_out.append(back_vec)

        # fwd_out = [dynet.concatenate([f1,f2]) for (f1,f2) in zip(fwd1_out,fwd2_out)]
        # back_out = [dynet.concatenate([b1,b2]) for (b1,b2) in zip(back1_out,back2_out)]

        return fwd2_out, back2_out[::-1]
Example #17
 def out_layer(self, x, dropout):
     if dropout:
         W = dy.dropout(self._W2, 0.3)
         b = dy.dropout(self._b2, 0.3)
     else:
         W = self._W2
         b = self._b2
     return W * x + b
Example #18
    def set_dropouts(self, input_drop=0, recur_drop=0):

        self.input_drop = input_drop
        self.recur_drop = recur_drop
        self.input_drop_mask = dy.dropout(dy.ones(self.input_size),
                                          self.input_drop)
        self.recur_drop_mask = dy.dropout(dy.ones(self.recur_size),
                                          self.recur_drop)
Example #19
 def out_layer(self, x, dropout):
     if dropout:
         W = dy.dropout(dy.parameter(self._W2), 0.3)
         b = dy.dropout(dy.parameter(self._b2), 0.3)
     else:
         W = dy.parameter(self._W2)
         b = dy.parameter(self._b2)
     return (W * x + b)
Example #20
 def hid_2_layer(self, x, dropout):
     if dropout:
         W = dy.dropout(self._W12, 0.3)
         b = dy.dropout(self._b12, 0.3)
     else:
         W = self._W12
         b = self._b12
     return self.activation(W * x + b)
Example #21
 def hid_2_layer(self, x, dropout):
     if dropout:
         W = dy.dropout(dy.parameter(self._W12), 0.3)
         b = dy.dropout(dy.parameter(self._b12), 0.3)
     else:
         W = dy.parameter(self._W12)
         b = dy.parameter(self._b12)
     return self.activation(W * x + b)
Example #22
 def hid_layer(self, x, dropout):
     if dropout:
         W = dy.dropout(dy.parameter(self._W1), 0.3)
         b = dy.dropout(dy.parameter(self._b1), 0.3)
     else:
         W = dy.parameter(self._W1)
         b = dy.parameter(self._b1)
     return dy.rectify(W * x + b)
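
Examples #19, #21 and #22 wrap each parameter in dy.parameter(...) before use, while #17, #20 and #25 use the Parameters objects directly. In recent DyNet (2.1 and later) Parameters behave as expressions, so both spellings are equivalent; older versions require the explicit call. A minimal sketch of the two styles:

import dynet as dy

pc = dy.ParameterCollection()
_W1 = pc.add_parameters((4, 4))
_b1 = pc.add_parameters((4,))

dy.renew_cg()
x = dy.inputVector([1.0, 2.0, 3.0, 4.0])

# older style: explicitly turn the parameters into graph expressions
W = dy.parameter(_W1)
b = dy.parameter(_b1)
h_old = dy.rectify(W * x + b)

# newer style (DyNet >= 2.1): Parameters are usable as expressions directly
h_new = dy.rectify(_W1 * x + _b1)
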
Example #23
    def evaluate_recurrent(self, word_inds, tag_inds, test=False):

        fwd1 = self.fwd_lstm1.initial_state()
        back1 = self.back_lstm1.initial_state()

        fwd2 = self.fwd_lstm2.initial_state()
        back2 = self.back_lstm2.initial_state()

        sentence = []

        for (w, t) in zip(word_inds, tag_inds):
            wordvec = dynet.lookup(self.word_embed, w)
            tagvec = dynet.lookup(self.tag_embed, t)
            vec = dynet.concatenate([wordvec, tagvec])
            sentence.append(vec)

        fwd1_out = []
        for vec in sentence:
            fwd1 = fwd1.add_input(vec)
            fwd_vec = fwd1.output()
            fwd1_out.append(fwd_vec)

        back1_out = []
        for vec in reversed(sentence):
            back1 = back1.add_input(vec)
            back_vec = back1.output()
            back1_out.append(back_vec)

        lstm2_input = []
        for (f, b) in zip(fwd1_out, reversed(back1_out)):
            lstm2_input.append(dynet.concatenate([f, b]))

        fwd2_out = []
        for vec in lstm2_input:
            if self.droprate > 0 and not test:
                vec = dynet.dropout(vec, self.droprate)
            fwd2 = fwd2.add_input(vec)
            fwd_vec = fwd2.output()
            fwd2_out.append(fwd_vec)

        back2_out = []
        for vec in reversed(lstm2_input):
            if self.droprate > 0 and not test:
                vec = dynet.dropout(vec, self.droprate)
            back2 = back2.add_input(vec)
            back_vec = back2.output()
            back2_out.append(back_vec)

        fwd_out = [
            dynet.concatenate([f1, f2])
            for (f1, f2) in zip(fwd1_out, fwd2_out)
        ]
        back_out = [
            dynet.concatenate([b1, b2])
            for (b1, b2) in zip(back1_out, back2_out)
        ]

        return fwd_out, back_out[::-1]
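
As in Example #16, dropout is applied only between the first and second recurrent layers, not inside a layer. A simplified, self-contained sketch of that stacking pattern for a single direction (hypothetical sizes):

import dynet as dy

pc = dy.ParameterCollection()
l1 = dy.LSTMBuilder(1, 8, 16, pc)
l2 = dy.LSTMBuilder(1, 16, 32, pc)

def encode(seq, droprate, test=False):
    h1 = l1.initial_state().transduce(seq)
    if droprate > 0 and not test:
        # dropout only between the two recurrent layers, as in the example above
        h1 = [dy.dropout(h, droprate) for h in h1]
    return l2.initial_state().transduce(h1)

dy.renew_cg()
seq = [dy.inputVector([0.1] * 8) for _ in range(4)]
out = encode(seq, droprate=0.3)
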
Example #24
 def _calculate_train_score(self, sentence):
     """Same as _calculate_score, but applies dropout after embedding and bi-lstm layers, used for training"""
     embeddings = [self.lookup[w] for w in sentence]
     embeddings = [dy.dropout(e, self.dropout_rate) for e in embeddings]
     bi_lstm_output = self.bilstm.transduce(embeddings)
     bi_lstm_output = [
         dy.dropout(o, self.dropout_rate) for o in bi_lstm_output
     ]
     return [self.w * o + self.b for o in bi_lstm_output]
Example #25
 def hid_layer(self, x, y, dropout):
     if dropout:
         W_h = dy.dropout(self._W1_h, 0.3)
         W_d = dy.dropout(self._W1_d, 0.3)
         b = dy.dropout(self._b1, 0.3)
     else:
         W_h = self._W1_h
         W_d = self._W1_d
         b = self._b1
     return self.activation(W_h * x + W_d * y + b)
Example #26
 def hid_layer(self, x, y, dropout):
     if dropout:
         W_h = dy.dropout(dy.parameter(self._W1_h), 0.3)
         W_d = dy.dropout(dy.parameter(self._W1_d), 0.3)
         b = dy.dropout(dy.parameter(self._b1), 0.3)
     else:
         W_h = dy.parameter(self._W1_h)
         W_d = dy.parameter(self._W1_d)
         b = dy.parameter(self._b1)
     return self.activation(W_h * x + W_d * y + b)
Example #27
    def __call__(self, sentence1, sentence2):
        W_1 = dy.parameter(self.W_1)
        # relu activation with dropout
        out1 = dy.rectify(dy.dropout(sentence1, self.drop_param) * W_1)
        out2 = dy.rectify(dy.dropout(sentence2, self.drop_param) * W_1)

        W_2 = dy.parameter(self.W_2)
        out1 = dy.rectify(dy.dropout(out1, self.drop_param) * W_2)
        out2 = dy.rectify(dy.dropout(out2, self.drop_param) * W_2)
        return out1, out2
Example #28
    def cal_scores(self, src_encodings, masks, train):

        src_len = len(src_encodings)
        batch_size = src_encodings[0].dim()[1]
        heads_LRlayer = []
        mods_LRlayer = []
        for encoding in src_encodings:
            heads_LRlayer.append(
                self.leaky_ReLu(self.b_head.expr() +
                                self.W_head.expr() * encoding))
            mods_LRlayer.append(
                self.leaky_ReLu(self.b_mod.expr() +
                                self.W_mod.expr() * encoding))

        heads_labels = []
        heads = []
        labels = []
        neg_inf = dy.constant(1, -float("inf"))
        # exclude root @ index=0 since roots do not have heads
        for row in range(1, src_len):

            scores_idx = []
            for col in range(src_len):

                dist = col - row
                mdist = self.dist_max
                dist_i = (min(dist, mdist - 1) + mdist if dist >= 0 else int(
                    min(-1.0 * dist, mdist - 1)))
                dist_vec = dy.lookup_batch(self.dlookup, [dist_i] * batch_size)
                if train:
                    input_vec = dy.concatenate([
                        dy.esum([
                            dy.dropout(heads_LRlayer[col], self.dropout),
                            dy.dropout(mods_LRlayer[row], self.dropout)
                        ]), dist_vec
                    ])
                else:
                    input_vec = dy.concatenate([
                        dy.esum([heads_LRlayer[col], mods_LRlayer[row]]),
                        dist_vec
                    ])
                score = self.scoreHeadModLabel(input_vec, train)
                mask = masks[row] and masks[col]
                join_scores = []
                for bdx in range(batch_size):
                    if (mask[bdx] == 1):
                        join_scores.append(dy.pick_batch_elem(score, bdx))
                    else:
                        join_scores.append(
                            dy.concatenate([neg_inf] * self.n_labels))
                scores_idx.append(dy.concatenate_to_batch(join_scores))
            heads_labels.append(dy.concatenate(scores_idx))

        return heads_labels
Example #29
    def forward(self, s1, s2, label=None):
        eL = dy.parameter(self.embeddingLinear)
        s1 = dy.inputTensor(s1) * eL
        s2 = dy.inputTensor(s2) * eL

        # F step
        Lf1 = dy.parameter(self.mlpF1)
        Fs1 = dy.rectify(dy.dropout(s1, 0.2) * Lf1)
        Fs2 = dy.rectify(dy.dropout(s2, 0.2) * Lf1)
        Lf2 = dy.parameter(self.mlpF2)
        Fs1 = dy.rectify(dy.dropout(Fs1, 0.2) * Lf2)
        Fs2 = dy.rectify(dy.dropout(Fs2, 0.2) * Lf2)

        # Attention scoring
        score1 = Fs1 * dy.transpose(Fs2)
        prob1 = dy.softmax(score1)

        score2 = dy.transpose(score1)
        prob2 = dy.softmax(score2)

        # Align pairs using attention
        s1Pairs = dy.concatenate_cols([s1, prob1 * s2])
        s2Pairs = dy.concatenate_cols([s2, prob2 * s1])

        # G step
        Lg1 = dy.parameter(self.mlpG1)
        Gs1 = dy.rectify(dy.dropout(s1Pairs, 0.2) * Lg1)
        Gs2 = dy.rectify(dy.dropout(s2Pairs, 0.2) * Lg1)
        Lg2 = dy.parameter(self.mlpG2)
        Gs1 = dy.rectify(dy.dropout(Gs1, 0.2) * Lg2)
        Gs2 = dy.rectify(dy.dropout(Gs2, 0.2) * Lg2)

        # Sum
        Ss1 = dy.sum_dim(Gs1, [0])
        Ss2 = dy.sum_dim(Gs2, [0])

        concatS12 = dy.transpose(dy.concatenate([Ss1, Ss2]))

        # H step
        Lh1 = dy.parameter(self.mlpH1)
        Hs = dy.rectify(dy.dropout(concatS12, 0.2) * Lh1)
        Lh2 = dy.parameter(self.mlpH2)
        Hs = dy.rectify(dy.dropout(Hs, 0.2) * Lh2)

        # Final layer
        final_layer = dy.parameter(self.final_layer)
        final = dy.transpose(Hs * final_layer)

        # label can be 0, so compare against None explicitly
        if label is not None:
            return dy.pickneglogsoftmax(final, label)
        else:
            out = dy.softmax(final)
            return np.argmax(out.npvalue())
Example #30
    def greedy_search(self, char_seq, truth=None, mu=0.):
        init_state = self.params['lstm'].initial_state().add_input(self.param_exprs['<bos>'])
        init_y = dy.tanh(self.param_exprs['pW'] * init_state.output() + self.param_exprs['pb'])
        init_score = dy.scalarInput(0.)
        init_sentence = Sentence(score=init_score.scalar_value(), score_expr=init_score,
                                 LSTMState=init_state, y=init_y, prevState=None,
                                 wlen=None, golden=True)
        
        if truth is not None:
            cembs = [dy.dropout(dy.lookup(self.params['embed'], char), self.options['dropout_rate'])
                     for char in char_seq]
        else:
            cembs = [dy.lookup(self.params['embed'], char) for char in char_seq]

        start_agenda = init_sentence
        agenda = [start_agenda]

        for idx, _ in enumerate(char_seq, 1):  # from left to right, character by character
            now = None
            for wlen in range(1, min(idx, self.options['max_word_len']) + 1):  # generate word candidate vectors
                # join segmentation: sent + word
                word = self.word_repr(char_seq[idx - wlen:idx], cembs[idx - wlen:idx])
                sent = agenda[idx - wlen]

                if truth is not None:
                    word = dy.dropout(word, self.options['dropout_rate'])

                word_score = dy.dot_product(word, self.param_exprs['U'])

                if truth is not None:
                    golden = sent.golden and truth[idx - 1] == wlen
                    margin = dy.scalarInput(mu * wlen if truth[idx - 1] != wlen else 0.)
                    score = margin + sent.score_expr + dy.dot_product(sent.y, word) + word_score
                else:
                    golden = False
                    score = sent.score_expr + dy.dot_product(sent.y, word) + word_score

                good = (now is None or now.score < score.scalar_value())
                if golden or good:
                    new_state = sent.LSTMState.add_input(word)
                    new_y = dy.tanh(self.param_exprs['pW'] * new_state.output() + self.param_exprs['pb'])
                    new_sent = Sentence(score=score.scalar_value(), score_expr=score,
                                        LSTMState=new_state, y=new_y, prevState=sent,
                                        wlen=wlen, golden=golden)
                    if good:
                        now = new_sent
                    if golden:
                        golden_sent = new_sent

            agenda.append(now)
            if truth is not None and truth[idx-1]>0 and (not now.golden):
                return (now.score_expr - golden_sent.score_expr)

        if truth is not None:
            return (now.score_expr - golden_sent.score_expr)

        return agenda
Example #31
    def __convolve__(self, embeddings, F, b, W1, bW1):
        sntlen = len(embeddings)
        emb = dy.concatenate_cols(embeddings)

        x = dy.conv2d_bias(emb, F, b, [1, 1], is_valid=False)
        x = dy.rectify(x)
        x = dy.maxpooling2d(x, [1, sntlen], [1, 1], is_valid=True)
        if self.DROPOUT > 0:
            x = dy.dropout(x, self.DROPOUT)
        f = dy.reshape(x, (self.EMB_DIM * 1 * 100, ))

        return W1 * f + bW1
Example #32
    def __call__(self, x, mask=None, train=False):
        """Input: ((H, T), B)"""
        x = self.ln1(x)
        y = self.self_attn(x, x, x, mask, train)
        y = dy.dropout(y, self.pdrop) if train else y
        x = x + y

        x = self.ln2(x)
        y = self.ffn(x, train)
        y = dy.dropout(y, self.pdrop) if train else y
        x = x + y

        return x
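
Examples #32, #34 and several later ones repeat the expression dy.dropout(y, self.pdrop) if train else y after each sub-layer. A tiny hedged helper that captures this residual-dropout idiom (illustrative, not from the original project):

import dynet as dy

def maybe_dropout(x, p, train):
    """Apply dropout only at training time; identity at evaluation time."""
    return dy.dropout(x, p) if train and p > 0.0 else x

dy.renew_cg()
y = dy.inputVector([1.0, 2.0, 3.0])
y = maybe_dropout(y, 0.1, train=True)
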
Example #33
    def build_graph(self, features):
        # extract word and tags ids
        word_ids = [self.vocab.word2id(word_feat) for word_feat in features[0:20]]
        tag_ids = [self.vocab.tag2id(tag_feat) for tag_feat in features[20:40]]
        dep_ids = [self.vocab.dep2id(tag_feat) for tag_feat in features[40:]]

        # extract word embeddings and tag embeddings from features
        word_embeds = [self.word_embedding[wid] for wid in word_ids]
        tag_embeds = [self.tag_embedding[tid] for tid in tag_ids]
        dep_embeds = [self.dep_embedding[tid] for tid in dep_ids]

        # concatenating all features (recall that '+' on lists concatenates them)
        embedding_layer = dynet.concatenate(word_embeds + tag_embeds + dep_embeds)

        # calculating the hidden layers, with dropout applied to the first one
        hidden1 = self.transfer(self.hidden_layer1 * embedding_layer + self.hidden_layer_bias1)
        dropout1 = dynet.dropout(hidden1, 0.1)
        hidden2 = self.transfer(self.hidden_layer2 * dropout1 + self.hidden_layer_bias2)

        # To implement the network without dropout, remove the dropout1 line and change hidden2 to:
        # hidden2 = self.transfer(self.hidden_layer2 * hidden1 + self.hidden_layer_bias2)

        # calculating the output layer
        output = self.output_layer * hidden2 + self.output_bias

        # return the output as a dynet vector (expression)
        return output
Example #34
    def __call__(self, x, memory, src_mask, tgt_mask, train=False):
        """Input shape: ((H, T), B)"""
        x = self.ln1(x)
        y = self.self_attn(x, x, x, tgt_mask, train)
        y = dy.dropout(y, self.pdrop) if train else y
        x = x + y

        x = self.ln2(x)
        y = self.src_attn(x, memory, memory, src_mask)
        y = dy.dropout(y, self.pdrop) if train else y
        x = x + y

        x = self.ln3(x)
        y = self.ffn(x, train)
        y = dy.dropout(y, self.pdrop) if train else y
        x = x + y

        return x
Example #35
def calc_score_of_history(words, dropout=0.0):
  # Lookup the embeddings and concatenate them
  emb = dy.concatenate([W_emb[x] for x in words])
  # Create the hidden layer
  h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
  # CHANGE 2: perform dropout
  if dropout != 0.0:
    h = dy.dropout(h, dropout)
  # Calculate the score and return
  return dy.affine_transform([b_sm, W_sm, h])
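
A self-contained miniature of the function above, wired into one training step, with hypothetical sizes and data (assumes DyNet 2.1+ so Parameters can be passed to dy.affine_transform directly):

import dynet as dy

N, VOCAB, EMB, HID = 2, 100, 16, 32
pc = dy.ParameterCollection()
W_emb = pc.add_lookup_parameters((VOCAB, EMB))
W_h = pc.add_parameters((HID, N * EMB))
b_h = pc.add_parameters((HID,))
W_sm = pc.add_parameters((VOCAB, HID))
b_sm = pc.add_parameters((VOCAB,))
trainer = dy.SimpleSGDTrainer(pc)

def calc_score_of_history(words, dropout=0.0):
  emb = dy.concatenate([W_emb[x] for x in words])
  h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
  if dropout != 0.0:
    h = dy.dropout(h, dropout)
  return dy.affine_transform([b_sm, W_sm, h])

# one training step: dropout on while computing the loss
dy.renew_cg()
loss = dy.pickneglogsoftmax(calc_score_of_history([3, 7], dropout=0.5), 42)
loss.scalar_value()
loss.backward()
trainer.update()
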
Example #36
def calc_score_of_histories(words, dropout=0.0):
  # This will change from a list of histories, to a list of words in each history position
  words = np.transpose(words)
  # Lookup the embeddings and concatenate them
  emb = dy.concatenate([dy.lookup_batch(W_emb, x) for x in words])
  # Create the hidden layer
  h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
  # Perform dropout
  if dropout != 0.0:
    h = dy.dropout(h, dropout)
  # Calculate the score and return
  return dy.affine_transform([b_sm, W_sm, h])
Example #37
def dot_product_attention(query, key, value, mask=None, dropout=None):
    """Input Shape: ((D, T, H), B)"""
    scores = batch_matmul(transpose(key, 0, 1), query)
    if mask is not None:
        scores = dy.cmult(scores, mask[0]) + (mask[1] * -1e9)

    weights = folded_softmax(scores)

    if dropout is not None:
        weights = dy.dropout(weights, dropout)

    return batch_matmul(value, weights)
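
The helpers batch_matmul, transpose and folded_softmax come from the surrounding project; for a single unbatched example the same attention-weight dropout can be written with core DyNet ops only. A hedged, illustrative sketch:

import dynet as dy

d, T = 8, 5

dy.renew_cg()
Q = dy.inputTensor([[0.1] * T] * d)   # (d, T) queries
K = dy.inputTensor([[0.2] * T] * d)   # (d, T) keys
V = dy.inputTensor([[0.3] * T] * d)   # (d, T) values

scores = dy.transpose(K) * Q          # (T, T) attention scores
weights = dy.softmax(scores)          # softmax over each column
weights = dy.dropout(weights, 0.1)    # dropout applied to the attention weights
context = V * weights                 # (d, T) attention-weighted values
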
Example #38
 def __call__(self, x, dropout=False):
   if args.conv:
     x = dy.reshape(x, (28, 28, 1))
     x = dy.conv2d_bias(x, self.F1, self.b1, [1, 1], is_valid=False)
     x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))
     x = dy.conv2d_bias(x, self.F2, self.b2, [1, 1], is_valid=False)
     x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))  # 7x7x64
     x = dy.reshape(x, (7 * 7 * 64,))
   h = dy.rectify(self.W1 * x + self.hbias)
   if dropout:
     h = dy.dropout(h, DROPOUT_RATE)
   logits = self.W2 * h
   return logits
Example #39
 def evaluate(self, inputs, train=False):
     """
     Apply all MLP layers to concatenated input
     :param inputs: (key, vector) per feature type
     :param train: are we training now?
     :return: output vector of size self.output_dim
     """
     input_keys, inputs = list(map(list, zip(*list(inputs))))
     if self.input_keys:
         assert input_keys == self.input_keys, "Got:     %s\nBut expected input keys: %s" % (
             self.input_keys_str(self.input_keys), self.input_keys_str(input_keys))
     else:
         self.input_keys = input_keys
     if self.gated:
         gates = self.params.get("gates")
         if gates is None:  # FIXME attention weights should not be just parameters, but based on biaffine product?
             gates = self.params["gates"] = self.model.add_parameters((len(inputs), self.gated),
                                                                      init=dy.UniformInitializer(1))
         input_dims = [i.dim()[0][0] for i in inputs]
         max_dim = max(input_dims)
         x = dy.concatenate_cols([dy.concatenate([i, dy.zeroes(max_dim - d)])  # Pad with zeros to get uniform dim
                                  if d < max_dim else i for i, d in zip(inputs, input_dims)]) * gates
         # Possibly multiple "attention heads" -- concatenate outputs to one vector
         inputs = [dy.reshape(x, (x.dim()[0][0] * x.dim()[0][1],))]
     x = dy.concatenate(inputs)
     assert len(x.dim()[0]) == 1, "Input should be a vector, but has dimension " + str(x.dim()[0])
     dim = x.dim()[0][0]
     if self.input_dim:
         assert dim == self.input_dim, "Input dim mismatch: %d != %d" % (dim, self.input_dim)
     else:
         self.init_params(dim)
     self.config.print(self, level=4)
     if self.total_layers:
         if self.weights is None:
             self.weights = [[self.params[prefix + str(i)] for prefix in ("W", "b")]
                             for i in range(self.total_layers)]
             if self.weights[0][0].dim()[0][1] < dim:  # number of columns in W0
                 self.weights[0][0] = dy.concatenate_cols([self.weights[0][0], self.params["W0+"]])
         for i, (W, b) in enumerate(self.weights):
             self.config.print(lambda: x.npvalue().tolist(), level=4)
             try:
                 if train and self.dropout:
                     x = dy.dropout(x, self.dropout)
                 x = self.activation()(W * x + b)
             except ValueError as e:
                 raise ValueError("Error in evaluating layer %d of %d" % (i + 1, self.total_layers)) from e
     self.config.print(lambda: x.npvalue().tolist(), level=4)
     return x
Example #40
 def __call__(self, inputs, dropout=False):
     x = dy.inputTensor(inputs)
     conv1 = dy.parameter(self.pConv1)
     b1 = dy.parameter(self.pB1)
     x = dy.conv2d_bias(x, conv1, b1, [1, 1], is_valid=False)
     x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))
     conv2 = dy.parameter(self.pConv2)
     b2 = dy.parameter(self.pB2)
     x = dy.conv2d_bias(x, conv2, b2, [1, 1], is_valid=False)
     x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))
     x = dy.reshape(x, (7*7*64, 1))
     w1 = dy.parameter(self.pW1)
     b3 = dy.parameter(self.pB3)
     h = dy.rectify(w1*x+b3)
     if dropout:
         h = dy.dropout(h, DROPOUT_RATE)
     w2 = dy.parameter(self.pW2)
     output = w2*h
     # output = dy.softmax(w2*h)
     return output
Example #41
File: dy_model.py Project: jcyk/CWS
    def backward(self, char_seq, truth):
        self.renew_cg()

        cembs = [dy.dropout(dy.lookup(self.params['embed'], char), self.options['dropout_rate'])
                 for char in char_seq]

        word_seq, word = [], []
        for char, label in zip(cembs, truth):
            word.append(char)
            if label > 0:
                word_seq.append(word)
                word = []

        score = self.truth_score(word_seq)

        score_plus_margin_loss = self.beam_search(cembs,truth,self.options['margin_loss_discount'])

        loss = score_plus_margin_loss - score

        res = loss.scalar_value()
        loss.backward()
        return res
Example #42
 def __call__(self, x, train=False):
     """Input: ((H, T), B) Output: ((H, T), B)."""
     x = self.act(self.expand(x))
     x = dy.dropout(x, self.pdrop) if train else x
     return self.contract(x)
Example #43
 def encode(input_, train):
     x = conv(input_)
     x = dy.dropout(x, pdrop) if train else x
     return x
Example #44
 def dropout(self, input_):
     if self.train:
         return dy.dropout(input_, self.pdrop)
     return input_
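
All of these snippets rely on dy.dropout implementing inverted dropout: at training time entries are zeroed with probability p and the survivors are scaled by 1/(1-p), so no rescaling is needed at evaluation time. A quick self-contained check of that behaviour:

import dynet as dy

dy.renew_cg()
x = dy.inputVector([1.0] * 10000)
y = dy.dropout(x, 0.5)

# roughly half of the entries are zeroed and the survivors are scaled to 2.0,
# so the mean stays close to the original mean of 1.0
vals = y.npvalue()
print(sum(vals) / len(vals))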