Example #1
    def apply(self, sent1, sent2):
        eL = dy.parameter(self.linear)
        sent1 = dy.inputTensor(self.embedding.all_embeds_from_ix(sent1)) * eL
        sent2 = dy.inputTensor(self.embedding.all_embeds_from_ix(sent2)) * eL

        out1, out2 = self.feed_F(sent1, sent2)
        e_out = out1 * dy.transpose(out2)
        prob_f_1 = dy.softmax(e_out)
        score = dy.transpose(e_out)
        prob_f_2 = dy.softmax(score)

        sent1_allign = dy.concatenate_cols([sent1, prob_f_1 * sent2])
        sent2_allign = dy.concatenate_cols([sent2, prob_f_2 * sent1])

        out_g_1, out_g_2 = self.feed_G(sent1_allign, sent2_allign)

        sent1_out_g = dy.sum_dim(out_g_1, [0])
        sent2_out_g = dy.sum_dim(out_g_2, [0])

        concat = dy.transpose(dy.concatenate([sent1_out_g, sent2_out_g]))

        h_step_1 = dy.parameter(self.h_step_1)
        sent_h = dy.rectify(dy.dropout(concat, 0.2) * h_step_1)
        h_step_2 = dy.parameter(self.h_step_2)
        sent_h = dy.rectify(dy.dropout(sent_h, 0.2) * h_step_2)

        final = dy.parameter(self.linear2)
        final = dy.transpose(sent_h * final)
        return final
Example #2
  def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    src = src.as_tensor()

    src_height = src.dim()[0][0]
    src_width = src.dim()[0][1]
    # src_channels = 1
    batch_size = src.dim()[1]

    # convolution and pooling layers
    # src dim is ((40, 1000), 128)
    src = padding(src, self.filter_width[0]+3)
    l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filters1), stride = [self.stride[0], self.stride[0]], is_valid = True)) # ((1, 1000, 64), 128)
    pool1 = dy.maxpooling2d(l1, (1, 4), (1,2), is_valid = True) #((1, 499, 64), 128)

    pool1 = padding(pool1, self.filter_width[1]+3)
    l2 = dy.rectify(dy.conv2d(pool1, dy.parameter(self.filters2), stride = [self.stride[1], self.stride[1]], is_valid = True))# ((1, 499, 512), 128)
    pool2 = dy.maxpooling2d(l2, (1, 4), (1,2), is_valid = True)#((1, 248, 512), 128)

    pool2 = padding(pool2, self.filter_width[2])
    l3 = dy.rectify(dy.conv2d(pool2, dy.parameter(self.filters3), stride = [self.stride[2], self.stride[2]], is_valid = True))# ((1, 248, 1024), 128)
    pool3 = dy.max_dim(l3, d = 1)

    my_norm = dy.l2_norm(pool3) + 1e-6
    output = dy.cdiv(pool3,my_norm)
    output = dy.reshape(output, (self.num_filters[2],), batch_size = batch_size)

    return ExpressionSequence(expr_tensor=output)
Example #3
 def pop(self, strength):
     strength_left = strength
     for element in reversed(self.elements):
         old_strength = element.strength
         element.strength = dynet.rectify(old_strength -
                                          dynet.rectify(strength_left))
         strength_left -= old_strength
Example #4
    def beam_train_max_margin_with_answer_guidence(self, init_state, gold_ans):
        # perform two beam searches; one for prediction and the other for state action suff
        # max reward y = argmax(r(y)) with the help of gold_ans
        # max y' = argmax f(x,y) - R(y')
        # loss = max(f(x,y') - f(x,y) + R(y) - R(y') , 0)

        #end_state_list = self.beam_predict(init_state)
        end_state_list = self.beam_predict_max_violation(
            init_state, gold_ans)  # have to use this to make it work....
        reward_list = [x.reward(gold_ans) for x in end_state_list]
        violation_list = [
            s.path_score_expression.value() - reward
            for s, reward in zip(end_state_list, reward_list)
        ]

        best_score_state_idx = violation_list.index(max(
            violation_list))  # find the best scoring seq with minimal reward
        best_score_state = end_state_list[best_score_state_idx]
        best_score_state_reward = reward_list[best_score_state_idx]

        loss_value = 0

        if self.only_one_best:
            best_states = self.beam_find_actions_with_answer_guidence(
                init_state, gold_ans)
            if best_states == []:
                return 0, []
            best_reward_state = best_states[0]
            #print ("debug: found best_reward_state: qid =", best_reward_state.qinfo.seq_qid, best_reward_state)
            best_reward_state_reward = best_reward_state.reward(gold_ans)
            #print ("debug: best_reward_state_reward =", best_reward_state_reward)
            loss = dt.rectify(best_score_state.path_score_expression -
                              best_reward_state.path_score_expression +
                              dt.scalarInput(best_reward_state_reward -
                                             best_score_state_reward))
        else:
            best_states = self.beam_find_actions_with_answer_guidence(
                init_state, gold_ans)
            best_states_rewards = [s.reward(gold_ans) for s in best_states]
            max_reward = max(best_states_rewards)
            best_states = [
                s for s, r in zip(best_states, best_states_rewards)
                if r == max_reward
            ]
            loss = dt.average([
                dt.rectify(best_score_state.path_score_expression -
                           best_reward_state.path_score_expression +
                           dt.scalarInput(max_reward -
                                          best_score_state_reward))
                for best_reward_state in best_states
            ])

        loss_value = loss.value()
        loss.backward()

        self.neural_model.learner.update()

        #print ("debug: beam_train_max_margin_with_answer_guidence done. loss_value =", loss_value)

        return loss_value, best_states
Example #5
    def scorer(self, q_d_hists, q_idf, bm25_score, overlap_features, p):
        """
        Makes all the calculations and returns a relevance score
        """
        idf_vec = dy.inputVector(q_idf)
        bm25_score = dy.scalarInput(bm25_score)
        overlap_features = dy.inputVector(overlap_features)
        # Pass each query term representation through the MLP
        term_scores = []
        for hist in q_d_hists:
            q_d_hist = dy.reshape(dy.inputVector(hist), (1, len(hist)))
            hidd_out = dy.rectify(q_d_hist * self.W_1 + self.b_1)
            for i in range(0, self.mlp_layers):
                hidd_out = dy.rectify(hidd_out * self.W_n[i] + self.b_n[i])
            term_scores.append(hidd_out * self.W_last + self.b_last)

        # Term Gating
        gating_weights = idf_vec * self.w_g
        
        bm25_feature = bm25_score * self.W_bm25 + self.b_bm25 
        drop_out = dy.scalarInput(1)
        # inverted dropout on the bm25 feature; p = probability of keeping a unit active
        drop_num = float(np.random.rand() < p) / p
        drop_out.set(drop_num)
        
        bm25_feature *= drop_out
        drmm_score = dy.transpose(dy.concatenate(term_scores)) * dy.reshape(gating_weights, (len(q_idf), 1)) #basic MLPs output
        doc_score = dy.transpose(dy.concatenate([drmm_score, overlap_features])) * self.W_scores + self.b_scores #extra features layer
        
        
        return doc_score
Example #6
def leaky_relu(x):
    """:type x: dn.Expression
    :rtype: dn.Expression"""
    positive = dn.rectify(x)
    negative = dn.rectify(-x) * -0.01
    ret = positive + negative
    return ret
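
A minimal usage sketch for the snippet above (not from the original source; it assumes DyNet is installed and imported as dn, matching the example's alias):

import dynet as dn

# Hedged usage sketch: evaluate leaky_relu on a small input vector.
dn.renew_cg()
x = dn.inputVector([-2.0, -0.5, 0.0, 1.5])
y = leaky_relu(x)       # negative entries are scaled by 0.01
print(y.npvalue())      # approx. [-0.02, -0.005, 0., 1.5]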
Example #7
    def transduce(self, embed_sent):
        src = embed_sent.as_tensor()

        sent_len = src.dim()[0][1]
        src_width = 1
        batch_size = src.dim()[1]
        pad_size = (self.window_receptor - 1) // 2  # TODO adapt it also for even window size

        src = dy.concatenate([
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size), src,
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size)
        ],
                             d=1)
        padded_sent_len = sent_len + 2 * pad_size

        conv1 = dy.parameter(self.pConv1)
        bias1 = dy.parameter(self.pBias1)
        src_chn = dy.reshape(src, (self.input_dim, padded_sent_len, 1),
                             batch_size=batch_size)
        cnn_layer1 = dy.conv2d_bias(src_chn, conv1, bias1, stride=[1, 1])

        hidden_layer = dy.reshape(cnn_layer1, (self.internal_dim, sent_len, 1),
                                  batch_size=batch_size)
        if self.non_linearity == 'linear':
            hidden_layer = hidden_layer
        elif self.non_linearity == 'tanh':
            hidden_layer = dy.tanh(hidden_layer)
        elif self.non_linearity == 'relu':
            hidden_layer = dy.rectify(hidden_layer)
        elif self.non_linearity == 'sigmoid':
            hidden_layer = dy.logistic(hidden_layer)

        for conv_hid, bias_hid in self.builder_layers:
            hidden_layer = dy.conv2d_bias(hidden_layer,
                                          dy.parameter(conv_hid),
                                          dy.parameter(bias_hid),
                                          stride=[1, 1])
            hidden_layer = dy.reshape(hidden_layer,
                                      (self.internal_dim, sent_len, 1),
                                      batch_size=batch_size)
            if self.non_linearity == 'linear':
                hidden_layer = hidden_layer
            elif self.non_linearity == 'tanh':
                hidden_layer = dy.tanh(hidden_layer)
            elif self.non_linearity == 'relu':
                hidden_layer = dy.rectify(hidden_layer)
            elif self.non_linearity == 'sigmoid':
                hidden_layer = dy.logistic(hidden_layer)
        last_conv = dy.parameter(self.last_conv)
        last_bias = dy.parameter(self.last_bias)
        output = dy.conv2d_bias(hidden_layer,
                                last_conv,
                                last_bias,
                                stride=[1, 1])
        output = dy.reshape(output, (sent_len, self.output_dim),
                            batch_size=batch_size)
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
Example #8
 def __call__(self, a, b, c):
     enc = [
         dy.rectify(self.a_mlp(a)),  # HOTFIX rectify here?
         dy.rectify(self.b_mlp(b)),
         dy.rectify(self.c_mlp(c))
     ]
     enc = [dy.concatenate([dy.scalarInput(1), x]) for x in enc]
     return self.multilinear(*enc)
Example #9
def do_one_batch(X_batch, Z_batch):
    # Flatten the batch into 1-D vector for workaround
    batch_size = X_batch.shape[0]
    if DO_BATCH:
        X_batch_f = X_batch.flatten('F')
        Z_batch_f = Z_batch.flatten('F')
        x = dy.reshape(dy.inputVector(X_batch_f), (nmf, nframes),
                       batch_size=batch_size)
        z = dy.reshape(dy.inputVector(Z_batch_f), (nvgg,),
                       batch_size=batch_size)
        scnn.add_input([X_batch[i] for i in range(X_batch.shape[0])])
        vgg.add_input([Z_batch[i] for i in range(X_batch.shape[0])])

    else:
        x = dy.matInput(X_batch.shape[0], X_batch.shape[1])
        x.set(X_batch.flatten('F'))
        z = dy.vecInput(Z_batch.shape[0])
        z.set(Z_batch.flatten('F'))
        x = dy.reshape(dy.transpose(x, [1, 0]),
                       (1, X_batch.shape[1], X_batch.shape[0]))
    print(x.npvalue().shape)
    a_h1 = dy.conv2d_bias(x, w_i, b_i, [1, 1], is_valid=False)
    h1 = dy.rectify(a_h1)
    h1_pool = dy.kmax_pooling(h1, D[1], d=1)

    a_h2 = dy.conv2d_bias(h1_pool, w_h1, b_h1, [1, 1], is_valid=False)
    h2 = dy.rectify(a_h2)
    h2_pool = dy.kmax_pooling(h2, D[2], d=1)

    a_h3 = dy.conv2d_bias(h2_pool, w_h2, b_h2, [1, 1], is_valid=False)
    h3 = dy.rectify(a_h3)
    h3_pool = dy.kmax_pooling(h3, D[3], d=1)

    h4 = dy.kmax_pooling(h3_pool, 1, d=1)
    h4_re = dy.reshape(h4, (J[3], ))
    #print(h4_re.npvalue().shape)
    g = dy.scalarInput(1.)
    zem_sp = dy.weight_norm(h4_re, g)
    #print(zem_sp.npvalue().shape)
    zem_vgg = w_embed * z + b_embed
    #print(zem_vgg.npvalue().shape)

    sa = dy.transpose(zem_sp) * zem_vgg
    s = dy.rectify(sa)

    if PRINT_EMBED:
        print('Vgg embedding vector:', zem_vgg.npvalue().shape)
        print(zem_vgg.value())

        print('Speech embedding vector:', zem_sp.npvalue().shape)
        print(zem_sp.value())
    if PRINT_SIM:
        print('Raw Similarity:', sa.npvalue())
        print(sa.value())
        print('Similarity:', s.npvalue())
        print(s.value())

    return s
Example #10
    def __call__(self, sentence1, sentence2):
        W_1 = dy.parameter(self.W_1)
        # relu activation with dropout
        out1 = dy.rectify(dy.dropout(sentence1, self.drop_param) * W_1)
        out2 = dy.rectify(dy.dropout(sentence2, self.drop_param) * W_1)

        W_2 = dy.parameter(self.W_2)
        out1 = dy.rectify(dy.dropout(out1, self.drop_param) * W_2)
        out2 = dy.rectify(dy.dropout(out2, self.drop_param) * W_2)
        return out1, out2
Example #11
def selu(x):
    """ :type x: dn.Expression
        :rtype: dn.Expression """
    positive = dn.rectify(x)
    positive_indicator = dn.rectify(dn.cdiv(positive, positive + epsilon))
    negative = -dn.rectify(-x)
    exp_negative = dn.exp(negative) - positive_indicator
    exp_negative_minus_alpha = exp_negative * alpha - alpha + positive_indicator * alpha
    # x>0: x=x * scale; x<0: x = (alpha * exp(x) - alpha) * scale
    ret = (positive + exp_negative_minus_alpha) * scale
    return ret
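
The snippet above relies on module-level constants alpha, scale, and epsilon defined elsewhere in its source file. A minimal sketch of how they can be set (alpha and scale are the standard SELU constants; the exact epsilon used here is an assumption, it only guards the division):

import dynet as dn

alpha = 1.6732632423543772    # standard SELU alpha
scale = 1.0507009873554805    # standard SELU scale
epsilon = 1e-8                # assumed small stabilizer for the indicator

dn.renew_cg()
x = dn.inputVector([-1.0, 0.0, 2.0])
print(selu(x).npvalue())      # approx. [-1.1113, 0., 2.1014]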
Example #12
    def forward(self, s1, s2, label=None):
        eL = dy.parameter(self.embeddingLinear)
        s1 = dy.inputTensor(s1) * eL
        s2 = dy.inputTensor(s2) * eL

        # F step
        Lf1 = dy.parameter(self.mlpF1)
        Fs1 = dy.rectify(dy.dropout(s1, 0.2) * Lf1)
        Fs2 = dy.rectify(dy.dropout(s2, 0.2) * Lf1)
        Lf2 = dy.parameter(self.mlpF2)
        Fs1 = dy.rectify(dy.dropout(Fs1, 0.2) * Lf2)
        Fs2 = dy.rectify(dy.dropout(Fs2, 0.2) * Lf2)

        # Attention scoring
        score1 = Fs1 * dy.transpose(Fs2)
        prob1 = dy.softmax(score1)

        score2 = dy.transpose(score1)
        prob2 = dy.softmax(score2)

        # Align pairs using attention
        s1Pairs = dy.concatenate_cols([s1, prob1 * s2])
        s2Pairs = dy.concatenate_cols([s2, prob2 * s1])

        # G step
        Lg1 = dy.parameter(self.mlpG1)
        Gs1 = dy.rectify(dy.dropout(s1Pairs, 0.2) * Lg1)
        Gs2 = dy.rectify(dy.dropout(s2Pairs, 0.2) * Lg1)
        Lg2 = dy.parameter(self.mlpG2)
        Gs1 = dy.rectify(dy.dropout(Gs1, 0.2) * Lg2)
        Gs2 = dy.rectify(dy.dropout(Gs2, 0.2) * Lg2)

        # Sum
        Ss1 = dy.sum_dim(Gs1, [0])
        Ss2 = dy.sum_dim(Gs2, [0])

        concatS12 = dy.transpose(dy.concatenate([Ss1, Ss2]))

        # H step
        Lh1 = dy.parameter(self.mlpH1)
        Hs = dy.rectify(dy.dropout(concatS12, 0.2) * Lh1)
        Lh2 = dy.parameter(self.mlpH2)
        Hs = dy.rectify(dy.dropout(Hs, 0.2) * Lh2)

        # Final layer
        final_layer = dy.parameter(self.final_layer)
        final = dy.transpose(Hs * final_layer)

        # Label can be 0...
        if label is not None:
            return dy.pickneglogsoftmax(final, label)
        else:
            out = dy.softmax(final)
            return np.argmax(out.npvalue())
Example #13
def transform(sentence):
    w1 = dy.parameter(transform_w1)
    b1 = dy.parameter(transform_b1)
    w2 = dy.parameter(transform_w2)
    b2 = dy.parameter(transform_b2)

    sentence_transformed = dy.colwise_add(w1 * sentence, b1)
    sentence_transformed = dy.rectify(sentence_transformed)
    sentence_transformed = dy.colwise_add(w2 * sentence_transformed, b2)
    sentence_transformed = dy.rectify(sentence_transformed)

    return sentence_transformed
Example #14
    def cal_scores(self, src_encodings):
        src_len = len(src_encodings)

        src_encodings = dy.concatenate_cols(
            src_encodings)  # src_ctx_dim, src_len, batch_size

        W_arc_hidden_to_head = dy.parameter(self.W_arc_hidden_to_head)
        b_arc_hidden_to_head = dy.parameter(self.b_arc_hidden_to_head)
        W_arc_hidden_to_dep = dy.parameter(self.W_arc_hidden_to_dep)
        b_arc_hidden_to_dep = dy.parameter(self.b_arc_hidden_to_dep)

        W_label_hidden_to_head = dy.parameter(self.W_label_hidden_to_head)
        b_label_hidden_to_head = dy.parameter(self.b_label_hidden_to_head)
        W_label_hidden_to_dep = dy.parameter(self.W_label_hidden_to_dep)
        b_label_hidden_to_dep = dy.parameter(self.b_label_hidden_to_dep)

        U_arc_1 = dy.parameter(self.U_arc_1)
        u_arc_2 = dy.parameter(self.u_arc_2)

        U_label_1 = [dy.parameter(x) for x in self.U_label_1]
        u_label_2_1 = [dy.parameter(x) for x in self.u_label_2_1]
        u_label_2_2 = [dy.parameter(x) for x in self.u_label_2_2]
        b_label = [dy.parameter(x) for x in self.b_label]

        h_arc_head = dy.rectify(
            dy.affine_transform(
                [b_arc_hidden_to_head, W_arc_hidden_to_head,
                 src_encodings]))  # n_arc_ml_units, src_len, bs
        h_arc_dep = dy.rectify(
            dy.affine_transform(
                [b_arc_hidden_to_dep, W_arc_hidden_to_dep, src_encodings]))
        h_label_head = dy.rectify(
            dy.affine_transform([
                b_label_hidden_to_head, W_label_hidden_to_head, src_encodings
            ]))
        h_label_dep = dy.rectify(
            dy.affine_transform(
                [b_label_hidden_to_dep, W_label_hidden_to_dep, src_encodings]))

        h_arc_head_transpose = dy.transpose(h_arc_head)
        h_label_head_transpose = dy.transpose(h_label_head)

        s_arc = h_arc_head_transpose * dy.colwise_add(U_arc_1 * h_arc_dep,
                                                      u_arc_2)

        s_label = []
        for U_1, u_2_1, u_2_2, b in zip(U_label_1, u_label_2_1, u_label_2_2,
                                        b_label):
            e1 = h_label_head_transpose * U_1 * h_label_dep
            e2 = h_label_head_transpose * u_2_1 * dy.ones((1, src_len))
            e3 = dy.ones((src_len, 1)) * u_2_2 * h_label_dep
            s_label.append(e1 + e2 + e3 + b)
        return s_arc, s_label
Example #15
 def __call__(self, x, dropout=False):
     if args.conv:
         x = dy.reshape(x, (28, 28, 1))
         x = dy.conv2d_bias(x, self.F1, self.b1, [1, 1], is_valid=False)
         x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))
         x = dy.conv2d_bias(x, self.F2, self.b2, [1, 1], is_valid=False)
         x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))  # 7x7x64
         x = dy.reshape(x, (7 * 7 * 64, ))
     h = dy.rectify(self.W1 * x + self.hbias)
     if dropout:
         h = dy.dropout(h, DROPOUT_RATE)
     logits = self.W2 * h
     return logits
Example #16
 def __call__(self, x, dropout=False):
   if args.conv:
     x = dy.reshape(x, (28, 28, 1))
     x = dy.conv2d_bias(x, self.F1, self.b1, [1, 1], is_valid=False)
     x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))
     x = dy.conv2d_bias(x, self.F2, self.b2, [1, 1], is_valid=False)
     x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))  # 7x7x64
     x = dy.reshape(x, (7 * 7 * 64,))
   h = dy.rectify(self.W1 * x + self.hbias)
   if dropout:
     h = dy.dropout(h, DROPOUT_RATE)
   logits = self.W2 * h
   return logits
Example #17
def set_E_matrix(sen1, sen2, model_params):
    F_w1 = model_params['F_w1']
    F_b1 = model_params['F_b1']
    F_w2 = model_params['F_w2']
    F_b2 = model_params['F_b2']

    #sen1 = dy.dropout(sen1, DROPOUT_RATE)
    #sen2 = dy.dropout(sen2, DROPOUT_RATE)

    F_sen1 = dy.rectify(F_w2 * (dy.rectify(dy.colwise_add(F_w1*sen1, F_b1))) + F_b2)
    F_sen2 = dy.rectify(F_w2 * (dy.rectify(dy.colwise_add(F_w1*sen2, F_b1))) + F_b2)

    E_matrix = (dy.transpose(F_sen1)) * F_sen2

    return E_matrix, F_sen1, F_sen2
Example #18
def get_v1_v2(alpha, beta, sen1, sen2, model_params):
    G_w1 = model_params['G_w1']
    G_b1 = model_params['G_b1']
    G_w2 = model_params['G_w2']
    G_b2 = model_params['G_b2']

    con = dy.concatenate([sen1, beta], d=0)
    #con = dy.dropout(con, DROPOUT_RATE)
    v1 = dy.rectify(G_w2 * (dy.rectify(dy.colwise_add(G_w1 * con, G_b1))) + G_b2)

    con = dy.concatenate([sen2, alpha], d=0)
    #con = dy.dropout(con, DROPOUT_RATE)
    v2 = dy.rectify(G_w2 * (dy.rectify(dy.colwise_add(G_w1 * con, G_b1))) + G_b2)

    return v1, v2
Example #19
    def recurrence(self, xt, hmtm1, h_history_tm1, dropout_flag):
        """

        :param xt: input vector at the time step t
        :param hmtm1: hidden memories in previous n_steps steps
        :param h_history_tm1: previous hidden summary
        :param dropout_flag: make a decision for conducting partial dropout
        :return:
        """
        score = dy.concatenate([dy.dot_product(self.u, dy.tanh( \
            self.W_h * hmtm1[i] + self.W_x * xt + self.W_htilde * h_history_tm1)) for i in range(self.n_steps)])
        # normalize the attention score
        score = dy.softmax(score)
        # shape: (1, n_out), history of [h[t-n_steps-1], ..., h[t-2]]
        h_history_t = dy.reshape(dy.transpose(score) * hmtm1[:-1], d=(self.n_out,))
        htm1 = hmtm1[-1]
        #h_tilde_t = dy.concatenate([h_history_t, htm1])
        h_tilde_t = htm1 + dy.rectify(h_history_t)
        if dropout_flag:
            # perform partial dropout, i.e., add dropout over the matrices W_x*
            rt = dy.logistic(dy.dropout(self.W_xr, self.dropout_rate) * xt + self.W_hr * h_tilde_t + self.br)
            zt = dy.logistic(dy.dropout(self.W_xz, self.dropout_rate) * xt + self.W_hz * h_tilde_t + self.bz)
            ht_hat = dy.tanh(dy.dropout(self.W_xh, self.dropout_rate) * xt + self.W_hh * dy.cmult(rt, h_tilde_t) \
                             + self.bh)
            ht = dy.cmult(zt, h_tilde_t) + dy.cmult((1.0 - zt), ht_hat)
        else:
            rt = dy.logistic(self.W_xr * xt + self.W_hr * h_tilde_t + self.br)
            zt = dy.logistic(self.W_xz * xt + self.W_hz * h_tilde_t + self.bz)
            ht_hat = dy.tanh(self.W_xh * xt + self.W_hh * dy.cmult(rt, h_tilde_t) + self.bh)
            ht = dy.cmult(zt, h_tilde_t) + dy.cmult((1.0 - zt), ht_hat)
        hmt = dy.concatenate([hmtm1[1:], dy.reshape(ht, (1, self.n_out))])
        return hmt, h_history_t
Example #20
def loss_cost_sensitive_margin_last(gold_tags, idx, beam_costs_prev, scores,
                                    beam_size):
    beam_size_prev, num_tags = scores.dim()[0]
    gold_idx = dynet_get_best_flat_idx(gold_tags, idx, beam_costs_prev)

    costs_flat = dynet_compute_costs_flat(gold_tags, idx, beam_costs_prev)

    scores_flat = dy.reshape(scores, (beam_size_prev * num_tags,))
    scores_flat_np = scores_flat.npvalue()
    sigma_hat = np.argsort(scores_flat_np)[::-1]

    # the beam size for the last transition is one.
    next_beam_size = beam_size if idx < len(gold_tags) - 1 else 1

    # gold_idx is inside the beam, so compare to first outside beam.
    if gold_idx in sigma_hat[:next_beam_size]:
        comp_idx = sigma_hat[next_beam_size]
    # gold_idx is outside the beam, so compare to last in beam.
    else:
        comp_idx = sigma_hat[next_beam_size - 1]

    # NOTE: this can be zero if comp_idx has the same cost as gold_idx (desirable?)
    cost_delta = costs_flat[comp_idx] - costs_flat[gold_idx]
    return cost_delta * dy.rectify(scores_flat[comp_idx] -
                                   scores_flat[gold_idx] + 1.0)
Example #21
def _vaswani_model_scores(m):
    out_c2 = dy.rectify(
        dy.colwise_add(c2_Wlm * m["beam_lm_hs"],
                       dy.pick(m["aux_c2"], m["idx"], 1)))

    # if cfg["use_beam_bilstm"]:
    #     _, beam_size_prev = out_c2.dim()[0]
    #     beam_hs = [dy.pick(out_c2, i, 1) for i in xrange(beam_size_prev)]
    #     bf_init = b_fwd.initial_state()
    #     bb_init = b_bwd.initial_state()
    #     bf_hs = dy.concatenate_cols(bf_init.transduce(beam_hs))
    #     bb_hs = dy.concatenate_cols(bb_init.transduce(reversed(beam_hs))[::-1])
    #     out_c2 = dy.concatenate([bf_hs, bb_hs])

    # if cfg["use_beam_mlp"]:
    #     out_b = dy.max_dim(b_W1 * out_c2 + b_b1, 1)
    #     out_c2 = dy.colwise_add(out_c2, dy.rectify(b_W2 * out_b + b_b2))

    scores = o_W * out_c2 + o_b
    scores = dy.transpose(scores)
    if cfg["accumulate_scores"]:
        scores = m["acc_scores"] + scores
        m["scores"] = scores

    return scores
Example #22
def _vaswani_model_init(e):
    w_embs = [w2e[idx] for idx in e["tk_words"]]
    if cfg["use_postags"]:
        pos_embs = [pos2e[idx] for idx in e["tk_postags"]]
        i_embs = [
            dy.concatenate([w_embs[i], pos_embs[i]])
            for i in xrange(len(e["tk_words"]))
        ]
    else:
        i_embs = w_embs

    f_init = fwd.initial_state()
    b_init = bwd.initial_state()
    lm_init = lm.initial_state()

    f_hs = dy.concatenate_cols(f_init.transduce(i_embs))
    b_hs = dy.concatenate_cols(b_init.transduce(reversed(i_embs))[::-1])
    out_c1 = dy.rectify(c1_Wf * f_hs + c1_Wb * b_hs)
    aux_c2 = c2_Wc * out_c1

    m = {
        "aux_c2": aux_c2,
        "beam_lm_states": [lm_init],
        "beam_lm_hs": dy.zeros((cfg["lm_h_dim"], 1)),
        "idx": 0
    }
    if cfg["accumulate_scores"]:
        m["acc_scores"] = dy.zeros((1, 1))

    return m
Example #23
def get_constit_loss(fws, bws, goldspans):
    if not USE_PTB_CONSTITS:
        raise Exception("should not be using the constit loss now!",
                        USE_PTB_CONSTITS)

    if len(goldspans) == 0:
        return None, 0

    losses = []
    sentlen = len(fws)

    for j in range(sentlen):
        istart = 0
        if USE_SPAN_CLIP and j > ALLOWED_SPANLEN:
            istart = max(0, j - ALLOWED_SPANLEN)
        for i in range(istart, j + 1):
            constit_ij = w_c * dy.rectify(
                w_fb * dy.concatenate([fws[i][j], bws[i][j]]) + b_fb) + b_c
            logloss = dy.log_softmax(constit_ij)

            isconstit = int((i, j) in goldspans)
            losses.append(dy.pick(logloss, isconstit))

    ptbconstitloss = dy.scalarInput(DELTA) * -dy.esum(losses)
    numspanstagged = len(losses)
    return ptbconstitloss, numspanstagged
Example #24
    def forward(self, state):
        full_side = state[0]
        empty_side = state[1]

        full_embs = [
            self.l1_weights[actions_ids.index(item)] for item in full_side
        ]
        empty_embs = [
            self.l1_weights[actions_ids.index(item)] for item in empty_side
        ]

        full_sum = dy.esum(full_embs)

        if len(empty_embs) > 0:
            empty_sum = dy.esum(empty_embs)
        else:
            empty_sum = dy.parameter(self.empty_state)

        cat = dy.concatenate([full_sum, empty_sum])

        result = dy.transpose(
            dy.rectify(
                dy.reshape(cat, (1, 2)) * dy.parameter(self.l2_weights)))

        return result
Example #25
    def encode_sentence(self, toks):
        state_forward = self.forward_buffRNN.initial_state()
        state_backward = self.backward_buffRNN.initial_state()

        tok_embeddings = []
        buffer_forward = []
        buffer_backward = []

        for tok in toks:
            tok_embeddings.append(
                dy.rectify(self.W_input * self.get_tok_embedding(tok)))

        for tid in range(len(toks)):
            state_forward = state_forward.add_input(tok_embeddings[tid])
            buffer_forward.append(state_forward.output())

            state_backward = state_backward.add_input(
                tok_embeddings[len(toks) - 1 - tid])
            buffer_backward.append(state_backward.output())

        buffer = [
            dy.concatenate([x, y])
            for x, y in zip(buffer_forward, reversed(buffer_backward))
        ]

        return tok_embeddings, buffer
Example #26
def calc_scores(words):
    dy.renew_cg()
    W_cnn_express = dy.parameter(W_cnn)
    b_cnn_express = dy.parameter(b_cnn)
    W_sm_express = dy.parameter(W_sm)
    b_sm_express = dy.parameter(b_sm)
    Waux_sm_express = dy.parameter(Waux_sm)
    baux_sm_express = dy.parameter(baux_sm)
    # basically, win size tells you how many words/chars/pixels (?) we're 'looking at' at each step.
    # Here, 1 unit is 1 word. If a sample has fewer words than win size, then we probably do need some padding.
    # Pad with index 0. (so we're treating the pad words as UNK (?))
    if len(words) < WIN_SIZE:
        words += [0] * (WIN_SIZE-len(words))

    # Convolution + pooling layer
    cnn_in = dy.concatenate([W_emb[x] for x in words], d=1) # concat repr of all words
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn_express, b_cnn_express, stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1) # Is this max pooling?
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out) # Is this ReLU activation?

    # get scores for either task
    scores_main = W_sm_express * pool_out + b_sm_express
    scores_aux = Waux_sm_express * pool_out + baux_sm_express
    return scores_main, scores_aux
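
The snippet reads its parameters and hyper-parameters from module-level globals. A minimal setup sketch with purely illustrative, assumed dimensions (the names follow the snippet; none of the sizes come from the original repository):

import dynet as dy

VOCAB_SIZE, EMB_SIZE, WIN_SIZE, FILTER_SIZE = 10000, 64, 3, 8   # assumed sizes
NTAGS_MAIN, NTAGS_AUX = 5, 2                                    # assumed label counts

model = dy.ParameterCollection()
W_emb = model.add_lookup_parameters((VOCAB_SIZE, 1, 1, EMB_SIZE))   # word vectors as 1 x 1 x EMB_SIZE maps
W_cnn = model.add_parameters((1, WIN_SIZE, EMB_SIZE, FILTER_SIZE))  # convolution over WIN_SIZE-word windows
b_cnn = model.add_parameters((FILTER_SIZE,))
W_sm = model.add_parameters((NTAGS_MAIN, FILTER_SIZE))              # main-task output layer
b_sm = model.add_parameters((NTAGS_MAIN,))
Waux_sm = model.add_parameters((NTAGS_AUX, FILTER_SIZE))            # auxiliary-task output layer
baux_sm = model.add_parameters((NTAGS_AUX,))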
Example #27
 def _get_semantic_rep(self, y):
     w_emb = dy.parameter(self.W_latin_embeddings)
     w_emb2 = dy.parameter(self.W_latin_embeddings2)
     semantic_rep = w_emb2 * dy.rectify(
         w_emb * self.embedding_provider.get_word_embedding(y))
     self.latin_semantic_rep[y] = semantic_rep.npvalue()
     return semantic_rep
Example #28
    def lookup(self, token):
        """
        Performs forward propagation from the token yielding a char embedding
        Args:
             token  (list): a list of chars
        Returns:
             a dynet expression. The char embedding.
        """
        token = list(token)

        char_embeddings = [
            self.E[self.charset.index(c)] for c in token if c in self.charset
        ]  #ignores unk chars

        if not char_embeddings:  #empty word, no char recognized
            print('problematic token', token, file=sys.stderr, flush=True)
            return self.b

        fwd_state = self.fwd_rnn.initial_state()
        fwd_states = fwd_state.transduce(char_embeddings)

        bwd_state = self.bwd_rnn.initial_state()
        bwd_states = bwd_state.transduce(reversed(char_embeddings))

        hidden = dy.concatenate([fwd_states[-1], bwd_states[-1]])
        out = dy.rectify(self.O * hidden + self.b)
        return out
Example #29
def calc_predict_and_activations(wids, tag, words):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE-len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print ('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print (display_activations(words, activations))
    print ('scores=%s, predict: %d' % (scores, predict))
    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print ('  bias=%s' % bias)
    contributions = W * features
    print (' very bad (%.4f): %s' % (scores[0], contributions[0]))
    print ('      bad (%.4f): %s' % (scores[1], contributions[1]))
    print ('  neutral (%.4f): %s' % (scores[2], contributions[2]))
    print ('     good (%.4f): %s' % (scores[3], contributions[3]))
    print ('very good (%.4f): %s' % (scores[4], contributions[4]))
Example #30
    def __call__(self, htA, HO, transform_flag=True):
        """

        :param htA:
        :param HO:
        :param transform_flag: determines whether the model applies the selective transformation
        :return:
        """
        seq_len = len(HO)
        HO_hat = []
        Weights = []
        for i in range(seq_len):
            hiO = HO[i]
            if transform_flag:
                hiO_hat = hiO + dy.rectify(self.W_A * htA + self.W_O * hiO + self.b)
            else:
                hiO_hat = hiO
            wi = dy.tanh(dy.dot_product(self.W_concat, dy.concatenate([htA, hiO_hat])))
            HO_hat.append(hiO_hat)
            Weights.append(wi)
        HO_hat = dy.concatenate([dy.reshape(ele, d=(1, 2 * self.dim_opi)) for ele in HO_hat])
        Weights = dy.concatenate(Weights)
        # length: seq_len
        Weights = dy.softmax(Weights)
        Weights_np = Weights.npvalue()
        ho_summary_t = dy.reshape(Weights, (1, seq_len)) * HO_hat
        return dy.reshape(ho_summary_t, (2 * self.dim_opi,)), Weights_np
Example #31
def calc_predict_and_activations(wids, tag, words):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in,
                             W_cnn,
                             b_cnn,
                             stride=(1, 1),
                             is_valid=False)
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE, ))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print(display_activations(words, activations))
    print('scores=%s, predict: %d' % (scores, predict))
    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print('  bias=%s' % bias)
    contributions = W * features
    print(' very bad (%.4f): %s' % (scores[0], contributions[0]))
    print('      bad (%.4f): %s' % (scores[1], contributions[1]))
    print('  neutral (%.4f): %s' % (scores[2], contributions[2]))
    print('     good (%.4f): %s' % (scores[3], contributions[3]))
    print('very good (%.4f): %s' % (scores[4], contributions[4]))
Example #32
 def __call__(self, s1):
     b_nli = dy.parameter(self.b_nli)
     W_nli_1 = dy.parameter(self.W_nli_1)
     relu = dy.rectify(dy.affine_transform([b_nli, W_nli_1, s1, ]))#W_nli_2, s2, W_nli_u, u, W_nli_v, v]))
     b_s = dy.parameter(self.b_s)
     w_s = dy.parameter(self.w_s)
     return dy.affine_transform([b_s, w_s, relu])
Example #33
def calc_scores(wids):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE-len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
Example #34
 def __call__(self, inputs, dropout=False):
     x = dy.inputTensor(inputs)
     conv1 = dy.parameter(self.pConv1)
     b1 = dy.parameter(self.pB1)
     x = dy.conv2d_bias(x, conv1, b1, [1, 1], is_valid=False)
     x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))
     conv2 = dy.parameter(self.pConv2)
     b2 = dy.parameter(self.pB2)
     x = dy.conv2d_bias(x, conv2, b2, [1, 1], is_valid=False)
     x = dy.rectify(dy.maxpooling2d(x, [2, 2], [2, 2]))
     x = dy.reshape(x, (7*7*64, 1))
     w1 = dy.parameter(self.pW1)
     b3 = dy.parameter(self.pB3)
     h = dy.rectify(w1*x+b3)
     if dropout:
         h = dy.dropout(h, DROPOUT_RATE)
     w2 = dy.parameter(self.pW2)
     output = w2*h
     # output = dy.softmax(w2*h)
     return output
Example #35
    def cal_scores(self, src_encodings):
        src_len = len(src_encodings)

        src_encodings = dy.concatenate_cols(src_encodings)  # src_ctx_dim, src_len, batch_size

        W_arc_hidden_to_head = dy.parameter(self.W_arc_hidden_to_head)
        b_arc_hidden_to_head = dy.parameter(self.b_arc_hidden_to_head)
        W_arc_hidden_to_dep = dy.parameter(self.W_arc_hidden_to_dep)
        b_arc_hidden_to_dep = dy.parameter(self.b_arc_hidden_to_dep)

        W_label_hidden_to_head = dy.parameter(self.W_label_hidden_to_head)
        b_label_hidden_to_head = dy.parameter(self.b_label_hidden_to_head)
        W_label_hidden_to_dep = dy.parameter(self.W_label_hidden_to_dep)
        b_label_hidden_to_dep = dy.parameter(self.b_label_hidden_to_dep)

        U_arc_1 = dy.parameter(self.U_arc_1)
        u_arc_2 = dy.parameter(self.u_arc_2)

        U_label_1 = [dy.parameter(x) for x in self.U_label_1]
        u_label_2_1 = [dy.parameter(x) for x in self.u_label_2_1]
        u_label_2_2 = [dy.parameter(x) for x in self.u_label_2_2]
        b_label = [dy.parameter(x) for x in self.b_label]

        h_arc_head = dy.rectify(dy.affine_transform([b_arc_hidden_to_head, W_arc_hidden_to_head, src_encodings]))  # n_arc_ml_units, src_len, bs
        h_arc_dep = dy.rectify(dy.affine_transform([b_arc_hidden_to_dep, W_arc_hidden_to_dep, src_encodings]))
        h_label_head = dy.rectify(dy.affine_transform([b_label_hidden_to_head, W_label_hidden_to_head, src_encodings]))
        h_label_dep = dy.rectify(dy.affine_transform([b_label_hidden_to_dep, W_label_hidden_to_dep, src_encodings]))

        h_arc_head_transpose = dy.transpose(h_arc_head)
        h_label_head_transpose = dy.transpose(h_label_head)

        s_arc = h_arc_head_transpose * dy.colwise_add(U_arc_1 * h_arc_dep, u_arc_2)

        s_label = []
        for U_1, u_2_1, u_2_2, b in zip(U_label_1, u_label_2_1, u_label_2_2, b_label):
            e1 = h_label_head_transpose * U_1 * h_label_dep
            e2 = h_label_head_transpose * u_2_1 * dy.ones((1, src_len))
            e3 = dy.ones((src_len, 1)) * u_2_2 * h_label_dep
            s_label.append(e1 + e2 + e3 + b)
        return s_arc, s_label
Example #36
 def highway(input_, train):
     for func, weight, bias in zip(funcs, weights, biases):
         proj = dy.rectify(func(input_, train))
         transform = dy.logistic(dy.affine_transform([bias, weight, input_]))
         input_ = dy.cmult(transform, proj) + dy.cmult(input_, 1 - transform)
     return input_
Example #37
def node_iteration(rel, g, node, opts, assoc_model, trainer, log_file, is_source):
    """
    Perform one iteration of trying to score a node's neighbors above negative samples.
    """
    
    # true instances likelihood
    trues = targets(g, node) if is_source else sources(g, node)
    side = '->' if is_source else '<-'
    if len(trues) == 0: return 0.0
    
    if opts.debug:
        dy.renew_cg(immediate_compute = True, check_validity = True)
    else:
        dy.renew_cg()
    
    # compute association score as dynet expression (can't do this above due to staleness)
    true_scores = []
    for tr in trues:
        if is_source:
            j_assoc_score = assoc_model.word_assoc_score(node, tr, rel)
        else:
            j_assoc_score = assoc_model.word_assoc_score(tr, node, rel)
        if log_file is not None:
            log_file.write('{} {}\tTRUE_{}\t{:.3e}\n'\
                         .format(node, side, tr, j_assoc_score.scalar_value()))
        true_scores.append(j_assoc_score)


    # false targets likelihood - negative sampling (uniform)
    # collect negative samples
    if opts.nll:
        sample_scores = [[ts] for ts in true_scores]
    else:
        margins = []
    neg_samples = [np.random.choice(range(N)) for _ in range(opts.neg_samp * len(trues))]
    # remove source and true targets if applicable
    for t in [node] + trues:
        if t in neg_samples:
            neg_samples.remove(t)
            neg_samples.append(np.random.choice(range(N)))
    for (i,ns) in enumerate(neg_samples):
        # compute association score as dynet expression
        if is_source:
            ns_assoc_score = assoc_model.word_assoc_score(node, ns, rel)
        else:
            ns_assoc_score = assoc_model.word_assoc_score(ns, node, rel)
        if log_file is not None:
            log_file.write('{} {}\tNEG_{}\t{:.3e}\n'\
                         .format(node, side, ns, ns_assoc_score.scalar_value()))
        corresponding_true = i // opts.neg_samp
        if opts.nll:
            sample_scores[corresponding_true].append(ns_assoc_score)
        else:
            # TODO maybe use dy.hinge()
            ctt_score = true_scores[corresponding_true]
            margin = ctt_score - ns_assoc_score
            margins.append(dy.rectify(dy.scalarInput(1.0) - margin))


    # compute overall loss
    if opts.nll:
        if len(sample_scores) == 0:
            dy_loss = dy.scalarInput(0.0)
        else:
            dy_loss = dy.esum([dy.pickneglogsoftmax(dy.concatenate(scrs), 0) for scrs in sample_scores])
    else:
        if len(margins) == 0:
            dy_loss = dy.scalarInput(0.0)
        else:
            dy_loss = dy.esum(margins)
    sc_loss = dy_loss.scalar_value()
    if log_file is not None:
        log_file.write('{}\tLOSS\t{:.3e}\n'\
                         .format(node, sc_loss))
                         
    # backprop and recompute score
    if opts.v > 1:
        timeprint('overall loss for relation {}, node {} as {} = {:.6f}'\
                  .format(rel, node, 'source' if is_source else 'target', sc_loss))

    dy_loss.backward()
    trainer.update()

    return sc_loss