Example #1
def calc_score_of_history(words):
  # Lookup the embeddings and concatenate them
  emb = dy.concatenate([W_emb[x] for x in words])
  # Create the hidden layer
  h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
  # Calculate the score and return
  return dy.affine_transform([b_sm, W_sm, h])
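
This scorer assumes module-level parameters (W_emb, W_h, b_h, W_sm, b_sm). A minimal setup sketch, assuming an n-gram context of size N; all sizes below are hypothetical:

import dynet as dy

N, EMB_SIZE, HID_SIZE, nwords = 2, 128, 128, 10000  # hypothetical sizes
model = dy.ParameterCollection()
W_emb = model.add_lookup_parameters((nwords, EMB_SIZE))  # word embeddings
W_h = model.add_parameters((HID_SIZE, EMB_SIZE * N))     # hidden-layer weights
b_h = model.add_parameters(HID_SIZE)                     # hidden-layer bias
W_sm = model.add_parameters((nwords, HID_SIZE))          # softmax weights
b_sm = model.add_parameters(nwords)                      # softmax bias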
Example #2
File: network.py Project: danielhers/cnn
 def __call__(self, obs, batched=False):
     out = self.network(obs, batched)
     W, b = dy.parameter(self.W), dy.parameter(self.b)
     As = dy.affine_transform([b, W, out])
     if self.dueling:
         W_extra, b_extra = dy.parameter(self.W_extra), dy.parameter(self.b_extra)
         V = dy.affine_transform([b_extra, W_extra, out])
         return As, V
     return As
Example #3
def calc_score_of_history(words, dropout=0.0):
  # Lookup the embeddings and concatenate them
  emb = dy.concatenate([W_emb[x] for x in words])
  # Create the hidden layer
  h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
  # CHANGE 2: perform dropout
  if dropout != 0.0:
    h = dy.dropout(h, dropout)
  # Calculate the score and return
  return dy.affine_transform([b_sm, W_sm, h])
Example #4
def calc_score_of_histories(words, dropout=0.0):
  # Transpose from a list of histories to a list of the words at each history position
  words = np.transpose(words)
  # Lookup the embeddings and concatenate them
  emb = dy.concatenate([dy.lookup_batch(W_emb, x) for x in words])
  # Create the hidden layer
  h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
  # Perform dropout
  if dropout != 0.0:
    h = dy.dropout(h, dropout)
  # Calculate the score and return
  return dy.affine_transform([b_sm, W_sm, h])
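
A usage sketch for the batched scorer: dy.pickneglogsoftmax_batch computes one loss per batch element against the batched scores, and dy.sum_batches collapses them into a scalar. The toy batch, model, and trainer below are assumptions:

histories = [[1, 2], [3, 4], [5, 6]]  # hypothetical batch of 3 bigram histories
targets = [7, 8, 9]                   # the next word for each history
trainer = dy.SimpleSGDTrainer(model)  # model as in the setup sketch above

dy.renew_cg()
scores = calc_score_of_histories(histories, dropout=0.5)
loss = dy.sum_batches(dy.pickneglogsoftmax_batch(scores, targets))
loss.value()     # run the forward pass
loss.backward()  # accumulate gradients
trainer.update()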
Example #5
def generate(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent

    #get the outputs of the source BiLSTM
    src_outputs = [dy.concatenate([x.output(), y.output()]) for x, y in LSTM_SRC.add_inputs([LOOKUP_SRC[word] for word in src])]

    src_output = src_outputs[-1]

    #gets the parameters for the attention
    src_output_matrix = dy.concatenate_cols(src_outputs)
    w1_att_src = dy.parameter(w1_att_src_p)
    fixed_attentional_component = w1_att_src * src_output_matrix

    #generate until an eos tag or the maximum length is reached
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])

    prev_word = sos_trg
    trg_sent = []
    attention_matrix = []
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    W_m = dy.parameter(W_m_p)
    b_m = dy.parameter(b_m_p)

    for i in range(MAX_SENT_SIZE):
        #feed the previous word into the lstm, calculate the most likely word, add it to the sentence
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        att_output, alignment = calc_attention(src_output_matrix, output_embedding, fixed_attentional_component)
        attention_matrix.append(alignment)
        middle_expr = dy.tanh(dy.affine_transform([b_m, W_m, dy.concatenate([output_embedding, att_output])]))
        s = dy.affine_transform([b_sm, W_sm, middle_expr])
        log_probs = dy.log_softmax(s).value()
        next_word = np.argmax(log_probs)

        if next_word == eos_trg:
            break
        prev_word = next_word
        trg_sent.append(i2w_trg[next_word])
    return trg_sent, dy.concatenate_cols(attention_matrix).value()
Example #6
def generate(sent):
    dy.renew_cg()

    src = sent


    #initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    #get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in src])[-1].output()

    #generate until an eos tag or the maximum length is reached
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])

    prev_word = sos_trg
    trg_sent = []
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for i in range(MAX_SENT_SIZE):
        #feed the previous word into the lstm, calculate the most likely word, add it to the sentence
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        log_probs = dy.log_softmax(s).value()
        next_word = np.argmax(log_probs)

        if next_word == eos_trg:
            break
        prev_word = next_word
        trg_sent.append(i2w_trg[next_word])
    return trg_sent
Example #7
def calc_loss(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]


    #initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    #get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in src])[-1].output()
    #now step through the output sentence
    all_losses = []

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_word in trg[1:]:
        #feed the previous word into the decoder LSTM
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        all_losses.append(dy.pickneglogsoftmax(s, next_word))

        prev_word = next_word
    return dy.esum(all_losses)
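
A minimal training-loop sketch around calc_loss; the trainer, epoch count, and the (src_ids, trg_ids) pairs in training_data are assumptions:

trainer = dy.AdamTrainer(model)
NUM_EPOCHS = 10  # hypothetical
for epoch in range(NUM_EPOCHS):
    for sent_pair in training_data:
        loss = calc_loss(sent_pair)
        loss.value()     # forward pass
        loss.backward()  # backward pass
        trainer.update()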
Example #8
def calc_loss(sents):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]
    src_cws = []

    src_len = [len(sent) for sent in src_sents]        
    max_src_len = np.max(src_len)
    num_words = 0

    for i in range(max_src_len):
        src_cws.append([sent[i] for sent in src_sents])


    #initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    #get the output of the first LSTM
    src_output = init_state_src.add_inputs([dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])[-1].output()
    #now decode
    all_losses = []

    # Decoder
    #need to mask padding at end of sentence
    tgt_cws = []
    tgt_len = [len(sent) for sent in tgt_sents]
    max_tgt_len = np.max(tgt_len)
    masks = []

    for i in range(max_tgt_len):
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_words, mask in zip(tgt_cws[1:], masks):
        #feed the previous words into the decoder LSTM
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        loss = (dy.pickneglogsoftmax_batch(s, next_words))
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1,), len(sents))
        mask_loss = loss * mask_expr
        all_losses.append(mask_loss)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words
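
The reshape is what makes the masking work: dy.inputVector builds a single length-len(sents) vector, and dy.reshape(..., (1,), len(sents)) reinterprets it as a batch of scalars that multiplies the batched loss elementwise. A standalone illustration with a toy batch of 3:

dy.renew_cg()
mask = [1, 1, 0]                            # third sentence has already ended
mask_expr = dy.inputVector(mask)            # dim ((3,), 1): one vector
mask_expr = dy.reshape(mask_expr, (1,), 3)  # dim ((1,), 3): batch of 3 scalars
print(mask_expr.dim())                      # -> ((1,), 3)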
Example #9
def calc_reinforce_loss(words, tags, delta):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    word_reps = LSTM.transduce([LOOKUP[x] for x in words])

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)

    #calculate the probability distribution 
    scores = [dy.affine_transform([b, W, x]) for x in word_reps]
    losses = [dy.pickneglogsoftmax(score, tag) for score, tag in zip(scores, tags)]
    probs = [dy.softmax(score).npvalue() for score in scores]

    #then take samples from the probability distribution
    samples = [np.random.choice(range(len(x)), p=x) for x in probs]

    #calculate accuracy=reward
    correct = [sample == tag for sample, tag in zip(samples, tags)]
    r_i = float(sum(correct))/len(correct)
    r = dy.constant((1), r_i)
    # Reward baseline for each word
    W_bl = dy.parameter(W_bl_p)
    b_bl = dy.parameter(b_bl_p)
    r_b = [dy.affine_transform([b_bl, W_bl, dy.nobackprop(x)]) for x in word_reps]

    #use nobackprop to break the computation graph here, since the baseline
    #is trained separately and not backpropagated through in the overall score
    rewards_over_baseline = [(r - dy.nobackprop(x)) for x in r_b]
    #the scores for training the baseline
    baseline_scores = [dy.square(r - x) for x in r_b]

    #then calculate the reinforce scores using reinforce
    reinforce_scores = [r_s*score for r_s, score in zip(rewards_over_baseline, scores)]

    #we want the first len(sent)-delta scores from xent then delta scores from reinforce
    #for mixer
    if len(scores) > delta:
        mixer_scores = scores[:len(scores)-delta] + reinforce_scores[-delta:]
    else:
        mixer_scores = reinforce_scores
    return dy.esum(mixer_scores), dy.esum(baseline_scores)
Example #10
def calc_loss(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]

    # initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    # get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in src])[-1].output()

    # Now compute mean and standard deviation of source hidden state.
    W_mean = dy.parameter(W_mean_p)
    V_mean = dy.parameter(V_mean_p)
    b_mean = dy.parameter(b_mean_p)

    W_var = dy.parameter(W_var_p)
    V_var = dy.parameter(V_var_p)
    b_var = dy.parameter(b_var_p)

    # The mean vector from the encoder.
    mu = mlp(src_output, W_mean, V_mean, b_mean)
    # This is the diagonal vector of the log-covariance matrix from the encoder
    # (treating it as the log variance makes the later implementation easier)
    log_var = mlp(src_output, W_var, V_var, b_var)

    # Compute KL[N(u(x), sigma(x)) || N(0, I)]
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_loss = -0.5 * dy.sum_elems(1 + log_var - dy.pow(mu, dy.inputVector([2])) - dy.exp(log_var))

    z = reparameterize(mu, log_var)

    # now step through the output sentence
    all_losses = []

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_word in trg[1:]:
        # feed the previous word into the decoder LSTM
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        all_losses.append(dy.pickneglogsoftmax(s, next_word))

        prev_word = next_word

    softmax_loss = dy.esum(all_losses)

    return kl_loss, softmax_loss
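
reparameterize is assumed by this snippet (and by examples #24 and #41 below); a sketch of the standard reparameterization trick, z = mu + sigma * eps with eps ~ N(0, I):

def reparameterize(mu, log_var):
    # sample z = mu + exp(log_var / 2) * eps, with eps ~ N(0, I)
    dim = mu.dim()[0][0]
    eps = dy.random_normal(dim)
    std = dy.exp(log_var * 0.5)
    return mu + dy.cmult(std, eps)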
Example #11
def calc_scores(words):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    word_reps = LSTM.transduce([LOOKUP[x] for x in words])

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)
    scores = [dy.affine_transform([b, W, x]) for x in word_reps]

    return scores
Example #12
    def cal_scores(self, src_encodings):
        src_len = len(src_encodings)

        src_encodings = dy.concatenate_cols(src_encodings)  # src_ctx_dim, src_len, batch_size

        W_arc_hidden_to_head = dy.parameter(self.W_arc_hidden_to_head)
        b_arc_hidden_to_head = dy.parameter(self.b_arc_hidden_to_head)
        W_arc_hidden_to_dep = dy.parameter(self.W_arc_hidden_to_dep)
        b_arc_hidden_to_dep = dy.parameter(self.b_arc_hidden_to_dep)

        W_label_hidden_to_head = dy.parameter(self.W_label_hidden_to_head)
        b_label_hidden_to_head = dy.parameter(self.b_label_hidden_to_head)
        W_label_hidden_to_dep = dy.parameter(self.W_label_hidden_to_dep)
        b_label_hidden_to_dep = dy.parameter(self.b_label_hidden_to_dep)

        U_arc_1 = dy.parameter(self.U_arc_1)
        u_arc_2 = dy.parameter(self.u_arc_2)

        U_label_1 = [dy.parameter(x) for x in self.U_label_1]
        u_label_2_1 = [dy.parameter(x) for x in self.u_label_2_1]
        u_label_2_2 = [dy.parameter(x) for x in self.u_label_2_2]
        b_label = [dy.parameter(x) for x in self.b_label]

        h_arc_head = dy.rectify(dy.affine_transform([b_arc_hidden_to_head, W_arc_hidden_to_head, src_encodings]))  # n_arc_ml_units, src_len, bs
        h_arc_dep = dy.rectify(dy.affine_transform([b_arc_hidden_to_dep, W_arc_hidden_to_dep, src_encodings]))
        h_label_head = dy.rectify(dy.affine_transform([b_label_hidden_to_head, W_label_hidden_to_head, src_encodings]))
        h_label_dep = dy.rectify(dy.affine_transform([b_label_hidden_to_dep, W_label_hidden_to_dep, src_encodings]))

        h_arc_head_transpose = dy.transpose(h_arc_head)
        h_label_head_transpose = dy.transpose(h_label_head)

        s_arc = h_arc_head_transpose * dy.colwise_add(U_arc_1 * h_arc_dep, u_arc_2)

        s_label = []
        for U_1, u_2_1, u_2_2, b in zip(U_label_1, u_label_2_1, u_label_2_2, b_label):
            e1 = h_label_head_transpose * U_1 * h_label_dep
            e2 = h_label_head_transpose * u_2_1 * dy.ones((1, src_len))
            e3 = dy.ones((src_len, 1)) * u_2_2 * h_label_dep
            s_label.append(e1 + e2 + e3 + b)
        return s_arc, s_label
Example #13
 def sent_lm_loss(self, sent):
   rnn_cur = self.rnn.initial_state()
   losses = []
   prev_word = self.start
   for word in sent:
     x_t = self.embeddings[prev_word]
     rnn_cur = rnn_cur.add_input(x_t)
     logits = dy.affine_transform([self.lb,
                                   self.h2l,
                                   rnn_cur.output()])
     losses.append(dy.pickneglogsoftmax(logits, word))
     prev_word = word
   return dy.esum(losses)
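
A sketch of the attributes sent_lm_loss assumes, as they might be declared in __init__ (sizes are hypothetical and w2i is an assumed word-to-id map):

# inside __init__ (a sketch)
model = dy.ParameterCollection()
self.rnn = dy.LSTMBuilder(1, EMB_SIZE, HID_SIZE, model)            # layers, input, hidden
self.embeddings = model.add_lookup_parameters((nwords, EMB_SIZE))  # word embeddings
self.h2l = model.add_parameters((nwords, HID_SIZE))                # hidden-to-logits
self.lb = model.add_parameters(nwords)                             # logit bias
self.start = w2i["<s>"]                                            # start-of-sentence id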
Example #14
File: network.py Project: danielhers/cnn
    def __call__(self, obs, batched=False):
        out = obs if isinstance(obs, dy.Expression) else dy.inputTensor(obs, batched=batched)

        for i in range(self.n_layers):
            b, W = dy.parameter(self.bs[i]), dy.parameter(self.Ws[i])
            out = dy.affine_transform([b, W, out])
            if self.layer_norm and i != self.n_layers - 1:
                out = dy.layer_norm(out, self.ln_gs[i], self.ln_bs[i])
            if self.specified_activation:
                if self.activation[i] is not None:
                    out = self.activation[i](out)
            else:
                out = self.activation(out)
        return out
Example #15
 def build_graph(self, x, is_train):
     # dy.renew_cg()
     if is_train:
         embeddings = [
             dy.dropout(self.word_embedding[w], self.dropout) for w in x
         ]
     else:
         embeddings = [self.word_embedding[w] for w in x]
     lstm_out = self.bilstm.transduce(embeddings)
     features = [
         dy.affine_transform([self.linear_bias, self.linear_w, rep])
         for rep in lstm_out
     ]
     return features
Example #17
 def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
   src = src.as_tensor()
   # convolutional layer
   src = padding(src, src.dim()[0][0], src.dim()[0][1], self.filter_width, self.stride, src.dim()[1])
   l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filter_conv), stride = [self.stride, self.stride], is_valid = True))
   timestep = l1.dim()[0][1]
   features = l1.dim()[0][2]
   batch_size = l1.dim()[1]
    # transpose l1 to be (timestep, dim), but keep the batch_size.
   rhn_in = dy.reshape(l1, (timestep, features), batch_size = batch_size)
   rhn_in = [dy.pick(rhn_in, i) for i in range(timestep)]
   for l in range(self.rhn_num_hidden_layers):
     rhn_out = []
     # initialize a random vector for the first state vector, keep the same batch size.
     prev_state = dy.parameter(self.init[l])
      # begin the recurrent highway network
     for t in range(timestep):
       for m in range(0, self.rhn_microsteps):
         H = dy.affine_transform([dy.parameter(self.recur[l][m][1]), dy.parameter(self.recur[l][m][0]),  prev_state])
         T = dy.affine_transform([dy.parameter(self.recur[l][m][3]), dy.parameter(self.recur[l][m][2]),  prev_state])
         if m == 0:
           H += dy.parameter(self.linear[l][0]) * rhn_in[t]
           T += dy.parameter(self.linear[l][1]) * rhn_in[t]
         H = dy.tanh(H)
         T = dy.logistic(T)
         prev_state = dy.cmult(1 - T, prev_state) + dy.cmult(T, H) # ((1024, ), batch_size)
       rhn_out.append(prev_state)
     if self.residual and l>0:
       rhn_out = [sum(x) for x in zip(rhn_out, rhn_in)]
     rhn_in = rhn_out
   # Compute the attention-weighted average of the activations
   rhn_in = dy.concatenate_cols(rhn_in)
   scores = dy.transpose(dy.parameter(self.attention[0][1]))*dy.tanh(dy.parameter(self.attention[0][0])*rhn_in) # ((1,510), batch_size)
   scores = dy.reshape(scores, (scores.dim()[0][1],), batch_size = scores.dim()[1])
    attn_out = rhn_in * dy.softmax(scores) # rhn_in is ((1024,510), batch_size); softmax is ((510,), batch_size)
   return ExpressionSequence(expr_tensor = attn_out)
Example #18
def calc_scores_with_previous_tag(words, referent_tags=None):
    """
    Calculate scores using the previous tag as input. If referent tags are provided, we sample between the
    previous referent tag and the previous system prediction.
    :param words:
    :param referent_tags:
    :return:
    """
    dy.renew_cg()

    word_embs = [LOOKUP[x] for x in words]

    # Transduce all batch elements for the backward LSTM, using the original word embeddings.
    bwd_init = bwdLSTM.initial_state()
    bwd_word_reps = bwd_init.transduce(reversed(word_embs))

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)

    scores = []
    # Transduce one by one for the forward LSTM
    fwd_init = fwdLSTM.initial_state()
    s_fwd = fwd_init

    prev_tag = start_tag

    index = 0
    for word, bwd_word_rep in zip(word_embs, reversed(bwd_word_reps)):
        # Concatenate word and tag representation just as training.
        fwd_input = dy.concatenate([word, TAG_LOOKUP[prev_tag]])
        s_fwd = s_fwd.add_input(fwd_input)
        combined_rep = dy.concatenate([s_fwd.output(), bwd_word_rep])
        score = dy.affine_transform([b, W, combined_rep])
        prediction = np.argmax(score.npvalue())

        if referent_tags:
            if sampler.sample_true():
                prev_tag = referent_tags[index]
            else:
                prev_tag = prediction
            index += 1
        else:
            prev_tag = prediction

        scores.append(score)

    return scores
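
The sampler object is external to this snippet; a minimal stand-in that picks the reference tag with probability p (the rate itself is an assumption) might look like:

import random

class TruthSampler:
    def __init__(self, p):
        self.p = p
    def sample_true(self):
        # True -> use the referent tag, False -> use the model's own prediction
        return random.random() < self.p

sampler = TruthSampler(0.75)  # 75% teacher forcing, a hypothetical rate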
Example #19
    def batch_loss(self, batch, train=True):

        # load the parameters
        W_hid = dy.parameter(self.W_hid)
        b_hid = dy.parameter(self.b_hid)

        W_out = dy.parameter(self.W_out)

        losses = []
        for _, sent in batch:
            for i in range(self.n_word_context, len(sent)):
                prev_word_ix = sent[i - 1]
                curr_word_ix = sent[i]
                ct1 = dy.lookup(self.embed, prev_word_ix)
                ctx = ct1
                if self.n_word_context == 2:
                    prev_prev_word_ix = sent[i - 2]
                    ct2 = dy.lookup(self.embed, prev_prev_word_ix)
                    ctx = dy.concatenate([ct1, ct2])
                elif self.n_word_context == 3:
                    prev_prev_prev_word_ix = sent[i - 3]
                    ct3 = dy.lookup(self.embed, prev_prev_prev_word_ix)
                    prev_prev_word_ix = sent[i - 2]
                    ct2 = dy.lookup(self.embed, prev_prev_word_ix)
                    ctx = dy.concatenate([ct1, ct2, ct3])

                # hid is the hidden layer output, size=hidden_size
                # compute b_hid + W_hid * ctx, but faster
                hid = dy.affine_transform([b_hid, W_hid, ctx])
                hid = dy.tanh(hid)

                # out is the prediction of the next word, size=vocab_size
                out = W_out * hid

                # Interpretation: The model estimates that
                # log P(curr_word=k | prev_word) ~ out[k]
                # in other words,
                # P(curr_word=k | prev_word) = exp(out[k]) / sum_j exp(out[j])
                #                            = softmax(out)[k]

                # We want to maximize the probability of the correct word.
                # (equivalently, minimize the negative log-probability)

                loss = dy.pickneglogsoftmax(out, curr_word_ix)
                losses.append(loss)

        # esum simply adds up the expressions in the list
        return dy.esum(losses)
Example #21
    def beam_search_generate(self, src_seq, beam_n=5):
        dynet.renew_cg()

        embedded = self.embed_seq(src_seq)
        input_vectors = self.encode_seq(embedded)

        w = dynet.parameter(self.decoder_w)
        b = dynet.parameter(self.decoder_b)

        s = self.dec_lstm.initial_state()
        s = s.add_input(input_vectors[-1])
        beams = [{"state": s, "out": [], "err": 0}]
        completed_beams = []
        while len(completed_beams) < beam_n:
            potential_beams = []
            for beam in beams:
                if len(beam["out"]) > 0:
                    embed_vector = self.tgt_lookup[beam["out"][-1].i]
                    s = beam["state"].add_input(embed_vector)

                out_vector = dynet.affine_transform([b, w, s.output()])
                probs = dynet.softmax(out_vector)
                probs = probs.vec_value()

                for potential_next_i in range(len(probs)):
                    potential_beams.append({
                        "state": s,
                        "out": beam["out"] + [self.tgt_vocab[potential_next_i]],
                        "err": beam["err"] - math.log(probs[potential_next_i]),
                    })

            potential_beams.sort(key=lambda x: x["err"])
            beams = potential_beams[:beam_n - len(completed_beams)]
            completed_beams = completed_beams + [
                beam
                for beam in beams if beam["out"][-1] == self.tgt_vocab.END_TOK
                or len(beam["out"]) > 5 * len(src_seq)
            ]
            beams = [
                beam for beam in beams
                if beam["out"][-1] != self.tgt_vocab.END_TOK
                and len(beam["out"]) <= 5 * len(src_seq)
            ]
        completed_beams.sort(key=lambda x: x["err"])
        return [beam["out"] for beam in completed_beams]
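
Every vocabulary item is expanded for every beam, so potential_beams grows to beam_n * |V| entries before the sort. A common refinement (not in the snippet) keeps only the top-k continuations per beam before building candidate dicts; a sketch:

import heapq

def top_k_continuations(probs, k):
    # indices of the k most probable next tokens
    return heapq.nlargest(k, range(len(probs)), key=lambda i: probs[i])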
Example #22
    def get_decode_loss(self, src_encodings, decoder_init, tgt_sents):
        W_s = dy.parameter(self.W_s)
        b_s = dy.parameter(self.b_s)
        W_h = dy.parameter(self.W_h)
        b_h = dy.parameter(self.b_h)
        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)

        tgt_words, tgt_masks = input_transpose(tgt_sents)
        batch_size = len(tgt_sents)

        decoder_init_cell = W_s * decoder_init + b_s
        s = self.dec_builder.initial_state(
            [decoder_init_cell, dy.tanh(decoder_init_cell)])
        ctx_tm1 = dy.vecInput(self.args.hidden_size * 2)
        losses = []

        # start from <S>, until y_{T-1}
        for t, (y_ref_t, mask_t) in enumerate(zip(tgt_words[1:],
                                                  tgt_masks[1:]),
                                              start=1):
            y_tm1_embed = dy.lookup_batch(self.tgt_lookup, tgt_words[t - 1])
            x = dy.concatenate([y_tm1_embed, ctx_tm1])
            s = s.add_input(x)
            h_t = s.output()
            ctx_t, alpha_t = self.attention(src_encodings, h_t, batch_size)

            # read_out = dy.tanh(W_h * dy.concatenate([h_t, ctx_t]) + b_h)
            read_out = dy.tanh(
                dy.affine_transform([b_h, W_h,
                                     dy.concatenate([h_t, ctx_t])]))
            if self.args.dropout > 0.:
                read_out = dy.dropout(read_out, self.args.dropout)
            y_t = W_y * read_out + b_y
            loss_t = dy.pickneglogsoftmax_batch(y_t, y_ref_t)

            if 0 in mask_t:
                mask_expr = dy.inputVector(mask_t)
                mask_expr = dy.reshape(mask_expr, (1, ), batch_size)
                loss_t = loss_t * mask_expr

            losses.append(loss_t)
            ctx_tm1 = ctx_t

        loss = dy.esum(losses)
        loss = dy.sum_batches(loss) / batch_size

        return loss
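
input_transpose is assumed by this method; a stand-in consistent with how it is used here, mirroring the padding-plus-mask pattern of examples #8 and #34 (the pad token is hypothetical):

def input_transpose(sents, pad_token=0):
    # batch of sentences -> per-timestep word lists plus 0/1 masks
    max_len = max(len(s) for s in sents)
    words, masks = [], []
    for t in range(max_len):
        words.append([s[t] if len(s) > t else pad_token for s in sents])
        masks.append([1 if len(s) > t else 0 for s in sents])
    return words, masks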
Example #23
  def get_attention_state(self, main, states):
    if not states:
      return main
    affinities = [self.get_affinity(main, state) for state in states]
    states_exp = dy.transpose(dy.concatenate_cols(states))
    scores_exp = dy.softmax(dy.concatenate(affinities))
    context = None
    if len(states) == 1:
      context = dy.transpose(states_exp)
    else:
      context = dy.transpose(states_exp)*scores_exp

    Wa = dy.parameter(self.Wa)
    b = dy.parameter(self.b)
    hidden_state = dy.affine_transform([b, Wa, dy.concatenate([main, context])])
    return hidden_state
Example #24
    def calc_loss_basic(self, frames, label):

        # Renew the computation graph
        dy.renew_cg()

        # Initialize LSTM
        init_state_src = self.lstm_builder.initial_state()

        # Instantiate the params
        W_mean = dy.parameter(self.W_mean_p)
        V_mean = dy.parameter(self.V_mean_p)
        b_mean = dy.parameter(self.b_mean_p)
        W_var = dy.parameter(self.W_var_p)
        V_var = dy.parameter(self.V_var_p)
        b_var = dy.parameter(self.b_var_p)

        input_frames = dy.inputTensor(frames)
        output_label = label

        # Get the LSTM embeddings
        src_output = init_state_src.add_inputs(
            [frame for frame in input_frames])[-1].output()

        # Get the mean and diagonal log covariance from the encoder
        mu = self.mlp(src_output, W_mean, V_mean, b_mean)
        log_var = self.mlp(src_output, W_var, V_var, b_var)

        # Compute the KL Divergence loss
        kl_loss = -0.5 * dy.sum_elems(1 + log_var -
                                      dy.pow(mu, dy.inputVector([2])) -
                                      dy.exp(log_var))

        # Reparameterize
        z = self.reparameterize(mu, log_var)

        W_sm = dy.parameter(self.W_sm_p)
        b_sm = dy.parameter(self.b_sm_p)

        # Calculate the reconstruction loss
        pred = dy.affine_transform([b_sm, W_sm, z])
        label_embedding = self.lookup[label]
        recons_loss = dy.pickneglogsoftmax(pred, label)

        return kl_loss, recons_loss
Example #25
    def train_example(self, example):
        """
        Parameters
        ----------
        example : tuple
            A single training example of form (encoder_inputs, ground_labels)
            with initial padding to offset the encoder inputs.

        Returns
        -------
        Loss for example.
        """
        encoder_input = example[0]
        ground_labels = example[1]

        context_outputs = self.encoding(encoder_input)

        R = dy.parameter(self.R)
        b = dy.parameter(self.b)

        # Decoding:
        losses = []
        for (context_output, ground_label) in zip(context_outputs,
                                                  ground_labels):
            # context_output : state from a single timestep of context_encoder
            # ground_label : ground truth labels for given sentence (for teacher forcing)
            decoder_input = [self.vocab.index("<START>")] + ground_label
            decoder_target = ground_label + [self.vocab.index("<END>")]

            embedded_decoder_input = [
                self.embeddings[word] for word in decoder_input
            ]
            decoder_initial_state = self.output_decoder.initial_state(
                vecs=[context_output, context_output])
            decoder_output = decoder_initial_state.transduce(
                embedded_decoder_input)
            log_probs_char = [
                dy.affine_transform([b, R, h_t]) for h_t in decoder_output
            ]

            for (log_prob, target) in zip(log_probs_char, decoder_target):
                losses.append(dy.pickneglogsoftmax(log_prob, target))

        loss = dy.esum(losses)
        return loss
Example #26
def linear_layer(exp, weights, biases=None):
    """ Linear layer with weights and biases.

    Inputs:
        exp (dy.Expression): A Dynet tensor.
        weights (dy.Parameters): Dynet parameters representing weights (a matrix).
        biases (dy.Parameters, optional): Dynet parameters representing biases
            (a vector).

    Returns:
        dy.Expression representing exp * weights + biases
    """
    if biases is not None:
        return dy.affine_transform([add_dim(biases),
                                    add_dim(exp) if is_vector(exp) else exp,
                                    weights])
    else:
        return linear_transform(exp, weights)
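
add_dim, is_vector, and linear_transform are helpers assumed by the snippet; hypothetical stand-ins consistent with the exp * weights + biases contract (dy.affine_transform([b, x, W]) computes b + x * W):

def is_vector(exp):
    # True when the expression is rank-1, e.g. dim ((d,), batch_size)
    return len(exp.dim()[0]) == 1

def add_dim(exp):
    # promote a length-d vector to a 1 x d row matrix
    return dy.reshape(exp, (1, exp.dim()[0][0]))

def linear_transform(exp, weights):
    return (add_dim(exp) if is_vector(exp) else exp) * weights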
Example #27
    def step(self, x, hx, cx):
        if not self.test:
            if self.dropout_x > 0:
                x = dy.cmult(self.dropout_mask_x, x)
            if self.dropout_h > 0:
                hx = dy.cmult(self.dropout_mask_h, hx)

        gates = dy.affine_transform(
            [self.bias, self.weight_ih, x, self.weight_hh, hx])
        i = dy.pickrange(gates, 0, self.n_hidden)
        f = dy.pickrange(gates, self.n_hidden, self.n_hidden * 2)
        g = dy.pickrange(gates, self.n_hidden * 2, self.n_hidden * 3)
        o = dy.pickrange(gates, self.n_hidden * 3, self.n_hidden * 4)

        i, f, g, o = dy.logistic(i), dy.logistic(f), dy.tanh(g), dy.logistic(o)
        cy = dy.cmult(f, cx) + dy.cmult(i, g)
        hy = dy.cmult(o, dy.tanh(cy))
        return hy, cy
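
The single affine_transform evaluates bias + weight_ih * x + weight_hh * hx in one fused call, packing all four gate pre-activations into one vector that dy.pickrange then slices. A standalone sketch of the packing, with hypothetical sizes:

import dynet as dy

model = dy.ParameterCollection()
n_hidden, n_in = 8, 4
weight_ih = model.add_parameters((4 * n_hidden, n_in))
weight_hh = model.add_parameters((4 * n_hidden, n_hidden))
bias = model.add_parameters(4 * n_hidden)

dy.renew_cg()
x = dy.inputVector([0.0] * n_in)
hx = dy.inputVector([0.0] * n_hidden)
gates = dy.affine_transform([bias, weight_ih, x, weight_hh, hx])
print(gates.dim())  # ((32,), 1); slices come out in [i, f, g, o] order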
Example #28
    def initialize(self, src_encodings, training=True):
        if training and self.rnn_dropout > 0.:
            self.decoder.set_dropout(self.rnn_dropout)
        else:
            self.decoder.disable_dropout()

        with parameters(self.src_enc_trans_W,
                        self.src_enc_trans_b,
                        trainable=training) as (src_enc_trans_W,
                                                src_enc_trans_b):
            last_enc_state = src_encodings[-1]
            decoder_init_state = dy.tanh(
                dy.affine_transform(
                    [src_enc_trans_b, src_enc_trans_W, last_enc_state]))
            state = self.decoder.initial_state(
                [decoder_init_state, decoder_init_state])

            return state
Example #29
    def loss(self, observation, instance):
        #trans = instance.transformation
        #if trans not in self.known_transformations:
        #k    newtrans = list(self.param_dict.keys())[0][0] ### SUPER ARBITRARY
        #k    tqdm.write("WARNING: unknown transformtion picked for instance {}; using transformation {}".format(trans, newtrans))
        #k    trans = newtrans

        trans = 'lul'
        b = dy.parameter(self.param_dict[(trans, 'b')])
        W = dy.parameter(self.param_dict[(trans, 'W')])

        features, label = observation

        prediction = dy.softmax(dy.affine_transform([b, W, dy.inputVector(features)]))

        loss = -dy.log(dy.pick(prediction, label))

        return prediction, loss
Example #30
def generate(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]

    #initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    #get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x]
                                            for x in src])[-1].output()

    #generate until an eos tag or the maximum length is reached
    current_state = LSTM_TRG_BUILDER.initial_state().set_s(
        [src_output, dy.tanh(src_output)])

    prev_word = sos_trg
    trg_sent = []
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for i in range(MAX_SENT_SIZE):
        #feed the previous word into the lstm, calculate the most likely word, add it to the sentence
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        log_probs = dy.log_softmax(s).value()
        next_word = np.argmax(log_probs)

        if next_word == eos_trg:
            break
        prev_word = next_word
        trg_sent.append(i2w_trg[next_word])
    return trg_sent
Example #31
    def decode_batch(self, encoding, output_batch):

        """
        Batch decoding function
        :param encoding: last hidden state from encoder
        :param output_batch: list of output sentences in format [word1, word2..]
        :return: loss
        """
        w = dynet.parameter(self.decoder_w)
        b = dynet.parameter(self.decoder_b)
        s = self.dec_lstm.initial_state().add_input(encoding)
        losses = []

        maxSentLength = max([len(sent) for sent in output_batch])
        wids = []
        masks = []
        for j in range(maxSentLength):
            wids.append([(self.tgt_vocab[sent[j]].i if len(sent)>j else self.tgt_vocab.END_TOK.i) for sent in output_batch])
            mask = [(1 if len(sent)>j else 0) for sent in output_batch]
            masks.append(mask)

        for wid, mask in zip(wids, masks):

            # apply dropout
            y = s.output()
            if self.args.dropout: y = dynet.dropout(y, self.args.dropout)

            # calculate the softmax and loss
            score = dynet.affine_transform([b, w, y])
            loss = dynet.pickneglogsoftmax_batch(score, wid)

            # mask the loss if at least one sentence is shorter than maxSentLength
            if 0 in mask:
                mask_expr = dynet.inputVector(mask)
                mask_expr = dynet.reshape(mask_expr, (1,), len(mask))
                loss = loss * mask_expr

            losses.append(loss)

            # update the state of the RNN
            embed_vector = dynet.lookup_batch(self.tgt_lookup, wid)
            s = s.add_input(embed_vector)

        return dynet.sum_batches(dynet.esum(losses))
Example #32
    def _compute(self, input):
        if not self.bias:
            output = self.W * input
        else:
            output = dy.affine_transform([self.b, self.W, input])

        if self.act == 'linear':
            return output
        elif self.act == 'sigmoid':
            return sigmoid(output)
        elif self.act == 'tanh':
            return tanh(output)
        elif self.act == 'ptanh':
            return penalized_tanh(output)
        elif self.act == 'relu':
            return relu(output)
        elif self.act == 'elu':
            return elu(output)
        raise ValueError('Unknown activation function: ' + self.act)
Example #33
    def generate(self, src, sampled=False):
        embedding = self.embed_seq(src)
        encoding = self.encode_seq(embedding)[-1]

        w = dynet.parameter(self.decoder_w)
        b = dynet.parameter(self.decoder_b)

        s = self.dec_lstm.initial_state().add_input(encoding)

        out = []
        for _ in range(5 * len(src)):
            out_vector = dynet.affine_transform([b, w, s.output()])
            probs = dynet.softmax(out_vector)
            selection = np.argmax(probs.value())
            out.append(self.tgt_vocab[selection])
            if out[-1].s == self.tgt_vocab.END_TOK: break
            embed_vector = self.tgt_lookup[selection]
            s = s.add_input(embed_vector)
        return out
Example #34
    def BuildLMGraph(self, sents):
        dy.renew_cg()
        # initialize the RNN
        init_state = self.builder.initial_state()
        # parameters -> expressions
        R = dy.parameter(self.R)
        bias = dy.parameter(self.bias)

        S = vocab.w2i["<s>"]
        # get the cids and masks for each step
        tot_chars = 0
        cids = []
        masks = []

        for i in range(len(sents[0])):
            cids.append([(vocab.w2i[sent[i]] if len(sent) > i else S)
                         for sent in sents])
            mask = [(1 if len(sent) > i else 0) for sent in sents]
            masks.append(mask)
            tot_chars += sum(mask)

        # start the rnn with "<s>"
        init_ids = cids[0]
        s = init_state.add_input(dy.lookup_batch(self.lookup, init_ids))

        losses = []

        # feed char vectors into the RNN and predict the next char
        for cid, mask in zip(cids[1:], masks[1:]):
            score = dy.affine_transform([bias, R, s.output()])
            loss = dy.pickneglogsoftmax_batch(score, cid)
            # mask the loss if at least one sentence is shorter
            if mask[-1] != 1:
                mask_expr = dy.inputVector(mask)
                mask_expr = dy.reshape(mask_expr, (1, ), len(sents))
                loss = loss * mask_expr

            losses.append(loss)
            # update the state of the RNN
            cemb = dy.lookup_batch(self.lookup, cid)
            s = s.add_input(cemb)

        return dy.sum_batches(dy.esum(losses)), tot_chars
Example #35
File: ff.py Project: pmichel31415/xnmt
    def transduce(self, embed_sent):
        src = embed_sent.as_tensor()

        W = dy.parameter(self.pW)
        b = dy.parameter(self.pb)

        l1 = dy.affine_transform([b, W, src])
        output = l1
        if self.nonlinearity == 'linear':
            output = l1
        elif self.nonlinearity == 'sigmoid':
            output = dy.logistic(l1)
        elif self.nonlinearity == 'tanh':
            output = dy.tanh(l1)
        elif self.nonlinearity == 'relu':
            output = dy.rectify(l1)
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
Example #36
    def decode_batch(self, encoding, output_batch):

        w = dynet.parameter(self.decoder_w)
        b = dynet.parameter(self.decoder_b)
        s = self.dec_lstm.initial_state()
        losses = []

        maxSentLength = max([len(sent) for sent in output_batch])
        wids = []
        masks = []
        for j in range(maxSentLength):
            wids.append([(self.tgt_vocab[sent[j]].i
                          if len(sent) > j else self.tgt_vocab.END_TOK.i)
                         for sent in output_batch])
            mask = [(1 if len(sent) > j else 0) for sent in output_batch]
            masks.append(mask)

        s = s.add_input(
            dynet.concatenate(
                [encoding[-1],
                 dynet.vecInput(self.args.hidden_dim * 2)]))
        for wid, mask in zip(wids, masks):

            # calculate the softmax and loss
            score = dynet.affine_transform([b, w, s.output()])
            loss = dynet.pickneglogsoftmax_batch(score, wid)

            # mask the loss if at least one sentence is shorter
            if 0 in mask:
                mask_expr = dynet.inputVector(mask)
                mask_expr = dynet.reshape(mask_expr, (1, ), len(output_batch))
                loss = loss * mask_expr

            losses.append(loss)

            # update the state of the RNN
            embed_vector = dynet.lookup_batch(self.tgt_lookup, wid)
            attn_vector = self.attend(encoding, s)
            inp = dynet.concatenate([embed_vector, attn_vector])
            s = s.add_input(inp)

        return dynet.sum_batches(dynet.esum(losses))
Example #38
File: EasyFirst.py Project: bcmi220/erefdp
	def get_score(self, tree_encoder):

		seq_len = len(tree_encoder.all_states)

		encode_repr = dy.concatenate_cols([item.output for item in tree_encoder.all_states])

		W_dep, b_dep = dy.parameter(self.mlp_dep_W), dy.parameter(self.mlp_dep_b)
		W_head, b_head = dy.parameter(self.mlp_head_W), dy.parameter(self.mlp_head_b)
		dep_rel = leaky_relu(dy.affine_transform([b_dep, W_dep, encode_repr]))
		head_rel = leaky_relu(dy.affine_transform([b_head, W_head, encode_repr]))
		# if self._train_flag:
		# 	dep_rel, head_rel= dy.dropout_dim(dep_rel, 1, self.dropout_mlp), dy.dropout_dim(head_rel, 1, self.dropout_mlp)

		W_rel = dy.parameter(self.rel_W)

		rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, 1, num_outputs = len(self.rels), bias_x = True, bias_y = True)
		# (#head x rel_size x #dep) x batch_size

		rel_logits = dy.transpose(rel_logits, dims=[0, 2, 1])

		return rel_logits, rel_logits.value()
Example #39
def calc_lm_loss(sents):

    dy.renew_cg()
    # parameters -> expressions
    #W_exp = dy.parameter(W_sm)
    #b_exp = dy.parameter(b_sm)

    # initialize the RNN
    f_init = RNN.initial_state()

    # get the wids and masks for each step
    tot_words = 0
    wids = []
    masks = []
    for i in range(len(sents[0])):
        wids.append([(sent[i] if len(sent) > i else S) for sent in sents])
        mask = [(1 if len(sent) > i else 0) for sent in sents]
        masks.append(mask)
        tot_words += sum(mask)

    # start the rnn by inputting "<s>"
    init_ids = [S] * len(sents)
    s = f_init.add_input(dy.lookup_batch(WORDS_LOOKUP, init_ids))

    # feed word vectors into the RNN and predict the next word
    losses = []
    for wid, mask in zip(wids, masks):
        # calculate the softmax and loss
        score = dy.affine_transform([b_sm, W_sm, s.output()])
        loss = dy.pickneglogsoftmax_batch(score, wid)
        # mask the loss if at least one sentence is shorter
        if mask[-1] != 1:
            mask_expr = dy.inputVector(mask)
            mask_expr = dy.reshape(mask_expr, (1, ), len(sents))
            loss = loss * mask_expr
        losses.append(loss)
        # update the state of the RNN
        wemb = dy.lookup_batch(WORDS_LOOKUP, wid)
        s = s.add_input(wemb)

    return dy.sum_batches(dy.esum(losses)), tot_words
Example #40
    def px(self, state, utterance):
        """
		Calculate the probability of utterance given state.
		"""
        R = dy.parameter(self.R)
        b = dy.parameter(self.b)

        decoder_input = [self.vocab.index("<START>")] + utterance

        embedded_decoder_input = [
            dy.concatenate([self.embeddings[word], state])
            for word in decoder_input
        ]
        decoder_initial_state = self.output_decoder.initial_state()
        decoder_output = decoder_initial_state.transduce(
            embedded_decoder_input)
        log_probs_char = [
            dy.affine_transform([b, R, h_t]) for h_t in decoder_output
        ]

        return log_probs_char
Example #41
    def calc_loss_basic(self, embedding, label):

        # Renew the computation graph
        dy.renew_cg()

        # Instantiate the params
        W_mean = dy.parameter(self.W_mean_p)
        V_mean = dy.parameter(self.V_mean_p)
        b_mean = dy.parameter(self.b_mean_p)
        W_var = dy.parameter(self.W_var_p)
        V_var = dy.parameter(self.V_var_p)
        b_var = dy.parameter(self.b_var_p)

        input_embedding = dy.inputTensor(embedding)
        output_label = label

        # Get the DNN encoding
        src_output = self.dnn.predict(input_embedding)

        # Get the mean and diagonal log covariance from the encoder
        mu = self.mlp(src_output, W_mean, V_mean, b_mean)
        log_var = self.mlp(src_output, W_var, V_var, b_var)

        # Compute the KL Divergence loss
        kl_loss = -0.5 * dy.sum_elems(1 + log_var -
                                      dy.pow(mu, dy.inputVector([2])) -
                                      dy.exp(log_var))

        # Reparameterize
        z = self.reparameterize(mu, log_var)

        W_sm = dy.parameter(self.W_sm_p)
        b_sm = dy.parameter(self.b_sm_p)

        # Calculate the reconstruction loss
        pred = dy.selu(dy.affine_transform([b_sm, W_sm, z]))
        label_embedding = self.lookup[label]
        recons_loss = dy.pickneglogsoftmax(pred, label)

        return kl_loss, recons_loss
Example #42
    def get_sentence_level_bilstm_outputs(
            self, combined_word_representations,
            which_layer_to_use_for_morpho_disamb):
        """
        This function produces the context representations at each level given the word representations
        for each word and returns the last layer's output and the specific layer output which we want
        to use for morphological disambiguation.
         :param combined_word_representations: 
         :param which_layer_to_use_for_morpho_disamb: xyz 
         :type which_layer_to_use_for_morpho_disamb: int
         :return: two outputs: 1) layer output to be used for NER loss, 2) layer output to be used for MD loss
        """

        last_layer_context_representations, multilayered_context_representations = \
            self.sentence_level_bilstm_layer.transduce(combined_word_representations)

        last_layer_context_representations = [dynet.tanh(dynet.affine_transform([self.tanh_layer_b.expr(),
                                                                      self.tanh_layer_W.expr(),
                                                                      context])) \
                                   for context in last_layer_context_representations]
        return last_layer_context_representations, \
               multilayered_context_representations[which_layer_to_use_for_morpho_disamb-1]
Example #43
    def init_params(self, src_encH, batch_size, train):
        if train:
            self.lstmbuilder.set_dropout(self.dropout)
        else:
            self.lstmbuilder.disable_dropout()
        self.Wp = self.Wp_p.expr()
        self.bp = self.bp_p.expr()
        self.Wo = self.Wo_p.expr()
        self.bo = self.bo_p.expr()
        self.E = self.E_p.expr()
        self.b = self.b_p.expr(False)
        self.batch_size = batch_size
        last_enc = dy.pick(src_encH, index=src_encH.dim()[0][-1] - 1, dim=1)
        init_state = dy.affine_transform([self.bp, self.Wp, last_enc])
        init_state = [
            init_state,
            dy.zeros((self.hidden_dims, ), batch_size=batch_size)
        ]
        self.dec_state = self.lstmbuilder.initial_state(init_state)

        if train:
            self.lstmbuilder.set_dropout_masks(batch_size)
Example #44
    def init(self, H, y, usr, test=True, update=True):
        bs = len(y[0])
        if not test:
            self.lstm.set_dropout(self.dr)
        else:
            self.lstm.disable_dropout()
        # Initialize first state of the decoder with the last state of the encoder
        self.Wp = self.Wp_p.expr(update)
        self.bp = self.bp_p.expr(update)
        last_enc = dy.pick(H, index=H.dim()[0][-1] - 1, dim=1)
        init_state = dy.affine_transform([self.bp, self.Wp, last_enc])
        init_state = [dy.zeros(self.dh, batch_size=bs), init_state]
        self.ds = self.lstm.initial_state(init_state, update=update)
        # Initialize dropout masks
        if not test:
            self.lstm.set_dropout_masks(bs)

        self.Wo = dy.contract3d_1d_bias(self.To_p.expr(update), usr, self.Wo_p.expr(update))
        self.bo = self.bo_p.expr(update)

        self.E = self.E_p.expr(update)
        self.b = self.b_p.expr(False)
Example #45
    def translate(self, bisents, beam_size, max_output_len, length_norm_alpha,
                  output_file, relative, absolute, local, candidate):
        avg_fan_outs = []
        total_fan_outs = []
        with open(output_file, 'w') as output:
            for i in range(len(bisents)):
                print("Translating sentence", i)
                src_sent = bisents[i][0]
                dy.renew_cg()
                self.encode([src_sent])
                self.decoder.init(
                    dy.affine_transform([
                        dy.parameter(self.b_bridge),
                        dy.parameter(self.W_bridge),
                        self.encoder.final_state()
                    ]))

                beam_search = BeamSearch(beam_size, max_output_len,
                                         length_norm_alpha)
                beam_search.set_pruning_strategy(relative, absolute, local,
                                                 candidate)
                k_best_output, avg_fan_out, total_fan_out, num_pruned = beam_search.search(
                    self)

                print("pruned:", num_pruned)
                print("avg fan out:", avg_fan_out)
                print("total fan out:", total_fan_out)

                # remove start and end symbols
                if k_best_output[-1] == self.tgt_vocab.eos:
                    words = k_best_output[1:-1]
                else:
                    words = k_best_output[1:]
                output_sent = [self.tgt_vocab.i2w[word] for word in words]
                avg_fan_outs.append(avg_fan_out)
                total_fan_outs.append(total_fan_out)
                output.write(" ".join(output_sent) + '\n')
                if (i + 1) % 100 == 0:
                    output.flush()
        print("avg avg fan out:", sum(avg_fan_outs) / len(avg_fan_outs))
        print("avg total fan out:", sum(total_fan_outs) / len(total_fan_outs))
Example #46
 def expr_for_tree(self, tree, decorate=False, training=True):
     if tree.isleaf():
         raise RuntimeError('Tree structure error: meet with leaves')
     if len(tree.children) == 1:
         if not tree.children[0].isleaf():
             raise RuntimeError(
                 'Tree structure error: tree nodes with one child should be a leaf'
             )
         emb = self.E[self.w2i.get(tree.children[0].label, 0)]
         Wi, Wo, Wu = [dy.parameter(w) for w in self.WS]
         bi, bo, bu, _ = [dy.parameter(b) for b in self.BS]
         i = dy.logistic(dy.affine_transform([bi, Wi, emb]))
         o = dy.logistic(dy.affine_transform([bo, Wo, emb]))
         u = dy.tanh(dy.affine_transform([bu, Wu, emb]))
         c = dy.cmult(i, u)
         h = dy.cmult(o, dy.tanh(c))
         if decorate: tree._e = h
         return h, c
     if len(tree.children) != 2:
         raise RuntimeError(
             'Tree structure error: only binary trees are supported.')
     e1, c1 = self.expr_for_tree(tree.children[0], decorate)
     e2, c2 = self.expr_for_tree(tree.children[1], decorate)
     Ui, Uo, Uu = [dy.parameter(u) for u in self.US]
     Uf1, Uf2 = [dy.parameter(u) for u in self.UFS]
     bi, bo, bu, bf = [dy.parameter(b) for b in self.BS]
     e = dy.concatenate([e1, e2])
     i = dy.logistic(dy.affine_transform([bi, Ui, e]))
     o = dy.logistic(dy.affine_transform([bo, Uo, e]))
     f1 = dy.logistic(dy.affine_transform([bf, Uf1, e]))
     f2 = dy.logistic(dy.affine_transform([bf, Uf2, e]))
     u = dy.tanh(dy.affine_transform([bu, Uu, e]))
     c = dy.cmult(i, u) + dy.cmult(f1, c1) + dy.cmult(f2, c2)
     h = dy.cmult(o, dy.tanh(c))
     if decorate: tree._e = h
     return h, c
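
A sketch of the parameter lists this TreeLSTM assumes, as they might be declared in __init__ (hdim and wdim are hypothetical sizes; bi, bo, bu, bf unpack from self.BS in that order):

# inside __init__ (a sketch)
model = dy.ParameterCollection()
self.WS = [model.add_parameters((hdim, wdim)) for _ in "iou"]      # leaf i, o, u
self.US = [model.add_parameters((hdim, 2 * hdim)) for _ in "iou"]  # internal i, o, u
self.UFS = [model.add_parameters((hdim, 2 * hdim)) for _ in "ff"]  # two forget gates
self.BS = [model.add_parameters(hdim) for _ in "iouf"]             # biases i, o, u, f
self.E = model.add_lookup_parameters((nwords, wdim))               # leaf embeddings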
Example #47
def calc_lm_loss(sents):
    dy.renew_cg()

    # parameters -> expressions (b_exp and W_exp are used below)
    W_exp = dy.parameter(W_sm)
    b_exp = dy.parameter(b_sm)

    # initialize the RNN
    f_init = RNN.initial_state()

    # get the wids and masks for each step
    tot_words = 0
    wids = []
    masks = []
    for i in range(len(sents[0])):
        wids.append([(sent[i] if len(sent) > i else S) for sent in sents])
        mask = [(1 if len(sent) > i else 0) for sent in sents]
        masks.append(mask)
        tot_words += sum(mask)

    # start the rnn by inputting "<s>"
    init_ids = [S] * len(sents)
    s = f_init.add_input(dy.lookup_batch(WORDS_LOOKUP, init_ids))

    # feed word vectors into the RNN and predict the next word
    losses = []
    for wid, mask in zip(wids, masks):
        # calculate the softmax and loss
        score = dy.affine_transform([b_exp, W_exp, s.output()])
        loss = dy.pickneglogsoftmax_batch(score, wid)
        # mask the loss if at least one sentence is shorter
        if mask[-1] != 1:
            mask_expr = dy.inputVector(mask)
            mask_expr = dy.reshape(mask_expr, (1,), len(sents))
            loss = loss * mask_expr
        losses.append(loss)
        # update the state of the RNN
        wemb = dy.lookup_batch(WORDS_LOOKUP, wid)
        s = s.add_input(wemb)

    return dy.sum_batches(dy.esum(losses)), tot_words
Example #48
def calc_scores(words):
    """
    Calculate scores using BiLSTM.
    :param words:
    :return:
    """
    dy.renew_cg()

    word_embs = [LOOKUP[x] for x in words]

    # Transduce all batch elements with an LSTM
    fwd_init = fwdLSTM.initial_state()
    fwd_word_reps = fwd_init.transduce(word_embs)
    bwd_init = bwdLSTM.initial_state()
    bwd_word_reps = bwd_init.transduce(reversed(word_embs))

    combined_word_reps = [dy.concatenate([f, b]) for f, b in zip(fwd_word_reps, reversed(bwd_word_reps))]

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)
    scores = [dy.affine_transform([b, W, x]) for x in combined_word_reps]

    return scores
Example #49
File: dynety.py Project: dpressel/baseline
 def highway(input_, train):
     for func, weight, bias in zip(funcs, weights, biases):
         proj = dy.rectify(func(input_, train))
         transform = dy.logistic(dy.affine_transform([bias, weight, input_]))
         input_ = dy.cmult(transform, proj) + dy.cmult(input_, 1 - transform)
     return input_
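
highway() closes over funcs, weights, and biases from an enclosing scope. A sketch of that scaffolding for a single layer of size d; the sizes and the negative gate-bias initialization (which starts the transform gate mostly closed) are assumptions:

d = 128
proj_W = model.add_parameters((d, d))
proj_b = model.add_parameters(d)
weights = [model.add_parameters((d, d))]                             # transform-gate weights
biases = [model.add_parameters(d, init=dy.ConstInitializer(-2))]     # gate bias starts negative
funcs = [lambda x, train: dy.affine_transform([proj_b, proj_W, x])]  # per-layer projection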
Example #50
def calc_loss(sents):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]
    src_cws = []

    src_len = [len(sent) for sent in src_sents]        
    max_src_len = np.max(src_len)
    num_words = 0

    for i in range(max_src_len):
        src_cws.append([sent[i] for sent in src_sents])


    #get the outputs of the source BiLSTM
    src_outputs = [dy.concatenate([x.output(), y.output()]) for x, y in LSTM_SRC.add_inputs([dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])]
    src_output = src_outputs[-1]

    #gets the parameters for the attention
    src_output_matrix = dy.concatenate_cols(src_outputs)
    w1_att_src = dy.parameter(w1_att_src_p)
    fixed_attentional_component = w1_att_src * src_output_matrix

    #now decode
    all_losses = []

    # Decoder
    #need to mask padding at end of sentence
    tgt_cws = []
    tgt_len = [len(sent) for sent in tgt_sents]
    max_tgt_len = np.max(tgt_len)
    masks = []

    for i in range(max_tgt_len):
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    W_m = dy.parameter(W_m_p)
    b_m = dy.parameter(b_m_p)

    for next_words, mask in zip(tgt_cws[1:], masks):
        #feed the previous words into the decoder LSTM
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()
        att_output, _ = calc_attention(src_output_matrix, output_embedding, fixed_attentional_component)
        middle_expr = dy.tanh(dy.affine_transform([b_m, W_m, dy.concatenate([output_embedding, att_output])]))
        s = dy.affine_transform([b_sm, W_sm, middle_expr])
        loss = (dy.pickneglogsoftmax_batch(s, next_words))
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1,), len(sents))
        mask_loss = loss * mask_expr
        all_losses.append(mask_loss)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words