Code Example #1
File: mlp.py Project: danielhers/tupa
 def evaluate(self, inputs, train=False):
     """
     Apply all MLP layers to concatenated input
     :param inputs: (key, vector) per feature type
     :param train: are we training now?
     :return: output vector of size self.output_dim
     """
     input_keys, inputs = list(map(list, zip(*list(inputs))))
     if self.input_keys:
         assert input_keys == self.input_keys, "Got:     %s\nBut expected input keys: %s" % (
             self.input_keys_str(input_keys), self.input_keys_str(self.input_keys))
     else:
         self.input_keys = input_keys
     if self.gated:
         gates = self.params.get("gates")
         if gates is None:  # FIXME attention weights should not be just parameters, but based on biaffine product?
             gates = self.params["gates"] = self.model.add_parameters((len(inputs), self.gated),
                                                                      init=dy.UniformInitializer(1))
         input_dims = [i.dim()[0][0] for i in inputs]
         max_dim = max(input_dims)
         x = dy.concatenate_cols([dy.concatenate([i, dy.zeroes(max_dim - d)])  # Pad with zeros to get uniform dim
                                  if d < max_dim else i for i, d in zip(inputs, input_dims)]) * gates
         # Possibly multiple "attention heads" -- concatenate outputs to one vector
         inputs = [dy.reshape(x, (x.dim()[0][0] * x.dim()[0][1],))]
     x = dy.concatenate(inputs)
     assert len(x.dim()[0]) == 1, "Input should be a vector, but has dimension " + str(x.dim()[0])
     dim = x.dim()[0][0]
     if self.input_dim:
         assert dim == self.input_dim, "Input dim mismatch: %d != %d" % (dim, self.input_dim)
     else:
         self.init_params(dim)
     self.config.print(self, level=4)
     if self.total_layers:
         if self.weights is None:
             self.weights = [[self.params[prefix + str(i)] for prefix in ("W", "b")]
                             for i in range(self.total_layers)]
             if self.weights[0][0].dim()[0][1] < dim:  # number of columns in W0
                 self.weights[0][0] = dy.concatenate_cols([self.weights[0][0], self.params["W0+"]])
         for i, (W, b) in enumerate(self.weights):
             self.config.print(lambda: x.npvalue().tolist(), level=4)
             try:
                 if train and self.dropout:
                     x = dy.dropout(x, self.dropout)
                 x = self.activation()(W * x + b)
             except ValueError as e:
                 raise ValueError("Error in evaluating layer %d of %d" % (i + 1, self.total_layers)) from e
     self.config.print(lambda: x.npvalue().tolist(), level=4)
     return x
Code Example #2
def generate(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent

    #get the output of the first LSTM
    src_outputs =  [dy.concatenate([x.output(), y.output()]) for x,y in LSTM_SRC.add_inputs([LOOKUP_SRC[word] for word in src])]

    src_output = src_outputs[-1]

    #gets the parameters for the attention
    src_output_matrix = dy.concatenate_cols(src_outputs)
    w1_att_src = dy.parameter(w1_att_src_p)
    fixed_attentional_component = w1_att_src * src_output_matrix



    #generate until an eos tag or max is reached
    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])

    prev_word = sos_trg
    trg_sent = []
    attention_matrix = []
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    W_m = dy.parameter(W_m_p)
    b_m = dy.parameter(b_m_p)



    for i in range(MAX_SENT_SIZE):
        #feed the previous word into the lstm, calculate the most likely word, add it to the sentence
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()
        att_output, alignment = calc_attention(src_output_matrix, output_embedding, fixed_attentional_component)
        attention_matrix.append(alignment)
        middle_expr = dy.tanh(dy.affine_transform([b_m, W_m, dy.concatenate([output_embedding, att_output])]))
        s = dy.affine_transform([b_sm, W_sm, middle_expr])
        probs = (-dy.log_softmax(s)).value()
        next_word = np.argmin(probs)  # smallest negative log-probability = most likely word

        if next_word == eos_trg:
            break
        prev_word = next_word
        trg_sent.append(i2w_trg[next_word])
    return trg_sent, dy.concatenate_cols(attention_matrix).value()
Code Example #3
File: dynety.py Project: dpressel/baseline
    def cache_encoder(self, context_vectors):
        """Cache transformations to the encoder vectors.

        :param context_vectors: list[dy.Expression] The encoder output vectors
            we do attention over. The list has length T and each expression is ((H,), B)
        """
        self.context = dy.concatenate_cols(context_vectors)  # ((H, T), B)
Code Example #4
File: attention.py Project: danielhers/cnn
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
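The decode and generate examples above call an attend helper that is defined elsewhere in attention.py. For reference, a minimal sketch of such a helper (MLP attention in the style of the standard DyNet attention example; attention_w2 and attention_v are additional parameters assumed here, not shown above) could look like this:

def attend(input_mat, state, w1dt):
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    # input_mat: (encoder_dim x seq_len) matrix of encoder states as columns
    # w1dt: (att_dim x seq_len), the cached projection of input_mat
    # w2dt: (att_dim,), projection of the current decoder state
    w2dt = w2 * dy.concatenate(list(state.s()))
    # one unnormalized score per source position
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(unnormalized)
    # context vector: attention-weighted sum of the encoder states
    context = input_mat * att_weights
    return context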
Code Example #5
File: attention.py Project: danielhers/cnn
def generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = embed_sentence(in_seq)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    for i in range(len(in_seq)*2):
        if count_EOS == 2: break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]
    return out
Code Example #6
File: dy_model.py Project: jcyk/CWS
    def word_repr(self, char_seq):
        # obtain the word representation when given its character sequence
        wlen = len(char_seq)
        if 'rgW%d'%wlen not in self.param_exprs:
            self.param_exprs['rgW%d'%wlen] = dy.parameter(self.params['reset_gate_W'][wlen-1])
            self.param_exprs['rgb%d'%wlen] = dy.parameter(self.params['reset_gate_b'][wlen-1])
            self.param_exprs['cW%d'%wlen] = dy.parameter(self.params['com_W'][wlen-1])
            self.param_exprs['cb%d'%wlen] = dy.parameter(self.params['com_b'][wlen-1])
            self.param_exprs['ugW%d'%wlen] = dy.parameter(self.params['update_gate_W'][wlen-1])
            self.param_exprs['ugb%d'%wlen] = dy.parameter(self.params['update_gate_b'][wlen-1])
          
        chars = dy.concatenate(char_seq)
        reset_gate = dy.logistic(self.param_exprs['rgW%d'%wlen] * chars + self.param_exprs['rgb%d'%wlen])
        comb = dy.concatenate([dy.tanh(self.param_exprs['cW%d'%wlen] * dy.cmult(reset_gate,chars) + self.param_exprs['cb%d'%wlen]),chars])
        update_logits = self.param_exprs['ugW%d'%wlen] * comb + self.param_exprs['ugb%d'%wlen]
        
        update_gate = dy.transpose(dy.concatenate_cols([dy.softmax(dy.pickrange(update_logits,i*(wlen+1),(i+1)*(wlen+1))) for i in xrange(self.options['ndims'])]))
        
        # The following implementation of the softmax function is not safe, but faster...
        #exp_update_logits = dy.exp(dy.reshape(update_logits,(self.options['ndims'],wlen+1)))
        #update_gate = dy.cdiv(exp_update_logits, dy.concatenate_cols([dy.sum_cols(exp_update_logits)] *(wlen+1)))
        #assert (not np.isnan(update_gate.npvalue()).any())

        word = dy.sum_cols(dy.cmult(update_gate,dy.reshape(comb,(self.options['ndims'],wlen+1))))
        return word
Code Example #7
File: dynety.py Project: dpressel/baseline
    def cache_encoder(self, context_vectors):
        """Cache transformations to the encoder vectors.

        This also projects the context vectors into a new space

        :param context_vectors: list[dy.Expression] The encoder output vectors
            we do attention over. The list has length T and each expression is ((H,), B)
        """
        self.context = dy.concatenate_cols(context_vectors)  # ((H, T), B)
        self.context_proj = self.encoder * self.context  # ((H, T), B)
Code Example #8
File: decoders.py Project: dpressel/baseline
 def __call__(self, encoder_output, dst, train):
     embed_out_th_b = self.tgt_embedding.encode(dst)
     embed_out_ht_b = dy.transpose(embed_out_th_b)
     embed_out_ht_b = self.proj_to_hsz(embed_out_ht_b)
     context = dy.concatenate_cols(encoder_output.output)
     T = embed_out_ht_b.dim()[0][1]
     dst_mask = subsequent_mask(T)
     src_mask = encoder_output.src_mask
     output = self.transformer_decoder(embed_out_ht_b, context, src_mask, dst_mask, train)
     output = self.proj_to_dsz(output)
     return self.output(output)
Code Example #9
File: train.py Project: dpressel/baseline
    def _step(self, loader, update, log, reporting_fns, verbose=None):
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
        preds, losses, ys = [], [], []
        dy.renew_cg()
        for i, batch_dict in enumerate(pg(loader), 1):
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            pred = self.model.forward(inputs)
            preds.append(pred)
            loss = self.model.loss(pred, y)
            losses.append(loss)
            ys.append(y)
            if i % self.autobatchsz == 0:
                loss = dy.average(losses)
                preds = dy.concatenate_cols(preds)
                batchsz = len(losses)
                lossv = loss.npvalue().item() * batchsz
                epoch_loss += lossv
                epoch_div += batchsz
                _add_to_cm(cm, np.array(ys), preds.npvalue())
                update(loss)
                log(self.optimizer.global_step, lossv, batchsz, reporting_fns)
                preds, losses, ys = [], [], []
                dy.renew_cg()
        loss = dy.average(losses)
        preds = dy.concatenate_cols(preds)
        batchsz = len(losses)
        epoch_loss += loss.npvalue().item() * batchsz
        epoch_div += batchsz
        _add_to_cm(cm, np.array(ys), preds.npvalue())
        update(loss)

        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        verbose_output(verbose, cm)
        return metrics
Code Example #10
def calc_loss(sents):
    dy.renew_cg()

    src_fwd = LSTM_SRC_FWD.initial_state()
    src_bwd = LSTM_SRC_BWD.initial_state()
    trg_fwd = LSTM_TRG_FWD.initial_state()
    trg_bwd = LSTM_TRG_BWD.initial_state()

    # Encoding
    src_reps = encode_sents(LOOKUP_SRC, src_fwd, src_bwd, [src for src, trg in sents])
    trg_reps = encode_sents(LOOKUP_TRG, trg_fwd, trg_bwd, [trg for src, trg in sents])

    # Concatenate the sentence representations to a single matrix
    mtx_src = dy.concatenate_cols(src_reps)
    mtx_trg = dy.concatenate_cols(trg_reps)

    # Do matrix multiplication to get a matrix of dot product similarity scores
    sim_mtx = dy.transpose(mtx_src) * mtx_trg

    # Calculate the hinge loss over all dimensions 
    loss = dy.hinge_dim(sim_mtx, list(range(len(sents))), d=1)

    return dy.sum_elems(loss)
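The comments above describe a max-margin retrieval objective: for each sentence pair i, the matched similarity sim_mtx[i, i] should beat every mismatched score by a margin of 1 along the dimension given to dy.hinge_dim. As a sketch of the objective (up to which axis hinge_dim iterates over):

loss = \sum_{i} \sum_{j \neq i} \max(0,\; 1 + sim_{i,j} - sim_{i,i})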
Code Example #11
File: biaffine.py Project: aiedward/nn4nlp-code
    def cal_scores(self, src_encodings):
        src_len = len(src_encodings)

        src_encodings = dy.concatenate_cols(src_encodings)  # src_ctx_dim, src_len, batch_size

        W_arc_hidden_to_head = dy.parameter(self.W_arc_hidden_to_head)
        b_arc_hidden_to_head = dy.parameter(self.b_arc_hidden_to_head)
        W_arc_hidden_to_dep = dy.parameter(self.W_arc_hidden_to_dep)
        b_arc_hidden_to_dep = dy.parameter(self.b_arc_hidden_to_dep)

        W_label_hidden_to_head = dy.parameter(self.W_label_hidden_to_head)
        b_label_hidden_to_head = dy.parameter(self.b_label_hidden_to_head)
        W_label_hidden_to_dep = dy.parameter(self.W_label_hidden_to_dep)
        b_label_hidden_to_dep = dy.parameter(self.b_label_hidden_to_dep)

        U_arc_1 = dy.parameter(self.U_arc_1)
        u_arc_2 = dy.parameter(self.u_arc_2)

        U_label_1 = [dy.parameter(x) for x in self.U_label_1]
        u_label_2_1 = [dy.parameter(x) for x in self.u_label_2_1]
        u_label_2_2 = [dy.parameter(x) for x in self.u_label_2_2]
        b_label = [dy.parameter(x) for x in self.b_label]

        h_arc_head = dy.rectify(dy.affine_transform([b_arc_hidden_to_head, W_arc_hidden_to_head, src_encodings]))  # n_arc_ml_units, src_len, bs
        h_arc_dep = dy.rectify(dy.affine_transform([b_arc_hidden_to_dep, W_arc_hidden_to_dep, src_encodings]))
        h_label_head = dy.rectify(dy.affine_transform([b_label_hidden_to_head, W_label_hidden_to_head, src_encodings]))
        h_label_dep = dy.rectify(dy.affine_transform([b_label_hidden_to_dep, W_label_hidden_to_dep, src_encodings]))

        h_arc_head_transpose = dy.transpose(h_arc_head)
        h_label_head_transpose = dy.transpose(h_label_head)

        s_arc = h_arc_head_transpose * dy.colwise_add(U_arc_1 * h_arc_dep, u_arc_2)

        s_label = []
        for U_1, u_2_1, u_2_2, b in zip(U_label_1, u_label_2_1, u_label_2_2, b_label):
            e1 = h_label_head_transpose * U_1 * h_label_dep
            e2 = h_label_head_transpose * u_2_1 * dy.ones((1, src_len))
            e3 = dy.ones((src_len, 1)) * u_2_2 * h_label_dep
            s_label.append(e1 + e2 + e3 + b)
        return s_arc, s_label
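In matrix form, and with the batch dimension omitted, the scores computed above are standard deep biaffine attention scores, where H_head and H_dep denote the stacked head/dependent MLP outputs (a sketch of the algebra the code implements):

S^{arc} = H_{head}^{T} (U_{1}^{arc} H_{dep} + u_{2}^{arc} \mathbf{1}^{T})
S^{label,(r)} = H_{head}^{T} U_{1}^{(r)} H_{dep} + H_{head}^{T} u_{2,1}^{(r)} \mathbf{1}^{T} + \mathbf{1}\, u_{2,2}^{(r)} H_{dep} + b^{(r)}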
Code Example #12
    def generate(self, sentence):
        #embedded = embed_sentence(in_seq)
        encoded = self.encode_sentence(sentence)

        w = dy.parameter(self.decoder_w)
        b = dy.parameter(self.decoder_b)
        w1 = dy.parameter(self.attention_w1)
        input_mat = dy.concatenate_cols(encoded)
        w1dt = None

        last_output_embeddings = self.output_lookup[2]
        s = self.dec_lstm.initial_state().add_input(
            dy.concatenate(
                [dy.vecInput(self.state_size * 2), last_output_embeddings]))

        out = ''
        res = []
        count_EOS = 0
        for i in range(len(sentence)):
            if count_EOS == 2: break
            # w1dt can be computed and cached once for the entire decoding phase
            w1dt = w1dt or w1 * input_mat
            vector = dy.concatenate(
                [self.attend(input_mat, s, w1dt), last_output_embeddings])
            s = s.add_input(vector)
            #k = s
            #dloss = self.test_duration(k, i, b)
            out_vector = w * s.output() + b
            probs = dy.softmax(out_vector).vec_value()
            next_word = probs.index(max(probs))
            last_output_embeddings = self.output_lookup[next_word]
            if next_word == 2:
                count_EOS += 1
                continue
            res.append(next_word)

            #out += int2char[next_word]
        return res
Code Example #13
    def prediction(self):
        """Adds the core transformation for this model which transforms a batch of input
        data into a batch of predictions. In this case, the transformation is a linear layer plus a
        softmax transformation:

        y = softmax(xW + b)

        Args:
            input_data: A tensor of shape (batch_size, n_features).
        Returns:
            pred: A tensor of shape (batch_size, n_classes)
        """
        W = dy.parameter(self._pW)
        b = dy.parameter(self._pb)
        x = dy.inputTensor(self.input)

        z_m = x * W
        z_T = dy.concatenate_cols([z_m[i]+b for i in range(self.config.batch_size)])
        z = dy.transpose(z_T)
        # z = x * W + b

        pred = softmax(z)
        return pred
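Because the bias b is not broadcast over the batch rows here, the example adds it to every row via concatenate_cols and transposes back. An equivalent, shorter formulation of those two lines (a sketch under the same shapes, using DyNet's colwise_add) would be:

        # z_m is (batch_size x n_classes); add b to every column of its transpose,
        # then transpose back -- same result as the concatenate_cols loop above
        z = dy.transpose(dy.colwise_add(dy.transpose(z_m), b))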
Code Example #14
def generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = embed_sentence(in_seq)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    # For checking likelihood of entire output string
    max_probs_list = []
    for i in range(len(in_seq) * 2):
        if count_EOS == 2: break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate(
            [attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        max_probs_list.append(max(probs))
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]

    return out, prod(max_probs_list)
Code Example #15
 def make_decoder(self, in_seq, **kwargs):
     """
     Creates a decoder generator to be used as co-routine to the decoding
      procedure. It has two steps: (i) it first outputs a probability
      distribution over the vocabulary and (ii) it accepts a symbol to be fed back into
     the generation of the next symbol.
     See self.generate and self.
     """
     embedded = self._embed_seq(in_seq)
     enc_mat = dy.concatenate_cols(self.encode(embedded))
     # variables to compute and cache the encoder projection onto att space
     enc2att = dy.parameter(self.enc2att)
     encatt = None
     # EOS as zero-vector for 1st step
     last_char_emb = self.lookup[self.char2int[u.EOS]]
     # init hidden state of decoder should take last encoding hidden state
     state_vec = dy.vecInput(self.enc_hid_dim)
     if self.add_pred:
         init = dy.concatenate([state_vec, last_char_emb])
     else:
         init = state_vec
     s = self.dec_rnn.initial_state().add_input(init)
     while True:
         # (maybe) project encoding hidden seq onto attention space
         encatt = encatt or enc2att * enc_mat
         # create a new decoding state (s) combining previous decoding step,
         # encoded seq (output of encoder) and ev. last input encoding
         new_state = self.recur(s, enc_mat, last_char_emb, encatt, **kwargs)
         s = s.add_input(new_state)
         # TODO: according to Bahdanau 2015, the new state is computed with
         # deep output + single maxout:
          # p(y_i | s_i, y_{i-1}, c_i) \propto exp(y_i^T W_o t_i)
          # where $t_i = [max(\tilde{t}_{i, 2j-1}, \tilde{t}_{i, 2j})]^T_{j=1,...,l}$
          # and $\tilde{t}_i = U_o s_{i-1} + V_o E y_{i-1} + C_o c_i$
         yield self._output_softmax(s.output())
         last_char = yield
         last_char_emb = self.lookup[last_char]
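The docstring above describes a two-step co-routine protocol: read a probability distribution, then send the chosen symbol back. A hypothetical greedy driver (generate_greedy and max_len are illustrative names, not from the source; it assumes _output_softmax returns a DyNet expression) might look like:

def generate_greedy(self, in_seq, max_len):
    # hypothetical driver for the co-routine above (not from the source)
    dec = self.make_decoder(in_seq)
    output = []
    probs = next(dec)                  # prime the generator; get the first distribution
    for _ in range(max_len):
        sym = int(np.argmax(probs.npvalue()))
        if sym == self.char2int[u.EOS]:
            break
        output.append(sym)
        next(dec)                      # advance to the feedback yield
        probs = dec.send(sym)          # feed the chosen symbol back; get the next distribution
    return output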
Code Example #16
File: attention.py Project: mpsilfve/conll2017
def beam_generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = embed_sentence(in_seq)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None

    histories = [[0.0,output_lookup[char2int[EOS]],'',None]]

    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE * 2), histories[0][1]]))
    histories[0][3] = s

    count_EOS = 0
    for i in range(len(in_seq)*2):
        if count_EOS == 2: break
        # w1dt can be computed and cached once for the entire decoding phase
        presents = []
        for ll, embedding, out, s in histories:
            w1dt = w1dt or w1 * input_mat        
            vector = dy.concatenate([attend(input_mat, s, w1dt), embedding])
            new_s = s.add_input(vector)
            out_vector = w * new_s.output() + b
            probs = dy.softmax(out_vector).vec_value()            
            probs = sorted([(prob,j) for j, prob in enumerate(probs)], key = lambda x:x[0], reverse=1)
            for prob, j in probs:      
                next_embedding = output_lookup[j]                
                presents.append([ll + log(prob), next_embedding, out + int2char[j], new_s])

        presents.sort(reverse=1,key=lambda x:x[0])
        histories = presents[:BEAM]
        if presents[0][2].endswith(EOS) and presents[0][2] != EOS:
            return presents[0][2].replace(EOS,'')

    return histories[0][2].replace(EOS,'')
Code Example #17
    def attend(self, encoded_inputs, h_t, input_masks=None):
        # encoded_inputs dimension is: seq len x 2*h x batch size, h_t dimension is h x batch size (for bilstm encoder)
        if len(encoded_inputs) == 1:
            # no need to attend if only one input state, compute output directly
            h_output = dn.tanh(self.w_c * dn.concatenate([h_t, encoded_inputs[0]]))
            # return trivial alphas (all 1's since one input gets all attention)
            if input_masks:
                # if batching
                alphas = dn.inputTensor([1]*len(input_masks[0]), batched=True)
            else:
                alphas = dn.inputTensor([1], batched=True)
            return h_output, alphas

        # iterate through input states to compute attention scores
        # scores = [v_a * dn.tanh(w_a * h_t + u_a * h_input) for h_input in blstm_outputs]
        w_a_h_t = self.w_a * h_t
        scores = [self.v_a * dn.tanh(dn.affine_transform([w_a_h_t, self.u_a, h_input])) for h_input in encoded_inputs]

        concatenated = dn.concatenate(scores)
        if input_masks:
            # if batching, multiply attention scores with input masks to zero-out scores for padded inputs
            dn_masks = dn.inputTensor(input_masks, batched=True)
            concatenated = dn.cmult(concatenated, dn_masks)

        # normalize scores
        alphas = dn.softmax(concatenated)

        # compute context vector with weighted sum for each seq in batch
        bo = dn.concatenate_cols(encoded_inputs)
        c = bo * alphas
        # c = dn.esum([h_input * dn.pick(alphas, j) for j, h_input in enumerate(blstm_outputs)])

        # compute output vector using current decoder state and context vector
        h_output = dn.tanh(self.w_c * dn.concatenate([h_t, c]))

        return h_output, alphas
Code Example #18
 def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
   src = src.as_tensor()
   # convolutional layer
   src = padding(src, src.dim()[0][0], src.dim()[0][1], self.filter_width, self.stride, src.dim()[1])
   l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filter_conv), stride = [self.stride, self.stride], is_valid = True))
   timestep = l1.dim()[0][1]
   features = l1.dim()[0][2]
   batch_size = l1.dim()[1]
   # transpose l1 to be (timestep, dim), but keep the batch_size.
   rhn_in = dy.reshape(l1, (timestep, features), batch_size = batch_size)
   rhn_in = [dy.pick(rhn_in, i) for i in range(timestep)]
   for l in range(self.rhn_num_hidden_layers):
     rhn_out = []
     # initialize a random vector for the first state vector, keep the same batch size.
     prev_state = dy.parameter(self.init[l])
     # begin recurrent high way network
     for t in range(timestep):
       for m in range(0, self.rhn_microsteps):
         H = dy.affine_transform([dy.parameter(self.recur[l][m][1]), dy.parameter(self.recur[l][m][0]),  prev_state])
         T = dy.affine_transform([dy.parameter(self.recur[l][m][3]), dy.parameter(self.recur[l][m][2]),  prev_state])
         if m == 0:
           H += dy.parameter(self.linear[l][0]) * rhn_in[t]
           T += dy.parameter(self.linear[l][1]) * rhn_in[t]
         H = dy.tanh(H)
         T = dy.logistic(T)
         prev_state = dy.cmult(1 - T, prev_state) + dy.cmult(T, H) # ((1024, ), batch_size)
       rhn_out.append(prev_state)
     if self.residual and l>0:
       rhn_out = [sum(x) for x in zip(rhn_out, rhn_in)]
     rhn_in = rhn_out
   # Compute the attention-weighted average of the activations
   rhn_in = dy.concatenate_cols(rhn_in)
   scores = dy.transpose(dy.parameter(self.attention[0][1]))*dy.tanh(dy.parameter(self.attention[0][0])*rhn_in) # ((1,510), batch_size)
   scores = dy.reshape(scores, (scores.dim()[0][1],), batch_size = scores.dim()[1])
   attn_out = rhn_in*dy.softmax(scores) # rhn_in is ((1024,510), batch_size), softmax(scores) is ((510,), batch_size)
   return ExpressionSequence(expr_tensor = attn_out)
Code Example #19
File: baseline.py Project: somoradi/conll2018
def decode(vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2id[c] for c in output]
    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2id[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
Code Example #20
File: baseline.py Project: somoradi/conll2018
def generate(i, s, id2char):
    """ Generate a word form for the lemma at position i in sentence s. """
    context = get_context(i,s)
    embedded = embed(s[i][LEMMA],context)
    encoded = encode(embedded)

    in_seq = s[i][LEMMA]
    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None

    last_output_embeddings = output_lookup[char2id[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate(
            [dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    for i in range(len(in_seq)*2):
        if count_EOS == 2: break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), 
                                 last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        if id2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += id2char[next_char]
    return out
Code Example #21
    def decode(self, encoded, output_words, output_tags, output_index, masks):
        input_mat = dy.concatenate_cols(encoded)
        w1dt = None

        last_output_embeddings = dy.lookup_batch(self.wlookup, output_words[0])
        last_tag_embeddings = dy.lookup_batch(self.tlookup, output_tags[0])
        empty_tensor = dy.reshape(dy.inputTensor(np.zeros((self.options.hdim * 2, len(output_words[0])), dtype=float)),
                                  (self.options.hdim * 2,), len(output_words[0]))
        s = self.dec_lstm.initial_state().add_input(dy.concatenate([empty_tensor, last_output_embeddings, last_tag_embeddings]))
        loss = []
        for p, word in enumerate(output_words):
            # w1dt can be computed and cached once for the entire decoding phase
            mask_tensor = dy.reshape(dy.inputTensor(masks[p]), (1,), len(masks[p]))
            w1dt = w1dt or self.attention_w1.expr() * input_mat
            att_weights = self.attend(s, w1dt, True)
            vector = dy.concatenate([input_mat * att_weights, last_output_embeddings, last_tag_embeddings])
            if self.options.dropout > 0:
                vector = dy.dropout(vector, self.options.dropout)
            s = s.add_input(vector)
            last_output_embeddings = dy.lookup_batch(self.wlookup, word)
            last_tag_embeddings = dy.lookup_batch(self.tlookup, output_tags[p])
            loss_p = dy.cmult(dy.pick_batch(-dy.log(att_weights), output_index[p]), mask_tensor)
            loss.append(dy.sum_batches(loss_p)/loss_p.dim()[1])
        return loss
Code Example #22
    def translate_sentence(self, sent):
        dy.renew_cg()

        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)
        W1_att_f = dy.parameter(self.W1_att_f)
        W1_att_e = dy.parameter(self.W1_att_e)
        w2_att = dy.parameter(self.w2_att)

        sent = [startSymbol] + sent + [endSymbol]
        sent_rev = list(reversed(sent))

        # Bidirectional representations
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []

        for (cw_l2r, cw_r2l) in zip(sent, sent_rev):
            l2r_state = l2r_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_l2r]))
            r2l_state = r2l_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_r2l]))
            l2r_contexts.append(l2r_state.output())
            r2l_contexts.append(r2l_state.output())
        r2l_contexts.reverse()

        h_fs = []
        for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
            h_fs.append(dy.concatenate([l2r_i, r2l_i]))

        h_fs_matrix = dy.concatenate_cols(h_fs)

        # Decoder
        trans_sentence = [startSymbol]
        cw = trans_sentence[-1]
        #initial context
        c_t = dy.vecInput(self.hidden_size * 2)
        start = dy.concatenate(
            [dy.lookup(self.tgt_lookup, self.tgt_token_to_id[endSymbol]), c_t])
        dec_state = self.dec_builder.initial_state().add_input(start)
        i = 0
        while len(trans_sentence) < self.max_len:
            i += 1
            h_e = dec_state.output()
            getAttention = self.__attention_mlp(h_fs_matrix, h_e)
            c_t = getAttention[0]
            embed_t = dy.lookup(self.tgt_lookup, self.tgt_token_to_id[cw])
            x_t = dy.concatenate([embed_t, c_t])
            dec_state = dec_state.add_input(x_t)
            y_star = dy.softmax(W_y * dec_state.output() + b_y).vec_value()
            next_wordID = np.argmax(y_star)
            if i == 1:
                #print y_star[next_wordID]
                pass
            cw = self.tgt_id_to_token[next_wordID]
            cpcw = cw
            if i < 5:
                #print (i,cw)
                pass
            if cw == unkSymbol:
                #find the source word with highest attention score
                keyWord = sent[getAttention[1]]
                if self.src_token_to_id[keyWord] == self.src_token_to_id[
                        unkSymbol]:
                    cw = keyWord  # the source word is itself unknown: copy it through to the output
                    #print (i,cw,1)
                else:
                    #find the target word with second max prob
                    #prob: y_star
                    next_wordID = np.argpartition(y_star, 1)[1]
                    cw = self.tgt_id_to_token[next_wordID]
                    #print (i,cw,2)
            if cw == endSymbol:
                break
            if cw != startSymbol:
                trans_sentence.append(cw)
            cw = cpcw

        return ' '.join(trans_sentence[1:])
Code Example #23
def rnn_mlp(self, sens):
    '''
	Here, I assumed all sens have the same length.
	'''
    words, pwords, pos, chars = sens[0], sens[1], sens[2], sens[5]
    # words: indices of words in wlookup.
    # words shape: sent_length x batch_size (length x batch)
    if self.options.use_char:
        cembed = [dy.lookup_batch(self.clookup, c) for c in chars]
        char_fwd, char_bckd = self.char_lstm.builder_layers[0][0].initial_state().transduce(cembed)[-1],\
               self.char_lstm.builder_layers[0][1].initial_state().transduce(reversed(cembed))[-1]
        crnn = dy.reshape(dy.concatenate_cols([char_fwd, char_bckd]),
                          (self.options.we, words.shape[0] * words.shape[1]))
        cnn_reps = [list() for _ in range(len(words))]
        for i in range(words.shape[0]):
            cnn_reps[i] = dy.pick_batch(
                crnn, [i * words.shape[1] + j for j in range(words.shape[1])],
                1)

        wembed = [
            dy.lookup_batch(self.wlookup, words[i]) +
            dy.lookup_batch(self.elookup, pwords[i]) + cnn_reps[i]
            for i in range(len(words))
        ]
    else:
        wembed = [
            dy.lookup_batch(self.wlookup, words[i]) +
            dy.lookup_batch(self.elookup, pwords[i]) for i in range(len(words))
        ]
    posembed = [
        dy.lookup_batch(self.plookup, pos[i]) for i in range(len(pos))
    ] if self.options.use_pos else None

    inputs = [dy.concatenate([w, pos]) for w, pos in zip(wembed, posembed)
              ] if self.options.use_pos else wembed

    h_out = self.bi_rnn(inputs, words.shape[1], 0,
                        0)  #self.deep_lstms.transduce(inputs)
    # h_out: python list of concatenated BiLSTM hidden state

    # BiLSTM hidden tape (python list --> dynet tensor)
    h = dy.concatenate_cols(h_out)  # shape: ((2*rnn_dim, sent_len), batch)

    # arc-head
    H = self.activation(
        dy.affine_transform(
            [self.arc_mlp_head_b.expr(),
             self.arc_mlp_head.expr(), h]))
    # arc-modifier
    M = self.activation(
        dy.affine_transform(
            [self.arc_mlp_dep_b.expr(),
             self.arc_mlp_dep.expr(), h]))
    # arc-head for label
    HL = self.activation(
        dy.affine_transform(
            [self.label_mlp_head_b.expr(),
             self.label_mlp_head.expr(), h]))
    # arc-modifier for label
    ML = self.activation(
        dy.affine_transform(
            [self.label_mlp_dep_b.expr(),
             self.label_mlp_dep.expr(), h]))

    return h, H, M, HL, ML
Code Example #24
    def run(self, triple, isTrain):

        MLP = dy.parameter(self.MLP)
        MLP_bias = dy.parameter(self.MLP_bias)
        MLP_attn = dy.parameter(self.MLP_attn)
        MLP_attn_bias = dy.parameter(self.MLP_attn_bias)
        attn_weight = dy.parameter(self.attn_weight)
        classifier = dy.parameter(self.classifier)
        classifier_bias = dy.parameter(self.classifier_bias)

        s, t, f = triple
        s = [BOW] + s + [EOW]
        t = [BOW] + t + [EOW]
        char_embs = [self.lp_c[c] for c in s]
        top_recur = utils.biLSTM(self.LSTM_builders,
                                 char_embs,
                                 dropout_h=self._pdrop_lstm if isTrain else 0.,
                                 dropout_x=self._pdrop_lstm if isTrain else 0.)
        key = dy.concatenate_cols(top_recur[1:-1])

        feat_embs = []
        for idx in range(len(self.lp_feats)):
            if idx < len(f):
                feat_embs.append(self.lp_feats[idx][f[idx]])
            else:
                feat_embs.append(dy.inputVector(np.zeros(self._feat_dim)))
        feat_embs = dy.concatenate(feat_embs)

        prev_char = BOW
        pred_word = []
        losses = []

        prev_top_recur = dy.inputVector(np.zeros(self._hidden_dim))
        state = self.dec_LSTM.initial_state()
        idx = 0
        while prev_char != EOW:
            tmp = dy.concatenate(
                [self.lp_c[prev_char], feat_embs, prev_top_recur])

            if isTrain:
                tmp = dy.dropout(tmp, self._pdrop_embs)

            h = dy.affine_transform([MLP_attn_bias, MLP_attn, tmp])
            if isTrain:
                h = dy.dropout(h, self._pdrop_mlp)

            query = dy.cmult(attn_weight, dy.rectify(h))
            attn_vec = dy.softmax(dy.transpose(key) * query)
            value = key * attn_vec
            inp = dy.concatenate([value, tmp])
            inp = dy.affine_transform([MLP_bias, MLP, inp])
            h = state.add_input(inp).output()
            top_recur = dy.rectify(h)
            if isTrain:
                top_recur = dy.dropout(top_recur, self._pdrop_mlp)
            prev_top_recur = h
            score = dy.affine_transform(
                [classifier_bias, classifier, top_recur])
            if isTrain:
                losses.append(dy.pickneglogsoftmax(score, t[idx + 1]))
                prev_char = t[idx + 1]
                idx += 1
            else:
                pred_char = score.npvalue().argmax()
                pred_word.append(pred_char)
                prev_char = pred_char
                if len(pred_word) > 30:
                    break

        return pred_word, losses
Code Example #25
    def beam_search(self, pre_context, pos_context, entity, beam):
        embedded = self.embed_sentence(pre_context)
        pre_encoded = self.encode_sentence(self.encpre_fwd_lstm, self.encpre_bwd_lstm, embedded)

        embedded = self.embed_sentence(pos_context)
        pos_encoded = self.encode_sentence(self.encpos_fwd_lstm, self.encpos_bwd_lstm, embedded)

        w = dy.parameter(self.decoder_w)
        b = dy.parameter(self.decoder_b)

        w1_pre = dy.parameter(self.attention_w1_pre)
        h_pre = dy.concatenate_cols(pre_encoded)
        w1dt_pre = None

        w1_pos = dy.parameter(self.attention_w1_pos)
        h_pos = dy.concatenate_cols(pos_encoded)
        w1dt_pos = None

        try:
            entity_embedding = self.input_lookup[self.input2int[entity]]
        except:
            entity_embedding = self.input_lookup[self.input2int[self.EOS]]
        last_output_embeddings = self.output_lookup[self.output2int[self.EOS]]
        s = self.dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(self.STATE_SIZE*2), last_output_embeddings, entity_embedding]))
        candidates = [{'sentence':[self.EOS], 'prob':0.0, 'count_EOS':0, 's':s}]
        outputs = []

        i = 0
        while i < self.config['GENERATION'] and len(outputs) < beam:
            new_candidates = []
            for candidate in candidates:
                if candidate['count_EOS'] == 2:
                    outputs.append(candidate)

                    if len(outputs) == beam: break
                else:
                    # w1dt can be computed and cached once for the entire decoding phase
                    w1dt_pre = w1dt_pre or w1_pre * h_pre
                    w1dt_pos = w1dt_pos or w1_pos * h_pos

                    attention_pre = self.attend(h_pre, candidate['s'], w1dt_pre, self.attention_w2_pre, self.attention_v_pre)
                    attention_pos = self.attend(h_pos, candidate['s'], w1dt_pos, self.attention_w2_pos, self.attention_v_pos)

                    last_output_embeddings = self.output_lookup[self.output2int[candidate['sentence'][-1]]]
                    vector = dy.concatenate([self.hier_attend(attention_pre, attention_pos, candidate['s']), last_output_embeddings, entity_embedding])
                    s = candidate['s'].add_input(vector)
                    out_vector = w * s.output() + b
                    probs = dy.softmax(out_vector).vec_value()
                    next_words = [{'prob':e, 'index':probs.index(e)} for e in sorted(probs, reverse=True)[:beam]]

                    for next_word in next_words:
                        word = self.int2output[next_word['index']]

                        new_candidate = {
                            'sentence': candidate['sentence'] + [word],
                            'prob': candidate['prob'] + np.log(next_word['prob']),
                            'count_EOS': candidate['count_EOS'],
                            's':s
                        }

                        if word == self.EOS:
                            new_candidate['count_EOS'] += 1

                        new_candidates.append(new_candidate)
            candidates = sorted(new_candidates, key=lambda x: x['prob'], reverse=True)[:beam]
            i += 1

        if len(outputs) == 0:
            outputs = candidates

        # Length Normalization
        alpha = 0.6
        for output in outputs:
            length = len(output['sentence'])
            lp_y = ((5.0 + length)**alpha) / ((5.0+1.0)**alpha)

            output['prob'] = output['prob'] / lp_y

        outputs = sorted(outputs, key=lambda x: x['prob'], reverse=True)
        return list(map(lambda x: x['sentence'], outputs))
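The length normalization at the end divides each candidate's accumulated log-probability by the GNMT-style length penalty lp(Y) = (5 + |Y|)^alpha / (5 + 1)^alpha with alpha = 0.6; for example, a 10-token candidate is divided by (15/6)^0.6 ≈ 1.73, so shorter hypotheses do not automatically win when candidates of different lengths are compared.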
Code Example #26
File: model.py Project: bjayakumar/mead-baseline
 def encode(self, embed_list):
     embed_list = dy.transpose(dy.concatenate_cols(embed_list))
     return [
         self.output(out) for out in self.encoder(embed_list, self.train)
     ]
Code Example #27
File: model.py Project: dpressel/baseline
 def encode(self, embed_list):
     embed_list = dy.transpose(dy.concatenate_cols(embed_list))
     return [self.output(out) for out in self.encoder(embed_list, self.train)]
Code Example #28
File: dynety.py Project: dpressel/baseline
 def cache_encoder(self, context_vectors):
     """Cache the context vectors and project them into a new spaace."""
     self.context = dy.concatenate_cols(context_vectors)  # ((H, T), B)
     self.context_proj = self.A * self.context # ((H, T), B)
Code Example #29
    def step(self, instances):
        dy.renew_cg()

        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)
        W1_att_f = dy.parameter(self.W1_att_f)
        W1_att_e = dy.parameter(self.W1_att_e)
        w2_att = dy.parameter(self.w2_att)

        #instances : a list [(src0,tgt0),(src1,tgt1),(src2,tgt2)]
        maxLen = max(map(lambda x: len(x[1]), instances))
        src_sents = []
        src_sents_rev = []
        tgt_sents = []
        srcSenLen = len(
            instances[0][0]) + 2  #the length of the src sentence, all the same
        tgtSenLen = maxLen + 1
        masks = [
            [] for i in range(tgtSenLen)
        ]  #mask for each position. each item in this list is a list with length=batchsize
        num_words = 0

        for item in instances:
            #item[0]:src ; item[1]:tgt
            num_words += (len(item[1]) + 1)
            padNum = maxLen - len(item[1])
            for i in range(len(item[1]) + 1):
                masks[i].append(1)
            for i in range(len(item[1]) + 1, tgtSenLen):
                masks[i].append(0)
            thisSrc = [startSymbol] + item[0] + [endSymbol]
            src_sents.append(thisSrc)
            src_sents_rev.append(list(reversed(thisSrc)))
            thisTgt = item[1] + [endSymbol for i in range(padNum + 1)]
            tgt_sents.append(thisTgt)

        # Bidirectional representations
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []
        for i in range(srcSenLen):
            batchSrc = dy.lookup_batch(
                self.src_lookup,
                [self.src_token_to_id[x[i]] for x in src_sents])
            batchSrc_rev = dy.lookup_batch(
                self.src_lookup,
                [self.src_token_to_id[x[i]] for x in src_sents_rev])
            l2r_state = l2r_state.add_input(batchSrc)
            r2l_state = r2l_state.add_input(batchSrc_rev)
            l2r_contexts.append(l2r_state.output())
            r2l_contexts.append(r2l_state.output())

        r2l_contexts.reverse()

        # Combine the left and right representations for every word
        h_fs = []
        for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
            h_fs.append(dy.concatenate([l2r_i, r2l_i]))
        h_fs_matrix = dy.concatenate_cols(h_fs)

        losses = []

        # Decoder
        c_t = dy.vecInput(self.hidden_size * 2)
        start = dy.concatenate([
            dy.lookup_batch(self.tgt_lookup,
                            [self.tgt_token_to_id['<S>'] for i in tgt_sents]),
            c_t
        ])
        dec_state = self.dec_builder.initial_state().add_input(start)
        loss = dy.pickneglogsoftmax_batch(
            W_y * dec_state.output() + b_y,
            [self.tgt_token_to_id[tgt_sent[0]] for tgt_sent in tgt_sents])
        losses.append(loss)

        for i in range(tgtSenLen - 1):
            #cw : item[i] nw:item[i+1]
            h_e = dec_state.output()
            c_t = self.__attention_mlp(h_fs_matrix, h_e)[0]
            # Get the embedding for the current target word
            embed_t = dy.lookup_batch(
                self.tgt_lookup,
                [self.tgt_token_to_id[tgt_sent[i]] for tgt_sent in tgt_sents])
            # Create input vector to the decoder
            x_t = dy.concatenate([embed_t, c_t])
            dec_state = dec_state.add_input(x_t)
            loss = dy.pickneglogsoftmax_batch(W_y * dec_state.output() + b_y, [
                self.tgt_token_to_id[tgt_sent[i + 1]] for tgt_sent in tgt_sents
            ])
            thisMask = dy.inputVector(masks[i + 1])
            thisMask = dy.reshape(thisMask, (1, ), len(instances))
            losses.append(loss * thisMask)

        return dy.sum_batches(dy.esum(losses)), num_words
Code Example #30
def calc_loss(sents):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]
    src_cws = []

    src_len = [len(sent) for sent in src_sents]        
    max_src_len = np.max(src_len)
    num_words = 0

    for i in range(max_src_len):
        src_cws.append([sent[i] for sent in src_sents])


    #get the outputs of the first LSTM
    src_outputs = [dy.concatenate([x.output(), y.output()]) for x,y in LSTM_SRC.add_inputs([dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])]
    src_output = src_outputs[-1]

    #gets the parameters for the attention
    src_output_matrix = dy.concatenate_cols(src_outputs)
    w1_att_src = dy.parameter(w1_att_src_p)
    fixed_attentional_component = w1_att_src * src_output_matrix

    #now decode
    all_losses = []

    # Decoder
    #need to mask padding at end of sentence
    tgt_cws = []
    tgt_len = [len(sent) for sent in tgt_sents]
    max_tgt_len = np.max(tgt_len)
    masks = []

    for i in range(max_tgt_len):
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)



    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    W_m = dy.parameter(W_m_p)
    b_m = dy.parameter(b_m_p)

    for next_words, mask in zip(tgt_cws[1:], masks):
        #feed the current state into the 
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()
        att_output, _ = calc_attention(src_output_matrix, output_embedding, fixed_attentional_component)
        middle_expr = dy.tanh(dy.affine_transform([b_m, W_m, dy.concatenate([output_embedding, att_output])]))
        s = dy.affine_transform([b_sm, W_sm, middle_expr])
        loss = (dy.pickneglogsoftmax_batch(s, next_words))
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1,),len(sents))
        mask_loss = loss * mask_expr
        all_losses.append(mask_loss)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words
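To make the masking concrete, here is a toy illustration (a hypothetical batch, not from the source, where 2 = eos_trg and each target sentence already ends with it): with tgt_sents = [[4, 9, 2], [4, 2]], the padding loop above produces

    # positions x batch:
    # tgt_cws = [[4, 4], [9, 2], [2, 2]]   (the shorter sentence is padded with eos_trg)
    # masks   = [[1, 1], [1, 1], [1, 0]],  num_words = 5
    # in the loss loop, masks[i] gates the prediction of tgt_cws[i+1],
    # so padded positions contribute no loss.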
Code Example #31
    def translate_sentence(self, sent, lang):
        dy.renew_cg()
        W_y = dy.parameter(self.W_y[lang])
        b_y = dy.parameter(self.b_y[lang])
        W1_att_e = dy.parameter(self.W1_att_e)
        W1_att_f = dy.parameter(self.W1_att_f)
        w2_att = dy.parameter(self.w2_att)
        M_s = self.src_lookup
        M_t = self.tgt_lookup[lang]

        src_sent = sent
        src_sent_rev = list(reversed(sent))

        # Bidirectional representations
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []
        for (cw_l2r, cw_r2l) in zip(src_sent, src_sent_rev):
            l2r_state = l2r_state.add_input(M_s[cw_l2r])
            r2l_state = r2l_state.add_input(M_s[cw_r2l])
            l2r_contexts.append(l2r_state.output())  # [<S>, x_1, x_2, ..., </S>]
            r2l_contexts.append(r2l_state.output())  # [</S> x_n, x_{n-1}, ... <S>]
        r2l_contexts.reverse()  # [<S>, x_1, x_2, ..., </S>]

        # Combine the left and right representations for every word
        h_fs = []
        for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
            h_fs.append(dy.concatenate([l2r_i, r2l_i]))
        encoded_h = h_fs[-1]
        h_fs_matrix = dy.concatenate_cols(h_fs)
        # h_fs_matrix_t = dy.transpose(h_fs_matrix)

        # Decoder
        trans_sentence = [u'<s>']
        cw = self.tgt_vocab[lang][u'<s>']
        c_t = dy.vecInput(self.hidden_size * 2)
        c_t.set([0 for i in xrange(self.contextsize)])
        dec_state = self.dec_builder[lang].initial_state([encoded_h])

        while len(trans_sentence) < self.max_len:
            embed = dy.lookup(M_t,cw)
            dec_state = dec_state.add_input(dy.concatenate([embed, c_t]))
            h_e = dec_state.output()
            # c_t = self.__attention_mlp(h_fs_matrix, h_e)
            c_t = self.__attention_mlp(h_fs_matrix, h_e, W1_att_e, W1_att_f, w2_att)

            # calculate attention
            '''
            a_t = h_fs_matrix_t * h_e
            alignment = dy.softmax(a_t)
            c_t = h_fs_matrix * alignment'''
            ind_tem = dy.concatenate([h_e, c_t])
            ind_tem1 = W_y * ind_tem
            ind_tem2 = ind_tem1 + b_y
            score = dy.softmax(ind_tem2)
            probs1 = score.npvalue()
            cw = np.argmax(probs1)
            if cw == self.tgt_vocab[lang][u'</s>']:
                break
            trans_sentence.append(self.rtgt_vocab[lang][cw])
        return trans_sentence[1:]
Code Example #32
    def run(self, word_inputs, lemma_inputs, tag_inputs, pred_golds, rel_targets=None, isTrain=True):
        # inputs, targets: seq_len x batch_size
        def dynet_flatten_numpy(ndarray):
            return np.reshape(ndarray, (-1,), 'F')

        batch_size = word_inputs.shape[1]
        seq_len = word_inputs.shape[0]
        marker = self._vocab.PAD if self._unified else self._vocab.DUMMY
        mask = np.greater(word_inputs, marker).astype(np.float32)
        num_tokens = int(np.sum(mask))

        word_embs = [dy.lookup_batch(self.word_embs,
                                     np.where(w < self._vocab.words_in_train, w, self._vocab.UNK)
                                     ) for w in word_inputs]
        pre_embs = [dy.lookup_batch(self.pret_word_embs, w) for w in word_inputs]
        flag_embs = [dy.lookup_batch(self.flag_embs,
                                     np.array(w == i + 1, dtype=np.int)
                                     ) for i, w in enumerate(pred_golds)]
        lemma_embs = [dy.lookup_batch(self.lemma_embs, lemma) for lemma in lemma_inputs]
        tag_embs = [dy.lookup_batch(self.tag_embs, pos) for pos in tag_inputs]

        if isTrain:
            emb_masks = self.generate_emb_mask(seq_len, batch_size)
            emb_inputs = [dy.concatenate([dy.cmult(word, wm), dy.cmult(pre, wm), dy.cmult(flag, wm),
                                          dy.cmult(lemma, wm), dy.cmult(pos, posm)])
                          for word, pre, flag, lemma, pos, (wm, posm) in
                          zip(word_embs, pre_embs, flag_embs, lemma_embs, tag_embs, emb_masks)]

        else:
            emb_inputs = [dy.concatenate([word, pre, flag, lemma, pos])
                          for word, pre, flag, lemma, pos in
                          zip(word_embs, pre_embs, flag_embs, lemma_embs, tag_embs)]

        top_recur = dy.concatenate_cols(
            biLSTM(self.LSTM_builders, emb_inputs, batch_size,
                   self.dropout_lstm_input if isTrain else 0.,
                   self.dropout_lstm_hidden if isTrain else 0.))
        if isTrain:
            top_recur = dy.dropout_dim(top_recur, 1, self.dropout_mlp)

        W_arg, b_arg = dy.parameter(self.mlp_arg_W), dy.parameter(self.mlp_arg_b)
        W_pred, b_pred = dy.parameter(self.mlp_pred_W), dy.parameter(self.mlp_pred_b)
        arg_hidden = leaky_relu(dy.affine_transform([b_arg, W_arg, top_recur]))
        # pred_hidden = leaky_relu(dy.affine_transform([b_pred, W_pred, top_recur]))
        predicates_1D = pred_golds[0]
        pred_recur = dy.pick_batch(top_recur, predicates_1D, dim=1)
        pred_hidden = leaky_relu(dy.affine_transform([b_pred, W_pred, pred_recur]))
        if isTrain:
            arg_hidden = dy.dropout_dim(arg_hidden, 1, self.dropout_mlp)
            # pred_hidden = dy.dropout_dim(pred_hidden, 1, self.dropout_mlp)
            pred_hidden = dy.dropout(pred_hidden, self.dropout_mlp)

        W_rel = dy.parameter(self.rel_W)

        # rel_logits = bilinear(arg_hidden, W_rel, pred_hidden, self.mlp_size, seq_len, batch_size,
        # 						num_outputs = self._vocab.rel_size, bias_x = True, bias_y = True)
        # # (#pred x rel_size x #arg) x batch_size

        # flat_rel_logits = dy.reshape(rel_logits, (seq_len, self._vocab.rel_size), seq_len * batch_size)
        # # (#pred x rel_size) x (#arg x batch_size)

        # predicates_1D = dynet_flatten_numpy(pred_golds)
        # partial_rel_logits = dy.pick_batch(flat_rel_logits, predicates_1D)
        # # (rel_size) x (#arg x batch_size)

        rel_logits = bilinear(arg_hidden, W_rel, pred_hidden, self.mlp_size, seq_len, 1, batch_size,
                              num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
        # (1 x rel_size x #arg) x batch_size
        flat_rel_logits = dy.reshape(rel_logits, (1, self._vocab.rel_size), seq_len * batch_size)
        # (1 x rel_size) x (#arg x batch_size)

        predicates_1D = np.zeros(dynet_flatten_numpy(pred_golds).shape[0])
        partial_rel_logits = dy.pick_batch(flat_rel_logits, predicates_1D)
        # (1 x rel_size) x (#arg x batch_size)

        if isTrain:
            mask_1D = dynet_flatten_numpy(mask)
            mask_1D_tensor = dy.inputTensor(mask_1D, batched=True)
            rel_preds = partial_rel_logits.npvalue().argmax(0)
            targets_1D = dynet_flatten_numpy(rel_targets)
            rel_correct = np.equal(rel_preds, targets_1D).astype(np.float32) * mask_1D
            rel_accuracy = np.sum(rel_correct) / num_tokens
            losses = dy.pickneglogsoftmax_batch(partial_rel_logits, targets_1D)
            rel_loss = dy.sum_batches(losses * mask_1D_tensor) / num_tokens
            return rel_accuracy, rel_loss

        # rel_probs = np.transpose(np.reshape(dy.softmax(dy.transpose(flat_rel_logits)).npvalue(),
        # 									(self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))

        rel_probs = np.transpose(np.reshape(dy.softmax(dy.transpose(flat_rel_logits)).npvalue(),
                                            (self._vocab.rel_size, 1, seq_len, batch_size), 'F'))
        outputs = []

        # for msk, pred_gold, rel_prob in zip(np.transpose(mask), pred_golds.T, rel_probs):
        # 	msk[0] = 1.
        # 	sent_len = int(np.sum(msk))
        # 	rel_prob = rel_prob[np.arange(len(pred_gold)), pred_gold]
        # 	rel_pred = rel_argmax(rel_prob)
        # 	outputs.append(rel_pred[:sent_len])

        for msk, pred_gold, rel_prob in zip(np.transpose(mask), pred_golds.T, rel_probs):
            msk[0] = 1.
            sent_len = int(np.sum(msk))
            rel_prob = rel_prob[np.arange(len(pred_gold)), 0]
            rel_pred = rel_argmax(rel_prob)
            outputs.append(rel_pred[:sent_len])

        return outputs
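The masked loss block above (pickneglogsoftmax_batch over batched logits, multiplied by a batched mask and reduced with sum_batches) can be reproduced in isolation. The following is a minimal sketch with made-up shapes and values, not part of the original model:

# Minimal sketch of DyNet's masked, batched NLL loss (shapes and values are illustrative).
import dynet as dy
import numpy as np

dy.renew_cg()
rel_size, n_items = 4, 6                                      # hypothetical label count and #arg x batch_size
scores_np = np.random.randn(rel_size, n_items).astype(np.float32)
scores = dy.inputTensor(scores_np, batched=True)              # stand-in for partial_rel_logits
targets_1D = [0, 2, 1, 3, 0, 1]                               # one gold label per batch element
mask_1D = np.array([1, 1, 1, 1, 0, 0], dtype=np.float32)      # 0 marks padded positions
mask_1D_tensor = dy.inputTensor(mask_1D, batched=True)

losses = dy.pickneglogsoftmax_batch(scores, targets_1D)       # one loss value per batch element
num_tokens = float(np.sum(mask_1D))
loss = dy.sum_batches(losses * mask_1D_tensor) / num_tokens
print(loss.value())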
コード例 #33
0
    def get_bert_embed(self, passage, lang, train=False):
        orig_tokens = passage
        bert_tokens = []
        # Token map will be an int -> int mapping between the `orig_tokens` index and
        # the `bert_tokens` index.
        orig_to_tok_map = []

        # Example:
        # orig_tokens = ["John", "Johanson", "'s",  "house"]
        # bert_tokens == ["[CLS]", "john", "johan", "##son", "'", "s", "house", "[SEP]"]
        # orig_to_tok_map == [slice(1, 2), slice(2, 4), slice(4, 6), slice(6, 7)]
        # (a standalone sketch of this alignment follows this example)

        bert_tokens.append("[CLS]")
        for orig_token in orig_tokens:
            start_token = len(bert_tokens)
            bert_token = self.tokenizer.tokenize(orig_token)
            bert_tokens.extend(bert_token)
            end_token = start_token + len(bert_token)
            orig_to_tok_map.append(slice(start_token, end_token))
        bert_tokens.append("[SEP]")

        indexed_tokens = self.tokenizer.convert_tokens_to_ids(bert_tokens)
        tokens_tensor = self.torch.tensor([indexed_tokens])
        if self.config.args.bert_gpu:
            tokens_tensor = tokens_tensor.to('cuda')

        with self.torch.no_grad():
            encoded_layers, _ = self.bert_model(tokens_tensor)
        assert len(
            encoded_layers
        ) == self.bert_layers_count, "Invalid BERT layer count %s" % len(
            encoded_layers)

        aligned_layer = []
        for layer in range(self.bert_layers_count):
            aligned_layer.append([])
            for mapping_range in orig_to_tok_map:
                token_embeddings = encoded_layers[layer][0][mapping_range]
                if self.config.args.bert_token_align_by == "mean":
                    aligned_layer[layer].append(
                        self.torch.mean(token_embeddings,
                                        dim=(0, )).cpu().data.numpy())
                elif self.config.args.bert_token_align_by == "sum":
                    aligned_layer[layer].append(
                        self.torch.sum(token_embeddings,
                                       dim=(0, )).cpu().data.numpy())
                elif self.config.args.bert_token_align_by == "first":
                    aligned_layer[layer].append(
                        token_embeddings[0].cpu().data.numpy())
                else:
                    raise ValueError("Invalid BERT token align option '%s'" %
                                     self.config.args.bert_token_align_by)

        layer_list_to_use = self.config.args.bert_layers
        aligned_layer = [aligned_layer[i] for i in layer_list_to_use]

        if self.config.args.bert_layers_pooling == "weighted":
            bert_softmax = dy.softmax(self.params["bert_weights"])
            embeds = dy.cmult(dy.inputTensor(np.asarray(aligned_layer)),
                              bert_softmax)
            embeds = dy.sum_dim(embeds, [0])
        elif self.config.args.bert_layers_pooling == "concat":
            embeds = dy.inputTensor(np.concatenate(aligned_layer, axis=1))
        elif self.config.args.bert_layers_pooling == "sum":
            embeds = dy.inputTensor(np.sum(aligned_layer, axis=0))
        else:
            raise ValueError("Invalid BERT pooling option '%s'" %
                             self.config.args.bert_layers_pooling)

        if self.config.args.bert_multilingual == 0:
            assert lang
            if (lang + "_embed") in self.params:
                lang_embed = self.params[lang + "_embed"]
            else:
                lang_embed = self.model.add_parameters(50, init='glorot')
                self.params[lang + "_embed"] = lang_embed

            multilingual_embeds = []
            for embed in embeds:
                multilingual_embeds.append(dy.concatenate([lang_embed, embed]))

            embeds = dy.transpose(dy.concatenate_cols(multilingual_embeds))

        if self.config.args.bert_layers_pooling == "weighted":
            single_token_embed_len = self.bert_embedding_len
        elif self.config.args.bert_layers_pooling == "concat":
            single_token_embed_len = self.bert_embedding_len * len(
                layer_list_to_use)
        elif self.config.args.bert_layers_pooling == "sum":
            single_token_embed_len = self.bert_embedding_len
        else:
            raise ValueError("Invalid BERT pooling option '%s'" %
                             self.config.args.bert_layers_pooling)
        if self.config.args.bert_multilingual == 0:
            single_token_embed_len += 50

        # TODO: try dropout strategies like dropping at the per layer embeddings or dropping entire layers.
        assert embeds.dim() == ((len(passage), single_token_embed_len),
                                1), "Invalid BERT dim %s" % embeds.dim()

        assert 0 <= self.config.args.bert_dropout < 1, "Invalid BERT dropout %s" % self.config.args.bert_dropout
        if train:
            embeds = dy.dropout(embeds, self.config.args.bert_dropout)

        return embeds
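The slice-based wordpiece alignment built at the top of get_bert_embed can be checked on the example from the comment without any model. The sketch below uses a hand-written wordpiece split and plain NumPy pooling, so the splits and the hidden size are assumptions for illustration only:

# Standalone sketch of the orig_to_tok_map construction and "mean" pooling
# (the wordpiece splits and the hidden size are made up, not produced by a real tokenizer).
import numpy as np

orig_tokens = ["John", "Johanson", "'s", "house"]
wordpieces = {"John": ["john"], "Johanson": ["johan", "##son"],
              "'s": ["'", "s"], "house": ["house"]}

bert_tokens, orig_to_tok_map = ["[CLS]"], []
for tok in orig_tokens:
    start = len(bert_tokens)
    bert_tokens.extend(wordpieces[tok])
    orig_to_tok_map.append(slice(start, len(bert_tokens)))
bert_tokens.append("[SEP]")
# bert_tokens == ['[CLS]', 'john', 'johan', '##son', "'", 's', 'house', '[SEP]']
# orig_to_tok_map == [slice(1, 2), slice(2, 4), slice(4, 6), slice(6, 7)]

layer = np.random.randn(len(bert_tokens), 8)                   # fake single BERT layer, hidden size 8
aligned = np.stack([layer[rng].mean(axis=0) for rng in orig_to_tok_map])
assert aligned.shape == (len(orig_tokens), 8)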
コード例 #34
0
ファイル: dynet.py プロジェクト: GustikS/NeuralNetwork
e = e1[k:v]  # same as dy.pickrange(e1, k, v): rows k..v-1 of e1

e = dy.pickneglogsoftmax(
    e1, k)  # k is unsigned integer. equiv to: (pick(-log(dy.softmax(e1)), k))

# Neural net stuff
dy.noise(
    e1, stddev
)  # add noise to each element, drawn from a Gaussian with standard deviation stddev
dy.dropout(e1, p)  # apply dropout with probability p

# functions over lists of expressions
e = dy.esum([e1, e2, ...])  # sum
e = dy.average([e1, e2, ...])  # average
e = dy.concatenate_cols(
    [e1, e2, ...]
)  # e1, e2, ... are column vectors; returns a matrix (similar to np.hstack([e1, e2, ...]))
e = dy.concatenate([e1, e2, ...])  # concatenate

e = dy.affine_transform([e0, e1, e2, ...])  # e = e0 + ((e1*e2) + (e3*e4) ...)

## Loss functions
e = dy.squared_distance(e1, e2)
e = dy.l1_distance(e1, e2)
e = dy.huber_distance(e1, e2, c=1.345)

# e1 must be a scalar that is a value between 0 and 1
# e2 (ty) must be a scalar that is a value between 0 and 1
# e = -(ty * log(e1) + (1 - ty) * log(1 - e1))
e = dy.binary_log_loss(e1, e2)
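A few of the operations listed above can be put together into a tiny end-to-end training step. The sizes, parameter names and the gold label below are arbitrary assumptions, not taken from any of the surrounding examples:

# Tiny classifier assembled from the operations in the cheat sheet above (arbitrary sizes).
import dynet as dy

model = dy.ParameterCollection()
p_W = model.add_parameters((3, 5))            # 3 classes, 5 input features
p_b = model.add_parameters((3,))
trainer = dy.SimpleSGDTrainer(model)

dy.renew_cg()
W, b = dy.parameter(p_W), dy.parameter(p_b)
x = dy.inputVector([0.1, -0.2, 0.3, 0.0, 0.5])
x = dy.dropout(x, 0.1)                        # apply dropout with probability 0.1
h = dy.affine_transform([b, W, x])            # h = b + W * x
loss = dy.pickneglogsoftmax(h, 2)             # negative log-likelihood of gold class 2
loss.value()                                  # run the forward pass
loss.backward()
trainer.update()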
コード例 #35
0
    def translate_sentence(self, sent):
        dy.renew_cg()

        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)
        W1_att_f = dy.parameter(self.W1_att_f)
        W1_att_e = dy.parameter(self.W1_att_e)
        w2_att = dy.parameter(self.w2_att)

        sent = [startSymbol] + sent + [endSymbol]
        sent_rev = list(reversed(sent))

        # Bidirectional representations
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []

        for (cw_l2r, cw_r2l) in zip(sent, sent_rev):
            l2r_state = l2r_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_l2r]))
            r2l_state = r2l_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_r2l]))
            l2r_contexts.append(l2r_state.output())
            r2l_contexts.append(r2l_state.output())
        r2l_contexts.reverse()

        h_fs = []
        for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
            h_fs.append(dy.concatenate([l2r_i, r2l_i]))

        h_fs_matrix = dy.concatenate_cols(h_fs)

        # Decoder
        trans_sentence1 = [startSymbol]
        trans_sentence2 = [startSymbol]
        cw1 = trans_sentence1[-1]
        cw2 = trans_sentence2[-1]
        #initial context
        c_t = dy.vecInput(self.hidden_size * 2)
        start = dy.concatenate(
            [dy.lookup(self.tgt_lookup, self.tgt_token_to_id[endSymbol]), c_t])
        init_state = self.dec_builder.initial_state().add_input(start)

        def generate_top_n(logProb, state, words, wordID, n):
            if words[-1] == endSymbol:
                yield logProb, words
                return  # finished hypothesis: do not expand it further
            h_e = state.output()
            c_t, unkIndex = self.__attention_mlp(h_fs_matrix, h_e)
            embed_t = dy.lookup(self.tgt_lookup, wordID)
            x_t = dy.concatenate([embed_t, c_t])
            next_state = state.add_input(x_t)
            y_star = np.reshape(
                dy.softmax(W_y * next_state.output() + b_y).npvalue(), -1)
            for nextWordID in np.argpartition(-y_star, n)[:n]:  # indices of the top-n words
                currentWord = self.tgt_id_to_token[nextWordID]
                if currentWord == unkSymbol:
                    currentWord = self.src_id_to_token[unkIndex]
                currentLogProb = logProb + np.log(y_star[nextWordID])
                newWords = words + [currentWord]
                yield currentLogProb, generate_top_n(currentLogProb, next_state,
                                                     newWords, nextWordID, n), newWords

        beamSize = 2
        trans = []
        currentBeam = [(0,
                        generate_top_n(0, init_state, [startSymbol],
                                       self.tgt_token_to_id[startSymbol],
                                       beamSize), [startSymbol])]
        remainStep = self.max_len + 2
        while not trans and remainStep > 0:
            nextBeam = []
            while currentBeam:
                _, maxProbStep, _ = heappop(currentBeam)
                for next in maxProbStep:
                    if isinstance(next[1], GeneratorType):
                        heappush(nextBeam, next)
                    else:
                        trans.append(next)
            while len(nextBeam) > beamSize:
                heappop(nextBeam)
            currentBeam = nextBeam

        if trans:
            trans_sentence = max(trans)[-1][1:]
        else:
            trans_sentence = max(currentBeam)[-1][1:-1]

        return ' '.join(trans_sentence)
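The beam bookkeeping in translate_sentence relies on heapq keeping the smallest item at the root: hypotheses are pushed onto nextBeam and the lowest-scoring ones are popped off until only beamSize remain. A minimal sketch of that pruning step, with made-up scores and no model, looks like this (the original trims after pushing everything; pruning inside the loop is equivalent):

# Minimal sketch of heap-based beam pruning (scores and hypotheses are made up).
from heapq import heappush, heappop

beam_size = 2
candidates = [(-1.2, ["<S>", "a"]), (-0.4, ["<S>", "b"]), (-2.3, ["<S>", "c"])]

beam = []
for cand in candidates:
    heappush(beam, cand)
    if len(beam) > beam_size:
        heappop(beam)                  # drop the lowest log-probability hypothesis

print(sorted(beam, reverse=True))      # [(-0.4, ['<S>', 'b']), (-1.2, ['<S>', 'a'])]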
コード例 #36
0
 def attend(self, context, x):
     context_cols = dy.concatenate_cols(context)
     context_emb = dy.max_dim(context_cols, 1)
     return context_emb, None
コード例 #37
0
    def run(self,
            word_inputs,
            lengths,
            tag_inputs,
            arc_targets=None,
            rel_targets=None,
            isTrain=True):
        batch_size = word_inputs.shape[1]
        seq_len = word_inputs.shape[0]
        mask = (np.broadcast_to(np.reshape(np.arange(seq_len), (seq_len, 1)),
                                (seq_len, batch_size)) < lengths).astype(
                                    np.float32)
        mask[0] = 0.
        num_tokens = int(np.sum(mask))

        if isTrain or arc_targets is not None:
            mask_1D = self.dynet_flatten_numpy(mask)
            # batched here means that the last dim is treated as batch dimension, both in input and output
            mask_1D_tensor = dy.inputTensor(mask_1D, batched=True)

        # TODO: note the handling of _words_in_train
        # sum the two embeddings: [Expression of dim=((embedding_dim,), batch_size)] * seq_len
        if self.e_ext is not None:
            word_embs = [
                dy.lookup_batch(
                    self.e_form,
                    np.where(w < self.v_train, w,
                             self.vocab_form.stoi["<unk>"])) +
                dy.lookup_batch(self.e_ext, w, update=False)
                for w in word_inputs
            ]  # sum of the two embeddings, [Expression] * seq_len
        else:
            word_embs = [
                dy.lookup_batch(
                    self.e_form,
                    np.where(w < self.v_train, w,
                             self.vocab_form.stoi["<unk>"]))
                for w in word_inputs
            ]
        tag_embs = [dy.lookup_batch(self.e_tag, pos) for pos in tag_inputs]

        if isTrain:
            emb_masks = self.generate_emb_msk(seq_len, batch_size)
            emb_inputs = [
                dy.concatenate([dy.cmult(w, wm),
                                dy.cmult(pos, posm)])
                for w, pos, (wm, posm) in zip(word_embs, tag_embs, emb_masks)
            ]
        else:
            emb_inputs = [
                dy.concatenate([w, pos])
                for w, pos in zip(word_embs, tag_embs)
            ]

        top_recur = dy.concatenate_cols(
            biLSTM(self.lstm_builders, emb_inputs, batch_size,
                   self.dropout_lstm_input if isTrain else 0.,
                   self.dropout_lstm_hidden if isTrain else 0.))
        if isTrain:
            # drop some dim for lstm_output for all words, all sentences
            top_recur = dy.dropout_dim(top_recur, 1, self.dropout_mlp)

        dep = leaky_relu(
            dy.affine_transform([self.mlp_dep_b, self.mlp_dep_W, top_recur]))
        head = leaky_relu(
            dy.affine_transform([self.mlp_head_b, self.mlp_head_W, top_recur]))
        if isTrain:
            dep, head = dy.dropout_dim(dep, 1,
                                       self.dropout_mlp), dy.dropout_dim(
                                           head, 1, self.dropout_mlp)
            # dropping along dim k means whole slices along that dimension may be set to zeros
            # for a batched matrix with shape ((R, C), B):
            # dropping dim 0 drops some columns, dropping dim 1 drops some rows,
            # and dropping dim 2 drops some batch elements; only tensors with rank <= 3 are supported
            # (a small dropout_dim sketch follows this example)

        dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
        head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

        arc_logits = bilinear(dep_arc,
                              self.arc_W,
                              head_arc,
                              self.mlp_arc_size,
                              seq_len,
                              batch_size,
                              num_outputs=1,
                              bias_x=True,
                              bias_y=False)
        # (#head x #dep) x batch_size

        flat_arc_logits = dy.reshape(arc_logits, (seq_len, ),
                                     seq_len * batch_size)
        # flatten it to compute loss
        # (#head ) x (#dep x batch_size)

        arc_preds = np.reshape(arc_logits.npvalue().argmax(0),
                               (seq_len, batch_size))
        # seq_len x batch_size
        # here if an Expression's batch size is 1
        # npvalue() will drop the batch dimension
        # so add it back if needed

        if isTrain or arc_targets is not None:
            # train it with a negative log-likelihood loss, but enforce the tree constraint when testing
            arc_correct = np.equal(arc_preds, arc_targets).astype(
                np.float32) * mask
            # mask is used to filter <pad>'s out in summing loss
            arc_accuracy = np.sum(arc_correct) / num_tokens
            targets_1D = self.dynet_flatten_numpy(arc_targets)
            losses = dy.pickneglogsoftmax_batch(flat_arc_logits, targets_1D)
            arc_loss = dy.sum_batches(losses * mask_1D_tensor) / num_tokens

        if not isTrain:
            arc_probs = np.transpose(
                np.reshape(
                    dy.softmax(flat_arc_logits).npvalue(),
                    (seq_len, seq_len, batch_size), 'F'))
            # batch_size x #dep x #head; transpose reverses all dims, and since the layout changes accordingly this is fine

        rel_logits = bilinear(dep_rel,
                              self.rel_W,
                              head_rel,
                              self.mlp_rel_size,
                              seq_len,
                              batch_size,
                              num_outputs=len(self.vocab_deprel),
                              bias_x=True,
                              bias_y=True)
        # (#head x rel_size x #dep) x batch_size

        flat_rel_logits = dy.reshape(rel_logits,
                                     (seq_len, len(self.vocab_deprel)),
                                     seq_len * batch_size)
        # (#head x rel_size) x (#dep x batch_size)

        partial_rel_logits = dy.pick_batch(
            flat_rel_logits,
            targets_1D if isTrain else self.dynet_flatten_numpy(arc_preds))
        # (rel_size) x (#dep x batch_size)

        if isTrain or arc_targets is not None:
            rel_preds = partial_rel_logits.npvalue().argmax(0)
            targets_1D = self.dynet_flatten_numpy(rel_targets)
            rel_correct = np.equal(rel_preds, targets_1D).astype(
                np.float32) * mask_1D  # the shapes here require the flattened 1-D mask
            rel_accuracy = np.sum(rel_correct) / num_tokens
            losses = dy.pickneglogsoftmax_batch(partial_rel_logits, targets_1D)
            rel_loss = dy.sum_batches(losses * mask_1D_tensor) / num_tokens

        if not isTrain:
            rel_probs = np.transpose(
                np.reshape(
                    dy.softmax(dy.transpose(flat_rel_logits)).npvalue(),
                    (len(self.vocab_deprel), seq_len, seq_len, batch_size),
                    'F'))
            # batch_size x #dep x #head x #nclasses

        if isTrain or arc_targets is not None:
            loss = arc_loss + rel_loss
            correct = rel_correct * self.dynet_flatten_numpy(arc_correct)
            overall_accuracy = np.sum(correct) / num_tokens

        if isTrain:
            return arc_accuracy, rel_accuracy, overall_accuracy, loss

        outputs = []

        for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs,
                                           rel_probs):
            # parse sentences one by one
            msk[0] = 1.
            sent_len = int(np.sum(msk))
            arc_pred = arc_argmax(arc_prob, sent_len, msk)
            rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
            rel_pred = rel_argmax(
                rel_prob, sent_len, self.vocab_deprel,
                "root" if "root" in self.vocab_deprel.stoi else "ROOT")
            outputs.append(
                (arc_pred[1:sent_len], rel_pred[1:sent_len]))  # w_0 is <root>
        assert (len(outputs) == batch_size)

        if arc_targets is not None:
            return arc_accuracy, rel_accuracy, overall_accuracy, outputs
        return outputs
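The comments inside the previous example describe dy.dropout_dim as sharing one dropout mask along the chosen dimension, so entire slices can be zeroed out at once. A minimal sketch with arbitrary sizes makes that visible:

# Small sketch of dy.dropout_dim: the mask is shared along the given dimension,
# so whole slices of the matrix are kept or dropped together (arbitrary sizes).
import dynet as dy
import numpy as np

dy.renew_cg()
x = dy.inputTensor(np.ones((4, 6), dtype=np.float32))   # a 4 x 6 matrix of ones
dropped = dy.dropout_dim(x, 1, 0.5)                      # same dim argument as in the example above
print(np.round(dropped.npvalue(), 2))                    # surviving entries are rescaled, dropped slices are all zeros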
コード例 #38
0
 def attend(self, context, x):
     context_cols = dy.concatenate_cols(context)
     hidden = dy.tanh(dy.colwise_add(self.Wha * context_cols, self.Wia * x))
     weights = dy.softmax(dy.transpose(hidden) * self.Va)
     context_emb = context_cols * weights
     return context_emb, weights
コード例 #39
0
def calc_loss(sents):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]
    src_cws = []

    src_len = [len(sent) for sent in src_sents]
    max_src_len = np.max(src_len)
    num_words = 0

    for i in range(max_src_len):
        src_cws.append([sent[i] for sent in src_sents])

    #get the outputs of the first LSTM
    src_outputs = [
        dy.concatenate([x.output(), y.output()])
        for x, y in LSTM_SRC.add_inputs(
            [dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])
    ]
    src_output = src_outputs[-1]

    #gets the parameters for the attention
    src_output_matrix = dy.concatenate_cols(src_outputs)
    w1_att_src = dy.parameter(w1_att_src_p)
    fixed_attentional_component = w1_att_src * src_output_matrix

    #now decode
    all_losses = []

    # Decoder
    #need to mask padding at end of sentence
    tgt_cws = []
    tgt_len = [len(sent) for sent in tgt_sents]
    max_tgt_len = np.max(tgt_len)
    masks = []

    for i in range(max_tgt_len):
        tgt_cws.append(
            [sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s(
        [src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    W_m = dy.parameter(W_m_p)
    b_m = dy.parameter(b_m_p)

    for next_words, mask in zip(tgt_cws[1:], masks):
        # feed the previous target word into the decoder state
        current_state = current_state.add_input(
            dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()
        att_output, _ = calc_attention(src_output_matrix, output_embedding,
                                       fixed_attentional_component)
        middle_expr = dy.tanh(
            dy.affine_transform(
                [b_m, W_m,
                 dy.concatenate([output_embedding, att_output])]))
        s = dy.affine_transform([b_sm, W_sm, middle_expr])
        loss = (dy.pickneglogsoftmax_batch(s, next_words))
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1, ), len(sents))
        mask_loss = loss * mask_expr
        all_losses.append(mask_loss)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words
コード例 #40
0
 def attend(self, context, x):
     context_cols = dy.concatenate_cols(context)
     weights = dy.softmax(dy.transpose(context_cols) * self.W * x)
     context_emb = context_cols * weights
     return context_emb, weights
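Code examples #36, #38 and #40 all implement the same attend(context, x) interface with different scoring functions: max pooling over the context, an MLP score, and a bilinear score. A self-contained driver for such a method might look like the sketch below; the class name, dimensions and parameter shapes are assumptions for illustration only:

# Illustrative driver for an attend(context, x) method with a bilinear score
# (the class and all sizes are hypothetical; only the interface matches the examples above).
import dynet as dy
import numpy as np

class BilinearAttention(object):
    def __init__(self, model, ctx_dim, query_dim):
        self.W = model.add_parameters((ctx_dim, query_dim))

    def attend(self, context, x):
        context_cols = dy.concatenate_cols(context)            # ctx_dim x len(context)
        scores = dy.transpose(context_cols) * dy.parameter(self.W) * x
        weights = dy.softmax(scores)                           # one weight per context vector
        context_emb = context_cols * weights                   # weighted sum of the context
        return context_emb, weights

model = dy.ParameterCollection()
att = BilinearAttention(model, ctx_dim=8, query_dim=4)

dy.renew_cg()
context = [dy.inputVector(np.random.randn(8).tolist()) for _ in range(5)]  # e.g. encoder states
query = dy.inputVector(np.random.randn(4).tolist())                        # e.g. current decoder state
context_emb, weights = att.attend(context, query)
print(context_emb.dim(), weights.dim())                                    # ((8,), 1) and ((5,), 1)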
コード例 #41
0
    def get_combined_word_representations(self, sentence, training=None):
        """
        
        :param training:
        :param sentence: whole sentence with input values as ids
        :return: word representations made up according to the user preferences
        """

        if training is None:
            training = self.training

        representations_to_be_zipped = []
        word_embedding_based_representations = \
            [self.word_embeddings[word_id] for word_id in sentence['word_ids']]
        representations_to_be_zipped.append(
            dynet.concatenate([
                dynet.transpose(x)
                for x in word_embedding_based_representations
            ]))
        char_representations = self.get_char_representations(sentence)
        representations_to_be_zipped.append(
            dynet.concatenate(
                [dynet.transpose(x) for x in char_representations]))
        if self.parameters[
                'use_golden_morpho_analysis_in_word_representation']:
            morph_tag_based_representations = self.get_morph_analysis_representation_in_old_style(
                sentence)
            representations_to_be_zipped.append(
                dynet.concatenate([
                    dynet.transpose(x) for x in morph_tag_based_representations
                ]))
        if self.parameters['cap_dim'] > 0:
            cap_embedding_based_representations = \
                [self.cap_embeddings[cap_id] for cap_id in sentence['cap_ids']]
            representations_to_be_zipped.append(
                dynet.concatenate([
                    dynet.transpose(x)
                    for x in cap_embedding_based_representations
                ]))
            # combined_word_representations = [dynet.concatenate([x, y, z, xx]) for x, y, z, xx in
            #                                  zip(*representations_to_be_zipped)]
        # else:
        # combined_word_representations = [dynet.concatenate([x, y, xx]) for x, y, xx in
        #                                  zip(*representations_to_be_zipped)]

        combined_word_representations = dynet.concatenate_cols(
            representations_to_be_zipped)
        # print combined_word_representations
        # print self.parameters

        if training:
            combined_word_representations = [
                dynet.dropout(x, p=self.parameters['dropout'])
                for x in combined_word_representations
            ]
        else:
            combined_word_representations = [
                x for x in combined_word_representations
            ]

        return combined_word_representations
コード例 #42
0
    def calculate_batch_loss(self, batch):
        dy.renew_cg()

        W_y = dy.parameter(self.params["W_y"])
        b_y = dy.parameter(self.params["b_y"])
        s_lookup = self.params["s_lookup"]
        t_lookup = self.params["t_lookup"]

        s_batch = [x[0] for x in batch]
        t_batch = [x[1] for x in batch]

        wids = []

        for i in range(len(s_batch[0])):
            wids.append([sent[i] for sent in s_batch])

        wids_rev = list(reversed(wids))

        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []

        for wid in wids:
            l2r_state = l2r_state.add_input(dy.lookup_batch(s_lookup, wid))
            l2r_contexts.append(l2r_state.output())

        for wid in wids_rev:
            r2l_state = r2l_state.add_input(dy.lookup_batch(s_lookup, wid))
            r2l_contexts.append(r2l_state.output())

        r2l_contexts.reverse()

        losses = []

        H_f = []
        H_f = [dy.concatenate(list(p)) for p in zip(l2r_contexts, r2l_contexts)]

        H_f_mat = dy.concatenate_cols(H_f)
        W1_att = dy.parameter(self.params["W1_att"])
        w1dt = W1_att * H_f_mat

        t_wids = []
        masks = []

        num_words = 0

        for i in range(len(t_batch[0])):
            t_wids.append([(sent[i] if len(sent) > i else self.t_vocab[EOS]) for sent in t_batch])
            mask = [(1 if len(sent) > i else 0) for sent in t_batch]
            masks.append(mask)
            num_words += sum(mask)

        c_t = dy.vecInput(2*self.HIDDEN_DIM)

        words = [self.t_vocab[EOS]] * len(t_batch)
        embedding = dy.lookup_batch(t_lookup, words)

        dec_state = self.dec_builder.initial_state()

        for t_wid, mask in zip(t_wids, masks):
            x_t = dy.concatenate([c_t, embedding])
            dec_state = dec_state.add_input(x_t)

            c_t = self.attend(H_f_mat, dec_state, w1dt, len(s_batch[0]), len(wids[0]))

            probs = dy.affine_transform([b_y, W_y, dy.concatenate([c_t, dec_state.output()])])
            loss = dy.pickneglogsoftmax_batch(probs, t_wid)

            if mask[-1] != 1:
                mask_expr = dy.inputVector(mask)
                mask_expr = dy.reshape(mask_expr, (1,), len(t_batch))
                loss = loss * mask_expr

            losses.append(loss)
            embedding = dy.lookup_batch(t_lookup, t_wid)

        loss = dy.sum_batches(dy.esum(losses))  # /len(wids[0])
        return loss, num_words
コード例 #43
0
    def step_batch(self, batch, lang):
        dy.renew_cg()

        W_y = dy.parameter(self.W_y[lang])
        b_y = dy.parameter(self.b_y[lang])
        W1_att_e = dy.parameter(self.W1_att_e)
        W1_att_f = dy.parameter(self.W1_att_f)
        w2_att = dy.parameter(self.w2_att)

        M_s = self.src_lookup
        M_t = self.tgt_lookup[lang]
        src_sent, tgt_sent = zip(*batch)
        src_sent = zip(*src_sent)
        tgt_sent = zip(*tgt_sent)
        src_sent_rev = list(reversed(src_sent))

        # Bidirectional representations
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()

        l2r_contexts = []
        r2l_contexts = []
        for (cw_l2r, cw_r2l) in zip(src_sent, src_sent_rev):
            l2r_state = l2r_state.add_input(dy.lookup_batch(M_s, cw_l2r))
            r2l_state = r2l_state.add_input(dy.lookup_batch(M_s, cw_r2l))
            l2r_contexts.append(l2r_state.output())  # [<S>, x_1, x_2, ..., </S>]
            r2l_contexts.append(r2l_state.output())  # [</S> x_n, x_{n-1}, ... <S>]

        # encoded_h1 = l2r_state.output()
        # tem1 = encoded_h1.npvalue()

        r2l_contexts.reverse()  # [<S>, x_1, x_2, ..., </S>]

        # Combine the left and right representations for every word
        h_fs = []
        for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
            h_fs.append(dy.concatenate([l2r_i, r2l_i]))

        encoded_h = h_fs[-1]

        h_fs_matrix = dy.concatenate_cols(h_fs)
        # h_fs_matrix_t = dy.transpose(h_fs_matrix)

        losses = []
        num_words = 0

        # Decoder
        c_t = dy.vecInput(self.hidden_size * 2)
        c_t.set([0 for i in xrange(self.contextsize)])
        encoded_h = dy.concatenate([encoded_h])
        dec_state = self.dec_builder[lang].initial_state([encoded_h])
        for (cw, nw) in zip(tgt_sent[0:-1], tgt_sent[1:]):
            embed = dy.lookup_batch(M_t, cw)
            dec_state = dec_state.add_input(dy.concatenate([embed, c_t]))
            h_e = dec_state.output()
            #calculate attention
            '''
            a_t = h_fs_matrix_t * h_e
            alignment = dy.softmax(a_t)
            c_t = h_fs_matrix * alignment'''
            c_t = self.__attention_mlp_batch(h_fs_matrix, h_e, W1_att_e, W1_att_f, w2_att)
            ind_tem = dy.concatenate([h_e, c_t])
            ind_tem1 = W_y * ind_tem
            ind_tem2 = ind_tem1 + b_y
            loss = dy.pickneglogsoftmax_batch(ind_tem2, nw)  # to modify
            losses.append(loss)
            num_words += 1
        return dy.sum_batches(dy.esum(losses)), num_words
コード例 #44
0
    def generate(self, s_sentence, max_len=150):

        dy.renew_cg()

        W_y = dy.parameter(self.params["W_y"])
        b_y = dy.parameter(self.params["b_y"])
        s_lookup = self.params["s_lookup"]
        t_lookup = self.params["t_lookup"]

        s_sentence = [self.s_vocab[EOS]] + s_sentence + [self.s_vocab[EOS]]
        s_sentence_rev = list(reversed(s_sentence))

        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []

        for cw_l2r in s_sentence:
            l2r_state = l2r_state.add_input(s_lookup[cw_l2r])
            l2r_contexts.append(l2r_state.output())

        for cw_r2l in s_sentence_rev:
            r2l_state = r2l_state.add_input(s_lookup[cw_r2l])
            r2l_contexts.append(r2l_state.output())

        r2l_contexts.reverse()

        H_f = []
        H_f = [dy.concatenate(list(p)) for p in zip(l2r_contexts, r2l_contexts)]

        H_f_mat = dy.concatenate_cols(H_f)
        W1_att = dy.parameter(self.params["W1_att"])
        w1dt = W1_att * H_f_mat

        c_t = dy.vecInput(2*self.HIDDEN_DIM)
        embedding = t_lookup[self.t_vocab["<EOS>"]]

        dec_state = self.dec_builder.initial_state()

        t_sentence = []

        count_eos = 0

        for i in range(len(s_sentence)*2):
            if count_eos == 2:
                break

            x_t = dy.concatenate([c_t, embedding])
            dec_state = dec_state.add_input(x_t)

            c_t = self.attend(H_f_mat, dec_state, w1dt, len(s_sentence), 1)
            probs = dy.softmax(W_y*dy.concatenate([c_t, dec_state.output()]) + b_y).vec_value()
            word = probs.index(max(probs))

            embedding = t_lookup[word]

            if self.t_id_lookup[word] == "<EOS>":
                count_eos += 1
                continue

            t_sentence.append(self.t_id_lookup[word])

        return " ".join(t_sentence)