def __call__(self, input_exp, hidden_exp, mask=None):
    """One GRU step with input dropout (idrop) and fixed-mask dropout (gdrop).

    :param input_exp: input expression for this time step
    :param hidden_exp: dict holding the previous hidden state under key "H"
    :param mask: optional per-batch-element 0/1 sequence; where 0 (padding),
        the previous hidden state is passed through unchanged
    :return: dict {"H": new hidden state expression}
    """
    # two kinds of dropouts
    if self.idrop > 0.:
        input_exp = dy.dropout(input_exp, self.idrop)
    input_exp_g = input_exp_t = input_exp
    hidden_exp_g = hidden_exp_t = hidden_exp["H"]
    if self.gdrop > 0.:
        # pre-sampled dropout masks: separate masks for the gate path (g)
        # and the candidate/transform path (t)
        input_exp_g = dy.cmult(input_exp, self.masks[0])
        hidden_exp_g = dy.cmult(hidden_exp_g, self.masks[1])
        input_exp_t = dy.cmult(input_exp, self.masks[2])
        hidden_exp_t = dy.cmult(hidden_exp_t, self.masks[3])
    # reset and update gates computed jointly, then split in half
    rzt = dy.affine_transform([
        self.iparams["brz"], self.iparams["x2rz"], input_exp_g,
        self.iparams["h2rz"], hidden_exp_g
    ])
    rzt = dy.logistic(rzt)
    # FIX: use dy.pick_range for both halves (was BK.pick_range for the
    # second half — inconsistent with the first call in the same expression)
    rt, zt = dy.pick_range(rzt, 0, self.n_hidden), dy.pick_range(
        rzt, self.n_hidden, 2 * self.n_hidden)
    h_reset = dy.cmult(rt, hidden_exp_t)
    ht = dy.affine_transform([
        self.iparams["bh"], self.iparams["x2h"], input_exp_t,
        self.iparams["h2h"], h_reset
    ])
    ht = dy.tanh(ht)
    hidden = dy.cmult(zt, hidden_exp["H"]) + dy.cmult(
        (1. - zt), ht)  # first one use original hh
    # mask: if 0 then pass through
    if mask is not None:
        mask_array = np.asarray(mask).reshape((1, -1))
        m1 = dy.inputTensor(mask_array, True)  # 1.0 for real words
        m0 = dy.inputTensor(1.0 - mask_array, True)  # 1.0 for padding words (mask=0)
        hidden = hidden * m1 + hidden_exp["H"] * m0
    return {"H": hidden}
def attend(self, input_mat, state, w1dt, w2, v, coverage):
    """Score each column of input_mat against the decoder state and return
    (attention weights, context vector). When coverage is given, it is folded
    into the pre-computed key projection w1dt."""
    state_vec = dy.concatenate(list(state.s()))
    w2dt = w2 * state_vec
    keys = w1dt
    if coverage:
        keys = keys + self.w_cov * dy.transpose(coverage)
    raw_scores = v * dy.tanh(dy.colwise_add(keys, w2dt))
    a_t = dy.softmax(dy.transpose(raw_scores))
    context = input_mat * a_t
    return a_t, context
def __call__(self, x, h_matrix, noprob=False):
    """Multi-hop attention scorer: refine the query over `self.layers` hops,
    then emit a final score vector, softmax-normalized unless noprob."""
    query = x
    for hop in range(self.layers - 1):
        hop_scores = self.V[hop] * dy.tanh(
            self.W1[hop] * h_matrix + self.W2[hop] * query)
        hop_weights = dy.softmax(dy.transpose(hop_scores))
        context = h_matrix * hop_weights
        # next hop conditions on the original input plus the new context
        query = dy.concatenate([x, context])
    final = (self.V[-1] * dy.tanh(self.W1[-1] * h_matrix + self.W2[-1] * query)
             + self.B1 * h_matrix + self.B2 * query)
    if len(h_matrix.dim()[0]) > 1:
        # flatten a matrix-shaped score into a single vector
        final = dy.reshape(final,
                           (self.V[-1].dim()[0][0] * h_matrix.dim()[0][1], ))
    if noprob:
        return final
    return dy.softmax(final)
def get_top_k_paths(self, all_paths, relation_index, threshold):
    """Score every path and return, per relation, the paths sorted by score.

    :param all_paths: iterable of paths to score
    :param relation_index: mapping of relations (its length gives the number
        of score columns)
    :param threshold: minimum score to keep a path, or None to keep all
    :return: list (one entry per relation) of [(path, score), ...] sorted
        by descending score
    """
    builder = self.builder
    model_parameters = self.model_parameters
    lemma_lookup = model_parameters['lemma_lookup']
    pos_lookup = model_parameters['pos_lookup']
    dep_lookup = model_parameters['dep_lookup']
    dir_lookup = model_parameters['dir_lookup']
    path_scores = []
    for i, path in enumerate(all_paths):
        if i % 1000 == 0:
            # periodically renew the computation graph to bound memory use;
            # parameters must be re-loaded into the fresh graph
            # (fix: dropped unused `cg =` binding and unused `model` local)
            dy.renew_cg()
            W1 = dy.parameter(model_parameters['W1'])
            b1 = dy.parameter(model_parameters['b1'])
            W2 = None
            b2 = None
            if self.num_hidden_layers == 1:
                W2 = dy.parameter(model_parameters['W2'])
                b2 = dy.parameter(model_parameters['b2'])
        path_embedding = get_path_embedding(builder, lemma_lookup, pos_lookup,
                                            dep_lookup, dir_lookup, path)
        if self.use_xy_embeddings:
            # pad with zero x/y word vectors to match the training input shape
            zero_word = dy.inputVector([0.0] * self.lemma_embeddings_dim)
            path_embedding = dy.concatenate(
                [zero_word, path_embedding, zero_word])
        h = W1 * path_embedding + b1
        if self.num_hidden_layers == 1:
            h = W2 * dy.tanh(h) + b2
        path_score = dy.softmax(h).npvalue().T
        path_scores.append(path_score)
    path_scores = np.vstack(path_scores)
    top_paths = []
    for i in range(len(relation_index)):
        indices = np.argsort(-path_scores[:, i])
        top_paths.append([
            (all_paths[index], path_scores[index, i]) for index in indices
            if threshold is None or path_scores[index, i] >= threshold
        ])
    return top_paths
def attend(self, encoded_inputs, h_t, input_masks=None):
    """Compute an attention-weighted output vector for the decoder state h_t.

    :param encoded_inputs: list of encoder state expressions
    :param h_t: current decoder hidden state
    :param input_masks: optional per-position 0/1 masks when batching
    :return: (h_output, alphas) — the attentional output vector and the
        attention weights
    """
    # encoded_inputs dimension is: seq len x 2*h x batch size, h_t dimension is h x batch size (for bilstm encoder)
    if len(encoded_inputs) == 1:
        # no need to attend if only one input state, compute output directly
        h_output = dn.tanh(self.w_c * dn.concatenate([h_t, encoded_inputs[0]]))
        # return trivial alphas (all 1's since one input gets all attention)
        if input_masks:  # if batching
            alphas = dn.inputTensor([1] * len(input_masks[0]), batched=True)
        else:
            alphas = dn.inputTensor([1], batched=True)
        return h_output, alphas
    # iterate through input states to compute attention scores
    # scores = [v_a * dn.tanh(w_a * h_t + u_a * h_input) for h_input in blstm_outputs]
    # hoist the decoder-state projection out of the loop — it is the same
    # for every input position
    w_a_h_t = self.w_a * h_t
    scores = [
        self.v_a * dn.tanh(dn.affine_transform([w_a_h_t, self.u_a, h_input]))
        for h_input in encoded_inputs
    ]
    concatenated = dn.concatenate(scores)
    if input_masks:
        # if batching, multiply attention scores with input masks to zero-out scores for padded inputs
        # NOTE(review): zeroing happens before softmax, so padded positions
        # still get exp(0) mass rather than exactly zero — confirm intended
        dn_masks = dn.inputTensor(input_masks, batched=True)
        concatenated = dn.cmult(concatenated, dn_masks)
    # normalize scores
    alphas = dn.softmax(concatenated)
    # compute context vector with weighted sum for each seq in batch
    bo = dn.concatenate_cols(encoded_inputs)
    c = bo * alphas
    # c = dn.esum([h_input * dn.pick(alphas, j) for j, h_input in enumerate(blstm_outputs)])
    # compute output vector using current decoder state and context vector
    h_output = dn.tanh(self.w_c * dn.concatenate([h_t, c]))
    return h_output, alphas
def __call__(self, sent, n, caches):
    """One attention step: score the cached encoder projections against the
    query `n`, and store the context vector and weights back into `caches`.

    Shapes below use the file's {(dims,), batch_size} notation.
    """
    # s: list(len==steps) of {(n_s,), batch_size}, n: {(n_h,), batch_size}
    # (re)build cached projections for this sentence if needed — presumably
    # caches["V"] holds W*S precomputed once per sentence; verify in helper
    caches = self._restart_caches(sent, caches)
    val_h = self.iparams["h2e"] * n  # {(n_hidden,), batch_size}
    att_hidden_bef = dy.colwise_add(
        caches["V"], val_h)  # {(n_didden, steps), batch_size}
    att_hidden = dy.tanh(att_hidden_bef)
    # if self.hdrop > 0:    # save some space
    #     att_hidden = dy.dropout(att_hidden, self.hdrop)
    # project to one score per step and flatten to a vector of length steps
    att_e = dy.reshape(self.iparams["v"] * att_hidden,
                       (BK.dims(caches["V"])[1], ),
                       batch_size=bs(att_hidden))
    att_alpha = dy.softmax(att_e)
    ctx = caches["S"] * att_alpha  # {(n_s, sent_len), batch_size}
    # append and return
    caches["ctx"] = ctx
    caches["att"] = att_alpha
    return caches
def build_network(params, x_data):
    """Build the MLP output expression for one example.

    :param params: (unused, E, b, U, W, bp) parameter tuple
    :param x_data: either a list of word ordinals, or a dict with keys
        'fullwords', 'prefix', 'suffix' (each a list of ordinals)
    :return: softmax output expression
    """
    _, E, b, U, W, bp = params
    # fix: isinstance instead of `type(x_data) == dict`
    if isinstance(x_data, dict):
        prefix_ordinals = x_data['prefix']
        suffix_ordinals = x_data['suffix']
        x_ordinals = x_data['fullwords']
    else:
        prefix_ordinals = None
        suffix_ordinals = None
        x_ordinals = x_data
    # fix: loop variable renamed — `ord` shadowed the builtin
    x = dy.concatenate([E[idx] for idx in x_ordinals])
    if prefix_ordinals:
        # add prefix-embedding signal on top of the full-word embeddings
        x_pre = dy.concatenate([E[idx] for idx in prefix_ordinals])
        x = x + x_pre
    if suffix_ordinals:
        x_suf = dy.concatenate([E[idx] for idx in suffix_ordinals])
        x = x + x_suf
    output = dy.softmax(U * (dy.tanh(W * x + b)) + bp)
    return output
def encoder_forward(self, src1, src2):
    """Encode both source sequences, precompute attention key projections,
    and build the initial decoder state.

    Returns (src1_mat, src2_mat, src1_w1dt, src2_w1dt, decoder_state).
    """
    emb1 = self.embed_idx(src1, self.src1_lookup)
    if self.single_source:
        # zero placeholder embeddings stand in for the absent second source
        emb2 = [dy.vecInput(EMBEDDING_DIM) for _ in src2]
    else:
        emb2 = self.embed_idx(src2, self.src2_lookup)
    enc1 = self.encode(emb1, self.enc1_fwd_lstm, self.enc1_bwd_lstm)
    enc2 = self.encode(emb2, self.enc2_fwd_lstm, self.enc2_bwd_lstm)
    src1_mat = dy.concatenate_cols(enc1)
    src1_w1dt = self.att1_w1 * src1_mat
    src2_mat = dy.concatenate_cols(enc2)
    src2_w1dt = self.att2_w1 * src2_mat
    # initial decoder state from the final encoder states
    if self.single_source:
        tail = dy.vecInput(2 * HIDDEN_DIM)
        start = self.W_s * dy.concatenate([enc1[-1], tail]) + self.b_s
    else:
        start = self.W_s * dy.concatenate([enc1[-1], enc2[-1]]) + self.b_s
    last_emb = self.tgt_lookup[self.tgt_vocab.str2int(EOS)]
    ctx1 = dy.vecInput(2 * HIDDEN_DIM)
    ctx2 = dy.vecInput(2 * HIDDEN_DIM)
    decoder_state = self.dec_lstm.initial_state(
        [start, dy.tanh(start)]).add_input(
            dy.concatenate([ctx1, ctx2, last_emb]))
    return src1_mat, src2_mat, src1_w1dt, src2_w1dt, decoder_state
def __call__(self, x, tm1s=None, test=False):
    """GRU decoder step(s) with attention and maxout output.

    Test mode: x is the previous word embedding, tm1s = (s_tm1, c_tm1);
    returns (s_t, c_t, y_t) with y_t softmax-normalized.
    Train mode: x is the full list of word embeddings; returns the list of
    unnormalized output vectors (softmax applied by the loss elsewhere).
    """
    if test:
        # Initial states
        s_tm1 = tm1s[0]
        c_tm1 = tm1s[1]
        w_tm1 = x
        # GRU
        s_t = self.GRUBuilder.initial_state().set_s([s_tm1]).add_input(
            dy.concatenate([w_tm1, c_tm1])).output()
        # Attention — note scores use the *previous* state s_tm1
        e_t = dy.pick(
            self.va *
            dy.tanh(dy.colwise_add(self.Ua * self.hp, self.Wa * s_tm1)), 0)
        a_t = dy.softmax(e_t)
        c_t = dy.esum([
            dy.cmult(a_t_i, h_i)
            for a_t_i, h_i in zip(a_t, dy.transpose(self.hp))
        ])
        #c_t = self.hp*a_t # memory error?
        # Output
        r_t = dy.concatenate_cols([
            Wr_j * w_tm1 + Ur_j * c_t + Vr_j * s_t
            for Wr_j, Ur_j, Vr_j in zip(self.Wr, self.Ur, self.Vr)
        ])
        # Maxout
        m_t = dy.max_dim(r_t, d=1)
        y_t = dy.softmax(self.Wo * m_t)
        return s_t, c_t, y_t
    else:
        w_embs = x
        # Initial states
        s_tm1 = self.s_0
        c_tm1 = self.c_0
        GRU = self.GRUBuilder.initial_state().set_s([s_tm1])
        y = []
        for w_tm1 in w_embs:
            # GRU
            GRU = GRU.add_input(dy.concatenate([w_tm1, c_tm1]))
            s_t = GRU.output()
            # Attention
            e_t = dy.pick(
                self.va * dy.tanh(
                    dy.colwise_add(self.Ua * self.hp, self.Wa * s_tm1)), 0)
            a_t = dy.softmax(e_t)
            c_t = dy.esum([
                dy.cmult(a_t_i, h_i)
                for a_t_i, h_i in zip(a_t, dy.transpose(self.hp))
            ])
            #c_t = self.hp*a_t # memory error?
            # Output
            r_t = dy.concatenate_cols([
                Wr_j * w_tm1 + Ur_j * c_t + Vr_j * s_t
                for Wr_j, Ur_j, Vr_j in zip(self.Wr, self.Ur, self.Vr)
            ])
            # Maxout
            m_t = dy.max_dim(r_t, d=1)
            # unnormalized here (no softmax), unlike the test branch
            y_t = self.Wo * m_t
            y.append(y_t)
            # t -> tm1
            s_tm1 = s_t
            c_tm1 = c_t
        return y
def set_initial_states(self, hp, hb_1):
    """Store encoder outputs hp and derive the decoder's initial states
    from the encoder state hb_1."""
    self.hp = hp
    # s_0 = tanh(Wd * hb_1 + bd); affine_transform computes the same thing
    self.s_0 = dy.tanh(dy.affine_transform([self.bd, self.Wd, hb_1]))
    self.c_0 = dy.zeroes((2 * self.hid_dim, ))
def get_features_for_tagging(self, sentence, training):
    """Build emission/transition feature expressions (and autoencoder
    reconstructions) for one sentence.

    :param sentence: iterable of (chars, word, feats, tag) tuples; `feats`
        is a numpy array of length self.featsize (soft gazetteer features)
    :param training: flag forwarded to the char CNN encoder
    :return: (features, t_features, feat_reconstruct) lists, one entry per
        token
    """
    # project raw gazetteer features to FEAT_OUT_SIZE per token
    word_feats = [
        dy.affine_transform(
            [
                self.feat_b,
                self.feat_w,
                dy.inputTensor(feats.reshape(self.featsize, 1)),
            ]
        )
        for chars, word, feats, tag in sentence
    ]
    # zero vectors used wherever gazetteer features are disabled
    zero_feats = [
        dy.inputTensor(np.zeros(shape=(FEAT_OUT_SIZE, 1)))
        for chars, word, feats, tag in sentence
    ]
    # Non-linear transform for soft gazetteer features
    if self.feat_func == "tanh":
        word_feats = [dy.tanh(feat) for feat in word_feats]
    elif self.feat_func == "relu":
        word_feats = [dy.rectify(feat) for feat in word_feats]
    # Soft gazetteer features at the LSTM level
    if self.lstm_feats:
        cur_feats = word_feats
    else:
        cur_feats = zero_feats
    # token representation: char-CNN + word embedding + (possibly zero) feats
    word_reps = [
        dy.concatenate(
            [self.cnn.encode(chars, training), self.word_embeds[word], enc_feat]
        )
        for enc_feat, (chars, word, feats, tag) in zip(cur_feats, sentence)
    ]
    contexts = self.word_lstm.transduce(word_reps)
    # Soft gazetteer features at the CRF level
    if self.crf_feats:
        cur_feats = word_feats
    else:
        cur_feats = zero_feats
    # per-token emission scores
    features = [
        dy.affine_transform(
            [
                self.context_to_emit_b,
                self.context_to_emit_w,
                dy.concatenate([context, feats]),
            ]
        )
        for context, feats in zip(contexts, cur_feats)
    ]
    # per-token transition score matrices (num_tags x num_tags)
    t_features = [
        dy.reshape(
            dy.affine_transform(
                [
                    self.context_to_trans_b,
                    self.context_to_trans_w,
                    dy.concatenate([context, feats]),
                ]
            ),
            (self.num_tags, self.num_tags),
        )
        for context, feats in zip(contexts, cur_feats)
    ]
    # Autoencoder feature reconstruction
    if self.lstm_feats:
        feat_reconstruct = [
            dy.logistic(
                dy.affine_transform(
                    [self.feat_reconstruct_b, self.feat_reconstruct_w, context]
                )
            )
            for context in contexts
        ]
    else:
        # reconstruction unused in this configuration — emit zeros
        feat_reconstruct = [
            dy.inputTensor(np.zeros(shape=(self.featsize,))) for context in contexts
        ]
    return features, t_features, feat_reconstruct
def do_one_sentence(encoder, decoder, params_encoder, params_decoder, sentence,
                    output, env, first, previous):
    """Teacher-forced decoding of one sentence against gold `output`.

    Encodes the environment worlds char-by-char and the input sentence
    word-by-word, then decodes with dot-product attention over sentence
    states and additive attention over world states, accumulating
    negative-log-likelihood loss against the gold tokens.

    :return: (loss expression, generated token list, final encoder output)

    NOTE(review): parameter `first` is unused; kept for interface
    compatibility with callers.
    """
    pos_lookup = params_encoder["pos_lookup"]
    char_lookup = params_encoder["char_lookup"]
    char_v = params_decoder["attention_v"]
    char_w1 = params_decoder["attention_wc"]
    char_w2 = params_decoder["attention_bc"]
    sc_vector = []
    for i, world in enumerate(_state(env)):
        # (fix: removed no-op `world = world`; collapsed sc0 alias)
        sc = char_encoder.initial_state()
        for char in world:
            sc = sc.add_input(char_lookup[char2int[char]])
        # world encoding = final char-RNN state + position embedding
        sc_vector.append(dy.concatenate([sc.output(), pos_lookup[i]]))
    dy_sc_vector = dy.concatenate(sc_vector, d=1)
    s = encoder.initial_state()
    lookup = params_encoder["lookup"]
    attention_w = params_decoder["attention_w"]
    attention_b = params_decoder["attention_b"]
    sentence = sentence + ' <end>'
    sentence = [
        vocab.index(c) if c in vocab else vocab.index('<unknown>')
        for c in sentence.split(' ')
    ]
    loss = []
    generate = []
    s_vector = []
    for word in sentence:
        s = s.add_input(lookup[word])
        s_vector.append(dy.softmax(attention_w * s.output() + attention_b))
    encode_output = s.output()
    dy_s_vector = dy.concatenate(s_vector, d=1)
    _s = decoder.initial_state(s.s())
    R = params_decoder["R"]
    bias = params_decoder["bias"]
    index = 1
    input_word = "<start>"
    _lookup = params_decoder["lookup"]
    while True:
        # NOTE(review): dy_env is never consumed below — confirm whether the
        # environment embedding was meant to be fed to the decoder
        dy_env = dy.inputTensor(get_state_embed3(env))
        word = vocab_out.index(input_word)
        gt_y = vocab_out.index(output[index])
        # dot-product attention over sentence states
        weight = dy.softmax(
            dy.concatenate([dy.dot_product(x, _s.output()) for x in s_vector]))
        # additive attention over world (environment) states
        weight_char = dy.softmax(
            dy.concatenate([
                char_v * dy.tanh(char_w1 * x + char_w2 * _s.output())
                for x in sc_vector
            ]))
        encode_output = dy_s_vector * weight
        encode_state = dy_sc_vector * weight_char
        _s = _s.add_input(
            dy.concatenate([_lookup[word], encode_output, encode_state]))
        probs = dy.softmax(R * _s.output() + bias)
        prediction = np.argsort(probs.npvalue())[-1]
        if vocab_out[prediction] == '<start>':
            # never emit the start symbol; fall back to the runner-up
            prediction = np.argsort(probs.npvalue())[-2]
        generate.append(vocab_out[prediction])
        loss.append(-dy.log(dy.pick(probs, gt_y)))
        if output[index] == '<end>':
            break
        index += 1
        input_word = vocab_out[prediction]
        if input_word == '<end>':
            # predicted end but gold continues: don't execute, keep decoding
            continue
        env = str(execute(env, [input_word]))
        if env == 'None':
            env = '1:_ 2:_ 3:_ 4:_ 5:_ 6:_ 7:_'
    loss = dy.esum(loss)
    while '<start>' in generate:
        generate.remove('<start>')
    previous = s.output()
    return loss, generate, previous
def compute_decoder_batch_loss(self, encoded_inputs, input_masks,
                               output_word_ids, output_masks, batch_size):
    """Run the attentional decoder over a batch with teacher forcing and
    return the total (summed over time steps and batch) loss expression.

    :param encoded_inputs: encoder states (see attend() for shapes)
    :param input_masks: per-position input masks for attention
    :param output_word_ids: list over time steps; each entry is the batch's
        gold word ids for that step
    :param output_masks: per-step 0/1 masks for variable-length outputs
    :param batch_size: number of sequences in the batch
    """
    self.readout = dn.parameter(self.params['readout'])
    self.bias = dn.parameter(self.params['bias'])
    self.w_c = dn.parameter(self.params['w_c'])
    self.u_a = dn.parameter(self.params['u_a'])
    self.v_a = dn.parameter(self.params['v_a'])
    self.w_a = dn.parameter(self.params['w_a'])
    # initialize the decoder rnn
    s_0 = self.decoder_rnn.initial_state()
    # initial "input feeding" vectors to feed decoder - 3*h
    init_input_feeding = dn.lookup_batch(self.init_lookup, [0] * batch_size)
    # initial feedback embeddings for the decoder, use begin seq symbol embedding
    init_feedback = dn.lookup_batch(
        self.output_lookup, [self.y2int[common.BEGIN_SEQ]] * batch_size)
    # init decoder rnn
    decoder_init = dn.concatenate([init_feedback, init_input_feeding])
    s = s_0.add_input(decoder_init)
    # loss per timestep
    losses = []
    # run the decoder through the output sequences and aggregate loss
    for i, step_word_ids in enumerate(output_word_ids):
        # returns h x batch size matrix
        decoder_rnn_output = s.output()
        # compute attention context vector for each sequence in the batch (returns 2h x batch size matrix)
        attention_output_vector, alphas = self.attend(
            encoded_inputs, decoder_rnn_output, input_masks)
        # compute output scores (returns vocab_size x batch size matrix)
        # h = readout * attention_output_vector + bias
        h = dn.affine_transform(
            [self.bias, self.readout, attention_output_vector])
        # encourage diversity by punishing highly confident predictions
        # TODO: support batching - esp. w.r.t. scalar inputs
        if self.diverse:
            soft = dn.softmax(dn.tanh(h))
            batch_loss = dn.pick_batch(-dn.log(soft), step_word_ids) \
                - dn.log(dn.scalarInput(1) - dn.pick_batch(soft, step_word_ids)) - dn.log(dn.scalarInput(4))
        else:
            # get batch loss for this timestep
            batch_loss = dn.pickneglogsoftmax_batch(h, step_word_ids)
        # mask the loss if at least one sentence is shorter
        if output_masks and output_masks[i][-1] != 1:
            mask_expr = dn.inputVector(output_masks[i])
            # noinspection PyArgumentList
            mask_expr = dn.reshape(mask_expr, (1, ), batch_size)
            batch_loss = batch_loss * mask_expr
        # input feeding approach - input h (attention_output_vector) to the decoder
        # prepare for the next iteration - "feedback"
        feedback_embeddings = dn.lookup_batch(self.output_lookup,
                                              step_word_ids)
        decoder_input = dn.concatenate(
            [feedback_embeddings, attention_output_vector])
        s = s.add_input(decoder_input)
        losses.append(batch_loss)
    # sum the loss over the time steps and batch
    total_batch_loss = dn.sum_batches(dn.esum(losses))
    return total_batch_loss
def predict_beamsearch(self, encoder, input_seq):
    """Beam-search decoding for one input sequence.

    :param encoder: encoder providing encode_batch()
    :param input_seq: input token sequence; empty input returns []
    :return: (nbest list of (sequence, probability), attention matrix rows)
    """
    if len(input_seq) == 0:
        return []
    dn.renew_cg()
    self.readout = dn.parameter(self.params['readout'])
    self.bias = dn.parameter(self.params['bias'])
    self.w_c = dn.parameter(self.params['w_c'])
    self.u_a = dn.parameter(self.params['u_a'])
    self.v_a = dn.parameter(self.params['v_a'])
    self.w_a = dn.parameter(self.params['w_a'])
    alphas_mtx = []
    # encode input sequence
    blstm_outputs, input_masks = encoder.encode_batch([input_seq])
    # complete sequences and their probabilities
    final_states = []
    # initialize the decoder rnn
    s_0 = self.decoder_rnn.initial_state()
    # holds beam step index mapped to (sequence, probability, decoder state, attn_vector) tuples
    beam = {-1: [([common.BEGIN_SEQ], 1.0, s_0, self.init_lookup[0])]}
    i = 0
    # expand another step if didn't reach max length and there's still beams to expand
    #while i < self.max_prediction_len and len(beam[i - 1]) > 0:
    while ((self.max_prediction_len is None) or
           (i < self.max_prediction_len)) and len(beam[i - 1]) > 0:
        # create all expansions from the previous beam:
        new_hypos = []
        for hypothesis in beam[i - 1]:
            prefix_seq, prefix_prob, prefix_decoder, prefix_attn = hypothesis
            last_hypo_symbol = prefix_seq[-1]
            # cant expand finished sequences
            if last_hypo_symbol == common.END_SEQ:
                continue
            # expand from the last symbol of the hypothesis
            try:
                prev_output_vec = self.output_lookup[
                    self.y2int[last_hypo_symbol]]
            except KeyError:
                # not a known symbol
                # (fix: parenthesized print — valid in both py2 and py3
                # for a single-string argument)
                print('impossible to expand, key error: ' +
                      str(last_hypo_symbol))
                continue
            decoder_input = dn.concatenate([prev_output_vec, prefix_attn])
            s = prefix_decoder.add_input(decoder_input)
            decoder_rnn_output = s.output()
            # perform attention step
            attention_output_vector, alphas = self.attend(
                blstm_outputs, decoder_rnn_output)
            # save attention weights for plotting
            # TODO: add attention weights properly to allow building the attention matrix for the best path
            if self.plot:
                val = alphas.vec_value()
                alphas_mtx.append(val)
            # compute output probabilities
            # h = readout * attention_output_vector + bias
            h = dn.affine_transform(
                [self.bias, self.readout, attention_output_vector])
            # TODO: understand why diverse needs tanh before softmax
            if self.diverse:
                h = dn.tanh(h)
            probs = dn.softmax(h)
            probs_val = probs.npvalue()
            # TODO: maybe should choose nbest from all expansions and not only from nbest of each hypothesis?
            # find best candidate outputs
            n_best_indices = common.argmax(probs_val, self.beam_size)
            for index in n_best_indices:
                p = probs_val[index]
                new_seq = prefix_seq + [self.int2y[index]]
                new_prob = prefix_prob * p
                #if new_seq[-1] == common.END_SEQ or i == self.max_prediction_len - 1:
                if new_seq[-1] == common.END_SEQ or (
                        (self.max_prediction_len is not None) and
                        (i == self.max_prediction_len - 1)):
                    # TODO: add to final states only if fits in k best?
                    # if found a complete sequence or max length - add to final states
                    final_states.append((new_seq[1:-1], new_prob))
                else:
                    new_hypos.append(
                        (new_seq, new_prob, s, attention_output_vector))
        # add the most probable expansions from all hypotheses to the beam
        new_probs = np.array([p for (s, p, r, a) in new_hypos])
        argmax_indices = common.argmax(new_probs, self.beam_size)
        beam[i] = [new_hypos[l] for l in argmax_indices]
        i += 1
    # get nbest results from final states found in search
    # NOTE(review): if final_states is empty this indexes an empty array —
    # presumably unreachable given END_SEQ/max-length handling; confirm
    final_probs = np.array([p for (s, p) in final_states])
    argmax_indices = common.argmax(final_probs, self.beam_size)
    nbest_seqs = [final_states[l] for l in argmax_indices]
    return nbest_seqs, alphas_mtx
def predict_greedy(self, encoder, input_seq):
    """Greedy decoding for one input sequence.

    :param encoder: encoder providing encode_batch()
    :param input_seq: input token sequence; empty input returns []
    :return: (predicted symbol sequence without END_SEQ, attention rows)
    """
    dn.renew_cg()
    self.readout = dn.parameter(self.params['readout'])
    self.bias = dn.parameter(self.params['bias'])
    self.w_c = dn.parameter(self.params['w_c'])
    self.u_a = dn.parameter(self.params['u_a'])
    self.v_a = dn.parameter(self.params['v_a'])
    self.w_a = dn.parameter(self.params['w_a'])
    alphas_mtx = []
    if len(input_seq) == 0:
        return []
    # encode input sequence
    blstm_outputs, input_masks = encoder.encode_batch([input_seq])
    # initialize the decoder rnn
    s = self.decoder_rnn.initial_state()
    # set prev_output_vec for first lstm step as BEGIN_WORD concatenated with special padding vector
    prev_output_vec = dn.concatenate([
        self.output_lookup[self.y2int[common.BEGIN_SEQ]], self.init_lookup[0]
    ])
    predicted_sequence = []
    i = 0
    # run the decoder through the sequence and predict output symbols
    while (self.max_prediction_len is None) or (i <
                                                self.max_prediction_len):
        # get current h of the decoder
        s = s.add_input(prev_output_vec)
        decoder_rnn_output = s.output()
        # perform attention step
        attention_output_vector, alphas = self.attend(
            blstm_outputs, decoder_rnn_output)
        if self.plot:
            # save attention weights for plotting
            val = alphas.vec_value()
            alphas_mtx.append(val)
        # compute output probabilities
        # h = readout * attention_output_vector + bias
        h = dn.affine_transform(
            [self.bias, self.readout, attention_output_vector])
        # TODO: understand why diverse needs tanh before softmax
        if self.diverse:
            h = dn.tanh(h)
        probs = dn.softmax(h)
        # find best candidate output - greedy
        next_element_index = np.argmax(probs.npvalue())
        predicted_sequence.append(self.int2y[next_element_index])
        # check if reached end of word
        if predicted_sequence[-1] == common.END_SEQ:
            break
        # prepare for the next iteration - "feedback"
        prev_output_vec = dn.concatenate([
            self.output_lookup[next_element_index], attention_output_vector
        ])
        i += 1
    # remove the end seq symbol
    return predicted_sequence[0:-1], alphas_mtx
def __call__(self, s_t, h_matrix):
    """Single-hop attention: weight the columns of h_matrix by their match
    with the state s_t and return the resulting context vector."""
    scores = self.v * dy.tanh(self.W1 * h_matrix + self.W2 * s_t)
    alphas = dy.softmax(dy.transpose(scores))
    return h_matrix * alphas
def process_one_instance(builder, model, model_parameters, instance,
                         path_cache, update=True, dropout=0.0,
                         x_y_vectors=None, num_hidden_layers=0):
    """
    Return the LSTM output vector of a single term-pair - the average path embedding
    :param builder: the LSTM builder
    :param model: the LSTM model
    :param model_parameters: the model parameters
    :param instance: a Counter object with paths
    :param path_cache: the cache for path embeddings
    :param update: whether to update the lemma embeddings
    :param dropout: word dropout rate
    :param x_y_vectors: the current word vectors for x and y
    :param num_hidden_layers The number of hidden layers for the term-pair classification network
    :return: the LSTM output vector of a single term-pair
    """
    W1 = dy.parameter(model_parameters['W1'])
    b1 = dy.parameter(model_parameters['b1'])
    W2 = None
    b2 = None
    if num_hidden_layers == 1:
        W2 = dy.parameter(model_parameters['W2'])
        b2 = dy.parameter(model_parameters['b2'])
    lemma_lookup = model_parameters['lemma_lookup']
    pos_lookup = model_parameters['pos_lookup']
    dep_lookup = model_parameters['dep_lookup']
    dir_lookup = model_parameters['dir_lookup']
    # Use the LSTM output vector and feed it to the MLP
    # Add the empty path
    # NOTE(review): `paths` aliases `instance`, so adding EMPTY_PATH mutates
    # the caller's Counter — confirm callers tolerate this side effect
    paths = instance
    if len(paths) == 0:
        paths[EMPTY_PATH] = 1
    # Compute the averaged path
    # (fix: builtin sum instead of reduce(lambda x, y: x + y, ...))
    num_paths = sum(instance.itervalues())
    path_embbedings = [
        get_path_embedding_from_cache(path_cache, builder, lemma_lookup,
                                      pos_lookup, dep_lookup, dir_lookup,
                                      path, update, dropout) * count
        for path, count in instance.iteritems()
    ]
    input_vec = dy.esum(path_embbedings) * (1.0 / num_paths)
    # Concatenate x and y embeddings
    if x_y_vectors is not None:
        x_vector, y_vector = dy.lookup(lemma_lookup,
                                       x_y_vectors[0]), dy.lookup(
                                           lemma_lookup, x_y_vectors[1])
        input_vec = dy.concatenate([x_vector, input_vec, y_vector])
    h = W1 * input_vec + b1
    if num_hidden_layers == 1:
        h = W2 * dy.tanh(h) + b2
    output = dy.softmax(h)
    return output
def generator(encoder, decoder, params_encoder, params_decoder, sentence, env,
              first, previous):
    """Free-running decoding (no gold output): generate tokens until <end>
    or 10 steps, rejecting candidates whose execution against `env` fails.

    :return: (generated token list, final encoder output)

    NOTE(review): parameter `first` is unused; kept for interface
    compatibility with callers.
    """
    pos_lookup = params_encoder["pos_lookup"]
    char_lookup = params_encoder["char_lookup"]
    char_v = params_decoder["attention_v"]
    char_w1 = params_decoder["attention_wc"]
    char_w2 = params_decoder["attention_bc"]
    sc_vector = []
    for i, world in enumerate(_state(env)):
        # (fix: removed no-op `world = world`; collapsed sc0 alias)
        sc = char_encoder.initial_state()
        for char in world:
            sc = sc.add_input(char_lookup[char2int[char]])
        sc_vector.append(dy.concatenate([sc.output(), pos_lookup[i]]))
    dy_sc_vector = dy.concatenate(sc_vector, d=1)
    s = encoder.initial_state()
    lookup = params_encoder["lookup"]
    attention_w = params_decoder["attention_w"]
    attention_b = params_decoder["attention_b"]
    sentence = sentence + ' <end>'
    sentence = [
        vocab.index(c) if c in vocab else vocab.index('<unknown>')
        for c in sentence.split()
    ]
    s_vector = []
    generate = []
    for word in sentence:
        s = s.add_input(lookup[word])
        s_vector.append(dy.softmax(attention_w * s.output() + attention_b))
    encode_output = s.output()
    dy_s_vector = dy.concatenate(s_vector, d=1)
    _s = decoder.initial_state(s.s())
    R = params_decoder["R"]
    bias = params_decoder["bias"]
    input_word = "<start>"
    _lookup = params_decoder["lookup"]
    repeat = 0
    while True:
        # NOTE(review): dy_env is never consumed below — confirm whether the
        # environment embedding was meant to be fed to the decoder
        dy_env = dy.inputTensor(get_state_embed3(env))
        repeat += 1
        word = vocab_out.index(input_word)
        # dot-product attention over sentence states
        weight = dy.softmax(
            dy.concatenate([dy.dot_product(x, _s.output()) for x in s_vector]))
        # additive attention over world (environment) states
        weight_char = dy.softmax(
            dy.concatenate([
                char_v * dy.tanh(char_w1 * x + char_w2 * _s.output())
                for x in sc_vector
            ]))
        encode_state = dy_sc_vector * weight_char
        encode_output = dy_s_vector * weight
        _s = _s.add_input(
            dy.concatenate([_lookup[word], encode_output, encode_state]))
        probs = dy.softmax(R * _s.output() + bias)
        # scan candidates from most to least probable, skipping <start> and
        # candidates whose execution fails; give up after 49 tries
        top = 0
        while True:
            top += 1
            if top == 50:
                top = 1
                break
            prediction = np.argsort(probs.vec_value())[-top]
            if vocab_out[prediction] == '<end>':
                break
            if vocab_out[prediction] == '<start>':
                continue
            new_env = str(execute(env, [vocab_out[prediction]]))
            if new_env == 'None':
                continue
            break
        prediction = np.argsort(probs.vec_value())[-top]
        input_word = vocab_out[prediction]
        if input_word == '<end>':
            break
        if repeat >= 10:
            break
        generate.append(input_word)
        env = str(execute(env, [input_word]))
        if env == 'None':
            env = '1:_ 2:_ 3:_ 4:_ 5:_ 6:_ 7:_'
    while '<start>' in generate:
        generate.remove('<start>')
    previous = s.output()
    return generate, previous
def __call__(self, x=None, t=None, test=False):
    """ABS-style summarization step: score next-word candidates from a
    window of `self.c` previous output embeddings plus an encoding of the
    input x (bag-of-words or attention encoder).

    Test mode returns one softmax-normalized distribution; train mode
    returns a list of unnormalized score vectors, one per window position.
    """
    if test:
        tt_embs = [dy.lookup(self.E, t_t) for t_t in t]
        if self.encoder_type == 'bow':
            # Neural language model
            tt_c = dy.concatenate(tt_embs)
            h = dy.tanh(self.U * tt_c)
            # Output with softmax
            # NOTE(review): uses precomputed self.W_enc — presumably set
            # elsewhere from the encoded input; confirm against caller
            y_t = dy.softmax(self.V * h + self.W_enc)
        elif self.encoder_type == 'attention':
            ttp_embs = [dy.lookup(self.G, t_t) for t_t in t]
            # Neural language model
            tt_c = dy.concatenate(tt_embs)
            h = dy.tanh(self.U * tt_c)
            # Attention
            ttp_c = dy.concatenate(ttp_embs)
            p = dy.softmax(self.xt * self.P * ttp_c)  # Attention weight
            enc = self.xb * p  # Context vector
            # Output with softmax
            y_t = dy.softmax(self.V * h + self.W * enc)
        return y_t
    else:
        xt_embs = [dy.lookup(self.F, x_t) for x_t in x]
        tt_embs = [dy.lookup(self.E, t_t) for t_t in t]
        y = []
        if self.encoder_type == 'bow':
            # BoW
            enc = dy.average(xt_embs)
            # input encoding is constant over windows — hoisted out of loop
            W_enc = self.W * enc
            for i in range(len(t) - self.c + 1):
                # Neural language model
                tt_c = dy.concatenate(tt_embs[i:i + self.c])
                h = dy.tanh(self.U * tt_c)
                # Output without softmax
                y_t = self.V * h + W_enc
                y.append(y_t)
        elif self.encoder_type == 'attention':
            # smoothed input embeddings: mean over a window of width 2q+1
            # around each position (clipped at the sequence edges)
            xb = dy.concatenate([
                dy.esum(xt_embs[max(i - self.q, 0
                                    ):min(len(x) - 1 + 1, i + self.q + 1)]) /
                self.q for i in range(len(x))
            ],
                                d=1)
            xt = dy.transpose(dy.concatenate(xt_embs, d=1))
            ttp_embs = [dy.lookup(self.G, t_t) for t_t in t]
            for i in range(len(t) - self.c + 1):
                # Neural language model
                tt_c = dy.concatenate(tt_embs[i:i + self.c])
                h = dy.tanh(self.U * tt_c)
                # Attention
                ttp_c = dy.concatenate(
                    ttp_embs[i:i + self.c])  # Window-sized embedding
                p = dy.softmax(xt * self.P * ttp_c)  # Attention weight
                enc = xb * p  # Context vector
                # Output without softmax
                y_t = self.V * h + self.W * enc
                y.append(y_t)
        return y
def get_graph(self, embedding):
    """Build the expression U * tanh(W * embedding) on a fresh
    computation graph."""
    dy.renew_cg()
    proj = dy.parameter(self.pW)
    out = dy.parameter(self.pU)
    hidden = dy.tanh(proj * dy.inputTensor(embedding))
    return out * hidden