def build_tagging_graph(self, sentence):
        dy.renew_cg()

        embeddings = [self.word_rep(w) for w in sentence]

        lstm_out = self.bi_lstm.transduce(embeddings)

        H = dy.parameter(self.lstm_to_tags_params)
        Hb = dy.parameter(self.lstm_to_tags_bias)
        O = dy.parameter(self.mlp_out)
        Ob = dy.parameter(self.mlp_out_bias)
        scores = []
        if options.bigram:
            for rep, word in zip(lstm_out, sentence):
                bi1 = dy.lookup(self.bigram_lookup,
                                word[0],
                                update=self.we_update)
                bi2 = dy.lookup(self.bigram_lookup,
                                word[1],
                                update=self.we_update)
                if self.dropout is not None:
                    bi1 = dy.dropout(bi1, self.dropout)
                    bi2 = dy.dropout(bi2, self.dropout)
                score_t = O * dy.tanh(H * dy.concatenate([bi1, rep, bi2]) +
                                      Hb) + Ob
                scores.append(score_t)
        else:
            for rep in lstm_out:
                score_t = O * dy.tanh(H * rep + Hb) + Ob
                scores.append(score_t)

        return scores
    def encode(self, w, o, s):

        k = 5

        suffixes, prefixes = [], []

        for i in range(1, k + 1):

            pre, suf = w[:i], w[-i:]
            pre_idx = self.P2I[pre] if pre in self.P2I else self.P2I["<unk>"]
            suf_idx = self.S2I[suf] if suf in self.S2I else self.S2I["<unk>"]
            suf_e = dy.lookup(self.E_suf, suf_idx)
            pre_e = dy.lookup(self.E_pre, pre_idx)
            suffixes.append(suf_e)
            prefixes.append(pre_e)

        word_encoded = self.W2I[w] if w in self.W2I else self.W2I["<unk>"]
        word_e = dy.lookup(self.E, word_encoded)

        exp_out = dy.vecInput(EMBEDDING_SIZE)
        if o == []: o = ["<unk>"]
        for out_token in o:
            out_token_encoded = self.OUTPUT2IND[
                out_token] if out_token in self.OUTPUT2IND else self.OUTPUT2IND[
                    "<unk>"]
            out_embedding = dy.lookup(self.E_output, out_token_encoded)
            exp_out = exp_out + out_embedding

        W = dy.parameter(self.W)

        return W * dy.concatenate(
            [word_e,
             dy.esum(suffixes),
             dy.esum(prefixes), exp_out])
Example #3
    def print_probs(self, sent):
        dy.renew_cg()
        # initialize the RNN
        init_state = self.builder.initial_state()
        # parameters -> expressions
        R = dy.parameter(self.R)
        bias = dy.parameter(self.bias)

        # get the character ids for each step
        tot_chars = 0
        cids = []
        for w in sent:
            cids.append(vocab.w2i[w])
        # start the rnn with "<s>"
        init_ids = cids[0]
        s = init_state.add_input(dy.lookup(self.lookup, init_ids))

        # feed char vectors into the RNN and predict the next char
        for cid in cids[1:]:
            score = dy.affine_transform([bias, R, s.output()])
            loss = dy.pickneglogsoftmax(score, cid)
            print(f"{vocab.i2w[cid]} {loss.value()}")
            # update the state of the RNN
            cemb = dy.lookup(self.lookup, cid)
            s = s.add_input(cemb)
Example #4
    def evaluate_recurrent(self, fwd_bigrams, unigrams, test=False):
        fwd1 = self.fwd_lstm1.initial_state()
        back1 = self.back_lstm1.initial_state()

        fwd2 = self.fwd_lstm2.initial_state()
        back2 = self.back_lstm2.initial_state()

        fwd_input = []
        for i in range(len(unigrams)):
            bivec = dynet.lookup(self.bigram_embed, fwd_bigrams[i])
            univec = dynet.lookup(self.unigram_embed, unigrams[i])
            vec = dynet.concatenate([bivec, univec])
            #   fwd_input.append(dynet.tanh(self.embed2lstm_W*vec))
            fwd_input.append(vec)

        back_input = []
        for i in range(len(unigrams)):
            bivec = dynet.lookup(self.bigram_embed, fwd_bigrams[i + 1])
            univec = dynet.lookup(self.unigram_embed, unigrams[i])
            vec = dynet.concatenate([bivec, univec])
            # back_input.append(dynet.tanh(self.embed2lstm_W*vec))
            back_input.append(vec)

        fwd1_out = []
        for vec in fwd_input:
            fwd1 = fwd1.add_input(vec)
            fwd_vec = fwd1.output()
            fwd1_out.append(fwd_vec)

        back1_out = []
        for vec in reversed(back_input):
            back1 = back1.add_input(vec)
            back1_vec = back1.output()
            back1_out.append(back1_vec)

        lstm2_input = []
        for (f, b) in zip(fwd1_out, reversed(back1_out)):
            lstm2_input.append(dynet.concatenate([f, b]))

        fwd2_out = []
        for vec in lstm2_input:
            if self.droprate > 0 and not test:
                vec = dynet.dropout(vec, self.droprate)
            fwd2 = fwd2.add_input(vec)
            fwd_vec = fwd2.output()
            fwd2_out.append(fwd_vec)

        back2_out = []
        for vec in reversed(lstm2_input):
            if self.droprate > 0 and not test:
                vec = dynet.dropout(vec, self.droprate)
            back2 = back2.add_input(vec)
            back_vec = back2.output()
            back2_out.append(back_vec)

        # fwd_out = [dynet.concatenate([f1,f2]) for (f1,f2) in zip(fwd1_out,fwd2_out)]
        # back_out = [dynet.concatenate([b1,b2]) for (b1,b2) in zip(back1_out,back2_out)]

        return fwd2_out, back2_out[::-1]
Example #5
    def evaluate_recurrent(self, word_inds, tag_inds, test=False):

        fwd1 = self.fwd_lstm1.initial_state()
        back1 = self.back_lstm1.initial_state()

        fwd2 = self.fwd_lstm2.initial_state()
        back2 = self.back_lstm2.initial_state()

        sentence = []

        for (w, t) in zip(word_inds, tag_inds):
            wordvec = dynet.lookup(self.word_embed, w)
            tagvec = dynet.lookup(self.tag_embed, t)
            vec = dynet.concatenate([wordvec, tagvec])
            sentence.append(vec)

        fwd1_out = []
        for vec in sentence:
            fwd1 = fwd1.add_input(vec)
            fwd_vec = fwd1.output()
            fwd1_out.append(fwd_vec)

        back1_out = []
        for vec in reversed(sentence):
            back1 = back1.add_input(vec)
            back_vec = back1.output()
            back1_out.append(back_vec)

        lstm2_input = []
        for (f, b) in zip(fwd1_out, reversed(back1_out)):
            lstm2_input.append(dynet.concatenate([f, b]))

        fwd2_out = []
        for vec in lstm2_input:
            if self.droprate > 0 and not test:
                vec = dynet.dropout(vec, self.droprate)
            fwd2 = fwd2.add_input(vec)
            fwd_vec = fwd2.output()
            fwd2_out.append(fwd_vec)

        back2_out = []
        for vec in reversed(lstm2_input):
            if self.droprate > 0 and not test:
                vec = dynet.dropout(vec, self.droprate)
            back2 = back2.add_input(vec)
            back_vec = back2.output()
            back2_out.append(back_vec)

        fwd_out = [
            dynet.concatenate([f1, f2])
            for (f1, f2) in zip(fwd1_out, fwd2_out)
        ]
        back_out = [
            dynet.concatenate([b1, b2])
            for (b1, b2) in zip(back1_out, back2_out)
        ]

        return fwd_out, back_out[::-1]
        def __call__(self, query, options, gold, lengths, query_no):
            if len(options) == 1:
                return None, 0

            final = []
            if args.word_vectors:
                qvecs = [dy.lookup(self.pEmbedding, w) for w in query]
                qvec_max = dy.emax(qvecs)
                qvec_mean = dy.average(qvecs)
            for otext, features in options:
                if not args.no_features:
                    inputs = dy.inputTensor(features)
                if args.word_vectors:
                    ovecs = [dy.lookup(self.pEmbedding, w) for w in otext]
                    ovec_max = dy.emax(ovecs)
                    ovec_mean = dy.average(ovecs)
                    if args.no_features:
                        inputs = dy.concatenate(
                            [qvec_max, qvec_mean, ovec_max, ovec_mean])
                    else:
                        inputs = dy.concatenate(
                            [inputs, qvec_max, qvec_mean, ovec_max, ovec_mean])
                if args.drop > 0:
                    inputs = dy.dropout(inputs, args.drop)
                h = inputs
                for pH, pB in zip(self.hidden, self.bias):
                    h = dy.affine_transform([pB, pH, h])
                    if args.nonlin == "linear":
                        pass
                    elif args.nonlin == "tanh":
                        h = dy.tanh(h)
                    elif args.nonlin == "cube":
                        h = dy.cube(h)
                    elif args.nonlin == "logistic":
                        h = dy.logistic(h)
                    elif args.nonlin == "relu":
                        h = dy.rectify(h)
                    elif args.nonlin == "elu":
                        h = dy.elu(h)
                    elif args.nonlin == "selu":
                        h = dy.selu(h)
                    elif args.nonlin == "softsign":
                        h = dy.softsign(h)
                    elif args.nonlin == "swish":
                        h = dy.cmult(h, dy.logistic(h))
                final.append(dy.sum_dim(h, [0]))

            final = dy.concatenate(final)
            nll = -dy.log_softmax(final)
            dense_gold = []
            for i in range(len(options)):
                dense_gold.append(1.0 / len(gold) if i in gold else 0.0)
            answer = dy.inputTensor(dense_gold)
            loss = dy.transpose(answer) * nll
            predicted_link = np.argmax(final.npvalue())

            return loss, predicted_link
    def load_src_lookup_params(self, src_vectors_file, model):
        self.src_lookup = model.add_lookup_parameters(
            (self.src_vocab_size, self.embed_size))
        pickle_fn = 'src_lookup_vectors.pkl'
        print('Loading source vectors as lookup parameters')
        count = 0
        frozen_params = defaultdict(lambda: False)

        if not os.path.exists(pickle_fn):
            init_array = np.zeros((self.src_vocab_size, self.embed_size))
            with open(src_vectors_file) as vector_file:
                first_line = True
                for l in vector_file:
                    if first_line:
                        first_line = False
                    else:
                        try:
                            space_delim = l.split()
                            word = space_delim[0]
                            w_id = int(self.src_token_to_id[word])
                            if w_id != 0:
                                init_array[w_id, :] = np.asarray(
                                    space_delim[1:])
                                count += 1
                                frozen_params[w_id] = True

                        except Exception as e:
                            print('Error:{0}, {1}'.format(e, l))
            with open(pickle_fn, 'wb') as pickle_file:
                pickle.dump(init_array, pickle_file)

            for i in range(self.src_vocab_size):
                if not np.any(init_array[i, :]):
                    expr = dy.lookup(self.src_lookup, i)
                    init_array[i, :] = expr.npvalue()
                    frozen_params[i] = False

        else:
            with open(pickle_fn, 'rb') as pickle_file:
                init_array = pickle.load(pickle_file)

            for i in range(self.src_vocab_size):
                if not np.any(init_array[i, :]):
                    expr = dy.lookup(self.src_lookup, i)
                    init_array[i, :] = expr.npvalue()
                    frozen_params[i] = False

                else:
                    count += 1
                    frozen_params[i] = True

        print('Set: {0} vectors out of vocab size: {1}'.format(
            count, self.src_vocab_size))

        self.src_lookup.init_from_array(init_array)
        return frozen_params
 def get_tok_embedding(self, tok):
     tok_embedding = dy.concatenate([
         dy.lookup(self.word_lookup, self.w2i_raw[tok[0]]),
         dy.lookup(self.pretrained_lookup,
                   self.w2i_pretrained[tok[1]],
                   update=False),
         dy.lookup(self.unked_lookup, self.w2i_unked[tok[2]]),
         self.pos_lookup[self.w2i_pos[tok[3]]]
     ])
     return tok_embedding
Example #9
 def represent(self, input):
     representations = []
     for word in input:
         w_r = dy.lookup(self.E, word)
         p_r = dy.lookup(
             self.Epre, self.Wp2I[word_to_prefix(self.index_to_word(word))])
         s_r = dy.lookup(
             self.Esuf, self.Ws2I[word_to_suffix(self.index_to_word(word))])
         representations.append(w_r + p_r + s_r)
     return representations
Example #10
    def train(self, inputs, target):
        dropout = self.Config.train.dropout
        uts = []
        for u in inputs:
            u = Utt(u)
            u.words_emb = []
            for word in u.words:
                u.words_emb.append(
                    dy.dropout(
                        dy.lookup(
                            self.input_lookup,
                            word,
                            update=True if word < 4 + self.Config.data.oov_size
                            else False), dropout))
            self.encode_words(u)
            uts.append(u)

        # last_output_embeddings = self.lookup(self.Config.data.START_ID, emb)
        last_output_embeddings = self.input_lookup[self.Config.data.START_ID]
        s = self.sess_lstm.initial_state().add_input(
            dy.concatenate([
                dy.vecInput(self.Config.model.num_units),
                last_output_embeddings
            ]))
        loss = []

        for gt in target:
            spt = dy.concatenate(list(s.s()))
            l = self.utt_lstm.initial_state_from_raw_vectors([
                np.random.normal(0, 0.1, self.Config.model.num_units)
                for i in range(1 * self.Config.model.num_layers)
            ])
            lpt = dy.concatenate(list(l.s()))

            # encode utt
            for i in range(len(uts) - 1, -1, -1):
                self.get_word_att(uts[i], lpt, spt)
                l = l.add_input(uts[i].context)
                uts[i].utt_enc = l.output()
                lpt = dy.concatenate(list(l.s()))

            # decode
            c = self.get_utt_att(uts, spt)
            s = s.add_input(dy.concatenate([c, last_output_embeddings]))
            probs = dy.softmax(self.decoder_w * s.output() + self.decoder_b)
            # last_output_embeddings = self.lookup(gt, emb)
            last_output_embeddings = dy.lookup(
                self.input_lookup,
                gt,
                update=True if gt < 4 + self.Config.data.oov_size else False)
            loss.append(-dy.log(dy.pick(probs, gt)))

        loss = dy.esum(loss)
        return loss
Example #11
    def greedy_search(self, char_seq, truth = None, mu =0.):
        init_state = self.params['lstm'].initial_state().add_input(self.param_exprs['<bos>'])
        init_y = dy.tanh(self.param_exprs['pW'] * init_state.output() + self.param_exprs['pb'])
        init_score = dy.scalarInput(0.)
        init_sentence = Sentence(score=init_score.scalar_value(),score_expr=init_score,LSTMState =init_state, y= init_y , prevState = None, wlen=None, golden=True)
        
        if truth is not None:
            cembs = [ dy.dropout(dy.lookup(self.params['embed'],char),self.options['dropout_rate']) for char in char_seq ]
        else:
            cembs = [dy.lookup(self.params['embed'],char) for char in char_seq ]
            #cembs = [ dy.dropout(dy.lookup(self.params['embed'],char),self.options['dropout_rate']) for char in char_seq ]

        start_agenda = init_sentence
        agenda = [start_agenda]

        for idx, _ in enumerate(char_seq,1): # from left to right, character by character
            now = None
            for wlen in range(1,min(idx,self.options['max_word_len'])+1): # generate word candidate vectors
                # join segmentation sent + word
                word = self.word_repr(char_seq[idx-wlen:idx], cembs[idx-wlen:idx])
                sent = agenda[idx-wlen]

                if truth is not None:
                    word = dy.dropout(word,self.options['dropout_rate'])
                
                word_score = dy.dot_product(word,self.param_exprs['U'])

                if truth is not None:
                    golden =  sent.golden and truth[idx-1]==wlen
                    margin = dy.scalarInput(mu*wlen if truth[idx-1]!=wlen else 0.)
                    score = margin + sent.score_expr + dy.dot_product(sent.y, word) + word_score
                else:
                    golden = False
                    score = sent.score_expr + dy.dot_product(sent.y, word) + word_score


                good = (now is None or now.score < score.scalar_value())
                if golden or good:
                    new_state = sent.LSTMState.add_input(word)
                    new_y = dy.tanh(self.param_exprs['pW'] * new_state.output() + self.param_exprs['pb'])
                    new_sent = Sentence(score=score.scalar_value(),score_expr=score,LSTMState=new_state,y=new_y, prevState=sent, wlen=wlen, golden=golden)
                    if good:
                        now = new_sent
                    if golden:
                        golden_sent = new_sent

            agenda.append(now)
            if truth is not None and truth[idx-1]>0 and (not now.golden):
                return (now.score_expr - golden_sent.score_expr)

        if truth is not None:
            return (now.score_expr - golden_sent.score_expr)

        return agenda
Example #12
    def step(self, instance):
        dy.renew_cg()

        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)
        #        W1_att = dy.parameter(self.W1_att)
        #w2_att = dy.parameter(self.w2_att)

        src_sent, tgt_sent = instance
        src_sent_rev = list(reversed(src_sent))

        # Bidirectional representations
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []
        for (cw_l2r, cw_r2l) in zip(src_sent, src_sent_rev):
            l2r_state = l2r_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_l2r]))
            r2l_state = r2l_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_r2l]))
            l2r_contexts.append(
                l2r_state.output())  #[<S>, x_1, x_2, ..., </S>]
            r2l_contexts.append(
                r2l_state.output())  #[</S> x_n, x_{n-1}, ... <S>]

        r2l_contexts.reverse()  #[<S>, x_1, x_2, ..., </S>]
        # Combine the left and right representations for every word
        h_fs = []
        for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
            h_fs.append(dy.concatenate([l2r_i, r2l_i]))
        h_fs_matrix = dy.concatenate_cols(h_fs)
        losses = []
        num_words = 0

        # Decoder
        c_t = dy.vecInput(self.hidden_size * 2)
        start = dy.concatenate(
            [dy.lookup(self.tgt_lookup, self.tgt_token_to_id['<S>']), c_t])
        dec_state = self.dec_builder.initial_state().add_input(start)
        for (cw, nw) in zip(tgt_sent, tgt_sent[1:]):
            h_e = dec_state.output()
            c_t = self.__attention_mlp(h_fs_matrix, h_e)
            # Get the embedding for the current target word
            embed_t = dy.lookup(self.tgt_lookup, self.tgt_token_to_id[cw])
            # Create input vector to the decoder
            x_t = dy.concatenate([embed_t, c_t])
            dec_state = dec_state.add_input(x_t)
            y_star = dy.softmax(b_y + W_y * dec_state.output())
            loss = -dy.log(dy.pick(y_star, self.tgt_token_to_id[nw]))
            losses.append(loss)
            num_words += 1

        return dy.esum(losses), num_words
Example #13
    def __call__(self, sequence):
        next_input = [dy.lookup(self._E, self._W2I[i]) if i in self._W2I else dy.lookup(self._E, self._W2I["UNK"])
                for i in sequence]
        for layer in self._stacks[0:-1]:
            output = layer(next_input)
            next_input = [dy.concatenate([next_input[i], output[i]]) for i in range(len(sequence))]
        output = self._stacks[-1](next_input)

        exp_output = dy.concatenate_cols(output)
        v = dy.kmax_pooling(exp_output, 1, d=1)

        return v
Example #14
 def represent(self, seq):
     output_vec = []
     s0 = self.builder.initial_state()
     for word in seq:
         word_as_char_vec = [
             dy.lookup(self.embed, self.c2i[ci]) if ci in self.c2i else
             dy.lookup(self.embed, self.c2i[self.c_unk]) for ci in word
         ]
         word_output = s0.transduce(word_as_char_vec)[
             -1]  # apply lstm and take last output
         output_vec.append(word_output)
     return output_vec
Example #15
    def evaluate(self, input_sentences, labels):

        dy.renew_cg()

        self.word_rnn.disable_dropout()
        self.sent_rnn.disable_dropout()

        embed_sents = []

        for input_sentence in input_sentences:
            input_sentence = self._preprocess_input(input_sentence,
                                                    self.word_to_ix)
            #input_sentence = [self.word_to_ix['<start>']] + input_sentence + [self.word_to_ix['<end>']]

            embed_words = self._embed_sentence(input_sentence)
            word_rnn_outputs = self._run_rnn(self.word_rnn, embed_words)
            sent_embed = dy.average(word_rnn_outputs)
            embed_sents.append(sent_embed)

        rnn_outputs = self._run_rnn(self.sent_rnn, embed_sents)

        doc_output_w = dy.parameter(self.doc_output_w)
        doc_output_b = dy.parameter(self.doc_output_b)
        doc_output = dy.tanh(doc_output_w * dy.average(rnn_outputs) +
                             doc_output_b)

        probs = []
        sum_output = dy.zeros(self.args.sent_hidden_dim)
        pred_labels = []
        correct = 0
        total = 0
        loss = dy.zeros(1)
        for i, rnn_output in enumerate(rnn_outputs):

            abspos_embed = dy.lookup(self.abspos_embeddings, self.abspos_ix[i])
            relpos_embed = dy.lookup(self.relpos_embeddings, self.relpos_ix[i])

            prob = self._get_probs(rnn_output, doc_output, sum_output,
                                   abspos_embed, relpos_embed)
            sum_output += dy.cmult(prob, rnn_output)
            pred_label = self._predict(prob)
            pred_labels.append(pred_label)

            if pred_label == labels[i]:
                correct += 1
            total += 1

            if labels[i] == 1:
                loss -= dy.log(prob)
            else:
                loss -= dy.log(dy.scalarInput(1) - prob)

        return loss.value(), pred_labels, correct, total
Example #16
    def expr_for_tree(self, root, tree, WS, US, UFS, BS):
        nodes, edges = tree['nodes'], tree['edges']
        if len(edges[root]) > 2:
            raise RuntimeError(
                'Tree structure error: only binary trees are supported.')

        node_type = nodes[root]['type']
        if node_type == 'terminal':
            raise RuntimeError('Tree structure error: meet with leaves')

        if node_type == 'preterminal':
            terminal_id = edges[root][0]
            terminal = nodes[terminal_id]['name']

            try:
                idx = self.voc2id[terminal]
            except:
                idx = self.voc2id['UNK']

            emb = dy.lookup(self.terminal_lp, idx)
            Wi, Wo, Wu = [w for w in WS]
            bi, bo, bu, _ = [b for b in BS]
            i = dy.logistic(dy.affine_transform([bi, Wi, emb]))
            o = dy.logistic(dy.affine_transform([bo, Wo, emb]))
            u = dy.tanh(dy.affine_transform([bu, Wu, emb]))
            c = dy.cmult(i, u)
            h = dy.cmult(o, dy.tanh(c))
        else:
            nonterminal = nodes[root]['name']
            try:
                idx = self.tree2id[nonterminal]
            except:
                idx = self.tree2id['UNK']
            emb = dy.lookup(self.nonterminal_lp, idx)

            e1, c1 = self.expr_for_tree(edges[root][0], tree, WS, US, UFS, BS)
            e2, c2 = self.expr_for_tree(edges[root][1], tree, WS, US, UFS, BS)
            Ui, Uo, Uu = [u for u in US]
            Uf1, Uf2 = [u for u in UFS]
            bi, bo, bu, bf = [b for b in BS]
            e = dy.concatenate([emb, e1, e2])
            i = dy.logistic(dy.affine_transform([bi, Ui, e]))
            o = dy.logistic(dy.affine_transform([bo, Uo, e]))
            f1 = dy.logistic(dy.affine_transform([bf, Uf1, e]))
            f2 = dy.logistic(dy.affine_transform([bf, Uf2, e]))
            u = dy.tanh(dy.affine_transform([bu, Uu, e]))
            c = dy.cmult(i, u) + dy.cmult(f1, c1) + dy.cmult(f2, c2)
            h = dy.cmult(o, dy.tanh(c))

        if self.DROPOUT > 0:
            h = dy.dropout(h, self.DROPOUT)
        return h, c
Example #17
    def batch_loss(self, batch, train=True):

        # load the parameters
        W_hid = dy.parameter(self.W_hid)
        b_hid = dy.parameter(self.b_hid)

        W_out = dy.parameter(self.W_out)

        losses = []
        for _, sent in batch:
            for i in range(1, len(sent)):
                # task 6: 4-gram context; fall back to the sentence-final
                # token when there is no earlier context
                if i == 1:
                    prev_word2_ix = sent[len(sent)-1]
                    prev_word3_ix = sent[len(sent)-1]
                elif i == 2:
                    prev_word2_ix = sent[i-2]
                    prev_word3_ix = sent[len(sent)-1]
                else:
                    prev_word2_ix = sent[i-2]
                    prev_word3_ix = sent[i-3]
                prev_word_ix = sent[i - 1]
                curr_word_ix = sent[i]

                ctx1 = dy.lookup(self.embed, prev_word_ix)
                ctx2 = dy.lookup(self.embed, prev_word2_ix)
                ctx3 = dy.lookup(self.embed, prev_word3_ix)
                ctx = dy.concatenate([ctx3,ctx2,ctx1])

                # hid is the hidden layer output, size=hidden_size
                # compute b_hid + W_hid * ctx, but faster
                hid = dy.affine_transform([b_hid, W_hid, ctx])
                hid = dy.tanh(hid)

                # out is the prediction of the next word, size=vocab_size
                out = W_out * hid

                # Interpretation: the model estimates that
                # log P(curr_word=k | context) ~ out[k]
                # in other words,
                # P(curr_word=k | context) = exp(out[k]) / sum_j exp(out[j])
                #                          = softmax(out)[k]

                # We want to maximize the probability of the correct word.
                # (equivalently, minimize the negative log-probability)

                loss = dy.pickneglogsoftmax(out, curr_word_ix)
                losses.append(loss)

        # esum simply adds up the expressions in the list
        return dy.esum(losses)
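
The comments above treat dy.pickneglogsoftmax(out, k) as the negative log of the softmax probability of word k. A minimal, self-contained check of that identity (toy score vector and index, not taken from the example above):

import dynet as dy

dy.renew_cg()
out = dy.inputVector([0.1, -0.3, 2.0, 0.5])
gold = 2
direct = dy.pickneglogsoftmax(out, gold).value()
manual = -dy.log(dy.pick(dy.softmax(out), gold)).value()
assert abs(direct - manual) < 1e-5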
Example #18
    def translate_sentence(self, sent):
        dy.renew_cg()

        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)
        #W1_att = dy.parameter(self.W1_att)
        #w2_att = dy.parameter(self.w2_att)

        sent_rev = list(reversed(sent))

        # Bidirectional representations
        l2r_state = self.l2r_builder.initial_state()
        r2l_state = self.r2l_builder.initial_state()
        l2r_contexts = []
        r2l_contexts = []
        for (cw_l2r, cw_r2l) in zip(sent, sent_rev):
            l2r_state = l2r_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_l2r]))
            r2l_state = r2l_state.add_input(
                dy.lookup(self.src_lookup, self.src_token_to_id[cw_r2l]))
            l2r_contexts.append(l2r_state.output())
            r2l_contexts.append(r2l_state.output())
        r2l_contexts.reverse()

        h_fs = []
        for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
            h_fs.append(dy.concatenate([l2r_i, r2l_i]))
        h_fs_matrix = dy.concatenate_cols(h_fs)

        # Decoder
        trans_sentence = ['<S>']
        cw = trans_sentence[-1]
        c_t = dy.vecInput(self.hidden_size * 2)
        start = dy.concatenate(
            [dy.lookup(self.tgt_lookup, self.tgt_token_to_id['<S>']), c_t])
        dec_state = self.dec_builder.initial_state().add_input(start)
        while len(trans_sentence) < self.max_len:
            h_e = dec_state.output()
            c_t = self.__attention_mlp(h_fs_matrix, h_e)
            embed_t = dy.lookup(self.tgt_lookup, self.tgt_token_to_id[cw])
            x_t = dy.concatenate([embed_t, c_t])
            dec_state = dec_state.add_input(x_t)
            y_star = b_y + W_y * dec_state.output()
            p = dy.softmax(y_star)
            cw = self.tgt_id_to_token[np.argmax(p.npvalue())]
            print(np.max(p.npvalue()))
            if cw == '</S>':
                break
            trans_sentence.append(cw)

        return ' '.join(trans_sentence[1:])
Example #19
	def encode(self, pend_encs, pend_ids, head, dep, irel=None):
		dep_enc = pend_encs[dep].output
		rel_enc = dy.lookup(self.REL_LOOKUP, irel) if self.rel_feat else None
		if self.dist_feat:
			dist = pend_ids[head] - pend_ids[dep] - self.dist_min
			index = self.neg_unk if dist < 0 \
								else (self.pos_unk if dist > self.dist_range \
													else dist)
			dist_enc = dy.lookup(self.DIST_LOOKUP, index)
		else:
			dist_enc = None
		feat_emb = dy.tanh(self.transW * \
						dy.concatenate(list(filter(None, [dep_enc, rel_enc, dist_enc]))))
		return LSTMState(feat_emb, pend_encs[dep].memory_cell)
Example #20
 def represent(self, seq):
     word_r = super(SubWordRepresentation, self).represent(seq)
     pref_r = [
         dy.lookup(self.embed, self.w2i[self.pref_flag +
                                        w[:3]]) if self.pref_flag + w[:3]
         in self.w2i else dy.lookup(self.embed, self.w2i[self.pref_unk])
         for w in seq
     ]
     suff_r = [
         dy.lookup(self.embed, self.w2i[self.suff_flag +
                                        w[-3:]]) if self.suff_flag + w[-3:]
         in self.w2i else dy.lookup(self.embed, self.w2i[self.suff_unk])
         for w in seq
     ]
     return [pref_r[i] + word_r[i] + suff_r[i] for i in range(len(word_r))]
Example #21
    def translate(self, x, beam_size=1):
        """Translate a source sentence
        
        Translate a single source sentence by decoding using beam search

        Arguments:
            x (list): Source sentence (list of indices)
        
        Keyword Arguments:
            beam_size (int): Size of the beam for beam search. A value of 1 means greedy decoding (default: (1))
        
        Returns:
            list: generated translation (list of indices)
        """
        dy.renew_cg()
        input_len = len(x)
        encodings = self.encode([x], test=True)
        # Decode
        # Add parameters to the graph
        Wp, bp = self.Wp_p.expr(), self.bp_p.expr()
        Wo, bo = self.Wo_p.expr(), self.bo_p.expr()
        D, b = dy.transpose(dy.parameter(self.MT_p)), self.b_p.expr()
        # Initialize decoder with last encoding
        last_enc = dy.select_cols(encodings, [encodings.dim()[0][-1] - 1])
        init_state = dy.affine_transform([bp, Wp, last_enc])
        ds = self.dec.initial_state([init_state, dy.zeroes((self.dh, ))])
        # Initialize context
        context = dy.zeroes((self.enc_dim, ))
        # Initialize beam
        beam = [(ds, context, [self.trg_sos], 0.0)]
        # Loop
        for i in range(int(min(self.max_len, input_len * 1.5))):
            new_beam = []
            for ds, pc, pw, logprob in beam:
                embs = dy.lookup(self.MT_p, pw[-1])
                # Run LSTM
                ds = ds.add_input(dy.concatenate([embs, pc]))
                h = ds.output()
                # Compute next context
                context, _ = self.attend(encodings, h)
                # Compute output with residual connections
                output = dy.affine_transform(
                    [bo, Wo, dy.concatenate([h, context, embs])])
                # Score
                s = dy.affine_transform([b, D, output])
                # Probabilities
                p = dy.softmax(s).npvalue().flatten()
                # Careful of float error
                p = p / p.sum()
                kbest = np.argsort(p)
                for nw in kbest[-beam_size:]:
                    new_beam.append(
                        (ds, context, pw + [nw], logprob + np.log(p[nw])))

            beam = sorted(new_beam, key=lambda x: x[-1])[-beam_size:]

            if beam[-1][2][-1] == self.trg_eos:
                break

        return beam[-1][2]
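
As the docstring notes, beam_size=1 reduces this search to greedy decoding: the pruning step sorted(new_beam, key=lambda x: x[-1])[-beam_size:] keeps only the highest-scoring hypotheses. A toy illustration of that pruning with plain tuples (made-up log-probabilities, no decoder state):

new_beam = [(None, None, [3], -1.2), (None, None, [7], -0.4), (None, None, [5], -2.0)]
beam_size = 1
beam = sorted(new_beam, key=lambda x: x[-1])[-beam_size:]
assert beam[0][2] == [7]  # only the most probable hypothesis survives, i.e. greedy search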
Example #22
    def word_repr(self, char_seq, cembs):
        """
        obtain the word representation when given its character sequence
        :param char_seq: character index sequence
        :param cembs: character embedding sequence
        :return:
        """

        wlen = len(char_seq)
        if 'rgW%d' % wlen not in self.param_exprs:
            self.param_exprs['rgW%d' % wlen] = dy.parameter(
                self.params['reset_gate_W'][wlen - 1])
            self.param_exprs['rgb%d' % wlen] = dy.parameter(
                self.params['reset_gate_b'][wlen - 1])
            self.param_exprs['cW%d' % wlen] = dy.parameter(
                self.params['com_W'][wlen - 1])
            self.param_exprs['cb%d' % wlen] = dy.parameter(
                self.params['com_b'][wlen - 1])

        chars = dy.concatenate(cembs)  # [c1;c2...]
        # reset_gate = sigmoid(W_r_l * chars + b_r_l), shape: (m,char_dim)
        reset_gate = dy.logistic(self.param_exprs['rgW%d' % wlen] * chars +
                                 self.param_exprs['rgb%d' % wlen])
        # word = tanh(W_c_l * (reset_gate .* chars) + b_c_l)
        word = dy.tanh(self.param_exprs['cW%d' % wlen] *
                       dy.cmult(reset_gate, chars) +
                       self.param_exprs['cb%d' % wlen])
        if self.known_words is not None and tuple(
                char_seq) in self.known_words:
            # Frequent word = (word + word_embed) / 2
            return (word + dy.lookup(self.params['word_embed'],
                                     self.known_words[tuple(char_seq)])) / 2.
        return word
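
A standalone sketch of the same reset-gate composition with hypothetical dimensions (char_dim=4, word length 3) and freshly created parameters; the real model draws the rgW/rgb/cW/cb expressions from self.params instead:

import dynet as dy

pc = dy.ParameterCollection()
char_dim, wlen = 4, 3
W_r = pc.add_parameters((wlen * char_dim, wlen * char_dim))
b_r = pc.add_parameters((wlen * char_dim,))
W_c = pc.add_parameters((char_dim, wlen * char_dim))
b_c = pc.add_parameters((char_dim,))

dy.renew_cg()
W_r_e, b_r_e = dy.parameter(W_r), dy.parameter(b_r)
W_c_e, b_c_e = dy.parameter(W_c), dy.parameter(b_c)
cembs = [dy.inputVector([0.1] * char_dim) for _ in range(wlen)]
chars = dy.concatenate(cembs)                         # [c1; c2; c3]
reset_gate = dy.logistic(W_r_e * chars + b_r_e)       # element-wise gate over the characters
word = dy.tanh(W_c_e * dy.cmult(reset_gate, chars) + b_c_e)
print(word.dim())                                     # ((4,), 1): one char_dim-sized word vector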
Example #23
    def word_repr(self, char_seq, cembs):
        # obtain the word representation when given its character sequence

        wlen = len(char_seq)
        if 'rgW%d' % wlen not in self.param_exprs:
            self.param_exprs['rgW%d' % wlen] = dy.parameter(
                self.params['reset_gate_W'][wlen - 1])
            self.param_exprs['rgb%d' % wlen] = dy.parameter(
                self.params['reset_gate_b'][wlen - 1])
            self.param_exprs['cW%d' % wlen] = dy.parameter(
                self.params['com_W'][wlen - 1])
            self.param_exprs['cb%d' % wlen] = dy.parameter(
                self.params['com_b'][wlen - 1])

        chars = dy.concatenate(cembs)
        reset_gate = dy.logistic(self.param_exprs['rgW%d' % wlen] * chars +
                                 self.param_exprs['rgb%d' % wlen])
        word = dy.tanh(self.param_exprs['cW%d' % wlen] *
                       dy.cmult(reset_gate, chars) +
                       self.param_exprs['cb%d' % wlen])
        if self.known_words is not None and tuple(
                char_seq) in self.known_words:
            return (word + dy.lookup(self.params['word_embed'],
                                     self.known_words[tuple(char_seq)])) / 2.
        return word
Example #24
def calc_scores(words):
  # Create a computation graph, and add parameters
  dy.renew_cg()
  # Take the sum of all the embedding vectors for each word
  score = dy.esum([dy.lookup(W, x) for x in words])
  # Add the bias vector and return
  return score + b
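
A hedged training-loop sketch around this scorer, assuming (as the snippet implies but does not show) that W is a LookupParameters with one tag-score row per word, b a bias Parameters, and that model, trainer and train_data already exist:

# hypothetical setup, for illustration only
trainer = dy.SimpleSGDTrainer(model)
for words, tag in train_data:
  scores = calc_scores(words)
  loss = dy.pickneglogsoftmax(scores, tag)
  loss.value()      # forward pass
  loss.backward()   # backward pass
  trainer.update()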
Example #25
 def encode(self, sentence):
     freqs = [float(self.word_count.get(root.norm, 0)) for root in sentence]
     wembs = [
         dy.lookup(self.WORD_LOOKUP,
                   self.word_vocab.get(root.norm, 0) if not self._train_flag or
                   (random.random() < (c / (self.word_dropout_rate + c))) else 0)
         for (root, c) in zip(sentence, freqs)
     ]
     pembs = [
         dy.lookup(self.POS_LOOKUP, self.pos_vocab[root.pos]) if
         (not self._train_flag or
          (random.random() > self.pos_dropout_rate)) else wembs[root.w_id]
         for root in sentence
     ]
     encode_states = [
         dy.concatenate([wi, pi]) for wi, pi in zip(wembs, pembs)
     ]
     return dy.concatenate_cols(encode_states)
Example #26
    def get_word_features(self, word_indices):
        """
        Produce word and character features that can be used as input for the
        predictions.
        :param word_indices: a list of word indices
        :return: a list of word embeddings
        """
        # New computation graph. check_validity is not passed because
        # is_valid() is not implemented for CUDA yet.
        dynet.renew_cg(immediate_compute=True)

        features = []

        for w_idx in word_indices:
            update_flag = False
            if (w_idx in self.oov_id):
                #Allow the vocabs which are not in pre-load embeddings to
                #be updated during training
                update_flag = True

            embed_vec = dynet.lookup(self.wembeds,
                                     index=w_idx,
                                     update=update_flag)
            features.append(embed_vec)

        return features
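
The update flag passed to dynet.lookup above controls whether a row of the embedding table receives a gradient update during training. A small, self-contained check of that behaviour with a throwaway lookup table (hypothetical sizes, unrelated to self.wembeds):

import dynet
import numpy as np

pc = dynet.ParameterCollection()
emb = pc.add_lookup_parameters((10, 4))
trainer = dynet.SimpleSGDTrainer(pc)

dynet.renew_cg()
before = emb.as_array().copy()
frozen = dynet.lookup(emb, 0, update=False)   # read-only row
tuned = dynet.lookup(emb, 1, update=True)     # trainable row
loss = dynet.squared_norm(frozen) + dynet.squared_norm(tuned)
loss.value()
loss.backward()
trainer.update()
after = emb.as_array()
assert np.allclose(before[0], after[0])        # frozen row unchanged
assert not np.allclose(before[1], after[1])    # tuned row was updated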
def calc_predict_and_activations(wids, tag, words):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE-len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print ('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print (display_activations(words, activations))
    print ('scores=%s, predict: %d' % (scores, predict))
    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print ('  bias=%s' % bias)
    contributions = W * features
    print (' very bad (%.4f): %s' % (scores[0], contributions[0]))
    print ('      bad (%.4f): %s' % (scores[1], contributions[1]))
    print ('  neutral (%.4f): %s' % (scores[2], contributions[2]))
    print ('     good (%.4f): %s' % (scores[3], contributions[3]))
    print ('very good (%.4f): %s' % (scores[4], contributions[4]))
Example #28
    def train(self, inputs, target):
        words_emb = []
        dropout = self.Config.train.dropout
        for u in inputs:
            for word in u:
                words_emb.append(
                    dy.dropout(
                        dy.lookup(
                            self.input_lookup,
                            word,
                            update=True if word < 4 + self.Config.data.oov_size
                            else False), dropout))
                # words_emb.append(self.input_lookup[word])
        fwd_vectors, state = self.run_lstm(self.enc_fwd_lstm.initial_state(),
                                           words_emb)

        # s = self.sess_lstm.initial_state(state.s()).add_input(dy.lookup(self.input_lookup, self.Config.data.EOS_ID))
        s = self.sess_lstm.initial_state(state.s()).add_input(
            self.input_lookup[self.Config.data.EOS_ID])
        loss = []

        for char in target:
            s = s.add_input(s.output())
            out_vector = self.decoder_w * s.output() + self.decoder_b
            probs = dy.softmax(out_vector)
            loss.append(-dy.log(dy.pick(probs, char)))
        loss = dy.esum(loss)

        return loss
    def _predict(self, batch, train=True):

        # load the network parameters
        W_hid = dy.parameter(self.W_hid)
        b_hid = dy.parameter(self.b_hid)
        w_clf = dy.parameter(self.w_clf)
        b_clf = dy.parameter(self.b_clf)

        probas = []
        # predict the probability of positive sentiment for each sentence
        for _, sent in batch:

            sent_embed = [dy.lookup(self.embed, w) for w in sent]
            sent_embed = dy.average(sent_embed)

            # hid = tanh(b + W * sent_embed)
            # but it's faster to use affine_transform in dynet
            hid = dy.affine_transform([b_hid, W_hid, sent_embed])
            hid = dy.tanh(hid)

            y_score = dy.affine_transform([b_clf, w_clf, hid])
            y_proba = dy.logistic(y_score)
            probas.append(y_proba)

        return probas
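
The comment above points out that dy.affine_transform([b, W, x]) is just a fused b + W * x. A minimal equivalence check with throwaway parameters (hypothetical 3x5 sizes, unrelated to the classifier):

import dynet as dy
import numpy as np

pc = dy.ParameterCollection()
W = pc.add_parameters((3, 5))
b = pc.add_parameters((3,))

dy.renew_cg()
W_e, b_e = dy.parameter(W), dy.parameter(b)
x = dy.inputVector([0.2, -0.1, 0.4, 0.0, 1.0])
fused = dy.affine_transform([b_e, W_e, x]).npvalue()
explicit = (b_e + W_e * x).npvalue()
assert np.allclose(fused, explicit)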
Example #30
def calc_predict_and_activations(wids, tag, words):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE - len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in,
                             W_cnn,
                             b_cnn,
                             stride=(1, 1),
                             is_valid=False)
    filters = (dy.reshape(cnn_out, (len(wids), FILTER_SIZE))).npvalue()
    activations = filters.argmax(axis=0)

    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE, ))
    pool_out = dy.rectify(pool_out)

    scores = (W_sm * pool_out + b_sm).npvalue()
    print('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print(display_activations(words, activations))
    print('scores=%s, predict: %d' % (scores, predict))
    features = pool_out.npvalue()
    W = W_sm.npvalue()
    bias = b_sm.npvalue()
    print('  bias=%s' % bias)
    contributions = W * features
    print(' very bad (%.4f): %s' % (scores[0], contributions[0]))
    print('      bad (%.4f): %s' % (scores[1], contributions[1]))
    print('  neutral (%.4f): %s' % (scores[2], contributions[2]))
    print('     good (%.4f): %s' % (scores[3], contributions[3]))
    print('very good (%.4f): %s' % (scores[4], contributions[4]))
Example #31
    def backward(self, char_seq, truth):
        self.renew_cg()

        cembs = [
            dy.dropout(dy.lookup(self.params['embed'], char),
                       self.options['dropout_rate']) for char in char_seq
        ]

        word_seq, word = [], []
        for char, label in zip(cembs, truth):
            word.append(char)
            if label > 0:
                word_seq.append(word)
                word = []

        score = self.truth_score(word_seq)

        score_plus_margin_loss = self.beam_search(
            cembs, truth, self.options['margin_loss_discount'])

        loss = score_plus_margin_loss - score

        res = loss.scalar_value()
        loss.backward()
        return res
Example #32
def calc_scores(words):
    dy.renew_cg()
    word_embs = [dy.lookup(W_emb, x) for x in words]
    fwd_init = fwdLSTM.initial_state()
    fwd_embs = fwd_init.transduce(word_embs)
    bwd_init = bwdLSTM.initial_state()
    bwd_embs = bwd_init.transduce(reversed(word_embs))
    return W_sm * dy.concatenate([fwd_embs[-1], bwd_embs[-1]]) + b_sm
def calc_loss(words, labels, heads):
    dy.renew_cg()
    word_embs = [dy.lookup(W_emb, x) for x in words]
    fwd_init = fwdLSTM.initial_state()
    fwd_embs = fwd_init.transduce(word_embs)
    bwd_init = bwdLSTM.initial_state()
    bwd_embs = bwd_init.transduce(reversed(word_embs))
    src_encodings = [dy.reshape(dy.concatenate([f, b]), (HID_SIZE * 2, 1)) for f, b in zip(fwd_embs, reversed(bwd_embs))]
    return biaffineParser.decode_loss(src_encodings, ([heads], [labels]))
def calc_acc(words, labels, heads):
    dy.renew_cg()
    word_embs = [dy.lookup(W_emb, x) for x in words]
    fwd_init = fwdLSTM.initial_state()
    fwd_embs = fwd_init.transduce(word_embs)
    bwd_init = bwdLSTM.initial_state()
    bwd_embs = bwd_init.transduce(reversed(word_embs))
    src_encodings = [dy.reshape(dy.concatenate([f, b]), (HID_SIZE * 2, 1)) for f, b in zip(fwd_embs, reversed(bwd_embs))]
    pred_heads, pred_labels = biaffineParser.decoding(src_encodings)
    return biaffineParser.cal_accuracy(pred_heads, pred_labels, heads, labels)
def calc_scores(wids):
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids += [0] * (WIN_SIZE-len(wids))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
Example #36
 def forward(self, char_seq):
     self.renew_cg()
     cembs = [dy.lookup(self.params['embed'],char) for char in char_seq ]
     
     agenda = self.beam_search(cembs)
     now = agenda[-1].max()
     ans = []
     while now.prevState is not None:
         ans.append(now.wlen)
         now = now.prevState
     return reversed(ans)
Example #37
    def build_lm_graph(self, sent):
        dy.renew_cg()
        init_state = self.builder.initial_state()

        errs = [] # will hold expressions
        es=[]
        state = init_state
        for (cw,nw) in zip(sent,sent[1:]):
            # assume word is already a word-id
            x_t = dy.lookup(self.lookup, int(cw))
            state = state.add_input(x_t)
            y_t = state.output()
            r_t = self.bias + (self.R * y_t)
            err = dy.pickneglogsoftmax(r_t, int(nw))
            errs.append(err)
        nerr = dy.esum(errs)
        return nerr
Example #38
    def backward(self, char_seq, truth):
        self.renew_cg()

        cembs = [ dy.dropout(dy.lookup(self.params['embed'],char),self.options['dropout_rate']) for char in char_seq ]
    
        word_seq,word = [],[]
        for char,label in zip(cembs,truth):
            word.append(char)
            if label > 0:
                word_seq.append(word)
                word = []

        score = self.truth_score(word_seq)

        score_plus_margin_loss = self.beam_search(cembs,truth,self.options['margin_loss_discount'])

        loss = score_plus_margin_loss - score

        res = loss.scalar_value()
        loss.backward()
        return res
Example #39
    def sample(self, first=1, nchars=0, stop=-1):
        res = [first]
        dy.renew_cg()
        state = self.builder.initial_state()

        R = dy.parameter(self.R)
        bias = dy.parameter(self.bias)
        cw = first
        while True:
            x_t = dy.lookup(self.lookup, cw)
            state = state.add_input(x_t)
            y_t = state.output()
            r_t = bias + (R * y_t)
            ydist = dy.softmax(r_t)
            dist = ydist.vec_value()
            rnd = random.random()
            for i,p in enumerate(dist):
                rnd -= p
                if rnd <= 0: break
            res.append(i)
            cw = i
            if cw == stop: break
            if nchars and len(res) > nchars: break
        return res
Example #40
def calc_scores(words):
    dy.renew_cg()
    b_sm_exp = dy.parameter(b_sm)
    score = dy.esum([dy.lookup(W_sm, x) for x in words])
    return score + b_sm_exp
 def __call__(self, char, DIRECT_LOOKUP=False):
     char_i = char if DIRECT_LOOKUP else self.vocab[char]
     return dy.lookup(self.enc, char_i)
Example #42
def calc_scores(words):
  dy.renew_cg()
  cbow = dy.esum([dy.lookup(W_emb, x) for x in words])
  return W_sm * cbow + b_sm
Example #43
def calc_scores(words):
  dy.renew_cg()
  h = dy.esum([dy.lookup(W_emb, x) for x in words])
  for W_h_i, b_h_i in zip(W_h, b_h):
    h = dy.tanh( W_h_i * h + b_h_i )
  return W_sm * h + b_sm