Example #1
    def decode(self, input_vectors, output):
        tgt_toks = [self.tgt_vocab[tok] for tok in output]

        w = dynet.parameter(self.decoder_w)
        b = dynet.parameter(self.decoder_b)

        s = self.dec_lstm.initial_state()
        s = s.add_input(
            dynet.concatenate([
                input_vectors[-1],
                dynet.vecInput(self.args.hidden_dim * 2),
                dynet.vecInput(self.pronouncer.args.hidden_dim * 2)
            ]))
        loss = []
        for tok in tgt_toks:
            out_vector = w * s.output() + b
            probs = dynet.softmax(out_vector)
            loss.append(-dynet.log(dynet.pick(probs, tok.i)))

            embed_vector = self.tgt_lookup[tok.i]
            attn_vector = self.attend(input_vectors, s)

            spelling = [
                self.pronouncer.src_vocab[letter] for letter in tok.s.upper()
            ]
            embedded_spelling = self.pronouncer.embed_seq(spelling)
            pron_vector = self.pronouncer.encode_seq(embedded_spelling)[-1]
            fpv = dynet.nobackprop(pron_vector)

            inp = dynet.concatenate([embed_vector, attn_vector, fpv])
            s = s.add_input(inp)

        loss = dynet.esum(loss)
        return loss
Example #2
def run_one_doc(model, first_level, emb_doc, doc_labels, w_param, b_param):
    """
    Runs the given model on one document and makes predictions.
    @params: first_level is I, O, or P,
             model is the LSTM model,
             emb_doc is a numpy array of embeddings for one document,
             doc_labels is a list of the labels associated with emb_doc,
             w_param is a Dynet parameter multiplied with the layer output,
             b_param is a Dynet parameter added to the product of output and w_param.
    @returns: pred_gold is a list of tuples in the form of (prediction, gold label)
    """
    dy.renew_cg()
    s = model.initial_state()
    i = dy.vecInput(200)
    o = dy.vecInput(200)
    p = dy.vecInput(200)
    si = s.add_input(i)
    so = s.add_input(o)
    sp = s.add_input(p)
    pred_gold = []

    for wdemb, label in zip(emb_doc, doc_labels):
        x = dy.inputVector(wdemb)
        if first_level == 'I':
            s2 = si.add_input(x)
        elif first_level == 'O':
            s2 = so.add_input(x)
        else:
            s2 = sp.add_input(x)
        out_class = dy.softmax((w_param * s2.output()) + b_param)
        chosen_class = np.argmax(out_class.npvalue())
        pred_gold.append((int(chosen_class), int(label)))

    return pred_gold
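A hedged usage sketch for run_one_doc: the parameter collection, LSTM builder, classifier parameters, and data names below are illustrative placeholders (the original setup code is not shown), assuming 200-dimensional embeddings and two output classes.

# Hypothetical setup and evaluation driver for run_one_doc above; every name
# here is an illustrative placeholder, not part of the original code base.
import dynet as dy
import numpy as np

pc = dy.ParameterCollection()
model = dy.LSTMBuilder(1, 200, 200, pc)   # assumes 200-dim embeddings/hidden
w_param = pc.add_parameters((2, 200))     # assumes 2 output classes
b_param = pc.add_parameters(2)

def evaluate(docs, labels, first_level='I'):
    correct = total = 0
    for emb_doc, doc_labels in zip(docs, labels):
        pred_gold = run_one_doc(model, first_level, emb_doc, doc_labels,
                                w_param, b_param)
        correct += sum(1 for p, g in pred_gold if p == g)
        total += len(pred_gold)
    return float(correct) / max(total, 1)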
Example #3
    def train(self, words, lemmas, gold, bad):
        dy.renew_cg()
        W = dy.parameter(self.pW)
        b = dy.parameter(self.pb)

        losses = []
        gold_scores = []
        bad_scores = []

        for item in gold:
            lf, denotation = item[0], item[1]
            feature = self.extract_feature(words, lemmas, lf, denotation)
            feature_vec = dy.vecInput(self.nfeatures)
            feature_vec.set(feature)
            gold_scores.append(W * feature_vec + b)

        for item in bad:
            lf, denotation = item[0], item[1]
            feature = self.extract_feature(words, lemmas, lf, denotation)
            feature_vec = dy.vecInput(self.nfeatures)
            feature_vec.set(feature)
            bad_scores.append(W * feature_vec + b)

        log_prob = dy.log_softmax(dy.concatenate(gold_scores + bad_scores))
        for i in range(len(gold_scores)):
            losses.append(dy.pick(log_prob, i))

        return -dy.esum(losses)
Example #4
    def forward(self, tokens, parents, children, node_order, inds_for_loss):
        hs_up = [dy.vecInput(self.emb_size) for _ in range(len(tokens))]
        cs_up = [dy.vecInput(self.emb_size) for _ in range(len(tokens))]
        hs_dn = [dy.vecInput(self.emb_size) for _ in range(len(tokens))]
        cs_dn = [dy.vecInput(self.emb_size) for _ in range(len(tokens))]

        for node in node_order:
            h_ch = [hs_up[ch] for ch in children[node]]
            c_ch = [cs_up[ch] for ch in children[node]]
            h_, c_ = self.tree_lstm_up.state(self.emb[tokens[node]], h_ch,
                                             c_ch)
            hs_up[node] = h_
            cs_up[node] = c_

        for node in reversed(node_order):
            h_pa = [hs_dn[pa] for pa in parents[node]]
            c_pa = [cs_dn[pa] for pa in parents[node]]
            h_, c_ = self.tree_lstm_dn.state(self.emb[tokens[node]], h_pa,
                                             c_pa)
            hs_dn[node] = h_
            cs_dn[node] = c_

        hs_return = [
            dy.affine_transform(
                [self.b, self.W,
                 dy.concatenate([hs_up[i], hs_dn[i]])]) for i in inds_for_loss
        ]
        #cs_return = [dy.concatenate([cs_up[i], cs_dn[i]]) for i in inds_for_loss]
        return hs_return
Example #5
    def predict(self, word):
        hidden_size = 64
        vocabulary_size = len(self.vocab_to_index)
        input_size = output_size = vocabulary_size

        m = dy.Model()

        W = m.add_parameters((hidden_size, input_size))
        b = m.add_parameters(hidden_size)
        V = m.add_parameters((output_size, hidden_size))  # Softmax weights
        a = m.add_parameters(output_size)  # Softmax bias

        x = dy.vecInput(input_size)
        y = dy.vecInput(output_size)
        h = dy.tanh((W * x) + b)
        output = dy.softmax(V * h)

        x.set(self.word_vectors[word])
        probabilities = output.npvalue()

        predicted_word = np.random.choice(self.unique_words,
                                          p=(probabilities + 0.0002) /
                                          sum(probabilities + 0.0002))

        return predicted_word
Example #6
def build_model(first_level, model, emb_doc, doc_labels, w_param, b_param):
    """
    Runs the model for training, calculating the loss. 
    @params: first_level is I, O, or P,
             model is the LSTM model,
             emb_doc is a numpy array of embeddings for one document,
             doc_labels is a list of the labels associated with emb_doc,
             w_param is a Dynet parameter multiplied with the layer output,
             b_param is a Dynet parameter added to the product of output and w_param.
    @returns: the sum of the errors computed for the document
    """
    dy.renew_cg()
    s = model.initial_state()
    i = dy.vecInput(200)
    o = dy.vecInput(200)
    p = dy.vecInput(200)
    si = s.add_input(i)
    so = s.add_input(o)
    sp = s.add_input(p)
    loss = []

    for wdemb, label in zip(emb_doc, doc_labels):
        x = dy.inputVector(wdemb)
        x = dy.noise(x, 0.5)  # noise for student model
        if first_level == 'I':
            s2 = si.add_input(x)
        elif first_level == 'O':
            s2 = so.add_input(x)
        else:
            s2 = sp.add_input(x)
        loss.append(
            dy.pickneglogsoftmax((w_param * s2.output()) + b_param, label))
    return dy.esum(loss)
Example #7
def decode(vectors, output, decode_char=True):
    # if decode_char: decode into characters to produce the target form;
    # otherwise decode into MSDs to produce the tag sequence
    #if not decode_char: pdb.set_trace()
    output = [EOS] + list(output) + [EOS]
    if decode_char:
        x2id = char2id
        output_x_lookup = output_lookup
        w = decoder_w
        b = decoder_b
        w1 = attention_w1
        x_lstm = dec_lstm
        input_mat = dy.concatenate_cols(vectors)
    else:
        x2id = msd2id_split
        output_x_lookup = output_msd_lookup
        w = decoder_msd_w
        b = decoder_msd_b
        x_lstm = dec_msd_lstm
        input_mat = vectors  #dy.concatenate(vectors)

    output = [x2id[c] for c in output]

    w1dt = None

    last_output_embeddings = output_x_lookup[x2id[EOS]]
    if decode_char:
        s = x_lstm.initial_state().add_input(
            dy.concatenate(
                [dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    else:
        s = x_lstm.initial_state().add_input(
            dy.concatenate([
                dy.vecInput(STATE_SIZE * 2 + EMBEDDINGS_SIZE),
                last_output_embeddings
            ]))
    loss = []

    for char in output:

        # w1dt can be computed and cached once for the entire decoding phase
        if decode_char:
            w1dt = w1dt or w1 * input_mat
            vector = dy.concatenate(
                [attend(input_mat, s, w1dt), last_output_embeddings])
        else:
            vector = dy.concatenate([input_mat, last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_x_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
Example #8
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
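Decode functions of this shape rely on an attend helper that is not shown in the snippet. Below is a minimal sketch modeled on the standard DyNet attention example, assuming attention_w2 and attention_v are parameters registered in the same model as attention_w1.

def attend(input_mat, state, w1dt):
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    # fold the current decoder state into the attention space
    w2dt = w2 * dy.concatenate(list(state.s()))
    # one unnormalized score per source position
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(unnormalized)
    # context vector: attention-weighted sum of the encoder columns
    context = input_mat * att_weights
    return context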
Example #9
def generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = embed_sentence(in_seq)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    for i in range(len(in_seq)*2):
        if count_EOS == 2: break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]
    return out
Example #10
    def task_mlp(self, vec_sen, train, y_s=None):
        """
        Calculates the MLP function over the sentence representation vector.
        """
        w1 = dy.parameter(self._params["task_w1"])
        b1 = dy.parameter(self._params["task_b1"])
        w2 = dy.parameter(self._params["task_w2"])
        b2 = dy.parameter(self._params["task_b2"])

        if train:
            drop = self._dropout
        else:
            drop = 0

        if y_s is not None:
            v = dy.vecInput(1)
            v.set([y_s])
            in_vec = dy.concatenate([vec_sen, v])
        else:
            in_vec = vec_sen

        out = dy.tanh(dy.dropout(dy.affine_transform([b1, w1, in_vec]), drop))
        out = dy.affine_transform([b2, w2, out])

        return out
Example #11
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]
    output = [2, 5, 6, 7, 8, 9, 3]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    # [2*state_size, sent_len]
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    # last_output_embeddings = output_lookup[char2int[EOS]]
    last_output_embeddings = output_lookup[2]
    # s = dec_lstm.initial_state_from_raw_vectors([np.random.normal(0, 0.1, STATE_SIZE) for i in range(2 * LSTM_NUM_OF_LAYERS)])

    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate(
            [attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
Example #12
    def get_loss(self, input_string, output_string):
        # Adding <EOS>
        input_string = self._add_eos(input_string)
        output_string = self._add_eos(output_string)
        # Create a new computation graph
        dy.renew_cg()
        # Vectorizing input and output (character-level, word-level, etc.)
        embedded_string = self._embed_string(input_string)
        # Hidden states of all the slices of the RNN for the input
        encoded_string = self._encode_string(embedded_string)
        # adding to DEC_RNN and getting the states of the decoder
        rnn_state = (self.DEC_RNN.initial_state()).add_input(dy.vecInput(self.enc_state_size))

        loss = list()

        for output_char in output_string:
            # getting the context vector for each character (or word)
            attended_encoding = self._attend(encoded_string, rnn_state)
            # print attended_encoding.dim()

            # con(y{i-1}, attended_encoding)
            rnn_state = rnn_state.add_input(attended_encoding)
            # rnn_state = rnn_state.add_input(dy.concatenate(attended_encoding))
            probs = self._get_probs(rnn_state.output())
            # probs = self._get_probs(rnn_state.add_input(attended_encoding).output())
            # - log(probs[output_char]) as loss
            loss.append(-dy.log(dy.pick(probs, output_char)))
        loss = dy.esum(loss)
        
        return loss
Example #13
    def generate_from_encoding_vector(self, numpy_vec):
        with open("gen.txt", "a+") as f:
            dy.renew_cg()
            gen = ["<S>"]
            encoded = dy.vecInput(len(numpy_vec))
            encoded.set(numpy_vec)
            s = self.decoder.initial_state()
            start_encoded = self.E[self.tok2ind["<S>"]]
            s = s.add_input(dy.concatenate([start_encoded, encoded]))
            counter = 0
            current = "<S>"

            while counter < 50 and current != "<E>":
                counter += 1
                probs, scores = self.predict_word(s.output())
                s = s.add_input(dy.concatenate([self.E[self.tok2ind[current]], encoded]))
                current = self.ind2tok[np.argmax(probs.npvalue())]
                gen.append(current)

            gen = " ".join(gen)
            f.write(gen + "\n")
Example #14
    def decode_to_loss(self, vectors, output):
        w = dy.parameter(self.w_softmax)
        b = dy.parameter(self.b_softmax)
        w1 = dy.parameter(self.attention_source)
        output = list(output)

        encoded_states = dy.concatenate_cols(vectors)

        prev_output_embeddings = self.target_lookup[self.eos_target]
        current_state = self.decoder.initial_state().add_input(
            dy.concatenate(
                [dy.vecInput(self.hidden_size * 2), prev_output_embeddings]))
        losses = []
        attentional_component = w1 * encoded_states
        for next_word in output:

            vector = dy.concatenate([
                self.attention(encoded_states, current_state,
                               attentional_component), prev_output_embeddings
            ])

            current_state = current_state.add_input(vector)
            s = dy.affine_transform([b, w, current_state.output()])
            item_loss = dy.pickneglogsoftmax(s, next_word)
            losses.append(item_loss)
            prev_output_embeddings = self.target_lookup[next_word]

        loss = dy.esum(losses)
        return loss
Example #15
    def calc_loss(self, src_seqs, trg_seqs, training=True):
        batch_size = len(src_seqs)
        src_encodings = self.encoder.encode(src_seqs, training=training)
        src_enc_all = dy.concatenate_cols(src_encodings)
        src_trans_att = self.attender.get_src_transformation(src_enc_all)
        state = self.decoder.initialize(src_encodings, training=training)
        ctx_tm1 = dy.vecInput(self.encoder.state_dim)
        losses = []

        max_len = max(map(len, trg_seqs))
        for i in xrange(1, max_len):
            y_tm1 = [trg_seq[i - 1] if i < len(trg_seq) else trg_seq[-1] for trg_seq in trg_seqs]
            ref_y_t = [trg_seq[i] if i < len(trg_seq) else trg_seq[-1] for trg_seq in trg_seqs]
            y_tm1_embed = self.decoder.embedder.embed_item(y_tm1, training=training)

            x = dy.concatenate([y_tm1_embed, ctx_tm1])
            state = state.add_input(x)
            h_t = state.output()
            ctx_t, alpha_t = self.attender.calc_context(src_enc_all, src_trans_att, h_t)

            loss_t = self.decoder.calc_loss(h_t, ctx_t, ref_y_t, training=training)

            mask = dy.inputVector([1 if i < len(trg_seq) else 0 for trg_seq in trg_seqs])
            mask = dy.reshape(mask, (1,), batch_size)
            loss_t = dy.sum_batches(loss_t * mask)

            ctx_tm1 = ctx_t
            losses.append(loss_t)

        loss = dy.esum(losses)
        return loss
Example #16
    def decode(self, vectors, output, end_token, durs):
        #output = [EOS] + list(output) + [EOS]
        #output = [char2int[c] for c in output]

        w = dy.parameter(self.decoder_w)
        b = dy.parameter(self.decoder_b)
        w1 = dy.parameter(self.attention_w1)
        input_mat = dy.concatenate_cols(vectors)
        w1dt = None

        last_output_embeddings = self.output_lookup[2]
        s = self.dec_lstm.initial_state().add_input(
            dy.concatenate(
                [dy.vecInput(self.state_size * 2), last_output_embeddings]))
        loss = []
        dur_loss = []
        c = 1
        for word, dur in zip(output, durs):
            c += 1
            # w1dt can be computed and cached once for the entire decoding phase
            w1dt = w1dt or w1 * input_mat
            vector = dy.concatenate(
                [self.attend(input_mat, s, w1dt), last_output_embeddings])
            s = s.add_input(vector)
            k = s
            #print "Going"
            dloss = self.test_duration(k, dur)
            #print "Back"
            dur_loss.append(dloss)
            out_vector = w * s.output() + b
            probs = dy.softmax(out_vector)
            last_output_embeddings = self.output_lookup[word]
            loss.append(-dy.log(dy.pick(probs, word)))
        loss = dy.esum(loss)
        return loss, dy.esum(dur_loss)
Example #17
    def decode_to_prediction(self, encoded, max_length):

        w = dy.parameter(self.w_softmax)
        b = dy.parameter(self.b_softmax)
        w1 = dy.parameter(self.attention_source)
        encoded_states = dy.concatenate_cols(encoded)

        attentional_component = w1 * encoded_states

        prev_output_embeddings = self.target_lookup[self.eos_target]
        current_state = self.decoder.initial_state().add_input(
            dy.concatenate(
                [dy.vecInput(self.hidden_size * 2), prev_output_embeddings]))

        result = ""
        for i in range(max_length):
            vector = dy.concatenate([
                self.attention(encoded_states, current_state,
                               attentional_component), prev_output_embeddings
            ])

            current_state = current_state.add_input(vector)
            s = dy.affine_transform([b, w, current_state.output()])
            probs = (dy.log_softmax(s)).value()
            next_word = np.argmax(probs)
            prev_output_embeddings = self.target_lookup[next_word]

            if (next_word == self.eos_target):
                return result[:-1]
            if next_word in self.targetDictionnary.keys():
                result += self.targetDictionnary[next_word] + " "
            else:
                result += self.targetDictionnary[unk_target] + " "
        return result[:-1]
Example #18
    def _attend(self, input_vectors, state):
        w1 = self.att_w1.expr()
        w2 = self.att_w2.expr()
        v = self.att_v.expr()
        attention_weights = []

        w2dt = w2 * state.h()[-1]
        for input_vector in input_vectors:
            attention_weight = v * dy.tanh(w1 * input_vector + w2dt)
            attention_weights.append(attention_weight)
        attention_weights = dy.softmax(dy.concatenate(attention_weights))
        pos = self.argmax(attention_weights.value())
        #print pos
        att_inp = []
        for x in range(pos - self.attention_window,
                       pos + self.attention_window + 1):
            gaussian_value = self._gaussian(x, pos, self.attention_window)
            #print gaussian_value
            if x >= 0 and x < len(input_vectors):
                vector = input_vectors[x]
            else:
                vector = dy.vecInput(self.config.encoder_size * 2)
            att_inp.append(vector * gaussian_value)
        #output_vectors = dy.esum([vector * attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
        output_vectors = dy.esum(att_inp)
        return output_vectors
Example #19
    def train(self, trainning_set):
        for sentence, eid, entity, trigger, label, pos, chars, rule in trainning_set:
            features = self.encode_sentence(sentence, pos, chars)
            loss = []            

            entity_embeds = features[entity]

            attention, context = self.self_attend(features)
            ty = dy.vecInput(len(sentence))
            ty.set([0 if i!=trigger else 1 for i in range(len(sentence))])
            loss.append(dy.binary_log_loss(dy.reshape(attention,(len(sentence),)), ty))
            h_t = dy.concatenate([context, entity_embeds])
            hidden = dy.tanh(self.lb.expr() * h_t + self.lb_bias.expr())
            out_vector = dy.reshape(dy.logistic(self.lb2.expr() * hidden + self.lb2_bias.expr()), (1,))
            label = dy.scalarInput(label)
            loss.append(dy.binary_log_loss(out_vector, label))

            pres = [0]
            for pattern in rule:
                probs = self.decoder(features, pres)
                loss.append(-dy.log(dy.pick(probs, pattern)))
                pres.append(pattern)

            loss = dy.esum(loss)
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
Example #20
def generate(input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    def sample(probs):
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0: break
        return i

    embedded = embed_sentence(input)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    out = ''
    count_EOS = 0
    for i in range(len(input)*2):
        if count_EOS == 2: break
        vector = dy.concatenate([attend(encoded, s), last_output_embeddings])

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        probs = probs.vec_value()
        next_char = sample(probs)
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]
    return out
Example #21
    def train(self, trainning_set):
        for sentence, eid, entity, trigger, label, pos, chars, rule in trainning_set:
            features = self.encode_sentence(sentence, pos, chars)
            loss = []            

            # entity_embeds = features[entity]

            # attention, context = self.self_attend(features)
            # ty = dy.vecInput(len(sentence))
            # ty.set([0 if i!=trigger else 1 for i in range(len(sentence))])
            # loss.append(dy.binary_log_loss(dy.reshape(attention,(len(sentence),)), ty))
            # h_t = dy.concatenate([context, entity_embeds])
            # hidden = dy.tanh(self.lb * h_t + self.lb_bias)
            # out_vector = dy.reshape(dy.logistic(self.lb2 * hidden + self.lb2_bias), (1,))
            # label = dy.scalarInput(label)
            # loss.append(dy.binary_log_loss(out_vector, label))

            # Get decoding losses
            last_output_embeddings = self.pattern_embeddings[0]
            s = self.decoder_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(self.hidden_dim), last_output_embeddings]))
            for pattern in rule:
                h_t = s.output()
                context = self.attend(features, h_t)
                out_vector = self.pt * dy.concatenate([context, h_t]) + self.pt_bias
                probs = dy.softmax(out_vector)
                loss.append(-dy.log(dy.pick(probs, pattern)))
                last_output_embeddings = self.pattern_embeddings[pattern]
                s = s.add_input(dy.concatenate([context, last_output_embeddings]))
            
            loss = dy.esum(loss)
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
Example #22
def test(test_list, pWeight, unique_vector, features_total, unique_class):
    input_dy = dy.vecInput(features_total)
    output_list = []
    # print(unique_class)
    for line in test_list:
        test_line = line.split()
        target = test_line[0]
        test_line = test_line[1:]
        test_vector = [0] * features_total
        all_unique = True
        # print(unique_vector)
        for word in test_line:
            # print(word)
            try:
                test_vector[unique_vector.index(word)] = 1
                all_unique = False
            except:
                continue

        input_dy.set(test_vector)

        if all_unique:
            print("none")
            # print( "%s: %s" %(unique_class.index(target), output))
            # output_list.append(output)
        else:
            output = test_network(pWeight, input_dy)
            # print( "%s: %s" %(unique_class.index(target), output))
            # output_list.append(output)
            print("target: %s, output: %s" % (target, unique_class[output]))
Example #23
def test(test_list, pWeight, unique_vector, features_total):
    input_dy = dy.vecInput(features_total)

    for line in test_list:
        test_line = line.split()
        target = test_line[0]
        test_line = test_line[1:]
        test_vector = [0] * features_total
        all_unique = True

        for word in test_line:
            try:
                test_vector[unique_vector.index(word)] = 1
                all_unique = False
            except:
                continue

        input_dy.set(test_vector)

        if all_unique:
            output = 0
            print("%s: %s" % (target, output))
        else:
            output = test_network(pWeight, input_dy)
            print("%s: %s" % (target, output.value()))
Example #24
def generate(lemma, tag, enc_fwd_lstm, enc_bwd_lstm, dec_lstm, cencoder,
             cdecoder):
    embedded = embed_sentence(lemma, tag)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    encoded = encoded[-1]
    w1dt = None

    last_output_embeddings = char_lookup[cencoder["#"]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    for i in range(len(lemma) * 2):
        if count_EOS == 2: break
        vector = dy.concatenate([encoded, last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = char_lookup[next_char]
        if cdecoder[next_char] == "#":
            count_EOS += 1
            continue

        out += cdecoder[next_char]
    return out
Example #25
    def encode(self, w, o, s):

        k = 5

        suffixes, prefixes = [], []

        for i in range(1, k + 1):

            pre, suf = w[:i], w[-i:]
            pre_idx = self.P2I[pre] if pre in self.P2I else self.P2I["<unk>"]
            suf_idx = self.S2I[suf] if suf in self.S2I else self.S2I["<unk>"]
            suf_e = dy.lookup(self.E_suf, suf_idx)
            pre_e = dy.lookup(self.E_pre, pre_idx)
            suffixes.append(suf_e)
            prefixes.append(pre_e)

        word_encoded = self.W2I[w] if w in self.W2I else self.W2I["<unk>"]
        word_e = dy.lookup(self.E, word_encoded)

        exp_out = dy.vecInput(EMBEDDING_SIZE)
        if o == []: o = ["<unk>"]
        for out_token in o:
            out_token_encoded = self.OUTPUT2IND[
                out_token] if out_token in self.OUTPUT2IND else self.OUTPUT2IND[
                    "<unk>"]
        out_embedding = dy.lookup(self.E_output, out_token_encoded)
        exp_out = exp_out + out_embedding

        W = dy.parameter(self.W)

        return W * dy.concatenate(
            [word_e,
             dy.esum(suffixes),
             dy.esum(prefixes), out_embedding])
Example #26
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate(
            [attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
Example #27
def generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = embed_sentence(in_seq)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    for i in range(len(in_seq) * 2):
        if count_EOS == 2: break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate(
            [attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]
    return out
Example #28
    def train(self, trainning_set):
        for sentence, rule in trainning_set:
            features = self.encode_sentence(sentence)
            loss = []
            # Get decoding losses
            last_output_embeddings = self.pattern_embeddings[0]
            s = self.decoder_lstm.initial_state().add_input(
                dy.concatenate(
                    [dy.vecInput(self.hidden_dim), last_output_embeddings]))

            rule.append(1)
            for pattern in rule:
                h_t = s.output()
                context = self.attend(features, h_t)
                out_vector = self.pt.expr() * dy.concatenate(
                    [context, h_t]) + self.pt_bias.expr()
                probs = dy.softmax(out_vector)
                loss.append(-dy.log(dy.pick(probs, pattern)))
                last_output_embeddings = self.pattern_embeddings[pattern]
                s = s.add_input(
                    dy.concatenate([context, last_output_embeddings]))
            loss = dy.esum(loss)
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
Example #29
    def attend(self, H_e, h_t):
        context_vector = dy.vecInput(self.hidden_dim)
        for h_e in H_e:
            s = dy.transpose(h_t) * self.attention_weight.expr() * h_e
            a = dy.softmax(s)
            context_vector += h_e * a
        return context_vector / len(H_e)
Example #30
    def Train_Morph(self):
        self.trainer.set_sparse_updates(False)
        start = time.time()
        for iWord, word in enumerate(list(self.morph_dict.keys())):
            if iWord % 2000 == 0 and iWord != 0:
                print("Processing word number: %d" % iWord, ", Time: %.2f" % (time.time() - start))
                start = time.time()

            morph_seg = self.morph_dict[word]
            morph_vec = self.__getWordVector(morph_seg)

            if self.ext_embeddings is None:
                vec_gold = self.wlookup[int(self.vocab.get(word, 0))].vec_value()
            elif word in self.ext_embeddings:
                vec_gold = self.ext_embeddings[word]
            else:
                vec_gold = None

            if vec_gold is not None:
                y_gold = dynet.vecInput(self.wdims)
                y_gold.set(vec_gold)
                mErrs = self.cosine_proximity(morph_vec, y_gold)
                mErrs.backward()
                self.trainer.update()
            renew_cg()
Example #31
def generate(input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    def sample(probs):
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0: break
        return i

    embedded = embed_sentence(input)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = pc.parameter(decoder_w)
    b = pc.parameter(decoder_b)

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2))
    out = ''
    count_EOS = 0
    for i in range(len(input)*2):
        if count_EOS == 2: break
        vector = attend(encoded, s)

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        probs = probs.vec_value()
        next_char = sample(probs)
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue

        out += int2char[next_char]
    return out
Example #32
    def generate(self, src_seq, sampled=False):
        def sample(probs):
            rnd = random.random()
            for i, p in enumerate(probs):
                rnd -= p
                if rnd <= 0: break
            return i

        dynet.renew_cg()

        embedded = self.embed_seq(src_seq)
        input_vectors = self.encode_seq(embedded)

        w = dynet.parameter(self.decoder_w)
        b = dynet.parameter(self.decoder_b)

        s = self.dec_lstm.initial_state()
        s = s.add_input(
            dynet.concatenate([
                input_vectors[-1],
                dynet.vecInput(self.args.hidden_dim * 2),
                dynet.vecInput(self.pronouncer.args.hidden_dim * 2)
            ]))
        out = []
        for i in range(1 + len(src_seq) * 5):
            out_vector = w * s.output() + b
            probs = dynet.softmax(out_vector)
            probs = probs.vec_value()
            next_symbol = sample(probs) if sampled else max(
                enumerate(probs), key=lambda x: x[1])[0]
            out.append(self.tgt_vocab[next_symbol])
            if self.tgt_vocab[next_symbol] == self.tgt_vocab.END_TOK:
                break
            embed_vector = self.tgt_lookup[out[-1].i]
            attn_vector = self.attend(input_vectors, s)

            spelling = [
                self.pronouncer.src_vocab[letter]
                for letter in out[-1].s.upper()
            ]
            embedded_spelling = self.pronouncer.embed_seq(spelling)
            pron_vector = self.pronouncer.encode_seq(embedded_spelling)[-1]
            fpv = dynet.nobackprop(pron_vector)

            inp = dynet.concatenate([embed_vector, attn_vector, fpv])
            s = s.add_input(inp)
        return out
Example #33
    def generate(self, pre_context, pos_context, entity):
        embedded = self.embed_sentence(pre_context)
        pre_encoded = self.encode_sentence(self.encpre_fwd_lstm,
                                           self.encpre_bwd_lstm, embedded)

        embedded = self.embed_sentence(pos_context)
        pos_encoded = self.encode_sentence(self.encpos_fwd_lstm,
                                           self.encpos_bwd_lstm, embedded)

        w = dy.parameter(self.decoder_w)
        b = dy.parameter(self.decoder_b)

        w1_pre = dy.parameter(self.attention_w1_pre)
        h_pre = dy.concatenate_cols(pre_encoded)
        w1dt_pre = None

        w1_pos = dy.parameter(self.attention_w1_pos)
        h_pos = dy.concatenate_cols(pos_encoded)
        w1dt_pos = None

        last_output_embeddings = self.output_lookup[self.output2int[self.EOS]]
        entity_embedding = self.input_lookup[self.input2int[entity]]
        s = self.dec_lstm.initial_state().add_input(
            dy.concatenate([
                dy.vecInput(self.STATE_SIZE * 2), last_output_embeddings,
                entity_embedding
            ]))

        out = []
        count_EOS = 0
        for i in range(self.config['GENERATION']):
            if count_EOS == 2: break
            # w1dt can be computed and cached once for the entire decoding phase
            w1dt_pre = w1dt_pre or w1_pre * h_pre
            w1dt_pos = w1dt_pos or w1_pos * h_pos

            attention_pre = self.attend(h_pre, s, w1dt_pre,
                                        self.attention_w2_pre,
                                        self.attention_v_pre)
            attention_pos = self.attend(h_pos, s, w1dt_pos,
                                        self.attention_w2_pos,
                                        self.attention_v_pos)

            vector = dy.concatenate([
                self.hier_attend(attention_pre, attention_pos, s),
                last_output_embeddings, entity_embedding
            ])
            s = s.add_input(vector)
            out_vector = w * s.output() + b
            probs = dy.softmax(out_vector).vec_value()
            next_word = probs.index(max(probs))
            last_output_embeddings = self.output_lookup[next_word]
            if self.int2output[next_word] == self.EOS:
                count_EOS += 1
                continue

            out.append(self.int2output[next_word])

        return out
Example #34
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []
    for char in output:
        vector = dy.concatenate([attend(vectors, s), last_output_embeddings])

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
Example #35
def create_network_return_best(inputs):
    '''
    inputs is a list of numbers
    '''
    dy.renew_cg()
    W = dy.parameter(pW)
    b = dy.parameter(pB)

    if len(inputs) > documentLength:
        inputs = inputs[0:documentLength]
    
    emb_vectors = [lookup[i] for i in inputs]
    
    while(len(emb_vectors) < documentLength):
        pad = dy.vecInput(embDimension)
        pad.set(np.zeros(embDimension))
        emb_vectors.append(pad)    
    
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax( (W*net_input) + b)
    return np.argmax(net_output.npvalue())
Example #36
def create_network_return_loss(inputs, expected_output):
    '''
    inputs is a list of numbers
    '''
    dy.renew_cg()
    W = dy.parameter(pW) # from parameters to expressions
    b = dy.parameter(pB)
    
    if len(inputs) > documentLength:
        inputs = inputs[0:documentLength]
    
    emb_vectors = [lookup[i] for i in inputs]
    
    while(len(emb_vectors) < documentLength):
        pad = dy.vecInput(embDimension)
        pad.set(np.zeros(embDimension))
        emb_vectors.append(pad)
    
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax( (W*net_input) + b)
    loss = -dy.log(dy.pick(net_output, expected_output))
    return loss
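A hedged setup sketch for the two functions above: the globals they rely on (pW, pB, lookup, documentLength, embDimension) are defined here with illustrative sizes, which are assumptions rather than values from the original code.

# Hypothetical globals and one training / prediction step.
import dynet as dy
import numpy as np

documentLength = 100
embDimension = 50
vocabSize = 10000
numClasses = 2

m = dy.ParameterCollection()
trainer = dy.SimpleSGDTrainer(m)
lookup = m.add_lookup_parameters((vocabSize, embDimension))
pW = m.add_parameters((numClasses, documentLength * embDimension))
pB = m.add_parameters(numClasses)

loss = create_network_return_loss([1, 2, 3], 1)
loss.backward()
trainer.update()
predicted = create_network_return_best([1, 2, 3])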
Example #37
HIDDEN_SIZE = 8
ITERATIONS = 2000

m = dy.Model()
trainer = dy.SimpleSGDTrainer(m)

W = m.add_parameters((HIDDEN_SIZE, 2))
b = m.add_parameters(HIDDEN_SIZE)
V = m.add_parameters((1, HIDDEN_SIZE))
a = m.add_parameters(1)

if len(sys.argv) == 2:
  m.populate_from_textfile(sys.argv[1])

x = dy.vecInput(2)
y = dy.scalarInput(0)
h = dy.tanh((W*x) + b)
if xsent:
    y_pred = dy.logistic((V*h) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1
    F = 0
else:
    y_pred = (V*h) + a
    loss = dy.squared_distance(y_pred, y)
    T = 1
    F = -1


for iter in range(ITERATIONS):
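    # The loop body is cut off in this snippet; the completion below is a
    # sketch following DyNet's standard xor example.
    mloss = 0.0
    for mi in range(4):
        x1 = mi % 2
        x2 = (mi // 2) % 2
        x.set([T if x1 else F, T if x2 else F])
        y.set(T if x1 != x2 else F)
        mloss += loss.scalar_value()
        loss.backward()
        trainer.update()
    mloss /= 4.
    print("loss: %0.9f" % mloss)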
Example #38
trainer = dy.SimpleSGDTrainer(m)

pW1 = m.add_parameters((HIDDEN_SIZE, 2), device="GPU:1")
pb1 = m.add_parameters(HIDDEN_SIZE, device="GPU:1")
pW2 = m.add_parameters((HIDDEN_SIZE, HIDDEN_SIZE), device="GPU:0")
pb2 = m.add_parameters(HIDDEN_SIZE, device="GPU:0")
pV = m.add_parameters((1, HIDDEN_SIZE), device="CPU")
pa = m.add_parameters(1, device="CPU")

if len(sys.argv) == 2:
  m.populate_from_textfile(sys.argv[1])

dy.renew_cg()
W1, b1, W2, b2, V, a = dy.parameter(pW1, pb1, pW2, pb2, pV, pa)

x = dy.vecInput(2, "GPU:1")
y = dy.scalarInput(0, "CPU")
h1 = dy.tanh((W1*x) + b1)
h1_gpu0 = dy.to_device(h1, "GPU:0")
h2 = dy.tanh((W2*h1_gpu0) + b2)
h2_cpu = dy.to_device(h2, "CPU")
if xsent:
    y_pred = dy.logistic((V*h2_cpu) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1 
    F = 0 
else:
    y_pred = (V*h2_cpu) + a 
    loss = dy.squared_distance(y_pred, y)
    T = 1 
    F = -1