Example #1
    def predict_batch(self, words_batch):
        dynet.renew_cg()
        # Build a (length x batch) matrix of word ids; unknown words map to
        # UNK and shorter sentences are implicitly padded with id 0.
        length = max(len(words) for words in words_batch)
        word_ids = np.zeros((length, len(words_batch)), dtype='int32')
        for j, words in enumerate(words_batch):
            for i, word in enumerate(words):
                word_ids[i, j] = self.vw.w2i.get(word, self.UNK)
        # One batched lookup per time step: wembs[i] holds the embeddings of
        # word i for every sentence in the minibatch.
        wembs = [dynet.lookup_batch(self._E, word_ids[i]) for i in range(length)]
        
        f_state = self._fwd_lstm.initial_state()
        b_state = self._bwd_lstm.initial_state()

        fw = [x.output() for x in f_state.add_inputs(wembs)]
        bw = [x.output() for x in b_state.add_inputs(reversed(wembs))]

        H = dynet.parameter(self._pH)
        O = dynet.parameter(self._pO)
        
        tags_batch = [[] for _ in range(len(words_batch))]
        # bw was built over the reversed inputs, so reversing it again aligns
        # forward and backward states for the same time step.
        for i, (f, b) in enumerate(zip(fw, reversed(bw))):
            r_t = O * (dynet.tanh(H * dynet.concatenate([f, b])))
            # npvalue() on a batched expression gives a (ntags x batch) array;
            # pick the best tag for each sentence in the batch.
            out = dynet.softmax(r_t).npvalue()
            for j in range(len(words_batch)):
                tags_batch[j].append(self.vt.i2w[np.argmax(out.T[j])])
        return tags_batch
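This example (and Example #8 below) assumes vocabulary helpers self.vw (words) and self.vt (tags) that expose w2i / i2w mappings and an UNK id. A minimal, hypothetical stand-in could look like the class below; the real implementation in the original project may differ.

class Vocab(object):
    # Hypothetical minimal vocabulary: maps words to ids and back.
    def __init__(self, words):
        self.i2w = sorted(set(words))                       # id -> word
        self.w2i = {w: i for i, w in enumerate(self.i2w)}   # word -> id

    def size(self):
        return len(self.i2w)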
Example #2
    def __attention_mlp(self, h_fs_matrix, h_e, F):
        W1_att_f = dy.parameter(self.W1_att_f)
        W1_att_e = dy.parameter(self.W1_att_e)
        w2_att = dy.parameter(self.w2_att)
        # Calculate the alignment score vector
        # Hint: Can we make this more efficient?
        a_t = self._mlp(W1_att_f, W1_att_e, w2_att, h_fs_matrix, h_e, F)
        alignment = dy.softmax(a_t)
        c_t = h_fs_matrix * alignment
        return alignment, c_t
Example #3
    def attend(self, input_mat, state, w1dt, input_len, batch_size):
        global attention_w2
        global attention_v
        w2 = dy.parameter(attention_w2)
        v = dy.parameter(attention_v)
        w2dt = w2 * dy.concatenate(list(state.s()))
        unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
        unnormalized = dy.reshape(unnormalized, (input_len, ), batch_size)
        att_weights = dy.softmax(unnormalized)

        context = input_mat * att_weights
        return context, att_weights
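As a standalone shape check of the scoring pattern above (dy.colwise_add broadcasts the transformed decoder state over every encoder column), here is a toy sketch with made-up sizes and parameter names; it is an illustration only, not code from the original model.

import numpy as np
import dynet as dy

m = dy.ParameterCollection()
attdim, statedim, seqlen = 3, 4, 5
p_W1 = m.add_parameters((attdim, statedim))   # applied once to all encoder columns
p_W2 = m.add_parameters((attdim, statedim))   # applied to the decoder state
p_v = m.add_parameters((1, attdim))           # turns each column into a scalar score

dy.renew_cg()
W1, W2, v = dy.parameter(p_W1), dy.parameter(p_W2), dy.parameter(p_v)
H = dy.inputTensor(np.random.rand(statedim, seqlen))   # encoder states as columns
s = dy.inputTensor(np.random.rand(statedim))           # stand-in for the decoder state
w1dt = W1 * H                                                    # (attdim x seqlen)
w2dt = W2 * s                                                    # (attdim,)
scores = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))   # (seqlen x 1)
weights = dy.softmax(scores)    # one weight per source position, sums to 1
context = H * weights           # weighted sum of the encoder columns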
Example #4
def attend_batch(input_mat, state, w1dt, batch_size, input_length):
    #print "in attend batch"
    global attention_w2
    global attention_v
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    #print "Calculating w2dt"
    w2dt = w2 * dy.concatenate(list(state.s()))
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    attention_reshaped = dy.reshape(unnormalized, (input_length, ), batch_size)
    att_weights = dy.softmax(attention_reshaped)
    context = input_mat * att_weights
    return context
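attend_batch (like the attend methods above and the attend in Example #6 below) reads module-level globals that are never declared in these snippets. The sketch below shows one plausible way to declare them; the names match the snippets, but the sizes and exact dimensions are assumptions, not the original repositories' values.

import dynet as dy

model = dy.ParameterCollection()
STATE_SIZE, ATTENTION_SIZE, VOCAB_SIZE, EMB_SIZE = 64, 32, 10000, 64  # illustrative only

# Attention parameters shared by the attend helpers.
attention_w1 = model.add_parameters((ATTENTION_SIZE, STATE_SIZE * 2))  # applied to encoder columns
attention_w2 = model.add_parameters((ATTENTION_SIZE, STATE_SIZE * 2))  # input size must match concatenate(state.s())
attention_v = model.add_parameters((1, ATTENTION_SIZE))                # scores each column

# Output projection and embeddings used by the generate functions below.
decoder_w = model.add_parameters((VOCAB_SIZE, STATE_SIZE + STATE_SIZE * 2))
decoder_b = model.add_parameters((VOCAB_SIZE,))
output_lookup = model.add_lookup_parameters((VOCAB_SIZE, EMB_SIZE))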
Example #5
def predictNextWord(sentence, builder, wlookup, mR, mB):
    dy.renew_cg()
    init_state = builder.initial_state()
    R = dy.parameter(mR)
    bias = dy.parameter(mB)
    state = init_state
    for cw in sentence:
        # assume word is already a word-id
        x_t = dy.lookup(wlookup, int(cw))
        state = state.add_input(x_t)
    y_t = state.output()
    r_t = bias + (R * y_t)
    prob = dy.softmax(r_t)
    return prob
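predictNextWord returns a DyNet expression, not a NumPy array. The standalone sketch below (toy scores and a made-up gold id, not part of the original code) shows the two usual ways to consume such a distribution: greedy prediction and a cross-entropy training loss.

import numpy as np
import dynet as dy

dy.renew_cg()
scores = dy.inputTensor([0.1, 2.0, -1.0])       # toy output scores over a 3-word vocabulary
prob = dy.softmax(scores)                       # same kind of expression Example #5 returns
predicted_id = int(np.argmax(prob.npvalue()))   # greedy next-word prediction
gold_id = 1                                     # hypothetical correct next word
loss = -dy.log(dy.pick(prob, gold_id))          # cross-entropy for the known next word
loss_value = loss.value()

In practice dy.pickneglogsoftmax(scores, gold_id) fuses the softmax and the loss and is the numerically safer choice for training.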
Example #6
def attend(input_mat, state, w1dt):
	global attention_w2
	global attention_v
	w2 = dy.parameter(attention_w2)
	v = dy.parameter(attention_v)

	# input_mat: (encoder_state x seqlen) => input vecs concatenated as cols
	# w1dt: (attdim x seqlen)
	# w2dt: (attdim,) vector
	w2dt = w2*dy.concatenate(list(state.s()))
	# att_weights: (seqlen,) column vector over source positions
	unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
	att_weights = dy.softmax(unnormalized)
	# context: (encoder_state)
	context = input_mat * att_weights
	return context
Example #7
    def generate(self, in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
        embedded = self.embed_sentence(in_seq)
        encoded = self.encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

        w = dy.parameter(decoder_w)
        b = dy.parameter(decoder_b)
        w1 = dy.parameter(attention_w1)
        input_mat = dy.concatenate_cols(encoded)

        w1dt = w1 * input_mat

        last_output_embeddings = output_lookup[EOS]

        s = dec_lstm.initial_state()
        c_t_previous = dy.vecInput(STATE_SIZE * 2)

        out = ''
        count_EOS = 0

        for i in range(len(in_seq) * 2):
            if count_EOS == 2: break
            vector = dy.concatenate([last_output_embeddings, c_t_previous])
            s = s.add_input(vector)
            h_t = s.output()
            c_t, alpha_t = self.attend(input_mat, s, w1dt)

            h_c_concat = dy.concatenate([h_t, c_t])
            out_vector = dy.affine_transform([b, w, h_c_concat])

            probs = dy.softmax(out_vector).vec_value()

            next_char = probs.index(max(probs))
            last_output_embeddings = output_lookup[next_char]
            c_t_previous = c_t

            if next_char == EOS:

                count_EOS += 1
                continue

            out += " " + id2word_en[next_char]

        return out
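This generate method, like Examples #9 and #10, calls embed_sentence / encode_sentence helpers that are not shown here. Below is a hedged sketch of a bidirectional encoder in the same style as Examples #1 and #8; the builder sizes and the function name are assumptions, not the original code.

import dynet as dy

model = dy.ParameterCollection()
EMB_SIZE, STATE_SIZE = 64, 64   # illustrative sizes
enc_fwd_lstm = dy.LSTMBuilder(1, EMB_SIZE, STATE_SIZE, model)
enc_bwd_lstm = dy.LSTMBuilder(1, EMB_SIZE, STATE_SIZE, model)

def encode_sentence_sketch(fwd_lstm, bwd_lstm, embedded):
    # embedded: list of per-word embedding expressions for one sentence.
    fwd_out = [s.output() for s in fwd_lstm.initial_state().add_inputs(embedded)]
    bwd_out = [s.output() for s in bwd_lstm.initial_state().add_inputs(list(reversed(embedded)))]
    # Concatenate forward and backward states of the same position, giving
    # vectors of size STATE_SIZE * 2 (matching dy.vecInput(STATE_SIZE * 2) above).
    return [dy.concatenate([f, b]) for f, b in zip(fwd_out, reversed(bwd_out))]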
Example #8
    def __call__(self, words):
        dynet.renew_cg()
        word_ids = [self.vw.w2i.get(w, self.UNK) for w in words]
        wembs = [self._E[w] for w in word_ids]
        
        f_state = self._fwd_lstm.initial_state()
        b_state = self._bwd_lstm.initial_state()

        fw = [x.output() for x in f_state.add_inputs(wembs)]
        bw = [x.output() for x in b_state.add_inputs(reversed(wembs))]

        H = dynet.parameter(self._pH)
        O = dynet.parameter(self._pO)
        
        tags = []
        for i, (f, b) in enumerate(zip(fw, reversed(bw))):
            r_t = O * (dynet.tanh(H * dynet.concatenate([f, b])))
            out = dynet.softmax(r_t)
            tags.append(self.vt.i2w[np.argmax(out.npvalue())])
        return tags
Example #9
    def generate(self, in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
        embedded = self.embed_sentence(in_seq, True)
        encoded = self.encode_generation(enc_fwd_lstm, enc_bwd_lstm, embedded)
        h_len = len(encoded)
        curr_bsize = 1

        w = dy.parameter(decoder_w)
        b = dy.parameter(decoder_b)
        w1 = dy.parameter(attention_w1)

        H_source = dy.concatenate_cols(encoded)
        s = dec_lstm.initial_state()
        ctx_t0 = dy.vecInput(hidden_size * 2)
        last_output_embeddings = output_lookup[word2idx_en['<s>']]
        w1dt = w1 * H_source

        out = []
        count_EOS = 0
        for i in range(len(in_seq)*2):
            if count_EOS == 1: break
            # w1dt can be computed and cached once for the entire decoding phase
            x = dy.concatenate([ctx_t0, last_output_embeddings])
            #print "Attention: Generate"
            s = s.add_input(x)
            h_t = s.output()
            ctx_t, alpha_t = self.attend(H_source, s, w1dt, h_len, curr_bsize)

            out_vector = w * dy.concatenate([h_t, ctx_t]) + b
            probs = dy.softmax(out_vector).vec_value()
            next_char = probs.index(max(probs))
            last_output_embeddings = output_lookup[next_char]
            if idx2word_en[next_char] == '<EOS>':
                count_EOS += 1
                continue

            out.append(idx2word_en[next_char])
            ctx_t0 = ctx_t

        return ' '.join(out)
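All of the generate functions here decode greedily via probs.index(max(probs)). As a standalone illustration (toy scores, not taken from the original repositories), sampling from the same kind of distribution is a small change:

import numpy as np
import dynet as dy

dy.renew_cg()
out_vector = dy.inputTensor([0.2, 1.5, -0.3, 0.9])   # toy decoder scores over a 4-word vocab
probs = dy.softmax(out_vector).npvalue()
greedy_id = int(np.argmax(probs))                     # what the examples above do
sampled_id = int(np.random.choice(len(probs), p=probs / probs.sum()))  # sampled alternative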
Example #10
def generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
	#print "in generate"
	dy.renew_cg()
	embedded = embed_sentence(in_seq)
	encoded = encode_batch(enc_fwd_lstm, enc_bwd_lstm, embedded)

	w = dy.parameter(decoder_w)
	b = dy.parameter(decoder_b)
	w1 = dy.parameter(attention_w1)
	input_mat = dy.concatenate_cols(encoded)
	w1dt = None

	last_output_embeddings = output_lookup[BOS]
	#s = dec_lstm.initial_state([encoded[-1]])
	s = dec_lstm.initial_state()
	c_t_minus_1 = dy.vecInput(state_size*2)

	out = []
	count_EOS = 0
	for i in range(len(in_seq)*2):
		if count_EOS == 1: break
		# w1dt can be computed and cached once for the entire decoding phase
		w1dt = w1dt or w1 * input_mat
		vector = dy.concatenate([last_output_embeddings, c_t_minus_1])
		s = s.add_input(vector)
		h_t = s.output()
		c_t = attend_batch(input_mat, s, w1dt, 1, 1)
		out_vector = dy.affine_transform([b, w, dy.concatenate([h_t, c_t])])
		probs = dy.softmax(out_vector).vec_value()
		next_word = probs.index(max(probs))
		last_output_embeddings = output_lookup[next_word]
		c_t_minus_1 = c_t
		if next_word == EOS:
			count_EOS += 1
			continue

		out.append(english_word_vocab.i2w[next_word])
	return " ".join(out[1:])