def predict_batch(self, words_batch):
    dynet.renew_cg()
    # Pad every sentence to the batch maximum; id 0 fills the empty slots.
    length = max(len(words) for words in words_batch)
    word_ids = np.zeros((length, len(words_batch)), dtype='int32')
    for j, words in enumerate(words_batch):
        for i, word in enumerate(words):
            word_ids[i, j] = self.vw.w2i.get(word, self.UNK)
    # One batched embedding lookup per time step.
    wembs = [dynet.lookup_batch(self._E, word_ids[i]) for i in range(length)]
    f_state = self._fwd_lstm.initial_state()
    b_state = self._bwd_lstm.initial_state()
    fw = [x.output() for x in f_state.add_inputs(wembs)]
    bw = [x.output() for x in b_state.add_inputs(reversed(wembs))]
    H = dynet.parameter(self._pH)
    O = dynet.parameter(self._pO)
    tags_batch = [[] for _ in range(len(words_batch))]
    # Reversing bw re-aligns the backward outputs with the forward ones.
    for f, b in zip(fw, reversed(bw)):
        r_t = O * dynet.tanh(H * dynet.concatenate([f, b]))
        out = dynet.softmax(r_t).npvalue()
        for j in range(len(words_batch)):
            tags_batch[j].append(self.vt.i2w[np.argmax(out.T[j])])
    return tags_batch
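A hedged usage sketch for predict_batch: `tagger` is an assumed instance of the surrounding class, not defined in the snippet. Because sentences are padded to the batch maximum and a tag is emitted for every padded position, each output sequence may need truncating back to its sentence length:

batch = [["The", "cat", "sat"], ["Dogs", "bark"]]
tags_batch = tagger.predict_batch(batch)
# Drop the tags predicted for padding slots of the shorter sentences.
tags_batch = [tags[:len(words)] for words, tags in zip(batch, tags_batch)]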
def __attention_mlp(self, h_fs_matrix, h_e, F):
    W1_att_f = dy.parameter(self.W1_att_f)
    W1_att_e = dy.parameter(self.W1_att_e)
    w2_att = dy.parameter(self.w2_att)
    # Calculate the alignment score vector
    # Hint: Can we make this more efficient?
    a_t = self._mlp(W1_att_f, W1_att_e, w2_att, h_fs_matrix, h_e, F)
    alignment = dy.softmax(a_t)
    c_t = h_fs_matrix * alignment
    return alignment, c_t
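The helper self._mlp is not shown above. A minimal sketch of what it plausibly computes, assuming the standard MLP (Bahdanau-style) scorer, that w2_att is a 1 x att_dim parameter, and that h_fs_matrix stacks the F source states as columns:

def _mlp(self, W1_att_f, W1_att_e, w2_att, h_fs_matrix, h_e, F):
    # Project all F source states in one matrix product: (att_dim x F).
    src_proj = W1_att_f * h_fs_matrix
    # Project the current decoder state once: (att_dim,).
    tgt_proj = W1_att_e * h_e
    # Broadcast the decoder projection over every column, squash, and score;
    # the result is a length-F vector of unnormalized alignment scores.
    return dy.transpose(w2_att * dy.tanh(dy.colwise_add(src_proj, tgt_proj)))

Scoring all F positions with one matrix product, rather than a Python loop over columns, is also the efficiency gain the inline hint asks about.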
def attend(self, input_mat, state, w1dt, input_len, batch_size):
    global attention_w2
    global attention_v
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    w2dt = w2 * dy.concatenate(list(state.s()))
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    unnormalized = dy.reshape(unnormalized, (input_len,), batch_size)
    att_weights = dy.softmax(unnormalized)
    context = input_mat * att_weights
    return context, att_weights
def attend_batch(input_mat, state, w1dt, batch_size, input_length):
    global attention_w2
    global attention_v
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    w2dt = w2 * dy.concatenate(list(state.s()))
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    attention_reshaped = dy.reshape(unnormalized, (input_length,), batch_size)
    att_weights = dy.softmax(attention_reshaped)
    context = input_mat * att_weights
    return context
def predictNextWord(sentence, builder, wlookup, mR, mB):
    dy.renew_cg()
    init_state = builder.initial_state()
    R = dy.parameter(mR)
    bias = dy.parameter(mB)
    state = init_state
    for cw in sentence:
        # assume word is already a word-id
        x_t = dy.lookup(wlookup, int(cw))
        state = state.add_input(x_t)
    y_t = state.output()
    r_t = bias + (R * y_t)
    prob = dy.softmax(r_t)
    return prob
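A hedged usage sketch for predictNextWord: pull the distribution out of the graph and take the argmax as the predicted next word id. The model objects (builder, wlookup, mR, mB) are assumed to come from the surrounding code, and the prefix ids are illustrative values.

import numpy as np

prefix = [12, 7, 430]  # word ids of the sentence so far
prob = predictNextWord(prefix, builder, wlookup, mR, mB)
next_id = int(np.argmax(prob.npvalue()))  # most likely next word id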
def attend(input_mat, state, w1dt):
    global attention_w2
    global attention_v
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    # input_mat: (encoder_state x seqlen) => input vecs concatenated as cols
    # w1dt: (attdim x seqlen)
    # w2dt: (attdim,) -- w2 times the concatenated decoder state vector
    w2dt = w2 * dy.concatenate(list(state.s()))
    # att_weights: (seqlen,) row vector
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(unnormalized)
    # context: (encoder_state)
    context = input_mat * att_weights
    return context
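One decoder step using attend, as a hedged sketch: w1dt depends only on the source, so it can be computed once per sentence and reused at every step. Here encoded, attention_w1, and dec_lstm are assumed to be built as in the generate examples below, and first_decoder_input is an assumed placeholder vector.

input_mat = dy.concatenate_cols(encoded)       # (encoder_state x seqlen)
w1dt = dy.parameter(attention_w1) * input_mat  # cached for the whole decode
s = dec_lstm.initial_state().add_input(first_decoder_input)
context = attend(input_mat, s, w1dt)           # (encoder_state,) weighted sum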
def generate(self, in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = self.embed_sentence(in_seq)
    encoded = self.encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)
    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = w1 * input_mat
    last_output_embeddings = output_lookup[EOS]
    s = dec_lstm.initial_state()
    c_t_previous = dy.vecInput(STATE_SIZE * 2)
    out = ''
    count_EOS = 0
    for i in range(len(in_seq) * 2):
        if count_EOS == 2:
            break
        vector = dy.concatenate([last_output_embeddings, c_t_previous])
        s = s.add_input(vector)
        h_t = s.output()
        c_t, alpha_t = self.attend(input_mat, s, w1dt)
        h_c_concat = dy.concatenate([h_t, c_t])
        out_vector = dy.affine_transform([b, w, h_c_concat])
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        c_t_previous = c_t
        if next_char == EOS:
            count_EOS += 1
            continue
        out += " " + id2word_en[next_char]
    return out
def __call__(self, words):
    dynet.renew_cg()
    word_ids = [self.vw.w2i.get(w, self.UNK) for w in words]
    wembs = [self._E[w] for w in word_ids]
    f_state = self._fwd_lstm.initial_state()
    b_state = self._bwd_lstm.initial_state()
    fw = [x.output() for x in f_state.add_inputs(wembs)]
    bw = [x.output() for x in b_state.add_inputs(reversed(wembs))]
    H = dynet.parameter(self._pH)
    O = dynet.parameter(self._pO)
    tags = []
    for f, b in zip(fw, reversed(bw)):
        r_t = O * dynet.tanh(H * dynet.concatenate([f, b]))
        out = dynet.softmax(r_t)
        tags.append(self.vt.i2w[np.argmax(out.npvalue())])
    return tags
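A hedged usage sketch for the tagger's __call__: `tagger` is an assumed instance of the class, and the call returns exactly one tag per input token.

words = ["The", "cat", "sat"]
for word, tag in zip(words, tagger(words)):
    print(word, tag)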
def generate(self, in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = self.embed_sentence(in_seq, True)
    encoded = self.encode_generation(enc_fwd_lstm, enc_bwd_lstm, embedded)
    h_len = len(encoded)
    curr_bsize = 1
    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    H_source = dy.concatenate_cols(encoded)
    s = dec_lstm.initial_state()
    ctx_t0 = dy.vecInput(hidden_size * 2)
    last_output_embeddings = output_lookup[word2idx_en['<s>']]
    # w1dt depends only on the source, so it is computed once and cached
    # for the entire decoding phase.
    w1dt = w1 * H_source
    out = []
    count_EOS = 0
    for i in range(len(in_seq) * 2):
        if count_EOS == 1:
            break
        x = dy.concatenate([ctx_t0, last_output_embeddings])
        s = s.add_input(x)
        h_t = s.output()
        ctx_t, alpha_t = self.attend(H_source, s, w1dt, h_len, curr_bsize)
        out_vector = w * dy.concatenate([h_t, ctx_t]) + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        if idx2word_en[next_char] == '<EOS>':
            count_EOS += 1
            continue
        out.append(idx2word_en[next_char])
        ctx_t0 = ctx_t
    return ' '.join(out)
def generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    dy.renew_cg()
    embedded = embed_sentence(in_seq)
    encoded = encode_batch(enc_fwd_lstm, enc_bwd_lstm, embedded)
    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None
    last_output_embeddings = output_lookup[BOS]
    s = dec_lstm.initial_state()
    c_t_minus_1 = dy.vecInput(state_size * 2)
    out = []
    count_EOS = 0
    for i in range(len(in_seq) * 2):
        if count_EOS == 1:
            break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([last_output_embeddings, c_t_minus_1])
        s = s.add_input(vector)
        h_t = s.output()
        # Batch of 1 at generation time; attend over the full source length.
        c_t = attend_batch(input_mat, s, w1dt, 1, len(encoded))
        out_vector = dy.affine_transform([b, w, dy.concatenate([h_t, c_t])])
        probs = dy.softmax(out_vector).vec_value()
        next_word = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_word]
        c_t_minus_1 = c_t
        if next_word == EOS:
            count_EOS += 1
            continue
        out.append(english_word_vocab.i2w[next_word])
    return " ".join(out[1:])