def calc_sent_loss(sent):
    # Create a computation graph
    dy.renew_cg()

    # Get embeddings for the sentence
    emb = [W_w_p[x] for x in sent]

    # Sample K negative words for each predicted word at each position
    all_neg_words = np.random.choice(nwords, size=2 * N * K * len(emb), replace=True, p=word_probabilities)

    # W_w = dy.parameter(W_w_p)

    # Step through the sentence and calculate the negative and positive losses
    all_losses = []
    for i, my_emb in enumerate(emb):
        neg_words = all_neg_words[i * K * 2 * N:(i + 1) * K * 2 * N]
        pos_words = ([sent[x] if x >= 0 else S for x in range(i - N, i)] +
                     [sent[x] if x < len(sent) else S for x in range(i + 1, i + N + 1)])
        neg_loss = -dy.log(dy.logistic(-dy.dot_product(my_emb, dy.lookup_batch(W_c_p, neg_words))))
        pos_loss = -dy.log(dy.logistic(dy.dot_product(my_emb, dy.lookup_batch(W_c_p, pos_words))))
        all_losses.append(dy.sum_batches(neg_loss) + dy.sum_batches(pos_loss))
    return dy.esum(all_losses)
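# Toy illustration (separate from the function above; the model, vocabulary size and
# dimensions here are made up) of the idiom it relies on: dotting one unbatched
# embedding against a batched context lookup yields one score per sampled word, and
# dy.sum_batches collapses them into a single negative-sampling loss term.
import dynet as dy

dy.renew_cg()
model = dy.Model()
W_w_toy = model.add_lookup_parameters((50, 16))   # toy word embeddings
W_c_toy = model.add_lookup_parameters((50, 16))   # toy context embeddings

center = W_w_toy[7]                               # (16,), batch size 1
neg_ids = [3, 11, 42]                             # K sampled negative words
neg_vecs = dy.lookup_batch(W_c_toy, neg_ids)      # (16,) with batch size 3
scores = dy.dot_product(center, neg_vecs)         # scalar with batch size 3 (broadcast over the batch)
neg_loss = dy.sum_batches(-dy.log(dy.logistic(-scores)))
print(neg_loss.value())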
def __call__(self, indexes: Dict[str, List[Indices]], is_train=False) -> List[dy.Expression]:
    len_s = len(indexes['head'][0])
    batch_num = len(indexes['head'])
    vectors = []
    for i in range(len_s):
        # map token indexes -> vector
        w_idxes = [indexes['word']['word'][x][i] for x in range(batch_num)]
        g_idxes = [indexes['word']['glove'][x][i] for x in range(batch_num)]
        t_idxes = [indexes['tag']['tag'][x][i] for x in range(batch_num)]
        w_vec = dy.lookup_batch(self.wlookup, w_idxes)
        g_vec = dy.lookup_batch(self.glookup, g_idxes, False)
        w_vec += g_vec
        t_vec = dy.lookup_batch(self.tlookup, t_idxes)
        # build token mask with dropout scale
        # For only word dropped: tag * 3
        # For only tag dropped: word * 1.5
        # For both word and tag dropped: 0 vector
        if is_train:
            wm = np.random.binomial(1, 1. - self.cfg.WORD_DROP, batch_num).astype(np.float32)
            tm = np.random.binomial(1, 1. - self.cfg.TAG_DROP, batch_num).astype(np.float32)
            scale = np.logical_or(wm, tm) * 3 / (2 * wm + tm + 1e-12)
            wm *= scale
            tm *= scale
            w_vec *= dy.inputTensor(wm, batched=True)
            t_vec *= dy.inputTensor(tm, batched=True)
        vectors.append(dy.concatenate([w_vec, t_vec]))
    return vectors
def init(self, x, usr, test=True, update=True, update_mode='full'):
    if update_mode == 'biases':
        self.usr_vec = dy.logsumexp_dim(self.B_p.expr(True) + dy.lookup_batch(self.U_p, usr, True), d=1)
    elif update_mode == 'mixture_weights':
        self.usr_vec = dy.logsumexp_dim(self.B_p.expr(update) + dy.lookup_batch(self.U_p, usr, True), d=1)
    else:
        self.usr_vec = dy.logsumexp_dim(self.B_p.expr(update) + dy.lookup_batch(self.U_p, usr, update), d=1)
def baseline(self, sentences):
    # LTR / random non-projective
    if self.order == 1 or self.order == 3:
        if self.order == 3:
            np.random.shuffle(sentences)
        vecs = [self.tree_lstm(
                    L=None,
                    R=None,
                    x=dy.lookup_batch(self.embeddings, sentences[i], update=self.update_embeddings),
                ) for i in range(sentences.shape[0])]
        state = vecs[0]
        for i in range(1, sentences.shape[0]):
            state = self.tree_lstm(L=state, R=vecs[i], x=None)
    # RTL
    elif self.order == 2:
        vecs = [self.tree_lstm(
                    L=None,
                    R=None,
                    x=dy.lookup_batch(self.embeddings, sentences[i], update=self.update_embeddings),
                ) for i in range(sentences.shape[0])]
        state = vecs[0]
        for i in range(1, sentences.shape[0]):
            state = self.tree_lstm(L=vecs[i], R=state, x=None)
    else:
        raise ValueError("Invalid composition order " + str(self.order))
    return state.h
def get_loss_batch(self, sent_array):
    renew_cg()
    init_state = self.builder.initial_state()
    R = parameter(self.R)
    bias = parameter(self.bias)

    # get the wids and masks for each step, e.g.
    # ["I am good", "This is good", "Good Morning"] ->
    # wids:  [['I', 'This', 'Good'], ['am', 'is', 'Morning'], ['good', 'good', '<S>']]
    # masks: [[1, 1, 1], [1, 1, 1], [1, 1, 0]]
    tot_words = 0
    wids = []
    masks = []
    for i in range(len(sent_array[0])):
        wids.append([(sent[i] if len(sent) > i else 3) for sent in sent_array])
        mask = [(1 if len(sent) > i else 0) for sent in sent_array]
        masks.append(mask)
        tot_words += sum(mask)

    # start the rnn by inputting "<s>"
    init_ids = [2] * len(sent_array)
    s = init_state.add_input(dy.lookup_batch(self.lookup, init_ids))

    # feed word vectors into the RNN and predict the next word
    losses = []
    for wid, mask in zip(wids, masks):
        # calculate the softmax and loss
        score = dy.affine_transform([bias, R, s.output()])
        loss = dy.pickneglogsoftmax_batch(score, wid)
        # mask the loss if at least one sentence is shorter
        if mask[-1] != 1:
            mask_expr = dy.inputVector(mask)
            mask_expr = dy.reshape(mask_expr, (1,), len(sent_array))
            loss = loss * mask_expr
        losses.append(loss)
        # update the state of the RNN
        wemb = dy.lookup_batch(self.lookup, wid)
        s = s.add_input(wemb)

    return dy.sum_batches(dy.esum(losses)), tot_words
def step_batch(self, instances):
    dy.renew_cg()
    self.l2r_builder.set_dropout(0.2)
    self.r2l_builder.set_dropout(0.2)
    self.dec_builder.set_dropout(0.2)
    W_y = dy.parameter(self.W_y)
    b_y = dy.parameter(self.b_y)

    src_sents = [x[0] for x in instances]
    padded_src = self.__pad_batch(src_sents, True)
    src_cws = np.transpose(padded_src)

    tgt_sents = [x[1] for x in instances]
    padded_tgt = self.__pad_batch(tgt_sents, False)
    masks_tgt, num_words = self.__mask(tgt_sents)
    masks_tgt = np.transpose(masks_tgt)
    padded_tgt = np.transpose(padded_tgt)

    instance_size = len(instances)
    src_cws_rev = list(reversed(src_cws))

    # Bidirectional representations
    l2r_state = self.l2r_builder.initial_state()
    r2l_state = self.r2l_builder.initial_state()
    l2r_contexts = []
    r2l_contexts = []
    for (cws_l2r, cws_r2l) in zip(src_cws, src_cws_rev):
        l2r_state = l2r_state.add_input(dy.lookup_batch(self.src_lookup, cws_l2r))
        r2l_state = r2l_state.add_input(dy.lookup_batch(self.src_lookup, cws_r2l))
        l2r_contexts.append(l2r_state.output())  # [<S>, x_1, x_2, ..., </S>]
        r2l_contexts.append(r2l_state.output())  # [</S>, x_n, x_{n-1}, ..., <S>]
    r2l_contexts.reverse()  # [<S>, x_1, x_2, ..., </S>]

    # Combine the left and right representations for every word
    h_fs = []
    for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
        h_fs.append(dy.concatenate([l2r_i, r2l_i]))
    h_fs_matrix = dy.concatenate_cols(h_fs)

    losses = []

    # Decoder
    c_t = dy.vecInput(self.hidden_size * 2)
    start = dy.concatenate([dy.lookup_batch(self.tgt_lookup, len(tgt_sents) * [self.tgt_token_to_id[self.src_pad]]), c_t])
    dec_state = self.dec_builder.initial_state().add_input(start)
    for (cws, nws, mask) in zip(padded_tgt, padded_tgt[1:], masks_tgt):
        h_e = dec_state.output()
        c_t = self.__attention_mlp(h_fs_matrix, h_e)
        # Get the embedding for the current target word
        embed_t = dy.lookup_batch(self.tgt_lookup, cws)
        # Create input vector to the decoder
        x_t = dy.concatenate([embed_t, c_t])
        dec_state = dec_state.add_input(x_t)
        y_star = b_y + W_y * dec_state.output()
        loss = dy.pickneglogsoftmax_batch(y_star, nws)
        if mask[-1] == 0:
            mask_loss = dy.reshape(dy.inputVector(mask), (1,), instance_size)
            masked = loss * mask_loss
            losses.append(masked)
        else:
            losses.append(loss)

    # losses = [(x / num_words) for x in losses]
    return dy.sum_batches(dy.esum(losses)), num_words
def step_batch(self, instances):
    dy.renew_cg()
    W_y = dy.parameter(self.W_y)
    b_y = dy.parameter(self.b_y)

    src_sents = [x[0] for x in instances]
    padded_src = self.__pad_batch(src_sents)
    masks_src = np.transpose(self.__mask(padded_src))
    src_cws = np.transpose(padded_src)

    tgt_sents = [x[1] for x in instances]
    tgt_ids = []
    for sent in tgt_sents:
        sent = [self.tgt_token_to_id[x] for x in sent]
        tgt_ids.append(sent)
    # transpose to time-major order (wrapped in list() so it can be sliced under Python 3)
    tgt_ids = list(map(list, zip(*tgt_ids)))

    padded_src_rev = list(reversed(padded_src))
    src_cws_rev = np.transpose(padded_src_rev)

    # Bidirectional representations
    l2r_state = self.l2r_builder.initial_state()
    r2l_state = self.r2l_builder.initial_state()
    l2r_contexts = []
    r2l_contexts = []
    for (cws_l2r, cws_r2l) in zip(src_cws, src_cws_rev):
        l2r_state = l2r_state.add_input(dy.lookup_batch(self.src_lookup, cws_l2r))
        r2l_state = r2l_state.add_input(dy.lookup_batch(self.src_lookup, cws_r2l))
        l2r_contexts.append(l2r_state.output())  # [<S>, x_1, x_2, ..., </S>]
        r2l_contexts.append(r2l_state.output())  # [</S>, x_n, x_{n-1}, ..., <S>]
    r2l_contexts.reverse()  # [<S>, x_1, x_2, ..., </S>]

    # Combine the left and right representations for every word
    h_fs = []
    for (l2r_i, r2l_i) in zip(l2r_contexts, r2l_contexts):
        h_fs.append(dy.concatenate([l2r_i, r2l_i]))
    h_fs_matrix = dy.concatenate_cols(h_fs)

    losses = []
    num_words = 0

    # Decoder
    c_t = dy.vecInput(self.hidden_size * 2)
    start = dy.concatenate([dy.lookup_batch(self.tgt_lookup, len(tgt_sents) * [self.tgt_token_to_id['<S>']]), c_t])
    dec_state = self.dec_builder.initial_state().add_input(start)
    for (cws, nws, mask) in zip(tgt_ids, tgt_ids[1:], masks_src):
        h_e = dec_state.output()
        c_t = self.__attention_mlp(h_fs_matrix, h_e)
        # Get the embedding for the current target word
        embed_t = dy.lookup_batch(self.tgt_lookup, cws)
        # Create input vector to the decoder
        x_t = dy.concatenate([embed_t, c_t])
        dec_state = dec_state.add_input(x_t)
        y_star = b_y + W_y * dec_state.output()
        loss = dy.pickneglogsoftmax_batch(y_star, nws)
        if mask[0] == 0:
            mask_loss = dy.reshape(dy.inputVector(mask), (1,), self.BATCH_SIZE)
            loss = loss * mask_loss
        losses.append(loss)
        num_words += 1

    return dy.sum_batches(dy.esum(losses) / num_words), num_words
def calc_loss(sents):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]
    src_cws = []

    src_len = [len(sent) for sent in src_sents]
    max_src_len = np.max(src_len)
    num_words = 0

    for i in range(max_src_len):
        src_cws.append([sent[i] for sent in src_sents])

    # initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    # get the output of the first LSTM
    src_output = init_state_src.add_inputs([dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])[-1].output()

    # now decode
    all_losses = []

    # Decoder
    # need to mask padding at the end of the sentence
    tgt_cws = []
    tgt_len = [len(sent) for sent in tgt_sents]  # lengths of the target sentences
    max_tgt_len = np.max(tgt_len)
    masks = []

    for i in range(max_tgt_len):
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_words, mask in zip(tgt_cws[1:], masks):
        # feed the previous words into the decoder
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()
        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        loss = dy.pickneglogsoftmax_batch(s, next_words)
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1,), len(sents))
        mask_loss = loss * mask_expr
        all_losses.append(mask_loss)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words
def compute_decoder_batch_loss(self, encoded_inputs, input_masks, output_word_ids, output_masks, batch_size):
    self.readout = dn.parameter(self.params['readout'])
    self.bias = dn.parameter(self.params['bias'])
    self.w_c = dn.parameter(self.params['w_c'])
    self.u_a = dn.parameter(self.params['u_a'])
    self.v_a = dn.parameter(self.params['v_a'])
    self.w_a = dn.parameter(self.params['w_a'])

    # initialize the decoder rnn
    s_0 = self.decoder_rnn.initial_state()

    # initial "input feeding" vectors to feed decoder - 3*h
    init_input_feeding = dn.lookup_batch(self.init_lookup, [0] * batch_size)

    # initial feedback embeddings for the decoder, use begin seq symbol embedding
    init_feedback = dn.lookup_batch(self.output_lookup, [self.y2int[common.BEGIN_SEQ]] * batch_size)

    # init decoder rnn
    decoder_init = dn.concatenate([init_feedback, init_input_feeding])
    s = s_0.add_input(decoder_init)

    # loss per timestep
    losses = []

    # run the decoder through the output sequences and aggregate loss
    for i, step_word_ids in enumerate(output_word_ids):

        # returns h x batch size matrix
        decoder_rnn_output = s.output()

        # compute attention context vector for each sequence in the batch (returns 2h x batch size matrix)
        attention_output_vector, alphas = self.attend(encoded_inputs, decoder_rnn_output, input_masks)

        # compute output scores (returns vocab_size x batch size matrix)
        # h = readout * attention_output_vector + bias
        h = dn.affine_transform([self.bias, self.readout, attention_output_vector])

        # get batch loss for this timestep
        batch_loss = dn.pickneglogsoftmax_batch(h, step_word_ids)

        # mask the loss if at least one sentence is shorter
        if output_masks and output_masks[i][-1] != 1:
            mask_expr = dn.inputVector(output_masks[i])
            # noinspection PyArgumentList
            mask_expr = dn.reshape(mask_expr, (1,), batch_size)
            batch_loss = batch_loss * mask_expr

        # input feeding approach - input h (attention_output_vector) to the decoder
        # prepare for the next iteration - "feedback"
        feedback_embeddings = dn.lookup_batch(self.output_lookup, step_word_ids)
        decoder_input = dn.concatenate([feedback_embeddings, attention_output_vector])
        s = s.add_input(decoder_input)

        losses.append(batch_loss)

    # sum the loss over the time steps and batch
    total_batch_loss = dn.sum_batches(dn.esum(losses))
    return total_batch_loss
def init(self, x, usr, test=True, update=True, update_mode='full'):
    self.Wh = self.bh_p
    self.bh = self.bh_p
    self.Su = self.Su_p
    self.bu = self.bu_p
    if update_mode == 'biases':
        self.usr_vec = dy.lookup_batch(self.BU_p, usr, True)
    else:
        self.usr_vec = dy.lookup_batch(self.BU_p, usr, update)
def get_hidden_states(self, word_ids, upos_ids):
    n = word_ids.shape[-1]
    word_embs = [dy.lookup_batch(self.wlookup, word_ids[:, i]) for i in range(n)]
    upos_embs = [dy.lookup_batch(self.tlookup, upos_ids[:, i]) for i in range(n)]
    words = [dy.concatenate([w, p]) for w, p in zip(word_embs, upos_embs)]
    state_pairs_list = self.deep_bilstm.add_inputs(words)
    return state_pairs_list
def init(self, x, usr, test=True, update=True, update_mode='full'):
    self.Wh = self.bh_p
    self.bh = self.bh_p
    self.Su = self.Su_p
    self.bu = self.bu_p
    if update_mode == 'biases':
        # self.usr_vec = self.BU_p.expr(True)  # dy.pick(self.B_p.expr(True), index=0, dim=1) * dy.lookup_batch(self.U_p, usr, True)
        self.usr_vec = self.B_p.expr(True) * dy.lookup_batch(self.U_p, usr, True)
    elif update_mode == 'mixture_weights':
        self.usr_vec = self.B_p * dy.lookup_batch(self.U_p, usr, True)
    else:
        self.usr_vec = self.B_p * dy.lookup_batch(self.U_p, usr, update)
def encode(self, src, test=False):
    """Encode a batch of sentences

    Arguments:
        src (list): List of sentences. It is assumed that all
            source sentences have the same length.

    Keyword Arguments:
        test (bool): Switch used for things like dropout where the
            behaviour is different at test time (default: False)

    Returns:
        dynet.Expression: Expression of the encodings
    """
    # Prepare batch
    x, _ = self.prepare_batch(src, self.src_eos)
    # Add encoder to computation graph
    es = self.enc.initial_state()
    # Embed words
    wembs = [dy.lookup_batch(self.MS_p, iw) for iw in x]
    # Encode sentence
    encoded_states = es.transduce(wembs)
    # Use bidirectional encoder
    if self.bidir:
        res = self.rev_enc.initial_state()
        rev_encoded_states = res.transduce(wembs[::-1])[::-1]
    # Create encoding matrix
    H = dy.concatenate_cols(encoded_states)
    if self.bidir:
        H_bidir = dy.concatenate_cols(rev_encoded_states)
        H = dy.concatenate([H, H_bidir])
    if self.word_emb:
        H_word_embs = dy.concatenate_cols(wembs)
        H = dy.concatenate([H, H_word_embs])
    return H
def encode(self, src_sents):
    dy.renew_cg()

    # bidirectional representations
    forward_state = self.enc_forward_builder.initial_state()
    backward_state = self.enc_backward_builder.initial_state()

    src_words, src_masks = input_transpose(src_sents)
    src_words_embeds = [dy.lookup_batch(self.src_lookup, wids) for wids in src_words]
    src_words_embeds_reversed = src_words_embeds[::-1]

    forward_states = forward_state.add_inputs(src_words_embeds)
    backward_states = backward_state.add_inputs(src_words_embeds_reversed)[::-1]

    src_encodings = []
    forward_cells = []
    backward_cells = []
    for forward_state, backward_state in zip(forward_states, backward_states):
        fwd_cell, fwd_enc = forward_state.s()
        bak_cell, bak_enc = backward_state.s()
        src_encodings.append(dy.concatenate([fwd_enc, bak_enc]))
        forward_cells.append(fwd_cell)
        backward_cells.append(bak_cell)

    decoder_init = dy.concatenate([forward_cells[-1], backward_cells[0]])
    return src_encodings, decoder_init
def test_concatenate_to_batch(self):
    dy.renew_cg()
    x = dy.lookup_batch(self.p, [0, 1])
    y = dy.pick_batch_elem(x, 0)
    z = dy.pick_batch_elem(x, 1)
    w = dy.concatenate_to_batch([y, z])
    self.assertTrue(np.allclose(w.npvalue(), self.pval.T))
def Ext_embeds(self, sentences, predictFlag=False):
    if predictFlag:
        wordtoidx = self.ext_words_devtest
        lookup_matrix = self.elookup_devtest
    else:
        wordtoidx = self.ext_words_train
        lookup_matrix = self.elookup_train

    idxtoword = {ind: word for word, ind in wordtoidx.items()}

    ext_embs = []
    for sent in sentences:
        ext_embs.extend([entry.norm for entry in sent])
    ext_embs_set = list(set(ext_embs))

    ext_embs_idx = []
    for emb in ext_embs_set:
        try:
            w_ind = wordtoidx[emb]
            ext_embs_idx.append(w_ind)
        except KeyError:
            continue

    ext_lookup_batch = dy.lookup_batch(lookup_matrix, ext_embs_idx)
    projected_embs = self.projected_embs(ext_lookup_batch)

    proj_embs = {}
    for idx in range(len(ext_embs_idx)):
        proj_embs[idxtoword[ext_embs_idx[idx]]] = dy.pick_batch_elem(projected_embs, idx)

    return proj_embs
def __call__(self, x, test=True, update=True):
    wembs = [dy.lookup_batch(self.E, iw, update=update) for iw in x]
    # Encode sentence
    encoded_states = self.es.transduce(wembs)
    # Create encoding matrix
    H = dy.concatenate_cols(encoded_states)
    return H
def test_pick_batch_elems(self):
    dy.renew_cg()
    x = dy.lookup_batch(self.p, [0, 1])
    y = dy.pick_batch_elems(x, [0])
    self.assertTrue(np.allclose(y.npvalue(), self.pval[0]))
    z = dy.pick_batch_elems(x, [0, 1])
    self.assertTrue(np.allclose(z.npvalue(), self.pval.T))
def RNN_embeds(self, sentences, predictFlag=False):
    tokenIdChars = []
    for sent in sentences:
        tokenIdChars.extend([entry.idChars for entry in sent])
    tokenIdChars_set = set(map(tuple, tokenIdChars))
    tokenIdChars = list(map(list, tokenIdChars_set))
    tokenIdChars.sort(key=lambda x: -len(x))

    char_src_len = len(max(tokenIdChars, key=len))
    chars_mask = []
    char_ids = []
    for i in range(char_src_len):
        char_ids.append([(chars[i] if len(chars) > i else 4) for chars in tokenIdChars])
        char_mask = [(1 if len(chars) > i else 0) for chars in tokenIdChars]
        chars_mask.append(char_mask)

    char_embs = []
    for cid in char_ids:
        char_embs.append(dy.lookup_batch(self.clookup, cid))

    wordslen = list(map(lambda x: len(x), tokenIdChars))

    chr_embs = self.HybridCharembs.predict_sequence_batched(char_embs, chars_mask, wordslen, predictFlag)

    RNN_embs = {}
    for idx in range(len(tokenIdChars)):
        RNN_embs[str(tokenIdChars[idx])] = dy.pick_batch_elem(chr_embs, idx)

    return RNN_embs
def decode(self, prev_words):
    prev_dec_output = self.decoder(dy.lookup_batch(self.tgt_embeddings, prev_words))
    # Using Bahdanau-style attention, so we use the previous decoder output
    context_vector, _ = self.attention(self.encoder.encodings_matrix, prev_dec_output)
    scores = self.decoder.score(prev_dec_output, context_vector)
    return scores
def score_one_sequence(self, tag_scores, tags, batch_size):
    '''tags: list of tag ids at each time step'''
    # prepend the start tag for every sequence in the batch,
    # so that len(tag_scores) == len(tags) - 1
    tags = [[self.start_id] * batch_size] + tags
    score = dy.inputTensor(np.zeros(batch_size), batched=True)
    # tag_scores = dy.concatenate_cols(tag_scores)  # tot_tags, sent_len, batch_size
    for i in range(len(tags) - 1):
        score += dy.pick_batch(dy.lookup_batch(self.transition_matrix, tags[i + 1]), tags[i]) \
            + dy.pick_batch(tag_scores[i], tags[i + 1])
    score += dy.pick_batch(dy.lookup_batch(self.transition_matrix, [self.end_id] * batch_size), tags[-1])
    return score
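# Minimal sketch (toy tag set and ids, not taken from the class above) of the
# lookup_batch + pick_batch pair used there: for each batch element it reads one
# transition score T[next_tag][prev_tag] out of an (n_tags x n_tags) table.
import dynet as dy

dy.renew_cg()
model = dy.Model()
n_tags = 4
T = model.add_lookup_parameters((n_tags, n_tags))  # row i = scores for moving into tag i

next_tags = [2, 0, 3]   # one tag per batch element at step t+1
prev_tags = [1, 1, 2]   # one tag per batch element at step t

rows = dy.lookup_batch(T, next_tags)    # (n_tags,) with batch size 3: rows T[2], T[0], T[3]
trans = dy.pick_batch(rows, prev_tags)  # scalar with batch size 3: T[2][1], T[0][1], T[3][2]
print(trans.npvalue())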
def embed_sentence(self, ws, pwords, ts, chars, is_train):
    cembed = [dy.lookup_batch(self.clookup, c) for c in chars]
    char_fwd, char_bckd = self.char_lstm.builder_layers[0][0].initial_state().transduce(cembed)[-1], \
                          self.char_lstm.builder_layers[0][1].initial_state().transduce(reversed(cembed))[-1]
    crnn = dy.reshape(dy.concatenate_cols([char_fwd, char_bckd]), (self.options.we, ws.shape[0] * ws.shape[1]))
    cnn_reps = [list() for _ in range(len(ws))]
    for i in range(ws.shape[0]):
        cnn_reps[i] = dy.pick_batch(crnn, [i * ws.shape[1] + j for j in range(ws.shape[1])], 1)

    wembed = [dy.lookup_batch(self.wlookup, ws[i]) + dy.lookup_batch(self.elookup, pwords[i]) + cnn_reps[i]
              for i in range(len(ws))]
    posembed = [dy.lookup_batch(self.tlookup, ts[i]) for i in range(len(ts))]

    if (not is_train) or self.options.dropout == 0:
        return [dy.concatenate([wembed[i], posembed[i]]) for i in range(len(ts))]
    else:
        emb_masks = self.generate_emb_mask(ws.shape[0], ws.shape[1])
        return [dy.concatenate([dy.cmult(w, wm), dy.cmult(pos, posm)])
                for w, pos, (wm, posm) in zip(wembed, posembed, emb_masks)]
def run_lstm(self, word_inputs, tag_inputs, isTrain=True):
    batch_size = word_inputs.shape[1]
    seq_len = word_inputs.shape[0]

    word_embs = [
        dy.lookup_batch(self.word_embs, np.where(w < self._vocab.words_in_train, w, self._vocab.UNK)) +
        dy.lookup_batch(self.pret_word_embs, w, update=False)
        for w in word_inputs
    ]
    tag_embs = [dy.lookup_batch(self.tag_embs, pos) for pos in tag_inputs]

    if isTrain:
        emb_masks = self.generate_emb_mask(seq_len, batch_size)
        emb_inputs = [dy.concatenate([dy.cmult(w, wm), dy.cmult(pos, posm)])
                      for w, pos, (wm, posm) in zip(word_embs, tag_embs, emb_masks)]
    else:
        emb_inputs = [dy.concatenate([w, pos]) for w, pos in zip(word_embs, tag_embs)]

    common_top_input, c_fs, c_bs = biLSTM(self.cLSTM_builders, emb_inputs, batch_size,
                                          self.dropout_clstm_input if isTrain else 0.,
                                          self.dropout_clstm_hidden if isTrain else 0.)
    common_top_recur = dy.concatenate_cols(common_top_input)

    private_top_input, p_fs, p_bs = biLSTM(self.pLSTM_builders, emb_inputs, batch_size,
                                           self.dropout_plstm_input if isTrain else 0.,
                                           self.dropout_plstm_hidden if isTrain else 0.)
    private_top_recur = dy.concatenate_cols(private_top_input)

    if isTrain:
        common_top_recur = dy.dropout_dim(common_top_recur, 1, self.dropout_mlp)
        private_top_recur = dy.dropout_dim(private_top_recur, 1, self.dropout_mlp)

    return common_top_recur, private_top_recur, p_fs, p_bs
def decode_batch(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]
    output = array([output] * MB_SIZE)
    output = np.transpose(output)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = dy.lookup_batch(output_lookup, array([char2int[EOS]] * MB_SIZE))
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    losses = []

    for chars in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        loss = dy.pickneglogsoftmax_batch(out_vector, chars)
        last_output_embeddings = dy.lookup_batch(output_lookup, chars)
        losses.append(loss)

    return dy.sum_batches(dy.esum(losses))
def generate(self, minibatch):
    words, pwords, tags, _, _, _, chars, sen_lens, masks = minibatch
    embedded = self.embed_sentence(words, pwords, tags, chars, False)
    encoded = self.encode_sentence(embedded)
    input_mat = dy.concatenate_cols(encoded)

    w1dt = None
    last_output_embeddings = dy.lookup_batch(self.wlookup, words[0])
    last_tag_embeddings = dy.lookup_batch(self.tlookup, tags[0])
    empty_tensor = dy.reshape(dy.inputTensor(np.zeros((self.options.hdim * 2, len(words[0])), dtype=float)),
                              (self.options.hdim * 2,), len(words[0]))
    s = self.dec_lstm.initial_state().add_input(
        dy.concatenate([empty_tensor, last_output_embeddings, last_tag_embeddings]))

    out = np.zeros((words.shape[1], words.shape[0]), dtype=int)
    first_mask = np.full((words.shape[0], words.shape[1]), -float('inf'), dtype=float)
    mask = np.zeros((words.shape[0], words.shape[1]), dtype=float)
    first_mask[0] = np.array([0] * words.shape[1])
    mask[0] = np.array([-float('inf')] * words.shape[1])
    for m1 in range(masks.shape[0]):
        for m2 in range(masks.shape[1]):
            if masks[m1][m2] == 0:
                mask[m1][m2] = -float('inf')
            if sen_lens[m2] - 1 <= m1:
                mask[m1][m2] = -float('inf')

    for p in range(len(words)):
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or self.attention_w1.expr() * input_mat
        att_weights = self.attend(s, w1dt, False)
        vector = dy.concatenate([input_mat * att_weights, last_output_embeddings, last_tag_embeddings])
        s = s.add_input(vector)

        scores = att_weights.npvalue().reshape((mask.shape[0], mask.shape[1]))
        cur_mask = first_mask if p == 0 else mask
        scores = np.sum([scores, cur_mask], axis=0)
        next_positions = np.argmax(scores, axis=0)
        next_words = [words[position][i] for i, position in enumerate(next_positions)]
        next_tags = [tags[position][i] for i, position in enumerate(next_positions)]
        for i, position in enumerate(next_positions):
            mask[position][i] = -float('inf')
            out[i][p] = position
        last_output_embeddings = dy.lookup_batch(self.wlookup, next_words)
        last_tag_embeddings = dy.lookup_batch(self.tlookup, next_tags)

    dy.renew_cg()
    return out
def embed_batch_seq(self, wids):
    """
    Embedding method for a batch of sentences
    :param wids: Word IDs for a batch of sentences
    :return: Word embedding matrix
    """
    wembs_batch = [dynet.lookup_batch(self.src_lookup, wid) for wid in wids]
    return wembs_batch
def BuildLMGraph_batch(self, batch, sent_args=None):
    if "skip_renew" not in sent_args:
        dynet.renew_cg()

    APPLY_DROPOUT = self.args.dropout is not None and ("test" not in sent_args or sent_args["test"] != True)
    if APPLY_DROPOUT:
        self.gen_rnn.set_dropout(self.args.dropout)
    else:
        self.gen_rnn.disable_dropout()
    init_state = self.gen_rnn.initial_state()

    # MASK SENTENCES
    isents = []  # Dimension: maxSentLength * minibatch_size
    # List of lists to store whether an input is
    # present(1)/absent(0) for an example at a time step
    masks = []   # Dimension: maxSentLength * minibatch_size

    # No of words processed in this batch
    maxSentLength = max([len(sent) for sent in batch])
    for sent in batch:
        isents.append([self.vocab[word].i for word in sent] +
                      [self.vocab[self.vocab.END_TOK].i for _ in range(maxSentLength - len(sent))])
        masks.append([1 for _ in sent] + [0 for _ in range(maxSentLength - len(sent))])
    # transpose to time-major order (wrapped in list() so the results can be sliced under Python 3)
    isents = list(map(list, zip(*isents)))
    masks = list(map(list, zip(*masks)))

    R = dynet.parameter(self.gen_R)
    bias = dynet.parameter(self.gen_bias)
    vocab_basis = dynet.transpose(dynet.concatenate_cols([self.gen_lookup[i] for i in range(self.vocab.size)]))
    errs = []  # will hold expressions
    state = init_state

    for (mask, curr_words, next_words) in zip(masks[1:], isents, isents[1:]):
        x_t = dynet.lookup_batch(self.gen_lookup, curr_words)
        state = state.add_input(x_t)
        y_t = state.output()
        if APPLY_DROPOUT:
            y_t = dynet.dropout(y_t, self.args.dropout)
        r_t = vocab_basis * (bias + (R * y_t))
        err = dynet.pickneglogsoftmax_batch(r_t, next_words)

        # mask the loss if at least one sentence is shorter
        # (sents are sorted by decreasing length, so it must be at the bottom)
        if mask[-1] == 0:
            mask_expr = dynet.inputVector(mask)
            mask_expr = dynet.reshape(mask_expr, (1,), len(mask))
            err = err * mask_expr
        errs.append(err)

    nerr = dynet.esum(errs)
    return nerr
def BuildLMGraph(self, sents):
    dy.renew_cg()
    # initialize the RNN
    init_state = self.builder.initial_state()
    # parameters -> expressions
    R = dy.parameter(self.R)
    bias = dy.parameter(self.bias)

    S = vocab.w2i["<s>"]
    # get the cids and masks for each step
    tot_chars = 0
    cids = []
    masks = []
    for i in range(len(sents[0])):
        cids.append([(vocab.w2i[sent[i]] if len(sent) > i else S) for sent in sents])
        mask = [(1 if len(sent) > i else 0) for sent in sents]
        masks.append(mask)
        tot_chars += sum(mask)

    # start the rnn with "<s>"
    init_ids = cids[0]
    s = init_state.add_input(dy.lookup_batch(self.lookup, init_ids))

    losses = []
    # feed char vectors into the RNN and predict the next char
    for cid, mask in zip(cids[1:], masks[1:]):
        score = dy.affine_transform([bias, R, s.output()])
        loss = dy.pickneglogsoftmax_batch(score, cid)
        # mask the loss if at least one sentence is shorter
        if mask[-1] != 1:
            mask_expr = dy.inputVector(mask)
            mask_expr = dy.reshape(mask_expr, (1,), len(sents))
            loss = loss * mask_expr
        losses.append(loss)
        # update the state of the RNN
        cemb = dy.lookup_batch(self.lookup, cid)
        s = s.add_input(cemb)

    return dy.sum_batches(dy.esum(losses)), tot_chars
def _get_probabilities_over_batch(self, batch):
    dy.renew_cg()
    # the i-th lookup embeds the i-th item of every sequence in the batch
    embedded = [dy.lookup_batch(self.input_lookup, chars) for chars in zip(*batch)]
    state = self.rnn.initial_state()
    output_vec = state.transduce(embedded)[-1]
    w = self.W.expr(update=False)
    return w * output_vec
def cal_scores(self, src_encodings, masks, train):
    src_len = len(src_encodings)
    batch_size = src_encodings[0].dim()[1]
    heads_LRlayer = []
    mods_LRlayer = []
    for encoding in src_encodings:
        heads_LRlayer.append(self.leaky_ReLu(self.b_head.expr() + self.W_head.expr() * encoding))
        mods_LRlayer.append(self.leaky_ReLu(self.b_mod.expr() + self.W_mod.expr() * encoding))

    heads_labels = []
    heads = []
    labels = []
    neg_inf = dy.constant(1, -float("inf"))
    for row in range(1, src_len):  # exclude root @ index=0 since roots do not have heads
        scores_idx = []
        for col in range(src_len):
            dist = col - row
            mdist = self.dist_max
            dist_i = (min(dist, mdist - 1) + mdist if dist >= 0
                      else int(min(-1.0 * dist, mdist - 1)))
            dist_vec = dy.lookup_batch(self.dlookup, [dist_i] * batch_size)
            if train:
                input_vec = dy.concatenate([
                    dy.esum([dy.dropout(heads_LRlayer[col], self.dropout),
                             dy.dropout(mods_LRlayer[row], self.dropout)]),
                    dist_vec
                ])
            else:
                input_vec = dy.concatenate([dy.esum([heads_LRlayer[col], mods_LRlayer[row]]), dist_vec])
            score = self.scoreHeadModLabel(input_vec, train)
            mask = masks[row] and masks[col]
            join_scores = []
            for bdx in range(batch_size):
                if mask[bdx] == 1:
                    join_scores.append(dy.pick_batch_elem(score, bdx))
                else:
                    join_scores.append(dy.concatenate([neg_inf] * self.n_labels))
            scores_idx.append(dy.concatenate_to_batch(join_scores))
        heads_labels.append(dy.concatenate(scores_idx))

    return heads_labels
def calc_score_of_histories(words, dropout=0.0):
    # This will change from a list of histories, to a list of words in each history position
    words = np.transpose(words)
    # Lookup the embeddings and concatenate them
    emb = dy.concatenate([dy.lookup_batch(W_emb, x) for x in words])
    # Create the hidden layer
    h = dy.tanh(dy.affine_transform([b_h, W_h, emb]))
    # Perform dropout
    if dropout != 0.0:
        h = dy.dropout(h, dropout)
    # Calculate the score and return
    return dy.affine_transform([b_sm, W_sm, h])
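# Shape check (hypothetical embedding table; the real W_emb above is a global) for the
# transpose trick in calc_score_of_histories: a batch of 3 histories of length 2 becomes
# 2 batched lookups of batch size 3, concatenated into one (2*emb_dim,) vector per
# batch element.
import dynet as dy
import numpy as np

dy.renew_cg()
model = dy.Model()
W_emb_toy = model.add_lookup_parameters((100, 8))        # vocab 100, embedding dim 8

histories = np.array([[5, 7], [1, 2], [9, 9]])           # (batch=3, history=2)
positions = np.transpose(histories)                      # (history=2, batch=3)
emb = dy.concatenate([dy.lookup_batch(W_emb_toy, [int(w) for w in pos]) for pos in positions])
print(emb.dim())                                         # ((16,), 3): 2 positions * 8 dims, batch size 3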
def calc_lm_loss(sents):
    dy.renew_cg()
    # initialize the RNN
    f_init = RNN.initial_state()

    # get the wids and masks for each step
    tot_words = 0
    wids = []
    masks = []
    for i in range(len(sents[0])):
        wids.append([(sent[i] if len(sent) > i else S) for sent in sents])
        mask = [(1 if len(sent) > i else 0) for sent in sents]
        masks.append(mask)
        tot_words += sum(mask)

    # start the rnn by inputting "<s>"
    init_ids = [S] * len(sents)
    s = f_init.add_input(dy.lookup_batch(WORDS_LOOKUP, init_ids))

    # feed word vectors into the RNN and predict the next word
    losses = []
    for wid, mask in zip(wids, masks):
        # calculate the softmax and loss
        score = dy.affine_transform([b_exp, W_exp, s.output()])
        loss = dy.pickneglogsoftmax_batch(score, wid)
        # mask the loss if at least one sentence is shorter
        if mask[-1] != 1:
            mask_expr = dy.inputVector(mask)
            mask_expr = dy.reshape(mask_expr, (1,), len(sents))
            loss = loss * mask_expr
        losses.append(loss)
        # update the state of the RNN
        wemb = dy.lookup_batch(WORDS_LOOKUP, wid)
        s = s.add_input(wemb)

    return dy.sum_batches(dy.esum(losses)), tot_words
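# Stand-alone sketch (toy model and ids; none of these names come from the snippets
# above) of the masking pattern shared by calc_lm_loss and the other batched loss
# functions: compute a batched loss, then multiply it by a (1,)-shaped mask expression
# whose batch dimension zeroes out padded sentences before summing over the batch.
import dynet as dy

dy.renew_cg()
model = dy.Model()
lookup = model.add_lookup_parameters((10, 4))     # toy vocab of 10, embedding dim 4
W_p = model.add_parameters((10, 4))
b_p = model.add_parameters((10,))
W, b = dy.parameter(W_p), dy.parameter(b_p)

word_ids = [3, 5]   # one timestep of a batch of two sentences
mask = [1, 0]       # the second sentence has already ended (padding)

emb = dy.lookup_batch(lookup, word_ids)            # (4,) with batch size 2
score = dy.affine_transform([b, W, emb])           # (10,) with batch size 2
loss = dy.pickneglogsoftmax_batch(score, word_ids)

mask_expr = dy.reshape(dy.inputVector(mask), (1,), 2)  # broadcast the mask over the batch
masked_loss = loss * mask_expr                         # the padded element contributes 0
print(dy.sum_batches(masked_loss).value())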
import dynet as dy
import numpy as np

m = dy.Model()
lp = m.add_lookup_parameters((100, 10))

# regular lookup
a = lp[1].npvalue()
b = lp[2].npvalue()
c = lp[3].npvalue()

# batch lookup instead of single elements.
# two ways of doing this.
abc1 = dy.lookup_batch(lp, [1, 2, 3])
print(abc1.npvalue())

abc2 = lp.batch([1, 2, 3])
print(abc2.npvalue())

print(np.hstack([a, b, c]))

# use pick and pickneglogsoftmax in batch mode
# (must be used in conjunction with lookup_batch):
print("\nPick")
W = dy.parameter(m.add_parameters((5, 10)))
h = W * lp.batch([1, 2, 3])
print(h.npvalue())
print(dy.pick_batch(h, [1, 2, 3]).npvalue())
print(dy.pick(W * lp[1], 1).value(),
      dy.pick(W * lp[2], 2).value(),
      dy.pick(W * lp[3], 3).value())
def test_lookup_batch(self):
    dy.renew_cg()
    x = dy.lookup_batch(self.p, [0, 1])
    self.assertTrue(np.allclose(x.npvalue(), self.pval.T))
def calc_loss(sents):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]
    src_cws = []

    src_len = [len(sent) for sent in src_sents]
    max_src_len = np.max(src_len)
    num_words = 0

    for i in range(max_src_len):
        src_cws.append([sent[i] for sent in src_sents])

    # get the outputs of the first LSTM
    src_outputs = [dy.concatenate([x.output(), y.output()])
                   for x, y in LSTM_SRC.add_inputs([dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])]
    src_output = src_outputs[-1]

    # get the parameters for the attention
    src_output_matrix = dy.concatenate_cols(src_outputs)
    w1_att_src = dy.parameter(w1_att_src_p)
    fixed_attentional_component = w1_att_src * src_output_matrix

    # now decode
    all_losses = []

    # Decoder
    # need to mask padding at the end of the sentence
    tgt_cws = []
    tgt_len = [len(sent) for sent in tgt_sents]  # lengths of the target sentences
    max_tgt_len = np.max(tgt_len)
    masks = []

    for i in range(max_tgt_len):
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    W_m = dy.parameter(W_m_p)
    b_m = dy.parameter(b_m_p)

    for next_words, mask in zip(tgt_cws[1:], masks):
        # feed the previous words into the decoder
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()
        att_output, _ = calc_attention(src_output_matrix, output_embedding, fixed_attentional_component)
        middle_expr = dy.tanh(dy.affine_transform([b_m, W_m, dy.concatenate([output_embedding, att_output])]))
        s = dy.affine_transform([b_sm, W_sm, middle_expr])
        loss = dy.pickneglogsoftmax_batch(s, next_words)
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1,), len(sents))
        mask_loss = loss * mask_expr
        all_losses.append(mask_loss)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words