def test_concatenate_to_batch(self):
    """Splitting a lookup batch and re-concatenating it must round-trip."""
    dy.renew_cg()
    batch = dy.lookup_batch(self.p, [0, 1])
    first = dy.pick_batch_elem(batch, 0)
    second = dy.pick_batch_elem(batch, 1)
    rejoined = dy.concatenate_to_batch([first, second])
    self.assertTrue(np.allclose(rejoined.npvalue(), self.pval.T))
def decoding(self, src_encodings, sentences_length, test=False):
    """Decode heads and labels for a batch of sentences.

    src_encodings: batched encoder outputs fed to cal_scores.
    sentences_length: per-sentence token counts (excluding root).
    test: use the Edmonds/projective decoder instead of plain argmax.
    Returns (pred_heads, pred_labels), each a list per sentence with the
    root position stripped.
    """
    pred_heads = []
    pred_labels = []
    s_arc, s_label = self.cal_scores(src_encodings, True)
    for idx in range(len(sentences_length)):
        # Crop scores to this sentence's true length (+1 for the root).
        s_arc_values = dy.pick_batch_elem(
            s_arc, idx).npvalue()[:sentences_length[idx] + 1,
                                  :sentences_length[idx] + 1]  # src_len, src_len
        s_label_values = np.asarray([
            dy.pick_batch_elem(label, idx).npvalue() for label in s_label
        ]).transpose(
            (2, 1, 0))[:sentences_length[idx] + 1,
                       :sentences_length[idx] + 1, :]  # src_len, src_len, n_labels
        if test:
            weights = s_arc_values
            spred_heads = parse_proj(weights)
            if (idx == 0):
                print("Parsing batch with Edmonds decoder...")
        else:
            # Greedy head choice per modifier.
            spred_heads = np.argmax(s_arc_values, axis=0).tolist()
        # Best label for each (chosen head, modifier) pair.
        spred_labels = [
            np.argmax(labels[head])
            for head, labels in zip(spred_heads, s_label_values)
        ]
        pred_heads.append(spred_heads[1:])
        pred_labels.append(spred_labels[1:])
    return pred_heads, pred_labels
def test_concatenate_to_batch(self):
    """Re-batching individually picked elements reproduces the original batch."""
    dy.renew_cg()
    looked_up = dy.lookup_batch(self.p, [0, 1])
    elems = [dy.pick_batch_elem(looked_up, idx) for idx in (0, 1)]
    rebuilt = dy.concatenate_to_batch(elems)
    self.assertTrue(np.allclose(rebuilt.npvalue(), self.pval.T))
def transduce(self, inputs, masks, predict=False):
    """Run one masked LSTM pass over a batched input sequence.

    inputs: list of batched expressions, one per time step.
    masks: masks[t][b] == 1 when step t holds a real token for batch item b.
    predict: when False, the pre-sampled dropout masks are applied.
    Returns the per-step outputs (zero vectors at padded positions), or
    None if no initial state was set.
    """
    if not self.init:
        print("No Initial state provided")
        return
    outputs = []
    batch_size = inputs[0].dim()[1]
    for idx, input_tensor in enumerate(inputs):
        recur_s = []
        cell_s = []
        out = []
        hidden = self.hidden_previous
        cell = self.cell_previous
        if not predict:
            # Variational dropout: the same masks are reused at every step.
            input_tensor = dy.cmult(input_tensor, self.input_drop_mask)
            hidden = dy.cmult(hidden, self.recur_drop_mask)
        # One affine transform computes all four gate pre-activations.
        gates = dy.affine_transform([
            self.b.expr(),
            self.WXH.expr(),
            dy.concatenate([input_tensor, hidden])
        ])
        iga = dy.pickrange(gates, 0, self.recur_size)
        fga = dy.pickrange(gates, self.recur_size, 2 * self.recur_size)
        oga = dy.pickrange(gates, 2 * self.recur_size, 3 * self.recur_size)
        cga = dy.pickrange(gates, 3 * self.recur_size, 4 * self.recur_size)
        ig = dy.logistic(iga)
        fg = dy.logistic(fga)  # +self.forget_bias
        og = dy.logistic(oga)
        c_tilda = dy.tanh(cga)
        new_cell = dy.cmult(cell, fg) + dy.cmult(c_tilda, ig)
        new_hidden = dy.cmult(dy.tanh(new_cell), og)
        # Padded positions keep their previous state and emit zeros.
        for jdx in range(batch_size):
            if masks[idx][jdx] == 1:
                h_t = dy.pick_batch_elem(new_hidden, jdx)
                recur_s.append(h_t)
                cell_s.append(dy.pick_batch_elem(new_cell, jdx))
                out.append(h_t)
            else:
                recur_s.append(dy.pick_batch_elem(hidden, jdx))
                cell_s.append(dy.pick_batch_elem(cell, jdx))
                out.append(dy.zeros(self.recur_size))
        new_cell = dy.concatenate_to_batch(cell_s)
        new_hidden = dy.concatenate_to_batch(recur_s)
        self.cell_previous = new_cell
        self.hidden_previous = new_hidden
        outputs.append(dy.concatenate_to_batch(out))
    return outputs
def predict(self, sents):
    """Score a batch of sentences and tally per-class accuracy counts.

    Returns (res, total): correct predictions and gold counts per class
    (4 classes for task2, otherwise 2). Relies on the module-level `args`.
    """
    e = self.calc_output(sents, False)
    if args.task2:
        res = [0, 0, 0, 0]
        total = [0, 0, 0, 0]
    else:
        res = [0, 0]
        total = [0, 0]
    # Gold class index for each sentence in the batch.
    ys = [self.data.map_cat(y) for (x, y) in sents]
    n = 0
    for i in range(len(ys)):
        b = dy.pick_batch_elem(e, i)
        v = dy.argmax(b, gradient_mode="zero_gradient")
        v = v.vec_value()
        # Predicted class = position of the maximum output.
        if ys[i] == v.index(max(v)):
            res[ys[i]] += 1
        total[ys[i]] += 1
    return res, total
def Ext_embeds(self, sentences, predictFlag=False):
    """Project external embeddings for every distinct word form in the batch.

    Uses the dev/test vocabulary and lookup table when predictFlag is set,
    otherwise the training ones. Word forms missing from the vocabulary are
    skipped. Returns {word form: projected embedding expression}.
    """
    # Select the vocabulary/lookup-table pair for the requested split.
    if predictFlag:
        vocab = self.ext_words_devtest
        table = self.elookup_devtest
    else:
        vocab = self.ext_words_train
        table = self.elookup_train
    index_to_word = {index: word for word, index in vocab.items()}
    # Distinct word forms appearing anywhere in the batch.
    unique_forms = {entry.norm for sent in sentences for entry in sent}
    # Keep only the forms the external vocabulary knows about.
    known_indices = [vocab[form] for form in unique_forms if form in vocab]
    batched_lookup = dy.lookup_batch(table, known_indices)
    projected = self.projected_embs(batched_lookup)
    return {
        index_to_word[word_idx]: dy.pick_batch_elem(projected, pos)
        for pos, word_idx in enumerate(known_indices)
    }
def candidate_to_state(candidate_):
    """Apply a beam candidate's action to its source state and return the copy.

    Closure over `beam`, `oracle`, `train`, `action_scores`, `label_scores`
    and `self` from the enclosing scope — assumes it is defined inside the
    beam-search loop; TODO confirm against the enclosing function.
    """
    state = beam[candidate_.state_idx]
    new_state = state.copy()
    action, relation, action_idx, relation_idx = self.actions.decoded_with_relation[
        candidate_.joint_idx]
    # During training, correctness is judged against the oracle action at
    # this step; at inference every candidate counts as correct.
    correctness = (action, relation) == oracle[len(
        state.history)] if train else True
    # Score expression is only needed for the training loss.
    score_repr = dn.pick_batch_elem(action_scores, candidate_.state_idx)[action_idx] + \
        dn.pick_batch_elem(label_scores, candidate_.state_idx)[candidate_.joint_idx] \
        if train else None
    action.do_action(
        new_state, relation,
        transition_utils.History(
            ActionScore(relation, action, candidate_.local_score, score_repr),
            correctness))
    return new_state
def transduce(
    self, embed_sent: expr_seq.ExpressionSequence
) -> List[expr_seq.ExpressionSequence]:
    """Segment the batched character sequence and compose each segment.

    Segmentation boundaries come from self.actions (filled by
    create_trajectories); each segment is composed into one embedding.
    """
    self.create_trajectories(embed_sent, force_oracle=False)
    # NOTE(review): np.nonzero returns a tuple of index arrays, so each
    # element of `actions` is a tuple — confirm the inner comprehension
    # iterates what is intended here.
    actions = [np.nonzero(a.content) for a in self.actions]
    actions = [[
        a for a in actions[i] if a < self.src_sents[i].len_unpadded()
    ] for i in range(len(actions))]
    # Create sentence embedding
    outputs = []
    embeddings = dy.concatenate(embed_sent.expr_list, d=1)
    for i in range(self.src_sents.batch_size()):
        sequence = dy.pick_batch_elem(embeddings, i)
        src = self.src_sents[i]
        lower_bound = 0
        output = []
        for j, upper_bound in enumerate(actions[i]):
            # NOTE(review): the char slice is taken when no_char_embed is
            # TRUE — the sibling transduce elsewhere in this file does the
            # opposite; confirm the condition is not inverted.
            char_sequence = dy.pick_range(
                sequence, lower_bound, upper_bound + 1,
                1) if self.no_char_embed else None
            output.append(
                self.segment_composer.compose_single(
                    char_sequence, src, lower_bound, upper_bound + 1))
            lower_bound = upper_bound + 1
        outputs.append(output)
    # NOTE(review): pad_output is not visible in this file and is called
    # with no arguments — verify it exists and pads `outputs`.
    outputs = pad_output()
    return self.final_transducer.transduce(outputs)
def __evaluate(self, lstm_output): length = len(lstm_output) # (i, j) -> (i * length + j,) # i = k / length, j = k % length # 1 1 2 2 3 3 4 4 .. heads = [ dn.transpose(self.activation(self.head_dense_layer( lstm_output[i]))) for i in range(length) ] mods = [ self.activation(self.dep_dense_layer(lstm_output[i])) for i in range(length) ] head_part = dn.concatenate_to_batch( [heads[i // len(lstm_output)] for i in range(length * length)]) # 1 2 3 4 .. 1 2 3 4 ... mod_part = dn.concatenate_to_batch([mods[i] for i in range(length)] * length) output = self.fusion_layer(head_part, mod_part) exprs = [[ dn.pick_batch_elem(output, i * length + j) for j in range(length) ] for i in range(length)] scores = output.npvalue() scores = scores.reshape((len(lstm_output), len(lstm_output))) return scores, exprs
def _embed_word(self, word: sent.SegmentedWord, is_batched: bool = False):
    """Embed a word by composing the embeddings of its characters."""
    batched_chars = self.embeddings.batch(batchers.mark_as_batch(word.chars))
    per_char = [
        dy.pick_batch_elem(batched_chars, idx)
        for idx in range(len(word.chars))
    ]
    return self.composer.compose(per_char)
def RNN_embeds(self, sentences, predictFlag=False):
    """Build char-RNN embeddings for every distinct character sequence.

    Returns {str(idChars list): embedding expression}. Sequences are
    deduplicated and sorted longest-first before batching.
    """
    tokenIdChars = []
    for sent in sentences:
        tokenIdChars.extend([entry.idChars for entry in sent])
    # Deduplicate (lists are unhashable, so go through tuples).
    tokenIdChars_set = set(map(tuple, tokenIdChars))
    tokenIdChars = list(map(list, tokenIdChars_set))
    tokenIdChars.sort(key=lambda x: -len(x))
    char_src_len = len(max(tokenIdChars, key=len))
    chars_mask = []
    char_ids = []
    for i in range(char_src_len):
        # Pad id 4 past each word's end; mask marks real characters.
        # NOTE(review): 4 is presumably the <pad> char id — confirm.
        char_ids.append([(chars[i] if len(chars) > i else 4)
                         for chars in tokenIdChars])
        char_mask = [(1 if len(chars) > i else 0) for chars in tokenIdChars]
        chars_mask.append(char_mask)
    char_embs = []
    for cid in char_ids:
        char_embs.append(dy.lookup_batch(self.clookup, cid))
    wordslen = list(map(lambda x: len(x), tokenIdChars))
    chr_embs = self.HybridCharembs.predict_sequence_batched(
        char_embs, chars_mask, wordslen, predictFlag)
    RNN_embs = {}
    for idx in range(len(tokenIdChars)):
        RNN_embs[str(tokenIdChars[idx])] = dy.pick_batch_elem(
            chr_embs, idx)
    return RNN_embs
def predict_sequence_batched(self,
                             inputs,
                             mask_array,
                             wlen,
                             predictFlag=False):
    """Encode batched character sequences with a char LSTM plus attention.

    inputs: per-step batched char embeddings; mask_array: per-step 0/1
    masks; wlen: true length of each batch item. Returns the projected
    word embedding (attention-pooled hiddens concatenated with the final
    cell state).
    """
    batch_size = inputs[0].dim()[1]
    src_len = len(inputs)
    if not predictFlag:
        self.charlstm.set_dropouts(self.dropout, self.dropout)
        self.charlstm.set_dropout_masks(batch_size)
    char_fwd = self.charlstm.initial_state(batch_size)
    recur_states, cells = char_fwd.add_inputs(inputs, mask_array, predictFlag)
    hidden_states = []
    for idx in range(src_len):
        mask = dy.inputVector(mask_array[idx])
        mask_expr = dy.reshape(mask, (1, ), batch_size)
        # Zero out hidden states at padded positions.
        hidden_states.append(recur_states[idx] * mask_expr)
    H = dy.concatenate_cols(hidden_states)
    if (predictFlag):
        a = dy.softmax(dy.transpose(self.W_atten.expr()) * H)
    else:
        #dropout attention connections(keep the same dim across the sequence)
        a = dy.softmax(
            dy.transpose(self.W_atten.expr()) *
            dy.dropout_dim(H, 1, self.dropout))
    # Final cell state at each word's true last character.
    cell_states = []
    for idx in range(batch_size):
        if (wlen[idx] > 0):
            cell = dy.pick_batch_elem(cells[wlen[idx] - 1], idx)
        else:
            cell = dy.zeros(self.ldims)
        cell_states.append(cell)
    C = dy.concatenate_to_batch(cell_states)
    H_atten = H * dy.transpose(a)
    char_emb = dy.concatenate([H_atten, C])
    if predictFlag:
        proj_char_emb = dy.affine_transform(
            [self.b_linear.expr(), self.W_linear.expr(), char_emb])
    else:
        proj_char_emb = dy.affine_transform([
            self.b_linear.expr(),
            self.W_linear.expr(),
            dy.dropout(char_emb, self.dropout)
        ])
    return proj_char_emb
def decoding(self, src_encodings, sentences_length):
    """Greedy per-token decoding of POS and XPOS tags for a batch.

    Returns (pred_pos, pred_xpos): per-sentence lists of tag indices.
    """
    pos_scores, xpos_scores = self.cal_scores(src_encodings, True)

    def best_tags(batched_scores, elem, length):
        # Scores are n_labels x sent_length; take the best label per column.
        values = dy.pick_batch_elem(batched_scores, elem).npvalue()[:, :length]
        return list(np.argmax(values, axis=0).astype(int))

    pred_pos = []
    pred_xpos = []
    for elem, length in enumerate(sentences_length):
        pred_pos.append(best_tags(pos_scores, elem, length))
        pred_xpos.append(best_tags(xpos_scores, elem, length))
    return pred_pos, pred_xpos
def compose(
        self,
        embeds: Union[dy.Expression, List[dy.Expression]]) -> dy.Expression:
    """Compose embeddings by taking their element-wise maximum.

    Accepts either a batched expression (one batch element per embedding)
    or an already-unpacked list of expressions.
    """
    # isinstance instead of `type(...) != list` so list subclasses are
    # treated as lists too rather than failing on .dim().
    if not isinstance(embeds, list):
        embeds = [
            dy.pick_batch_elem(embeds, i) for i in range(embeds.dim()[1])
        ]
    return dy.emax(embeds)
def decoding(self, src_encodings, masks, sentences_length, test):
    """Decode heads and labels from joint (head, label) score columns.

    Each score column stacks n_labels scores per candidate head. When
    `test` is set the Edmonds/projective decoder is used, otherwise a
    flat argmax over the joint index. Returns (pred_heads, pred_labels)
    per batch element.
    """
    src_len = len(src_encodings)
    batch_size = src_encodings[0].dim()[1]
    pred_heads = [[] for _ in range(batch_size)]
    pred_labels = [[] for _ in range(batch_size)]
    heads_labels_idx = self.cal_scores(src_encodings, masks, False)
    scores = dy.concatenate_cols(heads_labels_idx)
    for idx in range(batch_size):
        # Crop to the true sentence: (heads+root)*n_labels x length.
        scores_np = dy.pick_batch_elem(
            scores, idx).npvalue()[:(sentences_length[idx] + 1) *
                                   self.n_labels, :sentences_length[idx]]
        if test:
            scores_np = np.transpose(scores_np)
            m_scores = []
            h_indexes = []
            for jdx in range(sentences_length[idx]):
                hm_score = []
                hm_index = []
                head = scores_np[jdx, :]
                # Best label (and its score) for every candidate head.
                for kdx in range(sentences_length[idx] + 1):
                    head_mod = head[kdx * self.n_labels:(kdx + 1) *
                                    self.n_labels]
                    arg_max = np.argmax(head_mod)
                    arg_maxv = head_mod[arg_max]
                    hm_score.append(arg_maxv)
                    hm_index.append(arg_max)
                m_scores.append(hm_score)
                h_indexes.append(hm_index)
            # Prepend a zero row for the root before running Edmonds.
            m_scores = [[0] * (sentences_length[idx] + 1)] + m_scores
            edmonds_np = np.stack(m_scores, axis=1)
            heads = parse_proj(edmonds_np)
            pred_heads[idx].extend(heads[1:])
            labels = []
            for hdx, h in enumerate(heads[1:]):
                labels.append(h_indexes[hdx][h])
            pred_labels[idx].extend(labels)
        else:
            # Flat argmax over the joint (head, label) index.
            pred_idx = np.argmax(scores_np, axis=0)
            pred_label = pred_idx % self.n_labels
            pred_head = (pred_idx - pred_label) / self.n_labels
            pred_head = pred_head.astype(int)
            pred_label = pred_label.astype(int)
            pred_heads[idx].extend(pred_head.tolist())
            pred_labels[idx].extend(pred_label.tolist())
    return pred_heads, pred_labels
def compose(
        self,
        embeds: Union[dy.Expression, List[dy.Expression]]) -> dy.Expression:
    """Compose embeddings by transducing them and taking the final state.

    Accepts either a batched expression (one batch element per embedding)
    or an already-unpacked list of expressions. The transduce call is made
    for its side effect of populating the transducer's final states.
    """
    # isinstance instead of `type(...) != list` so list subclasses are
    # treated as lists too rather than failing on .dim().
    if not isinstance(embeds, list):
        embeds = [
            dy.pick_batch_elem(embeds, i) for i in range(embeds.dim()[1])
        ]
    self.seq_transducer.transduce(
        expr_seq.ExpressionSequence(expr_list=embeds))
    return self.seq_transducer.get_final_states()[-1].main_expr()
def cal_scores(self, src_encodings, masks, train):
    """Score every (head, modifier, label) combination for a batch.

    Returns, for each modifier row (root excluded), one expression that
    concatenates n_labels scores per candidate head; masked positions are
    filled with -inf so they can never win an argmax.
    """
    src_len = len(src_encodings)
    batch_size = src_encodings[0].dim()[1]
    heads_LRlayer = []
    mods_LRlayer = []
    for encoding in src_encodings:
        heads_LRlayer.append(
            self.leaky_ReLu(self.b_head.expr() +
                            self.W_head.expr() * encoding))
        mods_LRlayer.append(
            self.leaky_ReLu(self.b_mod.expr() + self.W_mod.expr() * encoding))
    heads_labels = []
    heads = []
    labels = []
    neg_inf = dy.constant(1, -float("inf"))
    for row in range(
            1, src_len
    ):  #exclude root @ index=0 since roots do not have heads
        scores_idx = []
        for col in range(src_len):
            # Clipped, signed head-modifier distance feeds an embedding.
            dist = col - row
            mdist = self.dist_max
            dist_i = (min(dist, mdist - 1) + mdist if dist >= 0 else int(
                min(-1.0 * dist, mdist - 1)))
            dist_vec = dy.lookup_batch(self.dlookup, [dist_i] * batch_size)
            if train:
                input_vec = dy.concatenate([
                    dy.esum([
                        dy.dropout(heads_LRlayer[col], self.dropout),
                        dy.dropout(mods_LRlayer[row], self.dropout)
                    ]), dist_vec
                ])
            else:
                input_vec = dy.concatenate([
                    dy.esum([heads_LRlayer[col], mods_LRlayer[row]]),
                    dist_vec
                ])
            score = self.scoreHeadModLabel(input_vec, train)
            # NOTE(review): `and` on two mask sequences returns the second
            # one when the first is truthy (it is not element-wise) —
            # confirm this is the intended masking.
            mask = masks[row] and masks[col]
            join_scores = []
            for bdx in range(batch_size):
                if (mask[bdx] == 1):
                    join_scores.append(dy.pick_batch_elem(score, bdx))
                else:
                    join_scores.append(
                        dy.concatenate([neg_inf] * self.n_labels))
            scores_idx.append(dy.concatenate_to_batch(join_scores))
        heads_labels.append(dy.concatenate(scores_idx))
    return heads_labels
def transduce(self, embed_sent: ExpressionSequence) -> List[ExpressionSequence]:
    """Sample several segmentations per batch item, compose the segments,
    and run the final transducer over every sampled sentence.

    Returns a CompoundSeqExpression over one encoded sequence per sample.
    """
    batch_size = embed_sent[0].dim()[1]
    actions = self.sample_segmentation(embed_sent, batch_size)
    sample_size = len(actions)
    embeddings = dy.concatenate(embed_sent.expr_list, d=1)
    # Force evaluation so pick_batch_elem below reads computed values.
    embeddings.value()
    #
    composed_words = []
    for i in range(batch_size):
        sequence = dy.pick_batch_elem(embeddings, i)
        # For each sampled segmentations
        for j, sample in enumerate(actions):
            lower_bound = 0
            # Read every 'segment' decision
            for k, upper_bound in enumerate(sample[i]):
                # NOTE(review): char_sequence is unused — the tuple below
                # re-slices the same range; likely leftover from the
                # commented-out code.
                char_sequence = dy.pick_range(sequence, lower_bound,
                                              upper_bound + 1, 1)
                composed_words.append(
                    (dy.pick_range(sequence, lower_bound, upper_bound + 1,
                                   1), j, i, k, lower_bound,
                     upper_bound + 1))
                #self.segment_composer.set_word_boundary(lower_bound, upper_bound, self.src_sent[i])
                #composed = self.segment_composer.transduce(char_sequence)
                #outputs[j][i].append(composed)
                lower_bound = upper_bound + 1
    outputs = self.segment_composer.compose(composed_words, sample_size,
                                            batch_size)
    # Padding + return
    try:
        if self.length_prior:
            seg_size_unpadded = [[
                len(outputs[i][j]) for j in range(batch_size)
            ] for i in range(sample_size)]
        enc_outputs = []
        for batched_sampled_sentence in outputs:
            sampled_sentence, segment_mask = self.pad(
                batched_sampled_sentence)
            expr_seq = ExpressionSequence(
                expr_tensor=dy.concatenate_to_batch(sampled_sentence),
                mask=segment_mask)
            sent_context = self.final_transducer.transduce(expr_seq)
            self.final_states.append(
                self.final_transducer.get_final_states())
            enc_outputs.append(sent_context)
        return CompoundSeqExpression(enc_outputs)
    finally:
        # Bookkeeping runs even on early exit from the block above.
        if self.length_prior:
            self.seg_size_unpadded = seg_size_unpadded
        self.compose_output = outputs
        self.segment_actions = actions
        if not self.train and self.compute_report:
            self.add_sent_for_report({"segment_actions": actions})
def get_complete_raw_exprs(self, lstm_output):
    """Compute raw scores for every (head, modifier) pair in one batched pass.

    Batch element i * n + j holds the expression for head i and modifier j,
    where n = len(lstm_output).
    """
    n = len(lstm_output)
    as_batch = dn.concatenate_to_batch(lstm_output)
    head_proj = self.bilinear_layer.w1.expr() * as_batch
    mod_proj = self.bilinear_layer.w2.expr() * as_batch
    head_vecs = [dn.pick_batch_elem(head_proj, i) for i in range(n)]
    mod_vecs = [dn.pick_batch_elem(mod_proj, i) for i in range(n)]
    # Heads repeat blockwise (0 0 .. 1 1 ..); modifiers cycle (0 1 .. 0 1 ..).
    head_part = dn.concatenate_to_batch(
        [head_vecs[k // n] for k in range(n * n)])
    mod_part = dn.concatenate_to_batch(mod_vecs * n)
    pre_activation = head_part + mod_part + self.bilinear_layer.bias.expr()
    return self.dense_layer(self.activation(pre_activation))
def get_complete_raw_exprs(self, lstm_output):
    """Compute raw scores for every (head, modifier) pair in one batched
    pass, with optional structural dropout on the hidden layer.

    Batch element i * length + j holds the expression for head i and
    modifier j.
    """
    length = len(lstm_output)
    lstm_output_as_batch = dn.concatenate_to_batch(lstm_output)
    headfov = self.bilinear_layer.w1.expr() * lstm_output_as_batch
    modfov = self.bilinear_layer.w2.expr() * lstm_output_as_batch
    # (i, j) -> (i * length + j,)
    # i = k / length, j = k % length
    # 1 1 2 2 3 3 4 4 ..
    heads = [dn.pick_batch_elem(headfov, i) for i in range(length)]
    mods = [dn.pick_batch_elem(modfov, i) for i in range(length)]
    head_part = dn.concatenate_to_batch(
        [heads[i // len(lstm_output)] for i in range(length * length)])
    # 1 2 3 4 .. 1 2 3 4 ...
    mod_part = dn.concatenate_to_batch([mods[i] for i in range(length)] *
                                       length)
    hidden = self.activation(head_part + mod_part +
                             self.bilinear_layer.bias.expr())
    # Dropout on the pair representation, training only, opt-in via options.
    struct_dropout = getattr(self.options, "struct_dropout", 0.0)
    if self.options.is_train and struct_dropout > 0:
        hidden = dn.dropout(hidden, struct_dropout)
    output = self.dense_layer(hidden)
    return output
def rnn_encode(rnn, input_, lengths):
    """Gather, for each batch element, the RNN output at its last real step.

    :param rnn: dy.RNNBuilder or dy.BiRNNBuilder
    :param input_: List[dy.Expression]
    :param lengths: List[int]

    Returns:
        dy.Expression batched over the input batch.
    """
    outputs = rnn_forward(rnn, input_)
    picked = []
    for batch_idx, length in enumerate(lengths):
        picked.append(dy.pick_batch_elem(outputs[length - 1], batch_idx))
    return dy.concatenate_to_batch(picked)
def rnn_encode(rnn, input_, lengths):
    """Select each batch element's output at its true final time step.

    :param rnn: dy.RNNBuilder or dy.BiRNNBuilder
    :param input_: List[dy.Expression]
    :param lengths: List[int]

    Returns:
        dy.Expression batched over the input batch.
    """
    hidden_seq = rnn_forward(rnn, input_)
    last_outputs = [
        dy.pick_batch_elem(hidden_seq[n - 1], b)
        for b, n in enumerate(lengths)
    ]
    return dy.concatenate_to_batch(last_outputs)
def rnn_forward_with_state(rnn,
                           input_,
                           lengths=None,
                           state=None,
                           batched=True,
                           backward=False):
    """Return the output of the final layers and the final state of the RNN.

    :param rnn: dy.RNNBuilder
    :param input_: List[dy.Expression]
    :param lengths: List[int]
    :param state: List[np.ndarray] The previous state (used in TBPTT)
    :param batched: bool Is the state batched?
    :param backward: bool Is this a backward rnn in a bRNN?

    Returns:
        List[dy.Expression] (Seq_len): The outputs
        List[dy.Expression] (2 * layers if lstm): The state
    """
    if state is not None:
        # Restore the carried-over state (TBPTT).
        state = [dy.inputTensor(s, batched) for s in state]
    lstm_state = rnn.initial_state(state)
    if backward:
        states = lstm_state.add_inputs(reversed(input_))
        outputs = list(reversed([s.h()[-1] for s in states]))
        # When going backwards (we pad right) the final state of the rnn
        # is always the last one.
        final_state = states[-1].s()
        return outputs, final_state
    states = lstm_state.add_inputs(input_)
    outputs = [s.h()[-1] for s in states]
    if lengths is None:
        # NOTE(review): backward is always False here (the backward branch
        # returned above), so this reversal is dead code.
        if backward:
            outputs = list(reversed(outputs))
        return outputs, states[-1].s()
    # Pick, per batch element, the state at its true last time step ...
    final_states = [states[l - 1].s() for l in lengths]
    final_state_by_batch = []
    for i, state in enumerate(final_states):
        batch_state = [dy.pick_batch_elem(s, i) for s in state]
        final_state_by_batch.append(batch_state)
    # ... then re-batch component-wise (c1, c2, ..., h1, h2, ...).
    final_state = []
    for i in range(len(final_state_by_batch[0])):
        col = dy.concatenate_to_batch([
            final_state_by_batch[j][i]
            for j in range(len(final_state_by_batch))
        ])
        final_state.append(col)
    if backward:
        outputs = list(reversed(outputs))
    return outputs, final_state
def compose(self, embeds):
    """Compose character embeddings with an n-gram convolution + max pooling.

    Accepts either a batched expression (one batch element per embedding)
    or a list of expressions; sequences shorter than the n-gram size are
    padded with zero vectors. Returns the transformed pooled embedding.
    """
    # isinstance instead of `type(...) != list` so list subclasses are
    # treated as lists too rather than failing on .dim().
    if not isinstance(embeds, list):
        embeds = [
            dy.pick_batch_elem(embeds, i) for i in range(embeds.dim()[1])
        ]
    # Pad short sequences so the convolution window always fits.
    if len(embeds) < self.ngram_size:
        embeds.extend([dy.zeros(self.embed_dim)] *
                      (self.ngram_size - len(embeds)))
    # Rearrange (embed_dim, seq) into the (1, seq, embed_dim) layout
    # expected by conv2d_bias.
    embeds = dy.transpose(
        dy.concatenate([dy.concatenate_cols(embeds)], d=2), [2, 1, 0])
    embeds = dy.conv2d_bias(embeds, self.filter, self.bias,
                            (self.embed_dim, 1))
    # Max pool over the sequence dimension.
    embeds = dy.max_dim(dy.pick(embeds, index=0), d=0)
    return self.transform.transform(embeds)
def rnn_forward_with_state(rnn,
                           input_,
                           lengths=None,
                           state=None,
                           batched=True,
                           backward=False):
    """Return the output of the final layers and the final state of the RNN.

    :param rnn: dy.RNNBuilder
    :param input_: List[dy.Expression]
    :param lengths: List[int]
    :param state: List[np.ndarray] The previous state (used in TBPTT)
    :param batched: bool Is the state batched?
    :param backward: bool Is this a backward rnn in a bRNN?

    Returns:
        List[dy.Expression] (Seq_len): The outputs
        List[dy.Expression] (2 * layers if lstm): The state
    """
    if state is not None:
        # Restore the carried-over state (TBPTT).
        state = [dy.inputTensor(s, batched) for s in state]
    lstm_state = rnn.initial_state(state)
    if backward:
        states = lstm_state.add_inputs(reversed(input_))
        outputs = list(reversed([s.h()[-1] for s in states]))
        # When going backwards (we pad right) the final state of the rnn
        # is always the last one.
        final_state = states[-1].s()
        return outputs, final_state
    states = lstm_state.add_inputs(input_)
    outputs = [s.h()[-1] for s in states]
    if lengths is None:
        # NOTE(review): backward is always False on this path (handled and
        # returned above), so this reversal is dead code.
        if backward:
            outputs = list(reversed(outputs))
        return outputs, states[-1].s()
    # Pick, per batch element, the state at its true last time step ...
    final_states = [states[l - 1].s() for l in lengths]
    final_state_by_batch = []
    for i, state in enumerate(final_states):
        batch_state = [dy.pick_batch_elem(s, i) for s in state]
        final_state_by_batch.append(batch_state)
    # ... then re-batch component-wise (c1, c2, ..., h1, h2, ...).
    final_state = []
    for i in range(len(final_state_by_batch[0])):
        col = dy.concatenate_to_batch(
            [final_state_by_batch[j][i]
             for j in range(len(final_state_by_batch))])
        final_state.append(col)
    if backward:
        outputs = list(reversed(outputs))
    return outputs, final_state
def transduce(self, embed_sent: ExpressionSequence) -> List[ExpressionSequence]:
    """Sample one segmentation per batch item, compose each segment, and
    run the final transducer over the padded composed sequence.
    """
    batch_size = embed_sent[0].dim()[1]
    actions = self.sample_segmentation(embed_sent, batch_size)
    embeddings = dy.concatenate(embed_sent.expr_list, d=1)
    # Force evaluation so pick_batch_elem below reads computed values.
    embeddings.value()
    #
    composed_words = []
    for i in range(batch_size):
        sequence = dy.pick_batch_elem(embeddings, i)
        # For each sampled segmentations
        lower_bound = 0
        for j, upper_bound in enumerate(actions[i]):
            if self.no_char_embed:
                char_sequence = []
            else:
                char_sequence = dy.pick_range(sequence, lower_bound,
                                              upper_bound + 1, 1)
            composed_words.append(
                (char_sequence, i, j, lower_bound, upper_bound + 1))
            lower_bound = upper_bound + 1
    outputs = self.segment_composer.compose(composed_words, batch_size)
    # Padding + return
    try:
        if self.length_prior:
            seg_size_unpadded = [
                len(outputs[i]) for i in range(batch_size)
            ]
        sampled_sentence, segment_mask = self.pad(outputs)
        expr_seq = ExpressionSequence(
            expr_tensor=dy.concatenate_to_batch(sampled_sentence),
            mask=segment_mask)
        return self.final_transducer.transduce(expr_seq)
    finally:
        # Bookkeeping runs even though the happy path returns above.
        if self.length_prior:
            self.seg_size_unpadded = seg_size_unpadded
        self.compose_output = outputs
        self.segment_actions = actions
        if not self.train and self.is_reporting():
            if len(actions) == 1:  # Support only AccuracyEvalTask
                self.report_sent_info({"segment_actions": actions})
def compose(
        self,
        embeds: Union[dy.Expression, List[dy.Expression]]) -> dy.Expression:
    """Compose embeddings with forward and backward recurrent passes and
    transform the concatenated final outputs.
    """
    if type(embeds) != list:
        embeds = [
            dy.pick_batch_elem(embeds, i) for i in range(embeds.dim()[1])
        ]
    fwd_state = self.fwd_combinator.initial_state()
    # NOTE(review): both directions use self.fwd_combinator — if a separate
    # bwd combinator exists on this class this looks like a copy-paste
    # slip; confirm.
    bwd_state = self.fwd_combinator.initial_state()
    # The embedding of the Head should be in the first element of the list
    fwd_state = fwd_state.add_input(embeds[-1])
    bwd_state = bwd_state.add_input(embeds[-1])
    for i in range(len(embeds) - 1):
        fwd_state = fwd_state.add_input(embeds[i])
        # NOTE(review): at i == 0 this feeds embeds[-1] a second time and
        # embeds[0] is never fed to bwd_state — verify the intended
        # traversal order.
        bwd_state = bwd_state.add_input(embeds[-(i + 1)])
    return self.transform.transform(
        dy.concatenate([fwd_state.output(), bwd_state.output()]))
def __call__(self, sentence, c2i, maxn_char, act, train=False):
    """Build a CNN character embedding for every token of a sentence.

    sentence: tokens with a .chars attribute; c2i: char-to-index map
    (unknown chars map to 0); maxn_char: padded char length used to size
    the pooling windows; act: activation applied after each convolution.
    Returns one embedding expression per token.
    """
    words_batch = []
    for token in sentence:
        chars_emb = [self.clookup[int(c2i.get(c, 0))] for c in token.chars]
        c2w = dy.concatenate_cols(chars_emb)
        # Add a trailing channel dimension for conv2d.
        c2w = dy.reshape(c2w, tuple(list(c2w.dim()[0]) + [1]))
        words_batch.append(c2w)
    words_batch = dy.concatenate_to_batch(words_batch)
    # One convolution per window size, then max-pool over positions.
    convds = [dy.conv2d(words_batch, W, stride=(
        1, 1), is_valid=True) for W in self.Ws]
    actds = [act(convd) for convd in convds]
    poolds = [dy.maxpooling2d(actd, ksize=(1, maxn_char-win_size+1),
                              stride=(1, 1))
              for win_size, actd in zip(self.win_sizes, actds)]
    # Flatten each pooled map to its channel vector and concatenate.
    words_batch = [dy.reshape(poold, (poold.dim()[0][2],))
                   for poold in poolds]
    words_batch = dy.concatenate([out for out in words_batch])
    # Unpack the batch back into one embedding per token.
    c2w_emb = []
    for idx, token in enumerate(sentence):
        c2w_emb.append(dy.pick_batch_elem(words_batch, idx))
    return c2w_emb
def main_test():
    """Load a trained model and print the predicted category per test item.

    Relies on module-level `args` and `model`. Output lines pair each item
    id with its predicted category label.
    """
    if args.task2:
        all_data = deft_data.task2(args.test_file, None, 0)
    else:
        all_data = deft_data.task1(args.test_file, None, 0)
    # NOTE(review): `test` is never used below — confirm it is needed.
    test = list(all_data.id_to_text_cat_map.values())
    # Reuse the training-time vocabulary so indices match the saved model.
    with open('word_to_idx.txt', encoding="utf-8") as data_file:
        all_data.word_to_idx = json.load(data_file)
    net = deft_t12_nn(model, all_data)
    model.populate(args.test_model)
    for data in all_data.id_to_text_cat_map:
        sent = all_data.id_to_text_cat_map[data]
        print('#', sent[0])
        e = net.calc_output([sent], False)
        b = dy.pick_batch_elem(e, 0)
        v = b.vec_value()
        # Predicted category = index of the maximum output score.
        r = v.index(max(v))
        print('"%s"\t"%s"' % (data, all_data.reverse_map_cat(r)))
def compose(self, composed_words, sample_size, batch_size):
    """Compose all segments, bucketing them by length for batched transduction.

    composed_words: tuples (expr, sample_num, batch_num, position, start,
    end). Segments are sorted by length so equal-length ones share one
    transduce call. Returns outputs[sample][batch][position] expressions.
    """
    batches = []
    batch_maps = []
    batch_words = []
    seq_len = np.zeros((sample_size, batch_size), dtype=int)
    composed_words = sorted(composed_words, key=lambda x: x[5] - x[4])
    # Batching expression
    now_length = -1
    for expr_list, sample_num, batch_num, position, start, end in composed_words:
        length = end - start
        if length != now_length:
            # Length changed: start a new bucket.
            now_length = length
            now_map = {}
            now_batch = []
            now_words = []
            now_idx = 0
            batches.append(now_batch)
            batch_maps.append(now_map)
            batch_words.append(now_words)
        now_batch.append(expr_list)
        now_words.append(self.src_sent[batch_num][start:end])
        # Remember where this bucket position belongs in the output.
        now_map[now_idx] = (sample_num, batch_num, position)
        seq_len[sample_num, batch_num] += 1
        now_idx += 1
    # Composing
    outputs = [[[None for _ in range(seq_len[i, j])]
                for j in range(batch_size)] for i in range(sample_size)]
    expr_list = []
    for batch, batch_map, batch_word in zip(batches, batch_maps,
                                            batch_words):
        self.set_words(batch_word)
        results = self.transduce(dy.concatenate_to_batch(batch))
        results.value()
        # Scatter the batched results back to their output slots.
        for idx, (sample_num, batch_num, position) in batch_map.items():
            expr_list.append(dy.pick_batch_elem(results, idx))
            outputs[sample_num][batch_num][position] = expr_list[-1]
    dy.forward(expr_list)
    return outputs
def test_pick_batch_elem(self):
    """pick_batch_elem must extract a single element of a lookup batch."""
    dy.renew_cg()
    batch = dy.lookup_batch(self.p, [0, 1])
    second = dy.pick_batch_elem(batch, 1)
    self.assertTrue(np.allclose(second.npvalue(), self.pval[1]))
def predict_one(self, src, encoder_outputs, **kwargs):
    """Beam-search decode a single source sequence.

    kwargs: beam (width K, default 5), mxlen (max output length, 100).
    Returns (paths, scores): K token-id paths (GO stripped) and their
    cumulative log scores.
    """
    K = int(kwargs.get('beam', 5))
    mxlen = int(kwargs.get('mxlen', 100))
    paths = [[Offsets.GO] for _ in range(K)]
    # Which beams are done?
    done = np.array([False] * K)
    scores = np.array([0.0] * K)
    hidden, output_i, context = self.arc_policy(encoder_outputs,
                                                self.hsz,
                                                beam_width=K)
    num_states = len(hidden)
    rnn_state = self.decoder_rnn.initial_state(hidden)
    self.attn_cache(context)
    src_mask = encoder_outputs.src_mask
    for i in range(mxlen):
        # Feed each beam's last token through the decoder step.
        dst_last = np.array([path[-1] for path in paths]).reshape(1, K)
        embed_i = self.tgt_embeddings.encode(dst_last)[-1]
        embed_i = self.input_i(embed_i, output_i)
        rnn_state = rnn_state.add_input(embed_i)
        rnn_output_i = rnn_state.output()
        output_i = self.attn(rnn_output_i, src_mask)
        wll = self.prediction([output_i])[-1].npvalue()  # (V,) K
        V = wll.shape[0]
        if i > 0:
            # expanded_history = np.expand_dims(scores, -1)
            # done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
            # sll = np.multiply(wll.T, done_mask) + expanded_history
            wll = wll.T
            expanded_history = np.expand_dims(scores, -1)
            # Freeze finished beams: zero their word scores and push
            # everything but the frozen continuation far down.
            done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
            done_mask_inv = (done_mask != 1).astype(np.uint8)
            eos_mask = np.zeros((1, V)).astype(np.uint8)
            mask = ((done_mask & eos_mask) != 1).astype(np.uint8)
            masked_wll = np.multiply(done_mask, wll)
            negged_wll = masked_wll + (done_mask_inv * -1e4)
            removed_eos = np.multiply(mask, negged_wll)
            sll = removed_eos + expanded_history
        else:
            sll = wll.T
        # Global top-K over the flattened (K, V) continuation scores.
        flat_sll = sll.reshape(-1)
        bests = topk(K, flat_sll)
        best_idx_flat = np.array(list(bests.keys()))
        best_beams = best_idx_flat // V
        best_idx = best_idx_flat % V
        new_paths = []
        new_done = []
        hidden = rnn_state.s()
        new_hidden = [[] for _ in range(num_states)]
        for j, best_flat in enumerate(best_idx_flat):
            beam_id = best_beams[j]
            best_word = best_idx[j]
            if done[j]:
                new_paths.append(paths[beam_id] + [Offsets.EOS])
            else:
                new_paths.append(paths[beam_id] + [best_word])
                if best_word == Offsets.EOS:
                    done[j] = True
            # NOTE(review): done is read/written with index j above but
            # propagated with beam_id here — confirm the bookkeeping after
            # beam reordering is intended.
            new_done.append(done[beam_id])
            scores[j] = bests[best_flat]
            # For each path, we need to pick that out and add it to the hiddens
            # This will be (c1, c2, ..., h1, h2, ...)
            for h_i, h in enumerate(hidden):
                new_hidden[h_i].append(dy.pick_batch_elem(h, beam_id))
        done = np.array(new_done)
        new_hidden = [
            dy.concatenate_to_batch(new_h) for new_h in new_hidden
        ]
        paths = new_paths
        rnn_state = self.decoder_rnn.initial_state(new_hidden)
    paths = np.stack([p[1:] for p in paths])
    return paths, scores
def __call__(self, embed_sent):
    """Encode a batch by sampling segment decisions and composing segments.

    For every frame a per-item decision (segment / delete / continue) is
    consumed; buffered frames are composed into one expression at each
    SEGMENT decision. Returns the final transducer's encoding of the
    padded, composed sequences.
    """
    batch_size = embed_sent[0].dim()[1]
    # Softmax + segment decision
    encodings = self.embed_encoder(embed_sent)
    enc_mask = encodings.mask
    segment_decisions, segment_logsoftmaxes = self.sample_segmentation(
        encodings, batch_size)
    # Some checks
    assert len(encodings) == len(segment_decisions), \
        "Encoding={}, segment={}".format(len(encodings), len(segment_decisions))
    # Buffer for output
    buffers = [[] for _ in range(batch_size)]
    outputs = [[] for _ in range(batch_size)]
    last_segment = [-1 for _ in range(batch_size)]
    length_prior = [0 for _ in range(batch_size)]
    length_prior_enabled = self.length_prior_alpha is not None and self.length_prior_alpha.value(
    ) > 0
    self.segment_composer.set_input_size(batch_size, len(encodings))
    # input
    # NOTE(review): enc_inp is never used below — confirm it is needed.
    enc_inp = encodings if not self.compose_char else embed_sent
    # Loop through all the frames (word / item) in input.
    for j, (encoding, segment_decision) in enumerate(
            zip(encodings, segment_decisions)):
        # For each decision in the batch
        for i, decision in enumerate(segment_decision):
            # If segment for this particular input
            decision = int(decision)
            if decision == SegmentingAction.DELETE.value or \
                    (enc_mask is not None and enc_mask.np_arr[i][j] == 1):
                continue
            # Get the particular encoding for that batch item
            enc_i = dy.pick_batch_elem(encoding, i)
            # Append the encoding for this item to the buffer
            buffers[i].append(enc_i)
            if decision == SegmentingAction.SEGMENT.value:
                # Special case for TailWordSegmentTransformer only
                words = None
                vocab = self.src_sent[i].vocab
                words = self.src_sent[i].words[last_segment[i] + 1:j + 1]
                if vocab is not None:
                    words = "".join(
                        w for w in
                        [vocab[c] for c in words if c != vocab.unk_token])
                else:
                    words = tuple(words)
                # Reducing the [expression] -> expression
                expr_seq = expression_sequence.ExpressionSequence(
                    expr_list=buffers[i])
                transduce_output = self.segment_composer.transduce(
                    expr_seq, words)
                outputs[i].append(transduce_output)
                buffers[i] = []
                # Calculate length prior
                if length_prior_enabled:
                    length_prior[i] += numpy.log(
                        poisson.pmf(j - last_segment[i], self.length_prior))
                last_segment[i] = j
        # Notify the segment transducer to process the next decision
        self.segment_composer.next_item()
    # Padding
    outputs, masks = self.pad(outputs)
    self.segment_decisions = segment_decisions
    self.segment_logsoftmaxes = segment_logsoftmaxes
    self.enc_mask = enc_mask
    # Packing output together
    if self.learn_segmentation:
        self.segment_length_prior = dy.inputTensor(length_prior,
                                                   batched=True)
        if self.use_baseline:
            self.bs = [
                self.baseline(dy.nobackprop(enc)) for enc in encodings
            ]
    if not self.train:
        # Rewrite segmentation
        self.set_report_resource("segmentation", self.segment_decisions)
        self.set_report_input(segment_decisions)
    # Return the encoded batch by the size of [(encode,segment)] * batch_size
    return self.final_transducer(
        expression_sequence.ExpressionSequence(expr_tensor=outputs,
                                               mask=masks))
def predict_one(self, src, encoder_outputs, **kwargs):
    """Beam-search decode a single source sequence.

    kwargs: beam (width K, default 5), mxlen (max output length, 100).
    Returns (paths, scores): K token-id paths (GO stripped) and their
    cumulative log scores.
    """
    K = int(kwargs.get('beam', 5))
    mxlen = int(kwargs.get('mxlen', 100))
    paths = [[Offsets.GO] for _ in range(K)]
    # Which beams are done?
    done = np.array([False] * K)
    scores = np.array([0.0]*K)
    hidden, output_i, context = self.arc_policy(encoder_outputs, self.hsz, beam_width=K)
    num_states = len(hidden)
    rnn_state = self.decoder_rnn.initial_state(hidden)
    self.attn_cache(context)
    src_mask = encoder_outputs.src_mask
    for i in range(mxlen):
        # Feed each beam's last token through the decoder step.
        dst_last = np.array([path[-1] for path in paths]).reshape(1, K)
        embed_i = self.tgt_embeddings.encode(dst_last)[-1]
        embed_i = self.input_i(embed_i, output_i)
        rnn_state = rnn_state.add_input(embed_i)
        rnn_output_i = rnn_state.output()
        output_i = self.attn(rnn_output_i, src_mask)
        wll = self.prediction([output_i])[-1].npvalue()  # (V,) K
        V = wll.shape[0]
        if i > 0:
            # expanded_history = np.expand_dims(scores, -1)
            # done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
            # sll = np.multiply(wll.T, done_mask) + expanded_history
            wll = wll.T
            expanded_history = np.expand_dims(scores, -1)
            # Freeze finished beams: zero their word scores and push
            # everything but the frozen continuation far down.
            done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
            done_mask_inv = (done_mask != 1).astype(np.uint8)
            eos_mask = np.zeros((1, V)).astype(np.uint8)
            mask = ((done_mask & eos_mask) != 1).astype(np.uint8)
            masked_wll = np.multiply(done_mask, wll)
            negged_wll = masked_wll + (done_mask_inv * -1e4)
            removed_eos = np.multiply(mask, negged_wll)
            sll = removed_eos + expanded_history
        else:
            sll = wll.T
        # Global top-K over the flattened (K, V) continuation scores.
        flat_sll = sll.reshape(-1)
        bests = topk(K, flat_sll)
        best_idx_flat = np.array(list(bests.keys()))
        best_beams = best_idx_flat // V
        best_idx = best_idx_flat % V
        new_paths = []
        new_done = []
        hidden = rnn_state.s()
        new_hidden = [[] for _ in range(num_states)]
        for j, best_flat in enumerate(best_idx_flat):
            beam_id = best_beams[j]
            best_word = best_idx[j]
            if done[j]:
                new_paths.append(paths[beam_id] + [Offsets.EOS])
            else:
                new_paths.append(paths[beam_id] + [best_word])
                if best_word == Offsets.EOS:
                    done[j] = True
            # NOTE(review): done is read/written with index j above but
            # propagated with beam_id here — confirm the bookkeeping after
            # beam reordering is intended.
            new_done.append(done[beam_id])
            scores[j] = bests[best_flat]
            # For each path, we need to pick that out and add it to the hiddens
            # This will be (c1, c2, ..., h1, h2, ...)
            for h_i, h in enumerate(hidden):
                new_hidden[h_i].append(dy.pick_batch_elem(h, beam_id))
        done = np.array(new_done)
        new_hidden = [dy.concatenate_to_batch(new_h) for new_h in new_hidden]
        paths = new_paths
        rnn_state = self.decoder_rnn.initial_state(new_hidden)
    paths = np.stack([p[1:] for p in paths])
    return paths, scores
def test_pick_batch_elem(self):
    """Picking element 1 of a two-element lookup batch yields row 1."""
    dy.renew_cg()
    looked_up = dy.lookup_batch(self.p, [0, 1])
    picked = dy.pick_batch_elem(looked_up, 1)
    self.assertTrue(np.allclose(picked.npvalue(), self.pval[1]))