def add_input(self, dec_state: RNNGDecoderState, actions: List[sent.RNNGAction]):
  action = actions[0] if batchers.is_batched(actions) else actions
  action_type = action.action_type
  if action_type == sent.RNNGAction.Type.GEN:
    # Shifting the embedding of a word
    if self.shift_from_enc:
      # Feed in the decoder based on input string
      return self._perform_gen(dec_state, self.sent_enc[dec_state.word_read])
    else:
      # Feed in the decoder based on the previously generated output / oracle output
      return self._perform_gen(dec_state,
                               self.term_embedder.embed(action.action_content),
                               finish_generating=action.action_content == vocabs.Vocab.ES)
  elif action_type == sent.RNNGAction.Type.REDUCE_LEFT or \
       action_type == sent.RNNGAction.Type.REDUCE_RIGHT:
    # Perform Reduce on left direction or right direction
    # (bug fix: compare action_type, not the action object, against the Type enum)
    return self._perform_reduce(dec_state,
                                action_type == sent.RNNGAction.Type.REDUCE_LEFT,
                                action.action_content)
  elif action_type == sent.RNNGAction.Type.NT:
    # Shifting the embedding of the NT's head
    return self._perform_nt(dec_state, action.action_content)
  elif action_type == sent.RNNGAction.Type.REDUCE_NT:
    return self._perform_reduce_nt(dec_state)
  elif action_type == sent.RNNGAction.Type.NONE:
    return dec_state
  else:
    raise NotImplementedError("Unimplemented for action word:", action)

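# A minimal, self-contained sketch of the dispatch pattern above, using
# hypothetical Action/ActionType stand-ins rather than xnmt's actual classes:
# each structured decoding step is routed to a handler based on the action's
# type tag.
from enum import Enum, auto

class ActionType(Enum):
  GEN = auto()
  REDUCE_LEFT = auto()
  REDUCE_RIGHT = auto()
  NT = auto()
  REDUCE_NT = auto()
  NONE = auto()

class Action:
  def __init__(self, action_type, content=None):
    self.action_type = action_type
    self.content = content

def apply_action(state, action):
  t = action.action_type
  if t == ActionType.GEN:
    return state + [("shift", action.content)]
  elif t in (ActionType.REDUCE_LEFT, ActionType.REDUCE_RIGHT):
    # compare the *type*, not the action object, when picking the direction
    return state + [("reduce", t == ActionType.REDUCE_LEFT)]
  elif t == ActionType.NT:
    return state + [("open-nt", action.content)]
  elif t == ActionType.REDUCE_NT:
    return state + [("close-nt", None)]
  elif t == ActionType.NONE:
    return state
  raise NotImplementedError(f"unhandled action: {action.action_type}")

assert apply_action([], Action(ActionType.GEN, "cat")) == [("shift", "cat")]
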
def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> tt.Tensor:
  if not batchers.is_batched(src):
    src = batchers.ListBatch([src])

  src_inputs = batchers.ListBatch(
    [s[:-1] for s in src],
    mask=batchers.Mask(src.mask.np_arr[:, :-1]) if src.mask else None)
  src_targets = batchers.ListBatch(
    [s[1:] for s in src],
    mask=batchers.Mask(src.mask.np_arr[:, 1:]) if src.mask else None)

  event_trigger.start_sent(src)
  embeddings = self.src_embedder.embed_sent(src_inputs)
  encodings = self.rnn.transduce(embeddings)
  encodings_tensor = encodings.as_tensor()

  encoding_reshaped = tt.merge_time_batch_dims(encodings_tensor)
  seq_len = tt.sent_len(encodings_tensor)
  batch_size = tt.batch_size(encodings_tensor)
  outputs = self.transform.transform(encoding_reshaped)
  # (renamed loop variable so it does not shadow the `sent` module)
  ref_action = np.asarray([single_sent.words for single_sent in src_targets]).reshape((seq_len * batch_size,))
  loss_expr_perstep = self.scorer.calc_loss(outputs, batchers.mark_as_batch(ref_action))
  loss_expr_perstep = tt.unmerge_time_batch_dims(loss_expr_perstep, batch_size)
  loss = tt.aggregate_masked_loss(loss_expr_perstep, src_targets.mask)
  return loss

def _initial_state(self, src):
  if batchers.is_batched(src):
    src = src[0]
  if type(src) == sent.CompoundSentence:
    src = src.sents[0]
  self.src_encoding = self.encoder.transduce(self.src_embedder.embed_sent(src))
  return SimultaneousState(self, encoder_state=None, decoder_state=None)

def _encode_src(self, src: Union[batchers.Batch, sent.Sentence]):
  embeddings = self.src_embedder.embed_sent(src)
  encoding = self.encoder.transduce(embeddings)
  final_state = self.encoder.get_final_states()
  self.attender.init_sent(encoding)
  ss = batchers.mark_as_batch([Vocab.SS] * src.batch_size()) if batchers.is_batched(src) else Vocab.SS
  initial_state = self.decoder.initial_state(final_state, self.trg_embedder.embed(ss))
  return initial_state

def __len__(self):
  if self.expr_list or self.expr_tensor:
    return super(LazyNumpyExpressionSequence, self).__len__()
  else:
    if batchers.is_batched(self.lazy_data):
      return self.lazy_data[0].get_array().shape[1]
    else:
      return self.lazy_data.get_array().shape[1]

def calc_loss(self, src, trg, infer_prediction=False):
  event_trigger.start_sent(src)
  if not batchers.is_batched(src):
    src = batchers.mark_as_batch([src])
  if not batchers.is_batched(trg):
    trg = batchers.mark_as_batch([trg])

  src_words = np.array([[Vocab.SS] + x.words for x in src])
  batch_size, src_len = src_words.shape
  if src.mask is None:
    src_mask = np.zeros((batch_size, src_len), dtype=np.int)
  else:
    src_mask = np.concatenate([np.zeros((batch_size, 1), dtype=np.int),
                               src.mask.np_arr.astype(np.int)], axis=1)
  src_embeddings = self.sentence_block_embed(self.src_embedder.embeddings, src_words, src_mask)
  src_embeddings = self.make_input_embedding(src_embeddings, src_len)

  trg_words = np.array(list(map(lambda x: [Vocab.SS] + x.words[:-1], trg)))
  batch_size, trg_len = trg_words.shape
  if trg.mask is None:
    trg_mask = np.zeros((batch_size, trg_len), dtype=np.int)
  else:
    trg_mask = trg.mask.np_arr.astype(np.int)
  trg_embeddings = self.sentence_block_embed(self.trg_embedder.embeddings, trg_words, trg_mask)
  trg_embeddings = self.make_input_embedding(trg_embeddings, trg_len)

  xx_mask = self.make_attention_mask(src_mask, src_mask)
  xy_mask = self.make_attention_mask(trg_mask, src_mask)
  yy_mask = self.make_attention_mask(trg_mask, trg_mask)
  yy_mask *= self.make_history_mask(trg_mask)

  z_blocks = self.encoder.transduce(src_embeddings, xx_mask)
  h_block = self.decoder(trg_embeddings, z_blocks, xy_mask, yy_mask)

  if infer_prediction:
    y_len = h_block.dim()[0][1]
    last_col = dy.pick(h_block, dim=1, index=y_len - 1)
    logits = self.decoder.output(last_col)
    return logits

  ref_list = list(itertools.chain.from_iterable(map(lambda x: x.words, trg)))
  concat_t_block = (1 - trg_mask.ravel()).reshape(-1) * np.array(ref_list)
  loss = self.decoder.output_and_loss(h_block, concat_t_block)
  return FactoredLossExpr({"mle": loss})

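# A plain-numpy sketch (hypothetical helpers, not xnmt's actual implementation)
# of how the attention masks used above can be built: make_attention_mask
# combines the query- and key-side padding masks, and make_history_mask adds
# the causal constraint that is multiplied into yy_mask.
import numpy as np

def make_attention_mask(q_mask, k_mask):
  # q_mask: (batch, len_q), k_mask: (batch, len_k); 1 marks padding.
  # Result is 1 where both the query and key positions are real tokens.
  return (1 - q_mask)[:, :, None] * (1 - k_mask)[:, None, :]

def make_history_mask(mask):
  # Lower-triangular (batch, len, len) matrix: position i may attend to j <= i.
  seq_len = mask.shape[1]
  history = np.tril(np.ones((seq_len, seq_len), dtype=mask.dtype))
  return np.broadcast_to(history, (mask.shape[0], seq_len, seq_len))

trg_mask = np.array([[0, 0, 1]])  # one sentence, last position padded
yy = make_attention_mask(trg_mask, trg_mask) * make_history_mask(trg_mask)
assert yy[0, 1, 0] == 1 and yy[0, 0, 1] == 0  # causal: step 1 sees 0, not vice versa
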
def generate(self,
             src: batchers.Batch,
             search_strategy: search_strategies.SearchStrategy,
             forced_trg_ids: batchers.Batch = None) -> Sequence[sent.Sentence]:
  event_trigger.start_sent(src)
  if not batchers.is_batched(src):
    src = batchers.mark_as_batch([src])
  outputs = []

  trg = sent.SimpleSentence([0])
  if not batchers.is_batched(trg):
    trg = batchers.mark_as_batch([trg])

  output_actions = []
  score = 0.
  # TODO Fix this with generate_one_step and use the appropriate search_strategy
  self.max_len = 100  # This is a temporary hack
  for _ in range(self.max_len):
    dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE,
                check_validity=settings.CHECK_VALIDITY)
    log_prob_tail = self.calc_loss(src, trg, loss_cal=None, infer_prediction=True)
    ys = np.argmax(log_prob_tail.npvalue(), axis=0).astype('i')
    if ys == vocabs.Vocab.ES:
      output_actions.append(ys)
      break
    output_actions.append(ys)
    trg = sent.SimpleSentence(words=output_actions + [0])
    if not batchers.is_batched(trg):
      trg = batchers.mark_as_batch([trg])

  # Append output to the outputs
  if hasattr(self, "trg_vocab") and self.trg_vocab is not None:
    outputs.append(sent.SimpleSentence(words=output_actions, vocab=self.trg_vocab))
  else:
    outputs.append((output_actions, score))
  return outputs

def _select_ref_words(sent, index, truncate_masked=False):
  if truncate_masked:
    mask = sent.mask if batchers.is_batched(sent) else None
    if not batchers.is_batched(sent):
      return sent[index]
    else:
      ret = []
      found_masked = False
      for (j, single_trg) in enumerate(sent):
        if mask is None or mask.np_arr[j, index] == 0 or np.sum(mask.np_arr[:, index]) == mask.np_arr.shape[0]:
          assert not found_masked, "sentences must be sorted by decreasing target length"
          ret.append(single_trg[index])
        else:
          found_masked = True
      return batchers.mark_as_batch(ret)
  else:
    if not batchers.is_batched(sent):
      return sent[index]
    else:
      return batchers.mark_as_batch([single_trg[index] for single_trg in sent])

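# A self-contained sketch of the selection logic above, using plain lists and a
# numpy mask array (1 = padded) instead of xnmt's Batch/Mask objects. With
# truncate_masked=True, padded sentences are dropped from the returned batch,
# which only works if sentences are sorted by decreasing length. (The original
# additionally keeps time steps at which *every* sentence is masked.)
import numpy as np

def select_ref_words(batch_words, mask, index, truncate_masked=False):
  if not truncate_masked:
    return [s[index] for s in batch_words]
  ret = []
  found_masked = False
  for j, s in enumerate(batch_words):
    if mask is None or mask[j, index] == 0:
      assert not found_masked, "sentences must be sorted by decreasing target length"
      ret.append(s[index])
    else:
      found_masked = True
  return ret

batch = [[5, 6, 7], [5, 6, 0]]           # second sentence padded at position 2
mask = np.array([[0, 0, 0], [0, 0, 1]])
assert select_ref_words(batch, mask, 2, truncate_masked=True) == [7]
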
def embed_factor_sent(self, x, speech_len):
  # single mode
  if not batchers.is_batched(x):
    embeddings = [self.embed_factor(word) for word in x]
  # minibatch mode
  else:
    embeddings = []
    seq_len = x.sent_len()
    for single_sent in x:
      assert single_sent.sent_len() == seq_len
    # for word_i in range(seq_len):
    for word_i in range(speech_len):
      batch = batchers.mark_as_batch([single_sent[word_i] for single_sent in x])
      embeddings.append(self.embed_factor(batch))
  return expression_seqs.ExpressionSequence(expr_list=embeddings,
                                            mask=x.mask if batchers.is_batched(x) else None)

def __getitem__(self, key):
  if self.expr_list or self.expr_tensor:
    return super().__getitem__(key)
  else:
    if batchers.is_batched(self.lazy_data):
      return dy.inputTensor(
        [self.lazy_data[batch].get_array()[:, key] for batch in range(self.lazy_data.batch_size())],
        batched=True)
    else:
      return dy.inputTensor(self.lazy_data.get_array()[:, key], batched=False)

def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> LossExpr:
  if isinstance(src, batchers.CompoundBatch):
    src = src.batches[0]
  # Encode the sentence
  initial_state = self._initial_state(src)
  dec_state = initial_state
  trg_mask = trg.mask if batchers.is_batched(trg) else None
  cur_losses = []
  seq_len = trg.sent_len()

  # Sanity check if requested
  if settings.CHECK_VALIDITY and batchers.is_batched(src):
    for j, single_trg in enumerate(trg):
      # assert consistent length
      assert single_trg.sent_len() == seq_len
      # assert exactly one unmasked ES token
      assert 1 == len([i for i in range(seq_len)
                       if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                       and single_trg[i] == vocabs.Vocab.ES])

  input_word = None
  for i in range(seq_len):
    ref_word = self._select_ref_words(trg, i, truncate_masked=self.truncate_dec_batches)
    if input_word is not None:
      dec_state = self.decoder.add_input(dec_state, input_word)
    rnn_output = dec_state.as_vector()
    dec_state.context = self.attender.calc_context(rnn_output)
    word_loss = self.decoder.calc_loss(dec_state, ref_word)
    if not self.truncate_dec_batches and batchers.is_batched(src) and trg_mask is not None:
      word_loss = trg_mask.cmult_by_timestep_expr(word_loss, i, inverse=True)
    cur_losses.append(word_loss)
    input_word = ref_word

  units = [t.len_unpadded() for t in trg]
  return LossExpr(dy.esum(cur_losses), units)

def embed(self, x: Union[numbers.Integral, batchers.Batch]) -> tt.Tensor:
  if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
    batch_size = x.batch_size() if batchers.is_batched(x) else 1
    self.word_id_mask = [
      set(np.random.choice(self.vocab_size, int(self.vocab_size * self.word_dropout), replace=False))
      for _ in range(batch_size)]
  # single mode
  if not batchers.is_batched(x):
    if self.train and self.word_id_mask and x in self.word_id_mask[0]:
      ret = tt.zeroes(hidden_dim=self.emb_dim)
    else:
      ret = self.embeddings(torch.tensor(x, dtype=torch.long).to(xnmt.device))
      ret = ret.unsqueeze(0)
      if self.fix_norm is not None:
        ret = torch.div(ret, torch.norm(ret))
        if self.fix_norm != 1:
          ret = torch.mul(ret, self.fix_norm)
  # minibatch mode
  else:
    ret = self.embeddings(torch.tensor(x, dtype=torch.long).to(xnmt.device))
    if self.fix_norm is not None:
      ret = torch.div(ret, torch.norm(ret, dim=1).unsqueeze(1))
      if self.fix_norm != 1:
        # bug fix: original called torch.mul(self.fix_norm) without the tensor argument
        ret = torch.mul(ret, self.fix_norm)
    if self.train and self.word_id_mask and any(x[i] in self.word_id_mask[i] for i in range(x.batch_size())):
      dropout_mask = torch.tensor(
        [[0.0] * self.emb_dim if x[i] in self.word_id_mask[i] else [1.0] * self.emb_dim
         for i in range(x.batch_size())],
        device=xnmt.device)
      ret = torch.mul(ret, dropout_mask)
  if self.train and self.weight_noise > 0.0:
    noise = torch.autograd.Variable(
      ret.data.new(ret.size(), device=xnmt.device).normal_(0.0, self.weight_noise))
    ret = ret + noise
  return ret

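# A sketch of the word-dropout scheme the embedders above implement: for each
# sentence in the batch, a fixed fraction of the vocabulary is sampled once,
# and occurrences of those word types are embedded as zero vectors. Plain
# numpy, hypothetical helper names.
import numpy as np

def sample_word_id_masks(vocab_size, word_dropout, batch_size, rng):
  return [set(rng.choice(vocab_size, int(vocab_size * word_dropout), replace=False))
          for _ in range(batch_size)]

def embed_with_word_dropout(emb_table, word_ids, word_id_masks):
  # emb_table: (vocab, dim); word_ids: one id per batch element.
  out = emb_table[np.asarray(word_ids)]  # fancy indexing copies, safe to modify
  for i, w in enumerate(word_ids):
    if w in word_id_masks[i]:
      out[i] = 0.0  # drop the whole word type for this sentence
  return out

rng = np.random.default_rng(0)
table = rng.normal(size=(10, 4))
masks = sample_word_id_masks(10, 0.3, batch_size=2, rng=rng)
print(embed_with_word_dropout(table, [1, 2], masks).shape)  # (2, 4)
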
def generate(self, src, forced_trg_ids):
  assert not forced_trg_ids
  assert batchers.is_batched(src) and src.batch_size() == 1, "batched generation not fully implemented"
  src = src[0]
  # Generating outputs
  outputs = []
  event_trigger.start_sent(src)
  embeddings = self.src_embedder.embed_sent(src)
  encodings = self.encoder.transduce(embeddings)
  if self.mode in ["avg_mlp", "final_mlp"]:
    if self.generate_per_step:
      assert self.mode == "avg_mlp", "final_mlp not supported with generate_per_step=True"
      scores = [dy.logistic(self.output_layer.transform(enc_i)) for enc_i in encodings]
    else:
      if self.mode == "avg_mlp":
        encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) * (1.0 / encodings.dim()[0][1])
      elif self.mode == "final_mlp":
        encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
      scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
  elif self.mode == "lin_sum_sig":
    enc_lin = []
    for step_i, enc_i in enumerate(encodings):
      step_linear = self.output_layer.transform(enc_i)
      if encodings.mask and np.sum(encodings.mask.np_arr[:, step_i]) > 0:
        step_linear = dy.cmult(step_linear,
                               dy.inputTensor(1.0 - encodings.mask.np_arr[:, step_i], batched=True))
      enc_lin.append(step_linear)
    if self.generate_per_step:
      scores = [dy.logistic(enc_i) for enc_i in enc_lin]
    else:
      if encodings.mask:
        encoding_fixed_size = dy.cdiv(dy.esum(enc_lin),
                                      dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True))
      else:
        encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
      scores = dy.logistic(encoding_fixed_size)
  else:
    raise ValueError(f"unknown mode '{self.mode}'")
  if self.generate_per_step:
    output_actions = [np.argmax(score_i.npvalue()) for score_i in scores]
    score = np.sum([np.max(score_i.npvalue()) for score_i in scores])
    outputs.append(sent.SimpleSentence(words=output_actions,
                                       idx=src.idx,
                                       vocab=getattr(self.trg_reader, "vocab", None),
                                       score=score,
                                       output_procs=self.trg_reader.output_procs))
  else:
    scores_arr = scores.npvalue()
    output_actions = list(np.nonzero(scores_arr > 0.5)[0])
    score = np.sum(scores_arr[scores_arr > 0.5])
    outputs.append(sent.SimpleSentence(words=output_actions,
                                       idx=src.idx,
                                       vocab=getattr(self.trg_reader, "vocab", None),
                                       score=score,
                                       output_procs=self.trg_reader.output_procs))
  return outputs

def calc_nll(self, src, trg):
  assert batchers.is_batched(src) and batchers.is_batched(trg)
  batch_size, encodings, outputs, seq_len = self._encode_src(src)

  if trg.sent_len() != seq_len:
    if self.auto_cut_pad:
      trg = self._cut_or_pad_targets(seq_len, trg)
    else:
      raise ValueError(f"src/trg length do not match: {seq_len} != {len(trg[0])}")

  ref_action = np.asarray([trg_sent.words for trg_sent in trg]).reshape((seq_len * batch_size,))
  loss_expr_perstep = self.scorer.calc_loss(outputs, batchers.mark_as_batch(ref_action))
  # loss_expr_perstep = dy.pickneglogsoftmax_batch(outputs, ref_action)
  loss_expr_perstep = dy.reshape(loss_expr_perstep, (seq_len,), batch_size=batch_size)
  if trg.mask:
    loss_expr_perstep = dy.cmult(loss_expr_perstep,
                                 dy.inputTensor(1.0 - trg.mask.np_arr.T, batched=True))
  loss_expr = dy.sum_elems(loss_expr_perstep)
  return loss_expr

def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> LossExpr:
  if batchers.is_batched(trg):
    units = [t.len_unpadded() for t in trg]
    ids = batchers.ListBatch([t.value for t in trg])
  else:
    units = trg.len_unpadded()
    ids = trg.value
  h = self._encode_src(src)
  loss_expr = self.scorer.calc_loss(h, ids)
  return LossExpr(loss_expr, units)

def calc_loss(self,
              x: dy.Expression,
              y: Union[numbers.Integral, List[numbers.Integral]]) -> dy.Expression:
  if self.can_loss_be_derived_from_scores():
    scores = self.calc_scores(x)
    # single mode
    if not batchers.is_batched(y):
      loss = dy.pickneglogsoftmax(scores, y)
    # minibatch mode
    else:
      loss = dy.pickneglogsoftmax_batch(scores, y)
  else:
    log_prob = self.calc_log_probs(x)
    if not batchers.is_batched(y):
      loss = -dy.pick(log_prob, y)
    else:
      loss = -dy.pick_batch(log_prob, y)
  # can_loss_be_derived_from_scores() is expected to return False whenever
  # label_smoothing > 0, so log_prob is defined when this branch is taken
  if self.label_smoothing > 0:
    ls_loss = -dy.mean_elems(log_prob)
    loss = ((1 - self.label_smoothing) * loss) + (self.label_smoothing * ls_loss)
  return loss

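# The label-smoothed loss above interpolates the usual NLL with the mean
# negative log-probability over the whole vocabulary:
#   loss = (1 - eps) * (-log p[y]) + eps * mean_v(-log p[v])
# A numpy sketch for a single softmax:
import numpy as np

def label_smoothed_nll(logits, y, eps):
  log_probs = logits - np.log(np.sum(np.exp(logits)))  # log-softmax
  nll = -log_probs[y]
  ls = -np.mean(log_probs)
  return (1 - eps) * nll + eps * ls

logits = np.array([2.0, 0.5, -1.0])
assert np.isclose(label_smoothed_nll(logits, 0, 0.0),
                  -logits[0] + np.log(np.exp(logits).sum()))
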
def embed(self, x: Union[batchers.Batch, numbers.Integral]) -> dy.Expression:
  if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
    batch_size = x.batch_size() if batchers.is_batched(x) else 1
    self.word_id_mask = [
      set(np.random.choice(self.vocab_size, int(self.vocab_size * self.word_dropout), replace=False))
      for _ in range(batch_size)]
  emb_e = dy.parameter(self.embeddings)
  # single mode
  if not batchers.is_batched(x):
    if self.train and self.word_id_mask and x in self.word_id_mask[0]:
      ret = dy.zeros((self.emb_dim,))
    else:
      ret = dy.pick(emb_e, index=x)
      if self.fix_norm is not None:
        ret = dy.cdiv(ret, dy.l2_norm(ret))
        if self.fix_norm != 1:
          ret *= self.fix_norm
  # minibatch mode
  else:
    ret = dy.pick_batch(emb_e, x)
    if self.fix_norm is not None:
      ret = dy.cdiv(ret, dy.l2_norm(ret))
      if self.fix_norm != 1:
        ret *= self.fix_norm
    if self.train and self.word_id_mask and any(x[i] in self.word_id_mask[i] for i in range(x.batch_size())):
      dropout_mask = dy.inputTensor(
        np.transpose([[0.0] * self.emb_dim if x[i] in self.word_id_mask[i] else [1.0] * self.emb_dim
                      for i in range(x.batch_size())]),
        batched=True)
      ret = dy.cmult(ret, dropout_mask)
  if self.train and self.weight_noise > 0.0:
    ret = dy.noise(ret, self.weight_noise)
  return ret

def calc_nll(self, src, trg):
  event_trigger.start_sent(src)
  if isinstance(src, batchers.CompoundBatch):
    src, _ = src.batches
  initial_state = self._encode_src(src)
  dec_state = initial_state
  trg_mask = trg.mask if batchers.is_batched(trg) else None
  losses = []
  seq_len = trg.sent_len()
  if batchers.is_batched(src):
    for j, single_trg in enumerate(trg):
      assert single_trg.sent_len() == seq_len  # assert consistent length
      assert 1 == len([i for i in range(seq_len)
                       if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                       and single_trg[i] == vocabs.Vocab.ES])  # assert exactly one unmasked ES token

  prev_ref_word = None
  for i in range(seq_len):
    if not batchers.is_batched(trg):
      ref_word = trg[i]
    else:
      ref_word = batchers.mark_as_batch([single_trg[i] for single_trg in trg])
    word_loss = self.calc_loss_one_step(dec_state=dec_state,
                                        batch_size=ref_word.batch_size(),
                                        ref_action=ref_word,
                                        prev_ref_action=prev_ref_word,
                                        mode=self.mode_translate)
    if batchers.is_batched(src) and trg_mask is not None:
      word_loss = trg_mask.cmult_by_timestep_expr(word_loss, i, inverse=True)
    losses.append(word_loss)
    prev_ref_word = ref_word

  return dy.esum(losses)

def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> tt.Tensor:
  assert batchers.is_batched(src) and batchers.is_batched(trg)
  batch_size, encodings, outputs, seq_len = self._encode_src(src)
  if trg.sent_len() != seq_len:
    if self.auto_cut_pad:
      trg = self._cut_or_pad_targets(seq_len, trg)
    else:
      raise ValueError(f"src/trg length do not match: {seq_len} != {trg.sent_len()}")
  ref_action = np.asarray([trg_sent.words for trg_sent in trg]).reshape((seq_len * batch_size,))
  loss_expr_perstep = self.scorer.calc_loss(outputs, batchers.mark_as_batch(ref_action))
  loss_expr_perstep = tt.unmerge_time_batch_dims(loss_expr_perstep, batch_size)
  loss_expr = tt.aggregate_masked_loss(loss_expr_perstep, trg.mask)
  return loss_expr

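# A numpy sketch of the masked aggregation that tt.aggregate_masked_loss is
# used for above (assumed semantics, not the actual tt implementation): zero
# out per-step losses at padded positions (mask value 1), then sum over time,
# giving one loss per sentence.
import numpy as np

def aggregate_masked_loss(loss_per_step, mask):
  # loss_per_step, mask: (batch, time); mask is None for unpadded batches.
  if mask is not None:
    loss_per_step = loss_per_step * (1.0 - mask)
  return loss_per_step.sum(axis=1)

loss = np.array([[0.5, 0.4, 0.3]])
mask = np.array([[0.0, 0.0, 1.0]])  # last step is padding
assert np.isclose(aggregate_masked_loss(loss, mask)[0], 0.9)
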
def calc_nll(self, src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> dy.Expression:
  event_trigger.start_sent(src)
  # if isinstance(src, batchers.CompoundBatch): src = src.batches[0]
  # Encode the sentence
  initial_state = self._encode_src(src)
  dec_state = initial_state
  trg_mask = trg.mask if batchers.is_batched(trg) else None
  losses = []
  seq_len = trg.sent_len()
  if settings.CHECK_VALIDITY and batchers.is_batched(src):
    for j, single_trg in enumerate(trg):
      assert single_trg.sent_len() == seq_len  # assert consistent length
      assert 1 == len([i for i in range(seq_len)
                       if (trg_mask is None or trg_mask.np_arr[j, i] == 0)
                       and single_trg[i] == Vocab.ES])  # assert exactly one unmasked ES token

  input_word = None
  for i in range(seq_len):
    ref_word = DefaultTranslator._select_ref_words(trg, i, truncate_masked=self.truncate_dec_batches)
    if self.truncate_dec_batches and batchers.is_batched(ref_word):
      dec_state.rnn_state, ref_word = batchers.truncate_batches(dec_state.rnn_state, ref_word)
    if input_word is not None:
      dec_state = self.decoder.add_input(dec_state, self.trg_embedder.embed(input_word))
    rnn_output = dec_state.rnn_state.output()
    dec_state.context = self.attender.calc_context(rnn_output)
    word_loss = self.decoder.calc_loss(dec_state, ref_word)
    if not self.truncate_dec_batches and batchers.is_batched(src) and trg_mask is not None:
      word_loss = trg_mask.cmult_by_timestep_expr(word_loss, i, inverse=True)
    losses.append(word_loss)
    input_word = ref_word

  if self.truncate_dec_batches:
    loss_expr = dy.esum([dy.sum_batches(wl) for wl in losses])
  else:
    loss_expr = dy.esum(losses)
  return loss_expr

def embed_factor(self, x):
  if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
    batch_size = x.batch_size() if batchers.is_batched(x) else 1
    self.word_id_mask = [
      set(np.random.choice(self.vocab_size, int(self.vocab_size * self.word_dropout), replace=False))
      for _ in range(batch_size)]
  # single mode
  if not batchers.is_batched(x):
    if self.train and self.word_id_mask and x in self.word_id_mask[0]:
      ret = dy.zeros((self.fact_emb_dim,))
    else:
      ret = self.embeddings[x]
      if self.fix_norm is not None:
        ret = dy.cdiv(ret, dy.l2_norm(ret))
        if self.fix_norm != 1:
          ret *= self.fix_norm
  # minibatch mode
  else:
    ret = self.embeddings.batch(x)
    if self.fix_norm is not None:
      ret = dy.cdiv(ret, dy.l2_norm(ret))
      if self.fix_norm != 1:
        ret *= self.fix_norm
    if self.train and self.word_id_mask and any(x[i] in self.word_id_mask[i] for i in range(x.batch_size())):
      dropout_mask = dy.inputTensor(
        np.transpose([[0.0] * self.fact_emb_dim if x[i] in self.word_id_mask[i] else [1.0] * self.fact_emb_dim
                      for i in range(x.batch_size())]),
        batched=True)
      ret = dy.cmult(ret, dropout_mask)
  if self.train and self.weight_noise > 0.0:
    ret = dy.noise(ret, self.weight_noise)
  return ret

def embed_sent(self, x: Any) -> expression_seqs.ExpressionSequence:
  """Embed a full sentence worth of words. By default, just do a for loop.

  Args:
    x: This will generally be a list of word IDs, but could also be a list of strings or some other format.
       It could also be batched, in which case it will be a (possibly masked) :class:`xnmt.batcher.Batch` object

  Returns:
    An expression sequence representing vectors of each word in the input.
  """
  # single mode
  if not batchers.is_batched(x):
    embeddings = [self.embed(word) for word in x]
  # minibatch mode
  else:
    embeddings = []
    seq_len = x.sent_len()
    for single_sent in x:
      assert single_sent.sent_len() == seq_len
    for word_i in range(seq_len):
      batch = batchers.mark_as_batch([single_sent[word_i] for single_sent in x])
      embeddings.append(self.embed(batch))
  return expression_seqs.ExpressionSequence(expr_list=embeddings,
                                            mask=x.mask if batchers.is_batched(x) else None)

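# The minibatch branch above embeds one time step at a time: the batch of
# sentences is transposed into per-position word batches so a single embedding
# lookup serves the whole batch. A minimal sketch of that transposition:
def timestep_batches(sentences):
  seq_len = len(sentences[0])
  assert all(len(s) == seq_len for s in sentences), "batch must be padded to equal length"
  return [[s[i] for s in sentences] for i in range(seq_len)]

batch = [[4, 8, 2], [7, 3, 2]]
assert timestep_batches(batch) == [[4, 7], [8, 3], [2, 2]]
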
def calc_loss(self, x: dy.Expression, y: Union[int, List[int]]) -> dy.Expression:
  scores = self.calc_scores(x)
  if self.label_smoothing == 0.0:
    # single mode
    if not batchers.is_batched(y):
      loss = dy.pickneglogsoftmax(scores, y)
    # minibatch mode
    else:
      loss = dy.pickneglogsoftmax_batch(scores, y)
  else:
    log_prob = dy.log_softmax(scores)
    if not batchers.is_batched(y):
      pre_loss = -dy.pick(log_prob, y)
    else:
      pre_loss = -dy.pick_batch(log_prob, y)
    ls_loss = -dy.mean_elems(log_prob)
    loss = ((1 - self.label_smoothing) * pre_loss) + (self.label_smoothing * ls_loss)
  return loss

def embed(self, x: Union[batchers.Batch, numbers.Integral]) -> dy.Expression:
  """
  Embed a single word in a sentence.

  :param x: A word id.
  :return: Embedded word.
  """
  ret = self._embed_word(x, batchers.is_batched(x))
  ## Applying Fix normalization
  if self.fix_norm is not None:
    ret = dy.cdiv(ret, dy.l2_norm(ret)) * self.fix_norm
  ## Weight noise only when training
  if self.train and self.weight_noise > 0.0:
    ret = dy.noise(ret, self.weight_noise)
  return ret

def __exit__(self, et, ev, traceback):
  if et is not None:  # exception occurred
    logger.error("------ Error Report ------")
    for key, val in self.args.items():
      logger.error(f"*** {key} ***")
      if callable(val):
        val()
      elif batchers.is_batched(val):
        for sent in val:
          if hasattr(sent, "idx"):
            print("{:>10}. {}".format(sent.idx, str(sent)[:100]))
          else:
            print("{}".format(str(sent)))
      else:
        logger.error(str(val))

def generate_output(self, translator, initial_state, src_length=None, forced_trg_ids=None):
  # Output variables
  score = []
  word_ids = []
  attentions = []
  logsoftmaxes = []
  states = []
  masks = []
  # Search Variables
  done = None
  current_state = initial_state
  for length in range(self.max_len):
    prev_word = word_ids[length - 1] if length > 0 else None
    current_output = translator.generate_one_step(prev_word, current_state)
    current_state = current_output.state
    if forced_trg_ids is None:
      word_id = np.argmax(current_output.logsoftmax.npvalue(), axis=0)
      if len(word_id.shape) == 2:
        word_id = word_id[0]
    else:
      if batchers.is_batched(forced_trg_ids):
        word_id = [forced_trg_ids[i][length] for i in range(len(forced_trg_ids))]
      else:
        word_id = [forced_trg_ids[length]]
    logsoft = dy.pick_batch(current_output.logsoftmax, word_id)
    if done is not None:
      word_id = [word_id[i] if not done[i] else Vocab.ES for i in range(len(done))]
      # masking for logsoftmax
      mask = [1 if not done[i] else 0 for i in range(len(done))]
      logsoft = dy.cmult(logsoft, dy.inputTensor(mask, batched=True))
      masks.append(mask)
    # Packing outputs
    score.append(logsoft.npvalue())
    word_ids.append(word_id)
    attentions.append(current_output.attention)
    logsoftmaxes.append(dy.pick_batch(current_output.logsoftmax, word_id))
    states.append(translator.get_nobp_state(current_state))
    # Check if we are done.
    done = [x == Vocab.ES for x in word_id]
    if all(done):
      break
  masks.insert(0, [1 for _ in range(len(done))])
  words = np.stack(word_ids, axis=1)
  score = np.sum(score, axis=0)
  return [SearchOutput(words, attentions, score, logsoftmaxes, states, masks)]

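# A toy greedy-decoding loop showing the done-masking bookkeeping used above:
# once a hypothesis emits ES, its later outputs are forced to ES and its
# per-step scores are masked to zero. The `step` function is a hypothetical
# stand-in for translator.generate_one_step.
import numpy as np

ES = 0

def greedy_decode(step, batch_size, max_len):
  done = None
  word_ids, masks, score = [], [], 0.0
  for length in range(max_len):
    log_probs = step(word_ids[-1] if word_ids else None)  # (vocab, batch)
    word_id = list(np.argmax(log_probs, axis=0))
    step_scores = log_probs[word_id, np.arange(batch_size)]
    if done is not None:
      word_id = [w if not d else ES for w, d in zip(word_id, done)]
      mask = [0 if d else 1 for d in done]
      step_scores = step_scores * np.array(mask)
      masks.append(mask)
    score += step_scores.sum()
    word_ids.append(word_id)
    done = [w == ES for w in word_id]
    if all(done):
      break
  masks.insert(0, [1] * batch_size)
  return np.array(word_ids).T, score, masks

outs = iter([np.array([[0.1, 0.9], [0.9, 0.1]]),   # step 1: picks [1, 0]
             np.array([[0.9, 0.9], [0.1, 0.1]])])  # step 2: picks [0, 0]
words, score, masks = greedy_decode(lambda prev: next(outs), batch_size=2, max_len=5)
assert words.tolist() == [[1, 0], [0, 0]]
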
def embed_speech_sent(self, x):
  # TODO refactor: seems a bit too many special cases that need to be distinguished
  # x = x.batches[0]
  batched = batchers.is_batched(x)
  first_sent = x[0] if batched else x
  if hasattr(first_sent, "get_array"):
    if not batched:
      return expression_seqs.LazyNumpyExpressionSequence(lazy_data=x.get_array())
    else:
      return expression_seqs.LazyNumpyExpressionSequence(
        lazy_data=batchers.mark_as_batch([s for s in x]), mask=x.mask)
  else:
    raise ValueError("!! Expected to use above")
    # unreachable, and `embeddings` is never defined in this code path:
    # return expression_seqs.ExpressionSequence(expr_list=embeddings, mask=x.mask)

def embed_sent(self, x: Any) -> expression_seqs.ExpressionSequence:
  """Embed a full sentence worth of words. By default, just do a for loop.

  Args:
    x: This will generally be a list of word IDs, but could also be a list of strings or some other format.
       It could also be batched, in which case it will be a (possibly masked) :class:`xnmt.batcher.Batch` object

  Returns:
    An expression sequence representing vectors of each word in the input.
  """
  # single mode
  if not batchers.is_batched(x):
    expr = expression_seqs.ExpressionSequence(expr_list=[self.embed(word) for word in x])
  # minibatch mode
  elif type(self) == LookupEmbedder:
    embeddings = []
    for word_i in range(x.sent_len()):
      batch = batchers.mark_as_batch([single_sent[word_i] for single_sent in x])
      embeddings.append(self.embed(batch))
    expr = expression_seqs.ExpressionSequence(expr_list=embeddings, mask=x.mask)
  else:
    assert type(x[0]) == sent.SegmentedSentence, "Need to use CharFromWordTextReader for non standard embeddings."
    embeddings = []
    all_embeddings = []
    for sentence in x:
      embedding = []
      for i in range(sentence.len_unpadded()):
        embed_word = self.embed(sentence.words[i])
        embedding.append(embed_word)
        all_embeddings.append(embed_word)
      embeddings.append(embedding)
    # Useful when using dy.autobatch
    dy.forward(all_embeddings)
    all_embeddings.clear()
    # Pad the results
    expr = batchers.pad_embedding(embeddings)
  return expr

def embed_sent(self, x: sent.Sentence) -> expression_seqs.ExpressionSequence:
  # TODO refactor: seems a bit too many special cases that need to be distinguished
  batched = batchers.is_batched(x)
  first_sent = x[0] if batched else x
  if hasattr(first_sent, "get_array"):
    if not batched:
      return expression_seqs.LazyNumpyExpressionSequence(lazy_data=x.get_array())
    else:
      return expression_seqs.LazyNumpyExpressionSequence(
        lazy_data=batchers.mark_as_batch([s for s in x]), mask=x.mask)
  else:
    if not batched:
      embeddings = [self.embed(word) for word in x]
    else:
      embeddings = []
      for word_i in range(x.sent_len()):
        embeddings.append(self.embed(batchers.mark_as_batch([single_sent[word_i] for single_sent in x])))
    return expression_seqs.ExpressionSequence(expr_list=embeddings, mask=x.mask)

def generate(self, src: Union[batchers.Batch, sent.Sentence], normalize_scores: bool = False):
  if not batchers.is_batched(src):
    src = batchers.mark_as_batch([src])
  h = self._encode_src(src)
  best_words, best_scores = self.scorer.best_k(h, k=1, normalize_scores=normalize_scores)
  assert best_words.shape == (1, src.batch_size())
  assert best_scores.shape == (1, src.batch_size())
  outputs = []
  for batch_i in range(src.batch_size()):
    if src.batch_size() > 1:
      word = best_words[0, batch_i]
      score = best_scores[0, batch_i]
    else:
      word = best_words[0]
      score = best_scores[0]
    outputs.append(sent.ScalarSentence(value=word, score=score))
  return outputs