def calc_nll(self, src: Union[batchers.Batch, sent.Sentence], trg: Union[batchers.Batch, sent.Sentence]) -> dy.Expression:
  sub_losses = collections.defaultdict(list)
  for model in self.models:
    for loss_name, loss in model.calc_nll(src, trg).expr_factors.items():
      sub_losses[loss_name].append(loss)
  model_loss = FactoredLossExpr()
  for loss_name, losslist in sub_losses.items():
    # TODO: dy.average(losslist) _or_ dy.esum(losslist) / len(self.models) ?
    #       -- might not be the same if not all models return all losses
    model_loss.add_loss(loss_name, dy.average(losslist))
  return model_loss
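# A minimal standalone sketch (plain DyNet, hypothetical values, not part of the
# ensemble code above) illustrating the TODO: dy.average divides by the number of
# collected expressions, while dividing dy.esum by the ensemble size divides by the
# number of models; the two only agree when every model contributes every loss factor.
import dynet as dy

dy.renew_cg()
# Suppose an ensemble of 3 models, but only 2 of them return an "aux" loss factor.
aux_losses = [dy.scalarInput(2.0), dy.scalarInput(4.0)]
ensemble_size = 3

avg_over_contributors = dy.average(aux_losses)                 # (2 + 4) / 2 = 3.0
avg_over_models = dy.esum(aux_losses) * (1.0 / ensemble_size)  # (2 + 4) / 3 = 2.0

print(avg_over_contributors.value(), avg_over_models.value())  # 3.0 vs. 2.0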
def on_calc_additional_loss(self, trg, generator, generator_loss):
  if self.policy_learning is None:
    return None
  reward = FactoredLossExpr()
  reward.add_loss("generator", -dy.inputTensor(generator_loss.value(), batched=True))
  if self.length_prior is not None:
    reward.add_loss('length_prior', self.length_prior.log_ll(self.seg_size_unpadded))
  reward_value = reward.value()
  if trg.batch_size() == 1:
    reward_value = [reward_value]
  reward_tensor = dy.inputTensor(reward_value, batched=True)
  ### Calculate losses
  try:
    return self.policy_learning.calc_loss(reward_tensor)
  finally:
    self.reward = reward
    if self.train and self.reporter is not None:
      self.reporter.report_process(self)
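# A minimal sketch (hypothetical values, not xnmt code) of the reward batching above:
# dy.inputTensor with batched=True turns a list of per-sentence rewards into a scalar
# expression with one value per batch element, which is why a batch of size 1 has its
# single float wrapped in a one-element list before being passed in.
import dynet as dy

dy.renew_cg()
rewards = [0.3, -1.2, 0.8]                             # one scalar reward per sentence
reward_tensor = dy.inputTensor(rewards, batched=True)  # scalar expression, batch size 3
print(reward_tensor.value())                           # one value per batch element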
def calc_loss(self, translator, src, trg):
  search_outputs = translator.generate_search_output(src, self.search_strategy)
  sign = -1 if self.inv_eval else 1
  total_loss = FactoredLossExpr()
  for search_output in search_outputs:
    # Score each sampled hypothesis against its reference with the evaluation metric
    self.eval_score = []
    for trg_i, sample_i in zip(trg, search_output.word_ids):
      # Remove EOS before scoring
      sample_i = self.remove_eos(sample_i.tolist())
      ref_i = trg_i.words[:trg_i.len_unpadded()]
      score = self.evaluation_metric.evaluate_one_sent(ref_i, sample_i)
      self.eval_score.append(sign * score)
    self.reward = dy.inputTensor(self.eval_score, batched=True)
    # Compose losses
    loss = FactoredLossExpr()
    if self.baseline is not None:
      baseline_loss = []
      losses = []
      for state, logsoft, mask in zip(search_output.state, search_output.logsoftmaxes, search_output.mask):
        bs_score = self.baseline.transform(state)
        baseline_loss.append(dy.squared_distance(self.reward, bs_score))
        loss_i = dy.cmult(logsoft, self.reward - bs_score)
        # Zero out padded time steps before summing
        losses.append(dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
      loss.add_loss("reinforce", dy.sum_elems(dy.esum(losses)))
      loss.add_loss("reinf_baseline", dy.sum_elems(dy.esum(baseline_loss)))
    else:
      loss.add_loss("reinforce", dy.sum_elems(dy.cmult(self.reward, dy.esum(search_output.logsoftmaxes))))
    total_loss.add_factored_loss_expr(loss)
  return total_loss
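# A minimal sketch (hypothetical values, not xnmt code) of one REINFORCE-with-baseline
# term from the loop above: the sampled log-probability is scaled by the baseline-subtracted
# reward, padded positions are masked out, and the baseline itself is regressed toward
# the reward with a squared error.
import dynet as dy

dy.renew_cg()
reward   = dy.inputTensor([0.42, -0.17], batched=True)  # per-sentence reward, batch of 2
logsoft  = dy.inputTensor([-1.3, -0.9], batched=True)   # log-prob of the sampled word
bs_score = dy.inputTensor([0.10, 0.05], batched=True)   # baseline prediction
mask     = [1.0, 0.0]                                   # second sentence is padding at this step

reinforce_i = dy.cmult(dy.cmult(logsoft, reward - bs_score), dy.inputTensor(mask, batched=True))
baseline_i  = dy.squared_distance(reward, bs_score)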
def on_calc_additional_loss(self, trg, generator, generator_loss):
  if self.policy_learning is None:
    return None
  trg_counts = dy.inputTensor([t.len_unpadded() for t in trg], batched=True)
  reward = FactoredLossExpr()
  # Add all rewards coming from the translator
  for loss_key, loss_value in generator_loss.get_nobackprop_loss().items():
    if loss_key == 'mle':
      reward.add_loss('mle', dy.cdiv(-loss_value, trg_counts))
    else:
      reward.add_loss(loss_key, -loss_value)
  if self.length_prior is not None:
    reward.add_loss('seg_lp', self.length_prior.log_ll(self.seg_size_unpadded))
  reward = dy.inputTensor(reward.value(), batched=True)
  ### Calculate losses
  try:
    return self.policy_learning.calc_loss(reward)
  finally:
    self.reward = reward
    if self.reporter is not None:
      self.reporter.report_process(self)
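# A minimal sketch (hypothetical values, not xnmt code) of the per-token reward
# normalization above: the 'mle' factor is a per-sentence summed negative log-likelihood,
# so it is negated and divided by the unpadded target length, while other factors are
# simply negated.
import dynet as dy

dy.renew_cg()
mle_loss   = dy.inputTensor([12.0, 30.0], batched=True)  # summed NLL per sentence
trg_counts = dy.inputTensor([4.0, 10.0], batched=True)   # unpadded target lengths

mle_reward = dy.cdiv(-mle_loss, trg_counts)              # length-normalized reward
print(mle_reward.value())                                # [-3.0, -3.0]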