def calc_nll(self,
             src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> losses.LossExpr:
  # Sum the NLL expressions and token counts of all ensemble members.
  loss_values = [model.calc_nll(src, trg).loss_value() for model in self.models]
  ret_expr = []
  ret_units = []
  for loss_expr, unit in loss_values:
    ret_expr.append(loss_expr)
    ret_units.append(unit)
  return losses.LossExpr(dy.esum(ret_expr), np.sum(ret_units))
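# Illustrative sketch only (not part of the ensemble class): summing the per-model NLL
# expressions, as calc_nll above does, scores the target under the product of the member
# models' probabilities (a product-of-experts combination). A minimal numpy check of that
# identity, with made-up per-token distributions:
import numpy as np

def _ensemble_nll_sketch():
  p1 = np.array([0.7, 0.2, 0.1])                      # hypothetical model-1 distribution
  p2 = np.array([0.5, 0.3, 0.2])                      # hypothetical model-2 distribution
  target = 0
  nll_sum = -np.log(p1[target]) - np.log(p2[target])  # what summing the LossExprs computes
  nll_product = -np.log(p1[target] * p2[target])      # NLL under the product of experts
  assert np.isclose(nll_sum, nll_product)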
def _perform_calc_loss(self,
                       model: 'model_base.ConditionedModel',
                       src: Union[sent.Sentence, 'batchers.Batch'],
                       trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  batch_size = trg.batch_size()
  uniques = [set() for _ in range(batch_size)]
  deltas = []
  probs = []
  sign = -1 if self.inv_eval else 1
  search_outputs = model.generate_search_output(src, self.search_strategy)
  # TODO: Fix this
  for search_output in search_outputs:
    assert len(search_output.word_ids) == 1
    assert search_output.word_ids[0].shape == (len(search_output.state),)
    logprob = []
    for word, state in zip(search_output.word_ids[0], search_output.state):
      lpdist = model.decoder.scorer.calc_log_probs(state.as_vector())
      lp = dy.pick(lpdist, word)
      logprob.append(lp)
    sample = search_output.word_ids
    logprob = dy.esum(logprob) * self.alpha
    # Calculate the evaluation score
    eval_score = np.zeros(batch_size, dtype=float)
    mask = np.zeros(batch_size, dtype=float)
    for j in range(batch_size):
      ref_j = utils.remove_eos(trg[j].words, vocabs.Vocab.ES)
      hyp_j = utils.remove_eos(sample[j].tolist(), vocabs.Vocab.ES)
      if self.unique_sample:
        hash_val = hash(tuple(hyp_j))
        if len(hyp_j) == 0 or hash_val in uniques[j]:
          mask[j] = -1e20  # represents negative infinity
          continue
        else:
          uniques[j].add(hash_val)
      # Calc evaluation score
      eval_score[j] = self.evaluation_metric.evaluate_one_sent(ref_j, hyp_j) * sign
    # Appending the delta and logprob of this sample
    prob = logprob + dy.inputTensor(mask, batched=True)
    deltas.append(dy.inputTensor(eval_score, batched=True))
    probs.append(prob)
  sample_prob = dy.softmax(dy.concatenate(probs))
  deltas = dy.concatenate(deltas)
  risk = dy.sum_elems(dy.cmult(sample_prob, deltas))
  units = [t.len_unpadded() for t in trg]
  return losses.FactoredLossExpr({"risk": losses.LossExpr(risk, units)})
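# Illustrative sketch only (not part of the loss class): the "risk" above is an expected
# negative-evaluation under a softmax over the drawn samples,
#   risk = sum_k softmax(alpha * logprob + mask)_k * delta_k,
# where delta_k is the sign-adjusted sentence-level metric of sample k and mask drops
# empty or duplicate samples. A minimal numpy version with hypothetical numbers:
import numpy as np

def _min_risk_sketch():
  alpha = 5e-3                               # hypothetical smoothing coefficient
  logprob = np.array([-2.0, -3.5, -4.0])     # summed log-probs of three samples
  delta = np.array([-0.4, -0.6, -0.1])       # sign-adjusted metric score per sample
  mask = np.array([0.0, 0.0, -1e20])         # -1e20 effectively removes a duplicate sample
  scores = alpha * logprob + mask
  sample_prob = np.exp(scores - scores.max())
  sample_prob /= sample_prob.sum()           # softmax over the samples
  return float(np.sum(sample_prob * delta))  # expected risk to be minimized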
def _perform_calc_loss(self,
                       model: 'model_base.ConditionedModel',
                       src: Union[sent.Sentence, 'batchers.Batch'],
                       trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  assert hasattr(model, "attender") and hasattr(model.attender, "attention_vecs"), \
         "Must be called after MLELoss with models that have attender."
  masked_attn = model.attender.attention_vecs
  if trg.mask is not None:
    trg_mask = 1 - (trg.mask.np_arr.transpose())
    masked_attn = [dy.cmult(attn, dy.inputTensor(mask, batched=True))
                   for attn, mask in zip(masked_attn, trg_mask)]
  loss = dy.sum_elems(dy.square(1 - dy.esum(masked_attn)))
  units = [t.len_unpadded() for t in trg]
  return losses.FactoredLossExpr({"global_fertility": losses.LossExpr(loss, units)})
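# Illustrative sketch only: the global fertility term above penalizes source positions whose
# total attention mass, summed over target steps, deviates from 1, i.e.
#   loss = sum_i (1 - sum_t a_{t,i})^2.
# A minimal numpy version with a made-up 3x4 attention matrix (target steps x source words):
import numpy as np

def _global_fertility_sketch():
  attn = np.array([[0.7, 0.2, 0.1, 0.0],
                   [0.1, 0.6, 0.2, 0.1],
                   [0.0, 0.1, 0.3, 0.6]])       # one row per target step, rows sum to 1
  fertility = attn.sum(axis=0)                  # attention mass received by each source word
  return float(np.sum((1.0 - fertility) ** 2))  # zero when every source word is covered exactly once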
def _perform_calc_loss(self,
                       model: 'model_base.ConditionedModel',
                       src: Union[sent.Sentence, 'batchers.Batch'],
                       trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  search_outputs = model.generate_search_output(src, self.search_strategy)
  sign = -1 if self.inv_eval else 1
  # TODO: Fix units
  total_loss = collections.defaultdict(int)
  for search_output in search_outputs:
    # Calculate rewards
    eval_score = []
    for trg_i, sample_i in zip(trg, search_output.word_ids):
      # Removing EOS
      sample_i = utils.remove_eos(sample_i.tolist(), vocabs.Vocab.ES)
      ref_i = trg_i.words[:trg_i.len_unpadded()]
      score = self.evaluation_metric.evaluate_one_sent(ref_i, sample_i)
      eval_score.append(sign * score)
    reward = dy.inputTensor(eval_score, batched=True)
    # Composing losses
    baseline_loss = []
    cur_losses = []
    for state, mask in zip(search_output.state, search_output.mask):
      bs_score = self.baseline.transform(dy.nobackprop(state.as_vector()))
      baseline_loss.append(dy.squared_distance(reward, bs_score))
      logsoft = model.decoder.scorer.calc_log_probs(state.as_vector())
      loss_i = dy.cmult(logsoft, reward - bs_score)
      cur_losses.append(dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
    total_loss["polc_loss"] += dy.sum_elems(dy.esum(cur_losses))
    total_loss["base_loss"] += dy.sum_elems(dy.esum(baseline_loss))
  units = [t.len_unpadded() for t in trg]
  total_loss = losses.FactoredLossExpr({k: losses.LossExpr(v, units) for k, v in total_loss.items()})
  return losses.FactoredLossExpr({"risk": total_loss})
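# Illustrative sketch only: the two terms accumulated above follow the REINFORCE-with-baseline
# recipe, roughly weighting log-probabilities by (reward - baseline) and regressing the
# baseline onto the observed reward. A simplified numpy version with hypothetical numbers
# (the real loss operates on full log-prob vectors and batched DyNet expressions):
import numpy as np

def _reinforce_sketch():
  log_probs = np.array([-1.2, -0.8, -2.1])  # log-prob of the sampled word at each step
  reward = 0.35                             # hypothetical sentence-level metric score
  baseline = np.array([0.30, 0.40, 0.25])   # per-step baseline predictions
  policy_term = float(np.sum(log_probs * (reward - baseline)))
  baseline_term = float(np.sum((reward - baseline) ** 2))
  return policy_term, baseline_term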
def calc_nll(self, src_batch, trg_batch) -> losses.LossExpr:
  event_trigger.start_sent(src_batch)
  self.create_trajectories(src_batch, trg_batch, force_oracle=not self._is_action_forced())
  # Accumulate the per-step decoder losses along each sentence's trajectory.
  batch_loss = []
  for src, trg, decoder_state in zip(src_batch, trg_batch, self.decoder_states):
    seq_loss = [self.decoder.calc_loss(decoder_state[i], trg[i]) for i in range(len(decoder_state))]
    batch_loss.append(dy.esum(seq_loss))
  dy.forward(batch_loss)
  total_loss = dy.concatenate_to_batch(batch_loss)
  total_units = [trg_batch[i].len_unpadded() for i in range(trg_batch.batch_size())]
  return losses.LossExpr(total_loss, total_units)
def calc_policy_nll(self, src_batch, trg_batch) -> losses.LossExpr:
  assert self.policy_network is not None
  event_trigger.start_sent(src_batch)
  self.create_trajectories(src_batch, trg_batch, force_oracle=not self._is_action_forced())
  # Negative log-likelihood of the policy actions taken along each trajectory.
  batch_loss = []
  for src, action, model_states in zip(src_batch, self.actions, self.model_states):
    policy_actions = model_states[-1].find_backward("policy_action")
    seq_ll = [dy.pick(act.log_likelihood, act.content) for act in policy_actions]
    batch_loss.append(-dy.esum(seq_ll))
  dy.forward(batch_loss)
  total_loss = dy.concatenate_to_batch(batch_loss)
  total_units = [len(x) for x in self.actions]
  return losses.LossExpr(total_loss, total_units)
def calc_loss(self, policy_reward, results={}):
  """Calculate the policy network's loss."""
  assert len(policy_reward) == len(self.states), "There should be a reward for every action taken"
  batch_size = self.states[0].dim()[1]
  loss = {}

  # For every timestep, predict a baseline b = W_b * s + b_b and form the centered reward
  # R = r - b. The baseline itself is trained with a squared error between the predicted
  # reward r_p and the received reward r_r: loss_b = squared_distance(r_p, r_r).
  rewards = []
  baseline_loss = []
  units = np.zeros(batch_size)
  for i, state in enumerate(self.states):
    r_p = self.baseline.transform(dy.nobackprop(state))
    rewards.append(policy_reward[i] - r_p)
    if self.valid_pos[i] is not None:
      r_p = dy.pick_batch_elems(r_p, self.valid_pos[i])
      r_r = dy.pick_batch_elems(policy_reward[i], self.valid_pos[i])
      units[self.valid_pos[i]] += 1
    else:
      r_r = policy_reward[i]
      units += 1
    baseline_loss.append(dy.sum_batches(dy.squared_distance(r_p, r_r)))
  loss["rl_baseline"] = losses.LossExpr(dy.esum(baseline_loss), units)

  # Z-normalization: R = (R - mean(R)) / std(R)
  rewards = dy.concatenate(rewards, d=0)
  r_dim = rewards.dim()
  if self.z_normalization:
    rewards_shape = dy.reshape(rewards, (r_dim[0][0], r_dim[1]))
    rewards_mean = dy.mean_elems(rewards_shape)
    rewards_std = dy.std_elems(rewards_shape) + 1e-20
    rewards = (rewards - rewards_mean.value()) / rewards_std.value()
  rewards = dy.nobackprop(rewards)

  # Calculate the confidence penalty
  if self.confidence_penalty:
    loss["rl_confpen"] = self.confidence_penalty.calc_loss(self.policy_lls)

  # Calculate the REINFORCE loss: L = -sum([R - b] * pi_ll)
  reinf_loss = []
  units = np.zeros(batch_size)
  for i, (policy, action) in enumerate(zip(self.policy_lls, self.actions)):
    reward = dy.pick(rewards, i)
    ll = dy.pick_batch(policy, action)
    if self.valid_pos[i] is not None:
      ll = dy.pick_batch_elems(ll, self.valid_pos[i])
      reward = dy.pick_batch_elems(reward, self.valid_pos[i])
      units[self.valid_pos[i]] += 1
    else:
      units += 1
    reinf_loss.append(dy.sum_batches(dy.cmult(ll, reward)))
  loss["rl_reinf"] = losses.LossExpr(-dy.esum(reinf_loss), units)

  # Pack up and return
  return losses.FactoredLossExpr(loss)
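# Illustrative sketch only: the z-normalization branch above rescales the concatenated rewards
# to zero mean and unit variance, R = (R - mean(R)) / std(R), which reduces the variance of the
# REINFORCE gradient without changing the relative ordering of actions. A minimal numpy version:
import numpy as np

def _z_normalize_sketch(rewards):
  rewards = np.asarray(rewards, dtype=float)
  # The epsilon mirrors the 1e-20 guard above against a zero standard deviation.
  return (rewards - rewards.mean()) / (rewards.std() + 1e-20)

# Example: _z_normalize_sketch([0.2, 0.5, 0.8]) -> array([-1.2247..., 0., 1.2247...])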