def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  search_outputs = model.generate_search_output(src, self.search_strategy)
  sign = -1 if self.inv_eval else 1
  total_loss = losses.FactoredLossExpr()
  for search_output in search_outputs:
    # Calculate rewards
    eval_score = []
    for trg_i, sample_i in zip(trg, search_output.word_ids):
      # Removing EOS
      sample_i = self.remove_eos(sample_i.tolist())
      ref_i = trg_i.words[:trg_i.len_unpadded()]
      score = self.evaluation_metric.evaluate_one_sent(ref_i, sample_i)
      eval_score.append(sign * score)
    reward = dy.inputTensor(eval_score, batched=True)
    # Composing losses
    loss = losses.FactoredLossExpr()
    baseline_loss = []
    cur_losses = []
    for state, mask in zip(search_output.state, search_output.mask):
      bs_score = self.baseline.transform(dy.nobackprop(state.as_vector()))
      baseline_loss.append(dy.squared_distance(reward, bs_score))
      logsoft = model.decoder.scorer.calc_log_probs(state.as_vector())
      loss_i = dy.cmult(logsoft, reward - bs_score)
      cur_losses.append(dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
    loss.add_loss("reinforce", dy.sum_elems(dy.esum(cur_losses)))
    loss.add_loss("reinf_baseline", dy.sum_elems(dy.esum(baseline_loss)))
    # Accumulate over all search outputs
    total_loss.add_factored_loss_expr(loss)
  return total_loss
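# Illustrative numpy sketch (not part of the toolkit) of the quantity the inner
# loop above accumulates per decoding step: the log-softmax weighted by
# (reward - baseline), zeroed at masked (padded) positions. Names below are
# hypothetical.
import numpy as np

def reinforce_step_terms(logsoft: np.ndarray, reward: float, baseline: float, mask: float) -> np.ndarray:
  """One step's contribution: mask * (reward - baseline) * log-probabilities."""
  return mask * (reward - baseline) * logsoft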
def run_training(self, save_fct: Callable) -> None:
  """
  Main training loop (overwrites TrainingRegimen.run_training())
  """
  dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE, check_validity=settings.CHECK_VALIDITY)
  if self.run_for_epochs is None or self.run_for_epochs > 0:
    total_loss = losses.FactoredLossExpr()
    total_trg = []  # Needed for report
    for src, trg in self.next_minibatch():
      if self.dev_zero:
        self.checkpoint_and_save(save_fct)
        self.dev_zero = False
      with utils.ReportOnException({"src": src, "trg": trg, "graph": utils.print_cg_conditional}):
        with self.train_loss_tracker.time_tracker:
          event_trigger.set_train(True)
          total_trg.append(trg[0])
          loss_builder = self.training_step(src, trg)
          total_loss.add_factored_loss_expr(loss_builder)
          # num_updates_skipped is incremented in update(), but we need to
          # call backward() before update()
          if self.num_updates_skipped == self.update_every - 1:
            self.backward(total_loss.compute(), self.dynet_profiling)
          self.update(self.trainer)
        if self.num_updates_skipped == 0:
          total_loss_val = total_loss.get_factored_loss_val(comb_method=self.loss_comb_method)
          reported_trg = batchers.ListBatch(total_trg)
          self.train_loss_tracker.report(reported_trg, total_loss_val)
          total_loss = losses.FactoredLossExpr()
          total_trg = []
          dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE,
                      check_validity=settings.CHECK_VALIDITY)
      if self.checkpoint_needed():
        # Do a last update before the checkpoint:
        # force forward-backward for the last batch even if it's smaller than update_every
        self.num_updates_skipped = self.update_every - 1
        self.backward(total_loss.compute(), self.dynet_profiling)
        self.update(self.trainer)
        total_loss_val = total_loss.get_factored_loss_val(comb_method=self.loss_comb_method)
        reported_trg = batchers.ListBatch(total_trg)
        self.train_loss_tracker.report(reported_trg, total_loss_val)
        total_loss = losses.FactoredLossExpr()
        total_trg = []
        self.checkpoint_and_save(save_fct)
      if self.should_stop_training():
        break
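# A stripped-down sketch of the update_every pattern above: several minibatch
# losses are accumulated before a single backward pass and optimizer step.
# `compute_loss`, `backward`, and `update` are illustrative stand-ins for the
# regimen's methods, not toolkit APIs.
def accumulate_updates(batches, compute_loss, backward, update, update_every):
  """Accumulate `update_every` minibatch losses, then do one backward/update."""
  pending, seen = [], 0
  for batch in batches:
    pending.append(compute_loss(batch))
    seen += 1
    if seen == update_every:
      backward(sum(pending))  # one backward pass over the accumulated loss
      update()                # one optimizer step
      pending, seen = [], 0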
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  loss = model.calc_nll(src, trg)
  return losses.FactoredLossExpr({"mle": loss})
def calc_loss(self, policy_reward, only_final_reward=True):
  loss = losses.FactoredLossExpr()
  ## Calculate baseline
  pred_reward, baseline_loss = self.calc_baseline_loss(policy_reward, only_final_reward)
  if only_final_reward:
    rewards = [policy_reward - pw_i for pw_i in pred_reward]
  else:
    rewards = [pr_i - pw_i for pr_i, pw_i in zip(policy_reward, pred_reward)]
  loss.add_loss("rl_baseline", baseline_loss)
  ## Z-Normalization
  rewards = dy.concatenate(rewards, d=0)
  if self.z_normalization:
    rewards_value = rewards.value()
    rewards_mean = np.mean(rewards_value)
    rewards_std = np.std(rewards_value) + 1e-10
    rewards = (rewards - rewards_mean) / rewards_std
  ## Calculate Confidence Penalty
  if self.confidence_penalty:
    cp_loss = self.confidence_penalty.calc_loss(self.policy_lls)
    loss.add_loss("rl_confpen", cp_loss)
  ## Calculate Reinforce Loss
  reinf_loss = []
  # Loop through all actions in one sequence
  for i, (policy, action) in enumerate(zip(self.policy_lls, self.actions)):
    # Main Reinforce calculation
    reward = dy.pick(rewards, i)
    ll = dy.pick_batch(policy, action)
    if self.valid_pos is not None:
      ll = dy.pick_batch_elems(ll, self.valid_pos[i])
      reward = dy.pick_batch_elems(reward, self.valid_pos[i])
    reinf_loss.append(dy.sum_batches(ll * reward))
  loss.add_loss("rl_reinf", -self.weight * dy.esum(reinf_loss))
  ## Return the composed losses
  return loss
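# Standalone sketch of the z-normalization block above, on plain numpy rewards;
# the epsilon guards against a zero standard deviation. Illustrative only.
import numpy as np

def z_normalize(rewards: np.ndarray, eps: float = 1e-10) -> np.ndarray:
  """Center and scale rewards to reduce the variance of the REINFORCE gradient."""
  return (rewards - rewards.mean()) / (rewards.std() + eps)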
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  total_loss = losses.FactoredLossExpr()
  for loss, weight in zip(self.pt_losses, self.loss_weight):
    total_loss.add_factored_loss_expr(loss.calc_loss(model, src, trg) * weight)
  return total_loss
def on_calc_additional_loss(self, *args, **kwargs):
  loss_dict = {}
  if self.transducer_loss and self.transducer_losses:
    loss_expr = dy.esum(self.transducer_losses)
    loss_dict["symm_transd_loss"] = loss_expr
  if self.split_reg_penalty_expr is not None:
    loss_dict["symm_transd_reg_penalty"] = self.split_reg_penalty_expr
  if len(loss_dict) == 0:
    return None
  else:
    return losses.FactoredLossExpr(loss_dict)
def _perform_calc_loss(self,
                       model: 'model_base.ConditionedModel',
                       src: Union[sent.Sentence, 'batchers.Batch'],
                       trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  # Use the dedicated policy model when one is configured
  if self.model is not None:
    model = self.model
  return losses.FactoredLossExpr({"policy_mle": model.calc_policy_nll(src, trg)})
def _perform_calc_loss(self,
                       model: 'model_base.ConditionedModel',
                       src: Union[sent.Sentence, 'batchers.Batch'],
                       trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  total_loss = {}
  for i, (loss, weight) in enumerate(zip(self.losses, self.loss_weight)):
    total_loss[str(i)] = loss._perform_calc_loss(model, src, trg) * weight
  return losses.FactoredLossExpr(total_loss)
def calc_loss(self, src, trg, infer_prediction=False):
  event_trigger.start_sent(src)
  if not batchers.is_batched(src):
    src = batchers.mark_as_batch([src])
  if not batchers.is_batched(trg):
    trg = batchers.mark_as_batch([trg])
  src_words = np.array([[vocabs.Vocab.SS] + x.words for x in src])
  batch_size, src_len = src_words.shape
  if src.mask is None:
    src_mask = np.zeros((batch_size, src_len), dtype=np.int)
  else:
    src_mask = np.concatenate([np.zeros((batch_size, 1), dtype=np.int),
                               src.mask.np_arr.astype(np.int)], axis=1)
  src_embeddings = self.sentence_block_embed(self.src_embedder.embeddings, src_words, src_mask)
  src_embeddings = self.make_input_embedding(src_embeddings, src_len)
  trg_words = np.array(list(map(lambda x: [vocabs.Vocab.SS] + x.words[:-1], trg)))
  batch_size, trg_len = trg_words.shape
  if trg.mask is None:
    trg_mask = np.zeros((batch_size, trg_len), dtype=np.int)
  else:
    trg_mask = trg.mask.np_arr.astype(np.int)
  trg_embeddings = self.sentence_block_embed(self.trg_embedder.embeddings, trg_words, trg_mask)
  trg_embeddings = self.make_input_embedding(trg_embeddings, trg_len)
  xx_mask = self.make_attention_mask(src_mask, src_mask)
  xy_mask = self.make_attention_mask(trg_mask, src_mask)
  yy_mask = self.make_attention_mask(trg_mask, trg_mask)
  yy_mask *= self.make_history_mask(trg_mask)
  z_blocks = self.encoder.transduce(src_embeddings, xx_mask)
  h_block = self.decoder(trg_embeddings, z_blocks, xy_mask, yy_mask)
  if infer_prediction:
    y_len = h_block.dim()[0][1]
    last_col = dy.pick(h_block, dim=1, index=y_len - 1)
    logits = self.decoder.output(last_col)
    return logits
  ref_list = list(itertools.chain.from_iterable(map(lambda x: x.words, trg)))
  concat_t_block = (1 - trg_mask.ravel()).reshape(-1) * np.array(ref_list)
  loss = self.decoder.output_and_loss(h_block, concat_t_block)
  return losses.FactoredLossExpr({"mle": loss})
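# Hypothetical sketch of what a history (causal) mask like the one folded into
# yy_mask above computes: target position t may attend only to positions <= t.
# The actual make_history_mask signature may differ; names here are illustrative.
import numpy as np

def history_mask(batch_size: int, length: int) -> np.ndarray:
  """(batch, length, length) lower-triangular mask: 1 where attention is allowed."""
  tri = np.tril(np.ones((length, length), dtype=np.float32))
  return np.broadcast_to(tri, (batch_size, length, length)).copy()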
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  batch_size = trg.batch_size()
  uniques = [set() for _ in range(batch_size)]
  deltas = []
  probs = []
  sign = -1 if self.inv_eval else 1
  search_outputs = model.generate_search_output(src, self.search_strategy)
  for search_output in search_outputs:
    assert len(search_output.word_ids) == 1
    assert search_output.word_ids[0].shape == (len(search_output.state),)
    logprob = []
    for word, state in zip(search_output.word_ids[0], search_output.state):
      lpdist = model.decoder.scorer.calc_log_probs(state.as_vector())
      lp = dy.pick(lpdist, word)
      logprob.append(lp)
    sample = search_output.word_ids
    logprob = dy.esum(logprob) * self.alpha
    # Calculate the evaluation score
    eval_score = np.zeros(batch_size, dtype=float)
    mask = np.zeros(batch_size, dtype=float)
    for j in range(batch_size):
      ref_j = self.remove_eos(trg[j].words)
      hyp_j = self.remove_eos(sample[j].tolist())
      if self.unique_sample:
        hash_val = hash(tuple(hyp_j))
        if len(hyp_j) == 0 or hash_val in uniques[j]:
          mask[j] = -1e20  # represents negative infinity
          continue
        else:
          uniques[j].add(hash_val)
      # Calc evaluation score
      eval_score[j] = self.evaluation_metric.evaluate_one_sent(ref_j, hyp_j) * sign
    # Appending the delta and logprob of this sample
    prob = logprob + dy.inputTensor(mask, batched=True)
    deltas.append(dy.inputTensor(eval_score, batched=True))
    probs.append(prob)
  sample_prob = dy.softmax(dy.concatenate(probs))
  deltas = dy.concatenate(deltas)
  risk = dy.sum_elems(dy.cmult(sample_prob, deltas))
  ### Debug
  #print(sample_prob.npvalue().transpose()[0])
  #print(deltas.npvalue().transpose()[0])
  #print("----------------------")
  ### End debug
  return losses.FactoredLossExpr({"risk": risk})
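# Numpy sketch (illustrative) of the risk objective assembled above: the sample
# log-probs (scaled by alpha, with -1e20 masking empty or duplicate samples) are
# renormalized with a softmax and used to take an expectation over per-sample
# costs (the deltas).
import numpy as np

def expected_risk(scaled_logprobs: np.ndarray, deltas: np.ndarray) -> float:
  """risk = sum_k q_k * delta_k  with  q = softmax(alpha * log p + mask)."""
  q = np.exp(scaled_logprobs - scaled_logprobs.max())
  q /= q.sum()
  return float((q * deltas).sum())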
def _perform_calc_loss(self,
                       model: 'model_base.ConditionedModel',
                       src: Union[sent.Sentence, 'batchers.Batch'],
                       trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  search_outputs = model.generate_search_output(src, self.search_strategy)
  sign = -1 if self.inv_eval else 1
  # TODO: Fix units
  total_loss = collections.defaultdict(int)
  for search_output in search_outputs:
    # Calculate rewards
    eval_score = []
    for trg_i, sample_i in zip(trg, search_output.word_ids):
      # Removing EOS
      sample_i = utils.remove_eos(sample_i.tolist(), vocabs.Vocab.ES)
      ref_i = trg_i.words[:trg_i.len_unpadded()]
      score = self.evaluation_metric.evaluate_one_sent(ref_i, sample_i)
      eval_score.append(sign * score)
    reward = dy.inputTensor(eval_score, batched=True)
    # Composing losses
    baseline_loss = []
    cur_losses = []
    for state, mask in zip(search_output.state, search_output.mask):
      bs_score = self.baseline.transform(dy.nobackprop(state.as_vector()))
      baseline_loss.append(dy.squared_distance(reward, bs_score))
      logsoft = model.decoder.scorer.calc_log_probs(state.as_vector())
      loss_i = dy.cmult(logsoft, reward - bs_score)
      cur_losses.append(dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
    total_loss["polc_loss"] += dy.sum_elems(dy.esum(cur_losses))
    total_loss["base_loss"] += dy.sum_elems(dy.esum(baseline_loss))
  units = [t.len_unpadded() for t in trg]
  # Wrap each summed expression with its units; the factored expression itself
  # is the return value (wrapping it in yet another FactoredLossExpr would nest it)
  return losses.FactoredLossExpr(
      {k: losses.LossExpr(v, units) for k, v in total_loss.items()})
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  loss_builder = losses.FactoredLossExpr()
  for _ in range(self.repeat):
    standard_loss = self.child_loss.calc_loss(model, src, trg)
    additional_loss = event_trigger.calc_additional_loss(trg, model, standard_loss)
    loss_builder.add_factored_loss_expr(standard_loss)
    loss_builder.add_factored_loss_expr(additional_loss)
  return loss_builder
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  assert hasattr(model, "attender") and hasattr(model.attender, "attention_vecs"), \
    "Must be called after MLELoss with models that have an attender."
  masked_attn = model.attender.attention_vecs
  if trg.mask is not None:
    trg_mask = 1 - trg.mask.np_arr.transpose()
    masked_attn = [dy.cmult(attn, dy.inputTensor(mask, batched=True))
                   for attn, mask in zip(masked_attn, trg_mask)]
  loss = self.global_fertility(masked_attn)
  return losses.FactoredLossExpr({"global_fertility": loss})
def calc_nll(self,
             src: Union[batchers.Batch, sent.Sentence],
             trg: Union[batchers.Batch, sent.Sentence]) -> losses.FactoredLossExpr:
  sub_losses = collections.defaultdict(list)
  for model in self.models:
    for loss_name, loss in model.calc_nll(src, trg).expr_factors.items():
      sub_losses[loss_name].append(loss)
  model_loss = losses.FactoredLossExpr()
  for loss_name, losslist in sub_losses.items():
    # TODO: dy.average(losslist) _or_ dy.esum(losslist) / len(self.models)?
    # -- might not be the same if not all models return all losses
    model_loss.add_loss(loss_name, dy.average(losslist))
  return model_loss
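# A tiny numpy check (illustrative, not toolkit code) of the distinction the
# TODO above raises: dy.average(losslist) equals dy.esum(losslist)/len(models)
# only when every model contributes to every loss factor.
import numpy as np

def _average_vs_esum_demo():
  factor_losses = [2.0, 4.0]                      # two of three models returned this factor
  avg_over_contributors = np.mean(factor_losses)  # 3.0: what dy.average would give
  avg_over_all_models = np.sum(factor_losses) / 3  # 2.0: esum divided by ensemble size
  return avg_over_contributors, avg_over_all_models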
def calc_loss(self, src, trg, loss_calculator):
  event_trigger.start_sent(src)
  src_embeddings = self.src_embedder.embed_sent(src)
  src_encodings = self.src_encoder(src_embeddings)
  trg_embeddings = self.trg_embedder.embed_sent(trg)
  trg_encodings = self.trg_encoder(trg_embeddings)
  model_loss = losses.FactoredLossExpr()
  model_loss.add_loss("dist", loss_calculator(src_encodings, trg_encodings))
  return model_loss
def calc_loss(self,
              model: 'model_base.ConditionedModel',
              src: Union[sent.Sentence, 'batchers.Batch'],
              trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  search_outputs = model.generate_search_output(src, self.search_strategy)
  sign = -1 if self.inv_eval else 1
  total_loss = losses.FactoredLossExpr()
  for search_output in search_outputs:
    self.eval_score = []
    for trg_i, sample_i in zip(trg, search_output.word_ids):
      # Removing EOS
      sample_i = self.remove_eos(sample_i.tolist())
      ref_i = trg_i.words[:trg_i.len_unpadded()]
      score = self.evaluation_metric.evaluate_one_sent(ref_i, sample_i)
      self.eval_score.append(sign * score)
    self.reward = dy.inputTensor(self.eval_score, batched=True)
    # Composing losses
    loss = losses.FactoredLossExpr()
    if self.baseline is not None:
      baseline_loss = []
      cur_losses = []
      for state, logsoft, mask in zip(search_output.state,
                                      search_output.logsoftmaxes,
                                      search_output.mask):
        bs_score = self.baseline.transform(state)
        baseline_loss.append(dy.squared_distance(self.reward, bs_score))
        loss_i = dy.cmult(logsoft, self.reward - bs_score)
        cur_losses.append(dy.cmult(loss_i, dy.inputTensor(mask, batched=True)))
      loss.add_loss("reinforce", dy.sum_elems(dy.esum(cur_losses)))
      loss.add_loss("reinf_baseline", dy.sum_elems(dy.esum(baseline_loss)))
    else:
      # Without a baseline, weight the summed log-softmaxes by the raw reward
      loss.add_loss("reinforce",
                    dy.sum_elems(dy.cmult(self.reward, dy.esum(search_output.logsoftmaxes))))
    total_loss.add_factored_loss_expr(loss)
  return total_loss
def _perform_calc_loss(self,
                       model: 'model_base.ConditionedModel',
                       src: Union[sent.Sentence, 'batchers.Batch'],
                       trg: Union[sent.Sentence, 'batchers.Batch']) -> losses.FactoredLossExpr:
  assert hasattr(model, "attender") and hasattr(model.attender, "attention_vecs"), \
    "Must be called after MLELoss with models that have an attender."
  masked_attn = model.attender.attention_vecs
  if trg.mask is not None:
    trg_mask = 1 - trg.mask.np_arr.transpose()
    masked_attn = [dy.cmult(attn, dy.inputTensor(mask, batched=True))
                   for attn, mask in zip(masked_attn, trg_mask)]
  loss = dy.sum_elems(dy.square(1 - dy.esum(masked_attn)))
  units = [t.len_unpadded() for t in trg]
  return losses.FactoredLossExpr({"global_fertility": losses.LossExpr(loss, units)})
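# Numpy sketch (illustrative) of the global fertility penalty computed above:
# each source position should receive a total attention mass of 1 summed over
# target steps, and deviations are penalized quadratically.
import numpy as np

def global_fertility_penalty(attn: np.ndarray) -> float:
  """attn: (trg_len, src_len) attention rows; returns sum_j (1 - sum_t a_tj)^2."""
  return float(np.square(1.0 - attn.sum(axis=0)).sum())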
def on_calc_additional_loss(self, *args, **kwargs):
  seq_len = len(self.last_output)
  loss_expr = 0
  for pos_i in range(seq_len):
    input_i = self.last_output[pos_i]
    affine = self.linear_layer(input_i)
    softmax_out = dy.softmax(affine)
    if self.mode == "entropy":
      loss_expr = loss_expr - dy.sum_dim(dy.cmult(dy.log(softmax_out), softmax_out), d=[0])
    elif self.mode == "max":
      loss_expr = loss_expr - dy.log(dy.max_dim(softmax_out))
    else:
      raise ValueError(f"unknown mode {self.mode}")
  # loss_expr = loss_expr * (self.scale / seq_len)
  loss_expr = loss_expr * self.scale
  return losses.FactoredLossExpr({"enc_entropy": loss_expr})
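# Illustrative numpy version of the per-position "entropy" term above:
# -sum(p * log p) is the Shannon entropy of the softmax output, which the loss
# accumulates over positions and scales, so minimizing it sharpens the
# distributions.
import numpy as np

def softmax_entropy(logits: np.ndarray) -> float:
  """Shannon entropy of softmax(logits)."""
  p = np.exp(logits - logits.max())
  p /= p.sum()
  return float(-(p * np.log(p)).sum())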
def calc_loss(self, policy_reward, results=None):
  """
  Calc policy network's loss.
  """
  assert len(policy_reward) == len(self.states), "There should be a reward for every action taken"
  batch_size = self.states[0].dim()[1]
  loss = {}

  # Calculate the baseline-corrected reward for each timestep:
  #   b = W_b * s + b_b
  #   R = r - b
  # and the baseline's own regression loss:
  #   loss_b = squared_distance(r_p, r_r)
  rewards = []
  baseline_loss = []
  units = np.zeros(batch_size)
  for i, state in enumerate(self.states):
    r_p = self.baseline.transform(dy.nobackprop(state))
    rewards.append(policy_reward[i] - r_p)
    if self.valid_pos[i] is not None:
      r_p = dy.pick_batch_elems(r_p, self.valid_pos[i])
      r_r = dy.pick_batch_elems(policy_reward[i], self.valid_pos[i])
      units[self.valid_pos[i]] += 1
    else:
      r_r = policy_reward[i]
      units += 1
    baseline_loss.append(dy.sum_batches(dy.squared_distance(r_p, r_r)))
  loss["rl_baseline"] = losses.LossExpr(dy.esum(baseline_loss), units)

  # Z-normalization: R = (R - mean(R)) / std(R)
  rewards = dy.concatenate(rewards, d=0)
  r_dim = rewards.dim()
  if self.z_normalization:
    rewards_shape = dy.reshape(rewards, (r_dim[0][0], r_dim[1]))
    rewards_mean = dy.mean_elems(rewards_shape)
    rewards_std = dy.std_elems(rewards_shape) + 1e-20
    rewards = (rewards - rewards_mean.value()) / rewards_std.value()
  rewards = dy.nobackprop(rewards)

  # Calculate confidence penalty
  if self.confidence_penalty:
    loss["rl_confpen"] = self.confidence_penalty.calc_loss(self.policy_lls)

  # Calculate REINFORCE loss:
  #   L = -sum((R - b) * pi_ll)
  reinf_loss = []
  units = np.zeros(batch_size)
  for i, (policy, action) in enumerate(zip(self.policy_lls, self.actions)):
    reward = dy.pick(rewards, i)
    ll = dy.pick_batch(policy, action)
    if self.valid_pos[i] is not None:
      ll = dy.pick_batch_elems(ll, self.valid_pos[i])
      reward = dy.pick_batch_elems(reward, self.valid_pos[i])
      units[self.valid_pos[i]] += 1
    else:
      units += 1
    reinf_loss.append(dy.sum_batches(dy.cmult(ll, reward)))
  loss["rl_reinf"] = losses.LossExpr(-dy.esum(reinf_loss), units)

  # Pack up and return
  return losses.FactoredLossExpr(loss)
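# Standalone numpy sketch (illustrative) of the per-timestep baseline regression
# above: the predicted reward r_p is fit to the realized reward r_r with a
# squared error, so that (r - b) has lower variance in the REINFORCE term.
import numpy as np

def baseline_regression_loss(pred_reward: np.ndarray, true_reward: np.ndarray) -> float:
  """Sum of squared distances between predicted and realized rewards."""
  return float(np.square(pred_reward - true_reward).sum())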