# Imports assumed by these snippets (tensor2tensor-style layout).
import numpy as np
import tensorflow as tf

from tensor2tensor.layers import common_layers
from tensor2tensor.layers import modalities


def aggregate_task_losses(hparams,
                          problem_hparams,
                          logits,
                          target_modality,
                          feature):
  """Multiproblem loss function."""
  summaries = []
  main_task_id = hparams.problem.task_list[0].task_id
  # Primary task loss.
  loss_num, loss_den = target_modality.loss(
      logits, feature,
      weights_fn=lambda x: common_layers.weights_multi_problem_all(
          x, main_task_id))
  loss_val = loss_num / tf.maximum(1.0, loss_den)
  summaries.append([hparams.problem.task_list[0].name + "_loss", loss_val])

  for task in hparams.problem.task_list[1:]:
    if hasattr(task, "num_classes"):
      # Loss over the input sequence -- the auxiliary LM loss.
      task_loss_num_seq, task_loss_den_seq = target_modality.loss(
          logits, feature,
          weights_fn=lambda x: common_layers.weights_multi_problem_input(
              x, task.task_id))  # pylint: disable=cell-var-from-loop
      task_loss_num_seq *= problem_hparams.loss_multiplier

      # Loss over the classification label only.
      task_loss_num_label, task_loss_den_label = target_modality.loss(
          logits, feature,
          weights_fn=lambda x: common_layers.weights_multi_problem(
              x, task.task_id))  # pylint: disable=cell-var-from-loop
      task_loss_num_label *= problem_hparams.loss_multiplier

      if hparams.multiproblem_reweight_label_loss:
        # Convex combination of the sequence and label numerators.
        task_loss_num = ((1 - hparams.multiproblem_label_weight) *
                         task_loss_num_seq)
        task_loss_num += (hparams.multiproblem_label_weight *
                          task_loss_num_label)
      elif hparams.multiproblem_class_loss_multiplier > 0:
        task_loss_num = task_loss_num_seq
        task_loss_num += (hparams.multiproblem_class_loss_multiplier *
                          task_loss_num_label)
      else:
        task_loss_num = task_loss_num_seq + task_loss_num_label
      task_loss_den = task_loss_den_seq + task_loss_den_label

      # Log the unscaled versions of the losses to tensorboard.
      task_loss_val = (task_loss_num_seq + task_loss_num_label) / tf.maximum(
          1.0, task_loss_den)
      summaries.append([task.name + "_loss", task_loss_val])
      task_loss_val_label = task_loss_num_label / tf.maximum(
          1.0, task_loss_den_label)
      summaries.append([task.name + "_only_label_loss", task_loss_val_label])

      loss_num += task_loss_num
      loss_den += task_loss_den
    else:
      raise ValueError("Non-classification secondary tasks are not supported.")
  return loss_num, loss_den, summaries

def aggregate_task_lm_losses(hparams,
                             problem_hparams,
                             logits,
                             feature_name,
                             feature):
  """LM loss for multiproblems."""
  summaries = []
  vocab_size = problem_hparams.vocab_size[feature_name]
  if vocab_size is not None and hasattr(hparams, "vocab_divisor"):
    vocab_size += (-vocab_size) % hparams.vocab_divisor
  modality = problem_hparams.modality[feature_name]
  loss = hparams.loss.get(feature_name, modalities.get_loss(modality))
  weights_fn = hparams.weights_fn.get(feature_name,
                                      modalities.get_weights_fn(modality))
  loss_num = 0.
  loss_den = 0.
  for task in hparams.problem.task_list:
    loss_num_, loss_den_ = loss(
        logits, feature,
        lambda x: common_layers.weights_multi_problem_all(x, task.task_id),  # pylint: disable=cell-var-from-loop
        hparams, vocab_size, weights_fn)
    loss_num += loss_num_
    loss_den += loss_den_

    loss_val = loss_num_ / tf.maximum(1.0, loss_den_)
    summaries.append([task.name + "_loss", loss_val])
  return loss_num, loss_den, summaries

def testWeightsMultiProblemAll(self):
  labels = tf.constant(np.array([[12, 15, 1, 20, 100],
                                 [67, 1, 34, 45, 124],
                                 [78, 2, 34, 18, 29],
                                 [78, 123, 55, 1, 33],
                                 [1, 18, 22, 36, 59]]), dtype=tf.int32)
  taskid = 1
  expected_mask = np.array([[1, 1, 1, 1, 1],
                            [1, 1, 1, 1, 1],
                            [0, 0, 0, 0, 0],
                            [1, 1, 1, 1, 1],
                            [1, 1, 1, 1, 1]])
  actual_mask = common_layers.weights_multi_problem_all(labels, taskid)
  actual_mask_eval = self.evaluate(actual_mask)
  self.assertAllClose(expected_mask, actual_mask_eval)

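# The sketch below is a hedged illustration (not part of the original code) of
# the behavior that testWeightsMultiProblemAll exercises: for a given task id,
# weights_multi_problem_all appears to emit a weight of 1.0 for every position
# of an example whose label sequence contains that task id, and 0.0 for
# examples that do not. The helper name `weights_multi_problem_all_sketch` and
# the 2-D label shape are assumptions made for illustration only.


def weights_multi_problem_all_sketch(labels, taskid):
  """Per-position weights: 1.0 for examples containing `taskid`, else 0.0."""
  # [batch, 1] boolean: does this example's label sequence contain the task id?
  contains_task = tf.reduce_any(
      tf.equal(labels, taskid), axis=-1, keepdims=True)
  # Broadcast the per-example indicator over every position of the sequence.
  return tf.cast(contains_task, tf.float32) * tf.ones_like(
      labels, dtype=tf.float32)
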
def aggregate_task_lm_losses(hparams,
                             logits,
                             target_modality,
                             feature):
  """LM loss for multiproblems."""
  summaries = []
  loss_num = 0.
  loss_den = 0.
  for task in hparams.problem.task_list:
    loss_num_, loss_den_ = target_modality.loss(
        logits, feature,
        weights_fn=lambda x: common_layers.weights_multi_problem_all(
            x, task.task_id))  # pylint: disable=cell-var-from-loop
    loss_num += loss_num_
    loss_den += loss_den_

    loss_val = loss_num_ / tf.maximum(1.0, loss_den_)
    summaries.append([task.name + "_loss", loss_val])
  return loss_num, loss_den, summaries

def aggregate_task_losses(hparams,
                          problem_hparams,
                          logits,
                          target_modality,
                          feature):
  """Multiproblem loss function."""
  summaries = []
  main_task_id = hparams.problem.task_list[0].task_id
  # Primary task loss.
  loss_num, loss_den = target_modality.loss(
      logits, feature,
      weights_fn=lambda x: common_layers.weights_multi_problem_all(
          x, main_task_id))

  loss_val = loss_num / tf.maximum(1.0, loss_den)
  summaries.append([hparams.problem.task_list[0].name + "_loss", loss_val])

  # Since the losses may undergo rescaling, they cannot exist as separate
  # numerators and denominators. Set the denominators to 1 in order to
  # facilitate loss averaging.
  loss_num = loss_val
  loss_den = tf.minimum(tf.convert_to_tensor(1, dtype=tf.float32), loss_den)

  for task in hparams.problem.task_list[1:]:
    # Loss only from the input sequence -- the auxiliary LM loss.
    seq_loss_num, seq_loss_den = target_modality.loss(
        logits, feature,
        weights_fn=lambda x: common_layers.weights_multi_problem_input(
            x, task.task_id))  # pylint: disable=cell-var-from-loop
    seq_loss_num *= problem_hparams.loss_multiplier

    # Unscaled sequence loss.
    seq_loss = seq_loss_num / tf.maximum(1.0, seq_loss_den)
    summaries.append([task.name + "_seq_loss", seq_loss])

    if hasattr(task, "num_classes"):
      # Loss only from the classification label.
      label_loss_num, label_loss_den = target_modality.loss(
          logits, feature,
          weights_fn=lambda x: common_layers.weights_multi_problem(
              x, task.task_id))  # pylint: disable=cell-var-from-loop
      label_loss_num *= problem_hparams.loss_multiplier

      # Unscaled classification label loss.
      label_loss = label_loss_num / tf.maximum(1.0, label_loss_den)
      summaries.append([task.name + "_label_loss", label_loss])

      # Scaling.
      if hparams.multiproblem_reweight_label_loss:
        label_loss *= hparams.multiproblem_label_weight
        seq_loss *= (1 - hparams.multiproblem_label_weight)

      if hparams.multiproblem_class_loss_multiplier:
        label_loss *= hparams.multiproblem_class_loss_multiplier
        summaries.append([task.name + "_scaled_label_loss", label_loss])

      # This is the training loss for the optimizer after all the scaling.
      task_loss_val = seq_loss + label_loss
      loss_den_ = label_loss_den
    else:
      # Loss only from the target sequence.
      target_loss_num, target_loss_den = target_modality.loss(
          logits, feature,
          weights_fn=lambda x: common_layers.weights_multi_problem(
              x, task.task_id))  # pylint: disable=cell-var-from-loop
      target_loss_num *= problem_hparams.loss_multiplier

      # Unscaled target sequence loss.
      target_loss = target_loss_num / tf.maximum(1.0, target_loss_den)
      summaries.append([task.name + "_target_loss", target_loss])

      # Scaling.
      if hparams.multiproblem_reweight_label_loss:
        target_loss *= hparams.multiproblem_label_weight
        seq_loss *= (1 - hparams.multiproblem_label_weight)

      # This is the training loss for the optimizer after all the scaling.
      task_loss_val = seq_loss + target_loss
      loss_den_ = target_loss_den

    summaries.append([task.name + "_loss", task_loss_val])
    # Adding 1 to the loss den for each task leads to averaging task losses.
    # TODO(urvashik): Fix combination with other task losses - weighted
    # average based on the number of examples from that task.
    loss_num += task_loss_val
    loss_den += tf.minimum(tf.convert_to_tensor(1, dtype=tf.float32),
                           loss_den_)
  return loss_num, loss_den, summaries

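# A hedged usage sketch (not from the original file): one way the aggregated
# numerator, denominator, and summaries returned by aggregate_task_losses
# above could be turned into a scalar training loss and TensorBoard summaries.
# The function name and the "training/combined_loss" summary tag are
# assumptions made for illustration.


def combine_multiproblem_loss(loss_num, loss_den, summaries):
  """Combines the aggregated task losses into one scalar and logs them."""
  # Each task contributes at most 1 to the denominator, so this is roughly an
  # average over tasks rather than over tokens.
  training_loss = loss_num / tf.maximum(1.0, loss_den)
  for name, value in summaries:
    # Log the per-task losses alongside the combined loss.
    tf.summary.scalar(name, value)
  tf.summary.scalar("training/combined_loss", training_loss)
  return training_loss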