def add_args(parser):
    """Register the source/target dependency loss weights on the parser."""
    LabelSmoothedCrossEntropyCriterion.add_args(parser)
    # The two options are symmetric; register them in one pass.
    for side in ('source', 'target'):
        parser.add_argument(
            '--{}-dependency-lambda'.format(side),
            default=0.5,
            type=float,
            metavar='D',
            help='weight for the {} side dependency loss'.format(side),
        )
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training.

    Extends the base label-smoothed cross-entropy metrics with the mask
    loss (in bits), the ``p2`` penalty, the average mask value, and the
    first worker's ``new_weight``.
    """
    LabelSmoothedCrossEntropyCriterion.reduce_metrics(logging_outputs)
    # Guard: logging_outputs[0] below would raise IndexError on an empty list.
    if not logging_outputs:
        return
    mask_loss_sum = sum(log.get('mask_loss', 0) for log in logging_outputs)
    p_sum = sum(log.get('p2', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)
    mask_sum = sum(log.get('mask_ave', 0) for log in logging_outputs)
    # Guard: avoid ZeroDivisionError when no samples contributed this step.
    if sample_size > 0:
        # divide by log(2) to report the loss in bits rather than nats
        metrics.log_scalar('mask_loss', mask_loss_sum / sample_size / math.log(2), sample_size, round=6)
        metrics.log_scalar('p_2', p_sum / sample_size, sample_size, round=5)
        metrics.log_scalar('mask_ave', mask_sum / sample_size, sample_size, round=3)
    # NOTE(review): only the first worker's value is reported; the /4 divisor
    # presumably matches a fixed accumulation factor — confirm against caller.
    metrics.log_scalar('new_weight', logging_outputs[0].get("new_weight", 0) / 4, len(logging_outputs), round=3)
def add_args(parser):
    """Register the synchronization loss weight on the parser."""
    LabelSmoothedCrossEntropyCriterion.add_args(parser)
    parser.add_argument(
        '--sync-lambda',
        type=float,
        default=1.0,
        metavar='D',
        help='weight for the synchronization loss',
    )
def add_args(parser):
    """Register the alignment loss weight on the parser."""
    LabelSmoothedCrossEntropyCriterion.add_args(parser)
    parser.add_argument("--alignment-lambda", type=float, default=0.05,
                        metavar="D", help="weight for the alignment loss")
def add_args(parser):
    """Register contrastive-loss options on the parser."""
    LabelSmoothedCrossEntropyCriterion.add_args(parser)
    parser.add_argument(
        "--contrastive-lambda",
        type=float,
        default=0.0,
        help="The contrastive loss weight",
    )
    parser.add_argument("--temperature", type=float, default=1.0)
def reduce_metrics(logging_outputs) -> None:
    """Aggregate logging outputs from data parallel training."""
    LabelSmoothedCrossEntropyCriterion.reduce_metrics(logging_outputs)
    totals = {
        key: sum(log.get(key, 0) for log in logging_outputs)
        for key in ('word_error', 'word_count', 'char_error', 'char_count')
    }
    # The error counters stay zero during training, so WER/CER are only
    # reported when validation populated them (model.training == False).
    if totals['word_count'] > 0:
        metrics.log_scalar('wer', float(totals['word_error']) / totals['word_count'] * 100,
                           totals['word_count'], round=4)
    if totals['char_count'] > 0:
        metrics.log_scalar('cer', float(totals['char_error']) / totals['char_count'] * 100,
                           totals['char_count'], round=4)
def add_args(parser):
    """Add criterion-specific arguments to the parser."""
    # fmt: off
    LabelSmoothedCrossEntropyCriterion.add_args(parser)
    parser.add_argument(
        "--print-training-sample-interval", type=int, metavar="N",
        dest="print_interval", default=500,
        help="print a training sample (reference + "
             "prediction) every this number of updates",
    )
    parser.add_argument(
        "--smoothing-type", type=str, default="uniform",
        choices=["uniform", "unigram", "temporal"],
        help="label smoothing type. Default: uniform",
    )
    parser.add_argument(
        "--unigram-pseudo-count", type=float, default=1.0, metavar="C",
        help="pseudo count for unigram label "
             "smoothing. Only relevant if --smoothing-type=unigram",
    )
    # fmt: on
def add_args(parser):
    """Add criterion-specific arguments to the parser."""
    # fmt: off
    LabelSmoothedCrossEntropyCriterion.add_args(parser)
    parser.add_argument('--print-training-sample-interval',
                        type=int, default=500, metavar='N',
                        dest='print_interval',
                        help='print a training sample (reference + '
                             'prediction) every this number of updates')
    parser.add_argument('--smoothing-type',
                        type=str, default='uniform',
                        choices=['uniform', 'unigram', 'temporal'],
                        help='label smoothing type. Default: uniform')
    parser.add_argument('--unigram-pseudo-count',
                        type=float, default=1.0, metavar='C',
                        help='pseudo count for unigram label '
                             'smoothing. Only relevant if --smoothing-type=unigram')
    # fmt: on
def test_reduction(self):
    """Reduced loss must equal the sum of the unreduced per-element losses."""
    self.args.label_smoothing = 0.1
    criterion = LabelSmoothedCrossEntropyCriterion.build_criterion(
        self.args, self.task)
    reduced_loss, _, _ = criterion(self.model, self.sample, reduce=True)
    elementwise_loss, _, _ = criterion(self.model, self.sample, reduce=False)
    self.assertAlmostEqual(reduced_loss, elementwise_loss.sum())
def test_zero_eps(self):
    """With label_smoothing == 0 the smoothed loss equals plain NLL."""
    self.args.label_smoothing = 0.0
    nll_criterion = CrossEntropyCriterion.build_criterion(self.args, self.task)
    smooth_criterion = LabelSmoothedCrossEntropyCriterion.build_criterion(
        self.args, self.task)
    nll_loss, _, _ = nll_criterion(self.model, self.sample)
    smooth_loss, _, _ = smooth_criterion(self.model, self.sample)
    self.assertAlmostEqual(nll_loss, smooth_loss)
def add_args(parser):
    """Add criterion-specific arguments to the parser."""
    # fmt: off
    LabelSmoothedCrossEntropyCriterion.add_args(parser)
    parser.add_argument('--edit-samples-path', type=str, metavar='D',
                        help='path to training edits tsv')
    parser.add_argument('--stability-coeff', default=1e2, type=float, metavar='D',
                        help='Stability loss multiplier')
    parser.add_argument('--editability-coeff', default=1e2, type=float, metavar='D',
                        help='Failed edit penalty multiplier')
    parser.add_argument('--edit-max-steps', default=10, type=int, metavar='D',
                        help='Max steps to perform during an editing')
    # fixed help-string typo: "RMSPror" -> "RMSprop"
    parser.add_argument('--edit-learning-rate', default=1e-3, type=float, metavar='D',
                        help='Learning rate for RMSprop editor')
    parser.add_argument('--almost-last', default=0, type=int, metavar='D',
                        help='if 0 use the last decoder layer to perform an edit else use penultimate')
    # fmt: on
def test_nll_loss(self):
    """Both criteria must report the same underlying NLL in their logs."""
    self.args.label_smoothing = 0.1
    nll_criterion = CrossEntropyCriterion.build_criterion(self.args, self.task)
    smooth_criterion = LabelSmoothedCrossEntropyCriterion.build_criterion(
        self.args, self.task)
    nll_loss, _, nll_log = nll_criterion(self.model, self.sample)
    smooth_loss, _, smooth_log = smooth_criterion(self.model, self.sample)
    self.assertLess(abs(nll_loss - nll_log['loss']), 1e-6)
    self.assertLess(abs(nll_loss - smooth_log['nll_loss']), 1e-6)
def aggregate_logging_outputs(logging_outputs):
    """Aggregate logging outputs from data parallel training."""
    aggregated = LabelSmoothedCrossEntropyCriterion.aggregate_logging_outputs(
        logging_outputs)
    counts = {
        key: sum(log.get(key, 0) for log in logging_outputs)
        for key in ('word_error', 'word_count', 'char_error', 'char_count')
    }
    # Error counters are only populated at validation time
    # (model.training == False); skip them otherwise.
    if counts['word_count'] > 0:
        aggregated['word_error'] = counts['word_error']
        aggregated['word_count'] = counts['word_count']
    if counts['char_count'] > 0:
        aggregated['char_error'] = counts['char_error']
        aggregated['char_count'] = counts['char_count']
    return aggregated
def add_args(parser):
    """Add criterion-specific arguments to the parser."""
    # fmt: off
    LabelSmoothedCrossEntropyCriterion.add_args(parser)
    parser.add_argument('--print-training-sample-interval',
                        type=int, default=500, metavar='N',
                        dest='print_interval',
                        help='print a training sample (reference + '
                             'prediction) every this number of updates')
    parser.add_argument('--smoothing-type',
                        type=str, default='uniform',
                        choices=['uniform', 'unigram', 'temporal'],
                        help='label smoothing type. Default: uniform')
    parser.add_argument('--unigram-pseudo-count',
                        type=float, default=1.0, metavar='C',
                        help='pseudo count for unigram label '
                             'smoothing. Only relevant if --smoothing-type=unigram')
    parser.add_argument('--scheduled-sampling-probs',
                        type=lambda p: eval_str_list(p), default=1.0,
                        metavar='P_1,P_2,...,P_N',
                        help='scheduled sampling probabilities of sampling the truth '
                             'labels for N epochs starting from --start-schedule-sampling-epoch; '
                             'all later epochs using P_N')
    parser.add_argument('--start-scheduled-sampling-epoch',
                        type=int, default=1, metavar='N',
                        help='start scheduled sampling from the specified epoch')
    # fmt: on
def __init__(self, multitask_tasks):
    """Build one criterion per auxiliary task: CTC for ctc decoders,
    label-smoothed cross-entropy for everything else."""
    self.multitask_criterion = {}
    self.multitask_loss_weight = {}
    for name, task in multitask_tasks.items():
        cfg = task.args.criterion_cfg
        if task.args.decoder_type == "ctc":
            criterion = CtcCriterion(cfg, task)
        else:
            criterion = LabelSmoothedCrossEntropyCriterion(
                task,
                cfg.sentence_avg,
                label_smoothing=cfg.label_smoothing,
            )
        self.multitask_criterion[name] = criterion
def test_padding(self):
    """Loss of a padded two-item batch equals the sum of the two
    single-item (padding-free) losses."""
    self.args.label_smoothing = 0.1
    criterion = LabelSmoothedCrossEntropyCriterion(self.args, self.task)
    batch_loss, _, _ = criterion(self.model, self.sample)

    def loss_without_padding(idx):
        # Rebuild a one-item batch so no padding is involved.
        single_sample = next(test_utils.dummy_dataloader([self.data[idx]]))
        single_args = copy.copy(self.args)
        single_args.probs = single_args.probs[idx, :, :].unsqueeze(0)
        single_model = self.task.build_model(single_args)
        loss, _, _ = criterion(single_model, single_sample)
        return loss

    self.assertAlmostEqual(
        batch_loss, loss_without_padding(0) + loss_without_padding(1))
def aggregate_logging_outputs(logging_outputs):
    """Aggregate logging outputs from data parallel training."""
    aggregated = LabelSmoothedCrossEntropyCriterion.aggregate_logging_outputs(
        logging_outputs)
    # NOTE(review): ntokens/nsentences are computed but never used below.
    ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
    nsentences = sum(log.get('nsentences', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)
    # Editing statistics are only present on edit batches; otherwise just
    # pass through the base aggregation.
    if 'editability_loss' not in logging_outputs[0]:
        return aggregated
    n = len(logging_outputs)
    aggregated['editability_loss'] = (
        sum(log['editability_loss'] for log in logging_outputs) / n)
    if sample_size > 0:
        # report the main loss in bits per sample (divide by log(2))
        aggregated['main_loss'] = (
            sum(log.get('main_loss', 0) for log in logging_outputs)
            / sample_size / math.log(2))
    else:
        aggregated['main_loss'] = 0.
    aggregated['stability_loss'] = (
        sum(log['stability_loss'] for log in logging_outputs) / n)
    aggregated['edit_complexity'] = (
        sum(log['edit_complexity'] for log in logging_outputs) / n)
    return aggregated
def aggregate_logging_outputs(cls, logging_outputs):
    """Aggregate logging outputs from data parallel training.

    Thin wrapper: delegates entirely to the label-smoothed
    cross-entropy criterion's aggregation.
    """
    return LabelSmoothedCrossEntropyCriterion.aggregate_logging_outputs(logging_outputs)