    def test_new_root(self):
        with metrics.aggregate() as a:
            metrics.log_scalar('loss', 1)
            with metrics.aggregate(new_root=True) as b:
                metrics.log_scalar('loss', 2)

        self.assertEqual(a.get_smoothed_values()['loss'], 1)
        self.assertEqual(b.get_smoothed_values()['loss'], 2)
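The smoothed values above are plain averages; `log_scalar` also accepts a `weight`, which the active aggregators use for weighted averaging. A minimal sketch, not taken from the snippets above, assuming the fairseq `metrics` module (the import path varies by release):

# Minimal sketch: log_scalar takes an optional weight, so the smoothed value
# is a weighted average rather than a simple mean.
from fairseq.logging import metrics  # assumed import path; older releases use `from fairseq import metrics`

with metrics.aggregate() as agg:
    metrics.log_scalar('loss', 1, weight=1)
    metrics.log_scalar('loss', 4, weight=3)

assert agg.get_smoothed_values()['loss'] == (1 * 1 + 4 * 3) / 4  # 3.25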
Example 2
    def test_nesting(self):
        with metrics.aggregate() as a:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate() as b:
                metrics.log_scalar("loss", 2)

        self.assertEqual(a.get_smoothed_values()["loss"], 1.5)
        self.assertEqual(b.get_smoothed_values()["loss"], 2)
    def test_nested_duplicate_names(self):
        name = str(uuid.uuid4())
        metrics.reset_meters(name)

        with metrics.aggregate(name):
            metrics.log_scalar('loss', 1)
            with metrics.aggregate() as other:
                with metrics.aggregate(name):
                    metrics.log_scalar('loss', 2)
            metrics.log_scalar('loss', 6)

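        # the named aggregator sees all three logs: (1 + 2 + 6) / 3 = 3,
        # while 'other' is only active around the inner block and sees just the 2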
        self.assertEqual(metrics.get_smoothed_values(name)['loss'], 3)
        self.assertEqual(other.get_smoothed_values()['loss'], 2)
    def test_named(self):
        name = str(uuid.uuid4())
        metrics.reset_meters(name)

        with metrics.aggregate(name):
            metrics.log_scalar('loss', 1)

        metrics.log_scalar('loss', 3)

        with metrics.aggregate(name):
            metrics.log_scalar('loss', 2)

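        # both blocks under the same name feed the same meters: (1 + 2) / 2 = 1.5;
        # the log_scalar call outside the named context is not counted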
        self.assertEqual(metrics.get_smoothed_values(name)['loss'], 1.5)
    def test_nested_new_root(self):
        with metrics.aggregate() as layer1:
            metrics.log_scalar('loss', 1)
            with metrics.aggregate(new_root=True) as layer2:
                metrics.log_scalar('loss', 2)
                with metrics.aggregate() as layer3:
                    metrics.log_scalar('loss', 3)
                    with metrics.aggregate(new_root=True) as layer4:
                        metrics.log_scalar('loss', 4)
            metrics.log_scalar('loss', 1.5)

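        # new_root aggregators do not propagate values to their parents:
        # layer2 averages (2 + 3) / 2 = 2.5 and layer1 averages (1 + 1.5) / 2 = 1.25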
        self.assertEqual(layer4.get_smoothed_values()['loss'], 4)
        self.assertEqual(layer3.get_smoothed_values()['loss'], 3)
        self.assertEqual(layer2.get_smoothed_values()['loss'], 2.5)
        self.assertEqual(layer1.get_smoothed_values()['loss'], 1.25)
Example 6
    def reduce_metrics(self, logging_outputs, criterion, logging_output_keys=None):
        logging_output_keys = logging_output_keys or self.eval_lang_pairs

        # aggregate logging outputs for each language pair
        agg_logging_outputs = {}
        for key in logging_output_keys:
            with metrics.aggregate() as agg:
                logging_outputs_key = [
                    logging_output.get(key, {}) for logging_output in logging_outputs
                ]
                for k in ['sample_size', 'nsentences', 'ntokens']:
                    metrics.log_scalar(k, sum(l[k] for l in logging_outputs_key))
                super().reduce_metrics(logging_outputs_key, criterion)
                agg_logging_outputs[key] = agg.get_smoothed_values()

        def sum_over_languages(key):
            return sum(logging_output[key] for logging_output in agg_logging_outputs.values())

        # flatten logging outputs
        flat_logging_output = {
            '{}:{}'.format(lang_pair, k): v
            for lang_pair, agg_logging_output in agg_logging_outputs.items()
            for k, v in agg_logging_output.items()
        }
        flat_logging_output['loss'] = sum_over_languages('loss')
        if any('nll_loss' in logging_output for logging_output in agg_logging_outputs.values()):
            flat_logging_output['nll_loss'] = sum_over_languages('nll_loss')
        flat_logging_output['sample_size'] = sum_over_languages('sample_size')
        flat_logging_output['nsentences'] = sum_over_languages('nsentences')
        flat_logging_output['ntokens'] = sum_over_languages('ntokens')
        return flat_logging_output
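For reference, the flattening above keys each per-pair stat as '<lang_pair>:<stat>' and adds totals summed over pairs. A hypothetical illustration (language pairs and values are invented, not taken from the code above):

# agg_logging_outputs = {
#     'en-de': {'loss': 1.2, 'ntokens': 1000, 'nsentences': 40, 'sample_size': 1000},
#     'en-fr': {'loss': 0.8, 'ntokens': 800, 'nsentences': 32, 'sample_size': 800},
# }
# flat_logging_output == {
#     'en-de:loss': 1.2, 'en-de:ntokens': 1000, 'en-de:nsentences': 40, 'en-de:sample_size': 1000,
#     'en-fr:loss': 0.8, 'en-fr:ntokens': 800, 'en-fr:nsentences': 32, 'en-fr:sample_size': 800,
#     'loss': 2.0, 'sample_size': 1800, 'nsentences': 72, 'ntokens': 1800,
# }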
Example 7
    def aggregate_logging_outputs(self, logging_outputs, criterion):
        """[deprecated] Aggregate logging outputs from data parallel training."""
        utils.deprecation_warning(
            'The aggregate_logging_outputs API is deprecated. '
            'Please use the reduce_metrics API instead.')
        with metrics.aggregate() as agg:
            self.reduce_metrics(logging_outputs, criterion)
            return agg.get_smoothed_values()
Example 8
def validate(args, trainer, task, epoch_itr, subsets):
    """Evaluate the model on the validation set(s) and return the losses."""

    if args.fixed_validation_seed is not None:
        # set fixed seed for every validation
        utils.set_torch_seed(args.fixed_validation_seed)

    hypotheses = []
    references = []

    for subset in subsets:
        # Initialize data iterator
        itr = task.get_batch_iterator(
            dataset=task.dataset(subset),
            max_tokens=args.max_tokens_valid,
            max_sentences=args.max_sentences_valid,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                trainer.get_model().max_positions(),
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_shards=args.distributed_world_size,
            shard_id=args.distributed_rank,
            num_workers=args.num_workers,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args,
            itr,
            epoch_itr.epoch,
            prefix='valid on \'{}\' subset'.format(subset),
            no_progress_bar='simple')

        # create a new root metrics aggregator so validation metrics
        # don't pollute other aggregators (e.g., train meters)
        with metrics.aggregate(new_root=True) as agg:
            hyps, refs = [], []
            for sample in progress:
                logging_output, h, r = trainer.valid_step(sample,
                                                          generate=True)

                hyps.extend(h)
                refs.extend(r)

        # log validation stats
        stats = get_valid_stats(args, trainer, agg.get_smoothed_values())
        progress.print(stats, tag=subset, step=trainer.get_num_updates())

        hypotheses.append(hyps)
        references.append(refs)

    return hypotheses, references
Example 9
def train(args, trainer, task, epoch_itr):
    """Train the model for one epoch."""
    # Initialize data iterator
    itr = epoch_itr.next_epoch_itr(
        fix_batches_to_gpus=args.fix_batches_to_gpus,
        shuffle=(epoch_itr.epoch >= args.curriculum),
    )
    update_freq = (args.update_freq[epoch_itr.epoch - 1]
                   if epoch_itr.epoch <= len(args.update_freq) else
                   args.update_freq[-1])
    itr = iterators.GroupedIterator(itr, update_freq)
    progress = progress_bar.build_progress_bar(
        args,
        itr,
        epoch_itr.epoch,
        no_progress_bar='simple',
    )

    valid_subsets = args.valid_subset.split(',')
    max_update = args.max_update or math.inf
    if hasattr(trainer.criterion, 'set_epoch'):
        trainer.criterion.set_epoch(epoch_itr.epoch)
    for samples in progress:
        if hasattr(trainer.criterion, 'set_num_updates'):
            trainer.criterion.set_num_updates(trainer.get_num_updates())

        with metrics.aggregate('train_inner'):
            log_output = trainer.train_step(samples)
            num_updates = trainer.get_num_updates()
            if log_output is None:
                continue

            # log mid-epoch stats
            stats = get_training_stats('train_inner')
            progress.log(stats, tag='train', step=num_updates)

        if (not args.disable_validation and args.save_interval_updates > 0
                and num_updates % args.save_interval_updates == 0
                and num_updates > 0):
            valid_losses = validate(args, trainer, task, epoch_itr,
                                    valid_subsets)
            checkpoint_utils.save_checkpoint(args, trainer, epoch_itr,
                                             valid_losses[0])

        if num_updates >= max_update:
            break

    # log end-of-epoch stats
    stats = get_training_stats('train')
    progress.print(stats, tag='train', step=num_updates)

    # reset epoch-level meters
    metrics.reset_meters('train')
Example 10
def downstream_train_pytorch(args, trainer, task, epoch_itr, train_prefix):
    """Fine-tune PyTorch classifier on downstream training set for one epoch"""
    task.split = 'train'
    num_updates = trainer.get_num_updates()

    # Initialize data iterator
    itr = epoch_itr.next_epoch_itr(
        fix_batches_to_gpus=args.fix_batches_to_gpus,
        shuffle=(epoch_itr.next_epoch_idx > args.curriculum),
    )
    update_freq = (args.update_freq[epoch_itr.epoch - 1]
                   if epoch_itr.epoch <= len(args.update_freq) else
                   args.update_freq[-1])
    itr = iterators.GroupedIterator(itr, update_freq)
    progress = progress_bar.build_progress_bar(
        args,
        itr,
        epoch_itr.epoch,
        no_progress_bar='simple',
    )

    progress = maybe_wrap_neptune_logging(progress, args)

    # Task specific setup per epoch
    task.begin_epoch(epoch_itr.epoch, trainer.get_model())

    max_update = args.max_update or math.inf
    with metrics.aggregate() as agg:
        for samples in progress:

            # Train for one step
            log_output = trainer.train_step(samples)
            num_updates = trainer.get_num_updates()
            if log_output is None:
                continue

            # log mid-epoch stats
            stats = get_ft_train_stats(agg.get_smoothed_values())
            progress.log(stats, tag=train_prefix, step=num_updates)

            if num_updates >= max_update:
                break

    # log end-of-epoch stats
    stats = get_ft_train_stats(agg.get_smoothed_values())
    try:
        progress.print(stats, tag=train_prefix, step=num_updates, log=False)
    except TypeError:
        # fall back for progress bars whose print() does not accept the log keyword
        progress.print(stats, tag=train_prefix, step=num_updates)

    # Reset epoch-level meters
    metrics.reset_meters(train_prefix)
Example 11
def validate(args, trainer, task, epoch_for_logging, valid_name, ckpt_idx):
    """Evaluate the model on the validation set(s) and return the losses."""
    task.split = 'valid'

    if args.fixed_validation_seed is not None:
        # Set fixed seed for every validation
        utils.set_torch_seed(args.fixed_validation_seed)

    # Initialize data iterator
    itr = task.get_batch_iterator(
        dataset=task.dataset('valid'),
        max_tokens=args.max_tokens_valid,
        max_sentences=args.max_sentences_valid,
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            trainer.get_model().max_positions(),
        ),
        ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=args.required_batch_size_multiple,
        seed=args.seed,
        num_shards=args.distributed_world_size,
        shard_id=args.distributed_rank,
        num_workers=args.num_workers,
        epoch=epoch_for_logging,
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.build_progress_bar(
        args,
        itr,
        epoch_for_logging,
        prefix='valid on \'{}\' subset'.format(valid_name),
        no_progress_bar='simple')
    progress = maybe_wrap_neptune_logging(progress, args)

    # Reset validation meters
    metrics.reset_meters(valid_name)

    with metrics.aggregate(valid_name) as agg:
        for sample in progress:
            trainer.valid_step(sample)

    # Log validation stats
    stats = get_valid_stats(args, trainer, agg.get_smoothed_values())
    if args.log_valid_progress:
        valid_progress_prefix = '{}_ckpt{}'.format(valid_name, ckpt_idx)
        progress.print({args.eval_metric: stats[args.eval_metric]},
                       tag=valid_progress_prefix,
                       step=epoch_for_logging)

    # Return validations score
    return stats[args.best_checkpoint_metric], stats[
        args.eval_metric], progress
Example 12
def train(args, trainer, task, epoch_itr):
    """Train the model for one epoch."""
    task.split = 'train'

    # Initialize data iterator
    itr = epoch_itr.next_epoch_itr(
        fix_batches_to_gpus=args.fix_batches_to_gpus,
        shuffle=(epoch_itr.next_epoch_idx > args.curriculum),
    )
    update_freq = (args.update_freq[epoch_itr.epoch - 1]
                   if epoch_itr.epoch <= len(args.update_freq) else
                   args.update_freq[-1])
    itr = iterators.GroupedIterator(itr, update_freq)
    progress = maybe_wrap_neptune_logging(
        progress_bar.build_progress_bar(
            args,
            itr,
            epoch_itr.epoch,
            no_progress_bar='simple',
        ),
        args=args,
    )

    # task specific setup per epoch
    task.begin_epoch(epoch_itr.epoch, trainer.get_model())

    valid_subsets = args.valid_subset.split(',')
    max_update = args.max_update or math.inf
    with metrics.aggregate() as agg:
        for samples in progress:
            log_output = trainer.train_step(samples)
            num_updates = trainer.get_num_updates()
            if log_output is None:
                continue

            # log mid-epoch stats
            stats = get_training_stats(agg.get_smoothed_values())
            progress.log(stats, tag='train', step=num_updates)

            if num_updates >= max_update:
                break

    # log end-of-epoch stats
    stats = get_training_stats(agg.get_smoothed_values())
    try:
        progress.print(stats, tag='train', step=num_updates, log=False)
    except TypeError:
        # fall back for progress bars whose print() does not accept the log keyword
        progress.print(stats, tag='train', step=num_updates)

    # reset epoch-level meters
    metrics.reset_meters('train')
Example 13
    def _reduce_and_log_stats(self, logging_outputs, sample_size):
        with metrics.aggregate() as agg:
            # convert logging_outputs to CPU to avoid unnecessary
            # device-to-host transfers in reduce_metrics
            logging_outputs = utils.apply_to_sample(
                lambda t: t.to(device='cpu', non_blocking=True),
                logging_outputs)

            self.task.reduce_metrics(logging_outputs, self.get_criterion())

            # support legacy interface
            logging_output = agg.get_smoothed_values()
            logging_output["sample_size"] = sample_size
            for key_to_delete in ["ppl", "wps", "wpb", "bsz"]:
                if key_to_delete in logging_output:
                    del logging_output[key_to_delete]
            return logging_output
Example 14
def validate(args, trainer, task, epoch_itr, subsets):
    """Evaluate the model on the validation set(s) and return the losses."""

    if args.fixed_validation_seed is not None:
        # set fixed seed for every validation
        utils.set_torch_seed(args.fixed_validation_seed)

    valid_losses = []
    for subset in subsets:
        # Initialize data iterator
        itr = task.get_batch_iterator(
            dataset=task.dataset(subset),
            max_tokens=args.max_tokens_valid,
            max_sentences=args.max_sentences_valid,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                trainer.get_model().max_positions(),
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_shards=args.distributed_world_size,
            shard_id=args.distributed_rank,
            num_workers=args.num_workers,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args,
            itr,
            epoch_itr.epoch,
            prefix='valid on \'{}\' subset'.format(subset),
            no_progress_bar='simple')

        # reset validation meters
        metrics.reset_meters('valid')

        with metrics.aggregate() as agg:
            for sample in progress:
                trainer.valid_step(sample)

        # log validation stats
        stats = get_valid_stats(args, trainer, agg.get_smoothed_values())
        progress.print(stats, tag=subset, step=trainer.get_num_updates())

        valid_losses.append(stats[args.best_checkpoint_metric])
    return valid_losses
Example 15
    def reduce_metrics(self, logging_outputs, criterion):
        with metrics.aggregate():
            # pass 'sample_size', 'nsentences', 'ntokens' stats to fairseq_task
            super().reduce_metrics(logging_outputs, criterion)
            for k in ['sample_size', 'nsentences', 'ntokens']:
                metrics.log_scalar(k, sum(l[k] for l in logging_outputs))
Example 16
def downstream_validate_pytorch(args,
                                task,
                                model,
                                criterion,
                                epoch_for_logging,
                                subsets,
                                valid_name,
                                num_updates,
                                global_epoch=None):
    """Evaluate the model on the validation set(s) and return the losses."""
    task.split = 'valid'
    valid_name_ = valid_name if valid_name is not None else 'valid'

    if args.fixed_validation_seed is not None:
        # Set fixed seed for every validation
        utils.set_torch_seed(args.fixed_validation_seed)

    valid_losses = []
    for subset in subsets:
        # Initialize data iterator
        itr = task.get_batch_iterator(
            dataset=task.dataset(subset),
            max_tokens=args.max_tokens_valid,
            max_sentences=args.max_sentences_valid,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                model.max_positions(),
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_shards=args.distributed_world_size,
            shard_id=args.distributed_rank,
            num_workers=args.num_workers,
            epoch=1,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args,
            itr,
            epoch_for_logging,
            prefix='valid on \'{}\' subset'.format(valid_name_),
            no_progress_bar='simple')

        # Add global epoch to beginning of progress bar description
        if global_epoch is not None:
            try:
                progress.wrapped_bar.tqdm.set_description(
                    desc='epoch {:03d} | \'{}\' {}'.format(
                        global_epoch, valid_name_,
                        progress.wrapped_bar.prefix),
                    refresh=True)
            except AttributeError:
                # progress bar has no wrapped_bar attribute; use its tqdm bar directly
                progress.tqdm.set_description(
                    desc='epoch {:03d} | \'{}\' {}'.format(
                        global_epoch, valid_name_, progress.tqdm.desc),
                    refresh=True)

        progress = maybe_wrap_neptune_logging(progress, args)

        # Reset validation meters
        metrics.reset_meters(valid_name_)

        with metrics.aggregate(valid_name_) as agg:  # match the meters reset above (valid_name may be None)
            dummy_batch = "DUMMY"
            for sample in progress:
                dummy_batch = sample if dummy_batch == "DUMMY" else dummy_batch
                valid_step(args, sample, task, model, criterion, dummy_batch,
                           logger)

        # Log validation stats
        stats = get_ft_valid_stats(args, agg.get_smoothed_values(),
                                   num_updates)
        progress.print(stats, tag=valid_name_, step=num_updates)

        valid_losses.append(stats[args.best_checkpoint_metric])
    return valid_losses
Example 17
def main(args):
    utils.import_user_module(args)
    utils.set_torch_seed(args.seed)

    assert args.max_tokens is not None or args.max_sentences is not None, \
        'Must specify batch size either with --max-tokens or --max-sentences'

    use_fp16 = args.fp16
    use_cuda = torch.cuda.is_available() and not args.cpu

    # Setup task, e.g., translation, language modeling, etc.
    task = tasks.setup_task(args)
    task.split = 'valid'

    # Load model
    load_checkpoint = getattr(args, 'load_checkpoint', None)

    if load_checkpoint:
        logger.info('loading model(s) from {}'.format(load_checkpoint))
        if not os.path.exists(load_checkpoint):
            raise IOError("Model file not found: {}".format(load_checkpoint))
        state = checkpoint_utils.load_checkpoint_to_cpu(load_checkpoint)

        checkpoint_args = state["args"]
        if task is None:
            task = tasks.setup_task(args)

        load_component_prefix = getattr(args, 'load_component_prefix', None)

        model_state = state["model"]
        if load_component_prefix:
            model_state = select_component_state(model_state,
                                                 load_component_prefix)

        # build model for ensemble
        model = task.build_model(args)
        missing_keys, unexpected_keys = model.load_state_dict(model_state,
                                                              strict=False,
                                                              args=args)
        handle_state_dict_keys(missing_keys, unexpected_keys)

    else:
        model = task.build_model(args)

    # Move model to GPU
    if use_fp16:
        model.half()
    if use_cuda:
        model.cuda()

    # Print args
    logger.info(args)

    # Build criterion
    criterion = task.build_criterion(args)
    criterion.eval()

    # Load and evaluate each validation subset
    for subset in args.valid_subset.split(','):
        try:
            task.load_dataset(subset, combine=False, epoch=0)
            dataset = task.dataset(subset)
        except KeyError:
            raise Exception('Cannot find dataset: ' + subset)

        # Initialize data iterator
        itr = task.get_batch_iterator(
            dataset=dataset,
            max_tokens=args.max_tokens,
            max_sentences=args.max_sentences,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                model.max_positions(),
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_workers=args.num_workers,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args,
            itr,
            prefix='valid on \'{}\' subset'.format(subset),
            no_progress_bar='simple')

        log_outputs = []
        for i, sample in enumerate(progress):
            sample = utils.move_to_cuda(sample) if use_cuda else sample
            _loss, _sample_size, log_output = task.valid_step(
                sample, model, criterion)
            progress.log(log_output, step=i)
            log_outputs.append(log_output)

        with metrics.aggregate() as agg:
            task.reduce_metrics(log_outputs, criterion)
            log_output = agg.get_smoothed_values()

        progress.print(log_output, tag=subset, step=i)
Example 18
def main(args, override_args=None):
    utils.import_user_module(args)

    assert args.max_tokens is not None or args.max_sentences is not None, \
        'Must specify batch size either with --max-tokens or --max-sentences'

    use_fp16 = args.fp16
    use_cuda = torch.cuda.is_available() and not args.cpu

    if override_args is not None:
        overrides = vars(override_args)
        overrides.update(eval(getattr(override_args, 'model_overrides', '{}')))
    else:
        overrides = None

    # Load ensemble
    logger.info('loading model(s) from {}'.format(args.path))
    models, model_args, task = checkpoint_utils.load_model_ensemble_and_task(
        [args.path],
        arg_overrides=overrides,
    )
    model = models[0]

    # Move models to GPU
    for model in models:
        if use_fp16:
            model.half()
        if use_cuda:
            model.cuda()

    # Print args
    logger.info(model_args)

    # Build criterion
    criterion = task.build_criterion(model_args)
    criterion.eval()

    # Load and evaluate each validation subset
    for subset in args.valid_subset.split(','):
        try:
            task.load_dataset(subset, combine=False, epoch=0)
            dataset = task.dataset(subset)
        except KeyError:
            raise Exception('Cannot find dataset: ' + subset)

        # Initialize data iterator
        itr = task.get_batch_iterator(
            dataset=dataset,
            max_tokens=args.max_tokens,
            max_sentences=args.max_sentences,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                *[m.max_positions() for m in models],
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_workers=args.num_workers,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args,
            itr,
            prefix='valid on \'{}\' subset'.format(subset),
            no_progress_bar='simple')

        log_outputs = []
        for i, sample in enumerate(progress):
            sample = utils.move_to_cuda(sample) if use_cuda else sample
            _loss, _sample_size, log_output = task.valid_step(
                sample, model, criterion)
            progress.log(log_output, step=i)
            log_outputs.append(log_output)

        with metrics.aggregate() as agg:
            task.reduce_metrics(log_outputs, criterion)
            log_output = agg.get_smoothed_values()

        progress.print(log_output, tag=subset, step=i)