def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 90

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  This information is
    # sometimes useful when referring back to past experiment executions.
    apputils.log_execution_env_state(
        filter(None, [args.compress, args.qe_stats_file
                      ]),  # remove both None and empty strings
        msglogger.logdir,
        gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        distiller.set_deterministic(
            args.seed)  # For experiment reproducibility
    else:
        if args.seed is not None:
            distiller.set_seed(args.seed)
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = distiller.apputils.classification_dataset_str_from_arch(
        args.arch)
    args.num_classes = distiller.apputils.classification_num_classes(
        args.dataset)

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained,
                         args.dataset,
                         args.arch,
                         parallel=not args.load_serialized,
                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's TensorBoard.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1
    if args.deprecated_resume:
        msglogger.warning(
            'The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.'
        )
        if not args.reset_optimizer:
            msglogger.warning(
                'If you wish to also reset the optimizer, call with: --reset-optimizer'
            )
            args.reset_optimizer = True
        args.resumed_checkpoint_path = args.deprecated_resume

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model,
                                              args.load_model_path,
                                              model_device=args.device)
    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info(
                '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0'
            )

    # Define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(args.device)

    if optimizer is None:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        for summary in args.summary:
            distiller.model_summary(model, summary, args.dataset)
        return

    if args.export_onnx is not None:
        return distiller.export_img_classifier_to_onnx(model,
                                                       os.path.join(
                                                           msglogger.logdir,
                                                           args.export_onnx),
                                                       args.dataset,
                                                       add_softmax=True,
                                                       verbose=False)

    if args.qe_calibration:
        return acts_quant_stats_collection(model, criterion, pylogger, args)

    if args.activation_histograms:
        return acts_histogram_collection(model, criterion, pylogger, args)

    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = load_data(args)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resumed_checkpoint_path.replace(
                                         ".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    if start_epoch >= ending_epoch:
        msglogger.error(
            'epoch count is too low, starting epoch is {} but total epochs set to {}'
            .format(start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')
    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch,
                                               optimizer,
                                               metrics={
                                                   'min': vloss,
                                                   'max': top1
                                               })

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, top1, top5,
                                       epoch, args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {
            'current_top1': top1,
            'best_top1': perf_scores_history[0].top1,
            'best_epoch': perf_scores_history[0].epoch
        }
        apputils.save_checkpoint(epoch,
                                 args.arch,
                                 model,
                                 optimizer=optimizer,
                                 scheduler=compression_scheduler,
                                 extras=checkpoint_extras,
                                 is_best=is_best,
                                 name=args.name,
                                 dir=msglogger.logdir)

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)
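
The loop above relies on an update_training_scores_history() helper that is not shown in this excerpt. Below is a minimal sketch of the bookkeeping it is assumed to perform, reusing the distiller.MutableNamedTuple records that appear in Example 5 further down; the real helper in distiller's sample application may also factor in model sparsity.

import operator

def update_training_scores_history(perf_scores_history, model, top1, top5,
                                   epoch, num_best_scores):
    """Keep the best `num_best_scores` results, best first (hypothetical sketch)."""
    perf_scores_history.append(distiller.MutableNamedTuple(
        {'top1': top1, 'top5': top5, 'epoch': epoch}))
    # Best scores first: sort descending by top1, then top5, then recency
    perf_scores_history.sort(key=operator.attrgetter('top1', 'top5', 'epoch'),
                             reverse=True)
    del perf_scores_history[num_best_scores:]
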
Example 2
def main():
    global msglogger
    check_pytorch_version()
    args = parser.parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  This information is
    # sometimes useful when referring back to past experiment executions.
    apputils.log_execution_env_state(sys.argv, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_top1 = 0
    best_epoch = 0

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden wrote about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In PyTorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1'
            )
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        torch.manual_seed(0)
        random.seed(0)
        np.random.seed(0)
        cudnn.deterministic = True
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # implies that cudnn.benchmark should respect cudnn.deterministic, but empirically
        # results are not reproduced when benchmark is set, so we enable it only when
        # deterministic mode is disabled.
        cudnn.benchmark = True

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error(
                'ERROR: Argument --gpus must be a comma-separated list of integers only'
            )
            exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error(
                    'ERROR: GPU device ID {0} requested, but only {1} devices available'
                    .format(dev_id, available_gpus))
                exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained,
                         args.dataset,
                         args.arch,
                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's TensorBoard.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.ADC:
        return automated_deep_compression(model, criterion, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_size, args.deterministic)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    activations_sparsity = None
    if args.activation_stats:
        # If your model has ReLU layers, then those layers have sparse activations.
        # ActivationSparsityCollector will collect information about this sparsity.
        # WARNING! Enabling activation sparsity collection will significantly slow down training!
        activations_sparsity = ActivationSparsityCollector(model)

    if args.sensitivity is not None:
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger, args)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.cuda()
    else:
        compression_scheduler = distiller.CompressionScheduler(model)

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher,
                                                     chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        train(train_loader,
              model,
              criterion,
              optimizer,
              epoch,
              compression_scheduler,
              loggers=[tflogger, pylogger],
              args=args)
        distiller.log_weights_sparsity(model,
                                       epoch,
                                       loggers=[tflogger, pylogger])
        if args.activation_stats:
            distiller.log_activation_sparsity(epoch,
                                              loggers=[tflogger, pylogger],
                                              collector=activations_sparsity)

        # evaluate on validation set
        top1, top5, vloss = validate(val_loader, model, criterion, [pylogger],
                                     args, epoch)
        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # remember best top1 and save checkpoint
        is_best = top1 > best_top1
        if is_best:
            best_epoch = epoch
            best_top1 = top1
        msglogger.info('==> Best Top1: %.3f   On Epoch: %d\n', best_top1,
                       best_epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler, best_top1, is_best,
                                 args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], args=args)
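
Note that the two examples resume from checkpoints through different apputils.load_checkpoint() signatures: the first example's newer API also returns the optimizer stored in the checkpoint, while this example's older API does not. Both patterns, verbatim from the code above:

# Newer API (Example 1): the optimizer is restored from the checkpoint
model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
    model, args.resumed_checkpoint_path, model_device=args.device)

# Older API (this example): no optimizer in the returned tuple
model, compression_scheduler, start_epoch = apputils.load_checkpoint(
    model, chkpt_file=args.resume)
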
def _validate(data_loader, model, criterion, loggers, args, epoch=-1):
    """Execute the validation/test loop."""
    losses = {'objective_loss': tnt.AverageValueMeter()}
    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))

    if args.earlyexit_thresholds:
        # for Early Exit, we have a list of errors and losses for each of the exits.
        args.exiterrors = []
        args.losses_exits = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(
                tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))
            args.losses_exits.append(tnt.AverageValueMeter())
        args.exit_taken = [0] * args.num_exits

    batch_time = tnt.AverageValueMeter()
    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    if args.display_confusion:
        confusion = tnt.ConfusionMeter(args.num_classes)
    total_steps = math.ceil(total_samples / batch_size)
    msglogger.info('%d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to evaluation mode
    model.eval()

    end = time.time()
    for validation_step, (inputs, target) in enumerate(data_loader):
        with torch.no_grad():
            inputs, target = inputs.to(args.device), target.to(args.device)
            # compute output from model
            output = model(inputs)

            if not args.earlyexit_thresholds:
                # compute loss
                loss = criterion(output, target)
                # measure accuracy and record loss
                losses['objective_loss'].add(loss.item())
                classerr.add(output.data, target)
                if args.display_confusion:
                    confusion.add(output.data, target)
            else:
                earlyexit_validate_loss(output, target, criterion, args)

            # measure elapsed time
            batch_time.add(time.time() - end)
            end = time.time()

            steps_completed = (validation_step + 1)
            if steps_completed % args.print_freq == 0:
                if not args.earlyexit_thresholds:
                    stats = ('',
                             OrderedDict([('Loss',
                                           losses['objective_loss'].mean),
                                          ('Top1', classerr.value(1)),
                                          ('Top5', classerr.value(5))]))
                else:
                    stats_dict = OrderedDict()
                    stats_dict['Test'] = validation_step
                    for exitnum in range(args.num_exits):
                        la_string = 'LossAvg' + str(exitnum)
                        stats_dict[la_string] = args.losses_exits[exitnum].mean
                        # Because of the nature of ClassErrorMeter, if an exit is never taken during the batch,
                        # then accessing the value(k) will cause a divide by zero. So we'll build the OrderedDict
                        # accordingly and we will not print for an exit error when that exit is never taken.
                        if args.exit_taken[exitnum]:
                            t1 = 'Top1_exit' + str(exitnum)
                            t5 = 'Top5_exit' + str(exitnum)
                            stats_dict[t1] = args.exiterrors[exitnum].value(1)
                            stats_dict[t5] = args.exiterrors[exitnum].value(5)
                    stats = ('Performance/Validation/', stats_dict)

                distiller.log_training_progress(stats, None, epoch,
                                                steps_completed, total_steps,
                                                args.print_freq, loggers)
    if not args.earlyexit_thresholds:
        msglogger.info('==> Top1: %.3f    Top5: %.3f    Loss: %.3f\n',
                       classerr.value()[0],
                       classerr.value()[1], losses['objective_loss'].mean)

        if args.display_confusion:
            msglogger.info('==> Confusion:\n%s\n', str(confusion.value()))
        return classerr.value(1), classerr.value(
            5), losses['objective_loss'].mean
    else:
        total_top1, total_top5, losses_exits_stats = earlyexit_validate_stats(
            args)
        return total_top1, total_top5, losses_exits_stats[args.num_exits - 1]
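
The examples call validate() and test(), which are not shown in this excerpt. In distiller's sample application they are thin wrappers around _validate(); a sketch assuming that structure (the log strings are illustrative):

def validate(val_loader, model, criterion, loggers, args, epoch=-1):
    """Model validation on the validation set (assumed thin wrapper)."""
    msglogger.info('--- validate (epoch=%d)-----------', epoch)
    return _validate(val_loader, model, criterion, loggers, args, epoch)

def test(test_loader, model, criterion, loggers, args):
    """Model testing on the test set (assumed thin wrapper)."""
    msglogger.info('--- test ---------------------')
    return _validate(test_loader, model, criterion, loggers, args)
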
Example 4
    def train(self, epoch, compression_scheduler):
        """
            Train Process

            Arguments:
                epoch (int): epoch id, 当前是training过程的第几个epoch \n
                compression_scheduler (class CompressionScheduler): 由compression schedule定义文件***.yaml构建的CompressionScheduler对象

            Examples:
                >>> compression_scheduler.on_minibatch_begin(epoch)
                >>> output = self.model(data)                # 模型inferecne
                >>> loss = self.criterion(output, target)    # 计算loss
                >>> compression_scheduler.before_backward_pass(epoch)
                >>> loss.backward()                          # 反向计算梯度
                >>> self.optimizer.step()                    # 根据梯度优化权重
                >>> compression_scheduler.on_minibatch_end(epoch)       
        """
        losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                              (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])
        batch_time = tnt.AverageValueMeter()

        self.model.train()
        start_time = time.time()
        for batch_num, (data, target) in enumerate(self.training_loader):
            data = self.img_preprocess(data)  # resize input image size
            data, target = data.to(self.device), target.to(self.device)

            if compression_scheduler:
                compression_scheduler.on_minibatch_begin(
                    epoch=epoch,
                    minibatch_id=batch_num,
                    minibatches_per_epoch=len(self.training_loader),
                    optimizer=self.optimizer)

            # self.model = self.model.to(self.device)
            loss = self.criterion(self.model(data), target)

            losses[OBJECTIVE_LOSS_KEY].add(loss.item())

            if compression_scheduler:
                # Before running the backward phase, we allow the scheduler to modify the loss
                agg_loss = compression_scheduler.before_backward_pass(
                    epoch,
                    minibatch_id=batch_num,
                    minibatches_per_epoch=len(self.training_loader),
                    loss=loss,
                    optimizer=self.optimizer,
                    return_loss_components=True)
                loss = agg_loss.overall_loss
                losses[OVERALL_LOSS_KEY].add(loss.item())
                for lc in agg_loss.loss_components:
                    if lc.name not in losses:
                        losses[lc.name] = tnt.AverageValueMeter()
                    losses[lc.name].add(lc.value.item())

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            if compression_scheduler:
                compression_scheduler.on_minibatch_end(
                    epoch=epoch,
                    minibatch_id=batch_num,
                    minibatches_per_epoch=len(self.training_loader),
                    optimizer=self.optimizer)

            # Collect and log training statistics at the configured frequency
            stats_dict = OrderedDict()
            batch_time.add(time.time() - start_time)
            steps_completed = batch_num + 1
            lr = self.optimizer.param_groups[0]['lr']

            if steps_completed % self.freq == 0:
                for loss_name, meter in losses.items():
                    stats_dict[loss_name] = meter.mean
                stats_dict['LR'] = lr
                stats_dict['Batch Time'] = batch_time.mean * 1000
                stats = ('Performance/Training', stats_dict)

                distiller.log_training_progress(stats,
                                                self.model.named_parameters(),
                                                epoch, steps_completed,
                                                len(self.training_loader),
                                                self.freq,
                                                [tflogger, pylogger])

            start_time = time.time()
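
The method above logs through module-level tflogger and pylogger objects that are created elsewhere. A minimal sketch of the setup it assumes, mirroring the logger construction in the main() examples (the 'logs' directory is a placeholder):

import logging

from distiller.data_loggers import PythonLogger, TensorBoardLogger

msglogger = logging.getLogger()
tflogger = TensorBoardLogger('logs')  # hypothetical log directory
pylogger = PythonLogger(msglogger)
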
Example 5
def main():
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  This information is
    # sometimes useful when referring back to past experiment executions.
    apputils.log_execution_env_state(args.compress,
        msglogger.logdir, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_epochs = [distiller.MutableNamedTuple({'epoch': 0, 'top1': 0, 'sparsity': 0})
                   for i in range(args.num_best_scores)]

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden wrote about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In PyTorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error('ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1')
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # implies that cudnn.benchmark should respect cudnn.deterministic, but empirically
        # results are not reproduced when benchmark is set, so we enable it only when
        # deterministic mode is disabled.
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error('ERROR: Argument --gpus must be a comma-separated list of integers only')
                exit(1)
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                    .format(dev_id, available_gpus))
                    exit(1)
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained, args.dataset, args.arch,
                         parallel=not args.load_serialized, device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's TensorBoard.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(model, chkpt_file=args.resume)
        model.to(args.device)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)

    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer, pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info('\tStats will be collected for {:.1%} of test dataset'.format(args.qe_calibration))
        msglogger.info('\tSetting constant seeds and converting model to serialized execution')
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(create_quantization_stats_collector(model))
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size, args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1], args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger, activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer, args.compress, compression_scheduler)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"
        distiller.remove_filters(model, compression_scheduler.zeros_mask_dict, args.arch, args.dataset, optimizer=None)
        apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler,
                                 name="{}_thinned".format(args.resume.replace(".pth.tar", "")), dir=msglogger.logdir)
        print("Note: your model may have collapsed to random inference, so you may want to fine-tune")
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher, chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch, ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader, model, criterion, optimizer, epoch, compression_scheduler,
                  loggers=[tflogger, pylogger], args=args)
            distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(epoch, "train", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch)
            distiller.log_activation_statsitics(epoch, "valid", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss),
                              ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        is_best = top1 > best_epochs[-1].top1
        if top1 > best_epochs[0].top1:
            best_epochs[0].epoch = epoch
            best_epochs[0].top1 = top1
            # Keep best_epochs sorted such that best_epochs[0] is the lowest top1 in the best_epochs list
            best_epochs = sorted(best_epochs, key=lambda score: score.top1)
        for score in reversed(best_epochs):
            if score.top1 > 0:
                msglogger.info('==> Best Top1: %.3f on Epoch: %d', score.top1, score.epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer, compression_scheduler,
                                 best_epochs[-1].top1, is_best, args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
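
Example 5 tracks its best results in a list of distiller.MutableNamedTuple records kept sorted ascending by top1, so best_epochs[0] holds the weakest retained score and best_epochs[-1] the best one seen so far. A compact, self-contained trace of that bookkeeping, using plain dicts as a stand-in:

best_epochs = [{'epoch': 0, 'top1': 0} for _ in range(3)]  # num_best_scores = 3

for epoch, top1 in enumerate([10.0, 30.0, 20.0, 40.0]):
    is_best = top1 > best_epochs[-1]['top1']  # beats the best score so far?
    if top1 > best_epochs[0]['top1']:         # beats the weakest retained score?
        best_epochs[0].update(epoch=epoch, top1=top1)
        best_epochs.sort(key=lambda score: score['top1'])

print([score['top1'] for score in best_epochs])  # [20.0, 30.0, 40.0]
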
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, args):
    """Training loop for one epoch."""
    losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                          (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    # For Early Exit, we define statistics for each exit
    # So exiterrors is analogous to classerr for the non-Early Exit case
    if args.earlyexit_lossweights:
        args.exiterrors = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)', total_samples, batch_size)
    epoch_frac = args.partial_epoch
    steps_per_frac_epoch = math.ceil((total_samples*epoch_frac) / batch_size)

    # Switch to train mode
    model.train()
    end = time.time()

    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)
        inputs, target = inputs.to('cuda'), target.to('cuda')

        if train_step == steps_per_frac_epoch:
            break
        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step, steps_per_epoch, optimizer)

        if args.kd_policy is None:
            # Regular forward pass through the student model
            output = model(inputs)
        else:
            # Forward through the knowledge-distillation policy (student + teacher)
            output = args.kd_policy.forward(inputs)
        if not args.earlyexit_lossweights:
            # --------------------------------------------------------------
            # AHMED edit (sin2-reg, April19): add a sin^2 regularization term
            # that pulls weights toward a uniform quantization grid (see the
            # generic sketch after this example).
            bw = 3
            qbits_dict = {name: bw for name in
                          ('conv1', 'conv2', 'fc1', 'fc2', 'fc3')}  # informational only

            q = 4
            power = 2
            step = 1 / (2**q - 0.5)  # DoReFa-style grid spacing
            shift = step / 2
            # step = 1 / (2**q - 1)  # WRPN-style grid (alternative)
            # shift = 0

            # Penalize each layer's latent (float) weights for distance from the grid
            kernel = model.module.conv1.float_weight
            sin2_func_1 = torch.mean((torch.sin(pi * (kernel + shift) / step))**power)

            kernel = model.module.conv2.float_weight
            sin2_func_2 = torch.mean(torch.pow(torch.sin(pi * (kernel + shift) / step), power))

            kernel = model.module.fc1.float_weight
            sin2_func_3 = torch.mean(torch.pow(torch.sin(pi * (kernel + shift) / step), power))

            kernel = model.module.fc2.float_weight
            sin2_func_4 = torch.mean(torch.pow(torch.sin(pi * (kernel + shift) / step), power))

            kernel = model.module.fc3.float_weight
            sin2_func_5 = torch.mean(torch.pow(torch.sin(pi * (kernel + shift) / step), power))

            sin2_reg_loss = (sin2_func_1 + sin2_func_2 + sin2_func_3 +
                             sin2_func_4 + sin2_func_5)

            cost_factor = 1
            reg_loss = cost_factor * sin2_reg_loss
            loss = criterion(output, target) + reg_loss
            # --------------------------------------------------------------

            # Measure accuracy and record loss
            classerr.add(output.data, target)
        else:
            # Measure accuracy and record loss
            loss = earlyexit_loss(output, target, criterion, args)
        losses[OBJECTIVE_LOSS_KEY].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            agg_loss = compression_scheduler.before_backward_pass(epoch, train_step, steps_per_epoch, loss,
                                                                  optimizer=optimizer, return_loss_components=True)
            loss = agg_loss.overall_loss
            losses[OVERALL_LOSS_KEY].add(loss.item())
            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step, steps_per_epoch, optimizer)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step+1)

        if steps_completed % args.print_freq == 0:
            # Log some statistics
            errs = OrderedDict()
            if not args.earlyexit_lossweights:
                errs['Top1'] = classerr.value(1)
                errs['Top5'] = classerr.value(5)
            else:
                # for Early Exit case, the Top1 and Top5 stats are computed for each exit.
                for exitnum in range(args.num_exits):
                    errs['Top1_exit' + str(exitnum)] = args.exiterrors[exitnum].value(1)
                    errs['Top5_exit' + str(exitnum)] = args.exiterrors[exitnum].value(5)

            stats_dict = OrderedDict()
            for loss_name, meter in losses.items():
                stats_dict[loss_name] = meter.mean
            stats_dict.update(errs)
            stats_dict['LR'] = optimizer.param_groups[0]['lr']
            stats_dict['Time'] = batch_time.mean
            stats = ('Performance/Training/', stats_dict)

            params = model.named_parameters() if args.log_params_histograms else None
            distiller.log_training_progress(stats,
                                            params,
                                            epoch, steps_completed,
                                            steps_per_epoch, args.print_freq,
                                            loggers)
        end = time.time()

    # Save conv1's latent (float) weights for offline inspection
    kernel = model.module.conv1.float_weight
    w1 = kernel.data.cpu().numpy()
    np.save('w1_cifar', w1)

    print('======================================', reg_loss.item())
    write_to_csv2([loss.data.cpu().numpy(), reg_loss.data.cpu().numpy()])
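
The sin2-reg block above repeats one computation per hard-coded layer. The term sin(pi * (w + shift) / step)**power vanishes exactly on the points of a uniform quantization grid (here step = 1/(2**q - 0.5) and shift = step/2, the DoReFa-style grid) and peaks midway between them, so minimizing it pulls weights toward quantized values. A generic sketch of the same penalty (the float_weight attributes are carried over from the example; this is not a distiller API):

import math

import torch

def sin2_quantization_penalty(weights, q=4, power=2):
    """Mean sin^power penalty pulling `weights` toward a 2**q-level grid (sketch)."""
    step = 1 / (2 ** q - 0.5)  # DoReFa-style grid spacing
    shift = step / 2
    return torch.mean(torch.sin(math.pi * (weights + shift) / step) ** power)

# Summed over the same layers the example names in qbits_dict:
# reg_loss = sum(sin2_quantization_penalty(getattr(model.module, name).float_weight)
#                for name in ('conv1', 'conv2', 'fc1', 'fc2', 'fc3'))
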
Example 7
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, print_freq, log_params_hist):
    """Training loop for one epoch."""
    losses = {
        'objective_loss': tnt.AverageValueMeter(),
        'regularizer_loss': tnt.AverageValueMeter()
    }
    if compression_scheduler is None:
        # Initialize the regularizer loss to zero
        losses['regularizer_loss'].add(0)

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)',
                   total_samples, batch_size)

    # Switch to train mode
    model.train()
    end = time.time()

    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)

        target = target.cuda(non_blocking=True)
        input_var = inputs.cuda()
        target_var = torch.autograd.Variable(target)

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step,
                                                     steps_per_epoch,
                                                     optimizer)
        output = model(input_var)
        loss = criterion(output, target_var)

        # Measure accuracy and record loss
        classerr.add(output.data, target)
        losses['objective_loss'].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we add any regularization loss computed by the scheduler
            regularizer_loss = compression_scheduler.before_backward_pass(
                epoch, train_step, steps_per_epoch, loss, optimizer)
            loss += regularizer_loss
            losses['regularizer_loss'].add(regularizer_loss.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step,
                                                   steps_per_epoch, optimizer)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step + 1)

        if steps_completed % print_freq == 0:
            # Log some statistics
            lr = optimizer.param_groups[0]['lr']
            stats = ('Performance/Training/',
                     OrderedDict([('Loss', losses['objective_loss'].mean),
                                  ('Reg Loss',
                                   losses['regularizer_loss'].mean),
                                  ('Top1', classerr.value(1)),
                                  ('Top5', classerr.value(5)), ('LR', lr),
                                  ('Time', batch_time.mean)]))

            distiller.log_training_progress(
                stats,
                model.named_parameters() if log_params_hist else None, epoch,
                steps_completed, steps_per_epoch, print_freq, loggers)
        end = time.time()
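
Unlike the other training loops on this page, this older variant takes print_freq and log_params_hist as explicit parameters instead of reading them from args. A call would look like this (argument values assumed from the surrounding examples):

train(train_loader, model, criterion, optimizer, epoch, compression_scheduler,
      loggers=[tflogger, pylogger], print_freq=args.print_freq,
      log_params_hist=args.log_params_histograms)
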
Example 8
def _validate(data_loader, model, criterion, loggers, args, epoch=-1):
    """Execute the validation/test loop."""

    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()
    losses = tnt.AverageValueMeter()

    meter_dict = {
        'paf': tnt.AverageValueMeter(),
        'heatmap': tnt.AverageValueMeter(),
        'max_ht': tnt.AverageValueMeter(),
        'min_ht': tnt.AverageValueMeter(),
        'max_paf': tnt.AverageValueMeter(),
        'min_paf': tnt.AverageValueMeter()
    }

    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    total_steps = math.ceil(total_samples / batch_size)
    msglogger.info('%d samples (%d per mini-batch)', total_samples, batch_size)

    model.eval()  # TODO: model.train() in original repo
    end = time.time()

    # model = torch.nn.DataParallel(model, device_ids=args.gpus)
    # run_eval(image_dir=args.data, anno_dir=args.anno_dir, vis_dir=args.vis_dir,
    #          image_list_txt=args.image_list_txt,
    #          model=model, preprocess='vgg' if args.arch == 'vgg19' else 'rtpose')

    for validation_step, (inputs, heatmap_target, heat_mask, paf_target,
                          paf_mask) in enumerate(data_loader):
        with torch.no_grad():
            data_time.add(time.time() - end)

            inputs = inputs.to(args.device)
            heatmap_target = heatmap_target.to(args.device)
            heat_mask = heat_mask.to(args.device)
            paf_target = paf_target.to(args.device)
            paf_mask = paf_mask.to(args.device)

            _, saved_for_loss = model(inputs)
            total_loss, saved_for_log = criterion(saved_for_loss,
                                                  heatmap_target, heat_mask,
                                                  paf_target, paf_mask)

            losses.add(total_loss.item(), inputs.size(0))

            batch_time.add(time.time() - end)
            end = time.time()

            steps_completed = (validation_step + 1)
            if steps_completed % args.print_freq == 0:
                stats = ('', OrderedDict([
                    ('Loss', losses.mean),
                ]))
                distiller.log_training_progress(stats, None, epoch,
                                                steps_completed, total_steps,
                                                args.print_freq, loggers)

    msglogger.info('==> Loss: %.6f\n', losses.mean)

    # TODO: refactor me
    with open(
            '/home/CORP.PKUSC.ORG/hatsu3/research/compression/distiller/examples/openpose_compression/notebooks/results.txt',
            'w') as f:
        f.write('%.6f' % losses.mean)

    return losses.mean
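
Stripped of the pose-estimation specifics, _validate follows the standard PyTorch evaluation pattern: switch to eval mode, disable gradient tracking, and keep a sample-weighted running average of the loss. A minimal sketch of that pattern (the names are placeholders, not Distiller APIs):

import torch

def quick_validate(model, criterion, loader, device='cpu'):
    """Eval mode, no gradients, sample-weighted mean loss."""
    model.eval()
    total, count = 0.0, 0
    with torch.no_grad():
        for inputs, target in loader:
            inputs, target = inputs.to(device), target.to(device)
            loss = criterion(model(inputs), target)
            total += loss.item() * inputs.size(0)
            count += inputs.size(0)
    return total / count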
Example #9
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 200

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(
        filter(None, [args.compress, args.qe_stats_file
                      ]),  # remove both None and empty strings
        msglogger.logdir,
        gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        distiller.set_deterministic(
            args.seed)  # For experiment reproducibility
    else:
        if args.seed is not None:
            distiller.set_seed(args.seed)
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # The dataset is hard-coded to COCO for this pose-estimation sample
    # TODO: infer the dataset from the model name, as the classifier sample does
    args.dataset = 'coco'
    # args.num_classes = 21  # wc -l ~/data/VOC2012/voc-model-labels.txt

    if args.load_vgg19 and args.arch != 'vgg19':
        raise ValueError(
            '``load_vgg19`` should be set only when vgg19 is used')

    model = create_pose_estimation_model(args.pretrained,
                                         args.dataset,
                                         args.arch,
                                         load_vgg19=args.load_vgg19,
                                         parallel=not args.load_serialized,
                                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # <editor-fold desc=">>> Load Model">

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model,
                                              args.load_model_path,
                                              model_device=args.device)

    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info(
                '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0'
            )
    # </editor-fold>

    # Define loss function (criterion)
    # get_loss(saved_for_loss, heat_temp, heat_weight,vec_temp, vec_weight)
    criterion = {
        'shufflenetv2': shufflenetv2_get_loss,
        'vgg19': vgg19_get_loss,
        'hourglass': hourglass_get_loss,
    }[args.arch]

    if optimizer is None:
        trainable_vars = [
            param for param in model.parameters() if param.requires_grad
        ]
        optimizer = torch.optim.SGD(trainable_vars,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    # TODO: load lr_scheduler
    lr_scheduler = ReduceLROnPlateau(optimizer,
                                     mode='min',
                                     factor=0.8,
                                     patience=5,
                                     verbose=True,
                                     threshold=0.0001,
                                     threshold_mode='rel',
                                     cooldown=3,
                                     min_lr=0,
                                     eps=1e-08)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        for summary in args.summary:
            distiller.model_summary(model, summary, args.dataset)
        return

    if args.export_onnx is not None:
        return distiller.export_img_classifier_to_onnx(model,
                                                       os.path.join(
                                                           msglogger.logdir,
                                                           args.export_onnx),
                                                       args.dataset,
                                                       add_softmax=True,
                                                       verbose=False)

    if args.qe_calibration:
        return acts_quant_stats_collection(model, criterion, pylogger, args)

    if args.activation_histograms:
        return acts_histogram_collection(model, criterion, pylogger, args)

    print('Building activations_collectors...')
    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    # Load the datasets. Note: unlike the classifier sample, this script does
    # not infer the dataset from args.arch; it was fixed to COCO above.
    print('Loading data...')
    train_loader, val_loader, test_loader, _ = load_data(args)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        # zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resumed_checkpoint_path.replace(
                                         ".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    if start_epoch >= ending_epoch:
        msglogger.error(
            'epoch count is too low, starting epoch is {} but total epochs set to {}'
            .format(start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')

    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(
                epoch,
                metrics=(loss if (epoch != start_epoch) else 10**6))  # 'loss' is last epoch's validation loss

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            loss = validate(val_loader, model, criterion, [pylogger], args,
                            epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        lr_scheduler.step(loss)

        stats = ('Performance/Validation/', OrderedDict([('Loss', loss)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, loss, epoch,
                                       args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {
            'current_loss': loss,
            'best_loss': perf_scores_history[0].loss,
            'best_epoch': perf_scores_history[0].epoch
        }
        apputils.save_checkpoint(epoch,
                                 args.arch,
                                 model,
                                 optimizer=optimizer,
                                 scheduler=compression_scheduler,
                                 extras=checkpoint_extras,
                                 is_best=is_best,
                                 name=args.name,
                                 dir=msglogger.logdir)

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)
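
The ReduceLROnPlateau scheduler configured above only acts through the lr_scheduler.step(loss) call inside the epoch loop: it shrinks the learning rate by `factor` once the monitored metric stops improving for `patience` epochs. A self-contained sketch of that interaction, with a stand-in model and synthetic loss values:

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=5)

for epoch in range(20):
    val_loss = 1.0                       # placeholder for validate(...); a
    scheduler.step(val_loss)             # flat metric triggers LR decay
    print(epoch, optimizer.param_groups[0]['lr'])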
Example #10
    def step(self, pruning_action):
        """Take a step, given an action.

        The action represents the desired sparsity for the "current" layer (i.e. the percentage of weights to remove).
        This function is invoked by the Agent.
        """
        if self.current_state_id == 0:
            msglogger.info("+" + "-" * 50 + "+")
            msglogger.info("Episode %d is starting" % self.episode)

        pruning_action = float(pruning_action[0])
        msglogger.debug(
            "env.step - current_state_id=%d (%s) episode=%d action=%.2f" %
            (self.current_state_id, self.current_layer().name, self.episode,
             pruning_action))
        self.agent_action_history.append(pruning_action)

        if is_using_continuous_action_space(self.amc_cfg.agent_algo):
            if self.amc_cfg.agent_algo == "ClippedPPO-continuous":
                # We need to map PPO's infinite action-space (actions sampled from a Gaussian) to our action-space.
                pruning_action = adjust_ppo_output(pruning_action,
                                                   self.action_high,
                                                   self.action_low)
            else:
                pruning_action = np.clip(pruning_action, self.action_low,
                                         self.action_high)
        else:
            # Divide the action space into 10 discrete levels (0%, 10%, 20%,....90% sparsity)
            pruning_action = pruning_action / 10
        msglogger.debug(
            "\tAgent clipped pruning_action={}".format(pruning_action))

        if self.amc_cfg.action_constrain_fn is not None:
            pruning_action = self.amc_cfg.action_constrain_fn(
                self, pruning_action=pruning_action)
            msglogger.debug(
                "Constrained pruning_action={}".format(pruning_action))

        # Calculate the final compression rate
        total_macs_before, _ = self.net_wrapper.get_resources_requirements()
        layer_macs = self.net_wrapper.layer_macs(self.current_layer())
        msglogger.debug("\tlayer_macs={:.2f}".format(layer_macs /
                                                     self.original_model_macs))
        msglogger.debug("\tremoved_macs={:.2f}".format(self.removed_macs_pct))
        msglogger.debug("\trest_macs={:.2f}".format(self.rest_macs()))
        msglogger.debug("\tcurrent_layer_id = %d" % self.current_layer_id)
        self.current_state_id += 1
        if pruning_action > 0:
            pruning_action = self.net_wrapper.remove_structures(
                self.current_layer_id,
                fraction_to_prune=pruning_action,
                prune_what=self.amc_cfg.pruning_pattern,
                prune_how=self.amc_cfg.pruning_method,
                group_size=self.amc_cfg.group_size,
                apply_thinning=self.episode_is_done(),
                ranking_noise=self.amc_cfg.ranking_noise)
            #random_state=self.random_state)
        else:
            pruning_action = 0

        self.action_history.append(pruning_action)
        total_macs_after_act, total_nnz_after_act = self.net_wrapper.get_resources_requirements(
        )
        layer_macs_after_action = self.net_wrapper.layer_macs(
            self.current_layer())

        # Update the various counters after taking the step
        self.removed_macs += (total_macs_before - total_macs_after_act)

        msglogger.debug("\tactual_action={}".format(pruning_action))
        msglogger.debug(
            "\tlayer_macs={} layer_macs_after_action={} removed now={}".format(
                layer_macs, layer_macs_after_action,
                (layer_macs - layer_macs_after_action)))
        msglogger.debug("\tself._removed_macs={}".format(self.removed_macs))
        assert math.isclose(layer_macs_after_action / layer_macs,
                            1 - pruning_action)

        stats = ('Performance/Validation/',
                 OrderedDict([('requested_action', pruning_action)]))

        distiller.log_training_progress(
            stats,
            None,
            self.episode,
            steps_completed=self.current_state_id,
            total_steps=self.net_wrapper.num_pruned_layers(),
            log_freq=1,
            loggers=[self.tflogger])

        if self.episode_is_done():
            msglogger.info("Episode %d is ending" % self.episode)
            observation = self.get_final_obs()
            reward, top1, top5, vloss = self.compute_reward(
                total_macs_after_act, total_nnz_after_act)
            self.finalize_episode(reward, (top1, top5, vloss),
                                  total_macs_after_act, total_nnz_after_act,
                                  self.action_history,
                                  self.agent_action_history)
            self.episode += 1
        else:
            self.current_layer_id = self.net_wrapper.model_metadata.pruned_idxs[
                self.current_state_id]

            if self.amc_cfg.ft_frequency is not None and self.current_state_id % self.amc_cfg.ft_frequency == 0:
                self.net_wrapper.train(1, self.episode)
            observation = self.get_obs()
            if self.amc_cfg.reward_frequency is not None and self.current_state_id % self.amc_cfg.reward_frequency == 0:
                reward, top1, top5, vloss = self.compute_reward(
                    total_macs_after_act, total_nnz_after_act)
            else:
                reward = 0
        self.prev_action = pruning_action
        if self.episode_is_done():
            normalized_macs = total_macs_after_act / self.original_model_macs * 100
            info = {"accuracy": top1, "compress_ratio": normalized_macs}
            if self.amc_cfg.protocol == "mac-constrained":
                # Sanity check (special case only for "mac-constrained")
                assert self.removed_macs_pct >= 1 - self.amc_cfg.target_density - 0.002  # 0.01
                pass
        else:
            info = {}
        return observation, reward, self.episode_is_done(), info
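
Both step() implementations in this collection (this one and the one in Example #15) start by mapping the raw agent action onto a legal sparsity fraction. Here is that mapping extracted into a free function so it can be tested in isolation; illustrative only, independent of the AMC environment classes:

import numpy as np

def map_action(raw_action, continuous, low=0.0, high=0.9):
    """Map a raw agent action to a sparsity fraction in [low, high]."""
    if continuous:
        return float(np.clip(raw_action, low, high))
    # Discrete agents choose one of 10 levels: 0%, 10%, ..., 90% sparsity
    return raw_action / 10.0

assert map_action(0.95, continuous=True) == 0.9
assert map_action(3, continuous=False) == 0.3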
Example #11
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, args):
    """Training loop for one epoch."""

    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()
    losses = OrderedDict([('overall_loss', tnt.AverageValueMeter())])  # indexed by loss name below

    meter_dict = {
        'paf': tnt.AverageValueMeter(),
        'heatmap': tnt.AverageValueMeter(),
        'max_ht': tnt.AverageValueMeter(),
        'min_ht': tnt.AverageValueMeter(),
        'max_paf': tnt.AverageValueMeter(),
        'min_paf': tnt.AverageValueMeter()
    }

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)',
                   total_samples, batch_size)

    model.train()
    end = time.time()

    for train_step, (inputs, heatmap_target, heat_mask, paf_target,
                     paf_mask) in enumerate(train_loader):
        data_time.add(time.time() - end)

        inputs = inputs.to(args.device)
        heatmap_target = heatmap_target.to(args.device)
        heat_mask = heat_mask.to(args.device)
        paf_target = paf_target.to(args.device)
        paf_mask = paf_mask.to(args.device)

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step,
                                                     steps_per_epoch,
                                                     optimizer)

        _, saved_for_loss = model(inputs)
        # criterion: get_loss
        total_loss, saved_for_log = criterion(saved_for_loss, heatmap_target,
                                              heat_mask, paf_target, paf_mask)

        for name, _ in meter_dict.items():
            meter_dict[name].add(saved_for_log[name], inputs.size(0))
        # The overall loss is recorded below, in both scheduler branches.

        # TODO: remove?
        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            agg_loss = compression_scheduler.before_backward_pass(
                epoch,
                train_step,
                steps_per_epoch,
                total_loss,
                optimizer=optimizer,
                return_loss_components=True)
            loss = agg_loss.overall_loss
            losses['overall_loss'].add(loss.item())

            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())
        else:
            loss = total_loss
            losses['overall_loss'].add(total_loss.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()  # backward on the (possibly regularized) aggregate loss
        if compression_scheduler:
            compression_scheduler.before_parameter_optimization(
                epoch, train_step, steps_per_epoch, optimizer)
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step,
                                                   steps_per_epoch, optimizer)

        batch_time.add(time.time() - end)
        steps_completed = (train_step + 1)

        if steps_completed % args.print_freq == 0:
            stats_dict = OrderedDict({
                'loss': losses['overall_loss'].mean,
                'LR': optimizer.param_groups[0]['lr'],
                'Time': batch_time.mean,
            })
            stats = ('Performance/Training/', stats_dict)

            params = model.named_parameters(
            ) if args.log_params_histograms else None
            distiller.log_training_progress(stats, params, epoch,
                                            steps_completed, steps_per_epoch,
                                            args.print_freq, loggers)
        end = time.time()

    return losses['overall_loss'].mean
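
The lazily-created loss meters in the scheduler branch above follow a small, reusable pattern: every named loss component reported by before_backward_pass gets its own AverageValueMeter on first sight. A sketch of just that bookkeeping; LossComponent here is a stand-in namedtuple, not the actual Distiller type:

from collections import OrderedDict, namedtuple
import torchnet.meter as tnt

LossComponent = namedtuple('LossComponent', ['name', 'value'])

losses = OrderedDict([('overall_loss', tnt.AverageValueMeter())])
for lc in [LossComponent('ssl_loss', 0.3), LossComponent('ssl_loss', 0.1)]:
    if lc.name not in losses:            # create a meter on first sight
        losses[lc.name] = tnt.AverageValueMeter()
    losses[lc.name].add(lc.value)

print(losses['ssl_loss'].mean)           # 0.2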
Example #12
def train(train_loader, model, original_model, criterion, optimizer, epoch,
          compression_scheduler, loggers, args):
    """Training loop for one epoch."""
    losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                          (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    # For Early Exit, we define statistics for each exit
    # So exiterrors is analogous to classerr for the non-Early Exit case
    if args.earlyexit_lossweights:
        args.exiterrors = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(
                tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)',
                   total_samples, batch_size)
    epoch_frac = args.partial_epoch
    steps_per_frac_epoch = math.ceil((total_samples * epoch_frac) / batch_size)

    # Switch to train mode
    model.train()
    end = time.time()

    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)
        inputs, target = inputs.to('cuda'), target.to('cuda')

        if train_step == steps_per_frac_epoch:
            break
        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step,
                                                     steps_per_epoch,
                                                     optimizer)

        if args.kd_policy is None:
            # Run the (quantized) model and the frozen original model on the
            # same inputs; their output distributions are compared below.
            output = model(inputs)
            new_tensor = output
            original_model.freeze()
            output_new = original_model(inputs)
            old_tensor = output_new
        else:
            output = args.kd_policy.forward(inputs)
        if not args.earlyexit_lossweights:
            # loss = criterion(output, target)  # supervised loss, replaced by
            # an experimental distillation criterion: match the quantized
            # model's output distribution to the original model's.
            new_criterion = nn.PoissonNLLLoss()
            old_tensor = torch.nn.functional.log_softmax(old_tensor, dim=1)
            new_tensor = torch.nn.functional.softmax(new_tensor, dim=1)
            loss = new_criterion(new_tensor, old_tensor)
            # Measure accuracy and record loss
            classerr.add(output.data, target)
        else:
            # Measure accuracy and record loss
            loss = earlyexit_loss(output, target, criterion, args)
        losses[OBJECTIVE_LOSS_KEY].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            agg_loss = compression_scheduler.before_backward_pass(
                epoch,
                train_step,
                steps_per_epoch,
                loss,
                optimizer=optimizer,
                return_loss_components=True)
            loss = agg_loss.overall_loss
            losses[OVERALL_LOSS_KEY].add(loss.item())
            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step,
                                                   steps_per_epoch, optimizer)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step + 1)

        if steps_completed % args.print_freq == 0:
            # Log some statistics
            errs = OrderedDict()
            if not args.earlyexit_lossweights:
                errs['Top1'] = classerr.value(1)
                csvlogger.writerow(
                    [epoch, steps_completed,
                     classerr.value(1), loss.item()])
                errs['Top5'] = classerr.value(5)
            else:
                # for Early Exit case, the Top1 and Top5 stats are computed for each exit.
                for exitnum in range(args.num_exits):
                    errs['Top1_exit' +
                         str(exitnum)] = args.exiterrors[exitnum].value(1)
                    errs['Top5_exit' +
                         str(exitnum)] = args.exiterrors[exitnum].value(5)

            stats_dict = OrderedDict()
            for loss_name, meter in losses.items():
                stats_dict[loss_name] = meter.mean
            stats_dict.update(errs)
            stats_dict['LR'] = optimizer.param_groups[0]['lr']
            stats_dict['Time'] = batch_time.mean
            stats = ('Performance/Training/', stats_dict)

            params = model.named_parameters(
            ) if args.log_params_histograms else None
            distiller.log_training_progress(stats, params, epoch,
                                            steps_completed, steps_per_epoch,
                                            args.print_freq, loggers)
        end = time.time()
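
The example above experiments with PoissonNLLLoss between the quantized and original models' softened outputs. A more conventional distillation criterion is the KL divergence between temperature-softened distributions; a sketch of that standard formulation, where the temperature T and the tensors are illustrative:

import torch
import torch.nn.functional as F

def kd_loss(student_logits, teacher_logits, T=4.0):
    """KL divergence between temperature-softened distributions."""
    log_p_student = F.log_softmax(student_logits / T, dim=1)
    p_teacher = F.softmax(teacher_logits / T, dim=1)
    # 'batchmean' matches the mathematical definition of KL divergence;
    # the T*T factor keeps gradient magnitudes comparable across temperatures.
    return F.kl_div(log_p_student, p_teacher, reduction='batchmean') * (T * T)

student = torch.randn(8, 10, requires_grad=True)
teacher = torch.randn(8, 10)
kd_loss(student, teacher).backward()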
Example #13
                                                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch)
            distiller.log_activation_statsitics(epoch, "valid", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss),
                              ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, top1, top5, epoch, args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        apputils.save_checkpoint(epoch, args.arch, model, optimizer, compression_scheduler,
                                 perf_scores_history[0].top1, is_best, args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)


OVERALL_LOSS_KEY = 'Overall Loss'
Example #14
def train(epoch, optimizer, compression_scheduler=None):
    # Turn on training mode which enables dropout.
    model.train()

    total_samples = train_data.size(0)
    steps_per_epoch = math.ceil(total_samples / args.bptt)

    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    # The line below was fixed as per: https://github.com/pytorch/examples/issues/214
    for batch, i in enumerate(range(0, train_data.size(0), args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)

        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, minibatch_id=batch, minibatches_per_epoch=steps_per_epoch)
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            loss = compression_scheduler.before_backward_pass(epoch, minibatch_id=batch,
                                                              minibatches_per_epoch=steps_per_epoch, loss=loss,
                                                              return_loss_components=False)

        optimizer.zero_grad()
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.item()

        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, minibatch_id=batch, minibatches_per_epoch=steps_per_epoch)

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            lr = optimizer.param_groups[0]['lr']
            msglogger.info(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.4f} | ms/batch {:5.2f} '
                    '| loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
            stats = ('Performance/Training/',
                OrderedDict([
                    ('Loss', cur_loss),
                    ('Perplexity', math.exp(cur_loss)),
                    ('LR', lr),
                    ('Batch Time', elapsed * 1000)])
                )
            steps_completed = batch + 1
            distiller.log_training_progress(stats, model.named_parameters(), epoch, steps_completed,
                                            steps_per_epoch, args.log_interval, [tflogger])
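
repackage_hidden, called at the top of the loop above, detaches the recurrent hidden state so that backpropagation-through-time is truncated at BPTT-window boundaries instead of reaching back to the start of the dataset. The canonical implementation from the PyTorch word-language-model example, which this snippet appears to follow:

import torch

def repackage_hidden(h):
    """Wrap hidden states in new Tensors, detached from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)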
Example #15
    def step(self, pruning_action):
        """Take a step, given an action.

        The action represents the desired sparsity.
        This function is invoked by the Agent.
        """
        msglogger.info("env.step - current_layer_id={}  episode={}".format(
            self.current_layer_id, self.episode))
        msglogger.info("\tAgent pruning_action={}".format(pruning_action))

        if is_using_continuous_action_space(self.amc_cfg.agent_algo):
            pruning_action = np.clip(pruning_action[0], self.action_low,
                                     self.action_high)
        else:
            # Divide the action space into 10 discrete levels (0%, 10%, 20%,....90% sparsity)
            pruning_action = pruning_action / 10
        msglogger.info(
            "\tAgent clipped pruning_action={}".format(pruning_action))
        self.agent_action_history.append(pruning_action)
        if self.amc_cfg.action_constrain_fn is not None:
            pruning_action = self.amc_cfg.action_constrain_fn(
                self, pruning_action=pruning_action)
            msglogger.info(
                "Constrained pruning_action={}".format(pruning_action))

        total_macs_before, _ = self.net_wrapper.get_model_resources_requirements(
            self.model)
        layer_macs = self.net_wrapper.get_layer_macs(self.current_layer())
        msglogger.info("\tlayer_macs={:.2f}".format(layer_macs /
                                                    self.dense_model_macs))
        msglogger.info("\tremoved_macs={:.2f}".format(self.removed_macs()))
        msglogger.info("\trest_macs={:.2f}".format(self.rest_macs()))

        if pruning_action > 0:
            pruning_action = self.net_wrapper.remove_structures(
                self.current_layer_id,
                fraction_to_prune=pruning_action,
                prune_what="filters")
        else:
            pruning_action = 0

        self.action_history.append(pruning_action)
        total_macs_after, _ = self.net_wrapper.get_model_resources_requirements(
            self.model)
        layer_macs_after_action = self.net_wrapper.get_layer_macs(
            self.current_layer())

        # Update the various counters after taking the step
        self.current_layer_id += 1
        self._removed_macs += (total_macs_before - total_macs_after)

        msglogger.info("actual_action={}".format(pruning_action))
        msglogger.info(
            "layer_macs={} layer_macs_after_action={} removed now={}".format(
                layer_macs, layer_macs_after_action,
                (layer_macs - layer_macs_after_action)))
        msglogger.info("self._removed_macs={}".format(self._removed_macs))
        assert math.isclose(layer_macs_after_action / layer_macs,
                            1 - pruning_action)

        stats = ('Performance/Validation/',
                 OrderedDict([('requested_action', pruning_action)]))

        distiller.log_training_progress(stats,
                                        None,
                                        self.episode,
                                        steps_completed=self.current_layer_id,
                                        total_steps=self.amc_cfg.conv_cnt,
                                        log_freq=1,
                                        loggers=[self.tflogger])

        if self.episode_is_done():
            msglogger.info("Episode is ending")
            observation = self.get_final_obs()
            reward, top1, total_macs, total_nnz = self.compute_reward()
            normalized_macs = total_macs / self.dense_model_macs * 100
            normalized_nnz = total_nnz / self.dense_model_size * 100
            self.finalize_episode(top1, reward, total_macs, normalized_macs,
                                  normalized_nnz, self.action_history,
                                  self.agent_action_history)

            self.episode += 1
        else:
            observation = self.get_obs()
            if self.amc_cfg.reward_frequency > 0 and self.current_layer_id % self.amc_cfg.reward_frequency == 0:
                reward, top1, total_macs, total_nnz = self.compute_reward(
                    False)
            else:
                reward = 0
        self.prev_action = pruning_action
        info = {}
        return observation, reward, self.episode_is_done(), info
Example #16
def main():
    global msglogger
    check_pytorch_version()
    args = parser.parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(sys.argv, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_top1 = 0

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1'
            )
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        torch.manual_seed(0)
        random.seed(0)
        np.random.seed(0)
        cudnn.deterministic = True
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error(
                'ERROR: Argument --gpus must be a comma-separated list of integers only'
            )
            exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error(
                    'ERROR: GPU device ID {0} requested, but only {1} devices available'
                    .format(dev_id, available_gpus))
                exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'

    # Create the model
    model = create_model(args.pretrained,
                         args.dataset,
                         args.arch,
                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # We can optionally resume from a checkpoint
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.ADC:
        HAVE_GYM_INSTALLED = False
        if not HAVE_GYM_INSTALLED:
            raise ValueError(
                "ADC is currently experimental and uses non-public Coach features"
            )

        import examples.automated_deep_compression.ADC as ADC
        train_loader, val_loader, test_loader, _ = apputils.load_data(
            args.dataset, os.path.expanduser(args.data), args.batch_size,
            args.workers, args.validation_size, args.deterministic)

        validate_fn = partial(validate,
                              val_loader=test_loader,
                              criterion=criterion,
                              loggers=[pylogger],
                              print_freq=args.print_freq)

        save_checkpoint_fn = partial(apputils.save_checkpoint,
                                     arch=args.arch,
                                     name='adc')
        ADC.do_adc(model, args.dataset, args.arch, val_loader, validate_fn,
                   save_checkpoint_fn)
        exit()

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        which_summary = args.summary
        if which_summary.startswith('png'):
            apputils.draw_img_classifier_to_file(
                model, 'model.png', args.dataset,
                which_summary == 'png_w_params')
        else:
            distiller.model_summary(model, which_summary, args.dataset)
        exit()

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_size, args.deterministic)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    activations_sparsity = None
    if args.activation_stats:
        # If your model has ReLU layers, then those layers have sparse activations.
        # ActivationSparsityCollector will collect information about this sparsity.
        # WARNING! Enabling activation sparsity collection will significantly slow down training!
        activations_sparsity = ActivationSparsityCollector(model)

    if args.sensitivity is not None:
        # This sample application can be invoked to execute Sensitivity Analysis on your
        # model.  The output is saved to CSV and PNG.
        msglogger.info("Running sensitivity tests")
        test_fnc = partial(test,
                           test_loader=test_loader,
                           criterion=criterion,
                           loggers=[pylogger],
                           print_freq=args.print_freq)
        which_params = [
            param_name for param_name, _ in model.named_parameters()
        ]
        sensitivity = distiller.perform_sensitivity_analysis(
            model,
            net_params=which_params,
            sparsities=np.arange(0.0, 0.95, 0.05),
            test_func=test_fnc,
            group=args.sensitivity)
        distiller.sensitivities_to_png(sensitivity, 'sensitivity.png')
        distiller.sensitivities_to_csv(sensitivity, 'sensitivity.csv')
        exit()

    if args.evaluate:
        # This sample application can be invoked to evaluate the accuracy of your model on
        # the test dataset.
        # You can optionally quantize the model to 8-bit integer before evaluation.
        # For example:
        # python3 compress_classifier.py --arch resnet20_cifar  ../data.cifar10 -p=50 --resume=checkpoint.pth.tar --evaluate
        if args.quantize:
            model.cpu()
            quantizer = quantization.SymmetricLinearQuantizer(model, 8, 8)
            quantizer.prepare_model()
            model.cuda()
        top1, _, _ = test(test_loader, model, criterion, [pylogger],
                          args.print_freq)
        if args.quantize:
            checkpoint_name = 'quantized'
            apputils.save_checkpoint(0,
                                     args.arch,
                                     model,
                                     optimizer=None,
                                     best_top1=top1,
                                     name='_'.join([args.name, checkpoint_name])
                                     if args.name else checkpoint_name,
                                     dir=msglogger.logdir)
        exit()

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress)

    best_epoch = start_epoch
    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        train(train_loader,
              model,
              criterion,
              optimizer,
              epoch,
              compression_scheduler,
              loggers=[tflogger, pylogger],
              print_freq=args.print_freq,
              log_params_hist=args.log_params_histograms)
        distiller.log_weights_sparsity(model,
                                       epoch,
                                       loggers=[tflogger, pylogger])
        if args.activation_stats:
            distiller.log_activation_sparsity(epoch,
                                              loggers=[tflogger, pylogger],
                                              collector=activations_sparsity)

        # evaluate on validation set
        top1, top5, vloss = validate(val_loader, model, criterion, [pylogger],
                                     args.print_freq, epoch)
        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # remember best top1 and save checkpoint
        is_best = top1 > best_top1
        if is_best:
            best_epoch = epoch
            best_top1 = top1
        msglogger.info('==> Best validation Top1: %.3f   Epoch: %d', best_top1,
                       best_epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler, best_top1, is_best,
                                 args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], args.print_freq)
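
Several callbacks above (validate_fn, save_checkpoint_fn, test_fnc) are built with functools.partial, which bakes the fixed arguments in so callers only supply what varies. A minimal sketch with placeholder arguments:

from functools import partial

def validate(model, val_loader=None, criterion=None, print_freq=10):
    return 0.0  # placeholder for the real validation metric

validate_fn = partial(validate, val_loader='loader', criterion='ce', print_freq=50)
print(validate_fn('my-model'))  # only the model is supplied at call time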
Example #17
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, args):
    """Training-with-compression loop for one epoch.
    
    For each training step in epoch:
        compression_scheduler.on_minibatch_begin(epoch)
        output = model(input)
        loss = criterion(output, target)
        compression_scheduler.before_backward_pass(epoch)
        loss.backward()
        compression_scheduler.before_parameter_optimization(epoch)
        optimizer.step()
        compression_scheduler.on_minibatch_end(epoch)
    """
    OVERALL_LOSS_KEY = 'Overall Loss'
    OBJECTIVE_LOSS_KEY = 'Objective Loss'

    losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                          (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    # For Early Exit, we define statistics for each exit
    # So exiterrors is analogous to classerr for the non-Early Exit case
    if early_exit_mode(args):
        args.exiterrors = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to train mode
    model.train()
    acc_stats = []
    end = time.time()
    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)
        inputs, target = inputs.to(args.device), target.to(args.device)

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step, steps_per_epoch, optimizer)

        if not hasattr(args, 'kd_policy') or args.kd_policy is None:
            output = model(inputs)
        else:
            output = args.kd_policy.forward(inputs)

        if not early_exit_mode(args):
            loss = criterion(output, target)
            # Measure accuracy
            classerr.add(output.data, target)
            acc_stats.append([classerr.value(1), classerr.value(5)])
        else:
            # Measure accuracy and record loss
            loss = earlyexit_loss(output, target, criterion, args)
        # Record loss
        losses[OBJECTIVE_LOSS_KEY].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            agg_loss = compression_scheduler.before_backward_pass(epoch, train_step, steps_per_epoch, loss,
                                                                  optimizer=optimizer, return_loss_components=True)
            loss = agg_loss.overall_loss
            losses[OVERALL_LOSS_KEY].add(loss.item())

            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())
        else:
            losses[OVERALL_LOSS_KEY].add(loss.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        if compression_scheduler:
            compression_scheduler.before_parameter_optimization(epoch, train_step, steps_per_epoch, optimizer)
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step, steps_per_epoch, optimizer)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step+1)

        if steps_completed % args.print_freq == 0:
            # Log some statistics
            errs = OrderedDict()
            if not early_exit_mode(args):
                errs['Top1'] = classerr.value(1)
                errs['Top5'] = classerr.value(5)
            else:
                # for Early Exit case, the Top1 and Top5 stats are computed for each exit.
                for exitnum in range(args.num_exits):
                    errs['Top1_exit' + str(exitnum)] = args.exiterrors[exitnum].value(1)
                    errs['Top5_exit' + str(exitnum)] = args.exiterrors[exitnum].value(5)

            stats_dict = OrderedDict()
            for loss_name, meter in losses.items():
                stats_dict[loss_name] = meter.mean
            stats_dict.update(errs)
            stats_dict['LR'] = optimizer.param_groups[0]['lr']
            stats_dict['Time'] = batch_time.mean
            stats = ('Performance/Training/', stats_dict)

            params = model.named_parameters() if args.log_params_histograms else None
            distiller.log_training_progress(stats,
                                            params,
                                            epoch, steps_completed,
                                            steps_per_epoch, args.print_freq,
                                            loggers)
        end = time.time()
    #return acc_stats
    # NOTE: this breaks previous behavior, which returned a history of (top1, top5) values
    return classerr.value(1), classerr.value(5), losses[OVERALL_LOSS_KEY]
Example #18
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress,
                                     msglogger.logdir,
                                     gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    perf_scores_history = []
    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1'
            )
            exit(1)  # exit with a non-zero (error) status
        # Use a well-known seed, for repeatability of experiments
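        # set_deterministic() seeds the random number generators and configures
        # cuDNN for deterministic (and typically slower) execution.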
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
                exit(1)
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
                    exit(1)
            # Set default device in case the first one on the list != 0
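            # (torch.cuda.set_device() makes this GPU the default target of
            # subsequent .cuda() calls)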
            torch.cuda.set_device(args.gpus[0])

    # The dataset name is hard-coded here for a custom dataset (it is not
    # inferred from the model name, as in the original sample)
    args.dataset = 'cousm'

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = ResNet152()
    # model = torch.nn.DataParallel(model, device_ids=args.gpus)  # parallel GPUs
    model.to(args.device)
    compression_scheduler = None  # compression scheduler
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:  # load a previously trained model
        # checkpoint = torch.load(args.resume)
        # model.load_state_dict(checkpoint['state_dict'])
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
        model.to(args.device)

    # Define loss function (criterion) and optimizer (Adam is used here; the SGD
    # variant is left commented out below)
    criterion = nn.CrossEntropyLoss().to(args.device)

    # optimizer = torch.optim.SGD(model.fc.parameters(), lr=args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    optimizer = torch.optim.Adam(model.model.fc.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
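    # Only the parameters of the final fully-connected layer (model.model.fc) are
    # optimized: this fine-tunes the classifier head while the rest of the
    # pre-trained ResNet-152 receives no weight updates.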
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:  # automated deep compression
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:  # greedy search
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)
    # Activation statistics collectors
    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info(
            '\tStats will be collected for {:.1%} of test dataset'.format(
                args.qe_calibration))
        msglogger.info(
            '\tSetting constant seeds and converting model to serialized execution'
        )
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(
            create_quantization_stats_collector(model))  # quantization stats collector
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets.  In this variant the data is read from a hard-coded
    # local path rather than inferred from the model name in args.arch.

    train_loader, val_loader, test_loader, _ = get_data_loaders(
        datasets_fn, r'/home/tian/Desktop/image_yasuo', args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size,
        args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))
    # This sample application can be invoked to perform a sensitivity analysis on
    # the model.  The output is saved to CSV and PNG.
    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress,
                                                      compression_scheduler)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
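        # A default, empty scheduler keeps the scheduler callbacks in the training
        # loop valid as no-ops when no compression schedule was provided.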
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        # zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resume.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    args.kd_policy = None  # knowledge distillation
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher,
                                                     chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)
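        # Registered as a scheduler policy with frequency=1, the KD hooks run on
        # every epoch from kd_start_epoch until the end of training.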

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)
    lr = args.lr
    lr_decay = 0.5
    for epoch in range(start_epoch, args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:  # print the masks sparsity table at the end of each epoch
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        sparsity = distiller.model_sparsity(model)
        perf_scores_history.append(
            distiller.MutableNamedTuple({
                'sparsity': sparsity,
                'top1': top1,
                'top5': top5,
                'epoch': epoch
            }))
        # Keep perf_scores_history sorted from best to worst
        # Sort by sparsity as main sort key, then sort by top1, top5 and epoch
        perf_scores_history.sort(key=operator.attrgetter(
            'sparsity', 'top1', 'top5', 'epoch'),
                                 reverse=True)
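        # reverse=True puts the highest sparsity first; top1, top5 and epoch break
        # ties, in that order.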
        for score in perf_scores_history[:args.num_best_scores]:
            msglogger.info(
                '==> Best [Top1: %.3f   Top5: %.3f   Sparsity: %.2f on epoch: %d]',
                score.top1, score.top5, score.sparsity, score.epoch)

        is_best = epoch == perf_scores_history[0].epoch
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler,
                                 perf_scores_history[0].top1, is_best,
                                 args.name, msglogger.logdir)
        if not is_best:
            lr = lr * lr_decay
            # The epoch did not yield a new best score, so decay the learning rate
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
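            # After the k-th non-best epoch the learning rate is args.lr * lr_decay**k;
            # it is never restored by later best epochs.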

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)