Example #1
0
def test_load_checkpoint_without_model():
    checkpoint_filename = 'checkpoints/resnet20_cifar10_checkpoint.pth.tar'
    # Load a checkpoint w/o specifying the model: this should fail because the loaded
    # checkpoint is old and does not have the required metadata to create a model.
    with pytest.raises(ValueError):
        load_checkpoint(model=None, chkpt_file=checkpoint_filename)

    for model_device in (None, 'cuda', 'cpu'):
        # Now we create a new model, save a checkpoint, and load it w/o specifying the model.
        # This should succeed because the checkpoint has enough metadata to create model.
        model = create_model(False, 'cifar10', 'resnet20_cifar', 0)
        model, compression_scheduler, optimizer, start_epoch = load_checkpoint(
            model, checkpoint_filename)
        save_checkpoint(epoch=0,
                        arch='resnet20_cifar',
                        model=model,
                        name='eraseme',
                        scheduler=compression_scheduler,
                        optimizer=None,
                        dir='checkpoints')
        temp_checkpoint = os.path.join("checkpoints",
                                       "eraseme_checkpoint.pth.tar")
        model, compression_scheduler, optimizer, start_epoch = load_checkpoint(
            model=None, chkpt_file=temp_checkpoint, model_device=model_device)
        assert compression_scheduler is not None
        assert optimizer is None
        assert start_epoch == 1
        assert model
        assert model.arch == "resnet20_cifar"
        assert model.dataset == "cifar10"
        os.remove(temp_checkpoint)
Example #2
0
def test_load_gpu_model_on_cpu_with_thinning():
    # Issue #148
    # 1. create a GPU model and remove 50% of the filters in one of the layers (thninning)
    # 2. save the thinned model in a checkpoint file
    # 3. load the checkpoint and place it on the CPU
    CPU_DEVICE_ID = -1
    gpu_model = create_model(False, 'cifar10', 'resnet20_cifar')
    conv_pname = "module.layer1.0.conv1.weight"
    conv_p = distiller.model_find_param(gpu_model, conv_pname)
    pruner = distiller.pruning.L1RankedStructureParameterPruner("test_pruner", group_type="Filters",
                                                                desired_sparsity=0.5, weights=conv_pname)
    zeros_mask_dict = distiller.create_model_masks_dict(gpu_model)
    pruner.set_param_mask(conv_p, conv_pname, zeros_mask_dict, meta=None)

    # Use the mask to prune
    zeros_mask_dict[conv_pname].apply_mask(conv_p)
    distiller.remove_filters(gpu_model, zeros_mask_dict, 'resnet20_cifar', 'cifar10', optimizer=None)
    assert hasattr(gpu_model, 'thinning_recipes')
    scheduler = distiller.CompressionScheduler(gpu_model)
    save_checkpoint(epoch=0, arch='resnet20_cifar', model=gpu_model, scheduler=scheduler, optimizer=None)

    CPU_DEVICE_ID = -1
    cpu_model = create_model(False, 'cifar10', 'resnet20_cifar', device_ids=CPU_DEVICE_ID)
    load_checkpoint(cpu_model, "checkpoint.pth.tar")
    assert distiller.model_device(cpu_model) == 'cpu'
Example #3
0
 def __init__(self, args, script_dir):
     super().__init__(args, script_dir)
     early_exit_init(self.args)
     # Save the randomly-initialized model before training (useful for lottery-ticket method)
     if args.save_untrained_model:
         ckpt_name = '_'.join((self.args.name or "", "untrained"))
         apputils.save_checkpoint(0, self.args.arch, self.model,
                                  name=ckpt_name, dir=msglogger.logdir)
Example #4
0
 def _finalize_epoch(self, epoch, perf_scores_history, top1, top5):
     # Update the list of top scores achieved so far, and save the checkpoint
     update_training_scores_history(perf_scores_history, self.model, 
                                    top1, top5, epoch, self.args.num_best_scores)
     is_best = epoch == perf_scores_history[0].epoch
     checkpoint_extras = {'current_top1': top1,
                          'best_top1': perf_scores_history[0].top1,
                          'best_epoch': perf_scores_history[0].epoch}
     apputils.save_checkpoint(epoch, self.args.arch, self.model, optimizer=self.optimizer, 
                              scheduler=self.compression_scheduler, extras=checkpoint_extras, 
                              is_best=is_best, name=self.args.name, dir=msglogger.logdir)
Example #5
0
 def _finalize_epoch(self, epoch, top1, top5):
     # Update the list of top scores achieved so far, and save the checkpoint
     self.performance_tracker.step(self.model, epoch, top1=top1, top5=top5)
     _log_best_scores(self.performance_tracker, msglogger)
     best_score = self.performance_tracker.best_scores()[0]
     is_best = epoch == best_score.epoch
     checkpoint_extras = {'current_top1': top1,
                          'best_top1': best_score.top1,
                          'best_epoch': best_score.epoch}
     if msglogger.logdir:
         apputils.save_checkpoint(epoch, self.args.arch, self.model, optimizer=self.optimizer,
                                  scheduler=self.compression_scheduler, extras=checkpoint_extras,
                                  is_best=is_best, name=self.args.name, dir=msglogger.logdir)
Example #6
0
def handle_subapps(model, criterion, optimizer, compression_scheduler, pylogger, args):
    def load_test_data(args):
        test_loader = classifier.load_data(args, load_train=False, load_val=False, load_test=True)
        return test_loader

    do_exit = False
    if args.greedy:
        greedy(model, criterion, optimizer, pylogger, args)
        do_exit = True
    elif args.summary:
        # This sample application can be invoked to produce various summary reports
        for summary in args.summary:
            distiller.model_summary(model, summary, args.dataset)
        do_exit = True
    elif args.export_onnx is not None:
        distiller.export_img_classifier_to_onnx(model,
                                                os.path.join(msglogger.logdir, args.export_onnx),
                                                args.dataset, add_softmax=True, verbose=False)
        do_exit = True
    elif args.qe_calibration and not (args.evaluate and args.quantize_eval):
        classifier.acts_quant_stats_collection(model, criterion, pylogger, args, save_to_file=True)
        do_exit = True
    elif args.activation_histograms:
        classifier.acts_histogram_collection(model, criterion, pylogger, args)
        do_exit = True
    elif args.sensitivity is not None:
        test_loader = load_test_data(args)
        sensitivities = np.arange(*args.sensitivity_range)
        sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities)
        do_exit = True
    elif args.evaluate:
        if args.quantize_eval and args.qe_lapq:
            from ptq_lapq import image_classifier_ptq_lapq
            image_classifier_ptq_lapq(model, criterion, pylogger, args)
        else:
            test_loader = load_test_data(args)
            classifier.evaluate_model(test_loader, model, criterion, pylogger,
                classifier.create_activation_stats_collectors(model, *args.activation_stats),
                args, scheduler=compression_scheduler)
        do_exit = True
    elif args.thinnify:
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.contract_model(model, compression_scheduler.zeros_mask_dict, args.arch, args.dataset, optimizer=None)
        apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler,
                                 name="{}_thinned".format(args.resumed_checkpoint_path.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        msglogger.info("Note: if your model collapsed to random inference, you may want to fine-tune")
        do_exit = True
    return do_exit
Example #7
0
def evaluate_model(model,
                   criterion,
                   test_loader,
                   loggers,
                   activations_collectors,
                   args,
                   scheduler=None):
    # This sample application can be invoked to evaluate the accuracy of your model on
    # the test dataset.
    # You can optionally quantize the model to 8-bit integer before evaluation.
    # For example:
    # python3 compress_classifier.py --arch resnet20_cifar  ../data.cifar10 -p=50 --resume-from=checkpoint.pth.tar --evaluate

    if not isinstance(loggers, list):
        loggers = [loggers]

    if args.quantize_eval:
        model.cpu()
        quantizer = quantization.PostTrainLinearQuantizer.from_args(
            model, args)
        quantizer.prepare_model()
        model.to(args.device)

    top1, top5, vloss = test(test_loader,
                             model,
                             criterion,
                             loggers,
                             activations_collectors,
                             args=args)

    if args.use_swa_model:
        msglogger.info('==> SWATop1: %.3f    SWATop5: %.3f    SWALoss: %.3f\n',
                       top1, top5, vloss)
    else:
        msglogger.info('==> Top1: %.3f    Top5: %.3f    Loss: %.3f\n', top1,
                       top5, vloss)

    if args.quantize_eval:
        checkpoint_name = 'quantized'
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=scheduler,
                                 name='_'.join([args.name, checkpoint_name])
                                 if args.name else checkpoint_name,
                                 dir=msglogger.logdir,
                                 extras={'quantized_top1': top1})
def quantize_and_test_model(test_loader, model, criterion, args, loggers=None, scheduler=None, save_flag=True):
    """Collect stats using test_loader (when stats file is absent),

    clone the model and quantize the clone, and finally, test it.
    args.device is allowed to differ from the model's device.
    When args.qe_calibration is set to None, uses 0.05 instead.

    scheduler - pass scheduler to store it in checkpoint
    save_flag - defaults to save both quantization statistics and checkpoint.
    """
    if hasattr(model, 'quantizer_metadata') and \
            model.quantizer_metadata['type'] == distiller.quantization.PostTrainLinearQuantizer:
        raise RuntimeError('Trying to invoke post-training quantization on a model that has already been post-'
                           'train quantized. Model was likely loaded from a checkpoint. Please run again without '
                           'passing the --quantize-eval flag')
    if not (args.qe_dynamic or args.qe_stats_file or args.qe_config_file):
        args_copy = copy.deepcopy(args)
        args_copy.qe_calibration = args.qe_calibration if args.qe_calibration is not None else 0.05

        # set stats into args stats field
        args.qe_stats_file = acts_quant_stats_collection(
            model, criterion, loggers, args_copy, save_to_file=save_flag)

    args_qe = copy.deepcopy(args)
    if args.device == 'cpu':
        # NOTE: Even though args.device is CPU, we allow here that model is not in CPU.
        qe_model = distiller.make_non_parallel_copy(model).cpu()
    else:
        qe_model = copy.deepcopy(model).to(args.device)

    quantizer = quantization.PostTrainLinearQuantizer.from_args(qe_model, args_qe)
    dummy_input = distiller.get_dummy_input(input_shape=model.input_shape)
    quantizer.prepare_model(dummy_input)

    if args.qe_convert_pytorch:
        qe_model = _convert_ptq_to_pytorch(qe_model, args_qe)

    test_res = test(test_loader, qe_model, criterion, loggers, args=args_qe)

    if save_flag:
        checkpoint_name = 'quantized'
        apputils.save_checkpoint(0, args_qe.arch, qe_model, scheduler=scheduler,
            name='_'.join([args_qe.name, checkpoint_name]) if args_qe.name else checkpoint_name,
            dir=msglogger.logdir, extras={'quantized_top1': test_res[0]})

    del qe_model
    return test_res
 def __init__(self, args, script_dir):
     checkpoint_copy = None
     if args.dataset in custom_datasets and args.resumed_checkpoint_path:
         checkpoint_copy = args.resumed_checkpoint_path
         args.resumed_checkpoint_path = ''
     super().__init__(args, script_dir)
     early_exit_init(args)
     # Save the randomly-initialized model before training (useful for lottery-ticket method)
     if checkpoint_copy is not None:
         args.resumed_checkpoint_path = checkpoint_copy
     if args.save_untrained_model:
         ckpt_name = '_'.join((self.args.name or "", "untrained"))
         apputils.save_checkpoint(0,
                                  self.args.arch,
                                  self.model,
                                  name=ckpt_name,
                                  dir=msglogger.logdir)
Example #10
0
def extract_lottery_ticket(args, untrained_ckpt_name, pruned_ckpt_name):
    untrained_ckpt = apputils.load_checkpoint(model=None, chkpt_file=untrained_ckpt_name, model_device='cpu')
    untrained_model, _, optimizer, start_epoch = untrained_ckpt

    pruned_ckpt = apputils.load_checkpoint(model=None, chkpt_file=pruned_ckpt_name, model_device='cpu')
    pruned_model, pruned_scheduler, optimizer, start_epoch = pruned_ckpt

    # create a dictionary of masks by inferring the masks from the parameter sparsity
    masks_dict = {pname: (torch.ne(param, 0)).type(param.type())
                  for pname, param in pruned_model.named_parameters()
                  if pname in pruned_scheduler.zeros_mask_dict.keys()}
    for pname, mask in masks_dict.items():
        untrained_model.state_dict()[pname].mul_(mask)

    sparsities = {pname: distiller.sparsity(mask) for pname, mask in masks_dict.items()}
    print(sparsities)
    pruned_scheduler.init_from_masks_dict(masks_dict)

    apputils.save_checkpoint(0, pruned_model.arch, untrained_model, optimizer=optimizer,
                             scheduler=pruned_scheduler,
                             name='_'.join([untrained_ckpt_name, 'masked']))
def quantize_and_test_model(test_loader, model, criterion, args, loggers=None, scheduler=None, save_flag=True):
    """Collect stats using test_loader (when stats file is absent),

    clone the model and quantize the clone, and finally, test it.
    args.device is allowed to differ from the model's device.
    When args.qe_calibration is set to None, uses 0.05 instead.

    scheduler - pass scheduler to store it in checkpoint
    save_flag - defaults to save both quantization statistics and checkpoint.
    """
    if not (args.qe_dynamic or args.qe_stats_file or args.qe_config_file):
        args_copy = copy.deepcopy(args)
        args_copy.qe_calibration = args.qe_calibration if args.qe_calibration is not None else 0.05

        # set stats into args stats field
        args.qe_stats_file = acts_quant_stats_collection(
            model, criterion, loggers, args_copy, save_to_file=save_flag)

    args_qe = copy.deepcopy(args)
    if args.device == 'cpu':
        # NOTE: Even though args.device is CPU, we allow here that model is not in CPU.
        qe_model = distiller.make_non_parallel_copy(model).cpu()
    else:
        qe_model = copy.deepcopy(model).to(args.device)

    quantizer = quantization.PostTrainLinearQuantizer.from_args(qe_model, args_qe)
    quantizer.prepare_model(distiller.get_dummy_input(input_shape=model.input_shape))

    test_res = test(test_loader, qe_model, criterion, loggers, args=args_qe)

    if save_flag:
        checkpoint_name = 'quantized'
        apputils.save_checkpoint(0, args_qe.arch, qe_model, scheduler=scheduler,
            name='_'.join([args_qe.name, checkpoint_name]) if args_qe.name else checkpoint_name,
            dir=msglogger.logdir, extras={'quantized_top1': test_res[0]})

    del qe_model
    return test_res
Example #12
0
def greedy_pruner(
    pruned_model,
    app_args,
    fraction_to_prune,
    pruning_step,
    test_fn,
    train_fn,
    writer=None,
):
    dataset = app_args.dataset
    arch = app_args.arch
    # create_network_record_file()

    # Temporary ugly hack!
    resnet_layers = None
    resnet_params = None
    if arch == "resnet20_cifar":
        resnet_params = resnet20_params
    elif arch == "resnet56_cifar":
        resnet_params = resnet56_params
    elif arch == "resnet50":
        resnet_params = resnet50_params
    elif arch == "mobilenetv1":
        resnet_params = mobilenetv1_params
    if resnet_params is not None:
        resnet_layers = [param[:-len(".weight")] for param in resnet_params]
    logging.info("resnet params %s" % resnet_layers)
    total_macs = dense_total_macs = get_model_compute_budget(
        pruned_model, dataset, resnet_layers)
    logging.info("total_macs %f" % total_macs)
    logging.info("fraction_to_prune * dense_total_macs = %f" %
                 (fraction_to_prune * dense_total_macs))
    iteration = 0
    model = pruned_model

    while total_macs > fraction_to_prune * dense_total_macs:
        iteration += 1
        if app_args.greedy_finetuning_policy == "constant":
            effective_train_size = app_args.effective_train_size
        elif app_args.greedy_finetuning_policy == "linear-grow":
            effective_train_size = 1 - (total_macs / dense_total_macs)

        (
            prec1,
            prec5,
            param_name,
            pruned_model,
            zeros_mask_dict,
        ) = find_most_robust_layer(
            iteration,
            pruned_model,
            pruning_step,
            test_fn,
            train_fn,
            app_args,
            resnet_params,
            effective_train_size,
        )
        total_macs = get_model_compute_budget(pruned_model, dataset,
                                              resnet_layers)
        densities = get_param_densities(model, pruned_model, resnet_params)
        if writer is not None:
            logging.info("info writer to tensorboard")
            writer.add_scalar("macs", total_macs, iteration)
            writer.add_scalar("acc", prec1, iteration)
            writer.add_scalar("acc/macs", prec1, total_macs)
        compute_density = total_macs / dense_total_macs
        results = (iteration, prec1, param_name, compute_density, total_macs,
                   densities)
        record_network_details(results)
        scheduler = create_scheduler(pruned_model, zeros_mask_dict)
        save_checkpoint(
            0,
            arch,
            pruned_model,
            optimizer=None,
            scheduler=scheduler,
            extras={"top1": prec1},
            name="greedy__{}__{:.1f}__{:.1f}".format(
                str(iteration).zfill(3), compute_density * 100, prec1),
            dir=msglogger.logdir,
        )
        del scheduler
        # del zeros_mask_dict
        msglogger.info("Iteration {}: top1-{:.2f} {} compute-{:.2f}".format(
            *results[0:4]))

    assert iteration > 0
    prec1, prec5, loss = test_fn(model=pruned_model)
    logging.info("final result: %f %f %f" % (prec1, prec5, loss))
    scheduler = create_scheduler(pruned_model, zeros_mask_dict)
    save_checkpoint(
        0,
        arch,
        pruned_model,
        optimizer=None,
        scheduler=scheduler,
        extras={"top1": prec1},
        name="_".join(("greedy", str(fraction_to_prune))),
        dir=msglogger.logdir,
    )
    return pruned_model
def greedy_pruner(pruned_model, app_args, fraction_to_prune, pruning_step,
                  test_fn, train_fn):
    dataset = app_args.dataset
    arch = app_args.arch
    create_network_record_file()

    # Temporary ugly hack!
    resnet_layers = None
    resnet_params = None
    if arch == "resnet20_cifar":
        resnet_params = resnet20_params
    elif arch == "resnet56_cifar":
        resnet_params = resnet56_params
    elif arch == "resnet50":
        resnet_params = resnet50_params
    if resnet_params is not None:
        resnet_layers = [param[:-len(".weight")] for param in resnet_params]

    total_macs = dense_total_macs = get_model_compute_budget(
        pruned_model, dataset, resnet_layers)
    iteration = 0
    model = pruned_model

    while total_macs > fraction_to_prune * dense_total_macs:
        iteration += 1
        if app_args.greedy_finetuning_policy == "constant":
            effective_train_size = app_args.effective_train_size
        elif app_args.greedy_finetuning_policy == "linear-grow":
            effective_train_size = 1 - (total_macs / dense_total_macs)

        prec1, prec5, param_name, pruned_model, zeros_mask_dict = find_most_robust_layer(
            iteration, pruned_model, pruning_step, test_fn, train_fn, app_args,
            resnet_params, effective_train_size)
        total_macs = get_model_compute_budget(pruned_model, dataset,
                                              resnet_layers)
        densities = get_param_densities(model, pruned_model, resnet_params)
        compute_density = total_macs / dense_total_macs
        results = (iteration, prec1, param_name, compute_density, total_macs,
                   densities)
        record_network_details(results)
        scheduler = create_scheduler(pruned_model, zeros_mask_dict)
        save_checkpoint(0,
                        arch,
                        pruned_model,
                        optimizer=None,
                        scheduler=scheduler,
                        extras={'top1': prec1},
                        name="greedy__{}__{:.1f}__{:.1f}".format(
                            str(iteration).zfill(3), compute_density * 100,
                            prec1),
                        dir=msglogger.logdir)
        del scheduler
        del zeros_mask_dict
        msglogger.info("Iteration {}: top1-{:.2f} {} compute-{:.2f}".format(
            *results[0:4]))

    assert iteration > 0
    prec1, prec5, loss = test_fn(model=pruned_model)
    print(prec1, prec5, loss)
    scheduler = create_scheduler(pruned_model, zeros_mask_dict)
    save_checkpoint(0,
                    arch,
                    pruned_model,
                    optimizer=None,
                    scheduler=scheduler,
                    extras={'top1': prec1},
                    name='_'.join(("greedy", str(fraction_to_prune))),
                    dir=msglogger.logdir)
Example #14
0
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 200

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(
        filter(None, [args.compress, args.qe_stats_file
                      ]),  # remove both None and empty strings
        msglogger.logdir,
        gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        distiller.set_deterministic(
            args.seed)  # For experiment reproducability
    else:
        if args.seed is not None:
            distiller.set_seed(args.seed)
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    # TODO
    args.dataset = 'coco'
    # args.num_classes = 21  # wc -l ~/data/VOC2012/voc-model-labels.txt

    if args.load_vgg19 and args.arch != 'vgg19':
        raise ValueError(
            '``load_vgg19`` should be set only when vgg19 is used')

    model = create_pose_estimation_model(args.pretrained,
                                         args.dataset,
                                         args.arch,
                                         load_vgg19=args.load_vgg19,
                                         parallel=not args.load_serialized,
                                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # <editor-fold desc=">>> Load Model">

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model,
                                              args.load_model_path,
                                              model_device=args.device)

    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info(
                '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0'
            )
    # </editor-fold>

    # Define loss function (criterion)
    # get_loss(saved_for_loss, heat_temp, heat_weight,vec_temp, vec_weight)
    criterion = {
        'shufflenetv2': shufflenetv2_get_loss,
        'vgg19': vgg19_get_loss,
        'hourglass': hourglass_get_loss,
    }[args.arch]

    if optimizer is None:
        trainable_vars = [
            param for param in model.parameters() if param.requires_grad
        ]
        optimizer = torch.optim.SGD(trainable_vars,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    # TODO: load lr_scheduler
    lr_scheduler = ReduceLROnPlateau(optimizer,
                                     mode='min',
                                     factor=0.8,
                                     patience=5,
                                     verbose=True,
                                     threshold=0.0001,
                                     threshold_mode='rel',
                                     cooldown=3,
                                     min_lr=0,
                                     eps=1e-08)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        for summary in args.summary:
            distiller.model_summary(model, summary, args.dataset)
        return

    if args.export_onnx is not None:
        return distiller.export_img_classifier_to_onnx(model,
                                                       os.path.join(
                                                           msglogger.logdir,
                                                           args.export_onnx),
                                                       args.dataset,
                                                       add_softmax=True,
                                                       verbose=False)

    if args.qe_calibration:
        return acts_quant_stats_collection(model, criterion, pylogger, args)

    if args.activation_histograms:
        return acts_histogram_collection(model, criterion, pylogger, args)

    print('Building activations_collectors...')
    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    print('Loading data...')
    train_loader, val_loader, test_loader, _ = load_data(args)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        # zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resumed_checkpoint_path.replace(
                                         ".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    if start_epoch >= ending_epoch:
        msglogger.error(
            'epoch count is too low, starting epoch is {} but total epochs set to {}'
            .format(start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')

    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(
                epoch,
                metrics=(total_loss if (epoch != start_epoch) else 10**6))

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            loss = validate(val_loader, model, criterion, [pylogger], args,
                            epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        lr_scheduler.step(loss)

        stats = ('Performance/Validation/', OrderedDict([('Loss', loss)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, loss, epoch,
                                       args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {
            'current_loss': loss,
            'best_loss': perf_scores_history[0].loss,
            'best_epoch': perf_scores_history[0].epoch
        }
        apputils.save_checkpoint(epoch,
                                 args.arch,
                                 model,
                                 optimizer=optimizer,
                                 scheduler=compression_scheduler,
                                 extras=checkpoint_extras,
                                 is_best=is_best,
                                 name=args.name,
                                 dir=msglogger.logdir)

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)
Example #15
0
    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer, args.compress, compression_scheduler,
            (start_epoch-1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model, compression_scheduler.zeros_mask_dict, args.arch, args.dataset, optimizer=None)
        apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler,
                                 name="{}_thinned".format(args.resumed_checkpoint_path.replace(".pth.tar", "")), dir=msglogger.logdir)
        print("Note: your model may have collapsed to random inference, so you may want to fine-tune")
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch, ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 90

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir,
        args.verbose)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(
        filter(None, [args.compress, args.qe_stats_file
                      ]),  # remove both None and empty strings
        msglogger.logdir,
        gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        distiller.set_deterministic(
            args.seed)  # For experiment reproducability
    else:
        if args.seed is not None:
            distiller.set_seed(args.seed)
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = distiller.apputils.classification_dataset_str_from_arch(
        args.arch)
    args.num_classes = distiller.apputils.classification_num_classes(
        args.dataset)

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model, config = create_model(args.pretrained,
                                 args.dataset,
                                 args.arch,
                                 parallel=not args.load_serialized,
                                 device_ids=args.gpus)

    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1
    if args.deprecated_resume:
        msglogger.warning(
            'The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.'
        )
        if not args.reset_optimizer:
            msglogger.warning(
                'If you wish to also reset the optimizer, call with: --reset-optimizer'
            )
            args.reset_optimizer = True
        args.resumed_checkpoint_path = args.deprecated_resume

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model,
                                              args.load_model_path,
                                              model_device=args.device)
    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info(
                '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0'
            )

    # Define loss function (criterion)
    if "ssd" in args.arch:
        neg_pos_ratio = 3
        criterion = MultiboxLoss(config.priors,
                                 iou_threshold=0.5,
                                 neg_pos_ratio=neg_pos_ratio,
                                 center_variance=0.1,
                                 size_variance=0.2,
                                 device=args.device,
                                 reduction="sum",
                                 class_reduction=True,
                                 verbose=0)
    else:
        criterion = nn.CrossEntropyLoss().to(args.device)

    if optimizer is None:
        if "ssd" in args.arch:
            base_net_lr = args.lr
            extra_layers_lr = args.lr
            params = [{
                'params': model.base_net.parameters(),
                'lr': base_net_lr
            }, {
                'params':
                itertools.chain(model.source_layer_add_ons.parameters(),
                                model.extras.parameters()),
                'lr':
                extra_layers_lr
            }, {
                'params':
                itertools.chain(model.regression_headers.parameters(),
                                model.classification_headers.parameters())
            }]
        else:
            params = model.parameters()
        optimizer = torch.optim.SGD(params,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        for summary in args.summary:
            distiller.model_summary(model, summary, args.dataset)
        return

    if args.export_onnx is not None:
        return distiller.export_img_classifier_to_onnx(model,
                                                       os.path.join(
                                                           msglogger.logdir,
                                                           args.export_onnx),
                                                       args.dataset,
                                                       add_softmax=True,
                                                       verbose=False)

    if args.qe_calibration:
        return acts_quant_stats_collection(model, criterion, pylogger, args)

    if args.activation_histograms:
        return acts_histogram_collection(model, criterion, pylogger, args)

    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = load_data(args, config=config)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resumed_checkpoint_path.replace(
                                         ".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher, _ = create_model(args.kd_pretrained,
                                  args.dataset,
                                  args.kd_teacher,
                                  parallel=not args.load_serialized,
                                  device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        raw_teacher_model_path = msglogger.logdir + "/raw_teacher.pth.tar"
        if not os.path.exists(raw_teacher_model_path):
            teacher.save(raw_teacher_model_path)
            msglogger.info(Fore.CYAN + '\tRaw Teacher Model saved: {0}'.format(
                raw_teacher_model_path) + Style.RESET_ALL)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model,
            teacher,
            args.kd_temp,
            dlw,
            loss_type=args.kd_loss_type,
            focal_alpha=args.kd_focal_alpha,
            use_adaptive=args.kd_focal_adaptive,
            verbose=0)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    if start_epoch >= ending_epoch:
        msglogger.error(
            'epoch count is too low, starting epoch is {} but total epochs set to {}'
            .format(start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')

    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(
                epoch, metrics=(vloss if (epoch != start_epoch) else 10**6))

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, top1, top5,
                                       epoch, args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {
            'current_top1': top1,
            'best_top1': perf_scores_history[0].top1,
            'best_epoch': perf_scores_history[0].epoch
        }
        try:
            raw_fullpath_best = apputils.save_checkpoint(
                epoch,
                args.arch,
                model,
                optimizer=optimizer,
                scheduler=compression_scheduler,
                extras=checkpoint_extras,
                is_best=is_best,
                name=args.name,
                dir=msglogger.logdir)
        except Exception as ex:
            # keep previous fullpath_best
            pass
        mlflow.log_artifacts(msglogger.logdir)

    # Finally run results on the test set
    eval_params = {
        "model_type": args.arch,
        "model_path": raw_fullpath_best,
        "dataset_path": args.data,
        "label_path": "models/voc-model-labels.txt"
    }
    mlflow.projects.run(uri=".",
                        entry_point="eval",
                        use_conda=False,
                        parameters=eval_params)
Example #17
0
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress,
                                     msglogger.logdir,
                                     gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_epochs = list()

    if args.deterministic:
        if args.loaders is None:
            args.loaders = 1
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.loaders > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --loaders to 0 or 1'
            )
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.use_cpu or (args.gpus is None
                        and not torch.cuda.is_available()) or (args.gpus
                                                               == ''):
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
                exit(1)
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
                    exit(1)
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    if args.loaders is None:
        active_gpus = args.gpus if args.gpus is not None else torch.cuda.device_count(
        )
        args.loaders = max(parser.DEFAULT_LOADERS_COUNT,
                           parser.DEFAULT_LOADERS_COUNT * active_gpus)
    msglogger.debug('Number of data loaders set to: {}'.format(args.loaders))

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained,
                         args.dataset,
                         args.arch,
                         parallel=not args.load_serialized,
                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    optimizer = None
    resumed_training_steps = None
    if args.resume or args.load_state_dict:
        if args.resume and not args.reset_optimizer:
            # initiate SGD with dummy lr
            optimizer = torch.optim.SGD(model.parameters(), lr=0.36787944117)
        model, compression_scheduler, optimizer, start_epoch, resumed_training_steps = apputils.load_checkpoint(
            model, args.resume or args.load_state_dict, optimizer=optimizer)
        model.to(args.device)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)

    if optimizer is not None:
        # optimizer was resumed from checkpoint
        # check if user has tried to set optimizer arguments
        # if so, ignore arguments with a warning.
        optimizer_group_args = [
            'lr', 'learning-rate', 'momentum', 'weight-decay', 'wd'
        ]
        user_optim_args = [
            x for x in optimizer_group_args for arg in sys.argv
            if arg.startswith('--' + x)
        ]
        if user_optim_args:
            msglogger.warning(
                '{} optimizer arguments are ignored.'.format(user_optim_args))
            msglogger.info(
                'setting optimizer arguments when optimizer is resumed '
                'from checkpoint is forbidden.')
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info(
            '\tStats will be collected for {:.1%} of test dataset'.format(
                args.qe_calibration))
        msglogger.info(
            '\tSetting constant seeds and converting model to serialized execution'
        )
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(
            create_quantization_stats_collector(model))
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.loaders, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size,
        args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))
    args.trainset_print_period = parser.getPrintPeriod(
        args, len(train_loader.sampler), args.batch_size)
    args.validset_print_period = parser.getPrintPeriod(args,
                                                       len(val_loader.sampler),
                                                       args.batch_size)
    args.testset_print_period = parser.getPrintPeriod(args,
                                                      len(test_loader.sampler),
                                                      args.batch_size)

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if
            (args.resume and not args.reset_optimizer) else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resume.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_checkpoint(teacher,
                                               chkpt_file=args.kd_resume)[0]
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(
            args.kd_policy, range(args.kd_start_epoch, args.epochs, 1))

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    if getattr(compression_scheduler, 'global_policy_end_epoch',
               None) is not None:
        if compression_scheduler.global_policy_end_epoch >= (start_epoch +
                                                             args.epochs):
            msglogger.warning(
                'scheduler requires at least {} epochs, but only {} are sanctioned'
                .format(compression_scheduler.global_policy_end_epoch,
                        args.epochs))

    accumulated_training_steps = resumed_training_steps if resumed_training_steps is not None else 0
    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            try:
                train(train_loader,
                      model,
                      criterion,
                      optimizer,
                      epoch,
                      accumulated_training_steps,
                      compression_scheduler,
                      loggers=[tflogger, pylogger],
                      args=args)
            except RuntimeError as e:
                if ('cuda out of memory' in str(e).lower()):
                    msglogger.error(
                        'CUDA memory failure has been detected.\n'
                        'Sometimes it helps to decrease batch size.\n'
                        'e.g. Add the following flag to your call: --batch-size={}'
                        .format(args.batch_size // 10))
                raise
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))
        accumulated_training_steps += math.ceil(
            len(train_loader.sampler) / train_loader.batch_size)

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        tflogger.log_training_progress(stats, epoch, None)

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        if getattr(compression_scheduler, 'global_policy_end_epoch',
                   None) is None or (
                       compression_scheduler.global_policy_end_epoch <= epoch):
            # Update the list of top scores achieved since all policies have concluded
            if top1 > 0:
                best_epochs.append(
                    distiller.MutableNamedTuple({
                        'top1': top1,
                        'top5': top5,
                        'epoch': epoch
                    }))
            # Keep best_epochs sorted from best to worst
            # Sort by top1 first, secondary sort by top5, and so forth
            best_epochs.sort(key=operator.attrgetter('top1', 'top5', 'epoch'),
                             reverse=True)
            for score in best_epochs[:args.num_best_scores]:
                msglogger.info('==> Best Top1: %.3f Top5: %.3f on epoch: %d',
                               score.top1, score.top5, score.epoch)

        is_best = best_epochs and (epoch == best_epochs[0].epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler,
                                 best_epochs[0].top1 if best_epochs else None,
                                 is_best, args.name, msglogger.logdir,
                                 accumulated_training_steps)

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)
Example #18
0
def main():
    global msglogger

    script_dir = os.path.dirname(__file__)

    args = parse_args()

    # Distiller loggers
    msglogger = apputils.config_pylogger('logging.conf',
                                         args.name,
                                         output_dir=args.output_dir)
    tflogger = TensorBoardLogger(msglogger.logdir)
    # tflogger.log_gradients = True
    # pylogger = PythonLogger(msglogger)

    if args.seed is not None:
        msglogger.info("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    args.qe_mode = str(args.qe_mode).split('.')[1]
    args.qe_clip_acts = str(args.qe_clip_acts).split('.')[1]

    apputils.log_execution_env_state(sys.argv)

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error(
                'ERROR: Argument --gpus must be a comma-separated list of integers only'
            )
            exit(1)
        if len(args.gpus) > 1:
            msglogger.error('ERROR: Only single GPU supported for NCF')
            exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error(
                    'ERROR: GPU device ID {0} requested, but only {1} devices available'
                    .format(dev_id, available_gpus))
                exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Save configuration to file
    config = {k: v for k, v in args.__dict__.items()}
    config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp())
    config['local_timestamp'] = str(datetime.now())
    run_dir = msglogger.logdir
    msglogger.info("Saving config and results to {}".format(run_dir))
    if not os.path.exists(run_dir) and run_dir != '':
        os.makedirs(run_dir)
    utils.save_config(config, run_dir)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    t1 = time.time()
    # Load Data
    training = not (args.eval or args.qe_calibration
                    or args.activation_histograms)
    msglogger.info('Loading data')
    if training:
        train_dataset = CFTrainDataset(
            os.path.join(args.data, TRAIN_RATINGS_FILENAME),
            args.negative_samples)
        train_dataloader = torch.utils.data.DataLoader(
            dataset=train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True)
        nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items
    else:
        train_dataset = None
        train_dataloader = None
        nb_users, nb_items = (138493, 26744)

    test_ratings = load_test_ratings(
        os.path.join(args.data, TEST_RATINGS_FILENAME))  # noqa: E501
    test_negs = load_test_negs(os.path.join(args.data, TEST_NEG_FILENAME))

    msglogger.info(
        'Load data done [%.1f s]. #user=%d, #item=%d, #train=%s, #test=%d' %
        (time.time() - t1, nb_users, nb_items,
         str(train_dataset.mat.nnz) if training else 'N/A', len(test_ratings)))

    # Create model
    model = NeuMF(nb_users,
                  nb_items,
                  mf_dim=args.factors,
                  mf_reg=0.,
                  mlp_layer_sizes=args.layers,
                  mlp_layer_regs=[0. for i in args.layers],
                  split_final=args.split_final)
    if use_cuda:
        model = model.cuda()
    msglogger.info(model)
    msglogger.info("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    compression_scheduler = None
    start_epoch = 0
    optimizer = None
    if args.load:
        if training:
            model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
                model, args.load)
            if args.reset_optimizer:
                start_epoch = 0
                optimizer = None
        else:
            model = apputils.load_lean_checkpoint(model, args.load)

    # Add loss to graph
    criterion = nn.BCEWithLogitsLoss()

    if use_cuda:
        criterion = criterion.cuda()

    if training and optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.compress:
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress)
        model.cuda()

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')

    if args.qe_calibration or args.activation_histograms:
        calib = {
            'portion':
            args.qe_calibration,
            'desc_str':
            'quantization calibration stats',
            'collect_func':
            partial(distiller.data_loggers.collect_quant_stats,
                    inplace_runtime_check=True,
                    disable_inplace_attrs=True)
        }
        hists = {
            'portion':
            args.activation_histograms,
            'desc_str':
            'activation histograms',
            'collect_func':
            partial(distiller.data_loggers.collect_histograms,
                    activation_stats=None,
                    nbins=2048,
                    save_hist_imgs=True)
        }
        d = calib if args.qe_calibration else hists

        distiller.utils.assign_layer_fq_names(model)
        num_users = int(np.floor(len(test_ratings) * d['portion']))
        msglogger.info(
            "Generating {} based on {:.1%} of the test-set ({} users)".format(
                d['desc_str'], d['portion'], num_users))

        test_fn = partial(val_epoch,
                          ratings=test_ratings,
                          negs=test_negs,
                          K=args.topk,
                          use_cuda=use_cuda,
                          processes=args.processes,
                          num_users=num_users)
        d['collect_func'](model=model,
                          test_fn=test_fn,
                          save_dir=run_dir,
                          classes=None)

        return 0

    if args.eval:
        if args.quantize_eval and args.qe_calibration is None:
            model.cpu()
            quantizer = quantization.PostTrainLinearQuantizer.from_args(
                model, args)
            dummy_input = (torch.tensor([1]), torch.tensor([1]),
                           torch.tensor([True], dtype=torch.bool))
            quantizer.prepare_model(dummy_input)
            model.cuda()

        distiller.utils.assign_layer_fq_names(model)

        if args.eval_fp16:
            model = model.half()

        # Calculate initial Hit Ratio and NDCG
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                use_cuda=use_cuda,
                                processes=args.processes)
        val_time = time.time() - begin
        hit_rate = np.mean(hits)
        msglogger.info(
            'Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, val_time = {val_time:.2f}'
            .format(K=args.topk,
                    hit_rate=hit_rate,
                    ndcg=np.mean(ndcgs),
                    val_time=val_time))
        hit_rate = 0

        if args.quantize_eval:
            checkpoint_name = 'quantized'
            apputils.save_checkpoint(0,
                                     'NCF',
                                     model,
                                     optimizer=None,
                                     extras={'quantized_hr@10': hit_rate},
                                     name='_'.join([args.name, 'quantized'])
                                     if args.name else checkpoint_name,
                                     dir=msglogger.logdir)
        return 0

    total_samples = len(train_dataloader.sampler)
    steps_per_epoch = math.ceil(total_samples / args.batch_size)
    best_hit_rate = 0
    best_epoch = 0
    for epoch in range(start_epoch, args.epochs):
        msglogger.info('')
        model.train()
        losses = utils.AverageMeter()

        begin = time.time()

        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch, optimizer)

        loader = tqdm.tqdm(train_dataloader)
        for batch_index, (user, item, label) in enumerate(loader):
            user = torch.autograd.Variable(user, requires_grad=False)
            item = torch.autograd.Variable(item, requires_grad=False)
            label = torch.autograd.Variable(label, requires_grad=False)
            if use_cuda:
                user = user.cuda(async=True)
                item = item.cuda(async=True)
                label = label.cuda(async=True)

            if compression_scheduler:
                compression_scheduler.on_minibatch_begin(
                    epoch, batch_index, steps_per_epoch, optimizer)

            outputs = model(user, item, torch.tensor([False],
                                                     dtype=torch.bool))
            loss = criterion(outputs, label)

            if compression_scheduler:
                compression_scheduler.before_backward_pass(
                    epoch,
                    batch_index,
                    steps_per_epoch,
                    loss,
                    optimizer,
                    return_loss_components=False)

            losses.update(loss.data.item(), user.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if compression_scheduler:
                compression_scheduler.on_minibatch_end(epoch, batch_index,
                                                       steps_per_epoch,
                                                       optimizer)

            # Save stats to file
            description = (
                'Epoch {} Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                    epoch, loss=losses))
            loader.set_description(description)

            steps_completed = batch_index + 1
            if steps_completed % args.log_freq == 0:
                stats_dict = OrderedDict()
                stats_dict['Loss'] = losses.avg
                stats = ('Performance/Training/', stats_dict)
                params = model.named_parameters(
                ) if args.log_params_histograms else None
                distiller.log_training_progress(stats, params, epoch,
                                                steps_completed,
                                                steps_per_epoch, args.log_freq,
                                                [tflogger])

                tflogger.log_model_buffers(model,
                                           ['tracked_min', 'tracked_max'],
                                           'Quant/Train/Acts/TrackedMinMax',
                                           epoch, steps_completed,
                                           steps_per_epoch, args.log_freq)

        train_time = time.time() - begin
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                use_cuda=use_cuda,
                                output=valid_results_file,
                                epoch=epoch,
                                processes=args.processes)
        val_time = time.time() - begin

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        hit_rate = np.mean(hits)
        mean_ndcgs = np.mean(ndcgs)

        stats_dict = OrderedDict()
        stats_dict['HR@{0}'.format(args.topk)] = hit_rate
        stats_dict['NDCG@{0}'.format(args.topk)] = mean_ndcgs
        stats = ('Performance/Validation/', stats_dict)
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        msglogger.info(
            'Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, AvgTrainLoss = {loss.avg:.4f}, '
            'train_time = {train_time:.2f}, val_time = {val_time:.2f}'.format(
                epoch=epoch,
                K=args.topk,
                hit_rate=hit_rate,
                ndcg=mean_ndcgs,
                loss=losses,
                train_time=train_time,
                val_time=val_time))

        is_best = False
        if hit_rate > best_hit_rate:
            best_hit_rate = hit_rate
            is_best = True
            best_epoch = epoch
        extras = {
            'current_hr@10': hit_rate,
            'best_hr@10': best_hit_rate,
            'best_epoch': best_epoch
        }
        apputils.save_checkpoint(epoch,
                                 'NCF',
                                 model,
                                 optimizer,
                                 compression_scheduler,
                                 extras,
                                 is_best,
                                 dir=run_dir)

        if args.threshold is not None:
            if np.mean(hits) >= args.threshold:
                msglogger.info("Hit threshold of {}".format(args.threshold))
                break
def handle_subapps(model, criterion, optimizer, compression_scheduler,
                   pylogger, args):
    if args.transfer or args.dataset in custom_datasets:
        if args.arch == 'resnet34' or 'resnet20_cifar':
            model.module.fc = nn.Linear(model.module.fc.in_features,
                                        args.num_classes)

            if args.resumed_checkpoint_path:
                model, _, _, _ = apputils.load_checkpoint(
                    model,
                    args.resumed_checkpoint_path,
                    model_device=args.device)
            model = model.module
            model = nn.DataParallel(model, device_ids=args.gpus)

            print(model.module.fc)
        elif args.arch == 'vgg16':
            model.classifier[6] = nn.Linear(model.classifier[6].in_features,
                                            args.num_classes)
            if args.resumed_checkpoint_path:
                model, _, _, _ = apputils.load_checkpoint(
                    model,
                    args.resumed_checkpoint_path,
                    model_device=args.device)
            model.to(0)
            print(model.classifier[6])

    def load_test_data(args):
        test_loader = classifier.load_data(args,
                                           load_train=False,
                                           load_val=False,
                                           load_test=True)
        return test_loader

    do_exit = False
    if args.greedy:
        greedy(model, criterion, optimizer, pylogger, args)
        do_exit = True
    elif args.summary:
        # This sample application can be invoked to produce various summary reports
        for summary in args.summary:
            distiller.model_summary(model, summary, args.dataset)
        do_exit = True
    elif args.export_onnx is not None:
        distiller.export_img_classifier_to_onnx(model,
                                                os.path.join(
                                                    msglogger.logdir,
                                                    args.export_onnx),
                                                args.dataset,
                                                add_softmax=True,
                                                verbose=False)
        do_exit = True
    elif args.qe_calibration:
        classifier.acts_quant_stats_collection(model, criterion, pylogger,
                                               args)
        do_exit = True
    elif args.activation_histograms:
        classifier.acts_histogram_collection(model, criterion, pylogger, args)
        do_exit = True
    elif args.sensitivity is not None:
        test_loader = load_test_data(args)
        #sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1], args.sensitivity_range[2])
        sensitivities = np.arange(*args.sensitivity_range)
        sensitivity_analysis(model, criterion, test_loader, pylogger, args,
                             sensitivities)
        do_exit = True
    elif args.evaluate:
        test_loader = load_test_data(args)
        activations_collectors = classifier.create_activation_stats_collectors(
            model, *args.activation_stats)
        classifier.evaluate_model(model, criterion, test_loader, pylogger,
                                  activations_collectors, args,
                                  compression_scheduler)
        do_exit = True
    elif args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resumed_checkpoint_path.replace(
                                         ".pth.tar", "")),
                                 dir=msglogger.logdir)
        msglogger.info(
            "Note: if your model collapsed to random inference, you may want to fine-tune"
        )
        do_exit = True
    return do_exit
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress,
                                     msglogger.logdir,
                                     gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_epochs = [
        distiller.MutableNamedTuple({
            'epoch': 0,
            'top1': 0,
            'sparsity': 0
        }) for i in range(args.num_best_scores)
    ]

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1'
            )
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
                exit(1)
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
                    exit(1)
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained,
                         args.dataset,
                         args.arch,
                         parallel=not args.load_serialized,
                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
        model.to(args.device)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info(
            '\tStats will be collected for {:.1%} of test dataset'.format(
                args.qe_calibration))
        msglogger.info(
            '\tSetting constant seeds and converting model to serialized execution'
        )
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(
            create_quantization_stats_collector(model))
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size,
        args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress,
                                                      compression_scheduler)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resume.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher,
                                                     chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Peformance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        is_best = top1 > best_epochs[-1].top1
        if top1 > best_epochs[0].top1:
            best_epochs[0].epoch = epoch
            best_epochs[0].top1 = top1
            # Keep best_epochs sorted such that best_epochs[0] is the lowest top1 in the best_epochs list
            best_epochs = sorted(best_epochs, key=lambda score: score.top1)
        for score in reversed(best_epochs):
            if score.top1 > 0:
                msglogger.info('==> Best Top1: %.3f on Epoch: %d', score.top1,
                               score.epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler, best_epochs[-1].top1,
                                 is_best, args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)
Example #21
0
import logging
from distiller.data_loggers import *
import DataLoader
import distiller.apputils.image_classifier as ic
import torch
from distiller.quantization.range_linear import PostTrainLinearQuantizer
from opt import args
import distiller.apputils as apputils
from distiller.apputils.image_classifier import test
import torch.nn as nn
import os
from transformers import GPT2LMHeadModel

model_path = 'model/tmpfile/'

model = GPT2LMHeadModel.from_pretrained(model_path)
model.load_state_dict(model_path)

model = model.cuda()
model.eval()

quantizer = PostTrainLinearQuantizer(model)

quantizer.prepare_model(torch.rand([你的模型输入的大小如[1, 3, 32, 32]]))

apputils.save_checkpoint(0,
                         'my_model',
                         model,
                         optimizer=None,
                         name='quantized',
                         dir='model/tmpfile_quantization')
Example #22
0
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 90

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress, msglogger.logdir, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        distiller.set_deterministic()  # Use a well-known seed, for repeatability of experiments
    else:
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError('ERROR: Argument --gpus must be a comma-separated list of integers only')
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                     .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained, args.dataset, args.arch,
                         parallel=not args.load_serialized, device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds)

    # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1
    if args.deprecated_resume:
        msglogger.warning('The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.')
        if not args.reset_optimizer:
            msglogger.warning('If you wish to also reset the optimizer, call with: --reset-optimizer')
            args.reset_optimizer = True
        args.resumed_checkpoint_path = args.deprecated_resume

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model, args.load_model_path,
                                              model_device=args.device)
    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info('\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0')

    # Define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(args.device)

    if optimizer is None:
        optimizer = torch.optim.SGD(model.parameters(),
            lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer, pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info('\tStats will be collected for {:.1%} of test dataset'.format(args.qe_calibration))
        msglogger.info('\tSetting constant seeds and converting model to serialized execution')
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(create_quantization_stats_collector(model))
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size, args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1], args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger, activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer, args.compress, compression_scheduler,
            (start_epoch-1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model, compression_scheduler.zeros_mask_dict, args.arch, args.dataset, optimizer=None)
        apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler,
                                 name="{}_thinned".format(args.resumed_checkpoint_path.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print("Note: your model may have collapsed to random inference, so you may want to fine-tune")
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch, ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    if start_epoch >= ending_epoch:
        msglogger.error(
            'epoch count is too low, starting epoch is {} but total epochs set to {}'.format(
            start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')
    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch,
                metrics=(vloss if (epoch != start_epoch) else 10**6))

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader, model, criterion, optimizer, epoch, compression_scheduler,
                  loggers=[tflogger, pylogger], args=args)
            distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(epoch, "train", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch)
            distiller.log_activation_statsitics(epoch, "valid", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss),
                              ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, top1, top5, epoch, args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {'current_top1': top1,
                             'best_top1': perf_scores_history[0].top1,
                             'best_epoch': perf_scores_history[0].epoch}
        apputils.save_checkpoint(epoch, args.arch, model, optimizer=optimizer, scheduler=compression_scheduler,
                                 extras=checkpoint_extras, is_best=is_best, name=args.name, dir=msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
Example #23
0
def arbitrary_channel_pruning(config, channels_to_remove, is_parallel):
    """Test removal of arbitrary channels.
    The test receives a specification of channels to remove.
    Based on this specification, the channels are pruned and then physically
    removed from the model (via a "thinning" process).
    """
    model, zeros_mask_dict = common.setup_test(config.arch, config.dataset, is_parallel)

    pair = config.module_pairs[0]
    conv2 = common.find_module_by_name(model, pair[1])
    assert conv2 is not None

    # Test that we can access the weights tensor of the first convolution in layer 1
    conv2_p = distiller.model_find_param(model, pair[1] + ".weight")
    assert conv2_p is not None

    assert conv2_p.dim() == 4
    num_channels = conv2_p.size(1)
    cnt_nnz_channels = num_channels - len(channels_to_remove)
    mask = create_channels_mask(conv2_p, channels_to_remove)
    assert distiller.density_ch(mask) == (conv2.in_channels - len(channels_to_remove)) / conv2.in_channels
    # Cool, so now we have a mask for pruning our channels.

    # Use the mask to prune
    zeros_mask_dict[pair[1] + ".weight"].mask = mask
    zeros_mask_dict[pair[1] + ".weight"].apply_mask(conv2_p)
    all_channels = set([ch for ch in range(num_channels)])
    nnz_channels = set(distiller.find_nonzero_channels_list(conv2_p, pair[1] + ".weight"))
    channels_removed = all_channels - nnz_channels
    logger.info("Channels removed {}".format(channels_removed))

    # Now, let's do the actual network thinning
    distiller.remove_channels(model, zeros_mask_dict, config.arch, config.dataset, optimizer=None)
    conv1 = common.find_module_by_name(model, pair[0])
    assert conv1
    assert conv1.out_channels == cnt_nnz_channels
    assert conv2.in_channels == cnt_nnz_channels
    assert conv1.weight.size(0) == cnt_nnz_channels
    assert conv2.weight.size(1) == cnt_nnz_channels
    if config.bn_name is not None:
        bn1 = common.find_module_by_name(model, config.bn_name)
        assert bn1.running_var.size(0) == cnt_nnz_channels
        assert bn1.running_mean.size(0) == cnt_nnz_channels
        assert bn1.num_features == cnt_nnz_channels
        assert bn1.bias.size(0) == cnt_nnz_channels
        assert bn1.weight.size(0) == cnt_nnz_channels

    dummy_input = common.get_dummy_input(config.dataset)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.1)
    run_forward_backward(model, optimizer, dummy_input)

    # Let's test saving and loading a thinned model.
    # We save 3 times, and load twice, to make sure to cover some corner cases:
    #   - Make sure that after loading, the model still has hold of the thinning recipes
    #   - Make sure that after a 2nd load, there no problem loading (in this case, the
    #   - tensors are already thin, so this is a new flow)
    # (1)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None)
    model_2 = create_model(False, config.dataset, config.arch, parallel=is_parallel)
    model(dummy_input)
    model_2(dummy_input)
    conv2 = common.find_module_by_name(model_2, pair[1])
    assert conv2 is not None
    with pytest.raises(KeyError):
        model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar')
    compression_scheduler = distiller.CompressionScheduler(model)
    hasattr(model, 'thinning_recipes')

    run_forward_backward(model, optimizer, dummy_input)

    # (2)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None, scheduler=compression_scheduler)
    model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done")

    # (3)
    save_checkpoint(epoch=0, arch=config.arch, model=model_2, optimizer=None, scheduler=compression_scheduler)
    model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done 2")
Example #24
0
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    # 记录有关执行环境的各种详细信息。有时是有用的
    # 参考过去的实验执行,这些信息可能有用。
    apputils.log_execution_env_state(args.compress,
                                     msglogger.logdir,
                                     gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    perf_scores_history = []
    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1'
            )  # 错误:设置--确定性要求将--workers/-j设置为0或1
            exit(1)  # 正常退出程序
        # Use a well-known seed, for repeatability of experiments 使用一种众所周知的种子,用于实验的重复性。
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
                exit(1)
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
                    exit(1)
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cousm'

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = ResNet152()
    # model = torch.nn.DataParallel(model, device_ids=args.gpus) # 并行GPU
    model.to(args.device)
    compression_scheduler = None  # 压缩调度
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    # 创建两个日志后端 TensorBoardLogger以Google的Tensor板可以读取的格式写入日志文件。python logger将写入python记录器。
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:  # 加载训练模型
        # checkpoint = torch.load(args.resume)
        # model.load_state_dict(checkpoint['state_dict'])
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
        model.to(args.device)

    # Define loss function (criterion) and optimizer  # 定义损失函数和优化器SGD
    criterion = nn.CrossEntropyLoss().to(args.device)

    # optimizer = torch.optim.SGD(model.fc.parameters(), lr=args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    optimizer = torch.optim.Adam(model.model.fc.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:  # 自动化的深层压缩
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:  # 贪婪的
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports. # 可以调用此示例应用程序来生成各种摘要报告。
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)
    # 激活统计收集器
    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info(
            '\tStats will be collected for {:.1%} of test dataset'.format(
                args.qe_calibration))
        msglogger.info(
            '\tSetting constant seeds and converting model to serialized execution'
        )
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(
            create_quantization_stats_collector(model))  # 量化统计收集器
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    # 加载数据集:从传递的模型名称推断要加载的数据集

    train_loader, val_loader, test_loader, _ = get_data_loaders(
        datasets_fn, r'/home/tian/Desktop/image_yasuo', args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size,
        args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))
    # 可以调用此示例应用程序来对模型执行敏感性分析。输出保存到csv和png。
    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        # #这个示例应用程序的主要用例是CNN压缩
        # #需要yaml中的压缩计划配置文件。
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress,
                                                      compression_scheduler)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        # 如果添加了参数(如PactQualifier),则模型会重新传输到GPU。
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)  # 压缩计划程序

    if args.thinnify:
        # zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"  # 必须使用--resume提供检查点文件以细化
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resume.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )  # 注意:您的模型可能已折叠为随机推理,因此您可能需要对其进行微调。
        return

    args.kd_policy = None  # 蒸馏
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher,
                                                     chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)
    lr = args.lr
    lr_decay = 0.5
    for epoch in range(start_epoch, args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:  # 打印掩盖稀疏表 在end of each epoch
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Peformance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint # 更新到目前为止获得的最高分数列表,并保存检查点
        sparsity = distiller.model_sparsity(model)
        perf_scores_history.append(
            distiller.MutableNamedTuple({
                'sparsity': sparsity,
                'top1': top1,
                'top5': top5,
                'epoch': epoch
            }))
        # Keep perf_scores_history sorted from best to worst
        # Sort by sparsity as main sort key, then sort by top1, top5 and epoch
        # 保持绩效分数历史记录从最好到最差的排序
        # 按稀疏度排序为主排序键,然后按top1、top5、epoch排序
        perf_scores_history.sort(key=operator.attrgetter(
            'sparsity', 'top1', 'top5', 'epoch'),
                                 reverse=True)
        for score in perf_scores_history[:args.num_best_scores]:
            msglogger.info(
                '==> Best [Top1: %.3f   Top5: %.3f   Sparsity: %.2f on epoch: %d]',
                score.top1, score.top5, score.sparsity, score.epoch)

        is_best = epoch == perf_scores_history[0].epoch
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler,
                                 perf_scores_history[0].top1, is_best,
                                 args.name, msglogger.logdir)
        if not is_best:
            lr = lr * lr_decay
            # 当loss大于上一次loss,降低学习率
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

    # Finally run results on the test set # 最后在测试集上运行结果
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)