def configure(model, compress):
    model = model.cuda()
    compression_scheduler = distiller.CompressionScheduler(model)
    optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    if compress:
        compression_scheduler = distiller.CompressionScheduler(model)
        distiller.config.file_config(model, optimizer, compress, compression_scheduler)

    parser = argparse.ArgumentParser()
    distiller.knowledge_distillation.add_distillation_args(parser)
    CONFIG_FILE = '.config_ipynb'
    if os.path.isfile(CONFIG_FILE):
        with open(CONFIG_FILE) as f:
            sys.argv = f.read().split()
    else:
        # argparse only accepts strings, so every argv value is passed as a string
        sys.argv = ['resnet.py',
                    '--kd-resume', 'net56_cifar.pth.tar',
                    '--kd-teacher', '',
                    '--kd-start-epoch', '0',
                    '--kd-student-wt', '0.5',
                    '--kd-teacher-wt', '0.0',
                    '--kd-distill-wt', '0.5']
    args = parser.parse_args()
    args.kd_policy = None
    epochs = 30
    if args.kd_teacher:
        if args.kd_resume:
            teacher = torch.load(args.kd_resume)
        # Create the distillation policy and add it to the scheduler
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=epochs,
                                         frequency=1)
    return model, compression_scheduler, epochs, optimizer, criterion, args
def test_load_gpu_model_on_cpu_with_thinning():
    # Issue #148
    # 1. create a GPU model and remove 50% of the filters in one of the layers (thinning)
    # 2. save the thinned model in a checkpoint file
    # 3. load the checkpoint and place it on the CPU
    gpu_model = create_model(False, 'cifar10', 'resnet20_cifar')
    conv_pname = "module.layer1.0.conv1.weight"
    conv_p = distiller.model_find_param(gpu_model, conv_pname)
    pruner = distiller.pruning.L1RankedStructureParameterPruner("test_pruner",
                                                                group_type="Filters",
                                                                desired_sparsity=0.5,
                                                                weights=conv_pname)
    zeros_mask_dict = distiller.create_model_masks_dict(gpu_model)
    pruner.set_param_mask(conv_p, conv_pname, zeros_mask_dict, meta=None)

    # Use the mask to prune
    zeros_mask_dict[conv_pname].apply_mask(conv_p)
    distiller.remove_filters(gpu_model, zeros_mask_dict, 'resnet20_cifar', 'cifar10', optimizer=None)
    assert hasattr(gpu_model, 'thinning_recipes')

    scheduler = distiller.CompressionScheduler(gpu_model)
    save_checkpoint(epoch=0, arch='resnet20_cifar', model=gpu_model, scheduler=scheduler, optimizer=None)

    CPU_DEVICE_ID = -1
    cpu_model = create_model(False, 'cifar10', 'resnet20_cifar', device_ids=CPU_DEVICE_ID)
    load_checkpoint(cpu_model, "checkpoint.pth.tar")
    assert distiller.model_device(cpu_model) == 'cpu'
def test_policy_scheduling():
    model = create_model(False, 'cifar10', 'resnet20_cifar')
    scheduler = distiller.CompressionScheduler(model)
    policy = distiller.PruningPolicy(None, None)
    with pytest.raises(AssertionError):
        scheduler.add_policy(policy)
    with pytest.raises(AssertionError):
        # Test for mutually-exclusive configuration
        scheduler.add_policy(policy, epochs=[1, 2, 3], starting_epoch=4, ending_epoch=5, frequency=1)
    scheduler.add_policy(policy, epochs=None, starting_epoch=4, ending_epoch=5, frequency=1)

    # Regression test for issue #176 - https://github.com/NervanaSystems/distiller/issues/176
    scheduler.add_policy(policy, epochs=[1, 2, 3])
    sched_metadata = scheduler.sched_metadata[policy]
    assert sched_metadata['starting_epoch'] == 1
    assert sched_metadata['ending_epoch'] == 4
    assert sched_metadata['frequency'] is None

    scheduler.add_policy(policy, epochs=[5])
    sched_metadata = scheduler.sched_metadata[policy]
    assert sched_metadata['starting_epoch'] == 5
    assert sched_metadata['ending_epoch'] == 6
    assert sched_metadata['frequency'] is None
def load_checkpoint(model, chkpt_file, optimizer=None):
    """Load a pytorch training checkpoint

    Args:
        model: the pytorch model to which we will load the parameters
        chkpt_file: the checkpoint file
        optimizer: the optimizer to which we will load the serialized state
    """
    compression_scheduler = None
    start_epoch = 0
    if os.path.isfile(chkpt_file):
        msglogger.info("=> loading checkpoint %s", chkpt_file)
        checkpoint = torch.load(chkpt_file, map_location=lambda storage, loc: storage)
        msglogger.info("Checkpoint keys:\n{}".format("\n\t".join(k for k in checkpoint.keys())))
        start_epoch = checkpoint['epoch'] + 1
        best_top1 = checkpoint.get('best_top1', None)
        if best_top1 is not None:
            msglogger.info("   best top@1: %.3f", best_top1)

        if 'compression_sched' in checkpoint:
            compression_scheduler = distiller.CompressionScheduler(model)
            compression_scheduler.load_state_dict(checkpoint['compression_sched'])
            msglogger.info("Loaded compression schedule from checkpoint (epoch %d)",
                           checkpoint['epoch'])
        else:
            msglogger.info("Warning: compression schedule data does not exist in the checkpoint")

        if 'thinning_recipes' in checkpoint:
            if 'compression_sched' not in checkpoint:
                raise KeyError("Found thinning_recipes key, but missing mandatory key compression_sched")
            msglogger.info("Loaded a thinning recipe from the checkpoint")
            # Cache the recipes in case we need them later
            model.thinning_recipes = checkpoint['thinning_recipes']
            distiller.execute_thinning_recipes_list(model,
                                                    compression_scheduler.zeros_mask_dict,
                                                    model.thinning_recipes)

        if 'quantizer_metadata' in checkpoint:
            msglogger.info('Loaded quantizer metadata from the checkpoint')
            qmd = checkpoint['quantizer_metadata']
            quantizer = qmd['type'](model, **qmd['params'])
            quantizer.prepare_model()

        msglogger.info("=> loaded checkpoint '%s' (epoch %d)", chkpt_file, checkpoint['epoch'])
        model.load_state_dict(checkpoint['state_dict'])
        return model, compression_scheduler, start_epoch
    else:
        raise IOError(ENOENT, 'Could not find a checkpoint file at', chkpt_file)
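# Hedged usage sketch (added for illustration; not part of the original sources): how the
# load_checkpoint() variant above is typically driven. It assumes the same imports and
# helpers as the surrounding snippets, and that 'checkpoint.pth.tar' was produced by
# save_checkpoint() for a matching resnet20_cifar model; the file name is an assumption.
def _example_resume_from_checkpoint():
    model = create_model(False, 'cifar10', 'resnet20_cifar')
    model, compression_scheduler, start_epoch = load_checkpoint(model, 'checkpoint.pth.tar')
    if compression_scheduler is None:
        # The checkpoint carried no serialized schedule; start with an empty one
        compression_scheduler = distiller.CompressionScheduler(model)
    return model, compression_scheduler, start_epoch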
def create_scheduler(self):
    scheduler = distiller.CompressionScheduler(self.model)
    masks = {param_name: masker.mask
             for param_name, masker in self.zeros_mask_dict.items()}
    scheduler.load_state_dict(state={'masks_dict': masks})
    return scheduler
def create_scheduler(model, zeros_mask_dict):
    scheduler = distiller.CompressionScheduler(model)
    masks = {param_name: masker.mask
             for param_name, masker in zeros_mask_dict.items()}
    scheduler.load_state_dict(state={"masks_dict": masks})
    return scheduler
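# Hedged usage sketch (illustrative; not from the original sources): rebuilding a
# CompressionScheduler from an existing zeros_mask_dict is mainly useful so that
# save_checkpoint() serializes the pruning masks along with the weights. The
# 'resnet20_cifar' arch string is an assumption.
def _example_serialize_masks(model, zeros_mask_dict):
    # Rebuild a scheduler that holds the current masks, then checkpoint it so the
    # masks travel with the model weights.
    scheduler = create_scheduler(model, zeros_mask_dict)
    save_checkpoint(epoch=0, arch='resnet20_cifar', model=model,
                    scheduler=scheduler, optimizer=None)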
def _init_learner(args):
    # Create the model
    model = create_model(args.pretrained, args.dataset, args.arch,
                         parallel=not args.load_serialized, device_ids=args.gpus)
    compression_scheduler = None

    # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1
    if args.deprecated_resume:
        msglogger.warning('The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.')
        if not args.reset_optimizer:
            msglogger.warning('If you wish to also reset the optimizer, call with: --reset-optimizer')
            args.reset_optimizer = True
        args.resumed_checkpoint_path = args.deprecated_resume

    optimizer = None
    start_epoch = 0
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model, args.load_model_path,
                                              model_device=args.device)
    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info('\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0')

    if optimizer is None:
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        msglogger.debug('Optimizer Type: %s', type(optimizer))
        msglogger.debug('Optimizer Args: %s', optimizer.defaults)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    return model, compression_scheduler, optimizer, start_epoch, args.epochs
def save_checkpoint(self, is_best=False):
    # Save the learned-model checkpoint
    scheduler = distiller.CompressionScheduler(self.model)
    masks = {param_name: masker.mask
             for param_name, masker in self.zeros_mask_dict.items()}
    scheduler.load_state_dict(state={'masks_dict': masks})

    episode = self.debug_stats['episode']
    episode = str(episode).zfill(3)
    if is_best:
        name = "BEST_adc_episode_{}".format(episode)
    else:
        name = "adc_episode_{}".format(episode)

    self.save_checkpoint_fn(epoch=self.debug_stats['episode'], model=self.model,
                            scheduler=scheduler, name=name)
def load_checkpoint(model, chkpt_file, optimizer=None):
    """Load a pytorch training checkpoint

    Args:
        model: the pytorch model to which we will load the parameters
        chkpt_file: the checkpoint file
        optimizer: the optimizer to which we will load the serialized state
    """
    compression_scheduler = None
    start_epoch = 0
    if os.path.isfile(chkpt_file):
        msglogger.info("=> loading checkpoint %s", chkpt_file)
        checkpoint = torch.load(chkpt_file)
        start_epoch = checkpoint['epoch'] + 1
        best_top1 = checkpoint.get('best_top1', None)
        if best_top1 is not None:
            msglogger.info("   best top@1: %.3f", best_top1)

        if 'compression_sched' in checkpoint:
            compression_scheduler = distiller.CompressionScheduler(model)
            compression_scheduler.load_state_dict(checkpoint['compression_sched'])
            msglogger.info("Loaded compression schedule from checkpoint (epoch %d)",
                           checkpoint['epoch'])
            if 'thinning_recipes' in checkpoint:
                if 'compression_sched' not in checkpoint:
                    raise KeyError("Found thinning_recipes key, but missing mandatory key compression_sched")
                msglogger.info("Loaded a thinning recipe from the checkpoint")
                # Cache the recipes in case we need them later
                model.thinning_recipes = checkpoint['thinning_recipes']
                distiller.execute_thinning_recipes_list(model,
                                                        compression_scheduler.zeros_mask_dict,
                                                        model.thinning_recipes)
        else:
            msglogger.info("Warning: compression schedule data does not exist in the checkpoint")

        msglogger.info("=> loaded checkpoint '%s' (epoch %d)", chkpt_file, checkpoint['epoch'])
        model.load_state_dict(checkpoint['state_dict'])
        return model, compression_scheduler, start_epoch
    else:
        msglogger.info("Error: no checkpoint found at %s", chkpt_file)
        exit(1)
def __init__(self, **kwargs):
    opt._parse(kwargs)
    self.opt = opt
    self.model = getattr(models, self.opt.model)()
    self.criterion = t.nn.CrossEntropyLoss().to(self.opt.device)
    # Common loss functions:
    # 1. Hinge loss: mainly used in support vector machines (SVMs);
    # 2. Cross-entropy loss (softmax loss): used in logistic regression and softmax classification;
    # 3. Square loss: mainly used in ordinary least squares (OLS);
    # 4. Exponential loss: mainly used in the AdaBoost ensemble algorithm;
    # 5. Others (e.g. 0-1 loss, absolute-value loss)
    self.optimizer = self.model.get_optimizer(self.opt.lr, self.opt.weight_decay)
    self.compression_scheduler = distiller.CompressionScheduler(self.model)
    self.train_losses = AverageMeter()  # loss meter
    self.train_top1 = AverageMeter()    # top-1 accuracy meter
    self.train_top5 = AverageMeter()    # top-5 accuracy meter
    self.best_precision = 0             # best accuracy so far
    self.start_epoch = 0
    self.train_writer = None
    self.value_writer = None
def step(self, action):
    """Take a step, given an action.

    This is invoked by the Agent.
    """
    layer_macs = self.get_macs(self.current_layer())
    if action > 0:
        actual_action = self.__remove_channels(self.current_layer_id, action)
    else:
        actual_action = 0
    layer_macs_after_action = self.get_macs(self.current_layer())

    # Update the various counters after taking the step
    self.current_layer_id += 1
    next_layer_macs = self.get_macs(self.current_layer())
    self._removed_macs += (layer_macs - layer_macs_after_action)
    self._remaining_macs -= next_layer_macs
    #self.prev_action = actual_action

    if self.episode_is_done():
        observation = self.get_final_obs()
        reward = self.compute_reward()
        # Save the learned-model checkpoint
        scheduler = distiller.CompressionScheduler(self.model)
        scheduler.load_state_dict(state={'masks_dict': self.zeros_mask_dict})
        self.save_checkpoint_fn(epoch=self.debug_stats['episode'],
                                model=self.model, scheduler=scheduler)
        self.debug_stats['episode'] += 1
    else:
        observation = self._get_obs(next_layer_macs)
        if True:
            # Intermediate steps currently earn no reward; only the episode end does
            reward = 0
        else:
            reward = self.compute_reward()

    self.prev_action = actual_action
    info = {}
    return observation, reward, self.episode_is_done(), info
def arbitrary_channel_pruning(config, channels_to_remove):
    """Test removal of arbitrary channels.

    The test receives a specification of channels to remove.
    Based on this specification, the channels are pruned and then
    physically removed from the model (via a "thinning" process).
    """
    model, zeros_mask_dict = common.setup_test(config.arch, config.dataset)

    conv2 = common.find_module_by_name(model, config.conv2_name)
    assert conv2 is not None

    # Test that we can access the weights tensor of the first convolution in layer 1
    conv2_p = distiller.model_find_param(model, config.conv2_name + ".weight")
    assert conv2_p is not None
    assert conv2_p.dim() == 4
    num_filters = conv2_p.size(0)
    num_channels = conv2_p.size(1)
    kernel_height = conv2_p.size(2)
    kernel_width = conv2_p.size(3)
    cnt_nnz_channels = num_channels - len(channels_to_remove)

    # Let's build our 4D mask.
    # We start with a 1D mask of channels, with all but our specified channels set to one
    channels = torch.ones(num_channels)
    for ch in channels_to_remove:
        channels[ch] = 0

    # Now let's expand back up to a 4D mask
    mask = channels.expand(num_filters, num_channels)
    mask.unsqueeze_(-1)
    mask.unsqueeze_(-1)
    mask = mask.expand(num_filters, num_channels, kernel_height, kernel_width).contiguous()

    assert mask.shape == conv2_p.shape
    assert distiller.density_ch(mask) == (conv2.in_channels - len(channels_to_remove)) / conv2.in_channels

    # Cool, so now we have a mask for pruning our channels.
    # Use the mask to prune
    zeros_mask_dict[config.conv2_name + ".weight"].mask = mask
    zeros_mask_dict[config.conv2_name + ".weight"].apply_mask(conv2_p)
    all_channels = set([ch for ch in range(num_channels)])
    nnz_channels = set(distiller.find_nonzero_channels_list(conv2_p, config.conv2_name + ".weight"))
    channels_removed = all_channels - nnz_channels
    logger.info("Channels removed {}".format(channels_removed))

    # Now, let's do the actual network thinning
    distiller.remove_channels(model, zeros_mask_dict, config.arch, config.dataset)
    conv1 = common.find_module_by_name(model, config.conv1_name)
    logger.info(conv1)
    logger.info(conv2)
    assert conv1.out_channels == cnt_nnz_channels
    assert conv2.in_channels == cnt_nnz_channels
    assert conv1.weight.size(0) == cnt_nnz_channels
    assert conv2.weight.size(1) == cnt_nnz_channels
    if config.bn_name is not None:
        bn1 = common.find_module_by_name(model, config.bn_name)
        assert bn1.running_var.size(0) == cnt_nnz_channels
        assert bn1.running_mean.size(0) == cnt_nnz_channels
        assert bn1.num_features == cnt_nnz_channels
        assert bn1.bias.size(0) == cnt_nnz_channels
        assert bn1.weight.size(0) == cnt_nnz_channels

    # Let's test saving and loading a thinned model.
    # We save 3 times, and load twice, to make sure to cover some corner cases:
    #  - Make sure that after loading, the model still has hold of the thinning recipes
    #  - Make sure that after a 2nd load, there is no problem loading (in this case the
    #    tensors are already thin, so this is a new flow)
    # (1)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None)
    model_2 = create_model(False, config.dataset, config.arch, parallel=False)
    dummy_input = torch.randn(1, 3, 32, 32)
    model(dummy_input)
    model_2(dummy_input)
    conv2 = common.find_module_by_name(model_2, config.conv2_name)
    assert conv2 is not None
    with pytest.raises(KeyError):
        model_2, compression_scheduler, start_epoch = load_checkpoint(model_2, 'checkpoint.pth.tar')

    compression_scheduler = distiller.CompressionScheduler(model)
    hasattr(model, 'thinning_recipes')

    # (2)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None,
                    scheduler=compression_scheduler)
    model_2, compression_scheduler, start_epoch = load_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done")

    # (3)
    save_checkpoint(epoch=0, arch=config.arch, model=model_2, optimizer=None,
                    scheduler=compression_scheduler)
    model_2, compression_scheduler, start_epoch = load_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done 2")
def train(**kwargs):
    opt._parse(kwargs)
    train_writer = None
    value_writer = None
    if opt.vis:
        train_writer = SummaryWriter(log_dir='./runs/train_' + datetime.now().strftime('%y%m%d-%H-%M-%S'))
        value_writer = SummaryWriter(log_dir='./runs/val_' + datetime.now().strftime('%y%m%d-%H-%M-%S'))
    previous_loss = 1e10      # loss of the previous epoch
    best_precision = 0        # best accuracy so far
    start_epoch = 0
    lr = opt.lr
    perf_scores_history = []  # performance-score history

    # step 1: criterion and optimizer
    # Common loss functions:
    # 1. Hinge loss: mainly used in support vector machines (SVMs);
    # 2. Cross-entropy loss (softmax loss): used in logistic regression and softmax classification;
    # 3. Square loss: mainly used in ordinary least squares (OLS);
    # 4. Exponential loss: mainly used in the AdaBoost ensemble algorithm;
    # 5. Others (e.g. 0-1 loss, absolute-value loss)
    criterion = t.nn.CrossEntropyLoss().to(opt.device)  # loss function

    # step 2: meters
    train_losses = AverageMeter()  # loss meter
    train_top1 = AverageMeter()    # top-1 accuracy meter
    train_top5 = AverageMeter()    # top-5 accuracy meter
    pylogger = PythonLogger(msglogger)

    # step 3: configure model
    model = getattr(models, opt.model)()  # build the network
    compression_scheduler = distiller.CompressionScheduler(model)
    optimizer = model.get_optimizer(lr, opt.weight_decay)  # optimizer
    if opt.load_model_path:
        # # Load all tensors onto the CPU
        # t.load(opt.load_model_path, map_location=lambda storage, loc: storage)
        # t.load(opt.load_model_path, map_location='cpu')
        # # Load all tensors onto GPU 1
        # t.load(opt.load_model_path, map_location=lambda storage, loc: storage.cuda(1))
        # # Move tensors from GPU 1 to GPU 0
        # t.load(opt.load_model_path, map_location={'cuda:1': 'cuda:0'})
        checkpoint = t.load(opt.load_model_path)
        start_epoch = checkpoint["epoch"]
        # compression_scheduler.load_state_dict(checkpoint['compression_scheduler'], False)
        best_precision = checkpoint["best_precision"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer = checkpoint['optimizer']
    model.to(opt.device)  # move the model to the GPU

    if opt.compress:
        compression_scheduler = distiller.file_config(model, optimizer, opt.compress,
                                                      compression_scheduler)  # load the pruning schedule
        model.to(opt.device)

    # learning-rate scheduler
    lr_scheduler = get_scheduler(optimizer, opt)

    # step 4: data
    train_data = DatasetFromFilename(opt.data_root, flag='train')  # training set
    val_data = DatasetFromFilename(opt.data_root, flag='test')     # validation set
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)  # training-set loader
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=True,
                                num_workers=opt.num_workers)    # validation-set loader

    # train
    for epoch in range(start_epoch, opt.max_epoch):
        model.train()
        if opt.pruning:
            compression_scheduler.on_epoch_begin(epoch)  # pruning hook at epoch start
        train_losses.reset()  # reset the meters
        train_top1.reset()
        # print('training set size', len(train_dataloader))
        total_samples = len(train_dataloader.sampler)
        steps_per_epoch = math.ceil(total_samples / opt.batch_size)
        train_progressor = ProgressBar(mode="Train  ", epoch=epoch, total_epoch=opt.max_epoch,
                                       model_name=opt.model, lr=lr, total=len(train_dataloader))
        lr = lr_scheduler.get_lr()[0]
        for ii, (data, labels, img_path, tag) in enumerate(train_dataloader):
            if not check_date(img_path, tag, msglogger):
                return
            if opt.pruning:
                compression_scheduler.on_minibatch_begin(epoch, ii, steps_per_epoch,
                                                         optimizer)  # pruning hook at batch start
            train_progressor.current = ii + 1  # current progress within the training set

            # train model
            input = data.to(opt.device)
            target = labels.to(opt.device)
            if train_writer:
                grid = make_grid((input.data.cpu() * 0.225 + 0.45).clamp(min=0, max=1))
                train_writer.add_image('train_images', grid, ii * (epoch + 1))  # training images
            score = model(input)              # network output
            loss = criterion(score, target)   # compute the loss

            if opt.pruning:
                # Before running the backward phase, we allow the scheduler to modify the loss
                # (e.g. add regularization loss)
                agg_loss = compression_scheduler.before_backward_pass(epoch, ii, steps_per_epoch,
                                                                      loss, optimizer=optimizer,
                                                                      return_loss_components=True)  # pruning-adjusted loss
                loss = agg_loss.overall_loss
                train_losses.update(loss.item(), input.size(0))
            # loss = criterion(score[0], target)  # loss for the Inception3 network

            optimizer.zero_grad()  # zero the parameter gradients
            loss.backward()        # backward pass
            optimizer.step()       # update parameters

            if opt.pruning:
                compression_scheduler.on_minibatch_end(epoch, ii, steps_per_epoch,
                                                       optimizer)  # pruning hook at batch end

            precision1_train, precision5_train = accuracy(score, target, topk=(1, 5))  # top-1 and top-5 accuracy
            # writer.add_graph(model, input)
            # precision1_train, precision2_train = accuracy(score[0], target, topk=(1, 2))  # Inception3
            train_losses.update(loss.item(), input.size(0))
            train_top1.update(precision1_train[0].item(), input.size(0))
            train_top5.update(precision5_train[0].item(), input.size(0))
            train_progressor.current_loss = train_losses.avg
            train_progressor.current_top1 = train_top1.avg
            train_progressor.current_top5 = train_top5.avg
            train_progressor()  # print progress
            if ii % opt.print_freq == 0:
                if train_writer:
                    train_writer.add_scalar('loss', train_losses.avg, ii * (epoch + 1))  # training loss
                    train_writer.add_text('top1', 'train accuracy top1 %s' % train_top1.avg,
                                          ii * (epoch + 1))  # top-1 accuracy as text
                    train_writer.add_scalars('accuracy', {'top1': train_top1.avg,
                                                          'top5': train_top5.avg,
                                                          'loss': train_losses.avg},
                                             ii * (epoch + 1))
        # train_progressor.done()  # save the training results to a txt file

        # validate and visualize
        if opt.pruning:
            distiller.log_weights_sparsity(model, epoch, loggers=[pylogger])  # log pruning results
            compression_scheduler.on_epoch_end(epoch, optimizer)  # pruning hook at epoch end
        val_loss, val_top1, val_top5 = val(model, criterion, val_dataloader, epoch,
                                           value_writer, lr)  # validate the model

        sparsity = distiller.model_sparsity(model)
        perf_scores_history.append(distiller.MutableNamedTuple({
            'sparsity': sparsity, 'top1': val_top1, 'top5': val_top5,
            'epoch': epoch + 1, 'lr': lr, 'loss': val_loss}))
        # Keep the performance-score history sorted from best to worst:
        # sort by sparsity as the primary key, then top1, top5 and epoch.
        perf_scores_history.sort(key=operator.attrgetter('sparsity', 'top1', 'top5', 'epoch'),
                                 reverse=True)
        for score in perf_scores_history[:1]:
            msglogger.info('==> Best [Top1: %.3f   Top5: %.3f   Sparsity: %.2f on epoch: %d   Lr: %f   Loss: %f]',
                           score.top1, score.top5, score.sparsity, score.epoch, lr, score.loss)

        best_precision = max(perf_scores_history[0].top1, best_precision)  # best top-1 accuracy
        is_best = epoch + 1 == perf_scores_history[0].epoch  # the current epoch is the best epoch
        if is_best:
            model.save({
                "epoch": epoch + 1,
                "model_name": opt.model,
                "state_dict": model.state_dict(),
                "best_precision": best_precision,
                "optimizer": optimizer,
                "valid_loss": [val_loss, val_top1, val_top5],
                'compression_scheduler': compression_scheduler.state_dict(),
            })  # save the model

        # update learning rate
        lr_scheduler.step(epoch)
        # If the training loss grew relative to the previous epoch, decay the learning rate:
        # if train_losses.val > previous_loss:
        #     lr = lr * opt.lr_decay
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr
        # previous_loss = train_losses.val
        t.cuda.empty_cache()  # release unused cached GPU memory
def main():
    global msglogger
    check_pytorch_version()
    args = parser.parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'),
                                         args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(sys.argv, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_epochs = [distiller.MutableNamedTuple({'epoch': 0, 'top1': 0, 'sparsity': 0})
                   for i in range(args.num_best_scores)]

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error('ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1')
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        torch.manual_seed(0)
        random.seed(0)
        np.random.seed(0)
        cudnn.deterministic = True
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error('ERROR: Argument --gpus must be a comma-separated list of integers only')
            exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                .format(dev_id, available_gpus))
                exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    if 'cinic' in args.arch:
        args.dataset = 'cinic10'
    else:
        args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset in ['cifar10', 'cinic10'] else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    #model = create_model(args.pretrained, args.dataset, args.arch,
    #                     parallel=not args.load_serialized, device_ids=args.gpus)
    model = create_model(False, args.dataset, args.arch, device_ids=args.gpus)  # Get arch state_dict
    compression_scheduler = None

    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:
        #model, compression_scheduler, start_epoch = apputils.load_checkpoint(
        #    model, chkpt_file=args.resume)
        # Load Pre-trained Model
        chkpt_file = args.resume
        print("=> loading checkpoint %s" % chkpt_file)
        checkpoint = torch.load(chkpt_file)
        model.load_state_dict(checkpoint['net'])

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.ADC:
        return automated_deep_compression(model, criterion, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_size, args.deterministic)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler))

    activations_collectors = create_activation_stats_collectors(model, collection_phase=args.activation_stats)

    if args.sensitivity is not None:
        return sensitivity_analysis(model, criterion, test_loader, pylogger, args)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger, activations_collectors, args)

    if args.compress:
        # The main use-case for this sample application is CNN compression.  Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer, args.compress)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.cuda()
    else:
        compression_scheduler = distiller.CompressionScheduler(model)

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher, chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs, frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader, model, criterion, optimizer, epoch, compression_scheduler,
                  loggers=[tflogger, pylogger], args=args)
            distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(epoch, "train", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch)
            distiller.log_activation_statsitics(epoch, "valid", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0,
                                        total_steps=1, log_freq=1, loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # remember best top1 and save checkpoint
        #sparsity = distiller.model_sparsity(model)
        is_best = top1 > best_epochs[0].top1
        if is_best:
            best_epochs[0].epoch = epoch
            best_epochs[0].top1 = top1
            #best_epoch.sparsity = sparsity
            best_epochs = sorted(best_epochs, key=lambda score: score.top1)
        for score in reversed(best_epochs):
            if score.top1 > 0:
                msglogger.info('==> Best Top1: %.3f on Epoch: %d', score.top1, score.epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer, compression_scheduler,
                                 best_epochs[0].top1, is_best, args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
def load_checkpoint(model, chkpt_file, optimizer=None,
                    model_device=None, lean_checkpoint=False, strict=False):
    """Load a pytorch training checkpoint.

    Args:
        model: the pytorch model to which we will load the parameters.  You can
          specify model=None if the checkpoint contains enough metadata to infer
          the model.  The order of the arguments is misleading and clunky, and is
          kept this way for backward compatibility.
        chkpt_file: the checkpoint file
        lean_checkpoint: if set, read into model only 'state_dict' field
        optimizer: [deprecated argument]
        model_device [str]: if set, call model.to($model_device)
          This should be set to either 'cpu' or 'cuda'.
    :returns: updated model, compression_scheduler, optimizer, start_epoch
    """
    def _load_compression_scheduler():
        normalize_keys = False
        try:
            compression_scheduler.load_state_dict(checkpoint['compression_sched'], normalize_keys)
        except KeyError as e:
            # A very common source of this KeyError is loading a GPU model on the CPU.
            # We rename all of the DataParallel keys because DataParallel does not execute on the CPU.
            normalize_keys = True
            compression_scheduler.load_state_dict(checkpoint['compression_sched'], normalize_keys)
        msglogger.info("Loaded compression schedule from checkpoint (epoch {})".format(checkpoint_epoch))
        return normalize_keys

    def _load_and_execute_thinning_recipes():
        msglogger.info("Loaded a thinning recipe from the checkpoint")
        # Cache the recipes in case we need them later
        model.thinning_recipes = checkpoint['thinning_recipes']
        if normalize_dataparallel_keys:
            model.thinning_recipes = [distiller.get_normalized_recipe(recipe)
                                      for recipe in model.thinning_recipes]
        distiller.execute_thinning_recipes_list(model,
                                                compression_scheduler.zeros_mask_dict,
                                                model.thinning_recipes)

    def _load_optimizer():
        """Initialize optimizer with model parameters and load src_state_dict"""
        try:
            cls, src_state_dict = checkpoint['optimizer_type'], checkpoint['optimizer_state_dict']
            # Initialize the dest_optimizer with a dummy learning rate,
            # this is required to support SGD.__init__()
            dest_optimizer = cls(model.parameters(), lr=1)
            dest_optimizer.load_state_dict(src_state_dict)
            msglogger.info('Optimizer of type {type} was loaded from checkpoint'.format(
                type=type(dest_optimizer)))
            optimizer_param_groups = dest_optimizer.state_dict()['param_groups']
            msglogger.info('Optimizer Args: {}'.format(
                dict((k, v) for k, v in optimizer_param_groups[0].items() if k != 'params')))
            return dest_optimizer
        except KeyError:
            # Older checkpoints do not support optimizer loading: they either had an
            # 'optimizer' field (different name) which was not used during the load,
            # or they didn't even checkpoint the optimizer.
            msglogger.warning('Optimizer could not be loaded from checkpoint.')
            return None

    def _create_model_from_ckpt():
        try:
            return distiller.models.create_model(False, checkpoint['dataset'], checkpoint['arch'],
                                                 checkpoint['is_parallel'], device_ids=None)
        except KeyError:
            return None

    def _sanity_check():
        try:
            if model.arch != checkpoint["arch"]:
                raise ValueError("The model architecture does not match the checkpoint architecture")
        except (NameError, KeyError):
            # One of the values is missing so we can't perform the comparison
            pass

    if not os.path.isfile(chkpt_file):
        raise IOError(ENOENT, 'Could not find a checkpoint file at', chkpt_file)
    assert optimizer is None, "argument optimizer is deprecated and must be set to None"

    msglogger.info("=> loading checkpoint %s", chkpt_file)
    checkpoint = torch.load(chkpt_file, map_location=lambda storage, loc: storage)
    msglogger.info('=> Checkpoint contents:\n%s\n' % get_contents_table(checkpoint))
    if 'extras' in checkpoint:
        msglogger.info("=> Checkpoint['extras'] contents:\n{}\n".format(
            get_contents_table(checkpoint['extras'])))

    if 'state_dict' not in checkpoint:
        raise ValueError("Checkpoint must contain the model parameters under the key 'state_dict'")

    if not model:
        model = _create_model_from_ckpt()
        if not model:
            raise ValueError("You didn't provide a model, and the checkpoint %s doesn't contain "
                             "enough information to create one", chkpt_file)

    checkpoint_epoch = checkpoint.get('epoch', None)
    start_epoch = checkpoint_epoch + 1 if checkpoint_epoch is not None else 0

    compression_scheduler = None
    normalize_dataparallel_keys = False
    if 'compression_sched' in checkpoint:
        compression_scheduler = distiller.CompressionScheduler(model)
        normalize_dataparallel_keys = _load_compression_scheduler()
    else:
        msglogger.info("Warning: compression schedule data does not exist in the checkpoint")

    if 'thinning_recipes' in checkpoint:
        if not compression_scheduler:
            msglogger.warning("Found thinning_recipes key, but missing key compression_scheduler")
            compression_scheduler = distiller.CompressionScheduler(model)
        _load_and_execute_thinning_recipes()

    if 'quantizer_metadata' in checkpoint:
        msglogger.info('Loaded quantizer metadata from the checkpoint')
        qmd = checkpoint['quantizer_metadata']
        quantizer = qmd['type'](model, **qmd['params'])
        quantizer.prepare_model(qmd['dummy_input'])

    if normalize_dataparallel_keys:
        checkpoint['state_dict'] = {normalize_module_name(k): v
                                    for k, v in checkpoint['state_dict'].items()}
    anomalous_keys = model.load_state_dict(checkpoint['state_dict'], strict)
    if anomalous_keys:
        # This is pytorch 1.1+
        missing_keys, unexpected_keys = anomalous_keys
        if unexpected_keys:
            msglogger.warning("Warning: the loaded checkpoint (%s) contains %d unexpected state keys"
                              % (chkpt_file, len(unexpected_keys)))
        if missing_keys:
            raise ValueError("The loaded checkpoint (%s) is missing %d state keys"
                             % (chkpt_file, len(missing_keys)))

    if model_device is not None:
        model.to(model_device)

    if lean_checkpoint:
        msglogger.info("=> loaded 'state_dict' from checkpoint '{}'".format(str(chkpt_file)))
        return model, None, None, 0

    optimizer = _load_optimizer()
    msglogger.info("=> loaded checkpoint '{f}' (epoch {e})".format(f=str(chkpt_file), e=checkpoint_epoch))
    _sanity_check()
    return model, compression_scheduler, optimizer, start_epoch
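# Hedged usage sketch (added for illustration; not from the original sources): how the
# four return values of the full load_checkpoint() above are typically consumed when
# resuming training. The checkpoint path and learning-rate values are assumptions.
def _example_full_resume():
    model = distiller.models.create_model(False, 'cifar10', 'resnet20_cifar')
    model, compression_scheduler, optimizer, start_epoch = load_checkpoint(
        model, 'checkpoint.pth.tar', model_device='cuda')
    if optimizer is None:
        # Older checkpoints may not carry optimizer state; create a fresh one
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    return model, compression_scheduler, optimizer, start_epoch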
def create_scheduler(self):
    scheduler = distiller.CompressionScheduler(self.model, self.zeros_mask_dict)
    return scheduler
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 90

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'),
                                         args.name, args.output_dir, args.verbose)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(
        filter(None, [args.compress, args.qe_stats_file]),  # remove both None and empty strings
        msglogger.logdir, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        distiller.set_deterministic(args.seed)  # For experiment reproducibility
    else:
        if args.seed is not None:
            distiller.set_seed(args.seed)
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError('ERROR: Argument --gpus must be a comma-separated list of integers only')
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                     .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = distiller.apputils.classification_dataset_str_from_arch(args.arch)
    args.num_classes = distiller.apputils.classification_num_classes(args.dataset)

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model, config = create_model(args.pretrained, args.dataset, args.arch,
                                 parallel=not args.load_serialized, device_ids=args.gpus)
    compression_scheduler = None

    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds)

    # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1
    if args.deprecated_resume:
        msglogger.warning('The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.')
        if not args.reset_optimizer:
            msglogger.warning('If you wish to also reset the optimizer, call with: --reset-optimizer')
            args.reset_optimizer = True
        args.resumed_checkpoint_path = args.deprecated_resume

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model, args.load_model_path,
                                              model_device=args.device)
    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info('\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0')

    # Define loss function (criterion)
    if "ssd" in args.arch:
        neg_pos_ratio = 3
        criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=neg_pos_ratio,
                                 center_variance=0.1, size_variance=0.2, device=args.device,
                                 reduction="sum", class_reduction=True, verbose=0)
    else:
        criterion = nn.CrossEntropyLoss().to(args.device)

    if optimizer is None:
        if "ssd" in args.arch:
            base_net_lr = args.lr
            extra_layers_lr = args.lr
            params = [
                {'params': model.base_net.parameters(), 'lr': base_net_lr},
                {'params': itertools.chain(model.source_layer_add_ons.parameters(),
                                           model.extras.parameters()),
                 'lr': extra_layers_lr},
                {'params': itertools.chain(model.regression_headers.parameters(),
                                           model.classification_headers.parameters())}
            ]
        else:
            params = model.parameters()
        optimizer = torch.optim.SGD(params, lr=args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer, pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        for summary in args.summary:
            distiller.model_summary(model, summary, args.dataset)
        return

    if args.export_onnx is not None:
        return distiller.export_img_classifier_to_onnx(
            model, os.path.join(msglogger.logdir, args.export_onnx),
            args.dataset, add_softmax=True, verbose=False)

    if args.qe_calibration:
        return acts_quant_stats_collection(model, criterion, pylogger, args)

    if args.activation_histograms:
        return acts_histogram_collection(model, criterion, pylogger, args)

    activations_collectors = create_activation_stats_collectors(model, *args.activation_stats)

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = load_data(args, config=config)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args, compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression.  Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model, compression_scheduler.zeros_mask_dict,
                                 args.arch, args.dataset, optimizer=None)
        apputils.save_checkpoint(0, args.arch, model, optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(args.resumed_checkpoint_path.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print("Note: your model may have collapsed to random inference, so you may want to fine-tune")
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher, _ = create_model(args.kd_pretrained, args.dataset, args.kd_teacher,
                                  parallel=not args.load_serialized, device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        raw_teacher_model_path = msglogger.logdir + "/raw_teacher.pth.tar"
        if not os.path.exists(raw_teacher_model_path):
            teacher.save(raw_teacher_model_path)
            msglogger.info(Fore.CYAN + '\tRaw Teacher Model saved: {0}'.format(raw_teacher_model_path)
                           + Style.RESET_ALL)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw, loss_type=args.kd_loss_type,
            focal_alpha=args.kd_focal_alpha, use_adaptive=args.kd_focal_adaptive, verbose=0)
        compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs, frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    if start_epoch >= ending_epoch:
        msglogger.error('epoch count is too low, starting epoch is {} but total epochs set to {}'
                        .format(start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')

    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(
                epoch, metrics=(vloss if (epoch != start_epoch) else 10**6))

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader, model, criterion, optimizer, epoch, compression_scheduler,
                  loggers=[tflogger, pylogger], args=args)
            distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(epoch, "train", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch)
            distiller.log_activation_statsitics(epoch, "valid", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0,
                                        total_steps=1, log_freq=1, loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, top1, top5,
                                       epoch, args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {'current_top1': top1,
                             'best_top1': perf_scores_history[0].top1,
                             'best_epoch': perf_scores_history[0].epoch}
        try:
            raw_fullpath_best = apputils.save_checkpoint(
                epoch, args.arch, model, optimizer=optimizer, scheduler=compression_scheduler,
                extras=checkpoint_extras, is_best=is_best, name=args.name, dir=msglogger.logdir)
        except Exception as ex:
            # keep previous fullpath_best
            pass
        mlflow.log_artifacts(msglogger.logdir)

    # Finally run results on the test set
    eval_params = {
        "model_type": args.arch,
        "model_path": raw_fullpath_best,
        "dataset_path": args.data,
        "label_path": "models/voc-model-labels.txt"
    }
    mlflow.projects.run(uri=".", entry_point="eval", use_conda=False, parameters=eval_params)
                # Skip biases
                continue
            bottomk, _ = torch.topk(param.abs().view(-1),
                                    int(percentile * param.numel()),
                                    largest=False, sorted=True)
            threshold = bottomk.data[-1]
            msglogger.info("parameter %s: q = %.2f" % (name, threshold))
    else:
        distiller.model_summary(model, None, which_summary, 'wikitext2')
    exit(0)

compression_scheduler = None
if args.compress:
    # Create a CompressionScheduler and configure it from a YAML schedule file
    source = args.compress
    compression_scheduler = distiller.CompressionScheduler(model)
    distiller.config.fileConfig(model, None, compression_scheduler, args.compress, msglogger)

optimizer = torch.optim.SGD(model.parameters(), args.lr,
                            momentum=args.momentum, weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                          patience=0, verbose=True, factor=0.5)

# Loop over epochs.
# At any point you can hit Ctrl + C to break out of training early.
best_val_loss = float("inf")
try:
    for epoch in range(0, args.epochs):
        epoch_start_time = time.time()
        if compression_scheduler:
def objective(space):
    global model
    global count
    global global_min_score

    # Explore new model
    model = create_model(False, args.dataset, args.arch, device_ids=args.gpus)
    count += 1

    # Objective function: F(Acc, Lat) = (1 - Acc.) + (alpha * Sparsity)
    accuracy = 0
    alpha = 0.3  # Super-parameter: the importance of inference time
    latency = 0.0
    sparsity = 0.0

    # Training hyperparameter
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
        print('resume mode: {}'.format(args.resume))
        print(global_min_score)

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    """
    distiller/distiller/config.py

    # Element-wise sparsity
    sparsity_levels = {net_param: sparsity_level}
    pruner = distiller.pruning.SparsityLevelParameterPruner(name='sensitivity', levels=sparsity_levels)
    policy = distiller.PruningPolicy(pruner, pruner_args=None)
    scheduler = distiller.CompressionScheduler(model)
    scheduler.add_policy(policy, epochs=[0, 2, 4])

    # Local search: add multiple pruners, one for each layer
    """
    sparsity_levels = {}
    for key, value in space.items():
        sparsity_levels[key] = value
    #print(sparsity_levels)

    pruner = distiller.pruning.SparsityLevelParameterPruner(name='sensitivity',
                                                            levels=sparsity_levels)  # for SparsityLevelParameterPruner
    # pruner = distiller.pruning.SensitivityPruner(name='sensitivity', sensitivities=sparsity_levels)  # for SensitivityPruner
    policy = distiller.PruningPolicy(pruner, pruner_args=None)
    lrpolicy = distiller.LRPolicy(torch.optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.1))
    compression_scheduler = distiller.CompressionScheduler(model)
    compression_scheduler.add_policy(policy, epochs=[PrunerEpoch])
    # compression_scheduler.add_policy(policy, starting_epoch=0, ending_epoch=38, frequency=2)
    compression_scheduler.add_policy(lrpolicy, starting_epoch=0, ending_epoch=50, frequency=1)

    """
    distiller/example/classifier_compression/compress_classifier.py

    For each epoch:
        compression_scheduler.on_epoch_begin(epoch)
        train()
        save_checkpoint()
        compression_scheduler.on_epoch_end(epoch)

    train():
        For each training step:
            compression_scheduler.on_minibatch_begin(epoch)
            output = model(input)
            loss = criterion(output, target)
            compression_scheduler.before_backward_pass(epoch)
            loss.backward()
            optimizer.step()
            compression_scheduler.on_minibatch_end(epoch)
    """
    local_min_score = 2.
    for i in range(args.epochs):
        compression_scheduler.on_epoch_begin(i)
        train_accuracy = train(i, criterion, optimizer, compression_scheduler)
        val_accuracy = validate()  # Validate hyperparameter setting
        t, sparsity = distiller.weights_sparsity_tbl_summary(model, return_total_sparsity=True)
        compression_scheduler.on_epoch_end(i, optimizer)
        apputils.save_checkpoint(i, args.arch, model, optimizer, compression_scheduler,
                                 train_accuracy, False, 'hyperopt', './')
        print('Epoch: {}, train_acc: {:.4f}, val_acc: {:.4f}, sparsity: {:.4f}'.format(
            i, train_accuracy, val_accuracy, sparsity))

        score = (1 - (val_accuracy / 100.)) + (alpha * (1 - sparsity / 100.))  # objective function here
        if score < global_min_score:
            global_min_score = score
            apputils.save_checkpoint(i, args.arch, model, optimizer, compression_scheduler,
                                     train_accuracy, True, 'best', './')
        if score < local_min_score:
            local_min_score = score
        if (PrunerConstraint == True and i >= PrunerEpoch and
                (sparsity < Expected_Sparsity_Level_Low or sparsity > Expected_Sparsity_Level_High)):
            break

    test_accuracy = test()  # Validate hyperparameter setting
    print('{} trials: score: {:.4f}, train_acc:{:.4f}, val_acc:{:.4f}, test_acc:{:.4f}, sparsity:{:.4f}'.format(
        count, local_min_score, train_accuracy, val_accuracy, test_accuracy, sparsity))

    return local_min_score
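# A minimal, self-contained sketch (added for illustration; not from the original
# sources) of the CompressionScheduler callback pattern that the docstring quoted in
# objective() describes. The toy model, the 70% sparsity level on '0.weight' (the
# Sequential's first Linear layer), and the epoch/step counts are all assumptions.
import torch
import torch.nn as nn
import distiller

def example_scheduled_pruning():
    model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 2))
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()

    # Prune the first Linear layer's weights to 70% element-wise sparsity
    pruner = distiller.pruning.SparsityLevelParameterPruner(name='example',
                                                            levels={'0.weight': 0.7})
    policy = distiller.PruningPolicy(pruner, pruner_args=None)
    scheduler = distiller.CompressionScheduler(model)
    scheduler.add_policy(policy, epochs=[0, 1])

    steps_per_epoch = 4
    for epoch in range(2):
        scheduler.on_epoch_begin(epoch)
        for step in range(steps_per_epoch):
            scheduler.on_minibatch_begin(epoch, step, steps_per_epoch, optimizer)
            inputs = torch.randn(32, 8)
            targets = torch.randint(0, 2, (32,))
            loss = criterion(model(inputs), targets)
            # Let the scheduler adjust the loss (e.g. add regularization terms)
            agg = scheduler.before_backward_pass(epoch, step, steps_per_epoch, loss,
                                                 optimizer=optimizer,
                                                 return_loss_components=True)
            loss = agg.overall_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.on_minibatch_end(epoch, step, steps_per_epoch, optimizer)
        scheduler.on_epoch_end(epoch, optimizer)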
# Distiller loggers
msglogger = apputils.config_pylogger('logging.conf', None)
tflogger = TensorBoardLogger(msglogger.logdir)
tflogger.log_gradients = True
pylogger = PythonLogger(msglogger)

if args.summary:
    # The last string is the dataset
    distiller.model_summary(net, args.summary, 'coco')
    exit(0)

compression_scheduler = None
if args.compress:
    source = args.compress
    compression_scheduler = distiller.CompressionScheduler(net)
    distiller.config.file_config(net, optimizer, source, compression_scheduler)

try:
    for epoch in range(0, args.epochs):
        total_loss = 0.
        epoch_start_time = time.time()
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)
        train(epoch, optimizer, compression_scheduler)
        val_loss = test(epoch)
        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch)
def arbitrary_channel_pruning(config, channels_to_remove, is_parallel):
    """Test removal of arbitrary channels.

    The test receives a specification of channels to remove.
    Based on this specification, the channels are pruned and then
    physically removed from the model (via a "thinning" process).
    """
    model, zeros_mask_dict = common.setup_test(config.arch, config.dataset, is_parallel)

    pair = config.module_pairs[0]
    conv2 = common.find_module_by_name(model, pair[1])
    assert conv2 is not None

    # Test that we can access the weights tensor of the first convolution in layer 1
    conv2_p = distiller.model_find_param(model, pair[1] + ".weight")
    assert conv2_p is not None
    assert conv2_p.dim() == 4
    num_channels = conv2_p.size(1)
    cnt_nnz_channels = num_channels - len(channels_to_remove)
    mask = create_channels_mask(conv2_p, channels_to_remove)
    assert distiller.density_ch(mask) == (conv2.in_channels - len(channels_to_remove)) / conv2.in_channels

    # Cool, so now we have a mask for pruning our channels.
    # Use the mask to prune
    zeros_mask_dict[pair[1] + ".weight"].mask = mask
    zeros_mask_dict[pair[1] + ".weight"].apply_mask(conv2_p)
    all_channels = set([ch for ch in range(num_channels)])
    nnz_channels = set(distiller.find_nonzero_channels_list(conv2_p, pair[1] + ".weight"))
    channels_removed = all_channels - nnz_channels
    logger.info("Channels removed {}".format(channels_removed))

    # Now, let's do the actual network thinning
    distiller.remove_channels(model, zeros_mask_dict, config.arch, config.dataset, optimizer=None)
    conv1 = common.find_module_by_name(model, pair[0])
    assert conv1
    assert conv1.out_channels == cnt_nnz_channels
    assert conv2.in_channels == cnt_nnz_channels
    assert conv1.weight.size(0) == cnt_nnz_channels
    assert conv2.weight.size(1) == cnt_nnz_channels
    if config.bn_name is not None:
        bn1 = common.find_module_by_name(model, config.bn_name)
        assert bn1.running_var.size(0) == cnt_nnz_channels
        assert bn1.running_mean.size(0) == cnt_nnz_channels
        assert bn1.num_features == cnt_nnz_channels
        assert bn1.bias.size(0) == cnt_nnz_channels
        assert bn1.weight.size(0) == cnt_nnz_channels

    dummy_input = common.get_dummy_input(config.dataset)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.1)
    run_forward_backward(model, optimizer, dummy_input)

    # Let's test saving and loading a thinned model.
    # We save 3 times, and load twice, to make sure to cover some corner cases:
    #  - Make sure that after loading, the model still has hold of the thinning recipes
    #  - Make sure that after a 2nd load, there is no problem loading (in this case the
    #    tensors are already thin, so this is a new flow)
    # (1)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None)
    model_2 = create_model(False, config.dataset, config.arch, parallel=is_parallel)
    model(dummy_input)
    model_2(dummy_input)
    conv2 = common.find_module_by_name(model_2, pair[1])
    assert conv2 is not None
    with pytest.raises(KeyError):
        model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar')

    compression_scheduler = distiller.CompressionScheduler(model)
    hasattr(model, 'thinning_recipes')
    run_forward_backward(model, optimizer, dummy_input)

    # (2)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None,
                    scheduler=compression_scheduler)
    model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done")

    # (3)
    save_checkpoint(epoch=0, arch=config.arch, model=model_2, optimizer=None,
                    scheduler=compression_scheduler)
    model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done 2")
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'),
                                         args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress, msglogger.logdir, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_epochs = list()

    if args.deterministic:
        if args.loaders is None:
            args.loaders = 1
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.loaders > 1:
            msglogger.error('ERROR: Setting --deterministic requires setting --loaders to 0 or 1')
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.use_cpu or (args.gpus is None and not torch.cuda.is_available()) or (args.gpus == ''):
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error('ERROR: Argument --gpus must be a comma-separated list of integers only')
                exit(1)
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                    .format(dev_id, available_gpus))
                    exit(1)
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    if args.loaders is None:
        active_gpus = args.gpus if args.gpus is not None else torch.cuda.device_count()
        args.loaders = max(parser.DEFAULT_LOADERS_COUNT, parser.DEFAULT_LOADERS_COUNT * active_gpus)
    msglogger.debug('Number of data loaders set to: {}'.format(args.loaders))

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained, args.dataset, args.arch,
                         parallel=not args.load_serialized, device_ids=args.gpus)
    compression_scheduler = None

    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    optimizer = None
    resumed_training_steps = None
    if args.resume or args.load_state_dict:
        if args.resume and not args.reset_optimizer:
            # initiate SGD with dummy lr
            optimizer = torch.optim.SGD(model.parameters(), lr=0.36787944117)
        model, compression_scheduler, optimizer, start_epoch, resumed_training_steps = \
            apputils.load_checkpoint(model, args.resume or args.load_state_dict, optimizer=optimizer)
    model.to(args.device)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)

    if optimizer is not None:
        # optimizer was resumed from checkpoint
        # check if user has tried to set optimizer arguments
        # if so, ignore arguments with a warning.
        optimizer_group_args = ['lr', 'learning-rate', 'momentum', 'weight-decay', 'wd']
        user_optim_args = [x for x in optimizer_group_args for arg in sys.argv
                           if arg.startswith('--' + x)]
        if user_optim_args:
            msglogger.warning('{} optimizer arguments are ignored.'.format(user_optim_args))
            msglogger.info('setting optimizer arguments when optimizer is resumed '
                           'from checkpoint is forbidden.')
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer, pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info('\tStats will be collected for {:.1%} of test dataset'.format(args.qe_calibration))
        msglogger.info('\tSetting constant seeds and converting model to serialized execution')
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(create_quantization_stats_collector(model))
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size, args.loaders,
        args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size, args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler))

    args.trainset_print_period = parser.getPrintPeriod(args, len(train_loader.sampler), args.batch_size)
    args.validset_print_period = parser.getPrintPeriod(args, len(val_loader.sampler), args.batch_size)
    args.testset_print_period = parser.getPrintPeriod(args, len(test_loader.sampler), args.batch_size)

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args, compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression.  Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if (args.resume and not args.reset_optimizer) else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        # zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"
        distiller.remove_filters(model, compression_scheduler.zeros_mask_dict,
                                 args.arch, args.dataset, optimizer=None)
        apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler,
                                 name="{}_thinned".format(args.resume.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print("Note: your model may have collapsed to random inference, so you may want to fine-tune")
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_checkpoint(teacher, chkpt_file=args.kd_resume)[0]
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy, range(args.kd_start_epoch, args.epochs, 1))

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    if getattr(compression_scheduler, 'global_policy_end_epoch', None) is not None:
        if compression_scheduler.global_policy_end_epoch >= (start_epoch + args.epochs):
            msglogger.warning('scheduler requires at least {} epochs, but only {} are sanctioned'
                              .format(compression_scheduler.global_policy_end_epoch, args.epochs))

    accumulated_training_steps = resumed_training_steps if resumed_training_steps is not None else 0
    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            try:
                train(train_loader, model, criterion, optimizer, epoch, accumulated_training_steps,
                      compression_scheduler, loggers=[tflogger, pylogger], args=args)
            except RuntimeError as e:
                if 'cuda out of memory' in str(e).lower():
                    msglogger.error('CUDA memory failure has been detected.\n'
                                    'Sometimes it helps to decrease batch size.\n'
                                    'e.g. Add the following flag to your call: --batch-size={}'
                                    .format(args.batch_size // 10))
                raise
            distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(epoch, "train", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))
        accumulated_training_steps += math.ceil(len(train_loader.sampler) / train_loader.batch_size)

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch)
            distiller.log_activation_statsitics(epoch, "valid", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)]))
        tflogger.log_training_progress(stats, epoch, None)

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        if getattr(compression_scheduler, 'global_policy_end_epoch', None) is None or (
                compression_scheduler.global_policy_end_epoch <= epoch):
            # Update the list of top scores achieved since all policies have concluded
            if top1 > 0:
                best_epochs.append(distiller.MutableNamedTuple(
                    {'top1': top1, 'top5': top5, 'epoch': epoch}))
            # Keep best_epochs sorted from best to worst
            # Sort by top1 first, secondary sort by top5, and so forth
            best_epochs.sort(key=operator.attrgetter('top1', 'top5', 'epoch'), reverse=True)
            for score in best_epochs[:args.num_best_scores]:
                msglogger.info('==> Best Top1: %.3f   Top5: %.3f   on epoch: %d',
                               score.top1, score.top5, score.epoch)

        is_best = best_epochs and (epoch == best_epochs[0].epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer, compression_scheduler,
                                 best_epochs[0].top1 if best_epochs else None, is_best,
                                 args.name, msglogger.logdir, accumulated_training_steps)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
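# A minimal sketch of the CompressionScheduler callback protocol that the
# train() call above relies on (epoch- and minibatch-level hooks).  The
# argument names are placeholders; the hook signatures are assumed from how
# the scheduler is used in this file, so treat this as a sketch rather than
# the definitive API.
def sketch_train_with_scheduler(scheduler, train_loader, model, criterion, optimizer, epoch):
    steps_per_epoch = len(train_loader)
    for step, (inputs, target) in enumerate(train_loader):
        scheduler.on_minibatch_begin(epoch, step, steps_per_epoch, optimizer)
        output = model(inputs)
        loss = criterion(output, target)
        # Policies may add regularization/distillation terms to the loss here
        loss = scheduler.before_backward_pass(epoch, step, steps_per_epoch, loss, optimizer)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Gives pruning policies a chance to re-apply masks after the weight update
        scheduler.on_minibatch_end(epoch, step, steps_per_epoch, optimizer)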
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    global model
    global optimizer
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    global train_loader
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    global val_loader
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # add quantization scheduler
    global compression_scheduler
    compression_scheduler = distiller.CompressionScheduler(model)
    compression_scheduler = distiller.file_config(
        model, optimizer,
        '/home/hguan2/workspace/fault-tolerance/nips19/quant_aware_training.yaml',
        compression_scheduler, (args.start_epoch - 1) if args.resume else None)
    model.cuda()

    global epoch
    info = "before training, large_weights_count: {}".format(check_large_weights_count(model))
    print(info)
    write_log(info)

    if args.mode == Mode.ADMM:
        global W_list, Z_list, U_list
        W_list = []
        Z_list = []
        U_list = []
        init_admm_param(model, W_list, Z_list, U_list)
        print("Initialize ADMM parameters...")
        print("W_list:", W_list[0].view(-1)[:5])
        print("Z_list:", Z_list[0].view(-1)[:5])
        print("U_list:", U_list[0].view(-1)[:5])

    for epoch in range(args.start_epoch, args.epochs):
        compression_scheduler.on_epoch_begin(epoch)
        if args.distributed:
            train_sampler.set_epoch(epoch)
        # adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        regularized_train(compression_scheduler, train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)
        info = "epoch: {}, iteration: {}, after epoch, large_weights_count: {}, accuracy: {:.3f}".format(
            epoch, len(train_loader), check_large_weights_count(model), acc1)
        write_log(info)
        print(info)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        compression_scheduler.on_epoch_end(epoch, optimizer)

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                                                    and args.rank % ngpus_per_node == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best)

        # when it is clipping-based training, stop whenever the accuracy is met
        if args.mode == Mode.WOT and best_acc1 >= target_accs[args.arch] + 1e-5:
            print("Reached best accuracy: {:.3f}, target_acc: {:.3f}".format(
                best_acc1, target_accs[args.arch]))
            return
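# init_admm_param() is defined elsewhere; a minimal sketch of the usual ADMM
# bookkeeping it would perform (this reconstruction is an assumption, not the
# author's implementation): W holds references to the constrained weights,
# Z holds their projected copies, and U holds the scaled dual variables.
import torch

def sketch_init_admm_param(model, W_list, Z_list, U_list):
    for name, param in model.named_parameters():
        if param.dim() > 1:  # constrain weight tensors only, skip biases
            W_list.append(param)
            Z_list.append(param.detach().clone())  # Z starts equal to W
            U_list.append(torch.zeros_like(param))  # dual variable starts at 0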
def load_checkpoint(model, chkpt_file, optimizer=None):
    """Load a pytorch training checkpoint

    Args:
        model: the pytorch model to which we will load the parameters
        chkpt_file: the checkpoint file
        optimizer: the optimizer to which we will load the serialized state
    """
    if not os.path.isfile(chkpt_file):
        raise IOError(ENOENT, 'Could not find a checkpoint file at', chkpt_file)

    msglogger.info("=> loading checkpoint %s", chkpt_file)
    checkpoint = torch.load(chkpt_file, map_location=lambda storage, loc: storage)
    msglogger.debug("\n\t".join(['Checkpoint keys:'] + list(checkpoint)))

    if 'state_dict' not in checkpoint:
        raise ValueError("Checkpoint must contain the model parameters under the key 'state_dict'")

    checkpoint_epoch = checkpoint.get('epoch', None)
    start_epoch = checkpoint_epoch + 1 if checkpoint_epoch is not None else 0

    best_top1 = checkpoint.get('best_top1', None)
    if best_top1 is not None:
        msglogger.info("   best top@1: %.3f", best_top1)

    compression_scheduler = None
    normalize_dataparallel_keys = False
    if 'compression_sched' in checkpoint:
        compression_scheduler = distiller.CompressionScheduler(model)
        try:
            compression_scheduler.load_state_dict(checkpoint['compression_sched'],
                                                  normalize_dataparallel_keys)
        except KeyError as e:
            # A very common source of this KeyError is loading a GPU model on the CPU.
            # We rename all of the DataParallel keys because DataParallel does not execute on the CPU.
            normalize_dataparallel_keys = True
            compression_scheduler.load_state_dict(checkpoint['compression_sched'],
                                                  normalize_dataparallel_keys)
        msglogger.info("Loaded compression schedule from checkpoint (epoch {})".format(checkpoint_epoch))
    else:
        msglogger.info("Warning: compression schedule data does not exist in the checkpoint")

    if 'thinning_recipes' in checkpoint:
        if 'compression_sched' not in checkpoint:
            raise KeyError("Found thinning_recipes key, but missing mandatory key compression_sched")
        msglogger.info("Loaded a thinning recipe from the checkpoint")
        # Cache the recipes in case we need them later
        model.thinning_recipes = checkpoint['thinning_recipes']
        if normalize_dataparallel_keys:
            model.thinning_recipes = [distiller.get_normalized_recipe(recipe)
                                      for recipe in model.thinning_recipes]
        distiller.execute_thinning_recipes_list(model,
                                                compression_scheduler.zeros_mask_dict,
                                                model.thinning_recipes)

    if 'quantizer_metadata' in checkpoint:
        msglogger.info('Loaded quantizer metadata from the checkpoint')
        qmd = checkpoint['quantizer_metadata']
        quantizer = qmd['type'](model, **qmd['params'])
        quantizer.prepare_model()

    msglogger.info("=> loaded checkpoint '{f}' (epoch {e})".format(f=str(chkpt_file), e=checkpoint_epoch))

    if normalize_dataparallel_keys:
        checkpoint['state_dict'] = {normalize_module_name(k): v
                                    for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(checkpoint['state_dict'])
    return (model, compression_scheduler, start_epoch)
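# normalize_module_name() comes from distiller's utilities; a minimal sketch of
# the renaming it is used for above (assumed behavior, shown here only to make
# the DataParallel issue concrete): strip the 'module.' prefix that
# nn.DataParallel adds, so GPU-saved state dicts load into plain CPU models.
def sketch_normalize_module_name(layer_name):
    if layer_name.startswith('module.'):
        return layer_name[len('module.'):]
    return layer_name

# e.g. sketch_normalize_module_name('module.layer1.0.conv1.weight') -> 'layer1.0.conv1.weight'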
def main(args):
    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    if utils.is_main_process():
        msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'),
                                             args.name, args.output_dir, args.verbose)

        # Log various details about the execution environment.  It is sometimes useful
        # to refer to past experiment executions and this information may be useful.
        apputils.log_execution_env_state(
            filter(None, [args.compress, args.qe_stats_file]),  # remove both None and empty strings
            msglogger.logdir)
        msglogger.debug("Distiller: %s", distiller.__version__)
    else:
        msglogger = logging.getLogger()
        msglogger.disabled = True

    # Data loading code
    print("Loading data")
    dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True), args.data_path)
    dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False), args.data_path)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, args.batch_size,
                                                            drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    print("Creating model")
    model = detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained)
    patch_fastrcnn(model)
    model.to(device)

    if args.summary:
        if utils.is_main_process():
            for summary in args.summary:
                distiller.model_summary(model, summary, args.dataset)
        return

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps,
                                                        gamma=args.lr_gamma)

    compression_scheduler = None
    if utils.is_main_process():
        # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
        # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
        tflogger = TensorBoardLogger(msglogger.logdir)
        pylogger = PythonLogger(msglogger)

    if args.compress:
        # The main use-case for this sample application is CNN compression.  Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer, args.compress,
                                                      compression_scheduler, None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.qe_calibration:
        def test_fn(model):
            return evaluate(model, data_loader_test, device=device)
        collect_quant_stats(model_without_ddp, test_fn, save_dir=args.output_dir,
                            modules_to_collect=['backbone', 'rpn', 'roi_heads'])
        # We skip `.transform` because it is a pre-processing unit.
        return

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        if compression_scheduler and 'compression_scheduler' in checkpoint:
            compression_scheduler.load_state_dict(checkpoint['compression_scheduler'])

    if args.test_only:
        evaluate(model, data_loader_test, device=device)
        return

    activations_collectors = create_activation_stats_collectors(model, *args.activation_stats)

    print("Start training")
    start_time = time.time()
    # if not isinstance(model, nn.DataParallel) and torch.cuda.is_available() \
    #         and torch.cuda.device_count() > 1:
    #     msglogger.info("Using %d GPUs on DataParallel." % torch.cuda.device_count())
    #     model = nn.DataParallel(model)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
            dist.barrier()
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        with collectors_context(activations_collectors["train"]) as collectors:
            train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq,
                            compression_scheduler)
            if utils.is_main_process():
                distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
                distiller.log_activation_statsitics(epoch, "train", loggers=[tflogger],
                                                    collector=collectors["sparsity"])
            if args.masks_sparsity and utils.is_main_process():
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))

        lr_scheduler.step()
        if args.output_dir:
            save_dict = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args}
            if compression_scheduler:
                save_dict['compression_scheduler'] = compression_scheduler.state_dict()
            utils.save_on_master(save_dict,
                                 os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

        # evaluate after every epoch
        evaluate(model, data_loader_test, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
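# A minimal sketch of why train_sampler.set_epoch(epoch) is called at each
# epoch boundary above: DistributedSampler seeds its shuffle with its stored
# epoch, so skipping the call would replay the epoch-0 permutation every epoch.
# num_replicas/rank are given explicitly here so no process group is needed;
# the stand-in dataset is an assumption for illustration only.
import torch
from torch.utils.data.distributed import DistributedSampler

sketch_dataset = torch.arange(8)  # any sized sequence works as a stand-in
sketch_sampler = DistributedSampler(sketch_dataset, num_replicas=2, rank=0, shuffle=True)
for sketch_epoch in range(2):
    sketch_sampler.set_epoch(sketch_epoch)
    print(sketch_epoch, list(sketch_sampler))  # this rank's shard of a per-epoch permutation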
def dict_config(model, optimizer, sched_dict, scheduler=None):
    app_cfg_logger.debug('Schedule contents:\n' + json.dumps(sched_dict, indent=2))

    if scheduler is None:
        scheduler = distiller.CompressionScheduler(model)

    pruners = __factory('pruners', model, sched_dict)
    regularizers = __factory('regularizers', model, sched_dict)
    quantizers = __factory('quantizers', model, sched_dict, optimizer=optimizer)
    if len(quantizers) > 1:
        raise ValueError("\nError: Multiple Quantizers not supported")
    extensions = __factory('extensions', model, sched_dict)

    try:
        lr_policies = []
        for policy_def in sched_dict['policies']:
            policy = None
            if 'pruner' in policy_def:
                try:
                    instance_name, args = __policy_params(policy_def, 'pruner')
                except TypeError as e:
                    print('\n\nFatal Error: a policy is defined with a null pruner')
                    print('Here\'s the policy definition for your reference:\n{}'
                          .format(json.dumps(policy_def, indent=1)))
                    raise
                assert instance_name in pruners, \
                    "Pruner {} was not defined in the list of pruners".format(instance_name)
                pruner = pruners[instance_name]
                policy = distiller.PruningPolicy(pruner, args)

            elif 'regularizer' in policy_def:
                instance_name, args = __policy_params(policy_def, 'regularizer')
                assert instance_name in regularizers, \
                    "Regularizer {} was not defined in the list of regularizers".format(instance_name)
                regularizer = regularizers[instance_name]
                if args is None:
                    policy = distiller.RegularizationPolicy(regularizer)
                else:
                    policy = distiller.RegularizationPolicy(regularizer, **args)

            elif 'quantizer' in policy_def:
                instance_name, args = __policy_params(policy_def, 'quantizer')
                assert instance_name in quantizers, \
                    "Quantizer {} was not defined in the list of quantizers".format(instance_name)
                quantizer = quantizers[instance_name]
                policy = distiller.QuantizationPolicy(quantizer)

            elif 'lr_scheduler' in policy_def:
                # LR schedulers take an optimizer in their CTOR, so postpone handling until we're certain
                # a quantization policy was initialized (if exists)
                lr_policies.append(policy_def)
                continue

            elif 'extension' in policy_def:
                instance_name, args = __policy_params(policy_def, 'extension')
                assert instance_name in extensions, \
                    "Extension {} was not defined in the list of extensions".format(instance_name)
                extension = extensions[instance_name]
                policy = extension

            else:
                raise ValueError("\nFATAL Parsing error while parsing the pruning schedule - "
                                 "unknown policy [{}]".format(policy_def))

            add_policy_to_scheduler(policy, policy_def, scheduler)

        # Any changes to the optimizer caused by a quantizer have occurred by now,
        # so it's safe to create the LR schedulers
        lr_schedulers = __factory('lr_schedulers', model, sched_dict, optimizer=optimizer)
        for policy_def in lr_policies:
            instance_name, args = __policy_params(policy_def, 'lr_scheduler')
            assert instance_name in lr_schedulers, \
                "LR-scheduler {} was not defined in the list of lr-schedulers".format(instance_name)
            lr_scheduler = lr_schedulers[instance_name]
            policy = distiller.LRPolicy(lr_scheduler)
            add_policy_to_scheduler(policy, policy_def, scheduler)

    except AssertionError:
        # propagate the assertion information
        raise
    except Exception as exception:
        print("\nFATAL Parsing error!\n%s" % json.dumps(policy_def, indent=1))
        print("Exception: %s %s" % (type(exception), exception))
        raise
    return scheduler
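# A minimal sketch of a schedule dict that dict_config() accepts -- the
# Python-side equivalent of a YAML schedule file.  The pruner name, target
# level and epochs are illustrative values, not taken from a real schedule.
sketch_sched_dict = {
    'pruners': {
        'my_pruner': {
            'class': 'SparsityLevelParameterPruner',
            'levels': {'module.conv1.weight': 0.5},
        }
    },
    'policies': [
        {
            'pruner': {'instance_name': 'my_pruner'},
            'starting_epoch': 0,
            'ending_epoch': 10,
            'frequency': 1,
        }
    ],
}
# scheduler = dict_config(model, optimizer, sketch_sched_dict)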
def main():
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 90

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'),
                                         args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress, msglogger.logdir, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        distiller.set_deterministic()  # Use a well-known seed, for repeatability of experiments
    else:
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError('ERROR: Argument --gpus must be a comma-separated list of integers only')
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                     .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained, args.dataset, args.arch,
                         parallel=not args.load_serialized, device_ids=args.gpus)
    compression_scheduler = None

    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds)

    # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1
    if args.deprecated_resume:
        msglogger.warning('The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.')
        if not args.reset_optimizer:
            msglogger.warning('If you wish to also reset the optimizer, call with: --reset-optimizer')
            args.reset_optimizer = True
        args.resumed_checkpoint_path = args.deprecated_resume

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model, args.load_model_path, model_device=args.device)

    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info('\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0')

    # Define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(args.device)

    if optimizer is None:
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer, pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info('\tStats will be collected for {:.1%} of test dataset'.format(args.qe_calibration))
        msglogger.info('\tSetting constant seeds and converting model to serialized execution')
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(create_quantization_stats_collector(model))
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size, args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args, compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression.  Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch-1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        # zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model, compression_scheduler.zeros_mask_dict,
                                 args.arch, args.dataset, optimizer=None)
        apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler,
                                 name="{}_thinned".format(args.resumed_checkpoint_path.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print("Note: your model may have collapsed to random inference, so you may want to fine-tune")
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs, frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    if start_epoch >= ending_epoch:
        msglogger.error('epoch count is too low, starting epoch is {} but total epochs set to {}'.format(
            start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')

    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(
                epoch, metrics=(vloss if (epoch != start_epoch) else 10**6))

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader, model, criterion, optimizer, epoch, compression_scheduler,
                  loggers=[tflogger, pylogger], args=args)
            distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(epoch, "train", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch)
            distiller.log_activation_statsitics(epoch, "valid", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1,
                                        log_freq=1, loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, top1, top5, epoch,
                                       args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {'current_top1': top1,
                             'best_top1': perf_scores_history[0].top1,
                             'best_epoch': perf_scores_history[0].epoch}
        apputils.save_checkpoint(epoch, args.arch, model, optimizer=optimizer,
                                 scheduler=compression_scheduler, extras=checkpoint_extras,
                                 is_best=is_best, name=args.name, dir=msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
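# update_training_scores_history() is defined elsewhere in this application; a
# minimal sketch of the bookkeeping it is used for above (this reconstruction
# is an assumption, not the author's implementation): keep the history sorted
# best-first so index 0 is the best epoch, which is what the is_best test and
# checkpoint_extras rely on.
import operator

def sketch_update_training_scores_history(perf_scores_history, top1, top5, epoch, num_best_scores):
    perf_scores_history.append(distiller.MutableNamedTuple(
        {'top1': top1, 'top5': top5, 'epoch': epoch}))
    # Sort by top1, break ties by top5 and then by epoch
    perf_scores_history.sort(key=operator.attrgetter('top1', 'top5', 'epoch'), reverse=True)
    for score in perf_scores_history[:num_best_scores]:
        msglogger.info('==> Best Top1: %.3f  Top5: %.3f  on epoch: %d',
                       score.top1, score.top5, score.epoch)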
def main():
    global msglogger
    check_pytorch_version()
    args = parser.parse_args()
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'), args.name)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(sys.argv, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_top1 = 0

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error('ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1')
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        torch.manual_seed(0)
        random.seed(0)
        np.random.seed(0)
        cudnn.deterministic = True
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error('ERROR: Argument --gpus must be a comma-separated list of integers only')
            exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                .format(dev_id, available_gpus))
                exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'

    # Create the model
    png_summary = args.summary is not None and args.summary.startswith('png')
    is_parallel = not png_summary  # For PNG summary, parallel graphs are illegible
    model = create_model(args.pretrained, args.dataset, args.arch,
                         parallel=is_parallel, device_ids=args.gpus)

    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # We can optionally resume from a checkpoint
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
        if 'resnet' in args.arch and 'cifar' in args.arch:
            distiller.resnet_cifar_remove_layers(model)
            # model = distiller.resnet_cifar_remove_channels(model, compression_scheduler.zeros_mask_dict)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    msglogger.info("Optimizer (%s): momentum=%s decay=%s",
                   type(optimizer), args.momentum, args.weight_decay)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        which_summary = args.summary
        if which_summary.startswith('png'):
            apputils.draw_img_classifier_to_file(model, 'model.png', args.dataset,
                                                 which_summary == 'png_w_params')
        else:
            distiller.model_summary(model, optimizer, which_summary, args.dataset)
        exit()

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.deterministic)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler))

    activations_sparsity = None
    if args.activation_stats:
        # If your model has ReLU layers, then those layers have sparse activations.
        # ActivationSparsityCollector will collect information about this sparsity.
        # WARNING! Enabling activation sparsity collection will significantly slow down training!
        activations_sparsity = ActivationSparsityCollector(model)

    if args.sensitivity is not None:
        # This sample application can be invoked to execute Sensitivity Analysis on your
        # model.  The output is saved to CSV and PNG.
        msglogger.info("Running sensitivity tests")
        test_fnc = partial(test, test_loader=test_loader, criterion=criterion,
                           loggers=[pylogger], print_freq=args.print_freq)
        which_params = [param_name for param_name, _ in model.named_parameters()]
        sensitivity = distiller.perform_sensitivity_analysis(
            model,
            net_params=which_params,
            sparsities=np.arange(0.0, 0.50, 0.05) if args.sensitivity == 'filter'
                       else np.arange(0.0, 0.95, 0.05),
            test_func=test_fnc,
            group=args.sensitivity)
        distiller.sensitivities_to_png(sensitivity, 'sensitivity.png')
        distiller.sensitivities_to_csv(sensitivity, 'sensitivity.csv')
        exit()

    if args.evaluate:
        # This sample application can be invoked to evaluate the accuracy of your model on
        # the test dataset.
        # You can optionally quantize the model to 8-bit integer before evaluation.
        # For example:
        # python3 compress_classifier.py --arch resnet20_cifar ../data.cifar10 -p=50 --resume=checkpoint.pth.tar --evaluate
        if args.quantize:
            model.cpu()
            quantizer = quantization.SymmetricLinearQuantizer(model, 8, 8)
            quantizer.prepare_model()
            model.cuda()
        top1, _, _ = test(test_loader, model, criterion, [pylogger], args.print_freq)
        if args.quantize:
            checkpoint_name = 'quantized'
            apputils.save_checkpoint(0, args.arch, model, optimizer, best_top1=top1,
                                     name='_'.join([args.name, checkpoint_name])
                                          if args.name else checkpoint_name)
        exit()

    if args.compress:
        # The main use-case for this sample application is CNN compression.  Compression
        # requires a compression schedule configuration file in YAML.
        source = args.compress
        msglogger.info("Compression schedule (source=%s)", source)
        compression_scheduler = distiller.CompressionScheduler(model)
        distiller.config.file_config(model, optimizer, args.compress, compression_scheduler)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, compression_scheduler,
              loggers=[tflogger, pylogger], print_freq=args.print_freq,
              log_params_hist=args.log_params_histograms)
        distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
        if args.activation_stats:
            distiller.log_activation_sparsity(epoch, loggers=[tflogger, pylogger],
                                              collector=activations_sparsity)

        # evaluate on validation set
        top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args.print_freq, epoch)
        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1,
                                        log_freq=1, loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch)

        # remember best top1 and save checkpoint
        is_best = top1 > best_top1
        best_top1 = max(top1, best_top1)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer, compression_scheduler,
                                 best_top1, is_best, args.name)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], args.print_freq)
def load_checkpoint(model, chkpt_file, optimizer=None, model_device=None, *,
                    lean_checkpoint=False, strict=False):
    """Load a pytorch training checkpoint.

    Args:
        model: the pytorch model to which we will load the parameters
        chkpt_file: the checkpoint file
        lean_checkpoint: if set, read into model only 'state_dict' field
        optimizer: [deprecated argument]
        model_device [str]: if set, call model.to($model_device)
                This should be set to either 'cpu' or 'cuda'.
    :returns: updated model, compression_scheduler, optimizer, start_epoch
    """
    if not os.path.isfile(chkpt_file):
        raise IOError(ENOENT, 'Could not find a checkpoint file at', chkpt_file)

    msglogger.info("=> loading checkpoint %s", chkpt_file)
    checkpoint = torch.load(chkpt_file, map_location=lambda storage, loc: storage)

    msglogger.info('=> Checkpoint contents:\n%s\n' % get_contents_table(checkpoint))
    if 'extras' in checkpoint:
        msglogger.info("=> Checkpoint['extras'] contents:\n{}\n".format(
            get_contents_table(checkpoint['extras'])))

    if 'state_dict' not in checkpoint:
        raise ValueError("Checkpoint must contain the model parameters under the key 'state_dict'")

    checkpoint_epoch = checkpoint.get('epoch', None)
    start_epoch = checkpoint_epoch + 1 if checkpoint_epoch is not None else 0

    compression_scheduler = None
    normalize_dataparallel_keys = False
    if 'compression_sched' in checkpoint:
        compression_scheduler = distiller.CompressionScheduler(model)
        try:
            compression_scheduler.load_state_dict(checkpoint['compression_sched'],
                                                  normalize_dataparallel_keys)
        except KeyError as e:
            # A very common source of this KeyError is loading a GPU model on the CPU.
            # We rename all of the DataParallel keys because DataParallel does not execute on the CPU.
            normalize_dataparallel_keys = True
            compression_scheduler.load_state_dict(checkpoint['compression_sched'],
                                                  normalize_dataparallel_keys)
        msglogger.info("Loaded compression schedule from checkpoint (epoch {})".format(checkpoint_epoch))
    else:
        msglogger.info("Warning: compression schedule data does not exist in the checkpoint")

    if 'thinning_recipes' in checkpoint:
        if 'compression_sched' not in checkpoint:
            raise KeyError("Found thinning_recipes key, but missing mandatory key compression_sched")
        msglogger.info("Loaded a thinning recipe from the checkpoint")
        # Cache the recipes in case we need them later
        model.thinning_recipes = checkpoint['thinning_recipes']
        if normalize_dataparallel_keys:
            model.thinning_recipes = [distiller.get_normalized_recipe(recipe)
                                      for recipe in model.thinning_recipes]
        distiller.execute_thinning_recipes_list(model,
                                                compression_scheduler.zeros_mask_dict,
                                                model.thinning_recipes)

    if 'quantizer_metadata' in checkpoint:
        msglogger.info('Loaded quantizer metadata from the checkpoint')
        qmd = checkpoint['quantizer_metadata']
        quantizer = qmd['type'](model, **qmd['params'])
        quantizer.prepare_model(qmd['dummy_input'])

    if normalize_dataparallel_keys:
        checkpoint['state_dict'] = {normalize_module_name(k): v
                                    for k, v in checkpoint['state_dict'].items()}
    anomalous_keys = model.load_state_dict(checkpoint['state_dict'], strict)
    if anomalous_keys:
        # This is pytorch 1.1+
        missing_keys, unexpected_keys = anomalous_keys
        if unexpected_keys:
            msglogger.warning("Warning: the loaded checkpoint (%s) contains %d unexpected state keys"
                              % (chkpt_file, len(unexpected_keys)))
        if missing_keys:
            raise ValueError("The loaded checkpoint (%s) is missing %d state keys"
                             % (chkpt_file, len(missing_keys)))

    if model_device is not None:
        model.to(model_device)

    if lean_checkpoint:
        msglogger.info("=> loaded 'state_dict' from checkpoint '{}'".format(str(chkpt_file)))
        return (model, None, None, 0)

    def _load_optimizer(cls, src_state_dict, model):
        """Initiate optimizer with model parameters and load src_state_dict"""
        # initiate the dest_optimizer with a dummy learning rate,
        # this is required to support SGD.__init__()
        dest_optimizer = cls(model.parameters(), lr=1)
        dest_optimizer.load_state_dict(src_state_dict)
        return dest_optimizer

    try:
        optimizer = _load_optimizer(checkpoint['optimizer_type'],
                                    checkpoint['optimizer_state_dict'], model)
    except KeyError:
        # Older checkpoints do not support optimizer loading: they either had an 'optimizer' field
        # (different name) which was not used during the load, or they didn't even checkpoint
        # the optimizer.
        optimizer = None

    if optimizer is not None:
        msglogger.info('Optimizer of type {type} was loaded from checkpoint'.format(type=type(optimizer)))
        msglogger.info('Optimizer Args: {}'.format(
            dict((k, v) for k, v in optimizer.state_dict()['param_groups'][0].items()
                 if k != 'params')))
    else:
        msglogger.warning('Optimizer could not be loaded from checkpoint.')

    msglogger.info("=> loaded checkpoint '{f}' (epoch {e})".format(
        f=str(chkpt_file), e=checkpoint_epoch))
    return (model, compression_scheduler, optimizer, start_epoch)
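# Usage sketch for the loader above (the file name and architecture are
# placeholders): a full resume returns the scheduler, optimizer and start
# epoch, while lean_checkpoint=True restores the weights only.
# model = create_model(False, 'cifar10', 'resnet20_cifar')
# model, scheduler, optimizer, start_epoch = load_checkpoint(model, 'checkpoint.pth.tar')
# model, _, _, _ = load_checkpoint(model, 'checkpoint.pth.tar', lean_checkpoint=True)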
def objective(space):
    global model
    global count
    global best_dict

    # Explore new model
    model = create_model(False, args.dataset, args.arch, device_ids=args.gpus)
    if args.resume:
        model, _, _ = apputils.load_checkpoint(model, chkpt_file=args.resume)

    count += 1
    print('{} trial starting...'.format(count))

    # Objective function: F(Acc, Lat) = (1 - Acc.) + (alpha * Sparsity)
    accuracy = 0
    # alpha = 0.2  # Hyperparameter: the weight of the sparsity term (a proxy for inference time)
    alpha = 1.0  # Hyperparameter: the weight of the sparsity term (a proxy for inference time)
    sparsity = 0.0

    # Training hyperparameters
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    """
    distiller/distiller/config.py
        # Element-wise sparsity
        sparsity_levels = {net_param: sparsity_level}
        pruner = distiller.pruning.SparsityLevelParameterPruner(name='sensitivity', levels=sparsity_levels)
        policy = distiller.PruningPolicy(pruner, pruner_args=None)
        scheduler = distiller.CompressionScheduler(model)
        scheduler.add_policy(policy, epochs=[0, 2, 4])
    # Local search: add one pruner per layer
    """
    sparsity_levels = {}
    for key, value in space.items():
        sparsity_levels[key] = value

    pruner = distiller.pruning.SparsityLevelParameterPruner(name='sensitivity', levels=sparsity_levels)
    policy = distiller.PruningPolicy(pruner, pruner_args=None)
    lrpolicy = distiller.LRPolicy(torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1))
    compression_scheduler = distiller.CompressionScheduler(model)
    # compression_scheduler.add_policy(policy, epochs=[90])
    compression_scheduler.add_policy(policy, epochs=[0])
    compression_scheduler.add_policy(lrpolicy, starting_epoch=0, ending_epoch=90, frequency=1)

    """
    distiller/example/classifier_compression/compress_classifier.py
    For each epoch:
        compression_scheduler.on_epoch_begin(epoch)
        train()
        save_checkpoint()
        compression_scheduler.on_epoch_end(epoch)

    train():
        For each training step:
            compression_scheduler.on_minibatch_begin(epoch)
            output = model(input)
            loss = criterion(output, target)
            compression_scheduler.before_backward_pass(epoch)
            loss.backward()
            optimizer.step()
            compression_scheduler.on_minibatch_end(epoch)
    """
    for i in range(args.epochs):
        compression_scheduler.on_epoch_begin(i)
        train_accuracy = train(i, criterion, optimizer, compression_scheduler)
        val_accuracy = validate()  # Validate hyperparameter setting
        t, sparsity = distiller.weights_sparsity_tbl_summary(model, return_total_sparsity=True)
        compression_scheduler.on_epoch_end(i, optimizer)
        apputils.save_checkpoint(i, args.arch, model, optimizer, compression_scheduler,
                                 train_accuracy, False, 'hyperopt', './')
        print('{} epochs => train acc: {:.2f}%, val acc: {:.2f}%'.format(i, train_accuracy, val_accuracy))

    test_accuracy = validate(test_loader)  # Validate hyperparameter setting

    # Objective function here.
    # score = (1 - (val_accuracy / 100.)) + (alpha * (1 - sparsity / 100.))
    # Typical ranges -- accuracy: 98~90%, sparsity: 80%~50%
    score = -((val_accuracy / 100.) ** 2 - 0.9 ** 2 + alpha * ((sparsity / 100.) ** 2 - 0.5 ** 2))
    print('{} trials: score: {:.2f}\ttrain acc: {:.2f}%\tval acc: {:.2f}%\ttest acc: {:.2f}%\tsparsity: {:.2f}%'
          .format(count, score, train_accuracy, val_accuracy, test_accuracy, sparsity))

    if score < best_dict['score']:
        best_dict['trial'] = count
        best_dict['score'] = score
        best_dict['tr_acc'] = train_accuracy
        best_dict['v_acc'] = val_accuracy
        best_dict['te_acc'] = test_accuracy
        best_dict['sparsity'] = sparsity
        best_dict['model_best'] = copy.deepcopy(model)

    return score
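# A minimal sketch of how an objective like the one above is typically driven
# with hyperopt.  The parameter name 'module.conv1.weight' and the search
# bounds are illustrative assumptions, not values taken from this file.
from hyperopt import fmin, tpe, hp

sketch_space = {
    'module.conv1.weight': hp.uniform('module.conv1.weight', 0.0, 0.9),
}
# fmin() minimizes the returned score, so lower scores mean better trials:
# best = fmin(fn=objective, space=sketch_space, algo=tpe.suggest, max_evals=50)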