Example #1
0
    def test_speedup_integration(self):
        """Prune several torchvision models, speed them up, and check that the
        speeded-up network's output stays close to the masked original's."""
        model_names = [
            'resnet18', 'squeezenet1_1', 'mobilenet_v2', 'densenet121',
            'inception_v3'
        ]
        for model_name in model_names:
            model_cls = getattr(models, model_name)
            original = model_cls(pretrained=True, progress=False).to(device)
            compact = model_cls().to(device)
            # eval mode is required so BN/dropout behave deterministically
            original.eval()
            compact.eval()
            # Build a random per-layer sparsity config and prune the original.
            config = generate_random_sparsity(original)
            pruner = L1FilterPruner(original, config)
            pruner.compress()
            pruner.export_model(MODEL_FILE, MASK_FILE)
            pruner._unwrap_model()
            compact.load_state_dict(torch.load(MODEL_FILE))
            # Zero out BN biases on both nets so the comparison is unaffected
            # by bias terms that speedup handles differently.
            zero_bn_bias(original)
            zero_bn_bias(compact)

            data = torch.ones(BATCH_SIZE, 3, 224, 224).to(device)
            ModelSpeedup(compact, data, MASK_FILE).speedup_model()
            ref_sum = torch.sum(original(data)).item()
            new_sum = torch.sum(compact(data)).item()
            print('Sum of the output of %s (before speedup):' % model_name,
                  ref_sum)
            print('Sum of the output of %s (after speedup):' % model_name,
                  new_sum)
            diff = abs(ref_sum - new_sum)
            # Accept either a small relative error or a small absolute error.
            assert (diff / abs(ref_sum) < RELATIVE_THRESHOLD) or \
                   (diff < ABSOLUTE_THRESHOLD)
def build_model(model_class, channelcfg):
    """Build a pretrained ``model_class``, prune its Conv2d layers down to the
    channel counts in ``channelcfg``, speed the model up in place, and return it.

    ``channelcfg[i]`` is the desired number of remaining output channels of the
    i-th Conv2d in ``named_modules`` order; the result is asserted to match.
    """
    model = model_class(pretrained=True).cuda()
    # FIX: dummy_input was commented out but is required by the
    # dependency-aware pruner and ModelSpeedup below (NameError otherwise).
    dummy_input = torch.ones(16, 3, 224, 224).cuda()
    cfglist = []
    pos = 0

    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            sparsity = 1 - channelcfg[pos] / module.out_channels
            if sparsity > 0:
                # +1e-5 nudges the ratio so the pruner keeps exactly
                # channelcfg[pos] filters despite float rounding.
                cfglist.append({'sparsity': sparsity + 1e-5, 'op_names': [name], 'op_types': ['Conv2d']})
            pos += 1
    pruner = L1FilterPruner(model, cfglist, dummy_input=dummy_input, dependency_aware=True)
    pruner.compress()
    pruner.export_model('./model.pth', './mask')
    pruner._unwrap_model()
    del pruner
    # Physically remove the masked filters so the model really shrinks.
    ms = ModelSpeedup(model, dummy_input, './mask')
    ms.speedup_model()
    del ms
    # Verify every Conv2d now has exactly the requested channel count.
    pos = 0
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            print(module.out_channels, channelcfg[pos])
            assert module.out_channels == channelcfg[pos]
            pos += 1
    del dummy_input
    torch.cuda.empty_cache()

    return model
Example #3
0
 def test_mask_conflict(self):
     """Randomly prune every Conv2d of each test model, fix the mask
     conflicts, and verify that all layers in the same channel-dependency
     set end up pruning exactly the same filter indices.
     """
     outdir = os.path.join(prefix, 'masks')
     os.makedirs(outdir, exist_ok=True)
     for name in model_names:
         print('Test mask conflict for %s' % name)
         model = getattr(models, name)
         net = model().to(device)
         dummy_input = torch.ones(1, 3, 224, 224).to(device)
         # Randomly generate the prune sparsity for each Conv2d layer;
         # the pruner cannot accept a sparsity of exactly 0 or 1.
         cfglist = []
         for layername, layer in net.named_modules():
             if isinstance(layer, nn.Conv2d):
                 sparsity = np.random.uniform(0.01, 0.99)
                 cfg = {
                     'op_types': ['Conv2d'],
                     'op_names': [layername],
                     'sparsity': sparsity
                 }
                 cfglist.append(cfg)
         pruner = L1FilterPruner(net, cfglist)
         pruner.compress()
         ck_file = os.path.join(outdir, '%s.pth' % name)
         mask_file = os.path.join(outdir, '%s_mask' % name)
         pruner.export_model(ck_file, mask_file)
         pruner._unwrap_model()
         # Fix the mask conflicts between channel-dependent layers.
         mf = MaskConflict(mask_file, net, dummy_input)
         fixed_mask = mf.fix_mask_conflict()
         mf.export(os.path.join(outdir, '%s_fixed_mask' % name))
         # Use the channel-dependency ground truth to check the conflicts
         # were actually resolved.
         for dset in channel_dependency_ground_truth[name]:
             lset = list(dset)
             for i, _ in enumerate(lset):
                 assert fixed_mask[lset[0]]['weight'].size(0) == fixed_mask[
                     lset[i]]['weight'].size(0)
                 w_index1 = self.get_pruned_index(
                     fixed_mask[lset[0]]['weight'])
                 w_index2 = self.get_pruned_index(
                     fixed_mask[lset[i]]['weight'])
                 assert w_index1 == w_index2
                 # FIX: hasattr() on a subscriptable mask entry is always
                 # False for 'bias', so the bias masks were never compared;
                 # use a key lookup instead.
                 if 'bias' in fixed_mask[lset[0]] and fixed_mask[lset[0]]['bias'] is not None:
                     b_index1 = self.get_pruned_index(
                         fixed_mask[lset[0]]['bias'])
                     b_index2 = self.get_pruned_index(
                         fixed_mask[lset[i]]['bias'])
                     assert b_index1 == b_index2
                'resnet.layer1.2.conv1', 'resnet.layer1.2.conv2', 'resnet.layer2.0.conv1', 'resnet.layer2.0.conv2', \
                'resnet.layer2.0.downsample.0', 'resnet.layer2.1.conv1', 'resnet.layer2.1.conv2', 'resnet.layer2.2.conv1', \
                'resnet.layer2.2.conv2', 'resnet.layer2.3.conv1', 'resnet.layer2.3.conv2', 'resnet.layer3.0.conv1', \
                'resnet.layer3.0.conv2', 'resnet.layer3.0.downsample.0', 'resnet.layer3.1.conv1', 'resnet.layer3.1.conv2', \
                'resnet.layer3.2.conv1', 'resnet.layer3.2.conv2', 'resnet.layer3.3.conv1', 'resnet.layer3.3.conv2', \
                'resnet.layer3.4.conv1', 'resnet.layer3.4.conv2', 'resnet.layer3.5.conv1', 'resnet.layer3.5.conv2', \
                'resnet.layer4.0.conv1', 'resnet.layer4.0.conv2', 'resnet.layer4.0.downsample.0', 'resnet.layer4.1.conv1', \
                'resnet.layer4.1.conv2', 'resnet.layer4.2.conv1', 'resnet.layer4.2.conv2',\
                'center.0.0', 'center.1.0',  'decoder4.squeeze.0', 'decoder3.squeeze.0', 'decoder2.squeeze.0','decoder1.squeeze.0', \
            ]
        }]
                     
    # Prune model and test accuracy without fine tuning.
    # print('=' * 10 + 'Test on the pruned model before fine tune' + '=' * 10)
    optimizer_finetune = optimizer
    pruner = L1FilterPruner(model, configure_list, optimizer_finetune)
    model = pruner.compress()

    # Code for fots training
    train_folder_syn = args.train_folder_syn
    train_folder_sample = args.train_folder_sample
    output_path = args.save_dir
    data_set = datasets.MergeText(train_folder_syn, train_folder_sample, datasets.transform, train=True)
    dl = torch.utils.data.DataLoader(data_set, batch_size=args.batch_size, shuffle=True,
                                         sampler=None, batch_sampler=None, num_workers=args.num_workers)
    dl_val = None
    if args.val:
        data_set_val = datasets.MergeText(train_folder_syn, train_folder_sample, datasets.transform, train=False)
        dl_val = torch.utils.data.DataLoader(data_set_val, batch_size=1, shuffle=True,
                                                 sampler=None, batch_sampler=None, num_workers=args.num_workers)        
    max_batches_per_iter_cnt = 2
Example #5
0
def prune_model_l1(model):
    """Prune all Conv2d layers of ``model`` at the global SPARSITY ratio with
    L1FilterPruner and export the weights/masks to MODEL_FILE / MASK_FILE."""
    cfg = [{'sparsity': SPARSITY, 'op_types': ['Conv2d']}]
    l1_pruner = L1FilterPruner(model, cfg)
    l1_pruner.compress()
    l1_pruner.export_model(model_path=MODEL_FILE, mask_path=MASK_FILE)
def main():
    """Train VGG-16 on CIFAR-10, prune all conv layers to 80% sparsity with
    L1FilterPruner, then fine-tune for 40 epochs with knowledge distillation
    from the unpruned model, exporting the best checkpoint."""
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # CIFAR-10 training set with standard pad/crop/flip augmentation.
    train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        './data.cifar10',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.Pad(4),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))
        ])),
                                               batch_size=64,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        './data.cifar10',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))
        ])),
                                              batch_size=200,
                                              shuffle=False)

    model = VGG(depth=16)
    model.to(device)

    # Train the base VGG-16 model
    print('=' * 10 + 'Train the unpruned base model' + '=' * 10)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.1,
                                momentum=0.9,
                                weight_decay=1e-4)
    # Cosine annealing over the full 160-epoch schedule, decaying lr to 0.
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, 160, 0)
    for epoch in range(160):
        print('# Epoch {} #'.format(epoch))
        train(model, device, train_loader, optimizer)
        test(model, device, test_loader)
        lr_scheduler.step(epoch)
    torch.save(model.state_dict(), 'vgg16_cifar10.pth')

    # Test base model accuracy
    print('=' * 10 + 'Test on the original model' + '=' * 10)
    model.load_state_dict(torch.load('vgg16_cifar10.pth'))
    test(model, device, test_loader)
    # top1 = 93.51%

    # Pruning Configuration, all convolution layers are pruned out 80% filters according to the L1 norm
    configure_list = [{
        'sparsity': 0.8,
        'op_types': ['Conv2d'],
    }]

    # Prune model and test accuracy without fine tuning.
    print('=' * 10 + 'Test on the pruned model before fine tune' + '=' * 10)
    pruner = L1FilterPruner(model, configure_list)
    model = pruner.compress()
    test(model, device, test_loader)
    # top1 = 10.00%

    # Fine tune the pruned model for 40 epochs and test accuracy
    print('=' * 10 + 'Fine tuning' + '=' * 10)
    optimizer_finetune = torch.optim.SGD(model.parameters(),
                                         lr=0.001,
                                         momentum=0.9,
                                         weight_decay=1e-4)
    best_top1 = 0
    # The unpruned checkpoint serves as the teacher for knowledge
    # distillation (temperature 5) during fine-tuning.
    kd_teacher_model = VGG(depth=16)
    kd_teacher_model.to(device)
    kd_teacher_model.load_state_dict(torch.load('vgg16_cifar10.pth'))
    kd = KnowledgeDistill(kd_teacher_model, kd_T=5)
    for epoch in range(40):
        pruner.update_epoch(epoch)
        print('# Epoch {} #'.format(epoch))
        train(model, device, train_loader, optimizer_finetune, kd)
        top1 = test(model, device, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Export the best model, 'model_path' stores state_dict of the pruned model,
            # mask_path stores mask_dict of the pruned model
            pruner.export_model(model_path='pruned_vgg16_cifar10.pth',
                                mask_path='mask_vgg16_cifar10.pth')

    # Test the exported model
    print('=' * 10 + 'Test on the pruned model after fine tune' + '=' * 10)
    new_model = VGG(depth=16)
    new_model.to(device)
    new_model.load_state_dict(torch.load('pruned_vgg16_cifar10.pth'))
    test(new_model, device, test_loader)
Example #7
0
def main():
    """Train VGG-16 on CIFAR-10, prune six specific conv layers at 50%
    sparsity (the 'VGG-16-pruned-A' setting from 'Pruning Filters for
    Efficient ConvNets'), then fine-tune and export the best checkpoint."""
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # CIFAR-10 training set with standard pad/crop/flip augmentation.
    train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        './data.cifar10',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.Pad(4),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))
        ])),
                                               batch_size=64,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        './data.cifar10',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))
        ])),
                                              batch_size=200,
                                              shuffle=False)

    model = VGG(depth=16)
    model.to(device)

    # Train the base VGG-16 model
    print('=' * 10 + 'Train the unpruned base model' + '=' * 10)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.1,
                                momentum=0.9,
                                weight_decay=1e-4)
    # Cosine annealing over the full 160-epoch schedule, decaying lr to 0.
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, 160, 0)
    for epoch in range(160):
        train(model, device, train_loader, optimizer)
        test(model, device, test_loader)
        lr_scheduler.step(epoch)
    torch.save(model.state_dict(), 'vgg16_cifar10.pth')

    # Test base model accuracy
    print('=' * 10 + 'Test on the original model' + '=' * 10)
    model.load_state_dict(torch.load('vgg16_cifar10.pth'))
    test(model, device, test_loader)
    # top1 = 93.51%

    # Pruning Configuration, in paper 'PRUNING FILTERS FOR EFFICIENT CONVNETS',
    # Conv_1, Conv_8, Conv_9, Conv_10, Conv_11, Conv_12 are pruned with 50% sparsity, as 'VGG-16-pruned-A'
    configure_list = [{
        'sparsity':
        0.5,
        'op_types': ['default'],
        'op_names': [
            'feature.0', 'feature.24', 'feature.27', 'feature.30',
            'feature.34', 'feature.37'
        ]
    }]

    # Prune model and test accuracy without fine tuning.
    print('=' * 10 + 'Test on the pruned model before fine tune' + '=' * 10)
    pruner = L1FilterPruner(model, configure_list)
    model = pruner.compress()
    test(model, device, test_loader)
    # top1 = 88.19%

    # Fine tune the pruned model for 40 epochs and test accuracy
    print('=' * 10 + 'Fine tuning' + '=' * 10)
    optimizer_finetune = torch.optim.SGD(model.parameters(),
                                         lr=0.001,
                                         momentum=0.9,
                                         weight_decay=1e-4)
    best_top1 = 0
    for epoch in range(40):
        pruner.update_epoch(epoch)
        print('# Epoch {} #'.format(epoch))
        train(model, device, train_loader, optimizer_finetune)
        top1 = test(model, device, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Export the best model, 'model_path' stores state_dict of the pruned model,
            # mask_path stores mask_dict of the pruned model
            pruner.export_model(model_path='pruned_vgg16_cifar10.pth',
                                mask_path='mask_vgg16_cifar10.pth')

    # Test the exported model
    print('=' * 10 + 'Test on the pruned model after fine tune' + '=' * 10)
    new_model = VGG(depth=16)
    new_model.to(device)
    new_model.load_state_dict(torch.load('pruned_vgg16_cifar10.pth'))
    test(new_model, device, test_loader)
def main(args):
    """Prune a trained model with the pruner chosen by ``args.pruner``,
    optionally speed it up and fine-tune it, and write flops/params/accuracy
    results to ``result.json`` under ``args.experiment_data_dir``.
    """
    # prepare dataset
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_loader, val_loader, criterion = get_data(args)
    model, optimizer = get_trained_model_optimizer(args, device, train_loader,
                                                   val_loader, criterion)

    def short_term_fine_tuner(model, epochs=1):
        # Short fine-tuning hook consumed by NetAdaptPruner.
        for epoch in range(epochs):
            train(args, model, device, train_loader, criterion, optimizer,
                  epoch)

    def trainer(model, optimizer, criterion, epoch, callback):
        # One-epoch training hook consumed by ADMM/AutoCompress pruners.
        return train(args,
                     model,
                     device,
                     train_loader,
                     criterion,
                     optimizer,
                     epoch=epoch,
                     callback=callback)

    def evaluator(model):
        # Validation-accuracy hook shared by all pruners.
        return test(model, device, criterion, val_loader)

    # used to save the performance of the original & pruned & finetuned models
    result = {'flops': {}, 'params': {}, 'performance': {}}

    flops, params = count_flops_params(model, get_input_size(args.dataset))
    result['flops']['original'] = flops
    result['params']['original'] = params

    evaluation_result = evaluator(model)
    print('Evaluation result (original model): %s' % evaluation_result)
    result['performance']['original'] = evaluation_result

    # module types to prune, only "Conv2d" supported for channel pruning
    if args.base_algo in ['l1', 'l2']:
        op_types = ['Conv2d']
    elif args.base_algo == 'level':
        op_types = ['default']
    else:
        # FIX: any other base_algo previously left op_types undefined and
        # crashed with NameError two lines later.
        raise ValueError('Unsupported base_algo: %s' % args.base_algo)

    config_list = [{'sparsity': args.sparsity, 'op_types': op_types}]
    dummy_input = get_dummy_input(args, device)

    if args.pruner == 'L1FilterPruner':
        pruner = L1FilterPruner(model, config_list)
    elif args.pruner == 'L2FilterPruner':
        pruner = L2FilterPruner(model, config_list)
    elif args.pruner == 'ActivationMeanRankFilterPruner':
        pruner = ActivationMeanRankFilterPruner(model, config_list)
    elif args.pruner == 'ActivationAPoZRankFilterPruner':
        pruner = ActivationAPoZRankFilterPruner(model, config_list)
    elif args.pruner == 'NetAdaptPruner':
        pruner = NetAdaptPruner(model,
                                config_list,
                                short_term_fine_tuner=short_term_fine_tuner,
                                evaluator=evaluator,
                                base_algo=args.base_algo,
                                experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'ADMMPruner':
        # users are free to change the config here
        if args.model == 'LeNet':
            if args.base_algo in ['l1', 'l2']:
                config_list = [{
                    'sparsity': 0.8,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv2']
                }]
            elif args.base_algo == 'level':
                config_list = [{
                    'sparsity': 0.8,
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_names': ['conv2']
                }, {
                    'sparsity': 0.991,
                    'op_names': ['fc1']
                }, {
                    'sparsity': 0.93,
                    'op_names': ['fc2']
                }]
        else:
            raise ValueError('Example only implemented for LeNet.')
        pruner = ADMMPruner(model,
                            config_list,
                            trainer=trainer,
                            num_iterations=2,
                            training_epochs=2)
    elif args.pruner == 'SimulatedAnnealingPruner':
        pruner = SimulatedAnnealingPruner(
            model,
            config_list,
            evaluator=evaluator,
            base_algo=args.base_algo,
            cool_down_rate=args.cool_down_rate,
            experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'AutoCompressPruner':
        pruner = AutoCompressPruner(
            model,
            config_list,
            trainer=trainer,
            evaluator=evaluator,
            dummy_input=dummy_input,
            num_iterations=3,
            optimize_mode='maximize',
            base_algo=args.base_algo,
            cool_down_rate=args.cool_down_rate,
            admm_num_iterations=30,
            admm_training_epochs=5,
            experiment_data_dir=args.experiment_data_dir)
    else:
        raise ValueError("Pruner not supported.")

    # Pruner.compress() returns the masked model
    # but for AutoCompressPruner, Pruner.compress() returns directly the pruned model
    model = pruner.compress()
    evaluation_result = evaluator(model)
    print('Evaluation result (masked model): %s' % evaluation_result)
    result['performance']['pruned'] = evaluation_result

    if args.save_model:
        pruner.export_model(
            os.path.join(args.experiment_data_dir, 'model_masked.pth'),
            os.path.join(args.experiment_data_dir, 'mask.pth'))
        print('Masked model saved to %s', args.experiment_data_dir)

    # model speed up
    if args.speed_up:
        if args.pruner != 'AutoCompressPruner':
            # Rebuild a fresh (unwrapped) model, load the masked weights,
            # then physically remove pruned filters with ModelSpeedup.
            if args.model == 'LeNet':
                model = LeNet().to(device)
            elif args.model == 'vgg16':
                model = VGG(depth=16).to(device)
            elif args.model == 'resnet18':
                model = ResNet18().to(device)
            elif args.model == 'resnet50':
                model = ResNet50().to(device)
            elif args.model == 'mobilenet_v2':
                model = models.mobilenet_v2(pretrained=False).to(device)

            model.load_state_dict(
                torch.load(
                    os.path.join(args.experiment_data_dir,
                                 'model_masked.pth')))
            masks_file = os.path.join(args.experiment_data_dir, 'mask.pth')

            m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
            m_speedup.speedup_model()
            evaluation_result = evaluator(model)
            print('Evaluation result (speed up model): %s' % evaluation_result)
            result['performance']['speedup'] = evaluation_result

            torch.save(
                model.state_dict(),
                os.path.join(args.experiment_data_dir, 'model_speed_up.pth'))
            print('Speed up model saved to %s', args.experiment_data_dir)
        flops, params = count_flops_params(model, get_input_size(args.dataset))
        result['flops']['speedup'] = flops
        result['params']['speedup'] = params

    if args.fine_tune:
        # Pick an optimizer/scheduler matching the dataset/model combination.
        scheduler = None  # FIX: stay defined for unmatched combinations
        if args.dataset == 'mnist':
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
        elif args.dataset == 'cifar10' and args.model == 'vgg16':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=0.01,
                                        momentum=0.9,
                                        weight_decay=5e-4)
            scheduler = MultiStepLR(optimizer,
                                    milestones=[
                                        int(args.fine_tune_epochs * 0.5),
                                        int(args.fine_tune_epochs * 0.75)
                                    ],
                                    gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet18':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=0.1,
                                        momentum=0.9,
                                        weight_decay=5e-4)
            scheduler = MultiStepLR(optimizer,
                                    milestones=[
                                        int(args.fine_tune_epochs * 0.5),
                                        int(args.fine_tune_epochs * 0.75)
                                    ],
                                    gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet50':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=0.1,
                                        momentum=0.9,
                                        weight_decay=5e-4)
            scheduler = MultiStepLR(optimizer,
                                    milestones=[
                                        int(args.fine_tune_epochs * 0.5),
                                        int(args.fine_tune_epochs * 0.75)
                                    ],
                                    gamma=0.1)
        best_acc = 0
        for epoch in range(args.fine_tune_epochs):
            train(args, model, device, train_loader, criterion, optimizer,
                  epoch)
            if scheduler is not None:
                # FIX: scheduler.step() used to raise NameError when no
                # scheduler was configured for the dataset/model combo.
                scheduler.step()
            acc = evaluator(model)
            if acc > best_acc:
                best_acc = acc
                torch.save(
                    model.state_dict(),
                    os.path.join(args.experiment_data_dir,
                                 'model_fine_tuned.pth'))

        # FIX: best_acc was previously referenced unconditionally after this
        # block, crashing with NameError whenever args.fine_tune was False;
        # report and record the fine-tuned result only when it exists.
        print('Evaluation result (fine tuned): %s' % best_acc)
        print('Fined tuned model saved to %s', args.experiment_data_dir)
        result['performance']['finetuned'] = best_acc

    with open(os.path.join(args.experiment_data_dir, 'result.json'),
              'w+') as f:
        json.dump(result, f)
Example #9
0
    result = {}

    evaluation_result = evaluator(model)
    print('Evaluation result (original model): %s' % evaluation_result)
    result['original'] = evaluation_result

    if args.pruning_mode == 'channel':
        op_types = ['Conv2d']
    elif args.pruning_mode == 'fine_grained':
        op_types = ['default']

    config_list = [{'sparsity': args.sparsity, 'op_types': op_types}]

    if args.pruner == 'L1FilterPruner':
        pruner = L1FilterPruner(model, config_list)
    elif args.pruner == 'SimulatedAnnealingPruner':
        pruner = SimulatedAnnealingPruner(
            model,
            config_list,
            evaluator=evaluator,
            cool_down_rate=args.cool_down_rate,
            experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'NetAdaptPruner':
        pruner = NetAdaptPruner(model,
                                config_list,
                                fine_tuner=fine_tuner,
                                evaluator=evaluator,
                                pruning_mode='channel',
                                pruning_step=args.pruning_step,
                                experiment_data_dir=args.experiment_data_dir)
Example #10
0
def prune_model_l1(model):
    """Prune every Conv2d layer of ``model`` at the global SPARSITY ratio and
    export the pruned weights and masks to fixed paths in the working dir."""
    cfg = [{'sparsity': SPARSITY, 'op_types': ['Conv2d']}]
    filter_pruner = L1FilterPruner(model, cfg)
    filter_pruner.compress()
    # NOTE(review): './11_model.pth' looks like a typo for './l1_model.pth'
    # (compare './l1_mask.pth'), but it is kept as-is to preserve behavior.
    filter_pruner.export_model(model_path='./11_model.pth', mask_path='./l1_mask.pth')