Example #1
def get_model_optimizer_scheduler(args, device, train_loader, test_loader, criterion):
    if args.model == 'lenet':
        model = LeNet().to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
    elif args.model == 'vgg16':
        model = VGG(depth=16).to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
    elif args.model == 'vgg19':
        model = VGG(depth=19).to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)
    else:
        raise ValueError("model not recognized")

    if args.pretrained_model_dir is None:
        print('start pre-training...')
        best_acc = 0
        for epoch in range(args.pretrain_epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)
            scheduler.step()
            acc = test(args, model, device, criterion, test_loader)
            if acc > best_acc:
                best_acc = acc
                # deepcopy: state_dict() tensors alias the live parameters,
                # so a plain reference would be overwritten by later epochs
                state_dict = deepcopy(model.state_dict())

        model.load_state_dict(state_dict)
        acc = best_acc

        torch.save(state_dict, os.path.join(args.experiment_data_dir, f'pretrain_{args.dataset}_{args.model}.pth'))
        print('Trained model saved to %s' % args.experiment_data_dir)

    else:
        model.load_state_dict(torch.load(args.pretrained_model_dir))
        best_acc = test(args, model, device, criterion, test_loader)

    # set up a new optimizer and scheduler for pruning
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    scheduler = MultiStepLR(optimizer, milestones=[int(args.pretrain_epochs * 0.5), int(args.pretrain_epochs * 0.75)], gamma=0.1)

    print('Pretrained model acc:', best_acc)
    return model, optimizer, scheduler
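
These snippets call train and test helpers that are defined elsewhere in the example repository. A minimal sketch of the assumed signatures, inferred from the call sites above (illustrative stubs, not the repository's actual implementation; signatures vary slightly between snippets):

import torch

def train(args, model, device, train_loader, criterion, optimizer, epoch):
    # one epoch of standard supervised training
    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

def test(args, model, device, criterion, test_loader):
    # returns top-1 accuracy in percent
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            correct += (model(data).argmax(dim=1) == target).sum().item()
    return 100. * correct / len(test_loader.dataset)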
Example #2
def get_trained_model_optimizer(args, device, train_loader, val_loader, criterion):
    if args.model == 'LeNet':
        model = LeNet().to(device)
        if args.load_pretrained_model:
            model.load_state_dict(torch.load(args.pretrained_model_dir))
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1e-4)
        else:
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
    elif args.model == 'vgg16':
        model = VGG(depth=16).to(device)
        if args.load_pretrained_model:
            model.load_state_dict(torch.load(args.pretrained_model_dir))
            optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs*0.5), int(args.pretrain_epochs*0.75)], gamma=0.1)
    elif args.model == 'resnet18':
        model = ResNet18().to(device)
        if args.load_pretrained_model:
            model.load_state_dict(torch.load(args.pretrained_model_dir))
            optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs*0.5), int(args.pretrain_epochs*0.75)], gamma=0.1)
    elif args.model == 'resnet50':
        model = ResNet50().to(device)
        if args.load_pretrained_model:
            model.load_state_dict(torch.load(args.pretrained_model_dir))
            optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.pretrain_epochs*0.5), int(args.pretrain_epochs*0.75)], gamma=0.1)
    else:
        raise ValueError("model not recognized")

    if not args.load_pretrained_model:
        best_acc = 0
        best_epoch = 0
        for epoch in range(args.pretrain_epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)
            scheduler.step()
            acc = test(model, device, criterion, val_loader)
            if acc > best_acc:
                best_acc = acc
                best_epoch = epoch
                # deepcopy: state_dict() tensors alias the live parameters
                state_dict = deepcopy(model.state_dict())
        model.load_state_dict(state_dict)
        print('Best acc:', best_acc)
        print('Best epoch:', best_epoch)

        if args.save_model:
            torch.save(state_dict, os.path.join(args.experiment_data_dir, 'model_trained.pth'))
            print('Trained model saved to %s' % args.experiment_data_dir)

    return model, optimizer
Example #3
def get_model_optimizer_scheduler(args, device, test_loader, criterion):
    if args.model == 'LeNet':
        model = LeNet().to(device)
    elif args.model == 'vgg16':
        model = VGG(depth=16).to(device)
    elif args.model == 'vgg19':
        model = VGG(depth=19).to(device)
    else:
        raise ValueError("model not recognized")

    # In this example, the teacher and student share the same architecture; a different teacher architecture is also feasible.
    if args.teacher_model_dir is None:
        raise NotImplementedError('please load pretrained teacher model first')
    else:
        model.load_state_dict(torch.load(args.teacher_model_dir))
        best_acc = test(args, model, device, criterion, test_loader)

    model_t = deepcopy(model)
    model_s = deepcopy(model)

    if args.student_model_dir is not None:
        # load the pruned student model checkpoint
        model_s.load_state_dict(torch.load(args.student_model_dir))

    dummy_input = get_dummy_input(args, device)
    m_speedup = ModelSpeedup(model_s, dummy_input, args.mask_path, device)
    m_speedup.speedup_model()

    module_list = nn.ModuleList([])
    module_list.append(model_s)
    module_list.append(model_t)

    # set up the optimizer for fine-tuning the student model
    optimizer = torch.optim.SGD(model_s.parameters(),
                                lr=0.01,
                                momentum=0.9,
                                weight_decay=5e-4)
    scheduler = MultiStepLR(optimizer,
                            milestones=[
                                int(args.fine_tune_epochs * 0.5),
                                int(args.fine_tune_epochs * 0.75)
                            ],
                            gamma=0.1)

    print('Pretrained teacher model acc:', best_acc)
    return module_list, optimizer, scheduler
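
The returned module_list pairs the student (index 0) with the teacher (index 1). A minimal sketch of how a distillation training step might consume it; the temperature T and weight alpha are illustrative assumptions, not values taken from this example:

import torch
import torch.nn.functional as F

def distill_step(module_list, data, target, criterion, optimizer, T=4.0, alpha=0.9):
    model_s, model_t = module_list[0], module_list[1]
    model_s.train()
    model_t.eval()
    with torch.no_grad():
        logits_t = model_t(data)  # teacher provides soft targets
    logits_s = model_s(data)
    # soften both distributions with temperature T; scale by T^2 to keep gradient magnitudes comparable
    kd_loss = F.kl_div(F.log_softmax(logits_s / T, dim=1),
                       F.softmax(logits_t / T, dim=1),
                       reduction='batchmean') * (T * T)
    loss = alpha * kd_loss + (1 - alpha) * criterion(logits_s, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()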
Example #4
def model_inference(config):
    masks_file = config['masks_file']
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # device = torch.device(config['device'])
    if config['model_name'] == 'vgg16':
        model = VGG(depth=16)
    elif config['model_name'] == 'vgg19':
        model = VGG(depth=19)
    elif config['model_name'] == 'lenet':
        model = LeNet()
    else:
        raise ValueError("model not recognized")

    model.to(device)
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # use_mask, use_speedup and compare_results are flags assumed to be defined elsewhere;
    # use_mask must run before use_speedup, because use_speedup modifies the model in place
    if use_mask:
        apply_compression_results(model, masks_file, device)
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
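
For reference, an illustrative config for model_inference, with keys inferred from the function body; the values below are placeholders:

config = {
    'model_name': 'vgg16',                       # one of 'vgg16', 'vgg19', 'lenet'
    'input_shape': [64, 3, 32, 32],              # shape of the dummy input batch
    'masks_file': './experiment_data/mask.pth',  # masks exported by the pruner
}
model_inference(config)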
Example #5
def evaluator(model):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(iterable=test_loader, desc='Test'):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


if __name__ == '__main__':
    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.1,
                                momentum=0.9,
                                weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    print('\nPre-train the model:')
    for i in range(5):
        trainer(model, optimizer, criterion, i)
        evaluator(model)

    config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}]
    pruner = L1NormPruner(model, config_list)
    _, masks = pruner.compress()
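
A common follow-up in the NNI pruning examples, sketched here assuming a CIFAR-10-shaped dummy input: unwrap the pruner, then let ModelSpeedup turn the masks into a genuinely smaller model.

from nni.compression.pytorch.speedup import ModelSpeedup

pruner._unwrap_model()  # detach the pruner's wrappers before speedup
dummy_input = torch.rand(10, 3, 32, 32).to(device)
ModelSpeedup(model, dummy_input, masks).speedup_model()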
Example #6

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Iterative Example for model compression')
    parser.add_argument('--pretrain-epochs', type=int, default=10,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--pruning-algo', type=str, default='l1',
                        choices=['level', 'l1', 'l2', 'fpgm', 'slim', 'apoz',
                                 'mean_activation', 'taylorfo', 'admm'],
                        help='algorithm to evaluate weights to prune')
    parser.add_argument('--cool-down-rate', type=float, default=0.9,
                        help='Cool down rate of the temperature.')

    args = parser.parse_args()

    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # pre-train the model
    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion, i)
        evaluator(model)

    config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.8}]

    # the evaluator argument of 'SimulatedAnnealingPruner' must not be None.
    pruner = SimulatedAnnealingPruner(model, config_list, pruning_algorithm=args.pruning_algo,
                                      evaluator=evaluator, cool_down_rate=args.cool_down_rate, finetuner=finetuner)
    pruner.compress()
    _, model, masks, _, _ = pruner.get_best_result()
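
The snippet passes a finetuner callable that is defined elsewhere. A minimal sketch of the assumed shape, reusing the trainer and criterion above (the hyperparameters are illustrative choices):

def finetuner(model):
    # one short fine-tuning pass over the training set
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    trainer(model, optimizer, criterion, 0)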
Example #7
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='PyTorch Example for model compression')
    parser.add_argument('--pruner',
                        type=str,
                        default='apoz',
                        choices=['apoz', 'mean'],
                        help='pruner to use')
    parser.add_argument('--pretrain-epochs',
                        type=int,
                        default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs',
                        type=int,
                        default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(
        model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            # deepcopy: state_dict() tensors alias the live parameters
            best_state_dict = deepcopy(model.state_dict())
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
Example #8
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='PyTorch Example for model compression')
    parser.add_argument('--pretrain-epochs',
                        type=int,
                        default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs',
                        type=int,
                        default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(
        model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            # deepcopy: state_dict() tensors alias the live parameters
            best_state_dict = deepcopy(model.state_dict())
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
Example #9
def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    optimizer = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[int(total_epoch * 0.5), int(total_epoch * 0.75)], gamma=0.1)
    return optimizer, scheduler

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model compression')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            # deepcopy: state_dict() tensors alias the live parameters
            best_state_dict = deepcopy(model.state_dict())
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
Example #10
def evaluator(model):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(iterable=test_loader, desc='Test'):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    acc = 100 * correct / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc


if __name__ == '__main__':
    model = VGG().to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.1,
                                momentum=0.9,
                                weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # pre-train the model
    for _ in range(10):
        trainer(model, optimizer, criterion)

    config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.8}]
    dummy_input = torch.rand(10, 3, 32, 32).to(device)

    # make sure you have used nni.trace to wrap the optimizer class before initializing it
    traced_optimizer = nni.trace(torch.optim.SGD)(model.parameters(),
Example #11
def main(args):
    # prepare dataset
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_loader, val_loader, criterion = get_data(args.dataset, args.data_dir, args.batch_size, args.test_batch_size)
    model, optimizer = get_trained_model_optimizer(args, device, train_loader, val_loader, criterion)

    def short_term_fine_tuner(model, epochs=1):
        for epoch in range(epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)

    def trainer(model, optimizer, criterion, epoch):
        return train(args, model, device, train_loader, criterion, optimizer, epoch=epoch)

    def evaluator(model):
        return test(model, device, criterion, val_loader)

    # used to save the performance of the original & pruned & finetuned models
    result = {'flops': {}, 'params': {}, 'performance': {}}

    flops, params, _ = count_flops_params(model, get_input_size(args.dataset))
    result['flops']['original'] = flops
    result['params']['original'] = params

    evaluation_result = evaluator(model)
    print('Evaluation result (original model): %s' % evaluation_result)
    result['performance']['original'] = evaluation_result

    # module types to prune, only "Conv2d" supported for channel pruning
    if args.base_algo in ['l1', 'l2', 'fpgm']:
        op_types = ['Conv2d']
    elif args.base_algo == 'level':
        op_types = ['default']

    config_list = [{
        'sparsity': args.sparsity,
        'op_types': op_types
    }]
    dummy_input = get_dummy_input(args, device)
    if args.pruner == 'L1FilterPruner':
        pruner = L1FilterPruner(model, config_list)
    elif args.pruner == 'L2FilterPruner':
        pruner = L2FilterPruner(model, config_list)
    elif args.pruner == 'FPGMPruner':
        pruner = FPGMPruner(model, config_list)
    elif args.pruner == 'NetAdaptPruner':
        pruner = NetAdaptPruner(model, config_list, short_term_fine_tuner=short_term_fine_tuner, evaluator=evaluator,
                                base_algo=args.base_algo, experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'ADMMPruner':
        # users are free to change the config here
        if args.model == 'LeNet':
            if args.base_algo in ['l1', 'l2', 'fpgm']:
                config_list = [{
                    'sparsity': 0.8,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv2']
                }]
            elif args.base_algo == 'level':
                config_list = [{
                    'sparsity': 0.8,
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_names': ['conv2']
                }, {
                    'sparsity': 0.991,
                    'op_names': ['fc1']
                }, {
                    'sparsity': 0.93,
                    'op_names': ['fc2']
                }]
        else:
            raise ValueError('Example only implemented for LeNet.')
        pruner = ADMMPruner(model, config_list, trainer=trainer, num_iterations=2, epochs_per_iteration=2)
    elif args.pruner == 'SimulatedAnnealingPruner':
        pruner = SimulatedAnnealingPruner(
            model, config_list, evaluator=evaluator, base_algo=args.base_algo,
            cool_down_rate=args.cool_down_rate, experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'AutoCompressPruner':
        pruner = AutoCompressPruner(
            model, config_list, trainer=trainer, evaluator=evaluator, dummy_input=dummy_input,
            num_iterations=3, optimize_mode='maximize', base_algo=args.base_algo,
            cool_down_rate=args.cool_down_rate, admm_num_iterations=30, admm_epochs_per_iteration=5,
            experiment_data_dir=args.experiment_data_dir)
    else:
        raise ValueError(
            "Pruner not supported.")

    # Pruner.compress() returns the masked model,
    # but AutoCompressPruner's compress() directly returns the fully pruned model
    model = pruner.compress()
    evaluation_result = evaluator(model)
    print('Evaluation result (masked model): %s' % evaluation_result)
    result['performance']['pruned'] = evaluation_result

    if args.save_model:
        pruner.export_model(
            os.path.join(args.experiment_data_dir, 'model_masked.pth'), os.path.join(args.experiment_data_dir, 'mask.pth'))
        print('Masked model saved to %s' % args.experiment_data_dir)

    # model speedup
    if args.speedup:
        if args.pruner != 'AutoCompressPruner':
            if args.model == 'LeNet':
                model = LeNet().to(device)
            elif args.model == 'vgg16':
                model = VGG(depth=16).to(device)
            elif args.model == 'resnet18':
                model = ResNet18().to(device)
            elif args.model == 'resnet50':
                model = ResNet50().to(device)

            model.load_state_dict(torch.load(os.path.join(args.experiment_data_dir, 'model_masked.pth')))
            masks_file = os.path.join(args.experiment_data_dir, 'mask.pth')

            m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
            m_speedup.speedup_model()
            evaluation_result = evaluator(model)
            print('Evaluation result (speedup model): %s' % evaluation_result)
            result['performance']['speedup'] = evaluation_result

            torch.save(model.state_dict(), os.path.join(args.experiment_data_dir, 'model_speedup.pth'))
            print('Speedup model saved to %s' % args.experiment_data_dir)
        flops, params, _ = count_flops_params(model, get_input_size(args.dataset))
        result['flops']['speedup'] = flops
        result['params']['speedup'] = params

    if args.fine_tune:
        if args.dataset == 'mnist':
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
        elif args.dataset == 'cifar10' and args.model == 'vgg16':
            optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet18':
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet50':
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer, milestones=[int(args.fine_tune_epochs*0.5), int(args.fine_tune_epochs*0.75)], gamma=0.1)
        best_acc = 0
        for epoch in range(args.fine_tune_epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)
            scheduler.step()
            acc = evaluator(model)
            if acc > best_acc:
                best_acc = acc
                torch.save(model.state_dict(), os.path.join(args.experiment_data_dir, 'model_fine_tuned.pth'))

        print('Evaluation result (fine tuned): %s' % best_acc)
        print('Fine-tuned model saved to %s' % args.experiment_data_dir)
        result['performance']['finetuned'] = best_acc

    with open(os.path.join(args.experiment_data_dir, 'result.json'), 'w+') as f:
        json.dump(result, f)