def test_channel_prune(self):
        """Sped-up channel-pruned ResNet-18 must match masked inference.

        Builds two copies of the same pruned network -- one with masks merely
        applied, one physically shrunk by ModelSpeedup -- and checks that the
        summed absolute outputs agree within the configured thresholds.
        """
        # This first network exists only for its side effects: channel_prune()
        # writes MODEL_FILE and MASK_FILE, which everything below reads.
        scratch_net = resnet18(num_classes=10).to(device)
        channel_prune(scratch_net)
        state_dict = torch.load(MODEL_FILE)

        # Reference: original architecture with the masks applied in place.
        reference_net = resnet18(num_classes=10).to(device)
        reference_net.load_state_dict(state_dict)
        apply_compression_results(reference_net, MASK_FILE)
        reference_net.eval()

        # Candidate: same weights, then physically shrunk by ModelSpeedup.
        compact_net = resnet18(num_classes=10).to(device)
        compact_net.load_state_dict(state_dict)
        compact_net.eval()

        batch = torch.randn(BATCH_SIZE, 3, 128, 128).to(device)
        speedup = ModelSpeedup(compact_net, batch, MASK_FILE, confidence=8)
        speedup.speedup_model()
        speedup.bound_model(batch)

        compact_net.eval()

        reference_sum = reference_net(batch).abs().sum().item()
        compact_sum = compact_net(batch).abs().sum().item()

        print(reference_sum, compact_sum)
        # Accept either a small relative error or a small absolute error.
        gap = abs(reference_sum - compact_sum)
        assert (gap / abs(reference_sum) < RELATIVE_THRESHOLD) or \
            (gap < ABSOLUTE_THRESHOLD)
    def test_speedup_bigmodel(self):
        """Speedup of BigModel preserves masked outputs and shrinks layers.

        Verifies three things: the sped-up model reproduces the masked
        model's outputs, speedup leaves the train/eval flag untouched, and
        each pruned layer keeps exactly ``SPARSITY`` of its original size.
        """
        # prune_model_l1() is invoked purely for its side effect of
        # writing MASK_FILE, which the steps below consume.
        prune_model_l1(BigModel())
        net = BigModel()
        apply_compression_results(net, MASK_FILE, 'cpu')
        net.eval()
        mask_out = net(dummy_input)

        # Run the speedup while in training mode; the flag must survive it.
        net.train()
        ModelSpeedup(net, dummy_input, MASK_FILE, confidence=8).speedup_model()
        assert net.training

        net.eval()
        speedup_out = net(dummy_input)
        if not torch.allclose(mask_out, speedup_out, atol=1e-07):
            # Dump everything useful for debugging a mismatch before failing.
            print('input:', dummy_input.size(),
                  torch.abs(dummy_input).sum((2, 3)))
            print('mask_out:', mask_out)
            print('speedup_out:', speedup_out)
            raise RuntimeError('model speedup inference result is incorrect!')

        # Compare each physically-shrunk dimension against a fresh, unpruned
        # instance: every one must equal int(original * SPARSITY).
        reference = BigModel()
        for layer_name, attr in (('conv1', 'out_channels'),
                                 ('conv2', 'in_channels'),
                                 ('conv2', 'out_channels'),
                                 ('fc1', 'in_features')):
            pruned_size = getattr(getattr(net.backbone2, layer_name), attr)
            full_size = getattr(getattr(reference.backbone2, layer_name), attr)
            assert pruned_size == int(full_size * SPARSITY)
Exemple #3
0
def model_inference(config):
    """Time masked inference and sped-up inference, optionally comparing them.

    Parameters
    ----------
    config : dict
        Must contain ``'masks_file'``, ``'model_name'`` (one of ``'vgg16'``,
        ``'vgg19'``, ``'lenet'``) and ``'input_shape'``.

    Raises
    ------
    ValueError
        If ``config['model_name']`` names an unsupported architecture.
    RuntimeError
        If a comparison is requested without both passes having run, or if
        the masked and sped-up outputs disagree.

    Notes
    -----
    Reads the module-level flags ``use_mask``, ``use_speedup`` and
    ``compare_results`` -- presumably set by the surrounding script; confirm
    against the caller.
    """
    masks_file = config['masks_file']
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Map each supported name to a lazy constructor so only the requested
    # architecture is built.  Previously an unknown name fell through the
    # if/elif chain and crashed later with a NameError on `model`; now it
    # fails fast with a clear message.
    builders = {
        'vgg16': lambda: VGG(depth=16),
        'vgg19': lambda: VGG(depth=19),
        'lenet': LeNet,
    }
    try:
        builder = builders[config['model_name']]
    except KeyError:
        raise ValueError(
            'unsupported model_name: {!r}'.format(config['model_name'])) from None
    model = builder()

    model.to(device)
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modify the model
    if use_mask:
        apply_compression_results(model, masks_file, device)
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    if compare_results:
        # Guard the comparison: torch.allclose(None, ...) would otherwise
        # raise an opaque TypeError when either pass was skipped.
        if use_mask_out is None or use_speedup_out is None:
            raise RuntimeError(
                'comparison requires both use_mask and use_speedup to have run')
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
Exemple #4
0
def main(args):
    """End-to-end pruning pipeline: prune, fine-tune, export, optionally speed up.

    Expected ``args`` attributes (from the CLI parser): ``dataset``,
    ``data_dir``, ``batch_size``, ``test_batch_size``,
    ``experiment_data_dir``, ``model``, ``pruner``, ``dependency_aware``,
    ``multi_gpu``, ``test_only``, ``fine_tune_epochs``, ``nni``, ``speed_up``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    os.makedirs(args.experiment_data_dir, exist_ok=True)

    # prepare model and data
    train_loader, test_loader, criterion = get_data(args.dataset,
                                                    args.data_dir,
                                                    args.batch_size,
                                                    args.test_batch_size)

    model, optimizer, scheduler = get_model_optimizer_scheduler(
        args, device, train_loader, test_loader, criterion)

    # Baseline cost of the unpruned model, for comparison with post-speedup.
    dummy_input = get_dummy_input(args, device)
    flops, params, results = count_flops_params(model, dummy_input)
    print(f"FLOPs: {flops}, params: {params}")

    print('start pruning...')
    # Destination paths for the pruned state_dict and the pruning masks.
    model_path = os.path.join(
        args.experiment_data_dir,
        'pruned_{}_{}_{}.pth'.format(args.model, args.dataset, args.pruner))
    mask_path = os.path.join(
        args.experiment_data_dir,
        'mask_{}_{}_{}.pth'.format(args.model, args.dataset, args.pruner))

    pruner = get_pruner(model, args.pruner, device, optimizer,
                        args.dependency_aware)
    model = pruner.compress()

    if args.multi_gpu and torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    # NOTE(review): with --test_only the function still falls through into the
    # fine-tuning loop below -- confirm whether an early return was intended.
    if args.test_only:
        test(args, model, device, criterion, test_loader)

    # Fine-tune the pruned model, exporting whenever top-1 accuracy improves.
    best_top1 = 0
    for epoch in range(args.fine_tune_epochs):
        pruner.update_epoch(epoch)
        print('# Epoch {} #'.format(epoch))
        train(args, model, device, train_loader, criterion, optimizer, epoch)
        scheduler.step()
        top1 = test(args, model, device, criterion, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Export the best model, 'model_path' stores state_dict of the pruned model,
            # mask_path stores mask_dict of the pruned model
            pruner.export_model(model_path=model_path, mask_path=mask_path)

    if args.nni:
        nni.report_final_result(best_top1)

    if args.speed_up:
        # reload the best checkpoint for speed-up
        args.pretrained_model_dir = model_path
        model, _, _ = get_model_optimizer_scheduler(args, device, train_loader,
                                                    test_loader, criterion)
        model.eval()

        apply_compression_results(model, mask_path, device)

        # test model speed
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)

        m_speedup = ModelSpeedup(model, dummy_input, mask_path, device)
        m_speedup.speedup_model()

        # Cost after the pruned channels have been physically removed.
        flops, params, results = count_flops_params(model, dummy_input)
        print(f"FLOPs: {flops}, params: {params}")

        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)

        top1 = test(args, model, device, criterion, test_loader)
Exemple #5
0
                                     batch_size=batch_size,
                                     drop_last=True)
    valid_un_dataloader = DataLoader(total_dataset.valid_un_dataset,
                                     shuffle=False,
                                     batch_size=batch_size,
                                     drop_last=True)
    #train(train_un_dataloader,valid_un_dataloader, model, criterion=SMR_loss,save_path = best_un_model_path, lr = 1e-3)
    #print('unsupervised learning complete!')
    model_path = best_un_model_path
    mask_path = './model/DUU_models_%d_%d_%d_%d_%d_un_mask_model.pth' % (
        Nt, Nr, K, dk, SNR_dB)
    config_list = [{'sparsity': 0.8, 'op_types': ['Conv2d']}]
    pruner = L2FilterPruner(model, config_list)
    pruner.compress()
    model.eval()
    pruner.export_model(model_path=model_path, mask_path=mask_path)
    apply_compression_results(model, mask_path, device)
    '''test'''
    test_dataloader = DataLoader(total_dataset.test_un_dataset,
                                 shuffle=False,
                                 batch_size=batch_size,
                                 drop_last=True)
    model = torch.load(best_su_model_path)
    su_performance = test(test_dataloader, model, criterion=SMR_loss)
    print('supervised learning performance:' + str(su_performance))
    model = torch.load(best_un_model_path)
    un_performance = test(test_dataloader, model, criterion=SMR_loss)
    print('unsupervised learning performance:' + str(un_performance))

#python train.py --Nt 64 --Nr 4 --K 10 --dk 2 --SNR 0 --SNR_channel 100 --gpu 0 --mode gpu --batch_size 200 --epoch 1000 --factor 1 --test_length 2000
    return test_acc.item()


# Benchmark script: pruned VGG19-BN on CIFAR-10, masked inference vs. the
# physically sped-up model.  320 forward passes are timed for each variant.
dummy_input = torch.ones([64, 3, 32, 32]).cuda()

# Rebuild the architecture exactly as it was before pruning (1x1 avgpool and
# a 10-way classifier head) so the pruned state_dict loads cleanly.
model = torchvision.models.vgg19_bn(num_classes=10)
model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
model.classifier = nn.Linear(512, 10)
model.load_state_dict(torch.load('pruned_vgg19_cifar10.pth'))
model.cuda()
model.eval()
model(dummy_input)  # warm-up: the first inference is much slower (CUDA init)

# Masked inference: apply the masks in place and time 320 forward passes.
# Must run before speedup, because speedup modifies the model in place.
use_mask_out = use_speedup_out = None
apply_compression_results(model, 'mask_vgg19_cifar10.pth')
start = time.time()
for _ in range(320):
    use_mask_out = model(dummy_input)
print('elapsed time when use mask: ', time.time() - start)
print(test(model))

# Speedup: physically remove the masked channels, then time the same loop.
m_speedup = ModelSpeedup(model, dummy_input, 'mask_vgg19_cifar10.pth')
m_speedup.speedup_model()
start = time.time()
for _ in range(320):
    use_speedup_out = model(dummy_input)
print('elapsed time when use speedup: ', time.time() - start)
print(test(model))