Example 1 — verify that ModelSpeedup reproduces the masked model's outputs and physically shrinks BigModel's pruned layers.
    def test_speedup_bigmodel(self):
        # prune a throwaway copy of BigModel so that MASK_FILE is written to disk
        prune_model_l1(BigModel())
        model = BigModel()
        apply_compression_results(model, MASK_FILE, 'cpu')
        model.eval()
        dummy_input = torch.randn(2, 1, 28, 28)  # BigModel input shape (see Example 5)
        mask_out = model(dummy_input)

        model.train()
        ms = ModelSpeedup(model, dummy_input, MASK_FILE)
        ms.speedup_model()
        assert model.training

        model.eval()
        speedup_out = model(dummy_input)
        if not torch.allclose(mask_out, speedup_out, atol=1e-07):
            print('input:', dummy_input.size(),
                  torch.abs(dummy_input).sum((2, 3)))
            print('mask_out:', mask_out)
            print('speedup_out:', speedup_out)
            raise RuntimeError('model speedup inference result is incorrect!')

        orig_model = BigModel()
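        # a fresh, unpruned model supplies the reference layer sizes for the checks below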

        assert model.backbone2.conv1.out_channels == int(
            orig_model.backbone2.conv1.out_channels * SPARSITY)
        assert model.backbone2.conv2.in_channels == int(
            orig_model.backbone2.conv2.in_channels * SPARSITY)
        assert model.backbone2.conv2.out_channels == int(
            orig_model.backbone2.conv2.out_channels * SPARSITY)
        assert model.backbone2.fc1.in_features == int(
            orig_model.backbone2.fc1.in_features * SPARSITY)
Example 2 — time masked inference against sped-up inference for a slim-pruned VGG-19 on GPU.
def slim_speedup(masks_file, model_checkpoint):
    # use_mask is a module-level flag set elsewhere; model_checkpoint is unused here
    device = torch.device('cuda')
    model = VGG(depth=19)
    model.to(device)
    model.eval()

    dummy_input = torch.randn(64, 3, 32, 32)
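    # either time inference with masks applied in place, or rewrite the graph first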
    if use_mask:
        apply_compression_results(model, masks_file)
        dummy_input = dummy_input.to(device)
        start = time.time()
        for _ in range(32):
            out = model(dummy_input)
        #print(out.size(), out)
        print('mask elapsed time: ', time.time() - start)
        return
    else:
        #print("model before: ", model)
        m_speedup = ModelSpeedup(model, dummy_input.to(device), masks_file)
        m_speedup.speedup_model()
        #print("model after: ", model)
        dummy_input = dummy_input.to(device)
        start = time.time()
        for _ in range(32):
            out = model(dummy_input)
        #print(out.size(), out)
        print('speedup elapsed time: ', time.time() - start)
        return
Example 3 — the same timing comparison for an FPGM-pruned MNIST model on CPU.
def fpgm_speedup(masks_file, model_checkpoint):
    from fpgm_torch_mnist import Mnist
    device = torch.device('cpu')
    model = Mnist()
    model.to(device)
    model.print_conv_filter_sparsity()

    dummy_input = torch.randn(64, 1, 28, 28)
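    # same two branches as in Example 2: masked inference vs. graph speedup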
    if use_mask:
        apply_compression_results(model, masks_file)
        dummy_input = dummy_input.to(device)
        start = time.time()
        for _ in range(40):
            out = model(dummy_input)
        print('mask elapsed time: ', time.time() - start)
        #print(out.size(), out)
        return
    else:
        m_speedup = ModelSpeedup(model, dummy_input.to(device), masks_file)
        m_speedup.speedup_model()
        dummy_input = dummy_input.to(device)
        start = time.time()
        for _ in range(40):
            out = model(dummy_input)
        print('speedup elapsed time: ', time.time() - start)
        #print(out.size(), out)
        return
Example 4 — check that speedup scales the pruned VGG-16 layer sizes by SPARSITY.
    def test_speedup_vgg16(self):
        prune_model_l1(vgg16())  # prunes a throwaway copy; writes MASK_FILE as a side effect
        model = vgg16()
        model.train()
        ms = ModelSpeedup(model, torch.randn(2, 3, 32, 32), MASK_FILE)
        ms.speedup_model()

        orig_model = vgg16()
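        # pruned layer sizes should equal the original sizes scaled by SPARSITY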
        assert model.training
        assert model.features[2].out_channels == int(
            orig_model.features[2].out_channels * SPARSITY)
        assert model.classifier[0].in_features == int(
            orig_model.classifier[0].in_features * SPARSITY)
Example 5 — the BigModel shape checks from Example 1, without the output comparison.
    def test_speedup_bigmodel(self):
        prune_model_l1(BigModel())
        model = BigModel()
        model.train()
        ms = ModelSpeedup(model, torch.randn(2, 1, 28, 28), './l1_mask.pth')
        ms.speedup_model()

        orig_model = BigModel()
        assert model.training
        assert model.backbone2.conv1.out_channels == int(
            orig_model.backbone2.conv1.out_channels * SPARSITY)
        assert model.backbone2.conv2.in_channels == int(
            orig_model.backbone2.conv2.in_channels * SPARSITY)
        assert model.backbone2.conv2.out_channels == int(
            orig_model.backbone2.conv2.out_channels * SPARSITY)
        assert model.backbone2.fc1.in_features == int(
            orig_model.backbone2.fc1.in_features * SPARSITY)
Example 6 — a configurable harness that times and compares masked versus sped-up inference.
def model_inference(config):
    masks_file = config['masks_file']
    device = torch.device(config['device'])
    if config['model_name'] == 'vgg16':
        model = VGG(depth=16)
    elif config['model_name'] == 'vgg19':
        model = VGG(depth=19)
    elif config['model_name'] == 'naive':
        from model_prune_torch import NaiveModel
        model = NaiveModel()
    else:
        raise ValueError('unsupported model_name: {}'.format(config['model_name']))
    model.to(device)
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    # use_mask, use_speedup and compare_results are module-level flags;
    # use_mask must run before use_speedup because use_speedup modifies the model in place
    use_mask_out = use_speedup_out = None
    if use_mask:
        apply_compression_results(model, masks_file,
                                  'cpu' if config['device'] == 'cpu' else None)
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file,
                                 'cpu' if config['device'] == 'cpu' else None)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
    test(model, device, test_data_loader)
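
For reference, a call to model_inference might look like the sketch below. The keys are exactly those the function reads; the mask path is a hypothetical placeholder.

config = {
    'masks_file': './mask_vgg19_cifar10.pth',  # hypothetical path to an exported mask
    'device': 'cuda',
    'model_name': 'vgg19',
    'input_shape': (64, 3, 32, 32),
}
model_inference(config)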
Example 7 — prune with ActivationMeanRankFilterPruner, export the best mask, and speed up the exported model.
torch.save(model.state_dict(), 'pretrained_model.pth')
print("start model pruning...")
optimizer = torch.optim.SGD(model.parameters(),
                            lr=0.001,
                            momentum=0.9,
                            weight_decay=1e-4)
best_top1 = 0
# pruner = SlimPruner(model, config_list, optimizer)
pruner = ActivationMeanRankFilterPruner(model, config_list, optimizer)
model = pruner.compress()

for epoch in range(prune_epochs):
    pruner.update_epoch(epoch)
    print("# Epoch {} #".format(epoch))
    train(model, device, train_data_loader, optimizer)
    top1 = test(model, device, test_data_loader)
    if top1 > best_top1:
        best_top1 = top1  # remember the best accuracy so the mask is only re-exported on improvement
        pruner.export_model(model_path='pruned_model.pth',
                            mask_path='pruned_mask.pth')
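        # rebuild a fresh model, apply the exported mask, and save a sped-up checkpoint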
        from nni.compression.torch import apply_compression_results
        from nni.compression.speedup.torch import ModelSpeedup
        model = MobileModel().cuda()
        model.eval()
        apply_compression_results(model, 'pruned_mask.pth', None)
        m_speedup = ModelSpeedup(model,
                                 torch.randn(1, 3, 224, 224).cuda(),
                                 'pruned_mask.pth', None)
        m_speedup.speedup_model()
        torch.save(model.state_dict(), 'pruned_speedup_model.pth')
Example 8 — compare inference time for the original, masked, and sped-up MobileNetV2 on GPU.
dummy_input = torch.randn((64, 3, 224, 224)).cuda()
model = MobileNetV2(n_class=config.num_classes, width_mult=1.0)
model.cuda()

start = time.time()
for i in range(32):
    output = model(dummy_input)
torch.cuda.synchronize()  # wait for queued CUDA work before reading the clock
end = time.time()
print("Time for original model:", end - start)

model.load_state_dict(torch.load('results/pruned/pruned_model.pth'))
mask_file = './results/pruned/pruned_mask.pth'

apply_compression_results(model, mask_file, 'cuda')

start = time.time()
for i in range(32):
    mask_output = model(dummy_input)
torch.cuda.synchronize()
end = time.time()
print("Time for masked model:", end - start)

m_speedup = ModelSpeedup(model, dummy_input, mask_file, 'cuda')
m_speedup.speedup_model()

start = time.time()
for i in range(32):
    speedup_output = model(dummy_input)
torch.cuda.synchronize()
end = time.time()
print("Time for speedup model:", end - start)