Example #1
def fpgm_speedup(masks_file, model_checkpoint):
    from fpgm_torch_mnist import Mnist
    device = torch.device('cpu')
    model = Mnist()
    model.to(device)
    model.print_conv_filter_sparsity()

    dummy_input = torch.randn(64, 1, 28, 28)
    if use_mask:
        apply_compression_results(model, masks_file)
        dummy_input = dummy_input.to(device)
        start = time.time()
        for _ in range(40):
            out = model(dummy_input)
        print('mask elapsed time: ', time.time() - start)
        #print(out.size(), out)
        return
    else:
        m_speedup = ModelSpeedup(model, dummy_input.to(device), masks_file)
        m_speedup.speedup_model()
        dummy_input = dummy_input.to(device)
        start = time.time()
        for _ in range(40):
            out = model(dummy_input)
        print('speedup elapsed time: ', time.time() - start)
        #print(out.size(), out)
        return
Example #2
    def test_speedup_bigmodel(self):
        prune_model_l1(BigModel())
        model = BigModel()
        apply_compression_results(model, MASK_FILE, 'cpu')
        model.eval()
        mask_out = model(dummy_input)

        model.train()
        ms = ModelSpeedup(model, dummy_input, MASK_FILE)
        ms.speedup_model()
        assert model.training

        model.eval()
        speedup_out = model(dummy_input)
        if not torch.allclose(mask_out, speedup_out, atol=1e-07):
            print('input:', dummy_input.size(),
                  torch.abs(dummy_input).sum((2, 3)))
            print('mask_out:', mask_out)
            print('speedup_out:', speedup_out)
            raise RuntimeError('model speedup inference result is incorrect!')

        orig_model = BigModel()

        assert model.backbone2.conv1.out_channels == int(
            orig_model.backbone2.conv1.out_channels * SPARSITY)
        assert model.backbone2.conv2.in_channels == int(
            orig_model.backbone2.conv2.in_channels * SPARSITY)
        assert model.backbone2.conv2.out_channels == int(
            orig_model.backbone2.conv2.out_channels * SPARSITY)
        assert model.backbone2.fc1.in_features == int(
            orig_model.backbone2.fc1.in_features * SPARSITY)

    def test_channel_prune(self):
        orig_net = resnet18(num_classes=10).to(device)
        channel_prune(orig_net)
        state_dict = torch.load(MODEL_FILE)

        orig_net = resnet18(num_classes=10).to(device)
        orig_net.load_state_dict(state_dict)
        apply_compression_results(orig_net, MASK_FILE)
        orig_net.eval()

        net = resnet18(num_classes=10).to(device)

        net.load_state_dict(state_dict)
        net.eval()

        data = torch.randn(BATCH_SIZE, 3, 224, 224).to(device)
        ms = ModelSpeedup(net, data, MASK_FILE)
        ms.speedup_model()
        ms.bound_model(data)

        net.eval()

        ori_sum = orig_net(data).abs().sum().item()
        speeded_sum = net(data).abs().sum().item()

        print(ori_sum, speeded_sum)
        assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
            (abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
Example #4
def slim_speedup(masks_file, model_checkpoint):
    device = torch.device('cuda')
    model = VGG(depth=19)
    model.to(device)
    model.eval()

    dummy_input = torch.randn(64, 3, 32, 32)
    if use_mask:
        apply_compression_results(model, masks_file)
        dummy_input = dummy_input.to(device)
        start = time.time()
        for _ in range(32):
            out = model(dummy_input)
        #print(out.size(), out)
        print('mask elapsed time: ', time.time() - start)
        return
    else:
        #print("model before: ", model)
        m_speedup = ModelSpeedup(model, dummy_input.to(device), masks_file)
        m_speedup.speedup_model()
        #print("model after: ", model)
        dummy_input = dummy_input.to(device)
        start = time.time()
        for _ in range(32):
            out = model(dummy_input)
        #print(out.size(), out)
        print('speedup elapsed time: ', time.time() - start)
        return
Example #5
def model_inference(config):
    masks_file = config['masks_file']
    device = torch.device(config['device'])
    if config['model_name'] == 'unet':
        model = UNet(3, 1)
    elif config['model_name'] == 'vgg19':
        model = VGG(depth=19)
    elif config['model_name'] == 'naive':
        from model_prune_torch import NaiveModel
        model = NaiveModel()
    model.to(device)
    model.load_state_dict(torch.load(config['model_file'],
                                     map_location=device))
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, masks_file, device)
        start = time.time()
        for _ in range(1):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(1):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-05):
            torch.save(model, config['save_dir_for_speedup'])
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
def model_inference(config):
    masks_file = config['masks_file']
    device = torch.device(config['device'])
    if config['model_name'] == 'unet':
        model = UNet(3, 1)
    elif config['model_name'] == 'testNet':
        model = testNet()
    elif config['model_name'] == 'naive':
        from model_prune_torch import NaiveModel
        model = NaiveModel()
    model.to(device)
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, masks_file,
                                  'cpu' if config['device'] == 'cpu' else None)
        start = time.time()
        for _ in range(1):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file,
                                 'cpu' if config['device'] == 'cpu' else None)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(1):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
Example #7
def compress(model,
             dummy,
             pruner_cls,
             config_list,
             ori_metric=1.00,
             metric_thres=0.01,
             sensitivity=None,
             trace=None,
             verbose=True):
    if sensitivity:
        config_list = update_sparsity_by_sensitivity(config_list, ori_metric,
                                                     metric_thres, sensitivity)
    compressed_model = copy.deepcopy(model)
    pruner = pruner_cls(compressed_model, config_list)
    compressed_model = pruner.compress()

    mask_path = "/tmp/mask.pth"
    pruner.export_model(model_path='/tmp/model.pth', mask_path=mask_path)
    pruner._unwrap_model()

    print("fixing mask conflict...")
    fixed_mask = fix_mask_conflict(mask_path, compressed_model, dummy, trace)
    # mask = torch.load(mask_path)

    compressed_model.load_state_dict(model.state_dict())
    apply_compression_results(compressed_model, fixed_mask)
    if verbose:
        count_zero(compressed_model, verbose=False)
        from thop import profile
        macs, params = profile(compressed_model, inputs=dummy, verbose=False)
        print("MACs: {} G, Params: {} M".format(macs / 1000000000,
                                                params / 100000))
    speedup_model = speedup(compressed_model, dummy, fixed_mask, trace)
    if verbose:
        count_zero(speedup_model, verbose=False)
    return speedup_model, fixed_mask
Example #8
def test_nni():
    model = load_t_net()

    config_list = [{'sparsity': 0.5, 'op_types': ['Conv2d']}]
    pruner = SlimPruner(model, config_list)
    model = pruner.compress()

    print(model)
    masks_file = "./nni/mask.pth"
    pruner.export_model(model_path="./nni/nni_mod.pth", mask_path=masks_file)
    print("export ok")
    apply_compression_results(model, masks_file)

    # model: the model to be sped up
    # dummy_input: an example input for the model, passed to `jit.trace`
    # masks_file: the mask file produced by the pruning algorithm
    dummy_input = torch.randn(1, 3, 384, 224)
    m_speedup = ModelSpeedup(model, dummy_input.cuda(), masks_file)
    m_speedup.speedup_model()
    dummy_input = dummy_input.cuda()
    start = time.time()
    out = model(dummy_input)
    summary(model, dummy_input)
    print('elapsed time: ', time.time() - start)
    net = UNet(n_channels=3, n_classes=1)

    logging.info("Loading model {}".format(args.model))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # logging.info(f'Using device {device}')
    net.to(device=device)

    # select the model checkpoint to load
    net.load_state_dict(
        torch.load("./save_pruner/pruned_model.pt", map_location=device))

    logging.info("Model loaded !")
    net.eval()
    apply_compression_results(net, "./save_pruner/pruned_mask.pt", 'cuda')

    import time

    start = time.time()

    for i, fn in enumerate(in_files):
        logging.info("\nPredicting image {} ...".format(fn))

        img = Image.open(fn)

        mask = predict_img(net=net,
                           full_img=img,
                           scale_factor=args.scale,
                           out_threshold=args.mask_threshold,
                           device=device)
    test(model, device, test_data_loader)
torch.save(model.state_dict(), 'pretrained_model.pth')
print("start model pruning...")
optimizer = torch.optim.SGD(model.parameters(),
                            lr=0.001,
                            momentum=0.9,
                            weight_decay=1e-4)
best_top1 = 0
# pruner = SlimPruner(model, config_list, optimizer)
pruner = ActivationMeanRankFilterPruner(model, config_list, optimizer)
model = pruner.compress()

for epoch in range(prune_epochs):
    pruner.update_epoch(epoch)
    print("# Epoch {} #".format(epoch))
    train(model, device, train_data_loader, optimizer)
    top1 = test(model, device, test_data_loader)
    if top1 > best_top1:
        pruner.export_model(model_path='pruned_model.pth',
                            mask_path='pruned_mask.pth')
        from nni.compression.torch import apply_compression_results
        from nni.compression.speedup.torch import ModelSpeedup
        model = MobileModel().cuda()
        model.eval()
        apply_compression_results(model, 'pruned_mask.pth', None)
        m_speedup = ModelSpeedup(model,
                                 torch.randn(1, 3, 224, 224).cuda(),
                                 'pruned_mask.pth', None)
        m_speedup.speedup_model()
        torch.save(model.state_dict(), 'pruned_speedup_model.pth')
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    dummy_input = next(iter(validate_loader))
    dummy_input = dummy_input['img'].to(device)

    checkpoint = torch.load(args.model_file, map_location=device)
    model.load_state_dict(checkpoint, strict=False)
    model.to(device)
    model.eval()

    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, args.masks_file, device)
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, args.masks_file, device)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    torch.save(
        model.state_dict(),
        "output/DBNet_opensource_nni_resnet18_fpn_db/checkpoint/pruner_speed.pth"
    )
def model_inference(config):
    masks_file = './speedup_test/mask_new.pth'
    shape_mask = './speedup_test/mask_new.pth'
    org_mask = './speedup_test/mask.pth'
    rn50 = models.resnet50()
    m_paras = torch.load('./speedup_test/model_fine_tuned.pth')
    # drop mask tensors and strip any 'module.' prefix from the checkpoint
    m_new = collections.OrderedDict()
    for key in m_paras:
        if 'mask' in key: continue
        if 'module' in key:
            m_new[key.replace('module.', '')] = m_paras[key]
        else:
            m_new[key] = m_paras[key]
    rn50.load_state_dict(m_new)
    rn50.cuda()
    rn50.eval()

    dummy_input = torch.randn(64, 3, 224, 224).cuda()
    use_mask_out = use_speedup_out = None
    rn = rn50
    apply_compression_results(rn, org_mask, 'cuda')
    rn_mask_out = rn(dummy_input)
    model = rn50
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, masks_file, 'cuda')
        torch.onnx.export(model,
                          dummy_input,
                          'resnet_masked.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={
                              'inputs': [0],
                              'mask': [0]
                          },
                          keep_initializers_as_inputs=True)

        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    print('Model is ', model)
    print('before speed up===================')
    #for para in model.state_dict():
    #    print(para)
    #    print(model.state_dict()[para])
    #    print(model.state_dict()[para].shape)
    flops, paras = count_flops_params(model, (1, 3, 224, 224))
    print(
        'flops and parameters before speedup are {} FLOPS and {} params'.format(
            flops, paras))
    if use_speedup:
        dummy_input.cuda()
        m_speedup = ModelSpeedup(model, dummy_input, shape_mask, 'cuda')
        m_speedup.speedup_model()
        print('==' * 20)
        print('Start inference')
        torch.onnx.export(model,
                          dummy_input,
                          'resnet_fpgm.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={
                              'inputs': [0],
                              'mask': [0]
                          },
                          keep_initializers_as_inputs=True)
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    print('After speedup model is ', model)
    print('=================')
    print('After speedup')
    flops, paras = count_flops_params(model, (1, 3, 224, 224))
    print(
        'flops and parameters after speedup are {} FLOPS and {} params'.format(
            flops, paras))
    #for para in model.state_dict():
    #    print(para)
    #    print(model.state_dict()[para])
    #    print(model.state_dict()[para].shape)
    if compare_results:
        print(rn_mask_out)
        print('another is', use_speedup_out)
        if torch.allclose(rn_mask_out, use_speedup_out, atol=1e-6):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
    # start the accuracy check
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        start = time.time()
        evaluate(model,
                 criterion,
                 data_loader_test,
                 device="cuda",
                 print_freq=20)
        print('elapsed time is ', time.time() - start)
from nni.compression.speedup.torch import ModelSpeedup

dummy_input = torch.randn((64, 3, 224, 224)).cuda()
model = MobileNetV2(n_class=config.num_classes, width_mult=1.0)
model.cuda()

start = time.time()
for i in range(32):
    output = model(dummy_input)
end = time.time()
print("Time for original model:", end - start)

model.load_state_dict(torch.load('results/pruned/pruned_model.pth'))
mask_file = './results/pruned/pruned_mask.pth'

apply_compression_results(model, mask_file, 'cuda')

start = time.time()
for i in range(32):
    mask_output = model(dummy_input)
end = time.time()
print("Time for masked model:", end - start)

m_speedup = ModelSpeedup(model, dummy_input, mask_file, 'cuda')
m_speedup.speedup_model()

start = time.time()
for i in range(32):
    speedup_output = model(dummy_input)
end = time.time()
print("Time for speedup model:", end - start)