Example #1
    def test_speedup_bigmodel(self):
        prune_model_l1(BigModel())
        model = BigModel()
        apply_compression_results(model, MASK_FILE, 'cpu')
        model.eval()
        mask_out = model(dummy_input)

        model.train()
        ms = ModelSpeedup(model, dummy_input, MASK_FILE)
        ms.speedup_model()
        assert model.training

        model.eval()
        speedup_out = model(dummy_input)
        if not torch.allclose(mask_out, speedup_out, atol=1e-07):
            print('input:', dummy_input.size(),
                  torch.abs(dummy_input).sum((2, 3)))
            print('mask_out:', mask_out)
            print('speedup_out:', speedup_out)
            raise RuntimeError('model speedup inference result is incorrect!')

        orig_model = BigModel()

        assert model.backbone2.conv1.out_channels == int(
            orig_model.backbone2.conv1.out_channels * SPARSITY)
        assert model.backbone2.conv2.in_channels == int(
            orig_model.backbone2.conv2.in_channels * SPARSITY)
        assert model.backbone2.conv2.out_channels == int(
            orig_model.backbone2.conv2.out_channels * SPARSITY)
        assert model.backbone2.fc1.in_features == int(
            orig_model.backbone2.fc1.in_features * SPARSITY)
Example #2
    def generate_model(self, cfg):
        """
        generate the models according to the channel_cfg.
        The generated model has the same network architecture
        with self.bound_model, but the out_channels of each
        conv layers are configured according to the channel_cfg.

        Parameters
        ----------
        cfg: list
            cfg for the pruner.
        """

        model = copy.deepcopy(self.bound_model)
        pruner = Constrained_L1FilterPruner(model, cfg, self.dummy_input)
        pruner.compress()
        _tmp_ck_path = os.path.join(self.ck_dir, 'tmp.pth')
        _tmp_mask_path = os.path.join(self.ck_dir, 'mask')
        pruner.export_model(_tmp_ck_path, _tmp_mask_path)
        pruner._unwrap_model()
        ms = ModelSpeedup(model, self.dummy_input, _tmp_mask_path)
        ms.speedup_model()

        try:
            model(self.dummy_input)
            print('Successful inference')
        except Exception as err:
            _logger.warning('The updated model may have shape conflicts')
            _logger.warning(err)
            traceback.print_exc()
            # the model is not valid: it has a shape conflict
            return None

        return model
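
For orientation, the workflow that the examples on this page share can be reduced to a few lines. The following is a sketch against the NNI v2.x compression API (import paths differ between NNI versions, and the file names here are placeholders):

import torch
import torchvision.models as models
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
from nni.compression.pytorch import ModelSpeedup

net = models.resnet18(pretrained=False)
dummy_input = torch.randn(1, 3, 224, 224)
# prune 50% of the filters in every Conv2d layer
pruner = L1FilterPruner(net, [{'sparsity': 0.5, 'op_types': ['Conv2d']}])
pruner.compress()
pruner.export_model('model.pth', 'mask.pth')  # placeholder paths
pruner._unwrap_model()  # unwrap before running speedup on the same model
ModelSpeedup(net, dummy_input, 'mask.pth').speedup_model()
print(net)  # Conv2d layers now have physically smaller out_channels
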
Example #3
    def test_speedup_integration(self):
        for model_name in [
                'resnet18', 'squeezenet1_1', 'mobilenet_v2', 'densenet121',
                'inception_v3'
        ]:
            Model = getattr(models, model_name)
            net = Model(pretrained=True, progress=False).to(device)
            speedup_model = Model().to(device)
            net.eval()  # necessary: keep BatchNorm in eval mode so outputs are deterministic
            speedup_model.eval()
            # randomly generate the prune config for the pruner
            cfgs = generate_random_sparsity(net)
            pruner = L1FilterPruner(net, cfgs)
            pruner.compress()
            pruner.export_model(MODEL_FILE, MASK_FILE)
            pruner._unwrap_model()
            state_dict = torch.load(MODEL_FILE)
            speedup_model.load_state_dict(state_dict)
            zero_bn_bias(net)
            zero_bn_bias(speedup_model)

            data = torch.ones(BATCH_SIZE, 3, 224, 224).to(device)
            ms = ModelSpeedup(speedup_model, data, MASK_FILE)
            ms.speedup_model()
            ori_out = net(data)
            speeded_out = speedup_model(data)
            ori_sum = torch.sum(ori_out).item()
            speeded_sum = torch.sum(speeded_out).item()
            print('Sum of the output of %s (before speedup):' % model_name,
                  ori_sum)
            print('Sum of the output of %s (after speedup):' % model_name,
                  speeded_sum)
            assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
                   (abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
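
The helpers generate_random_sparsity and zero_bn_bias are test utilities not shown on this page; plausible reconstructions (assumptions on my part, the real helpers may differ) look like this:

import random
import torch
import torch.nn as nn

def generate_random_sparsity(model):
    # assumed behavior: give each Conv2d layer a random chance of being
    # pruned, at a random ratio
    cfg_list = []
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d) and random.uniform(0, 1) > 0.5:
            cfg_list.append({'op_names': [name],
                             'sparsity': random.uniform(0.5, 0.99)})
    return cfg_list

def zero_bn_bias(model):
    # assumed behavior: zero every BatchNorm bias and running mean so the
    # masked model and the speedup model produce comparable outputs
    with torch.no_grad():
        for module in model.modules():
            if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
                module.bias.zero_()
                module.running_mean.zero_()
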
Example #4
def get_model(args):
    print('=> Building model..')

    if args.dataset == 'imagenet':
        n_class = 1000
    elif args.dataset == 'cifar10':
        n_class = 10
    else:
        raise NotImplementedError

    if args.model_type == 'mobilenet':
        net = MobileNet(n_class=n_class)
    elif args.model_type == 'mobilenetv2':
        net = MobileNetV2(n_class=n_class)
    elif args.model_type.startswith('resnet'):
        net = resnet.__dict__[args.model_type](pretrained=True)
        in_features = net.fc.in_features
        net.fc = nn.Linear(in_features, n_class)
    else:
        raise NotImplementedError

    if args.ckpt_path is not None:
        # the checkpoint can be state_dict exported by amc_search.py or saved by amc_train.py
        print('=> Loading checkpoint {} ..'.format(args.ckpt_path))
        net.load_state_dict(torch.load(args.ckpt_path, map_location=torch.device('cpu')))
        if args.mask_path is not None:
            SZ = 224 if args.dataset == 'imagenet' else 32
            data = torch.randn(2, 3, SZ, SZ)
            ms = ModelSpeedup(net, data, args.mask_path, torch.device('cpu'))
            ms.speedup_model()

    net.to(args.device)
    if torch.cuda.is_available() and args.n_gpu > 1:
        net = torch.nn.DataParallel(net, list(range(args.n_gpu)))
    return net
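
A minimal invocation sketch for get_model(); the argument fields are inferred from the function body above, and the values are hypothetical:

from types import SimpleNamespace

args = SimpleNamespace(dataset='cifar10', model_type='mobilenet',
                       ckpt_path=None, mask_path=None,
                       device='cpu', n_gpu=1)
net = get_model(args)
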
Example #5
def flops_counter(args):
    # model speed up
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_loader, val_loader, criterion = get_data(args)

    if args.pruner != 'AutoCompressPruner':
        if args.model == 'LeNet':
            model = LeNet().to(device)
        elif args.model == 'vgg16':
            model = VGG(depth=16).to(device)
        elif args.model == 'resnet18':
            model = models.resnet18(pretrained=False,
                                    num_classes=10).to(device)
        elif args.model == 'mobilenet_v2':
            model = models.mobilenet_v2(pretrained=False).to(device)

        def evaluator(model):
            return test(model, device, criterion, val_loader)

        model.load_state_dict(
            torch.load(
                os.path.join(args.experiment_data_dir,
                             'model_fine_tuned.pth')))
        masks_file = os.path.join(args.experiment_data_dir, 'mask.pth')

        dummy_input = get_dummy_input(args, device)

        m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
        m_speedup.speedup_model()
        evaluation_result = evaluator(model)
        print('Evaluation result (speed up model): %s' % evaluation_result)

        with open(os.path.join(args.experiment_data_dir,
                               'performance.json')) as f:
            result = json.load(f)

        result['speedup'] = evaluation_result
        with open(os.path.join(args.experiment_data_dir, 'performance.json'),
                  'w+') as f:
            json.dump(result, f)

        torch.save(
            model.state_dict(),
            os.path.join(args.experiment_data_dir, 'model_speed_up.pth'))
        print('Speed up model saved to %s' % args.experiment_data_dir)
    else:
        model = torch.load(
            os.path.join(args.experiment_data_dir, 'model_fine_tuned.pth'))
        model.eval()
        flops, params = count_flops_params(model, (1, 3, 32, 32))
        with open(os.path.join(args.experiment_data_dir, 'flops.json'),
                  'w+') as f:
            json.dump({'FLOPS': int(flops), 'params': int(params)}, f)
Example #6
    def test_speedup_vgg16(self):
        prune_model_l1(vgg16())
        model = vgg16()
        model.train()
        ms = ModelSpeedup(model, torch.randn(2, 3, 32, 32), MASK_FILE)
        ms.speedup_model()

        orig_model = vgg16()
        assert model.training
        assert model.features[2].out_channels == int(
            orig_model.features[2].out_channels * SPARSITY)
        assert model.classifier[0].in_features == int(
            orig_model.classifier[0].in_features * SPARSITY)
Example #7
    def test_dependency_aware_pruning(self):
        model_zoo = ['resnet18']
        pruners = [L1FilterPruner, L2FilterPruner, FPGMPruner, TaylorFOWeightFilterPruner]
        sparsity = 0.7
        cfg_list = [{'op_types': ['Conv2d'], 'sparsity':sparsity}]
        dummy_input = torch.ones(1, 3, 224, 224)
        for model_name in model_zoo:
            for pruner in pruners:
                print('Testing on ', pruner)
                ori_filters = {}
                Model = getattr(models, model_name)
                net = Model(pretrained=True, progress=False)
                # record the number of the filter of each conv layer
                for name, module in net.named_modules():
                    if isinstance(module, nn.Conv2d):
                        ori_filters[name] = module.out_channels

                # for pruners that are based on activations, we need to feed
                # enough data before calling the compress function.
                optimizer = torch.optim.SGD(net.parameters(), lr=0.0001,
                                 momentum=0.9,
                                 weight_decay=4e-5)
                criterion = torch.nn.CrossEntropyLoss()
                tmp_pruner = pruner(
                    net, cfg_list, optimizer, dependency_aware=True, dummy_input=dummy_input)
                # train a single batch so that the pruner can collect the
                # statistics
                optimizer.zero_grad()
                out = net(dummy_input)
                batchsize = dummy_input.size(0)
                loss = criterion(out, torch.zeros(batchsize, dtype=torch.int64))
                loss.backward()
                optimizer.step()

                tmp_pruner.compress()
                tmp_pruner.export_model(MODEL_FILE, MASK_FILE)
                # if we want to use the same model, we should unwrap the pruner before the speedup
                tmp_pruner._unwrap_model()
                ms = ModelSpeedup(net, dummy_input, MASK_FILE)
                ms.speedup_model()
                for name, module in net.named_modules():
                    if isinstance(module, nn.Conv2d):
                        expected = int(ori_filters[name] * (1-sparsity))
                        filter_diff = abs(expected - module.out_channels)
                        errmsg = '%s Ori: %d, Expected: %d, Real: %d' % (
                            name, ori_filters[name], expected, module.out_channels)

                        # because we are using dependency-aware mode, the number of
                        # filters after speedup should be ori_filters[name] * (1 - sparsity)
                        print(errmsg)
                        assert filter_diff <= 1, errmsg
Example #8
    def test_channel_prune(self):
        orig_net = resnet18(num_classes=10).to(device)
        channel_prune(orig_net)
        state_dict = torch.load(MODEL_FILE)

        orig_net = resnet18(num_classes=10).to(device)
        orig_net.load_state_dict(state_dict)
        apply_compression_results(orig_net, MASK_FILE)
        orig_net.eval()

        net = resnet18(num_classes=10).to(device)

        net.load_state_dict(state_dict)
        net.eval()

        data = torch.randn(BATCH_SIZE, 3, 224, 224).to(device)
        ms = ModelSpeedup(net, data, MASK_FILE)
        ms.speedup_model()
        ms.bound_model(data)

        net.eval()

        ori_sum = orig_net(data).abs().sum().item()
        speeded_sum = net(data).abs().sum().item()

        print(ori_sum, speeded_sum)
        assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
            (abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
Example #9
    def test_dependency_aware_random_config(self):
        model_zoo = ['resnet18']
        pruners = [L1FilterPruner, L2FilterPruner, FPGMPruner, TaylorFOWeightFilterPruner,
                   ActivationMeanRankFilterPruner, ActivationAPoZRankFilterPruner]
        dummy_input = torch.ones(1, 3, 224, 224)
        for model_name in model_zoo:
            for pruner in pruners:
                Model = getattr(models, model_name)
                cfg_generator = [generate_random_sparsity, generate_random_sparsity_v2]
                for _generator in cfg_generator:
                    net = Model(pretrained=True, progress=False)
                    cfg_list = _generator(net)

                    print('\n\nModel:', model_name)
                    print('Pruner', pruner)
                    print('Config_list:', cfg_list)
                    # for pruners that are based on activations, we need to feed
                    # enough data before calling the compress function.
                    optimizer = torch.optim.SGD(net.parameters(), lr=0.0001,
                                    momentum=0.9,
                                    weight_decay=4e-5)
                    criterion = torch.nn.CrossEntropyLoss()
                    tmp_pruner = pruner(
                        net, cfg_list, optimizer, dependency_aware=True, dummy_input=dummy_input)
                    # train a single batch so that the pruner can collect the
                    # statistics
                    optimizer.zero_grad()
                    out = net(dummy_input)
                    batchsize = dummy_input.size(0)
                    loss = criterion(out, torch.zeros(batchsize, dtype=torch.int64))
                    loss.backward()
                    optimizer.step()

                    tmp_pruner.compress()
                    tmp_pruner.export_model(MODEL_FILE, MASK_FILE)
                    # if we want to use the same model, we should unwrap the pruner before the speedup
                    tmp_pruner._unwrap_model()
                    ms = ModelSpeedup(net, dummy_input, MASK_FILE)
                    ms.speedup_model()
Example #10
def model_inference(config):
    masks_file = config['masks_file']
    device = torch.device(config['device'])
    if config['model_name'] == 'unet':
        model = UNet(3, 1)
    elif config['model_name'] == 'vgg19':
        model = VGG(depth=19)
    elif config['model_name'] == 'naive':
        from model_prune_torch import NaiveModel
        model = NaiveModel()
    model.to(device)
    model.load_state_dict(torch.load(config['model_file'],
                                     map_location=device))
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, masks_file, device)
        start = time.time()
        for _ in range(1):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(1):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-05):
            torch.save(model, config['save_dir_for_speedup'])
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
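
The config dict that model_inference() expects can be read off the lookups above; a hypothetical example (paths and input shape are placeholders):

config = {
    'model_name': 'naive',                      # one of 'unet', 'vgg19', 'naive'
    'model_file': './checkpoints/naive.pth',    # placeholder checkpoint path
    'masks_file': './checkpoints/mask.pth',     # placeholder mask path
    'device': 'cpu',
    'input_shape': (1, 1, 28, 28),              # placeholder input shape
    'save_dir_for_speedup': './checkpoints/naive_speedup.pth',
}
model_inference(config)
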
Example #11
def model_inference(config):
    masks_file = config['masks_file']
    device = torch.device(config['device'])
    if config['model_name'] == 'unet':
        model = UNet(3, 1)
    elif config['model_name'] == 'testNet':
        model = testNet()
    elif config['model_name'] == 'naive':
        from model_prune_torch import NaiveModel
        model = NaiveModel()
    model.to(device)
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, masks_file,
                                  'cpu' if config['device'] == 'cpu' else None)
        start = time.time()
        for _ in range(1):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file,
                                 'cpu' if config['device'] == 'cpu' else None)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(1):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
Example #12
def test_nni():
    model = load_t_net()

    config_list = [{'sparsity': 0.5, 'op_types': ['Conv2d']}]
    pruner = SlimPruner(model, config_list)
    model = pruner.compress()

    print(model)
    masks_file = "./nni/mask.pth"
    pruner.export_model(model_path="./nni/nni_mod.pth", mask_path=masks_file)
    print("export ok")
    apply_compression_results(model, masks_file)

    # model: the model to speed up
    # dummy_input: an example input for the model, passed to `jit.trace`
    # masks_file: the mask file created by the pruning algorithm
    dummy_input = torch.randn(1, 3, 384, 224)
    m_speedup = ModelSpeedup(model, dummy_input.cuda(), masks_file)
    m_speedup.speedup_model()
    dummy_input = dummy_input.cuda()
    start = time.time()
    out = model(dummy_input)
    summary(model, dummy_input)
    print('elapsed time: ', time.time() - start)
Example #13
def main(args):
    # prepare dataset
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_loader, val_loader, criterion = get_data(args)
    model, optimizer = get_trained_model_optimizer(args, device, train_loader,
                                                   val_loader, criterion)

    def short_term_fine_tuner(model, epochs=1):
        for epoch in range(epochs):
            train(args, model, device, train_loader, criterion, optimizer,
                  epoch)

    def trainer(model, optimizer, criterion, epoch, callback):
        return train(args,
                     model,
                     device,
                     train_loader,
                     criterion,
                     optimizer,
                     epoch=epoch,
                     callback=callback)

    def evaluator(model):
        return test(model, device, criterion, val_loader)

    # used to save the performance of the original & pruned & finetuned models
    result = {'flops': {}, 'params': {}, 'performance': {}}

    flops, params = count_flops_params(model, get_input_size(args.dataset))
    result['flops']['original'] = flops
    result['params']['original'] = params

    evaluation_result = evaluator(model)
    print('Evaluation result (original model): %s' % evaluation_result)
    result['performance']['original'] = evaluation_result

    # module types to prune, only "Conv2d" supported for channel pruning
    if args.base_algo in ['l1', 'l2']:
        op_types = ['Conv2d']
    elif args.base_algo == 'level':
        op_types = ['default']

    config_list = [{'sparsity': args.sparsity, 'op_types': op_types}]
    dummy_input = get_dummy_input(args, device)

    if args.pruner == 'L1FilterPruner':
        pruner = L1FilterPruner(model, config_list)
    elif args.pruner == 'L2FilterPruner':
        pruner = L2FilterPruner(model, config_list)
    elif args.pruner == 'ActivationMeanRankFilterPruner':
        pruner = ActivationMeanRankFilterPruner(model, config_list)
    elif args.pruner == 'ActivationAPoZRankFilterPruner':
        pruner = ActivationAPoZRankFilterPruner(model, config_list)
    elif args.pruner == 'NetAdaptPruner':
        pruner = NetAdaptPruner(model,
                                config_list,
                                short_term_fine_tuner=short_term_fine_tuner,
                                evaluator=evaluator,
                                base_algo=args.base_algo,
                                experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'ADMMPruner':
        # users are free to change the config here
        if args.model == 'LeNet':
            if args.base_algo in ['l1', 'l2']:
                config_list = [{
                    'sparsity': 0.8,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv2']
                }]
            elif args.base_algo == 'level':
                config_list = [{
                    'sparsity': 0.8,
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_names': ['conv2']
                }, {
                    'sparsity': 0.991,
                    'op_names': ['fc1']
                }, {
                    'sparsity': 0.93,
                    'op_names': ['fc2']
                }]
        else:
            raise ValueError('Example only implemented for LeNet.')
        pruner = ADMMPruner(model,
                            config_list,
                            trainer=trainer,
                            num_iterations=2,
                            training_epochs=2)
    elif args.pruner == 'SimulatedAnnealingPruner':
        pruner = SimulatedAnnealingPruner(
            model,
            config_list,
            evaluator=evaluator,
            base_algo=args.base_algo,
            cool_down_rate=args.cool_down_rate,
            experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'AutoCompressPruner':
        pruner = AutoCompressPruner(
            model,
            config_list,
            trainer=trainer,
            evaluator=evaluator,
            dummy_input=dummy_input,
            num_iterations=3,
            optimize_mode='maximize',
            base_algo=args.base_algo,
            cool_down_rate=args.cool_down_rate,
            admm_num_iterations=30,
            admm_training_epochs=5,
            experiment_data_dir=args.experiment_data_dir)
    else:
        raise ValueError("Pruner not supported.")

    # pruner.compress() returns the masked model,
    # but for AutoCompressPruner it directly returns the pruned model
    model = pruner.compress()
    evaluation_result = evaluator(model)
    print('Evaluation result (masked model): %s' % evaluation_result)
    result['performance']['pruned'] = evaluation_result

    if args.save_model:
        pruner.export_model(
            os.path.join(args.experiment_data_dir, 'model_masked.pth'),
            os.path.join(args.experiment_data_dir, 'mask.pth'))
        print('Masked model saved to %s' % args.experiment_data_dir)

    # model speed up
    if args.speed_up:
        if args.pruner != 'AutoCompressPruner':
            if args.model == 'LeNet':
                model = LeNet().to(device)
            elif args.model == 'vgg16':
                model = VGG(depth=16).to(device)
            elif args.model == 'resnet18':
                model = ResNet18().to(device)
            elif args.model == 'resnet50':
                model = ResNet50().to(device)
            elif args.model == 'mobilenet_v2':
                model = models.mobilenet_v2(pretrained=False).to(device)

            model.load_state_dict(
                torch.load(
                    os.path.join(args.experiment_data_dir,
                                 'model_masked.pth')))
            masks_file = os.path.join(args.experiment_data_dir, 'mask.pth')

            m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
            m_speedup.speedup_model()
            evaluation_result = evaluator(model)
            print('Evaluation result (speed up model): %s' % evaluation_result)
            result['performance']['speedup'] = evaluation_result

            torch.save(
                model.state_dict(),
                os.path.join(args.experiment_data_dir, 'model_speed_up.pth'))
            print('Speed up model saved to %s' % args.experiment_data_dir)
        flops, params = count_flops_params(model, get_input_size(args.dataset))
        result['flops']['speedup'] = flops
        result['params']['speedup'] = params

    if args.fine_tune:
        if args.dataset == 'mnist':
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
        elif args.dataset == 'cifar10' and args.model == 'vgg16':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=0.01,
                                        momentum=0.9,
                                        weight_decay=5e-4)
            scheduler = MultiStepLR(optimizer,
                                    milestones=[
                                        int(args.fine_tune_epochs * 0.5),
                                        int(args.fine_tune_epochs * 0.75)
                                    ],
                                    gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet18':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=0.1,
                                        momentum=0.9,
                                        weight_decay=5e-4)
            scheduler = MultiStepLR(optimizer,
                                    milestones=[
                                        int(args.fine_tune_epochs * 0.5),
                                        int(args.fine_tune_epochs * 0.75)
                                    ],
                                    gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet50':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=0.1,
                                        momentum=0.9,
                                        weight_decay=5e-4)
            scheduler = MultiStepLR(optimizer,
                                    milestones=[
                                        int(args.fine_tune_epochs * 0.5),
                                        int(args.fine_tune_epochs * 0.75)
                                    ],
                                    gamma=0.1)
        best_acc = 0
        for epoch in range(args.fine_tune_epochs):
            train(args, model, device, train_loader, criterion, optimizer,
                  epoch)
            scheduler.step()
            acc = evaluator(model)
            if acc > best_acc:
                best_acc = acc
                torch.save(
                    model.state_dict(),
                    os.path.join(args.experiment_data_dir,
                                 'model_fine_tuned.pth'))

        print('Evaluation result (fine tuned): %s' % best_acc)
        print('Fine-tuned model saved to %s' % args.experiment_data_dir)
        result['performance']['finetuned'] = best_acc

    with open(os.path.join(args.experiment_data_dir, 'result.json'),
              'w+') as f:
        json.dump(result, f)
Example #14
    def compress(self):
        """
        Compress the model with AutoCompress.

        Returns
        -------
        torch.nn.Module
            model with specified modules compressed.
        """
        _logger.info('Starting AutoCompress pruning...')

        sparsity_each_round = 1 - pow(1 - self._sparsity,
                                      1 / self._num_iterations)

        for i in range(self._num_iterations):
            _logger.info('Pruning iteration: %d', i)
            _logger.info('Target sparsity this round: %s',
                         1 - pow(1 - sparsity_each_round, i + 1))

            # SimulatedAnnealingPruner
            _logger.info(
                'Generating sparsities with SimulatedAnnealingPruner...')
            SApruner = SimulatedAnnealingPruner(
                model=copy.deepcopy(self._model_to_prune),
                config_list=[{
                    "sparsity": sparsity_each_round,
                    "op_types": ['Conv2d']
                }],
                evaluator=self._evaluator,
                optimize_mode=self._optimize_mode,
                base_algo=self._base_algo,
                start_temperature=self._start_temperature,
                stop_temperature=self._stop_temperature,
                cool_down_rate=self._cool_down_rate,
                perturbation_magnitude=self._perturbation_magnitude,
                experiment_data_dir=self._experiment_data_dir)
            config_list = SApruner.compress(return_config_list=True)
            _logger.info("Generated config_list : %s", config_list)

            # ADMMPruner
            _logger.info('Performing structured pruning with ADMMPruner...')
            ADMMpruner = ADMMPruner(model=copy.deepcopy(self._model_to_prune),
                                    config_list=config_list,
                                    trainer=self._trainer,
                                    num_iterations=self._admm_num_iterations,
                                    training_epochs=self._admm_training_epochs,
                                    row=self._row,
                                    base_algo=self._base_algo)
            ADMMpruner.compress()

            ADMMpruner.export_model(
                os.path.join(self._experiment_data_dir,
                             'model_admm_masked.pth'),
                os.path.join(self._experiment_data_dir, 'mask.pth'))

            # use speedup to prune the model before the next iteration, because
            # SimulatedAnnealingPruner & ADMMPruner don't take masked models
            self._model_to_prune.load_state_dict(
                torch.load(
                    os.path.join(self._experiment_data_dir,
                                 'model_admm_masked.pth')))

            masks_file = os.path.join(self._experiment_data_dir, 'mask.pth')
            device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")

            _logger.info('Speeding up models...')
            m_speedup = ModelSpeedup(self._model_to_prune, self._dummy_input,
                                     masks_file, device)
            m_speedup.speedup_model()

            evaluation_result = self._evaluator(self._model_to_prune)
            _logger.info(
                'Evaluation result of the pruned model in iteration %d: %s', i,
                evaluation_result)

        _logger.info('----------Compression finished--------------')

        os.remove(
            os.path.join(self._experiment_data_dir, 'model_admm_masked.pth'))
        os.remove(os.path.join(self._experiment_data_dir, 'mask.pth'))

        return self._model_to_prune
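
As a quick check of the sparsity_each_round formula above: the per-round sparsity is chosen so that the kept fraction compounds to the overall target, i.e. (1 - s_round) ** num_iterations == 1 - s_total. With an illustrative target of 0.5 over 3 iterations (values chosen here for the example, not from the source):

s_total, num_iterations = 0.5, 3
s_round = 1 - (1 - s_total) ** (1 / num_iterations)
print(round(s_round, 4))                # 0.2063
print((1 - s_round) ** num_iterations)  # ~0.5: the kept fraction compounds back to the target
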
Example #15
def model_inference(config):
    masks_file = './speedup_test/mask_new.pth'
    shape_mask = './speedup_test/mask_new.pth'
    org_mask = './speedup_test/mask.pth'
    rn50 = models.resnet50()
    m_paras = torch.load('./speedup_test/model_fine_tuned.pth')
    ## delete mask entries from the checkpoint
    m_new = collections.OrderedDict()
    for key in m_paras:
        if 'mask' in key: continue
        if 'module' in key:
            m_new[key.replace('module.', '')] = m_paras[key]
        else:
            m_new[key] = m_paras[key]
    rn50.load_state_dict(m_new)
    rn50.cuda()
    rn50.eval()

    dummy_input = torch.randn(64, 3, 224, 224).cuda()
    use_mask_out = use_speedup_out = None
    rn = rn50
    apply_compression_results(rn, org_mask, 'cuda')
    rn_mask_out = rn(dummy_input)
    model = rn50
    # must run use_mask before use_speedup because use_speedup modify the model
    if use_mask:
        apply_compression_results(model, masks_file, 'cuda')
        torch.onnx.export(model,
                          dummy_input,
                          'resnet_masked.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={
                              'inputs': [0],
                              'mask': [0]
                          },
                          keep_initializers_as_inputs=True)

        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    print('Model is ', model)
    print('before speed up===================')
    #for para in model.state_dict():
    #    print(para)
    #    print(model.state_dict()[para])
    #    print(model.state_dict()[para].shape)
    flops, paras = count_flops_params(model, (1, 3, 224, 224))
    print(
        'flops and parameters before speedup are {} FLOPS and {} params'.format(
            flops, paras))
    if use_speedup:
        dummy_input = dummy_input.cuda()
        m_speedup = ModelSpeedup(model, dummy_input, shape_mask, 'cuda')
        m_speedup.speedup_model()
        print('==' * 20)
        print('Start inference')
        torch.onnx.export(model,
                          dummy_input,
                          'resnet_fpgm.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={
                              'inputs': [0],
                              'mask': [0]
                          },
                          keep_initializers_as_inputs=True)
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    print('After speedup model is ', model)
    print('=================')
    print('After speedup')
    flops, paras = count_flops_params(model, (1, 3, 224, 224))
    print(
        'flops and parameters after speedup are {} FLOPS and {} params'.format(
            flops, paras))
    #for para in model.state_dict():
    #    print(para)
    #    print(model.state_dict()[para])
    #    print(model.state_dict()[para].shape)
    if compare_results:
        print(rn_mask_out)
        print('another is', use_speedup_out)
        if torch.allclose(rn_mask_out, use_speedup_out, atol=1e-6):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
    # start the accuracy check
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        start = time.time()
        evaluate(model,
                 criterion,
                 data_loader_test,
                 device="cuda",
                 print_freq=20)
        print('elapsed time is ', time.time() - start)
Example #16
    checkpoint = torch.load(args.model_file, map_location=device)
    model.load_state_dict(checkpoint, strict=False)
    model.to(device)
    model.eval()

    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modify the model
    if use_mask:
        apply_compression_results(model, args.masks_file, device)
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, args.masks_file, device)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
    torch.save(
        model.state_dict(),
        "output/DBNet_opensource_nni_resnet18_fpn_db/checkpoint/pruner_speed.pth"
    )
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
Example #17
def model_inference(config):
    model_trained = './experiment_data/resnet_bn/model_fine_tuned_first.pth'
    rn50 = resnet50()
    m_paras = torch.load(model_trained)
    ## delete mask entries from the checkpoint
    m_new = collections.OrderedDict()
    mask = dict()
    for key in m_paras:
        if 'weight_mask_b' in key: continue
        if 'weight_mask' in key:
            if 'module_added' not in key:
                mask[key.replace('.weight_mask', '')] = dict()
                mask[key.replace('.weight_mask', '')]['weight'] = m_paras[key]
                mask[key.replace('.weight_mask', '')]['bias'] = m_paras[key]
            else:
                mask[key.replace('.relu1.module_added.weight_mask',
                                 '.bn3')] = {}
                mask[key.replace('.relu1.module_added.weight_mask',
                                 '.bn3')]['weight'] = m_paras[key]
                mask[key.replace('.relu1.module_added.weight_mask',
                                 '.bn3')]['bias'] = m_paras[key]
                if '0.relu1' in key:
                    mask[key.replace('relu1.module_added.weight_mask',
                                     'downsample.1')] = {}
                    mask[key.replace('relu1.module_added.weight_mask',
                                     'downsample.1')]['weight'] = m_paras[key]
                    mask[key.replace('relu1.module_added.weight_mask',
                                     'downsample.1')]['bias'] = m_paras[key]
            continue
        if 'module_added' in key:
            continue
        elif 'module' in key:
            m_new[key.replace('module.', '')] = m_paras[key]
        else:
            m_new[key] = m_paras[key]
    for key in mask:
        #modify the weight and bias of model with pruning
        m_new[key + '.weight'] = m_new[key + '.weight'].data.mul(
            mask[key]['weight'])
        m_new[key + '.bias'] = m_new[key + '.bias'].data.mul(mask[key]['bias'])
    rn50.load_state_dict(m_new)
    rn50.cuda()
    rn50.eval()
    torch.save(mask, 'taylor_mask.pth')
    mask_file = './taylor_mask.pth'
    dummy_input = torch.randn(64, 3, 224, 224).cuda()
    use_mask_out = use_speedup_out = None
    rn = rn50
    rn_mask_out = rn(dummy_input)
    model = rn50
    if use_mask:
        torch.onnx.export(model,
                          dummy_input,
                          'resnet_masked_taylor_1700.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={
                              'inputs': [0],
                              'mask': [0]
                          },
                          keep_initializers_as_inputs=True)

        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        elapsed_t = time.time() - start
        print('elapsed time when use mask: ', elapsed_t)
        _logger.info(
            'for batch size 64 and with 32 runs, the elapsed time is {}'.
            format(elapsed_t))
    print('before speed up===================')
    flops, paras = count_flops_params(model, (1, 3, 224, 224))
    _logger.info(
        'flops and parameters before speedup are {} FLOPS and {} params'.format(
            flops, paras))
    if use_speedup:
        dummy_input = dummy_input.cuda()
        m_speedup = ModelSpeedup(model, dummy_input, mask_file, 'cuda')
        m_speedup.speedup_model()
        print('==' * 20)
        print('Start inference')
        torch.onnx.export(model,
                          dummy_input,
                          'resnet_taylor_1700.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={
                              'inputs': [0],
                              'mask': [0]
                          },
                          keep_initializers_as_inputs=True)
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        elapsed_t1 = time.time() - start
        print('elapsed time when use speedup: ', elapsed_t1)
        _logger.info(
            'elapsed time with batch_size 64 and in 32 runs is {}'.format(
                elapsed_t1))
    #print('After speedup model is ',model)
    _logger.info('model structure after speedup is ====')
    _logger.info(model)
    print('=================')
    print('After speedup')
    flops, paras = count_flops_params(model, (1, 3, 224, 224))
    _logger.info(
        'After speedup flops are {} and number of parameters are {}'.format(
            flops, paras))
    if compare_results:
        print(rn_mask_out)
        print('another is', use_speedup_out)
        if torch.allclose(rn_mask_out, use_speedup_out, atol=1e-6):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError(
                'the outputs from use_mask and use_speedup are different')
    # start the accuracy check
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        start = time.time()
        evaluate(model,
                 criterion,
                 data_loader_test,
                 device="cuda",
                 print_freq=20)
        print('elapsed time is ', time.time() - start)