def test_speedup_bigmodel(self):
    prune_model_l1(BigModel())
    model = BigModel()
    apply_compression_results(model, MASK_FILE, 'cpu')
    model.eval()
    mask_out = model(dummy_input)

    model.train()
    ms = ModelSpeedup(model, dummy_input, MASK_FILE)
    ms.speedup_model()
    assert model.training

    model.eval()
    speedup_out = model(dummy_input)
    if not torch.allclose(mask_out, speedup_out, atol=1e-07):
        print('input:', dummy_input.size(), torch.abs(dummy_input).sum((2, 3)))
        print('mask_out:', mask_out)
        print('speedup_out:', speedup_out)
        raise RuntimeError('model speedup inference result is incorrect!')

    orig_model = BigModel()
    assert model.backbone2.conv1.out_channels == int(orig_model.backbone2.conv1.out_channels * SPARSITY)
    assert model.backbone2.conv2.in_channels == int(orig_model.backbone2.conv2.in_channels * SPARSITY)
    assert model.backbone2.conv2.out_channels == int(orig_model.backbone2.conv2.out_channels * SPARSITY)
    assert model.backbone2.fc1.in_features == int(orig_model.backbone2.fc1.in_features * SPARSITY)
def generate_model(self, cfg):
    """
    Generate a model according to the channel configuration.

    The generated model has the same network architecture as
    ``self.bound_model``, but the out_channels of each conv layer are
    configured according to ``cfg``.

    Parameters
    ----------
    cfg : list
        cfg for the pruner.
    """
    model = copy.deepcopy(self.bound_model)
    pruner = Constrained_L1FilterPruner(model, cfg, self.dummy_input)
    pruner.compress()
    _tmp_ck_path = os.path.join(self.ck_dir, 'tmp.pth')
    _tmp_mask_path = os.path.join(self.ck_dir, 'mask')
    pruner.export_model(_tmp_ck_path, _tmp_mask_path)
    pruner._unwrap_model()
    ms = ModelSpeedup(model, self.dummy_input, _tmp_mask_path)
    ms.speedup_model()
    try:
        model(self.dummy_input)
        print('Success inference')
    except Exception as err:
        _logger.warning('The updated model may have shape conflicts')
        _logger.warning(err)
        traceback.print_exc()
        # the model is not valid: it has a shape conflict under the given cfg, so return None
        return None
    return model
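For reference, a minimal sketch (not taken from the original source) of the kind of cfg list generate_model expects: NNI-style config_list entries, each pairing a sparsity with op types and, optionally, op names.

# Illustrative cfg list; the concrete op_names ('conv1') and sparsities are made up for the example.
example_cfg = [
    {'sparsity': 0.5, 'op_types': ['Conv2d']},                          # prune all Conv2d layers to 50%
    {'sparsity': 0.25, 'op_types': ['Conv2d'], 'op_names': ['conv1']},  # looser sparsity for one named layer
]
# Hypothetical call, assuming `searcher` is the object that defines generate_model:
# pruned_model = searcher.generate_model(example_cfg)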
def test_channel_prune(self):
    orig_net = resnet18(num_classes=10).to(device)
    channel_prune(orig_net)

    state_dict = torch.load(MODEL_FILE)
    orig_net = resnet18(num_classes=10).to(device)
    orig_net.load_state_dict(state_dict)
    apply_compression_results(orig_net, MASK_FILE)
    orig_net.eval()

    net = resnet18(num_classes=10).to(device)
    net.load_state_dict(state_dict)
    net.eval()

    data = torch.randn(BATCH_SIZE, 3, 224, 224).to(device)
    ms = ModelSpeedup(net, data, MASK_FILE)
    ms.speedup_model()
    ms.bound_model(data)
    net.eval()

    ori_sum = orig_net(data).abs().sum().item()
    speeded_sum = net(data).abs().sum().item()
    print(ori_sum, speeded_sum)
    assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
        (abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
def test_speedup_integration(self):
    for model_name in ['resnet18', 'squeezenet1_1', 'mobilenet_v2', 'densenet121', 'inception_v3']:
        Model = getattr(models, model_name)
        net = Model(pretrained=True, progress=False).to(device)
        speedup_model = Model().to(device)
        net.eval()  # this line is necessary
        speedup_model.eval()
        # randomly generate the prune config for the pruner
        cfgs = generate_random_sparsity(net)
        pruner = L1FilterPruner(net, cfgs)
        pruner.compress()
        pruner.export_model(MODEL_FILE, MASK_FILE)
        pruner._unwrap_model()
        state_dict = torch.load(MODEL_FILE)
        speedup_model.load_state_dict(state_dict)
        zero_bn_bias(net)
        zero_bn_bias(speedup_model)

        data = torch.ones(BATCH_SIZE, 3, 224, 224).to(device)
        ms = ModelSpeedup(speedup_model, data, MASK_FILE)
        ms.speedup_model()

        ori_out = net(data)
        speeded_out = speedup_model(data)
        ori_sum = torch.sum(ori_out).item()
        speeded_sum = torch.sum(speeded_out).item()
        print('Sum of the output of %s (before speedup):' % model_name, ori_sum)
        print('Sum of the output of %s (after speedup):' % model_name, speeded_sum)
        assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
            (abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
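The test above relies on a generate_random_sparsity helper that is not shown here. A minimal sketch of what such a helper might look like, an assumption rather than the original implementation:

import random
import torch.nn as nn

def generate_random_sparsity_sketch(model):
    """Assign a random channel sparsity to every Conv2d layer (illustrative only)."""
    cfg_list = []
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            # hypothetical sparsity range; the real helper may use different bounds
            cfg_list.append({'sparsity': round(random.uniform(0.5, 0.99), 2),
                             'op_types': ['Conv2d'],
                             'op_names': [name]})
    return cfg_list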
def get_model(args):
    print('=> Building model..')
    if args.dataset == 'imagenet':
        n_class = 1000
    elif args.dataset == 'cifar10':
        n_class = 10
    else:
        raise NotImplementedError

    if args.model_type == 'mobilenet':
        net = MobileNet(n_class=n_class)
    elif args.model_type == 'mobilenetv2':
        net = MobileNetV2(n_class=n_class)
    elif args.model_type.startswith('resnet'):
        net = resnet.__dict__[args.model_type](pretrained=True)
        in_features = net.fc.in_features
        net.fc = nn.Linear(in_features, n_class)
    else:
        raise NotImplementedError

    if args.ckpt_path is not None:
        # the checkpoint can be a state_dict exported by amc_search.py or saved by amc_train.py
        print('=> Loading checkpoint {} ..'.format(args.ckpt_path))
        net.load_state_dict(torch.load(args.ckpt_path, torch.device('cpu')))

    if args.mask_path is not None:
        SZ = 224 if args.dataset == 'imagenet' else 32
        data = torch.randn(2, 3, SZ, SZ)
        ms = ModelSpeedup(net, data, args.mask_path, torch.device('cpu'))
        ms.speedup_model()

    net.to(args.device)
    if torch.cuda.is_available() and args.n_gpu > 1:
        net = torch.nn.DataParallel(net, list(range(args.n_gpu)))
    return net
def flops_counter(args):
    # model speedup
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_loader, val_loader, criterion = get_data(args)

    if args.pruner != 'AutoCompressPruner':
        if args.model == 'LeNet':
            model = LeNet().to(device)
        elif args.model == 'vgg16':
            model = VGG(depth=16).to(device)
        elif args.model == 'resnet18':
            model = models.resnet18(pretrained=False, num_classes=10).to(device)
        elif args.model == 'mobilenet_v2':
            model = models.mobilenet_v2(pretrained=False).to(device)

        def evaluator(model):
            return test(model, device, criterion, val_loader)

        model.load_state_dict(
            torch.load(os.path.join(args.experiment_data_dir, 'model_fine_tuned.pth')))
        masks_file = os.path.join(args.experiment_data_dir, 'mask.pth')
        dummy_input = get_dummy_input(args, device)

        m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
        m_speedup.speedup_model()

        evaluation_result = evaluator(model)
        print('Evaluation result (speed up model): %s' % evaluation_result)

        with open(os.path.join(args.experiment_data_dir, 'performance.json')) as f:
            result = json.load(f)
        result['speedup'] = evaluation_result
        with open(os.path.join(args.experiment_data_dir, 'performance.json'), 'w+') as f:
            json.dump(result, f)

        torch.save(model.state_dict(),
                   os.path.join(args.experiment_data_dir, 'model_speed_up.pth'))
        print('Speed up model saved to %s' % args.experiment_data_dir)
    else:
        model = torch.load(os.path.join(args.experiment_data_dir, 'model_fine_tuned.pth'))
        model.eval()

    flops, params = count_flops_params(model, (1, 3, 32, 32))
    with open(os.path.join(args.experiment_data_dir, 'flops.json'), 'w+') as f:
        json.dump({'FLOPS': int(flops), 'params': int(params)}, f)
def test_speedup_vgg16(self):
    prune_model_l1(vgg16())
    model = vgg16()
    model.train()
    ms = ModelSpeedup(model, torch.randn(2, 3, 32, 32), MASK_FILE)
    ms.speedup_model()

    orig_model = vgg16()
    assert model.training
    assert model.features[2].out_channels == int(orig_model.features[2].out_channels * SPARSITY)
    assert model.classifier[0].in_features == int(orig_model.classifier[0].in_features * SPARSITY)
def test_dependency_aware_pruning(self):
    model_zoo = ['resnet18']
    pruners = [L1FilterPruner, L2FilterPruner, FPGMPruner, TaylorFOWeightFilterPruner]
    sparsity = 0.7
    cfg_list = [{'op_types': ['Conv2d'], 'sparsity': sparsity}]
    dummy_input = torch.ones(1, 3, 224, 224)
    for model_name in model_zoo:
        for pruner in pruners:
            print('Testing on ', pruner)
            ori_filters = {}
            Model = getattr(models, model_name)
            net = Model(pretrained=True, progress=False)
            # record the number of filters of each conv layer
            for name, module in net.named_modules():
                if isinstance(module, nn.Conv2d):
                    ori_filters[name] = module.out_channels
            # for the pruners that are based on activations, we need to feed
            # enough data before we call the compress function
            optimizer = torch.optim.SGD(net.parameters(), lr=0.0001,
                                        momentum=0.9, weight_decay=4e-5)
            criterion = torch.nn.CrossEntropyLoss()
            tmp_pruner = pruner(net, cfg_list, optimizer,
                                dependency_aware=True, dummy_input=dummy_input)
            # train a single batch so that the pruner can collect the statistics
            optimizer.zero_grad()
            out = net(dummy_input)
            batchsize = dummy_input.size(0)
            loss = criterion(out, torch.zeros(batchsize, dtype=torch.int64))
            loss.backward()
            optimizer.step()

            tmp_pruner.compress()
            tmp_pruner.export_model(MODEL_FILE, MASK_FILE)
            # if we want to reuse the same model, we should unwrap the pruner before the speedup
            tmp_pruner._unwrap_model()
            ms = ModelSpeedup(net, dummy_input, MASK_FILE)
            ms.speedup_model()
            for name, module in net.named_modules():
                if isinstance(module, nn.Conv2d):
                    expected = int(ori_filters[name] * (1 - sparsity))
                    filter_diff = abs(expected - module.out_channels)
                    errmsg = '%s Ori: %d, Expected: %d, Real: %d' % (
                        name, ori_filters[name], expected, module.out_channels)
                    # because we are using the dependency-aware mode, the number of
                    # filters after speedup should be ori_filters[name] * (1 - sparsity)
                    print(errmsg)
                    assert filter_diff <= 1, errmsg
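A quick numeric check of the assertion above, with an example filter count that is not taken from the test itself: with sparsity 0.7, a conv layer that originally had 64 filters should keep int(64 * (1 - 0.7)) = 19 filters after dependency-aware pruning and speedup, and the test tolerates an off-by-one from rounding inside the pruner.

sparsity = 0.7
ori_out_channels = 64                              # example value, not from the test
expected = int(ori_out_channels * (1 - sparsity))  # number of filters kept after speedup
assert expected == 19                              # the test above allows |real - expected| <= 1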
def test_dependency_aware_random_config(self):
    model_zoo = ['resnet18']
    pruners = [L1FilterPruner, L2FilterPruner, FPGMPruner, TaylorFOWeightFilterPruner,
               ActivationMeanRankFilterPruner, ActivationAPoZRankFilterPruner]
    dummy_input = torch.ones(1, 3, 224, 224)
    for model_name in model_zoo:
        for pruner in pruners:
            Model = getattr(models, model_name)
            cfg_generator = [generate_random_sparsity, generate_random_sparsity_v2]
            for _generator in cfg_generator:
                net = Model(pretrained=True, progress=False)
                cfg_list = _generator(net)
                print('\n\nModel:', model_name)
                print('Pruner:', pruner)
                print('Config_list:', cfg_list)
                # for the pruners that are based on activations, we need to feed
                # enough data before we call the compress function
                optimizer = torch.optim.SGD(net.parameters(), lr=0.0001,
                                            momentum=0.9, weight_decay=4e-5)
                criterion = torch.nn.CrossEntropyLoss()
                tmp_pruner = pruner(net, cfg_list, optimizer,
                                    dependency_aware=True, dummy_input=dummy_input)
                # train a single batch so that the pruner can collect the statistics
                optimizer.zero_grad()
                out = net(dummy_input)
                batchsize = dummy_input.size(0)
                loss = criterion(out, torch.zeros(batchsize, dtype=torch.int64))
                loss.backward()
                optimizer.step()

                tmp_pruner.compress()
                tmp_pruner.export_model(MODEL_FILE, MASK_FILE)
                # if we want to reuse the same model, we should unwrap the pruner before the speedup
                tmp_pruner._unwrap_model()
                ms = ModelSpeedup(net, dummy_input, MASK_FILE)
                ms.speedup_model()
def model_inference(config):
    masks_file = config['masks_file']
    device = torch.device(config['device'])
    if config['model_name'] == 'unet':
        model = UNet(3, 1)
    elif config['model_name'] == 'vgg19':
        model = VGG(depth=19)
    elif config['model_name'] == 'naive':
        from model_prune_torch import NaiveModel
        model = NaiveModel()
    model.to(device)
    model.load_state_dict(torch.load(config['model_file'], map_location=device))
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, masks_file, device)
        start = time.time()
        for _ in range(1):
            use_mask_out = model(dummy_input)
        print('elapsed time when using mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(1):
            use_speedup_out = model(dummy_input)
        print('elapsed time when using speedup: ', time.time() - start)
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-05):
            torch.save(model, config['save_dir_for_speedup'])
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError('the outputs from use_mask and use_speedup are different')
def model_inference(config):
    masks_file = config['masks_file']
    device = torch.device(config['device'])
    if config['model_name'] == 'unet':
        model = UNet(3, 1)
    elif config['model_name'] == 'testNet':
        model = testNet()
    elif config['model_name'] == 'naive':
        from model_prune_torch import NaiveModel
        model = NaiveModel()
    model.to(device)
    model.eval()

    dummy_input = torch.randn(config['input_shape']).to(device)
    use_mask_out = use_speedup_out = None
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, masks_file,
                                  'cpu' if config['device'] == 'cpu' else None)
        start = time.time()
        for _ in range(1):
            use_mask_out = model(dummy_input)
        print('elapsed time when using mask: ', time.time() - start)
    if use_speedup:
        m_speedup = ModelSpeedup(model, dummy_input, masks_file,
                                 'cpu' if config['device'] == 'cpu' else None)
        m_speedup.speedup_model()
        start = time.time()
        for _ in range(1):
            use_speedup_out = model(dummy_input)
        print('elapsed time when using speedup: ', time.time() - start)
    if compare_results:
        if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError('the outputs from use_mask and use_speedup are different')
def test_nni():
    model = load_t_net()
    config_list = [{'sparsity': 0.5, 'op_types': ['Conv2d']}]
    pruner = SlimPruner(model, config_list)
    model = pruner.compress()
    print(model)

    masks_file = "./nni/mask.pth"
    pruner.export_model(model_path="./nni/nni_mod.pth", mask_path=masks_file)
    print("export ok")
    apply_compression_results(model, masks_file)

    # model: the model to be sped up
    # dummy_input: an example input for the model, passed to `jit.trace`
    # masks_file: the mask file created by the pruning algorithm
    dummy_input = torch.randn(1, 3, 384, 224)
    m_speedup = ModelSpeedup(model, dummy_input.cuda(), masks_file)
    m_speedup.speedup_model()

    dummy_input = dummy_input.cuda()
    start = time.time()
    out = model(dummy_input)
    summary(model, dummy_input)
    print('elapsed time: ', time.time() - start)
def main(args):
    # prepare dataset
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_loader, val_loader, criterion = get_data(args)
    model, optimizer = get_trained_model_optimizer(args, device, train_loader, val_loader, criterion)

    def short_term_fine_tuner(model, epochs=1):
        for epoch in range(epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)

    def trainer(model, optimizer, criterion, epoch, callback):
        return train(args, model, device, train_loader, criterion, optimizer,
                     epoch=epoch, callback=callback)

    def evaluator(model):
        return test(model, device, criterion, val_loader)

    # used to record the flops, params and performance of the original, pruned and fine-tuned models
    result = {'flops': {}, 'params': {}, 'performance': {}}

    flops, params = count_flops_params(model, get_input_size(args.dataset))
    result['flops']['original'] = flops
    result['params']['original'] = params

    evaluation_result = evaluator(model)
    print('Evaluation result (original model): %s' % evaluation_result)
    result['performance']['original'] = evaluation_result

    # module types to prune; only "Conv2d" is supported for channel pruning
    if args.base_algo in ['l1', 'l2']:
        op_types = ['Conv2d']
    elif args.base_algo == 'level':
        op_types = ['default']

    config_list = [{'sparsity': args.sparsity, 'op_types': op_types}]
    dummy_input = get_dummy_input(args, device)

    if args.pruner == 'L1FilterPruner':
        pruner = L1FilterPruner(model, config_list)
    elif args.pruner == 'L2FilterPruner':
        pruner = L2FilterPruner(model, config_list)
    elif args.pruner == 'ActivationMeanRankFilterPruner':
        pruner = ActivationMeanRankFilterPruner(model, config_list)
    elif args.pruner == 'ActivationAPoZRankFilterPruner':
        pruner = ActivationAPoZRankFilterPruner(model, config_list)
    elif args.pruner == 'NetAdaptPruner':
        pruner = NetAdaptPruner(model, config_list,
                                short_term_fine_tuner=short_term_fine_tuner,
                                evaluator=evaluator,
                                base_algo=args.base_algo,
                                experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'ADMMPruner':
        # users are free to change the config here
        if args.model == 'LeNet':
            if args.base_algo in ['l1', 'l2']:
                config_list = [{
                    'sparsity': 0.8,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_types': ['Conv2d'],
                    'op_names': ['conv2']
                }]
            elif args.base_algo == 'level':
                config_list = [{
                    'sparsity': 0.8,
                    'op_names': ['conv1']
                }, {
                    'sparsity': 0.92,
                    'op_names': ['conv2']
                }, {
                    'sparsity': 0.991,
                    'op_names': ['fc1']
                }, {
                    'sparsity': 0.93,
                    'op_names': ['fc2']
                }]
        else:
            raise ValueError('Example only implemented for LeNet.')
        pruner = ADMMPruner(model, config_list, trainer=trainer,
                            num_iterations=2, training_epochs=2)
    elif args.pruner == 'SimulatedAnnealingPruner':
        pruner = SimulatedAnnealingPruner(
            model, config_list, evaluator=evaluator, base_algo=args.base_algo,
            cool_down_rate=args.cool_down_rate,
            experiment_data_dir=args.experiment_data_dir)
    elif args.pruner == 'AutoCompressPruner':
        pruner = AutoCompressPruner(
            model, config_list, trainer=trainer, evaluator=evaluator,
            dummy_input=dummy_input, num_iterations=3, optimize_mode='maximize',
            base_algo=args.base_algo, cool_down_rate=args.cool_down_rate,
            admm_num_iterations=30, admm_training_epochs=5,
            experiment_data_dir=args.experiment_data_dir)
    else:
        raise ValueError("Pruner not supported.")

    # Pruner.compress() returns the masked model,
    # but for AutoCompressPruner, Pruner.compress() directly returns the pruned model
    model = pruner.compress()
    evaluation_result = evaluator(model)
    print('Evaluation result (masked model): %s' % evaluation_result)
    result['performance']['pruned'] = evaluation_result

    if args.save_model:
        pruner.export_model(
            os.path.join(args.experiment_data_dir, 'model_masked.pth'),
            os.path.join(args.experiment_data_dir, 'mask.pth'))
        print('Masked model saved to %s' % args.experiment_data_dir)

    # model speedup
    if args.speed_up:
        if args.pruner != 'AutoCompressPruner':
            if args.model == 'LeNet':
                model = LeNet().to(device)
            elif args.model == 'vgg16':
                model = VGG(depth=16).to(device)
            elif args.model == 'resnet18':
                model = ResNet18().to(device)
            elif args.model == 'resnet50':
                model = ResNet50().to(device)
            elif args.model == 'mobilenet_v2':
                model = models.mobilenet_v2(pretrained=False).to(device)
            model.load_state_dict(
                torch.load(os.path.join(args.experiment_data_dir, 'model_masked.pth')))
            masks_file = os.path.join(args.experiment_data_dir, 'mask.pth')

            m_speedup = ModelSpeedup(model, dummy_input, masks_file, device)
            m_speedup.speedup_model()
            evaluation_result = evaluator(model)
            print('Evaluation result (speed up model): %s' % evaluation_result)
            result['performance']['speedup'] = evaluation_result

            torch.save(model.state_dict(),
                       os.path.join(args.experiment_data_dir, 'model_speed_up.pth'))
            print('Speed up model saved to %s' % args.experiment_data_dir)
        flops, params = count_flops_params(model, get_input_size(args.dataset))
        result['flops']['speedup'] = flops
        result['params']['speedup'] = params

    if args.fine_tune:
        if args.dataset == 'mnist':
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
        elif args.dataset == 'cifar10' and args.model == 'vgg16':
            optimizer = torch.optim.SGD(model.parameters(), lr=0.01,
                                        momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer,
                milestones=[int(args.fine_tune_epochs * 0.5), int(args.fine_tune_epochs * 0.75)],
                gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet18':
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                                        momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer,
                milestones=[int(args.fine_tune_epochs * 0.5), int(args.fine_tune_epochs * 0.75)],
                gamma=0.1)
        elif args.dataset == 'cifar10' and args.model == 'resnet50':
            optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                                        momentum=0.9, weight_decay=5e-4)
            scheduler = MultiStepLR(
                optimizer,
                milestones=[int(args.fine_tune_epochs * 0.5), int(args.fine_tune_epochs * 0.75)],
                gamma=0.1)
        best_acc = 0
        for epoch in range(args.fine_tune_epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)
            scheduler.step()
            acc = evaluator(model)
            if acc > best_acc:
                best_acc = acc
                torch.save(model.state_dict(),
                           os.path.join(args.experiment_data_dir, 'model_fine_tuned.pth'))
        print('Evaluation result (fine-tuned): %s' % best_acc)
        print('Fine-tuned model saved to %s' % args.experiment_data_dir)
        result['performance']['finetuned'] = best_acc

    with open(os.path.join(args.experiment_data_dir, 'result.json'), 'w+') as f:
        json.dump(result, f)
def compress(self):
    """
    Compress the model with AutoCompress.

    Returns
    -------
    torch.nn.Module
        model with specified modules compressed.
    """
    _logger.info('Starting AutoCompress pruning...')

    sparsity_each_round = 1 - pow(1 - self._sparsity, 1 / self._num_iterations)

    for i in range(self._num_iterations):
        _logger.info('Pruning iteration: %d', i)
        _logger.info('Target sparsity this round: %s',
                     1 - pow(1 - sparsity_each_round, i + 1))

        # SimulatedAnnealingPruner
        _logger.info('Generating sparsities with SimulatedAnnealingPruner...')
        SApruner = SimulatedAnnealingPruner(
            model=copy.deepcopy(self._model_to_prune),
            config_list=[{"sparsity": sparsity_each_round, "op_types": ['Conv2d']}],
            evaluator=self._evaluator,
            optimize_mode=self._optimize_mode,
            base_algo=self._base_algo,
            start_temperature=self._start_temperature,
            stop_temperature=self._stop_temperature,
            cool_down_rate=self._cool_down_rate,
            perturbation_magnitude=self._perturbation_magnitude,
            experiment_data_dir=self._experiment_data_dir)
        config_list = SApruner.compress(return_config_list=True)
        _logger.info("Generated config_list : %s", config_list)

        # ADMMPruner
        _logger.info('Performing structured pruning with ADMMPruner...')
        ADMMpruner = ADMMPruner(
            model=copy.deepcopy(self._model_to_prune),
            config_list=config_list,
            trainer=self._trainer,
            num_iterations=self._admm_num_iterations,
            training_epochs=self._admm_training_epochs,
            row=self._row,
            base_algo=self._base_algo)
        ADMMpruner.compress()

        ADMMpruner.export_model(
            os.path.join(self._experiment_data_dir, 'model_admm_masked.pth'),
            os.path.join(self._experiment_data_dir, 'mask.pth'))

        # use speed up to prune the model before the next iteration,
        # because SimulatedAnnealingPruner & ADMMPruner don't take masked models
        self._model_to_prune.load_state_dict(
            torch.load(os.path.join(self._experiment_data_dir, 'model_admm_masked.pth')))
        masks_file = os.path.join(self._experiment_data_dir, 'mask.pth')
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        _logger.info('Speeding up models...')
        m_speedup = ModelSpeedup(self._model_to_prune, self._dummy_input, masks_file, device)
        m_speedup.speedup_model()

        evaluation_result = self._evaluator(self._model_to_prune)
        _logger.info('Evaluation result of the pruned model in iteration %d: %s',
                     i, evaluation_result)

    _logger.info('----------Compression finished--------------')

    os.remove(os.path.join(self._experiment_data_dir, 'model_admm_masked.pth'))
    os.remove(os.path.join(self._experiment_data_dir, 'mask.pth'))

    return self._model_to_prune
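The per-round sparsity formula used above compounds back to the overall target: removing sparsity_each_round of the remaining weights in each of num_iterations rounds yields the configured total sparsity. A small standalone check with illustrative numbers (not taken from the source):

target_sparsity, num_iterations = 0.5, 3   # example values only
sparsity_each_round = 1 - pow(1 - target_sparsity, 1 / num_iterations)
overall = 1 - pow(1 - sparsity_each_round, num_iterations)
assert abs(overall - target_sparsity) < 1e-12   # compounds back to the target sparsity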
def model_inference(config):
    masks_file = './speedup_test/mask_new.pth'
    shape_mask = './speedup_test/mask_new.pth'
    org_mask = './speedup_test/mask.pth'
    rn50 = models.resnet50()
    m_paras = torch.load('./speedup_test/model_fine_tuned.pth')
    # delete the mask entries in the checkpoint
    m_new = collections.OrderedDict()
    for key in m_paras:
        if 'mask' in key:
            continue
        if 'module' in key:
            m_new[key.replace('module.', '')] = m_paras[key]
        else:
            m_new[key] = m_paras[key]
    rn50.load_state_dict(m_new)
    rn50.cuda()
    rn50.eval()

    dummy_input = torch.randn(64, 3, 224, 224).cuda()
    use_mask_out = use_speedup_out = None
    rn = rn50
    apply_compression_results(rn, org_mask, 'cuda')
    rn_mask_out = rn(dummy_input)
    model = rn50
    # must run use_mask before use_speedup because use_speedup modifies the model
    if use_mask:
        apply_compression_results(model, masks_file, 'cuda')
        torch.onnx.export(model, dummy_input, 'resnet_masked.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={'inputs': [0], 'mask': [0]},
                          keep_initializers_as_inputs=True)
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        print('elapsed time when use mask: ', time.time() - start)

    print('Model is ', model)
    print('before speed up===================')
    flops, paras = count_flops_params(model, (1, 3, 224, 224))
    print('flops and parameters before speedup is {} FLOPS and {} params'.format(flops, paras))

    if use_speedup:
        dummy_input.cuda()
        m_speedup = ModelSpeedup(model, dummy_input, shape_mask, 'cuda')
        m_speedup.speedup_model()
        print('==' * 20)
        print('Start inference')
        torch.onnx.export(model, dummy_input, 'resnet_fpgm.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={'inputs': [0], 'mask': [0]},
                          keep_initializers_as_inputs=True)
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        print('elapsed time when use speedup: ', time.time() - start)
        print('After speedup model is ', model)
        print('=================')
        print('After speedup')
        flops, paras = count_flops_params(model, (1, 3, 224, 224))
        print('flops and parameters after speedup is {} FLOPS and {} params'.format(flops, paras))

    if compare_results:
        print(rn_mask_out)
        print('another is', use_speedup_out)
        if torch.allclose(rn_mask_out, use_speedup_out, atol=1e-6):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError('the outputs from use_mask and use_speedup are different')

    # start the accuracy check
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        start = time.time()
        evaluate(model, criterion, data_loader_test, device="cuda", print_freq=20)
        print('elapsed time is ', time.time() - start)
checkpoint = torch.load(args.model_file, map_location=device)
model.load_state_dict(checkpoint, strict=False)
model.to(device)
model.eval()

use_mask_out = use_speedup_out = None
# must run use_mask before use_speedup because use_speedup modifies the model
if use_mask:
    apply_compression_results(model, args.masks_file, device)
    start = time.time()
    for _ in range(32):
        use_mask_out = model(dummy_input)
    print('elapsed time when use mask: ', time.time() - start)
if use_speedup:
    m_speedup = ModelSpeedup(model, dummy_input, args.masks_file, device)
    m_speedup.speedup_model()
    start = time.time()
    for _ in range(32):
        use_speedup_out = model(dummy_input)
    print('elapsed time when use speedup: ', time.time() - start)
    torch.save(model.state_dict(),
               "output/DBNet_opensource_nni_resnet18_fpn_db/checkpoint/pruner_speed.pth")
if compare_results:
    if torch.allclose(use_mask_out, use_speedup_out, atol=1e-07):
        print('the outputs from use_mask and use_speedup are the same')
    else:
        raise RuntimeError('the outputs from use_mask and use_speedup are different')
def model_inference(config):
    model_trained = './experiment_data/resnet_bn/model_fine_tuned_first.pth'
    rn50 = resnet50()
    m_paras = torch.load(model_trained)
    # delete the mask entries in the checkpoint and rebuild them as a mask dict
    m_new = collections.OrderedDict()
    mask = dict()
    for key in m_paras:
        if 'weight_mask_b' in key:
            continue
        if 'weight_mask' in key:
            if 'module_added' not in key:
                mask[key.replace('.weight_mask', '')] = dict()
                mask[key.replace('.weight_mask', '')]['weight'] = m_paras[key]
                mask[key.replace('.weight_mask', '')]['bias'] = m_paras[key]
            else:
                mask[key.replace('.relu1.module_added.weight_mask', '.bn3')] = {}
                mask[key.replace('.relu1.module_added.weight_mask', '.bn3')]['weight'] = m_paras[key]
                mask[key.replace('.relu1.module_added.weight_mask', '.bn3')]['bias'] = m_paras[key]
                if '0.relu1' in key:
                    mask[key.replace('relu1.module_added.weight_mask', 'downsample.1')] = {}
                    mask[key.replace('relu1.module_added.weight_mask', 'downsample.1')]['weight'] = m_paras[key]
                    mask[key.replace('relu1.module_added.weight_mask', 'downsample.1')]['bias'] = m_paras[key]
            continue
        if 'module_added' in key:
            continue
        elif 'module' in key:
            m_new[key.replace('module.', '')] = m_paras[key]
        else:
            m_new[key] = m_paras[key]

    for key in mask:
        # modify the weight and bias of the model according to the pruning mask
        m_new[key + '.weight'] = m_new[key + '.weight'].data.mul(mask[key]['weight'])
        m_new[key + '.bias'] = m_new[key + '.bias'].data.mul(mask[key]['bias'])

    rn50.load_state_dict(m_new)
    rn50.cuda()
    rn50.eval()
    torch.save(mask, 'taylor_mask.pth')
    mask_file = './taylor_mask.pth'

    dummy_input = torch.randn(64, 3, 224, 224).cuda()
    use_mask_out = use_speedup_out = None
    rn = rn50
    rn_mask_out = rn(dummy_input)
    model = rn50
    if use_mask:
        torch.onnx.export(model, dummy_input, 'resnet_masked_taylor_1700.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={'inputs': [0], 'mask': [0]},
                          keep_initializers_as_inputs=True)
        start = time.time()
        for _ in range(32):
            use_mask_out = model(dummy_input)
        elapsed_t = time.time() - start
        print('elapsed time when use mask: ', elapsed_t)
        _logger.info('for batch size 64 and with 32 runs, the elapsed time is {}'.format(elapsed_t))
    print('before speed up===================')
    flops, paras = count_flops_params(model, (1, 3, 224, 224))
    _logger.info('flops and parameters before speedup is {} FLOPS and {} params'.format(flops, paras))

    if use_speedup:
        dummy_input.cuda()
        m_speedup = ModelSpeedup(model, dummy_input, mask_file, 'cuda')
        m_speedup.speedup_model()
        print('==' * 20)
        print('Start inference')
        torch.onnx.export(model, dummy_input, 'resnet_taylor_1700.onnx',
                          export_params=True,
                          opset_version=12,
                          do_constant_folding=True,
                          input_names=['inputs'],
                          output_names=['proba'],
                          dynamic_axes={'inputs': [0], 'mask': [0]},
                          keep_initializers_as_inputs=True)
        start = time.time()
        for _ in range(32):
            use_speedup_out = model(dummy_input)
        elapsed_t1 = time.time() - start
        print('elapsed time when use speedup: ', elapsed_t1)
        _logger.info('elapsed time with batch_size 64 and in 32 runs is {}'.format(elapsed_t1))
        _logger.info('model structure after speedup is ====')
        _logger.info(model)
        print('=================')
        print('After speedup')
        flops, paras = count_flops_params(model, (1, 3, 224, 224))
        _logger.info('After speedup flops are {} and number of parameters are {}'.format(flops, paras))

    if compare_results:
        print(rn_mask_out)
        print('another is', use_speedup_out)
        if torch.allclose(rn_mask_out, use_speedup_out, atol=1e-6):
            print('the outputs from use_mask and use_speedup are the same')
        else:
            raise RuntimeError('the outputs from use_mask and use_speedup are different')

    # start the accuracy check
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        start = time.time()
        evaluate(model, criterion, data_loader_test, device="cuda", print_freq=20)
        print('elapsed time is ', time.time() - start)