# NOTE(review): this chunk begins mid-expression — `model, (3, 32, 32))`
# closes a call whose opening is outside this view (presumably a summary /
# layer-name helper taking the model and the CIFAR input shape); confirm
# against the full file before editing.
model, (3, 32, 32))
# Map each prunable BN layer name to the shared sparsity coefficient.
sparse_dict = {bn_name: args.sparsity for bn_name in prune_bn_names}
model = model.to(device)
# prune related settings
if args.resume_path:
    try:
        # Dense checkpoint: tensor shapes match the freshly built model.
        model.load_state_dict(
            torch.load(args.resume_path,
                       map_location=(None if args.cuda else "cpu")))
    except:
        # Checkpoint was saved from a pruned model, so shapes differ;
        # rebuild the slimmed architecture around the stored weights.
        print(
            "Cannot load state_dict directly, trying to load pruned weight ..."
        )
        model = load_pruned_model(
            model,
            torch.load(args.resume_path,
                       map_location=(None if args.cuda else "cpu")))
# Channel selection is disabled when only prunable layers are considered.
channel_select = False if args.prunable_layers_only else True
if args.prune_ratio > 0.999:
    # A ratio of ~1.0 selects the prune() default criterion (no explicit
    # method/ratio passed).
    model = prune(model, (3, 32, 32), channel_select=channel_select)
elif args.prune_ratio > 0:
    model = prune(model, (3, 32, 32),
                  prune_method=network_slimming,
                  prune_ratio=args.prune_ratio,
                  channel_select=channel_select)
if "resnet" in args.arch:
    if args.prune_ratio > 0.999:
        pass  # OT to be updated
    elif args.prune_ratio > 0:
        # NOTE(review): call continues past this chunk — remaining
        # arguments are truncated from this view.
        ns_post_process_resnet(model, args.prune_ratio, (3, 32, 32),
# Report FLOPs and parameter counts for a (possibly pruned) checkpoint.
# Usage: python <script> <arch-name> <weight-path>
from thop import profile

from networks import resnet18, vgg11, vgg11s, densenet63

archs = {
    "resnet18": resnet18,
    "vgg11": vgg11,
    "vgg11s": vgg11s,
    "densenet63": densenet63
}

arch_name = sys.argv[1]
weight_path = sys.argv[2]

model = archs[arch_name](num_classes=100)
weight = torch.load(weight_path, map_location="cpu")
try:
    # Dense checkpoint: tensor shapes match the freshly built model.
    model.load_state_dict(weight)
except Exception:
    # Was a bare `except:` — narrowed so Ctrl-C / SystemExit still propagate.
    # Pruned checkpoint: rebuild the slimmed architecture around the weights.
    model = load_pruned_model(model, weight)

input_t = torch.randn(1, 3, 32, 32)
flops, params = profile(model, inputs=(input_t,), verbose=False)

flops_str = format(int(flops), ',')
# NOTE(review): 1024**3 is a binary prefix (Gi), while the label says
# GFLOPS (1e9). Kept as-is to preserve the reported numbers; same for the
# 1024**2 "M" params figure below.
gflops = flops / 1024**3
gflops_str = "{:.2f} GFLOPS".format(gflops)
params_str = format(int(params), ',')
mparams = params / 1024**2
mparams_str = "{:.2f} M".format(mparams)
line = "{}/{}: FLOPS: {} / {}\t# of params: {} / {}".format(
    arch_name, weight_path, flops_str, gflops_str, params_str, mparams_str)
print(line)
# Evaluate a pruned checkpoint on the CIFAR-100 test set and print accuracy.
device = torch.device('cuda' if args.cuda else 'cpu')
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

# Per-channel mean/std used at training time for these CIFAR models.
normalize = transforms.Normalize(mean=[0.4914, 0.482, 0.4465],
                                 std=[0.2023, 0.1994, 0.2010])
test_loader = torch.utils.data.DataLoader(
    cifar.CIFAR100('./cifar-100',
                   train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       normalize
                   ])),
    batch_size=args.test_batch_size,
    shuffle=True,
    **kwargs)

model = archs[args.arch](num_classes=100)
# map_location="cpu" keeps loading working on CPU-only hosts even when the
# checkpoint was saved from a CUDA run (previously this torch.load had no
# map_location and would crash there); .to(device) restores placement.
pruned_weights = torch.load(args.resume_path, map_location="cpu")
model = load_pruned_model(model, pruned_weights).to(device)
model.eval()

correct = 0
with torch.no_grad():
    t_start = time.time()
    for data, target in test_loader:
        if args.cuda:
            data, target = data.to(device), target.to(device)
        output = model(data)
        # Index of the max log-probability per sample.
        pred = output.max(1, keepdim=True)[1]
        # target.view_as(pred) replaces the deprecated target.data access.
        correct += pred.eq(target.view_as(pred)).cpu().sum()
    t_all = time.time() - t_start

accuracy = 100. * float(correct) / float(len(test_loader.dataset))
print("Accuracy: {}/{} ({:.2f}%)\n".format(correct, len(test_loader.dataset),
                                           accuracy))
# NOTE(review): this chunk begins with the tail of an OpenVINO runner
# (`run_openvino`) whose definition starts outside this view; the first
# statement below belongs to that function body — confirm indentation
# against the full file.
        _ = net(inputs)
openvino_pr05_fp32_pack.append(run_openvino)
openvino_pr05_fp16_pack.append(run_openvino)

# build network for pytorch
def run_pytorch(net, _, n):
    # Run `n` forward passes on fresh random CIFAR-sized inputs; the second
    # positional argument is unused (kept for pack-signature parity with the
    # OpenVINO runner, which takes an input name there).
    with torch.no_grad():
        for i in range(n):
            inputs = torch.randn(1, 3, 32, 32)
            _ = net(inputs)

# A "pack" is [name, network, input-name-or-placeholder, runner-fn].
pytorch_fp32_pack = ["vgg11s-fp32-pytorch", vgg11s(), 0, run_pytorch]
pruned_weights = torch.load("output-vgg11s-bn-pr05/ckpt_best.pth",
                            map_location="cpu")
pruned_net = load_pruned_model(vgg11s(), pruned_weights)
pytorch_pr05_fp32_pack = ["vgg11s-pr05-fp32-pytorch", pruned_net, 0,
                          run_pytorch]

# benchmark
packs = [pytorch_fp32_pack, pytorch_pr05_fp32_pack,
         openvino_pr05_fp32_pack, openvino_pr05_fp16_pack]
loops = 50
iters = 1000
total_t = {_[0]: 0. for _ in packs}
for _ in tqdm(range(loops)):
    # Shuffle each loop so no backend consistently benefits from cache /
    # thermal ordering effects.
    random.shuffle(packs)
    for pack in packs:
        pack_name, net, input_name, run_fn = pack
        t = time.perf_counter()
        run_fn(net, input_name, n=iters)
        total_t[pack_name] += time.perf_counter() - t
# Decide which BN layers receive the sparsity (network-slimming) penalty.
if not args.all_bn:
    bn_names = get_norm_layer_names(model, (3, 32, 32))
    print("Sparsity regularization will be applied to:")
    for bn_name in bn_names:
        print(bn_name)
else:
    print("Sparsity regularization will be applied to all BN layers:")
model = model.to(device)
if args.resume_path:
    try:
        # Dense checkpoint: shapes match the freshly built model.
        model.load_state_dict(torch.load(args.resume_path))
    except:
        # Pruned checkpoint: shapes differ, rebuild the slimmed model.
        print("Failed to load state_dict directly, trying to load pruned weight ...")
        model = load_pruned_model(model, torch.load(args.resume_path))
if args.prune_ratio > 0:
    if args.experimental:
        # Experimental criterion: per-layer normalized variant of Liu 2017.
        model = prune(model, (3, 32, 32), args.prune_ratio,
                      prune_method=liu2017_normalized_by_layer)
    else:
        model = prune(model, (3, 32, 32), args.prune_ratio)
if args.tfs:
    # "Train from scratch": re-initialize weights, keeping only the pruned
    # architecture.
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            if args.arch == "densenet63":
                nn.init.kaiming_normal_(m.weight)  # weird??
            else:
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
        elif isinstance(m, nn.Linear):
            m.reset_parameters()
# NOTE(review): chunk ends with a dangling branch — its body is outside
# this view.
if args.arch == "densenet63":
# Recover dataset/arch from the checkpoint's parent directory name
# (expected layout: .../<prefix>-<dataset>-<arch>/...).
_, dataset, arch = args.resume.split(os.sep)[-3].split('-')
# Non-VGG nets use the patched profiler (presumably for residual
# connections) — TODO confirm against thop_res.
if "vgg" not in arch:
    from thop_res import profile
else:
    from thop import profile
# num_classes[dataset] doubles as a "is this a CIFAR-style dataset" flag:
# falsy selects the ILSVRC12 model zoo with its default class count.
archs = cifar_archs if num_classes[dataset] else ilsvrc12_archs
model = archs[arch](num_classes=num_classes[dataset]) if num_classes[dataset] else archs[arch]()
try:
    # Dense checkpoint: shapes match the freshly built model.
    model.load_state_dict(torch.load(args.resume, map_location="cpu"))
except:
    # Pruned checkpoint: rebuild the slimmed architecture.
    print("Cannot load state_dict directly, trying to load pruned weight ...")
    model = load_pruned_model(model, torch.load(args.resume, map_location="cpu"))
model.eval()
data_shape = (1, 3, 224, 224) if dataset == "ilsvrc12" else (1, 3, 32, 32)
flops, params = profile(model, inputs=(torch.randn(*data_shape),), verbose=False)
print("FLOPS: {:,}\nParams: {:,}".format(int(flops), int(params)))
# Embed the FLOP count in the export name to tell pruned variants apart.
onnx_path = "{}-{}-{}.onnx".format(arch, dataset, int(flops))
torch.onnx.export(model, torch.randn(*data_shape), onnx_path)
# NOTE(review): the triple-quoted block below is commented-out OpenVINO
# conversion code; its closing quotes lie outside this view.
"""
# Convert onnx to OpenVINO IR
cmd_template = 'python /opt/intel/openvino/deployment_tools/model_optimizer/mo_onnx.py --input_model {0} --input_shape "{1}" --data_type {3} --model_name {2}-{3}'
cmd = cmd_template.format(
    onnx_path,
    repr(input_shape),
# Export a (possibly pruned) network to ONNX, then convert it to OpenVINO IR.
parser.add_argument('--fp16',
                    action='store_true',
                    default=False,
                    help='use half precision')
args = parser.parse_args()
if not args.outname:
    # Default the output model name to the architecture name.
    args.outname = args.arch

net = archs[args.arch]()
if args.weights:
    weights = torch.load(args.weights, map_location="cpu")
    try:
        # Dense checkpoint: shapes match the freshly built model.
        net.load_state_dict(weights)
    except Exception:
        # Pruned checkpoint: rebuild the slimmed architecture. (The unused
        # `as e` binding was dropped.)
        print("Direct load failed, trying to load pruned weight ...")
        net = load_pruned_model(net, weights)

onnx_path = "{}.onnx".format(args.outname)
print("Exporting to ONNX ...")
torch.onnx.export(net, torch.randn(*input_shape), onnx_path)

cmd = cmd_template.format(
    onnx_path,
    repr(input_shape),
    args.outname,
    "FP16" if args.fp16 else "FP32"
)
print("Converting to OpenVINO IR ...")
os.system(cmd)
# os.remove replaces `os.system("rm …")`: portable, no shell, and safe for
# paths containing spaces.
os.remove(onnx_path)