def l1_unstructured_pruning(self, percent=0.2):
    '''
    Prune the dense layers and 2-D batch-norm layers of this network in place
    using L1 unstructured pruning.

    A module is selected when its qualified name contains ``'denselayer'`` or
    when it is an ``nn.BatchNorm2d``. Each selected module is pruned exactly
    once, and only if it actually owns a ``weight`` tensor.

    Args:
        percent: amount passed to ``prune.l1_unstructured`` for every selected
            module (a float is treated as a fraction of the weights).
    '''
    logging.info(f'Pruning densenet layers using l1 unstructured pruning with threshold : {percent * 100}%')
    for name, module in self.named_modules():
        # `str` has no `.contains()` method; substring membership is `in`.
        selected = 'denselayer' in name or isinstance(module, nn.BatchNorm2d)
        if not selected:
            continue
        # Containers and activations whose name matches have no weight to
        # prune; asking the pruner to mask a missing tensor would raise.
        if getattr(module, 'weight', None) is None:
            continue
        # A single call per module: a BatchNorm2d that also lives under a
        # dense layer must not be pruned twice in the same pass.
        prune.l1_unstructured(module, name='weight', amount=percent)
def prune_and_save_model(model, amount):
    """Prune every Conv2d/Linear weight, save the state dict, and reload it.

    Each matching layer gets L1 unstructured pruning of ``amount`` which is
    then made permanent with ``prune.remove``. The state dict is written via
    ``mlflow.pytorch.save_state_dict`` into the current directory, the file
    ``state_dict.pth`` is loaded back with ``torch.load`` and deleted.

    Returns:
        Whatever ``torch.load("state_dict.pth")`` yields — presumably the
        state dict that was just written, not a module; confirm with callers.
    """
    prunable_types = (torch.nn.Conv2d, torch.nn.Linear)
    for layer in model.modules():
        if not isinstance(layer, prunable_types):
            continue
        prune.l1_unstructured(layer, name="weight", amount=amount)
        prune.remove(layer, "weight")

    pruned_state = model.state_dict()
    mlflow.pytorch.save_state_dict(pruned_state, ".")
    reloaded = torch.load("state_dict.pth")
    os.remove("state_dict.pth")
    return reloaded
def prune_module(module, method, amount):
    """Prune the ``weight`` of a single module in place.

    Args:
        module: module whose ``weight`` is pruned.
        method: ``"ln"`` for L2 structured pruning along dim 0, or ``"l1"``
            for L1 unstructured pruning.
        amount: pruning amount forwarded to the selected pruning function.

    Raises:
        ValueError: if ``method`` is neither ``"ln"`` nor ``"l1"``.
    """
    if method == "ln":
        prune.ln_structured(module, name="weight", amount=amount, n=2, dim=0)
        return
    if method == "l1":
        prune.l1_unstructured(module, name="weight", amount=amount)
        return
    raise ValueError(f"{method} is wrong")
def stress_test ():
    """Run randomized crossbar-aware pruning checks in an endless loop.

    Every iteration draws random layer sizes and a random ``cfg.xbar_row_size``,
    builds a one-layer linear model, optionally applies ordinary L1 pruning
    (the 'dynamic' strategy), then applies ``l1_xbar_unstructured`` and asserts
    that the resulting sparsity is not lower than the sparsity measured before
    it. The loop only ends when an exception (including a failed assertion)
    is raised; the failing configuration is printed before re-raising.

    Side effects: mutates the module-level ``cfg.xbar_row_size`` and prints
    one line per iteration.
    """
    while(True):
        # Random problem size; R is drawn but only reported, since the conv
        # layer that used it is commented out below.
        C = random.randint(1, 100) % 65 + 3
        K = random.randint(1, 100) % 65 + 16
        R = random.randint(1, 100) % 8 + 1
        cfg.xbar_row_size = random.randint(1, 100) % 64 + 1
        xbar_strategy = random.randint(0,1)
        xbar_strategy_name = {0:"dynamic", 1:"static"}
        #C, K, R = 3, 3, 1
        #cfg.xbar_row_size = 1
        class LeNet(nn.Module):
            # Minimal model: a single Linear(C, K) layer.
            def __init__(self):
                super(LeNet, self).__init__()
                #self.conv1 = nn.Conv2d(C, K, R)
                self.fc1 = nn.Linear(C, K)
            def forward(self, x):
                #x = self.conv1(x)
                x = self.fc1(x)
                return x
        model = LeNet().to(device=device)
        def sparsity(weight):
            # Fraction of entries that are exactly zero.
            return float(torch.sum(weight==0.0))/float(weight.nelement())
        #module = model.conv1
        #module.weight = torch.nn.Parameter(torch.arange(0,C*K*R*R).view(K, C, R, R).float())
        module = model.fc1
        #module.weight = torch.nn.Parameter(torch.arange(0,C*K).view(K, C).float())
        try:
            # Prune a model initially - unstructured pruning
            if (xbar_strategy_name[xbar_strategy] == 'dynamic'):
                prune.l1_unstructured(module, name="weight", amount=0.5)
                prune.remove(module, 'weight')
            # Baseline sparsity before the crossbar-aware step.
            s_mat = sparsity(module.weight)
            # Fine-tune with xbar-aware pruning
            l1_xbar_unstructured(module, name="weight", threshold=0.5, xbar_strategy=xbar_strategy_name[xbar_strategy])
            s_xbar = sparsity(module.weight)
            #print("{:.2f}" .format(s_xbar))
            prune.remove(module, 'weight')
            print ('Passed:\t Mat {0:0.2f}\t Xbar {1:0.2f} \t [C {2}, K {3}, R {4}, xbar_row_size {5}] Xbar strategy {6}' .format(s_mat, s_xbar, C, K, R, cfg.xbar_row_size, xbar_strategy_name[xbar_strategy]))
            # NOTE(review): "Passed" is printed before this check runs.
            assert (s_xbar >= s_mat)
        except Exception as e:
            print ("Failed configuration [C, K, R, xbar_row_size, strategy] ", [C, K, R, cfg.xbar_row_size, xbar_strategy_name[xbar_strategy]])
            raise e
def l1_unstructured_pruning(net, a_list):
    """Return a pruned deep copy of ``net``; the input network is untouched.

    The i-th ``nn.Conv2d`` encountered in module-traversal order is pruned
    with L1 unstructured pruning using ``amount = 1 - a_list[i]``.

    Args:
        net: network to copy and prune.
        a_list: one value per Conv2d layer, indexed in traversal order.

    Returns:
        The pruned copy, or ``None`` (after printing a message) when ``net``
        is not an ``nn.Module``.
    """
    if isinstance(net, nn.Module):
        newnet = copy.deepcopy(net)
        conv_layers = [m for m in newnet.modules() if isinstance(m, nn.Conv2d)]
        for position, conv in enumerate(conv_layers):
            ratio = a_list[position]
            prune.l1_unstructured(conv, name='weight', amount=float(1 - ratio))
        return newnet

    print('Invalid input. Must be nn.Module')
    return None
def pruner(model, amount, random=False):
    """Prune the ``weight`` of every Conv2d and Linear layer in place.

    Args:
        model: network whose layers are pruned.
        amount: pruning amount applied to each matching layer.
        random: when truthy use random unstructured pruning, otherwise
            L1 unstructured pruning.

    Returns:
        The same ``model`` object. The pruning re-parametrization is left
        attached (``prune.remove`` is not called).
    """
    prune_fn = prune.random_unstructured if random else prune.l1_unstructured
    target_types = (torch.nn.Conv2d, torch.nn.Linear)
    for layer in model.modules():
        if isinstance(layer, target_types):
            prune_fn(layer, name='weight', amount=amount)
    return model
def prune_all(self):
    """Prune every numbered conv / batch-norm pair and the final ``fc1`` layer.

    For ``k`` in ``1..self.nb_layers`` the attributes ``self.model.conv<k>``
    and ``self.model.bn<k>`` are looked up; the conv gets Ln structured
    pruning (``n=self.norme``, ``dim=self.dim``) and the batch-norm gets L1
    unstructured pruning, both with ``self.amount``. Finally ``self.model.fc1``
    gets the same structured pruning as the convs.

    Raises:
        AttributeError: if an expected ``conv<k>`` / ``bn<k>`` is missing.
    """
    for layer_number in range(1, self.nb_layers + 1):
        # getattr performs the same lookup as the former eval() of an
        # f-string, without compiling and executing source text.
        conv = getattr(self.model, f"conv{layer_number}")
        bn = getattr(self.model, f"bn{layer_number}")
        prune.ln_structured(module=conv, name='weight', amount=self.amount,
                            n=self.norme, dim=self.dim)
        prune.l1_unstructured(module=bn, name='weight', amount=self.amount)
    prune.ln_structured(module=self.model.fc1, name='weight',
                        amount=self.amount, n=self.norme, dim=self.dim)
def prune_all(self):
    """Prune the stem, the three residual stages, and the final linear layer.

    ``self.model.conv1`` and ``self.model.linear`` get Ln structured pruning
    (``n=self.norme``, ``dim=self.dim``), ``self.model.bn1`` gets L1
    unstructured pruning, and ``layer1``..``layer3`` are each handed to
    ``self.prune_block``. Every call uses ``self.amount``.
    """
    structured_kwargs = dict(name='weight', amount=self.amount,
                             n=self.norme, dim=self.dim)

    # Stem.
    prune.ln_structured(module=self.model.conv1, **structured_kwargs)
    prune.l1_unstructured(module=self.model.bn1, name='weight',
                          amount=self.amount)

    # Residual stages, in order.
    for stage_name in ("layer1", "layer2", "layer3"):
        self.prune_block(getattr(self.model, stage_name))

    # Classifier head.
    prune.ln_structured(module=self.model.linear, **structured_kwargs)
def l1_prune(model, amount=0.00, name='weight', verbose=False, glob=False):
    """
    Prunes the model param by param by given amount

    Args:
        model: network to prune in place.
        amount: pruning amount forwarded to the pruning call(s).
        name: parameter name used to collect prunable parameters and to build
            the verbose summary.
        verbose: when truthy, print a per-layer table and the overall ratio.
        glob: when truthy, prune all collected parameters jointly with
            ``prune.global_unstructured``; otherwise prune each one separately.
    """
    params_to_prune = get_prune_params(model, name)
    if glob:
        prune.global_unstructured(params_to_prune,
                                  pruning_method=prune.L1Unstructured,
                                  amount=amount)
    else:
        # Use distinct loop names: reusing `name` here used to overwrite the
        # `name` argument that the verbose summary below relies on.
        for module, param_name in params_to_prune:
            prune.l1_unstructured(module, param_name, amount)
    if verbose:
        info, num_zeros, num_global = get_prune_summary(model, name)
        global_pruning = num_zeros / num_global
        print(tabulate(info, headers='keys', tablefmt='github'))
        # NOTE(review): the value looks like a 0-1 ratio while the label says
        # "%"; left unchanged pending confirmation of get_prune_summary.
        print("Total Pruning: {}%".format(global_pruning))
def prune_fixed_amount(model, amount, verbose=True, glob=True):
    """Prune a fixed number of weights with L1 pruning and report the result.

    ``amount`` is multiplied by a weight count and floored, so the pruning
    calls receive an absolute number of weights: the global count when
    ``glob`` is truthy, otherwise each layer's own count taken from
    ``layers_w_count[i][1]``.

    After pruning, exact zeros are counted per layer; when ``verbose`` is
    truthy a table and the overall zero ratio are printed. Nothing is returned.
    """
    parameters_to_prune, num_global_weights, layers_w_count = get_prune_params(
        model)

    if not glob:
        for idx, (module, param_name) in enumerate(parameters_to_prune):
            layer_budget = math.floor(amount * layers_w_count[idx][1])
            prune.l1_unstructured(module, name=param_name, amount=layer_budget)
    else:
        global_budget = math.floor(amount * num_global_weights)
        prune.global_unstructured(parameters_to_prune,
                                  pruning_method=prune.L1Unstructured,
                                  amount=global_budget)

    prune_stat = {
        'Layers': [],
        'Weight Name': [],
        'Percent Pruned': [],
        'Total Pruned': []
    }
    num_global_zeros = 0

    # Pruning happened in place, so the same (module, name) pairs now expose
    # the masked tensors.
    for layer, weight_name in parameters_to_prune:
        tensor = getattr(layer, weight_name)
        zeros_here = torch.sum(tensor == 0.0).item()
        total_here = torch.numel(tensor)
        num_global_zeros += zeros_here
        percent_here = zeros_here / total_here * 100

        prune_stat['Layers'].append(str(layer))
        prune_stat['Weight Name'].append(weight_name)
        prune_stat['Percent Pruned'].append(
            f'{zeros_here} / {total_here} ({percent_here:.5f}%)'
        )
        prune_stat['Total Pruned'].append(f'{zeros_here}')

    global_prune_percent = num_global_zeros / num_global_weights

    if not verbose:
        return
    print('Pruning Summary', flush=True)
    print(tabulate(prune_stat, headers='keys'), flush=True)
    print(f'Percent Pruned Globaly: {global_prune_percent:.2f}', flush=True)
def aggr(self, models, clients, *args, **kwargs):
    """Average client models and re-apply a pruning mask to the result.

    Client models are combined by ``fed_avg`` with weights proportional to
    each client's ``num_data``. A pruning summary of the averaged model is
    printed, then every prunable parameter is L1-pruned by an absolute amount
    equal to its current number of exact zeros.

    Returns:
        The aggregated model.
    """
    print("----------Averaging Models--------")

    sample_counts = np.array([c.num_data for c in clients], dtype=np.float32)
    sample_counts /= np.sum(sample_counts)

    aggr_model = fed_avg(models=models,
                         weights=sample_counts,
                         device=self.args.device)

    summary_table, _, _ = get_prune_summary(aggr_model, name='weight')
    print(tabulate(summary_table, headers='keys', tablefmt='github'))

    for module, param_name in get_prune_params(aggr_model):
        tensor = getattr(module, param_name)
        num_zero = torch.eq(tensor.data, 0.00).sum().float()
        # Integer amount: prune exactly as many entries as are already zero.
        prune.l1_unstructured(module, param_name, int(num_zero))

    return aggr_model
def main():
    """Load the EAST checkpoint, prune it, and print its size before and after.

    Steps:
      1. Ensure the result directory ``cfg.res_img_path`` exists.
      2. Build ``East`` wrapped in ``DataParallel`` and load ``cfg.checkpoint``
         on CPU; exit with status 1 when the checkpoint file is missing.
      3. Print the original model size.
      4. L1-prune Conv2d layers (40% of weights, 30% of biases) and Linear
         layers (40% of weights and biases), making each pruning permanent.
      5. Print the pruned model size.

    The pruned tensors stay dense (zeros are stored explicitly).
    """
    import torch.nn.utils.prune as prune

    # prepare output directory
    print('EAST <==> TEST <==> Create Res_file and Img_with_box <==> Begin')
    result_root = os.path.abspath(cfg.res_img_path)
    if not os.path.exists(result_root):
        os.mkdir(result_root)

    print('EAST <==> Prepare <==> Network <==> Begin')
    model = East()
    model = torch.nn.DataParallel(model, device_ids=cfg.gpu_ids)

    if not os.path.isfile(cfg.checkpoint):
        print('Can not find checkpoint !!!')
        exit(1)

    print(
        "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format(
            cfg.checkpoint))
    checkpoint = torch.load(cfg.checkpoint, map_location='cpu')
    # Not used below; kept so a checkpoint without 'epoch' still fails here.
    epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print(
        "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Done".format(
            cfg.checkpoint))

    print()
    print('###############')
    print()
    print('Original Size:')
    print_size_of_model(model)

    ###############
    print()
    print('Pruned model size')

    # (layer type, weight amount, bias amount)
    prune_plan = (
        (torch.nn.Conv2d, 0.4, 0.3),
        (torch.nn.Linear, 0.4, 0.4),
    )
    for name, module in model.named_modules():
        for layer_type, weight_amount, bias_amount in prune_plan:
            if not isinstance(module, layer_type):
                continue
            prune.l1_unstructured(module, name='weight', amount=weight_amount)
            prune.remove(module, 'weight')
            # Layers built with bias=False have bias None, which cannot be
            # pruned; skip instead of crashing.
            if module.bias is not None:
                prune.l1_unstructured(module, name='bias', amount=bias_amount)
                prune.remove(module, 'bias')
            break

    # The former `model = model.to_sparse()` was dropped: neither nn.Module
    # nor DataParallel defines `to_sparse`, so it raised AttributeError before
    # the pruned size could be printed.
    print_size_of_model(model)
def download(self, global_model, global_init_model, *args, **kwargs):
    """
    Download global model from server

    Stores both models on the instance, then applies L1 unstructured pruning
    with an amount of 0 to every prunable parameter of the initial global
    model. The global model itself is not pruned here.
    """
    self.global_model = global_model
    self.global_init_model = global_init_model

    # The amount is fixed at zero; the zero-count based amount used previously
    # is disabled.
    masked = 0.00
    for module, param_name in get_prune_params(self.global_init_model):
        # The parameter is read (value unused) exactly as before, so a
        # missing attribute still raises at this point.
        weights = getattr(module, param_name)
        prune.l1_unstructured(module, param_name, amount=int(masked))
def prune_6M(para):
    """Prune half the weights of selected Conv2d layers and save the model.

    The network from ``load_t_net(file=True)`` is moved to CUDA and summarized.
    Sub-modules of ``model.module`` are numbered from 1 in traversal order;
    every module whose number is a multiple of ``para`` is skipped, the other
    Conv2d modules get 50% L1 unstructured pruning. "yes"/"no" is printed for
    each visited module. The model is summarized again and ``model.module`` is
    saved to ``gj_dir/after_6M.pth.tar`` with the pruning masks still attached.
    """
    model = load_t_net(file=True)
    x = torch.randn(1, 3, 384, 224).cuda()
    model = model.cuda()
    summary(model, x)

    for position, (_, layer) in enumerate(model.module.named_modules(), start=1):
        if position % para == 0:
            continue
        if not isinstance(layer, torch.nn.Conv2d):
            print("no")
            continue
        print("yes")
        prune.l1_unstructured(layer, name='weight', amount=0.5)

    print("++++/++++/+++++/+++++/++++++++++++=+++++++++++++++++++++=")
    x = torch.randn(1, 3, 384, 224).cuda()
    summary(model, x)
    torch.save(model.module, 'gj_dir/after_6M.pth.tar')
def pruner(
    model, grouped_pruning=True, conv2d_prune_amount=0.4, linear_prune_amount=0.2
):
    """Prune convolution (and, layer-wise only, Linear) weights in place.

    A module counts as a convolution when it is an instance of any type in
    the module-level ``conv_names``.

    Args:
        model: network to prune.
        grouped_pruning: when equal to ``True``, all convolution weights are
            pruned jointly with global L1 unstructured pruning and Linear
            layers are left alone; otherwise each convolution and each Linear
            layer is pruned on its own.
        conv2d_prune_amount: amount for convolution weights.
        linear_prune_amount: amount for Linear weights (layer-wise mode only).
    """
    def _is_conv(candidate):
        return any(isinstance(candidate, conv_type) for conv_type in conv_names)

    if grouped_pruning == True:  # noqa: E712 - keeps the exact comparison
        # Global pruning
        parameters_to_prune = [
            (module, "weight")
            for _, module in model.named_modules()
            if _is_conv(module)
        ]
        print(f"Pruning: {len(parameters_to_prune)}")
        prune.global_unstructured(
            parameters_to_prune,
            pruning_method=prune.L1Unstructured,
            amount=conv2d_prune_amount,
        )
        return

    for _, module in model.named_modules():
        if _is_conv(module):
            layer_amount = conv2d_prune_amount
        elif isinstance(module, torch.nn.Linear):
            layer_amount = linear_prune_amount
        else:
            continue
        prune.l1_unstructured(module, name="weight", amount=layer_amount)
def prune_block(self, sub_layer):
    """Prune every residual block contained in ``sub_layer``.

    For each block, ``conv1`` and ``conv2`` get Ln structured pruning
    (``n=self.norme``, ``dim=self.dim``) and ``bn1`` and ``bn2`` get L1
    unstructured pruning, all with ``self.amount``. Layers of
    ``block.shortcut`` are pruned the same way, but only when they are
    ``Conv1d`` or ``BatchNorm1d`` instances; any other shortcut layer is
    left untouched.
    """
    conv1d_type = torch.nn.modules.conv.Conv1d
    bn1d_type = torch.nn.modules.batchnorm.BatchNorm1d

    def _structured(target):
        prune.ln_structured(module=target, name='weight', amount=self.amount,
                            n=self.norme, dim=self.dim)

    def _unstructured(target):
        prune.l1_unstructured(module=target, name='weight', amount=self.amount)

    for block in sub_layer:
        _structured(block.conv1)
        _unstructured(block.bn1)
        _structured(block.conv2)
        _unstructured(block.bn2)
        for short_layer in block.shortcut:
            if isinstance(short_layer, conv1d_type):
                _structured(short_layer)
            elif isinstance(short_layer, bn1d_type):
                _unstructured(short_layer)
def prune_step_change(self, pstep, prune_mode): cuda_using = next(self.parameters()).is_cuda #reimport fixed u and v weights ui, vi = self.load_weights() #fix current weights uc, vc = (self.u_embeddings.weight.data.clone().cpu(), self.v_embeddings.weight.data.clone().cpu()) #fix current masks if not list(self.u_embeddings.named_buffers()): #prune.identity(self.u_embeddings, name='weight') prune.identity(self.v_embeddings, name='weight') #umask = dict(self.u_embeddings.named_buffers())['weight_mask'].cpu() vmask = dict(self.v_embeddings.named_buffers())['weight_mask'].cpu() #u_temp = torch.nn.Embedding(self.vocab_size, self.emb_dimension) v_temp = torch.nn.Embedding(self.vocab_size, self.emb_dimension) if prune_mode == 'change': f = lambda x, y: x - y elif prune_mode == 'absolute change': f = lambda x, y: torch.abs(x) - torch.abs(y) else: f = lambda x, y: x # weights to be left must have higher function outputs #u_temp.weight.data.copy_(f(uc,ui)) v_temp.weight.data.copy_(f(vc, vi)) #prune.custom_from_mask(u_temp,name='weight',mask=umask) prune.custom_from_mask(v_temp, name='weight', mask=vmask) if cuda_using: # u_temp.cuda() v_temp.cuda() #prune.l1_unstructured(u_temp, name='weight', amount=pstep) prune.l1_unstructured(v_temp, name='weight', amount=pstep) #checked, cuda <-> cpu crash DNE #u_temp.weight.data.copy_(uc) v_temp.weight.data.copy_(vc) #self.u_embeddings = u_temp self.v_embeddings = v_temp
def weight_prune(prune_iter):
    """Prune the module-level ``model`` for iteration ``prune_iter``.

    The amount for each layer kind is ``1 - (1 - per_iteration_rate) **
    prune_iter``, with per-iteration rates read from ``args``: Conv2d layers
    use ``prune_per_conv``, Linear layers whose name contains ``'out'`` use
    ``prune_per_out``, other Linear layers use ``prune_per_linear``.

    Returns:
        dict mapping every buffer name of the model (as reported by
        ``named_buffers()`` while the masks are attached) to its tensor.
        The pruning itself is made permanent before returning.
    """
    def _cumulative(per_iteration):
        return (1 - ((1 - per_iteration)**prune_iter))

    conv_rate = _cumulative(args.prune_per_conv)
    fc_rate = _cumulative(args.prune_per_linear)
    out_rate = _cumulative(args.prune_per_out)

    # make prune mask
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            layer_rate = conv_rate
        elif isinstance(module, nn.Linear):
            layer_rate = out_rate if 'out' in name else fc_rate
        else:
            continue
        prune.l1_unstructured(module, name='weight', amount=layer_rate)

    # mask copy (taken before the masks are folded into the weights)
    cpd_mask = dict(model.named_buffers())

    # going prune
    for module in model.modules():
        if isinstance(module, (nn.Conv2d, nn.Linear)):
            prune.remove(module, name='weight')

    # return copied mask
    return cpd_mask
def prune_model(model,
                prune_ratio=0.2,
                prune_ratio_conv=None,
                prune_method='local',
                prune_output_layer=True):
    """L1-prune the direct children of ``model``, layer-wise or globally.

    The child at the index returned by ``get_idx_of_output_layer(model)`` is
    the output layer.

    Args:
        prune_ratio: amount for Linear layers, and for the global pass.
        prune_ratio_conv: amount for Conv2d layers in local mode; defaults to
            ``prune_ratio``.
        prune_method: ``'local'`` prunes each layer separately (the output
            layer at half of ``prune_ratio``); ``'global'`` prunes the
            selected weights jointly. Any other value does nothing.
        prune_output_layer: whether the output layer takes part.
    """
    o_layer_idx = get_idx_of_output_layer(model)
    if prune_ratio_conv is None:
        prune_ratio_conv = prune_ratio

    if prune_method == 'local':
        for idx, layer in enumerate(model.children()):
            if idx == o_layer_idx:
                if prune_output_layer:
                    prune.l1_unstructured(layer,
                                          name='weight',
                                          amount=prune_ratio / 2)
                continue
            if isinstance(layer, nn.Linear):
                prune.l1_unstructured(layer, name='weight', amount=prune_ratio)
            if isinstance(layer, nn.Conv2d):
                prune.l1_unstructured(layer,
                                      name='weight',
                                      amount=prune_ratio_conv)
    elif prune_method == 'global':
        parameters_to_prune = []
        for idx, layer in enumerate(model.children()):
            if idx == o_layer_idx:
                if prune_output_layer:
                    parameters_to_prune.append((layer, 'weight'))
                continue
            if isinstance(layer, nn.Linear):
                parameters_to_prune.append((layer, 'weight'))
            if isinstance(layer, nn.Conv2d):
                parameters_to_prune.append((layer, 'weight'))
        prune.global_unstructured(tuple(parameters_to_prune),
                                  pruning_method=prune.L1Unstructured,
                                  amount=prune_ratio)
def prune_network(self, model, rate):
    """
    Prune the weights of the direct children of ``model`` by L1 magnitude.

    Children whose name contains "output" or "conv" are pruned at half of
    ``rate``; every other child that has a ``weight`` attribute is pruned at
    ``rate``. The mask produced for each child is stored, detached and
    cloned, in ``self.weight_mask`` under ``"<child name>.weight"``, and the
    pruning is then made permanent on the child.

    Arguments
    --------
    model: (nn.Module) The neural network whose layers are to be pruned.
    rate: (float) The fraction of the weights to prune. Must be between 0 and 1
    """
    for child_name, child in model.named_children():
        if not hasattr(child, 'weight'):
            continue
        half_rate = "output" in child_name or 'conv' in child_name
        layer_rate = rate / 2.0 if half_rate else rate
        prune.l1_unstructured(child, 'weight', amount=layer_rate)
        # Snapshot the mask before prune.remove deletes the buffer.
        self.weight_mask[f"{child_name}.weight"] = child.weight_mask.detach().clone()
        prune.remove(child, name='weight')
def prune_darts(model, pruning_percentage):
    """Prune a DARTS-style network in place.

    Every direct child of ``model`` except ``nn.AdaptiveAvgPool3d`` instances
    is iterated. Items that are not ``Cell`` instances get random
    unstructured pruning of their ``weight``; inside a ``Cell`` only
    ``nn.Conv2d`` layers are pruned, with L1 unstructured pruning. Both use
    ``pruning_percentage`` as the amount.

    NOTE(review): non-Cell items are pruned without a type check, so each is
    assumed to own a ``weight`` — confirm against the model definition.
    """
    for modules in model.children():
        if not isinstance(modules, nn.AdaptiveAvgPool3d):
            for module in modules:
                if not isinstance(module, Cell):
                    # print(list(module.named_parameters()))
                    prune.random_unstructured(module,
                                              name="weight",
                                              amount=pruning_percentage)
                    # print(list(module.named_parameters()))
                    # print("Not Cell")
                else:
                    # print(module)
                    for cell_module in module.children():
                        # print("Cell")
                        if isinstance(cell_module, ReLUConvBN):
                            # print("ReLUConvBN")
                            # Each child of a ReLUConvBN is itself iterable;
                            # only its Conv2d members are pruned.
                            for ReLU_List in cell_module.children():
                                for ReLU_module in ReLU_List:
                                    if isinstance(ReLU_module, nn.Conv2d):
                                        # if ReLU_module.kernel_size[0] == 1:
                                        #     continue
                                        prune.l1_unstructured(
                                            ReLU_module,
                                            name="weight",
                                            amount=pruning_percentage)
                        elif isinstance(cell_module, nn.ModuleList):
                            # print("nn.ModuleList")
                            # Walk two levels of children, then iterate each
                            # innermost container for Conv2d layers.
                            for innerModules in cell_module.children():
                                for seqItem in innerModules.children():
                                    for layer in seqItem:
                                        if isinstance(layer, nn.Conv2d):
                                            # if layer.kernel_size[0] == 1:
                                            #     continue
                                            prune.l1_unstructured(
                                                layer,
                                                name="weight",
                                                amount=pruning_percentage)
def weight_init(model, model_init, c_rate, f_rate, o_rate):
    """Prune ``model``, reset surviving weights from ``model_init``, and
    freeze the pruned entries of the 'fc' layers.

    Steps:
      1. L1-prune Conv2d weights at ``c_rate``, Linear weights at ``o_rate``
         when the module name contains 'out' and at ``f_rate`` otherwise.
      2. Copy every buffer into ``cp_mask`` under its name minus the last 12
         characters (the length of ".weight_mask").
      3. Overwrite ``weight_orig`` and bias parameters with deep copies of the
         matching parameters of ``model_init`` (substring name matching).
      4. Make the pruning permanent.
      5. Register a gradient hook on the weight of every module whose name
         contains 'fc', multiplying the gradient by the stored mask in place.
      6. Build an Adam optimizer from the module-level ``param`` settings.

    Returns:
        ``(cp_mask, optimizer, hook)``. ``hook`` is the handle of the last
        hook registered, or ``None`` when no module name contains 'fc'.
    """
    # make prune mask
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.l1_unstructured(module, name='weight', amount=c_rate)
        if isinstance(module, nn.Linear):
            if 'out' in name:
                prune.l1_unstructured(module, name='weight', amount=o_rate)
            else:
                prune.l1_unstructured(module, name='weight', amount=f_rate)

    # mask copy
    # NOTE(review): named_buffers() also yields non-mask buffers (if any);
    # those get their last 12 characters stripped too.
    cp_mask = {}
    for name, mask in model.named_buffers():
        cp_mask[name[:(len(name) - 12)]] = mask

    # weight initialize
    # copy weight model_init.layer.weight -> model.layer.weight_orig. Bias is similar.
    for name, p in model.named_parameters():
        if 'weight_orig' in name:
            for name2, p2 in model_init.named_parameters():
                # Dropping the trailing "_orig" (5 chars) yields "<layer>.weight".
                if name[0:len(name) - 5] in name2:
                    p.data = copy.deepcopy(p2.data)
        if 'bias' in name:
            for name2, p2 in model_init.named_parameters():
                if name in name2:
                    p.data = copy.deepcopy(p2.data)

    # go prune
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.remove(module, name='weight')
        elif isinstance(module, nn.Linear):
            prune.remove(module, name='weight')

    # gradient hook (freeze zero-weight)
    # Start from None so the return below cannot hit an unbound local when the
    # model has no 'fc' module.
    hook = None
    for name, module in model.named_modules():
        if 'fc' in name:
            # `name=name` binds the current layer name into the lambda.
            hook = module.weight.register_hook(
                lambda grad, name=name: grad.mul_(cp_mask[name]))

    optimizer = optim.Adam(model.parameters(),
                           lr=param.lr,
                           weight_decay=param.weight_decay)
    # return copied mask
    return cp_mask, optimizer, hook
from utils.load_model import load_model
from utils.load_data import load_data
from utils.test_model import test_model
from utils.visualize import imshow, visualize_model

# Script: load the trained network, prune it, evaluate it, save the weights.
dataloaders, dataset_sizes, class_names = load_data('../data')
model = load_model('resnet18_01')

# Pass 1: attach L1 unstructured masks (70% of each Conv2d weight, 40% of
# each Linear weight). Biases are left alone.
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        prune.l1_unstructured(module, name='weight', amount=0.7)
        continue
    if isinstance(module, torch.nn.Linear):
        prune.l1_unstructured(module, name='weight', amount=0.4)

# Pass 2: fold the masks into the weights so the saved state dict holds plain
# `weight` tensors.
for name, module in model.named_modules():
    if not isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
        continue
    prune.remove(module, 'weight')

test_model(model, dataloaders['test'])
torch.save(model.state_dict(), '../models/resnet18_01_pruned')
def do_weight_pruning(
        cfgfile=None,  # config file name
        weightfile=None,  # weight file name
        roofline_prune_rates=None,
        max_prune_rate=None,
        model_dir="models",
        results_dir="results",
        save_orig=False,  # save original network
):
    """Prune a Darknet model globally and, optionally, per layer with caps.

    Phase 1 applies global L1 unstructured pruning with ``max_prune_rate``
    over all Conv2d weights, measures the resulting per-layer zero ratios,
    saves the model, runs one detection on ``data/dog.jpg`` and writes the
    annotated image.

    Phase 2 (only when ``roofline_prune_rates`` is given) reloads the
    unpruned model and prunes each Conv2d layer separately with the
    element-wise minimum of the phase-1 ratio and the supplied rate, then
    saves and predicts again.

    NOTE(review): ``max_prune_rate`` defaults to None but is used in
    ``int(100 * max_prune_rate)``; callers presumably always pass a number.

    Returns:
        ``(global_prune_rates, prune_rate_gl)`` when ``roofline_prune_rates``
        is None, otherwise the pair
        ``((global_prune_rates, prune_rate_gl), (rl_prune_rates, prune_rate_rl))``.
    """
    import cv2
    imgfile = "data/dog.jpg"
    model = Darknet(cfgfile)
    # m.print_network()
    model.load_weights(weightfile)
    # print('Loading weights from %s... Done!' % (weightfile))
    if use_cuda:
        model.cuda()
    # Save original network
    model_path = os.path.join(model_dir, "yolov4_darknet")
    res_path = os.path.join(results_dir, "prediction")
    if save_orig:
        torch.save(model, model_path + ".pt")
    # Pick the class-name file from the number of classes.
    num_classes = model.num_classes
    if num_classes == 20:
        namesfile = 'data/voc.names'
    elif num_classes == 80:
        namesfile = 'data/coco.names'
    else:
        namesfile = 'data/x.names'
    class_names = load_class_names(namesfile)
    # Prepare the test image at the network's input size, in RGB order.
    img = cv2.imread(imgfile)
    sized = cv2.resize(img, (model.width, model.height))
    sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
    # Global weight pruning
    print("Start global pruning")
    parameters_to_prune = []
    for name, module in model.named_modules():
        # prune global connections in all 2D-conv layers
        if isinstance(module, torch.nn.Conv2d):
            parameters_to_prune.append((module, 'weight'))
    prune.global_unstructured(parameters_to_prune,
                              pruning_method=prune.L1Unstructured,
                              amount=max_prune_rate)
    # Check sparsity
    global_sparsity = []
    global_num_weights = []
    global_prune_rates = []
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            # `sparsity` is the count of zero weights, not a ratio.
            sparsity = float(torch.sum(module.weight == 0))
            num_weights = float(module.weight.nelement())
            rate = sparsity / num_weights
            global_sparsity.append(sparsity)
            global_num_weights.append(num_weights)
            global_prune_rates.append(rate)
            print("Sparsity in {}: {:.2f}%".format(name, 100 * rate))
    prune_rate_gl = float(sum(global_sparsity)) / float(
        sum(global_num_weights))
    print("Global sparsity (global pruning): {:.2f}%".format(100 *
                                                             prune_rate_gl))
    postfix = "_global_weight_{}".format(int(100 * max_prune_rate))
    model_path_gl = model_path + postfix + ".pt"
    # Save pruned pt model
    torch.save(model, model_path_gl)
    # Make prediction with model
    start = time.time()
    boxes = do_detect(model, sized, 0.4, 0.6, use_cuda)
    finish = time.time()
    print('%s: Predicted in %f seconds.' % (imgfile, (finish - start)))
    plot_boxes_cv2(img,
                   boxes[0],
                   savename=res_path + postfix + ".jpg",
                   class_names=class_names)
    if roofline_prune_rates is not None:
        print("Start global + roofline pruning")
        # Loop through each prune percent
        # Per-layer cap: never prune more than global pruning did.
        rl_prune_rates = np.minimum(global_prune_rates, roofline_prune_rates)
        # Load the original unprunned model
        # We cannot copy otherwise it keeps reusing the same model throughout all loops
        model = Darknet(cfgfile)
        model.load_weights(weightfile)
        if use_cuda:
            model.cuda()
        local_sparsity = []
        local_num_weights = []
        # j indexes Conv2d layers only, matching rl_prune_rates.
        j = 0
        for name, module in model.named_modules():
            # prune local connections in all 2D-conv layers
            if isinstance(module, torch.nn.Conv2d):
                prune.l1_unstructured(module,
                                      name='weight',
                                      amount=rl_prune_rates[j])
                # Check sparsity
                sparsity = float(torch.sum(module.weight == 0))
                num_weights = float(module.weight.nelement())
                local_sparsity.append(sparsity)
                local_num_weights.append(num_weights)
                print("Sparsity in {}: {:.2f}%".format(
                    name, 100 * sparsity / num_weights))
                j += 1
        prune_rate_rl = float(sum(local_sparsity)) / float(
            sum(local_num_weights))
        print("Global sparsity (roofline global pruning): {:.2f}%".format(
            100 * prune_rate_rl))
        postfix = "_roofline_global_weight_{}".format(int(100 *
                                                          max_prune_rate))
        # Save pruned pt model
        torch.save(model, model_path + postfix + ".pt")
        # Make prediction with model
        start = time.time()
        boxes = do_detect(model, sized, 0.4, 0.6, use_cuda)
        finish = time.time()
        print('%s: Predicted in %f seconds.' % (imgfile, (finish - start)))
        plot_boxes_cv2(img,
                       boxes[0],
                       savename=res_path + postfix + ".jpg",
                       class_names=class_names)
    if roofline_prune_rates is not None:
        return (global_prune_rates, prune_rate_gl), (rl_prune_rates,
                                                     prune_rate_rl)
    else:
        return (global_prune_rates, prune_rate_gl)
def iterative_pruning_finetuning(model,
                                 train_loader,
                                 test_loader,
                                 device,
                                 learning_rate,
                                 l1_regularization_strength,
                                 l2_regularization_strength,
                                 learning_rate_decay=0.1,
                                 conv2d_prune_amount=0.4,
                                 linear_prune_amount=0.2,
                                 num_iterations=10,
                                 num_epochs_per_iteration=10,
                                 model_filename_prefix="pruned_model",
                                 model_dir="saved_models",
                                 grouped_pruning=False):
    """Alternate pruning and fine-tuning for ``num_iterations`` rounds.

    Each round prunes the model, reports accuracy / classification report /
    global sparsity, fine-tunes for ``num_epochs_per_iteration`` epochs with a
    learning rate of ``learning_rate * learning_rate_decay ** round``, reports
    again, then saves the model as ``<prefix>_<round>.pt`` in ``model_dir``
    and reloads it from that file.

    With ``grouped_pruning == True`` all Conv2d weights are pruned jointly
    (global L1) by ``conv2d_prune_amount``; otherwise Conv2d and Linear
    layers are pruned one by one with their respective amounts.

    Returns:
        The model as reloaded after the last round (or the input model when
        ``num_iterations`` is 0).
    """
    def _evaluate_and_report(current_model):
        # Compute all three metrics first, then print, as one report.
        _, accuracy = evaluate_model(model=current_model,
                                     test_loader=test_loader,
                                     device=device,
                                     criterion=None)
        report = create_classification_report(model=current_model,
                                              test_loader=test_loader,
                                              device=device)
        _zeros, _elements, sparsity = measure_global_sparsity(
            current_model,
            weight=True,
            bias=False,
            conv2d_use_mask=True,
            linear_use_mask=False)
        print("Test Accuracy: {:.3f}".format(accuracy))
        print("Classification Report:")
        print(report)
        print("Global Sparsity:")
        print("{:.2f}".format(sparsity))

    for i in range(num_iterations):
        round_number = i + 1
        print("Pruning and Finetuning {}/{}".format(round_number,
                                                    num_iterations))
        print("Pruning...")

        if grouped_pruning == True:  # noqa: E712 - keeps the exact comparison
            # Global (grouped) pruning over all Conv2d weights.
            parameters_to_prune = [
                (module, "weight")
                for _, module in model.named_modules()
                if isinstance(module, torch.nn.Conv2d)
            ]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=conv2d_prune_amount,
            )
        else:
            for _, module in model.named_modules():
                if isinstance(module, torch.nn.Conv2d):
                    layer_amount = conv2d_prune_amount
                elif isinstance(module, torch.nn.Linear):
                    layer_amount = linear_prune_amount
                else:
                    continue
                prune.l1_unstructured(module,
                                      name="weight",
                                      amount=layer_amount)

        _evaluate_and_report(model)

        print("Fine-tuning...")
        train_model(model=model,
                    train_loader=train_loader,
                    test_loader=test_loader,
                    device=device,
                    l1_regularization_strength=l1_regularization_strength,
                    l2_regularization_strength=l2_regularization_strength,
                    learning_rate=learning_rate * (learning_rate_decay**i),
                    num_epochs=num_epochs_per_iteration)

        _evaluate_and_report(model)

        model_filename = "{}_{}.pt".format(model_filename_prefix,
                                           round_number)
        model_filepath = os.path.join(model_dir, model_filename)
        save_model(model=model,
                   model_dir=model_dir,
                   model_filename=model_filename)
        model = load_model(model=model,
                           model_filepath=model_filepath,
                           device=device)

    return model
print(list(module.named_parameters())) print("\# Unpruned buffers") print(list(module.named_buffers())) # Prune a single module (here conv1) print("# Prune a single module (here conv1) by 30%") prune.random_unstructured(module, name="weight", amount=0.3) print("# conv1 pruned params") print(list(module.named_parameters())) print("# conv1 pruned buffers") print(list(module.named_buffers())) # Prune weight using L1 norm and 3 smallest entries prune.l1_unstructured(module, name="bias", amount=3) print("# conv1 pruned bias params") print(list(module.named_parameters())) print("# conv1 pruned bias buffers") print(list(module.named_buffers())) print("# Forward pre hooks") print(module._forward_pre_hooks) # Iterative pruning (Prune multiple times in series, zeros out 50%) prune.ln_structured(module, name="weight", amount=0.5, n=2, dim=0) # weights pruned print(module.weight) for hook in module._forward_pre_hooks.values(): if hook._tensor_name == "weight": # select out the correct hook
from model import Generator, Discriminator
import torchvision.utils as vutils
import matplotlib.pyplot as plt
import numpy as np

# Script: load a trained generator, prune its transposed convolutions, and
# display one grid of generated images.
ngpu = 0
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

netG = Generator(ngpu).to(device)
weights = "./weights/netG.pth"
netG.load_state_dict(torch.load(weights, map_location=device))
noise = torch.randn(64, 100, 1, 1, device=device)

for name, module in netG.named_modules():
    if not isinstance(module, torch.nn.ConvTranspose2d):
        continue
    # This call is the key step. An amount between 0 and 1 prunes that
    # fraction of the weights; an integer amount prunes that many weights.
    # Author's note: the kernels here are 4x4 (times in_channel and
    # out_channel), so pruning in_channel * out_channel entries would match
    # the number of kernels -- but the entries are ranked across the whole
    # weight tensor from smallest to largest, not by taking the minimum of
    # each individual kernel.
    prune.l1_unstructured(module, name='weight', amount=0.1)

img_list = []
with torch.no_grad():
    fake = netG(noise).detach().cpu()
img_list.append(vutils.make_grid(fake, padding=2, normalize=True))

#%%capture
fig = plt.figure(figsize=(8, 8))
plt.axis("off")
for grid in img_list:
    # make_grid returns channels-first; imshow expects channels-last.
    plt.imshow(np.transpose(grid, (1, 2, 0)), animated=True)
plt.show()
def __init__(
        self,
        vocab_path=None,
        model_paths=None,
        weigths=None,
        max_len=50,
        min_len=3,
        lowercase_tokens=False,
        log=False,
        iterations=3,
        min_probability=0.0,
        model_name='roberta',
        special_tokens_fix=1,
        is_ensemble=True,
        min_error_probability=0.0,
        confidence=0,
        resolve_cycles=False,
        prune_amount=0.,
        num_layers_to_keep=12):
    """Load, shrink and prune one model per entry of ``model_paths``.

    For every path the model is built and its weights loaded, its encoder is
    cut down to the first ``num_layers_to_keep`` layers, every ``Linear``
    weight is L1-pruned by ``prune_amount`` (made permanent), and the model
    is switched to eval mode and appended to ``self.models``. The matching
    indexer is appended to ``self.indexers``. Parameter counts, model types,
    model structure and on-disk sizes are printed along the way; sizes are
    measured through a temporary file ``temp.p`` in the working directory.

    ``weigths`` (sic) gives one ensemble weight per model; when it is empty
    or None every model gets weight 1. When ``is_ensemble`` is truthy,
    ``model_name`` and ``special_tokens_fix`` are re-derived from each path.
    """
    def _report_size(net):
        # Serialize the state dict to a scratch file just to read its size.
        torch.save(net.state_dict(), "temp.p")
        print('Size (MB):', os.path.getsize("temp.p") / 1e6)
        os.remove('temp.p')

    def _truncate_encoder(net, keep):
        # The full model must be passed in. The kept layers are the objects
        # of the input model; only the rest of the model is deep-copied.
        encoder_layers = net.text_field_embedder.token_embedder_bert.bert_model.encoder.layer
        kept_layers = nn.ModuleList()
        for layer_idx in range(0, keep):
            kept_layers.append(encoder_layers[layer_idx])
        clone = copy.deepcopy(net)
        clone.text_field_embedder.token_embedder_bert.bert_model.encoder.layer = kept_layers
        return clone

    if weigths:
        self.model_weights = [float(w) for w in weigths]
    else:
        self.model_weights = [1] * len(model_paths)
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    self.max_len = max_len
    self.min_len = min_len
    self.lowercase_tokens = lowercase_tokens
    self.min_probability = min_probability
    self.min_error_probability = min_error_probability
    self.vocab = Vocabulary.from_files(vocab_path)
    self.log = log
    self.iterations = iterations
    self.confidence = confidence
    self.resolve_cycles = resolve_cycles

    # set training parameters and operations
    self.indexers = []
    self.models = []
    for model_path in model_paths:
        if is_ensemble:
            model_name, special_tokens_fix = self._get_model_data(
                model_path)
        weights_name = get_weights_name(model_name, lowercase_tokens)
        self.indexers.append(
            self._get_indexer(weights_name, special_tokens_fix))

        embedder = self._get_embbeder(weights_name, special_tokens_fix)
        model = Seq2Labels(vocab=self.vocab,
                           text_field_embedder=embedder,
                           confidence=self.confidence).to(self.device)

        # count number of params
        pytorch_total_params = sum(p.numel() for p in model.parameters())
        print('total params:', pytorch_total_params)
        print('type:', type(model))

        if torch.cuda.is_available():
            state = torch.load(model_path)
        else:
            state = torch.load(model_path,
                               map_location=torch.device('cpu'))
        model.load_state_dict(state)

        _report_size(model)
        print('type:', type(model))

        # Drop encoder layers beyond the requested depth.
        print('before model 12:', model)
        model = _truncate_encoder(model, num_layers_to_keep)
        print('after', num_layers_to_keep, ' :', model)
        _report_size(model)

        # Prune every linear layer and fold the mask into the weight.
        _report_size(model)
        for _, module in model.named_modules():
            if not isinstance(module, torch.nn.Linear):
                continue
            prune.l1_unstructured(module, name='weight', amount=prune_amount)
            prune.remove(module, name='weight')

        print('About to return')
        _report_size(model)

        model.eval()
        self.models.append(model)
# Restore the network weights stored under the 'net' key of the checkpoint
# found at args.loadfile.
checkpoint = torch.load(args.loadfile)
net.load_state_dict(checkpoint['net'])

classes = (
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)

# Training
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    net.parameters(),
    lr=args.lr,
    momentum=0.9,
    weight_decay=5e-4,
)

# Apply L1 unstructured pruning to the weight of every Conv2d and Linear
# layer.  NOTE(review): `pruner` is presumably torch.nn.utils.prune and
# `prune` the pruning amount -- both are defined outside this section; confirm.
for name, module in net.named_modules():
    if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
        pruner.l1_unstructured(module, name='weight', amount=prune)


def train(epoch):
    """Run the forward pass and loss computation over every batch of `trainloader`.

    Prints the epoch number, switches `net` to training mode, and for each
    batch moves the data to `device`, clears the optimizer gradients and
    evaluates `criterion` on the network output.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss, correct, total = 0, 0, 0
    for batch_idx, batch in enumerate(trainloader):
        inputs, targets = batch
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
def _prune_model_mask_layers(model, prototype):
    """Attach pruning masks to every layer of `model` that is wider than its prototype twin.

    `model` and `prototype` are walked in lockstep through `named_modules()`,
    so both must list their sub-modules in the same order.  `model` is
    modified in place.
    """
    pairs = zip(model.named_modules(), prototype.named_modules())
    for position, ((_, layer), (_, target)) in enumerate(pairs):
        if position == 0:
            # The first entry of named_modules() is the root container itself.
            continue

        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            diff = torch.abs(
                torch.tensor(layer.weight.shape)
                - torch.tensor(target.weight.shape))
            extra_out = int(diff[0].item())
            extra_in = int(diff[1].item())
            # An integer `amount` removes exactly that many channels; n=1
            # ranks the channels by their L1 norm.
            if extra_out > 0:
                prune.ln_structured(
                    layer, name="weight", amount=extra_out, n=1, dim=0)
            if extra_in > 0:
                prune.ln_structured(
                    layer, name="weight", amount=extra_in, n=1, dim=1)
            if extra_out > 0 or extra_in > 0:
                # Remember which dim-0 rows survive; the bias of this layer
                # and the BatchNorm2d that follows it reuse the selection.
                # It is stored as a parameter so that it shows up in
                # model.state_dict() under '<layer>.w_mask'.
                kept_rows = torch.unique(
                    torch.where(layer.weight_mask != 0)[0])
                layer.register_parameter(
                    'w_mask', nn.Parameter(kept_rows.float()))
        elif isinstance(layer, nn.BatchNorm2d):
            diff = torch.abs(
                torch.tensor(layer.weight.shape)
                - torch.tensor(target.weight.shape))
            if diff[0] > 0:
                # Pruning everything only serves to flag the layer: it makes
                # 'weight_orig'/'weight_mask' appear in the state dict.  The
                # channels actually kept come from the preceding layer.
                prune.l1_unstructured(layer, name="weight", amount=1.0)


def _prune_model_layer_kinds(model):
    """Map the name of each Conv2d/BatchNorm2d/Linear sub-module to its kind, in module order."""
    tagged = (
        (nn.Conv2d, 'Conv2d'),
        (nn.BatchNorm2d, 'BatchNorm2d'),
        (nn.Linear, 'Linear'),
    )
    kinds = {}
    for position, (name, module) in enumerate(model.named_modules()):
        if position == 0:
            continue
        for cls, tag in tagged:
            if isinstance(module, cls):
                kinds[name] = tag
    return kinds


def _prune_model_batchnorm_sources(kinds):
    """Map each BatchNorm2d layer name to the name of the tracked layer right before it."""
    names = list(kinds)
    sources = {}
    for position, name in enumerate(names):
        if kinds[name] != 'BatchNorm2d':
            continue
        if position == 0:
            raise ValueError(
                f"BatchNorm2d layer '{name}' has no preceding "
                "Conv2d/Linear/BatchNorm2d layer to take its channel "
                "selection from")
        sources[name] = names[position - 1]
    return sources


def prune_model(model, prune_protopyte):
    """Shrink `model` to the layer shapes of `prune_protopyte` and return the result.

    Both arguments are deep-copied first, so neither is modified.  Wherever
    a Conv2d/Linear layer of `model` has more output (dim 0) or input (dim 1)
    channels than the matching prototype layer, the surplus channels are
    dropped with L1-norm structured pruning.  The surviving values are then
    loaded into the prototype copy.  A BatchNorm2d layer that has to shrink
    keeps the channels selected for the tracked layer immediately before it.

    State entries of the prototype that this routine does not handle
    explicitly (for example BatchNorm `num_batches_tracked`, or parameters
    of other layer types) are copied from `model` when the key exists there
    with the same shape, and are zero-filled otherwise.

    Args:
        model: the trained network to shrink.
        prune_protopyte: a network with the same module structure as `model`
            but the target (smaller or equal) layer widths.

    Returns:
        A copy of `prune_protopyte` carrying the selected values of `model`.

    Raises:
        ValueError: if a BatchNorm2d layer is the first tracked layer, so
            there is no preceding layer to take the channel selection from.
    """
    model = copy.deepcopy(model)
    prune_protopyte = copy.deepcopy(prune_protopyte)

    _prune_model_mask_layers(model, prune_protopyte)
    kinds = _prune_model_layer_kinds(model)
    bn_sources = _prune_model_batchnorm_sources(kinds)

    # Build both state dicts once; every state_dict() call creates a new dict.
    model_state = model.state_dict()
    proto_state = prune_protopyte.state_dict()

    # Default for every prototype entry: take the model value when it fits,
    # otherwise start from zeros.  Pruned layers are overwritten below.
    for key in list(proto_state):
        donor = model_state.get(key)
        if donor is not None and donor.shape == proto_state[key].shape:
            proto_state[key] = donor
        else:
            proto_state[key].fill_(0)

    for name, kind in kinds.items():
        weight_key = f'{name}.weight'
        bias_key = f'{name}.bias'
        if kind == 'BatchNorm2d':
            stat_keys = (f'{name}.running_mean', f'{name}.running_var')
        else:
            stat_keys = ()
        was_pruned = (f'{name}.weight_orig' in model_state
                      and f'{name}.weight_mask' in model_state)

        if not was_pruned:
            # Layer kept its width: copy its tensors over unchanged.
            proto_state[weight_key] = model_state[weight_key]
            for key in stat_keys:
                proto_state[key] = model_state[key]
            if bias_key in model_state:
                proto_state[bias_key] = model_state[bias_key]
            continue

        dense = model_state[f'{name}.weight_orig']
        if kind == 'BatchNorm2d':
            rows = model_state[f'{bn_sources[name]}.w_mask'].long()
            proto_state[weight_key] = dense[rows].reshape(
                proto_state[weight_key].shape)
            for key in stat_keys:
                proto_state[key] = model_state[key][rows].reshape(
                    proto_state[key].shape)
            if bias_key in model_state:
                proto_state[bias_key] = model_state[bias_key][rows].reshape(
                    proto_state[bias_key].shape)
        else:
            mask = model_state[f'{name}.weight_mask']
            # Boolean indexing flattens the surviving entries in row-major
            # order, which is exactly the layout of the smaller weight.
            proto_state[weight_key] = dense[mask.bool()].reshape(
                proto_state[weight_key].shape)
            if bias_key in model_state:
                rows = model_state[f'{name}.w_mask'].long()
                proto_state[bias_key] = model_state[bias_key][rows].reshape(
                    proto_state[bias_key].shape)

    prune_protopyte.load_state_dict(proto_state)
    return prune_protopyte