def train_pp(model, loss_fn, optimizer, param, loader_train, loader_test,
             ratio_list, k=3, loader_val=None):
    model.train()
    ratio_count = 0
    for epoch in range(param['num_epochs']):
        print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))
        for t, (x, y) in enumerate(loader_train):
            x_var, y_var = to_var(x), to_var(y.long())
            scores = model(x_var)
            loss = loss_fn(scores, y_var)
            if (t + 1) % 100 == 0:
                print('t = %d, loss = %.8f' % (t + 1, loss.item()))
            optimizer.zero_grad()
            loss.backward()
            model.update_grad()
            optimizer.step()
        test(model, loader_test)
        # Every k epochs (skipping epoch 0), apply the next pruning ratio.
        if epoch % k == 0 and epoch != 0 and ratio_count < len(ratio_list):
            model.procedure_stb_pruning(ratio_list[ratio_count])
            # model.procedure_weight_pruning(ratio_list[ratio_count])
            ratio_count += 1
            print('########## pruning pass %d done ##########' % ratio_count)
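# `to_var` is not defined in this file; the sketch below shows the behaviour
# it is assumed to have (the classic helper from pre-0.4 PyTorch code): move
# the tensor to the GPU when one is available and optionally mark it as
# requiring gradients. On PyTorch >= 0.4 plain tensors replace Variable.
def to_var(x, requires_grad=False):
    if torch.cuda.is_available():
        x = x.cuda()
    return x.requires_grad_(requires_grad)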
def train_rand(model, loss_fn, optimizer, param, loader_train, loader_test,
               ratio, loader_val=None):
    model.train()
    model.rand_mask(ratio)  # apply a random pruning mask before training
    for epoch in range(param['num_epochs']):
        print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))
        for t, (x, y) in enumerate(loader_train):
            x_var, y_var = to_var(x), to_var(y.long())
            scores = model(x_var)
            loss = loss_fn(scores, y_var)
            if (t + 1) % 100 == 0:
                print('t = %d, loss = %.8f' % (t + 1, loss.item()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        test(model, loader_test)
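# `rand_mask` is a model method not shown here. A hedged sketch of the
# assumed behaviour -- draw an independent Bernoulli mask per weight tensor
# so that each weight survives with probability (1 - ratio):
def random_masks(model, ratio):
    masks = []
    for p in model.parameters():
        if p.dim() > 1:  # skip biases
            masks.append((torch.rand_like(p) > ratio).float())
    return masks
# e.g. model.set_masks(random_masks(model, 0.5)) would zero roughly half
# of the multi-dimensional weights at random.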
def baseline_fitness(state_dict, num_epochs=600):
    # Hyper-parameters
    param = {
        'batch_size': 4,
        'test_batch_size': 50,
        'num_epochs': num_epochs,
        'learning_rate': 0.001,
        'weight_decay': 5e-4,
    }

    # Count conv layers (4-D weights) and fc layers (2-D weights).
    num_cnn_layer = sum(int(len(v.size()) == 4) for v in state_dict.values())
    num_fc_layer = sum(int(len(v.size()) == 2) for v in state_dict.values())
    state_key = list(state_dict.keys())

    # Rebuild the layer configuration from the weight shapes: the first
    # entry is the input-channel count, the rest are output widths.
    cfg = []
    first = True
    for v in state_dict.values():
        if len(v.data.size()) == 4 or len(v.data.size()) == 2:
            if first:
                first = False
                cfg.append(v.data.size()[1])
            cfg.append(v.data.size()[0])
    assert num_cnn_layer + num_fc_layer == len(cfg) - 1

    # Copy the checkpoint weights into a freshly built network. (The original
    # mask-generation scaffolding for conv/fc layers was fully commented out
    # -- all masks were ones -- so the weights pass through unmodified.)
    net = ConvNet(cfg, num_cnn_layer)
    for i, p in enumerate(net.parameters()):
        p.data = state_dict[state_key[i]]

    # Retraining
    loader_train, loader_test = load_dataset()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(),
                                    lr=param['learning_rate'],
                                    weight_decay=param['weight_decay'])

    test_acc_list = []
    for t in range(num_epochs):
        param['num_epochs'] = 10  # train() runs 10 epochs per outer step
        train(net, criterion, optimizer, param, loader_train)
        test_acc_list.append(test(net, loader_test))
    plt.plot(test_acc_list)

    with open('baseline_result.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for row in test_acc_list:
            writer.writerow([row])
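# Worked example (hypothetical shapes) of the cfg reconstruction above:
# conv weights are 4-D (out, in, kH, kW) and fc weights 2-D (out, in), so
# cfg starts with the first layer's input width followed by every layer's
# output width, and len(cfg) - 1 equals the total layer count.
shapes = [(32, 3, 5, 5), (64, 32, 5, 5), (10, 1024)]  # hypothetical checkpoint
cfg_example = [shapes[0][1]] + [s[0] for s in shapes]
assert cfg_example == [3, 32, 64, 10] and len(cfg_example) - 1 == len(shapes)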
def gen_mask(model, loss_fn, optimizer, param, loader_train, loader_test,
             ratio, k=3, loader_val=None):
    test(model, loader_test)
    model.train()
    count = 0
    ratio_ind = 0
    for epoch in range(param['num_epochs']):
        model.train()
        print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))
        for t, (x, y) in enumerate(loader_train):
            x_var, y_var = to_var(x), to_var(y.long())
            scores = model(x_var)
            loss = loss_fn(scores, y_var)
            if (t + 1) % 100 == 0:
                print('t = %d, loss = %.8f' % (t + 1, loss.item()))
            optimizer.zero_grad()
            loss.backward()
            model.update_grad()
            optimizer.step()

        # Every k epochs, tighten the mask to the next convolution-filter
        # pruning ratio; otherwise just re-apply the current mask.
        if (epoch + 1) % k == 0 and ratio_ind < len(ratio):
            print('pruning convolution-layer filters, pruning ratio: %.3f'
                  % ratio[ratio_ind])
            if ratio_ind == 0:
                model.com_mask2(ratio[ratio_ind], 0)
            else:
                model.com_mask2(ratio[ratio_ind], ratio[ratio_ind - 1])
            model.set_masks(model.mask)
            model.zero_accmgrad()
            ratio_ind += 1
        else:
            model.set_masks(model.mask)
        prune_rate(model)

        # Step-decay the learning rate once all pruning ratios are applied.
        print('modify learning rate')
        lr = param['learning_rate'] * (0.5 ** ((epoch - k * len(ratio)) // 30))
        # lr = param['learning_rate'] * (0.5 ** ((epoch - 1) // 30))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('epoch', epoch)
        test(model, loader_test)
        count += 1
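# `prune_rate` is defined elsewhere; a minimal sketch consistent with how it
# is called above -- report the fraction of exactly-zero weights:
def prune_rate_sketch(model, verbose=True):
    total, zeros = 0, 0
    for p in model.parameters():
        if p.dim() > 1:  # count only weight matrices/filters, not biases
            total += p.numel()
            zeros += int((p.data == 0).sum().item())
    rate = 100.0 * zeros / max(total, 1)
    if verbose:
        print('pruned %.2f%% of weights' % rate)
    return rate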
test_dataset = datasets.MNIST(root='../data/', train=False, download=True,
                              transform=transforms.ToTensor())
loader_test = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=param['test_batch_size'],
                                          shuffle=True)

# Load the pretrained model
net = MLP()
net.load_state_dict(torch.load('models/mlp_pretrained.pkl'))
if torch.cuda.is_available():
    print('CUDA enabled.')
    net.cuda()
print("--- Pretrained network loaded ---")
test(net, loader_test)

# Prune the weights
masks = weight_prune(net, param['pruning_perc'])
net.set_masks(masks)
print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(net, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
train(net, criterion, optimizer, param, loader_train)
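# `weight_prune` belongs to the surrounding project; a minimal sketch of the
# assumed magnitude-based criterion -- threshold all multi-dimensional
# weights at the global `pruning_perc`-th percentile of absolute values and
# return one 0/1 mask per weight tensor, in parameter order:
def weight_prune_sketch(model, pruning_perc):
    all_weights = np.concatenate(
        [p.data.abs().cpu().numpy().ravel()
         for p in model.parameters() if p.dim() > 1])
    threshold = np.percentile(all_weights, pruning_perc)
    return [(p.data.abs() > threshold).float()
            for p in model.parameters() if p.dim() > 1]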
def main():
    # Parse command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_config", required=False,
                        default="deep_dnn.json",
                        help="Model architecture .json")
    parser.add_argument('-s', "--setup", default="agressive_setup.json",
                        help="Experimental setup .json")
    args = parser.parse_args()

    # Config paths
    model_config = 'configs/model_architecture/' + args.model_config
    setup_path = "configs/experimental_setup/" + args.setup
    print(model_config)

    # Hyper-parameters
    setup = load_config(setup_path)
    train_setup = setup["Train"]
    prune_setup = setup["Prune"]
    batch_size = train_setup["batch_size"]
    epochs = train_setup["training_epochs"]
    lr = train_setup["learning_rate"]
    datatype = train_setup["datatype"]
    feat_size = train_setup["feature_size"]
    n_samples = train_setup["n_samples"]
    n_classes = train_setup["n_classes"]
    val_ratio = train_setup["val_ratio"]
    test_ratio = train_setup["test_ratio"]
    labels = 1
    if datatype == "multilabel":
        labels = train_setup["labels_per_sample"]

    # CUDA for PyTorch
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Dataloaders
    train_loader, val_loader, test_loader = \
        CreateRandomDataset(datatype, feat_size, n_samples, n_classes,
                            val_ratio, test_ratio, batch_size,
                            labels).get_dataloaders()
    data_loaders = {"train": train_loader, "val": val_loader,
                    "test": test_loader}

    # Init model
    model = DNN(config=model_config, in_features=feat_size,
                n_classes=n_classes)
    if use_cuda:
        print('CUDA enabled.')
        model.cuda()
    print("--- DNN network initialized ---")
    print(model)

    # Criterion / optimizer / pruner
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    weight_pruner = WeightPruner(model)

    # Train the model from scratch.
    print("--- Training DNN ---")
    train_losses, val_losses = \
        train_eval(model, criterion, optimizer, epochs,
                   train_loader, val_loader)
    test_acc, test_loss = test(model, test_loader, criterion)
    learn_curves(train_losses, val_losses, "loss_fig.png")

    iterative_pruning(model, weight_pruner, criterion, data_loaders,
                      prune_setup)
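# `iterative_pruning` is defined elsewhere; a hedged sketch of the assumed
# prune/retrain loop driven by the "Prune" section of the setup .json. The
# config keys ("pruning_percentages", "retrain_epochs", "learning_rate") and
# the pruner.prune() call are assumptions for illustration only.
def iterative_pruning_sketch(model, pruner, criterion, data_loaders,
                             prune_setup):
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=prune_setup["learning_rate"])  # assumed key
    for perc in prune_setup["pruning_percentages"]:  # assumed key
        pruner.prune(perc)  # assumed WeightPruner API
        train_eval(model, criterion, optimizer,
                   prune_setup["retrain_epochs"],  # assumed key
                   data_loaders["train"], data_loaders["val"])
        test(model, data_loaders["test"], criterion)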
            # (Fragment: continuation of the loop copying old layers m0 into
            # the new, pruned network's layers m1.)
            m1.bias.data = m0.bias.data.clone()
            print('m1.weight.data shape ', m1.weight.data.size())
            layer_id_in_cfg += 1
            continue
        m1.weight.data = m0.weight.data.clone()
        m1.bias.data = m0.bias.data.clone()

num_parameters = sum(p.nelement() for p in new_net.parameters())

# Prune the weights
# masks = filter_prune(net, param['pruning_perc'])
# net.set_masks(masks)
# print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(new_net, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(new_net.parameters(),
                                lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
train(new_net, criterion, optimizer, param, loader_train)

# Check accuracy and non-zero weights in each layer.
print("--- After retraining ---")
test(new_net, loader_test)

# Save the pruned model
# torch.save(net.state_dict(), 'models/convnet_pruned.pkl')
def gen_mask(model, loss_fn, optimizer, param, loader_train, loader_test,
             ratio, k=3, loader_val=None):
    model.train()
    count = 0
    ratio_ind = 0
    for epoch in range(param['num_epochs']):
        model.train()
        print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))
        for t, (x, y) in enumerate(loader_train):
            x_var, y_var = to_var(x), to_var(y.long())
            scores = model(x_var)
            loss = loss_fn(scores, y_var)
            if (t + 1) % 100 == 0:
                print('t = %d, loss = %.8f' % (t + 1, loss.item()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Every k epochs, tighten the mask to the next convolution-filter
        # pruning ratio; the network is only masked here, not yet rebuilt.
        if (epoch + 1) % k == 0 and ratio_ind < len(ratio):
            print('pruning convolution-layer filters, pruning ratio: %.3f'
                  % ratio[ratio_ind])
            prev = 0 if ratio_ind == 0 else ratio[ratio_ind - 1]
            model.com_mask3(ratio[ratio_ind], prev)
            model.set_masks(model.mask)
            print(model.layer_list)
            print(model.filters_array)
            print(len(model.filters_array))
            print('not prune')
            # model = prune_vgg(model, model.layer_list, model.filters_array)
            # init_model(model)
            # optimizer.params = model.parameters()
            ratio_ind += 1

        # After all pruning ratios have been applied, train for k more epochs
        # and only then prune for real: rebuilding immediately at a high ratio
        # (e.g. 80%) tends to trap accuracy at the 10% random-guess level.
        if epoch > k * (len(ratio) + 1):
            print(model.layer_list)
            print(model.filters_array)
            print(len(model.filters_array))
            print('real pruning')
            model = prune_vgg(model, model.layer_list, model.filters_array)
            return model

        # Step-decay the learning rate once all pruning ratios are applied.
        print('modify learning rate')
        lr = param['learning_rate'] * (0.5 ** ((epoch - k * len(ratio)) // 100))
        # lr = param['learning_rate'] * (0.5 ** ((epoch - 1) // 30))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('epoch', epoch)
        test(model, loader_test)
        count += 1
    return model
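# `com_mask3` and `filters_array` are project-specific. A hedged sketch of
# the common L1-norm filter-ranking criterion they are assumed to follow:
# score each conv filter by the sum of its absolute weights and mark the
# lowest-scoring `ratio` fraction for removal.
def rank_filters_l1(conv_weight, ratio):
    # conv_weight: (out_channels, in_channels, kH, kW)
    scores = conv_weight.data.abs().sum(dim=(1, 2, 3))
    n_prune = int(ratio * scores.numel())
    return torch.argsort(scores)[:n_prune]  # indices of filters to prune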
                                           shuffle=True)
test_dataset = datasets.MNIST(root='../data/', train=False, download=True,
                              transform=transforms.ToTensor())
loader_test = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=param['test_batch_size'],
                                          shuffle=True)

model = LeNet()
model.load_state_dict(torch.load('models/lenet_pretrained.pkl',
                                 map_location='cpu'))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
print("--- Accuracy of Pretrained Model ---")
test(model, loader_test)

# Pruning
masks = lenet_prune()
model.set_masks(masks)
print("--- Accuracy After Pruning ---")
test(model, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
train(model, criterion, optimizer, param, loader_train)
print("--- Accuracy After Retraining ---")
    scal = np.sqrt(snr * 2 * hp.k / hp.n)
    labels, ip = generate_input(amt=10 ** hp.e_prec)
    enc = encoder(ip)
    enc = enc + torch.randn_like(enc, device=device) / scal  # AWGN channel
    op = decoder(enc)
    errs[i] = error_rate(op, labels)
plt.semilogy(xx, errs + 1 / 10 ** hp.e_prec, label='All weights')

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=hp.lr)
print("--- Pretrained network loaded ---")
test()

# Prune the weights
masks = weight_prune(net, hp.pp)
i = 0
for part in net:         # part in [encoder, decoder]
    for p in part[::2]:  # conveniently skips biases
        p.set_mask(masks[i])
        i += 1
print("--- {}% parameters pruned ---".format(hp.pp))
test()

if hp.plot:
    for i, snr in enumerate(snrs):
        print(i)
        scal = np.sqrt(snr * 2 * hp.k / hp.n)
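# The per-layer `set_mask` used above is assumed to come from a masked linear
# module like the sketch below (class name hypothetical): store a fixed 0/1
# mask as a buffer and multiply it into the weight on every forward pass so
# pruned connections stay at zero. Note this sketch does not block gradient
# flow to masked weights; the project's training code may also mask grads.
class MaskedLinearSketch(nn.Linear):
    def set_mask(self, mask):
        # buffer moves with the module on .cuda()/.to(device)
        self.register_buffer('mask', mask.float())

    def forward(self, x):
        weight = self.weight * self.mask if hasattr(self, 'mask') else self.weight
        return nn.functional.linear(x, weight, self.bias)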