import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm

import util                       # project-local logging/printing helpers (assumed)
from net.models import LeNet, AE  # project-local models (import path assumed)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.test_batch_size, shuffle=False, **kwargs)

# Define which model to use
if args.architecture == 'lenet':
    model = LeNet(mask=True).to(device)
else:
    model = AE(mask=False, input_shape=784).to(device)

print(model)
util.print_model_parameters(model)

# NOTE : the `weight_decay` term is the L2 regularization strength
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.00001)
initial_optimizer_state_dict = optimizer.state_dict()


def train(epochs):
    model.train()
    for epoch in range(epochs):
        pbar = tqdm(enumerate(train_loader), total=len(train_loader))
        for batch_idx, (data, target) in pbar:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            # The autoencoder reconstructs its input; LeNet is a classifier
            if args.architecture != 'lenet':
                loss = F.mse_loss(output, data.view(-1, 784))
            else:
                loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
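# NOTE : `LeNet(mask=True)` above implies layers that carry a binary pruning
#        mask. The sketch below is a minimal, assumed implementation of such a
#        layer; the name `MaskedLinear` and its interface are illustrative and
#        not necessarily the project's actual `net.models` API.
import torch
import torch.nn as nn
import torch.nn.functional as F


class MaskedLinear(nn.Linear):
    """Linear layer whose weights can be permanently zeroed via a 0/1 mask."""

    def __init__(self, in_features, out_features, bias=True):
        super().__init__(in_features, out_features, bias)
        # Non-trainable mask with the same shape as the weight matrix
        self.register_buffer('mask', torch.ones_like(self.weight))

    def set_mask(self, mask):
        self.mask.copy_(mask)
        self.weight.data *= self.mask  # zero out pruned weights immediately

    def forward(self, input):
        # Applying the mask in forward keeps pruned weights at zero even
        # after optimizer updates modify `self.weight`
        return F.linear(input, self.weight * self.mask, self.bias)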
# --- Alternative setup: unmasked LeNet, with regularization applied in the
#     training loop rather than through the optimizer ---

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor()])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor()])),
    batch_size=args.test_batch_size, shuffle=False, **kwargs)

# Define which model to use
model = LeNet(mask=False).to(device)

print(model)
util.print_model_parameters(model)

# NOTE : no `weight_decay` is passed here; regularization is added to the
#        loss manually via the `decay` argument of `train` below
optimizer = optim.Adam(model.parameters(), lr=args.lr)
initial_optimizer_state_dict = optimizer.state_dict()


def train(epochs, decay=0, threshold=0.0):
    model.train()
    pbar = tqdm(range(epochs), total=epochs)
    curves = np.zeros((epochs, 14))
    for epoch in pbar:
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            reg = 0.0
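# NOTE : the `decay` argument and the `reg` accumulator above suggest that
#        this variant adds the regularization penalty to the loss by hand.
#        A minimal sketch of how such a term is typically accumulated (an L2
#        penalty over weight matrices); the exact norm and parameter
#        selection in the original may differ:
def l2_penalty(model, decay):
    reg = 0.0
    for name, param in model.named_parameters():
        if 'weight' in name:  # regularize weights, not biases
            reg = reg + param.pow(2).sum()
    return decay * reg

# Assumed usage inside the training loop:
#     loss = F.nll_loss(output, target) + l2_penalty(model, decay)
#     loss.backward()
#     optimizer.step()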
def test():
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum batch losses so the average is taken over the whole test set
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'Test set: Average loss: {test_loss:.4f}, '
          f'Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)')
    return accuracy


def to_average(arr, length):
    # Average `arr` in consecutive chunks of `length` (e.g. per-batch losses
    # smoothed into one value per chunk)
    return [np.mean(arr[i * length:(i + 1) * length])
            for i in range(len(arr) // length)]


# Initial training
# NOTE : the `weight_decay` term is the L2 regularization strength
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
initial_optimizer_state_dict = optimizer.state_dict()

print("--- Initial training ---")
loss_values = train(args.epochs)
loss_values = to_average(loss_values, 50)  # smooth the per-batch loss curve
prune_threshold = len(loss_values)
accuracy = test()
util.log(args.log, f"initial_accuracy {accuracy}")
torch.save(model, "saves/initial_model.ptmodel")

print("--- Before pruning ---")
util.print_nonzeros(model)

# Pruning
model.prune_by_std(args.sensitivity)
accuracy = test()
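# NOTE : `prune_by_std` is defined on the model in the surrounding project.
#        The sketch below shows the likely idea -- magnitude pruning with a
#        per-layer threshold derived from the weight standard deviation
#        (threshold = sensitivity * std), in the spirit of Han et al.'s
#        deep-compression pruning. It assumes masked layers expose a
#        `set_mask` method like the hypothetical `MaskedLinear` above.
def prune_by_std(model, sensitivity):
    for name, module in model.named_modules():
        if hasattr(module, 'set_mask'):
            threshold = sensitivity * module.weight.data.std().item()
            # Keep weights whose magnitude exceeds the threshold; zero the rest
            mask = (module.weight.data.abs() > threshold).float()
            module.set_mask(mask)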