# (The argparse setup defining `args`, `device`, and `kwargs` precedes this
#  excerpt.)
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets, transforms
from tqdm import tqdm

import util
from net.models import LeNet, AE  # assumption: project-local model definitions

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.test_batch_size,
    shuffle=False,
    **kwargs)

# Define which model to use
if args.architecture == 'lenet':
    model = LeNet(mask=True).to(device)
else:
    model = AE(mask=False, input_shape=784).to(device)
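# (`input_shape=784` is a flattened 28x28 MNIST image, matching the
#  `data.view(-1, 784)` reconstruction target in the MSE loss below.)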
print(model)
util.print_model_parameters(model)

# NOTE: the `weight_decay` argument adds an L2 regularization term to the loss
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.00001)
initial_optimizer_state_dict = optimizer.state_dict()
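# A minimal sketch of what `weight_decay` amounts to (an equivalent manual
# formulation, not what this script executes): the optimizer adds `wd * p` to
# each parameter's gradient, which is the derivative of the penalty
#   0.5 * wd * sum(p.pow(2).sum() for p in model.parameters())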


def train(epochs):
    model.train()
    for epoch in range(epochs):
        pbar = tqdm(enumerate(train_loader), total=len(train_loader))
        for batch_idx, (data, target) in pbar:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            if args.architecture != 'lenet':
                loss = F.mse_loss(output, data.view(-1, 784))
            else:
                loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            pbar.set_description(f'Train epoch {epoch}, loss: {loss.item():.6f}')

# ---------------------------------------------------------------------------
# Example 2
# ---------------------------------------------------------------------------

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor()])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=transforms.Compose([
                       transforms.ToTensor()])),
    batch_size=args.test_batch_size, shuffle=False, **kwargs)
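# NOTE: unlike the first example, no `Normalize` transform is applied here, so
# pixel values stay in [0, 1].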


# Define which model to use
model = LeNet(mask=False).to(device)

print(model)
util.print_model_parameters(model)

# NOTE: no `weight_decay` here; L2 regularization is instead applied manually
# via the `decay` argument of `train` below
optimizer = optim.Adam(model.parameters(), lr=args.lr)
initial_optimizer_state_dict = optimizer.state_dict()
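# The pristine optimizer state is saved so Adam's moment estimates can be
# restored before retraining after pruning (assumed from the usual
# prune-then-retrain recipe; the retraining step is outside this excerpt).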

def train(epochs, decay=0, threshold=0.0):
    model.train()
    pbar = tqdm(range(epochs), total=epochs)
    curves = np.zeros((epochs, 14))
    loss_values = []  # per-batch training losses, averaged later by `to_average`

    for epoch in pbar:
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            
            reg = 0.0
            if decay:
                # Assumes `decay` scales an L2 penalty over all parameters
                # (the exact regularization term is truncated in this excerpt).
                for param in model.parameters():
                    reg = reg + param.pow(2).sum()
            loss = loss + decay * reg
            loss.backward()
            optimizer.step()
            loss_values.append(loss.item())
        pbar.set_description(f'Train epoch {epoch}, loss: {loss.item():.6f}')
    return loss_values


def test():
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum up the per-example losses over the whole test set.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Prediction is the index of the max log-probability.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(
            f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)'
        )
    return accuracy


def to_average(arr, length):
    return [
        np.mean(arr[i * length:(i + 1) * length])
        for i in range(int(len(arr) // length))
    ]
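# Example: to_average([1, 2, 3, 4, 5, 6], 2) -> [1.5, 3.5, 5.5]; each entry is
# the mean over a consecutive window of `length` values.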


# Initial training
# NOTE: the `weight_decay` argument adds an L2 regularization term to the loss
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
initial_optimizer_state_dict = optimizer.state_dict()
print("--- Initial training ---")
loss_values = train(args.epochs)
loss_values = to_average(loss_values, 50)
prune_threshold = len(loss_values)

accuracy = test()
util.log(args.log, f"initial_accuracy {accuracy}")
torch.save(model, "saves/initial_model.ptmodel")
print("--- Before pruning ---")
util.print_nonzeros(model)

# Pruning
model.prune_by_std(args.sensitivity)
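# `prune_by_std` follows the Deep Compression heuristic; a hedged sketch of
# the per-layer rule it is assumed to apply (not necessarily its actual code):
#
#   threshold = args.sensitivity * module.weight.data.std().item()
#   mask = module.weight.data.abs() > threshold   # keep only large weights
#   module.weight.data *= mask.float()            # zero out the rest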
accuracy = test()