def mnist(train_x, train_y, val_x, val_y):
    """Problem 3.1: Initialize objects and start training
    You won't need to call this function yourself.
    (Data is provided by autograder)

    Args:
        train_x (np.array): training data (55000, 784)
        train_y (np.array): training labels (55000,)
        val_x (np.array): validation data (5000, 784)
        val_y (np.array): validation labels (5000,)
    Returns:
        val_accuracies (list(float)): List of accuracies per validation round
                                      (num_epochs,)
    """
    # TODO: Initialize an MLP, optimizer, and criterion
    mnist_model = Sequential(Linear(784, 20), ReLU(), Linear(20, 10))
    # mnist_model = Sequential(Linear(784, 20), BatchNorm1d(20), ReLU(), Linear(20, 10))
    criterion = CrossEntropyLoss()
    mdl_optimizer = SGD(mnist_model.parameters(), momentum=0.9, lr=0.1)

    # TODO: Call training routine (make sure to write it below)
    val_accuracies = train(mnist_model, mdl_optimizer, criterion,
                           train_x, train_y, val_x, val_y)
    return val_accuracies
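# A minimal sketch of the `train`/`validate` routines that `mnist` above
# expects ("make sure to write it below"). It assumes the mytorch API used
# elsewhere in this file (Tensor, model.train()/model.eval(),
# optimizer.zero_grad()/step(), loss.backward()); the batch size, epoch
# count, and shuffle-each-epoch policy are illustrative choices, not
# autograder requirements.
def train(model, optimizer, criterion, train_x, train_y, val_x, val_y,
          num_epochs=3, batch_size=100):
    val_accuracies = []
    for epoch in range(num_epochs):
        model.train()
        # Shuffle the training set each epoch
        perm = np.random.permutation(len(train_x))
        train_x, train_y = train_x[perm], train_y[perm]
        for i in range(0, len(train_x), batch_size):
            optimizer.zero_grad()
            batch_x = Tensor(train_x[i:i + batch_size])
            batch_y = Tensor(train_y[i:i + batch_size])
            out = model(batch_x)
            loss = criterion(out, batch_y)
            loss.backward()
            optimizer.step()
        val_accuracies.append(validate(model, val_x, val_y))
    return val_accuracies

def validate(model, val_x, val_y):
    # Accuracy over the validation set, with the model in eval mode so
    # BatchNorm/Dropout behave correctly.
    model.eval()
    preds = np.argmax(model(Tensor(val_x)).data, axis=1)
    return float(np.mean(preds == val_y))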
def test_linear_xeloss_backward():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20))
    mytorch_optimizer = SGD(mytorch_mlp.parameters())
    mytorch_criterion = CrossEntropyLoss()
    test_forward_backward(mytorch_mlp, mytorch_criterion=mytorch_criterion)
    return True
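# Reference-only sketch of what CrossEntropyLoss computes on the forward
# pass: log-softmax plus negative log-likelihood, using the log-sum-exp
# trick for numerical stability. Plain numpy with integer `labels` and raw
# `logits`; the actual mytorch class must additionally build the autograd
# graph so the backward test above passes.
def cross_entropy_reference(logits, labels):
    shifted = logits - logits.max(axis=1, keepdims=True)  # stability shift
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels].mean()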
def test_linear_adam():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20))
    mytorch_optimizer = Adam(mytorch_mlp.parameters())
    mytorch_criterion = CrossEntropyLoss()
    return test_step(mytorch_mlp, mytorch_optimizer, 5, 5,
                     mytorch_criterion=mytorch_criterion)
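# Sketch of a single Adam update on one parameter, mirroring the standard
# Kingma & Ba rule that this test exercises. `m`, `v`, and `t` stand in for
# the optimizer's per-parameter state; names are illustrative, not the
# mytorch internals.
def adam_step(param, grad, m, v, t, lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
    b1, b2 = betas
    t += 1
    m = b1 * m + (1 - b1) * grad       # first-moment (mean) estimate
    v = b2 * v + (1 - b2) * grad ** 2  # second-moment (uncentered var) estimate
    m_hat = m / (1 - b1 ** t)          # bias correction
    v_hat = v / (1 - b2 ** t)
    param = param - lr * m_hat / (np.sqrt(v_hat) + eps)
    return param, m, v, t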
def test_big_model_step():
    np.random.seed(11785)

    # run a big model
    model = Sequential(Linear(10, 15), ReLU(), Dropout(p=0.2),
                       Linear(15, 20), ReLU(), Dropout(p=0.1))
    x, y = generate_dataset_for_mytorch_model(model, 4)
    x, y = Tensor(x), Tensor(y)
    criterion = CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08)

    # check output correct
    out = model(x)
    test_out = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_output.npy')
    if not assertions_all(out.data, test_out, "test_big_model_step_out", 1e-5, 1e-6):
        return False

    # run backward
    loss = criterion(out, y)
    loss.backward()

    # check params are correct (sorry this is ugly)
    assert model[0].weight.grad is not None, "Linear layer must have gradient."
    assert model[0].weight.grad.grad is None, "Final gradient tensor must not have its own gradient"
    assert model[0].weight.grad.grad_fn is None, "Final gradient tensor must not have its own grad function"
    assert model[0].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[0].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"

    assert model[3].weight.grad is not None, "Linear layer must have gradient."
    assert model[3].weight.grad.grad is None, "Final gradient tensor must not have its own gradient"
    assert model[3].weight.grad.grad_fn is None, "Final gradient tensor must not have its own grad function"
    assert model[3].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[3].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"

    # check gradient for linear layer at idx 0 is correct
    test_grad = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_grad.npy')
    if not assertions_all(model[0].weight.grad.data, test_grad, "test_big_model_grad_0", 1e-5, 1e-6):
        return False

    # check gradient for linear layer at idx 3 is correct
    test_grad = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_grad_3.npy')
    if not assertions_all(model[3].weight.grad.data, test_grad, "test_big_model_grad_3", 1e-5, 1e-6):
        return False

    # weight update with adam
    optimizer.step()

    # check updated weight values
    assert model[0].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[0].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"
    test_weights_3 = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_weight_update_3.npy')
    test_weights_0 = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_weight_update_0.npy')
    return assertions_all(model[0].weight.data, test_weights_0, "test_big_weight_update_0", 1e-5, 1e-6) and \
           assertions_all(model[3].weight.data, test_weights_3, "test_big_weight_update_3", 1e-5, 1e-6)
def test_big_linear_relu_xeloss_momentum():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20), ReLU(), Linear(20, 30), ReLU())
    mytorch_optimizer = SGD(mytorch_mlp.parameters(), momentum=0.9)
    mytorch_criterion = CrossEntropyLoss()
    test_step(mytorch_mlp, mytorch_optimizer, 5, 5,
              mytorch_criterion=mytorch_criterion)
    return True
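# Sketch of the momentum update that SGD(momentum=0.9) applies per
# parameter, following the PyTorch convention (velocity accumulates raw
# gradients and the learning rate is applied at update time; other
# formulations fold lr into the buffer). Names are illustrative.
def sgd_momentum_step(param, grad, velocity, lr=0.01, momentum=0.9):
    velocity = momentum * velocity + grad  # decaying sum of past gradients
    return param - lr * velocity, velocity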
def test_big_linear_bn_relu_xeloss_train_eval():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20), BatchNorm1d(20), ReLU(),
                             Linear(20, 30), BatchNorm1d(30), ReLU())
    mytorch_optimizer = SGD(mytorch_mlp.parameters())
    mytorch_criterion = CrossEntropyLoss()
    test_step(mytorch_mlp, mytorch_optimizer, 5, 5,
              mytorch_criterion=mytorch_criterion)
    return True
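# Sketch of the BatchNorm1d forward pass this train/eval test exercises:
# batch statistics (plus a running-average update) in train mode, the stored
# running statistics in eval mode. Plain numpy; gamma/beta are the learned
# affine parameters, and `momentum` follows the PyTorch convention
# new = (1 - momentum) * old + momentum * batch_stat. (PyTorch also uses the
# unbiased variance for the running estimate; omitted here for brevity.)
def batchnorm1d_reference(x, gamma, beta, running_mean, running_var,
                          training, momentum=0.1, eps=1e-5):
    if training:
        mean = x.mean(axis=0)
        var = x.var(axis=0)
        running_mean = (1 - momentum) * running_mean + momentum * mean
        running_var = (1 - momentum) * running_var + momentum * var
    else:
        mean, var = running_mean, running_var
    x_hat = (x - mean) / np.sqrt(var + eps)
    return gamma * x_hat + beta, running_mean, running_var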
def test_big_model_adam():
    np.random.seed(11785)
    # mytorch_mlp = Sequential(Linear(10, 20), BatchNorm1d(20), ReLU(),
    #                          Linear(20, 30), BatchNorm1d(30), ReLU())
    mytorch_mlp = Sequential(Linear(10, 20), ReLU(), Linear(20, 30), ReLU())
    mytorch_optimizer = Adam(mytorch_mlp.parameters())
    mytorch_criterion = CrossEntropyLoss()
    return test_step(mytorch_mlp, mytorch_optimizer, 5, 5,
                     mytorch_criterion=mytorch_criterion)
def __init__(self):
    super().__init__()

    # You'll need these constants for the first layer
    first_input_size = 60   # The width of the input to the first convolutional layer
    first_in_channel = 24   # The number of channels input into the first layer

    # TODO: initialize all layers EXCEPT the last linear layer
    layers = [
        Conv1d(first_in_channel, 56, 5),
        Tanh(),
        Conv1d(56, 28, 6, 2),
        ReLU(),
        Conv1d(28, 14, 2, 2),
        Sigmoid(),
        Flatten(),
    ]

    # TODO: Iterate through the conv layers and calculate the final output size
    output_size_conv = get_final_conv_output_size(layers, first_input_size)

    # TODO: Append the linear layer with the correct size onto `layers`
    layers.append(Linear(output_size_conv, 10))

    # TODO: Put the layers into a Sequential
    self.layers = Sequential(*layers)
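# One plausible sketch of the `get_final_conv_output_size` helper used
# above: with no padding or dilation, each Conv1d maps an input of width W
# to floor((W - kernel_size) / stride) + 1, and activations/Flatten leave
# the width unchanged. The attribute names (kernel_size, stride,
# out_channel) are assumptions about the Conv1d layer, and the final
# multiplication assumes the helper returns the flattened feature count that
# Linear sees after Flatten (out_channel * final width).
def get_final_conv_output_size(layers, input_size):
    output_size = input_size
    out_channel = None
    for layer in layers:
        if isinstance(layer, Conv1d):
            output_size = (output_size - layer.kernel_size) // layer.stride + 1
            out_channel = layer.out_channel
    return output_size * out_channel if out_channel is not None else output_size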
def test_dropout_forward_backward():
    np.random.seed(11785)

    # run on small model, forward backward (no step)
    model = Sequential(Linear(10, 20), ReLU(), Dropout(p=0.6))
    x, y = generate_dataset_for_mytorch_model(model, 5)
    x, y = Tensor(x), Tensor(y)
    criterion = CrossEntropyLoss()

    out = model(x)
    test_out = load_numpy_array('autograder/hw1_bonus_autograder/outputs/backward_output.npy')
    if not assertions_all(out.data, test_out, "test_dropout_forward_backward_output", 1e-5, 1e-6):
        return False

    loss = criterion(out, y)
    loss.backward()

    assert model[0].weight.grad is not None, "Linear layer must have gradient."
    assert model[0].weight.grad.grad is None, "Final gradient tensor must not have its own gradient"
    assert model[0].weight.grad.grad_fn is None, "Final gradient tensor must not have its own grad function"
    assert model[0].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[0].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"

    test_grad = load_numpy_array('autograder/hw1_bonus_autograder/outputs/backward_grad.npy')
    return assertions_all(model[0].weight.grad.data, test_grad, "test_dropout_forward_backward_grad", 1e-5, 1e-6)
def conv1d_forward_correctness(num_layers=1):
    '''
    CNN: scanning with a MLP with stride
    '''
    scores_dict = [0]

    ############################################################################################
    #############################   Initialize parameters   ###################################
    ############################################################################################
    in_c = np.random.randint(5, 15)
    channels = [np.random.randint(5, 15) for i in range(num_layers + 1)]
    kernel = [np.random.randint(3, 7) for i in range(num_layers)]
    stride = [np.random.randint(3, 5) for i in range(num_layers)]
    width = np.random.randint(60, 80)
    batch_size = np.random.randint(1, 4)

    x = np.random.randn(batch_size, channels[0], width)

    ############################################################################################
    #################################   Create Models   #######################################
    ############################################################################################
    test_layers = [Conv1d(channels[i], channels[i + 1], kernel[i], stride[i])
                   for i in range(num_layers)]
    test_model = Sequential(*test_layers)

    torch_layers = [nn.Conv1d(channels[i], channels[i + 1], kernel[i], stride=stride[i])
                    for i in range(num_layers)]
    torch_model = nn.Sequential(*torch_layers)

    for torch_layer, test_layer in zip(torch_model, test_model.layers):
        torch_layer.weight = nn.Parameter(torch.tensor(test_layer.weight.data))
        torch_layer.bias = nn.Parameter(torch.tensor(test_layer.bias.data))

    ############################################################################################
    #########################   Get the correct results from PyTorch   ########################
    ############################################################################################
    x1 = Variable(torch.tensor(x), requires_grad=True)
    y1 = torch_model(x1)
    torch_y = y1.detach().numpy()

    ############################################################################################
    ###################   Get fwd results from TestModel and compare   ########################
    ############################################################################################
    y2 = test_model(Tensor(x))
    test_y = y2.data

    # check that model is correctly configured
    check_model_param_settings(test_model)

    if not assertions(test_y, torch_y, 'type', 'y'):
        return scores_dict
    if not assertions(test_y, torch_y, 'shape', 'y'):
        return scores_dict
    if not assertions(test_y, torch_y, 'closeness', 'y'):
        return scores_dict
    scores_dict[0] = 1

    return scores_dict
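# Reference-only sketch of the Conv1d forward computation the test above
# checks against PyTorch: each output position t is an affine map of the
# input window starting at t * stride. Plain numpy with no padding or
# dilation; the mytorch layer must also set up the autograd graph.
def conv1d_forward_reference(x, weight, bias, stride):
    batch_size, in_channel, width = x.shape
    out_channel, _, kernel_size = weight.shape
    out_width = (width - kernel_size) // stride + 1
    out = np.zeros((batch_size, out_channel, out_width))
    for t in range(out_width):
        window = x[:, :, t * stride : t * stride + kernel_size]
        # contract (batch, in, k) with (out, in, k) -> (batch, out)
        out[:, :, t] = np.tensordot(window, weight, axes=([1, 2], [1, 2]))
    return out + bias.reshape(1, -1, 1)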
def __init__(self):
    # TODO: Initialize Conv1d layers
    # For reference, here are the arguments for Conv1d:
    #     Conv1d(in_channel, out_channel, kernel_size, stride)
    self.conv1 = None
    self.conv2 = None
    self.conv3 = None

    # TODO: Initialize Sequential object
    self.layers = Sequential()
def test_dropout_forward():
    np.random.seed(11785)

    # run on small model forward only
    x = Tensor.randn(5, 10)
    model = Sequential(Linear(10, 5), ReLU(), Dropout(p=0.6))
    my_output = model(x)

    test_output = load_numpy_array('autograder/hw1_bonus_autograder/outputs/dropout_forward.npy')
    return assertions_all(my_output.data, test_output, "test_dropout_forward", 1e-5, 1e-6)
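# Sketch of the inverted-dropout forward pass these dropout tests load
# fixtures for: in train mode, zero each unit with probability p and scale
# survivors by 1/(1-p) so the expected activation is unchanged; in eval
# mode, the identity. Reference only; the fixture's exact values also depend
# on how the mytorch layer draws its mask from the seeded RNG.
def dropout_reference(x, p=0.5, training=True):
    if not training or p == 0.0:
        return x
    mask = np.random.binomial(1, 1 - p, size=x.shape)
    return x * mask / (1 - p)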
def __init__(self):
    # TODO: Initialize Conv1d layers
    # For reference, here are the arguments for Conv1d:
    #     Conv1d(in_channel, out_channel, kernel_size, stride)
    self.conv1 = Conv1d(24, 2, 2, 2)
    self.conv2 = Conv1d(2, 8, 2, 2)
    self.conv3 = Conv1d(8, 4, 2, 1)

    # TODO: Initialize Sequential object
    self.layers = Sequential(self.conv1, ReLU(), self.conv2, ReLU(),
                             self.conv3, Flatten())
def __init__(self):
    # TODO: Initialize Conv1d layers with appropriate params (this is the hard part)
    # For reference, here are the arguments for Conv1d:
    #     Conv1d(in_channel, out_channel, kernel_size, stride)
    self.conv1 = None
    self.conv2 = None
    self.conv3 = None

    # TODO: Initialize Sequential object with layers based on the MLP architecture.
    # Note: Besides using Conv1d instead of Linear, there is a slight difference
    # in layers. What's the difference and why?
    self.layers = Sequential()
def test_linear_momentum():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20), ReLU())
    mytorch_optimizer = SGD(mytorch_mlp.parameters(), momentum=0.9)
    test_step(mytorch_mlp, mytorch_optimizer, 5, 0)
    return True
def test_linear_forward():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20))
    test_forward(mytorch_mlp)
    check_model_param_settings(mytorch_mlp)
    return True
def test_linear_batchnorm_relu_train_eval():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20), BatchNorm1d(20), ReLU())
    mytorch_optimizer = SGD(mytorch_mlp.parameters())
    test_step(mytorch_mlp, mytorch_optimizer, 5, 5)
    return True
def test_linear_batchnorm_relu_backward_train():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20), BatchNorm1d(20), ReLU())
    test_forward_backward(mytorch_mlp)
    return True
def test_big_linear_relu_step():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20), ReLU(), Linear(20, 30), ReLU())
    mytorch_optimizer = SGD(mytorch_mlp.parameters())
    test_step(mytorch_mlp, mytorch_optimizer, 5, 5)
    return True
def test_big_linear_relu_backward():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20), ReLU(), Linear(20, 30), ReLU())
    test_forward_backward(mytorch_mlp)
    return True
def test_linear_backward():
    np.random.seed(11785)
    mytorch_mlp = Sequential(Linear(10, 20))
    test_forward_backward(mytorch_mlp)
    return True