def test_softmaxXentropy_forward():
    # Test input
    np.random.seed(0)
    autograd = autograd_engine.Autograd()
    l1 = nn.Linear(5, 5, autograd)
    x = np.random.random((1, 5))
    y = np.array([[0., 0., 1., 0., 0.]])
    l1_out = l1(x)
    test_loss = nn.SoftmaxCrossEntropy(autograd)
    a1_out = test_loss(y, l1_out)

    # Torch input
    torch_l1 = torch.nn.Linear(5, 5)
    torch_l1.weight = torch.nn.Parameter(torch.DoubleTensor(l1.W))
    torch_l1.bias = torch.nn.Parameter(torch.DoubleTensor(l1.b.squeeze()))
    torch_x = torch.DoubleTensor(x)
    torch_y = torch.LongTensor(np.array([2]))
    torch_l1_out = torch_l1(torch_x)
    torch_loss = torch.nn.CrossEntropyLoss()
    torch_a1_out = torch_loss(torch_l1_out, torch_y).reshape(1, )
    torch_a1_out.backward()

    compare_np_torch(a1_out, torch_a1_out)
    return True

def test_sigmoid_backward():
    # Test input
    np.random.seed(0)
    autograd = autograd_engine.Autograd()
    l1 = nn.Linear(5, 5, autograd)
    x = np.random.random((1, 5))
    l1_out = l1(x)
    test_act = nn.Sigmoid(autograd)
    a1_out = test_act(l1_out)
    autograd.backward(1)

    # Torch input
    torch_l1 = torch.nn.Linear(5, 5)
    torch_l1.weight = torch.nn.Parameter(torch.DoubleTensor(l1.W))
    torch_l1.bias = torch.nn.Parameter(torch.DoubleTensor(l1.b.squeeze()))
    torch_x = torch.DoubleTensor(x)
    torch_l1_out = torch_l1(torch_x)
    torch_act = torch.nn.Sigmoid()
    torch_a1_out = torch_act(torch_l1_out)
    torch_a1_out.sum().backward()

    compare_np_torch(l1.dW, torch_l1.weight.grad)
    compare_np_torch(l1.db.squeeze(), torch_l1.bias.grad)
    return True

def test_identity_forward():
    # Test input
    np.random.seed(0)
    autograd = autograd_engine.Autograd()
    l1 = nn.Linear(5, 5, autograd)
    # Use batch size 1 since broadcasting is not implemented yet.
    x = np.random.random((1, 5))
    l1_out = l1(x)
    test_act = nn.Identity(autograd)
    a1_out = test_act(l1_out)

    # Torch input
    torch_l1 = torch.nn.Linear(5, 5)
    # Note: watch the weight layout/transpose here; this convention should probably be standardized.
    torch_l1.weight = torch.nn.Parameter(torch.DoubleTensor(l1.W))
    torch_l1.bias = torch.nn.Parameter(torch.DoubleTensor(l1.b.squeeze()))
    torch_x = torch.DoubleTensor(x)
    torch_l1_out = torch_l1(torch_x)
    torch_act = torch.nn.Identity()
    torch_a1_out = torch_act(torch_l1_out)

    compare_np_torch(a1_out, torch_a1_out)
    return True

def test_linear_skip_backward():
    np.random.seed(0)
    autograd = autograd_engine.Autograd()
    autograd.zero_grad()
    l1 = nn.Linear(5, 5, autograd)
    x = np.random.random((1, 5))
    l1_out = l1(x)

    # Skip connection: add the input back onto the linear layer's output.
    output = l1_out + x
    autograd.add_operation(inputs=[l1_out, x],
                           output=output,
                           gradients_to_update=[None, None],
                           backward_operation=add_backward)
    autograd.backward(1)

    torch_l1 = torch.nn.Linear(5, 5)
    torch_l1.weight = torch.nn.Parameter(torch.DoubleTensor(l1.W))
    torch_l1.bias = torch.nn.Parameter(torch.DoubleTensor(l1.b.squeeze()))
    torch_x = torch.DoubleTensor(x)
    torch_x.requires_grad = True
    torch_l1_out = torch_l1(torch_x)
    torch_output = torch_l1_out + torch_x
    torch_output.sum().backward()

    compare_np_torch(l1_out, torch_l1_out)
    compare_np_torch(l1.dW, torch_l1.weight.grad)
    compare_np_torch(l1.db.squeeze(), torch_l1.bias.grad)
    # Gradient w.r.t. the input confirms the skip connection works.
    compare_np_torch(autograd.memory_buffer.get_param(x), torch_x.grad)
    return True

def test_linear_skip_forward():
    np.random.seed(0)
    autograd = autograd_engine.Autograd()
    autograd.zero_grad()
    l1 = nn.Linear(5, 5, autograd)
    x = np.random.random((1, 5))
    l1_out = l1(x)

    # Skip connection: add the input back onto the linear layer's output.
    output = l1_out + x
    autograd.add_operation(inputs=[l1_out, x],
                           output=output,
                           gradients_to_update=[None, None],
                           backward_operation=add_backward)

    torch_l1 = torch.nn.Linear(5, 5)
    torch_l1.weight = torch.nn.Parameter(torch.DoubleTensor(l1.W))
    torch_l1.bias = torch.nn.Parameter(torch.DoubleTensor(l1.b.squeeze()))
    torch_x = torch.DoubleTensor(x)
    torch_x.requires_grad = True
    torch_l1_out = torch_l1(torch_x)
    torch_output = torch_l1_out + torch_x

    compare_np_torch(output, torch_output)
    return True

def test_linear_layer_forward():
    np.random.seed(0)
    x = np.random.random((1, 5))
    autograd = autograd_engine.Autograd()
    l1 = nn.Linear(5, 5, autograd)
    l1_out = l1(x)

    torch_l1 = torch.nn.Linear(5, 5)
    torch_l1.weight = torch.nn.Parameter(torch.DoubleTensor(l1.W))
    torch_l1.bias = torch.nn.Parameter(torch.DoubleTensor(l1.b.squeeze()))
    torch_x = torch.DoubleTensor(x)
    torch_l1_out = torch_l1(torch_x)

    compare_np_torch(l1_out, torch_l1_out)
    return True

def visualize(outpath):
    # Configure the training visualization process below.
    # Change these hyperparameters to experiment with your implementation.
    epochs = 5
    batch_size = 1

    thisdir = os.path.dirname(__file__)
    savepath = outpath
    train_data_path = os.path.join(thisdir, "data/train_data.npy")
    train_labels_path = os.path.join(thisdir, "data/train_labels.npy")
    val_data_path = os.path.join(thisdir, "data/val_data.npy")
    val_labels_path = os.path.join(thisdir, "data/val_labels.npy")
    test_data_path = os.path.join(thisdir, "data/test_data.npy")
    test_labels_path = os.path.join(thisdir, "data/test_labels.npy")

    dset = (process_dset_partition(
                (np.load(train_data_path), np.load(train_labels_path))),
            process_dset_partition(
                (np.load(val_data_path), np.load(val_labels_path))),
            process_dset_partition(
                (np.load(test_data_path), np.load(test_labels_path))))

    autograd = autograd_engine.Autograd()
    # MLP(input_size, output_size, hiddens, activations, criterion, lr, autograd, momentum=0.0)
    mlp = MLP(784,
              10,
              [32, 32, 32],
              [
                  nn.Sigmoid(autograd),
                  nn.Sigmoid(autograd),
                  nn.Sigmoid(autograd),
                  nn.Identity(autograd)
              ],
              # np.random.randn, bias_init,
              nn.SoftmaxCrossEntropy(autograd),
              1e-3,
              autograd,
              momentum=0.856)

    visualize_training_statistics(mlp, dset, epochs, batch_size, savepath)
    print("Saved output to {}".format(savepath))

def test_tanh_forward():
    # Test input
    np.random.seed(0)
    autograd = autograd_engine.Autograd()
    l1 = nn.Linear(5, 5, autograd)
    x = np.random.random((1, 5))
    l1_out = l1(x)
    test_act = nn.Tanh(autograd)
    a1_out = test_act(l1_out)

    # Torch input
    torch_l1 = torch.nn.Linear(5, 5)
    torch_l1.weight = torch.nn.Parameter(torch.DoubleTensor(l1.W))
    torch_l1.bias = torch.nn.Parameter(torch.DoubleTensor(l1.b.squeeze()))
    torch_x = torch.DoubleTensor(x)
    torch_l1_out = torch_l1(torch_x)
    torch_act = torch.nn.Tanh()
    torch_a1_out = torch_act(torch_l1_out)

    compare_np_torch(a1_out, torch_a1_out)
    return True
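

# Note: every test above relies on a `compare_np_torch(np_out, torch_out)` helper that
# is defined elsewhere in this repo and imported at the top of the file. The fallback
# below is only a minimal sketch of the assumed behavior (an element-wise closeness
# check between a NumPy result and a torch tensor); the real helper may use different
# tolerances and error reporting, so this guard only kicks in if it is missing.
if "compare_np_torch" not in globals():
    def compare_np_torch(np_out, torch_out):
        # Detach the torch tensor to NumPy and compare values element-wise.
        torch_np = torch_out.detach().numpy()
        assert np.allclose(np.asarray(np_out), torch_np, atol=1e-8), \
            "NumPy and torch outputs do not match"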