def test4():
    a = Tensor(1, requires_grad=True)
    a_torch = get_same_torch_tensor(a)

    b = Tensor(2, requires_grad=True)
    b_torch = get_same_torch_tensor(b)

    c = Tensor(3, requires_grad=True)
    c_torch = get_same_torch_tensor(c)

    d = a + a * b
    d_torch = a_torch + a_torch * b_torch

    e = d + c + Tensor(3)
    e_torch = d_torch + c_torch + torch.tensor(3)

    e.backward()
    e_torch.sum().backward()

    assert check_val_and_grad(a, a_torch)
    assert check_val_and_grad(b, b_torch)
    assert check_val_and_grad(c, c_torch)
    assert check_val_and_grad(d, d_torch)
    assert check_val_and_grad(e, e_torch)
def train(model, optimizer, criterion, train_x, train_y, val_x, val_y,
          num_epochs=3):
    """Problem 3.2: Training routine that runs for `num_epochs` epochs.

    Returns:
        val_accuracies (list): (num_epochs,)
    """
    model.train()
    val_accuracies = []
    batch_losses = []
    avg_losses = []
    for epoch in range(num_epochs):
        # Shuffle data and labels with the same permutation, then batch
        p = np.random.permutation(len(train_x))
        shuffled_x = train_x[p]
        shuffled_y = train_y[p]
        xx = np.split(shuffled_x, len(shuffled_x) // BATCH_SIZE)
        yy = np.split(shuffled_y, len(shuffled_y) // BATCH_SIZE)
        for i, (batch_data, batch_labels) in enumerate(zip(xx, yy)):
            optimizer.zero_grad()  # clear any previous gradients
            out = model(Tensor(batch_data))
            loss = criterion(out, Tensor(batch_labels))
            batch_losses.append(loss.data)
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                # Track average loss since the last validation pass
                avg_losses.append(np.mean(np.array(batch_losses)))
                batch_losses = []
                accuracy = validate(model, val_x, val_y)
                val_accuracies.append(accuracy)
                model.train()  # validate() puts the model in eval mode
    print(avg_losses)
    return val_accuracies
def __init__(self, input_size, hidden_size, nonlinearity='tanh'):
    super(RNNUnit, self).__init__()

    # Initializing parameters
    self.weight_ih = Tensor(np.random.randn(hidden_size, input_size),
                            requires_grad=True, is_parameter=True)
    self.bias_ih = Tensor(np.zeros(hidden_size),
                          requires_grad=True, is_parameter=True)
    self.weight_hh = Tensor(np.random.randn(hidden_size, hidden_size),
                            requires_grad=True, is_parameter=True)
    self.bias_hh = Tensor(np.zeros(hidden_size),
                          requires_grad=True, is_parameter=True)

    self.hidden_size = hidden_size

    # Setting the activation unit
    if nonlinearity == 'tanh':
        self.act = Tanh()
    elif nonlinearity == 'relu':
        self.act = ReLU()
    else:
        raise ValueError(f"Unknown nonlinearity: {nonlinearity}")
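# A hedged sanity check for the init above; the shapes follow PyTorch's
# RNNCell convention (weight_ih: (hidden_size, input_size)). The helper name
# and sizes are illustrative, not part of the assignment:
def _rnn_unit_shape_check():
    unit = RNNUnit(input_size=10, hidden_size=20, nonlinearity='tanh')
    assert unit.weight_ih.shape == (20, 10)  # (hidden_size, input_size)
    assert unit.weight_hh.shape == (20, 20)  # (hidden_size, hidden_size)
    assert unit.bias_ih.shape == (20,) and unit.bias_hh.shape == (20,)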
def test5():
    a = Tensor(1, requires_grad=True)
    a_torch = get_same_torch_tensor(a)

    b = Tensor(2, requires_grad=True)
    b_torch = get_same_torch_tensor(b)

    c = Tensor(3, requires_grad=True)
    c_torch = get_same_torch_tensor(c)

    # d = a + a * b
    z1 = a * b
    z1_torch = a_torch * b_torch
    d = a + z1
    d_torch = a_torch + z1_torch

    # e = (d + c) + 3
    z2 = d + c
    z2_torch = d_torch + c_torch
    e = z2 + Tensor(3)
    e_torch = z2_torch + 3

    e.backward()
    e_torch.sum().backward()

    assert check_val_and_grad(a, a_torch)
    assert check_val_and_grad(b, b_torch)
    assert check_val_and_grad(c, c_torch)
    assert check_val_and_grad(z1, z1_torch)
    assert check_val_and_grad(d, d_torch)
    assert check_val_and_grad(z2, z2_torch)
    assert check_val_and_grad(e, e_torch)
def forward(self, x):
    """
    Args:
        x (Tensor): (batch_size, num_features)
    Returns:
        Tensor: (batch_size, num_features)
    """
    if self.is_train:
        # Batch statistics (biased variance for normalization)
        u = x.Sum() / Tensor(x.shape[0])
        s = ((x - u).Power()).Sum() / Tensor(x.shape[0])
        x_new = (x - u) / (s + self.eps).Root()
        y = (self.gamma * x_new) + self.beta

        # Unbiased variance for the running estimate
        var = (x - u).Power().Sum() / Tensor(x.shape[0] - 1)
        self.running_mean = (Tensor(1) - self.momentum) * self.running_mean \
                            + (self.momentum * u)
        self.running_var = (Tensor(1) - self.momentum) * self.running_var \
                           + (self.momentum * var)
        return y
    else:
        # Inference: normalize with the running statistics
        u = self.running_mean
        s = self.running_var
        x_new = (x - u) / (s + self.eps).Root()
        y = (self.gamma * x_new) + self.beta
        return y
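# To sanity-check the train branch above, a pure-NumPy reference of the same
# normalization; a minimal sketch under the assumption that Sum/Power/Root
# reduce over the batch axis (biased variance for normalizing, affine output):
def _batchnorm_train_reference(x, gamma, beta, eps=1e-5):
    m = x.shape[0]
    u = x.sum(axis=0) / m                # batch mean
    s = ((x - u) ** 2).sum(axis=0) / m   # biased batch variance
    x_hat = (x - u) / np.sqrt(s + eps)   # normalize
    return gamma * x_hat + beta          # affine output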
def test7():
    # a = 3
    a = Tensor(3., requires_grad=False)
    a_torch = get_same_torch_tensor(a)

    # b = 4
    b = Tensor(4., requires_grad=False)
    b_torch = get_same_torch_tensor(b)

    # c = 5
    c = Tensor(5., requires_grad=True)
    c_torch = get_same_torch_tensor(c)

    # out = a * b + 3 * c
    z1 = a * b
    z1_torch = a_torch * b_torch
    z2 = Tensor(3) * c
    z2_torch = 3 * c_torch
    out = z1 + z2
    out_torch = z1_torch + z2_torch

    out_torch.sum().backward()
    out.backward()

    assert check_val_and_grad(a, a_torch)
    assert check_val_and_grad(b, b_torch)
    assert check_val_and_grad(c, c_torch)
    assert check_val_and_grad(z1, z1_torch)
    assert check_val_and_grad(z2, z2_torch)
    assert check_val_and_grad(out, out_torch)
def train(model, optimizer, criterion, train_x, train_y, val_x, val_y,
          num_epochs=3):
    """Problem 3.2: Training routine that runs for `num_epochs` epochs.

    Returns:
        val_accuracies (list): (num_epochs,)
    """
    val_accuracies = []
    model.train()
    for epoch in range(num_epochs):
        # Shuffle data and labels with the same permutation, then batch
        # (see the sketch of split_data_into_batches after this function)
        shuffler = np.random.permutation(len(train_y))
        train_x = train_x[shuffler]
        train_y = train_y[shuffler]
        batches = split_data_into_batches(train_x, train_y, 100)
        for i, (batch_data, batch_labels) in enumerate(batches):
            optimizer.zero_grad()  # clear any previous gradients
            out = model(Tensor(batch_data))
            loss = criterion(out, Tensor(batch_labels))
            loss.backward()
            optimizer.step()  # update weights with new gradients
            if i % 100 == 0:
                accuracy = validate(model, val_x, val_y)
                val_accuracies.append(accuracy)
                model.train()  # validate() puts the model in eval mode
    return val_accuracies
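# `split_data_into_batches` is called above but not shown in this snippet.
# A minimal sketch of what it could look like; the body is an assumption
# inferred from the call site, not the assignment's implementation:
def split_data_into_batches(x, y, batch_size):
    # Slice data and labels together; the last batch may be smaller.
    return [(x[i:i + batch_size], y[i:i + batch_size])
            for i in range(0, len(x), batch_size)]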
def forward_(mytorch_model, mytorch_criterion, pytorch_model,
             pytorch_criterion, x, y):
    """Calls forward on both mytorch and pytorch models.

    x: ndarray (batch_size, in_features)
    y: ndarray (batch_size,)

    Returns (passed, (mytorch x, mytorch y, pytorch x, pytorch y)),
    where `passed` is whether the test passed.
    """
    # forward
    pytorch_x = Variable(torch.tensor(x).double(), requires_grad=True)
    pytorch_y = pytorch_model(pytorch_x)
    if pytorch_criterion is not None:
        pytorch_y = pytorch_criterion(pytorch_y, torch.LongTensor(y))
    mytorch_x = Tensor(x, requires_grad=True)
    mytorch_y = mytorch_model(mytorch_x)
    if mytorch_criterion is not None:
        mytorch_y = mytorch_criterion(mytorch_y, Tensor(y))

    # forward check
    if not assertions_all(mytorch_y.data, pytorch_y.detach().numpy(), 'y'):
        return False, (mytorch_x, mytorch_y, pytorch_x, pytorch_y)

    return True, (mytorch_x, mytorch_y, pytorch_x, pytorch_y)
def test_dropout_forward_backward():
    np.random.seed(11785)

    # run on small model, forward backward (no step)
    model = Sequential(Linear(10, 20), ReLU(), Dropout(p=0.6))
    x, y = generate_dataset_for_mytorch_model(model, 5)
    x, y = Tensor(x), Tensor(y)
    criterion = CrossEntropyLoss()

    out = model(x)
    test_out = load_numpy_array('autograder/hw1_bonus_autograder/outputs/backward_output.npy')
    if not assertions_all(out.data, test_out,
                          "test_dropout_forward_backward_output",
                          1e-5, 1e-6):
        return False

    loss = criterion(out, y)
    loss.backward()

    assert model[0].weight.grad is not None, "Linear layer must have gradient."
    assert model[0].weight.grad.grad is None, "Final gradient tensor must not have its own gradient"
    assert model[0].weight.grad.grad_fn is None, "Final gradient tensor must not have its own grad function"
    assert model[0].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[0].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"

    test_grad = load_numpy_array('autograder/hw1_bonus_autograder/outputs/backward_grad.npy')
    return assertions_all(model[0].weight.grad.data, test_grad,
                          "test_dropout_forward_backward_grad", 1e-5, 1e-6)
def forward(self, input, hidden=None):
    '''
    Args:
        input (Tensor): (effective_batch_size, input_size)
        hidden (Tensor, None): (effective_batch_size, hidden_size)
    Return:
        Tensor: (effective_batch_size, hidden_size)
    '''
    effective_batch_size, input_size = input.shape

    # When no hidden state is given, start from a zero hidden state
    if hidden is None:
        requires_grad = True
        hidden = Tensor(np.zeros((effective_batch_size, self.hidden_size)),
                        requires_grad=requires_grad,
                        is_leaf=not requires_grad)

    sigmoid_ = Sigmoid()
    tanh_ = Tanh()

    # Reset gate, update gate, candidate state, and the blended new state
    r_t = sigmoid_(input.matmul(self.weight_ir) + self.bias_ir +
                   hidden.matmul(self.weight_hr) + self.bias_hr)
    z_t = sigmoid_(input.matmul(self.weight_iz) + self.bias_iz +
                   hidden.matmul(self.weight_hz) + self.bias_hz)
    n_t = tanh_(input.matmul(self.weight_in) + self.bias_in +
                r_t * (hidden.matmul(self.weight_hn) + self.bias_hn))
    h_t = (Tensor(1) - z_t) * n_t + z_t * hidden
    return h_t
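# For reference, the step above follows the standard GRU cell equations
# (PyTorch's GRUCell convention, with the reset gate applied inside the
# candidate state):
#   r_t = sigmoid(x W_ir + b_ir + h W_hr + b_hr)       # reset gate
#   z_t = sigmoid(x W_iz + b_iz + h W_hz + b_hz)       # update gate
#   n_t = tanh(x W_in + b_in + r_t * (h W_hn + b_hn))  # candidate state
#   h_t = (1 - z_t) * n_t + z_t * h                    # blend old and new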
def init_weights(self, weights):
    """Use the 3 weight matrices of linear MLP to init the weights of the CNN.

    Args:
        weights (tuple(np.array)): shapes ((8, 192), (16, 8), (4, 16))
                                   Think of each as a Linear.weight.data,
                                   shaped (out_features, in_features)
    """
    w1, w2, w3 = weights

    # Slice out the portion of each MLP weight that one conv filter scans.
    # Make sure to not add nodes to the comp graph!
    w1 = w1[:2, :48]
    w2 = w2[:8, :4]

    conv1_weights = np.reshape(
        w1, (self.conv1.out_channel, self.conv1.kernel_size,
             self.conv1.in_channel))
    conv1_weights = np.transpose(conv1_weights, axes=(0, 2, 1))
    self.conv1.weight = Tensor(conv1_weights, is_parameter=True,
                               requires_grad=True)

    conv2_weights = np.reshape(
        w2, (self.conv2.out_channel, self.conv2.kernel_size,
             self.conv2.in_channel))
    conv2_weights = np.transpose(conv2_weights, axes=(0, 2, 1))
    self.conv2.weight = Tensor(conv2_weights, is_parameter=True,
                               requires_grad=True)

    conv3_weights = np.reshape(
        w3, (self.conv3.out_channel, self.conv3.kernel_size,
             self.conv3.in_channel))
    conv3_weights = np.transpose(conv3_weights, axes=(0, 2, 1))
    self.conv3.weight = Tensor(conv3_weights, is_parameter=True,
                               requires_grad=True)
def test_big_model_step():
    np.random.seed(11785)

    # run a big model
    model = Sequential(Linear(10, 15), ReLU(), Dropout(p=0.2),
                       Linear(15, 20), ReLU(), Dropout(p=0.1))
    x, y = generate_dataset_for_mytorch_model(model, 4)
    x, y = Tensor(x), Tensor(y)
    criterion = CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                     eps=1e-08)

    # check output correct
    out = model(x)
    test_out = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_output.npy')
    if not assertions_all(out.data, test_out, "test_big_model_step_out",
                          1e-5, 1e-6):
        return False

    # run backward
    loss = criterion(out, y)
    loss.backward()

    # check params are correct (sorry this is ugly)
    assert model[0].weight.grad is not None, "Linear layer must have gradient."
    assert model[0].weight.grad.grad is None, "Final gradient tensor must not have its own gradient"
    assert model[0].weight.grad.grad_fn is None, "Final gradient tensor must not have its own grad function"
    assert model[0].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[0].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"

    assert model[3].weight.grad is not None, "Linear layer must have gradient."
    assert model[3].weight.grad.grad is None, "Final gradient tensor must not have its own gradient"
    assert model[3].weight.grad.grad_fn is None, "Final gradient tensor must not have its own grad function"
    assert model[3].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[3].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"

    # check gradient for linear layer at idx 0 is correct
    test_grad = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_grad.npy')
    if not assertions_all(model[0].weight.grad.data, test_grad,
                          "test_big_model_grad_0", 1e-5, 1e-6):
        return False

    # check gradient for linear layer at idx 3 is correct
    test_grad = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_grad_3.npy')
    if not assertions_all(model[3].weight.grad.data, test_grad,
                          "test_big_model_grad_3", 1e-5, 1e-6):
        return False

    # weight update with adam
    optimizer.step()

    # check updated weight values
    assert model[0].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[0].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"
    test_weights_3 = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_weight_update_3.npy')
    test_weights_0 = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_weight_update_0.npy')

    return assertions_all(model[0].weight.data, test_weights_0,
                          "test_big_weight_update_0", 1e-5, 1e-6) and \
           assertions_all(model[3].weight.data, test_weights_3,
                          "test_big_weight_update_3", 1e-5, 1e-6)
def __init__(self, in_features, out_features):
    super().__init__()

    self.in_features = in_features
    self.out_features = out_features

    # Randomly initializing layer weights, uniform in (-k/2, k/2)
    # with k = 1 / in_features
    k = 1 / in_features
    weight = k * (np.random.rand(out_features, in_features) - 0.5)
    bias = k * (np.random.rand(out_features) - 0.5)
    self.weight = Tensor(weight, requires_grad=True, is_parameter=True)
    self.bias = Tensor(bias, requires_grad=True, is_parameter=True)
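# A hedged sketch of the matching forward computation (the assignment's
# Linear.forward is not shown here; this assumes the usual y = x @ W.T + b
# for the (out_features, in_features) weight layout initialized above):
def _linear_forward_reference(x, weight, bias):
    # x: (batch_size, in_features) -> (batch_size, out_features)
    return x @ weight.T + bias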
def __init__(self, in_channel, out_channel, kernel_size, stride=1):
    super().__init__()
    self.in_channel = in_channel
    self.out_channel = out_channel
    self.kernel_size = kernel_size
    self.stride = stride

    # Initializing weights and bias (not a very good initialization strategy)
    weight = np.random.normal(0, 1.0, (out_channel, in_channel, kernel_size))
    self.weight = Tensor(weight, requires_grad=True, is_parameter=True)
    bias = np.zeros(out_channel)
    self.bias = Tensor(bias, requires_grad=True, is_parameter=True)
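# For reference, the output width this Conv1d produces for an unpadded,
# undilated convolution (a small helper sketch, not part of the assignment):
def _conv1d_out_width(width, kernel_size, stride):
    # e.g. width=5, kernel_size=2, stride=2 -> 2 output positions
    return (width - kernel_size) // stride + 1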
def test_conv_forward_back_just_1_layer():
    in_c = 2
    out_c = 3
    kernel = 2
    stride = 2
    width = 5
    batch_size = 1

    # setup weights and biases (fixed values; random versions left commented)
    conv = Conv1d(in_c, out_c, kernel, stride)
    # conv.weight = Tensor(np.random.randint(5, size=conv.weight.shape) + .0,
    #                      requires_grad=True)
    test_weight = np.asarray([[[1, 2], [2, 1]],
                              [[0, 1], [1, 0]],
                              [[3, 2], [1, 0]]]) + .0
    conv.weight = Tensor(test_weight, requires_grad=True)
    # conv.bias = Tensor(np.random.randint(2, size=conv.out_channel) + .0,
    #                    requires_grad=True)
    conv.bias = Tensor(np.zeros(conv.out_channel) + .0, requires_grad=True)

    conv_torch = nn.Conv1d(in_c, out_c, kernel_size=kernel, stride=stride)
    conv_torch.weight = nn.Parameter(torch.tensor(conv.weight.data))
    conv_torch.bias = nn.Parameter(torch.tensor(conv.bias.data))

    print(f"weight:\n {conv.weight}")
    print(f"bias:\n {conv.bias}")

    # setup input
    # x = Tensor(np.random.randint(5, size=(batch_size, in_c, width)),
    #            requires_grad=True)
    x = Tensor(np.asarray([[[1, 0, 1, 0, 1], [0, 1, 0, 1, 0]]]),
               requires_grad=True)
    x_torch = get_same_torch_tensor(x).double()
    print(f"x:\n {x}")

    # calculate output
    o = conv(x)
    o_torch = conv_torch(x_torch)
    print(f"out:\n {o}")

    # backward
    o.backward()
    o_torch.sum().backward()
    print(f"grad_x:\n {x.grad}")
    print(f"grad_w:\n {conv.weight.grad}")
    print(f"grad_b:\n {conv.bias.grad}")

    # check everything
    assert check_val_and_grad(x, x_torch)
    assert check_val_and_grad(o, o_torch)
    assert check_val_and_grad(conv.weight, conv_torch.weight)
    assert check_val_and_grad(conv.bias, conv_torch.bias)
def validate(model, val_x, val_y):
    """Problem 3.3: Validation routine, tests on val data, scores accuracy

    Relevant Args:
        val_x (np.array): validation data (5000, 784)
        val_y (np.array): validation labels (5000,)

    Returns:
        float: Accuracy = correct / total
    """
    model.eval()
    num_samples = val_x.shape[0]

    # Split the validation set into batches of roughly 100 samples
    batches = list(zip(np.array_split(val_x, num_samples // 100),
                       np.array_split(val_y, num_samples // 100)))

    num_correct = 0
    for batch_data, batch_labels in batches:
        out = model(Tensor(batch_data))
        batch_preds = np.argmax(out.data, axis=1)
        num_correct += (batch_preds == batch_labels).sum()

    accuracy = num_correct / len(val_x) * 100
    return accuracy
def test_distributed_scanning_mlp():
    cnn = CNN_DistributedScanningMLP()
    weights = np.load(os.path.join('autograder', 'hw2_autograder', 'weights',
                                   'mlp_weights_part_c.npy'),
                      allow_pickle=True)
    weights = tuple(w.T for w in weights)
    cnn.init_weights(weights)

    data = np.loadtxt(os.path.join('autograder', 'hw2_autograder', 'data',
                                   'data.asc')).T.reshape(1, 24, -1)
    data = Tensor(data, requires_grad=False, is_parameter=False, is_leaf=True)
    expected_result = np.load(os.path.join('autograder', 'hw2_autograder',
                                           'ref_result', 'res_c.npy'),
                              allow_pickle=True)

    result = cnn(data)

    # check that model is correctly configured
    check_model_param_settings(cnn)

    # if passes tests, return true.
    # If exception anywhere (failure or crash), return false
    try:
        # check that output is correct
        assert type(result.data) == type(expected_result), "Incorrect output type."
        assert result.data.shape == expected_result.shape, "Incorrect output shape."
        assert np.allclose(result.data, expected_result), "Incorrect output values."
    except Exception as e:
        traceback.print_exc()
        return False

    return True
def test_simple_scanning_mlp():
    cnn = CNN_SimpleScanningMLP()

    # Load and init weights
    weights = np.load(os.path.join('autograder', 'hw2_autograder', 'weights',
                                   'mlp_weights_part_b.npy'),
                      allow_pickle=True)
    weights = tuple(w.T for w in weights)
    cnn.init_weights(weights)

    # load data and expected answer
    data = np.loadtxt(os.path.join('autograder', 'hw2_autograder', 'data',
                                   'data.asc')).T.reshape(1, 24, -1)
    data = Tensor(data, requires_grad=False, is_parameter=False, is_leaf=True)
    expected_result = np.load(os.path.join('autograder', 'hw2_autograder',
                                           'ref_result', 'res_b.npy'),
                              allow_pickle=True)

    # get forward output and check
    result = cnn(data)

    # check that model is correctly configured
    check_model_param_settings(cnn)

    # now check correct results
    try:
        # check that output is correct
        assert type(result.data) == type(expected_result), \
            f"Incorrect output type: {result.data}, expected: {expected_result}"
        assert result.data.shape == expected_result.shape, \
            f"Incorrect output shape: {result.data.shape}, expected: {expected_result.shape}"
        assert np.allclose(result.data, expected_result), \
            f"Incorrect output values: {result.data}, expected: {expected_result}"
    except Exception as e:
        traceback.print_exc()
        return False

    return True
def conv1d_forward_correctness(num_layers=1):
    '''
    CNN: scanning with a MLP with stride
    '''
    scores_dict = [0]

    ############################################################################################
    #############################   Initialize parameters   ###################################
    ############################################################################################
    channels = [np.random.randint(5, 15) for i in range(num_layers + 1)]
    kernel = [np.random.randint(3, 7) for i in range(num_layers)]
    stride = [np.random.randint(3, 5) for i in range(num_layers)]
    width = np.random.randint(60, 80)
    batch_size = np.random.randint(1, 4)

    x = np.random.randn(batch_size, channels[0], width)

    #############################################################################################
    #################################    Create Models   #######################################
    #############################################################################################
    test_layers = [Conv1d(channels[i], channels[i + 1], kernel[i], stride[i])
                   for i in range(num_layers)]
    test_model = Sequential(*test_layers)

    torch_layers = [nn.Conv1d(channels[i], channels[i + 1], kernel[i],
                              stride=stride[i])
                    for i in range(num_layers)]
    torch_model = nn.Sequential(*torch_layers)

    # Copy the test model's weights into the torch model
    for torch_layer, test_layer in zip(torch_model, test_model.layers):
        torch_layer.weight = nn.Parameter(torch.tensor(test_layer.weight.data))
        torch_layer.bias = nn.Parameter(torch.tensor(test_layer.bias.data))

    #############################################################################################
    #########################    Get the correct results from PyTorch   ########################
    #############################################################################################
    x1 = Variable(torch.tensor(x), requires_grad=True)
    y1 = torch_model(x1)
    torch_y = y1.detach().numpy()

    #############################################################################################
    ###################    Get fwd results from TestModel and compare    #######################
    #############################################################################################
    y2 = test_model(Tensor(x))
    test_y = y2.data

    # check that model is correctly configured
    check_model_param_settings(test_model)

    if not assertions(test_y, torch_y, 'type', 'y'):
        return scores_dict
    if not assertions(test_y, torch_y, 'shape', 'y'):
        return scores_dict
    if not assertions(test_y, torch_y, 'closeness', 'y'):
        return scores_dict

    scores_dict[0] = 1
    return scores_dict
def test_debdas():
    predicted = Tensor.randn(4, 20)
    predicted.requires_grad = True
    predicted_torch = get_same_torch_tensor(predicted)

    target = Tensor(np.random.randint(20, size=(4,)))
    target.requires_grad = True
    targets = to_one_hot(target, 20)
    targets_torch = get_same_torch_tensor(targets)

    # Cross entropy from scratch: shift for stability, softmax, log, NLL
    p_std = predicted - Tensor(np.max(predicted.data))
    p_std_torch = predicted_torch - torch.max(predicted_torch)

    p_exp = p_std.exp()
    p_exp_torch = torch.exp(p_std_torch)

    p_softmax = p_exp / p_exp.sumAxis(1)
    p_softmax_torch = p_exp_torch / torch.sum(p_exp_torch, 1, keepdim=True)

    p_log_softmax = p_softmax.log()
    p_log_softmax_torch = torch.log(p_softmax_torch)

    log_lik = targets * p_log_softmax
    log_lik_torch = targets_torch * p_log_softmax_torch

    log_lik_sum = log_lik.sumAxis(None)
    log_lik_sum_torch = torch.sum(log_lik_torch)

    ce = Tensor(-1) * log_lik_sum / Tensor(4)
    ce_torch = -1 * log_lik_sum_torch / 4

    ce_torch.sum().backward()
    ce.backward()

    # assert check_val_and_grad(predicted, predicted_torch)
    assert check_val_and_grad(targets, targets_torch)
    assert check_val_and_grad(p_std, p_std_torch)
    assert check_val_and_grad(p_exp, p_exp_torch)
    assert check_val_and_grad(p_softmax, p_softmax_torch)
    assert check_val_and_grad(p_log_softmax, p_log_softmax_torch)
    assert check_val_and_grad(log_lik, log_lik_torch)
    assert check_val_and_grad(log_lik_sum, log_lik_sum_torch)
    assert check_val_and_grad(ce, ce_torch)
def init_weights(self, weights):
    """Converts the given 3 weight matrices of the linear MLP into the weights
    of the Conv layers.

    Args:
        weights (tuple(np.array)): shapes ((8, 192), (16, 8), (4, 16))
                                   Think of each as a Linear.weight.data,
                                   shaped (out_features, in_features)
    """
    # Convert the linear weight arrays into Conv1d weight tensors.
    # Make sure to not add nodes to the comp graph!
    w1, w2, w3 = weights

    # The Conv1d weight tensors were already initialized in __init__();
    # here we replace them with the MLP's weights. The Conv1d params can be
    # retrieved automatically, e.g.
    #   self.conv1.out_channel, self.conv1.kernel_size, self.conv1.in_channel

    # Reshape each (out_features, in_features) matrix to
    # (out_channel, kernel_size, in_channel), then swap the last two axes to
    # get the (out_channel, in_channel, kernel_size) Conv1d layout
    # (see the shape check after this function).
    conv1_weights = np.reshape(
        w1, (self.conv1.out_channel, self.conv1.kernel_size,
             self.conv1.in_channel))
    conv1_weights = np.transpose(conv1_weights, axes=(0, 2, 1))
    self.conv1.weight = Tensor(conv1_weights, is_parameter=True,
                               requires_grad=True)

    conv2_weights = np.reshape(
        w2, (self.conv2.out_channel, self.conv2.kernel_size,
             self.conv2.in_channel))
    conv2_weights = np.transpose(conv2_weights, axes=(0, 2, 1))
    self.conv2.weight = Tensor(conv2_weights, is_parameter=True,
                               requires_grad=True)

    conv3_weights = np.reshape(
        w3, (self.conv3.out_channel, self.conv3.kernel_size,
             self.conv3.in_channel))
    conv3_weights = np.transpose(conv3_weights, axes=(0, 2, 1))
    self.conv3.weight = Tensor(conv3_weights, is_parameter=True,
                               requires_grad=True)
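# A minimal NumPy shape check of the reshape-then-transpose conversion above.
# It assumes conv1 has out_channel=8, kernel_size=8, in_channel=24 (so that
# kernel_size * in_channel == 192, matching w1's (8, 192) shape); the helper
# name and the concrete sizes are illustrative:
def _mlp_to_conv1d_shape_check():
    w = np.arange(8 * 192).reshape(8, 192)  # like Linear.weight.data
    w = w.reshape(8, 8, 24)                 # (out_channel, kernel_size, in_channel)
    w = w.transpose(0, 2, 1)                # (out_channel, in_channel, kernel_size)
    assert w.shape == (8, 24, 8)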
def forward(self, x):
    """
    Args:
        x (Tensor): (batch_size, num_features)
    Returns:
        Tensor: (batch_size, num_features)
    """
    if not self.is_train:
        # Inference: normalize with the running statistics
        norm1 = x - self.running_mean
        norm = norm1 / Tensor.sqrt(self.running_var + self.eps)
    else:
        # Training: normalize with batch statistics, update running estimates
        sample_mean = Tensor.sum(x, axis=0) / Tensor(x.shape[0])
        x_sub_mean = x - sample_mean
        sample_var = Tensor.sum(x_sub_mean * x_sub_mean, axis=0) \
                     / Tensor(x.shape[0])
        norm1 = x - sample_mean
        norm = norm1 / Tensor.sqrt(sample_var + self.eps)
        self.running_mean = self.momentum * self.running_mean \
                            + (Tensor(1) - self.momentum) * sample_mean
        self.running_var = self.momentum * self.running_var \
                           + (Tensor(1) - self.momentum) * sample_var

    out = self.gamma * norm + self.beta
    return out
def train(model, optimizer, criterion, train_x, train_y, val_x, val_y,
          num_epochs=3):
    """Problem 3.2: Training routine that runs for `num_epochs` epochs.

    Returns:
        val_accuracies (list): (num_epochs,)
    """
    val_accuracies = []
    model.train()
    np_samples = train_x.shape[0]
    for epoch in range(num_epochs):
        # Shuffle, then split into batches of roughly 100 samples
        indx_shuffle = np.random.permutation(train_x.shape[0])
        train_x, train_y = train_x[indx_shuffle], train_y[indx_shuffle]
        batches = list(zip(np.array_split(train_x, np_samples // 100),
                           np.array_split(train_y, np_samples // 100)))
        for i, (batch_data, batch_labels) in enumerate(batches):
            optimizer.zero_grad()
            # Train on the current batch, not the full training set
            out = model(Tensor(batch_data))
            loss = criterion(out, Tensor(batch_labels))
            loss.backward()
            optimizer.step()
            if i % 100 == 0 and i != 0:
                accuracy = validate(model, val_x, val_y)
                val_accuracies.append(accuracy)
                model.train()
        print(f'Epoch:{epoch+1} \t Validation AC: {val_accuracies[-1]}')
    return val_accuracies
def validate(model, val_x, val_y, criterion):
    """Problem 3.3: Validation routine, tests on val data, scores accuracy

    Relevant Args:
        val_x (np.array): validation data (5000, 784)
        val_y (np.array): validation labels (5000,)

    Returns:
        float: Accuracy = correct / total
    """
    model.eval()
    valid_loss = 0
    total_correct = 0

    # Stack labels onto the data so both split into 50 batches together
    val_y = np.reshape(val_y, (val_y.shape[0], 1))
    batch = np.hstack((val_x, val_y))
    batches = np.split(batch, 50)

    for x in batches:
        y = x[:, -1]
        x = x[:, :-1]
        output = model(Tensor(x))
        predictions = np.argmax(output.data, axis=1)
        loss = criterion(output, Tensor(y))
        valid_loss += loss.data
        # Count correct predictions in this batch
        total_correct += np.sum(predictions == y.astype(int))

    return (total_correct / len(val_x)) * 100
def train(model, optimizer, criterion, train_x, train_y, val_x, val_y,
          num_epochs):
    """Problem 3.2: Training routine that runs for `num_epochs` epochs.

    Returns:
        val_accuracies (list): (num_epochs,)
    """
    train_loss = 0
    val_accuracies = []

    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        model.train()

        # Stack labels onto the data so both split into 550 batches together
        train_y = np.reshape(train_y, (train_y.shape[0], 1))
        batch = np.hstack((train_x, train_y))
        batches = np.split(batch, 550)

        for batch_idx, x in enumerate(batches):
            y = x[:, -1]
            x = x[:, :-1]
            output = model(Tensor(x))
            loss = criterion(output, Tensor(y))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.data

            if batch_idx % 100 == 1:
                accuracy = validate(model, val_x, val_y, criterion)
                val_accuracies.append(accuracy)
                model.train()

    return val_accuracies
def validate(model, val_x, val_y):
    """Problem 3.3: Validation routine, tests on val data, scores accuracy

    Relevant Args:
        val_x (np.array): validation data (5000, 784)
        val_y (np.array): validation labels (5000,)

    Returns:
        float: Accuracy = correct / total
    """
    model.eval()
    batches = split_data_into_batches(val_x, val_y, 100)
    num_correct = 0
    for batch_data, batch_labels in batches:
        out = model(Tensor(batch_data))
        batch_preds = np.argmax(out.data, axis=1)
        # Accumulate correct predictions across all batches
        num_correct += np.sum(batch_preds == batch_labels)
    accuracy = num_correct / len(val_y)
    return accuracy
def validate(model, val_x, val_y):
    """Problem 3.3: Validation routine, tests on val data, scores accuracy

    Relevant Args:
        val_x (np.array): validation data (5000, 784)
        val_y (np.array): validation labels (5000,)

    Returns:
        float: Accuracy = correct / total
    """
    model.eval()
    xx = np.split(val_x, len(val_x) // BATCH_SIZE)
    yy = np.split(val_y, len(val_y) // BATCH_SIZE)
    num_correct = 0
    for batch_data, batch_labels in zip(xx, yy):
        out = model(Tensor(batch_data))
        batch_preds = np.argmax(out.data, axis=1)
        num_correct += np.sum(batch_labels == batch_preds)
    accuracy = num_correct / len(val_y)
    return accuracy
def forward(self, x):
    """
    Args:
        x (Tensor): (batch_size, num_features)
    Returns:
        Tensor: (batch_size, num_features)
    """
    m = x.shape[0]

    if self.is_train:
        # Batch statistics (biased variance for normalization)
        one_by_m = Tensor([1 / m])
        mu_b = one_by_m * x.sum(axis=0)
        x_mu_sq = (x - mu_b).power(Tensor([2]))
        var_b = one_by_m * x_mu_sq.sum(axis=0)
        x_i = (x - mu_b) / (var_b + self.eps).power(Tensor([1 / 2]))
        y_i = self.gamma * x_i + self.beta

        # Unbiased variance for the running estimate
        one_by_m_1 = Tensor([1 / (m - 1)])
        sigma_b = one_by_m_1 * x_mu_sq.sum(axis=0)
        self.running_mean = (Tensor([1]) - self.momentum) * self.running_mean \
                            + self.momentum * mu_b
        self.running_var = (Tensor([1]) - self.momentum) * self.running_var \
                           + self.momentum * sigma_b
    else:
        # Inference: normalize with the running statistics
        mu_b = self.running_mean
        var_b = self.running_var
        x_i = (x - mu_b) / (var_b + self.eps).power(Tensor([1 / 2]))
        y_i = self.gamma * x_i + self.beta

    return y_i
def forward(self, input, hidden=None):
    '''
    Args:
        input (Tensor): (effective_batch_size, input_size)
        hidden (Tensor, None): (effective_batch_size, hidden_size)
    Return:
        Tensor: (effective_batch_size, hidden_size)
    '''
    effective_batch_size, input_size = input.shape

    # When no hidden state is given, start from a zero hidden state
    if hidden is None:
        requires_grad = True
        hidden = Tensor(np.zeros((effective_batch_size, self.hidden_size)),
                        requires_grad=requires_grad,
                        is_leaf=not requires_grad)

    # Combine the input and hidden contributions, then apply the activation
    res = input.matmul(self.weight_ih) + self.bias_ih \
          + hidden.matmul(self.weight_hh) + self.bias_hh
    return self.act(res)
def __init__(self, num_features, eps=1e-5, momentum=0.1):
    super().__init__()
    self.num_features = num_features

    self.eps = Tensor(np.array([eps]))
    self.momentum = Tensor(np.array([momentum]))

    # To make the final output affine
    self.gamma = Tensor(np.ones((self.num_features,)),
                        requires_grad=True, is_parameter=True)
    self.beta = Tensor(np.zeros((self.num_features,)),
                       requires_grad=True, is_parameter=True)

    # Running mean and var
    self.running_mean = Tensor(np.zeros(self.num_features),
                               requires_grad=False, is_parameter=False)
    self.running_var = Tensor(np.ones(self.num_features),
                              requires_grad=False, is_parameter=False)