Example #1
def test4():
    a = Tensor(1, requires_grad=True)
    a_torch = get_same_torch_tensor(a)

    b = Tensor(2, requires_grad=True)
    b_torch = get_same_torch_tensor(b)

    c = Tensor(3, requires_grad=True)
    c_torch = get_same_torch_tensor(c)

    d = a + a * b
    d_torch = a_torch + a_torch * b_torch

    e = d + c + Tensor(3)
    e_torch = d_torch + c_torch + torch.tensor(3)

    e.backward()
    e_torch.sum().backward()

    assert check_val_and_grad(a, a_torch)
    assert check_val_and_grad(b, b_torch)
    assert check_val_and_grad(c, c_torch)
    assert check_val_and_grad(d, d_torch)
    assert check_val_and_grad(e, e_torch)
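The tests above lean on two helpers that are not shown in this section, get_same_torch_tensor and check_val_and_grad; the sketch below is a hypothetical minimal version of what they are assumed to do (the real autograder helpers may differ in dtypes and tolerances).

# Hypothetical sketches of the test helpers; the autograder's versions may differ.
import numpy as np
import torch

def get_same_torch_tensor(t):
    # Mirror a mytorch Tensor as a torch tensor with the same data and
    # requires_grad flag, so values and gradients can be compared later.
    return torch.tensor(t.data, dtype=torch.float64, requires_grad=t.requires_grad)

def check_val_and_grad(t, t_torch):
    # Values must match; gradients must either both be absent or both match.
    if not np.allclose(t.data, t_torch.detach().numpy()):
        return False
    if t.grad is None and t_torch.grad is None:
        return True
    if t.grad is None or t_torch.grad is None:
        return False
    return np.allclose(t.grad.data, t_torch.grad.detach().numpy())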
Example #2
def train(model, optimizer, criterion, train_x, train_y, val_x, val_y, num_epochs=3):
    """Problem 3.2: Training routine that runs for `num_epochs` epochs.
    Returns:
        val_accuracies (list): (num_epochs,)
    """
    model.train()
    store_validation_accuracy = []
    store_loss = []
    Avg_loss = []
    for j in range(num_epochs):
        p = np.random.permutation(len(train_x))
        shuffled_x = train_x[p] 
        shuffled_y = train_y[p]
        xx = np.split(shuffled_x, len(shuffled_x) // BATCH_SIZE)
        yy = np.split(shuffled_y, len(shuffled_y) // BATCH_SIZE)
        for i, (batch_data, batch_labels) in enumerate(zip(xx, yy)):
            optimizer.zero_grad()  # clear any previous gradients
            out = model(Tensor(batch_data))
            loss = criterion(out, Tensor(batch_labels))
            store_loss.append(loss.data)
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                Avg_loss.append(np.mean(np.array(store_loss)))
                store_loss = []
                accuracy = validate(model,val_x,val_y)
                store_validation_accuracy.append(accuracy)
                model.train()
    print(Avg_loss)
    return store_validation_accuracy
Example #3
    def __init__(self, input_size, hidden_size, nonlinearity='tanh'):

        super(RNNUnit, self).__init__()

        # Initializing parameters
        self.weight_ih = Tensor(np.random.randn(hidden_size, input_size),
                                requires_grad=True,
                                is_parameter=True)
        self.bias_ih = Tensor(np.zeros(hidden_size),
                              requires_grad=True,
                              is_parameter=True)
        self.weight_hh = Tensor(np.random.randn(hidden_size, hidden_size),
                                requires_grad=True,
                                is_parameter=True)
        self.bias_hh = Tensor(np.zeros(hidden_size),
                              requires_grad=True,
                              is_parameter=True)

        self.hidden_size = hidden_size

        # Setting the Activation Unit
        if nonlinearity == 'tanh':
            self.act = Tanh()
        elif nonlinearity == 'relu':
            self.act = ReLU()
        else:
            raise ValueError(f"Unsupported nonlinearity: {nonlinearity}")
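A brief, hypothetical usage sketch of the unit above; it assumes a forward that applies the usual Elman-RNN recurrence (the activation of an affine map of input and hidden, as in torch.nn.RNNCell) and that Module instances are callable.

# Hypothetical usage sketch: drive the unit for a few time steps.
# Assumes RNNUnit.forward implements h' = act(x @ W_ih.T + b_ih + h @ W_hh.T + b_hh)
# and that hidden=None means "start from zeros".
import numpy as np

batch_size, input_size, hidden_size = 4, 10, 20
unit = RNNUnit(input_size, hidden_size, nonlinearity='tanh')

x = Tensor(np.random.randn(batch_size, input_size), requires_grad=True)
h = None
for _ in range(5):
    h = unit(x, h)   # (batch_size, hidden_size)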
Example #4
def test5():
    a = Tensor(1, requires_grad=True)
    a_torch = get_same_torch_tensor(a)

    b = Tensor(2, requires_grad=True)
    b_torch = get_same_torch_tensor(b)

    c = Tensor(3, requires_grad=True)
    c_torch = get_same_torch_tensor(c)

    # d = a + a * b
    z1 = a * b
    z1_torch = a_torch * b_torch
    d = a + z1
    d_torch = a_torch + z1_torch

    # e = (d + c) + 3
    z2 = d + c
    z2_torch = d_torch + c_torch
    e = z2 + Tensor(3)
    e_torch = z2_torch + 3

    e.backward()
    e_torch.sum().backward()

    assert check_val_and_grad(a, a_torch)
    assert check_val_and_grad(b, b_torch)
    assert check_val_and_grad(c, c_torch)
    assert check_val_and_grad(z1, z1_torch)
    assert check_val_and_grad(d, d_torch)
    assert check_val_and_grad(z2, z2_torch)
    assert check_val_and_grad(e, e_torch)
Example #5
    def forward(self, x):
        """
        Args:
            x (Tensor): (batch_size, num_features)
        Returns:
            Tensor: (batch_size, num_features)
        """
        #print("x.shape[0]",x.shape[0])
        if self.is_train:
            u = x.Sum() / Tensor(x.shape[0])
            #print("In forward shape of u:",u.shape)
            s = (((x - u).Power()).Sum()) / Tensor(x.shape[0])
            #print("In forward shape of s:",s.shape)
            x_new = (x - u) / (s + self.eps).Root()
            #print("In forward shape of x_new:",x_new.shape)
            y = (self.gamma * x_new) + self.beta

            var = ((x - u).Power().Sum()) / Tensor(x.shape[0] - 1)  # unbiased variance for the running estimate
            self.running_mean = (Tensor(1) - self.momentum
                                 ) * self.running_mean + (self.momentum * u)
            self.running_var = (Tensor(1) - self.momentum
                                ) * self.running_var + (self.momentum * var)

            return y
        else:
            u = self.running_mean          # use running statistics at eval time
            s = self.running_var
            x_new = (x - u) / (s + self.eps).Root()
            y = (self.gamma * x_new) + self.beta

            return y
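For reference, the training branch above corresponds to this plain-numpy computation (a value-only sketch outside the autograd graph, assuming the usual batch-norm definition and ignoring the running statistics).

# Value-only numpy reference for the training branch above (no autograd, no running stats).
import numpy as np

def batchnorm_train_reference(x, gamma, beta, eps=1e-5):
    mu = x.mean(axis=0)                       # batch mean
    var = ((x - mu) ** 2).mean(axis=0)        # biased batch variance
    x_hat = (x - mu) / np.sqrt(var + eps)     # normalize
    return gamma * x_hat + beta               # affine output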
Example #6
def test7():
    # a = 3
    a = Tensor(3., requires_grad=False)
    a_torch = get_same_torch_tensor(a)

    # b = 4
    b = Tensor(4., requires_grad=False)
    b_torch = get_same_torch_tensor(b)

    # c = 5
    c = Tensor(5., requires_grad=True)
    c_torch = get_same_torch_tensor(c)

    # out = a * b + 3 * c
    z1 = a * b
    z1_torch = a_torch * b_torch
    z2 = Tensor(3) * c
    z2_torch = 3 * c_torch
    out = z1 + z2
    out_torch = z1_torch + z2_torch

    out_torch.sum().backward()
    out.backward()

    assert check_val_and_grad(a, a_torch)
    assert check_val_and_grad(b, b_torch)
    assert check_val_and_grad(c, c_torch)
    assert check_val_and_grad(z1, z1_torch)
    assert check_val_and_grad(z2, z2_torch)
    assert check_val_and_grad(out, out_torch)
Example #7
def train(model,
          optimizer,
          criterion,
          train_x,
          train_y,
          val_x,
          val_y,
          num_epochs=3):
    """Problem 3.2: Training routine that runs for `num_epochs` epochs.
    Returns:
        val_accuracies (list): (num_epochs,)
    """
    val_accuracies = []

    # TODO: Implement me! (Pseudocode on writeup)
    model.train()
    for epoch in range(num_epochs):
        shuffler = np.random.permutation(len(train_y))
        train_x = train_x[shuffler]
        train_y = train_y[shuffler]

        batches = split_data_into_batches(train_x, train_y, 100)
        for i, (batch_data, batch_labels) in enumerate(batches):
            optimizer.zero_grad()  # clear any previous gradients
            out = model(Tensor(batch_data))
            loss = criterion(out, Tensor(batch_labels))
            loss.backward()
            optimizer.step()  # update weights with new gradients
            if i % 100 == 0:
                accuracy = validate(model, val_x, val_y)
                val_accuracies.append(accuracy)
                model.train()

    return val_accuracies
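split_data_into_batches is not defined in this section; below is a minimal sketch of the helper it is assumed to be, returning a list of (batch_data, batch_labels) pairs with the final batch possibly smaller than batch_size.

# Hypothetical helper assumed by the training loop above.
import numpy as np

def split_data_into_batches(x, y, batch_size):
    n_batches = int(np.ceil(len(x) / batch_size))
    return [(x[i * batch_size:(i + 1) * batch_size],
             y[i * batch_size:(i + 1) * batch_size])
            for i in range(n_batches)]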
Example #8
def forward_(mytorch_model, mytorch_criterion, pytorch_model,
             pytorch_criterion, x, y):
    """
    Calls forward on both mytorch and pytorch models.

    x: np.ndarray (batch_size, in_features)
    y: np.ndarray (batch_size,)

    Returns (passed, (mytorch_x, mytorch_y, pytorch_x, pytorch_y)),
    where `passed` indicates whether the forward outputs matched.
    """
    # forward
    pytorch_x = Variable(torch.tensor(x).double(), requires_grad=True)
    pytorch_y = pytorch_model(pytorch_x)
    if pytorch_criterion is not None:
        pytorch_y = pytorch_criterion(pytorch_y, torch.LongTensor(y))
    mytorch_x = Tensor(x, requires_grad=True)
    mytorch_y = mytorch_model(mytorch_x)
    if mytorch_criterion is not None:
        mytorch_y = mytorch_criterion(mytorch_y, Tensor(y))

    # forward check
    if not assertions_all(mytorch_y.data, pytorch_y.detach().numpy(), 'y'):
        return False, (mytorch_x, mytorch_y, pytorch_x, pytorch_y)

    return True, (mytorch_x, mytorch_y, pytorch_x, pytorch_y)
Example #9
def test_dropout_forward_backward():
    np.random.seed(11785)
    
    # run on small model, forward backward (no step)
    model = Sequential(Linear(10, 20), ReLU(), Dropout(p=0.6))
    x, y = generate_dataset_for_mytorch_model(model, 5)
    x, y = Tensor(x), Tensor(y)
    criterion = CrossEntropyLoss()
    out = model(x)
    
    test_out = load_numpy_array('autograder/hw1_bonus_autograder/outputs/backward_output.npy')
    
    if not assertions_all(out.data, test_out, "test_dropout_forward_backward_output", 1e-5, 1e-6):
        return False
    
    loss = criterion(out, y)
    loss.backward()
    
    assert model[0].weight.grad is not None, "Linear layer must have gradient."
    assert model[0].weight.grad.grad is None, "Final gradient tensor must not have its own gradient"
    assert model[0].weight.grad.grad_fn is None, "Final gradient tensor must not have its own grad function"
    assert model[0].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[0].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"
    
    test_grad = load_numpy_array('autograder/hw1_bonus_autograder/outputs/backward_grad.npy')
    
    return assertions_all(model[0].weight.grad.data, test_grad, "test_dropout_forward_backward_grad", 1e-5, 1e-6)
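The Dropout layer under test is assumed to be inverted dropout (zero activations with probability p and scale the survivors by 1/(1-p) at train time, identity at eval time); a value-only numpy sketch of that rule follows.

# Value-only numpy sketch of inverted dropout; the mytorch Dropout layer is
# assumed to apply the same rule inside the autograd framework.
import numpy as np

def dropout_forward_reference(x, p, is_train, rng=np.random):
    if not is_train or p == 0.0:
        return x
    mask = rng.binomial(1, 1.0 - p, size=x.shape).astype(x.dtype)
    return x * mask / (1.0 - p)   # rescale so the expected activation is unchanged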
Example #10
    def forward(self, input, hidden=None):
        '''
        Args:
            input (Tensor): (effective_batch_size,input_size)
            hidden (Tensor,None): (effective_batch_size,hidden_size)
        Return:
            Tensor: (effective_batch_size,hidden_size)
        '''

        # TODO: INSTRUCTIONS
        # Perform matrix operations to construct the intermediary value from the input and hidden tensors.
        # Remember to handle the case when hidden is None: construct a zero-filled
        # tensor of the appropriate size to use as the hidden state.

        effective_batch_size, input_size = input.shape
        if hidden is None:
            requires_grad = True
            hidden = Tensor(np.zeros((effective_batch_size, self.hidden_size)),
                            requires_grad=requires_grad,
                            is_leaf=not requires_grad)

        sigmoid_ = Sigmoid()
        tanh_ = Tanh()
        r_t = sigmoid_(
            input.matmul(self.weight_ir) + self.bias_ir +
            hidden.matmul(self.weight_hr) + self.bias_hr)
        z_t = sigmoid_(
            input.matmul(self.weight_iz) + self.bias_iz +
            hidden.matmul(self.weight_hz) + self.bias_hz)
        n_t = tanh_(
            input.matmul(self.weight_in) + self.bias_in + r_t *
            (hidden.matmul(self.weight_hn) + self.bias_hn))
        h_t = (Tensor(1) - z_t) * n_t + z_t * hidden
        return h_t
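For reference, the tensor operations above follow the standard GRU cell update, written here with the same weight names used in the code.

#   r_t = sigmoid(x @ W_ir + b_ir + h @ W_hr + b_hr)        # reset gate
#   z_t = sigmoid(x @ W_iz + b_iz + h @ W_hz + b_hz)        # update gate
#   n_t = tanh(x @ W_in + b_in + r_t * (h @ W_hn + b_hn))   # candidate state
#   h_t = (1 - z_t) * n_t + z_t * h                         # new hidden state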
Example #11
    def init_weights(self, weights):
        """Use the 3 weight matrices of linear MLP to init the weights of the CNN.
        Args:
            weights (tuple(np.array)): shapes ((8, 192), (16, 8), (4, 16))
                                       Think of each as a Linear.weight.data, shaped (out_features, in_features)
        """
        w1, w2, w3 = weights

        # TODO: Convert the linear weights into Conv1d weights
        # Make sure to not add nodes to the comp graph!

        # Keep only the sub-blocks of the MLP weights that the distributed-scanning
        # filters actually need (the remaining rows/columns are shared copies).
        w1 = w1[0:2, 0:48]
        w2 = w2[0:8, 0:4]

        conv1_weights = np.reshape(
            w1, (self.conv1.out_channel, self.conv1.kernel_size,
                 self.conv1.in_channel))
        conv1_weights = np.transpose(conv1_weights, axes=(0, 2, 1))
        self.conv1.weight = Tensor(conv1_weights,
                                   is_parameter=True,
                                   requires_grad=True)
        conv2_weights = np.reshape(
            w2, (self.conv2.out_channel, self.conv2.kernel_size,
                 self.conv2.in_channel))
        conv2_weights = np.transpose(conv2_weights, axes=(0, 2, 1))
        self.conv2.weight = Tensor(conv2_weights,
                                   is_parameter=True,
                                   requires_grad=True)
        conv3_weights = np.reshape(
            w3, (self.conv3.out_channel, self.conv3.kernel_size,
                 self.conv3.in_channel))
        conv3_weights = np.transpose(conv3_weights, axes=(0, 2, 1))
        self.conv3.weight = Tensor(conv3_weights,
                                   is_parameter=True,
                                   requires_grad=True)
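The conversion above assumes out_features == out_channel and in_features == kernel_size * in_channel for each layer; a small shape check with illustrative numbers (not the actual MLP weights) is sketched below.

# Illustrative shape check for the Linear -> Conv1d conversion above
# (hypothetical sizes, not the actual MLP weights).
import numpy as np

out_channel, in_channel, kernel_size = 2, 24, 2
w = np.random.randn(out_channel, kernel_size * in_channel)             # Linear-style (out, in)
conv_w = np.transpose(w.reshape(out_channel, kernel_size, in_channel),
                      axes=(0, 2, 1))                                   # -> (out, in, kernel)
assert conv_w.shape == (out_channel, in_channel, kernel_size)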
Example #12
def test_big_model_step():
    np.random.seed(11785)
    
    # run a big model
    model = Sequential(Linear(10, 15), ReLU(), Dropout(p=0.2), 
                       Linear(15, 20), ReLU(), Dropout(p=0.1))
    x, y = generate_dataset_for_mytorch_model(model, 4)
    x, y = Tensor(x), Tensor(y)
    criterion = CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08)
    
    # check output correct
    out = model(x)
    test_out = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_output.npy')

    if not assertions_all(out.data, test_out, "test_big_model_step_out", 1e-5, 1e-6):
        return False
    
    # run backward
    loss = criterion(out, y)
    loss.backward()
    
    # check params are correct (sorry this is ugly)
    assert model[0].weight.grad is not None, "Linear layer must have gradient."
    assert model[0].weight.grad.grad is None, "Final gradient tensor must not have its own gradient"
    assert model[0].weight.grad.grad_fn is None, "Final gradient tensor must not have its own grad function"
    assert model[0].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[0].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"
    assert model[3].weight.grad is not None, "Linear layer must have gradient."
    assert model[3].weight.grad.grad is None, "Final gradient tensor must not have its own gradient"
    assert model[3].weight.grad.grad_fn is None, "Final gradient tensor must not have its own grad function"
    assert model[3].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[3].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"
    
    # check gradient for linear layer at idx 0 is correct
    test_grad = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_grad.npy')
    if not assertions_all(model[0].weight.grad.data, test_grad, "test_big_model_grad_0", 1e-5, 1e-6):
        return False
    
    # check gradient for linear layer at idx 3 is correct
    test_grad = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_grad_3.npy')
    if not assertions_all(model[3].weight.grad.data, test_grad, "test_big_model_grad_3", 1e-5, 1e-6):
        return False

    # weight update with adam
    optimizer.step()
    
    # check updated weight values
    assert model[0].weight.requires_grad, "Weight tensor must have requires_grad==True"
    assert model[0].weight.is_parameter, "Weight tensor must be marked as a parameter tensor"
    test_weights_3 = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_weight_update_3.npy')
    test_weights_0 = load_numpy_array('autograder/hw1_bonus_autograder/outputs/big_weight_update_0.npy')
    
    return assertions_all(model[0].weight.data, test_weights_0, "test_big_weight_update_0", 1e-5, 1e-6) and \
        assertions_all(model[3].weight.data, test_weights_3, "test_big_weight_update_3", 1e-5, 1e-6)
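The optimizer under test is Adam with the standard bias-corrected update; a minimal single-parameter numpy sketch of what each step() is assumed to do per parameter tensor follows.

# Minimal numpy sketch of one bias-corrected Adam update for a single parameter
# (t starts at 1); the mytorch Adam is assumed to apply this per parameter tensor.
import numpy as np

def adam_step(param, grad, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    m = beta1 * m + (1 - beta1) * grad            # first-moment estimate
    v = beta2 * v + (1 - beta2) * grad ** 2       # second-moment estimate
    m_hat = m / (1 - beta1 ** t)                  # bias correction
    v_hat = v / (1 - beta2 ** t)
    param = param - lr * m_hat / (np.sqrt(v_hat) + eps)
    return param, m, v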
Example #13
    def __init__(self, in_features, out_features):
        super().__init__()

        self.in_features = in_features
        self.out_features = out_features

        # Randomly initializing layer weights
        k = 1 / in_features
        weight = k * (np.random.rand(out_features, in_features) - 0.5)
        bias = k * (np.random.rand(out_features) - 0.5)
        self.weight = Tensor(weight, requires_grad=True, is_parameter=True)
        self.bias = Tensor(bias, requires_grad=True, is_parameter=True)
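The layer's forward is not shown here; with this parameterization it is assumed to compute the usual affine map, sketched value-only in numpy below.

# Value-only numpy sketch of the affine map a Linear layer with this
# parameterization computes: y = x @ W.T + b, W shaped (out_features, in_features).
import numpy as np

def linear_forward_reference(x, weight, bias):
    return x @ weight.T + bias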
Example #14
    def __init__(self, in_channel, out_channel, kernel_size, stride=1):
        super().__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.kernel_size = kernel_size
        self.stride = stride

        # Initializing weights and bias (not a very good initialization strategy)
        weight = np.random.normal(0, 1.0,
                                  (out_channel, in_channel, kernel_size))
        self.weight = Tensor(weight, requires_grad=True, is_parameter=True)

        bias = np.zeros(out_channel)
        self.bias = Tensor(bias, requires_grad=True, is_parameter=True)
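With no padding or dilation, the output length of this Conv1d follows the usual (width - kernel_size) // stride + 1 rule; a quick sketch:

# Output-length rule assumed by this Conv1d (no padding, no dilation).
def conv1d_output_size(width, kernel_size, stride):
    return (width - kernel_size) // stride + 1

# e.g. width=5, kernel_size=2, stride=2 -> 2 output positions
assert conv1d_output_size(5, 2, 2) == 2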
Example #15
def test_conv_forward_back_just_1_layer():
    in_c = 2
    out_c = 3
    kernel = 2
    stride = 2
    width = 5
    batch_size = 1

    # setup weights and biases
    conv = Conv1d(in_c, out_c, kernel, stride)
    test_weight = np.asarray([[[1, 2], [2, 1]], [[0, 1], [1, 0]],
                              [[3, 2], [1, 0]]]) + .0
    conv.weight = Tensor(test_weight, requires_grad=True)
    conv.bias = Tensor(np.zeros(conv.out_channel) + .0, requires_grad=True)
    conv_torch = nn.Conv1d(in_c, out_c, kernel_size=kernel, stride=stride)
    conv_torch.weight = nn.Parameter(torch.tensor(conv.weight.data))
    conv_torch.bias = nn.Parameter(torch.tensor(conv.bias.data))
    print(f"weight:\n {conv.weight}")
    print(f"bias:\n {conv.bias}")

    # setup input
    # x = Tensor(np.random.randint(5, size=(batch_size, in_c, width)),\
    #             requires_grad = True)
    x = Tensor(np.asarray([[[1, 0, 1, 0, 1], [0, 1, 0, 1, 0]]]),
               requires_grad=True)
    x_torch = get_same_torch_tensor(x).double()
    print(f"x:\n {x}")

    # calculate output
    o = conv(x)
    o_torch = conv_torch(x_torch)
    print(f"out:\n {o}")

    # backward
    o.backward()
    o_torch.sum().backward()
    print(f"grad_x:\n {x.grad}")
    print(f"grad_w:\n {conv.weight.grad}")
    print(f"grad_b:\n {conv.bias.grad}")

    # check everything
    assert check_val_and_grad(x, x_torch)
    assert check_val_and_grad(o, o_torch)
    assert check_val_and_grad(conv.weight, conv_torch.weight)
    assert check_val_and_grad(conv.bias, conv_torch.bias)
Example #16
def validate(model, val_x, val_y):
    """Problem 3.3: Validation routine, tests on val data, scores accuracy
    Relevant Args:
        val_x (np.array): validation data (5000, 784)
        val_y (np.array): validation labels (5000,)
    Returns:
        float: Accuracy = correct / total
    """
    #TODO: implement validation based on pseudocode
    model.eval()
    num_samples = val_x.shape[0]
    batches = list(
        zip(np.array_split(val_x, num_samples // 100),
            np.array_split(val_y, num_samples // 100)))
    num_correct = 0
    for i, (batch_data, batch_labels) in enumerate(batches):
        out = model(Tensor(batch_data))
        batch_preds = np.argmax(out.data, axis=1)
        num_correct += (batch_preds == batch_labels).sum()
    accuracy = (num_correct / len(val_x) * 100)
    return accuracy
Example #17
def test_distributed_scanning_mlp():
    cnn = CNN_DistributedScanningMLP()

    weights = np.load(os.path.join('autograder', 'hw2_autograder', 'weights', 'mlp_weights_part_c.npy'), allow_pickle=True)
    weights = tuple(w.T for w in weights)

    cnn.init_weights(weights)

    data = np.loadtxt(os.path.join('autograder', 'hw2_autograder', 'data', 'data.asc')).T.reshape(1, 24, -1)
    data = Tensor(data, requires_grad=False, is_parameter=False, is_leaf=True)

    expected_result = np.load(os.path.join('autograder', 'hw2_autograder', 'ref_result', 'res_c.npy'), allow_pickle=True)

    result = cnn(data)

    # check that model is correctly configured
    check_model_param_settings(cnn)

    # if passes tests, return true.
    # If exception anywhere (failure or crash), return false
    try:
        # check that output is correct
        assert type(result.data) == type(expected_result), "Incorrect output type."
        assert result.data.shape == expected_result.shape, "Incorrect output shape."
        assert np.allclose(result.data, expected_result), "Incorrect output values."
    except Exception as e:
        traceback.print_exc()
        return False

    return True
Example #18
def test_simple_scanning_mlp():
    cnn = CNN_SimpleScanningMLP()

    # Load and init weights
    weights = np.load(os.path.join('autograder', 'hw2_autograder', 'weights', 'mlp_weights_part_b.npy'), allow_pickle = True)
    weights = tuple(w.T for w in weights)
    cnn.init_weights(weights)

    # load data and expected answer
    data = np.loadtxt(os.path.join('autograder', 'hw2_autograder', 'data', 'data.asc')).T.reshape(1, 24, -1)
    data = Tensor(data, requires_grad=False, is_parameter=False, is_leaf=True)

    expected_result = np.load(os.path.join('autograder', 'hw2_autograder', 'ref_result', 'res_b.npy'), allow_pickle=True)
    # get forward output and check
    result = cnn(data)

    # check that model is correctly configured
    check_model_param_settings(cnn)

    # now check correct results
    try:
        # check that output is correct
        assert type(result.data) == type(expected_result), f"Incorrect output type: {result.data}, expected: {expected_result}"
        assert result.data.shape == expected_result.shape, f"Incorrect output shape: {result.data.shape}, expected: {expected_result.shape}"
        assert np.allclose(result.data, expected_result), f"Incorrect output values: {result.data}, expected: {expected_result}"
    except Exception as e:
        traceback.print_exc()
        return False

    return True
Example #19
def conv1d_forward_correctness(num_layers=1):
    '''
    CNN: scanning with a MLP with stride
    '''
    scores_dict = [0]

    ############################################################################################
    #############################   Initialize parameters    ###################################
    ############################################################################################
    in_c = np.random.randint(5, 15)
    channels = [np.random.randint(5, 15) for i in range(num_layers + 1)]
    kernel = [np.random.randint(3, 7) for i in range(num_layers)]
    stride = [np.random.randint(3, 5) for i in range(num_layers)]
    width = np.random.randint(60, 80)
    batch_size = np.random.randint(1, 4)

    x = np.random.randn(batch_size, channels[0], width)

    #############################################################################################
    #################################    Create Models   ########################################
    #############################################################################################
    test_layers = [
        Conv1d(channels[i], channels[i + 1], kernel[i], stride[i])
        for i in range(num_layers)
    ]
    test_model = Sequential(*test_layers)

    torch_layers = [
        nn.Conv1d(channels[i], channels[i + 1], kernel[i], stride=stride[i])
        for i in range(num_layers)
    ]
    torch_model = nn.Sequential(*torch_layers)

    for torch_layer, test_layer in zip(torch_model, test_model.layers):
        torch_layer.weight = nn.Parameter(torch.tensor(test_layer.weight.data))
        torch_layer.bias = nn.Parameter(torch.tensor(test_layer.bias.data))
    #############################################################################################
    #########################    Get the correct results from PyTorch   #########################
    #############################################################################################
    x1 = Variable(torch.tensor(x), requires_grad=True)
    y1 = torch_model(x1)
    torch_y = y1.detach().numpy()

    #############################################################################################
    ###################    Get fwd results from TestModel and compare  ##########################
    #############################################################################################
    y2 = test_model(Tensor(x))
    test_y = y2.data

    # check that model is correctly configured
    check_model_param_settings(test_model)

    if not assertions(test_y, torch_y, 'type', 'y'): return scores_dict
    if not assertions(test_y, torch_y, 'shape', 'y'): return scores_dict
    if not assertions(test_y, torch_y, 'closeness', 'y'): return scores_dict
    scores_dict[0] = 1

    return scores_dict
Example #20
def test_debdas():
    predicted = Tensor.randn(4, 20)
    predicted.requires_grad = True
    predicted_torch = get_same_torch_tensor(predicted)

    target = Tensor(np.random.randint(20, size=(4, )))
    target.requires_grad = True
    targets = to_one_hot(target, 20)
    targets_torch = get_same_torch_tensor(targets)

    p_std = predicted - Tensor(np.max(predicted.data))
    p_std_torch = predicted_torch - torch.max(predicted_torch)

    p_exp = p_std.exp()
    p_exp_torch = torch.exp(p_std_torch)

    p_softmax = p_exp / p_exp.sumAxis(1)
    p_softmax_torch = p_exp_torch / torch.sum(p_exp_torch, 1, keepdim=True)

    p_log_softmax = p_softmax.log()
    p_log_softmax_torch = torch.log(p_softmax_torch)

    log_lik = targets * p_log_softmax
    log_lik_torch = targets_torch * p_log_softmax_torch

    log_lik_sum = log_lik.sumAxis(None)
    log_lik_sum_torch = torch.sum(log_lik_torch)

    ce = Tensor(-1) * log_lik_sum / Tensor(4)
    ce_torch = -1 * log_lik_sum_torch / 4

    ce_torch.sum().backward()
    ce.backward()

    #assert check_val_and_grad(predicted, predicted_torch)
    assert check_val_and_grad(targets, targets_torch)
    assert check_val_and_grad(p_std, p_std_torch)
    assert check_val_and_grad(p_exp, p_exp_torch)
    assert check_val_and_grad(p_softmax, p_softmax_torch)
    assert check_val_and_grad(p_log_softmax, p_log_softmax_torch)
    assert check_val_and_grad(log_lik, log_lik_torch)
    assert check_val_and_grad(log_lik_sum, log_lik_sum_torch)
    assert check_val_and_grad(ce, ce_torch)
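For reference, the step-by-step tensor ops above compute a max-shifted softmax cross-entropy; a compact, value-only numpy equivalent:

# Compact, value-only numpy equivalent of the cross-entropy built above.
import numpy as np

def cross_entropy_reference(logits, one_hot_targets):
    shifted = logits - np.max(logits)        # subtract the global max for stability
    log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -(one_hot_targets * log_softmax).sum() / logits.shape[0]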
Example #21
    def init_weights(self, weights):
        """Converts the given 3 weight matrices of the linear MLP into the weights of the Conv layers.
        Args:
            weights (tuple(np.array)): shapes ((8, 192), (16, 8), (4, 16))
                                       Think of each as a Linear.weight.data, shaped (out_features, in_features)
        """
        # TODO: Convert the linear weight arrays into Conv1d weight tensors
        # Make sure to not add nodes to the comp graph!
        w1, w2, w3 = weights  # Here, we've unpacked them into separate arrays for you.

        # Assume the Conv1d weight tensors are already initialized with the params that you specified in __init__().
        # Your job now is to replace those weights with the MLP's weights.

        # Tip: You can automatically retrieve the Conv1d params like so:
        #      ex) self.conv1.out_channel, self.conv1.kernel_size, self.conv1.in_channel

        # Set the weight tensors with your converted MLP weights
        conv1_weights = np.reshape(
            w1, (self.conv1.out_channel, self.conv1.kernel_size,
                 self.conv1.in_channel))
        conv1_weights = np.transpose(conv1_weights, axes=(0, 2, 1))
        self.conv1.weight = Tensor(conv1_weights,
                                   is_parameter=True,
                                   requires_grad=True)
        conv2_weights = np.reshape(
            w2, (self.conv2.out_channel, self.conv2.kernel_size,
                 self.conv2.in_channel))
        conv2_weights = np.transpose(conv2_weights, axes=(0, 2, 1))
        self.conv2.weight = Tensor(conv2_weights,
                                   is_parameter=True,
                                   requires_grad=True)
        conv3_weights = np.reshape(
            w3, (self.conv3.out_channel, self.conv3.kernel_size,
                 self.conv3.in_channel))
        conv3_weights = np.transpose(conv3_weights, axes=(0, 2, 1))
        self.conv3.weight = Tensor(conv3_weights,
                                   is_parameter=True,
                                   requires_grad=True)
Example #22
    def forward(self, x):
        """
        Args:
            x (Tensor): (batch_size, num_features)
        Returns:
            Tensor: (batch_size, num_features)
        """
        if not self.is_train:
            norm1 = x - self.running_mean
            norm = norm1 / Tensor.sqrt(self.running_var + self.eps)
        else:
            sample_mean = Tensor.sum(x, axis=0) / Tensor(x.shape[0])
            x_sub_mean = x - sample_mean
            sample_var = Tensor.sum(x_sub_mean * x_sub_mean, axis=0) / Tensor(
                x.shape[0])
            norm1 = x - sample_mean
            norm = norm1 / Tensor.sqrt(sample_var + self.eps)
            self.running_mean = self.momentum * self.running_mean + (
                Tensor(1) - self.momentum) * sample_mean
            self.running_var = self.momentum * self.running_var + (
                Tensor(1) - self.momentum) * sample_var
        out = self.gamma * norm + self.beta
        return out
Example #23
def train(model,
          optimizer,
          criterion,
          train_x,
          train_y,
          val_x,
          val_y,
          num_epochs=3):
    """Problem 3.2: Training routine that runs for `num_epochs` epochs.
    Returns:
        val_accuracies (list): (num_epochs,)
    """
    val_accuracies = []
    # TODO: Implement me! (Pseudocode on writeup)
    model.train()
    np_samples = train_x.shape[0]
    for epoch in range(num_epochs):
        indx_shuffle = np.random.permutation(train_x.shape[0])
        train_x, train_y = train_x[indx_shuffle], train_y[indx_shuffle]
        batches = list(
            zip(np.array_split(train_x, np_samples // 100),
                np.array_split(train_y, np_samples // 100)))
        for i, (batch_data, batch_labels) in enumerate(batches):
            optimizer.zero_grad()
            out = model(Tensor(batch_data))
            loss = criterion(out, Tensor(batch_labels))
            loss.backward()
            optimizer.step()
            if i % 100 == 0 and i != 0:
                accuracy = validate(model, val_x, val_y)
                val_accuracies.append(accuracy)
                model.train()
        print(f'Epoch:{epoch+1} \t Validation AC: {val_accuracies[-1]}')
    return val_accuracies
Example #24
def validate(model, val_x, val_y, criterion):
    """Problem 3.3: Validation routine, tests on val data, scores accuracy
    Relevant Args:
        val_x (np.array): validation data (5000, 784)
        val_y (np.array): validation labels (5000,)
    Returns:
        float: Accuracy = correct / total
    """
    model.eval()
    valid_loss = 0
    total_accuracy = 0
    val_y = np.reshape(val_y, (val_y.shape[0], 1))
    batch = np.hstack((val_x, val_y))

    batches = np.split(batch, 50)

    for batch_idx, x in enumerate(batches):
        accuracy = 0
        y = x[:, -1]
        x = x[:, :-1]
        output = model(Tensor(x))

        predictions = np.argmax(output.data, axis=1)
        loss = criterion(output, Tensor(y))
        valid_loss += loss.data
        y = Tensor(y)
        for i in range(0, y.shape[0]):
            if int(y.data[i]) == predictions[i]:
                accuracy += 1
        total_accuracy += accuracy
    return (total_accuracy / len(val_x)) * 100
Example #25
def train(model, optimizer, criterion, train_x, train_y, val_x, val_y,
          num_epochs):
    """Problem 3.2: Training routine that runs for `num_epochs` epochs.
    Returns:
        val_accuracies (list): (num_epochs,)
    """
    train_loss = 0
    val_accuracies = []
    #print("Num_epochs is:",num_epochs)
    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        model.train()

        train_y = np.reshape(train_y, (train_y.shape[0], 1))
        batch = np.hstack((train_x, train_y))

        batches = np.split(batch, 550)

        for batch_idx, x in enumerate(batches):
            y = x[:, -1]
            x = x[:, :-1]
            output = model(Tensor(x))
            loss = criterion(output, Tensor(y))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.data
            if batch_idx % 100 == 1:
                accuracy = validate(model, val_x, val_y, criterion)
                val_accuracies.append(accuracy)
                model.train()

    return val_accuracies
Example #26
def validate(model, val_x, val_y):
    """Problem 3.3: Validation routine, tests on val data, scores accuracy
    Relevant Args:
        val_x (np.array): validation data (5000, 784)
        val_y (np.array): validation labels (5000,)
    Returns:
        float: Accuracy = correct / total
    """
    #TODO: implement validation based on pseudocode
    model.eval()
    batches = split_data_into_batches(val_x, val_y, 100)
    num_correct = 0
    for (batch_data, batch_labels) in batches:
        out = model(Tensor(batch_data))
        batch_preds = np.argmax(out.data, axis=1)
        num_correct += np.sum(batch_preds == batch_labels)
    accuracy = num_correct / len(val_y)
    return accuracy
Example #27
def validate(model, val_x, val_y):
    """Problem 3.3: Validation routine, tests on val data, scores accuracy
    Relevant Args:
        val_x (np.array): validation data (5000, 784)
        val_y (np.array): validation labels (5000,)
    Returns:
        float: Accuracy = correct / total
    """
    model.eval()
    xx = np.split(val_x, len(val_x) // BATCH_SIZE)
    yy = np.split(val_y, len(val_y) // BATCH_SIZE)
    num_correct = 0
    for i, (batch_data, batch_labels) in enumerate(zip(xx, yy)):
        out = model(Tensor(batch_data))
        batch_preds = np.argmax(out.data, axis=1)
        num_correct_i = np.sum(batch_labels == batch_preds)
        num_correct += num_correct_i
    accuracy = num_correct / len(val_y)
    return accuracy
Example #28
    def forward(self, x):
        """
        Args:
            x (Tensor): (batch_size, num_features)
        Returns:
            Tensor: (batch_size, num_features)
        """
        #raise Exception("TODO!")
        m = x.shape[0]

        # mu_b = (1/m) * np.sum(x.data, axis=0)
        # x_mu_sq = np.square(x.data - mu_b)
        # var_b = (1/m) * np.sum(x_mu_sq, axis=0)
        # x_i = (x.data - mu_b) / np.sqrt(var_b + self.eps.data)
        # y_i = self.gamma.data * x_i + self.beta.data

        # sigma_b = (1/(m-1)) * np.sum(x_mu_sq, axis=0)
        # self.running_mean.data = (1 - self.momentum.data) * self.running_mean.data + self.momentum.data * mu_b
        # self.running_var.data = (1 - self.momentum.data) * self.running_var.data + self.momentum.data * sigma_b
        # return Tensor(y_i)

        if self.is_train:
            one_by_m = Tensor([1 / m])
            mu_b = one_by_m * x.sum(axis=0)
            x_mu_sq = (x - mu_b).power(Tensor([2]))
            var_b = one_by_m * x_mu_sq.sum(axis=0)
            x_i = (x - mu_b) / (var_b + self.eps).power(Tensor([1 / 2]))
            y_i = self.gamma * x_i + self.beta

            one_by_m_1 = Tensor([1 / (m - 1)])
            sigma_b = one_by_m_1 * x_mu_sq.sum(axis=0)
            self.running_mean = (Tensor([1]) - self.momentum
                                 ) * self.running_mean + self.momentum * mu_b
            self.running_var = (Tensor([1]) - self.momentum
                                ) * self.running_var + self.momentum * sigma_b
        else:
            mu_b = self.running_mean
            var_b = self.running_var
            x_i = (x - mu_b) / (var_b + self.eps).power(Tensor([1 / 2]))
            y_i = self.gamma * x_i + self.beta
        return y_i
Example #29
    def forward(self, input, hidden=None):
        '''
        Args:
            input (Tensor): (effective_batch_size,input_size)
            hidden (Tensor,None): (effective_batch_size,hidden_size)
        Return:
            Tensor: (effective_batch_size,hidden_size)
        '''

        # TODO: INSTRUCTIONS
        # Perform matrix operations to construct the intermediary value from the input and hidden tensors.
        # Apply the activation to the result.
        # Remember to handle the case when hidden is None: construct a zero-filled
        # tensor of the appropriate size to use as the hidden state.
        effective_batch_size, input_size = input.shape
        if hidden is None:
            requires_grad = True
            hidden = Tensor(np.zeros((effective_batch_size, self.hidden_size)),
                            requires_grad=requires_grad,
                            is_leaf=not requires_grad)

        res = input.matmul(self.weight_ih) + self.bias_ih + hidden.matmul(
            self.weight_hh) + self.bias_hh
        return self.act(res)
    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super().__init__()
        self.num_features = num_features

        self.eps = Tensor(np.array([eps]))
        self.momentum = Tensor(np.array([momentum]))

        # To make the final output affine
        self.gamma = Tensor(np.ones((self.num_features, )),
                            requires_grad=True,
                            is_parameter=True)
        self.beta = Tensor(np.zeros((self.num_features, )),
                           requires_grad=True,
                           is_parameter=True)

        # Running mean and var
        self.running_mean = Tensor(np.zeros(self.num_features, ),
                                   requires_grad=False,
                                   is_parameter=False)
        self.running_var = Tensor(np.ones(self.num_features, ),
                                  requires_grad=False,
                                  is_parameter=False)