Example #1
def train(
    num_epochs: int,
    learning_rate: float,
    batch_size: int,
    l2_reg_lambda: float  # Task 3 hyperparameter; can be ignored until Task 3.
):
    """
        Function that implements logistic regression through mini-batch
        gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)
    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)

            model.backward(X_batch, y_hat, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
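The snippet above assumes helpers such as cross_entropy_loss and calculate_accuracy defined elsewhere in the assignment code. A minimal sketch of what they might look like for the binary case (illustrative, not the author's exact implementation):

import numpy as np

def cross_entropy_loss(targets: np.ndarray, outputs: np.ndarray) -> float:
    # Mean binary cross-entropy over the batch; eps guards against log(0).
    eps = 1e-12
    return float(-np.mean(
        targets * np.log(outputs + eps) + (1 - targets) * np.log(1 - outputs + eps)))

def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model) -> float:
    # Threshold the sigmoid output at 0.5 and compare against the binary targets.
    predictions = model.forward(X) > 0.5
    return float(np.mean(predictions == targets))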
Example #2
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter
):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
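The train function above relies on a SoftmaxModel exposing forward, backward, w and grad. A minimal sketch of such a single-layer softmax model, consistent with the calls in the snippet but not the assignment's exact class:

import numpy as np

class SoftmaxModelSketch:
    def __init__(self, l2_reg_lambda: float, num_features: int = 785, num_classes: int = 10):
        self.l2_reg_lambda = l2_reg_lambda
        self.w = np.zeros((num_features, num_classes))
        self.grad = None

    def forward(self, X: np.ndarray) -> np.ndarray:
        # Softmax over the class dimension, shifted for numerical stability.
        z = X @ self.w
        z -= z.max(axis=1, keepdims=True)
        exp_z = np.exp(z)
        return exp_z / exp_z.sum(axis=1, keepdims=True)

    def backward(self, X: np.ndarray, outputs: np.ndarray, targets: np.ndarray) -> None:
        # Gradient of the mean cross-entropy loss plus the L2 penalty term.
        n = X.shape[0]
        self.grad = X.T @ (outputs - targets) / n + 2 * self.l2_reg_lambda * self.w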
Example #3
import pickle
import random

import matplotlib.pyplot as plt
import numpy as np

import utils
from task2a import pre_process_images  # same helper as in the other examples


def predictAndDisplay():
    """Test function that predicts a random handwritten digit from the dataset and displays it."""
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    X_train = pre_process_images(X_train)

    # Pick a random image; randint's upper bound is inclusive, so subtract 1.
    index = random.randint(0, X_train.shape[0] - 1)
    image = np.array([X_train[index]])
    printable = X_train[index]

    # Load the model trained in task 3a and run a forward pass.
    with open('model3a.sav', 'rb') as f:
        model = pickle.load(f)
    outputs = model.forward(image)
    predicted = np.argmax(outputs)

    # Drop the bias feature and reshape back to a 28x28 image for plotting.
    image_2d = printable[:-1].reshape(28, 28)
    label = "predicted: " + str(predicted)

    plt.imshow(image_2d)
    plt.title(label)
    plt.show()
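As a hedged sketch of how 'model3a.sav' could have been produced: train a model as in Example #1 / Example #11 and pickle it. The hyperparameter values below are illustrative (taken from Example #11), not necessarily what the author used:

import pickle

model, train_loss, val_loss, train_accuracy, val_accuracy = train(
    num_epochs=50, learning_rate=0.2, batch_size=128, l2_reg_lambda=0)
with open('model3a.sav', 'wb') as f:
    # Serialize the trained model so predictAndDisplay can reload it later.
    pickle.dump(model, f)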
Example #4
        accuracy_train = calculate_accuracy(X_train, Y_train, self.model)
        accuracy_val = calculate_accuracy(X_val, Y_val, self.model)
        return loss, accuracy_train, accuracy_val


if __name__ == "__main__":
    # hyperparameters DO NOT CHANGE IF NOT SPECIFIED IN ASSIGNMENT TEXT
    num_epochs = 50
    learning_rate = 0.01
    batch_size = 128
    l2_reg_lambda = 0
    shuffle_dataset = True

    # Load dataset
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    X_train = pre_process_images(X_train)
    X_val = pre_process_images(X_val)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)

    # ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

    # Initialize model
    model = SoftmaxModel(l2_reg_lambda)
    # Train model
    trainer = SoftmaxTrainer(
        model,
        learning_rate,
        batch_size,
        shuffle_dataset,
        X_train,
Example #5
import numpy as np
import utils
from task2a import one_hot_encode, pre_process_images, SoftmaxModel, gradient_approximation_test

if __name__ == "__main__":
    # Simple test on one-hot encoding
    Y = np.zeros((1, 1), dtype=int)
    Y[0, 0] = 3
    Y = one_hot_encode(Y, 10)
    assert Y[0, 3] == 1 and Y.sum() == 1, \
        f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}"

    X_train, Y_train, *_ = utils.load_full_mnist(0.1)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_train = pre_process_images(X_train, mean, std)
    Y_train = one_hot_encode(Y_train, 10)
    assert X_train.shape[1] == 785,\
        f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}"

    # Modify your network here
    neurons_per_layer = [64, 64, 10]
    use_improved_sigmoid = True
    use_improved_weight_init = True
    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)

    # Gradient approximation check for 100 images
    X_train = X_train[:100]
    Y_train = Y_train[:100]
    for layer_idx, w in enumerate(model.ws):
Example #6
                f"Calculated gradient is incorrect. " \
                f"Approximation: {gradient_approximation}, actual gradient: {model.grad[i, j]}\n" \
                f"If this test fails there could be errors in your cross entropy loss function, " \
                f"forward function or backward function"


if __name__ == "__main__":
    # Simple test on one-hot encoding
    Y = np.zeros((1, 1), dtype=int)
    Y[0, 0] = 3
    Y = one_hot_encode(Y, 10)
    assert Y[0, 3] == 1 and Y.sum() == 1, \
        f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}"

    X_train, Y_train, *_ = utils.load_full_mnist(0.1)
    X_train = pre_process_images(X_train)
    Y_train = one_hot_encode(Y_train, 10)
    assert X_train.shape[1] == 785,\
        f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}"
    
    # Simple test for forward pass. Note that this does not cover all errors!
    model = SoftmaxModel(0.0)
    logits = model.forward(X_train)
    np.testing.assert_almost_equal(
        logits.mean(), 1/10,
        err_msg="Since the weights are all 0's, the softmax activation should be 1/10")

    # Gradient approximation check for 100 images
    X_train = X_train[:100]
    Y_train = Y_train[:100]
    for i in range(2):
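The loop above feeds a numerical gradient check; the error message at the top of this example compares model.grad against a finite-difference estimate. A minimal sketch of such a centered-difference approximation (epsilon and the helper name are illustrative, not the assignment's gradient_approximation_test; cross_entropy_loss is the assignment's loss helper):

def approximate_gradient(model, X, Y, i, j, eps=1e-3):
    # Perturb a single weight by +/- eps and measure the change in the loss;
    # the resulting slope should match the analytic gradient model.grad[i, j].
    orig = model.w[i, j]
    model.w[i, j] = orig + eps
    loss_plus = cross_entropy_loss(Y, model.forward(X))
    model.w[i, j] = orig - eps
    loss_minus = cross_entropy_loss(Y, model.forward(X))
    model.w[i, j] = orig
    return (loss_plus - loss_minus) / (2 * eps)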
Example #7
    return model, train_loss, val_loss, train_accuracy, val_accuracy


if __name__ == "__main__":

    # Measure execution time
    start = time.time()

    # Load dataset
    validation_percentage = 0.2
    X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_full_mnist(
        validation_percentage)

    # Preprocess and adapt the data
    mean, standard_deviation = find_mean_and_deviation(X_train)
    X_train = pre_process_images(X_train, mean, standard_deviation)
    X_val = pre_process_images(X_val, mean, standard_deviation)
    X_test = pre_process_images(X_test, mean, standard_deviation)

    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)
    Y_test = one_hot_encode(Y_test, 10)

    # Hyperparameters
    num_epochs = 20
    learning_rate = .1
    batch_size = 32
    neurons_per_layer = [64, 10]
    momentum_gamma = .9  # Task 3 hyperparameter

    # Settings for task 3. Keep all to false for task 2.
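Example #7 preprocesses with a find_mean_and_deviation helper. Based on how Examples #9, #12 and #13 compute the statistics directly with np.mean and np.std of the training images, a plausible sketch of that helper (an assumption, not the author's exact code):

import numpy as np

def find_mean_and_deviation(X: np.ndarray):
    # Mean and standard deviation over the raw training pixels only; the same
    # values are then reused to normalize the validation and test splits.
    return float(np.mean(X)), float(np.std(X))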
Example #8
from task2a import one_hot_encode, pre_process_images, SoftmaxModel, gradient_approximation_test, \
    find_mean_and_deviation

if __name__ == "__main__":
    # Simple test on one-hot encoding
    Y = np.zeros((1, 1), dtype=int)
    Y[0, 0] = 3
    Y = one_hot_encode(Y, 10)
    assert Y[0, 3] == 1 and Y.sum() == 1, \
        f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}"

    # Load and preprocess data
    X_train, Y_train, *_ = utils.load_full_mnist(0.1)
    # Preprocess and adapt the data
    mean, standard_deviation = find_mean_and_deviation(X_train)
    X_train = pre_process_images(X_train, mean, standard_deviation)
    Y_train = one_hot_encode(Y_train, 10)

    assert X_train.shape[1] == 785,\
        f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}"

    # Modify your network here
    neurons_per_layer = [64, 64, 10]
    use_improved_sigmoid = True
    use_improved_weight_init = True
    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)

    # Gradient approximation check for 100 images
    X_train = X_train[:100]
    Y_train = Y_train[:100]
Example #9
if __name__ == "__main__":
    # Load dataset
    validation_percentage = 0.2
    X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_full_mnist(
        validation_percentage)

    # One hot encode data
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)
    Y_test = one_hot_encode(Y_test, 10)

    # Preprocess data using mean and std of training set
    # Find mean and std of training set
    mu = np.mean(X_train)
    sigma = np.std(X_train)
    X_train = pre_process_images(X_train, mean=mu, std=sigma)
    X_val = pre_process_images(X_val, mean=mu, std=sigma)
    X_test = pre_process_images(X_test, mean=mu, std=sigma)

    # Hyperparameters
    num_epochs = 20
    learning_rate = .1
    batch_size = 32
    neurons_per_layer = [64, 10]
    momentum_gamma = .9  # Task 3 hyperparameter

    # Settings for task 3. Keep all to false for task 2.
    use_shuffle = False
    use_improved_sigmoid = False
    use_improved_weight_init = False
    use_momentum = False
Example #10
    num_epochs = 20
    learning_rate = .1
    batch_size = 32

    # Task 4d
    # neurons_per_layer = [60, 60, 10]

    neurons_per_layer = [64, 10]
    #neurons_per_layer = [16, 10]
    #neurons_per_layer = [128, 10]
    momentum_gamma = .9  # Task 3 hyperparameter

    # Calibration
    m = mean(X_train)
    std = stddev(X_train)
    X_train = pre_process_images(X_train, m, std)
    X_val = pre_process_images(X_val, m, std)
    X_test = pre_process_images(X_test, m, std)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)
    Y_test = one_hot_encode(Y_test, 10)

    # Settings for task 3. Keep all to false for task 2.
    """
    use_shuffle = [False,True,True,True,True]
    use_improved_sigmoid = [False,False,True,True,True]
    use_improved_weight_init = [False,False,False,True,True]
    use_momentum = [False,False,False,False,True]
    train_loss=[0,0,0,0,0]
    val_loss=[0,0,0,0,0]
    train_accuracy=[0,0,0,0,0]
Example #11
X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_binary_dataset(
    category1, category2, validation_percentage)

# hyperparameters
num_epochs = 50
learning_rate = 0.2
batch_size = 128
l2_reg_lambda = 0
model, train_loss, val_loss, train_accuracy, val_accuracy = train(
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    l2_reg_lambda=l2_reg_lambda)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(pre_process_images(X_train))))
print("Final  Test Entropy Loss:",
      cross_entropy_loss(Y_test, model.forward(pre_process_images(X_test))))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(pre_process_images(X_val))))

print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
print("Test accuracy:", calculate_accuracy(X_test, Y_test, model))

# Plot loss
#plt.ylim([0., .4])
utils.plot_loss(train_loss, "Training Loss")
utils.plot_loss(val_loss, "Validation Loss")
plt.legend()
plt.savefig("binary_train_loss.png")
Example #12
                    fmt=fmt_string_train)
    utils.plot_loss(val_accuracy_loaded,
                    "Validation Accuracy " + displayname,
                    fmt=fmt_string_val)


if __name__ == "__main__":
    # Load dataset
    validation_percentage = 0.2
    X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_full_mnist(
        validation_percentage)

    # Preprocess targets and images
    x_train_mean = np.mean(X_train)
    x_train_std = np.std(X_train)
    X_train = pre_process_images(X_train, x_train_mean, x_train_std)
    X_test = pre_process_images(X_test, x_train_mean, x_train_std)
    X_val = pre_process_images(X_val, x_train_mean, x_train_std)
    Y_train = one_hot_encode(Y_train, 10)
    Y_test = one_hot_encode(Y_test, 10)
    Y_val = one_hot_encode(Y_val, 10)

    # Hyperparameters
    num_epochs = 50
    learning_rate = .1
    batch_size = 32
    neurons_per_layer = [60, 60, 10]
    momentum_gamma = .9  # Task 3 hyperparameter

    # Settings for task 3. Keep all to false for task 2.
    use_shuffle = True
Example #13
    num_epochs = 50
    learning_rate = .02
    batch_size = 32
    neurons_per_layer = [64, 10]
    momentum_gamma = .9  # Task 3 hyperparameter
    shuffle_data = True

    use_improved_sigmoid = True
    use_improved_weight_init = True
    use_momentum = True

    # Load dataset
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    mean = X_train.mean()
    sd = X_train.std()
    X_train = pre_process_images(X_train, mean, sd)
    X_val = pre_process_images(X_val, mean, sd)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)

    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    trainer = SoftmaxTrainer(
        momentum_gamma,
        use_momentum,
        model,
        learning_rate,
        batch_size,
        shuffle_data,
        X_train,
        Y_train,