Example no. 1
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
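    # Validate five times per epoch, i.e. after every 20% of the training data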
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            for i in range(len(model.ws)):
                model.ws[i] = model.ws[i] - learning_rate * model.grads[i]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_batch, outputs)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
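Note that in Example no. 1 the use_shuffle, use_momentum, and momentum_gamma arguments are accepted but never used. A minimal sketch of the momentum update these flags point to, assuming a list velocities with one np.zeros_like(w) entry per weight matrix (the name velocities is illustrative, not part of the original code):

# Hypothetical momentum variant of the update loop in train() above;
# velocities[i] is assumed to be initialized to np.zeros_like(model.ws[i]).
for i in range(len(model.ws)):
    if use_momentum:
        velocities[i] = momentum_gamma * velocities[i] - learning_rate * model.grads[i]
        model.ws[i] = model.ws[i] + velocities[i]
    else:
        model.ws[i] = model.ws[i] - learning_rate * model.grads[i]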
Example no. 2
def train_and_evaluate(
        neurons_per_layer: typing.List[int],
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_improved_sigmoid: bool,
        use_improved_weight_init: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):

    # Unpack the datasets; the final loss/accuracy prints below rely on these names
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model,
        datasets,
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma,
        use_shift=use_shift)

    print("----------", use_shuffle, use_improved_sigmoid,
          use_improved_weight_init, use_momentum, momentum_gamma, "----------")
    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Final Test Cross Entropy Loss:",
          cross_entropy_loss(Y_test, model.forward(X_test)))

    print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Final Validation accuracy:",
          calculate_accuracy(X_val, Y_val, model))
    print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model))
    return train_loss, val_loss, train_accuracy, val_accuracy
Example no. 3
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    outputs = model.forward(X)
    return np.mean(np.argmax(targets, axis=1) == np.argmax(outputs, axis=1))
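As a quick sanity check of the argmax comparison above, a self-contained example (the arrays are made-up stand-ins for model outputs and one-hot targets):

import numpy as np

# Two samples: predicted classes are 2 and 0; true classes are 2 and 1.
outputs = np.array([[0.1, 0.2, 0.7], [0.6, 0.3, 0.1]])
targets = np.array([[0, 0, 1], [0, 1, 0]])
# One of the two predictions matches, so the accuracy is 0.5.
accuracy = np.mean(np.argmax(targets, axis=1) == np.argmax(outputs, axis=1))
assert accuracy == 0.5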
Example no. 4
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    predictions = np.argmax(model.forward(X), axis=1)
    accuracy = np.count_nonzero(
        predictions == np.argmax(targets, axis=1)) / X.shape[0]
    return accuracy
Example no. 5
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # perform predictions
    Yhat = model.forward(X)

    # calculate accuracy by dividing the correct predictions with the total number of predictions
    accuracy = (Yhat.argmax(axis=1) == targets.argmax(axis=1)).mean()
    return accuracy
Example no. 6
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    logits = model.forward(X)
    outputs = np.zeros_like(logits)
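    # Mark the argmax of each row to turn the logits into one-hot predictions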
    outputs[np.arange(len(logits)), logits.argmax(1)] = 1
    accuracy = np.mean((outputs == targets).all(1))
    return accuracy
Example no. 7
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    targets_indices = np.argmax(targets, axis=1)
    outputs_indices = np.argmax(model.forward(X), axis=1)
    result = np.equal(targets_indices, outputs_indices)
    accuracy = result.sum() / result.size
    return accuracy
Example no. 8
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    y_hat = model.forward(X)
    y_predicted_position = np.argmax(y_hat, axis=1)
    y_position = np.argmax(targets, axis=1)
    accuracy = np.count_nonzero(
        y_position == y_predicted_position) / X.shape[0]
    return accuracy
Example no. 9
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    outputs = model.forward(X)
    max_outputs = np.argmax(outputs, axis=1)
    max_targets = np.argmax(targets, axis=1)

    # The difference of the argmax indices is zero exactly where the classes match
    num_correct = outputs.shape[0] - np.count_nonzero(max_outputs - max_targets)
    accuracy = num_correct / outputs.shape[0]
    return accuracy
Example no. 10
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    logits = model.forward(X)
    logits_max, targets_max = np.argmax(logits, axis=1), np.argmax(targets,
                                                                   axis=1)
    accuracy = (1 / targets.shape[0]) * np.sum(
        [(1 if l == t else 0) for (l, t) in zip(logits_max, targets_max)])
    return accuracy
Example no. 11
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    output = model.forward(X)
    predictions = one_hot_encode(np.array([np.argmax(output, axis=1)]).T, 10)
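    # The elementwise product of two one-hot matrices is nonzero only where both agree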
    correct_pred = np.count_nonzero(targets * predictions)
    total_pred = output.shape[0]
    accuracy = correct_pred / total_pred
    return accuracy
Example no. 12
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    predictions = model.forward(X)
    num_predictions = predictions.shape[0]

    correct_predictions = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(targets, axis=1))

    return correct_predictions / num_predictions
Example no. 13
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Forward pass to compute the predictions
    outputs = model.forward(X)

    # Count the rows where the predicted class (argmax over the 10 outputs) matches the target class
    accuracy = np.sum(
        outputs.argmax(1) == targets.argmax(1)) / targets.shape[0]
    return accuracy
Example no. 14
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    number_of_predictions = X.shape[0]
    number_of_rights = 0
    y_hat = model.forward(X)
    for i in range(number_of_predictions):
        # Compare argmax indices; rounding the probabilities would count a
        # prediction as wrong whenever the highest probability is below 0.5
        if np.argmax(y_hat[i]) == np.argmax(targets[i]):
            number_of_rights += 1
    accuracy = number_of_rights / number_of_predictions
    return accuracy
Example no. 15
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    outputs = model.forward(X)
    N = targets.shape[0]
    correctOutputs = 0

    for i in range(N):
        target = np.where(targets[i] == 1)[0][0]
        output = np.argmax(outputs[i])
        if target == output:
            correctOutputs += 1
    return correctOutputs / N
Example no. 16
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    predictions = model.forward(X)
    num_correct = 0
    for n in range(X.shape[0]):
        prediction = np.argmax(predictions[n, :])
        target = np.argmax(targets[n, :])
        if prediction == target:
            num_correct += 1

    return num_correct / X.shape[0]
Example no. 17
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """

    # forward pass
    logits = model.forward(X)

    # finding the index of the max values for both arrays
    logits = logits.argmax(axis=1)
    targets = targets.argmax(axis=1)

    # counting the equal entries and averaging
    accuracy = np.count_nonzero(np.equal(targets, logits)) / X.shape[0]

    return accuracy
Example no. 18
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Compute the outputs
    outputs = model.forward(X)
    # Counting the correct predictions
    nb_predictions = outputs.shape[0]
    nb_correct_predictions = 0
    for row, output in enumerate(outputs):
        index = np.argmax(output)
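        # Correct when the one-hot target has a 1 at the predicted index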
        if targets[row][index] == 1:
            nb_correct_predictions += 1

    accuracy = nb_correct_predictions / nb_predictions
    return accuracy
Example no. 19
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Copied from last assignment.
    lgts = model.forward(X)

    lgts_max = np.argmax(lgts, axis=1)
    targets_max = np.argmax(targets, axis=1)

    accuracy = ((1 / targets.shape[0]) *
                np.sum([(1 if l == t else 0)
                        for (l, t) in zip(lgts_max, targets_max)]))
    return accuracy
Example no. 20
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    logits = model.forward(X)
    
    correctPredictions = 0.0
    
    numberOfPredictions = logits.shape[0]
    
    for i in range(logits.shape[0]):
        prediction = np.argmax(logits[i])
        target = np.argmax(targets[i])
        
        if target == prediction:
            correctPredictions += 1
    
    accuracy = correctPredictions / numberOfPredictions
    return accuracy
Example no. 21
    Y_test = one_hot_encode(Y_test, 10)

    # Hyperparameters
    num_epochs = 20
    learning_rate = .1
    batch_size = 32
    neurons_per_layer = [64, 10]
    momentum_gamma = .9  # Task 3 hyperparameter

    # Settings for task 3. Keep all to false for task 2.
    use_shuffle = False
    use_improved_sigmoid = False
    use_improved_weight_init = False
    use_momentum = False

    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)

    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model, [X_train, Y_train, X_val, Y_val, X_test, Y_test],
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma)

    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Final Test Cross Entropy Loss:",
Example no. 22
    X_test = pre_process_images(X_test, mean=mu, std=sigma)

    # Hyperparameters
    num_epochs = 20
    learning_rate = .1
    batch_size = 32
    neurons_per_layer = [64, 10]
    momentum_gamma = .9  # Task 3 hyperparameter

    # Settings for task 3. Keep all to false for task 2.
    use_shuffle = False
    use_improved_sigmoid = False
    use_improved_weight_init = False
    use_momentum = False

    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)

    # Initialize the weights to values randomly sampled from [-1, 1]
    for layer_idx, w in enumerate(model.ws):
        model.ws[layer_idx] = np.random.uniform(-1, 1, size=w.shape)

    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model, [X_train, Y_train, X_val, Y_val, X_test, Y_test],
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma)

    print("Final Train Cross Entropy Loss:",
Example no. 23
        momentum_gamma = .9
        shuffle_data = True

        # TODO: Change if combination of all improvements is working
        use_improved_sigmoid = True
        use_improved_weight_init = True
        use_momentum = False

        # Load dataset
        X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
        X_train = pre_process_images(X_train)
        X_val = pre_process_images(X_val)
        Y_train = one_hot_encode(Y_train, 10)
        Y_val = one_hot_encode(Y_val, 10)

        model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                             use_improved_weight_init)
        trainer = SoftmaxTrainer(
            momentum_gamma,
            use_momentum,
            model,
            learning_rate,
            batch_size,
            shuffle_data,
            X_train,
            Y_train,
            X_val,
            Y_val,
        )
        current_train_history, current_val_history = trainer.train(num_epochs)
        train_history[model_name] = current_train_history
        val_history[model_name] = current_val_history
Example no. 24
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    outputs = model.forward(X)
    return np.mean(np.argmax(targets, axis=1) == np.argmax(outputs, axis=1))
Example no. 25
    neurons_per_layer = [64, 10]
    momentum_gamma = .9  # Task 3 hyperparameter
    shuffle_data = True

    use_improved_sigmoid = False
    use_improved_weight_init = False
    use_momentum = False

    # Load dataset
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    X_train = pre_process_images(X_train)
    X_val = pre_process_images(X_val)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)

    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    trainer = SoftmaxTrainer(
        momentum_gamma,
        use_momentum,
        model,
        learning_rate,
        batch_size,
        shuffle_data,
        X_train,
        Y_train,
        X_val,
        Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)

    # Example created in assignment text - Comparing with and without shuffling.
Example no. 26
    use_improved_sigmoid = False
    use_improved_weight_init = False
    use_momentum = False

    # Load dataset
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    X_train = pre_process_images(X_train)
    X_val = pre_process_images(X_val)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)

    ###### 1st model - network from task 3 ######
    use_improved_sigmoid = True
    use_improved_weight_init = True
    use_momentum = True
    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    trainer = SoftmaxTrainer(
        momentum_gamma,
        use_momentum,
        model,
        learning_rate,
        batch_size,
        shuffle_data,
        X_train,
        Y_train,
        X_val,
        Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)

    ###### 2nd model - network from task 4d ######
Example no. 27
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Early stopping variables
    early_stopped_weight_j = np.zeros_like(model.ws[0])
    early_stopped_weight_k = np.zeros_like(model.ws[1])
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update the weights
            model.ws[0] = model.ws[0] - learning_rate * model.grads[0]
            model.ws[1] = model.ws[1] - learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Test the validation data on the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_Train and not only the current batch
                # once every validation epoch
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Early stop implementation

                # If the loss does not reduce compared to best loss, increment counter
                # Otherwise, set the counter to 0 and update best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    # Store copies so later updates cannot change the saved weights
                    early_stopped_weight_j = model.ws[0].copy()
                    early_stopped_weight_k = model.ws[1].copy()
                # If no new best loss was achieved 30 checks in a row, stop training
                if early_stop_counter == 30:
                    print(
                        "Validation loss has not improved for 30 consecutive "
                        "checks, triggering the early stop at step " +
                        str(global_step) + " and epoch " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
Example no. 28
    shuffle_data = True

    use_improved_sigmoid = True
    use_improved_weight_init = True
    use_momentum = True

    # Load dataset
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    mean = X_train.mean()
    sd = X_train.std()
    X_train = pre_process_images(X_train, mean, sd)
    X_val = pre_process_images(X_val, mean, sd)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)

    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    trainer = SoftmaxTrainer(
        momentum_gamma,
        use_momentum,
        model,
        learning_rate,
        batch_size,
        shuffle_data,
        X_train,
        Y_train,
        X_val,
        Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)

    # two hidden layers
if __name__ == "__main__":
    # Simple test on one-hot encoding
    Y = np.zeros((1, 1), dtype=int)
    Y[0, 0] = 3
    Y = one_hot_encode(Y, 10)
    assert Y[0, 3] == 1 and Y.sum() == 1, \
        f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}"

    X_train, Y_train, *_ = utils.load_full_mnist(0.1)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_train = pre_process_images(X_train, mean, std)
    Y_train = one_hot_encode(Y_train, 10)
    assert X_train.shape[1] == 785,\
        f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}"

    # Modify your network here
    neurons_per_layer = [64, 64, 10]
    use_improved_sigmoid = True
    use_improved_weight_init = True
    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)

    # Gradient approximation check for 100 images
    X_train = X_train[:100]
    Y_train = Y_train[:100]
    for layer_idx, w in enumerate(model.ws):
        model.ws[layer_idx] = np.random.uniform(-1, 1, size=w.shape)

    gradient_approximation_test(model, X_train, Y_train)
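For context, gradient_approximation_test presumably compares the analytic gradients against a numerical approximation. A minimal central-difference sketch of such a check (the helper name, the sampling of 10 entries per layer, and the tolerance are assumptions, not the course starter code):

import numpy as np

def numerical_gradient_check(model, X, Y, eps=1e-2, tol=1e-4):
    # Hypothetical sketch: perturb a few weights and compare the
    # central-difference slope of the loss with the analytic gradient.
    outputs = model.forward(X)
    model.backward(X, outputs, Y)
    for layer_idx, w in enumerate(model.ws):
        for _ in range(10):  # probe a handful of random entries per layer
            i = np.random.randint(w.shape[0])
            j = np.random.randint(w.shape[1])
            orig = w[i, j]
            w[i, j] = orig + eps
            loss_plus = cross_entropy_loss(Y, model.forward(X))
            w[i, j] = orig - eps
            loss_minus = cross_entropy_loss(Y, model.forward(X))
            w[i, j] = orig  # restore the weight
            approx = (loss_plus - loss_minus) / (2 * eps)
            assert abs(approx - model.grads[layer_idx][i, j]) <= tol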
Example no. 30
    early_stopping = True
    use_improved_sigmoid = True
    use_improved_weight_init = True
    use_momentum = True

    # Load dataset
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_train = pre_process_images(X_train, mean, std)
    X_val = pre_process_images(X_val, mean, std)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)

    model = SoftmaxModel(neurons_per_layer_small, use_improved_sigmoid,
                         use_improved_weight_init)
    trainer = SoftmaxTrainer(
        momentum_gamma,
        use_momentum,
        model,
        learning_rate,
        batch_size,
        shuffle_data,
        early_stopping,
        X_train,
        Y_train,
        X_val,
        Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)