Example #1
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    
    # Use a smaller learning rate when momentum is enabled
    if use_momentum:
        learning_rate = 0.02

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            if use_momentum:
                # Copy the previous step's gradients for the momentum term
                # (a copy, so backward() cannot overwrite them in place)
                prev_grads = [grad.copy() for grad in model.grads]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            for i in range(len(model.ws)):
                if use_momentum:
                    model.ws[i] = model.ws[i] - learning_rate * (
                        model.grads[i] + momentum_gamma * prev_grads[i])
                else:
                    model.ws[i] = model.ws[i] - learning_rate * model.grads[i]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_train, model.forward(X_train))
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
        # shuffle training examples after each epoch
        if use_shuffle:
            X_train, Y_train = unison_shuffled_copies(X_train, Y_train)
    return model, train_loss, val_loss, train_accuracy, val_accuracy
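
Example #1 relies on a unison_shuffled_copies helper that is not shown. A minimal sketch of what such a helper might look like, assuming it returns both arrays reordered by the same random permutation (the implementation below is an assumption, not the original code):

import numpy as np

def unison_shuffled_copies(a: np.ndarray, b: np.ndarray):
    # Reorder both arrays with the same random permutation so that
    # inputs and labels stay aligned.
    assert a.shape[0] == b.shape[0]
    perm = np.random.permutation(a.shape[0])
    return a[perm], b[perm]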
Example #2
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update the weights with a plain SGD step
            # (this variant does not use the momentum or shuffle flags)
            model.ws[-1] = model.ws[-1] - learning_rate * model.grads[-1]
            model.ws[-2] = model.ws[-2] - learning_rate * model.grads[-2]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _outputs_train = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, _outputs_train)
                train_loss[global_step] = _train_loss

                _outputs_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, _outputs_val)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
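
The update in Example #2 is written out only for the last two weight matrices. A loop over all weight matrices, as in Example #1, covers models with more layers; a minimal sketch, assuming model.grads is index-aligned with model.ws:

# Plain SGD step over every weight matrix
for i in range(len(model.ws)):
    model.ws[i] = model.ws[i] - learning_rate * model.grads[i]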
Example #3
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Early stop variables
    early_stopped_weight_j = np.zeros(
        (model.ws[0].shape[0], model.ws[0].shape[1]))
    early_stopped_weight_k = np.zeros(
        (model.ws[1].shape[0], model.ws[1].shape[1]))
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update the weights
            model.ws[0] = model.ws[0] - learning_rate * model.grads[0]
            model.ws[1] = model.ws[1] - learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Test the validation data on the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_Train and not only the current batch
                # once every validation epoch
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Early stop implementation

                # If the loss does not reduce compared to best loss, increment counter
                # Otherwise, set the counter to 0 and update best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    # Copy the weights so later updates cannot change the stored snapshot
                    early_stopped_weight_j = model.ws[0].copy()
                    early_stopped_weight_k = model.ws[1].copy()
                # If no new best loss was achieved 30 times in a row, stop training
                if early_stop_counter == 30:
                    print(
                        "Validation cross entropy loss stopped improving, triggering "
                        "early stopping at step " + str(global_step) +
                        " and epoch " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
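
Example #3 keeps the early-stopping state in several loose variables. The same rule (stop after 30 validation checks without a new best loss, then restore the best weights) can be packaged in a small helper; the class below is a sketch with hypothetical names, not part of the original code:

class EarlyStopper:
    # Stop after `patience` validation checks without a new best loss.
    def __init__(self, patience: int = 30):
        self.patience = patience
        self.best_loss = float("inf")
        self.counter = 0
        self.best_weights = None

    def should_stop(self, val_loss: float, weights) -> bool:
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.counter = 0
            # Copy so later weight updates cannot change the stored snapshot
            self.best_weights = [w.copy() for w in weights]
        else:
            self.counter += 1
        return self.counter >= self.patience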
Example #4
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5

    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Variables for early stopping
    last_val_loss = float("inf")
    best_val_loss = float("inf")
    best_weights = None
    increased_last_time = False

    # Store last weights update term for momentum
    last_weights_update = []
    for l in range(len(model.ws)):
        last_weights_update.append(np.zeros_like(model.ws[l]))

    global_step = 0
    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            X_local = X_batch
            if use_shift:
                # Data augmentation: shift each image's pixels by its own random
                # offset in [-2, 2] along the flattened image axis, then append
                # a column of ones again.
                shifts = np.random.randint(low=-2, high=3, size=X_batch.shape[0])
                shifted = np.stack([np.roll(img, s)
                                    for img, s in zip(X_batch[:, :784], shifts)])
                ones = np.ones((shifted.shape[0], 1))
                X_local = np.concatenate((shifted, ones), axis=1)

            # Forward / backward on the (possibly shifted) batch
            train_output = model.forward(X_local)

            model.backward(X_local, train_output, Y_batch)

            for l in range(len(model.ws)):
                if use_momentum:
                    update_term = momentum_gamma * last_weights_update[
                        l] - learning_rate * model.grads[l]
                    model.ws[l] += update_term
                    last_weights_update[l] = update_term
                else:
                    model.ws[l] -= learning_rate * model.grads[l]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                val_output = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_output)
                val_loss[global_step] = _val_loss

                train_output = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, train_output)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

        # In order to keep labels in the right order, we shuffle an array of indices
        # and then apply this ordering to both inputs and labels
        if use_shuffle:
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            Y_train = Y_train[indices]

        # Compute validation loss for early stopping
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # Copy the weights: the in-place updates above would otherwise
            # keep modifying the arrays this list refers to
            best_weights = [w.copy() for w in model.ws]
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            if increased_last_time:
                model.ws = best_weights
                break
            else:
                increased_last_time = True
        else:
            increased_last_time = False
        last_val_loss = _val_loss

    return model, train_loss, val_loss, train_accuracy, val_accuracy
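
The momentum update in Example #4 is the classical velocity form: the previous update term is decayed by momentum_gamma and a fresh gradient step is added. With learning rate \eta, momentum \gamma and velocity v_t, this corresponds to

    v_{t+1} = \gamma v_t - \eta \nabla_w L(w_t), \qquad w_{t+1} = w_t + v_{t+1},

which is what update_term and last_weights_update implement per weight matrix.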
Example #5
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        all_tricks=False):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Hyperparameter choice: use a smaller learning rate when momentum is enabled
    if use_momentum:
        learning_rate = 0.02

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Early stop variables
    early_stopped_weight_j = np.zeros(
        (model.ws[0].shape[0], model.ws[0].shape[1]))
    early_stopped_weight_k = np.zeros(
        (model.ws[1].shape[0], model.ws[1].shape[1]))
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Compute the gradient
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)

            # Update the weights, with or without the Task 3d momentum term
            for layer in range(len(model.neurons_per_layer)):
                if use_momentum:
                    new_weights = model.ws[layer] - learning_rate * model.grads[
                        layer] + momentum_gamma * model.delta_w[layer]
                    model.delta_w[layer] = new_weights - model.ws[layer]
                    model.ws[layer] = new_weights
                else:
                    model.ws[layer] = model.ws[
                        layer] - learning_rate * model.grads[layer]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Test the validation data on the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_Train and not only the current batch
                # once every validation epoch
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Track the accuracy
                if not all_tricks:
                    train_accuracy[global_step] = calculate_accuracy(
                        outputs_training, Y_train, model)
                    val_accuracy[global_step] = calculate_accuracy(
                        outputs_validation, Y_val, model)

                # Early stop implementation

                # If the loss does not reduce compared to best loss, increment counter
                # Otherwise, set the counter to 0 and update best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    # Copy the weights so later updates cannot change the stored snapshot
                    early_stopped_weight_j = model.ws[0].copy()
                    early_stopped_weight_k = model.ws[1].copy()
                # If no new best loss was achieved 30 times in a row, stop training
                if early_stop_counter == 30:
                    print(
                        "\nValidation cross entropy loss stopped improving, triggering "
                        "early stopping at step " + str(global_step) +
                        " and epoch " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

            global_step += 1
        # Task 3a: Shuffle training samples after each epoch
        if use_shuffle:
            # Use the shuffle function from sklearn
            X_train, Y_train = shuffle(X_train, Y_train)

    return model, train_loss, val_loss, train_accuracy, val_accuracy
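
Example #5 shuffles with the shuffle function from sklearn (per the comment above the call); the import it needs is not shown. Assuming the standard sklearn.utils.shuffle, the call looks like this:

from sklearn.utils import shuffle

# Returns new arrays with both inputs reordered by the same permutation
X_train, Y_train = shuffle(X_train, Y_train)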
Example #6
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        use_early_stopping: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Momentum buffers, one per weight matrix (the scalar 0 broadcasts
    # against the gradient arrays on the first update)
    momentum = [0 for _ in range(len(model.grads))]

    # Variables used for early stopping
    mean_val_loss = []
    list_val_losses = []

    global_loss_counter = 2
    global_step = 0
    for epoch in range(num_epochs):
        # Shuffle the training set in place at the start of each epoch
        if use_shuffle:
            shuffle_in_unison(X_train, Y_train)
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)

            if use_momentum:
                # Momentum as an exponential moving average of the gradients
                momentum[0] = ((1 - momentum_gamma) * model.grads[0]
                               + momentum_gamma * momentum[0])
                momentum[1] = ((1 - momentum_gamma) * model.grads[1]
                               + momentum_gamma * momentum[1])
                model.ws[0] -= learning_rate * momentum[0]
                model.ws[1] -= learning_rate * momentum[1]
            else:
                model.ws[0] -= learning_rate * model.grads[0]
                model.ws[1] -= learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                # Training loss here is measured on the current batch only
                _train_loss = cross_entropy_loss(Y_batch, y_hat)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping: every 5 validation checks, record the mean
                # validation loss; after two such means, stop if the earlier
                # mean is lower than the later one (loss is trending upward)
                if use_early_stopping:
                    list_val_losses.append(_val_loss)
                    if global_loss_counter % 5 == 0:
                        mean_val_loss.append(np.mean(list_val_losses))
                        list_val_losses = []
                        if global_loss_counter % 10 == 0:
                            if mean_val_loss[0] < mean_val_loss[1]:
                                return model, train_loss, val_loss, train_accuracy, val_accuracy
                            mean_val_loss = []
                    global_loss_counter += 1

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
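
Example #6 calls a shuffle_in_unison helper that is not shown; since its return value is ignored, it presumably shuffles both arrays in place with the same permutation. A minimal sketch under that assumption (not the original code):

import numpy as np

def shuffle_in_unison(a: np.ndarray, b: np.ndarray) -> None:
    # Shuffle both arrays in place with the same random permutation
    assert a.shape[0] == b.shape[0]
    perm = np.random.permutation(a.shape[0])
    a[:] = a[perm]
    b[:] = b[perm]

Note also that the momentum in this example is an exponential moving average of the gradients, (1 - momentum_gamma) * grad + momentum_gamma * momentum, which scales the effective step differently from the classical velocity form used in Example #4.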