Code example #1
def train(model, forward, backward, update, alpha, num_epochs, batch_size):
    """ Trains a simple MLP on the Toronto Face dataset.
    :param model: Dictionary of model weights.
    :param forward: Forward prop function.
    :param backward: Backward prop function.
    :param update: Update weights function.
    :param alpha: Learning rate.
    :param num_epochs: Number of epochs to run training for.
    :param batch_size: Mini-batch size, -1 for full batch.
    :return: A tuple (model, stats)
        WHERE
        model: The trained model weights (updated in place).
        stats: Dict of per-epoch (epoch, value) lists under the keys
            "train_ce", "valid_ce", "train_acc", "valid_acc".
    """
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
        target_test = load_data("data/toronto_face.npz")
    rnd_idx = np.arange(inputs_train.shape[0])

    train_ce_list = []
    valid_ce_list = []
    train_acc_list = []
    valid_acc_list = []
    num_train_cases = inputs_train.shape[0]
    if batch_size == -1:
        batch_size = num_train_cases
    num_steps = int(np.ceil(num_train_cases / batch_size))
    for epoch in range(num_epochs):
        # Reshuffle the training set at the start of every epoch.
        np.random.shuffle(rnd_idx)
        inputs_train = inputs_train[rnd_idx]
        target_train = target_train[rnd_idx]
        for step in range(num_steps):
            # Forward pass on the current mini-batch.
            start = step * batch_size
            end = min(num_train_cases, (step + 1) * batch_size)
            x = inputs_train[start:end]
            t = target_train[start:end]

            var = forward(model, x)
            prediction = softmax(var["y"])

            # Mean cross entropy and accuracy over the mini-batch.
            # NOTE(review): np.log(prediction) is -inf if the softmax
            # saturates to exactly 0 for a target class — consider an
            # epsilon clamp if that ever occurs in practice.
            train_ce = -np.sum(t * np.log(prediction)) / float(x.shape[0])
            train_acc = (np.argmax(prediction, axis=1) == np.argmax(
                t, axis=1)).astype("float").mean()

            # Gradient of the mean cross entropy w.r.t. the softmax input.
            error = (prediction - t) / float(x.shape[0])

            # Backward prop.
            backward(model, error, var)

            # Update weights.
            update(model, alpha)

        valid_ce, valid_acc = evaluate(inputs_valid,
                                       target_valid,
                                       model,
                                       forward,
                                       batch_size=batch_size)
        # train_ce/train_acc come from the last mini-batch of the epoch.
        train_ce_list.append((epoch, train_ce))
        train_acc_list.append((epoch, train_acc))
        valid_ce_list.append((epoch, valid_ce))
        valid_acc_list.append((epoch, valid_acc))
    # Use distinct figure numbers so the accuracy plot does not
    # overwrite the cross-entropy plot.
    display_plot(train_ce_list, valid_ce_list, "Cross Entropy", number=0)
    display_plot(train_acc_list, valid_acc_list, "Accuracy", number=1)

    # Final evaluation on all three splits.
    train_ce, train_acc = evaluate(inputs_train,
                                   target_train,
                                   model,
                                   forward,
                                   batch_size=batch_size)
    valid_ce, valid_acc = evaluate(inputs_valid,
                                   target_valid,
                                   model,
                                   forward,
                                   batch_size=batch_size)
    test_ce, test_acc = evaluate(inputs_test,
                                 target_test,
                                 model,
                                 forward,
                                 batch_size=batch_size)
    print("CE: Train %.5f Validation %.5f Test %.5f" %
          (train_ce, valid_ce, test_ce))
    print("Acc: Train {:.5f} Validation {:.5f} Test {:.5f}".format(
        train_acc, valid_acc, test_acc))

    stats = {
        "train_ce": train_ce_list,
        "valid_ce": valid_ce_list,
        "train_acc": train_acc_list,
        "valid_acc": valid_acc_list
    }

    return model, stats
Code example #2
File: nn.py — Project: annezhu98/neural-networks
def train(model, forward, backward, update, alpha, num_epochs, batch_size):
    """ Trains a simple MLP on the Toronto Face dataset.
    :param model: Dictionary of model weights.
    :param forward: Forward prop function.
    :param backward: Backward prop function.
    :param update: Update weights function.
    :param alpha: Learning rate.
    :param num_epochs: Number of epochs to run training for.
    :param batch_size: Mini-batch size, -1 for full batch.
    :return: A tuple (model, stats)
        WHERE
        model: The trained model weights (updated in place).
        stats: Dict of per-epoch (epoch, value) lists under the keys
            "train_ce", "valid_ce", "train_acc", "valid_acc".
    """
    # Samples whose top softmax probability falls below this threshold
    # are displayed as "not confident" during training.
    confidence_threshold = 0.5

    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
        target_test = load_data("data/toronto_face.npz")
    rnd_idx = np.arange(inputs_train.shape[0])

    train_ce_list = []
    valid_ce_list = []
    train_acc_list = []
    valid_acc_list = []
    num_train_cases = inputs_train.shape[0]
    if batch_size == -1:
        batch_size = num_train_cases
    num_steps = int(np.ceil(num_train_cases / batch_size))
    for epoch in range(num_epochs):
        # Reshuffle the training set at the start of every epoch.
        np.random.shuffle(rnd_idx)
        inputs_train = inputs_train[rnd_idx]
        target_train = target_train[rnd_idx]
        for step in range(num_steps):
            # Forward pass on the current mini-batch.
            start = step * batch_size
            end = min(num_train_cases, (step + 1) * batch_size)
            x = inputs_train[start:end]
            t = target_train[start:end]

            var = forward(model, x)
            prediction = softmax(var["y"])

            # Plot images where the network isn't confident about the
            # output. Indices are found with one vectorized row-max scan
            # instead of a per-sample np.amax call.
            # NOTE(review): plt.show() blocks until each window is closed,
            # which stalls training — intended as a debugging aid.
            low_conf = np.where(
                np.max(prediction, axis=1) < confidence_threshold)[0]
            for i in low_conf:
                plt.imshow(x[i].reshape(48, 48), cmap='gray')
                plt.show()
                print(
                    f'The model prediction is {np.argmax(prediction[i]) == np.argmax(t[i])}'
                )

            # Mean cross entropy and accuracy over the mini-batch.
            train_ce = -np.sum(t * np.log(prediction)) / float(x.shape[0])
            train_acc = (np.argmax(prediction, axis=1) == np.argmax(
                t, axis=1)).astype("float").mean()
            print(("Epoch {:3d} Step {:2d} Train CE {:.5f} "
                   "Train Acc {:.5f}").format(epoch, step, train_ce,
                                              train_acc))

            # Gradient of the mean cross entropy w.r.t. the softmax input.
            error = (prediction - t) / float(x.shape[0])

            # Backward prop.
            backward(model, error, var)

            # Update weights.
            update(model, alpha)

        valid_ce, valid_acc = evaluate(inputs_valid,
                                       target_valid,
                                       model,
                                       forward,
                                       batch_size=batch_size)
        print(("Epoch {:3d} "
               "Validation CE {:.5f} "
               "Validation Acc {:.5f}\n").format(epoch, valid_ce, valid_acc))
        # train_ce/train_acc come from the last mini-batch of the epoch.
        train_ce_list.append((epoch, train_ce))
        train_acc_list.append((epoch, train_acc))
        valid_ce_list.append((epoch, valid_ce))
        valid_acc_list.append((epoch, valid_acc))
    display_plot(train_ce_list, valid_ce_list, "Cross Entropy", number=0)
    display_plot(train_acc_list, valid_acc_list, "Accuracy", number=1)

    # Final evaluation on all three splits.
    train_ce, train_acc = evaluate(inputs_train,
                                   target_train,
                                   model,
                                   forward,
                                   batch_size=batch_size)
    valid_ce, valid_acc = evaluate(inputs_valid,
                                   target_valid,
                                   model,
                                   forward,
                                   batch_size=batch_size)
    test_ce, test_acc = evaluate(inputs_test,
                                 target_test,
                                 model,
                                 forward,
                                 batch_size=batch_size)
    print("CE: Train %.5f Validation %.5f Test %.5f" %
          (train_ce, valid_ce, test_ce))
    print("Acc: Train {:.5f} Validation {:.5f} Test {:.5f}".format(
        train_acc, valid_acc, test_acc))

    stats = {
        "train_ce": train_ce_list,
        "valid_ce": valid_ce_list,
        "train_acc": train_acc_list,
        "valid_acc": valid_acc_list
    }

    return model, stats