def test_multi_dimensional_numpy(self):
     """
     Check one_hot_encoding on the multi-dimensional numpy fixture.

     Random paths are followed down three nesting levels of the encoded
     result, asserting the container type and length at each level. Random
     elements of the last leaf reached are then compared with the encoding
     of the matching label in ``self.multi_dimensional``.
     """
     encoded, encoding = one_hot_encoding(self.multi_dimensional_numpy)
     assert len(self.multi_dimensional) == len(encoded)
     assert type(encoded) is np.ndarray

     # The leaf reached by the last random descent is reused by the second
     # loop below, so both names must exist even if no descent is made.
     leaf_actual, leaf_expected = encoded, None
     for _ in range(NUMBER_OF_RANDOM_TEST):
         leaf_actual, leaf_expected = encoded, self.multi_dimensional
         # Expected length of the array found after each indexing step.
         level_sizes = (5, 2, N_ELEMENTS * 1000 / (5 * 4 * 2))
         for size in level_sizes:
             pick = randint(0, len(leaf_actual) - 1)
             leaf_actual = leaf_actual[pick]
             leaf_expected = leaf_expected[pick]
             assert type(leaf_actual) is np.ndarray
             assert len(leaf_actual) == size

     for _ in range(NUMBER_OF_RANDOM_TEST):
         pick = randint(0, len(leaf_actual) - 1)
         one_hot = leaf_actual[pick]
         reference = encoding[leaf_expected[pick]]
         assert type(one_hot) is np.ndarray
         assert len(one_hot) == N_ELEMENTS
         # Exactly one active position: the components add up to 1.
         assert abs(sum(one_hot) - 1) < EPSILON
         assert (one_hot == reference).all()
 def test_one_dimension(self):
     """
     Check one_hot_encoding on the one-dimensional list fixture.

     The result must be a list as long as the input; randomly sampled
     entries must be lists of N_ELEMENTS values that sum to 1 and match the
     encoding of the corresponding input label.
     """
     encoded, encoding = one_hot_encoding(self.one_dimension)
     assert len(self.one_dimension) == len(encoded)
     assert type(encoded) is list
     for _ in range(NUMBER_OF_RANDOM_TEST):
         pick = randint(0, len(encoded) - 1)
         one_hot = encoded[pick]
         reference = encoding[self.one_dimension[pick]]
         assert type(one_hot) is list
         assert len(one_hot) == N_ELEMENTS
         # Exactly one active position: the components add up to 1.
         assert abs(sum(one_hot) - 1) < EPSILON
         assert (reference == one_hot).all()
 def test_one_dimension_numpy(self):
     """
     Check one_hot_encoding on the one-dimensional numpy fixture.

     The result must be an ndarray of shape (number of labels, N_ELEMENTS);
     randomly sampled rows must sum to 1 and match the encoding of the
     corresponding input label.
     """
     encoded, encoding = one_hot_encoding(self.one_dimension_numpy)
     assert len(self.one_dimension_numpy) == len(encoded)
     assert encoded.shape == (len(encoded), N_ELEMENTS)
     assert type(encoded) is np.ndarray
     for _ in range(NUMBER_OF_RANDOM_TEST):
         pick = randint(0, len(encoded) - 1)
         one_hot = encoded[pick]
         reference = encoding[self.one_dimension_numpy[pick]]
         assert type(one_hot) is np.ndarray
         assert len(one_hot) == N_ELEMENTS
         # Exactly one active position: the components add up to 1.
         assert abs(one_hot.sum() - 1) < EPSILON
         assert (reference == one_hot).all()
Example #4
0
    # Initialize network
    network = NeuralNetwork(4, [6], 3, [tanh, sigmoid], LR)
    filename = "network"
    type_net = "Neural"
    k_fold = ""

    if args.normalize:
        network = NormalizedNetwork(4, [6], 3, [tanh, sigmoid], LR)
        type_net = "Normalized"
        filename = type_net.lower()

    # iris dataset
    dataset = import_data("../../data/iris.data")

    labels, encoding = one_hot_encoding(dataset[-1])
    classes = list(encoding.keys())
    dataset = dataset[0:-1]

    # Define Trainer
    trainer = StandardTrainer(dataset, labels.T, TRAIN_SIZE)
    k = 1

    if args.cross_validation is not None:
        k = args.cross_validation
        k_fold = "_{}fold".format(k)
        trainer = KFoldTrainer(k, 2, dataset, labels.T)

    fig = plt.figure(figsize=FIG_SIZE)
    fig.subplots_adjust(wspace=0.3)
    ax = fig.add_subplot(121)
def format_axes(title: str, scale: float, ax: Axes) -> None:
    """
    Format axes of a matplotlib graph

    :param title: Title shown above the axes (a newline is appended)
    :param scale: Scale factor, the aspect ratio is set to ``6 * scale``;
        callers pass a fraction (subset size / 150), hence ``float``
    :param ax: Axes of matplotlib
    :return: None, modify ax
    """
    ax.set_title("{}\n".format(title), fontsize=TITLE_SIZE)
    # set_aspect documents 'auto', 'equal' or a float: hand over the number
    # itself instead of its string form.
    ax.set_aspect(aspect=6 * scale)
    ax.set_xlabel("Indexes", fontsize=FONT_SIZE)
    ax.grid()


if __name__ == '__main__':
    # Draw the one-hot labels of the iris dataset and of its train/test split
    # as three stacked 'Greys' images, then save the figure.
    fig, axes = plt.subplots(3, 1, figsize=FIG_SIZE)
    dataset = import_data("../../data/iris.data")
    one_hot_dataset, encoding = one_hot_encoding(dataset[4])

    # Top panel: the whole label matrix.
    full_size = one_hot_dataset.shape[0]
    axes[0].imshow(one_hot_dataset.T, cmap='Greys')
    format_axes("Labels of dataset, size: {}".format(full_size),
                full_size / 150, axes[0])

    # Remaining panels: the two parts returned by split_set.
    train_set, test_set = split_set(one_hot_dataset.T, 0.6)
    panels = (
        (axes[1], "Train set, size: {}", train_set),
        (axes[2], "Test set, size: {}", test_set),
    )
    for panel, caption, subset in panels:
        panel.imshow(subset, cmap='Greys')
        subset_size = subset.shape[1]
        format_axes(caption.format(subset_size), subset_size / 150, panel)

    plt.savefig("../results/labels_of_dataset.png")
def train_evaluate(architecture: dict, dataset_name: str) -> NeuralNetwork:
    """
    Train and evaluate a Network

    The dataset is oversampled, its last row is one-hot encoded as labels and
    removed, and its first row is dropped as well. The learning curve, the
    cost and the confusion matrix are plotted and saved under ``../results``.

    :param architecture: Architecture of NeuralNetwork; the keys
        "INTERNAL_LAYERS", "ACT_FUNCS" and "LR" are read
    :param dataset_name: Dataset to use, loaded from
        ``../data/<dataset_name>.data``
    :return: Trained Neural Network
    """
    # import dataset
    dataset = import_data("../data/{}.data".format(dataset_name))

    dataset = oversample(dataset)
    more_info = "(oversample)"

    # Labels live in the last row; encode them before removing that row.
    labels, encoding = one_hot_encoding(dataset[-1])
    classes = list(encoding.keys())
    dataset = np.delete(dataset, -1, 0)

    # Drop the first row too, it is not fed to the network.
    dataset = np.delete(dataset, [0], 0)

    # Initialize network
    logging.info("Input size: {}\tOutput size: {}".format(
        dataset.shape[0], len(encoding)))
    network = NeuralNetwork(dataset.shape[0], architecture["INTERNAL_LAYERS"],
                            len(encoding), architecture["ACT_FUNCS"],
                            architecture["LR"])

    # Define Trainer
    trainer = StandardTrainer(dataset, labels.T, TRAIN_SIZE)

    # Left panel: learning curve with the cost on a twin y axis.
    # Right panel: confusion matrix.
    fig = plt.figure(figsize=FIG_SIZE)
    fig.subplots_adjust(wspace=0.3)
    ax = fig.add_subplot(121)
    ax2 = ax.twinx()
    ax3 = fig.add_subplot(122)

    trained, (learn, costs) = trainer.train(network,
                                            epochs=EPOCHS,
                                            repeat=True)

    prediction = trainer.evaluate(trained)

    c_m = confusion_matrix(prediction, trainer.get_labels())

    line = ax.plot(learn, label="Learning Curve", linewidth=2.5, c="b")

    line2 = ax2.plot(costs, label="MSE", linestyle="--", linewidth=2.5, c="r")
    lines = line + line2

    ax.set_ylabel("Learning Curve", fontsize=FONT_SIZE)
    ax.set_xlabel("Epochs", fontsize=FONT_SIZE)
    ax.set_title("Network on {}\n".format(dataset_name), fontsize=TITLE_SIZE)
    ax.grid()

    ax2.set_ylabel("Cost", fontsize=FONT_SIZE)
    ax2.grid()

    # One legend for both curves; a separate name keeps the one-hot `labels`
    # from being shadowed.
    legend_labels = [curve.get_label() for curve in lines]
    ax2.legend(lines, legend_labels, fontsize=FONT_SIZE, loc="center right")

    show_matrix(
        ax3, c_m,
        (classes, ["Predicted\n{}".format(a_class) for a_class in classes]),
        "Confusion Matrix of Test Set\n", FONT_SIZE, TITLE_SIZE)

    # Report under the dataset's name, not the raw data array.
    print("Summary on {}:\n".format(dataset_name))
    report_results(c_m)

    plt.savefig("../results/Network_on_{}{}.png".format(
        dataset_name, more_info))

    return trained
Example #7
0
    else:
        path_dataset = args.dataset + ".data"

    # import dataset
    dataset = import_data("../data/{}".format(path_dataset), sep=args.sep, header=args.header)

    more_info = ""

    if args.oversample:
        dataset = oversample(dataset, label=args.labels)
        more_info = "(oversampled)"
    if args.undersample:
        dataset = undersample(dataset, label=args.labels)
        more_info = "(undersampled)"

    labels, encoding = one_hot_encoding(dataset[args.labels])
    classes = list(encoding.keys())
    dataset = np.delete(dataset, args.labels, 0)

    encodings = list()

    first = True

    for i in reversed(args.categorical):
        temp1, temp2 = one_hot_encoding(dataset[i])
        dataset = np.concatenate((np.delete(dataset, i, 0), temp1.copy().T))
        encodings.append(temp2.copy())
        del temp1
        del temp2

    dataset = np.delete(dataset, args.exclude, 0)
Example #8
0
from useful.preprocess_dataset import import_data, one_hot_encoding
from useful.results import confusion_matrix, show_matrix, annotate
from useful.results import accuracy, precision, recall, f1_score

# NOTE(review): N and the X/Y bounds are not used in the visible part of this
# script -- confirm they are still needed.
N = 100000
X_MIN, X_MAX = -50, 50
Y_MIN, Y_MAX = X_MIN, X_MAX
# Figure size handed to plt.subplots; font sizes handed to show_matrix.
FIG_SIZE = (28, 12)
FONT_SIZE, TITLE_SIZE = 20, 30
# Fixed seed so the np.random draws made by this script are repeatable.
np.random.seed(2)

if __name__ == '__main__':
    dataset = import_data("../../data/iris.data")[4]
    classes = np.unique(dataset)
    labels, _ = one_hot_encoding(dataset)
    prediction, _ = one_hot_encoding(
        np.random.choice(["a", "b", "c"], size=labels.shape[0], replace=True))
    matrix = confusion_matrix(prediction.T, labels.T)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=FIG_SIZE)
    show_matrix(ax1, matrix, ([classes[0], classes[1], classes[2]], [
        "Predicted\n" + classes[0], "Predicted\n" + classes[1],
        "Predicted\n" + classes[2]
    ]), "Confusion matrix of a iris dataset\n", FONT_SIZE, TITLE_SIZE)
    measures = np.zeros((3, 4))
    ax2.matshow(measures, cmap="Greys")
    to_show = np.zeros((3, 4))
    to_show[0][0] = round(accuracy(matrix), 4)
    to_show[1][0] = np.nan
    to_show[2][0] = np.nan
    _precision = precision(matrix)