    def test_multi_dimensional_numpy(self):
        actual_encoded, encoding = one_hot_encoding(self.multi_dimensional_numpy)
        assert len(self.multi_dimensional) == len(actual_encoded)
        assert type(actual_encoded) == np.ndarray
        actual = actual_encoded
        expected = None
        for _ in range(NUMBER_OF_RANDOM_TEST):
            # Descend one random branch of the nested array, checking the
            # expected length and type at every level
            index = randint(0, len(actual_encoded) - 1)
            actual = actual_encoded[index]
            expected = self.multi_dimensional[index]
            assert type(actual) == np.ndarray
            assert len(actual) == 5
            index = randint(0, len(actual) - 1)
            actual = actual[index]
            expected = expected[index]
            assert type(actual) == np.ndarray
            assert len(actual) == 2
            index = randint(0, len(actual) - 1)
            actual = actual[index]
            expected = expected[index]
            assert type(actual) == np.ndarray
            assert len(actual) == (N_ELEMENTS * 1000 / (5 * 4 * 2))
            # Sample leaves: each must be a valid one-hot vector that
            # matches the encoding of the original element
            for _ in range(NUMBER_OF_RANDOM_TEST):
                index = randint(0, len(actual) - 1)
                actual_element = actual[index]
                expected_element = encoding[expected[index]]
                assert type(actual_element) == np.ndarray
                assert len(actual_element) == N_ELEMENTS
                assert abs(sum(actual_element) - 1) < EPSILON
                assert (actual_element == expected_element).all()
    def test_one_dimension(self):
        actual, encoding = one_hot_encoding(self.one_dimension)
        assert len(self.one_dimension) == len(actual)
        assert type(actual) == list
        for _ in range(NUMBER_OF_RANDOM_TEST):
            index = randint(0, len(actual) - 1)
            actual_element = actual[index]
            expected = encoding[self.one_dimension[index]]
            assert type(actual_element) == list
            assert len(actual_element) == N_ELEMENTS
            assert abs(sum(actual_element) - 1) < EPSILON
            assert (expected == actual_element).all()
    def test_one_dimension_numpy(self):
        actual, encoding = one_hot_encoding(self.one_dimension_numpy)
        assert len(self.one_dimension_numpy) == len(actual)
        assert actual.shape == (len(actual), N_ELEMENTS)
        assert type(actual) == np.ndarray
        for _ in range(NUMBER_OF_RANDOM_TEST):
            index = randint(0, len(actual) - 1)
            actual_element = actual[index]
            expected = encoding[self.one_dimension_numpy[index]]
            assert type(actual_element) == np.ndarray
            assert len(actual_element) == N_ELEMENTS
            assert abs(actual_element.sum() - 1) < EPSILON
            assert (expected == actual_element).all()
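# The three tests above pin down the contract of one_hot_encoding: it maps
# every element to a one-hot vector over the unique classes, preserves the
# container type (list in, list out; ndarray in, ndarray out), and also
# returns the class-to-vector encoding dict.  A minimal sketch of a function
# with that behaviour (hypothetical, not the repo's implementation):

import numpy as np


def one_hot_encoding_sketch(data):
    """Encode every element of `data` as a one-hot vector over its classes."""
    values = np.asarray(data)
    classes = np.unique(values)  # sorted unique classes
    eye = np.identity(len(classes))
    encoding = {a_class: eye[i] for i, a_class in enumerate(classes)}
    # Fancy indexing appends a trailing axis of size len(classes)
    encoded = eye[np.searchsorted(classes, values)]
    if isinstance(data, list):
        return encoded.tolist(), encoding
    return encoded, encoding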
    # Initialize network
    network = NeuralNetwork(4, [6], 3, [tanh, sigmoid], LR)
    filename = "network"
    type_net = "Neural"
    k_fold = ""
    if args.normalize:
        network = NormalizedNetwork(4, [6], 3, [tanh, sigmoid], LR)
        type_net = "Normalized"
        filename = type_net.lower()

    # Import iris dataset
    dataset = import_data("../../data/iris.data")
    labels, encoding = one_hot_encoding(dataset[-1])
    classes = list(encoding.keys())
    dataset = dataset[0:-1]

    # Define trainer
    trainer = StandardTrainer(dataset, labels.T, TRAIN_SIZE)
    k = 1
    if args.cross_validation is not None:
        k = args.cross_validation
        k_fold = "_{}fold".format(k)
        trainer = KFoldTrainer(k, 2, dataset, labels.T)

    fig = plt.figure(figsize=FIG_SIZE)
    fig.subplots_adjust(wspace=0.3)
    ax = fig.add_subplot(121)
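# The `args` object above comes from a parser defined elsewhere in the
# script.  A minimal sketch of the flags this block relies on (the names
# mirror the attributes used, args.normalize and args.cross_validation;
# the defaults and help strings are assumptions):
import argparse

parser = argparse.ArgumentParser(description="Train a network on the iris dataset")
parser.add_argument("--normalize", action="store_true",
                    help="use NormalizedNetwork instead of NeuralNetwork")
parser.add_argument("--cross_validation", type=int, default=None, metavar="K",
                    help="number of folds; switches to KFoldTrainer")
args = parser.parse_args()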
def format_axes(title: str, scale: float, ax: Axes) -> None:
    """
    Format axes of a matplotlib graph

    :param title: Title of the plot
    :param scale: Aspect-ratio scale factor
    :param ax: Axes of matplotlib
    :return: None, modifies ax in place
    """
    ax.set_title("{}\n".format(title), fontsize=TITLE_SIZE)
    ax.set_aspect(aspect=6 * scale)
    ax.set_xlabel("Indexes", fontsize=FONT_SIZE)
    ax.grid()


if __name__ == '__main__':
    fig, axes = plt.subplots(3, 1, figsize=FIG_SIZE)

    dataset = import_data("../../data/iris.data")
    one_hot_dataset, encoding = one_hot_encoding(dataset[4])

    axes[0].imshow(one_hot_dataset.T, cmap='Greys')
    format_axes("Labels of dataset, size: {}".format(one_hot_dataset.shape[0]),
                one_hot_dataset.shape[0] / 150, axes[0])

    train_set, test_set = split_set(one_hot_dataset.T, 0.6)

    axes[1].imshow(train_set, cmap='Greys')
    format_axes("Train set, size: {}".format(train_set.shape[1]),
                train_set.shape[1] / 150, axes[1])

    axes[2].imshow(test_set, cmap='Greys')
    format_axes("Test set, size: {}".format(test_set.shape[1]),
                test_set.shape[1] / 150, axes[2])

    plt.savefig("../results/labels_of_dataset.png")
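# split_set above is given the transposed one-hot matrix (one column per
# sample) and a train fraction of 0.6.  A minimal sketch of that contract,
# assuming a plain contiguous split with no shuffling (the repo's split_set
# may well shuffle first):
import numpy as np


def split_set_sketch(data: np.ndarray, train_size: float):
    """Split the columns of `data` into a train set and a test set."""
    cut = int(data.shape[1] * train_size)
    return data[:, :cut], data[:, cut:]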
def train_evaluate(architecture: dict, dataset_name: str) -> NeuralNetwork:
    """
    Train and evaluate a network

    :param architecture: Architecture of NeuralNetwork (above)
    :param dataset_name: Dataset to use
    :return: Trained NeuralNetwork
    """
    # Import dataset and balance classes by oversampling
    dataset = import_data("../data/{}.data".format(dataset_name))
    dataset = oversample(dataset)
    more_info = "(oversample)"

    labels, encoding = one_hot_encoding(dataset[-1])
    classes = list(encoding.keys())
    dataset = np.delete(dataset, -1, 0)   # drop the label row
    dataset = np.delete(dataset, [0], 0)  # drop the first row

    # Initialize network
    logging.info("Input size: {}\tOutput size: {}".format(
        dataset.shape[0], len(encoding)))
    network = NeuralNetwork(dataset.shape[0], architecture["INTERNAL_LAYERS"],
                            len(encoding), architecture["ACT_FUNCS"],
                            architecture["LR"])

    # Define trainer
    trainer = StandardTrainer(dataset, labels.T, TRAIN_SIZE)

    fig = plt.figure(figsize=FIG_SIZE)
    fig.subplots_adjust(wspace=0.3)
    ax = fig.add_subplot(121)
    ax2 = ax.twinx()
    ax3 = fig.add_subplot(122)

    trained, (learn, costs) = trainer.train(network, epochs=EPOCHS, repeat=True)
    prediction = trainer.evaluate(trained)
    c_m = confusion_matrix(prediction, trainer.get_labels())

    line = ax.plot(learn, label="Learning Curve", linewidth=2.5, c="b")
    line2 = ax2.plot(costs, label="MSE", linestyle="--", linewidth=2.5, c="r")
    lines = line + line2

    ax.set_ylabel("Learning Curve", fontsize=FONT_SIZE)
    ax.set_xlabel("Epochs", fontsize=FONT_SIZE)
    ax.set_title("Network on {}\n".format(dataset_name), fontsize=TITLE_SIZE)
    ax.grid()
    ax2.set_ylabel("Cost", fontsize=FONT_SIZE)
    ax2.grid()

    labels = [l.get_label() for l in lines]
    ax2.legend(lines, labels, fontsize=FONT_SIZE, loc="center right")

    show_matrix(ax3, c_m,
                (classes, ["Predicted\n{}".format(a_class) for a_class in classes]),
                "Confusion Matrix of Test Set\n", FONT_SIZE, TITLE_SIZE)

    measures = {
        "accuracy": accuracy(c_m),
        "precision": precision(c_m),
        "recall": recall(c_m),
        "f1_score": f1_score(c_m)
    }

    print("Summary on {}:\n".format(dataset_name))
    report_results(c_m)

    plt.savefig("../results/Network_on_{}{}.png".format(dataset_name, more_info))
    return trained
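# Example invocation of train_evaluate, assuming an architecture dict with
# the keys read above (INTERNAL_LAYERS, ACT_FUNCS, LR); the concrete values
# are illustrative only:
architecture = {
    "INTERNAL_LAYERS": [10, 6],          # sizes of the hidden layers
    "ACT_FUNCS": [tanh, tanh, sigmoid],  # one activation per layer
    "LR": 0.01,                          # learning rate
}
trained_network = train_evaluate(architecture, "iris")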
else:
    path_dataset = args.dataset + ".data"

# Import dataset
dataset = import_data("../data/{}".format(path_dataset),
                      sep=args.sep, header=args.header)

more_info = ""
if args.oversample:
    dataset = oversample(dataset, label=args.labels)
    more_info = "(oversampled)"
if args.undersample:
    dataset = undersample(dataset, label=args.labels)
    more_info = "(undersampled)"

labels, encoding = one_hot_encoding(dataset[args.labels])
classes = list(encoding.keys())
dataset = np.delete(dataset, args.labels, 0)

# One-hot encode the categorical feature rows; iterate in reverse so the
# indexes still to be processed stay valid after each delete
encodings = list()
first = True
for i in reversed(args.categorical):
    temp1, temp2 = one_hot_encoding(dataset[i])
    dataset = np.concatenate((np.delete(dataset, i, 0), temp1.copy().T))
    encodings.append(temp2.copy())
    del temp1
    del temp2

dataset = np.delete(dataset, args.exclude, 0)
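# Toy illustration of the loop above (values are made up): rows are features
# and columns are samples, so encoding the categorical row 1 of a 2-sample
# dataset replaces it with one one-hot row per class at the bottom.
import numpy as np

toy = np.array([[1.5, 2.5],
                ["a", "b"]], dtype=object)
one_hot = np.array([[1, 0], [0, 1]])  # one row per sample, one column per class
toy = np.concatenate((np.delete(toy, 1, 0), one_hot.T))
# toy is now [[1.5, 2.5], [1, 0], [0, 1]]: the numeric row, then class rows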
import matplotlib.pyplot as plt
import numpy as np

from useful.preprocess_dataset import import_data, one_hot_encoding
from useful.results import confusion_matrix, show_matrix, annotate
from useful.results import accuracy, precision, recall, f1_score

N = int(1e5)
X_MIN = Y_MIN = -50
X_MAX = Y_MAX = 50
FIG_SIZE = (28, 12)
FONT_SIZE = 20
TITLE_SIZE = 30

np.random.seed(2)

if __name__ == '__main__':
    dataset = import_data("../../data/iris.data")[4]
    classes = np.unique(dataset)

    labels, _ = one_hot_encoding(dataset)
    # Random predictions: a baseline classifier that guesses uniformly
    prediction, _ = one_hot_encoding(
        np.random.choice(["a", "b", "c"], size=labels.shape[0], replace=True))

    matrix = confusion_matrix(prediction.T, labels.T)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=FIG_SIZE)

    show_matrix(ax1, matrix,
                ([classes[0], classes[1], classes[2]],
                 ["Predicted\n" + classes[0], "Predicted\n" + classes[1],
                  "Predicted\n" + classes[2]]),
                "Confusion matrix of the iris dataset\n", FONT_SIZE, TITLE_SIZE)

    measures = np.zeros((3, 4))
    ax2.matshow(measures, cmap="Greys")

    to_show = np.zeros((3, 4))
    to_show[0][0] = round(accuracy(matrix), 4)
    to_show[1][0] = np.nan
    to_show[2][0] = np.nan

    _precision = precision(matrix)
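# confusion_matrix above receives the transposed one-hot matrices (one
# column per sample).  A minimal sketch of what it is assumed to compute
# (hypothetical, not the repo's implementation):
import numpy as np


def confusion_matrix_sketch(prediction: np.ndarray, labels: np.ndarray) -> np.ndarray:
    """Count predictions per true class; both inputs are (n_classes, n_samples)."""
    n_classes = labels.shape[0]
    matrix = np.zeros((n_classes, n_classes))
    for true_class, predicted in zip(labels.argmax(axis=0),
                                     prediction.argmax(axis=0)):
        matrix[true_class][predicted] += 1
    return matrix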