def main():
    """Train and evaluate KNN and naive-Bayes classifiers on the Iris,
    Congressional and MONKS-1/2/3 datasets.

    Each loader returns (train, train_labels, test, test_labels); the
    classifiers are evaluated on both the training and the test split.
    """
    # Initialize your parameters / load the datasets.
    i = ld.load_iris_dataset(0.7)
    c = ld.load_congressional_dataset(0.7)
    m1 = ld.load_monks_dataset(1)
    m2 = ld.load_monks_dataset(2)
    m3 = ld.load_monks_dataset(3)

    # Distance metrics for KNN.
    # Squared Euclidean distance: no need to take the square root, since it
    # does not change the ranking of the neighbours.
    euclide = lambda x, y: pow((x - y), 2)
    # Binary difference (Hamming-style) for the categorical MONKS features.
    diff_binaire = lambda x, y: 0 if x == y else 1

    # Initialize/instantiate the classifiers with their parameters.
    knn_i = Knn(train=i[0], train_labels=i[1], dist_equation=euclide)
    knn_c = Knn(train=c[0], train_labels=c[1], dist_equation=euclide)
    knn_m1 = Knn(train=m1[0], train_labels=m1[1], dist_equation=diff_binaire)
    knn_m2 = Knn(train=m2[0], train_labels=m2[1], dist_equation=diff_binaire)
    knn_m3 = Knn(train=m3[0], train_labels=m3[1], dist_equation=diff_binaire)

    bn_i = BayesNaifClassifier([1])
    bn_c = BayesNaifClassifier([0])
    bn_m1 = BayesNaifClassifier([2])
    bn_m2 = BayesNaifClassifier([2])
    bn_m3 = BayesNaifClassifier([2])

    # Train the classifiers.
    print("\n=============\nKNN train tests\n=============")
    knn_i.train_test(i[0], i[1], "Dataset: Iris, Training")
    knn_c.train_test(c[0], c[1], "Dataset: Congressional, Training")
    knn_m1.train_test(m1[0], m1[1], "Dataset: MONKS-1, Training")
    knn_m2.train_test(m2[0], m2[1], "Dataset: MONKS-2, Training")
    knn_m3.train_test(m3[0], m3[1], "Dataset: MONKS-3, Training")

    print("\n=============\nBayes Naif train tests\n=============")
    # BUG FIX: these descriptions said "Test" although this is the training
    # phase on the training split — aligned with the KNN section above.
    bn_i.train(i[0], i[1], "Dataset: Iris, Training")
    bn_c.train(c[0], c[1], "Dataset: Congressional, Training")
    bn_m1.train(m1[0], m1[1], "Dataset: MONKS-1, Training")
    bn_m2.train(m2[0], m2[1], "Dataset: MONKS-2, Training")
    bn_m3.train(m3[0], m3[1], "Dataset: MONKS-3, Training")

    # Evaluate the classifiers on the held-out test splits.
    print("\n=============\nKNN tests\n=============")
    knn_i.train_test(i[2], i[3], "Dataset: Iris, Test")
    knn_c.train_test(c[2], c[3], "Dataset: Congressional, Test")
    knn_m1.train_test(m1[2], m1[3], "Dataset: MONKS-1, Test")
    knn_m2.train_test(m2[2], m2[3], "Dataset: MONKS-2, Test")
    knn_m3.train_test(m3[2], m3[3], "Dataset: MONKS-3, Test")

    print("\n=============\nBayes Naif tests\n=============")
    bn_i.test(i[2], i[3], "Dataset: Iris, Test")
    bn_c.test(c[2], c[3], "Dataset: Congressional, Test")
    bn_m1.test(m1[2], m1[3], "Dataset: MONKS-1, Test")
    bn_m2.test(m2[2], m2[3], "Dataset: MONKS-2, Test")
    bn_m3.test(m3[2], m3[3], "Dataset: MONKS-3, Test")
if __name__ == '__main__':
    import load_datasets
    import time

    # Fraction of each dataset used for training.
    train_ratio: float = 0.90
    print(f"Train ratio: {train_ratio}")
    print("\n")

    # --- Iris dataset: Gaussian naive Bayes -------------------------------
    print('-' * 175)
    print(f"Iris dataset classification: \n")
    startTime = time.time()
    iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset(train_ratio)
    # NOTE(review): despite the "_knn" name, this is a Gaussian naive-Bayes
    # classifier (NbcGaussian) — the variable name is misleading.
    iris_knn = NbcGaussian()
    iris_knn.train(iris_train, iris_train_labels)
    iris_knn.test(iris_test, iris_test_labels)
    print(f"\n --- Elapse time: {time.time() - startTime:.2f} s --- \n")

    # --- Congressional dataset: naive Bayes -------------------------------
    print('-' * 175)
    print(f"Congressional dataset classification: \n")
    startTime = time.time()
    cong_train, cong_train_labels, cong_test, cong_test_labels = load_datasets.load_congressional_dataset(train_ratio)
    # NOTE(review): same naming issue — Nbc, not a KNN.
    cong_knn = Nbc()
    cong_knn.train(cong_train, cong_train_labels)
    cong_knn.test(cong_test, cong_test_labels)
    # (script appears truncated here in this excerpt — no elapsed-time print
    # for the congressional run is visible)
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import sys
import time

import load_datasets
import classifieur  # decision-tree classifier (project module)

# NOTE(review): other scripts in this project pass a fraction (e.g. 0.7) to
# load_iris_dataset; confirm the loader really accepts a percentage here.
train_ratio = 70

train_iris, train_labels_iris, test_iris, test_labels_iris = load_datasets.load_iris_dataset(train_ratio)


def conversion(train, train_labels, test, test_labels):
    """Wrap the raw train/test arrays into DataFrames with a "labels" column.

    Args:
        train, test: 2-D feature arrays (anything pd.DataFrame accepts).
        train_labels, test_labels: label vectors, one entry per row.

    Returns:
        (train_df, test_df, features_train, features_test) where the feature
        lists are the column names of each frame minus the trailing
        "labels" column.
    """
    train = pd.DataFrame(train)
    train["labels"] = train_labels
    features_train = list(train)  # list(df) yields the column labels
    features_train.pop()          # drop the trailing "labels" column
    test = pd.DataFrame(test)
    test["labels"] = test_labels
    # BUG FIX: the original built this list from ``train`` (copy-paste);
    # use ``test`` so the feature list reflects the test frame's columns.
    features_test = list(test)
    features_test.pop()
    return (train, test, features_train, features_test)


train_iris, test_iris, features_train_iris, feature_test_iris = conversion(train_iris, train_labels_iris, test_iris, test_labels_iris)

print("\n\n-------DATASET IRIS---------\n\n")
# Build the models.
model_tree = classifieur.DecisionTree(30)
import numpy as np
import copy

import NeuralNet
import load_datasets

# Toy problem: X = (hours sleeping, hours studying), y = score on test
X = ([2, 9], [1, 5], [3, 6])
y = np.array(([92], [86], [89]), dtype=float)

# XOR-style toy problem (overrides the X/y above).
X = [[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]]
y = [0, 1, 1, 0]

n = 1  # which MONKS problem to load

train_iris, train_labels_iris, test_iris, test_labels_iris = load_datasets.load_iris_dataset(0.03)
train_votes, train_labels_votes, test_votes, test_labels_votes = load_datasets.load_congressional_dataset(0.02)
train_monks, train_labels_monks, test_monks, test_labels_monks = load_datasets.load_monks_dataset(n)

train = train_votes
labels = train_labels_votes

# One hidden layer of 2 units; input width matches the feature count.
NN = NeuralNet.NeuralNet(1, 2, len(train[0]), 1)

# BUG FIX: ported from Python 2 — ``xrange`` and print *statements* are
# syntax/name errors under Python 3, which the rest of this project uses.
for i in range(1000):
    NN.train(train, labels)

print("Actual Output: \n" + str(labels))
print("Predicted Output: \n" + str(NN.forward(train).T[0]))

# [0,1,0,1,1,1,0,0,0,0,0,0,1,1,2,1], 0))
# One decision-tree and one neural-net classifier per dataset.
decision_tree_iris = DecisionTree.DecisionTree()
decision_tree_congress = DecisionTree.DecisionTree()
decision_tree_monks1 = DecisionTree.DecisionTree()
decision_tree_monks2 = DecisionTree.DecisionTree()
decision_tree_monks3 = DecisionTree.DecisionTree()

rn_iris = NeuralNet.NeuralNet()
rn_congress = NeuralNet.NeuralNet()
rn_monks1 = NeuralNet.NeuralNet()
rn_monks2 = NeuralNet.NeuralNet()
rn_monks3 = NeuralNet.NeuralNet()

# Load the datasets; each loader yields (train, train_labels, test, test_labels).
train_iris, train_labels_iris, test_iris, test_labels_iris = load_datasets.load_iris_dataset(0.7)
train_congress, train_labels_congress, test_congress, test_labels_congress = load_datasets.load_congressional_dataset(0.7)
train_monks1, train_labels_monks1, test_monks1, test_labels_monks1 = load_datasets.load_monks_dataset(1)
train_monks2, train_labels_monks2, test_monks2, test_labels_monks2 = load_datasets.load_monks_dataset(2)
train_monks3, train_labels_monks3, test_monks3, test_labels_monks3 = load_datasets.load_monks_dataset(3)

# Learning-curve experiments (disabled).
# decision_tree_iris.learning_curve(train_iris, train_labels_iris, test_iris, test_labels_iris)
# decision_tree_congress.learning_curve(train_congress, train_labels_congress, test_congress, test_labels_congress)
# decision_tree_monks1.learning_curve(train_monks1, train_labels_monks1, test_monks1, test_labels_monks1)
# decision_tree_monks2.learning_curve(train_monks2, train_labels_monks2, test_monks2, test_labels_monks2)
# decision_tree_monks3.learning_curve(train_monks3, train_labels_monks3, test_monks3, test_labels_monks3)
import numpy
import load_datasets
from matplotlib import pyplot
import BayesNaif
import Knn

# Load the iris data; only the train split is visualised below.
data, train_labels, test, test_labels = load_datasets.load_iris_dataset(0.8)

# All feature pairs would be: [(i, j) for i in range(4) for j in range(i+1, 4)]
pairs = [(0, 1)]

# NOTE(review): the nesting below is reconstructed from a collapsed source —
# everything following the loop header is assumed to run once per feature
# pair; confirm against the original file.
for (f1, f2) in pairs:
    # Project the data onto the two selected features.
    X = numpy.array([[i[f1], i[f2]] for i in data])
    y = numpy.array(train_labels)
    classifieurs = [BayesNaif.BayesNaif(), Knn.Knn()]
    # Bounds of the plotting region, padded by 0.5 on every side.
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    # Dense grid over the region, used for decision-boundary shading.
    xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, step=0.05), numpy.arange(y_min, y_max, step=0.05))
    colors = ['blue', 'green', 'red']
    pyplot.jet()
    # Create a figure with one sub-plot per classifier.
    fig, subfigs = pyplot.subplots(1, 2, sharex='all', sharey='all')
    # _times.append(time.time())
    # print(total_data['feature_names'][f2], "en fonction de :", total_data['feature_names'][f1])
    for clf, subfig in zip(classifieurs, subfigs.reshape(-1)):
        # TODO Q2B
        # Train the classifier
        # NOTE(review): the loop body is truncated in this excerpt.
4- Le tester """
# (the line above is the tail of a module docstring whose opening triple
# quote lies outside this excerpt)

###############################################################################################
#                                            KNN                                              #
###############################################################################################

# Initialise hyper-parameters: candidate K values per dataset.
K_values_iris = [k for k in range(1, 11)]
K_values_wine = [k for k in range(1, 51)]
K_values_abalone = [k for k in range(1, 51)]

# Initialise/instantiate the classifiers with their parameters.
# Load the datasets.
X_train_iris, y_train_iris, X_test_iris, y_test_iris = load_iris_dataset(train_ratio=0.7, seed=69, shuffle=True)
X_train_wine, y_train_wine, X_test_wine, y_test_wine = load_wine_dataset(train_ratio=0.7, seed=42, shuffle=False)
X_train_abalone, y_train_abalone, X_test_abalone, y_test_abalone = load_abalone_dataset(train_ratio=0.7, seed=42, shuffle=False)

# Drop one training row: len(X_abalone) == 4177 is prime, so the set cannot
# be split evenly for cross-validation otherwise.
X_train_abalone = np.delete(X_train_abalone, 0, axis=0)
y_train_abalone = np.delete(y_train_abalone, 0, axis=0)

# Encode the iris labels as integers.
# NOTE(review): lowercase ``labelEncoder`` — probably sklearn's LabelEncoder
# (or a project-local wrapper with that exact name); verify it resolves.
le = labelEncoder()
y_train_iris = le.fit_transform(y_train_iris)
y_test_iris = le.transform(y_test_iris)

# Cross-validation
4- Le tester """
# (the line above is the tail of a module docstring whose opening triple
# quote lies outside this excerpt)

# Initialise the naive-Bayes classifiers, tagging each with its dataset name.
iris_bayes = BayesNaif.BayesNaif("iris dataset")
congressional_bayes = BayesNaif.BayesNaif("congres dataset")
monks_bayes = BayesNaif.BayesNaif("monks dataset")

# Initialise the Knn classifiers, tagging each with its dataset name.
iris_knn = Knn.Knn("iris dataset")
congressional_knn = Knn.Knn("congres dataset")
monks_knn = Knn.Knn("monks dataset")

# Load the datasets; each loader splits its data into 4 numpy matrices.
# A ratio of 0.60 of the instances is used for training.
iris_train_dataset, iris_train_labels, iris_test_dataset, iris_test_labels =\
    load_datasets.load_iris_dataset(0.60)
congressional_train_dataset, congressional_train_labels, congressional_test_dataset, congressional_test_labels =\
    load_datasets.load_congressional_dataset(0.60)
# MONKS: problem number 2 is used here.
monks_train_dataset, monks_train_labels, monks_test_dataset, monks_test_labels =\
    load_datasets.load_monks_dataset(2)

# Train the classifiers, then run the tests.
iris_bayes.train(iris_train_dataset, iris_train_labels, "iris dataset")
iris_bayes.test(iris_test_dataset, iris_test_labels, "iris dataset")
iris_knn.train(iris_train_dataset, iris_train_labels, "iris dataset")
iris_knn.test(iris_test_dataset, iris_test_labels, "iris dataset")
congressional_bayes.train(congressional_train_dataset, congressional_train_labels, "congressional dataset")
congressional_bayes.test(congressional_test_dataset, congressional_test_labels, "congressional dataset")
distanceFunc = util.euclidean_distance # On choisi notre métric de distance comme étant la distance euclidienne ConfusionMatrixListKnn: list = list( ) # list des matrices de confusion pour Knn print(f"Knn Train ratio: {knn_train_ratio}") print(f"findBestKWithCrossValidation: {findBestKWithCrossValidation}") print("\n") print('-' * 175) print(f"Iris dataset classification: \n") startTime = time.time() # Entrainement sur l'ensemble de données Iris iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset( knn_train_ratio) iris_knn = Knn(distance_func=distanceFunc) iris_knn.train(iris_train, iris_train_labels, findBestKWithCrossValidation=findBestKWithCrossValidation) cm, _, _, _ = iris_knn.test(iris_test, iris_test_labels) ConfusionMatrixListKnn.append(cm) print( f"\n --- Elapse time: {1_000*(time.time() - startTime):.2f} ms --- \n") print('-' * 175) print(f"Congressional dataset classification: \n") startTime = time.time() # Entrainement sur l'ensemble de données Congressional
# Instantiate the classifiers with their parameters: one Knn and one
# naive-Bayes classifier per dataset.
classifieur_Knn_iris = Knn.Knn()
classifieur_Knn_congressional = Knn.Knn()
classifieur_Knn_monks_1 = Knn.Knn()
classifieur_Knn_monks_2 = Knn.Knn()
classifieur_Knn_monks_3 = Knn.Knn()

classifieur_bayes_naif_iris = BayesNaif.BayesNaif()
classifieur_bayes_naif_congressional = BayesNaif.BayesNaif()
classifieur_bayes_naif_monks_1 = BayesNaif.BayesNaif()
classifieur_bayes_naif_monks_2 = BayesNaif.BayesNaif()
classifieur_bayes_naif_monks_3 = BayesNaif.BayesNaif()

# Load the datasets; each ``dataset_*`` tuple is
# (train, train_labels, test, test_labels).
dataset_iris = load_datasets.load_iris_dataset(train_pourcentage_iris)
dataset_congressional = load_datasets.load_congressional_dataset(train_pourcentage_congressional)
dataset_monks_1 = load_datasets.load_monks_dataset(1)
dataset_monks_2 = load_datasets.load_monks_dataset(2)
dataset_monks_3 = load_datasets.load_monks_dataset(3)

# Train the KNN classifier on Iris and time the run.
print(f"Entrainement du classifieur KNN avec le dataset Iris et un pourcentage d'entrainement de {train_pourcentage_iris}")
algo_starting_time = time()
classifieur_Knn_iris.train(dataset_iris[0], dataset_iris[1], num_datset_iris)
algo_end_time = time()
print(f"\nTemps d'exécution de l'algorithme : {algo_end_time - algo_starting_time}\n")