def cv_knn(N_split, X_train, y_train, X_test, y_test, K_values):
    # Pool train and test data, then cut into N_split folds
    # (np.split requires len(X) to be divisible by N_split)
    X = np.concatenate((X_train, X_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)
    X_splits = np.split(X, N_split)
    y_splits = np.split(y, N_split)
    # Search over K
    scores = {}
    for K in K_values:
        accuracies = []
        for i in range(N_split):
            X_train = np.concatenate(np.delete(X_splits, i, 0))
            X_test = X_splits[i]
            y_train = np.concatenate(np.delete(y_splits, i, 0))
            y_test = y_splits[i]
            model = Knn(K=K)
            model.train(X_train, y_train)
            evaluate = model.evaluate(X_test, y_test)
            accuracies.append(evaluate['mean_accuracy'])
        scores[K] = np.mean(accuracies)
    # Print and return the best (K, score) pair
    best = max(scores.items(), key=operator.itemgetter(1))
    print(best)
    return best[0]
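# A minimal usage sketch for cv_knn above, on synthetic data. The
# Knn(K=...).train(...)/evaluate(...) interface returning a dict with a
# 'mean_accuracy' key is assumed from the snippet, not a confirmed API.
import numpy as np
rng = np.random.default_rng(0)
X = rng.normal(size=(120, 4))
y = rng.integers(0, 3, size=120)
# np.split requires the pooled data to divide evenly: 120 samples into
# N_split=5 folds of 24 each
best_K = cv_knn(N_split=5, X_train=X[:90], y_train=y[:90],
                X_test=X[90:], y_test=y[90:], K_values=[1, 3, 5, 7])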
def score_result(reducer_function, data, x_scaled, ini, dimensions, label_data, title):
    knn = Knn()
    score = []
    valor_k = range(ini, dimensions)
    for k in valor_k:
        new_data = reducer_function(data, x_scaled, k)
        score.append(knn.avg(new_data, label_data))
    Visualization.hit_rate_per_k(valor_k, score, title)
class EmgModel:
    def __init__(self, labels):
        self.model = Knn(k=5)
        self.labels = labels
        self.all_data = list()
        self.all_target = list()
        for pose in labels:
            label = labels[pose]
            data = self.load_data(label)
            target = [pose] * len(data)
            self.all_data += data
            self.all_target += target

    def run(self):
        # Split into training and test data (even/odd interleaving)
        train_data = self.all_data[0::2]
        train_target = self.all_target[0::2]
        predict_data = self.all_data[1::2]
        predict_target = self.all_target[1::2]
        # Train the model
        self.model.fit(train_data, train_target)
        # Predict
        y = self.model.predict(predict_data)
        num = 0
        for i in range(len(y)):
            if predict_target[i] == y[i]:
                num += 1
        cm = metrics.confusion_matrix(y, predict_target)
        print("Confusion matrix")
        print(cm)
        print("Classification report")
        cr = metrics.classification_report(y, predict_target)
        print(cr)
        print("Accuracy: %s %%" % ((num / len(predict_target)) * 100))

    def load_data(self, label):
        parent_dir = "data/%s" % label
        res = []
        for file in os.listdir(parent_dir):
            filename = "%s/%s" % (parent_dir, file)
            with open(filename, 'r') as f:
                data = []
                for line in f.readlines():
                    row = line.replace('\n', '').replace("[", "").replace("]", "").split(",")
                    row = [float(x) for x in row]
                    data.append(row)
            data = np.array(data).T
            res.append(data)
        return res
def main():
    knn = Knn(7)
    knn.fit(x, y)
    img = mpimg.imread(sys.argv[1])
    # Walk the image in 50x50 tiles; // keeps the range arguments integral
    for j in range(img.shape[0] // 50 + 1):
        for i in range(img.shape[1] // 50 + 1):
            h, b = np.histogram(img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50])
            if knn.predict(h) == 1:
                img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50, 1] = 0
    plt.imshow(img)
    plt.show()
def checkPerformance(self, valSet, trainSet, k):
    result = 0
    for i in range(len(valSet)):
        # perform knn
        predicted = Knn.knn(trainSet, valSet[i], k, True)
        # check if predicted class and actual class match
        result += self.checkPrediction(predicted, valSet[i][-1])
    # return the fraction correctly predicted
    return result / len(valSet)
def cross_validation(origiData, origiLabel, splitNum):
    lastIndex = 0
    offset = int(len(origiData) / splitNum)
    accurateRateSum = 0
    for i in range(1, splitNum + 1):
        # Slice the dataset into splitNum-1 training folds and 1 test fold
        tempData = np.split(origiData, (lastIndex, i * offset))
        tempLabel = np.split(origiLabel, (lastIndex, i * offset))
        testData = tempData[1]
        testLabel = tempLabel[1]
        trainData = np.concatenate([tempData[0], tempData[2]])
        trainLabel = np.concatenate([tempLabel[0], tempLabel[2]])
        # Start index of the next fold
        lastIndex = i * offset
        # Predict with knn
        knn = Knn(trainData, trainLabel, 10)
        predictRsl = knn.predict(testData)
        accuracy = caculate_accuracy(predictRsl, testLabel)
        accurateRateSum += accuracy
    # Return the mean accuracy over all folds
    return accurateRateSum / splitNum
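# The fold index arithmetic above is easy to get wrong; a hedged sketch of
# the same loop built on np.array_split, which returns a list of folds and
# also tolerates datasets that do not divide evenly. The Knn(trainData,
# trainLabel, 10) constructor and caculate_accuracy are assumed from the
# snippet above.
import numpy as np

def cross_validation_array_split(origiData, origiLabel, splitNum):
    dataFolds = np.array_split(origiData, splitNum)
    labelFolds = np.array_split(origiLabel, splitNum)
    total = 0
    for i in range(splitNum):
        # fold i is the test set; all remaining folds form the training set
        trainData = np.concatenate(dataFolds[:i] + dataFolds[i + 1:])
        trainLabel = np.concatenate(labelFolds[:i] + labelFolds[i + 1:])
        knn = Knn(trainData, trainLabel, 10)
        total += caculate_accuracy(knn.predict(dataFolds[i]), labelFolds[i])
    return total / splitNum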
# testing data
#testClass = np.array([[1, 2, 1], [2, 3, 0], [7, 8, 1], [4, 5, 0], [6, 6, 1],
#                      [6, 5, 1], [0, 1, 0], [1, 1, 0], [2, 2, 0], [3, 7, 1],
#                      [3, 1, 1], [9, 7, 1], [1, 5, 0], [7, 2, 1], [5, 5, 0]])
#editednn = EditedNN()
#print(editednn.eknn(testClass, 3))
def eknn(self, trainSet, k):
    # separate into training and validation sets
    ennSet, val = train_test_split(trainSet, test_size=0.2, random_state=0)
    # repeat until performance stops improving on validation set
    prevPerformance = 0.0
    perfImprove = True
    loopCount = len(ennSet)
    while perfImprove:
        # copy previous enn set
        prevTrain = ennSet.copy()
        # loop through training set
        i = 0
        while i < loopCount:
            # remove ith point from training set b/c it will be test point for knn
            knnTestPoint = ennSet[i]
            tempTrain = np.delete(ennSet, i, 0)
            # perform knn
            predicted = Knn.knn(tempTrain, knnTestPoint, k, True)
            # check if predicted class and actual class match
            result = self.checkPrediction(predicted, knnTestPoint[-1])
            # if not equal then keep set with removed point
            if result == 0:
                ennSet = tempTrain
                loopCount -= 1
            else:
                i += 1
        # check performance of the edited set itself (not the last
        # tentative deletion) and only continue if not degrading
        curPerformance = self.checkPerformance(val, ennSet, k)
        if curPerformance > prevPerformance:
            prevPerformance = curPerformance
        else:
            ennSet = prevTrain
            perfImprove = False
    pd.DataFrame(ennSet).to_csv("reduced_datasets/enn.csv", header=None, index=None)
    # return edited-nn set
    return ennSet
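# A hedged usage sketch for eknn above, mirroring the commented-out test
# data in the neighbouring snippet: each row is [feature..., class], and
# the EditedNN class is assumed from that context.
import numpy as np
testClass = np.array([[1, 2, 1], [2, 3, 0], [7, 8, 1], [4, 5, 0], [6, 6, 1],
                      [0, 1, 0], [1, 1, 0], [2, 2, 0], [3, 7, 1], [9, 7, 1]])
editednn = EditedNN()
reduced = editednn.eknn(testClass, 3)  # also writes reduced_datasets/enn.csv
print(len(reduced), "of", len(testClass), "points kept")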
import csv
from numpy import recfromcsv
from Knn import Knn
from ContextEngineBase import Complexity

## For different tests, these values will vary.
inputFilePath = "dish.csv"
outputFilePath = "dishOutput.csv"
complexity = Complexity.secondOrder
numTrainingSamples = 96
numExecuteSamples = 96
inputFile = open(inputFilePath)
outputFile = open(outputFilePath)
inputReader = csv.reader(inputFile)
outputReader = csv.reader(outputFile)
# NOTE: this rebinding shadows the csv module from here on
csv = recfromcsv(inputFilePath, delimiter=',')

## Change the name of the algorithm to test it out.
algorithmTest = Knn(complexity, 7, 0, [0, 0, 0, 0, 0, 0, 0], {})
teslaTimestamps = {}
knnTimestamps = {}
print(algorithmTest.complexity)
print(algorithmTest.functionOrder)

numRow = 96
day_train_start = 0
day_train_end = 0
day_predict_start = 1
day_predict_end = 1

# read in csv and parse data to trainer
for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    row = csv[i]
def __init__(self):
    Knn.inputFeature(self)
    Knn.inputClass(self)
    Knn.inputData(self)
    Knn.inputData(self)
    exit()
        bayesC.start()
    else:
        for r in range(len(documents_to_clasificated)):
            bayesC = ClasificationBayes(documents_to_clasificated[r], option)
            bayesC.start()
elif action_option == "3":
    print(
        "The data-classification program starts.\n"
        "Choose an option for the documents' path: R (project root) or type a path"
    )
    option = input()
    if option == "R" or option == "r":
        knn = Knn(documents, categories, documents_by_category, "")
        knn.start_algorithm()
    else:
        knn = Knn(documents, categories, documents_by_category, option)
        knn.start_algorithm()
    documents_to_clasificated = AuxiliaryMethod.get_documents_words_to_clasificated(
    )
    k = input("Enter a k greater than zero: ")
    if k.isdigit() and int(k) > 0:
        for r in range(len(documents_to_clasificated)):
            KnnC = ClasificationKnn(documents_to_clasificated[r], documents,
                                    categories, "datos/datos_knn.csv", int(k))
            KnnC.start()
numTrainingSamples = trainRecStop - trainRecStart + 1
inDataTrain, outDataTrain = gdpTest.collectData(trainRecStart, trainRecStop)

# Use the collect data routine to fetch test data in separate lists
# for input and output
testRecStart = 2001
testRecStop = 3000
numExecuteSamples = testRecStop - testRecStart + 1
inDataTest, outDataTest = gdpTest.collectData(testRecStart, testRecStop)

print("Done: collecting data from GDP")
print("Beginning loading and training")

# For testing purposes, print the input for the test data;
# each line in the output corresponds to one input data field (record)
# print inDataTest

## Change the name of the algorithm to test it out.
algorithmTest = Knn(complexity, numInp, 0, numInp * [0], {})
timestamps = {}

# Add training data to CE object
for i in range(len(outDataTrain)):
    # record timestamps before and after adding, to measure load time
    firstTS = time.time()
    algorithmTest.addSingleObservation(inDataTrain[:][i], outDataTrain[i])
    secondTS = time.time()
    timestamps["load" + str(i)] = secondTS - firstTS

# train the CE using the added data, measuring the training time
firstTS = time.time()
algorithmTest.train()
secondTS = time.time()
timestamps["train"] = secondTS - firstTS

print("Done: loading and training")
print("Beginning execution")
from numpy import recfromcsv
from time import strptime
import matplotlib.pyplot as plt
from Knn import Knn
import numpy as np

csv = recfromcsv('refridge.csv', delimiter=',')
trainer = Knn(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0)
x_train = []
y_train = []
x_predict = []
x_real = []
y_real = []
numRow = 96
day_train_start = 0
day_train_end = 3
day_predict = 4

# read in csv and parse data to trainer;
# use the first 4 days' data (96 records per day) as the training set
for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    row = csv[i]
    date = row[0]
    energy = row[1]
    date = date.replace("/", " ")
    date = date.replace(":", " ")
import csv
import numpy as np
from numpy import recfromcsv
from Knn import Knn

# open output file
output = open('test/knnDishwasherTestResult.csv', 'w')
fieldnames = ['real_power', 'predict_power']
writer = csv.DictWriter(output, fieldnames=fieldnames)
writer.writeheader()

# NOTE: this rebinding shadows the csv module from here on
csv = recfromcsv('all.csv', delimiter=',')
trainer = Knn(complexity=2,
              numInputs=7,
              inputClassifiers=np.empty([7]),
              outputClassifier=0,
              appFieldsDict=0)
x_train = []
y_train = []
x_predict = []
y_predict = []
x_real = []
y_real = []
numRow = 96
day_train_start = 0
day_train_end = 150
#day_predict = 103
day_predict_start = 150
day_predict_end = 299
State in a comment which parameters you used.
Broadly, you will:
1- Initialize your classifier with its parameters
2- Load the datasets
3- Train your classifier
4- Test it
"""

# Initialize your parameters
k = 3

# Initialize/instantiate your classifiers with their parameters
knn_iris = Knn(k=k)
knn_vote = Knn(k=k)
knn_monks_1 = Knn(k=k)
knn_monks_2 = Knn(k=k)
knn_monks_3 = Knn(k=k)
bayesNaif_iris = BayesNaif()
bayesNaif_vote = BayesNaif()
bayesNaif_monks = BayesNaif()

# Load/read the datasets
iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset(
    0.7)
congressional_train, congressional_train_labels, congressional_test, \
    congressional_test_labels = load_datasets.load_congressional_dataset(0.7)
train_data_file_name = "../HandWrittenLetters.txt"
classes_label = 'ABCDE'
# numbers = '1245'
letter_to_digit = Task_E.letter_2_digit_convert(classes_label)
# for i in numbers:
#     letter_to_digit.append(i)
data_frame = Task_E.pickDataClass(train_data_file_name, letter_to_digit)
train_data_set_without_labels, train_y, test_data_set_without_labels, test_y, train_data_with_labels, test_data_with_labels = Task_E.splitData2TestTrain(
    data_frame, 39, 9)
centroid_data_frame_train = deepcopy(train_data_with_labels)
centroid_data_frame_test = deepcopy(test_data_with_labels)
# make_file_and_save_data_train = Task_E.store(train_data_set_without_labels.T, train_y, 'jenil_train.csv')
# make_file_and_save_data_test = Task_E.store(test_data_set_without_labels.T, test_y, 'jenil_test.csv')

k = 5
knn_object = Knn(k)
data_with_euclidean_distance = knn_object.calculate_distance(
    train_data_with_labels.values, test_data_with_labels.values)
# the loop variable is named row so it does not shadow the neighbour count k
accuracy = knn_object.get_accuracy([
    (row['Test Label'], row['Classification'])
    for row in data_with_euclidean_distance
])
print('Accuracy of Knn is:', accuracy)

# Linear Regression
linear_regression_object = LinearRegression.LinearRegression()
N_train, L_train, Xtrain = len(train_y), train_y, train_data_set_without_labels.T
N_test, Ytest, Xtest = len(test_y), test_y, test_data_set_without_labels.T
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from Knn import Knn

digits = datasets.load_digits()
X, y = digits.data, digits.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1000)
# plt.figure()
# plt.scatter(X[:, [0]], X[:, [1]], c=y)
# plt.show()

# try a few values of k and keep the most accurate one
acc = 0
best_k = None
for k in [3, 5, 7]:
    clf = Knn(k)
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)
    t_acc = np.sum(predictions == y_test) / len(y_test)
    if t_acc > acc:
        acc = t_acc
        best_k = k
print(acc)
print(best_k)
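# Note the loop above selects k on the test set itself, which leaks test
# information into model selection. A sketch of the same search against a
# held-out validation split instead (the Knn fit/predict interface is taken
# from the snippet; the model is retrained on the full training split at
# the end):
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train,
                                            test_size=0.25, random_state=1000)
best_k, best_acc = None, 0.0
for k in [3, 5, 7]:
    clf = Knn(k)
    clf.fit(X_tr, y_tr)
    val_acc = np.sum(clf.predict(X_val) == y_val) / len(y_val)
    if val_acc > best_acc:
        best_acc, best_k = val_acc, k
clf = Knn(best_k)
clf.fit(X_train, y_train)
print(np.sum(clf.predict(X_test) == y_test) / len(y_test))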
import csv
from Person import Person
from Classifier import Classifier
from Lr import Lr
from Knn import Knn
from Kmeans import testClustering

# load data from the csv file, skipping the header row
personList = []
with open('diabetes-dataset.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
    count = 1
    for row in reader:
        if count != 1:
            data = ''.join(row).split(',')
            p = Person(data)
            personList.append(p)
        count += 1

print('results based on logistic regression')
lr = Lr()
c = Classifier(personList, lr)
c.run()

print('results based on K nearest neighbours')
knn = Knn()
c = Classifier(personList, knn)
c.run()

print('results based on K means')
for k in (2, 4, 6):
    print('\n Test k-means (k = ' + str(k) + ')')
    posFracs = testClustering(personList, k, 2)
atributos = Setup.atributos
estudiantes = Setup.estudiantes
valoresPosibles = Setup.valoresPosibles

operadores = {}
OperadoresBuilder.multiple(operadores, OperadoresBuilder.hamming,
                           ["school", "sex", "schoolsup", "famsup", "paid",
                            "activities", "nursery", "higher", "internet",
                            "romantic", "address", "famsize", "Pstatus",
                            "Mjob", "Fjob", "reason", "guardian"])
OperadoresBuilder.multiple(operadores, OperadoresBuilder.rango,
                           ["age", "Medu", "Fedu", "traveltime", "studytime",
                            "failures", "famrel", "freetime", "goout", "Dalc",
                            "Walc", "health", "absences"], valoresPosibles)

knn = Knn(estudiantes, "G3", atributos, operadores)

# Run multiple trials to estimate the algorithm's effectiveness
# for any choice of training / test cases
CANTIDAD_PRUEBAS = 25

# Hold out 1/5 of the cases for testing
# (integer division keeps the slice index an int)
cantEstTest = len(estudiantes) // 5
shuffle(estudiantes)
estudiantesTest = estudiantes[:cantEstTest]
estudiantesEntrenamiento = estudiantes[cantEstTest:]
cantEstTest = len(estudiantes) // 5
         'param': 'apparent_power',
         'lag': 4,
         'norm': 'lin'}

# Number of CE inputs
numInp = 4

## Algorithm to be tested
interfaceDict = {'in': [dict1, dict2, dict3, dict4], 'out': dict0}
ceDict = {'interface': interfaceDict,
          'n_neighbors': 4,
          'weights': 'uniform',
          'algorithm': 'auto',
          'n_jobs': 1,
          'complexity': 1}
algorithmTest = Knn(numInp, 0, [0, 0, 0, 0], ceDict)

print("Collecting training and test data from GDP")

# Use the collect data routine to fetch training data in separate lists
# for input and output
trainRecStart = 100
trainRecStop = 200
numTrainingSamples = trainRecStop - trainRecStart + 1
inDataTrain, outDataTrain = algorithmTest.interface.collectData(trainRecStart, trainRecStop)

# Use the collect data routine to fetch test data in separate lists
# for input and output
testRecStart = 201
testRecStop = 250
numExecuteSamples = testRecStop - testRecStart + 1
inDataTest, outDataTest = algorithmTest.interface.collectData(testRecStart, testRecStop)

print("Done: collecting data from GDP")
ConfusionMatrixListKnn: list = list()  # list of confusion matrices for Knn

print(f"Knn Train ratio: {knn_train_ratio}")
print(f"findBestKWithCrossValidation: {findBestKWithCrossValidation}")
print("\n")

print('-' * 175)
print(f"Iris dataset classification: \n")
startTime = time.time()

# Training on the Iris dataset
iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset(
    knn_train_ratio)
iris_knn = Knn(distance_func=distanceFunc)
iris_knn.train(iris_train, iris_train_labels,
               findBestKWithCrossValidation=findBestKWithCrossValidation)
cm, _, _, _ = iris_knn.test(iris_test, iris_test_labels)
ConfusionMatrixListKnn.append(cm)
print(f"\n --- Elapsed time: {1_000*(time.time() - startTime):.2f} ms --- \n")

print('-' * 175)
print(f"Congressional dataset classification: \n")
startTime = time.time()

# Training on the Congressional dataset
#!/usr/bin/env python
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from Knn import Knn
from img.data import x, y

if len(sys.argv) < 2:
    print("Provide the image path")
    exit()

knn = Knn(7)
knn.fit(x, y)
img = mpimg.imread(sys.argv[1])

# walk the image in 50x50 tiles; classify each tile by its histogram
# and zero the green channel of tiles predicted as class 1
for j in range(img.shape[0] // 50 + 1):
    for i in range(img.shape[1] // 50 + 1):
        h, b = np.histogram(img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50])
        if knn.predict(h) == 1:
            img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50, 1] = 0

plt.imshow(img)
plt.show()
N_split_iris = 10
N_split_wine = 3     # --> takes too long otherwise
N_split_abalone = 3  # --> takes too long otherwise

print('IRIS')
K_opti_iris = cv_knn(N_split_iris, X_train_iris, y_train_iris, X_test_iris,
                     y_test_iris, K_values_iris)
print('WINE')
K_opti_wine = cv_knn(N_split_wine, X_train_wine, y_train_wine, X_test_wine,
                     y_test_wine, K_values_wine)
print('ABALONE')
K_opti_abalone = cv_knn(N_split_abalone, X_train_abalone, y_train_abalone,
                        X_test_abalone, y_test_abalone, K_values_abalone)

# Train your classifier
clf_Knn_iris = Knn(K=K_opti_iris)
clf_Knn_iris.train(X_train_iris, y_train_iris)

clf_Knn_wine = Knn(K=K_opti_wine)
clf_Knn_wine.train(X_train_wine, y_train_wine)

clf_Knn_abalone = Knn(K=K_opti_abalone)
clf_Knn_abalone.train(X_train_abalone, y_train_abalone)

"""
After training, evaluate your model on the training data.

IMPORTANT:
You must print here, using Python's print() command:
- the confusion matrix
- the accuracy
- the precision
def main():
    # Initialize your parameters
    i = ld.load_iris_dataset(0.7)
    c = ld.load_congressional_dataset(0.7)
    m1 = ld.load_monks_dataset(1)
    m2 = ld.load_monks_dataset(2)
    m3 = ld.load_monks_dataset(3)

    # Initialize/instantiate your classifiers with their parameters
    euclide = lambda x, y: pow((x - y), 2)  # No need to take the square root: it does not change the neighbour ordering
    diff_binaire = lambda x, y: 0 if x == y else 1
    knn_i = Knn(train=i[0], train_labels=i[1], dist_equation=euclide)
    knn_c = Knn(train=c[0], train_labels=c[1], dist_equation=euclide)
    knn_m1 = Knn(train=m1[0], train_labels=m1[1], dist_equation=diff_binaire)
    knn_m2 = Knn(train=m2[0], train_labels=m2[1], dist_equation=diff_binaire)
    knn_m3 = Knn(train=m3[0], train_labels=m3[1], dist_equation=diff_binaire)
    bn_i = BayesNaifClassifier([1])
    bn_c = BayesNaifClassifier([0])
    bn_m1 = BayesNaifClassifier([2])
    bn_m2 = BayesNaifClassifier([2])
    bn_m3 = BayesNaifClassifier([2])

    # Train your classifier
    print("\n=============\nKNN train tests\n=============")
    knn_i.train_test(i[0], i[1], "Dataset: Iris, Training")
    knn_c.train_test(c[0], c[1], "Dataset: Congressional, Training")
    knn_m1.train_test(m1[0], m1[1], "Dataset: MONKS-1, Training")
    knn_m2.train_test(m2[0], m2[1], "Dataset: MONKS-2, Training")
    knn_m3.train_test(m3[0], m3[1], "Dataset: MONKS-3, Training")

    print("\n=============\nBayes Naif train tests\n=============")
    bn_i.train(i[0], i[1], "Dataset: Iris, Test")
    bn_c.train(c[0], c[1], "Dataset: Congressional, Test")
    bn_m1.train(m1[0], m1[1], "Dataset: MONKS-1, Test")
    bn_m2.train(m2[0], m2[1], "Dataset: MONKS-2, Test")
    bn_m3.train(m3[0], m3[1], "Dataset: MONKS-3, Test")

    print("\n=============\nKNN tests\n=============")
    # Test your classifier
    knn_i.train_test(i[2], i[3], "Dataset: Iris, Test")
    knn_c.train_test(c[2], c[3], "Dataset: Congressional, Test")
    knn_m1.train_test(m1[2], m1[3], "Dataset: MONKS-1, Test")
    knn_m2.train_test(m2[2], m2[3], "Dataset: MONKS-2, Test")
    knn_m3.train_test(m3[2], m3[3], "Dataset: MONKS-3, Test")

    print("\n=============\nBayes Naif tests\n=============")
    bn_i.test(i[2], i[3], "Dataset: Iris, Test")
    bn_c.test(c[2], c[3], "Dataset: Congressional, Test")
    bn_m1.test(m1[2], m1[3], "Dataset: MONKS-1, Test")
    bn_m2.test(m2[2], m2[3], "Dataset: MONKS-2, Test")
    bn_m3.test(m3[2], m3[3], "Dataset: MONKS-3, Test")
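# A quick check of the comment on `euclide` above: squaring is a monotone
# transform of Euclidean distance, so both give the same nearest-neighbour
# ordering. Minimal sketch with plain numpy:
import numpy as np
query = np.array([0.0, 0.0])
pts = np.array([[1.0, 2.0], [3.0, 1.0], [0.5, 0.5]])
d2 = ((pts - query) ** 2).sum(axis=1)  # squared distances
d = np.sqrt(d2)                        # true Euclidean distances
assert (np.argsort(d2) == np.argsort(d)).all()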
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from Knn import Knn

np.random.seed(0)
X = np.sort(5 * np.random.rand(40, 1), axis=0)
T = np.linspace(0, 5, 500)[:, np.newaxis]
y = np.sin(X).ravel()

# Add noise to every 5th target
y[::5] += 1 * (0.5 - np.random.rand(8))

###############################################################################
# Fit regression model
trainer = Knn(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0)
trainer.addBatchObservations(X, y)
trainer.train()
y_predict = np.empty([0])
for i in range(T.shape[0]):
    result = trainer.execute(T[i])
    y_predict = np.concatenate((y_predict, result))

plt.subplot(1, 1, 1)
plt.scatter(X, y, c='k', label='data')
plt.plot(T, y_predict, c='g', label='prediction')
plt.axis('tight')
plt.legend()
plt.title("KNeighborsRegressor (k = %i, weights = '%s')" % (2, "uniform"))
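# The snippet imports KNeighborsRegressor without using it; a hedged
# comparison of the custom trainer against sklearn's regressor on the same
# grid (real sklearn API; k=2 and 'uniform' weights are taken from the plot
# title above, so they are an assumption about the trainer's settings):
sk_model = KNeighborsRegressor(n_neighbors=2, weights="uniform")
sk_model.fit(X, y)
y_sklearn = sk_model.predict(T)
print("max |custom - sklearn|:", np.abs(y_predict - y_sklearn).max())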