def main():
    """Train and evaluate Knn and naive-Bayes classifiers on five datasets."""
    # Load the datasets (70/30 split for iris and congressional).
    iris = ld.load_iris_dataset(0.7)
    congress = ld.load_congressional_dataset(0.7)
    monks1 = ld.load_monks_dataset(1)
    monks2 = ld.load_monks_dataset(2)
    monks3 = ld.load_monks_dataset(3)

    # Distance functions: squared difference for numeric features (no need to
    # take the square root, the ranking is unchanged) and a 0/1 mismatch
    # indicator for categorical features.
    def euclide(x, y):
        return pow((x - y), 2)

    def diff_binaire(x, y):
        return 0 if x == y else 1

    # Instantiate the classifiers with their parameters.
    knn_i = Knn(train=iris[0], train_labels=iris[1], dist_equation=euclide)
    knn_c = Knn(train=congress[0], train_labels=congress[1], dist_equation=euclide)
    knn_m1 = Knn(train=monks1[0], train_labels=monks1[1], dist_equation=diff_binaire)
    knn_m2 = Knn(train=monks2[0], train_labels=monks2[1], dist_equation=diff_binaire)
    knn_m3 = Knn(train=monks3[0], train_labels=monks3[1], dist_equation=diff_binaire)

    bn_i = BayesNaifClassifier([1])
    bn_c = BayesNaifClassifier([0])
    bn_m1 = BayesNaifClassifier([2])
    bn_m2 = BayesNaifClassifier([2])
    bn_m3 = BayesNaifClassifier([2])

    # Evaluate on the training split.
    print("\n=============\nKNN train tests\n=============")
    knn_i.train_test(iris[0], iris[1], "Dataset: Iris, Training")
    knn_c.train_test(congress[0], congress[1], "Dataset: Congressional, Training")
    knn_m1.train_test(monks1[0], monks1[1], "Dataset: MONKS-1, Training")
    knn_m2.train_test(monks2[0], monks2[1], "Dataset: MONKS-2, Training")
    knn_m3.train_test(monks3[0], monks3[1], "Dataset: MONKS-3, Training")

    print("\n=============\nBayes Naif train tests\n=============")
    bn_i.train(iris[0], iris[1], "Dataset: Iris, Test")
    bn_c.train(congress[0], congress[1], "Dataset: Congressional, Test")
    bn_m1.train(monks1[0], monks1[1], "Dataset: MONKS-1, Test")
    bn_m2.train(monks2[0], monks2[1], "Dataset: MONKS-2, Test")
    bn_m3.train(monks3[0], monks3[1], "Dataset: MONKS-3, Test")

    # Evaluate on the held-out test split.
    print("\n=============\nKNN tests\n=============")
    knn_i.train_test(iris[2], iris[3], "Dataset: Iris, Test")
    knn_c.train_test(congress[2], congress[3], "Dataset: Congressional, Test")
    knn_m1.train_test(monks1[2], monks1[3], "Dataset: MONKS-1, Test")
    knn_m2.train_test(monks2[2], monks2[3], "Dataset: MONKS-2, Test")
    knn_m3.train_test(monks3[2], monks3[3], "Dataset: MONKS-3, Test")

    print("\n=============\nBayes Naif tests\n=============")
    bn_i.test(iris[2], iris[3], "Dataset: Iris, Test")
    bn_c.test(congress[2], congress[3], "Dataset: Congressional, Test")
    bn_m1.test(monks1[2], monks1[3], "Dataset: MONKS-1, Test")
    bn_m2.test(monks2[2], monks2[3], "Dataset: MONKS-2, Test")
    bn_m3.test(monks3[2], monks3[3], "Dataset: MONKS-3, Test")
def cv_knn(N_split, X_train, y_train, X_test, y_test, K_values):
    """Select the best K for Knn via N_split-fold cross-validation.

    The train and test sets are pooled and split into N_split equal folds
    (np.split requires the pooled length to be divisible by N_split). Each
    candidate K in K_values is scored by its mean accuracy over the folds.

    Returns the K with the highest mean accuracy; also prints the winning
    (K, score) pair.
    """
    X = np.concatenate((X_train, X_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)
    # Build the folds once, up front.
    X_splits = np.split(X, N_split)
    y_splits = np.split(y, N_split)

    # Grid-search over K.
    scores = {}
    for K in K_values:
        accuracies = []
        for i in range(N_split):
            # Fold i is held out for validation; the rest is training data.
            # (Fixed: the original clobbered its own X_train/y_train/X_test/
            # y_test parameters here.)
            X_tr = np.concatenate(np.delete(X_splits, i, 0))
            y_tr = np.concatenate(np.delete(y_splits, i, 0))
            model = Knn(K=K)
            model.train(X_tr, y_tr)
            evaluate = model.evaluate(X_splits[i], y_splits[i])
            accuracies.append(evaluate['mean_accuracy'])
        scores[K] = np.mean(accuracies)

    # Compute the argmax once (the original computed it twice).
    best = max(scores.items(), key=operator.itemgetter(1))
    print(best)
    return best[0]
def score_result(reducer_function, data, x_scaled, ini, dimensions, label_data, title):
    """Plot the Knn hit rate for every reduced dimensionality k in [ini, dimensions)."""
    classifier = Knn()
    k_range = range(ini, dimensions)
    # For each k: reduce the data to k dimensions, then score it with Knn.avg.
    scores = [
        classifier.avg(reducer_function(data, x_scaled, k), label_data)
        for k in k_range
    ]
    Visualization.hit_rate_per_k(k_range, scores, title)
def __init__(self, labels):
    """Build the training pool by loading the samples of every pose label."""
    self.model = Knn(k=5)
    self.labels = labels
    self.all_data = []
    self.all_target = []
    # Each pose contributes its loaded samples, tagged with the pose name.
    for pose, label in labels.items():
        samples = self.load_data(label)
        self.all_data += samples
        self.all_target += [pose] * len(samples)
def main():
    """Classify 50x50 tiles of the image named on the command line.

    Tiles whose histogram Knn labels as class 1 get their green channel
    zeroed; the annotated image is then displayed.
    """
    knn = Knn(7)
    knn.fit(x, y)
    img = mpimg.imread(sys.argv[1])
    # Use floor division: range() needs an int, and `/` yields a float on
    # Python 3 (the original would raise TypeError there). `//` behaves the
    # same on Python 2.
    for j in range(img.shape[0] // 50 + 1):
        for i in range(img.shape[1] // 50 + 1):
            tile = img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50]
            h, b = np.histogram(tile)
            if (knn.predict(h) == 1):
                # Mark a positive tile by zeroing its green channel.
                img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50, 1] = 0
    plt.imshow(img)
    plt.show()
def cross_validation(origiData, origiLabel, splitNum):
    """Estimate Knn (k=10) accuracy by splitNum-fold cross-validation.

    Returns the mean accuracy over the splitNum folds.
    """
    lastIndex = 0
    offset = int(len(origiData) / splitNum)
    accurateRateSum = 0
    for i in range(1, splitNum + 1):
        # Split the data set into splitNum-1 training folds and 1 test fold:
        # np.split at (lastIndex, i*offset+1) yields [before, test, after].
        tempData = np.split(origiData, (lastIndex, i * offset + 1))
        tempLabel = np.split(origiLabel, (lastIndex, i * offset + 1))
        testData = tempData[1]
        testLabel = tempLabel[1]
        trainData = np.concatenate([tempData[0], tempData[2]])
        trainLabel = np.concatenate([tempLabel[0], tempLabel[2]])
        # Start index of the next fold.
        # NOTE(review): the test fold ends at i*offset+1 (exclusive) but the
        # next fold starts at i*offset, so consecutive test folds appear to
        # overlap by one sample — confirm whether this off-by-one is intended.
        lastIndex = i * offset
        # Predict the held-out fold with Knn.
        knn = Knn(trainData, trainLabel, 10)
        predictRsl = knn.predict(testData)
        accuracy = caculate_accuracy(predictRsl, testLabel)
        accurateRateSum += accuracy
    # Return the mean accuracy across folds.
    return accurateRateSum / splitNum
# Pick classes A-E from the handwritten-letters data set, split each class into
# 39 training and 9 test samples, then score Knn (k=5) on the split.
train_data_file_name = "../HandWrittenLetters.txt"
classes_label = 'ABCDE'
# numbers = '1245'
letter_to_digit = Task_E.letter_2_digit_convert(classes_label)
# for i in numbers:
#     letter_to_digit.append(i)
data_frame = Task_E.pickDataClass(train_data_file_name, letter_to_digit)
# 39 samples per class go to training, 9 to test — TODO confirm against
# Task_E.splitData2TestTrain's parameter meaning.
train_data_set_without_labels, train_y, test_data_set_without_labels, test_y, train_data_with_labels, test_data_with_labels = Task_E.splitData2TestTrain(
    data_frame, 39, 9)
centroid_data_frame_train = deepcopy(train_data_with_labels)
centroid_data_frame_test = deepcopy(test_data_with_labels)
# make_file_and_save_data_train = Task_E.store(train_data_set_without_labels.T, train_y, 'jenil_train.csv')
# make_file_and_save_data_test = Task_E.store(test_data_set_without_labels.T, test_y, 'jenil_test.csv')
k = 5
knn_object = Knn(k)
data_with_euclidean_distance = knn_object.calculate_distance(
    train_data_with_labels.values, test_data_with_labels.values)
# NOTE: the comprehension variable k shadows the outer k = 5 only inside the
# comprehension's own scope (Python 3); the outer k is unchanged.
accuracy = knn_object.get_accuracy([
    (k['Test Label'], k['Classification']) for k in data_with_euclidean_distance
])
print('Accuracy of Knn is:', accuracy)
# Linear Regression
linear_regression_object = LinearRegression.LinearRegression()
N_train, L_train, Xtrain = len(
    train_y), train_y, train_data_set_without_labels.T
N_test, Ytest, Xtest = len(
    test_y), test_y, test_data_set_without_labels.T
from Knn import Knn
from ContextEngineBase import Complexity

## For different tests, these values will vary.
inputFilePath = "dish.csv"
outputFilePath = "dishOutput.csv"
complexity = Complexity.secondOrder
numTrainingSamples = 96
numExecuteSamples = 96
# NOTE(review): these files are never closed; consider `with open(...)`.
inputFile = open(inputFilePath)
outputFile = open(outputFilePath)
inputReader = csv.reader(inputFile)
outputReader = csv.reader(outputFile)
# WARNING: rebinding `csv` here shadows the csv module used just above; any
# later csv.reader(...) call would fail.
csv = recfromcsv(inputFilePath, delimiter=',')
## Change the name of the algorithm to test it out.
algorithmTest = Knn(complexity, 7, 0, [0, 0, 0, 0, 0, 0, 0], {})
teslaTimestamps = {}
knnTimestamps = {}
print(algorithmTest.complexity)
print(algorithmTest.functionOrder)
numRow = 96  # rows per day — presumably 96 15-minute intervals; TODO confirm
day_train_start = 0
day_train_end = 0
day_predict_start = 1
day_predict_end = 1
# Read in csv rows for the training window and parse data to the trainer.
for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    row = csv[i]
from numpy import recfromcsv
from time import strptime
import matplotlib.pyplot as plt
from Knn import Knn
import numpy as np

csv = recfromcsv('refridge.csv', delimiter=',')
trainer = Knn(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0)
x_train = []
y_train = []
x_predict = []
x_real = []
y_real = []
numRow = 96  # rows per day
day_train_start = 0
day_train_end = 3
day_predict = 4
# Read in the csv and parse data to the trainer.
# Uses days 0-3 as the training set (the original comment said "4 weeks",
# but the window is 4 days — TODO confirm intent).
for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    # for row in csv:
    # date = csv[0][0]
    # energy = csv[0][1]
    row = csv[i]
    date = row[0]
    energy = row[1]
    # Normalize the date separators to spaces so the date can be tokenized.
    date = date.replace("/", " ")
    date = date.replace(":", " ")
#!/usr/bin/env python import sys import numpy as np import matplotlib.pyplot as plt import matplotlib.image as mpimg from Knn import Knn from img.data import x, y if len(sys.argv) < 2: print "Informar caminho da imagem" exit() knn = Knn(7) knn.fit(x, y) img = mpimg.imread(sys.argv[1]) for j in range(img.shape[0] / 50 + 1): for i in range(img.shape[1] / 50 + 1): h, b = np.histogram(img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50]) if (knn.predict(h) == 1): img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50, 1] = 0 plt.imshow(img) plt.show()
ConfusionMatrixListKnn: list = list( ) # list des matrices de confusion pour Knn print(f"Knn Train ratio: {knn_train_ratio}") print(f"findBestKWithCrossValidation: {findBestKWithCrossValidation}") print("\n") print('-' * 175) print(f"Iris dataset classification: \n") startTime = time.time() # Entrainement sur l'ensemble de données Iris iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset( knn_train_ratio) iris_knn = Knn(distance_func=distanceFunc) iris_knn.train(iris_train, iris_train_labels, findBestKWithCrossValidation=findBestKWithCrossValidation) cm, _, _, _ = iris_knn.test(iris_test, iris_test_labels) ConfusionMatrixListKnn.append(cm) print( f"\n --- Elapse time: {1_000*(time.time() - startTime):.2f} ms --- \n") print('-' * 175) print(f"Congressional dataset classification: \n") startTime = time.time() # Entrainement sur l'ensemble de données Congressional
import csv
from Person import Person
from Classifier import Classifier
from Lr import Lr
from Knn import Knn
from Kmeans import testClustering

# Load the person records from the csv file; the first row is a header.
personList = []
with open('diabetes-dataset.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
    for index, row in enumerate(reader):
        if index == 0:
            continue  # skip the header row
        personList.append(Person(''.join(row).split(',')))

print('result on based of logical regression')
lr = Lr()
c = Classifier(personList, lr)
c.run()

print('result on based on K nearest neighbours')
knn = Knn()
c = Classifier(personList, knn)
c.run()

print('result on based on K means')
for k in (2, 4, 6):
    print('\n Test k-means (k = ' + str(k) + ')')
    posFracs = testClustering(personList, k, 2)
import matplotlib.pyplot as plt

# Load the digits data and hold out 30% of it for testing.
iris = datasets.load_digits()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1000)

# Try several neighbour counts; keep the first k with strictly best accuracy.
acc = 0
for k in (3, 5, 7):
    model = Knn(k)
    model.fit(X_train, y_train)
    hits = np.sum(model.predict(X_test) == y_test)
    t_acc = hits / len(y_test)
    if t_acc > acc:
        acc, best_k = t_acc, k
print(acc)
print(best_k)
You will state in a comment which parameters you used.
Overall, you will:
1- Initialize your classifier with its parameters
2- Load the datasets
3- Train your classifier
4- Test it
"""
# Initialize your parameters
k = 3

# Initialize/instantiate your classifiers with their parameters
knn_iris = Knn(k=k)
knn_vote = Knn(k=k)
knn_monks_1 = Knn(k=k)
knn_monks_2 = Knn(k=k)
knn_monks_3 = Knn(k=k)

bayesNaif_iris = BayesNaif()
bayesNaif_vote = BayesNaif()
bayesNaif_monks = BayesNaif()

# Load/read the datasets (70/30 train/test split)
iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset(
    0.7)
congressional_train, congressional_train_labels, congressional_test, \
    congressional_test_labels = load_datasets.load_congressional_dataset(0.7)
#start = time.time() #print("hello") #end = time.time() #open output file output = open('test/knnDishwasherTestResult.csv', 'w') fieldnames = ['real_power', 'predict_power'] writer = csv.DictWriter(output, fieldnames=fieldnames) writer.writeheader() #writer.writerow({'real_power': 'Baked', 'predict_power': 'Beans'}) csv = recfromcsv('all.csv', delimiter=',') trainer = Knn(complexity=2, numInputs=7, inputClassifiers=np.empty([7]), outputClassifier=0, appFieldsDict=0) x_train = [] y_train = [] x_predict = [] y_predict = [] x_real = [] y_real = [] numRow = 96 day_train_start = 0 day_train_end = 150 #day_predict = 103 day_predict_start = 150 day_predict_end = 299
    'norm': 'lin'
}
# Number of CE inputs
numInp = 4
## Algorithm to be tested
interfaceDict = {'in': [dict1, dict2, dict3, dict4], 'out': dict0}
ceDict = {
    'interface': interfaceDict,
    'n_neighbors': 4,
    'weights': 'uniform',
    'algorithm': 'auto',
    'n_jobs': 1,
    'complexity': 1
}
algorithmTest = Knn(numInp, 0, [0, 0, 0, 0], ceDict)

print "Collecting training and test data from GDP"
# Use the collect data routine to fetch training data in separate lists
# for input and output (records 100-200 inclusive).
trainRecStart = 100
trainRecStop = 200
numTrainingSamples = trainRecStop - trainRecStart + 1
inDataTrain, outDataTrain = algorithmTest.interface.collectData(
    trainRecStart, trainRecStop)
# Use the collect data routine to fetch test data in separate lists
# for input and output (records 201-250 inclusive).
testRecStart = 201
testRecStop = 250
numExecuteSamples = testRecStop - testRecStart + 1
inDataTest, outDataTest = algorithmTest.interface.collectData(
## For different tests, these values will vary.
inputFilePath = "dish.csv"
outputFilePath = "dishOutput.csv"
# NOTE(review): these files are never closed; consider `with open(...)`.
inputFile = open(inputFilePath)
outputFile = open(outputFilePath)
# Build each csv reader exactly once (the original assigned inputReader and
# outputReader twice; the duplicated statements are removed).
inputReader = csv.reader(inputFile)
outputReader = csv.reader(outputFile)
complexity = Complexity.secondOrder
numTrainingSamples = 96
numExecuteSamples = 96
# WARNING: rebinding `csv` here shadows the csv module used above.
csv = recfromcsv(inputFilePath, delimiter=',')
## Change the name of the algorithm to test it out.
algorithmTest = Knn(Complexity.secondOrder, 7, 0, [0, 0, 0, 0, 0, 0, 0], {})
teslaTimestamps = {}
knnTimestamps = {}
numRow = 96  # rows per day
day_train_start = 0
day_train_end = 0
day_predict_start = 0
day_predict_end = 0
# Read in csv rows for the training window and parse data to the trainer.
for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    row = csv[i]
    date = row[0]
    date = date.decode()
    dishwasher = csv[i + 1][3]
N_split_iris = 10
N_split_wine = 3  # --> more folds would take too long
N_split_abalone = 3  # --> more folds would take too long
print('IRIS')
K_opti_iris = cv_knn(N_split_iris, X_train_iris, y_train_iris, X_test_iris,
                     y_test_iris, K_values_iris)
print('WINE')
K_opti_wine = cv_knn(N_split_wine, X_train_wine, y_train_wine, X_test_wine,
                     y_test_wine, K_values_wine)
print('ABALONE')
K_opti_abalone = cv_knn(N_split_abalone, X_train_abalone, y_train_abalone,
                        X_test_abalone, y_test_abalone, K_values_abalone)

# Train your classifier with the K selected by cross-validation.
clf_Knn_iris = Knn(K=K_opti_iris)
clf_Knn_iris.train(X_train_iris, y_train_iris)
clf_Knn_wine = Knn(K=K_opti_wine)
clf_Knn_wine.train(X_train_wine, y_train_wine)
clf_Knn_abalone = Knn(K=K_opti_abalone)
clf_Knn_abalone.train(X_train_abalone, y_train_abalone)
"""
After training, evaluate your model on the training data.

IMPORTANT: you must print here, with Python's print() command,
- the confusion matrix
- the accuracy
- the precision
bayesC.start() else: for r in range(0, len(documents_to_clasificated) - 1): bayesC = ClasificationBayes(documents_to_clasificated[r], option) bayesC.start() elif action_option == "3": print( "Comienza el programa de clasifiación de datos.\n" "Elija opción para la ruta donde obtener los documentos: R (raiz del proyecto) o escriba ruta" ) option = input() if option == "R" or option == "r": knn = Knn(documents, categories, documents_by_category, "") knn.start_algorithm() else: kann = Knn(documents, categories, documents_by_category, option) knn.start_algorithm() documents_to_clasificated = AuxiliaryMethod.get_documents_words_to_clasificated( ) k = input("Establezca un k mayor que cero: ") if k.isdigit() and int(k) > 0: for r in range(0, len(documents_to_clasificated) - 1): KnnC = ClasificationKnn(documents_to_clasificated[r], documents, categories, "datos/datos_knn.csv", k) KnnC.start()