def baye_voisin(data, labels, ts):
    """Compare Gaussian Bayes against 10-NN on one random train/test split.

    Args:
        data: feature matrix.
        labels: class labels aligned with `data`.
        ts: TEST-set size as a percentage (0-100).

    Side effects: fit durations (as reported by the project `evaltime()`
    helper — presumably elapsed time since its previous call; confirm) are
    appended to the module-level `somme_bayes` / `somme_knn` lists.

    Returns:
        (bayes_score, bayes_confusion_matrix, knn_score, knn_confusion_matrix)
    """
    split = train_test_split(data, labels, test_size=ts / 100, random_state=42)
    train_data, test_data, train_labels, test_labels = split
    class_labels = np.unique(test_labels)

    # --- Gaussian Bayes ---
    bayes = GaussianBayes()
    evaltime()  # arm the shared timer before fitting
    bayes.fit(train_data, train_labels)
    somme_bayes.append(evaltime())  # record the fit duration
    score_baye = bayes.score(test_data, test_labels)
    cfmat_bayes = confusion_matrix(test_labels, bayes.predict(test_data),
                                   labels=class_labels)

    # --- K nearest neighbours (k=10) ---
    knn = neighbors.KNeighborsClassifier(10, weights='uniform')
    evaltime()
    knn.fit(train_data, train_labels)
    somme_knn.append(evaltime())
    score_voisin = knn.score(test_data, test_labels)
    cfmat_knn = confusion_matrix(test_labels, knn.predict(test_data),
                                 labels=class_labels)

    return score_baye, cfmat_bayes, score_voisin, cfmat_knn
def cross_validation(path):
    """Run 5-fold cross-validation of GaussianBayes and 10-NN on one dataset.

    Args:
        path: sequence whose first element is the dataset file name,
              resolved against the module-level DATA directory.

    Returns:
        (mean_bayes_score, mean_knn_score) averaged over the 5 folds.

    Fixes vs. the previous revision: the fold loop used to `break` after
    the first fold, so only one of the 5 folds was ever evaluated; an
    unused `z = np.zeros(100)` buffer and ~30 lines of dead commented-out
    index-copying code were removed.
    """
    kf = KFold(n_splits=5)
    data, labels = load_dataset(join(DATA, path[0]))
    data = np.array(data)
    labels = np.array(labels)

    bayes_scores = []
    knn_scores = []
    for train, test in kf.split(data):
        # KFold yields index arrays: fancy-index the numpy arrays directly.
        train_data, train_labels = data[train], labels[train]
        test_data, test_labels = data[test], labels[test]
        print(len(train_data))
        print(len(train_labels))
        print(len(test_data))
        print(len(test_labels))

        # Gaussian Bayes
        g = GaussianBayes()
        g.fit(train_data, train_labels)
        bayes_scores.append(g.score(test_data, test_labels))

        # K-NN (k=10)
        clf = neighbors.KNeighborsClassifier(10, weights='uniform')
        clf.fit(train_data, train_labels)
        score_voisin = clf.score(test_data, test_labels)
        knn_scores.append(score_voisin)
        print(score_voisin)

    return float(np.mean(bayes_scores)), float(np.mean(knn_scores))
def baye_voisin(data, labels, ts):
    """Evaluate Gaussian Bayes and 10-NN on one train/test split.

    Args:
        data: feature matrix.
        labels: class labels aligned with `data`.
        ts: TRAINING-set size as a percentage (0-100). NOTE(review): an
            earlier `baye_voisin` definition in this file interprets `ts`
            as the TEST-set size; this later definition shadows it.

    Returns:
        (bayes_score, bayes_confusion_matrix, knn_score, knn_confusion_matrix)
    """
    train_data, test_data, train_labels, test_labels = train_test_split(
        data, labels, train_size=ts / 100, random_state=42)
    print("test taille:", len(test_data), " train test:", len(train_data),
          " test size: ", ts)
    known_labels = np.unique(test_labels)

    # --- Gaussian Bayes ---
    bayes_clf = GaussianBayes()
    bayes_clf.fit(train_data, train_labels)
    score_baye = bayes_clf.score(test_data, test_labels)
    cfmat_bayes = confusion_matrix(test_labels, bayes_clf.predict(test_data),
                                   labels=known_labels)

    # --- K nearest neighbours (k=10) ---
    knn_clf = neighbors.KNeighborsClassifier(10, weights='uniform')
    knn_clf.fit(train_data, train_labels)
    score_voisin = knn_clf.score(test_data, test_labels)
    cfmat_knn = confusion_matrix(test_labels, knn_clf.predict(test_data),
                                 labels=known_labels)

    return score_baye, cfmat_bayes, score_voisin, cfmat_knn
def main():
    """Load the train/test colour datasets, plot the training set, and
    print the GaussianBayes accuracy on the test set."""
    train_data, train_labels = load_dataset("./data/train.csv")
    test_data, test_labels = load_dataset("./data/test.csv")

    # Scatter/histogram view of the training data.
    plot_scatter_hist(train_data, train_labels)

    # NOTE(review): these priors sum to 0.9, not 1.0 — confirm that
    # GaussianBayes normalizes them; otherwise this looks like a typo
    # for 1/3 each.
    classifier = GaussianBayes(priors=[0.3, 0.3, 0.3], diag=True)
    classifier.fit(train_data, train_labels)

    accuracy = classifier.score(test_data, test_labels)
    print("precision : {:.2f}".format(accuracy))
    input("Press any key to exit...")
# NOTE(review): newline-stripped fragment of a larger script. It begins in
# the middle of a loop body — `i`, `teststart`, `testend`, `labelstrainstart`
# and `K` are defined outside this chunk — and mixes loop-interior statements
# (per-block train/test splitting, GaussianBayes and 3-NN accumulation) with
# the post-loop averaging (`gaussianresult / K`). The original line breaks
# and indentation must be restored from the source file before this can be
# edited safely; left byte-identical below.
labelteststart = 100 * i + teststart labeltestend = 100 * i + testend labeltrainend = 100 * (i + 1) labelstest = np.concatenate( (labelstest, total_labels[labelteststart:labeltestend])) valeurstest = np.concatenate( (valeurstest, total_data[labelteststart:labeltestend])) labelstrain = np.concatenate( (np.concatenate( (labelstrain, total_labels[labelstrainstart:labelteststart])), total_labels[labeltestend:labeltrainend])) valeurstrain = np.concatenate((np.concatenate( (valeurstrain, total_data[labelstrainstart:labelteststart])), total_data[labeltestend:labeltrainend])) g = GaussianBayes(priors=None, diag=False) # Apprentissage g.fit(valeurstrain, labelstrain) # Score gaussianresult = gaussianresult + g.score(valeurstest, labelstest) neigh = KNeighborsClassifier(n_neighbors=3, weights='uniform', algorithm='brute') neigh.fit(valeurstrain, labelstrain) KNNresult = KNNresult + np.sum( labelstest == neigh.predict(valeurstest)) / len(labelstest) gaussianresult = gaussianresult / K KNNresult = KNNresult / K print("gaussian average precision")
# NOTE(review): top-level script statements relying on module-level state
# defined earlier in the file (total_labels, total_data, labelstest,
# valeurstest, labelstrain, valeurstrain, proportionseparation, X, Y).
# The original newlines were stripped; the nesting below is the natural
# reading (everything through Y.append in the loop body) — confirm against
# the original file.
for i in range(1, round(len(total_labels) / 100)):
    # Each class presumably occupies a contiguous block of 100 samples:
    # the first `proportionseparation` samples of block i go to the test
    # split, the remainder of the block to the training split.
    labelteststart = 100 * i
    labeltestend = 100 * i + proportionseparation
    labeltrainend = 100 * (i + 1)
    print(i)
    labelstest = np.concatenate(
        (labelstest, total_labels[labelteststart:labeltestend]))
    valeurstest = np.concatenate(
        (valeurstest, total_data[labelteststart:labeltestend]))
    labelstrain = np.concatenate(
        (labelstrain, total_labels[labeltestend:labeltrainend]))
    valeurstrain = np.concatenate(
        (valeurstrain, total_data[labeltestend:labeltrainend]))

    # Gaussian Bayes on the splits accumulated so far.
    g = GaussianBayes(priors=None, diag=True)
    g.fit(valeurstrain, labelstrain)
    score = g.score(valeurstest, labelstest)
    X.append(score)
    print("precision : {:.2f}".format(score))

    # 3-NN accuracy computed by direct comparison of predictions.
    neigh = KNeighborsClassifier(n_neighbors=3, weights='uniform',
                                 algorithm='brute')
    neigh.fit(valeurstrain, labelstrain)
    Y.append(
        np.sum(labelstest == neigh.predict(valeurstest)) / len(labelstest))

t = np.linspace(1, 50, 50)
def main():
    """For every dataset file in ./data/: compare GaussianBayes against a
    K-neighbours regressor over several learn/test divisions, saving
    confusion matrices and comparison plots.

    Fixes vs. the previous revision:
    - `score(predN, y_test)` was recomputed up to three times per K in the
      best-K search; it is now computed once per iteration.
    - the constant Bayes baseline was plotted at x = 0..39 while the KNN
      curve used x = 1..39; both now share the same x range.
    """
    folder = './data/'
    files = os.listdir(folder)
    for name in files:
        print("\n\n-Filename=", name)
        filename = folder + name
        rates = [0.8, 0.5, 0.2]
        for rate in rates:
            learnCut = round(rate * 100)
            testCut = round((1 - rate) * 100)
            print("\n\n\n-Actual rate:", rate)
            learn, test = utils.build_dataset(filename, random=False,
                                              learnCut=rate)
            X_test, y_test, labels = format_dataset(test)
            X_learn, y_learn, _ = format_dataset(learn)
            data_dim = len(X_test[0])

            # --- Gaussian Bayes ---
            start = time.perf_counter()
            b = GaussianBayes(diag=True)
            b.fit(X_learn, y_learn)
            pred = b.predict(X_test)
            end = time.perf_counter()
            bayes_score = score(pred, y_test)
            print("\n-Gaussian Bayes:\nTime : ", (end - start))
            print("Confusion Matrix :\n", confusion_matrix(y_test, pred),
                  "\nScore : ", bayes_score)
            plot_confusion_matrix(
                confusion_matrix(y_test, pred),
                labels,
                title=
                "Confusion matrix, Bayes, dim=%d, learn/test division : %d%%/%d%%"
                % (data_dim, learnCut, testCut),
                filename="cm_bayes_dim%d_div%d" % (data_dim, learnCut))

            # --- K Neighbors Regressor: search the best K in [1, 40) ---
            success = []        # score for each tested K
            bestPredN = []
            bestTime = 0
            bestScore = 0
            bestK = 0
            for k in range(1, 40):
                start = time.perf_counter()
                neigh = KNeighborsRegressor(n_neighbors=k, weights='uniform')
                neigh.fit(X_learn, y_learn)
                # Regressor output is continuous; truncate back to labels.
                predN = neigh.predict(X_test).astype(int)
                end = time.perf_counter()
                knn_score = score(predN, y_test)  # hoisted: was computed 3x
                success.append(knn_score)
                if bestScore < knn_score:
                    bestPredN = predN
                    bestTime = end - start
                    bestScore = knn_score
                    bestK = k
            print("\n-The best: K=", bestK, " Neighbors Regressor:\nTime : ",
                  bestTime)
            print("Confusion Matrix :\n", confusion_matrix(y_test, bestPredN),
                  "\nScore : ", bestScore)
            plot_confusion_matrix(
                confusion_matrix(y_test, bestPredN),
                labels,
                title=
                'Confusion matrix, KNN, k=%d, dim=%d, learn/test division : %d%%/%d%%'
                % (bestK, data_dim, learnCut, testCut),
                filename="cm_knn_k%d_dim%d_div%d" % (bestK, data_dim, learnCut))

            # --- Bayes vs KNN comparison plot ---
            plt.close()
            # Bayes does not depend on K: draw it as a constant baseline over
            # the same x range as the KNN curve (previously it started at 0).
            plt.plot(range(1, 40), [bayes_score] * 39, color='blue',
                     label="Bayes")
            plt.plot(range(1, 40), success, color='green', linestyle='dashed',
                     marker='o', markerfacecolor='green', markersize=5,
                     label="KNN")
            plt.title(
                'Success Rate (higher is better), dim=%d, learn/test division : %d%%/%d%%'
                % (data_dim, learnCut, testCut))
            plt.xlabel('K value')
            plt.ylabel('Success Rate')
            plt.legend()
            plt.savefig("bayesVknn_dim%d_div%d" % (data_dim, learnCut))

        # --- Effect of the learn/test division on both classifiers ---
        # NOTE(review): placed after the rates loop since it re-splits the
        # data itself; the original line breaks were lost, so confirm this
        # nesting against the source file.
        bayesScores = []
        knnScores = []
        cutRange = range(5, 100, 5)
        for i in cutRange:
            cut = round(i / 100.0, 2)
            learn, test = utils.build_dataset(filename, random=False,
                                              learnCut=cut)
            X_test, y_test, labels = format_dataset(test)
            X_learn, y_learn, _ = format_dataset(learn)
            data_dim = len(X_test[0])

            b = GaussianBayes(diag=True)
            b.fit(X_learn, y_learn)
            bayesScores.append(score(b.predict(X_test), y_test))

            neigh = KNeighborsRegressor(n_neighbors=1, weights='uniform')
            neigh.fit(X_learn, y_learn)
            knnScores.append(score(neigh.predict(X_test).astype(int), y_test))

        plt.close()
        plt.xticks(ticks=range(len(cutRange)),
                   labels=[str(i) for i in cutRange])
        plt.plot(bayesScores, color='blue', label="Bayes")
        plt.plot(knnScores, color='green', linestyle='dashed', marker='o',
                 markerfacecolor='green', markersize=5, label="KNN")
        plt.title('Success Rate with different learn/test division, dim=%d'
                  % (data_dim))
        plt.xlabel('Learn cut of the dataset (%)')
        plt.ylabel('Success Rate')
        plt.legend()
        plt.savefig("learn-test-div_dim%d" % (data_dim), pad_inches=1)
# NOTE(review): newline-stripped fragment of a larger script. It begins in
# the middle of a 3D plotting section (`ax`, `xd`, `yd`, `Z` are defined
# outside this chunk), then fits/scores GaussianBayes on the full colour
# dataset, and ends inside an unterminated triple-quoted string. The
# original line breaks and indentation must be restored from the source
# file before this can be edited safely; left byte-identical below.
ax.scatter(xd[20:30], yd[20:30], Z[20:30], c='red') ax.set_xlabel('r') ax.set_ylabel('v') ax.set_zlabel('Vraisemblance') plt.show() color, labels = load_dataset("./couleurs_moyennes_better.csv") #affichage nuage de point #plot_training(color,labels) # Instanciation de la classe GaussianB g = GaussianBayes(priors=[0.33, 0.3, 0.326]) # Apprentissage mu, sig = g.fit(color, labels) #tab_proba = g.predict(color) #affichage vraissemblance #plot_vraissemblance(color,labels,tab_proba) g.predict(color) print(g.score(color, labels)) """ for i in range(n_fleurs): file_name = "ch"