Code Example #1
File: main.py  Project: Guitheg/bayes_vs_knn
def cross_validation(path):
    kf = KFold(n_splits=5)
    data, labels = load_dataset(join(DATA, path[0]))
    data = np.array(data)
    labels = np.array(labels)
    for train, test in kf.split(data):
        """
        train_data = []
        train_labels = []
        test_data = []
        test_labels = []
        for indice in train:
            train_data.append(data[indice])
            train_labels.append(labels[indice])
        for indice in test:
            test_data.append(data[indice])
            test_labels.append(labels[indice])
        print(train)
        print(test)

        train_data = np.array(train_data)
        train_labels = np.array(train_labels)
        test_data = np.array(test_data)
        test_labels = np.array(test_labels)"""

        #train_data = data[0:800]
        #train_labels = labels[0:800]
        #test_data = data[801:999]
        #test_labels = labels[801:999]
        train_data = data[train]
        train_labels = labels[train]
        test_data = data[test]
        test_labels = labels[test]
        print(len(train_data))
        print(len(train_labels))
        print(len(test_data))
        print(len(test_labels))

        # GAUSSIAN BAYES
        g = GaussianBayes()
        g.fit(train_data, train_labels)

        # - Score:
        score_baye = g.score(test_data, test_labels)

        # K-NN
        n_neighbors = 10
        clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
        clf.fit(train_data, train_labels)

        # - Score:
        score_voisin = clf.score(test_data, test_labels)

        print(score_baye)
        print(score_voisin)
        break  # evaluate only the first fold
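Example #1 writes the fold loop by hand. For the scikit-learn k-NN side, the same 5-fold evaluation can be expressed with `cross_val_score`, which runs the fit/score loop internally. A minimal sketch with made-up stand-in data (in the project, `data` and `labels` come from the local `load_dataset`, and the project-local `GaussianBayes` would need a scikit-learn-compatible estimator interface to be scored the same way):

import numpy as np
from sklearn import neighbors
from sklearn.model_selection import cross_val_score

# Hypothetical stand-in data: 100 samples, 3 features, 3 classes.
rng = np.random.default_rng(0)
data = rng.normal(size=(100, 3))
labels = rng.integers(0, 3, size=100)

clf = neighbors.KNeighborsClassifier(10, weights='uniform')
scores = cross_val_score(clf, data, labels, cv=5)  # one accuracy per fold
print(scores.mean())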
Code Example #2
def main():
    train_data, train_labels = load_dataset("./data/train.csv")
    test_data, test_labels = load_dataset("./data/test.csv")

    # display the training data
    plot_scatter_hist(train_data, train_labels)

    # Instantiate the GaussianBayes class
    g = GaussianBayes(priors=[0.3, 0.3, 0.3], diag=True)  # note: these priors sum to 0.9, not 1

    # Training
    g.fit(train_data, train_labels)

    # Score
    score = g.score(test_data, test_labels)
    print("precision : {:.2f}".format(score))

    input("Press any key to exit...")
Code Example #3
File: test_main.py  Project: Guitheg/bayes_vs_knn
def baye_voisin(data, labels, ts):
    train_data, test_data, train_labels, test_labels = train_test_split(
        data, labels, test_size=ts / 100, random_state=42)

    # GAUSSIAN BAYES
    g = GaussianBayes()

    evaltime()  # arm the project-local timer (see the sketch after this example)
    g.fit(train_data, train_labels)
    somme_bayes.append(evaltime())  # record elapsed fit time
    
    # - Score:
    score_baye = g.score(test_data, test_labels)
    Z = g.predict(test_data)
    cfmat_bayes = confusion_matrix(test_labels, Z, labels=np.unique(test_labels))

    # K-NN
    n_neighbors = 10
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')

    evaltime()  # arm the timer
    clf.fit(train_data, train_labels)
    somme_knn.append(evaltime())  # record elapsed fit time


    # - Score:
    score_voisin = clf.score(test_data, test_labels)
    Z = clf.predict(test_data)
    cfmat_knn = confusion_matrix(test_labels, Z, labels=np.unique(test_labels))
    
    return score_baye, cfmat_bayes, score_voisin, cfmat_knn
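`evaltime()` and the `somme_bayes` / `somme_knn` accumulators are project-local and not shown in this snippet. Judging only by the call pattern (call once before the work, call again afterwards and keep the return value), it behaves like a stopwatch. A minimal sketch under that assumption; the project's actual implementation may differ:

import time

_last = None

def evaltime():
    """Return seconds elapsed since the previous call (None on the first call)."""
    global _last
    now = time.perf_counter()
    elapsed = None if _last is None else now - _last
    _last = now
    return elapsed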
Code Example #4
File: main.py  Project: Guitheg/bayes_vs_knn
def baye_voisin(data, labels, ts):
    train_data, test_data, train_labels, test_labels = train_test_split(
        data, labels, train_size=ts / 100, random_state=42)
    print("test taille:", len(test_data), " train test:", len(train_data),
          " test size: ", ts)
    # GAUSSIAN BAYES
    g = GaussianBayes()
    g.fit(train_data, train_labels)

    # - Score:
    score_baye = g.score(test_data, test_labels)
    Z = g.predict(test_data)
    cfmat_bayes = confusion_matrix(test_labels,
                                   Z,
                                   labels=np.unique(test_labels))

    # K-NN
    n_neighbors = 10
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
    clf.fit(train_data, train_labels)

    # - Score:
    score_voisin = clf.score(test_data, test_labels)
    Z = clf.predict(test_data)
    cfmat_knn = confusion_matrix(test_labels, Z, labels=np.unique(test_labels))

    return score_baye, cfmat_bayes, score_voisin, cfmat_knn
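Note that Examples #3 and #4 interpret `ts` differently: #3 passes `ts / 100` as `test_size` (so `ts` is the test percentage), while #4 passes it as `train_size` (so `ts` is the training percentage). Apart from that, the two functions differ only in the timing instrumentation and the debug print.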
Code Example #5
        # (fragment: body of a manual K-fold loop; teststart, testend,
        # labelstrainstart, K, and the result accumulators are presumably
        # defined earlier in the source file)
        labelteststart = 100 * i + teststart
        labeltestend = 100 * i + testend
        labeltrainend = 100 * (i + 1)
        labelstest = np.concatenate(
            (labelstest, total_labels[labelteststart:labeltestend]))
        valeurstest = np.concatenate(
            (valeurstest, total_data[labelteststart:labeltestend]))
        labelstrain = np.concatenate(
            (labelstrain,
             total_labels[labelstrainstart:labelteststart],
             total_labels[labeltestend:labeltrainend]))
        valeurstrain = np.concatenate(
            (valeurstrain,
             total_data[labelstrainstart:labelteststart],
             total_data[labeltestend:labeltrainend]))

    g = GaussianBayes(priors=None, diag=False)
    # Training
    g.fit(valeurstrain, labelstrain)

    # Score
    gaussianresult = gaussianresult + g.score(valeurstest, labelstest)

    neigh = KNeighborsClassifier(n_neighbors=3,
                                 weights='uniform',
                                 algorithm='brute')
    neigh.fit(valeurstrain, labelstrain)
    KNNresult = KNNresult + np.sum(
        labelstest == neigh.predict(valeurstest)) / len(labelstest)
gaussianresult = gaussianresult / K
KNNresult = KNNresult / K
print("gaussian average precision")
Code Example #6
# (fragment: proportionseparation and the initial labelstest / valeurstest /
# labelstrain / valeurstrain arrays are presumably defined earlier in the file)
for i in range(1, round(len(total_labels) / 100)):
    labelteststart = 100 * i
    labeltestend = 100 * i + proportionseparation
    labeltrainend = 100 * (i + 1)
    print(i)
    labelstest = np.concatenate(
        (labelstest, total_labels[labelteststart:labeltestend]))
    valeurstest = np.concatenate(
        (valeurstest, total_data[labelteststart:labeltestend]))
    labelstrain = np.concatenate(
        (labelstrain, total_labels[labeltestend:labeltrainend]))
    valeurstrain = np.concatenate(
        (valeurstrain, total_data[labeltestend:labeltrainend]))

g = GaussianBayes(priors=None, diag=False)
# Training
g.fit(valeurstrain, labelstrain)

# Score
score = g.score(valeurstest, labelstest)
print("precision : {:.2f}".format(score))

neigh = KNeighborsClassifier(n_neighbors=3,
                             weights='uniform',
                             algorithm='brute')
neigh.fit(valeurstrain, labelstrain)
print(np.sum(labelstest == neigh.predict(valeurstest)) / len(labelstest))

# Confusion matrices
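The manual accuracy above, `np.sum(labelstest == neigh.predict(valeurstest)) / len(labelstest)`, computes the same number as scikit-learn's `accuracy_score` (and as `KNeighborsClassifier.score`). A small equivalence check with made-up arrays:

import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array([0, 1, 2, 1])
y_pred = np.array([0, 1, 1, 1])

manual = np.sum(y_true == y_pred) / len(y_true)
print(manual, accuracy_score(y_true, y_pred))  # both print 0.75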
Code Example #7
def main():
    folder = './data/'
    files = os.listdir(folder)
    for name in files:
        print("\n\n-Filename=", name)
        filename = folder + name

        rates = [0.8, 0.5, 0.2]
        for rate in rates:
            learnCut = round(rate * 100)
            testCut = round((1 - rate) * 100)
            print("\n\n\n-Actual rate:", rate)
            learn, test = utils.build_dataset(filename,
                                              random=False,
                                              learnCut=rate)
            X_test, y_test, labels = format_dataset(test)
            X_learn, y_learn, _ = format_dataset(learn)
            data_dim = len(X_test[0])

            # Gaussian Bayes
            start = time.perf_counter()
            b = GaussianBayes(diag=True)
            b.fit(X_learn, y_learn)
            pred = b.predict(X_test)

            end = time.perf_counter()
            print("\n-Gaussian Bayes:\nTime : ", (end - start))
            print("Confusion Matrix :\n", confusion_matrix(y_test, pred),
                  "\nScore : ", score(pred, y_test))
            plot_confusion_matrix(
                confusion_matrix(y_test, pred),
                labels,
                title=
                "Confusion matrix, Bayes, dim=%d, learn/test division : %d%%/%d%%"
                % (data_dim, learnCut, testCut),
                filename="cm_bayes_dim%d_div%d" % (data_dim, learnCut))

            # K Neighbors Regressor
            success = []
            bestPredN = []
            bestTime = 0
            bestScore = 0
            bestK = 0

            # Test different values of K
            for i in range(1, 40):
                start = time.perf_counter()
                neigh = KNeighborsRegressor(n_neighbors=i, weights='uniform')
                neigh.fit(X_learn, y_learn)
                predN = neigh.predict(X_test).astype(int)  # truncate regression output to integer class labels
                end = time.perf_counter()
                k_score = score(predN, y_test)  # compute once instead of three times
                success.append(k_score)
                if bestScore < k_score:
                    bestPredN = predN
                    bestTime = end - start
                    bestScore = k_score
                    bestK = i

            print("\n-The best: K=", bestK, " Neighbors Regressor:\nTime : ",
                  bestTime)
            print("Confusion Matrix :\n", confusion_matrix(y_test, bestPredN),
                  "\nScore : ", bestScore)
            plot_confusion_matrix(
                confusion_matrix(y_test, bestPredN),
                labels,
                title=
                'Confusion matrix, KNN, k=%d, dim=%d, learn/test division : %d%%/%d%%'
                % (bestK, data_dim, learnCut, testCut),
                filename="cm_knn_k%d_dim%d_div%d" %
                (bestK, data_dim, learnCut))

            # Plot the comparison against the K Neighbors Regressor
            plt.close()
            #plt.figure(figsize=(12,6))
            plt.plot([score(pred, y_test)] * 40,  # constant Bayes baseline
                     color='blue',
                     label="Bayes")
            plt.plot(range(1, 40),
                     success,
                     color='green',
                     linestyle='dashed',
                     marker='o',
                     markerfacecolor='green',
                     markersize=5,
                     label="KNN")
            plt.title(
                'Success Rate (higher is better), dim=%d, learn/test division : %d%%/%d%%'
                % (data_dim, learnCut, testCut))
            plt.xlabel('K value')
            plt.ylabel('Success Rate')
            plt.legend()
            plt.savefig("bayesVknn_dim%d_div%d" % (data_dim, learnCut))

        # plot effect of learn/test division
        bayesScores = []
        knnScores = []
        cutRange = range(5, 100, 5)
        for i in cutRange:
            rate = round(i / 100.0, 2)
            #print(rate)
            learn, test = utils.build_dataset(filename,
                                              random=False,
                                              learnCut=rate)
            X_test, y_test, labels = format_dataset(test)
            X_learn, y_learn, _ = format_dataset(learn)
            data_dim = len(X_test[0])

            b = GaussianBayes(diag=True)
            b.fit(X_learn, y_learn)
            pred = b.predict(X_test)
            bayesScores.append(score(pred, y_test))

            neigh = KNeighborsRegressor(n_neighbors=1, weights='uniform')
            neigh.fit(X_learn, y_learn)
            pred = neigh.predict(X_test).astype(int)
            knnScores.append(score(pred, y_test))
        plt.close()
        #plt.ylim(bottom=0, top=1.1)
        plt.xticks(ticks=range(len(cutRange)),
                   labels=[str(i) for i in cutRange])
        plt.plot(bayesScores, color='blue', label="Bayes")
        plt.plot(knnScores,
                 color='green',
                 linestyle='dashed',
                 marker='o',
                 markerfacecolor='green',
                 markersize=5,
                 label="KNN")
        plt.title('Success Rate with different learn/test division, dim=%d' %
                  (data_dim))
        plt.xlabel('Learn cut of the dataset (%)')
        plt.ylabel('Success Rate')
        plt.legend()
        plt.savefig("learn-test-div_dim%d" % (data_dim), pad_inches=1)
Code Example #8
        # (fragment: tail of a 3D likelihood plot; ax, xd, yd, and Z are
        # presumably defined earlier in the source file)
        ax.scatter(xd[20:30], yd[20:30], Z[20:30], c='red')

    ax.set_xlabel('r')
    ax.set_ylabel('v')
    ax.set_zlabel('Vraisemblance')  # French: "likelihood"

    plt.show()


color, labels = load_dataset("./couleurs_moyennes_better.csv")

# display the scatter plot
#plot_training(color,labels)

# Instantiate the GaussianBayes class
g = GaussianBayes(priors=[0.33, 0.3, 0.326])

# Training
mu, sig = g.fit(color, labels)

#tab_proba = g.predict(color)

# display the likelihood
#plot_vraissemblance(color,labels,tab_proba)

g.predict(color)

print(g.score(color, labels))
"""
for i in range(n_fleurs):
    file_name = "ch"