def main():
    """Load five datasets, train KNN and naive Bayes classifiers on each,
    then evaluate every classifier on its held-out test split.
    """
    # Load the datasets (70/30 train/test split for Iris and Congressional).
    i = ld.load_iris_dataset(0.7)
    c = ld.load_congressional_dataset(0.7)
    m1 = ld.load_monks_dataset(1)
    m2 = ld.load_monks_dataset(2)
    m3 = ld.load_monks_dataset(3)

    # Distance functions used by KNN.
    # Squared Euclidean distance: extracting the square root is unnecessary
    # because it does not change the neighbour ranking.
    euclide = lambda x, y: pow((x - y), 2)
    # 0/1 mismatch distance for categorical attributes (used for MONKS).
    diff_binaire = lambda x, y: 0 if x == y else 1

    knn_i = Knn(train=i[0], train_labels=i[1], dist_equation=euclide)
    knn_c = Knn(train=c[0], train_labels=c[1], dist_equation=euclide)
    knn_m1 = Knn(train=m1[0], train_labels=m1[1], dist_equation=diff_binaire)
    knn_m2 = Knn(train=m2[0], train_labels=m2[1], dist_equation=diff_binaire)
    knn_m3 = Knn(train=m3[0], train_labels=m3[1], dist_equation=diff_binaire)

    # Naive Bayes classifiers; the constructor argument presumably encodes
    # the dataset's attribute layout — confirm against BayesNaifClassifier.
    bn_i = BayesNaifClassifier([1])
    bn_c = BayesNaifClassifier([0])
    bn_m1 = BayesNaifClassifier([2])
    bn_m2 = BayesNaifClassifier([2])
    bn_m3 = BayesNaifClassifier([2])

    # Evaluate KNN on the training split.
    print("\n=============\nKNN train tests\n=============")
    knn_i.train_test(i[0], i[1], "Dataset: Iris, Training")
    knn_c.train_test(c[0], c[1], "Dataset: Congressional, Training")
    knn_m1.train_test(m1[0], m1[1], "Dataset: MONKS-1, Training")
    knn_m2.train_test(m2[0], m2[1], "Dataset: MONKS-2, Training")
    knn_m3.train_test(m3[0], m3[1], "Dataset: MONKS-3, Training")

    print("\n=============\nBayes Naif train tests\n=============")
    # Fixed: these labels previously said "Test" although this is the
    # training phase (copy-paste from the test section below).
    bn_i.train(i[0], i[1], "Dataset: Iris, Training")
    bn_c.train(c[0], c[1], "Dataset: Congressional, Training")
    bn_m1.train(m1[0], m1[1], "Dataset: MONKS-1, Training")
    bn_m2.train(m2[0], m2[1], "Dataset: MONKS-2, Training")
    bn_m3.train(m3[0], m3[1], "Dataset: MONKS-3, Training")

    print("\n=============\nKNN tests\n=============")
    # Evaluate on the held-out test split.
    knn_i.train_test(i[2], i[3], "Dataset: Iris, Test")
    knn_c.train_test(c[2], c[3], "Dataset: Congressional, Test")
    knn_m1.train_test(m1[2], m1[3], "Dataset: MONKS-1, Test")
    knn_m2.train_test(m2[2], m2[3], "Dataset: MONKS-2, Test")
    knn_m3.train_test(m3[2], m3[3], "Dataset: MONKS-3, Test")

    print("\n=============\nBayes Naif tests\n=============")
    bn_i.test(i[2], i[3], "Dataset: Iris, Test")
    bn_c.test(c[2], c[3], "Dataset: Congressional, Test")
    bn_m1.test(m1[2], m1[3], "Dataset: MONKS-1, Test")
    bn_m2.test(m2[2], m2[3], "Dataset: MONKS-2, Test")
    bn_m3.test(m3[2], m3[3], "Dataset: MONKS-3, Test")
Exemple #2
0
def cv_knn(N_split, X_train, y_train, X_test, y_test, K_values):
    """Select the best K for KNN by N_split-fold cross-validation.

    The train and test sets are concatenated, split into N_split equal
    folds, and for each candidate K the mean accuracy over the folds is
    computed. The best (K, score) pair is printed and the best K returned.

    Note: np.split requires len(X) to be divisible by N_split.
    """
    X = np.concatenate((X_train, X_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)

    # Equal-size folds.
    X_splits = np.split(X, N_split)
    y_splits = np.split(y, N_split)

    # Grid search over K.
    scores = {}
    for K in K_values:
        accuracies = []
        for i in range(N_split):
            # Fold i is the validation set; the rest is training data.
            # (Local names no longer shadow the function's parameters.)
            X_tr = np.concatenate(np.delete(X_splits, i, 0))
            y_tr = np.concatenate(np.delete(y_splits, i, 0))
            X_val = X_splits[i]
            y_val = y_splits[i]

            model = Knn(K=K)
            model.train(X_tr, y_tr)
            evaluation = model.evaluate(X_val, y_val)

            accuracies.append(evaluation['mean_accuracy'])

        scores[K] = np.mean(accuracies)

    # Compute the argmax once (the original evaluated max() twice).
    best_K, best_score = max(scores.items(), key=operator.itemgetter(1))
    print((best_K, best_score))
    return best_K
def score_result(reducer_function, data, x_scaled, ini, dimensions, label_data,
                 title):
    """For each target dimensionality k in [ini, dimensions), reduce the
    data with reducer_function, score it with KNN, and plot the hit rate
    per k under the given title.
    """
    classifier = Knn()
    k_range = range(ini, dimensions)
    # Average KNN score for every reduced representation.
    scores = [
        classifier.avg(reducer_function(data, x_scaled, k), label_data)
        for k in k_range
    ]

    Visualization.hit_rate_per_k(k_range, scores, title)
Exemple #4
0
 def __init__(self, labels):
     """Build the training corpus for a k=5 KNN pose classifier.

     For every pose in *labels*, loads that pose's samples via
     self.load_data and accumulates them into flat, index-aligned
     lists all_data (samples) and all_target (pose names).
     """
     self.model = Knn(k=5)
     self.labels = labels
     self.all_data = []
     self.all_target = []
     for pose in labels:
         samples = self.load_data(labels[pose])
         self.all_data += samples
         self.all_target += [pose] * len(samples)
def main():
    """Classify 50x50 tiles of the image named on the command line with a
    7-NN model trained on (x, y); tiles predicted as class 1 have their
    green channel zeroed, then the result is displayed.
    """
    knn = Knn(7)

    knn.fit(x, y)

    img = mpimg.imread(sys.argv[1])

    # Fixed: use floor division so the tile counts are ints under Python 3
    # (plain '/' yields a float and range() raises TypeError).
    for j in range(img.shape[0] // 50 + 1):
        for i in range(img.shape[1] // 50 + 1):
            # Histogram of the tile's pixel values is the feature vector.
            h, b = np.histogram(img[j * 50:(j * 50) + 50,
                                    i * 50:(i * 50) + 50])
            if knn.predict(h) == 1:
                # Zero the green channel of positively classified tiles.
                img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50, 1] = 0

    plt.imshow(img)
    plt.show()
Exemple #6
0
def cross_validation(origiData, origiLabel, splitNum):
    """Estimate KNN (k=10) accuracy with splitNum-fold cross-validation:
    each fold takes one contiguous slice as the test set and the rest as
    training data; returns the mean accuracy over all folds.
    """
    lastIndex = 0
    offset = int(len(origiData) / splitNum)
    accurateRateSum = 0
    for i in range(1, splitNum + 1):
        # Split the dataset into splitNum-1 training parts and 1 test part.
        # NOTE(review): the upper bound 'i * offset + 1' makes consecutive
        # test slices share one boundary sample (index i*offset appears in
        # two folds) — looks like an off-by-one; confirm the intended layout.
        tempData = np.split(origiData, (lastIndex, i * offset + 1))
        tempLabel = np.split(origiLabel, (lastIndex, i * offset + 1))

        testData = tempData[1]
        testLabel = tempLabel[1]
        trainData = np.concatenate([tempData[0], tempData[2]])
        trainLabel = np.concatenate([tempLabel[0], tempLabel[2]])
        # Start index of the next fold's test slice.
        lastIndex = i * offset

        # Predict the test slice with KNN.
        knn = Knn(trainData, trainLabel, 10)
        predictRsl = knn.predict(testData)
        accuracy = caculate_accuracy(predictRsl, testLabel)
        accurateRateSum += accuracy
    # Return the mean accuracy over all folds.
    return accurateRateSum / splitNum
Exemple #7
0
        # Pick classes A-E from the handwritten-letters file and split each
        # class into 39/9 samples (presumably train/test — confirm against
        # Task_E.splitData2TestTrain).
        train_data_file_name = "../HandWrittenLetters.txt"
        classes_label = 'ABCDE'
        # numbers = '1245'
        letter_to_digit = Task_E.letter_2_digit_convert(classes_label)
        # for i in numbers:
        #     letter_to_digit.append(i)
        data_frame = Task_E.pickDataClass(train_data_file_name,
                                          letter_to_digit)
        train_data_set_without_labels, train_y, test_data_set_without_labels, test_y, train_data_with_labels, test_data_with_labels = Task_E.splitData2TestTrain(
            data_frame, 39, 9)
        centroid_data_frame_train = deepcopy(train_data_with_labels)
        centroid_data_frame_test = deepcopy(test_data_with_labels)
        # make_file_and_save_data_train = Task_E.store(train_data_set_without_labels.T, train_y, 'jenil_train.csv')
        # make_file_and_save_data_test = Task_E.store(test_data_set_without_labels.T, test_y, 'jenil_test.csv')
        k = 5
        knn_object = Knn(k)
        data_with_euclidean_distance = knn_object.calculate_distance(
            train_data_with_labels.values, test_data_with_labels.values)
        # NOTE(review): the comprehension variable 'k' below shadows k = 5
        # (harmless in Python 3 since comprehensions have their own scope,
        # but confusing to read).
        accuracy = knn_object.get_accuracy([
            (k['Test Label'], k['Classification'])
            for k in data_with_euclidean_distance
        ])
        print('Accuracy of Knn is:', accuracy)
        # Linear Regression setup: transpose so samples become rows.
        linear_regression_object = LinearRegression.LinearRegression()
        N_train, L_train, Xtrain = len(
            train_y), train_y, train_data_set_without_labels.T

        N_test, Ytest, Xtest = len(
            test_y), test_y, test_data_set_without_labels.T
Exemple #8
0
from Knn import Knn
from ContextEngineBase import Complexity

## For different tests, these values will vary.
inputFilePath = "dish.csv"
outputFilePath = "dishOutput.csv"
complexity = Complexity.secondOrder
numTrainingSamples = 96
numExecuteSamples = 96
inputFile = open(inputFilePath)
outputFile = open(outputFilePath)
inputReader = csv.reader(inputFile)
outputReader = csv.reader(outputFile)
# NOTE(review): this rebinds the name 'csv' from the csv module to the
# parsed record array, so csv.reader is unusable below this line.
csv = recfromcsv(inputFilePath, delimiter=',')
## Change the name of the algorithm to test it out.
algorithmTest = Knn(complexity, 7, 0, [0, 0, 0, 0, 0, 0, 0], {})
teslaTimestamps = {}
knnTimestamps = {}

print(algorithmTest.complexity)
print(algorithmTest.functionOrder)

# 96 rows per day (presumably 15-minute samples) — confirm against data.
numRow = 96
day_train_start = 0
day_train_end = 0
day_predict_start = 1
day_predict_end = 1
#read in csv and parse data to trainer

for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    row = csv[i]
Exemple #9
0
from numpy import recfromcsv
from time import strptime
import matplotlib.pyplot as plt
from Knn import Knn
import numpy as np

# NOTE(review): 'csv' here is the parsed record array, not the csv module.
csv = recfromcsv('refridge.csv', delimiter=',')

trainer = Knn(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0)
x_train = []
y_train = []
x_predict = []
x_real = []
y_real = []

# 96 rows per day (presumably 15-minute samples) — confirm against data.
numRow = 96
day_train_start = 0
day_train_end = 3
day_predict = 4
#read in csv and parse data to trainer
# NOTE(review): the comment below says "4 weeks" but day_train_end = 3
# selects only the first 4 *days* — confirm which is intended.
#use the first 4 weeks data as training set
for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    #for row in csv:
    #date = csv[0][0]
    #energy = csv[0][1]
    row = csv[i]
    date = row[0]
    energy = row[1]

    # Replace date separators with spaces (presumably to simplify parsing
    # further down — confirm against the rest of the loop).
    date = date.replace("/", " ")
    date = date.replace(":", " ")
Exemple #10
0
#!/usr/bin/env python

import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from Knn import Knn
from img.data import x, y

# Classify 50x50 tiles of the image named on the command line with a 7-NN
# model trained on (x, y); tiles predicted as class 1 get their green
# channel zeroed before the image is displayed.

if len(sys.argv) < 2:
    # Fixed: the original used the Python 2 print statement, a syntax
    # error under Python 3 (which the rest of this file targets).
    print("Informar caminho da imagem")
    exit()

knn = Knn(7)

knn.fit(x, y)

img = mpimg.imread(sys.argv[1])

# Fixed: floor division keeps the tile counts integral under Python 3.
for j in range(img.shape[0] // 50 + 1):
    for i in range(img.shape[1] // 50 + 1):
        # The tile's pixel-value histogram is the feature vector.
        h, b = np.histogram(img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50])
        if knn.predict(h) == 1:
            img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50, 1] = 0

plt.imshow(img)
plt.show()
Exemple #11
0
    # Accumulates one confusion matrix per dataset for the KNN runs.
    ConfusionMatrixListKnn: list = list(
    )  # list of confusion matrices for Knn

    print(f"Knn Train ratio: {knn_train_ratio}")
    print(f"findBestKWithCrossValidation: {findBestKWithCrossValidation}")
    print("\n")

    print('-' * 175)
    # NOTE(review): the f-strings below contain no placeholders; plain
    # string literals would do.
    print(f"Iris dataset classification: \n")
    startTime = time.time()

    #  Training on the Iris dataset

    iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset(
        knn_train_ratio)
    iris_knn = Knn(distance_func=distanceFunc)
    iris_knn.train(iris_train,
                   iris_train_labels,
                   findBestKWithCrossValidation=findBestKWithCrossValidation)
    cm, _, _, _ = iris_knn.test(iris_test, iris_test_labels)
    ConfusionMatrixListKnn.append(cm)

    print(
        f"\n --- Elapse time: {1_000*(time.time() - startTime):.2f} ms --- \n")

    print('-' * 175)
    print(f"Congressional dataset classification: \n")
    startTime = time.time()

    #  Training on the Congressional dataset
Exemple #12
0
import csv
from Person import Person
from Classifier import Classifier
from Lr import Lr
from Knn import Knn
from Kmeans import testClustering

# Load the diabetes dataset: skip the header row, then build one Person
# per record from its comma-separated fields.
personList = []
with open('diabetes-dataset.csv', newline = '') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
    for index, row in enumerate(reader, start=1):
        if index != 1:  # first row is the header
            fields = ''.join(row).split(',')
            personList.append(Person(fields))

# Run each classifier over the loaded people.
print('result on based of logical regression')
lr = Lr()
c = Classifier(personList,lr)
c.run()
print('result on based on K nearest neighbours')
knn = Knn()
c = Classifier(personList,knn)
c.run()
print('result on based on K means')
for k in (2,4,6):
    print('\n     Test k-means (k = ' + str(k) + ')')
    posFracs = testClustering(personList, k, 2)
Exemple #13
0
import matplotlib.pyplot as plt

# Load the digits dataset (note: the variable is named 'iris' but holds
# load_digits() data).
iris = datasets.load_digits()
X, y = iris.data, iris.target

# 70/30 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1000)

# Try several K values and keep the one with the highest test accuracy.
acc = 0
best_k = None  # fixed: was unbound (NameError below) if every accuracy were 0
for k in [3, 5, 7]:
    clf = Knn(k)

    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    # Fraction of correctly predicted test labels.
    t_acc = np.sum(predictions == y_test) / len(y_test)
    if t_acc > acc:
        acc = t_acc
        best_k = k

print(acc)
print(best_k)
Vous indiquerez en commentaire les paramètres que vous avez utilisés.
En gros, vous allez :
1- Initialiser votre classifieur avec ses paramètres
2- Charger les datasets
3- Entrainer votre classifieur
4- Le tester

"""

# Initialise your parameters

# Number of neighbours shared by every KNN classifier below.
k = 3

# Initialise/instantiate your classifiers with their parameters

knn_iris = Knn(k=k)
knn_vote = Knn(k=k)
knn_monks_1 = Knn(k=k)
knn_monks_2 = Knn(k=k)
knn_monks_3 = Knn(k=k)
bayesNaif_iris = BayesNaif()
bayesNaif_vote = BayesNaif()
bayesNaif_monks = BayesNaif()

# Load/read the datasets (70% train / 30% test)

iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset(
    0.7)

congressional_train, congressional_train_labels, congressional_test, \
    congressional_test_labels = load_datasets.load_congressional_dataset(0.7)
Exemple #15
0
#start = time.time()
#print("hello")
#end = time.time()

#open output file
# NOTE(review): 'output' is never closed in this view — consider a 'with'
# block or an explicit close once writing is finished.
output = open('test/knnDishwasherTestResult.csv', 'w')
fieldnames = ['real_power', 'predict_power']
writer = csv.DictWriter(output, fieldnames=fieldnames)
writer.writeheader()
#writer.writerow({'real_power': 'Baked', 'predict_power': 'Beans'})

# NOTE(review): this rebinds 'csv' from the module to the parsed record
# array, so csv.DictWriter is unusable below this line.
csv = recfromcsv('all.csv', delimiter=',')

trainer = Knn(complexity=2,
              numInputs=7,
              inputClassifiers=np.empty([7]),
              outputClassifier=0,
              appFieldsDict=0)
x_train = []
y_train = []
x_predict = []
y_predict = []
x_real = []
y_real = []

# 96 rows per day (presumably 15-minute samples) — confirm against data.
numRow = 96
day_train_start = 0
day_train_end = 150
#day_predict = 103
day_predict_start = 150
day_predict_end = 299
Exemple #16
0
    # Tail of the last CE input descriptor (its opening lines are above
    # this view).
    'norm': 'lin'
}
# Number of CE input
numInp = 4
## Algorithm to be tested
interfaceDict = {'in': [dict1, dict2, dict3, dict4], 'out': dict0}
# KNN configuration dictionary handed to the Knn wrapper.
ceDict = {
    'interface': interfaceDict,
    'n_neighbors': 4,
    'weights': 'uniform',
    'algorithm': 'auto',
    'n_jobs': 1,
    'complexity': 1
}

algorithmTest = Knn(numInp, 0, [0, 0, 0, 0], ceDict)

# NOTE(review): Python 2 print statement — syntax error under Python 3.
print "Collecting training and test data from GDP"
# Use the collect data routine to fetch training data in separate lists
# for input and output
trainRecStart = 100
trainRecStop = 200
numTrainingSamples = trainRecStop - trainRecStart + 1
inDataTrain, outDataTrain = algorithmTest.interface.collectData(
    trainRecStart, trainRecStop)
# Use the collect data routine to fetch test data in separate lists
# for input and output
testRecStart = 201
testRecStop = 250
numExecuteSamples = testRecStop - testRecStart + 1
inDataTest, outDataTest = algorithmTest.interface.collectData(
Exemple #17
0
## For different tests, these values will vary.
inputFilePath = "dish.csv"
outputFilePath = "dishOutput.csv"
inputFile = open(inputFilePath)
outputFile = open(outputFilePath)
inputReader = csv.reader(inputFile)
outputReader = csv.reader(outputFile)
complexity = Complexity.secondOrder
numTrainingSamples = 96
numExecuteSamples = 96
# NOTE(review): duplicate construction — inputReader/outputReader were
# already created a few lines above.
inputReader = csv.reader(inputFile)
outputReader = csv.reader(outputFile)

# NOTE(review): rebinds 'csv' from the module to the parsed record array,
# so csv.reader is unusable below this line.
csv = recfromcsv(inputFilePath, delimiter=',')
## Change the name of the algorithm to test it out.
algorithmTest = Knn(Complexity.secondOrder, 7, 0, [0, 0, 0, 0, 0, 0, 0], {})
teslaTimestamps = {}
knnTimestamps = {}

# 96 rows per day (presumably 15-minute samples) — confirm against data.
numRow = 96
day_train_start = 0
day_train_end = 0
day_predict_start = 0
day_predict_end = 0
#read in csv and parse data to trainer

for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    row = csv[i]
    date = row[0]
    # recfromcsv yields bytes for string columns; decode to str.
    date = date.decode()
    dishwasher = csv[i + 1][3]
Exemple #18
0
# Number of cross-validation folds per dataset.
N_split_iris = 10
N_split_wine = 3  # --> fewer folds: more would take too long
N_split_abalone = 3  # --> fewer folds: more would take too long

# Pick the best K for each dataset by cross-validation.
print('IRIS')
K_opti_iris = cv_knn(N_split_iris, X_train_iris, y_train_iris, X_test_iris,
                     y_test_iris, K_values_iris)
print('WINE')
K_opti_wine = cv_knn(N_split_wine, X_train_wine, y_train_wine, X_test_wine,
                     y_test_wine, K_values_wine)
print('ABALONE')
K_opti_abalone = cv_knn(N_split_abalone, X_train_abalone, y_train_abalone,
                        X_test_abalone, y_test_abalone, K_values_abalone)

# Train one classifier per dataset with its selected K
clf_Knn_iris = Knn(K=K_opti_iris)
clf_Knn_iris.train(X_train_iris, y_train_iris)

clf_Knn_wine = Knn(K=K_opti_wine)
clf_Knn_wine.train(X_train_wine, y_train_wine)

clf_Knn_abalone = Knn(K=K_opti_abalone)
clf_Knn_abalone.train(X_train_abalone, y_train_abalone)
"""
Après avoir fait l'entrainement, évaluez votre modèle sur 
les données d'entrainement.
IMPORTANT : 
    Vous devez afficher ici avec la commande print() de python,
    - la matrice de confusion (confusion matrix)
    - l'accuracy
    - la précision (precision)
                bayesC.start()
        else:
            # Classify each prepared document with Bayes using the
            # user-supplied path option.
            # NOTE(review): range stops at len(...) - 1, so the final
            # document is never classified — confirm this is intended.
            for r in range(0, len(documents_to_clasificated) - 1):
                bayesC = ClasificationBayes(documents_to_clasificated[r],
                                            option)
                bayesC.start()
    elif action_option == "3":
        print(
            "Comienza el programa de clasifiación de datos.\n"
            "Elija opción para la ruta donde obtener los documentos: R (raiz del proyecto) o escriba ruta"
        )
        option = input()
        if option == "R" or option == "r":

            knn = Knn(documents, categories, documents_by_category, "")
            knn.start_algorithm()
        else:
            kann = Knn(documents, categories, documents_by_category, option)
            knn.start_algorithm()

        documents_to_clasificated = AuxiliaryMethod.get_documents_words_to_clasificated(
        )

        k = input("Establezca un k mayor que cero: ")
        if k.isdigit() and int(k) > 0:
            for r in range(0, len(documents_to_clasificated) - 1):
                KnnC = ClasificationKnn(documents_to_clasificated[r],
                                        documents, categories,
                                        "datos/datos_knn.csv", k)
                KnnC.start()