Example #1
import operator

import numpy as np


def cv_knn(N_split, X_train, y_train, X_test, y_test, K_values):

    X = np.concatenate((X_train, X_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)

    # Rebuild folds; np.split raises ValueError unless the total
    # sample count is divisible by N_split
    X_splits = np.split(X, N_split)
    y_splits = np.split(y, N_split)

    # Grid search over K
    scores = {}
    for K in K_values:
        accuracies = []
        for i in range(N_split):
            X_train = np.concatenate(np.delete(X_splits, i, 0))
            X_test = X_splits[i]
            y_train = np.concatenate(np.delete(y_splits, i, 0))
            y_test = y_splits[i]

            model = Knn(K=K)
            model.train(X_train, y_train)
            evaluate = model.evaluate(X_test, y_test)

            accuracies.append(evaluate['mean_accuracy'])

        scores[K] = np.mean(accuracies)

    # Print the best (K, score) pair and return that K
    best = max(scores.items(), key=operator.itemgetter(1))
    print(best)
    return best[0]
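
A minimal sketch of how cv_knn might be driven end to end, assuming the project's Knn class (not shown here): the Knn stub below is a hypothetical stand-in that only mimics the train/evaluate interface the function relies on, and the synthetic sample count is chosen divisible by N_split as np.split requires.

import numpy as np

# Hypothetical stand-in for the project's Knn class: majority vote over
# the K nearest training points by Euclidean distance.
class Knn:
    def __init__(self, K=5):
        self.K = K

    def train(self, X, y):
        self.X, self.y = X, y

    def evaluate(self, X, y):
        preds = []
        for x in X:
            dist = np.linalg.norm(self.X - x, axis=1)
            nearest = self.y[np.argsort(dist)[:self.K]]
            values, counts = np.unique(nearest, return_counts=True)
            preds.append(values[np.argmax(counts)])
        return {'mean_accuracy': np.mean(np.array(preds) == y)}

rng = np.random.default_rng(0)
X = rng.normal(size=(120, 4))  # 120 samples: divisible by N_split = 5
y = (X[:, 0] > 0).astype(int)
best_K = cv_knn(5, X[:90], y[:90], X[90:], y[90:], K_values=[1, 3, 5, 7])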
Example #2
def score_result(reducer_function, data, x_scaled, ini, dimensions, label_data,
                 title):
    knn = Knn()
    score = []
    valor_k = range(ini, dimensions)
    for k in valor_k:
        new_data = reducer_function(data, x_scaled, k)
        score.append(knn.avg(new_data, label_data))

    Visualization.hit_rate_per_k(valor_k, score, title)
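
score_result assumes a reducer_function(data, x_scaled, k) returning a k-dimensional dataset, plus Knn.avg and Visualization helpers that this excerpt does not show. As an illustration of the expected reducer signature only, here is a hypothetical PCA-style reducer (an assumption, not the project's code):

import numpy as np

# Hypothetical reducer matching the (data, x_scaled, k) signature used above:
# project the standardized data onto its top-k principal components.
def pca_reducer(data, x_scaled, k):
    centered = x_scaled - x_scaled.mean(axis=0)
    _, _, vt = np.linalg.svd(centered, full_matrices=False)
    return centered @ vt[:k].T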
Example #3
import os

import numpy as np
from sklearn import metrics

from Knn import Knn


class EmgModel:
    def __init__(self, labels):
        self.model = Knn(k=5)
        self.labels = labels
        self.all_data = list()
        self.all_target = list()
        for pose in labels:
            label = labels[pose]
            data = self.load_data(label)
            target = [pose] * len(data)
            self.all_data += data
            self.all_target += target

    def run(self):
        # split into training and test sets (alternating samples)
        train_data = self.all_data[0::2]
        train_target = self.all_target[0::2]
        predict_data = self.all_data[1::2]
        predict_target = self.all_target[1::2]

        # train the model
        self.model.fit(train_data, train_target)
        # predict on the held-out samples
        y = self.model.predict(predict_data)
        num = 0
        for i in range(len(y)):
            if predict_target[i] == y[i]:
                num += 1

        cm = metrics.confusion_matrix(predict_target, y)
        print("Confusion matrix")
        print(cm)
        print("Classification report")
        cr = metrics.classification_report(predict_target, y)
        print(cr)
        print("Accuracy: %s %%" % ((num / len(predict_target)) * 100))

    def load_data(self, label):
        parent_dir = "data/%s" % label
        res = []
        for file in os.listdir(parent_dir):
            filename = "%s/%s" % (parent_dir, file)
            f = open(filename, 'r')
            data = []
            for line in f.readlines():
                row = line.replace('\n', '').replace('[', '').replace(']', '').split(',')
                row = [float(x) for x in row]
                data.append(row)
            f.close()
            data = np.array(data).T
            res.append(data)
        return res
Example #4
 def __init__(self, labels):
     self.model = Knn(k=5)
     self.labels = labels
     self.all_data = list()
     self.all_target = list()
     for pose in labels:
         label = labels[pose]
         data = self.load_data(label)
         target = [pose] * len(data)
         self.all_data += data
         self.all_target += target
Example #5
import sys

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np

from Knn import Knn
from img.data import x, y


def main():
    knn = Knn(7)

    knn.fit(x, y)

    img = mpimg.imread(sys.argv[1])

    # iterate over 50x50 tiles (integer division)
    for j in range(img.shape[0] // 50 + 1):
        for i in range(img.shape[1] // 50 + 1):
            # histogram of tile pixel intensities; h = counts, b = bin edges
            h, b = np.histogram(img[j * 50:(j * 50) + 50,
                                    i * 50:(i * 50) + 50])
            if knn.predict(h) == 1:
                img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50, 1] = 0

    plt.imshow(img)
    plt.show()
Example #6
 def checkPerformance(self, valSet, trainSet, k):
     result = 0
     for i in range(len(valSet)):
         # perform knn
         predicted = Knn.knn(trainSet, valSet[i], k, True)
         # check if predicted class and actual class match
         result += self.checkPrediction(predicted, valSet[i][-1])
     # return percent correctly predicted
     return result / len(valSet)
Example #7
import numpy as np


def cross_validation(origiData, origiLabel, splitNum):
    lastIndex = 0
    offset = int(len(origiData) / splitNum)
    accurateRateSum = 0
    for i in range(1, splitNum + 1):
        # slice the dataset into splitNum-1 training folds and 1 test fold
        tempData = np.split(origiData, (lastIndex, i * offset))
        tempLabel = np.split(origiLabel, (lastIndex, i * offset))

        testData = tempData[1]
        testLabel = tempLabel[1]
        trainData = np.concatenate([tempData[0], tempData[2]])
        trainLabel = np.concatenate([tempLabel[0], tempLabel[2]])
        # start index of the next fold
        lastIndex = i * offset

        # predict with knn
        knn = Knn(trainData, trainLabel, 10)
        predictRsl = knn.predict(testData)
        accuracy = caculate_accuracy(predictRsl, testLabel)
        accurateRateSum += accuracy
    # return the mean accuracy across folds
    return accurateRateSum / splitNum
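
cross_validation depends on a Knn(trainData, trainLabel, k) constructor and a caculate_accuracy helper that this excerpt does not show. A minimal sketch with hypothetical stand-ins for both, so the function can be exercised on synthetic data:

import numpy as np

# Hypothetical stand-in matching the Knn(trainData, trainLabel, k) / predict
# interface used above.
class Knn:
    def __init__(self, trainData, trainLabel, k):
        self.X = np.asarray(trainData)
        self.y = np.asarray(trainLabel)
        self.k = k

    def predict(self, testData):
        preds = []
        for x in np.asarray(testData):
            idx = np.argsort(np.linalg.norm(self.X - x, axis=1))[:self.k]
            values, counts = np.unique(self.y[idx], return_counts=True)
            preds.append(values[np.argmax(counts)])
        return np.array(preds)

def caculate_accuracy(pred, truth):  # spelling kept to match the call above
    return np.mean(np.asarray(pred) == np.asarray(truth))

rng = np.random.default_rng(1)
data = rng.normal(size=(150, 4))
label = (data[:, 0] > 0).astype(int)
print(cross_validation(data, label, 5))  # mean accuracy over 5 folds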
Example #8
    def checkPerformance(self, valSet, trainSet, k):
        result = 0
        for i in range(len(valSet)):
            # perform knn
            predicted = Knn.knn(trainSet, valSet[i], k, True)
            # check if predicted class and actual class match
            result += self.checkPrediction(predicted, valSet[i][-1])
        # return percent correctly predicted
        return result / len(valSet)


# testing data
#testClass = np.array([[1, 2, 1], [2, 3, 0], [7, 8, 1], [4, 5, 0], [6, 6, 1],
#                      [6, 5, 1], [0, 1, 0], [1, 1, 0], [2, 2, 0], [3, 7, 1],
#                      [3, 1, 1], [9, 7, 1], [1, 5, 0], [7, 2, 1], [5, 5, 0]])

#editednn = EditedNN()
#print(editednn.eknn(testClass, 3))
Example #9
    def eknn(self, trainSet, k):

        # separate into training and validation sets
        ennSet, val = train_test_split(trainSet, test_size=0.2, random_state=0)

        # repeat until performance stops improving on validation set
        prevPerformance = 0.0
        perfImprove = True
        loopCount = len(ennSet)
        while (perfImprove):
            # copy previous enn set
            prevTrain = ennSet.copy()

            # loop through training set
            i = 0
            while (i < loopCount):
                # remove ith point from training set b/c it will be test point for knn
                knnTestPoint = ennSet[i]
                tempTrain = np.delete(ennSet, i, 0)
                # perform knn
                predicted = Knn.knn(tempTrain, knnTestPoint, k, True)
                # check if predicted class and actual class match
                result = self.checkPrediction(predicted, knnTestPoint[-1])
                # if not equal then keep set with removed point
                if result == 0:
                    ennSet = tempTrain
                    loopCount -= 1
                else:
                    i += 1

            # check performance on the held-out validation set; stop if degrading
            curPerformance = self.checkPerformance(val, ennSet, k)
            if curPerformance > prevPerformance:
                prevPerformance = curPerformance
            else:
                ennSet = prevTrain
                perfImprove = False

        pd.DataFrame(ennSet).to_csv("reduced_datasets/enn.csv",
                                    header=None,
                                    index=None)
        # return edited-nn set
        return ennSet
Example #10
import csv

from numpy import recfromcsv

from Knn import Knn
from ContextEngineBase import Complexity

## For different tests, these values will vary.
inputFilePath = "dish.csv"
outputFilePath = "dishOutput.csv"
complexity = Complexity.secondOrder
numTrainingSamples = 96
numExecuteSamples = 96
inputFile = open(inputFilePath)
outputFile = open(outputFilePath)
inputReader = csv.reader(inputFile)
outputReader = csv.reader(outputFile)
# note: rebinding `csv` shadows the csv module from here on
csv = recfromcsv(inputFilePath, delimiter=',')
## Change the name of the algorithm to test it out.
algorithmTest = Knn(complexity, 7, 0, [0, 0, 0, 0, 0, 0, 0], {})
teslaTimestamps = {}
knnTimestamps = {}

print(algorithmTest.complexity)
print(algorithmTest.functionOrder)

numRow = 96
day_train_start = 0
day_train_end = 0
day_predict_start = 1
day_predict_end = 1
#read in csv and parse data to trainer

for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    row = csv[i]
Example #11
 def __init__(self):
     Knn.inputFeature(self)
     Knn.inputClass(self)
     Knn.inputData(self)
     Knn.inputData(self)
     exit()
Example #12
                bayesC.start()
        else:
            for r in range(len(documents_to_clasificated)):
                bayesC = ClasificationBayes(documents_to_clasificated[r],
                                            option)
                bayesC.start()

    elif action_option == "3":
        print(
            "Starting the data classification program.\n"
            "Choose an option for the path to read the documents from: R (project root) or type a path"
        )
        option = input()
        if option == "R" or option == "r":

            knn = Knn(documents, categories, documents_by_category, "")
            knn.start_algorithm()
        else:
            knn = Knn(documents, categories, documents_by_category, option)
            knn.start_algorithm()

        documents_to_clasificated = AuxiliaryMethod.get_documents_words_to_clasificated()

        k = input("Enter a k greater than zero: ")
        if k.isdigit() and int(k) > 0:
            for r in range(len(documents_to_clasificated)):
                KnnC = ClasificationKnn(documents_to_clasificated[r],
                                        documents, categories,
                                        "datos/datos_knn.csv", k)
                KnnC.start()
Example #13
numTrainingSamples = trainRecStop - trainRecStart + 1
inDataTrain, outDataTrain = gdpTest.collectData(trainRecStart, trainRecStop)
# Use the collect data routine to fetch test data in separate lists
# for input and output
testRecStart = 2001
testRecStop = 3000
numExecuteSamples = testRecStop - testRecStart + 1
inDataTest, outDataTest = gdpTest.collectData(testRecStart, testRecStop)
print("Done: collecting data from GDP")
print("Beginning loading and training")
# For testing purposes, print the test input data;
# each line of output corresponds to one input data field (record)
# print(inDataTest)

## Change the name of the algorithm to test it out.
algorithmTest = Knn(complexity, numInp, 0, numInp*[0], {})
timestamps = {}
# Add training data to CE object
for i in range(len(outDataTrain)):
    # recording time stamps before and after adding to measure load time
    firstTS = time.time()
    algorithmTest.addSingleObservation(inDataTrain[:][i], outDataTrain[i])
    secondTS = time.time()
    timestamps["load" + str(i)] = secondTS - firstTS
# training CE using the added data, while the training time is measured
firstTS = time.time()
algorithmTest.train()
secondTS = time.time()
timestamps["train"] = secondTS - firstTS
print "Done: loading and training"
print "Beginning execution"
Example #14
from numpy import recfromcsv
from time import strptime
import matplotlib.pyplot as plt
from Knn import Knn
import numpy as np

csv = recfromcsv('refridge.csv', delimiter=',')

trainer = Knn(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0)
x_train = []
y_train = []
x_predict = []
x_real = []
y_real = []

numRow = 96
day_train_start = 0
day_train_end = 3
day_predict = 4
# read in csv and parse data to trainer
# use the first 4 weeks' data as the training set
for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    #for row in csv:
    #date = csv[0][0]
    #energy = csv[0][1]
    row = csv[i]
    date = row[0]
    energy = row[1]

    date = date.replace("/", " ")
    date = date.replace(":", " ")
Example #15
#start = time.time()
#print("hello")
#end = time.time()

import csv

import numpy as np
from numpy import recfromcsv

from Knn import Knn

# open output file
output = open('test/knnDishwasherTestResult.csv', 'w')
fieldnames = ['real_power', 'predict_power']
writer = csv.DictWriter(output, fieldnames=fieldnames)
writer.writeheader()
#writer.writerow({'real_power': 'Baked', 'predict_power': 'Beans'})

csv = recfromcsv('all.csv', delimiter=',')

trainer = Knn(complexity=2,
              numInputs=7,
              inputClassifiers=np.empty([7]),
              outputClassifier=0,
              appFieldsDict=0)
x_train = []
y_train = []
x_predict = []
y_predict = []
x_real = []
y_real = []

numRow = 96
day_train_start = 0
day_train_end = 150
#day_predict = 103
day_predict_start = 150
day_predict_end = 299
Example #16
State in comments which parameters you used.
In short, you will:
1- Initialize your classifier with its parameters
2- Load the datasets
3- Train your classifier
4- Test it

"""

# Initialize your parameters

k = 3

# Initialize/instantiate your classifiers with their parameters

knn_iris = Knn(k=k)
knn_vote = Knn(k=k)
knn_monks_1 = Knn(k=k)
knn_monks_2 = Knn(k=k)
knn_monks_3 = Knn(k=k)
bayesNaif_iris = BayesNaif()
bayesNaif_vote = BayesNaif()
bayesNaif_monks = BayesNaif()

# Load/read the datasets

iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset(
    0.7)

congressional_train, congressional_train_labels, congressional_test, \
    congressional_test_labels = load_datasets.load_congressional_dataset(0.7)
Example #17
        train_data_file_name = "../HandWrittenLetters.txt"
        classes_label = 'ABCDE'
        # numbers = '1245'
        letter_to_digit = Task_E.letter_2_digit_convert(classes_label)
        # for i in numbers:
        #     letter_to_digit.append(i)
        data_frame = Task_E.pickDataClass(train_data_file_name,
                                          letter_to_digit)
        train_data_set_without_labels, train_y, test_data_set_without_labels, test_y, train_data_with_labels, test_data_with_labels = Task_E.splitData2TestTrain(
            data_frame, 39, 9)
        centroid_data_frame_train = deepcopy(train_data_with_labels)
        centroid_data_frame_test = deepcopy(test_data_with_labels)
        # make_file_and_save_data_train = Task_E.store(train_data_set_without_labels.T, train_y, 'jenil_train.csv')
        # make_file_and_save_data_test = Task_E.store(test_data_set_without_labels.T, test_y, 'jenil_test.csv')
        k = 5
        knn_object = Knn(k)
        data_with_euclidean_distance = knn_object.calculate_distance(
            train_data_with_labels.values, test_data_with_labels.values)
        accuracy = knn_object.get_accuracy([
            (record['Test Label'], record['Classification'])
            for record in data_with_euclidean_distance
        ])
        print('Accuracy of Knn is:', accuracy)
        # Linear Regression
        linear_regression_object = LinearRegression.LinearRegression()
        N_train, L_train, Xtrain = len(
            train_y), train_y, train_data_set_without_labels.T

        N_test, Ytest, Xtest = len(
            test_y), test_y, test_data_set_without_labels.T
Example #18
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from Knn import Knn

iris = datasets.load_digits()  # note: despite the name, this loads the digits dataset
# print(iris)
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1000)
# print(X_train.shape)
# print(X_train[0])

# plt.figure()
# plt.scatter(X[:,[0]], X[:,[1]], c=y)
# plt.show()
acc = 0
for k in [3, 5, 7]:
    clf = Knn(k)

    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    t_acc = np.sum(predictions == y_test) / len(y_test)
    if t_acc > acc:
        acc = t_acc
        best_k = k

print(acc)
print(best_k)
Example #20
import csv
from Person import Person
from Classifier import Classifier
from Lr import Lr
from Knn import Knn
from Kmeans import testClustering

# loading data from csv file
personList = []
with open('diabetes-dataset.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
    count = 1
    for row in reader:
        if count != 1:
            data = ''.join(row).split(',')
            p = Person(data)
            personList.append(p)
        count += 1

print('results based on logistic regression')
lr = Lr()
c = Classifier(personList, lr)
c.run()
print('results based on K nearest neighbours')
knn = Knn()
c = Classifier(personList, knn)
c.run()
print('results based on K-means')
for k in (2, 4, 6):
    print('\n     Test k-means (k = ' + str(k) + ')')
    posFracs = testClustering(personList, k, 2)
Example #21
atributos = Setup.atributos
estudiantes = Setup.estudiantes
valoresPosibles = Setup.valoresPosibles

operadores = {}

OperadoresBuilder.multiple(operadores, OperadoresBuilder.hamming, ["school", "sex", "schoolsup", "famsup", "paid",
                                                                   "activities", "nursery", "higher", "internet",
                                                                   "romantic", "address", "famsize", "Pstatus",
                                                                   "Mjob", "Fjob", "reason", "guardian"])

OperadoresBuilder.multiple(operadores, OperadoresBuilder.rango, ["age", "Medu", "Fedu",
                                    "traveltime", "studytime", "failures", "famrel",
                                    "freetime", "goout", "Dalc", "Walc", "health", "absences"], valoresPosibles)

knn = Knn(estudiantes, "G3", atributos, operadores)


# Multiple trials will be run to estimate the algorithm's effectiveness
# for any choice of training/test cases
CANTIDAD_PRUEBAS = 25

# Set aside 1/5 of the cases for testing
cantEstTest = len(estudiantes) // 5

shuffle(estudiantes)

estudiantesTest = estudiantes[:cantEstTest]
estudiantesEntrenamiento = estudiantes[cantEstTest:]

cantEstTest = len(estudiantes) // 5
Example #22
          'param': 'apparent_power',
          'lag': 4,
          'norm': 'lin'}
# Number of CE input
numInp = 4
## Algorithm to be tested
interfaceDict = {'in': [dict1, dict2, dict3, dict4], 
                 'out': dict0}
ceDict = {'interface': interfaceDict,
          'n_neighbors': 4,
          'weights': 'uniform',
          'algorithm': 'auto',
          'n_jobs': 1,
          'complexity': 1}

algorithmTest = Knn(numInp, 0, [0, 0, 0, 0], ceDict)

print "Collecting training and test data from GDP"
# Use the collect data routine to fetch training data in separate lists
# for input and output
trainRecStart = 100
trainRecStop = 200
numTrainingSamples = trainRecStop - trainRecStart + 1
inDataTrain, outDataTrain = algorithmTest.interface.collectData(trainRecStart, trainRecStop)
# Use the collect data routine to fetch test data in separate lists
# for input and output
testRecStart = 201
testRecStop = 250
numExecuteSamples = testRecStop - testRecStart + 1
inDataTest, outDataTest = algorithmTest.interface.collectData(testRecStart, testRecStop)
print("Done: collecting data from GDP")
Example #23
    ConfusionMatrixListKnn: list = list()  # list of confusion matrices for Knn

    print(f"Knn Train ratio: {knn_train_ratio}")
    print(f"findBestKWithCrossValidation: {findBestKWithCrossValidation}")
    print("\n")

    print('-' * 175)
    print(f"Iris dataset classification: \n")
    startTime = time.time()

    #  Training on the Iris dataset

    iris_train, iris_train_labels, iris_test, iris_test_labels = load_datasets.load_iris_dataset(
        knn_train_ratio)
    iris_knn = Knn(distance_func=distanceFunc)
    iris_knn.train(iris_train,
                   iris_train_labels,
                   findBestKWithCrossValidation=findBestKWithCrossValidation)
    cm, _, _, _ = iris_knn.test(iris_test, iris_test_labels)
    ConfusionMatrixListKnn.append(cm)

    print(
        f"\n --- Elapse time: {1_000*(time.time() - startTime):.2f} ms --- \n")

    print('-' * 175)
    print(f"Congressional dataset classification: \n")
    startTime = time.time()

    #  Training on the Congressional dataset
Example #24
import csv

from numpy import recfromcsv

from Knn import Knn
from ContextEngineBase import Complexity

## For different tests, these values will vary.
inputFilePath = "dish.csv"
outputFilePath = "dishOutput.csv"
complexity = Complexity.secondOrder
numTrainingSamples = 96
numExecuteSamples = 96
inputFile = open(inputFilePath)
outputFile = open(outputFilePath)
inputReader = csv.reader(inputFile)
outputReader = csv.reader(outputFile)
csv = recfromcsv(inputFilePath, delimiter=',')
## Change the name of the algorithm to test it out.
algorithmTest = Knn(complexity, 7, 0, [0, 0, 0, 0, 0, 0, 0], {})
teslaTimestamps = {}
knnTimestamps = {}

print(algorithmTest.complexity)
print(algorithmTest.functionOrder)

numRow = 96
day_train_start = 0
day_train_end = 0
day_predict_start = 1
day_predict_end = 1
# read in csv and parse data to trainer

for i in range(numRow * day_train_start, numRow * (day_train_end + 1)):
    row = csv[i]
Example #25
atributos = Setup.atributos
estudiantes = Setup.estudiantes
valoresPosibles = Setup.valoresPosibles

operadores = {}

OperadoresBuilder.multiple(operadores, OperadoresBuilder.hamming, ["school", "sex", "schoolsup", "famsup", "paid",
                                                                   "activities", "nursery", "higher", "internet",
                                                                   "romantic", "address", "famsize", "Pstatus",
                                                                   "Mjob", "Fjob", "reason", "guardian"])

OperadoresBuilder.multiple(operadores, OperadoresBuilder.rango, ["age", "Medu", "Fedu",
                                    "traveltime", "studytime", "failures", "famrel",
                                    "freetime", "goout", "Dalc", "Walc", "health", "absences"], valoresPosibles)

knn = Knn(estudiantes, "G3", atributos, operadores)


# Multiple trials will be run to estimate the algorithm's effectiveness
# for any choice of training/test cases
CANTIDAD_PRUEBAS = 3

# Set aside 1/5 of the cases for testing
cantEstTest = len(estudiantes) // 5

shuffle(estudiantes)

estudiantesTest = estudiantes[:cantEstTest]
estudiantesEntrenamiento = estudiantes[cantEstTest:]

cantEstTest = len(estudiantes) // 5
Example #26
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from Knn import Knn

np.random.seed(0)
X = np.sort(5 * np.random.rand(40, 1), axis=0)
T = np.linspace(0, 5, 500)[:, np.newaxis]
y = np.sin(X).ravel()

# Add noise to targets
y[::5] += 1 * (0.5 - np.random.rand(8))
#y = y.ravel();

###############################################################################
# Fit regression model
trainer = Knn(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0)
trainer.addBatchObservations(X, y)
trainer.train()


y_predict = np.empty([0])

for i in range(T.shape[0]):
    result = trainer.execute(T[i])
    y_predict = np.concatenate((y_predict, result))

plt.subplot(1, 1, 1)
plt.scatter(X, y, c='k', label='data')
plt.plot(T, y_predict, c='g', label='prediction')
plt.axis('tight')
plt.legend()
Example #27
#!/usr/bin/env python

import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from Knn import Knn
from img.data import x, y

if len(sys.argv) < 2:
    print "Informar caminho da imagem"
    exit()

knn = Knn(7)

knn.fit(x, y)

img = mpimg.imread(sys.argv[1])

# iterate over 50x50 tiles (integer division)
for j in range(img.shape[0] // 50 + 1):
    for i in range(img.shape[1] // 50 + 1):
        # histogram of tile pixel intensities; h = counts, b = bin edges
        h, b = np.histogram(img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50])
        if knn.predict(h) == 1:
            img[j * 50:(j * 50) + 50, i * 50:(i * 50) + 50, 1] = 0

plt.imshow(img)
plt.show()
Example #28
    'norm': 'lin'
}
# Number of CE input
numInp = 4
## Algorithm to be tested
interfaceDict = {'in': [dict1, dict2, dict3, dict4], 'out': dict0}
ceDict = {
    'interface': interfaceDict,
    'n_neighbors': 4,
    'weights': 'uniform',
    'algorithm': 'auto',
    'n_jobs': 1,
    'complexity': 1
}

algorithmTest = Knn(numInp, 0, [0, 0, 0, 0], ceDict)

print "Collecting training and test data from GDP"
# Use the collect data routine to fetch training data in separate lists
# for input and output
trainRecStart = 100
trainRecStop = 200
numTrainingSamples = trainRecStop - trainRecStart + 1
inDataTrain, outDataTrain = algorithmTest.interface.collectData(
    trainRecStart, trainRecStop)
# Use the collect data routine to fetch test data in separate lists
# for input and output
testRecStart = 201
testRecStop = 250
numExecuteSamples = testRecStop - testRecStart + 1
inDataTest, outDataTest = algorithmTest.interface.collectData(
Example #29
N_split_iris = 10
N_split_wine = 3  # --> larger values take too long
N_split_abalone = 3  # --> larger values take too long

print('IRIS')
K_opti_iris = cv_knn(N_split_iris, X_train_iris, y_train_iris, X_test_iris,
                     y_test_iris, K_values_iris)
print('WINE')
K_opti_wine = cv_knn(N_split_wine, X_train_wine, y_train_wine, X_test_wine,
                     y_test_wine, K_values_wine)
print('ABALONE')
K_opti_abalone = cv_knn(N_split_abalone, X_train_abalone, y_train_abalone,
                        X_test_abalone, y_test_abalone, K_values_abalone)

# Train your classifiers
clf_Knn_iris = Knn(K=K_opti_iris)
clf_Knn_iris.train(X_train_iris, y_train_iris)

clf_Knn_wine = Knn(K=K_opti_wine)
clf_Knn_wine.train(X_train_wine, y_train_wine)

clf_Knn_abalone = Knn(K=K_opti_abalone)
clf_Knn_abalone.train(X_train_abalone, y_train_abalone)
"""
After training, evaluate your model on the
training data.
IMPORTANT:
    Here you must print, using Python's print() command,
    - the confusion matrix
    - the accuracy
    - the precision
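
The docstring cuts off before the evaluation code it describes. As a sketch of what that step might look like, the helper below prints the three requested quantities with sklearn.metrics; this is an assumption about the approach, and y_pred would come from whatever prediction method the course's Knn class exposes.

from sklearn import metrics

# Hypothetical evaluation helper: prints the confusion matrix, accuracy and
# (macro-averaged) precision for a pair of true/predicted label vectors.
def report(y_true, y_pred):
    print(metrics.confusion_matrix(y_true, y_pred))
    print("accuracy:", metrics.accuracy_score(y_true, y_pred))
    print("precision:", metrics.precision_score(y_true, y_pred, average="macro"))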
Example #30
def main():
    # Initialize your parameters
    i = ld.load_iris_dataset(0.7)
    c = ld.load_congressional_dataset(0.7)
    m1 = ld.load_monks_dataset(1)
    m2 = ld.load_monks_dataset(2)
    m3 = ld.load_monks_dataset(3)

    # Initialize/instantiate your classifiers with their parameters

    euclide = lambda x, y: pow(
        (x - y), 2
    )  # No need to take the square root, since it doesn't change the neighbor ranking
    diff_binaire = lambda x, y: 0 if x == y else 1

    knn_i = Knn(train=i[0], train_labels=i[1], dist_equation=euclide)
    knn_c = Knn(train=c[0], train_labels=c[1], dist_equation=euclide)
    knn_m1 = Knn(train=m1[0], train_labels=m1[1], dist_equation=diff_binaire)
    knn_m2 = Knn(train=m2[0], train_labels=m2[1], dist_equation=diff_binaire)
    knn_m3 = Knn(train=m3[0], train_labels=m3[1], dist_equation=diff_binaire)

    bn_i = BayesNaifClassifier([1])
    bn_c = BayesNaifClassifier([0])
    bn_m1 = BayesNaifClassifier([2])
    bn_m2 = BayesNaifClassifier([2])
    bn_m3 = BayesNaifClassifier([2])

    # Train your classifiers
    print("\n=============\nKNN train tests\n=============")
    knn_i.train_test(i[0], i[1], "Dataset: Iris, Training")
    knn_c.train_test(c[0], c[1], "Dataset: Congressional, Training")
    knn_m1.train_test(m1[0], m1[1], "Dataset: MONKS-1, Training")
    knn_m2.train_test(m2[0], m2[1], "Dataset: MONKS-2, Training")
    knn_m3.train_test(m3[0], m3[1], "Dataset: MONKS-3, Training")

    print("\n=============\nBayes Naif train tests\n=============")
    bn_i.train(i[0], i[1], "Dataset: Iris, Training")
    bn_c.train(c[0], c[1], "Dataset: Congressional, Training")
    bn_m1.train(m1[0], m1[1], "Dataset: MONKS-1, Training")
    bn_m2.train(m2[0], m2[1], "Dataset: MONKS-2, Training")
    bn_m3.train(m3[0], m3[1], "Dataset: MONKS-3, Training")

    print("\n=============\nKNN tests\n=============")
    # Test your classifiers
    knn_i.train_test(i[2], i[3], "Dataset: Iris, Test")
    knn_c.train_test(c[2], c[3], "Dataset: Congressional, Test")
    knn_m1.train_test(m1[2], m1[3], "Dataset: MONKS-1, Test")
    knn_m2.train_test(m2[2], m2[3], "Dataset: MONKS-2, Test")
    knn_m3.train_test(m3[2], m3[3], "Dataset: MONKS-3, Test")

    print("\n=============\nBayes Naif tests\n=============")
    bn_i.test(i[2], i[3], "Dataset: Iris, Test")
    bn_c.test(c[2], c[3], "Dataset: Congressional, Test")
    bn_m1.test(m1[2], m1[3], "Dataset: MONKS-1, Test")
    bn_m2.test(m2[2], m2[3], "Dataset: MONKS-2, Test")
    bn_m3.test(m3[2], m3[3], "Dataset: MONKS-3, Test")
Example #31
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from Knn import Knn

np.random.seed(0)
X = np.sort(5 * np.random.rand(40, 1), axis=0)
T = np.linspace(0, 5, 500)[:, np.newaxis]
y = np.sin(X).ravel()

# Add noise to targets
y[::5] += 1 * (0.5 - np.random.rand(8))
#y = y.ravel();

###############################################################################
# Fit regression model
trainer = Knn(complexity=0, numInputs=1, discreteOutputs=0, discreteInputs=0)
trainer.addBatchObservations(X, y)
trainer.train()

y_predict = np.empty([0])

for i in range(T.shape[0]):
    result = trainer.execute(T[i])
    y_predict = np.concatenate((y_predict, result))

plt.subplot(1, 1, 1)
plt.scatter(X, y, c='k', label='data')
plt.plot(T, y_predict, c='g', label='prediction')
plt.axis('tight')
plt.legend()
plt.title("KNeighborsRegressor (k = %i, weights = '%s')" % (2, "uniform"))