Esempio n. 1
0
def findKVariableWeighted(combis, labels, validlabels, k=52):
    """Score weight combinations on the validation set and print the best one.

    For every combination in ``combis`` the files ``dataset1.csv`` and
    ``validation1.csv`` are loaded with ``genDataSet`` and scaled with
    ``scale2`` using the module-level ``scalers`` and that combination.
    The validation rows are then classified with ``findLabel`` and the
    percentage of rows whose label matches ``validlabels`` is recorded.

    Args:
        combis: Iterable of weight combinations; each one is passed
            unchanged to ``scale2``.
        labels: Labels of the training rows, passed to ``findLabel``.
        validlabels: Expected labels of the validation rows.
        k: The K handed to ``findLabel``. Defaults to 52, the value that
            used to be hard-coded here.

    Returns:
        None. Prints the best ``(accuracy_percentage, combi)`` tuple and
        then the current local time as ``HH:MM:SS``. When there is nothing
        to score (no combinations or no validation labels) only the time
        is printed.
    """
    total = len(validlabels)
    wresults = []
    # Accuracy is undefined without validation labels, so skip scoring then.
    if total:
        for combi in combis:
            # NOTE(review): both files are re-read for every combination.
            # Hoisting the reads out of the loop is only safe if scale2
            # does not modify its input -- confirm before changing.
            scaleddata = scale2(genDataSet("dataset1.csv"), scalers, combi)
            scaledvaliddata = scale2(genDataSet("validation1.csv"), scalers, combi)
            classifiedlabels = findLabel(scaleddata, scaledvaliddata, labels, k)
            correctcounter = sum(
                1
                for h, expected in enumerate(validlabels)
                if expected == classifiedlabels[h]
            )
            # Store exactly one final score per combination. Appending inside
            # the per-row loop would add a partial score for every validation
            # row and bloat the list that is searched below.
            wresults.append((correctcounter / total * 100.0, combi))
    if wresults:
        # max() returns the first of several equally good entries, which is
        # the same entry a stable descending sort would put at index 0.
        print(max(wresults, key=lambda tup: tup[0]))
    t = time.localtime()
    current_time = time.strftime("%H:%M:%S", t)
    print(current_time)
Esempio n. 2
0
# Finds the optimal K using the validation set and answers the assignment question with this K.
from kNN import findK, scale, genDataSet, genLabels, findLabel, findminmax, scale2
import time

def _print_clock():
    """Print the current local time formatted as HH:MM:SS."""
    print(time.strftime("%H:%M:%S", time.localtime()))


# Load the training set and compute its scalers with findminmax.
data = genDataSet("dataset1.csv")
scalers = findminmax(data)
weights = [1, 1, 2, 4, 2, 1, 3]

# Scale the training, validation and test sets with the same scalers and
# weights.
scaleddata = scale2(data, scalers, weights)
validdata = genDataSet("validation1.csv")
scaledvaliddata = scale2(validdata, scalers, weights)
testdata = genDataSet("days.csv")
scaledtestdata = scale2(testdata, scalers, weights)

# Labels for the training and validation sets.
labels = genLabels("dataset1.csv", 2000)
validlabels = genLabels("validation1.csv", 2001)

# Let findK pick K from the validation set; the clock is printed before and
# after so the duration of the search can be read from the output.
_print_clock()
accuracy, optimalK = findK(scaleddata, scaledvaliddata, labels, validlabels)
_print_clock()

# Classify the test data with the K that was found and report the outcome.
predictedlabels = findLabel(scaleddata, scaledtestdata, labels, optimalK)
print(predictedlabels)
print("Used K: ", optimalK)
print("Accuracy validationset: ", accuracy)
Esempio n. 3
0
# Answers the assignment question with a given K.
from kNN import findK, scale, genDataSet, genLabels, findLabel

# Load and scale the training data.
trainingdata = genDataSet("dataset1.csv")
scaleddata = scale(trainingdata)
# Load and scale the data that has to be classified (days.csv).
testdata = genDataSet("days.csv")
scaledtestdata = scale(testdata)

# Labels that belong to the training data.
labels = genLabels("dataset1.csv", 2000)

# Classify the test data with a fixed K of 61 and print the resulting labels.
K = 61
predictedlabels = findLabel(scaleddata, scaledtestdata, labels, K)
print(predictedlabels)
Esempio n. 4
0
# This is not part of the assignment.
# Here I try to see whether per-parameter weights help at all.
# I used multithreading so that I could test a useful number of combinations within an hour.

from kNN import findK, scale, genDataSet, genLabels, findLabel, scale2, findminmax
import _thread as trd
import time

# Build the labels for the training and validation sets.
labels = genLabels("dataset1.csv", 2000)
validlabels = genLabels("validation1.csv", 2001)

# Load the training data and compute its scalers with findminmax;
# findKVariableWeighted below reads `scalers` as a module-level name.
data = genDataSet("dataset1.csv")
scalers = findminmax(data)

# Function that, for a given K, determines the list of 7 weights for the 7 parameters that gives the highest accuracy
def findKVariableWeighted(combis, labels, validlabels):
    wresults = []
    for combi in combis:
        scaleddata = scale2(genDataSet("dataset1.csv"), scalers, combi)
        scaledvaliddata = scale2(genDataSet("validation1.csv"), scalers, combi)
        classifiedlabels = findLabel(scaleddata, scaledvaliddata, labels, 52) # << Hier voer je bepaalde K in
        correctcounter = 0
        for h in range(len(validlabels)):
            if validlabels[h] == classifiedlabels[h]:
                correctcounter += 1
            wresults.append((correctcounter / len(validlabels) * 100.0, combi))
    wresults.sort(key=lambda tup: tup[0], reverse=True)
    print(wresults[0])
    t = time.localtime()