def findKVariableWeighted(combis, labels, validlabels, k=52):
    """Evaluate weight combinations and report the most accurate one.

    For every ``combi`` in ``combis`` the training set ("dataset1.csv") and
    the validation set ("validation1.csv") are loaded with ``genDataSet`` and
    scaled with ``scale2`` using the module-level ``scalers`` and ``combi``.
    The validation points are then classified with ``findLabel`` and the
    percentage of entries in ``validlabels`` that match is recorded.

    The best ``(accuracy, combi)`` tuple is printed, followed by the current
    local time formatted as ``HH:MM:SS``.

    Args:
        combis: Iterable of weight combinations to evaluate.
        labels: Labels of the training set, passed through to ``findLabel``.
        validlabels: Expected labels of the validation set.
        k: The K value passed to ``findLabel``. Defaults to 52, the value
            that used to be hard-coded here.

    Returns:
        The ``(accuracy_percentage, combi)`` tuple with the highest accuracy
        (the first one on ties), or ``None`` when ``combis`` is empty.
    """
    total = len(validlabels)
    wresults = []
    for combi in combis:
        # NOTE(review): both CSV files are re-read for every combination.
        # Hoisting the reads is only safe if scale2 does not modify its
        # input in place -- confirm before optimising.
        scaleddata = scale2(genDataSet("dataset1.csv"), scalers, combi)
        scaledvaliddata = scale2(genDataSet("validation1.csv"), scalers, combi)
        classifiedlabels = findLabel(scaleddata, scaledvaliddata, labels, k)

        # A validation label without a corresponding prediction counts as
        # incorrect, because the denominator stays len(validlabels).
        correctcounter = sum(
            1
            for expected, predicted in zip(validlabels, classifiedlabels)
            if expected == predicted
        )
        # Guard against an empty validation set instead of dividing by zero.
        accuracy = correctcounter / total * 100.0 if total else 0.0
        wresults.append((accuracy, combi))

    # max() returns the first maximal element, which is the same element a
    # stable descending sort would place at index 0.
    best = max(wresults, key=lambda tup: tup[0], default=None)
    if best is not None:
        print(best)

    print(time.strftime("%H:%M:%S", time.localtime()))
    return best
# Finds the optimal K with the help of the validation set and answers the
# question using that K.
import time

from kNN import findK, scale, genDataSet, genLabels, findLabel, findminmax, scale2

# Generate the dataset, the validation set and the test set. All three are
# passed through scale2 with the same scalers (taken from the training data)
# and the same weights list.
data = genDataSet("dataset1.csv")
scalers = findminmax(data)
weights = [1, 1, 2, 4, 2, 1, 3]
scaleddata = scale2(data, scalers, weights)
scaledvaliddata = scale2(genDataSet("validation1.csv"), scalers, weights)
scaledtestdata = scale2(genDataSet("days.csv"), scalers, weights)

# Generate the labels for the training and validation sets.
labels = genLabels("dataset1.csv", 2000)
validlabels = genLabels("validation1.csv", 2001)

# Find the optimal K value and its accuracy; a timestamp is printed before
# and after the search so its duration can be read off the output.
t = time.localtime()
current_time = time.strftime("%H:%M:%S", t)
print(current_time)

accuracy, optimalK = findK(scaleddata, scaledvaliddata, labels, validlabels)

t = time.localtime()
current_time = time.strftime("%H:%M:%S", t)
print(current_time)

# Generate and print the labels for the test data, then report the K that
# was used and the accuracy returned by findK.
print(findLabel(scaleddata, scaledtestdata, labels, optimalK))
print("Used K: ", optimalK)
print("Accuracy validationset: ", accuracy)
# Answers the question with a given (fixed) K.
from kNN import findK, scale, genDataSet, genLabels, findLabel

# Load the training set and pass it through scale.
scaleddata = scale(genDataSet("dataset1.csv"))

# Load the test set (days.csv) and pass it through scale.
scaledtestdata = scale(genDataSet("days.csv"))

# Generate the list of labels for the training set.
labels = genLabels("dataset1.csv", 2000)

# Classify the test data with K = 61 and print the resulting labels.
print(findLabel(scaleddata, scaledtestdata, labels, 61))
# This is not part of the assignment.
# Here I try to see whether a weighting per parameter helps at all.
# Multithreading is used so that a useful number of combinations can be
# tested within an hour.
import _thread as trd
import time

from kNN import findK, scale, genDataSet, genLabels, findLabel, scale2, findminmax

# Create the labels.
labels = genLabels("dataset1.csv", 2000)
validlabels = genLabels("validation1.csv", 2001)

# Create the dataset and the scalers derived from it.
data = genDataSet("dataset1.csv")
scalers = findminmax(data)


# Function that, for a given K, determines which list of 7 weights (one per
# parameter) gives the highest accuracy, and prints that result.
def findKVariableWeighted(combis, labels, validlabels):
    wresults = []
    for combi in combis:
        scaleddata = scale2(genDataSet("dataset1.csv"), scalers, combi)
        scaledvaliddata = scale2(genDataSet("validation1.csv"), scalers, combi)
        classifiedlabels = findLabel(scaleddata, scaledvaliddata, labels, 52)  # << enter the chosen K here
        # Count the validation labels that were classified correctly.
        correctcounter = sum(
            1
            for h in range(len(validlabels))
            if validlabels[h] == classifiedlabels[h]
        )
        wresults.append((correctcounter / len(validlabels) * 100.0, combi))
    # Highest accuracy first; the best (accuracy, combi) tuple is printed.
    wresults.sort(key=lambda tup: tup[0], reverse=True)
    print(wresults[0])
    t = time.localtime()