/
test21.py
120 lines (104 loc) · 3.91 KB
/
test21.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import heapq
import math
from collections import Counter

import numpy as np
import pylab as pl
from matplotlib.colors import ListedColormap
# Presumably the number of samples per class; no code visible here reads
# this global -- TODO confirm whether it is still needed.
nItemsInClass = 0
# Number of distinct class labels in the data set (two-class problem).
nClasses = 2
# Load labelled 2-D points from a comma-separated text file
def generateData(path='data.txt'):
    """Load the labelled 2-D points used for training and testing.

    Every non-blank line of the file must hold three comma-separated
    fields: ``x``, ``y`` and an integer class label.

    Args:
        path: File to read. Defaults to ``data.txt`` in the current working
            directory, the only file the function used to read.

    Returns:
        A list of ``[[x, y], point_class]`` entries in file order, where
        ``x`` and ``y`` are floats and ``point_class`` is an int.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    data = []
    # The with-statement closes the file, so no explicit close() is needed.
    with open(path, 'r') as f:
        for line in f:
            # Skip blank lines (for example a trailing empty line) instead
            # of failing on float('').
            if not line.strip():
                continue
            subs = line.split(",")
            x = float(subs[0])
            y = float(subs[1])
            point_class = int(subs[2])
            data.append([[x, y], point_class])
    return data
# Split the data into two parts by alternating rows:
# rows at even positions (0, 2, 4, ...) become train data,
# rows at odd positions (1, 3, 5, ...) become test data
def splitTrainTest(data):
    """Split *data* into train and test parts by alternating rows.

    Rows at even positions (0, 2, 4, ...) go to the train list and rows at
    odd positions (1, 3, 5, ...) go to the test list; the relative order of
    the rows is preserved in both.

    Args:
        data: Iterable of rows; each row is passed through untouched.

    Returns:
        A ``(trainData, testData)`` tuple of lists.
    """
    trainData, testData = [], []
    for position, row in enumerate(data):
        # Odd positions feed the test list, even positions the train list.
        bucket = testData if position % 2 else trainData
        bucket.append(row)
    return trainData, testData
# Main classification procedure
def classifyKNN(trainData, testData, k):
    """Label every test point by majority vote among its k nearest neighbours.

    The set of classes is taken from the labels that actually occur among
    the neighbours, so labels are not restricted to a fixed range.

    Args:
        trainData: Collection of rows whose item 0 is a 2-D point and whose
            item 1 is the class label. It is traversed once per test point,
            so it must be re-iterable (a list, not a one-shot iterator).
        testData: Iterable of 2-D points (indexable with 0 and 1). It is
            consumed exactly once, so an iterator such as ``zip(...)`` works.
        k: Number of nearest neighbours that vote; must be at least 1.

    Returns:
        A list with one predicted label per test point, in input order.
        When several labels tie for the most votes, the largest label wins.

    Raises:
        ValueError: If ``k`` is less than 1, or if there is a point to
            classify but ``trainData`` is empty.
    """
    if k < 1:
        # A non-positive k would silently select a meaningless neighbour set.
        raise ValueError("k must be at least 1, got %r" % (k,))

    # Euclidean distance between two 2-dimensional points
    def dist(a, b):
        return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)

    testLabels = []
    for testPoint in testData:
        # Pick the k closest train rows as (distance, label) pairs without
        # sorting the whole distance list; equal distances fall back to
        # comparing labels.
        nearest = heapq.nsmallest(
            k,
            ((dist(testPoint, row[0]), row[1]) for row in trainData))
        if not nearest:
            raise ValueError("trainData is empty; cannot classify a point")
        # Count how many of the nearest neighbours carry each label.
        votes = Counter(label for _, label in nearest)
        # Most votes wins; ties are broken in favour of the larger label.
        winner, _ = max(votes.items(), key=lambda item: (item[1], item[0]))
        testLabels.append(winner)
    return testLabels
# Calculate classification accuracy
def calculateAccuracy(k):
    """Print and return the k-NN accuracy on the test part of the data.

    The data set is loaded with ``generateData()``, divided with
    ``splitTrainTest()``, and the test points are labelled by
    ``classifyKNN()`` using the train rows.

    Args:
        k: Number of neighbours, passed through to ``classifyKNN``.

    Returns:
        The fraction of test rows whose predicted label equals the label
        stored with the row, as a float.

    Raises:
        ValueError: If the split produced no test rows, so that no accuracy
            can be computed.
    """
    data = generateData()
    trainData, testDataWithLabels = splitTrainTest(data)
    if not testDataWithLabels:
        # Without this guard the division below fails with an opaque
        # ZeroDivisionError.
        raise ValueError(
            "cannot compute accuracy: the test set is empty")
    # Strip the labels so the classifier only sees the coordinates.
    testData = [row[0] for row in testDataWithLabels]
    testDataLabels = classifyKNN(trainData, testData, k)
    hits = sum(predicted == row[1]
               for predicted, row in zip(testDataLabels, testDataWithLabels))
    accuracy = hits / float(len(testDataWithLabels))
    print("Accuracy: ", accuracy)
    return accuracy
# Visualize classification regions
def showDataOnMesh(k):
    """Draw the k-NN classification regions with the data points on top.

    The data set is loaded with ``generateData()``; every node of a regular
    grid covering the points is labelled by ``classifyKNN()`` and drawn as a
    coloured mesh, then the points themselves are scattered over it using
    their stored labels as colours.

    Args:
        k: Number of neighbours, passed through to ``classifyKNN``.
    """
    # Generate a mesh of nodes that covers all train cases
    def generateTestMesh(trainData):
        border_offset = 0.5  # margin added around the bounding box
        h = 0.1  # grid step
        xs = [row[0][0] for row in trainData]
        ys = [row[0][1] for row in trainData]
        testX, testY = np.meshgrid(
            np.arange(min(xs) - border_offset, max(xs) + border_offset, h),
            np.arange(min(ys) - border_offset, max(ys) + border_offset, h))
        return [testX, testY]

    trainData = generateData()
    testMesh = generateTestMesh(trainData)
    # Flatten both grids and pair them up so each mesh node is one test point.
    testMeshLabels = classifyKNN(
        trainData,
        zip(testMesh[0].ravel(), testMesh[1].ravel()),
        k)
    classColormap = ListedColormap(['#FF9900', '#00FF00'])
    testColormap = ListedColormap(['#FFCCAA', '#AAFFAA'])
    pl.ion()
    # Start from an empty figure: this function is called again after every
    # change of k, and without clearing each call would stack another mesh
    # and scatter layer on top of the previous ones.
    pl.clf()
    pl.pcolormesh(testMesh[0],
                  testMesh[1],
                  np.asarray(testMeshLabels).reshape(testMesh[0].shape),
                  cmap=testColormap)
    pl.scatter([row[0][0] for row in trainData],
               [row[0][1] for row in trainData],
               c=[row[1] for row in trainData],
               cmap=classColormap)
    # Give the GUI event loop a moment to render the updated figure.
    pl.pause(0.05)
def main():
    """Interactive driver: show results for k = 3, then adjust k from stdin.

    Each input line is echoed. A line consisting of ``+`` raises k by 2 and
    a line consisting of ``-`` lowers it by 2 unless that would take k below
    1; after either change the accuracy and the plot are recomputed. Any
    other line is ignored. End-of-input or Ctrl-C ends the loop.
    """
    k = 3
    calculateAccuracy(k)
    showDataOnMesh(k)
    while True:
        try:
            input_key = input()
        except (EOFError, KeyboardInterrupt):
            # No more input (or the user interrupted): leave quietly instead
            # of dying with a traceback.
            break
        print(input_key)
        if input_key == '+':
            k += 2
        elif input_key == '-' and k > 1:
            k -= 2
        else:
            # Unknown key, or '-' at the minimum k: nothing to redraw.
            continue
        calculateAccuracy(k)
        showDataOnMesh(k)


if __name__ == '__main__':
    main()