Ejemplo n.º 1
0
Archivo: kmeans.py Proyecto: gszxwd/ML
 def clustering(self, K, N=100):
     """Run K-means on the normalized samples in self.data.

     Each element of self.data is expected to be (feature_vector, ...);
     only element [0] (the feature vector) is used here.  Features are
     assumed to be normalized to [0, 1] -- TODO confirm, since the
     initial centroids are drawn uniformly from [0, 1).

     Parameters:
         K: number of clusters.
         N: number of assignment/update iterations (default 100).

     Returns:
         dict mapping cluster id (0..K-1) to the list of sample indices
         assigned to that cluster after the final iteration.
     """
     dim = len(self.data[0][0])
     classes = {}
     # Generate K initial centroids uniformly at random in [0, 1)^dim.
     pts = []
     for i in range(K):
         classes[i] = []
         pts.append([random.random() for _ in range(dim)])
     for n in range(N):
         # Assignment step: attach every sample to its nearest centroid.
         for i in range(K):
             classes[i] = []
         for i in range(len(self.data)):
             best_dist = float('inf')   # was a magic 9999 sentinel
             best_cluster = 0
             for j in range(K):
                 dist = Distance.euclideanDist(self.data[i][0], pts[j])
                 if dist < best_dist:
                     best_dist = dist
                     best_cluster = j
             classes[best_cluster].append(i)
         # Update step: each centroid becomes the mean of its members.
         # BUGFIX: the original summed self.data[j] (a position within
         # the member list, not a sample index) and divided by the
         # feature count once per member instead of dividing by the
         # cluster size once at the end.
         for i in range(K):
             members = classes[i]
             if not members:
                 continue   # keep the previous centroid for an empty cluster
             means = [0] * dim
             for idx in members:
                 for k in range(dim):
                     means[k] = means[k] + self.data[idx][0][k]
             for k in range(dim):
                 means[k] = means[k] / float(len(members))
             pts[i] = means
     return classes
Ejemplo n.º 2
0
Archivo: knn.py Proyecto: gszxwd/ML
 def predict(self, sample, k):
     """Classify `sample` by majority vote among its k nearest neighbors.

     Each element of self.tdata is expected to be (feature_vector, label).

     Parameters:
         sample: feature vector to classify.
         k: number of nearest training samples to vote.

     Returns:
         The majority label among the k nearest neighbors (ties broken
         arbitrarily), or None when there is no training data.  The vote
         counts are also printed, preserving the original side effect.
     """
     # BUGFIX: the original kept candidates as [{9999: 0}] * k, which
     # aliases ONE shared dict k times, compared dicts with sort()
     # (Python 2 only), and inserted new candidates at index 0 without
     # re-sorting -- so the "k nearest" set was wrong.  Plain
     # (distance, index) tuples sort correctly and portably.
     neighbors = []
     for i in range(len(self.tdata)):
         dist = Distance.euclideanDist(sample, self.tdata[i][0])
         neighbors.append((dist, i))
     neighbors.sort()
     # Majority vote among the k closest training samples.
     counter = {}
     for dist, i in neighbors[:k]:
         label = self.tdata[i][1]
         counter[label] = counter.get(label, 0) + 1
     print(counter)
     if not counter:
         return None
     # BUGFIX: the original tallied votes but never returned a result.
     return max(counter, key=counter.get)