def kmeans(d, points=3, axis=0, iter=20):
    """Cluster the vectors of ``d`` into ``points`` groups with k-means.

    Centroids are seeded in the style of k-means++ and then refined with
    ``iter`` rounds of "assign every vector to its nearest centroid, then
    recompute each centroid".

    Args:
        d: Object exposing ``tovecs(axis)``. The result is used as a mapping
            (``.items()``, ``.values()`` and lookup by key) from keys to
            vectors.
        points: Number of clusters (k).
        axis: Passed through unchanged to ``d.tovecs``.
        iter: Number of refinement rounds. The name shadows the builtin but
            is part of the public signature, so it is kept.

    Returns:
        A ``(centroids, clusters)`` tuple. ``centroids`` is a list of
        ``points`` vectors. ``clusters`` is a list of ``points`` lists holding
        the vectors assigned during the last round, i.e. the assignment made
        *before* the final centroid update. With ``iter=0`` every cluster
        list is empty.

    Raises:
        IndexError: If ``d.tovecs(axis)`` yields no vectors.
    """

    def init_centroids(dok_vecs):
        """Pick ``points`` starting centroids from ``dok_vecs``."""
        # 1a. Choose the initial center c_1. The first stored vector is used
        #     rather than a true uniform random draw.
        #     list(...) keeps this working on Python 3, where dict views
        #     cannot be indexed; next(iter(...)) is not an option here because
        #     the enclosing ``iter`` parameter shadows the builtin.
        centroids = [list(dok_vecs.values())[0]]

        # 1b. Choose the next center c_i = x' in X with probability
        #     D(x')^2 / sum_{x in X} D(x)^2, where D(x) is the distance from
        #     x to the nearest center chosen so far.
        # 1c. Repeat step 1b until k centers have been chosen.
        for _ in range(1, points):
            # Rebuild the weights every round: carrying entries over from
            # earlier rounds would leave stale, duplicated weights in the
            # draw.
            # A vector that is already a center gets weight 0 from min().
            items = [
                (key, min(dok.squaredistance(vec, c) for c in centroids))
                for key, vec in dok_vecs.items()
            ]
            # NOTE(review): weighted_choice_bisect_compile presumably returns
            # a callable that draws one key with probability proportional to
            # its weight -- confirm against the dok module.
            centroids.append(
                dok_vecs[dok.weighted_choice_bisect_compile(items)()]
            )
        return centroids

    dok_vecs = d.tovecs(axis)
    centroids = init_centroids(dok_vecs)

    # Defined up front so that iter=0 returns empty clusters instead of
    # failing on an unbound name at the return statement.
    clusters = [[] for _ in range(points)]

    # Update the centroids.
    for _ in range(iter):
        clusters = [[] for _ in range(points)]

        # Assignment step: (distance, index) tuples make min() pick the
        # nearest centroid, with ties going to the lowest index.
        for vec in dok_vecs.values():
            nearest = min(
                (dok.squaredistance(vec, centroids[k]), k)
                for k in range(points)
            )[1]
            clusters[nearest].append(vec)

        # Update step.
        # NOTE(review): a centroid is only recomputed when its cluster has
        # more than one member; empty and single-member clusters keep their
        # previous centroid. Confirm the single-member case is intentional.
        for k in range(points):
            if len(clusters[k]) > 1:
                centroids[k] = dok.mean(clusters[k])

    return centroids, clusters
def init_centroids(dok_vecs):
    """Pick starting centroids from ``dok_vecs`` in the style of k-means++.

    Args:
        dok_vecs: Mapping from keys to vectors; it is read through
            ``.values()``, ``.items()`` and lookup by key.

    Returns:
        A list of ``points`` vectors taken from ``dok_vecs``.

    Raises:
        IndexError: If ``dok_vecs`` is empty.

    NOTE(review): ``points`` (the number of centroids) is not a parameter; it
    is resolved from the enclosing/global scope at call time. Confirm that it
    is defined wherever this function is called.
    """
    # 1a. Choose the initial center c_1. The first stored vector is used
    #     rather than a true uniform random draw. list(...) keeps this working
    #     on Python 3, where dict views cannot be indexed.
    centroids = [list(dok_vecs.values())[0]]

    # 1b. Choose the next center c_i = x' in X with probability
    #     D(x')^2 / sum_{x in X} D(x)^2, where D(x) is the distance from x to
    #     the nearest center chosen so far.
    # 1c. Repeat step 1b until k centers have been chosen.
    for _ in range(1, points):
        # Rebuild the weights every round: carrying entries over from earlier
        # rounds would leave stale, duplicated weights in the draw.
        # A vector that is already a center gets weight 0 from min().
        items = [
            (key, min(dok.squaredistance(vec, c) for c in centroids))
            for key, vec in dok_vecs.items()
        ]
        # NOTE(review): weighted_choice_bisect_compile presumably returns a
        # callable that draws one key with probability proportional to its
        # weight -- confirm against the dok module.
        centroids.append(dok_vecs[dok.weighted_choice_bisect_compile(items)()])
    return centroids