コード例 #1
0
 def clustering(self):
     """Standardize the selected feature columns and run k-means on them.

     Every column named in ``self.cluster_factors`` is read from
     ``self.feature_table`` and rescaled to zero mean and unit standard
     deviation (NumPy's default population std, ``ddof=0``).  The
     transposed matrix is handed to ``Cluster.KMeans``; the result of
     ``cluster(...)`` is stored in ``self.y_km`` and passed to
     ``self.plot``.
     """
     # Force a float array: with integer-valued columns the in-place
     # assignment in the loop below would silently truncate the
     # standardized values back to integers.
     X = np.array(
         [np.array(self.feature_table[name]) for name in self.cluster_factors],
         dtype=float,
     )
     # NOTE: a constant column has std == 0, so its standardized values
     # come out as nan/inf (NumPy emits a RuntimeWarning, no exception).
     for idx, row in enumerate(X):
         X[idx] = (row - row.mean()) / row.std()
     # X was built with one row per factor; transpose so that each factor
     # becomes a column.
     X = X.T
     # NOTE(review): test() is invoked on a throwaway KMeans instance; what
     # it does is not visible from here -- confirm it is still wanted.
     Cluster.KMeans(X).test()
     self.y_km = cluster(Cluster.KMeans(X), self.feature_table[self.cluster_factors], self.CLUSTER)
     # fig_km is not used in the visible part of this method.
     fig_km = self.plot(self.y_km)
コード例 #2
0
ファイル: main.py プロジェクト: arikoi0703/gitrepo
# Numeric settings taken from command-line arguments 3-5; they are forwarded
# to Cluster.KMeans(...) and KMeans.run(...) further down in the script.
size = int(sys.argv[3])
num_cluster = int(sys.argv[4])
iterator = int(sys.argv[5])
dataSet = []

# Iris data preprocessing: split every comma-separated record and drop its
# last field (presumably the class label) before storing the row.  The
# remaining values are kept as strings.
# The context manager guarantees the file handle is closed, even if an
# exception is raised while reading.
with open(fileName, 'r') as fp:
    for record in fp:
        fields = record.split(',')
        fields.pop()
        dataSet.append(fields)

# Baseline experiment: repeat k-means `run_times` times (30 according to the
# original note), each run starting from a random assignment.
# A single KMeans instance is reused; the SSE of every run is collected and
# the resulting clusters are printed.
km = Cluster.KMeans(dataSet, dimension, size, num_cluster)
sse_km = []
for _ in range(run_times):
    km.run(iterator)
    run_sse = km.get_sse()
    sse_km.append(run_sse)
    print(km.get_cluster())

# Second experiment: start the clustering from a "definer" instead.
# Each inner list gives interval boundaries [a, b, c, ...] (presumably one
# list per feature dimension):
#   x < a      -> x = 0
#   a < x < b  -> x = 1, and so on.
definerSet = [
    [5, 6],
    [2.8, 3.5],
    [2, 5],
    [1, 1.5],
]
km_definer = Cluster.KMeans(dataSet, dimension, size, num_cluster)
sse_definer = []
for _ in range(run_times):
    km_definer.run(iterator, definerSet)
    definer_sse = km_definer.get_sse()
    sse_definer.append(definer_sse)