def clustering(self):
    """Standardize the clustering features, run K-Means and plot the result.

    One row is built per entry of ``self.cluster_factors`` by looking that
    entry up in ``self.feature_table``.  Each row is rescaled to zero mean
    and unit standard deviation, and the matrix is then transposed before
    being handed to ``Cluster.KMeans`` (presumably so that rows are samples
    and columns are features -- confirm against ``Cluster.KMeans``).

    The labels returned by the project helper ``cluster(...)`` are stored
    on ``self.y_km`` and passed to ``self.plot``.  Nothing is returned.
    """
    # Force a float array: with integer feature columns the in-place row
    # assignment below would silently truncate the standardized values.
    X = np.array(
        [np.array(self.feature_table[name]) for name in self.cluster_factors],
        dtype=float,
    )

    for i, row in enumerate(X):
        centered = row - row.mean()
        spread = row.std()
        # A constant feature has zero spread; dividing would fill the row
        # with NaN/inf, so leave it centered (all zeros) instead.
        X[i] = centered / spread if spread else centered

    X = X.T

    # Kept from the original; what test() checks is defined by the
    # project's Cluster.KMeans and is not visible from here.
    Cluster.KMeans(X).test()

    self.y_km = cluster(
        Cluster.KMeans(X),
        self.feature_table[self.cluster_factors],
        self.CLUSTER,
    )
    # The value returned by plot() was bound to an unused local in the
    # original; only the call itself is kept.
    self.plot(self.y_km)
# Remaining command-line parameters; the earlier ones (and fileName,
# dimension, run_times) are set before this point in the script.
size = int(sys.argv[3])
num_cluster = int(sys.argv[4])
iterator = int(sys.argv[5])

# Iris data preprocessing: split every record on commas and drop the last
# field (presumably the class label -- confirm against the data file).
# The remaining fields are kept as strings exactly as read.
dataSet = []
with open(fileName, 'r') as fp:  # closed automatically, even on error
    for lines in fp:
        # Skip blank lines (e.g. a trailing empty line), which would
        # otherwise be stored as empty records.
        if not lines.strip():
            continue
        line = lines.split(',')
        line.pop()
        dataSet.append(line)

# Experiment 1: repeat K-Means run_times times (30 per the original note),
# beginning from a random assignment, and record the SSE of every run.
sse_km = []
km = Cluster.KMeans(dataSet, dimension, size, num_cluster)
for _ in range(run_times):
    km.run(iterator)
    sse_km.append(km.get_sse())
# NOTE(review): the original layout does not show whether this print sat
# inside the loop; it is emitted once, after the final run -- confirm.
print(km.get_cluster())

# Experiment 2: same procedure, but begin by clustering with a "definer".
# Each inner list holds the interval boundaries [a, b, c, ...] for one
# value x:
#   x < a      -> 0
#   a < x < b  -> 1
#   ...
definerSet = [[5, 6], [2.8, 3.5], [2, 5], [1, 1.5]]
sse_definer = []
km_definer = Cluster.KMeans(dataSet, dimension, size, num_cluster)
for _ in range(run_times):
    km_definer.run(iterator, definerSet)
    sse_definer.append(km_definer.get_sse())