def plot_unnecessary_part_by_clustering(in_mask_path, in_img_path, out_img_path, show_flag=0):
    """Draw the clustered regions of a detail mask as rectangles on a frame image.

    The white (255) pixels of the mask are clustered with x-means, and the
    bounding rectangle of each cluster is drawn in red on the frame image,
    which is then written to ``out_img_path``.

    Args:
        in_mask_path: Path of the input detail-mask image.
        in_img_path: Path of the frame image the rectangles are drawn on.
        out_img_path: Path the annotated image is written to.
        show_flag: 1 shows the annotated image in a window after writing it;
            any other value (default 0) does not.

    Returns:
        None.

    Raises:
        OSError: If the mask or the frame image cannot be read.
    """
    in_mask = cv2.imread(in_mask_path, cv2.IMREAD_GRAYSCALE)
    if in_mask is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cannot read mask image: {}'.format(in_mask_path))

    print('Checking unnecessary part...')
    # (row, col) of every white pixel, in row-major order -- the same order a
    # nested "for row / for col" scan would produce.
    points = np.argwhere(in_mask == 255)
    data = points.tolist()

    initializer = xmeans.kmeans_plusplus_initializer(data=data, amount_centers=2)
    initial_centers = initializer.initialize()
    xm = xmeans.xmeans(data=data, initial_centers=initial_centers)
    xm.process()
    clusters = xm.get_clusters()

    img_out = cv2.imread(in_img_path)
    if img_out is None:
        raise OSError('cannot read frame image: {}'.format(in_img_path))

    print('Clustering unnecessary part...')
    for cluster in tqdm(clusters):
        # The points are (row, col), so boundingRect's first axis is the row
        # axis here. boundingRect only accepts int32/float32 point sets.
        row0, col0, n_rows, n_cols = cv2.boundingRect(
            points[cluster].astype(np.int32))
        # cv2.rectangle expects (x, y) == (col, row) corner points.
        img_out = cv2.rectangle(img_out, (col0, row0),
                                (col0 + n_cols, row0 + n_rows), (0, 0, 255), 2)

    cv2.imwrite(out_img_path, img_out)

    if show_flag == 1:
        cv2.imshow('window', img_out)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def make_range_mask(in_mask_path, out_mask_path, show_flag=0):
    """Generate a range mask from a detail mask using x-means.

    The white (255) pixels of the detail mask are clustered with x-means and
    the bounding rectangle of each cluster is filled with 255 in a new mask,
    which is written to ``out_mask_path``.

    Args:
        in_mask_path: Path of the input detail-mask image.
        out_mask_path: Path the generated range mask is written to.
        show_flag: 1 shows the generated mask in a window after writing it;
            any other value (default 0) does not.

    Returns:
        None.

    Raises:
        OSError: If the mask image cannot be read.
    """
    in_mask = cv2.imread(in_mask_path, cv2.IMREAD_GRAYSCALE)
    if in_mask is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cannot read mask image: {}'.format(in_mask_path))
    height, width = in_mask.shape[:2]

    print('Checking unnecessary part...')
    # (row, col) of every white pixel, in row-major order -- the same order a
    # nested "for row / for col" scan would produce.
    points = np.argwhere(in_mask == 255)
    data = points.tolist()

    initializer = xmeans.kmeans_plusplus_initializer(data=data, amount_centers=2)
    initial_centers = initializer.initialize()
    xm = xmeans.xmeans(data=data, initial_centers=initial_centers)
    xm.process()
    clusters = xm.get_clusters()

    mask = create_blank_mask(height, width)

    print('Clustering unnecessary part...')
    for cluster in tqdm(clusters):
        # The points are (row, col), so boundingRect's first axis is the row
        # axis here. boundingRect only accepts int32/float32 point sets.
        row0, col0, n_rows, n_cols = cv2.boundingRect(
            points[cluster].astype(np.int32))
        # Fill the whole bounding rectangle at once.
        mask[row0:row0 + n_rows, col0:col0 + n_cols] = 255

    cv2.imwrite(out_mask_path, mask)

    if show_flag == 1:
        cv2.imshow('window', mask)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def xmean(X):
    """Cluster ``X`` with x-means and return one cluster label per sample.

    The search starts from two k-means++ centers. The result is a flat list
    in which position ``k`` holds the index of the cluster that sample ``k``
    was assigned to. If an ``AttributeError`` is raised anywhere during
    clustering, an empty list is returned instead.
    """
    try:
        seeds = kmeans_plusplus_initializer(X, 2).initialize()
        model = xmeans(X, seeds, ccore=False)
        model.process()
        groups = model.get_clusters()

        # One slot per clustered sample, filled in below.
        labels = [None] * sum(len(members) for members in groups)
        for label, members in enumerate(groups):
            for sample_idx in members:
                labels[sample_idx] = label
    except AttributeError:
        labels = []
    return labels
def Xmeans(_df):
    """Cluster the rows of ``_df`` by their four taste columns with x-means.

    Args:
        _df: Table-like object whose ``'sweety'``, ``'acidity'``, ``'bitter'``
            and ``'rich'`` columns each provide ``.tolist()``.

    Returns:
        The clusters found by x-means: a list of lists of row indices. The
        clusters are also printed.
    """
    from pyclustering.cluster import xmeans

    feature_columns = ('sweety', 'acidity', 'bitter', 'rich')
    # pyclustering expects one point per row. Stacking the columns yields a
    # (4, n_samples) array, so it is transposed to (n_samples, 4); without the
    # transpose the four columns themselves would be clustered.
    cast_arr = np.array([_df[name].tolist() for name in feature_columns]).T

    init_center = xmeans.kmeans_plusplus_initializer(cast_arr, 2).initialize()
    xm = xmeans.xmeans(cast_arr, init_center, ccore=False)
    # Run the clustering.
    xm.process()
    clusters = xm.get_clusters()
    print(clusters)
    return clusters
def cluster(x, y):
    """Return the length of the longest mate cycle among x-means clusters.

    The points ``(x[k], y[k])`` are clustered with x-means. Each cluster
    center ``i`` is linked to the center ``j`` that minimises
    ``abs(centers[i][1] - centers[j][0])`` (center 0 if no gap is below 100).
    Following these links from every cluster eventually closes a cycle; the
    length of the longest cycle found is returned.

    Args:
        x: First coordinate of every sample.
        y: Second coordinate of every sample.

    Returns:
        The longest cycle length, or 1 if clustering or the cycle search
        fails with any ordinary exception (best-effort fallback).
    """
    try:
        points = np.c_[x, y]
        init_center = xmeans.kmeans_plusplus_initializer(points, 2).initialize()
        xm = xmeans.xmeans(points, init_center, ccore=False)
        xm.process()
        centers = xm.get_centers()
        num_clans = len(centers)

        # mate_of[i] is the cluster whose first coordinate is closest to the
        # second coordinate of cluster i.
        mate_of = []
        for i in range(num_clans):
            mate = 0
            best_gap = 100
            for j in range(num_clans):
                gap = abs(centers[i][1] - centers[j][0])
                if gap < best_gap:
                    mate = j
                    best_gap = gap
            mate_of.append(mate)

        candidate = list(range(num_clans))
        cur_cycle = 0
        while candidate:
            path = [candidate[0]]
            cur = candidate[0]
            while True:
                nxt = mate_of[cur]
                if nxt in path:
                    # The walk re-entered the path: the cycle is the tail of
                    # the path starting at the revisited cluster.
                    cur_cycle = max(cur_cycle, len(path) - path.index(nxt))
                    for clan in path:
                        if clan in candidate:
                            candidate.remove(clan)
                    break
                path.append(nxt)
                cur = nxt
    except Exception:
        # Deliberate best-effort fallback; KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        cur_cycle = 1
    return cur_cycle
def clustering(genomes):
    """Build a genome-by-attribute matrix, z-score it, cluster it and plot it.

    ``genomes`` is a sequence of ``(gid, genome)`` pairs; the attribute names
    are the union of every ``genome.info`` key, in sorted order. The matrix
    is standardised with ``scipy.stats.zscore``, clustered with x-means
    (starting from two k-means++ centers) and passed to ``draw_clusters``.
    """
    attribute_names = set()
    for _gid, genome in genomes:
        attribute_names.update(genome.info.keys())
    ordered_names = sorted(attribute_names)
    column_of = {name: col for col, name in enumerate(ordered_names)}

    props = np.zeros((len(genomes), len(ordered_names)))
    for row, (_gid, genome) in enumerate(genomes):
        for name, _value in genome.info.items():
            # NOTE(review): the stored attribute value is ignored and a random
            # number is written instead -- confirm this is intentional.
            props[row][column_of[name]] = random.random()

    props = scipy.stats.zscore(props)

    seeds = kmeans_plusplus_initializer(props, 2).initialize()
    model = xmeans(props, seeds, ccore=False)
    model.process()
    draw_clusters(props, model.get_clusters())
def cluster(data):
    """Classify the kinship structure formed by the x-means clusters of ``data``.

    Outline of what the code does:

    1. ``data`` is clustered with x-means; every cluster with more than five
       members is kept as a "clan" (its center) and its member indices are
       stored in ``clusters_clan``.
    2. For every clan a "mate" and a "child" clan are chosen by nearest squared
       distance between selected center components; clans that are nobody's
       child are dropped and the step is repeated until none is dropped.
    3. Cycles are searched by following the child links and the mate links,
       and the largest ones are recorded.
    4. The same cycle search is repeated on two 2-component projections of the
       centers (components 0/2 and components 1/3).
    5. A structure code (0-5) and a descent code (0-3) are derived from the
       recorded cycle lengths.

    Assumes every cluster center has at least four components (indices 0-3
    are read) -- TODO confirm what each component represents.

    Returns:
        ``[structure, descent, clusters_clan]``. ``structure`` and ``descent``
        stay 0 when no cluster has more than five members.
    """
    structure = 0
    descent = 0
    data = np.array(data)
    clans = []
    clusters_clan = []
    try:
        init_center = xmeans.kmeans_plusplus_initializer(data, 2).initialize()
        xm = xmeans.xmeans(data, init_center, ccore=False)
        xm.process()
        sizes = [len(cluster) for cluster in xm.get_clusters()]
        centers = xm.get_centers()
        clusters = xm.get_clusters()
        # Keep only clusters with more than five members.
        for i in range(len(sizes)):
            if sizes[i] > 5:
                clans.append(centers[i])
                clusters_clan.append(clusters[i])
    except:
        # NOTE(review): bare except silently ignores every failure (including
        # KeyboardInterrupt); the function then continues with whatever was
        # collected so far.
        pass
    if len(clans) > 0:
        # Repeat the mate/child assignment until every remaining clan is the
        # child of some clan. clusters_clan is not filtered by this pruning.
        while len(clans) > 0:
            num_clans = len(clans)
            clan_ls = []
            for i in range(num_clans):
                # Defaults: the clan itself, with its own squared distance
                # minus 1 as the value a competitor has to beat.
                mate = i
                child = i
                cur_mate = (clans[i][2] - clans[i][0])**2 + (
                    clans[i][3] - clans[i][1])**2 - 1
                cur_child = (clans[i][0] - clans[i][0])**2 + (
                    clans[i][3] - clans[i][1])**2 - 1
                for j in range(num_clans):
                    # Mate: components (2, 3) of clan i against components
                    # (0, 1) of clan j.
                    mate_cur = (clans[i][2] - clans[j][0])**2 + (
                        clans[i][3] - clans[j][1])**2
                    # Child: components (0, 3) of clan i against components
                    # (0, 1) of clan j.
                    child_cur = (clans[i][0] - clans[j][0])**2 + (
                        clans[i][3] - clans[j][1])**2
                    if mate_cur < cur_mate:
                        mate = j
                        cur_mate = mate_cur
                    if child_cur < cur_child:
                        child = j
                        cur_child = child_cur
                # Each entry is [clan index, mate index, child index].
                clan_ls.append([i, mate, child])
            # Distinct clans that appear as somebody's child.
            cur_ls = list(set(np.array(clan_ls)[:, -1]))
            if len(cur_ls) == num_clans:
                break
            else:
                clans = [clans[i] for i in cur_ls]
        cur_descent_cycle = 0
        cur_marriage_cycle = 0
        cur_population = 0
        cur_clans = []
        candidate = list(range(len(clans)))
        while len(candidate) > 0:
            marriage_path = []
            cur = candidate[-1]
            man_path = [cur]
            kinship_clans = []
            # Follow child links until a clan repeats; keep only the cycle.
            while True:
                next = clan_ls[cur][2]
                if next in man_path:
                    man_path = man_path[man_path.index(next):]
                    break
                else:
                    man_path.append(next)
                    cur = next
            kinship_clans.extend(man_path)
            descent_cycle = len(man_path)
            cur_woman_cycle_cur = 0
            # From every clan of the child cycle, follow mate links until a
            # clan repeats and remember the longest mate cycle.
            for clan in man_path:
                cur_path = [clan]
                cur = clan
                while True:
                    next = clan_ls[cur][1]
                    if next in cur_path:
                        cur_path = cur_path[cur_path.index(next):]
                        kinship_clans.extend(cur_path)
                        if len(cur_path) > cur_woman_cycle_cur:
                            cur_woman_cycle_cur = len(cur_path)
                        break
                    else:
                        cur_path.append(next)
                        cur = next
            marriage_cycle = cur_woman_cycle_cur
            # The start clan and every clan of the child cycle are done.
            candidate.pop()
            for man in man_path:
                if man in candidate:
                    candidate.remove(man)
            kinship_clans = list(set(kinship_clans))
            # Record this component only if it is at least as large as the
            # current record on all three measures.
            if descent_cycle >= cur_descent_cycle and marriage_cycle >= cur_marriage_cycle and len(
                    kinship_clans) >= len(cur_clans):
                cur_descent_cycle = descent_cycle
                cur_marriage_cycle = marriage_cycle
                cur_clans = kinship_clans[:]
        # Cycle search on center components (0, 2) only.
        cur_paternal_cycle = 0
        clans_ori = clans[:]
        clans = []
        for clan in clans_ori:
            clans.append([clan[0], clan[2]])
        if len(clans) > 0:
            num_clans = len(clans)
            clan_ls = []
            for i in range(num_clans):
                mate = i
                cur_mate = (clans[i][1] - clans[i][0])**2 - 1
                for j in range(num_clans):
                    mate_cur = (clans[i][1] - clans[j][0])**2
                    if mate_cur < cur_mate:
                        mate = j
                        cur_mate = mate_cur
                clan_ls.append([i, mate])
            candidate = list(range(len(clans)))
            while len(candidate) > 0:
                cur = candidate[-1]
                marriage_path = [cur]
                kinship_clans = []
                population = 0
                while True:
                    next = clan_ls[cur][1]
                    if next in marriage_path:
                        marriage_path = marriage_path[marriage_path.index(next):]
                        break
                    else:
                        marriage_path.append(next)
                        cur = next
                candidate.pop()
                for clan in marriage_path:
                    if clan in candidate:
                        candidate.remove(clan)
                if len(marriage_path) >= cur_paternal_cycle:
                    cur_paternal_cycle = len(marriage_path)
        # Same cycle search on center components (1, 3) only.
        clans = []
        cur_maternal_cycle = 0
        for clan in clans_ori:
            clans.append([clan[1], clan[3]])
        if len(clans) > 0:
            num_clans = len(clans)
            clan_ls = []
            for i in range(num_clans):
                mate = i
                cur_mate = (clans[i][1] - clans[i][0])**2 - 1
                for j in range(num_clans):
                    mate_cur = (clans[i][1] - clans[j][0])**2
                    if mate_cur < cur_mate:
                        mate = j
                        cur_mate = mate_cur
                clan_ls.append([i, mate])
            candidate = list(range(len(clans)))
            while len(candidate) > 0:
                cur = candidate[-1]
                marriage_path = [cur]
                kinship_clans = []
                population = 0
                while True:
                    next = clan_ls[cur][1]
                    if next in marriage_path:
                        marriage_path = marriage_path[marriage_path.index(next):]
                        break
                    else:
                        marriage_path.append(next)
                        cur = next
                candidate.pop()
                for clan in marriage_path:
                    if clan in candidate:
                        candidate.remove(clan)
                if len(marriage_path) >= cur_maternal_cycle:
                    cur_maternal_cycle = len(marriage_path)
        # Map the recorded cycle lengths to a structure code; the first
        # matching rule wins.
        if cur_marriage_cycle * cur_descent_cycle == 0:
            structure = 0
        elif cur_marriage_cycle == 1 and cur_descent_cycle == 1:
            structure = 1
        elif cur_marriage_cycle == 2 and cur_descent_cycle == 1:
            structure = 2
        elif cur_marriage_cycle == cur_descent_cycle == len(cur_clans) == 2:
            structure = 2
        elif cur_marriage_cycle > 2 and cur_descent_cycle == 1:
            structure = 3
        elif cur_marriage_cycle == cur_descent_cycle == len(cur_clans):
            structure = 3
        elif cur_marriage_cycle > 1 and cur_descent_cycle > 1 and len(
                cur_clans) > 3:
            structure = 4
        else:
            structure = 5
        # Bit 1 is set when the (0, 2) cycle is longer than 1, bit 0 when the
        # (1, 3) cycle is longer than 1.
        descent = 2 * (cur_paternal_cycle > 1) + 1 * (cur_maternal_cycle > 1)
    return [structure, descent, clusters_clan]
for line in file: if line[0] == "N": continue else: data = line.split() X.append([float(data[1]), float(data[2])]) Xnp = np.append(Xnp, np.array([[float(data[1]), float(data[2])]]), axis=0) print("loading now") index += 1 except Exception as error: print(error) finally: file.close() initializer = xmeans.kmeans_plusplus_initializer(data=Xnp, amount_centers=2) initial_centers = initializer.initialize() xm = xmeans.xmeans(data=X, initial_centers=initial_centers) xm.process() clusters = xm.get_clusters() #pyclustering.utils.draw_clusters(data=X, clusters=clusters) print("cluster number", len(clusters)) cluster_num = len(clusters) if __name__ == '__main__': learned_flag = False epoch = 0 cell_num = 0 cell = [] min_i = 0
def x_cal_plot(X, city, r, cluster_num, h, w, now):
    """Cluster ``X`` with x-means, plot the label map and a pie chart, save them.

    The clustering starts from two k-means++ centers and may grow up to
    ``cluster_num`` clusters. The per-sample labels are reshaped to
    ``(h, w)`` and drawn next to a pie chart of the cluster sizes; the figure
    is saved as ``<imagefile>.png`` and the results are handed on to
    ``x_get_hist``.
    """
    import matplotlib.patches as mpatches

    # Build the image file name.
    band_label = ' '.join(map(str, r))
    imagefile = ''.join([
        'images/x-', city, '_bandnum_', band_label, '_clusternum_',
        str(cluster_num), '_', now
    ])

    # Start the search from two clusters.
    seeds = kmeans_plusplus_initializer(X, 2).initialize()
    # Run the clustering.
    model = xmeans(X,
                   seeds,
                   kmax=cluster_num,
                   tolerance=0.025,
                   criterion=0,
                   ccore=True)
    model.process()
    # The clusters are available through get_clusters().
    clusters = model.get_clusters()
    centers = model.get_centers()
    n_clusters = len(clusters)

    print(f'X shape:{X.shape}')
    label_map = np.zeros(X.shape[0], dtype=int)
    cluster_stats = np.zeros((2, n_clusters), dtype=int)
    print(f'X_cluster_new shape:{label_map.shape}')
    print(f'X_cluster_info shape:{cluster_stats.shape}')
    print(cluster_stats)

    total_items = 0
    for idx, members in enumerate(clusters):
        size = len(members)
        print(f'Cluster {idx}: {size}')
        print(f'Cluster {idx} Center : {centers[idx]}')
        print(f'Cluster {idx} first 10: {members[:10]}')
        print(f'Cluster {idx} last 10: {members[-10:]}')
        total_items += size
        # Row 0 holds the cluster id, row 1 the cluster size.
        cluster_stats[:, idx] = (idx, size)
        # Write the cluster id into the label of every member sample.
        for sample_idx in members:
            label_map[sample_idx] = idx
    label_map = label_map.reshape(h, w)

    print(f'Cluster: {n_clusters}')
    print(f'Cluster total items: {total_items}')
    print(f'X_cluster_new shape: {label_map.shape}')
    print(cluster_stats)

    # --- label map -------------------------------------------------------
    fig = plt.figure(dpi=400)
    fig.suptitle(''.join(('X-Means ', city, ': cluster:', str(n_clusters))))
    plt.get_current_fig_manager().full_screen_toggle()
    plt.rcParams["font.size"] = 6
    plt.subplot(122)
    im = plt.imshow(label_map, cmap='jet')
    # Colour of every cluster id under the colormap used by imshow.
    colors = [im.cmap(im.norm(value)) for value in range(n_clusters)]
    # One proxy artist per colour, used as the legend handles.
    patches = [
        mpatches.Patch(color=colour, label=f"Cluster {idx}")
        for idx, colour in enumerate(colors)
    ]
    plt.legend(handles=patches,
               bbox_to_anchor=(1.05, 1),
               loc=2,
               borderaxespad=0.)

    # --- pie chart -------------------------------------------------------
    print(f'Cluster Size: {n_clusters}')
    plt.subplot(121)
    plt.title('% of clusters')
    plt.pie(x=cluster_stats[1, :],
            labels=cluster_stats[0, :],
            autopct='%.2f%%',
            colors=colors)
    plt.tight_layout()

    plt.savefig(imagefile + '.png')
    x_get_hist(X, cluster_num, r, label_map, imagefile, city, cluster_stats)
def cluster(data):
    """Classify the kinship structure of ``data``, clustering each descent group.

    Outline of what the code does:

    1. Rows whose first column equals 0 and rows whose first column equals 1
       are clustered separately with x-means; cluster centers with more than
       ``num_lineage / 10`` members (``num_lineage`` is a module-level name)
       are collected as "clans".
    2. For every clan a mate clan is chosen by nearest squared distance, and
       one or more child clans are derived.
    3. One link table is built per combination counted in ``counter`` and the
       cycles of the child links and of the mate-then-child links are
       searched; the largest ones are recorded.
    4. A structure code (0-6) is derived from the recorded cycle lengths.

    Assumes every cluster center has at least five components (indices 0-4
    are read) and that component 0 is the 0/1 value the rows were split on
    -- TODO confirm.

    Returns:
        ``[structure, res]`` where ``res`` is
        ``[cur_cycle, cur_man_cycle, cur_woman_cycle, rest]`` once assigned,
        and 0 otherwise.
    """
    res = 0
    structure = 0
    data = np.array(data)
    clans = []
    for descent in [0, 1]:
        try:
            init_center = xmeans.kmeans_plusplus_initializer(
                data[data[:, 0] == descent], 2).initialize()
            xm = xmeans.xmeans(data[data[:, 0] == descent],
                               init_center,
                               ccore=False)
            xm.process()
            sizes = [len(cluster) for cluster in xm.get_clusters()]
            centers = xm.get_centers()
            # Keep only clusters larger than a tenth of num_lineage.
            for i in range(len(sizes)):
                if sizes[i] > num_lineage / 10:
                    clans.append(centers[i])
            # clans=xm.get_centers()
        except:
            # NOTE(review): bare except -- any failure for this descent value
            # (including KeyboardInterrupt) is silently skipped.
            continue
    if len(clans) > 0:
        num_clans = len(clans)
        clan_ls = []
        for i in range(num_clans):
            mate = 0
            child = 0
            cur_mate = 100
            cur_child = 100
            # Mate: components (3, 4) of clan i against components (1, 2) of
            # clan j; clan 0 is kept when no distance is below 100.
            for j in range(num_clans):
                mate_cur = (clans[i][3] - clans[j][1])**2 + (clans[i][4] -
                                                             clans[j][2])**2
                if mate_cur < cur_mate:
                    mate = j
                    cur_mate = mate_cur
            # Each entry is [clan index, mate index, list of child indices];
            # a clan whose component 0 is 0 is its own child.
            if clans[i][0] == 0:
                clan_ls.append([i, mate, [i]])
            else:
                clan_ls.append([i, mate, []])
        for i in range(len(clan_ls)):
            if clan_ls[i][2] == []:
                # Clans that chose clan i as their mate.
                mates = [mate for mate in clan_ls if mate[1] == i]
                children = []
                for mate in mates:
                    cur_child = 100
                    for j in range(num_clans):
                        if clans[j][0] == 1:
                            child_cur = (clans[i][1] - clans[j][1])**2 + (
                                clans[mate[0]][2] - clans[j][2])**2
                            if child_cur < cur_child:
                                child = j
                                cur_child = child_cur
                    # NOTE(review): `child` keeps its previous value when no
                    # candidate is closer than 100 -- confirm this is intended.
                    children.append(child)
                clan_ls[i][2] = children
        # Unwrap single-child lists; counter is the product of the lengths of
        # the remaining multi-child lists.
        counter = 1
        for clan in clan_ls:
            if len(clan[2]) == 1:
                clan[2] = clan[2][0]
            elif len(clan[2]) > 1:
                counter = counter * len(clan[2])
        clan_ls_ls = []
        clan_ls_ori = copy.deepcopy(clan_ls[:])
        for i in range(counter):
            clan_ls = copy.deepcopy(clan_ls_ori[:])
            for clan in clan_ls:
                if type(clan[2]) != type(0):
                    if len(clan[2]) == 0:
                        # -1 marks "no child".
                        clan[2] = -1
                    else:
                        # NOTE(review): the index uses `counter`, not the loop
                        # variable `i`, so every copy selects the same child
                        # -- possibly `i` was intended.
                        clan[2] = clan[2][counter % len(clan[2])]
            clan_ls_ls.append(clan_ls)
        cur_man_cycle = 0
        cur_cycle = 0
        cur_woman_cycle = 0
        num_clans = 0
        for clan_ls in clan_ls_ls:
            candidate = list(range(len(clans)))
            while len(candidate) > 0:
                marriage_path = []
                cur = candidate[-1]
                man_path = [cur]
                vill_ls = []
                # Follow child links until a clan repeats (keep the cycle) or
                # a clan without child is reached.
                while True:
                    next = clan_ls[cur][2]
                    if next in man_path:
                        man_path = man_path[man_path.index(next):]
                        break
                    elif next == -1:
                        break
                    else:
                        man_path.append(next)
                        cur = next
                cur_woman_cycle_cur = 0
                # From every clan on the child path, repeatedly step to the
                # child of the mate and collect the cycles found.
                for clan in man_path:
                    if clan not in marriage_path:
                        cur_path = [clan]
                        cur = clan
                        while True:
                            next = clan_ls[clan_ls[cur][1]][2]
                            if next in cur_path:
                                marriage_path.extend(
                                    cur_path[cur_path.index(next):])
                                if len(cur_path[cur_path.index(next):]
                                       ) > cur_woman_cycle_cur:
                                    cur_woman_cycle_cur = len(
                                        cur_path[cur_path.index(next):])
                                break
                            elif next == -1:
                                break
                            else:
                                cur_path.append(next)
                                cur = next
                marriage_path = list(set(marriage_path))
                candidate.pop()
                # NOTE(review): man_path is modified while it is being
                # iterated, so some elements may be skipped.
                for man in man_path:
                    if man not in marriage_path:
                        man_path.remove(man)
                    if man in candidate:
                        candidate.remove(man)
                # Update the record, comparing marriage_path length first,
                # then man_path length, then the mate-cycle length.
                if len(marriage_path) > cur_cycle:
                    cur_cycle = len(marriage_path)
                    cur_man_cycle = len(man_path)
                    cur_woman_cycle = cur_woman_cycle_cur
                elif len(marriage_path
                         ) == cur_cycle and len(man_path) > cur_man_cycle:
                    cur_cycle = len(marriage_path)
                    cur_man_cycle = len(man_path)
                    cur_woman_cycle = cur_woman_cycle_cur
                elif len(marriage_path) == cur_cycle and len(
                        man_path
                ) == cur_man_cycle and cur_woman_cycle_cur > cur_woman_cycle:
                    cur_cycle = len(marriage_path)
                    cur_man_cycle = len(man_path)
                    cur_woman_cycle = cur_woman_cycle_cur
                else:
                    continue
                # Only reached when the record was updated: rest becomes 1
                # when at least half of the recorded clans have three distinct
                # entries in their [index, mate, child] row.
                rest = 0
                for man in marriage_path:
                    if len(clan_ls[man]) == len(set(clan_ls[man])):
                        rest += 1
                if rest >= cur_cycle / 2:
                    rest = 1
                else:
                    rest = 0
            # Count the clans that are the child of some clan chosen as mate.
            clan_ls = np.array(clan_ls)
            ind_ls = [clan_ls[i, 2] for i in list(set(clan_ls[:, 1]))]
            clan_ls = [list(clan) for clan in clan_ls if clan[0] in ind_ls]
            if len(clan_ls) > num_clans:
                num_clans = len(clan_ls)
        if cur_cycle * cur_man_cycle * cur_woman_cycle != 0:
            if cur_woman_cycle > 1 and cur_man_cycle > 1 and cur_cycle > 3:
                structure = 4
            elif cur_cycle == 1:
                structure = 1
            elif cur_cycle <= num_clans / 3:
                structure = 5
            elif cur_cycle == 2:
                structure = 2
            elif cur_woman_cycle > 2 or cur_man_cycle > 2:
                structure = 3
            else:
                structure = 6
            # structures=["dead","incest", "dual", "generalized", "restricted", "vill division", "others"]
            # NOTE(review): the nesting level of this assignment could not be
            # recovered from the collapsed source; it is placed inside the
            # non-zero branch, where `rest` is guaranteed to be defined --
            # confirm against the original file.
            res = [cur_cycle, cur_man_cycle, cur_woman_cycle, rest]
    return [structure, res]
exit("Path do not exists") filelist = glob.glob(TARGET_IMAGES_DIR + "*." + IMAGE_TYPE) if len(filelist) < 1: exit("No images in this folder") X = np.array( [cv2.resize(cv2.imread(p), (64, 64), cv2.INTER_CUBIC) for p in filelist]) X = X.reshape(X.shape[0], -1) pca = PCA(n_components=PCA_COMPONENTS) pca.fit(X) X_pca = pca.transform(X) initializer = xmeans.kmeans_plusplus_initializer( data=X_pca, amount_centers=INITIAL_XMEANS_CENTERS) initial_centers = initializer.initialize() xm = xmeans.xmeans(data=X_pca, initial_centers=initial_centers) xm.process() clusters = xm.get_clusters() centers = xm.get_centers() centerIdx = list() for idx, p in enumerate(centers): centerIdx.append(np.where(X_pca == closest_node(p, X_pca))[0][0]) if PCA_COMPONENTS < 4: ax = pyclustering.utils.draw_clusters(data=X_pca, clusters=clusters) if os.path.exists(CLUSTERED_IMAGES_DIR):
def main():
    """Run the village simulation for up to 500 steps and save per-village plots.

    Relies on module-level names not visible here (``initial_pop``,
    ``num_lineage``, ``num_vills``, ``Village``, ``Clan``, ``year``,
    ``mating``, ``coop``, ``conflict``, ``trial``, ``current_palette``).

    For each of the first (at most) 50 surviving villages two figures are
    written: a time series under ``figs_timeseries/`` and an x-means cluster
    map of the last recorded step under ``figs_map/``.
    """
    vills = []
    num = 0
    initial_population = initial_pop * num_lineage * 2
    # Create num_vills villages with num_lineage clans each.
    for i in range(num_vills):
        vills.append(Village())
        for j in range(num_lineage):
            vills[i].clans.append(Clan(0, 0, initial_pop))
    cycles = []
    incests = []
    while num < 500:
        if num == 20:
            # NOTE(review): no `global mutation` declaration is visible in
            # this function, so this binds a local name only -- confirm
            # whether year()/mating() were meant to see this value.
            mutation = 0.1
        remove_ls = []
        duplicate_ls = []
        for vill in vills:
            vill.df[num] = year(vill)
            # Villages below a tenth of the initial population are removed;
            # villages above twice the initial population are split.
            if vill.population < initial_population / 10:
                remove_ls.append(vill)
            elif vill.population > initial_population * 2:
                duplicate_ls.append(vill)
        for vill in remove_ls:
            vills.remove(vill)
        for vill in duplicate_ls:
            random.shuffle(vill.clans)
            # Split into 2**n villages with about k clans each: 2**n - 1 new
            # villages take k clans apiece, the original keeps the remainder.
            n = math.floor(math.log2(vill.population / initial_population))
            k = round(len(vill.clans) / 2**n)
            for i in [0] * (2**n - 1):
                clans = vill.clans[:k]
                vill.clans = vill.clans[k:]
                vills.append(Village())
                vills[-1].clans = copy.deepcopy(clans)
                vills[-1].df = vill.df.copy()
        # Cap the number of villages at num_vills, keeping a random subset.
        if len(vills) > num_vills:
            random.shuffle(vills)
            vills = vills[:num_vills]
        for vill in vills:
            mating(vill)
        if len(vills) == 0:
            break
        num += 1
    if len(vills) == 0:
        cycles = 0
    for k in range(min(50, len(vills))):
        vill = vills[k]
        # Flatten the recorded history into [time, t, p] rows.
        # presumably rows 0 and 1 of vill.df hold per-step sequences of t and
        # p values -- verify against year().
        my_ls = []
        for i in range(500):
            my_ls.extend([[i, vill.df.iat[0, i][j], vill.df.iat[1, i][j]]
                          for j in range(len(vill.df.iat[0, i]))])
        df_res = pd.DataFrame(my_ls, columns=["time", "t", "p"])
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(df_res["time"], df_res["t"], s=0.5, color="blue")
        ax.scatter(df_res["time"], df_res["p"], s=0.5, color="red")
        ax.set_xlabel("time", fontsize=36)
        ax.tick_params(labelsize=24)
        fig.tight_layout()
        fig.savefig(
            "figs_timeseries/timeseries_mutation{}pc_coop{}pc_conf{}pc_{}_{}.eps"
            .format(round(mutation * 100), round(coop * 100),
                    round(conflict * 100), k, trial))
        plt.close('all')
        # Cluster the values of the last recorded step with x-means, starting
        # from a single center.
        data = np.array([vill.df.iat[0, -1], vill.df.iat[1, -1]]).T
        init_center = xmeans.kmeans_plusplus_initializer(data, 1).initialize()
        xm = xmeans.xmeans(data, init_center, ccore=False)
        xm.process()
        sizes = [len(cluster) for cluster in xm.get_clusters()]
        centers = xm.get_centers()
        clusters_candidate = xm.get_clusters()
        # Keep only clusters larger than a tenth of num_lineage.
        ls = []
        for i in range(len(sizes)):
            if sizes[i] > num_lineage / 10:
                ls.append(clusters_candidate[i])
        # clans=xm.get_centers()
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        for i in range(len(ls)):
            try:
                ax.scatter(data[:, 0][ls[i]],
                           data[:, 1][ls[i]],
                           s=60,
                           c=current_palette[i])
            except:
                # NOTE(review): bare except -- a cluster that fails to plot
                # is silently skipped.
                pass
        ax.set_xlabel(r"$t$", fontsize=36)
        ax.set_ylabel(r"$p$", fontsize=36)
        ax.tick_params(labelsize=24)
        ax.set_aspect('equal', 'datalim')
        fig.tight_layout()
        fig.savefig(
            "figs_map/map_mutation{}pc_coop{}pc_conf{}pc_{}_{}.eps".format(
                round(mutation * 100), round(coop * 100),
                round(conflict * 100), k, trial))
# Scatter-plot the points in `xy`, cluster them with x-means starting from
# eight k-means++ centers, draw the clusters and print the first eight.
# `xy`, `labels`, `N` and `data` are defined outside this excerpt.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
#ax.scatter(x.ravel(), y.ravel(), s=50, c=c, marker="s")
ax.scatter(xy[0], xy[1], marker="s")
ax.set_title('first scatter plot')
ax.set_xlabel('x')
ax.set_ylabel('y')
#plt.scatter(x=X[:, 0], y=X[:, 1])
# One (x, y) tuple per point, the layout pyclustering expects.
data2 = list(zip(xy[0], xy[1]))
initializer = xmeans.kmeans_plusplus_initializer(data=data2, amount_centers=8)
initial_centers = initializer.initialize()
xm = xmeans.xmeans(data=data2, initial_centers=initial_centers)
xm.process()
clusters = xm.get_clusters()
# NOTE(review): xy is rebound here but not read again in this excerpt.
xy = np.unravel_index(labels, (N, N))
pyclustering.utils.draw_clusters(data=data2, clusters=clusters)
# NOTE(review): only the first eight clusters are printed, and the rows are
# taken from `data`, not from the clustered `data2` -- confirm this is intended.
for i in range(8):
    print("cluster:", i)
    print(np.array(data)[clusters[i]])
s=600, linewidth=6) # ax_tsne.axis('off') fig_tsne.savefig('tsne_embedding/reaction{}_tsne.png'.format(reaction_num)) from pandas.api.types import CategoricalDtype import scipy.sparse as sp from utils.ga_utils import csc_drop_zerocols from pyclustering.cluster import xmeans candidates_fps = sp.load_npz('data/candidates_fp_single.npz') summary_fps = candidate_df['reactants_idx'].apply( lambda x: candidates_fps[np.concatenate(x)].sum(0)) summary_fps = sp.csc_matrix(np.concatenate(summary_fps.values, axis=0)) summary_fps_dropped = csc_drop_zerocols(summary_fps) xmeans_init = xmeans.kmeans_plusplus_initializer( data=summary_fps_dropped.todense(), amount_centers=2) initial_centers = xmeans_init.initialize() xm = xmeans.xmeans(data=summary_fps_dropped.todense(), kmax=100, repeat=10) xm.process() clusters100 = xm.get_clusters() centers100 = xm.get_centers() candidate_df_split = list() cluster_num = list() for i, cluster in enumerate(clusters100): df_cluster = candidate_df.iloc[cluster] df_cluster['cluster'] = 'cluster ' + str(i + 1) candidate_df_split.append(df_cluster) cluster_num.append('cluster ' + str(i + 1)) summary_df_100clusters = pd.concat(candidate_df_split, axis=0)