def phenograph_clustering_freq(d, DTCRU, n_jobs=1):
    """Cluster the rows of a distance matrix and sum frequencies per file.

    A 30-nearest-neighbour graph is built from ``d`` (treated as a
    precomputed distance matrix) and handed to ``phenograph_local.cluster``.
    For every resulting cluster label other than ``-1`` the values of
    ``DTCRU.freq`` belonging to that cluster are summed per file.

    Parameters
    ----------
    d : array-like
        Pairwise distances, passed to ``NearestNeighbors`` with
        ``metric='precomputed'``.
    DTCRU : object
        Must expose ``sample_id``, ``freq`` and ``sample_list``.
        ``sample_id`` and ``freq`` are indexed with a boolean mask derived
        from the cluster labels -- presumably NumPy arrays aligned with the
        rows of ``d``; verify against the caller.
    n_jobs : int, default 1
        Forwarded to ``NearestNeighbors`` only.

    Returns
    -------
    DF_Sum : pandas.DataFrame
        Indexed by ``'File'`` (the entries of ``DTCRU.sample_list``), with one
        ``'Cluster_<label>'`` column per cluster. Files that have no member in
        a cluster get ``0.0``.
    IDX : array-like
        Cluster labels as returned by ``phenograph_local.cluster``.
    """
    nbrs = NearestNeighbors(n_neighbors=30, metric='precomputed', n_jobs=n_jobs).fit(d)
    d_knn, idx = nbrs.kneighbors(d)

    # NOTE(review): the clustering step is pinned to a single job regardless
    # of ``n_jobs``; kept as in the original -- confirm whether intentional.
    IDX, _, _ = phenograph_local.cluster(d=d_knn, idx=idx, n_jobs=1)

    file_id = DTCRU.sample_id
    freq = DTCRU.freq

    # One row per known file so every file appears in the result, including
    # those that contribute to no cluster.
    DF_Sum = pd.DataFrame({'File': DTCRU.sample_list}).set_index('File')

    for i in np.unique(IDX):
        # Label -1 is excluded from the table (presumably PhenoGraph's
        # "unassigned" label -- confirm against phenograph_local).
        if i == -1:
            continue
        sel = IDX == i
        members = pd.DataFrame({'File': file_id[sel], 'Frequency': freq[sel]})
        per_file = members.groupby(by='File', sort=False)['Frequency'].sum()
        # Series assignment aligns on the 'File' index; absent files become
        # NaN here and are zero-filled below.
        DF_Sum['Cluster_' + str(i)] = per_file

    DF_Sum.fillna(0.0, inplace=True)
    return DF_Sum, IDX
def phenograph_clustering(d):
    """Return the member count of each cluster found in a distance matrix.

    ``d`` is treated as a precomputed distance matrix: its 30 nearest
    neighbours per row are computed and passed to
    ``phenograph_local.cluster`` (run with ``n_jobs=1``).

    Returns
    -------
    list
        One count per distinct cluster label, in the order produced by
        ``np.unique``; the label ``-1`` is skipped.
    """
    knn = NearestNeighbors(n_neighbors=30, metric='precomputed')
    knn.fit(d)
    knn_dist, knn_idx = knn.kneighbors(d)

    labels, _, _ = phenograph_local.cluster(d=knn_dist, idx=knn_idx, n_jobs=1)

    # Size of every cluster except the one labelled -1.
    return [np.sum(labels == lab) for lab in np.unique(labels) if lab != -1]