예제 #1
0
def phenograph_clustering_freq(d, DTCRU, n_jobs=1):
    """Cluster a precomputed distance matrix and total frequencies per file and cluster.

    A 30-nearest-neighbour graph is built from ``d`` and handed to
    ``phenograph_local.cluster``. For every resulting cluster label other
    than -1, the entries of ``DTCRU.freq`` that belong to the cluster are
    summed per value of ``DTCRU.sample_id``.

    Args:
        d: Distance matrix, passed to ``NearestNeighbors`` with
            ``metric='precomputed'``.
        DTCRU: Object exposing ``sample_id``, ``freq`` and ``sample_list``.
            ``sample_id`` and ``freq`` are indexed with a boolean mask built
            from the cluster labels (presumably NumPy arrays with one entry
            per row of ``d`` -- confirm against the caller).
        n_jobs: Forwarded to ``NearestNeighbors`` only.

    Returns:
        A tuple ``(DF_Sum, IDX)``. ``DF_Sum`` is a DataFrame indexed by
        ``DTCRU.sample_list`` (index name ``'File'``) with one column
        ``'Cluster_<label>'`` per cluster; files with no entry in a cluster
        hold 0.0. ``IDX`` is the label array returned by
        ``phenograph_local.cluster``.
    """
    nbrs = NearestNeighbors(n_neighbors=30, metric='precomputed', n_jobs=n_jobs).fit(d)
    d_knn, idx = nbrs.kneighbors(d)
    # NOTE(review): clustering is pinned to a single job regardless of
    # ``n_jobs`` -- confirm this is intentional.
    IDX, _, _ = phenograph_local.cluster(d=d_knn, idx=idx, n_jobs=1)

    file_id = DTCRU.sample_id
    freq = DTCRU.freq

    # Empty frame whose index is the full file list, so every file gets a row
    # even when it contributes to no cluster.
    DF_Sum = pd.DataFrame()
    DF_Sum['File'] = DTCRU.sample_list
    DF_Sum.set_index('File', inplace=True)

    for i in np.unique(IDX):
        # -1 is skipped; presumably the label for points left unassigned.
        if i == -1:
            continue

        sel = IDX == i
        df = pd.DataFrame()
        df['File'] = file_id[sel]
        df['Frequency'] = freq[sel]

        # Assign the summed Series explicitly; pandas aligns it on the 'File'
        # index and leaves NaN for files absent from this cluster.
        per_file = df.groupby(by='File', sort=False)['Frequency'].sum()
        DF_Sum['Cluster_' + str(i)] = per_file

    DF_Sum.fillna(0.0, inplace=True)
    return DF_Sum, IDX
예제 #2
0
def phenograph_clustering(d, n_jobs=None):
    """Cluster a precomputed distance matrix and return the size of each cluster.

    A 30-nearest-neighbour graph is built from ``d`` and handed to
    ``phenograph_local.cluster``; the number of points carrying each
    resulting label is then counted.

    Args:
        d: Distance matrix, passed to ``NearestNeighbors`` with
            ``metric='precomputed'``.
        n_jobs: Forwarded to ``NearestNeighbors`` only. The default ``None``
            matches the estimator's own default.

    Returns:
        A list with one count per cluster label, ordered by ascending label.
        The label -1 is excluded (presumably points left unassigned).
    """
    nbrs = NearestNeighbors(n_neighbors=30, metric='precomputed', n_jobs=n_jobs).fit(d)
    # Keep the input matrix and the kNN distances under separate names.
    d_knn, idx = nbrs.kneighbors(d)
    IDX, _, _ = phenograph_local.cluster(d=d_knn, idx=idx, n_jobs=1)

    # One pass yields the sorted labels and their counts, instead of building
    # and summing a full boolean mask per label.
    labels, counts = np.unique(IDX, return_counts=True)
    return list(counts[labels != -1])