Beispiel #1
0
def cluster_percentage_color_hdbscan(signatures_idx):
    """Cluster one signature set with HDBSCAN and collect summary data.

    Relies on the module-level names ``all_signatures``, ``rxn_type`` and
    ``cpus`` being defined before the call.

    Parameters
    ----------
    signatures_idx
        Key into ``all_signatures`` selecting which signatures to cluster.

    Returns
    -------
    dict
        Three entries, in this order: ``signatures_idx`` mapped to the result
        of ``cluster_percentage_color()``, ``'best_silh'`` mapped to the
        result of ``silhouette_score()``, and ``'labels'`` mapped to the
        ``labels`` attribute of the clustering object.
    """
    selected = all_signatures[signatures_idx][rxn_type]
    seq_clusters = clustering.ClusterSequences(
        seqdata=selected,
        unique_sequences=False,
        truncate_seq=50,
    )

    # The dissimilarity matrix is computed before running HDBSCAN.
    seq_clusters.diss_matrix(n_jobs=cpus)
    seq_clusters.hdbscan()

    summary = {}
    summary[signatures_idx] = seq_clusters.cluster_percentage_color()
    summary['best_silh'] = seq_clusters.silhouette_score()
    summary['labels'] = seq_clusters.labels
    return summary
Beispiel #2
0
def cluster_percentage_color_spectral(signatures_idx):
    """Cluster one signature set with spectral clustering and summarize it.

    The number of clusters is chosen from silhouette scores computed for
    2 to 30 clusters. Relies on the module-level names ``all_signatures``,
    ``rxn_type``, ``cpus`` and ``sil_threshold`` being defined before the call.

    Parameters
    ----------
    signatures_idx
        Key into ``all_signatures`` selecting which signatures to cluster.

    Returns
    -------
    dict
        Three entries, in this order: ``signatures_idx`` mapped to the result
        of ``cluster_percentage_color()``, ``'best_silh'`` mapped to the
        silhouette score of the chosen number of clusters, and ``'labels'``
        mapped to the ``labels`` attribute of the clustering object.
    """
    signatures = all_signatures[signatures_idx][rxn_type]
    clus = clustering.ClusterSequences(seqdata=signatures, unique_sequences=False, truncate_seq=50)
    clus.diss_matrix(n_jobs=cpus)
    sil_df = clus.silhouette_score_spectral_range(cluster_range=range(2, 31), n_jobs=4, random_state=1234)

    if sil_threshold:
        # When several silhouette scores lie within ``sil_threshold`` of the
        # maximum they are treated as equivalent, and the smallest number of
        # clusters among them is preferred.
        silh_cutoff = sil_df['cluster_silhouette'].max() - sil_threshold
        candidates = sil_df.loc[sil_df['cluster_silhouette'] > silh_cutoff]
        best_row = candidates.loc[candidates['num_clusters'].idxmin()]
    else:
        best_row = sil_df.loc[sil_df['cluster_silhouette'].idxmax()]

    # Read the values by column name rather than unpacking the row
    # positionally, so the result does not depend on the column order of the
    # returned DataFrame.
    best_silh = float(best_row['cluster_silhouette'])
    n_clus = int(best_row['num_clusters'])

    clus.spectral_clustering(n_clusters=n_clus, n_jobs=4, random_state=1234)
    cluster_information = {signatures_idx: clus.cluster_percentage_color(),
                           'best_silh': best_silh, 'labels': clus.labels}
    return cluster_information
Beispiel #3
0
from tropical import clustering
import pickle
import numpy as np

# Script: cluster the 'consumption' signatures stored under key 3 with
# spectral clustering, save the dissimilarity matrix and the labels, and plot
# the resulting clusters.

# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('pydream_kpars_signatures.pickle', 'rb') as handle:
    all_signatures = pickle.load(handle)

cpus = 30
rxn_type = 'consumption'

uujnk3_signatures = all_signatures[3][rxn_type]
clus = clustering.ClusterSequences(seqdata=uujnk3_signatures,
                                   unique_sequences=False)
# To reuse a previously saved dissimilarity matrix instead of recomputing it,
# uncomment the next two lines and comment out the diss_matrix/np.save pair.
# diss = np.load('pydream_uujnk3_diss.npy')
# clus.diss = diss
clus.diss_matrix(n_jobs=cpus)
np.save('pydream_uujnk3_diss.npy', clus.diss)

sil_df = clus.silhouette_score_spectral_range(cluster_range=range(2, 20),
                                              n_jobs=cpus,
                                              random_state=1234)
print(sil_df)

# Pick the row with the highest silhouette score and read its values by
# column name, so the result does not depend on the DataFrame's column order.
best_idx = sil_df['cluster_silhouette'].idxmax()
best_silh = float(sil_df.loc[best_idx, 'cluster_silhouette'])
n_clus = int(sil_df.loc[best_idx, 'num_clusters'])
# Manual override of the number of clusters:
# n_clus = 7

clus.spectral_clustering(n_clusters=n_clus, n_jobs=cpus, random_state=1234)
np.save('pydream_labels_uujnk3.npy', clus.labels)
b = clustering.PlotSequences(clus)
b.modal_plot(title='uuJNK3')
b.all_trajectories_plot(title='uuJNK3')
from tropical import clustering
import pickle
import numpy as np

#with open('earm_signatures_sampled_kd.pickle', 'rb') as handle:
#    all_signatures = pickle.load(handle)

# Script: cluster the 'consumption' signatures stored under key 37 with
# spectral clustering and save the dissimilarity matrix.

# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('earm_signatures_sampled_kd_ic.pickle', 'rb') as handle:
    all_signatures = pickle.load(handle)

cpus = 30
rxn_type = 'consumption'

sp37_signatures = all_signatures[37][rxn_type]
clus = clustering.ClusterSequences(seqdata=sp37_signatures,
                                   unique_sequences=False,
                                   truncate_seq=50)
# To reuse a previously saved dissimilarity matrix instead of recomputing it,
# uncomment the next two lines and comment out the diss_matrix/np.save pair.
#diss = np.load('sampled_kd_37_diss.npy')
#clus.diss = diss
clus.diss_matrix(n_jobs=cpus)
np.save('sampled_kd_ic_37_diss.npy', clus.diss)

sil_df = clus.silhouette_score_spectral_range(cluster_range=range(2, 20),
                                              n_jobs=cpus,
                                              random_state=1234)
print(sil_df)

# Pick the row with the highest silhouette score and read its values by
# column name. Unpacking the row positionally would silently swap the score
# and the cluster count if the column order differs from the assumed one.
best_idx = sil_df['cluster_silhouette'].idxmax()
best_silh = float(sil_df.loc[best_idx, 'cluster_silhouette'])
n_clus = int(sil_df.loc[best_idx, 'num_clusters'])
# Manual override of the number of clusters:
#n_clus = 7

clus.spectral_clustering(n_clusters=n_clus, n_jobs=cpus, random_state=1234)