def cluster_percentage_color_hdbscan(signatures_idx):
    """Cluster one entry of ``all_signatures`` with HDBSCAN and summarise it.

    Reads the module-level ``all_signatures``, ``rxn_type`` and ``cpus``.

    Parameters
    ----------
    signatures_idx
        Key used to look up the signatures in ``all_signatures``; it is also
        reused as the key of the cluster summary in the returned dict.

    Returns
    -------
    dict
        ``{signatures_idx: <result of cluster_percentage_color()>,
        'best_silh': <result of silhouette_score()>,
        'labels': <clus.labels>}``.
    """
    seqs = all_signatures[signatures_idx][rxn_type]
    sequence_clusters = clustering.ClusterSequences(
        seqdata=seqs,
        unique_sequences=False,
        truncate_seq=50,
    )

    # The dissimilarity matrix is computed before the clustering call,
    # in the same order as the rest of this code base does it.
    sequence_clusters.diss_matrix(n_jobs=cpus)
    sequence_clusters.hdbscan()

    return {
        signatures_idx: sequence_clusters.cluster_percentage_color(),
        'best_silh': sequence_clusters.silhouette_score(),
        'labels': sequence_clusters.labels,
    }
def cluster_percentage_color_spectral(signatures_idx):
    """Cluster one entry of ``all_signatures`` with spectral clustering.

    The number of clusters is chosen from silhouette scores computed for
    2..30 clusters. Reads the module-level ``all_signatures``, ``rxn_type``,
    ``cpus`` and ``sil_threshold``.

    Parameters
    ----------
    signatures_idx
        Key used to look up the signatures in ``all_signatures``; it is also
        reused as the key of the cluster summary in the returned dict.

    Returns
    -------
    dict
        ``{signatures_idx: <result of cluster_percentage_color()>,
        'best_silh': <silhouette score of the chosen row>,
        'labels': <clus.labels>}``.
    """
    signatures = all_signatures[signatures_idx][rxn_type]
    clus = clustering.ClusterSequences(seqdata=signatures, unique_sequences=False, truncate_seq=50)
    clus.diss_matrix(n_jobs=cpus)
    sil_df = clus.silhouette_score_spectral_range(cluster_range=range(2, 31), n_jobs=4, random_state=1234)

    if sil_threshold:
        silh_diff = sil_df['cluster_silhouette'].max() - sil_threshold
        # Define n_clus to have the minimum number of clusters when silh
        # scores are too similar (within sil_threshold of the best score).
        best_silhs = sil_df.loc[sil_df['cluster_silhouette'] > silh_diff]
        best_row = best_silhs.loc[best_silhs['num_clusters'].idxmin()]
    else:
        best_row = sil_df.loc[sil_df['cluster_silhouette'].idxmax()]

    # Read the row by column label rather than by position so the result does
    # not depend on the column order of the DataFrame.
    best_silh = best_row['cluster_silhouette']
    n_clus = int(best_row['num_clusters'])

    clus.spectral_clustering(n_clusters=n_clus, n_jobs=4, random_state=1234)
    cluster_information = {
        signatures_idx: clus.cluster_percentage_color(),
        'best_silh': best_silh,
        'labels': clus.labels,
    }
    return cluster_information
from tropical import clustering
import pickle
import numpy as np

# Script: spectral clustering of the signatures stored under key 3
# (labelled 'uuJNK3' in the plots) of the pickled signatures file.
# It saves the dissimilarity matrix and the cluster labels, then draws
# the modal and all-trajectories plots.

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('pydream_kpars_signatures.pickle', 'rb') as handle:
    all_signatures = pickle.load(handle)

cpus = 30
rxn_type = 'consumption'

uujnk3_signatures = all_signatures[3][rxn_type]
clus = clustering.ClusterSequences(seqdata=uujnk3_signatures, unique_sequences=False)

# Alternative to recomputing: reuse a previously saved dissimilarity matrix.
# diss = np.load('pydream_uujnk3_diss.npy')
# clus.diss = diss
clus.diss_matrix(n_jobs=cpus)
np.save('pydream_uujnk3_diss.npy', clus.diss)

sil_df = clus.silhouette_score_spectral_range(cluster_range=range(2, 20), n_jobs=cpus, random_state=1234)
print(sil_df)

# Pick the row with the highest silhouette score and read it by column
# label. Positional unpacking depends on the DataFrame's column order and
# could turn the silhouette score into the cluster count.
best_row = sil_df.loc[sil_df['cluster_silhouette'].idxmax()]
best_silh = best_row['cluster_silhouette']
n_clus = int(best_row['num_clusters'])
# n_clus = 7

clus.spectral_clustering(n_clusters=n_clus, n_jobs=cpus, random_state=1234)
np.save('pydream_labels_uujnk3.npy', clus.labels)

b = clustering.PlotSequences(clus)
b.modal_plot(title='uuJNK3')
b.all_trajectories_plot(title='uuJNK3')
from tropical import clustering
import pickle
import numpy as np

# Script: spectral clustering of the signatures stored under key 37 of the
# pickled EARM signatures file. It saves the dissimilarity matrix and then
# clusters with the number of clusters that has the best silhouette score.

# Alternative input file, kept for switching between data sets:
#with open('earm_signatures_sampled_kd.pickle', 'rb') as handle:
#    all_signatures = pickle.load(handle)

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('earm_signatures_sampled_kd_ic.pickle', 'rb') as handle:
    all_signatures = pickle.load(handle)

cpus = 30
rxn_type = 'consumption'

sp37_signatures = all_signatures[37][rxn_type]
clus = clustering.ClusterSequences(seqdata=sp37_signatures, unique_sequences=False, truncate_seq=50)

# Alternative to recomputing: reuse a previously saved dissimilarity matrix.
#diss = np.load('sampled_kd_37_diss.npy')
#clus.diss = diss
clus.diss_matrix(n_jobs=cpus)
np.save('sampled_kd_ic_37_diss.npy', clus.diss)

sil_df = clus.silhouette_score_spectral_range(cluster_range=range(2, 20), n_jobs=cpus, random_state=1234)
print(sil_df)

# Pick the row with the highest silhouette score and read it by column
# label so the result does not depend on the DataFrame's column order.
best_row = sil_df.loc[sil_df['cluster_silhouette'].idxmax()]
best_silh = best_row['cluster_silhouette']
n_clus = int(best_row['num_clusters'])
#n_clus = 7

clus.spectral_clustering(n_clusters=n_clus, n_jobs=cpus, random_state=1234)