def fit_protein_kmeans(yaml_file, mini=True, pca=False):
    """Fit a single k-means model on the projected data of every protein.

    Estimator keyword arguments are taken from ``yaml_file["mdl_params"]``:
    each key that starts with ``"cluster__"`` is passed on with that leading
    prefix removed.  The fitted model is written to
    ``<mdl_dir>/kmeans_mdl.pkl`` with ``verbosedump``.

    Parameters
    ----------
    yaml_file : mapping
        Project configuration.  The keys ``"mdl_dir"``, ``"mdl_params"`` and
        ``"protein_list"`` are read here; it is also handed to
        ``enter_protein_mdl_dir``.
    mini : bool, optional
        When true (default) a ``MiniBatchKMeans`` is fitted and its
        ``batch_size`` is set to ``100 * n_clusters``, overriding any
        configured batch size.  Otherwise a ``KMeans`` is fitted.
    pca : bool, optional
        When true, ``pca_data.pkl`` is loaded for each protein instead of
        ``tica_data.pkl``.  Defaults to false.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``mini`` is true and no ``cluster__n_clusters`` entry is present
        in the model parameters.
    """
    prefix = "cluster__"
    mdl_dir = yaml_file["mdl_dir"]
    mdl_params = yaml_file["mdl_params"]

    # Strip only the *leading* prefix.  Splitting on the prefix and taking
    # element [1] would truncate names that contain "cluster__" again.
    current_mdl_params = {
        key[len(prefix):]: value
        for key, value in mdl_params.items()
        if key.startswith(prefix)
    }

    if mini:
        current_mdl_params["batch_size"] = 100 * current_mdl_params["n_clusters"]
        kmeans_mdl = MiniBatchKMeans(**current_mdl_params)
    else:
        kmeans_mdl = KMeans(**current_mdl_params)

    data_fname = "pca_data.pkl" if pca else "tica_data.pkl"

    data = []
    for protein in yaml_file["protein_list"]:
        # NOTE(review): the relative file name suggests this context manager
        # switches into the protein's model directory -- confirm.
        with enter_protein_mdl_dir(yaml_file, protein):
            projected = verboseload(data_fname)
            # Collect every trajectory, ordered by key via ``keynat``, so the
            # fit input order does not depend on dict iteration order.
            data.extend(projected[key] for key in sorted(projected, key=keynat))

    kmeans_mdl.fit(data)
    kmeans_mdl_path = os.path.join(mdl_dir, "kmeans_mdl.pkl")
    verbosedump(kmeans_mdl, kmeans_mdl_path)
def fit_protein_kmeans(yaml_file, mini=True, pca=False):
    """Fit a single k-means model on the projected data of every protein.

    Estimator keyword arguments are taken from ``yaml_file["mdl_params"]``:
    each key that starts with ``"cluster__"`` is passed on with that leading
    prefix removed.  The fitted model is written to
    ``<mdl_dir>/kmeans_mdl.pkl`` with ``verbosedump``.

    Parameters
    ----------
    yaml_file : mapping
        Project configuration.  The keys ``"mdl_dir"``, ``"mdl_params"`` and
        ``"protein_list"`` are read here; it is also handed to
        ``enter_protein_mdl_dir``.
    mini : bool, optional
        When true (default) a ``MiniBatchKMeans`` is fitted and its
        ``batch_size`` is set to ``100 * n_clusters``, overriding any
        configured batch size.  Otherwise a ``KMeans`` is fitted.
    pca : bool, optional
        When true, ``pca_data.pkl`` is loaded for each protein instead of
        ``tica_data.pkl``.  The default (false) keeps the two-argument
        behaviour of loading ``tica_data.pkl``.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``mini`` is true and no ``cluster__n_clusters`` entry is present
        in the model parameters.
    """
    prefix = "cluster__"
    mdl_dir = yaml_file["mdl_dir"]
    mdl_params = yaml_file["mdl_params"]

    # Keep everything after the leading prefix; ``split(prefix)[1]`` would
    # cut a parameter name short if the prefix text occurred in it again.
    current_mdl_params = {}
    for key, value in mdl_params.items():
        if key.startswith(prefix):
            current_mdl_params[key[len(prefix):]] = value

    if mini:
        current_mdl_params["batch_size"] = 100 * current_mdl_params["n_clusters"]
        kmeans_mdl = MiniBatchKMeans(**current_mdl_params)
    else:
        kmeans_mdl = KMeans(**current_mdl_params)

    data_fname = "pca_data.pkl" if pca else "tica_data.pkl"

    data = []
    for protein in yaml_file["protein_list"]:
        # NOTE(review): presumably enters the protein's model directory so
        # the relative file name resolves -- confirm against the helper.
        with enter_protein_mdl_dir(yaml_file, protein):
            projected = verboseload(data_fname)
            # Gather all trajectories in ``keynat`` key order.
            ordered_keys = sorted(projected.keys(), key=keynat)
            data.extend([projected[key] for key in ordered_keys])

    kmeans_mdl.fit(data)
    kmeans_mdl_path = os.path.join(mdl_dir, "kmeans_mdl.pkl")
    verbosedump(kmeans_mdl, kmeans_mdl_path)
class TestPlotUtils:
    """Smoke tests: each plotting helper must hand back an ``Axes`` object."""

    def setUp(self):
        """Build two random trajectories and a 2-cluster model fitted on them."""
        # Fixed seed keeps the random fixture data reproducible.
        numpy.random.seed(12)
        # Keys 0 and 1, each mapped to a random array of shape (20, 3).
        self.ttrajs = {idx: numpy.random.rand(20, 3) for idx in range(2)}

        model = MiniBatchKMeans(n_clusters=2)
        model.fit(list(self.ttrajs.values()))
        self.clusterer = model

    def test_plot_spawns(self):
        """``plot_spawns`` returns an ``Axes`` when no axis is supplied."""
        # NOTE(review): ``spawns`` is not defined inside this class; it has
        # to come from module scope -- confirm it exists there.
        result = plot_spawns(inds=spawns, tica_trajs=self.ttrajs, ax=None)
        assert isinstance(result, Axes)

    def test_plot_tica_landscape(self):
        """``plot_tica_landscape`` returns a pair whose second item is an ``Axes``."""
        _, axes = plot_tica_landscape(self.ttrajs)
        assert isinstance(axes, Axes)

    def test_plot_clusters(self):
        """``plot_clusters`` returns an ``Axes`` for the fitted clusterer."""
        result = plot_clusters(self.clusterer)
        assert isinstance(result, Axes)
"""Cluster tICA results {{header}} Meta ---- depends: - ttrajs - meta.pandas.pickl """ from msmbuilder.io import load_trajs, save_trajs, save_generic from msmbuilder.cluster import MiniBatchKMeans ## Load meta, ttrajs = load_trajs('ttrajs') ## Fit dim = 5 kmeans = MiniBatchKMeans(n_clusters=500) kmeans.fit([traj[:, :dim] for traj in ttrajs.values()]) ## Transform ktrajs = {} for k, v in ttrajs.items(): ktrajs[k] = kmeans.partial_transform(v[:, :dim]) ## Save print(kmeans.summarize()) save_trajs(ktrajs, 'ktrajs', meta) save_generic(kmeans, 'kmeans.pickl')