def _do_lumping(self):
    """Do the MVCA lumping.

    Builds a Ward-linkage ``LandmarkAgglomerative`` model configured from
    this object's ``n_macrostates``, ``metric``, ``n_landmarks``,
    ``landmark_strategy`` and ``random_state``, fits it on
    ``[self.transmat_]`` and stores the resulting labels in
    ``self.microstate_mapping_``.

    When ``self.fit_only`` is truthy the labels are the fitted model's
    ``landmark_labels_``; otherwise they are the first element of
    ``model.transform([self.transmat_])``.
    """
    clusterer = LandmarkAgglomerative(
        linkage='ward',
        n_clusters=self.n_macrostates,
        metric=self.metric,
        n_landmarks=self.n_landmarks,
        landmark_strategy=self.landmark_strategy,
        random_state=self.random_state,
    )
    # The model is handed a one-element list holding the transition matrix.
    clusterer.fit([self.transmat_])

    if not self.fit_only:
        # Label the matrix by transforming it with the fitted model.
        labels = clusterer.transform([self.transmat_])[0]
    else:
        # Reuse the labels the fit already assigned to the landmarks.
        labels = clusterer.landmark_labels_

    self.microstate_mapping_ = labels
def _do_lumping(self):
    """Do the MVCA lumping.

    A ``LandmarkAgglomerative`` model with Ward linkage is created from the
    clustering settings stored on this object and fitted on
    ``[self.transmat_]``. The labels written to ``self.microstate_mapping_``
    come from ``landmark_labels_`` of the fitted model when ``self.fit_only``
    is truthy, and from ``transform([self.transmat_])[0]`` otherwise.
    """
    settings = dict(
        linkage='ward',
        n_clusters=self.n_macrostates,
        metric=self.metric,
        n_landmarks=self.n_landmarks,
        landmark_strategy=self.landmark_strategy,
        random_state=self.random_state,
    )
    lumper = LandmarkAgglomerative(**settings)
    lumper.fit([self.transmat_])

    # Pick the label source only after fitting, exactly as the flag dictates.
    self.microstate_mapping_ = (
        lumper.landmark_labels_
        if self.fit_only
        else lumper.transform([self.transmat_])[0]
    )
# NOTE(review): the ``def traj_load(irow):`` header was absent from this
# fragment; it is restored here from the ``map(traj_load, ...)`` call below and
# from the body's use of ``irow`` -- confirm against the original script.
def traj_load(irow):
    """Load the trajectory described by one ``(index, row)`` pair.

    ``irow`` is unpacked into ``i, row``. ``row['traj_fn']`` is passed to
    ``md.load`` with the topology looked up as ``tops[row['top_fn']]``.

    Returns the tuple ``(i, traj)`` so a sequence of results can be handed
    directly to ``dict``.
    """
    i, row = irow
    traj = md.load(row['traj_fn'], top=tops[row['top_fn']])
    return i, traj


# One loaded trajectory per item yielded by ``meta.iterrows()``, keyed by the
# first element of that item.
traj_dict = dict(map(traj_load, meta.iterrows()))
trajs = list(traj_dict.values())

# cluster
print('Attempting to cluster')
num_clusters = 20
# n_landmarks is totframes / 100 truncated to an int
# (presumably totframes is the total frame count -- TODO confirm).
cluster = LandmarkAgglomerative(n_clusters=num_clusters,
                                n_landmarks=int(totframes / 100),
                                linkage='ward', metric='rmsd')
cluster.fit(trajs)

# Disabled labelling pass, kept for reference:
#
# print('Fitting cluster labels')
# ctraj = {}
# for k, v in traj_dict.items():
#     v = cluster.partial_predict(v)
#     diff = nframes-v.shape[0]
#     v = np.append(v, np.zeros(diff)-1)
#     ctraj[k] = v
# Convert to DF for plotting and sampling.
# df = to_dataframe(ctraj, nframes, dt=1)

print('Fitting cluster labels for MSM')
ctraj = {}
# Directory holding the clustered trajectories; when it already exists its
# contents are loaded instead of re-running the clustering.
ctraj_path = 'ctraj-200'
if not isdir(ctraj_path):
    def traj_load(irow):
        # Unpack one (index, row) pair and load the trajectory it names,
        # using the topology registered under the row's 'top_fn' entry.
        idx, record = irow
        loaded = md.load(record['traj_fn'], top=tops[record['top_fn']])
        return idx, loaded

    traj_dict = dict(map(traj_load, meta.iterrows()))
    all_trajs = list(traj_dict.values())

    # 200 clusters; n_landmarks is totframes / 200 truncated to an int.
    cluster = LandmarkAgglomerative(
        n_clusters=200,
        n_landmarks=int(totframes / 200),
        linkage='ward',
        metric='rmsd',
    )
    cluster.fit(all_trajs)

    # TODO will this work?
    # Each work item carries its key, its trajectory and the fitted model.
    args = [(key, traj, cluster) for key, traj in traj_dict.items()]
    with Pool() as pool:
        # Results arrive in arbitrary order; dict() re-keys them.
        all_ctrajs_dict = dict(pool.imap_unordered(clust, args))

    save_generic(cluster, 'cluster-200')
    save_trajs(all_ctrajs_dict, 'ctraj-200', meta)
else:
    meta, all_ctrajs_dict = load_trajs(ctraj_path)

# Squeezed copies of the clustered trajectories: first only those whose
# leading dimension exceeds 1000, then all of them.
long_ctrajs = [
    np.squeeze(ctraj)
    for ctraj in all_ctrajs_dict.values()
    if ctraj.shape[0] > 1000
]
all_ctrajs = [np.squeeze(ctraj) for ctraj in all_ctrajs_dict.values()]

# Lag values: 200..800 in steps of 200, then 1000..4500 in steps of 500.
lags = np.concatenate([
    np.arange(200, 1000, 200),
    np.arange(1000, 5000, 500),
])
all_msms = []