# Cluster the trajectories with landmark-based agglomerative clustering
# (Ward linkage, RMSD metric).
print('Attempting to cluster')
num_clusters = 20
# Number of landmarks: totframes / 100, truncated to an integer.
n_landmarks = int(totframes / 100)
cluster = LandmarkAgglomerative(
    n_clusters=num_clusters,
    n_landmarks=n_landmarks,
    linkage='ward',
    metric='rmsd',
)
cluster.fit(trajs)

# A previous variant of this step padded every label array with -1 up to
# `nframes` and built a dataframe via to_dataframe(ctraj, nframes, dt=1) for
# plotting and sampling; that variant is currently disabled.

# Assign cluster labels to every trajectory, keyed exactly as in traj_dict.
print('Fitting cluster labels for MSM')
ctraj = {key: cluster.partial_predict(traj) for key, traj in traj_dict.items()}

# Save dataframe
# NOTE(review): `df` is not assigned anywhere in this section (the
# to_dataframe call that used to build it is disabled) -- confirm it is
# defined earlier in the file, otherwise the next line raises NameError.
save_generic(df, 'clusters/rmsd_cluster_trajectory.pickl')
# NOTE(review): the cluster labels are saved under the name 'ftraj' --
# confirm this is intended and does not collide with feature trajectories.
save_trajs(ctraj, 'ftraj', meta)
# Load the pruned feature trajectories for the chosen feature set.
feature = 'dihedrals'
meta, traj_dict = load_trajs('pruned_trajectories/{}-ftraj'.format(feature))
trajs = list(traj_dict.values())
# Length of the longest trajectory; every label array is padded up to this.
# Uses the true maximum rather than the first unique value: with the latter,
# any trajectory longer than the first one made `padding` below negative and
# np.zeros() raised ValueError.
nframes = int(meta['nframes'].max())

# Cluster with landmark-based agglomerative clustering (Ward linkage,
# Euclidean metric).
num_clusters = 10
cluster = LandmarkAgglomerative(
    n_clusters=num_clusters,
    n_landmarks=200,
    linkage='ward',
    metric='euclidean',
)
cluster.fit(trajs)

# Label every trajectory and pad the labels with -1 so that all arrays have
# exactly `nframes` entries.
ctraj = {}
for k, v in traj_dict.items():
    # Work on a C-ordered copy -- presumably required by partial_predict;
    # TODO confirm.
    labels = cluster.partial_predict(v.copy(order='C'))
    padding = nframes - labels.shape[0]
    # np.zeros(...) - 1 yields a float array of -1 used as the "no frame" marker.
    ctraj[k] = np.append(labels, np.zeros(padding) - 1)

# Convert to DF for plotting and sampling.
df = to_dataframe(ctraj, nframes, dt=1)

# Plot a random 10% sample of the rows, ordered by production run, site and
# time.
sample = df.sample(frac=0.1, axis=0).sort_values(
    by=['Prod_ID', 'Site_ID', 'Time_ps'])
g = sns.FacetGrid(sample, col='Prod_ID', hue='Site_ID', col_wrap=10)
g.map(plt.scatter, 'Time_ps', 'Trajectory', alpha=0.5)
# The lower limit of -0.5 keeps the -1 padding values out of the visible range.
g.set(ylim=(-0.5, num_clusters))
g.fig.tight_layout()
plt.savefig('figures/{}_cluster_trajectory.pdf'.format(feature))