def cluster(data_dir, traj_dir, n_clusters):
    """Cluster reduced trajectory data and dump one PDB per cluster medoid.

    Plots a log-density hexbin of the first two reduced coordinates,
    overlays and annotates each cluster center, and saves the medoid
    conformation of every cluster to a hard-coded scratch directory.

    Parameters
    ----------
    data_dir : path loadable by verboseload; list of per-trajectory arrays
    traj_dir : directory whose trajectory files align with cluster_ids_
    n_clusters : number of clusters for MiniBatchKMedoids
    """
    reduced_data = verboseload(data_dir)
    stacked = np.concatenate(reduced_data)
    plt.hexbin(stacked[:, 0], stacked[:, 1], bins='log', mincnt=1)

    clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
    clusterer.fit_transform(reduced_data)

    for idx, center in enumerate(clusterer.cluster_centers_):
        plt.scatter(center[0], center[1])
        plt.annotate('C%d' % idx,
                     xy=(center[0], center[1]),
                     xytext=(center[0] + 0.1, center[1] + 0.1),
                     arrowprops=dict(facecolor='black', shrink=0.05))

        # cluster_ids_[idx] holds (trajectory index, frame index) of the medoid.
        location = clusterer.cluster_ids_[idx, :]
        print(location)
        traj = get_trajectory_files(traj_dir)[location[0]]
        print("traj = %s" % traj)
        print("frame = %d" % location[1])

        conformation = md.load_frame(traj, location[1])
        conformation.save_pdb(
            "/scratch/users/enf/b2ar_analysis/cluster_%d.pdb" % idx)
    plt.show()
def cluster(data_dir, traj_dir, n_clusters, lag_time):
    """Fit MiniBatchKMedoids on reduced data and cache the model to disk.

    If a clusterer for this (n_clusters, lag_time) pair already exists at
    the scratch path, nothing is recomputed.

    Parameters
    ----------
    data_dir : path loadable by verboseload; list of per-trajectory arrays
    traj_dir : unused here; kept for signature compatibility with callers
    n_clusters : number of clusters for MiniBatchKMedoids
    lag_time : lag time tag embedded in the cache filename
    """
    clusterer_dir = "/scratch/users/enf/b2ar_analysis/clusterer_%d_t%d.h5" % (
        n_clusters, lag_time)
    if os.path.exists(clusterer_dir):
        # Cached model found; skip the expensive clustering step.
        # (was a Python 2 `print` statement -- syntax error under Python 3)
        print("Already clustered")
    else:
        reduced_data = verboseload(data_dir)
        clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
        clusterer.fit_transform(reduced_data)
        # Reuse the path computed above instead of rebuilding the literal.
        verbosedump(clusterer, clusterer_dir)
def cluster(data_dir, traj_dir, n_clusters, lag_time):
    """Fit MiniBatchKMedoids on reduced data and cache the model to disk.

    Skips all work when a clusterer for this (n_clusters, lag_time) pair
    has already been dumped to the scratch directory.

    Parameters
    ----------
    data_dir : path loadable by verboseload; list of per-trajectory arrays
    traj_dir : unused here; kept for signature compatibility with callers
    n_clusters : number of clusters for MiniBatchKMedoids
    lag_time : lag time tag embedded in the cache filename
    """
    clusterer_dir = "/scratch/users/enf/b2ar_analysis/clusterer_%d_t%d.h5" % (
        n_clusters, lag_time)
    # Guard clause: nothing to do when the cached model already exists.
    if os.path.exists(clusterer_dir):
        print("Already clustered")
        return

    reduced_data = verboseload(data_dir)
    trajs = np.concatenate(reduced_data)
    clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
    clusterer.fit_transform(reduced_data)
    verbosedump(
        clusterer,
        "/scratch/users/enf/b2ar_analysis/clusterer_%d_t%d.h5"
        % (n_clusters, lag_time))
def cluster_features(features, clusterer, n_clusters=8):
    '''Cluster featurized trajectories with a chosen msmbuilder algorithm.

    Input
    features : list of arrays, length n_trajs,
        each of shape (n_samples, n_features)
    clusterer : str, one of 'KMeans', 'KCenters', 'KMedoids',
        'MiniBatchKMeans', 'MiniBatchKMedoids'
    n_clusters : int, number of clusters

    Output
    clst : msmbuilder.cluster object, with attributes
        cluster_centers_ : (n_clusters, n_features)
        labels_ : list of arrays, each of shape (n_samples, )

    Raises
    ValueError : if `clusterer` is not a recognized algorithm name
        (previously an unknown name fell through the if/elif chain and
        raised an opaque NameError on the unbound `clst`).
    '''
    supported = ('KMeans', 'KCenters', 'KMedoids',
                 'MiniBatchKMeans', 'MiniBatchKMedoids')
    if clusterer not in supported:
        raise ValueError("Unknown clusterer %r; expected one of: %s"
                         % (clusterer, ', '.join(supported)))
    # The msmbuilder.cluster classes are named exactly like the accepted
    # strings, so a getattr dispatch replaces the import-per-branch chain.
    import msmbuilder.cluster as _msm_cluster
    clst = getattr(_msm_cluster, clusterer)(n_clusters=n_clusters)
    clst.fit_transform(features)
    return clst
def cluster(data_dir, traj_dir, n_clusters):
    """Cluster reduced trajectory data and dump one PDB per cluster medoid.

    Plots a log-density hexbin of the first two reduced coordinates,
    overlays and annotates each cluster center, and saves the medoid
    conformation of every cluster to a hard-coded scratch directory.

    Parameters
    ----------
    data_dir : path loadable by verboseload; list of per-trajectory arrays
    traj_dir : directory whose trajectory files align with cluster_ids_
    n_clusters : number of clusters for MiniBatchKMedoids

    Note: the original used Python 2 `print` statements (`print location`),
    which are syntax errors under Python 3; converted to print() calls.
    """
    reduced_data = verboseload(data_dir)
    trajs = np.concatenate(reduced_data)
    plt.hexbin(trajs[:, 0], trajs[:, 1], bins='log', mincnt=1)

    clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
    clusterer.fit_transform(reduced_data)

    centers = clusterer.cluster_centers_
    for i in range(np.shape(centers)[0]):
        center = centers[i, :]
        plt.scatter(center[0], center[1])
        plt.annotate('C%d' % i,
                     xy=(center[0], center[1]),
                     xytext=(center[0] + 0.1, center[1] + 0.1),
                     arrowprops=dict(facecolor='black', shrink=0.05))

        # cluster_ids_[i] holds (trajectory index, frame index) of the medoid.
        location = clusterer.cluster_ids_[i, :]
        print(location)
        traj = get_trajectory_files(traj_dir)[location[0]]
        print("traj = %s" % traj)
        print("frame = %d" % location[1])

        conformation = md.load_frame(traj, location[1])
        conformation.save_pdb(
            "/scratch/users/enf/b2ar_analysis/cluster_%d.pdb" % i)
    plt.show()
"""Cluster based on RMSD between conformations msmbuilder autogenerated template version 2 created 2017-05-23T16:38:49.093656 please cite msmbuilder in any publications """ import mdtraj as md from msmbuilder.cluster import MiniBatchKMedoids from msmbuilder.io import load_meta, itertrajs, save_generic, backup ## Set up parameters kmed = MiniBatchKMedoids( n_clusters=500, metric='rmsd', ) ## Load meta = load_meta() ## Try to limit RAM usage def guestimate_stride(): total_data = meta['nframes'].sum() want = kmed.n_clusters * 10 stride = max(1, total_data // want) print("Since we have", total_data, "frames, we're going to stride by", stride, "during fitting, because this is probably adequate for", kmed.n_clusters, "clusters") return stride
Meta ---- depends: - meta.pandas.pickl - trajs - top.pdb """ import mdtraj as md from msmbuilder.cluster import MiniBatchKMedoids from msmbuilder.io import load_meta, itertrajs, save_generic, backup ## Set up parameters kmed = MiniBatchKMedoids( n_clusters=500, metric='rmsd', ) ## Load meta = load_meta() ## Try to limit RAM usage def guestimate_stride(): total_data = meta['nframes'].sum() want = kmed.n_clusters * 10 stride = max(1, total_data // want) print("Since we have", total_data, "frames, we're going to stride by", stride, "during fitting, because this is probably adequate for", kmed.n_clusters, "clusters") return stride
Meta ---- depends: - meta.pandas.pickl - trajs - top.pdb """ import mdtraj as md from msmbuilder.cluster import MiniBatchKMedoids from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top ## Set up parameters kmed = MiniBatchKMedoids( n_clusters=500, metric='rmsd', ) ## Load meta = load_meta() centroids = md.load("centroids.xtc", top=preload_top(meta)) ## Kernel SIGMA = 0.3 # nm from msmbuilder.featurizer import RMSDFeaturizer import numpy as np featurizer = RMSDFeaturizer(centroids) lfeats = {} for i, traj in itertrajs(meta): lfeat = featurizer.partial_transform(traj)