Example #1
# Imports assumed by this fragment (they are not shown in the original snippet);
# get_trajectory_files() is a project-specific helper defined elsewhere in the codebase.
import numpy as np
import matplotlib.pyplot as plt
import mdtraj as md
from msmbuilder.cluster import MiniBatchKMedoids
from msmbuilder.utils import verboseload


def cluster(data_dir, traj_dir, n_clusters):
    reduced_data = verboseload(data_dir)
    trajs = np.concatenate(reduced_data)
    plt.hexbin(trajs[:, 0], trajs[:, 1], bins='log', mincnt=1)

    clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
    clusterer.fit_transform(reduced_data)

    centers = clusterer.cluster_centers_
    for i in range(0, np.shape(centers)[0]):
        center = centers[i, :]
        plt.scatter(center[0], center[1])
        plt.annotate('C%d' % i,
                     xy=(center[0], center[1]),
                     xytext=(center[0] + 0.1, center[1] + 0.1),
                     arrowprops=dict(facecolor='black', shrink=0.05))

        location = clusterer.cluster_ids_[i, :]
        print(location)
        traj = get_trajectory_files(traj_dir)[location[0]]
        print(("traj = %s" % traj))
        print(("frame = %d" % location[1]))
        conformation = md.load_frame(traj, location[1])
        conformation.save_pdb(
            "/scratch/users/enf/b2ar_analysis/cluster_%d.pdb" % i)

    plt.show()
Example #2
def cluster(data_dir, traj_dir, n_clusters, lag_time):
    clusterer_dir = "/scratch/users/enf/b2ar_analysis/clusterer_%d_t%d.h5" % (n_clusters, lag_time)
    if os.path.exists(clusterer_dir):
        print("Already clustered")
    else:
        reduced_data = verboseload(data_dir)
        trajs = np.concatenate(reduced_data)
        clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
        clusterer.fit_transform(reduced_data)
        verbosedump(clusterer, "/scratch/users/enf/b2ar_analysis/clusterer_%d_t%d.h5" % (n_clusters, lag_time))
Example #3
def cluster(data_dir, traj_dir, n_clusters, lag_time):
    clusterer_dir = "/scratch/users/enf/b2ar_analysis/clusterer_%d_t%d.h5" % (
        n_clusters, lag_time)
    if os.path.exists(clusterer_dir):
        print("Already clustered")
    else:
        reduced_data = verboseload(data_dir)
        trajs = np.concatenate(reduced_data)
        clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
        clusterer.fit_transform(reduced_data)
        verbosedump(
            clusterer, "/scratch/users/enf/b2ar_analysis/clusterer_%d_t%d.h5" %
            (n_clusters, lag_time))
Example #4
def cluster_features(features, clusterer, n_clusters=8):
    '''
    Input
    features : list of arrays, length n_trajs, each of shape (n_samples, n_features)

    Output
    clst : msmbuilder.cluster object, with attributes
        cluster_centers_ : (n_clusters, n_features)
        labels_          : list of arrays, each of shape (n_samples, )
    '''
    if clusterer == 'KMeans':
        from msmbuilder.cluster import KMeans
        clst = KMeans(n_clusters=n_clusters)
    elif clusterer == 'KCenters':
        from msmbuilder.cluster import KCenters
        clst = KCenters(n_clusters=n_clusters)
    elif clusterer == 'KMedoids':
        from msmbuilder.cluster import KMedoids
        clst = KMedoids(n_clusters=n_clusters)
    elif clusterer == 'MiniBatchKMeans':
        from msmbuilder.cluster import MiniBatchKMeans
        clst = MiniBatchKMeans(n_clusters=n_clusters)
    elif clusterer == 'MiniBatchKMedoids':
        from msmbuilder.cluster import MiniBatchKMedoids
        clst = MiniBatchKMedoids(n_clusters=n_clusters)
    else:
        raise ValueError("Unrecognized clusterer: %s" % clusterer)
    clusters = clst.fit_transform(features)
    return clst
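A minimal usage sketch of cluster_features (the random arrays below are made-up stand-ins for real per-trajectory feature data; the printed shapes assume the defaults above):

import numpy as np

# Two fake "trajectories" of per-frame features, shapes (n_samples, n_features)
features = [np.random.randn(1000, 4), np.random.randn(800, 4)]

clst = cluster_features(features, clusterer='MiniBatchKMedoids', n_clusters=8)
print(clst.cluster_centers_.shape)  # (8, 4)
print(len(clst.labels_))            # 2, one label array per input trajectory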
Example #5
def cluster(data_dir, traj_dir, n_clusters):
    reduced_data = verboseload(data_dir)
    trajs = np.concatenate(reduced_data)
    plt.hexbin(trajs[:, 0], trajs[:, 1], bins='log', mincnt=1)

    clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
    clusterer.fit_transform(reduced_data)

    centers = clusterer.cluster_centers_
    for i in range(np.shape(centers)[0]):
        center = centers[i, :]
        plt.scatter(center[0], center[1])
        plt.annotate('C%d' % i,
                     xy=(center[0], center[1]),
                     xytext=(center[0] + 0.1, center[1] + 0.1),
                     arrowprops=dict(facecolor='black', shrink=0.05))

        location = clusterer.cluster_ids_[i, :]
        print(location)
        traj = get_trajectory_files(traj_dir)[location[0]]
        print("traj = %s" % traj)
        print("frame = %d" % location[1])
        conformation = md.load_frame(traj, location[1])
        conformation.save_pdb("/scratch/users/enf/b2ar_analysis/cluster_%d.pdb" % i)

    plt.show()
"""Cluster based on RMSD between conformations

msmbuilder autogenerated template version 2
created 2017-05-23T16:38:49.093656
please cite msmbuilder in any publications


"""
import mdtraj as md

from msmbuilder.cluster import MiniBatchKMedoids
from msmbuilder.io import load_meta, itertrajs, save_generic, backup

## Set up parameters
kmed = MiniBatchKMedoids(
    n_clusters=500,
    metric='rmsd',
)

## Load
meta = load_meta()


## Try to limit RAM usage
def guestimate_stride():
    total_data = meta['nframes'].sum()
    want = kmed.n_clusters * 10
    stride = max(1, total_data // want)
    print("Since we have", total_data, "frames, we're going to stride by",
          stride, "during fitting, because this is probably adequate for",
          kmed.n_clusters, "clusters")
    return stride
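The fragment ends after computing the stride; a hedged sketch of how it might feed into fitting the clusterer (the slicing, the fit call on strided trajectories, and the output file name are assumptions, not part of the original template text):

stride = guestimate_stride()

# Fit on strided frames from every trajectory to keep memory bounded;
# itertrajs(meta) yields (key, mdtraj.Trajectory) pairs.
kmed.fit([traj[::stride] for _, traj in itertrajs(meta)])

# Persist the fitted clusterer (file name is illustrative)
save_generic(kmed, 'clusterer.pickl')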
Example #7
"""
Meta
----
depends:
  - meta.pandas.pickl
  - trajs
  - top.pdb
"""
import mdtraj as md

from msmbuilder.cluster import MiniBatchKMedoids
from msmbuilder.io import load_meta, itertrajs, save_generic, backup

## Set up parameters
kmed = MiniBatchKMedoids(
    n_clusters=500,
    metric='rmsd',
)

## Load
meta = load_meta()


## Try to limit RAM usage
def guestimate_stride():
    total_data = meta['nframes'].sum()
    want = kmed.n_clusters * 10
    stride = max(1, total_data // want)
    print("Since we have", total_data, "frames, we're going to stride by",
          stride, "during fitting, because this is probably adequate for",
          kmed.n_clusters, "clusters")
    return stride
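Example #7 stops at the same point; a hedged sketch of the likely follow-up, assigning every frame to a medoid and saving the estimator (kmed.predict, the ktrajs dict, and the file name are assumptions, not confirmed by the fragment):

# Assumes kmed has already been fit, e.g. on strided frames as sketched under Example #6.
ktrajs = {}
for key, traj in itertrajs(meta):
    # predict() on a one-element list is assumed to return a one-element list of label arrays
    ktrajs[key] = kmed.predict([traj])[0]

backup('clusterer.pickl')              # move any existing file out of the way
save_generic(kmed, 'clusterer.pickl')  # pickle the fitted estimator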
Example #8
"""
Meta
----
depends:
  - meta.pandas.pickl
  - trajs
  - top.pdb
"""
import mdtraj as md

from msmbuilder.cluster import MiniBatchKMedoids
from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top

## Set up parameters
kmed = MiniBatchKMedoids(
    n_clusters=500,
    metric='rmsd',
)

## Load
meta = load_meta()
centroids = md.load("centroids.xtc", top=preload_top(meta))

## Kernel
SIGMA = 0.3  # nm
from msmbuilder.featurizer import RMSDFeaturizer
import numpy as np

featurizer = RMSDFeaturizer(centroids)
lfeats = {}
for i, traj in itertrajs(meta):
    lfeat = featurizer.partial_transform(traj)
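    # Hedged continuation (not part of the original fragment): convert each
    # frame's RMSD-to-centroid distances into Gaussian kernel similarities of
    # width SIGMA. The dict key and output file name are illustrative, and the
    # save_trajs usage is assumed from the imports above.
    lfeats[i] = np.exp(-lfeat ** 2 / (2 * SIGMA ** 2))

## Save the per-trajectory kernel features
save_trajs(lfeats, 'ftrajs', meta)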