Ejemplo n.º 1
0
    def setUpClass(cls):
        """Create a synthetic 2-D Gaussian-mixture trajectory and cluster it.

        Stores on the class: ``dtraj_dir`` (a fresh temporary directory),
        ``T`` (total number of frames), ``X`` (the ``(T, 2)`` data array) and
        the three clustering results ``km``, ``rs`` and ``rt``, which are also
        collected in the list ``cl``.
        """
        super(TestCluster, cls).setUpClass()
        cls.dtraj_dir = tempfile.mkdtemp()

        # generate Gaussian mixture: one (x, y) mean and width per component
        means = [np.array([-3, 0]),
                 np.array([-1, 1]),
                 np.array([0, 0]),
                 np.array([1, -1]),
                 np.array([4, 2])]
        widths = [np.array([0.3, 2]),
                  np.array([0.3, 2]),
                  np.array([0.3, 2]),
                  np.array([0.3, 2]),
                  np.array([0.3, 2])]
        # continuous trajectory: nsample consecutive frames per component
        nsample = 1000
        cls.T = len(means) * nsample
        cls.X = np.zeros((cls.T, 2))
        for i, (mean, width) in enumerate(zip(means, widths)):
            frames = slice(i * nsample, (i + 1) * nsample)
            # Draw nsample independent values per coordinate. A bare randn()
            # returns one scalar, which would be broadcast over the whole
            # segment and collapse each component to a single repeated point.
            cls.X[frames, 0] = width[0] * np.random.randn(nsample) + mean[0]
            cls.X[frames, 1] = width[1] * np.random.randn(nsample) + mean[1]
        # cluster in different ways
        cls.km = coor.cluster_kmeans(data=cls.X, k=100)
        cls.rs = coor.cluster_regspace(data=cls.X, dmin=0.5)
        cls.rt = coor.cluster_uniform_time(data=cls.X, k=100)
        cls.cl = [cls.km, cls.rs, cls.rt]
Ejemplo n.º 2
0
    def setUpClass(cls):
        """Build the shared fixture: Gaussian-mixture data plus three clusterings.

        Class attributes written: ``dtraj_dir`` (temporary directory),
        ``T`` (number of frames), ``X`` (data array of shape ``(T, 2)``),
        ``km`` / ``rs`` / ``rt`` (k-means, regular-space and uniform-time
        clusterings of ``X``) and ``cl`` (list of those three objects).
        """
        super(TestCluster, cls).setUpClass()
        cls.dtraj_dir = tempfile.mkdtemp()

        # generate Gaussian mixture
        means = [
            np.array([-3, 0]),
            np.array([-1, 1]),
            np.array([0, 0]),
            np.array([1, -1]),
            np.array([4, 2])
        ]
        widths = [
            np.array([0.3, 2]),
            np.array([0.3, 2]),
            np.array([0.3, 2]),
            np.array([0.3, 2]),
            np.array([0.3, 2])
        ]
        # continuous trajectory
        nsample = 1000
        cls.T = len(means) * nsample
        cls.X = np.zeros((cls.T, 2))
        for i in range(len(means)):
            start, stop = i * nsample, (i + 1) * nsample
            # randn(nsample) yields one sample per frame; the scalar returned
            # by randn() would make all frames of a component identical.
            for dim in (0, 1):
                noise = np.random.randn(nsample)
                cls.X[start:stop, dim] = widths[i][dim] * noise + means[i][dim]
        # cluster in different ways
        cls.km = coor.cluster_kmeans(data=cls.X, k=100)
        cls.rs = coor.cluster_regspace(data=cls.X, dmin=0.5)
        cls.rt = coor.cluster_uniform_time(data=cls.X, k=100)
        cls.cl = [cls.km, cls.rs, cls.rt]
Ejemplo n.º 3
0
 def __init__(self, ic, data=None, pickle_file=None):
     """Load a pickled clustering, or compute a new one and pickle it.

     Parameters
     ----------
     ic : object
         Only ``ic.outname`` is read; it is the prefix of the pickle file
         written when a new clustering is computed.
     data : optional
         Input handed to ``coor.cluster_regspace``. When ``None``, the
         clustering is read from ``pickle_file`` instead.
     pickle_file : str, optional
         Path of the pickle to load when ``data`` is ``None``.
     """
     if data is None:
         # NOTE: unpickling can execute arbitrary code; only load files
         # from a trusted source.
         with open(pickle_file, 'rb') as fh:
             self.cluster = pickle.load(fh)
     else:
         self.cluster = coor.cluster_regspace(data,
                                              max_centers=1000,
                                              dmin=0.025)
         # Use a context manager so the file is flushed and closed even if
         # pickling fails.
         with open(ic.outname + '_cl_full.pickle', 'wb') as fh:
             pickle.dump(self.cluster, fh)
Ejemplo n.º 4
0
    # Register the backbone torsions as features. With cossin=True the
    # featurizer presumably emits a cosine/sine pair per angle (hence the
    # name "sincos" below) -- TODO confirm against the featurizer docs.
    feat.add_backbone_torsions(selstr=None, deg=True,
                               cossin=True)  # in degrees

    # List of all the angles
    #print(feat.describe())
    # Number of degrees of freedom
    #print(feat.dimension())

    # Featurize the trajectory and keep element 0 of the output
    # (presumably the first/only trajectory -- verify against the caller).
    inp = coor.source(traj, feat)
    sincos = inp.get_output()[0]

    #############
    # Use regular-space clustering: cluster centers are at least dmin apart
    # from each other according to the given metric. A Voronoi discretization
    # with the computed centers is then used to partition the data.
    cl_space = coor.cluster_regspace(sincos, dmin=arg.dmin, max_centers=100000)
    clustCenters = cl_space.clustercenters  # feature values of each centroid
    # Discretize the trajectory against the cluster centers:
    # assign each structure its cluster number.
    Sspace = coor.assign_to_centers(sincos, clustCenters)
    # For each cluster, the frames that belong to it.
    indexClusters = cl_space.index_clusters
    # One slot per cluster, initialised to -1 (no frame selected yet).
    clustCentersFrameNo = -1 * np.ones(len(clustCenters), dtype='int32')

    # Find the centroid (Euclidean distance)
    for ind_clust in range(len(indexClusters)):
        # Frames which compose the cluster; column 1 is used as the frame
        # index (column 0 is presumably the trajectory index -- TODO confirm).
        frameNumber = indexClusters[ind_clust][:, 1]
        cosinusSinus = sincos[frameNumber, :]
        print(str(ind_clust))
        for j in range(len(cosinusSinus)):
Ejemplo n.º 5
0
# Print every backbone atom in ascending index order.
backbone.sort()
for i in backbone:
    print(topology.atom(i))

# NOTE(review): this backbone-only pair list is overwritten below by the
# all-atom pair list before it is ever used -- confirm which one is intended.
atom_pairs = list(combinations(backbone, 2))

# PCA on the distances between every pair of atoms in trajectory t.
pca = PCA(n_components=8)
atom_pairs = list(combinations(range(t.n_atoms), 2))
pairwise_distances = md.geometry.compute_distances(t, atom_pairs)
print(pairwise_distances.shape)
reduced_distances = pca.fit_transform(pairwise_distances)
# NOTE(review): the next two bare expressions have no effect in a script
# (they look like leftover interactive/notebook inspection).
pca.components_
pca.explained_variance_ratio_
tmp = deepcopy(pca.components_)

# Regular-space clustering directly on the full pairwise-distance features.
dist_space = coor.cluster_regspace(pairwise_distances, dmin=55)
# NOTE(review): np.sort(..., axis=0) sorts every feature column
# independently, so the rows of centers_space are no longer the original
# cluster centers -- confirm this is intended before relying on Sspace.
centers_space = np.sort(dist_space.clustercenters, axis=0)
Sspace = coor.assign_to_centers(pairwise_distances, centers_space)
# Discrete trajectories from the (unsorted) clustering; used for colouring.
test = dist_space.dtrajs

# Scatter the frames in the PC1/PC3 plane, coloured by cluster index.
plt.figure()
#plt.scatter(reduced_distances[:, 0], reduced_distances[:,1], marker='x', c=t.time)
#plt.scatter(reduced_distances[:, 0], reduced_distances[:,1], marker='x', c=[1]*len(reduced_distances[:,1]))
plt.scatter(reduced_distances[:, 0],
            reduced_distances[:, 2],
            marker='o',
            c=test[0] * 5,
            alpha=0.3)
plt.xlabel('PC1')
plt.ylabel('PC3')
plt.title('Pairwise distance PCA: cyclic peptide')
Ejemplo n.º 6
0
# For every trajectory, stack three neighbouring discrete trajectories
# column-wise so that each frame becomes a 3-component integer vector.
dtrajs_nnn_234 = []
dtrajs_nnn_345 = []
dtrajs_nnn_456 = []
# The loop bound uses dtrajs_rama_2, the same list that is indexed in the body.
for i in range(len(dtrajs_rama_2)):
    # ndarray.astype returns a new array, so the cast has to be applied to
    # the value that is stored; calling it on the stored element afterwards
    # and discarding the result is a no-op.
    dtrajs_nnn_234.append(
        np.vstack((dtrajs_rama_2[i], dtrajs_rama_3[i],
                   dtrajs_rama_4[i])).T.astype('int64'))
    dtrajs_nnn_345.append(
        np.vstack((dtrajs_rama_3[i], dtrajs_rama_4[i],
                   dtrajs_rama_5[i])).T.astype('int64'))
    dtrajs_nnn_456.append(
        np.vstack((dtrajs_rama_4[i], dtrajs_rama_5[i],
                   dtrajs_rama_6[i])).T.astype('int64'))


# In[18]:

# Regular-space clustering of each triplet, capped at n_clusters centers.
n_clusters = 8
clustering_nnn_234 = coor.cluster_regspace(dtrajs_nnn_234, max_centers=n_clusters, dmin=0.5)
clustering_nnn_345 = coor.cluster_regspace(dtrajs_nnn_345, max_centers=n_clusters, dmin=0.5)
clustering_nnn_456 = coor.cluster_regspace(dtrajs_nnn_456, max_centers=n_clusters, dmin=0.5)

# In[19]:

# One-dimensional discrete trajectories produced by each clustering.
dtrajs_1D_234 = clustering_nnn_234.dtrajs
dtrajs_1D_345 = clustering_nnn_345.dtrajs
dtrajs_1D_456 = clustering_nnn_456.dtrajs

# In[20]:

# shift the cluster indices so they are all consistent
# NOTE(review): if clustercenters is a NumPy array, [:] yields a view rather
# than a copy -- use .copy() if these are modified in place later.
cc_234 = clustering_nnn_234.clustercenters[:]
cc_345 = clustering_nnn_345.clustercenters[:]
cc_456 = clustering_nnn_456.clustercenters[:]
                               ss])  # prune the data to lighten the load

# Calculate the HMSM on subsets of the trajectories.
# NOTE(review): this fragment is Python 2 (print statements). `rank` and
# `comm` look like an MPI rank/communicator (e.g. mpi4py) -- confirm.
for traj_frac in range(Ntraj_0, Ntraj_f + 1):

    # Only rank 0 performs the clustering; the other ranks wait for the
    # broadcast at the bottom of the loop body.
    if (rank == 0):
        print 'Starting trajfrac ' + str(traj_frac) + ' of ' + str(Ntraj_sets)
        # Get the subset of dtraj_CN belonging to this fraction.
        # NOTE(review): the slice bounds rely on Python 2 integer division;
        # under Python 3 `/` would produce floats here.
        dtraj_CN_act = dtraj_CN[traj_frac * Nparam_traj /
                                Ntraj_sets:(traj_frac + 1) * Nparam_traj /
                                Ntraj_sets]

        # clustering
        n_clusters = n_Estates  # number of clusters
        clustering = coor.cluster_regspace(dtraj_CN_act,
                                           max_centers=n_clusters,
                                           dmin=dmin)
        # Persist the clustering object for this fraction.
        save_object(
            'clustering' + sys_nm + '_trajfrac-' + str(traj_frac) + '.pkl',
            clustering)
        # already did this, read it in
        #with open('clustering'+sys_nm+'_trajfrac-'+str(traj_frac)+'.pkl', 'rb') as f:
        #    clustering = pickle.load(f)
        dtrajs = clustering.dtrajs
        # First coordinate of every cluster center.
        # NOTE(review): cc is assigned on rank 0 only and is not broadcast in
        # the visible code.
        cc = clustering.clustercenters[:, 0]
        print 'n_clusters = ' + str(len(cc))
    else:
        # Placeholder; replaced by the broadcast value below.
        dtrajs = None

    # send the dtraj info from rank 0 to all ranks
    dtrajs = comm.bcast(dtrajs, root=0)
Ejemplo n.º 8
0
# Collect the observables used for discretization: qdata first, then one
# distance (or inverse distance, where inverse[i] is true) per fitted pair.
obs_data = [qdata]
for i in range(num_pairs):
    dist = md.compute_distances(traj, [fit_pairs[i]], periodic=False)[:, 0]
    if inverse[i]:
        obs_data.append(1. / dist)
    else:
        obs_data.append(dist)

# load the observable object that calculates the observables of a set of simulation data

# Do a simple discretization of the data into equilibrium distribution states.
# In theory, the user will be able to specify any sort of equilibrium states for their data.

# Rows are frames, columns are observables.
all_dist = np.array(obs_data).transpose()
reg_space_obj = coor.cluster_regspace(all_dist, dmin=0.05)
# Discrete trajectory of the first (only) input trajectory.
dtrajs = np.array(reg_space_obj.dtrajs)[0, :]
assert np.min(dtrajs) == 0
assert np.shape(dtrajs)[0] == np.shape(data)[0]
# State labels are 0-based, so the number of states is max + 1 (the original
# message printed max and under-reported by one). The single parenthesised
# argument keeps this line valid under both Python 2 and Python 3.
print("Number of equilibrium states are : %d" % (np.max(dtrajs) + 1))
# For every non-empty state, the indices of the frames assigned to it.
equilibrium_frames = []
indices = np.arange(np.shape(data)[0])
for i in range(np.max(dtrajs) + 1):
    state_data = indices[dtrajs == i]
    if not state_data.size == 0:
        equilibrium_frames.append(state_data)

# Sanity check: every frame must belong to exactly one state.
total_check = 0
for set_of_frames in equilibrium_frames:
    total_check += len(set_of_frames)
assert total_check == np.shape(data)[0]
Ejemplo n.º 9
0
plt.xlabel('time / ns')
plt.ylabel('IC 3')
plt.yticks(np.arange(-4, 6, 2))

# for shorter trajectory, ideal number of clusters is 100
# optimal lag_time = 750?

# optimal lag_time = 1000 timesteps

# k-means discretization, MSM estimation at lag 380 and a
# Chapman-Kolmogorov test of the resulting model.
clustering = coor.cluster_kmeans(Y, k=100)
dtrajs = clustering.dtrajs
msm = pyemma.msm.estimate_markov_model(dtrajs, 380)
pyemma.plots.plot_cktest(msm.cktest(3, err_est=True), marker='.')

# TRIALS - reg_space clustering and kmeans comparison - kmeans by far better
clustering_reg = coor.cluster_regspace(Y, dmin=2, max_centers=100)
# x / y coordinates of the cluster centers. The y values come from column 1;
# the original read column 0 for both, which put every regular-space center
# on the diagonal.
cr_x = clustering_reg.clustercenters[:, 0]
cr_y = clustering_reg.clustercenters[:, 1]
cc_x = clustering.clustercenters[:, 0]
cc_y = clustering.clustercenters[:, 1]
c_reg = [cr_x, cr_y]
c = [cc_x, cc_y]
print(len(clustering_reg.clustercenters))
# One panel per clustering (k-means first, regular-space second), each drawn
# on top of the same density plot of (xall, yall).
fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharex=True, sharey=True)
for ax, cls in zip(axes.flat, [c, c_reg]):
    pyemma.plots.plot_density(xall,
                              yall,
                              ax=ax,
                              cbar=False,
                              alpha=0.1,
                              logscale=True)
Ejemplo n.º 10
0
 def test_exceptions(self):
     """Every inconsistent argument combination must raise ValueError.

     The invalid calls are listed as ``(positional args, keyword args)``
     pairs and exercised one after another, in the order given.
     """
     us_centers = [1.1, 1.3]
     us_force_constants = [1.0, 1.0]
     us_trajs = [
         np.array([1.0, 1.1, 1.2, 1.1, 1.0, 1.1]),
         np.array([1.3, 1.2, 1.3, 1.4, 1.4, 1.3]),
     ]
     md_trajs = [
         np.array([0.9, 1.0, 1.1, 1.2, 1.3, 1.4]),
         np.array([1.5, 1.4, 1.3, 1.4, 1.4, 1.5]),
     ]
     cluster = cluster_regspace(data=us_trajs + md_trajs,
                                max_centers=10,
                                dmin=0.15)
     us_dtrajs = cluster.dtrajs[:2]
     md_dtrajs = cluster.dtrajs[2:]
     # trajectories that are one frame shorter than their dtrajs
     us_trajs_x = [
         np.array([1.0, 1.1, 1.2, 1.1, 1.0]),
         np.array([1.3, 1.2, 1.3, 1.4, 1.4]),
     ]
     md_trajs_x = [
         np.array([0.9, 1.0, 1.1, 1.2, 1.3]),
         np.array([1.5, 1.4, 1.3, 1.4, 1.4]),
     ]
     valid = (us_trajs, us_dtrajs, us_centers, us_force_constants)
     invalid_calls = [
         # mismatching number of us trajectories / us parameters
         ((us_trajs[:-1], us_dtrajs, us_centers, us_force_constants), {}),
         ((us_trajs, us_dtrajs[:-1], us_centers, us_force_constants), {}),
         ((us_trajs, us_dtrajs, us_centers[:-1], us_force_constants), {}),
         ((us_trajs, us_dtrajs, us_centers, us_force_constants[:-1]), {}),
         # mismatching number of md trajectories
         (valid, dict(md_trajs=md_trajs[:-1], md_dtrajs=md_dtrajs)),
         (valid, dict(md_trajs=md_trajs, md_dtrajs=md_dtrajs[:-1])),
         # mismatching trajectory lengths
         ((us_trajs_x, us_dtrajs, us_centers, us_force_constants), {}),
         (valid, dict(md_trajs=md_trajs_x, md_dtrajs=md_dtrajs)),
         # only one of md_trajs / md_dtrajs given
         (valid, dict(md_trajs=None, md_dtrajs=md_dtrajs)),
         (valid, dict(md_trajs=md_trajs, md_dtrajs=None)),
         # single trajectory instead of a list of trajectories
         ((us_trajs[0], us_dtrajs[0], us_centers[0],
           us_force_constants[0]), {}),
         (valid, dict(md_trajs=md_trajs[0], md_dtrajs=md_dtrajs[0])),
     ]
     for positional, keywords in invalid_calls:
         with self.assertRaises(ValueError):
             estimate_umbrella_sampling(*positional, **keywords)
Ejemplo n.º 11
0
 def setUp(self):
     """Cluster six short integer trajectories with regular-space clustering.

     ``self.input_trajs`` holds three distinct 3-frame trajectories, each
     listed twice; ``self.cluster_obj`` is the result of clustering them
     with ``dmin=0.5``.
     """
     first_frames = (0, 3, 6, 0, 3, 6)
     self.input_trajs = [[f, f + 1, f + 2] for f in first_frames]
     self.cluster_obj = coor.cluster_regspace(dmin=0.5,
                                              data=self.input_trajs)
Ejemplo n.º 12
0
#f = coor.featurizer(topfile)
#f.add_distances_ca()
## load trajectories and colvar files
#inp = coor.source(traj_list, f)

#tica = coor.tica(inp, lag=500, dim=3, commute_map=True, kinetic_map=False, skip=20000, stride=10 )

col = import_colvar('mix')

# From every colvar array: drop the first 20000 rows, keep every 10th of the
# remaining rows, and take column 1.
col_skip = [i[20000::10, 1] for i in col.col]

#print('shape of col_skip is ', len(col_skip[0]))
print('min and max of col_skip ', np.min(col_skip), np.max(col_skip))

# Regular-space clustering of the sub-sampled column.
clust_col_skip_obj = coor.cluster_regspace(col_skip,
                                           max_centers=1000,
                                           dmin=0.025)

clust_col_skip_dtraj = clust_col_skip_obj.dtrajs
#clust_col_skip_dtraj =  pickle.load(open('clust_col_skip_dtraj_cl_full.pickle', 'rb'))
# Persist both the clustering object and its discrete trajectories. The
# context managers guarantee the files are flushed and closed; the original
# passed bare open() results and never closed them.
with open('clust_col_skip_obj_cl_full.pickle', 'wb') as obj_file:
    pickle.dump(clust_col_skip_obj,
                obj_file,
                protocol=pickle.HIGHEST_PROTOCOL)
with open('clust_col_skip_dtraj_cl_full_try2.pickle', 'wb') as dtraj_file:
    pickle.dump(clust_col_skip_dtraj,
                dtraj_file,
                protocol=pickle.HIGHEST_PROTOCOL)

print('length of clust_col_skip_dtraj is ', len(clust_col_skip_dtraj))
print('length of clust_col_skip_dtraj[0] is ', len(clust_col_skip_dtraj[0]))

#Y = tica.get_output()
Ejemplo n.º 13
0
# One COLVAR file per umbrella center; the file name carries the center
# formatted with one decimal.
center = np.arange(0.7, 15.5, 0.1)
center2 = center.tolist()
colvar_list = [indir + "/comboCOLVAR{:2.1f}".format(i) for i in center]
col = [np.loadtxt(f, skiprows=1) for f in colvar_list]
length = len(center)
print(length)
force = 119.503
# Same force constant for every umbrella window.
force_list = [500] * length
## Start doing stuff
max_centers = 1500
dmin = 0.015
# NOTE(review): kt is not used in the visible code, and the estimator call
# below hard-codes kT=2.496 -- confirm which value (and unit system) is
# intended.
kt = 0.596
#cv = list(col[20000:,1])
# From every file: skip the first 20000 rows, keep every 10th row, column 1.
cv = [i[20000::10, 1] for i in col]
# Copy the strided slices into their own C-ordered arrays.
cv2 = [i.copy(order='C') for i in cv]

us_cluster = coor.cluster_regspace(cv2, max_centers=max_centers, dmin=dmin)
w = thermo.estimate_umbrella_sampling(cv2,
                                      us_cluster.dtrajs,
                                      center2,
                                      force_list,
                                      kT=2.496,
                                      maxiter=50000,
                                      lag=200,
                                      dt_traj='10 ps',
                                      save_convergence_info=200,
                                      estimator='dtram')

# Write the results through context managers so the files are flushed and
# closed; the original passed bare open() results and never closed them.
with open(clust_out, 'wb') as cluster_file:
    pickle.dump(us_cluster, cluster_file)
with open(out, 'wb') as estimate_file:
    pickle.dump(w, estimate_file)
Ejemplo n.º 14
0
def clusterRegularSpace(trajectories, dmin, stride=1):
    """Run Regular Space clustering on the given trajectories.

    Regular Space clustering is a modified version of Hartigan's leader
    algorithm. This is a thin wrapper around ``coor.cluster_regspace``.

    :param trajectories: forwarded as the ``data`` argument
    :param dmin: forwarded as the ``dmin`` argument
    :param stride: forwarded as the ``stride`` argument (default 1)
    :returns: the object returned by ``coor.cluster_regspace``
    """
    options = dict(data=trajectories, dmin=dmin, stride=stride)
    return coor.cluster_regspace(**options)