Exemple #1
0
 def setUpClass(cls):
     """Create reference values and discretised sample trajectories for the class.

     Three biased runs (one per spring center) and seven unbiased runs are
     generated with ``run_mcmc``; both sets are then assigned to the same
     one-dimensional centers with ``assign_to_centers``.
     """
     # 40 grid points from linspace, shifted by 0.0375 and stored as a column
     grid = np.linspace(-1.6, 1.4, 40, endpoint=False)
     cls.centers = (grid + 0.0375).reshape((-1, 1))
     cls.metastable_sets = [np.arange(22, 40), np.arange(0, 22)]
     # reference numbers: MSM(tau=10) on 10^6 steps + PCCA
     cls.pi = [0.308479845114, 0.691520154886]
     cls.f = -np.log(cls.pi)
     cls.mfpt = [[0.0, 176.885753716], [433.556388454, 0.0]]

     # biased runs, all sharing one spring constant
     spring_constant = 3.0
     bias_centers = [-0.4, 0.2, 0.8]
     cls.us_trajs = []
     for bias_center in bias_centers:
         # NOTE(review): the first positional argument is spring_constant (3.0);
         # if that parameter is a start coordinate it lies outside the range
         # spanned by cls.centers -- confirm this is intended.
         positions, _energies = run_mcmc(spring_constant,
                                         1000,
                                         spring_constant=spring_constant,
                                         spring_center=bias_center)
         cls.us_trajs.append(positions)
     cls.us_centers = list(bias_centers)
     cls.us_force_constants = [spring_constant] * len(bias_centers)

     # unbiased runs
     cls.md_trajs = []
     for _ in range(7):
         positions, _energies = run_mcmc(0.13, 1000)
         cls.md_trajs.append(positions)

     cls.us_dtrajs = assign_to_centers(cls.us_trajs, centers=cls.centers)
     cls.md_dtrajs = assign_to_centers(cls.md_trajs, centers=cls.centers)
Exemple #2
0
    def test_assignment_multithread_minrsmd(self):
        """minRMSD assignment with two jobs must equal the single-job result."""
        import pyemma.datasets as data
        bpti = data.get_bpti_test_data()
        reader = coor.source(bpti['trajs'], top=bpti['top'])

        # nine centers taken from the reader; each selection is an index pair
        # handed to ra_itraj_jagged (presumably trajectory index, frame indices)
        N_centers = 9
        selections = ((0, [0, 1, 7]), (1, [32, 1, 23]), (2, [17, 8, 15]))
        picked = tuple(reader.ra_itraj_jagged[first, second]
                       for first, second in selections)
        centers = np.asarray(picked).reshape((N_centers, -1))

        # everything except n_jobs is shared between the two calls
        shared = dict(chunksize=1000, metric='minRMSD')
        assignment_mp = coor.assign_to_centers(reader, centers, n_jobs=2, **shared)
        assignment_sp = coor.assign_to_centers(reader, centers, n_jobs=1, **shared)

        np.testing.assert_equal(assignment_mp, assignment_sp)
Exemple #3
0
    def test_assignment_multithread_minrsmd(self):
        """The minRMSD assignment must not depend on the number of jobs."""
        shared = dict(chunk_size=1000, metric='minRMSD')
        # two-job run first, then the single-job reference
        assignment_mp, assignment_sp = (
            coor.assign_to_centers(self.X, self.centers_big, n_jobs=jobs, **shared)
            for jobs in (2, 1)
        )

        np.testing.assert_equal(assignment_mp, assignment_sp)
Exemple #4
0
 def test_threads_cpu_count_def_arg(self):
     """Without an n_jobs argument, res.n_jobs must equal the expected count.

     The expected count is PYEMMA_NJOBS from the environment when set, else 3;
     psutil.cpu_count is patched to return that same number.
     """
     expected = int(os.getenv('PYEMMA_NJOBS', 3))
     with patch('psutil.cpu_count', lambda *args, **kw: expected):
         res = coor.assign_to_centers(self.X, self.centers_big, return_dtrajs=False)
     self.assertEqual(res.n_jobs, expected)
Exemple #5
0
    def setUpClass(cls):
        """Create a 2D Gaussian-mixture data set and assign it to its true centers.

        Sets ``cls.nsample`` (samples per component), ``cls.T`` (total number
        of samples), ``cls.X`` (the ``(T, 2)`` data array), ``cls.centers``
        (the five component means) and ``cls.ass`` (the object returned by
        ``coor.assign_to_centers`` with ``return_dtrajs=False``).
        """
        super(TestClusterAssign, cls).setUpClass()

        # generate Gaussian mixture: five components with per-axis width 0.1
        means = [
            np.array([-3, 0]),
            np.array([-1, 1]),
            np.array([0, 0]),
            np.array([1, -1]),
            np.array([4, 2])
        ]
        widths = [np.array([0.1, 0.1]) for _ in means]
        # data
        cls.nsample = 1000
        cls.T = len(means) * cls.nsample
        cls.X = np.zeros((cls.T, 2))
        for i, (mean, width) in enumerate(zip(means, widths)):
            rows = slice(i * cls.nsample, (i + 1) * cls.nsample)
            # Draw one value per sample. A bare randn() returns a single
            # scalar, which would make all samples of a component identical.
            cls.X[rows, 0] = width[0] * np.random.randn(cls.nsample) + mean[0]
            cls.X[rows, 1] = width[1] * np.random.randn(cls.nsample) + mean[1]
        # try assigning actual centers:
        cls.centers = np.array([[-3, 0], [-1, 1], [0, 0], [1, -1], [4, 2]])
        # assignment
        cls.ass = coor.assign_to_centers(data=cls.X,
                                         centers=cls.centers,
                                         return_dtrajs=False)
Exemple #6
0
    def setUpClass(cls):
        """Create a temp directory, a 2D Gaussian-mixture data set and its assignment.

        Sets ``cls.dtraj_dir`` (a fresh temporary directory), ``cls.nsample``,
        ``cls.T``, ``cls.X`` (the ``(T, 2)`` data array), ``cls.centers`` (the
        five component means) and ``cls.ass`` (the object returned by
        ``coor.assign_to_centers`` with ``return_dtrajs=False``).
        """
        super(TestCluster, cls).setUpClass()
        cls.dtraj_dir = tempfile.mkdtemp()

        # generate Gaussian mixture: five components with per-axis width 0.1
        means = [np.array([-3, 0]),
                 np.array([-1, 1]),
                 np.array([0, 0]),
                 np.array([1, -1]),
                 np.array([4, 2])]
        widths = [np.array([0.1, 0.1]) for _ in means]
        # data
        cls.nsample = 1000
        cls.T = len(means) * cls.nsample
        cls.X = np.zeros((cls.T, 2))
        for i, (mean, width) in enumerate(zip(means, widths)):
            start, stop = i * cls.nsample, (i + 1) * cls.nsample
            # One random draw per sample: randn() without a size argument
            # yields a single scalar that is broadcast over the whole slice.
            cls.X[start:stop, 0] = width[0] * np.random.randn(cls.nsample) + mean[0]
            cls.X[start:stop, 1] = width[1] * np.random.randn(cls.nsample) + mean[1]
        # try assigning actual centers:
        cls.centers = np.array([[-3, 0],
                                [-1, 1],
                                [0, 0],
                                [1, -1],
                                [4, 2]])
        # assignment
        cls.ass = coor.assign_to_centers(data=cls.X, centers=cls.centers, return_dtrajs=False)
Exemple #7
0
 def test_threads_cpu_count_def_arg(self):
     """With n_jobs left at its default, res.n_jobs must equal psutil.cpu_count()."""
     import psutil
     n_frames, n_centers = 1000, 10
     X = np.random.random((n_frames, 3))
     # centers are rows of X picked by np.random.choice
     centers = X[np.random.choice(n_frames, n_centers)]
     # n_jobs is deliberately not passed here
     res = coor.assign_to_centers(X, centers, return_dtrajs=False)
     self.assertEqual(res.n_jobs, psutil.cpu_count())
Exemple #8
0
    def test_wrong_centers_argument2(self):
        """A one-element 1D centers array with 3-column data must raise ValueError."""
        dim = 3
        data = np.empty((100, dim))
        centers = np.empty(1)

        self.assertRaises(ValueError, coor.assign_to_centers, data, centers)
Exemple #9
0
 def setUpClass(cls):
     """Create reference values and two temperature-swapping sample trajectories.

     Two replicas are advanced in 50 rounds of ``length`` steps each with
     ``run_mcmc``. After every round the two entries of ``kT`` may be
     exchanged. Stores ``cls.energy_trajs`` and ``cls.temp_trajs`` as float64
     arrays and ``cls.dtrajs`` as the assignment of each replica's positions
     (without the initial seed value) to ``cls.centers``.
     """
     cls.centers = (np.linspace(-1.6, 1.4, 40, endpoint=False) +
                    0.0375).reshape((-1, 1))
     cls.metastable_sets = [np.arange(22, 40), np.arange(0, 22)]
     cls.pi = [0.308479845114,
               0.691520154886]  # MSM(tau=10) on 10^6 steps + PCCA
     cls.f = -np.log(cls.pi)
     cls.mfpt = [[0.0, 176.885753716],
                 [433.556388454, 0.0]]  # MSM(tau=10) on 10^6 steps + PCCA
     cls.energy_trajs = [[], []]
     cls.temp_trajs = [[], []]
     trajs = [[0.13], [0.13]]
     kT = [1.0, 7.0]
     length = 100
     for _repetition in range(50):
         for i in [0, 1]:
             # each replica continues from its own last position; the segment
             # size is the same value used for the temperature bookkeeping
             x, u = run_mcmc(trajs[i][-1], length, kT=kT[i])
             trajs[i] += x.tolist()
             cls.energy_trajs[i] += u.tolist()
             cls.temp_trajs[i] += [kT[i]] * length
         # Compare the latest energies of the two replicas. Subtracting
         # replica 0's energy from itself made delta identically zero.
         delta = (kT[0] - kT[1]) * (cls.energy_trajs[0][-1] -
                                    cls.energy_trajs[1][-1])
         # NOTE(review): as written this condition holds for every finite
         # delta (negative via the first clause, non-negative because
         # exp(delta) >= 1 > rand()), so the temperatures are swapped every
         # round -- confirm the intended acceptance criterion.
         if delta < 0.0 or np.random.rand() < np.exp(delta):
             kT = kT[::-1]
     cls.energy_trajs = np.asarray(cls.energy_trajs, dtype=np.float64)
     cls.temp_trajs = np.asarray(cls.temp_trajs, dtype=np.float64)
     # traj[1:] drops the seed value 0.13 each list was initialised with
     cls.dtrajs = [
         assign_to_centers(traj[1:], centers=cls.centers)[0]
         for traj in trajs
     ]
Exemple #10
0
 def test_threads_env_num_threads_fixed_def_arg(self):
     """With n_jobs=None, res.n_jobs must equal the value set in OMP_NUM_THREADS."""
     desired_n_jobs = 3
     env_name = 'OMP_NUM_THREADS'
     with temporary_env(env_name, desired_n_jobs):
         # sanity check: the variable is visible as a string in os.environ
         assert os.environ[env_name] == str(desired_n_jobs)
         # n_jobs=None, so the job count has to come from the environment
         res = coor.assign_to_centers(self.X,
                                      self.centers_big,
                                      n_jobs=None,
                                      return_dtrajs=False)
         self.assertEqual(res.n_jobs, desired_n_jobs)
Exemple #11
0
 def test_threads_omp_env_arg_borked(self):
     """An unparsable PYEMMA_NJOBS must yield the (patched) psutil.cpu_count() jobs."""
     expected = 3
     with patch('psutil.cpu_count', lambda *args, **kw: expected):
         with temporary_env('PYEMMA_NJOBS', 'this is not right'):
             res = coor.assign_to_centers(self.X,
                                          self.centers_big,
                                          n_jobs=None,
                                          return_dtrajs=False)
             self.assertEqual(res.n_jobs, expected)
Exemple #12
0
    def test_assignment_multithread(self):
        """Assignment with four jobs must equal the single-job assignment."""
        n, dim = 10000, 100
        X = np.random.random((n, dim))
        # `dim` rows of X, chosen by np.random.choice, serve as centers
        centers = X[np.random.choice(n, dim)]

        def assign(n_jobs):
            # identical call for both runs, only the job count differs
            return coor.assign_to_centers(X, centers, n_jobs=n_jobs, chunk_size=1000)

        assignment_mp = assign(4)
        assignment_sp = assign(1)

        np.testing.assert_equal(assignment_mp, assignment_sp)
Exemple #13
0
 def test_threads_env_num_threads_fixed(self):
     """An explicit n_jobs argument must be reported even while PYEMMA_NJOBS is 0."""
     desired_n_jobs = 2
     env_name = 'PYEMMA_NJOBS'
     with temporary_env(env_name, 0):
         # sanity check: the variable is stored as the string '0'
         assert os.environ[env_name] == '0'
         res = coor.assign_to_centers(self.X, self.centers_big,
                                      n_jobs=desired_n_jobs, return_dtrajs=False)
         self.assertEqual(res.n_jobs, desired_n_jobs)
Exemple #14
0
 def test_threads_omp_env_arg_borked(self):
     """An unparsable OMP_NUM_THREADS with n_jobs=None must yield psutil.cpu_count() jobs.

     OMP_NUM_THREADS is set to a non-numeric string for the duration of the
     test. Its previous state (value or absence) is restored afterwards.
     """
     import os
     previous = os.environ.get('OMP_NUM_THREADS')
     os.environ['OMP_NUM_THREADS'] = 'this is not right'
     try:
         import psutil
         X = np.random.random((1000, 3))
         centers = X[np.random.choice(1000, 10)]
         # n_jobs=None: the job count is not given explicitly
         res = coor.assign_to_centers(X,
                                      centers,
                                      n_jobs=None,
                                      return_dtrajs=False)
         self.assertEqual(res.n_jobs, psutil.cpu_count())
     finally:
         # Restore the environment as found. Deleting unconditionally would
         # drop a value that was set before the test started.
         if previous is None:
             os.environ.pop('OMP_NUM_THREADS', None)
         else:
             os.environ['OMP_NUM_THREADS'] = previous
Exemple #15
0
 def test_threads_env_num_threads_fixed_def_arg(self):
     """With n_jobs=None, res.n_jobs must equal the value set in OMP_NUM_THREADS.

     OMP_NUM_THREADS is set to 3 for the duration of the test. Its previous
     state (value or absence) is restored afterwards.
     """
     import os
     desired_n_jobs = 3
     previous = os.environ.get('OMP_NUM_THREADS')
     os.environ['OMP_NUM_THREADS'] = str(desired_n_jobs)
     try:
         assert os.environ['OMP_NUM_THREADS'] == str(desired_n_jobs)
         X = np.random.random((1000, 3))
         centers = X[np.random.choice(1000, 10)]
         # n_jobs=None, so the job count has to come from the environment
         res = coor.assign_to_centers(X,
                                      centers,
                                      n_jobs=None,
                                      return_dtrajs=False)
         self.assertEqual(res.n_jobs, desired_n_jobs)
     finally:
         # Restore the environment as found. Deleting unconditionally would
         # drop a value that was set before the test started.
         if previous is None:
             os.environ.pop('OMP_NUM_THREADS', None)
         else:
             os.environ['OMP_NUM_THREADS'] = previous
Exemple #16
0
 def test_threads_env_num_threads_fixed(self):
     """An explicit n_jobs argument must be reported even while OMP_NUM_THREADS is 4.

     OMP_NUM_THREADS is set to '4' for the duration of the test. Its previous
     state (value or absence) is restored afterwards.
     """
     import os
     # None marks "was not set", which an empty-string default cannot express
     old_val = os.environ.get('OMP_NUM_THREADS')
     os.environ['OMP_NUM_THREADS'] = '4'
     desired_n_jobs = 2
     try:
         assert os.environ['OMP_NUM_THREADS'] == "4"
         X = np.random.random((1000, 3))
         centers = X[np.random.choice(1000, 10)]
         res = coor.assign_to_centers(X,
                                      centers,
                                      n_jobs=desired_n_jobs,
                                      return_dtrajs=False)
         self.assertEqual(res.n_jobs, desired_n_jobs)
     finally:
         # The saved value was never written back before; restore it now.
         if old_val is None:
             os.environ.pop('OMP_NUM_THREADS', None)
         else:
             os.environ['OMP_NUM_THREADS'] = old_val
Exemple #17
0
    def test_min_rmsd(self):
        """Under minRMSD, all nine centers must appear in the resulting dtrajs."""
        import pyemma.datasets as data
        bpti = data.get_bpti_test_data()
        reader = coor.source(bpti['trajs'], top=bpti['top'])

        # nine centers taken from the reader; each selection is an index pair
        # handed to ra_itraj_jagged (presumably trajectory index, frame indices)
        N_centers = 9
        selections = [(0, [0, 1, 7]), (1, [32, 1, 23]), (2, [17, 8, 15])]
        picked = tuple(reader.ra_itraj_jagged[first, second]
                       for first, second in selections)
        centers = np.asarray(picked).reshape((N_centers, -1))
        dtraj = coor.assign_to_centers(reader,
                                       centers=centers,
                                       metric='minRMSD',
                                       return_dtrajs=True)

        # count the distinct state indices over all trajectories together
        visited = np.unique(np.concatenate(dtraj))
        num_assigned_states = len(visited)
        message = ("assigned states=%s out of %s possible ones."
                   % (num_assigned_states, N_centers))
        self.assertEqual(num_assigned_states, N_centers, message)
Exemple #18
0
def assign_trajs_to_clusters(datasets, MORE):
    """Assign every dataset to the shared cluster centers and save the result.

    For each ``(name, params)`` entry of ``datasets`` a reader is built from
    the files returned by ``min_full_datas`` and ``params['feat']``, then
    assigned to ``all_clust.clustercenters`` with the minRMSD metric and
    ``params['stride']``. The reader and the discrete trajectories are stored
    back into ``params`` under ``'inp'`` and ``'dtrajs'``, and the discrete
    trajectories are written to ``topdir + name + '.npy'``.

    Relies on the module-level names ``min_full_datas``, ``all_clust`` and
    ``topdir``. ``MORE`` is accepted but not used by this function.
    """
    inp_files = min_full_datas(datasets)
    for nm, pars in datasets.items():
        print('Projecting {} into clustercenters'.format(nm))
        feat = pars['feat']
        inp = coor.source(inp_files[nm], feat, chunksize=1000)
        dtrajs = coor.assign_to_centers(
            inp,
            centers=all_clust.clustercenters,
            stride=pars['stride'],
            metric='minRMSD',
            chunksize=1000,
        )
        pars['inp'] = inp
        pars['dtrajs'] = dtrajs
        # Save the assignment computed for this dataset. The previous code
        # wrote `disc.dtrajs`, a name not defined in this function and the
        # same object for every dataset.
        np.save(topdir + nm + '.npy', dtrajs)
Exemple #19
0
 def test_return_dtrajs(self):
     """Every item returned by assign_to_centers must pass types.is_int_array."""
     dtrajs = coor.assign_to_centers(data=self.X, centers=self.centers)
     assert all(types.is_int_array(dtraj) for dtraj in dtrajs)
Exemple #20
0
    print(topology.atom(i))

# Script section: PCA and regular-space clustering of pairwise atom distances,
# followed by a scatter plot of two principal components.

# Pairs built from `backbone`.
# NOTE(review): this list is overwritten below before it is used -- confirm
# whether backbone-only or all-atom pairs are intended.
atom_pairs = list(combinations(backbone, 2))

pca = PCA(n_components=8)
# Pairs over all atom indices of `t`; this replaces the list built above.
atom_pairs = list(combinations(range(t.n_atoms), 2))
pairwise_distances = md.geometry.compute_distances(t, atom_pairs)
print(pairwise_distances.shape)
reduced_distances = pca.fit_transform(pairwise_distances)
# The next two statements only read attributes and discard the values
# (presumably leftovers from interactive use).
pca.components_
pca.explained_variance_ratio_
tmp = deepcopy(pca.components_)

# Regular-space clustering on the raw distances (not on the PCA projection).
dist_space = coor.cluster_regspace(pairwise_distances, dmin=55)
# NOTE(review): axis=0 sorts every column independently, so the sorted rows
# need not correspond to the original cluster centers -- confirm intent.
centers_space = np.sort(dist_space.clustercenters, axis=0)
# NOTE(review): Sspace is not used again in the visible part of the script.
Sspace = coor.assign_to_centers(pairwise_distances, centers_space)
test = dist_space.dtrajs

plt.figure()
# Alternative colourings kept for reference (by t.time, or a constant colour):
#plt.scatter(reduced_distances[:, 0], reduced_distances[:,1], marker='x', c=t.time)
#plt.scatter(reduced_distances[:, 0], reduced_distances[:,1], marker='x', c=[1]*len(reduced_distances[:,1]))
# Columns 0 and 2 of the projection, coloured by the first discrete
# trajectory of the regspace clustering (scaled by 5).
plt.scatter(reduced_distances[:, 0],
            reduced_distances[:, 2],
            marker='o',
            c=test[0] * 5,
            alpha=0.3)
plt.xlabel('PC1')
plt.ylabel('PC3')
plt.title('Pairwise distance PCA: cyclic peptide')
# Optional colour bar for the time-coloured variant:
#cbar = plt.colorbar()
#cbar.set_label('Time [ps]')
Exemple #21
0
 def test_return_dtrajs(self):
     """Every item returned by assign_to_centers must pass types.is_int_vector."""
     dtrajs = coor.assign_to_centers(data=self.X, centers=self.centers)
     assert all(types.is_int_vector(dtraj) for dtraj in dtrajs)
Exemple #22
0
    kmean_cluster100 = coor.cluster_kmeans(data=test,
                                           k=nclusters,
                                           max_iter=1000,
                                           tolerance=1e-6)

    print "Done!"
    print "Saving cluster centers..."
    ccenters100 = kmean_cluster100.clustercenters
    f = open(
        'Intermediate_pickle_files/wt-h70a-d66a_cattraj_dirrmsd_ccenter-100.pickle',
        'w')
    pickle.dump(ccenters100, f)
    f.close()

    wt_dtrajs = coor.assign_to_centers(data=wt_dir_rmsd, centers=ccenters100)
    f = open(
        'Intermediate_pickle_files/cypa_wt-d66a_cattraj_dirrmsd_dtrajs.pickle',
        'w')
    pickle.dump(wt_dtrajs, f)
    f.close()

    d66a_dtrajs = coor.assign_to_centers(data=d66a_dir_rmsd,
                                         centers=ccenters100)
    f = open('Intermediate_pickle_files/d66a_cattraj_dirrmsd_dtrajs.pickle',
             'w')
    pickle.dump(wt_dtrajs, f)
    f.close()

    h70a_dtrajs = coor.assign_to_centers(data=h70a_dir_rmsd,
                                         centers=ccenters100)
Exemple #23
0
 def test_threads_omp_env_arg_borked(self):
     """An unparsable OMP_NUM_THREADS must yield the (patched) psutil.cpu_count() jobs."""
     expected = 3

     def fake_cpu_count():
         return expected

     with patch('psutil.cpu_count', fake_cpu_count):
         with temporary_env('OMP_NUM_THREADS', 'this is not right'):
             res = coor.assign_to_centers(self.X,
                                          self.centers_big,
                                          n_jobs=None,
                                          return_dtrajs=False)
             self.assertEqual(res.n_jobs, expected)
Exemple #24
0
    # Debug output of the featurizer (disabled):
    #print(feat.describe())
    # Number of degrees of freedom
    #print(feat.dimension())

    # Read the features of `traj` and keep the output of the first trajectory.
    inp = coor.source(traj, feat)
    sincos = inp.get_output()[0]

    #############
    # Use regular-space clustering. Cluster centers are at least dmin apart
    # from each other according to the given metric. A Voronoi discretization
    # with the computed centers is then used to partition the data.
    cl_space = coor.cluster_regspace(sincos, dmin=arg.dmin, max_centers=100000)
    clustCenters = cl_space.clustercenters  # feature vector of each centroid
    # Discretize the trajectory against these centers, i.e. give every
    # structure its cluster number.
    # NOTE(review): Sspace is not used in the visible part of this fragment.
    Sspace = coor.assign_to_centers(sincos, clustCenters)
    # Per-cluster index arrays; column 1 is used below as the frame number.
    indexClusters = cl_space.index_clusters
    # One slot per cluster, initialised to -1.
    clustCentersFrameNo = -1 * np.ones(len(clustCenters), dtype='int32')

    # Find the centroid frame of every cluster (Euclidean distance)
    for ind_clust in range(len(indexClusters)):
        # Frames that make up this cluster
        frameNumber = indexClusters[ind_clust][:, 1]
        cosinusSinus = sincos[frameNumber, :]
        print(str(ind_clust))
        for j in range(len(cosinusSinus)):
            # If the frame's features coincide with the centroid (distance
            # below 1e-7), record it
            if (distance(clustCenters[ind_clust], cosinusSinus[j]) < 1e-7):
                # NOTE(review): the commas make `line` a 4-tuple of strings,
                # not one concatenated string -- confirm against its use.
                line = 'Cluster ', str(ind_clust), 'Centroid ', str(
                    frameNumber[j])
Exemple #25
0
 def test_threads_cpu_count_def_arg(self):
     """Without an n_jobs argument, res.n_jobs must equal the expected count.

     The expected count is OMP_NUM_THREADS from the environment when set,
     else 3; psutil.cpu_count is patched to return that same number.
     """
     expected = int(os.getenv('OMP_NUM_THREADS', 3))

     def fake_cpu_count():
         return expected

     with patch('psutil.cpu_count', fake_cpu_count):
         res = coor.assign_to_centers(self.X, self.centers_big, return_dtrajs=False)
     self.assertEqual(res.n_jobs, expected)