def setUpClass(cls):
    """Generate umbrella-sampling and unbiased MCMC reference data.

    Builds 40 bin centers on [-1.6, 1.4), reference stationary/MFPT values
    (from an MSM(tau=10) on 10^6 steps + PCCA), three biased (umbrella)
    trajectories and seven unbiased ones, plus their discretizations.
    """
    cls.centers = (np.linspace(-1.6, 1.4, 40, endpoint=False) + 0.0375).reshape((-1, 1))
    cls.metastable_sets = [np.arange(22, 40), np.arange(0, 22)]
    cls.pi = [0.308479845114, 0.691520154886]  # MSM(tau=10) on 10^6 steps + PCCA
    cls.f = -np.log(cls.pi)
    cls.mfpt = [[0.0, 176.885753716], [433.556388454, 0.0]]  # MSM(tau=10) on 10^6 steps + PCCA
    cls.us_trajs = []
    cls.us_centers = []
    cls.us_force_constants = []
    spring_constant = 3.0
    for spring_center in [-0.4, 0.2, 0.8]:
        # BUG FIX: the chain must start at the umbrella center. The original
        # passed spring_constant (3.0 — outside the binned range) as the first
        # positional argument, which is the start position (cf. the unbiased
        # call below, which starts at 0.13).
        x, u = run_mcmc(spring_center, 1000, spring_constant=spring_constant, spring_center=spring_center)
        cls.us_trajs.append(x)
        cls.us_centers.append(spring_center)
        cls.us_force_constants.append(spring_constant)
    cls.md_trajs = []
    for _repetition in range(7):
        x, u = run_mcmc(0.13, 1000)
        cls.md_trajs.append(x)
    cls.us_dtrajs = assign_to_centers(cls.us_trajs, centers=cls.centers)
    cls.md_dtrajs = assign_to_centers(cls.md_trajs, centers=cls.centers)
def test_assignment_multithread_minrsmd(self):
    """minRMSD assignment with two workers must reproduce the serial result."""
    import pyemma.datasets as data
    bpti = data.get_bpti_test_data()
    src = coor.source(bpti['trajs'], top=bpti['top'])
    n_centers = 9
    picked_frames = (src.ra_itraj_jagged[0, [0, 1, 7]],
                     src.ra_itraj_jagged[1, [32, 1, 23]],
                     src.ra_itraj_jagged[2, [17, 8, 15]])
    centers = np.asarray(picked_frames).reshape((n_centers, -1))
    cs = 1000
    parallel = coor.assign_to_centers(src, centers, n_jobs=2, chunksize=cs, metric='minRMSD')
    serial = coor.assign_to_centers(src, centers, n_jobs=1, chunksize=cs, metric='minRMSD')
    np.testing.assert_equal(parallel, serial)
def test_assignment_multithread_minrsmd(self):
    """Parallel (2 jobs) and serial minRMSD assignments must be identical."""
    cs = 1000
    parallel = coor.assign_to_centers(self.X, self.centers_big, n_jobs=2,
                                      chunk_size=cs, metric='minRMSD')
    serial = coor.assign_to_centers(self.X, self.centers_big, n_jobs=1,
                                    chunk_size=cs, metric='minRMSD')
    np.testing.assert_equal(parallel, serial)
def test_threads_cpu_count_def_arg(self):
    """With no n_jobs argument, the job count falls back to the (patched) CPU count."""
    expected = int(os.getenv('PYEMMA_NJOBS', 3))
    with patch('psutil.cpu_count', lambda *args, **kw: expected):
        res = coor.assign_to_centers(self.X, self.centers_big, return_dtrajs=False)
        self.assertEqual(res.n_jobs, expected)
def setUpClass(cls):
    """Create a 5-component 2-D Gaussian mixture and assign it to the true means.

    Populates cls.X (T x 2 samples), cls.centers (component means) and
    cls.ass (the assignment estimator; return_dtrajs=False keeps the object).
    """
    super(TestClusterAssign, cls).setUpClass()
    # generate Gaussian mixture
    means = [
        np.array([-3, 0]),
        np.array([-1, 1]),
        np.array([0, 0]),
        np.array([1, -1]),
        np.array([4, 2])
    ]
    widths = [
        np.array([0.1, 0.1]),
        np.array([0.1, 0.1]),
        np.array([0.1, 0.1]),
        np.array([0.1, 0.1]),
        np.array([0.1, 0.1])
    ]
    # data
    cls.nsample = 1000
    cls.T = len(means) * cls.nsample
    cls.X = np.zeros((cls.T, 2))
    for i in range(len(means)):
        # BUG FIX: np.random.randn() without a size argument returns a single
        # scalar, so every point of a component collapsed onto one location.
        # Draw one noise value per sample to get an actual Gaussian mixture.
        cls.X[i * cls.nsample:(i + 1) * cls.nsample, 0] = widths[i][0] * np.random.randn(cls.nsample) + means[i][0]
        cls.X[i * cls.nsample:(i + 1) * cls.nsample, 1] = widths[i][1] * np.random.randn(cls.nsample) + means[i][1]
    # try assigning actual centers:
    cls.centers = np.array([[-3, 0], [-1, 1], [0, 0], [1, -1], [4, 2]])
    # assignment
    cls.ass = coor.assign_to_centers(data=cls.X, centers=cls.centers, return_dtrajs=False)
def setUpClass(cls):
    """Create a 5-component 2-D Gaussian mixture plus a temp dir for dtraj output.

    Populates cls.dtraj_dir, cls.X (T x 2 samples), cls.centers (component
    means) and cls.ass (the assignment estimator).
    """
    super(TestCluster, cls).setUpClass()
    cls.dtraj_dir = tempfile.mkdtemp()
    # generate Gaussian mixture
    means = [np.array([-3, 0]), np.array([-1, 1]), np.array([0, 0]),
             np.array([1, -1]), np.array([4, 2])]
    widths = [np.array([0.1, 0.1]), np.array([0.1, 0.1]), np.array([0.1, 0.1]),
              np.array([0.1, 0.1]), np.array([0.1, 0.1])]
    # data
    cls.nsample = 1000
    cls.T = len(means) * cls.nsample
    cls.X = np.zeros((cls.T, 2))
    for i in range(len(means)):
        # BUG FIX: np.random.randn() without a size argument returns a single
        # scalar, so every point of a component collapsed onto one location.
        # Draw one noise value per sample to get an actual Gaussian mixture.
        cls.X[i * cls.nsample:(i + 1) * cls.nsample, 0] = widths[i][0] * np.random.randn(cls.nsample) + means[i][0]
        cls.X[i * cls.nsample:(i + 1) * cls.nsample, 1] = widths[i][1] * np.random.randn(cls.nsample) + means[i][1]
    # try assigning actual centers:
    cls.centers = np.array([[-3, 0], [-1, 1], [0, 0], [1, -1], [4, 2]])
    # assignment
    cls.ass = coor.assign_to_centers(data=cls.X, centers=cls.centers, return_dtrajs=False)
def test_threads_cpu_count_def_arg(self):
    """Default n_jobs should equal the machine's CPU count."""
    import psutil
    samples = np.random.random((1000, 3))
    picked_centers = samples[np.random.choice(1000, 10)]
    # note: we want another job number here, but it will be ignored!
    res = coor.assign_to_centers(samples, picked_centers, return_dtrajs=False)
    self.assertEqual(res.n_jobs, psutil.cpu_count())
def test_wrong_centers_argument2(self):
    """A 1-D centers array for 3-D data must be rejected with ValueError."""
    dim = 3
    samples = np.empty((100, dim))
    bad_centers = np.empty(1)
    with self.assertRaises(ValueError):
        coor.assign_to_centers(samples, bad_centers)
def setUpClass(cls):
    """Generate parallel-tempering-style reference data at two temperatures.

    Runs two interleaved MCMC chains at kT=1 and kT=7, attempting a
    replica-exchange swap after every round, and discretizes the resulting
    trajectories onto 40 bin centers.
    """
    cls.centers = (np.linspace(-1.6, 1.4, 40, endpoint=False) + 0.0375).reshape((-1, 1))
    cls.metastable_sets = [np.arange(22, 40), np.arange(0, 22)]
    cls.pi = [0.308479845114, 0.691520154886]  # MSM(tau=10) on 10^6 steps + PCCA
    cls.f = -np.log(cls.pi)
    cls.mfpt = [[0.0, 176.885753716], [433.556388454, 0.0]]  # MSM(tau=10) on 10^6 steps + PCCA
    cls.energy_trajs = [[], []]
    cls.temp_trajs = [[], []]
    trajs = [[0.13], [0.13]]
    kT = [1.0, 7.0]
    length = 100
    for _repetition in range(50):
        for i in [0, 1]:
            # continue each chain from its last position at its current kT
            x, u = run_mcmc(trajs[i][-1], length, kT=kT[i])
            trajs[i] += x.tolist()
            cls.energy_trajs[i] += u.tolist()
            cls.temp_trajs[i] += [kT[i]] * length
        # Replica-exchange swap attempt.
        # BUG FIX (1): the original compared energy_trajs[0][-1] with itself,
        # so delta was always 0 and the energies never influenced the swap.
        delta = (kT[0] - kT[1]) * (cls.energy_trajs[0][-1] - cls.energy_trajs[1][-1])
        # BUG FIX (2): Metropolis acceptance is "accept if delta > 0, else with
        # probability exp(delta)". The original used `delta < 0.0 or ...`,
        # which accepts unconditionally (exp(delta) >= 1 whenever delta >= 0).
        # NOTE(review): a beta-difference (1/kT[0] - 1/kT[1]) prefactor may be
        # intended instead of the plain kT difference — confirm against the
        # estimator this fixture feeds.
        if delta > 0.0 or np.random.rand() < np.exp(delta):
            kT = kT[::-1]
    cls.energy_trajs = np.asarray(cls.energy_trajs, dtype=np.float64)
    cls.temp_trajs = np.asarray(cls.temp_trajs, dtype=np.float64)
    # drop the seeded starting point before discretizing
    cls.dtrajs = [assign_to_centers(traj[1:], centers=cls.centers)[0] for traj in trajs]
def test_threads_env_num_threads_fixed_def_arg(self):
    """ tests that if no njobs arg is given (None) we fall back to OMP_NUM_THREADS """
    wanted = 3
    with temporary_env('OMP_NUM_THREADS', wanted):
        assert os.environ['OMP_NUM_THREADS'] == str(wanted)
        # note: we want another job number here, but it will be ignored!
        res = coor.assign_to_centers(self.X, self.centers_big,
                                     n_jobs=None, return_dtrajs=False)
        self.assertEqual(res.n_jobs, wanted)
def test_threads_omp_env_arg_borked(self):
    """ if the env var can not be interpreted as int, fall back to one thread. """
    expected = 3

    def _patched_count(*args, **kw):
        return expected

    with patch('psutil.cpu_count', _patched_count), \
            temporary_env('PYEMMA_NJOBS', 'this is not right'):
        res = coor.assign_to_centers(self.X, self.centers_big,
                                     n_jobs=None, return_dtrajs=False)
        self.assertEqual(res.n_jobs, expected)
def test_assignment_multithread(self):
    """Assignment computed with 4 workers must equal the single-worker result."""
    n, dim, cs = 10000, 100, 1000
    samples = np.random.random((n, dim))
    picked_centers = samples[np.random.choice(n, dim)]
    multi = coor.assign_to_centers(samples, picked_centers, n_jobs=4, chunk_size=cs)
    single = coor.assign_to_centers(samples, picked_centers, n_jobs=1, chunk_size=cs)
    np.testing.assert_equal(multi, single)
def test_threads_env_num_threads_fixed(self):
    """An explicit n_jobs argument wins over the PYEMMA_NJOBS env var."""
    requested = 2
    with temporary_env('PYEMMA_NJOBS', 0):
        assert os.environ['PYEMMA_NJOBS'] == '0'
        res = coor.assign_to_centers(self.X, self.centers_big,
                                     n_jobs=requested, return_dtrajs=False)
        self.assertEqual(res.n_jobs, requested)
def test_threads_omp_env_arg_borked(self):
    """A non-numeric OMP_NUM_THREADS is ignored; n_jobs falls back to the CPU count."""
    import os
    os.environ['OMP_NUM_THREADS'] = 'this is not right'
    try:
        import psutil
        samples = np.random.random((1000, 3))
        picked_centers = samples[np.random.choice(1000, 10)]
        # note: we want another job number here, but it will be ignored!
        res = coor.assign_to_centers(samples, picked_centers,
                                     n_jobs=None, return_dtrajs=False)
        self.assertEqual(res.n_jobs, psutil.cpu_count())
    finally:
        del os.environ['OMP_NUM_THREADS']
def test_threads_env_num_threads_fixed_def_arg(self):
    """With n_jobs=None, the worker count is taken from OMP_NUM_THREADS."""
    import os
    wanted = 3
    os.environ['OMP_NUM_THREADS'] = str(wanted)
    try:
        assert os.environ['OMP_NUM_THREADS'] == str(wanted)
        samples = np.random.random((1000, 3))
        picked_centers = samples[np.random.choice(1000, 10)]
        # note: we want another job number here, but it will be ignored!
        res = coor.assign_to_centers(samples, picked_centers,
                                     n_jobs=None, return_dtrajs=False)
        self.assertEqual(res.n_jobs, wanted)
    finally:
        del os.environ['OMP_NUM_THREADS']
def test_threads_env_num_threads_fixed(self):
    """An explicit n_jobs argument wins over the OMP_NUM_THREADS env var.

    BUG FIX: the original captured the previous OMP_NUM_THREADS value in
    `old_val` but never used it — the `finally` block unconditionally deleted
    the variable, leaking the environment change into subsequent tests.
    Now the previous value (or its absence) is restored.
    """
    import os
    old_val = os.environ.get('OMP_NUM_THREADS')  # None if unset
    os.environ['OMP_NUM_THREADS'] = '4'
    desired_n_jobs = 2
    try:
        assert os.environ['OMP_NUM_THREADS'] == "4"
        X = np.random.random((1000, 3))
        centers = X[np.random.choice(1000, 10)]
        res = coor.assign_to_centers(X, centers, n_jobs=desired_n_jobs, return_dtrajs=False)
        self.assertEqual(res.n_jobs, desired_n_jobs)
    finally:
        # restore the pre-test environment exactly
        if old_val is None:
            del os.environ['OMP_NUM_THREADS']
        else:
            os.environ['OMP_NUM_THREADS'] = old_val
def test_min_rmsd(self):
    """minRMSD assignment of BPTI data must populate all nine chosen centers."""
    import pyemma.datasets as data
    bpti = data.get_bpti_test_data()
    src = coor.source(bpti['trajs'], top=bpti['top'])
    n_centers = 9
    picked_frames = (src.ra_itraj_jagged[0, [0, 1, 7]],
                     src.ra_itraj_jagged[1, [32, 1, 23]],
                     src.ra_itraj_jagged[2, [17, 8, 15]])
    centers = np.asarray(picked_frames).reshape((n_centers, -1))
    dtraj = coor.assign_to_centers(src, centers=centers, metric='minRMSD',
                                   return_dtrajs=True)
    n_assigned = len(np.unique(np.concatenate(dtraj)))
    self.assertEqual(n_assigned, n_centers,
                     "assigned states=%s out of %s possible ones." % (n_assigned, n_centers))
def assign_trajs_to_clusters(datasets, MORE):
    """Discretize every dataset's trajectories onto the shared cluster centers.

    For each entry in `datasets`, loads its input files with the dataset's own
    featurizer, assigns all frames to the globally computed `all_clust`
    cluster centers (minRMSD metric), stores the reader and dtrajs back into
    the dataset dict and saves the dtrajs to disk as `<topdir><name>.npy`.

    Parameters
    ----------
    datasets : dict
        name -> parameter dict with at least 'feat' and 'stride' keys;
        gains 'inp' and 'dtrajs' entries as a side effect.
    MORE : unused
        kept for interface compatibility — TODO confirm with callers.

    NOTE(review): relies on module-level `min_full_datas`, `all_clust` and
    `topdir` defined elsewhere in the file.
    """
    inp_files = min_full_datas(datasets)
    for nm, pars in datasets.items():
        print('Projecting {} into clustercenters'.format(nm))
        feat = pars['feat']
        inp = coor.source(inp_files[nm], feat, chunksize=1000)
        dtrajs = coor.assign_to_centers(
            inp,
            centers=all_clust.clustercenters,
            stride=pars['stride'],
            metric='minRMSD',
            chunksize=1000
        )
        pars['inp'] = inp
        pars['dtrajs'] = dtrajs
        # BUG FIX: the original saved `disc.dtrajs`, but `disc` is undefined
        # here — save the dtrajs just computed for this dataset instead.
        np.save(topdir + nm + '.npy', dtrajs)
def test_return_dtrajs(self):
    """By default, assign_to_centers returns integer discrete trajectories."""
    for discrete_traj in coor.assign_to_centers(data=self.X, centers=self.centers):
        assert types.is_int_array(discrete_traj)
# NOTE(review): this chunk starts mid-script — `topology`, `i`, `backbone`,
# `t`, `md`, `PCA`, `deepcopy`, `coor`, `np` and `plt` are defined earlier or
# elsewhere in the file.
print(topology.atom(i))
atom_pairs = list(combinations(backbone, 2))
pca = PCA(n_components=8)
# NOTE(review): this line shadows the backbone-only pair list built just
# above — all-atom pairs are what actually get used; confirm which is intended.
atom_pairs = list(combinations(range(t.n_atoms), 2))
pairwise_distances = md.geometry.compute_distances(t, atom_pairs)
print(pairwise_distances.shape)
# project the pairwise-distance features onto the first 8 principal components
reduced_distances = pca.fit_transform(pairwise_distances)
# bare expressions: evaluated but unused outside an interactive session
pca.components_
pca.explained_variance_ratio_
tmp = deepcopy(pca.components_)
# regular-space clustering in the full distance space, then Voronoi assignment
dist_space = coor.cluster_regspace(pairwise_distances, dmin=55)
centers_space = np.sort(dist_space.clustercenters, axis=0)
Sspace = coor.assign_to_centers(pairwise_distances, centers_space)
test = dist_space.dtrajs
# scatter PC1 vs PC3, colored by the first discrete trajectory
plt.figure()
#plt.scatter(reduced_distances[:, 0], reduced_distances[:,1], marker='x', c=t.time)
#plt.scatter(reduced_distances[:, 0], reduced_distances[:,1], marker='x', c=[1]*len(reduced_distances[:,1]))
plt.scatter(reduced_distances[:, 0], reduced_distances[:, 2], marker='o', c=test[0] * 5, alpha=0.3)
plt.xlabel('PC1')
plt.ylabel('PC3')
plt.title('Pairwise distance PCA: cyclic peptide')
#cbar = plt.colorbar()
#cbar.set_label('Time [ps]')
def test_return_dtrajs(self):
    """By default, assign_to_centers returns integer discrete trajectories."""
    discrete = coor.assign_to_centers(data=self.X, centers=self.centers)
    for d in discrete:
        assert types.is_int_vector(d)
# k-means clustering of the combined catalytic-domain dirRMSD data, then
# discretization of each variant's trajectories onto the shared centers.
# NOTE(review): `test`, `nclusters`, `wt_dir_rmsd`, `d66a_dir_rmsd` and
# `h70a_dir_rmsd` are defined earlier in the file.
kmean_cluster100 = coor.cluster_kmeans(data=test, k=nclusters, max_iter=1000, tolerance=1e-6)
print("Done!")
print("Saving cluster centers...")
ccenters100 = kmean_cluster100.clustercenters
# NOTE(review): pickling through text-mode 'w' only works on Python 2;
# binary mode 'wb' would be required on Python 3.
with open('Intermediate_pickle_files/wt-h70a-d66a_cattraj_dirrmsd_ccenter-100.pickle', 'w') as f:
    pickle.dump(ccenters100, f)
wt_dtrajs = coor.assign_to_centers(data=wt_dir_rmsd, centers=ccenters100)
with open('Intermediate_pickle_files/cypa_wt-d66a_cattraj_dirrmsd_dtrajs.pickle', 'w') as f:
    pickle.dump(wt_dtrajs, f)
d66a_dtrajs = coor.assign_to_centers(data=d66a_dir_rmsd, centers=ccenters100)
# BUG FIX: the original pickled wt_dtrajs into the d66a output file,
# silently overwriting the d66a results with the wild-type ones.
with open('Intermediate_pickle_files/d66a_cattraj_dirrmsd_dtrajs.pickle', 'w') as f:
    pickle.dump(d66a_dtrajs, f)
h70a_dtrajs = coor.assign_to_centers(data=h70a_dir_rmsd, centers=ccenters100)
def test_threads_omp_env_arg_borked(self):
    """ if the env var can not be interpreted as int, fall back to one thread. """
    n_expected = 3

    def _fixed_count(*args, **kw):
        return n_expected

    with patch('psutil.cpu_count', _fixed_count):
        with temporary_env('OMP_NUM_THREADS', 'this is not right'):
            res = coor.assign_to_centers(self.X, self.centers_big,
                                         n_jobs=None, return_dtrajs=False)
            self.assertEqual(res.n_jobs, n_expected)
#print(feat.describe()) #Number of dregree of freedom #print(feat.dimension()) inp = coor.source(traj, feat) sincos = inp.get_output()[0] ############# #Use a regular space clustering. Cluster centers are at least in distance of #dmin to each other according to the given metric.Then Voronoi discretization #with the computed centers is used to partition the data cl_space = coor.cluster_regspace(sincos, dmin=arg.dmin, max_centers=100000) clustCenters = cl_space.clustercenters #angle for each centroid #We now discretize the trajectory to either set of cluster centers #assign structure's cluster number Sspace = coor.assign_to_centers(sincos, clustCenters) #assign for each cluster their frames number indexClusters = cl_space.index_clusters clustCentersFrameNo = -1 * np.ones(len(clustCenters), dtype='int32') #Find the centroid (euclideen distance) for ind_clust in range(len(indexClusters)): #Frames which compose the cluster frameNumber = indexClusters[ind_clust][:, 1] cosinusSinus = sincos[frameNumber, :] print(str(ind_clust)) for j in range(len(cosinusSinus)): #If the frame feature is close to the centroid, then save it if (distance(clustCenters[ind_clust], cosinusSinus[j]) < 1e-7): line = 'Cluster ', str(ind_clust), 'Centroid ', str( frameNumber[j])
def test_threads_cpu_count_def_arg(self):
    """With no n_jobs argument, the job count equals the (patched) CPU count."""
    expected = int(os.getenv('OMP_NUM_THREADS', 3))

    def _fixed_count(*args, **kw):
        return expected

    with patch('psutil.cpu_count', _fixed_count):
        res = coor.assign_to_centers(self.X, self.centers_big, return_dtrajs=False)
        self.assertEqual(res.n_jobs, expected)