def test_score_vs_MSM(self):
    """Compare MSM and VAMP cross-validation scores on the same split.

    ``self.trajs`` and ``self.dtrajs`` are each split into a test and a
    training part under the same seed. For every score in VAMP1, VAMP2 and
    VAMPE, an MSM (``reversible=False``) and a VAMP model (``dim=1.0``) are
    estimated on the training part and scored on the test part. The two
    scores must agree to 3 decimal places, or 2 for VAMPE.
    """
    from pyerna.util.contexts import numpy_random_seed

    split_seed = 32
    with numpy_random_seed(split_seed):
        trajs_test, trajs_train = cvsplit_dtrajs(self.trajs)
    # Re-seed with the same value before splitting the discrete data.
    with numpy_random_seed(split_seed):
        dtrajs_test, dtrajs_train = cvsplit_dtrajs(self.dtrajs)

    # Decimal places demanded per score; VAMPE gets the looser tolerance.
    required_places = {'VAMP1': 3, 'VAMP2': 3, 'VAMPE': 2}

    for method in ('VAMP1', 'VAMP2', 'VAMPE'):
        msm_model = estimate_markov_model(
            dtrajs=dtrajs_train, lag=self.lag, reversible=False)
        msm_score = msm_model.score(
            dtrajs_test, score_method=method, score_k=None)

        vamp_model = pyerna_api_vamp(data=trajs_train, lag=self.lag, dim=1.0)
        vamp_score = vamp_model.score(
            test_data=trajs_test, score_method=method)

        self.assertAlmostEqual(
            msm_score, vamp_score,
            places=required_places[method], msg=method)
def test_ml_msm_sparse(self):
    """Check that the ``sparse`` flag of an MSM survives save and load.

    An MSM is estimated with ``sparse=True`` from a single random discrete
    trajectory, written to ``self.f`` and read back; both the estimated and
    the restored model must report ``sparse`` as truthy.
    """
    from pyerna.util.contexts import numpy_random_seed

    with numpy_random_seed(42):
        # One trajectory of 10000 frames with state indices in [0, 1000).
        random_dtraj = np.random.randint(0, 1000, size=10000)
        msm = pyerna.msm.estimate_markov_model(
            [random_dtraj], sparse=True, lag=1)
        assert msm.sparse

        # Round trip through the file at self.f.
        msm.save(self.f)
        restored = load(self.f)
        assert restored.sparse
def test(self):
    """Check type and shape of a TICA projection requested with ``dim=1``.

    Random data of shape (100, 10) is passed to ``api.tica`` with lag 10;
    the transformed output must be a float matrix with 100 rows and a
    single column.
    """
    n_frames, n_features = 100, 10

    # Seed the generator so the input data is the same on every run.
    with numpy_random_seed(0):
        data = np.random.randn(n_frames, n_features)

    tica_obj = api.tica(data=data, lag=10, dim=1)
    projected = tica_obj._transform_array(data)

    # The output must be a float matrix of shape (n_frames, 1).
    assert types.is_float_matrix(projected)
    assert projected.shape[0] == n_frames
    assert projected.shape[1] == 1, projected.shape[1]
def setUpClass(cls):
    """Create the shared 1-D sample ``cls.X`` from three shifted Gaussians.

    1000 standard-normal draws are taken for each of three components,
    shifted by -2.0, 0 and +2.0 respectively, and concatenated in that
    order into one flat array of 3000 values.
    """
    from pyerna.util.contexts import numpy_random_seed

    n_per_component = 1000
    with numpy_random_seed(32):
        # Draw order (left, centre, right) fixes the values under the seed.
        left = np.random.randn(n_per_component) - 2.0
        centre = np.random.randn(n_per_component)
        right = np.random.randn(n_per_component) + 2.0
        cls.X = np.concatenate((left, centre, right))
def setUpClass(cls):
    """Generate two-state Gaussian HMM data and reference TICA quantities.

    A hidden state trajectory of length ``cls.T`` is sampled from the
    transition matrix ``cls.P``; every frame of the 2-D observation
    ``cls.X`` is Gaussian noise scaled by the per-state widths and shifted
    by the per-state means. From ``cls.X`` the following references are
    computed at lag ``cls.lag``:

    * ``X_mf`` / ``cov_ref`` / ``cov_tau_ref``: mean, instantaneous and
      time-lagged covariance averaged over both the first and the last
      ``T - lag`` frames (the lagged covariance is symmetrized).
    * ``X_mf_nr`` / ``cov_ref_nr`` / ``cov_tau_ref_nr``: the same
      quantities using only the first ``T - lag`` frames for mean and
      instantaneous covariance, without symmetrization.

    Finally two TICA objects are estimated on the data, ``tica_obj`` with
    default settings and ``tica_obj_nr`` with ``reversible=False``.
    """
    with numpy_random_seed(123):
        import msmtools.generation as msmgen

        # generate HMM with two Gaussians
        cls.P = np.array([[0.99, 0.01],
                          [0.01, 0.99]])
        cls.T = 40000
        # Row s holds the per-coordinate mean / width of hidden state s.
        means = np.array([[-1, 1], [1, -1]])
        widths = np.array([[0.3, 2], [0.3, 2]])

        # hidden trajectory (sampled before the noise, as the RNG stream
        # is shared and the draw order determines the data)
        states = np.asarray(msmgen.generate_traj(cls.P, cls.T))[:cls.T]

        # continuous trajectory: one vectorized draw instead of a Python
        # loop with two scalar randn() calls per frame. The legacy global
        # generator fills the array in row-major order, so frame t still
        # receives draws 2t and 2t+1 of the stream.
        noise = np.random.randn(cls.T, 2)
        cls.X = widths[states] * noise + means[states]

        # Set the lag time:
        cls.lag = 10
        lag = cls.lag
        n_pairs = cls.T - lag  # number of (t, t + lag) frame pairs

        # Compute mean free data:
        head, tail = cls.X[:-lag, :], cls.X[lag:, :]
        mref = (np.sum(head, axis=0) + np.sum(tail, axis=0)) / float(2 * n_pairs)
        mref_nr = np.sum(head, axis=0) / float(n_pairs)
        cls.X_mf = cls.X - mref[None, :]
        cls.X_mf_nr = cls.X - mref_nr[None, :]

        # Compute correlation matrices:
        head_mf, tail_mf = cls.X_mf[:-lag, :], cls.X_mf[lag:, :]
        head_nr, tail_nr = cls.X_mf_nr[:-lag, :], cls.X_mf_nr[lag:, :]
        cls.cov_ref = (np.dot(head_mf.T, head_mf) +
                       np.dot(tail_mf.T, tail_mf)) / float(2 * n_pairs)
        cls.cov_ref_nr = np.dot(head_nr.T, head_nr) / float(n_pairs)
        cls.cov_tau_ref = (np.dot(head_mf.T, tail_mf) +
                           np.dot(tail_mf.T, head_mf)) / float(2 * n_pairs)
        cls.cov_tau_ref_nr = np.dot(head_nr.T, tail_nr) / float(n_pairs)

        # do unscaled TICA
        reader = api.source(cls.X, chunksize=0)
        cls.tica_obj = api.tica(data=reader, lag=cls.lag, dim=1,
                                kinetic_map=False)

        # non-reversible TICA
        cls.tica_obj_nr = api.tica(data=reader, lag=cls.lag, dim=1,
                                   kinetic_map=False, reversible=False)
def setUp(self):
    """Prepare input files, random frame sets, a reader and output paths.

    Sets up the topology and trajectory file names from the package's
    ``data`` directory, two random (file index, frame index) sets of 10 and
    20 members, a fresh temporary output directory, a reader with chunk
    size 30, the per-set output file names, the reference trajectory
    returned by ``save_traj_w_md_load_frame`` and the strides to test.
    """
    from pyerna.util.contexts import numpy_random_seed

    self.eps = 1e-10

    data_dir = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
    self.pdbfile = os.path.join(data_dir, 'bpti_ca.pdb')
    xtc_names = ('bpti_001-033.xtc', 'bpti_034-066.xtc', 'bpti_067-100.xtc')
    self.trajfiles = [os.path.join(data_dir, name) for name in xtc_names]

    def draw_index_set(n_members):
        # Column 0: file indices taken from a shuffled pool of 0s and 2s;
        # column 1: frame indices in [0, 32). The permutation is drawn
        # before the frame indices, which fixes the values under the seed.
        file_indices = np.random.permutation([0, 2] * n_members)[:n_members]
        frame_indices = np.random.randint(32, size=n_members)
        return np.vstack((file_indices, frame_indices)).T

    # Create random sets of files and frames to be retrieved from trajfiles
    with numpy_random_seed(34):
        first_set = draw_index_set(10)
        second_set = draw_index_set(20)
    self.sets = [first_set, second_set]

    # The suffix ends with a separator, so self.subdir can be used as a
    # plain string prefix for the file names built below.
    self.subdir = tempfile.mkdtemp(suffix='save_trajs_test/')
    self.outfile = os.path.join(self.subdir, 'save_traj_test.xtc')

    # Instantiate the reader
    self.reader = coor.source(self.trajfiles, top=self.pdbfile)
    self.reader.chunksize = 30

    set_ids = range(len(self.sets))
    self.n_pass_files = [
        self.subdir + ('n_pass.set_%06u.xtc' % set_id) for set_id in set_ids
    ]
    self.one_pass_files = [
        self.subdir + ('1_pass.set_%06u.xtc' % set_id) for set_id in set_ids
    ]

    self.traj_ref = save_traj_w_md_load_frame(self.reader, self.sets)
    self.strides = [2, 3, 5]
def setUpClass(cls):
    """Write a topology file and ten correlated random-walk trajectories.

    Sets ``cls.dim`` (number of coordinates, three per atom),
    ``cls.temppdb`` (path of a PDB file with ``dim // 3`` atoms) and
    ``cls.trajnames`` (paths of the generated ``.xtc`` files). Each
    trajectory is a cumulative sum of white noise, mixed by a random
    invertible matrix and shifted by a random mean vector.

    The temporary files are created with ``delete=False``; nothing in this
    method removes them.
    """
    with numpy_random_seed(52):
        super(TestFeatureReaderAndTICAProjection, cls).setUpClass()

        cls.dim = 99  # dimension (must be divisible by 3)
        N = 5000  # length of single trajectory
        N_trajs = 10  # number of trajectories
        n_atoms = cls.dim // 3

        A = random_invertible(cls.dim)  # mixing matrix
        # tica will approximate its inverse with the projection matrix
        mean = np.random.randn(cls.dim)

        # create topology file
        # NamedTemporaryFile creates the file atomically; tempfile.mktemp
        # only returns a name and is deprecated because another process
        # can claim that name before it is opened.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb',
                                         delete=False) as f:
            cls.temppdb = f.name
            for i in range(n_atoms):
                # PDB ATOM records are fixed-column: serial in columns
                # 7-11, atom name 13-16, residue 18-20, chain 22, residue
                # number 23-26, x/y/z in 31-54, occupancy and B-factor in
                # 55-66. The spacing below is therefore significant.
                f.write('ATOM  %5d C    ACE A   1      28.490  31.600  33.379  0.00  1.00\n' % i)

        t = np.arange(0, N)
        cls.trajnames = []  # list of xtc file names
        for _ in range(N_trajs):
            # set up data
            white = np.random.randn(N, cls.dim)
            brown = np.cumsum(white, axis=0)
            correlated = np.dot(brown, A)
            data = correlated + mean
            xyz = data.reshape((N, n_atoms, 3))

            # create trajectory file
            traj = mdtraj.load(cls.temppdb)
            traj.xyz = xyz
            traj.time = t
            # Reserve a unique path; the handle is closed before saving so
            # the file can be rewritten on every platform.
            with tempfile.NamedTemporaryFile(suffix='.xtc',
                                             delete=False) as handle:
                tempfname = handle.name
            traj.save(tempfname)
            cls.trajnames.append(tempfname)