def test_estimate_transition_matrix_1():
    """Check estimate_transition_matrix on a small dense count matrix.

    The expected values are each row of the counts divided by its row sum.
    """
    np.random.seed(42)
    counts = np.array([[6, 3, 7],
                       [4, 6, 9],
                       [2, 6, 7]])
    expected = np.array([[0.375, 0.1875, 0.4375],
                         [0.21052632, 0.31578947, 0.47368421],
                         [0.13333333, 0.4, 0.46666667]])

    estimated = MSMLib.estimate_transition_matrix(counts)
    eq(estimated, expected)
def test(self):
    """Convert the bundled XTC data with a four-atom selection.

    Extracts ``XTC.tgz`` into the temporary directory ``self.td``, runs
    ``ConvertDataToHDF.run`` there with ``atom_indices = np.arange(4)`` and
    checks that the conformation returned by the written project has
    exactly four atoms.
    """
    # extract xtcs to a temp dir; the context manager closes the archive
    # even when extraction raises
    xtc_fn = get('XTC.tgz', just_filename=True)
    with tarfile.open(xtc_fn, mode='r:gz') as fh:
        fh.extractall(self.td)

    outfn = pjoin(self.td, 'ProjectInfo.yaml')

    # move to that directory
    os.chdir(self.td)

    atom_indices = np.arange(4)

    ConvertDataToHDF.run(projectfn=outfn,
                         conf_filename=get('native.pdb', just_filename=True),
                         input_dir=pjoin(self.td, 'XTC'),
                         source='file',
                         min_length=0,
                         stride=1,
                         rmsd_cutoff=np.inf,
                         atom_indices=atom_indices,
                         iext=".xtc")

    project = load(outfn)
    traj = project.load_conf()

    eq(traj.n_atoms, 4)
def test(self):
    """Run tICA_train on phi/psi dihedrals and compare with reference data.

    The output written to ``tICAtest.h5`` and the reference file
    ``tICA_ref_mle.h5`` are both reordered by ascending ``'vals'`` so the
    comparison does not depend on the order in which entries were stored.
    """
    prep_metric = metrics.Dihedral(angles='phi/psi')
    project = get('ProjectInfo.yaml')

    # Relative output path below is resolved against the temp directory.
    os.chdir(self.td)

    tICA_train.run(prep_metric, project, delta_time=10, atom_indices=None,
                   output='tICAtest.h5', min_length=0, stride=1)

    ref_tICA = get('tICA_ref_mle.h5')
    ref_vals = ref_tICA['vals']
    ref_vecs = ref_tICA['vecs']
    ref_inds = np.argsort(ref_vals)
    ref_vals = ref_vals[ref_inds]
    ref_vecs = ref_vecs[:, ref_inds]

    test_tICA = load('tICAtest.h5')
    test_vals = test_tICA['vals']
    test_vecs = test_tICA['vecs']
    test_inds = np.argsort(test_vals)
    test_vals = test_vals[test_inds]
    test_vecs = test_vecs[:, test_inds]

    eq(test_vals, ref_vals)
    # Compare against the reference vectors; comparing test_vecs with itself
    # would pass regardless of what tICA_train produced.
    # NOTE(review): assumes the computed vectors use the same sign
    # convention as the stored reference - confirm.
    eq(test_vecs, ref_vecs)
def test(self):
    """Run PCCA with five macrostates and compare with the reference lumping.

    Macrostate labels may be numbered differently from the reference, so a
    permutation is derived from the two mappings and applied to both the
    new mapping and the new macrostate assignments before comparing.
    """
    num_macro = 5
    TC = get("PCCA_ref/tProb.mtx")
    A = get("PCCA_ref/Assignments.Fixed.h5")['arr_0']

    macro_map, macro_assign = PCCA.run_pcca(num_macro, A, TC)
    r_macro_map = get("PCCA_ref/MacroMapping.dat")

    # Builtin int is what the np.int alias stood for; the alias itself is
    # no longer available in current NumPy releases.
    macro_map = macro_map.astype(int)
    r_macro_map = r_macro_map.astype(int)

    # The order of macrostates might be different between the reference and
    # new lumping. We therefore find a permutation to match them.
    permutation_mapping = np.zeros(macro_assign.max() + 1, 'int')
    for i in range(num_macro):
        # First microstate that the new lumping places in macrostate i.
        j = np.where(macro_map == i)[0][0]
        permutation_mapping[i] = r_macro_map[j]

    macro_map_permuted = permutation_mapping[macro_map]
    # Return value is ignored: macro_assign is compared below, so the call
    # is relied upon to relabel it in place.
    MSMLib.apply_mapping_to_assignments(macro_assign, permutation_mapping)

    r_macro_assign = get("PCCA_ref/MacroAssignments.h5")['arr_0']

    eq(macro_map_permuted, r_macro_map)
    eq(macro_assign, r_macro_assign)
def test(self):
    """Run PCCA with five macrostates and compare with the reference lumping.

    Because macrostate numbering may differ from the reference, a
    permutation between the two labelings is built first and applied to the
    new mapping and assignments before they are compared.
    """
    num_macro = 5
    TC = get("PCCA_ref/tProb.mtx")
    A = get("PCCA_ref/Assignments.Fixed.h5")['arr_0']

    macro_map, macro_assign = PCCA.run_pcca(num_macro, A, TC)
    r_macro_map = get("PCCA_ref/MacroMapping.dat")

    # np.int was only an alias of the builtin int and has been removed from
    # recent NumPy releases; the builtin gives the same dtype.
    macro_map = macro_map.astype(int)
    r_macro_map = r_macro_map.astype(int)

    # The order of macrostates might be different between the reference and
    # new lumping. We therefore find a permutation to match them.
    permutation_mapping = np.zeros(macro_assign.max() + 1, 'int')
    for i in range(num_macro):
        # Index of the first microstate mapped to macrostate i.
        j = np.where(macro_map == i)[0][0]
        permutation_mapping[i] = r_macro_map[j]

    macro_map_permuted = permutation_mapping[macro_map]
    # The result is not captured; macro_assign itself is compared below.
    MSMLib.apply_mapping_to_assignments(macro_assign, permutation_mapping)

    r_macro_assign = get("PCCA_ref/MacroAssignments.h5")['arr_0']

    eq(macro_map_permuted, r_macro_map)
    eq(macro_assign, r_macro_assign)
def test_get_count_matrix_from_assignments_1():
    """A 10x10 all-zero assignment array must give the 1x1 count matrix [[90]]."""
    all_zero = np.zeros((10, 10), 'int')
    expected = np.matrix([[90.0]])

    counts = MSMLib.get_count_matrix_from_assignments(all_zero)
    eq(counts.todense(), expected)
def test_estimate_rate_matrix_1():
    """Check estimate_rate_matrix on seeded two-state random assignments."""
    np.random.seed(42)
    two_state = np.random.randint(2, size=(10, 10))

    count_matrix = MSMLib.get_count_matrix_from_assignments(two_state)
    rate_matrix = MSMLib.estimate_rate_matrix(count_matrix, two_state)

    expected = np.matrix([[-40.40909091, 0.5],
                          [0.33928571, -50.55357143]])
    eq(rate_matrix.todense(), expected)
def test_apply_mapping_to_assignments_1():
    """Mapping every state to 1 must turn all assignments into 1.

    The assignments array is inspected after the call, so the function is
    expected to modify it in place.
    """
    n_states = 100
    all_to_one = np.ones(n_states)
    assignments = np.random.randint(n_states, size=(10, 10))

    MSMLib.apply_mapping_to_assignments(assignments, all_to_one)

    expected = np.ones((10, 10))
    eq(assignments, expected)
def test_get_count_matrix_from_assignments_1():
    """Single-state input: a 10x10 array of zeros should count to [[90]]."""
    single_state = np.zeros((10, 10), "int")
    sparse_counts = MSMLib.get_count_matrix_from_assignments(single_state)
    dense_counts = sparse_counts.todense()

    eq(dense_counts, np.matrix([[90.0]]))
def test(self):
    """Build an MSM (lag time 1, MLE symmetrization) and compare every output file.

    ``tProb.mtx`` and ``tCounts.mtx`` are compared with an explicit
    ``decimal`` argument; the remaining files use the default of ``eq``.
    """
    BuildMSM.run(lagtime=1,
                 assignments=get('Assignments.h5')['arr_0'],
                 symmetrize='MLE',
                 out_dir=self.td)

    # (file name, extra keyword arguments for eq), checked in this order.
    comparisons = [
        ('tProb.mtx', {'decimal': 5}),
        ('tCounts.mtx', {'decimal': 3}),
        ('Mapping.dat', {}),
        ('Assignments.Fixed.h5', {}),
        ('Populations.dat', {}),
    ]
    for filename, eq_options in comparisons:
        eq(load(pjoin(self.td, filename)), get(filename), **eq_options)
def test_get_count_matrix_from_assignments_2():
    """Check the count matrix for seeded three-state random assignments."""
    np.random.seed(42)
    three_state = np.random.randint(3, size=(10, 10))

    expected = np.matrix([[11., 9., 10.],
                          [9., 17., 7.],
                          [10., 7., 10.]])
    counts = MSMLib.get_count_matrix_from_assignments(three_state)
    eq(counts.todense(), expected)
def test(self):
    """Build an MSM (LagTime=1, Symmetrize='MLE') and compare every output file."""
    BuildMSM.run(LagTime=1,
                 assignments=get('Assignments.h5')['arr_0'],
                 Symmetrize='MLE',
                 OutDir=self.td)

    # Each file written to self.td must match the reference of the same name.
    output_files = ('tProb.mtx', 'tCounts.mtx', 'Mapping.dat',
                    'Assignments.Fixed.h5', 'Populations.dat')
    for filename in output_files:
        eq(load(pjoin(self.td, filename)), get(filename))
def test_get_count_matrix_from_assignments_3():
    """Check lag_time=2 counts with and without the sliding window."""
    np.random.seed(42)
    three_state = np.random.randint(3, size=(10, 10))

    # sliding_window=False
    expected_fixed = np.matrix([[5.0, 3.0, 4.0],
                                [2.0, 12.0, 3.0],
                                [4.0, 3.0, 4.0]])
    counts_fixed = MSMLib.get_count_matrix_from_assignments(
        three_state, lag_time=2, sliding_window=False)
    eq(counts_fixed.todense(), expected_fixed)

    # sliding_window=True
    expected_sliding = np.matrix([[8.0, 9.0, 11.0],
                                  [5.0, 18.0, 6.0],
                                  [11.0, 5.0, 7.0]])
    counts_sliding = MSMLib.get_count_matrix_from_assignments(
        three_state, lag_time=2, sliding_window=True)
    eq(counts_sliding.todense(), expected_sliding)
def test_estimate_transition_matrix_1():
    """Compare estimate_transition_matrix with hand-computed row fractions."""
    np.random.seed(42)
    raw_counts = np.array([[6, 3, 7], [4, 6, 9], [2, 6, 7]])
    transition = MSMLib.estimate_transition_matrix(raw_counts)

    # Each expected row equals the matching count row divided by its sum.
    reference = np.array([
        [0.375, 0.1875, 0.4375],
        [0.21052632, 0.31578947, 0.47368421],
        [0.13333333, 0.4, 0.46666667],
    ])
    eq(transition, reference)
def test_get_count_matrix_from_assignments_2():
    """Seeded three-state assignments must reproduce the stored 3x3 counts."""
    np.random.seed(42)
    random_assignments = np.random.randint(3, size=(10, 10))
    sparse_counts = MSMLib.get_count_matrix_from_assignments(random_assignments)
    dense_counts = sparse_counts.todense()

    reference = np.matrix([
        [11.0, 9.0, 10.0],
        [9.0, 17.0, 7.0],
        [10.0, 7.0, 10.0],
    ])
    eq(dense_counts, reference)
def test(self):
    """Run hierarchical RMSD clustering via the Cluster script and compare ZMatrix.h5."""
    # Command line handed to the Cluster parser.
    argv = [
        '-p', get('ProjectInfo.yaml', just_filename=True),
        '-s', '10',
        '-o', self.td,
        'rmsd', '-a', get('AtomIndices.dat', just_filename=True),
        'hierarchical',
    ]
    args, metric = Cluster.parser.parse_args(argv, print_banner=False)
    Cluster.main(args, metric)

    produced = load(pjoin(self.td, 'ZMatrix.h5'))
    eq(produced, get('ZMatrix.h5'))
def test_CalculateTPT():
    """Compare CalculateTPT.run output with the stored committors and net flux.

    The first element of the result is compared with ``committors.h5`` and
    the second (converted with ``toarray``) with ``net_flux.h5``.
    """
    ref_dir = "transition_path_theory_reference"
    tprob = get("transition_path_theory_reference/tProb.mtx")
    sources = [0]   # chosen arb. for ref. by TJL
    sinks = [70]    # chosen arb. for ref. by TJL

    result = CalculateTPT.run(tprob, sources, sinks)

    expected_committors = get(pjoin(ref_dir, "committors.h5"))['Data']
    expected_net_flux = get(pjoin(ref_dir, "net_flux.h5"))['Data']

    eq(result[0], expected_committors)
    eq(result[1].toarray(), expected_net_flux)
def test_get_count_matrix_from_assignments_3():
    """Verify lag_time=2 count matrices for both sliding_window settings."""
    np.random.seed(42)
    assignments = np.random.randint(3, size=(10, 10))

    def dense_counts(sliding_window):
        # Dense count matrix at lag time 2 for the requested window mode.
        return MSMLib.get_count_matrix_from_assignments(
            assignments, lag_time=2, sliding_window=sliding_window).todense()

    eq(dense_counts(False),
       np.matrix([[5., 3., 4.], [2., 12., 3.], [4., 3., 4.]]))
    eq(dense_counts(True),
       np.matrix([[8., 9., 11.], [5., 18., 6.], [11., 5., 7.]]))
def test_trim_states():
    """trim_states fed with ergodic_trim_indices must match ergodic_trim."""
    # run the (just tested) ergodic trim
    dense = np.matrix('2 1 0; 1 2 0; 0 0 1')
    counts = scipy.sparse.csr_matrix(dense)
    expected_counts, _mapping = MSMLib.ergodic_trim(counts)

    # now try the segmented method
    to_trim = MSMLib.ergodic_trim_indices(counts)
    segmented_counts = MSMLib.trim_states(to_trim, counts, assignments=None)

    eq(expected_counts.todense(), segmented_counts.todense())
def test_FindPaths():
    """Compare FindPaths.run bottlenecks and fluxes with stored references.

    The returned paths are not checked (see the inline note).
    """
    ref_dir = "transition_path_theory_reference"
    transition_matrix = get("transition_path_theory_reference/tProb.mtx")
    source_states = [0]
    sink_states = [70]

    _paths, found_bottlenecks, found_fluxes = FindPaths.run(
        transition_matrix, source_states, sink_states, 10)

    # paths are hard to test due to type issues, adding later --TJL
    expected_bottlenecks = get(pjoin(ref_dir, "dijkstra_bottlenecks.h5"))['Data']
    expected_fluxes = get(pjoin(ref_dir, "dijkstra_fluxes.h5"))['Data']

    eq(found_bottlenecks, expected_bottlenecks)
    eq(found_fluxes, expected_fluxes)
def test(self):
    """Run the Assign script with the RMSD metric and compare both output files."""
    argv = [
        '-p', get('ProjectInfo.yaml', just_filename=True),
        '-g', get('Gens.lh5', just_filename=True),
        '-o', self.td,
        'rmsd', '-a', get('AtomIndices.dat', just_filename=True),
    ]
    args, metric = Assign.parser.parse_args(argv, print_banner=False)
    Assign.main(args, metric)

    # Assignments first, then the matching distances file.
    written_assignments = load(pjoin(self.td, 'Assignments.h5'))
    eq(written_assignments, get('assign/Assignments.h5'))

    written_distances = load(pjoin(self.td, 'Assignments.h5.distances'))
    eq(written_distances, get('assign/Assignments.h5.distances'))
def compare_kyle_to_lutz(self, raw_counts):
    """Check that the current MLE code agrees with the earlier implementation.

    Kyle wrote the most recent MLE code; the previous version was written by
    Lutz. Both are run on the ergodically trimmed counts, each result is
    divided by its own total, and the two are compared as dense arrays.
    """
    trimmed = MSMLib.ergodic_trim(raw_counts)[0]

    current = MSMLib.mle_reversible_count_matrix(trimmed)
    current /= current.sum()

    previous = MSMLib.__mle_reversible_count_matrix_lutz__(trimmed)
    previous /= previous.sum()

    eq(current.toarray(), previous.toarray())
def test_apply_mapping_to_assignments_2():
    """Preserve the -1s: an entry of -1 must be left untouched by the mapping."""
    n_states = 100
    all_to_one = np.ones(n_states)

    assignments = np.random.randint(n_states, size=(10, 10))
    assignments[0, 0] = -1

    expected = np.ones((10, 10))
    expected[0, 0] = -1

    MSMLib.apply_mapping_to_assignments(assignments, all_to_one)
    eq(assignments, expected)
def test_renumber_states_1():
    """Check renumber_states on assignments holding the labels 1, 2 and one -1.

    The function is applied to a copy; the copy is then compared with the
    untouched original to verify that -1 is kept, that label 2 ends up as 1,
    and that the returned mapping translates the new labels back.
    """
    # Seed like the other tests here: the expected mapping [1, 2] only holds
    # when both labels actually occur in the random draw.
    np.random.seed(42)
    a = np.random.randint(3, size=(2, 10))
    a[np.where(a == 0)] = 1   # leave only the labels 1 and 2
    a[0, 0] = -1

    # since its inplace
    new_a = a.copy()
    mapping = MSMLib.renumber_states(new_a)

    eq(int(new_a[0, 0]), -1)
    # Positions labelled 2 before must be labelled 1 afterwards.
    eq(np.where(a == 2)[0], np.where(new_a == 1)[0])
    eq(np.where(a == 2)[1], np.where(new_a == 1)[1])

    eq(mapping, np.array([1, 2]))
    # Indexing the mapping with the new labels recovers the old labels
    # everywhere except at the -1 entry.
    eq(mapping[new_a][np.where(a != -1)], a[np.where(a != -1)])
def test(self):
    """Cluster the points_on_cube data with k-centers (k=4) and check the result.

    Expects every one of the four clusters to receive two frames and all
    assignment distances to be zero.
    """
    argv = [
        '-p', get('points_on_cube/ProjectInfo.yaml', just_filename=True),
        '-o', self.td,
        'rmsd', '-a', get('points_on_cube/AtomIndices.dat', just_filename=True),
        'kcenters', '-k', '4',
    ]
    args, metric = Cluster.parser.parse_args(argv, print_banner=False)
    Cluster.main(args, metric)

    assignments = load(pjoin(self.td, 'Assignments.h5'))["arr_0"]
    frames_per_cluster = np.bincount(assignments.flatten())
    eq(frames_per_cluster, np.array([2, 2, 2, 2]))

    distances = load(pjoin(self.td, 'Assignments.h5.distances'))["arr_0"]
    eq(distances, np.zeros((1, 8)))
def test_estimate_rate_matrix_2():
    """Dense and sparse inputs to estimate_transition_matrix must agree.

    Despite its name, this test exercises ``estimate_transition_matrix``:
    the dense result is compared with stored values and with the densified
    result obtained from a CSR copy of the same counts.
    """
    np.random.seed(42)
    dense_counts = np.random.randint(100, size=(4, 4))
    sparse_counts = scipy.sparse.csr_matrix(dense_counts)

    from_dense = MSMLib.estimate_transition_matrix(dense_counts)
    from_sparse = MSMLib.estimate_transition_matrix(sparse_counts)

    expected = np.array([
        [0.22368421, 0.40350877, 0.06140351, 0.31140351],
        [0.24193548, 0.08064516, 0.33064516, 0.34677419],
        [0.22155689, 0.22155689, 0.26047904, 0.29640719],
        [0.23469388, 0.02040816, 0.21428571, 0.53061224],
    ])

    eq(from_dense, expected)
    eq(from_dense, np.array(from_sparse.todense()))
def test(self):
    """Hierarchical RMSD clustering test; skipped when fastcluster is missing."""
    # The import only probes for availability of the optional dependency.
    try:
        import fastcluster
    except ImportError:
        raise nose.SkipTest("Cannot find fastcluster, so skipping hierarchical clustering test.")

    argv = [
        '-p', get('ProjectInfo.yaml', just_filename=True),
        '-s', '10',
        '-o', self.td,
        'rmsd', '-a', get('AtomIndices.dat', just_filename=True),
        'hierarchical',
    ]
    args, metric = Cluster.parser.parse_args(argv, print_banner=False)
    Cluster.main(args, metric)

    zmatrix = load(pjoin(self.td, 'ZMatrix.h5'))
    eq(zmatrix, get('ZMatrix.h5'))
def test(self):
    """Run the Assign script and compare its output; skipped on Travis.

    Arguments are parsed before the skip check, and the distances file is
    compared with ``decimal=5``.
    """
    argv = [
        '-p', get('ProjectInfo.yaml', just_filename=True),
        '-g', get('Gens.lh5', just_filename=True),
        '-o', self.td,
        'rmsd', '-a', get('OldAtomIndices.dat', just_filename=True),
    ]
    args, metric = Assign.parser.parse_args(argv, print_banner=False)

    running_on_travis = os.getenv('TRAVIS', None) == 'true'
    if running_on_travis:
        raise nose.SkipTest('Skipping test_Assign on TRAVIS')

    Assign.main(args, metric)

    produced_assignments = load(pjoin(self.td, 'Assignments.h5'))
    eq(produced_assignments, get('assign/Assignments.h5'))

    produced_distances = load(pjoin(self.td, 'Assignments.h5.distances'))
    eq(produced_distances, get('assign/Assignments.h5.distances'), decimal=5)
def test_1(self):
    """Build an MLE-symmetrized, ergodically trimmed MSM and check all four outputs."""
    counts = MSMLib.get_count_matrix_from_assignments(self.assignments, 2)
    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        counts, symmetrize="MLE", ergodic_trimming=True)

    expected_rev_counts = np.matrix([[6.46159184, 4.61535527],
                                     [4.61535527, 2.30769762]])
    eq(rev_counts.todense(), expected_rev_counts, decimal=4)

    expected_t_matrix = np.matrix([[0.58333689, 0.41666311],
                                   [0.66666474, 0.33333526]])
    eq(t_matrix.todense(), expected_t_matrix, decimal=4)

    eq(populations, np.array([0.61538595, 0.38461405]), decimal=5)
    eq(mapping, np.array([0, 1]))
def test(self):
    """Convert the bundled XTC data and compare the written project file.

    Extracts ``XTC.tgz`` into the temporary directory ``self.td``, runs
    ``ConvertDataToHDF.run`` from there and checks the resulting
    ``ProjectInfo.yaml`` against the reference project.
    """
    # extract xtcs to a temp dir; the context manager closes the archive
    # even when extraction raises
    xtc_fn = get('XTC.tgz', just_filename=True)
    with tarfile.open(xtc_fn, mode='r:gz') as fh:
        fh.extractall(self.td)

    outfn = pjoin(self.td, 'ProjectInfo.yaml')

    # move to that directory
    os.chdir(self.td)

    ConvertDataToHDF.run(projectfn=outfn,
                         PDBfn=get('native.pdb', just_filename=True),
                         InputDir=pjoin(self.td, 'XTC'),
                         source='file',
                         min_length=0,
                         stride=1,
                         rmsd_cutoff=np.inf)

    eq(load(outfn), get('ProjectInfo.yaml'))
def test(self):
    """Hierarchical RMSD clustering test; skipped on Travis or without fastcluster."""
    on_travis = os.getenv('TRAVIS', None) == 'true'
    if on_travis:
        raise nose.SkipTest('Skipping test_Assign on TRAVIS')

    # The import only probes for availability of the optional dependency.
    try:
        import fastcluster
    except ImportError:
        raise nose.SkipTest("Cannot find fastcluster, so skipping hierarchical clustering test.")

    argv = [
        '-p', get('ProjectInfo.yaml', just_filename=True),
        '-s', '10',
        '-o', self.td,
        'rmsd', '-a', get('AtomIndices.dat', just_filename=True),
        'hierarchical',
    ]
    args, metric = Cluster.parser.parse_args(argv, print_banner=False)
    Cluster.main(args, metric)

    result = load(pjoin(self.td, 'ZMatrix.h5'))
    eq(result, get('ZMatrix.h5'))
def test_trim_states():
    """Check trim_states both without and with an assignments array.

    Without assignments the trimmed counts must equal the ergodic_trim
    result; with assignments the trimmed state must be rewritten to -1.
    """
    # run the (just tested) ergodic trim
    counts = scipy.sparse.csr_matrix(np.matrix('2 1 0; 1 2 0; 0 0 1'))
    reference_counts, _mapping = MSMLib.ergodic_trim(counts)

    # now try the segmented method
    to_trim = MSMLib.ergodic_trim_indices(counts)
    counts_only = MSMLib.trim_states(to_trim, counts, assignments=None)
    eq(reference_counts.todense(), counts_only.todense())

    # Test that code works with assignments given
    n_states = counts.shape[0]
    assignments = np.array([np.arange(n_states)])
    to_trim = MSMLib.ergodic_trim_indices(counts)
    _counts, new_assignments = MSMLib.trim_states(
        to_trim, counts, assignments=assignments)

    # State 2 is strong-disconnected so set to -1
    expected_assignments = np.array([[0, 1, -1]])
    eq(new_assignments, expected_assignments)
def test(self):
    """Draw random conformations per state, save them in three styles and compare.

    Two conformations are drawn (with replacement, fixed random state) for
    each of the states 0, 1 and 2. They are written with the ``sep``,
    ``tps`` and ``one`` styles and every resulting file is compared with the
    reference stored under ``save_structures/``.
    """
    from msmbuilder.scripts.SaveStructures import save

    project = get('ProjectInfo.yaml')
    assignments = get('Assignments.h5')['arr_0']
    which_states = [0, 1, 2]

    list_of_trajs = project.get_random_confs_from_states(
        assignments, which_states, num_confs=2, replacement=True,
        random=np.random.RandomState(42))

    assert isinstance(list_of_trajs, list)
    assert isinstance(list_of_trajs[0], Trajectory)
    eq(len(list_of_trajs), len(which_states))
    for t in list_of_trajs:
        eq(len(t), 2)

    # sep, tps, one
    save(list_of_trajs, which_states, style='sep', format='lh5', outdir=self.td)
    save(list_of_trajs, which_states, style='tps', format='lh5', outdir=self.td)
    save(list_of_trajs, which_states, style='one', format='lh5', outdir=self.td)

    names = ['State0-0.lh5', 'State0-1.lh5', 'State0.lh5',
             'State1-0.lh5', 'State1-1.lh5', 'State1.lh5',
             'State2-0.lh5', 'State2-1.lh5', 'State2.lh5']

    for name in names:
        t = Trajectory.load_trajectory_file(pjoin(self.td, name))
        eq(t, get('save_structures/' + name))
def test(self):
    """Draw random conformations per state, save them in three styles and compare.

    Two conformations per state (states 0-2) are sampled with a fixed
    random state, written with each save style, reloaded through mdtraj and
    compared with the references by coordinates only.
    """
    from msmbuilder.scripts.SaveStructures import save

    os.chdir(self.td)

    project = get('ProjectInfo.yaml')
    assignments = get('Assignments.h5')['arr_0']
    which_states = [0, 1, 2]

    sampled = project.get_random_confs_from_states(
        assignments, which_states, num_confs=2, replacement=True,
        random=np.random.RandomState(42))

    assert isinstance(sampled, list)
    assert isinstance(sampled[0], md.Trajectory)
    eq(len(sampled), len(which_states))
    for state_traj in sampled:
        eq(len(state_traj), 2)

    # sep, tps, one
    for save_style in ('sep', 'tps', 'one'):
        save(sampled, which_states, style=save_style, format='lh5', outdir=self.td)

    names = ['State0-0.lh5', 'State0-1.lh5', 'State0.lh5',
             'State1-0.lh5', 'State1-1.lh5', 'State1.lh5',
             'State2-0.lh5', 'State2-1.lh5', 'State2.lh5']

    for name in names:
        reloaded = md.load(pjoin(self.td, name))
        # Just checking coordinates because atom names / bonds in reference
        # data are incompatible with MDTraj.
        eq(reloaded.xyz, get('save_structures/' + name).xyz)
def test_1(self):
    """Check build_msm (MLE symmetrization, ergodic trimming) on self.assignments."""
    count_matrix = MSMLib.get_count_matrix_from_assignments(self.assignments, 2)
    msm = MSMLib.build_msm(count_matrix, symmetrize='MLE', ergodic_trimming=True)
    # build_msm yields four results, unpacked in the order they are returned.
    rc, t, p, m = msm

    eq(rc.todense(),
       np.matrix([[6.46159184, 4.61535527],
                  [4.61535527, 2.30769762]]),
       decimal=4)
    eq(t.todense(),
       np.matrix([[0.58333689, 0.41666311],
                  [0.66666474, 0.33333526]]),
       decimal=4)
    eq(p, np.array([0.61538595, 0.38461405]), decimal=5)
    eq(m, np.array([0, 1]))
def test_trim_states():
    """Exercise trim_states with ``assignments=None`` and with real assignments."""
    # run the (just tested) ergodic trim
    count_matrix = scipy.sparse.csr_matrix(np.matrix('2 1 0; 1 2 0; 0 0 1'))
    trimmed_reference = MSMLib.ergodic_trim(count_matrix)[0]

    # now try the segmented method
    trim_indices = MSMLib.ergodic_trim_indices(count_matrix)
    trimmed_segmented = MSMLib.trim_states(
        trim_indices, count_matrix, assignments=None)
    eq(trimmed_reference.todense(), trimmed_segmented.todense())

    # Test that code works with assignments given
    one_traj = np.array([np.arange(count_matrix.shape[0])])
    trim_indices = MSMLib.ergodic_trim_indices(count_matrix)
    result = MSMLib.trim_states(trim_indices, count_matrix, assignments=one_traj)
    _trimmed_counts, trimmed_assignments = result

    # State 2 is strong-disconnected so set to -1
    eq(trimmed_assignments, np.array([[0, 1, -1]]))
def test(self):
    """Run k-centers RMSD clustering (k=100) and compare all three output files."""
    assignments_out = pjoin(self.td, 'Assignments.h5')
    distances_out = pjoin(self.td, 'Assignments.h5.distances')
    generators_out = pjoin(self.td, 'Gens.lh5')

    # The first '-a' names the assignments output; the one after 'rmsd'
    # passes the atom indices file.
    argv = [
        '-p', get('ProjectInfo.yaml', just_filename=True),
        '-a', assignments_out,
        '-d', distances_out,
        '-g', generators_out,
        'rmsd', '-a', get('AtomIndices.dat', just_filename=True),
        'kcenters', '-k', '100',
    ]
    args, metric = Cluster.parser.parse_args(argv, print_banner=False)
    Cluster.main(args, metric)

    for filename in ('Assignments.h5', 'Assignments.h5.distances', 'Gens.lh5'):
        eq(load(pjoin(self.td, filename)), get(filename))
def test_CalculateMFPTs(mfpt_state=70):
    """Compare CalculateMFPTs.run for ``mfpt_state`` with the stored mfpt.h5 data."""
    tprob = get('transition_path_theory_reference/tProb.mtx')
    computed = CalculateMFPTs.run(tprob, mfpt_state)

    reference_path = pjoin("transition_path_theory_reference", "mfpt.h5")
    reference = get(reference_path)['Data']

    eq(computed, reference)
def test_CalculateImpliedTimescales():
    """Compare CalculateImpliedTimescales.run (lag times 3-5) with reference data."""
    assignments_path = get('Assignments.h5', just_filename=True)
    computed = CalculateImpliedTimescales.run(
        MinLagtime=3,
        MaxLagtime=5,
        Interval=1,
        NumEigen=10,
        AssignmentsFn=assignments_path,
        trimming=True,
        symmetrize='Transpose',
        nProc=1,
    )

    expected = get('ImpliedTimescales.dat')
    eq(computed, expected)
def test(self):
    """Compare AssignHierarchical.main (k=100, stride 10) with WardAssignments.h5."""
    project = get('ProjectInfo.yaml')
    zmatrix_path = get('ZMatrix.h5', just_filename=True)

    assignments = AssignHierarchical.main(
        k=100,
        d=None,
        zmatrix_fn=zmatrix_path,
        stride=10,
        project=project,
    )

    expected = get('WardAssignments.h5')['arr_0']
    eq(assignments, expected)
def test_CreateAtomIndices():
    """Compare the 'minimal' atom indices for native.pdb with AtomIndices.dat."""
    pdb_path = get('native.pdb', just_filename=True)
    computed = CreateAtomIndices.run(pdb_path, 'minimal')

    expected = get('AtomIndices.dat')
    eq(computed, expected)
def compare_kyle_to_reference(self, raw_counts, reference):
    """Compare MLE estimated reversible counts to a reference matrix.

    ``raw_counts`` is ergodically trimmed first; the estimate and
    ``reference`` are both converted with ``toarray`` before comparison.
    """
    trimmed = MSMLib.ergodic_trim(raw_counts)[0]
    estimate = MSMLib.mle_reversible_count_matrix(trimmed)

    estimate_dense = estimate.toarray()
    reference_dense = reference.toarray()
    eq(estimate_dense, reference_dense)
def test_ergodic_trim():
    """ergodic_trim must drop the third state of a 3x3 block-diagonal count matrix.

    State 2 only has a self-count, so the trimmed matrix is the leading
    2x2 block and the mapping marks state 2 with -1.
    """
    dense_counts = np.matrix('2 1 0; 1 2 0; 0 0 1')
    sparse_counts = scipy.sparse.csr_matrix(dense_counts)

    trimmed_counts, state_mapping = MSMLib.ergodic_trim(sparse_counts)

    eq(trimmed_counts.todense(), np.matrix('2 1; 1 2'))
    eq(state_mapping, np.array([0, 1, -1]))
def test_CalculateClusterRadii():
    """Compare CalculateClusterRadii.main with the stored ClusterRadii.dat."""
    assignments = get("Assignments.h5")['arr_0']
    distances = get("Assignments.h5.distances")['arr_0']

    computed_radii = CalculateClusterRadii.main(assignments, distances)
    expected_radii = get("ClusterRadii.dat")

    eq(computed_radii, expected_radii)