class TestCustomFeature(unittest.TestCase):
    """Exercise ``MDFeaturizer.add_custom_func`` with a user supplied function."""

    def setUp(self):
        # three atom pairs whose distances feed the custom function
        self.pairs = [[0, 1], [0, 2], [1, 2]]
        # bogus per-distance means handed to the custom function
        self.means = [.5, .75, 1.0]
        # bogus transformation, projects from 3 distances to 2 components
        self.U = np.array([[0, 1], [1, 0], [1, 1]])
        self.feat = MDFeaturizer(pdbfile)
        self.traj = mdtraj.load(xtcfile, top=pdbfile)

    def _add_feature(self):
        # Register the custom function; its output dimension is the number
        # of columns of the projection matrix.
        n_components = self.U.shape[1]
        extra_args = (self.pairs, self.means, self.U)
        self.feat.add_custom_func(
            some_call_to_mdtraj_some_operations_some_linalg,
            n_components, *extra_args)

    def test_some_feature(self):
        self._add_feature()
        via_featurizer = self.feat.transform(self.traj)

        # Reference: invoke the very same function directly.
        direct = some_call_to_mdtraj_some_operations_some_linalg(
            self.traj, self.pairs, self.means, self.U)

        assert np.allclose(via_featurizer, direct)

    def test_describe(self):
        # Smoke test only: describe() must not raise for a custom feature.
        self._add_feature()
        self.feat.describe()

    def test_dimensionality(self):
        self._add_feature()
        expected_dim = self.U.shape[1]
        assert self.feat.dimension() == expected_dim
class TestCustomFeature(unittest.TestCase):
    """Tests for ``MDFeaturizer.add_custom_func`` and the descriptions it yields.

    Every test registers ``some_call_to_mdtraj_some_operations_some_linalg``
    with the fixture arguments created in :meth:`setUp`.
    """

    def setUp(self):
        self.feat = MDFeaturizer(pdbfile)
        self.traj = mdtraj.load(xtcfile, top=pdbfile)
        self.pairs = [[0, 1], [0, 2], [1, 2]]  # some distances
        self.means = [.5, .75, 1.0]  # bogus means
        # bogus transformation, projects from 3 distances to 2 components
        self.U = np.array([[0, 1], [1, 0], [1, 1]])

    def _register(self, dim=None, **kwargs):
        """Register the custom function on ``self.feat`` and return the dim used.

        Parameters
        ----------
        dim : int, optional
            Declared output dimension; defaults to ``self.U.shape[1]``.
        **kwargs
            Forwarded verbatim to ``add_custom_func`` (e.g. ``description``).
        """
        if dim is None:
            dim = self.U.shape[1]
        self.feat.add_custom_func(
            some_call_to_mdtraj_some_operations_some_linalg, dim,
            self.pairs, self.means, self.U, **kwargs)
        return dim

    def test_some_feature(self):
        self._register()
        Y_custom_feature = self.feat.transform(self.traj)
        # Directly call the function
        Y_function = some_call_to_mdtraj_some_operations_some_linalg(
            self.traj, self.pairs, self.means, self.U)
        assert np.allclose(Y_custom_feature, Y_function)

    def test_describe(self):
        self._register()
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_describe_given(self):
        self._register(description=['foo'] * self.U.shape[1])
        desc = self.feat.describe()
        self.assertIn('foo', desc)
        self.assertEqual(len(desc), self.feat.dimension())

    def test_describe_given_str(self):
        self._register(description='test')
        desc = self.feat.describe()
        self.assertIn('test', desc)
        self.assertEqual(len(desc), self.feat.dimension())

    def test_describe_given_wrong(self):
        """ either a list matching input dim, or 1 element iterable allowed"""
        # Two descriptions for a three dimensional feature must be rejected.
        with self.assertRaises(ValueError):
            self._register(dim=self.U.shape[1] + 1, description=['ff', 'ff'])

    def test_describe_1_element_expand(self):
        # A single description is expected to be repeated once per declared
        # dimension, so the expectation is tied to the dim that was registered.
        dim = self._register(dim=self.U.shape[1] + 1, description=['test'])
        desc = self.feat.describe()
        self.assertEqual(desc, ['test'] * dim)

    def test_dimensionality(self):
        self._register()
        assert self.feat.dimension() == self.U.shape[1]
class TestFeaturizer(unittest.TestCase):
    """Tests for the feature-adding and selection methods of ``MDFeaturizer``.

    ``setUpClass`` writes two temporary PDB files and one temporary XTC
    trajectory that are shared by all tests; ``setUp`` creates a fresh
    featurizer and reference trajectory for every test.
    """

    @classmethod
    def setUpClass(cls):
        """Create the temporary topology and trajectory files."""
        import tempfile

        # mkstemp() hands back an already opened descriptor. Wrapping it with
        # fdopen closes it on exit instead of leaking it.
        fd, cls.asn_leu_pdbfile = tempfile.mkstemp(suffix=".pdb")
        with os.fdopen(fd, 'w') as fh:
            fh.write(asn_leu_pdb)

        # mkstemp instead of the deprecated, race-prone mktemp. Only the name
        # is needed here, the (empty) file is written by traj.save() below.
        fd, cls.asn_leu_traj = tempfile.mkstemp(suffix='.xtc')
        os.close(fd)

        fd, cls.bogus_geom_pdbfile = tempfile.mkstemp(suffix=".pdb")
        with os.fdopen(fd, 'w') as fh:
            # the module level ``bogus_geom_pdbfile`` is the content written,
            # ``cls.bogus_geom_pdbfile`` is the path it is written to
            fh.write(bogus_geom_pdbfile)

        # create traj for asn_leu: reference coordinates plus uniform noise
        n_frames = 4001
        traj = mdtraj.load(cls.asn_leu_pdbfile)
        ref = traj.xyz
        new_xyz = np.empty((n_frames, ref.shape[1], 3))
        noise = np.random.random(new_xyz.shape)
        new_xyz[:, :, :] = noise + ref
        traj.xyz = new_xyz
        traj.time = np.arange(n_frames)
        traj.save(cls.asn_leu_traj)

        super(TestFeaturizer, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        """Remove every temporary file created in ``setUpClass`` (best effort)."""
        for path in (cls.asn_leu_pdbfile, cls.bogus_geom_pdbfile,
                     cls.asn_leu_traj):
            try:
                os.unlink(path)
            except EnvironmentError:
                # cleanup is best effort, a missing file is not a test failure
                pass

        super(TestFeaturizer, cls).tearDownClass()

    def setUp(self):
        self.pdbfile = pdbfile
        self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
        self.feat = MDFeaturizer(self.pdbfile)
        self.atol = 1e-5
        self.ref_frame = 0
        # floor division: "/" produces float indices on Python 3
        self.atom_indices = np.arange(0, self.traj.n_atoms // 2)

    def test_select_backbone(self):
        # smoke test: the selection must not raise
        self.feat.select_Backbone()

    def test_select_non_symmetry_heavy_atoms(self):
        try:
            self.feat.select_Heavy(exclude_symmetry_related=True)
        except RuntimeError as e:
            # e.args is a tuple, so the substring has to be searched in the
            # message text itself.
            if "recursion depth" in str(e):
                import sys
                raise Exception(
                    "recursion limit reached. Interpreter limit: {}".format(
                        sys.getrecursionlimit()))
            # any other RuntimeError is a genuine failure, do not swallow it
            raise

    def test_select_all(self):
        self.feat.add_all()
        assert (self.feat.dimension() == self.traj.n_atoms * 3)
        refmap = np.reshape(self.traj.xyz,
                            (len(self.traj), self.traj.n_atoms * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_select(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        self.feat.add_selection(sel)
        assert (self.feat.dimension() == sel.shape[0] * 3)
        refmap = np.reshape(self.traj.xyz[:, sel, :],
                            (len(self.traj), sel.shape[0] * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_distances(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # unperiodic distances such that we can compare
        self.feat.add_distances(pairs, periodic=False)
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_inverse_distances(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # unperiodic distances such that we can compare
        self.feat.add_inverse_distances(pairs, periodic=False)
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        Dinv = 1.0 / np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(Dinv, self.feat.transform(self.traj)))

    def test_ca_distances(self):
        sel = self.feat.select_Ca()
        # should be all for this Ca-traj
        assert (np.all(sel == list(range(self.traj.n_atoms))))
        pairs = self.feat.pairs(sel, excluded_neighbors=0)
        # unperiodic distances such that we can compare
        self.feat.add_distances_ca(periodic=False, excluded_neighbors=0)
        assert (self.feat.dimension() == pairs.shape[0])
        X = self.traj.xyz[:, pairs[:, 0], :]
        Y = self.traj.xyz[:, pairs[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_ca_distances_with_all_atom_geometries(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=0)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        feat_just_ca.add_distances(np.arange(feat_just_ca.topology.n_atoms))
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=2)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        ca_pairs = feat.pairs(feat_just_ca.select_Ca(), excluded_neighbors=2)
        feat_just_ca.add_distances(ca_pairs)
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_ca_distances_with_residues_not_containing_cas_no_exclusions(self):
        # Load test geom
        # NOTE(review): the geometry comes from self.pdbfile while the
        # featurizer topology comes from bogus_geom_pdbfile -- confirm that
        # this mix is intended.
        geom = mdtraj.load(self.pdbfile)
        # No exclusions
        feat_EN0 = MDFeaturizer(self.bogus_geom_pdbfile)
        feat_EN0.add_distances_ca(excluded_neighbors=0)
        EN0_pairs = [[1, 3], [1, 5], [1, 7], [3, 5], [3, 7], [5, 7]]

        # Check indices
        assert (np.allclose(EN0_pairs,
                            feat_EN0.active_features[0].distance_indexes))
        # Check distances
        D = mdtraj.compute_distances(geom, EN0_pairs)
        assert (np.allclose(D, feat_EN0.transform(geom)))

        # excluded_neighbors=1 will yield the same as before, because the
        # first neighbor doesn't contain CA's anyway
        feat_EN1 = MDFeaturizer(self.bogus_geom_pdbfile)
        feat_EN1.add_distances_ca(excluded_neighbors=1)
        EN1_pairs = [[1, 3], [1, 5], [1, 7], [3, 5], [3, 7], [5, 7]]
        assert (np.allclose(EN1_pairs,
                            feat_EN1.active_features[0].distance_indexes))
        D = mdtraj.compute_distances(geom, EN1_pairs)
        assert (np.allclose(D, feat_EN1.transform(geom)))

    def test_ca_distances_with_residues_not_containing_cas_with_exclusions(
            self):
        # Load test geom
        geom = mdtraj.load(self.pdbfile)
        # Two excluded neighbors
        feat_EN2 = MDFeaturizer(self.bogus_geom_pdbfile)
        feat_EN2.add_distances_ca(excluded_neighbors=2)
        EN2_pairs = [
            [1, 5],
            [1, 7],
            [3, 7],
        ]

        # Check indices
        assert (np.allclose(EN2_pairs,
                            feat_EN2.active_features[0].distance_indexes))
        # Check distances
        D = mdtraj.compute_distances(geom, EN2_pairs)
        assert (np.allclose(D, feat_EN2.transform(geom)))

        # excluded_neighbors=1 yields the full pair list, because the first
        # neighbor doesn't contain CA's anyway
        feat_EN1 = MDFeaturizer(self.bogus_geom_pdbfile)
        feat_EN1.add_distances_ca(excluded_neighbors=1)
        EN1_pairs = [[1, 3], [1, 5], [1, 7], [3, 5], [3, 7], [5, 7]]
        assert (np.allclose(EN1_pairs,
                            feat_EN1.active_features[0].distance_indexes))
        D = mdtraj.compute_distances(geom, EN1_pairs)
        assert (np.allclose(D, feat_EN1.transform(geom)))

    def test_contacts(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # unperiodic distances such that we can compare
        self.feat.add_contacts(pairs, threshold=0.5, periodic=False)
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        in_contact = np.argwhere(D <= 0.5)
        C[in_contact[:, 0], in_contact[:, 1]] = 1.0
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_contacts_count_contacts(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # unperiodic distances such that we can compare
        self.feat.add_contacts(pairs, threshold=0.5, periodic=False,
                               count_contacts=True)
        # The dimensionality of the feature is now one
        assert (self.feat.dimension() == 1)
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        in_contact = np.argwhere(D <= 0.5)
        C[in_contact[:, 0], in_contact[:, 1]] = 1.0
        # Count the contacts
        C = C.sum(1, keepdims=True)
        assert (np.allclose(C, self.feat.transform(self.traj)))

    # NOTE: np.all is used throughout instead of np.alltrue, which was
    # removed in NumPy 2.0.

    def test_angles(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_angles_deg(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))

    def test_angles_cossin(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        self.assertEqual(Y.shape, (self.traj.n_frames, 2 * sel.shape[0]))
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_dihedrals(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrals_deg(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrials_cossin(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

        # test ordering of indices: phi and psi are expected to alternate
        backbone_feature = self.feat.active_features[0]
        angle_indices = backbone_feature.angle_indexes
        np.testing.assert_equal(angle_indices[0],
                                backbone_feature._phi_inds[0])
        np.testing.assert_equal(angle_indices[1],
                                backbone_feature._psi_inds[0])
        np.testing.assert_equal(angle_indices[2],
                                backbone_feature._phi_inds[1])
        np.testing.assert_equal(angle_indices[3],
                                backbone_feature._psi_inds[1])

    def test_backbone_dihedrals_deg(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(deg=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_cossin(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        # (3 phi + 3 psi)*2 [cos, sin]
        self.assertEqual(Y.shape, (len(traj), 3 * 4))
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension(), msg=desc)
        self.assertIn("COS", desc[0])
        self.assertIn("SIN", desc[1])

    def test_backbone_dihedrials_chi(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi_cossin(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_custom_feature(self):
        # TODO: test me
        pass

    def test_MinRmsd(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame])
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame])
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame],
                            atom_indices=self.atom_indices)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices_precentered(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame],
                            atom_indices=self.atom_indices, precentered=True)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_Residue_Mindist_Ca_all(self):
        self.feat.add_residue_mindist(scheme='ca')
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_all_threshold(self):
        threshold = .7
        self.feat.add_residue_mindist(scheme='ca', threshold=threshold)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        Dbinary = np.zeros_like(Dref)
        below = np.argwhere(Dref <= threshold)
        Dbinary[below[:, 0], below[:, 1]] = 1
        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_array(self):
        contacts = np.array([[20, 10], [10, 0]])
        self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca',
                                       contacts=contacts)[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_array_periodic(self):
        traj = mdtraj.load(pdbfile)
        # Atoms farthest apart in Z
        atom_minz = traj.xyz.argmin(1).squeeze()[-1]
        atom_maxz = traj.xyz.argmax(1).squeeze()[-1]
        # Residues with the atoms farthest apart in Z
        res_minz = traj.topology.atom(atom_minz).residue.index
        res_maxz = traj.topology.atom(atom_maxz).residue.index
        contacts = np.array([[res_minz, res_maxz]])
        # Tweak the trajectory so that a (bogus) PBC exists
        # (otherwise traj._have_unitcell is False)
        traj.unitcell_angles = [90, 90, 90]
        traj.unitcell_lengths = [1, 1, 1]
        self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts,
                                      periodic=False)
        D = self.feat.transform(traj)
        Dperiodic_true = mdtraj.compute_contacts(
            traj, scheme='ca', contacts=contacts, periodic=True)[0]
        Dperiodic_false = mdtraj.compute_contacts(
            traj, scheme='ca', contacts=contacts, periodic=False)[0]
        # This asserts that the periodic option is having an effect at all
        assert not np.allclose(Dperiodic_false, Dperiodic_true)
        # This asserts that the periodic option is being handled correctly
        # by pyemma
        assert np.allclose(D, Dperiodic_false)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_One_Group(self):
        group0 = [0, 20, 30, 0]
        # Even with duplicates
        self.feat.add_group_mindist(group_definitions=[group0])
        D = self.feat.transform(self.traj)
        dist_list = list(combinations(np.unique(group0), 2))
        Dref = mdtraj.compute_distances(self.traj, dist_list)
        assert np.allclose(D.squeeze(), Dref.min(1))
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups(self):
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2])
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of
        # groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(
            list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups_threshold(self):
        threshold = .7
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    threshold=threshold)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of
        # groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(
            list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        Dbinary = np.zeros_like(Dref)
        below = np.argwhere(Dref <= threshold)
        Dbinary[below[:, 0], below[:, 1]] = 1

        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_Some_Three_Groups(self):
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]

        group_pairs = np.array([[0, 1], [2, 2], [0, 2]])

        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    group_pairs=group_pairs)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of
        # groups; the [2, 2] pair is a distance within group2
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_22 = np.array(list(combinations(np.unique(group2), 2)))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_22 = mdtraj.compute_distances(self.traj, dist_list_22).min(1)
        # columns follow the order of group_pairs
        Dref = np.vstack((Dref_01, Dref_22, Dref_02)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()
class FeatureReader(DataSource):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g. distances).

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        how many frames to process in one batch.

    featurizer: MDFeaturizer
        a preconstructed featurizer

    Raises
    ------
    AssertionError
        if neither a topology file nor a featurizer is given, or if the
        number of atoms of the topology does not match the first trajectory.

    Examples
    --------
    >>> from pyemma.datasets import get_bpti_test_data
    >>> from pyemma.util.contexts import settings

    Iterator access:

    >>> reader = FeatureReader(get_bpti_test_data()['trajs'], get_bpti_test_data()['top'])

    Optionally set a chunksize

    >>> reader.chunksize = 300

    Store chunks by their trajectory index

    >>> chunks = {i : [] for i in range(reader.number_of_trajectories())}
    >>> for itraj, X in reader:
    ...     chunks[itraj].append(X)


    Calculate some distances of protein during feature reading:

    >>> reader.featurizer.add_distances([[0, 3], [10, 15]])
    >>> with settings(show_progress_bars=False):
    ...    X = reader.get_output()

    """
    SUPPORTED_RANDOM_ACCESS_FORMATS = (".h5", ".dcd", ".binpos", ".nc", ".xtc", ".trr")

    def __init__(self, trajectories, topologyfile=None, chunksize=1000, featurizer=None):
        # Raised explicitly (instead of via an ``assert`` statement) so the
        # check is still performed when Python runs with -O.
        if topologyfile is None and featurizer is None:
            raise AssertionError(
                "Needs either a topology file or a featurizer for instantiation")

        super(FeatureReader, self).__init__(chunksize=chunksize)
        self._is_reader = True
        self.topfile = topologyfile
        self.filenames = trajectories
        self._return_traj_obj = False

        self._is_random_accessible = all(
            f.endswith(FeatureReader.SUPPORTED_RANDOM_ACCESS_FORMATS)
            for f in self.filenames)
        # check we have at least mdtraj-1.6.1 to efficiently seek xtc, trr formats
        # (both checks iterate self.filenames so they see the same file list)
        if any(f.endswith(('.xtc', '.trr')) for f in self.filenames):
            self._is_random_accessible &= self._mdtraj_supports_xtc_trr_seek()

        self._ra_cuboid = FeatureReaderCuboidRandomAccessStrategy(self, 3)
        self._ra_jagged = FeatureReaderJaggedRandomAccessStrategy(self, 3)
        self._ra_linear_strategy = FeatureReaderLinearRandomAccessStrategy(self, 2)
        self._ra_linear_itraj_strategy = FeatureReaderLinearItrajRandomAccessStrategy(self, 3)

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                                 "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            # the featurizer's topology file takes precedence
            self.topfile = featurizer.topologyfile

        # Check that the topology and the files in the filelist can actually work together
        self._assert_toptraj_consistency()

    @staticmethod
    def _mdtraj_supports_xtc_trr_seek():
        """Return True if the installed mdtraj release is at least 1.6.1.

        Only the leading numeric release part of ``mdtraj.version.version``
        is compared, which avoids ``distutils.version`` (removed from the
        standard library in Python 3.12).
        """
        import re
        match = re.match(r'\d+(?:\.\d+)*', mdtraj.version.version)
        if match is None:
            # unparsable version string: be conservative
            return False
        release = tuple(int(part) for part in match.group(0).split('.'))
        return release >= (1, 6, 1)

    @property
    @deprecated('Please use "filenames" property.')
    def trajfiles(self):
        return self.filenames

    def _get_traj_info(self, filename):
        """Open ``filename`` with mdtraj and collect its basic properties.

        Returns a ``TrajInfo`` built from the second axis size of the first
        frame read, the number of frames and the frame offsets (an empty
        list when the file handle has no ``offsets`` attribute).
        """
        with mdtraj.open(filename, mode='r') as fh:
            length = len(fh)
            frame = fh.read(1)[0]
            ndim = np.shape(frame)[1]
            offsets = fh.offsets if hasattr(fh, 'offsets') else []

        return TrajInfo(ndim, length, offsets)

    def _create_iterator(self, skip=0, chunk=0, stride=1, return_trajindex=True, cols=None):
        """Create a ``FeatureReaderIterator`` over this reader; all arguments are forwarded."""
        return FeatureReaderIterator(self, skip=skip, chunk=chunk, stride=stride,
                                     return_trajindex=return_trajindex, cols=cols)

    def describe(self):
        """
        Returns a description of this transformer

        :return: list of strings, a header line followed by the
            featurizer's own description
        """
        return ["Feature reader with following features"] + self.featurizer.describe()

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: int
        """
        if not self.featurizer.active_features:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        # general case
        return self.featurizer.dimension()

    def _assert_toptraj_consistency(self):
        r""" Check if the topology and the filenames of the reader have the same n_atoms

        Only the first frame of the first file is inspected.

        Raises
        ------
        AssertionError
            if the atom counts differ.
        """
        top = self.featurizer.topology
        traj = mdtraj.load_frame(self.filenames[0], index=0, top=top)
        desired_n_atoms = top.n_atoms
        # explicit raise: keeps the check alive under ``python -O``
        if traj.xyz.shape[1] != desired_n_atoms:
            raise AssertionError(
                "Mismatch in the number of atoms between the topology"
                " and the first trajectory file, %u vs %u" %
                (desired_n_atoms, traj.xyz.shape[1]))
class FeatureReader(DataSource):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g. distances).

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        how many frames to process in one batch.

    featurizer: MDFeaturizer
        a preconstructed featurizer

    Raises
    ------
    AssertionError
        if neither a topology file nor a featurizer is given, or if the
        number of atoms of the topology does not match the first trajectory.

    Examples
    --------
    >>> from pyemma.datasets import get_bpti_test_data

    Iterator access:

    >>> reader = FeatureReader(get_bpti_test_data()['trajs'], get_bpti_test_data()['top'])

    Optionally set a chunksize

    >>> reader.chunksize = 300

    Store chunks by their trajectory index

    >>> chunks = {i : [] for i in range(reader.number_of_trajectories())}
    >>> for itraj, X in reader:
    ...     chunks[itraj].append(X)


    Calculate some distances of protein during feature reading:

    >>> reader.featurizer.add_distances([[0, 3], [10, 15]])
    >>> X = reader.get_output()

    """
    SUPPORTED_RANDOM_ACCESS_FORMATS = (".h5", ".dcd", ".binpos", ".nc", ".xtc", ".trr")

    def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
        # Raised explicitly (instead of via an ``assert`` statement) so the
        # check is still performed when Python runs with -O.
        if topologyfile is None and featurizer is None:
            raise AssertionError(
                "Needs either a topology file or a featurizer for instantiation")

        super(FeatureReader, self).__init__(chunksize=chunksize)
        self._is_reader = True
        self.topfile = topologyfile
        self.filenames = trajectories

        self._is_random_accessible = all(
            f.endswith(FeatureReader.SUPPORTED_RANDOM_ACCESS_FORMATS)
            for f in self.filenames)
        # check we have at least mdtraj-1.6.1 to efficiently seek xtc, trr formats
        # (both checks iterate self.filenames so they see the same file list)
        if any(f.endswith(('.xtc', '.trr')) for f in self.filenames):
            self._is_random_accessible &= self._mdtraj_supports_xtc_trr_seek()

        self._ra_cuboid = FeatureReaderCuboidRandomAccessStrategy(self, 3)
        self._ra_jagged = FeatureReaderJaggedRandomAccessStrategy(self, 3)
        self._ra_linear_strategy = FeatureReaderLinearRandomAccessStrategy(self, 2)
        self._ra_linear_itraj_strategy = FeatureReaderLinearItrajRandomAccessStrategy(self, 3)

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                                 "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            # the featurizer's topology file takes precedence
            self.topfile = featurizer.topologyfile

        # Check that the topology and the files in the filelist can actually work together
        self._assert_toptraj_consistency()

    @staticmethod
    def _mdtraj_supports_xtc_trr_seek():
        """Return True if the installed mdtraj release is at least 1.6.1.

        Only the leading numeric release part of ``mdtraj.version.version``
        is compared, which avoids ``distutils.version`` (removed from the
        standard library in Python 3.12).
        """
        import re
        match = re.match(r'\d+(?:\.\d+)*', mdtraj.version.version)
        if match is None:
            # unparsable version string: be conservative
            return False
        release = tuple(int(part) for part in match.group(0).split('.'))
        return release >= (1, 6, 1)

    @property
    @deprecated('Please use "filenames" property.')
    def trajfiles(self):
        return self.filenames

    def _get_traj_info(self, filename):
        """Open ``filename`` with mdtraj and collect its basic properties.

        While the file is open, ``XYZTrajectoryFile.__len__`` is temporarily
        replaced by a function that counts frames by iterating over the
        whole file, so ``len(fh)`` also works for xyz files.

        Returns a ``TrajInfo`` built from the second axis size of the first
        frame read, the number of frames and the frame offsets (an empty
        list when the file handle has no ``offsets`` attribute).
        """
        # workaround NotImplementedError __len__ for xyz files
        # Github issue: markovmodel/pyemma#621
        if six.PY2:
            from mock import patch
        else:
            from unittest.mock import patch
        from mdtraj.formats import XYZTrajectoryFile

        def _make_len_func(top):
            # closure factory: binds the topology needed by mdtraj.iterload
            def _len_xyz(self):
                assert isinstance(self, XYZTrajectoryFile)
                assert hasattr(self, '_filename'), "structual change in xyzfile class!"
                import warnings
                from pyemma.util.exceptions import EfficiencyWarning
                # Build the message in two steps: a conditional expression
                # binds weaker than "+", so writing it inline would blank the
                # whole message whenever the length cache is disabled.
                message = ("reading all of your data,"
                           " just to determine number of frames.")
                if config['use_trajectory_lengths_cache']:
                    message += " Happens only once, because this is cached."
                warnings.warn(message, EfficiencyWarning)
                # obtain len by reading whole file!
                mditer = mdtraj.iterload(self._filename, top=top)
                return sum(t.n_frames for t in mditer)

            return _len_xyz

        f = _make_len_func(self.topfile)

        # lookups pre-computed lengths, or compute it on the fly and store it in db.
        with patch.object(XYZTrajectoryFile, '__len__', f):
            with mdtraj.open(filename, mode='r') as fh:
                length = len(fh)
                frame = fh.read(1)[0]
                ndim = np.shape(frame)[1]
                offsets = fh.offsets if hasattr(fh, 'offsets') else []

        return TrajInfo(ndim, length, offsets)

    def _create_iterator(self, skip=0, chunk=0, stride=1, return_trajindex=True, cols=None):
        """Create a ``FeatureReaderIterator`` over this reader; all arguments are forwarded."""
        return FeatureReaderIterator(self, skip=skip, chunk=chunk, stride=stride,
                                     return_trajindex=return_trajindex, cols=cols)

    def describe(self):
        """
        Returns a description of this transformer

        :return: list of strings, a header line followed by the
            featurizer's own description
        """
        return ["Feature reader with following features"] + self.featurizer.describe()

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: int
        """
        if not self.featurizer.active_features:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        # general case
        return self.featurizer.dimension()

    def _assert_toptraj_consistency(self):
        r""" Check if the topology and the filenames of the reader have the same n_atoms

        Only the first frame of the first file is inspected.

        Raises
        ------
        AssertionError
            if the atom counts differ.
        """
        # Use the topology object the featurizer already holds instead of
        # parsing the topology file a second time.
        top = self.featurizer.topology
        traj = mdtraj.load_frame(self.filenames[0], index=0, top=top)
        desired_n_atoms = top.n_atoms
        # explicit raise: keeps the check alive under ``python -O``
        if traj.xyz.shape[1] != desired_n_atoms:
            raise AssertionError(
                "Mismatch in the number of atoms between the topology"
                " and the first trajectory file, %u vs %u" %
                (desired_n_atoms, traj.xyz.shape[1]))
class TestFeaturizer(unittest.TestCase):
    """Tests for the individual feature types offered by ``MDFeaturizer``.

    Most tests add one feature to ``self.feat``, transform ``self.traj`` and
    compare the result against a reference computed directly with numpy or
    mdtraj.
    """

    # atom selection and the pairs expected from ``pairs(sel, excluded_neighbors=2)``
    _SELECTION = (1, 2, 5, 20)
    _EXPECTED_PAIRS = ((1, 5), (1, 20), (2, 5), (2, 20), (5, 20))

    @classmethod
    def setUpClass(cls):
        import tempfile

        # mkstemp returns an *open* descriptor; close it so it is not leaked.
        fd, cls.asn_leu_pdbfile = tempfile.mkstemp(suffix=".pdb")
        os.close(fd)
        with open(cls.asn_leu_pdbfile, 'w') as fh:
            fh.write(asn_leu_pdb)

        # mkstemp instead of the race-prone mktemp; the empty file is
        # overwritten by traj.save below.
        fd, cls.asn_leu_traj = tempfile.mkstemp(suffix='.xtc')
        os.close(fd)

        # create traj for asn_leu: reference coordinates plus uniform noise
        n_frames = 4001
        traj = mdtraj.load(cls.asn_leu_pdbfile)
        ref = traj.xyz
        traj.xyz = np.random.random((n_frames, ref.shape[1], 3)) + ref
        traj.time = np.arange(n_frames)
        traj.save(cls.asn_leu_traj)

        super(TestFeaturizer, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        # best-effort removal of both temporary files
        for path in (cls.asn_leu_pdbfile, cls.asn_leu_traj):
            try:
                os.unlink(path)
            except EnvironmentError:
                pass

        super(TestFeaturizer, cls).tearDownClass()

    def setUp(self):
        self.pdbfile = pdbfile
        self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
        self.feat = MDFeaturizer(self.pdbfile)
        self.atol = 1e-5
        self.ref_frame = 0
        # floor division: atom indices have to be integers
        self.atom_indices = np.arange(0, self.traj.n_atoms // 2)

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _assert_within(self, values, lower, upper):
        """Assert that all entries of ``values`` lie in ``[lower, upper]``."""
        self.assertTrue(np.all(values >= lower))
        self.assertTrue(np.all(values <= upper))

    def _assert_describe_matches_dimension(self):
        """Assert that describe() has one entry per output dimension; return it."""
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())
        return desc

    def _selection_pairs(self):
        """Return pairs of ``_SELECTION`` after checking them against ``_EXPECTED_PAIRS``."""
        sel = np.array(self._SELECTION, dtype=int)
        expected = np.array(self._EXPECTED_PAIRS)
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        self.assertEqual(pairs.shape, expected.shape)
        self.assertTrue(np.all(pairs == expected))
        return pairs, expected

    def _pair_distances(self, pairs):
        """Euclidean (non-periodic) distances of ``self.traj`` for the given atom pairs."""
        first = self.traj.xyz[:, pairs[:, 0], :]
        second = self.traj.xyz[:, pairs[:, 1], :]
        return np.sqrt(np.sum((first - second) ** 2, axis=2))

    def _asn_leu_featurizer(self):
        """Replace ``self.feat`` by a featurizer for the asn-leu topology."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        return self.feat

    def _check_minrmsd(self, **kwargs):
        """Add the minrmsd feature twice (trajectory and file input) and compare to mdtraj.rmsd."""
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame], **kwargs)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame, **kwargs)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame], **kwargs)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        self.assertEqual(self.feat.dimension(), 2)
        self.assertEqual(len(self.feat.describe()), 2)

    def _group_mindist(self, group_a, group_b=None):
        """Reference minimal distance between two atom groups (or within one group)."""
        if group_b is None:
            atom_pairs = combinations(np.unique(group_a), 2)
        else:
            atom_pairs = product(np.unique(group_a), np.unique(group_b))
        atom_pairs = np.array(list(atom_pairs))
        return mdtraj.compute_distances(self.traj, atom_pairs).min(1)

    # ------------------------------------------------------------------
    # selections
    # ------------------------------------------------------------------
    def test_select_backbone(self):
        # only checks that the call succeeds
        self.feat.select_Backbone()

    def test_select_all(self):
        self.feat.add_all()
        self.assertEqual(self.feat.dimension(), self.traj.n_atoms * 3)
        refmap = np.reshape(self.traj.xyz,
                            (len(self.traj), self.traj.n_atoms * 3))
        self.assertTrue(np.all(refmap == self.feat.transform(self.traj)))

    def test_select(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        self.feat.add_selection(sel)
        self.assertEqual(self.feat.dimension(), sel.shape[0] * 3)
        refmap = np.reshape(self.traj.xyz[:, sel, :],
                            (len(self.traj), sel.shape[0] * 3))
        self.assertTrue(np.all(refmap == self.feat.transform(self.traj)))

    # ------------------------------------------------------------------
    # distances / contacts
    # ------------------------------------------------------------------
    def test_distances(self):
        pairs, pairs_expected = self._selection_pairs()
        # unperiodic distances such that we can compare
        self.feat.add_distances(pairs, periodic=False)
        self.assertEqual(self.feat.dimension(), pairs_expected.shape[0])
        D = self._pair_distances(pairs_expected)
        self.assertTrue(np.allclose(D, self.feat.transform(self.traj)))

    def test_inverse_distances(self):
        pairs, pairs_expected = self._selection_pairs()
        # unperiodic distances such that we can compare
        self.feat.add_inverse_distances(pairs, periodic=False)
        self.assertEqual(self.feat.dimension(), pairs_expected.shape[0])
        Dinv = 1.0 / self._pair_distances(pairs_expected)
        self.assertTrue(np.allclose(Dinv, self.feat.transform(self.traj)))

    def test_ca_distances(self):
        sel = self.feat.select_Ca()
        # should be all for this Ca-traj
        self.assertTrue(np.all(sel == list(range(self.traj.n_atoms))))
        pairs = self.feat.pairs(sel, excluded_neighbors=0)
        # unperiodic distances such that we can compare
        self.feat.add_distances_ca(periodic=False, excluded_neighbors=0)
        self.assertEqual(self.feat.dimension(), pairs.shape[0])
        D = self._pair_distances(pairs)
        self.assertTrue(np.allclose(D, self.feat.transform(self.traj)))

    def test_ca_distances_with_all_atom_geometries(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=0)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        feat_just_ca.add_distances(np.arange(feat_just_ca.topology.n_atoms))
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        self.assertTrue(np.allclose(D_aa, D_ca))

    def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=2)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        ca_pairs = feat.pairs(feat_just_ca.select_Ca(), excluded_neighbors=2)
        feat_just_ca.add_distances(ca_pairs)
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        self.assertTrue(np.allclose(D_aa, D_ca))

    def test_contacts(self):
        pairs, pairs_expected = self._selection_pairs()
        # unperiodic distances such that we can compare
        self.feat.add_contacts(pairs, threshold=0.5, periodic=False)
        self.assertEqual(self.feat.dimension(), pairs_expected.shape[0])
        D = self._pair_distances(pairs_expected)
        # 1.0 where the pair is within the threshold, 0.0 elsewhere
        C = (D <= 0.5).astype(np.float64)
        self.assertTrue(np.allclose(C, self.feat.transform(self.traj)))

    def test_contacts_count_contacts(self):
        pairs, pairs_expected = self._selection_pairs()
        # unperiodic distances such that we can compare
        self.feat.add_contacts(pairs, threshold=0.5, periodic=False,
                               count_contacts=True)
        # The dimensionality of the feature is now one
        self.assertEqual(self.feat.dimension(), 1)
        D = self._pair_distances(pairs_expected)
        C = (D <= 0.5).astype(np.float64)
        # Count the contacts
        C = C.sum(1, keepdims=True)
        self.assertTrue(np.allclose(C, self.feat.transform(self.traj)))

    # ------------------------------------------------------------------
    # angles / dihedrals
    # ------------------------------------------------------------------
    def test_angles(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel)
        self.assertEqual(self.feat.dimension(), sel.shape[0])
        Y = self.feat.transform(self.traj)
        self._assert_within(Y, -np.pi, np.pi)
        self._assert_describe_matches_dimension()

    def test_angles_deg(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, deg=True)
        self.assertEqual(self.feat.dimension(), sel.shape[0])
        Y = self.feat.transform(self.traj)
        self._assert_within(Y, -180.0, 180.0)

    def test_angles_cossin(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, cossin=True)
        self.assertEqual(self.feat.dimension(), 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        self._assert_within(Y, -np.pi, np.pi)
        self._assert_describe_matches_dimension()

    def test_dihedrals(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel)
        self.assertEqual(self.feat.dimension(), sel.shape[0])
        Y = self.feat.transform(self.traj)
        self._assert_within(Y, -np.pi, np.pi)
        self._assert_describe_matches_dimension()

    def test_dihedrals_deg(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, deg=True)
        self.assertEqual(self.feat.dimension(), sel.shape[0])
        Y = self.feat.transform(self.traj)
        self._assert_within(Y, -180.0, 180.0)
        self._assert_describe_matches_dimension()

    def test_dihedrials_cossin(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, cossin=True)
        self.assertEqual(self.feat.dimension(), 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        self._assert_within(Y, -np.pi, np.pi)
        self._assert_describe_matches_dimension()

    def test_backbone_dihedrals(self):
        self._asn_leu_featurizer().add_backbone_torsions()
        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        self._assert_within(Y, -np.pi, np.pi)
        self._assert_describe_matches_dimension()

    def test_backbone_dihedrals_deg(self):
        self._asn_leu_featurizer().add_backbone_torsions(deg=True)
        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        self._assert_within(Y, -180.0, 180.0)
        self._assert_describe_matches_dimension()

    def test_backbone_dihedrals_cossin(self):
        self._asn_leu_featurizer().add_backbone_torsions(cossin=True)
        traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        # (3 phi + 3 psi)*2 [cos, sin]
        self.assertEqual(Y.shape, (len(traj), 3 * 4))
        self._assert_within(Y, -np.pi, np.pi)
        desc = self.feat.describe()
        self.assertIn("COS", desc[0])
        self.assertIn("SIN", desc[1])
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi(self):
        self._asn_leu_featurizer().add_chi1_torsions()
        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        self._assert_within(Y, -np.pi, np.pi)
        self._assert_describe_matches_dimension()

    def test_backbone_dihedrials_chi_cossin(self):
        self._asn_leu_featurizer().add_chi1_torsions(cossin=True)
        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        self._assert_within(Y, -np.pi, np.pi)
        desc = self.feat.describe()
        self.assertIn("COS", desc[0])
        self.assertIn("SIN", desc[1])
        self.assertEqual(len(desc), self.feat.dimension())

    def test_custom_feature(self):
        # TODO: test me
        pass

    # ------------------------------------------------------------------
    # minrmsd
    # ------------------------------------------------------------------
    def test_MinRmsd(self):
        self._check_minrmsd()

    def test_MinRmsd_with_atom_indices(self):
        self._check_minrmsd(atom_indices=self.atom_indices)

    def test_MinRmsd_with_atom_indices_precentered(self):
        self._check_minrmsd(atom_indices=self.atom_indices, precentered=True)

    # ------------------------------------------------------------------
    # residue / group minimal distances
    # ------------------------------------------------------------------
    def test_Residue_Mindist_Ca_all(self):
        self.feat.add_residue_mindist(scheme='ca')
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        self.assertTrue(np.allclose(D, Dref))
        self._assert_describe_matches_dimension()

    def test_Residue_Mindist_Ca_all_threshold(self):
        threshold = .7
        self.feat.add_residue_mindist(scheme='ca', threshold=threshold)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        Dbinary = (Dref <= threshold).astype(Dref.dtype)
        self.assertTrue(np.allclose(D, Dbinary))
        self._assert_describe_matches_dimension()

    def test_Residue_Mindist_Ca_array(self):
        contacts = np.array([[20, 10], [10, 0]])
        self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca',
                                       contacts=contacts)[0]
        self.assertTrue(np.allclose(D, Dref))
        self._assert_describe_matches_dimension()

    def test_Group_Mindist_One_Group(self):
        group0 = [0, 20, 30, 0]
        # Even with duplicates
        self.feat.add_group_mindist(group_definitions=[group0])
        D = self.feat.transform(self.traj)
        Dref = self._group_mindist(group0)
        self.assertTrue(np.allclose(D.squeeze(), Dref))
        self._assert_describe_matches_dimension()

    def test_Group_Mindist_All_Three_Groups(self):
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2])
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        Dref = np.vstack((self._group_mindist(group0, group1),
                          self._group_mindist(group0, group2),
                          self._group_mindist(group1, group2))).T

        self.assertTrue(np.allclose(D.squeeze(), Dref))
        self._assert_describe_matches_dimension()

    def test_Group_Mindist_All_Three_Groups_threshold(self):
        threshold = .7
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    threshold=threshold)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        Dref = np.vstack((self._group_mindist(group0, group1),
                          self._group_mindist(group0, group2),
                          self._group_mindist(group1, group2))).T
        Dbinary = (Dref <= threshold).astype(Dref.dtype)

        self.assertTrue(np.allclose(D, Dbinary))
        self._assert_describe_matches_dimension()

    def test_Group_Mindist_Some_Three_Groups(self):
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]

        group_pairs = np.array([[0, 1], [2, 2], [0, 2]])

        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    group_pairs=group_pairs)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        # (same order as group_pairs; [2, 2] is the distance within group2)
        Dref = np.vstack((self._group_mindist(group0, group1),
                          self._group_mindist(group2),
                          self._group_mindist(group0, group2))).T

        self.assertTrue(np.allclose(D.squeeze(), Dref))
        self._assert_describe_matches_dimension()
class FeatureReader(DataSource, SerializableMixIn):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g) distances.

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        how many frames to process in one batch.

    featurizer: MDFeaturizer
        a preconstructed featurizer

    Examples
    --------
    >>> from pyemma.datasets import get_bpti_test_data
    >>> from pyemma.util.contexts import settings

    Iterator access:

    >>> reader = FeatureReader(get_bpti_test_data()['trajs'], get_bpti_test_data()['top'])

    Optionally set a chunksize

    >>> reader.chunksize = 300

    Store chunks by their trajectory index

    >>> chunks = {i : [] for i in range(reader.number_of_trajectories())}
    >>> for itraj, X in reader:
    ...     chunks[itraj].append(X)


    Calculate some distances of protein during feature reading:

    >>> reader.featurizer.add_distances([[0, 3], [10, 15]])
    >>> with settings(show_progress_bars=False):
    ...    X = reader.get_output()

    """
    SUPPORTED_RANDOM_ACCESS_FORMATS = (".h5", ".dcd", ".binpos", ".nc", ".xtc", ".trr")
    __serialize_version = 0

    def __init__(self, trajectories, topologyfile=None, chunksize=1000, featurizer=None):
        # kept as an assertion so callers still see an AssertionError
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"

        super(FeatureReader, self).__init__(chunksize=chunksize)
        self._is_reader = True
        self.topfile = topologyfile
        if not isinstance(trajectories, (list, tuple)):
            trajectories = [trajectories]
        # fresh list of plain strings, this is modified in-place in mdtraj.load
        self.filenames = [str(traj) for traj in trajectories]
        self._return_traj_obj = False

        self._is_random_accessible = all(
            file_suffix(f) in FeatureReader.SUPPORTED_RANDOM_ACCESS_FORMATS
            for f in self.filenames)
        # check we have at least mdtraj-1.6.1 to efficiently seek xtc, trr formats
        if any(file_suffix(f) in ('.xtc', '.trr') for f in self.filenames):
            self._is_random_accessible &= self._version_at_least(
                mdtraj.version.version, (1, 6, 1))

        self._ra_cuboid = FeatureReaderCuboidRandomAccessStrategy(self, 3)
        self._ra_jagged = FeatureReaderJaggedRandomAccessStrategy(self, 3)
        self._ra_linear_strategy = FeatureReaderLinearRandomAccessStrategy(self, 2)
        self._ra_linear_itraj_strategy = FeatureReaderLinearItrajRandomAccessStrategy(self, 3)

        # featurizer
        if topologyfile and featurizer:
            self.logger.warning(
                "Both a topology file and a featurizer were given as arguments. "
                "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            self.topfile = featurizer.topologyfile

        # Check that the topology and the files in the filelist can actually work together
        self._assert_toptraj_consistency()

    @staticmethod
    def _version_at_least(version, minimum):
        """Compare the leading numeric components of a version string.

        Replaces ``distutils.version.LooseVersion``, which is not available
        any more on Python >= 3.12.

        :param version: version string, e.g. ``'1.9.4'`` or ``'1.6.1.dev0'``
        :param minimum: tuple of ints, e.g. ``(1, 6, 1)``
        :return: True if the first ``len(minimum)`` numeric components of
            ``version`` (missing ones count as 0) are >= ``minimum``
        """
        import re
        numbers = []
        for token in str(version).split('.')[:len(minimum)]:
            leading_digits = re.match(r'\d+', token)
            if leading_digits is None:
                # non-numeric component (e.g. 'dev0'): stop parsing here
                break
            numbers.append(int(leading_digits.group()))
        numbers.extend([0] * (len(minimum) - len(numbers)))
        return tuple(numbers) >= tuple(minimum)

    @property
    @deprecated('Please use "filenames" property.')
    def trajfiles(self):
        """Deprecated alias that returns ``self.filenames``."""
        return self.filenames

    def _get_traj_info(self, filename):
        """Collect dimension, length and offsets of one trajectory file.

        :param filename: path (``str`` or ``Path``) opened with ``mdtraj.open``
        :return: ``TrajInfo`` built from the second axis length of the first
            frame, the number of frames and the handle's ``offsets`` attribute
            (an empty tuple if the handle has none)
        """
        filename = str(filename) if isinstance(filename, Path) else filename

        with mdtraj.open(filename, mode='r') as fh:
            try:
                length = len(fh)
            # certain formats like txt based ones (.gro, .lammpstrj) do not implement len()
            except (NotImplementedError, TypeError):
                length = None

            frame = fh.read(1)[0]
            ndim = np.shape(frame)[1]

            if length is None:
                # no len(): read the rest of the file and take the position
                # reported by the handle as the number of frames
                _ = fh.read()
                length = fh.tell()

            offsets = fh.offsets if hasattr(fh, 'offsets') else ()

        return TrajInfo(ndim, length, offsets)

    def _create_iterator(self, skip=0, chunk=0, stride=1, return_trajindex=True, cols=None):
        """Return a ``FeatureReaderIterator`` over this reader.

        All arguments are forwarded to the iterator. Each chunk is passed
        through the featurizer unless ``self._return_traj_obj`` is set.
        """
        def transform(data):
            # trigger to pass mdtraj.Trajectory objects to self.featurizer or not.
            if self._return_traj_obj:
                return data
            return self.featurizer.transform(data)

        return FeatureReaderIterator(self, skip=skip, chunk=chunk, stride=stride,
                                     return_trajindex=return_trajindex, cols=cols,
                                     transform_function=transform)

    def describe(self):
        """
        Returns a description of this transformer

        :return: list with a header string followed by the featurizer's description
        """
        return ["Feature reader with following features"] + self.featurizer.describe()

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: ``3 * n_atoms`` if the featurizer has no active features,
            otherwise the featurizer's own dimension
        """
        if len(self.featurizer.active_features) == 0:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        # general case
        return self.featurizer.dimension()

    @staticmethod
    def supports_format(file_name):
        """
        Static method that checks whether the extension of the input file name indicates a
        file type that can potentially be read with a FeatureReader.

        :param file_name: the file name or path
        :return: True if the extension indicates a file type that could be read,
            otherwise False (also for anything that is not a ``str``)
        """
        import os
        from mdtraj.formats.registry import FormatRegistry

        if not isinstance(file_name, str):
            return False

        # ensure there is something to split
        file_name = "/dummy" + file_name
        suffix = os.path.splitext(file_name)[1]
        # NOTE(review): os.path.splitext only yields the last extension, so
        # '.pdb.gz' can never match here; kept to preserve behaviour.
        if suffix in ('.pdb', '.pdb.gz'):
            return False
        return suffix in FormatRegistry.loaders

    def _assert_toptraj_consistency(self):
        r""" Check if the topology and the filenames of the reader have the same n_atoms"""
        top = self.featurizer.topology
        # only the first frame of the first file is loaded for the comparison
        traj = mdtraj.load_frame(self.filenames[0], index=0, top=top)
        desired_n_atoms = top.n_atoms
        assert traj.xyz.shape[1] == desired_n_atoms, "Mismatch in the number of atoms between the topology" \
                                                     " and the first trajectory file, %u vs %u" % \
                                                     (desired_n_atoms, traj.xyz.shape[1])

    def __reduce__(self):
        # serialize only the constructor arguments.
        return FeatureReader, (self.filenames, None, self.chunksize, self.featurizer)