def testAddFeaturesWithDuplicates(self):
    """Add each kind of feature twice and check that duplicates are rejected.

    After the ``add_*`` calls the length of ``active_features`` is compared
    with the number of distinct features registered so far.
    """
    feat = MDFeaturizer(pdbfile)

    def expect_active(count):
        # Single place for the recurring "how many features are active" check.
        self.assertEqual(len(feat.active_features), count)

    # angles: same triplets twice -> one feature
    feat.add_angles([[0, 1, 2], [0, 3, 4]])
    feat.add_angles([[0, 1, 2], [0, 3, 4]])
    expect_active(1)

    # backbone torsions
    feat.add_backbone_torsions()
    expect_active(2)
    feat.add_backbone_torsions()
    expect_active(2)

    # contacts
    feat.add_contacts([[0, 1], [0, 3]])
    expect_active(3)
    feat.add_contacts([[0, 1], [0, 3]])
    expect_active(3)

    # Try to fool it: distances between hand-built Ca pairs first, then the
    # add_distances_ca() convenience call, which must not add a new feature.
    ca_atoms = feat.select_Ca()
    ca_pairs = feat.pairs(ca_atoms)
    feat.add_distances(ca_pairs)
    expect_active(4)
    feat.add_distances_ca()
    expect_active(4)

    # inverse distances and plain distances on the same pairs both count
    feat.add_inverse_distances([[0, 1], [0, 3]])
    expect_active(5)
    feat.add_distances([[0, 1], [0, 3]])
    expect_active(6)
    feat.add_distances([[0, 1], [0, 3]])
    expect_active(6)

    def my_func(x):
        return x - 1

    def foo(x):
        return x - 1

    # the very same CustomFeature object added twice -> counted once
    my_feature = CustomFeature(my_func)
    my_feature.dimension = 3
    feat.add_custom_feature(my_feature)
    expect_active(7)
    feat.add_custom_feature(my_feature)
    expect_active(7)

    # my_func and foo are different function objects, so a feature wrapping
    # foo is expected to be added as a new one
    foo_feat = CustomFeature(foo, dim=3)
    feat.add_custom_feature(foo_feat)
    expect_active(8)
def test_labels(self):
    """Smoke test: ``describe()`` must not raise for a mix of feature types."""
    # NOTE(review): a second method named ``test_labels`` is defined right
    # after this one; if both live in the same class the later definition
    # shadows this one and this body never runs -- confirm.
    feat = MDFeaturizer(pdbfile)

    # built-in feature types
    feat.add_angles([[1, 2, 3], [4, 5, 6]])
    feat.add_backbone_torsions()
    feat.add_contacts([[0, 1], [0, 3]])
    feat.add_distances([[0, 1], [0, 3]])
    feat.add_inverse_distances([[0, 1], [0, 3]])

    # user-defined feature; its dimension is provided as a callable
    custom = CustomFeature(lambda x: x - 1)
    custom.dimension = lambda: 3
    feat.add_custom_feature(custom)

    # only the absence of an exception is checked, not the labels themselves
    feat.describe()
def test_labels(self):
    """Smoke test: ``describe()`` must not raise with many feature types added."""
    feat = MDFeaturizer(pdbfile)

    # geometric features defined by explicit atom indices
    feat.add_angles([[1, 2, 3], [4, 5, 6]])
    feat.add_backbone_torsions()
    feat.add_contacts([[0, 1], [0, 3]])
    feat.add_distances([[0, 1], [0, 3]])
    feat.add_inverse_distances([[0, 1], [0, 3]])

    # user-defined feature with its dimension passed at construction time
    custom = CustomFeature(lambda x: x - 1, dim=3)
    feat.add_custom_feature(custom)

    # RMSD to a reference and minimum-distance features
    feat.add_minrmsd_to_ref(pdbfile)
    feat.add_residue_mindist()
    feat.add_group_mindist([[0, 1], [0, 2]])

    # only the absence of an exception is checked, not the labels themselves
    feat.describe()
class TestFeaturizer(unittest.TestCase):
    """Tests for the individual feature types of ``MDFeaturizer``.

    ``setUp`` creates a fresh featurizer (``self.feat``) and loads the shared
    trajectory (``self.traj``) for every test.  Most tests add one feature,
    call ``transform`` and compare the output against a reference computed
    directly with numpy or mdtraj.
    """

    @classmethod
    def setUpClass(cls):
        """Write the asn-leu topology and a noisy trajectory of it to temp files."""
        import tempfile

        # mkstemp() returns an already-open descriptor; wrap it so that it is
        # closed after writing instead of being leaked.
        fd, cls.asn_leu_pdbfile = tempfile.mkstemp(suffix=".pdb")
        with os.fdopen(fd, 'w') as fh:
            fh.write(asn_leu_pdb)

        # mkstemp() instead of the deprecated, race-prone mktemp(): the file
        # is created atomically and is overwritten by traj.save() below.
        fd, cls.asn_leu_traj = tempfile.mkstemp(suffix='.xtc')
        os.close(fd)

        # create traj for asn_leu: reference coordinates plus uniform noise
        n_frames = 4001
        traj = mdtraj.load(cls.asn_leu_pdbfile)
        ref = traj.xyz
        new_xyz = np.empty((n_frames, ref.shape[1], 3))
        noise = np.random.random(new_xyz.shape)
        new_xyz[:, :, :] = noise + ref
        traj.xyz = new_xyz
        traj.time = np.arange(n_frames)
        traj.save(cls.asn_leu_traj)

        super(TestFeaturizer, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        """Remove both temporary files created in ``setUpClass``."""
        for path in (cls.asn_leu_pdbfile, cls.asn_leu_traj):
            try:
                os.unlink(path)
            except EnvironmentError:
                # best effort: a file that cannot be removed must not fail
                # the test run
                pass
        super(TestFeaturizer, cls).tearDownClass()

    def setUp(self):
        """Load the shared trajectory and create a fresh featurizer."""
        self.pdbfile = pdbfile
        self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
        self.feat = MDFeaturizer(self.pdbfile)
        self.atol = 1e-5
        self.ref_frame = 0
        # Floor division keeps the indices integral; with true division
        # np.arange() would return a float array.
        self.atom_indices = np.arange(0, self.traj.n_atoms // 2)

    def test_select_backbone(self):
        """Smoke test: selecting the backbone must not raise."""
        self.feat.select_Backbone()

    def test_select_all(self):
        """add_all() yields the flattened xyz coordinates of all atoms."""
        self.feat.add_all()
        assert (self.feat.dimension() == self.traj.n_atoms * 3)
        refmap = np.reshape(self.traj.xyz,
                            (len(self.traj), self.traj.n_atoms * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_select(self):
        """add_selection() yields the flattened xyz of the selected atoms."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        self.feat.add_selection(sel)
        assert (self.feat.dimension() == sel.shape[0] * 3)
        refmap = np.reshape(self.traj.xyz[:, sel, :],
                            (len(self.traj), sel.shape[0] * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_distances(self):
        """Pair distances match a direct Euclidean computation."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # non-periodic distances such that we can compare
        self.feat.add_distances(pairs, periodic=False)
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_inverse_distances(self):
        """Inverse pair distances match 1 / Euclidean distance."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # non-periodic distances such that we can compare
        self.feat.add_inverse_distances(pairs, periodic=False)
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        Dinv = 1.0 / np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(Dinv, self.feat.transform(self.traj)))

    def test_ca_distances(self):
        """add_distances_ca() matches distances between all selected Ca pairs."""
        sel = self.feat.select_Ca()
        # should be all for this Ca-traj
        assert (np.all(sel == list(range(self.traj.n_atoms))))
        pairs = self.feat.pairs(sel, excluded_neighbors=0)
        # non-periodic distances such that we can compare
        self.feat.add_distances_ca(periodic=False, excluded_neighbors=0)
        assert (self.feat.dimension() == pairs.shape[0])
        X = self.traj.xyz[:, pairs[:, 0], :]
        Y = self.traj.xyz[:, pairs[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_ca_distances_with_all_atom_geometries(self):
        """Ca distances from the all-atom file equal those from the Ca-only file."""
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=0)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        feat_just_ca.add_distances(np.arange(feat_just_ca.topology.n_atoms))
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
        """Same comparison as above, with ``excluded_neighbors=2``."""
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=2)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        ca_pairs = feat.pairs(feat_just_ca.select_Ca(), excluded_neighbors=2)
        feat_just_ca.add_distances(ca_pairs)
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_contacts(self):
        """Contacts are 1 where the pair distance is <= threshold, else 0."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # non-periodic distances such that we can compare
        self.feat.add_contacts(pairs, threshold=0.5, periodic=False)
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_contacts_count_contacts(self):
        """With ``count_contacts=True`` the output is the per-frame contact sum."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # non-periodic distances such that we can compare
        self.feat.add_contacts(pairs, threshold=0.5, periodic=False,
                               count_contacts=True)
        # The dimensionality of the feature is now one
        assert (self.feat.dimension() == 1)
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        # Count the contacts
        C = C.sum(1, keepdims=True)
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_angles(self):
        """Angles have one dimension per triplet and lie within [-pi, pi]."""
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_angles_deg(self):
        """Angles in degrees lie within [-180, 180]."""
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))

    def test_angles_cossin(self):
        """With ``cossin=True`` the dimension doubles; labels match dimension."""
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        # NOTE(review): with cossin=True the outputs are presumably cos/sin
        # values, so [-1, 1] would be a tighter bound -- confirm.
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_dihedrals(self):
        """Dihedrals have one dimension per quadruplet and lie within [-pi, pi]."""
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrals_deg(self):
        """Dihedrals in degrees lie within [-180, 180]."""
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrials_cossin(self):
        """With ``cossin=True`` the dihedral dimension doubles."""
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals(self):
        """Backbone torsions of asn-leu lie within [-pi, pi]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions()
        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_deg(self):
        """Backbone torsions in degrees lie within [-180, 180]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(deg=True)
        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_cossin(self):
        """cos/sin backbone torsions on the generated asn-leu trajectory."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(cossin=True)
        traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        # (3 phi + 3 psi)*2 [cos, sin]
        self.assertEqual(Y.shape, (len(traj), 3 * 4))
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi(self):
        """chi1 torsions of asn-leu lie within [-pi, pi]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions()
        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi_cossin(self):
        """cos/sin chi1 torsions: labels alternate COS / SIN."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions(cossin=True)
        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_custom_feature(self):
        # TODO: test me
        pass

    def test_MinRmsd(self):
        """minRMSD to a reference frame, given as trajectory and as file."""
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame])
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame])
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices(self):
        """minRMSD restricted to ``self.atom_indices``."""
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame],
                            atom_indices=self.atom_indices)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices_precentered(self):
        """minRMSD restricted to ``self.atom_indices`` with ``precentered=True``."""
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame],
                            atom_indices=self.atom_indices, precentered=True)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_Residue_Mindist_Ca_all(self):
        """Residue mindist (scheme 'ca') matches ``mdtraj.compute_contacts``."""
        self.feat.add_residue_mindist(scheme='ca')
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_all_threshold(self):
        """With a threshold the residue mindist output is binary."""
        threshold = .7
        self.feat.add_residue_mindist(scheme='ca', threshold=threshold)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        Dbinary = np.zeros_like(Dref)
        I = np.argwhere(Dref <= threshold)
        Dbinary[I[:, 0], I[:, 1]] = 1
        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_array(self):
        """Residue mindist restricted to an explicit array of residue pairs."""
        contacts = np.array([[20, 10], [10, 0]])
        self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca',
                                       contacts=contacts)[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_One_Group(self):
        """Group mindist within a single group."""
        group0 = [0, 20, 30, 0]
        # Even with duplicates
        self.feat.add_group_mindist(group_definitions=[group0])
        D = self.feat.transform(self.traj)
        dist_list = list(combinations(np.unique(group0), 2))
        Dref = mdtraj.compute_distances(self.traj, dist_list)
        assert np.allclose(D.squeeze(), Dref.min(1))
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups(self):
        """Group mindist between all pairs of three groups."""
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2])
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(
            list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups_threshold(self):
        """Group mindist between three groups, binarized by a threshold."""
        threshold = .7
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    threshold=threshold)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(
            list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        Dbinary = np.zeros_like(Dref)
        I = np.argwhere(Dref <= threshold)
        Dbinary[I[:, 0], I[:, 1]] = 1

        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_Some_Three_Groups(self):
        """Group mindist for an explicit list of group pairs, incl. a self pair."""
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        group_pairs = np.array([[0, 1], [2, 2], [0, 2]])
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    group_pairs=group_pairs)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        # pair (2, 2): distances between distinct atoms within group2
        dist_list_22 = np.array(list(combinations(np.unique(group2), 2)))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_22 = mdtraj.compute_distances(self.traj, dist_list_22).min(1)
        # column order follows group_pairs: (0, 1), (2, 2), (0, 2)
        Dref = np.vstack((Dref_01, Dref_22, Dref_02)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()