def test_featurize_splif(self):
    """Check featurize_splif against per-bin SPLIF computation.

    The list returned by ``rgf.featurize_splif`` must equal the list
    obtained by calling ``rgf.compute_splif_features_in_range`` once for
    every contact bin with the same distances and ECFP degree.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    pairwise = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    contact_ranges = [(1, 2), (2, 3)]

    actual = rgf.featurize_splif(
        protein_coords,
        protein_mol,
        ligand_coords,
        ligand_mol,
        contact_bins=contact_ranges,
        pairwise_distances=pairwise,
        ecfp_degree=2)

    # Build the reference result one contact bin at a time.
    reference = []
    for contact_range in contact_ranges:
        per_bin = rgf.compute_splif_features_in_range(
            protein_mol, ligand_mol, pairwise, contact_range, ecfp_degree=2)
        reference.append(per_bin)

    self.assertIsInstance(actual, list)
    self.assertEqual(actual, reference)
def test_compute_splif_features_in_range(self):
    """Validate the structure of SPLIF dicts for two distance ranges.

    For each range the result must be a dict whose keys are
    (protein index, ligand index) pairs lying inside the atom counts of
    the loaded molecules, and whose values contain "index,fragment"
    strings with a non-negative integer index.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    n_protein_atoms = protein_mol.GetNumAtoms()
    n_ligand_atoms = ligand_mol.GetNumAtoms()
    pairwise = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    index_types = (int, np.int64)

    for contact_range in ((0, 2), (2, 3)):
        features = rgf.compute_splif_features_in_range(
            protein_mol, ligand_mol, pairwise, contact_range)
        self.assertIsInstance(features, dict)

        for atom_pair, fingerprints in features.items():
            protein_atom, ligand_atom = atom_pair

            # Keys: integer indices within the bounds of each molecule.
            self.assertIsInstance(protein_atom, index_types)
            self.assertIsInstance(ligand_atom, index_types)
            self.assertGreaterEqual(protein_atom, 0)
            self.assertLess(protein_atom, n_protein_atoms)
            self.assertGreaterEqual(ligand_atom, 0)
            self.assertLess(ligand_atom, n_ligand_atoms)

            # Values: each entry splits into exactly two comma-separated
            # parts, the first of which parses as a non-negative integer.
            for fingerprint in fingerprints:
                index_text, _ = fingerprint.split(',')
                self.assertGreaterEqual(int(index_text), 0)
def test_compute_splif_features_in_range(self):
    """Check keys and values of SPLIF dicts computed for two ranges.

    Keys are expected to be (protein index, ligand index) pairs that are
    integers within the atom counts of the loaded molecules; values are
    expected to hold "index,fragment" strings whose index part is a
    non-negative integer.
    """
    prot_coords, prot_mol = rgf.load_molecule(self.protein_file)
    lig_coords, lig_mol = rgf.load_molecule(self.ligand_file)
    atom_counts = (prot_mol.GetNumAtoms(), lig_mol.GetNumAtoms())
    dist_matrix = rgf.compute_pairwise_distances(
        protein_xyz=prot_coords, ligand_xyz=lig_coords)

    for lower_upper in ((0, 2), (2, 3)):
        result = rgf.compute_splif_features_in_range(
            prot_mol,
            lig_mol,
            dist_matrix,
            lower_upper,
        )
        self.assertIsInstance(result, dict)

        for (p_atom, l_atom), pair in result.items():
            atom_indices = (p_atom, l_atom)

            # First make sure both indices are integers...
            for atom_index in atom_indices:
                self.assertIsInstance(atom_index, (int, np.int64))

            # ...then that each one addresses an existing atom
            # (protein first, ligand second).
            for atom_index, atom_count in zip(atom_indices, atom_counts):
                self.assertGreaterEqual(atom_index, 0)
                self.assertLess(atom_index, atom_count)

            for entry in pair:
                raw_index, _ = entry.split(',')
                parsed_index = int(raw_index)
                self.assertGreaterEqual(parsed_index, 0)
def test_compute_pairwise_distances(self):
    """Check shape, bounds and values of the pairwise distance matrix.

    Random coordinates verify the output shape and value range; a small
    hand-computed example verifies the actual distances.
    """
    n1 = 10
    n2 = 50
    coords1 = np.random.rand(n1, 3)
    coords2 = np.random.rand(n2, 3)

    distance = rgf.compute_pairwise_distances(coords1, coords2)
    self.assertEqual(distance.shape, (n1, n2))
    self.assertTrue((distance >= 0).all())
    # Coordinates are drawn from [0, 1) in three dimensions, so the
    # largest possible distance is the diagonal of the unit cube,
    # sqrt(3). A bound of sqrt(2) only holds in 2D and would make this
    # test fail at random.
    self.assertTrue((distance <= 3.0**0.5).all())

    # Hand-computed distances for points placed on the x axis.
    coords1 = np.array([[0, 0, 0], [1, 0, 0]])
    coords2 = np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]])
    distance = rgf.compute_pairwise_distances(coords1, coords2)
    expected = np.array([[1, 2, 3], [0, 1, 2]])
    self.assertEqual(distance.shape, expected.shape)
    # Tolerant comparison: the result is floating point, so exact
    # equality would be sensitive to rounding in the implementation.
    self.assertTrue(np.allclose(distance, expected))
def test_compute_pairwise_distances(self):
    """Check shape and value range of the pairwise distance matrix.

    Two random sets of 3D coordinates are used; the result must have one
    row per point of the first set and one column per point of the
    second, and every entry must lie between 0 and sqrt(3).
    """
    n1 = 10
    n2 = 50
    coords1 = np.random.rand(n1, 3)
    coords2 = np.random.rand(n2, 3)

    distance = rgf.compute_pairwise_distances(coords1, coords2)
    self.assertEqual(distance.shape, (n1, n2))
    self.assertTrue((distance >= 0).all())
    # Coordinates are drawn from [0, 1) in three dimensions, so the
    # largest possible distance is the diagonal of the unit cube,
    # sqrt(3). A bound of sqrt(2) only holds in 2D and would make this
    # test fail at random.
    self.assertTrue((distance <= 3.0**0.5).all())
def test_featurize_binding_pocket_ecfp(self):
    """Exercise featurize_binding_pocket_ecfp with several option sets.

    Verifies that two dicts are returned, that passing precomputed
    distances does not change the result, that the number of entries
    varies with ``cutoff`` as expected, and that ``ecfp_degree=3`` gives a
    different result than the default call.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    pairwise = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    # Positional arguments shared by every featurizer call below.
    complex_args = (protein_coords, protein_mol, ligand_coords, ligand_mol)

    # Default call versus a call with precomputed distances.
    base_protein, base_ligand = rgf.featurize_binding_pocket_ecfp(
        *complex_args)
    cached_protein, cached_ligand = rgf.featurize_binding_pocket_ecfp(
        *complex_args, pairwise_distances=pairwise)

    # Make sure two dicts came back before comparing their contents.
    self.assertIsInstance(base_protein, dict)
    self.assertIsInstance(base_ligand, dict)

    self.assertEqual(base_protein, cached_protein)
    self.assertEqual(base_ligand, cached_ligand)

    # A smaller cutoff must give fewer protein entries than the default
    # call, and a larger one more.
    near_protein, near_ligand = rgf.featurize_binding_pocket_ecfp(
        *complex_args, cutoff=2.0)
    far_protein, far_ligand = rgf.featurize_binding_pocket_ecfp(
        *complex_args, cutoff=6.0)

    self.assertLess(len(near_protein), len(base_protein))
    # Ligands are typically small, so all atoms might already be present;
    # only a non-strict comparison is safe for them.
    self.assertLessEqual(len(near_ligand), len(base_ligand))
    self.assertGreater(len(far_protein), len(base_protein))
    self.assertGreaterEqual(len(far_ligand), len(base_ligand))

    # A different ECFP degree must change both dictionaries.
    deg3_protein, deg3_ligand = rgf.featurize_binding_pocket_ecfp(
        *complex_args, ecfp_degree=3)
    self.assertNotEqual(deg3_protein, base_protein)
    self.assertNotEqual(deg3_ligand, base_ligand)
def test_featurize_binding_pocket_ecfp(self):
    """Check binding-pocket ECFP featurization under varying options.

    Covers: return types, equivalence with precomputed pairwise
    distances, how the number of entries changes with ``cutoff``, and
    sensitivity to ``ecfp_degree``.
    """
    prot_coords, prot_mol = rgf.load_molecule(self.protein_file)
    lig_coords, lig_mol = rgf.load_molecule(self.ligand_file)
    dist_matrix = rgf.compute_pairwise_distances(
        protein_xyz=prot_coords, ligand_xyz=lig_coords)

    def featurize(**options):
        # Same complex every time; only the keyword options vary.
        return rgf.featurize_binding_pocket_ecfp(
            prot_coords, prot_mol, lig_coords, lig_mol, **options)

    # Results must not depend on whether distances are precomputed.
    default_prot, default_lig = featurize()
    precomp_prot, precomp_lig = featurize(pairwise_distances=dist_matrix)

    # ...but first confirm that two dicts were returned.
    self.assertIsInstance(default_prot, dict)
    self.assertIsInstance(default_lig, dict)

    self.assertEqual(default_prot, precomp_prot)
    self.assertEqual(default_lig, precomp_lig)

    # Fewer features are expected with a smaller distance cutoff and
    # more with a larger one.
    tight_prot, tight_lig = featurize(cutoff=2.0)
    wide_prot, wide_lig = featurize(cutoff=6.0)

    n_default_prot = len(default_prot)
    n_default_lig = len(default_lig)
    self.assertLess(len(tight_prot), n_default_prot)
    # Ligands are typically small, so every atom may already be included.
    self.assertLessEqual(len(tight_lig), n_default_lig)
    self.assertGreater(len(wide_prot), n_default_prot)
    self.assertGreaterEqual(len(wide_lig), n_default_lig)

    # Changing the ECFP degree must alter the output.
    e3_prot, e3_lig = featurize(ecfp_degree=3)
    self.assertNotEqual(e3_prot, default_prot)
    self.assertNotEqual(e3_lig, default_lig)
def test_featurize_splif(self):
    """Compare featurize_splif with bin-by-bin SPLIF computation.

    ``rgf.featurize_splif`` must return a list that equals the results of
    ``rgf.compute_splif_features_in_range`` collected over the same
    contact bins, using the same distances and ECFP degree.
    """
    prot_coords, prot_mol = rgf.load_molecule(self.protein_file)
    lig_coords, lig_mol = rgf.load_molecule(self.ligand_file)
    dist_matrix = rgf.compute_pairwise_distances(
        protein_xyz=prot_coords, ligand_xyz=lig_coords)
    contact_ranges = [(1, 2), (2, 3)]

    def splif_for(contact_range):
        # Reference computation for a single contact bin.
        return rgf.compute_splif_features_in_range(
            prot_mol, lig_mol, dist_matrix, contact_range, ecfp_degree=2)

    result = rgf.featurize_splif(
        prot_coords,
        prot_mol,
        lig_coords,
        lig_mol,
        contact_bins=contact_ranges,
        pairwise_distances=dist_matrix,
        ecfp_degree=2)
    expected = [splif_for(contact_range) for contact_range in contact_ranges]

    self.assertIsInstance(result, list)
    self.assertEqual(result, expected)