def test_featurize_binding_pocket_ecfp(self):
    """Exercise ``rgf.featurize_binding_pocket_ecfp`` on the fixture complex.

    Verifies that the call returns two dicts, that passing precomputed
    pairwise distances yields equal dicts, that the ``cutoff`` argument
    shrinks or grows the protein dict relative to the default call, and
    that ``ecfp_degree=3`` produces dicts different from the default ones.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    precomputed = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    def featurize(**options):
        # Every call in this test shares the same four positional inputs;
        # only the keyword options vary.
        return rgf.featurize_binding_pocket_ecfp(
            protein_coords, protein_mol, ligand_coords, ligand_mol,
            **options)

    # Default call versus a call that is handed the distance matrix.
    base_prot, base_lig = featurize()
    dist_prot, dist_lig = featurize(pairwise_distances=precomputed)

    # Confirm the return types before comparing the contents.
    self.assertIsInstance(base_prot, dict)
    self.assertIsInstance(base_lig, dict)
    self.assertEqual(base_prot, dist_prot)
    self.assertEqual(base_lig, dist_lig)

    # A tighter cutoff should keep fewer entries, a looser one more.
    # NOTE(review): this presumes the default cutoff lies strictly between
    # 2.0 and 6.0 -- confirm against the featurizer's signature.
    near_prot, near_lig = featurize(cutoff=2.0)
    far_prot, far_lig = featurize(cutoff=6.0)

    base_prot_size = len(base_prot)
    base_lig_size = len(base_lig)
    self.assertLess(len(near_prot), base_prot_size)
    # Ligands are typically small, so every atom may already be included;
    # hence the non-strict comparisons for the ligand dicts.
    self.assertLessEqual(len(near_lig), base_lig_size)
    self.assertGreater(len(far_prot), base_prot_size)
    self.assertGreaterEqual(len(far_lig), base_lig_size)

    # A different ECFP degree must change the resulting dicts.
    deg3_prot, deg3_lig = featurize(ecfp_degree=3)
    self.assertNotEqual(deg3_prot, base_prot)
    self.assertNotEqual(deg3_lig, base_lig)
def test_featurize_binding_pocket_ecfp(self):
    """Exercise ``rgf.featurize_binding_pocket_ecfp`` on the fixture complex.

    Verifies that the call returns two dicts, that passing precomputed
    pairwise distances yields equal dicts, that the ``cutoff`` argument
    shrinks or grows the protein dict relative to the default call, and
    that ``ecfp_degree=3`` produces dicts different from the default ones.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    precomputed = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    def featurize(**options):
        # Every call in this test shares the same four positional inputs;
        # only the keyword options vary.
        return rgf.featurize_binding_pocket_ecfp(
            protein_coords, protein_mol, ligand_coords, ligand_mol,
            **options)

    # Default call versus a call that is handed the distance matrix.
    base_prot, base_lig = featurize()
    dist_prot, dist_lig = featurize(pairwise_distances=precomputed)

    # Confirm the return types before comparing the contents.
    self.assertIsInstance(base_prot, dict)
    self.assertIsInstance(base_lig, dict)
    self.assertEqual(base_prot, dist_prot)
    self.assertEqual(base_lig, dist_lig)

    # A tighter cutoff should keep fewer entries, a looser one more.
    # NOTE(review): this presumes the default cutoff lies strictly between
    # 2.0 and 6.0 -- confirm against the featurizer's signature.
    near_prot, near_lig = featurize(cutoff=2.0)
    far_prot, far_lig = featurize(cutoff=6.0)

    base_prot_size = len(base_prot)
    base_lig_size = len(base_lig)
    self.assertLess(len(near_prot), base_prot_size)
    # Ligands are typically small, so every atom may already be included;
    # hence the non-strict comparisons for the ligand dicts.
    self.assertLessEqual(len(near_lig), base_lig_size)
    self.assertGreater(len(far_prot), base_prot_size)
    self.assertGreaterEqual(len(far_lig), base_lig_size)

    # A different ECFP degree must change the resulting dicts.
    deg3_prot, deg3_lig = featurize(ecfp_degree=3)
    self.assertNotEqual(deg3_prot, base_prot)
    self.assertNotEqual(deg3_lig, base_lig)
def test_voxelize(self):
    """Exercise ``RdkitGridFeaturizer._voxelize`` with hashed ECFP features.

    Both molecules are shifted by the ligand centroid, featurized with
    ``rgf.featurize_binding_pocket_ecfp`` and voxelized into a grid of
    ``box_width`` cells per axis with ``2**ecfp_power`` channels. The test
    checks the tensor shapes and the summed feature counts.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)

    # Shift both coordinate sets by the ligand centroid.
    ligand_center = rgf.compute_centroid(ligand_coords)
    protein_coords = rgf.subtract_centroid(protein_coords, ligand_center)
    ligand_coords = rgf.subtract_centroid(ligand_coords, ligand_center)

    protein_features, ligand_features = rgf.featurize_binding_pocket_ecfp(
        protein_coords, protein_mol, ligand_coords, ligand_mol)

    grid_size = 20
    power = 5
    featurizer = rgf.RdkitGridFeaturizer(
        box_width=grid_size,
        ecfp_power=power,
        feature_types=['all_combined'],
        flatten=True,
        sanitize=True)

    def voxelize(coords, features):
        # Same voxel/hash functions and channel power for both molecules.
        return featurizer._voxelize(
            rgf.convert_atom_to_voxel,
            rgf.hash_ecfp,
            coords,
            feature_dict=features,
            channel_power=power)

    # Three spatial axes of grid_size cells plus 2**power feature channels.
    expected_shape = (grid_size, grid_size, grid_size, 2**power)

    protein_grid = voxelize(protein_coords, protein_features)
    self.assertEqual(protein_grid.shape, expected_shape)
    protein_total = protein_grid.sum()
    # The protein is too big for the box, so some (but not all) of its
    # features should be missing from the grid.
    self.assertGreater(protein_total, 0)
    self.assertLess(protein_total, protein_mol.GetNumAtoms())

    ligand_grid = voxelize(ligand_coords, ligand_features)
    self.assertEqual(ligand_grid.shape, expected_shape)
    ligand_total = ligand_grid.sum()
    # The whole ligand should fit in the box: one feature per atom.
    self.assertEqual(ligand_total, ligand_mol.GetNumAtoms())