def test_pdbqt_to_pdb(self):
    """Test that a PDBQT molecule can be converted back in to PDB.

    The protein is written as both PDB and PDBQT. The PDBQT file is turned
    into a PDB block, parsed, and compared atom by atom with the molecule
    loaded directly from the PDBQT file.
    """
    from rdkit import Chem

    _, protein = rdkit_util.load_molecule(
        self.protein_file, calc_charges=False, add_hydrogens=False)
    with tempfile.TemporaryDirectory() as workdir:
        pdb_path = os.path.join(workdir, "mol.pdb")
        pdbqt_path = os.path.join(workdir, "mol.pdbqt")

        rdkit_util.write_molecule(protein, pdb_path, is_protein=True)
        rdkit_util.write_molecule(protein, pdbqt_path, is_protein=True)

        block = pdbqt_utils.pdbqt_to_pdb(pdbqt_path)
        from_block = Chem.MolFromPDBBlock(block, sanitize=False, removeHs=False)
        _, from_pdbqt = rdkit_util.load_molecule(
            pdbqt_path, add_hydrogens=False, calc_charges=False)

    assert from_block.GetNumAtoms() == from_pdbqt.GetNumAtoms()
    for block_atom, pdbqt_atom in zip(from_block.GetAtoms(),
                                      from_pdbqt.GetAtoms()):
        assert block_atom.GetAtomicNum() == pdbqt_atom.GetAtomicNum()
def extract_active_site(protein_file, ligand_file, cutoff=4):
    """Extracts a box for the active site.

    Parameters
    ----------
    protein_file: str
        Location of the protein file; loaded without adding hydrogens.
    ligand_file: str
        Location of the ligand file; loaded with hydrogens and charges added.
    cutoff: float, optional
        A protein atom is part of the pocket when it lies strictly closer
        than this distance to at least one ligand atom (same units as the
        loaded coordinates -- presumably angstroms, confirm with loader).

    Returns
    -------
    tuple
        `(((x_min, x_max), (y_min, y_max), (z_min, z_max)), pocket_atoms,
        pocket_coords)`. The box bounds are integers (floor of the minimum,
        ceil of the maximum), `pocket_atoms` is the ascending list of protein
        atom indices in the pocket and `pocket_coords` is the matching
        `(n_pocket_atoms, 3)` array.

    Raises
    ------
    ValueError
        If no protein atom lies within `cutoff` of any ligand atom.
    """
    protein_coords = np.asarray(
        rdkit_util.load_molecule(protein_file, add_hydrogens=False)[0],
        dtype=float).reshape(-1, 3)
    ligand_coords = rdkit_util.load_molecule(
        ligand_file, add_hydrogens=True, calc_charges=True)[0]

    # One vectorized distance computation per ligand atom instead of a
    # Python-level loop over every (ligand atom, protein atom) pair.
    in_pocket = np.zeros(len(protein_coords), dtype=bool)
    for lig_atom in ligand_coords:
        in_pocket |= np.linalg.norm(protein_coords - lig_atom, axis=1) < cutoff

    # Ascending indices keep the output deterministic and aligned with
    # the rows of pocket_coords.
    pocket_atoms = np.flatnonzero(in_pocket).tolist()
    if not pocket_atoms:
        raise ValueError(
            "No protein atoms found within cutoff %s of the ligand" % cutoff)

    # Should be an array of size (n_pocket_atoms, 3)
    pocket_coords = protein_coords[pocket_atoms]
    lower = np.floor(pocket_coords.min(axis=0))
    upper = np.ceil(pocket_coords.max(axis=0))
    box = tuple((int(lo), int(hi)) for lo, hi in zip(lower, upper))
    return (box, pocket_atoms, pocket_coords)
def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Featurize the ligand, the protein and their merged complex.

    Both files are loaded sanitized and without added hydrogens, merged into
    one system, stripped of hydrogens and passed to `self.featurize_mol`.

    Parameters
    ----------
    mol_pdb_file: str
        File for fragment 1 (loaded with `is_protein=False`).
    protein_pdb_file: str
        File for fragment 2 (loaded with `is_protein=True`).

    Returns
    -------
    tuple or None
        Coordinates, neighbor list and `z` for fragment 1, fragment 2 and the
        complex (nine entries). `None` if loading raises
        `MoleculeLoadException` or featurization raises `ValueError`.
    """
    try:
        ligand_xyz, ligand = rdkit_util.load_molecule(
            mol_pdb_file, is_protein=False, sanitize=True, add_hydrogens=False)
        protein_xyz, protein = rdkit_util.load_molecule(
            protein_pdb_file,
            is_protein=True,
            sanitize=True,
            add_hydrogens=False)
    except MoleculeLoadException:
        # Currently handles loading failures by returning None
        # TODO: Is there a better handling procedure?
        logging.warning("Some molecules cannot be loaded by Rdkit. Skipping")
        return None

    # The complex is built from the fragments before hydrogens are stripped.
    complex_mol = rdkit_util.merge_molecules([ligand, protein])
    complex_xyz = rdkit_util.get_xyz_from_mol(complex_mol)

    ligand_xyz, ligand = self._strip_hydrogens(ligand_xyz, ligand)
    protein_xyz, protein = self._strip_hydrogens(protein_xyz, protein)
    complex_xyz, complex_mol = self._strip_hydrogens(complex_xyz, complex_mol)

    try:
        ligand_xyz, ligand_nbrs, ligand_z = self.featurize_mol(
            ligand_xyz, ligand, self.frag1_num_atoms)
        protein_xyz, protein_nbrs, protein_z = self.featurize_mol(
            protein_xyz, protein, self.frag2_num_atoms)
        complex_xyz, complex_nbrs, complex_z = self.featurize_mol(
            complex_xyz, complex_mol, self.complex_num_atoms)
    except ValueError:
        logging.warning(
            "max_atoms was set too low. Some complexes too large and skipped")
        return None

    return (ligand_xyz, ligand_nbrs, ligand_z,
            protein_xyz, protein_nbrs, protein_z,
            complex_xyz, complex_nbrs, complex_z)
def extract_active_site(protein_file, ligand_file, cutoff=4):
    """Extracts a box for the active site.

    Parameters
    ----------
    protein_file: str
        Location of protein PDB
    ligand_file: str
        Location of ligand input file
    cutoff: int, optional
        Contact cutoff forwarded to `get_contact_atom_indices`.

    Returns
    -------
    A tuple of `(CoordinateBox, np.ndarray)`. The box spans the floor of the
    minimum and the ceil of the maximum pocket coordinate along each axis;
    the array holds the coordinates of the protein contact atoms.
    """
    protein = rdkit_util.load_molecule(protein_file, add_hydrogens=False)
    ligand = rdkit_util.load_molecule(
        ligand_file, add_hydrogens=True, calc_charges=True)

    # Only the protein side of the contact indices is needed here.
    protein_contacts, _ = get_contact_atom_indices(
        [protein, ligand], cutoff=cutoff)
    pocket_coords = protein[0][protein_contacts]

    lower = np.floor(pocket_coords.min(axis=0))
    upper = np.ceil(pocket_coords.max(axis=0))
    x_range = (int(lower[0]), int(upper[0]))
    y_range = (int(lower[1]), int(upper[1]))
    z_range = (int(lower[2]), int(upper[2]))
    return (box_utils.CoordinateBox(x_range, y_range, z_range), pocket_coords)
def test_pdbqt_to_pdb(self):
    """Test that a PDBQT file converts back to a PDB block with the same atoms.

    Output files go into a temporary directory, so the test does not depend
    on a writable `/tmp`, cannot collide with other runs, and leaves nothing
    behind when an assertion fails.
    """
    import tempfile
    from rdkit import Chem

    current_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(current_dir,
                                "../../dock/tests/1jld_protein.pdb")
    _, mol = rdkit_util.load_molecule(
        protein_file, calc_charges=False, add_hydrogens=False)

    with tempfile.TemporaryDirectory() as tmp:
        out_pdb = os.path.join(tmp, "mol.pdb")
        out_pdbqt = os.path.join(tmp, "mol.pdbqt")

        rdkit_util.write_molecule(mol, out_pdb)
        rdkit_util.write_molecule(mol, out_pdbqt, is_protein=True)

        pdb_block = rdkit_util.pdbqt_to_pdb(out_pdbqt)
        pdb_mol = Chem.MolFromPDBBlock(pdb_block, sanitize=False, removeHs=False)
        _, pdbqt_mol = rdkit_util.load_molecule(
            out_pdbqt, add_hydrogens=False, calc_charges=False)

    assert_equal(pdb_mol.GetNumAtoms(), pdbqt_mol.GetNumAtoms())
    for atom1, atom2 in zip(pdb_mol.GetAtoms(), pdbqt_mol.GetAtoms()):
        assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
def test_pdbqt_to_pdb(self):
    """Test that a PDBQT file converts back to a PDB block with the same atoms.

    Output files go into a temporary directory, so the test does not depend
    on a writable `/tmp`, cannot collide with other runs, and leaves nothing
    behind when an assertion fails.
    """
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(current_dir,
                                "../../dock/tests/1jld_protein.pdb")
    _, mol = rdkit_util.load_molecule(
        protein_file, calc_charges=False, add_hydrogens=False)

    with tempfile.TemporaryDirectory() as tmp:
        out_pdb = os.path.join(tmp, "mol.pdb")
        out_pdbqt = os.path.join(tmp, "mol.pdbqt")

        rdkit_util.write_molecule(mol, out_pdb)
        rdkit_util.write_molecule(mol, out_pdbqt, is_protein=True)

        pdb_block = rdkit_util.pdbqt_to_pdb(out_pdbqt)
        pdb_mol = Chem.MolFromPDBBlock(pdb_block, sanitize=False, removeHs=False)
        _, pdbqt_mol = rdkit_util.load_molecule(
            out_pdbqt, add_hydrogens=False, calc_charges=False)

    assert_equal(pdb_mol.GetNumAtoms(), pdbqt_mol.GetNumAtoms())
    for atom1, atom2 in zip(pdb_mol.GetAtoms(), pdbqt_mol.GetAtoms()):
        assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
def test_load_molecule(self):
    """Loading the 1jld ligand yields both coordinates and a molecule."""
    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    loaded = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    coords, molecule = loaded
    assert_true(coords is not None)
    assert_true(molecule is not None)
def test_merge_molecules(self):
    """Merging two ligands gives a molecule with the summed atom count."""
    here = os.path.dirname(os.path.realpath(__file__))
    first_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    _, first = rdkit_util.load_molecule(
        first_path, calc_charges=False, add_hydrogens=False)
    # self.ligand_file is for 3ws9_ligand.sdf
    _, second = rdkit_util.load_molecule(
        self.ligand_file, calc_charges=False, add_hydrogens=False)
    expected_atoms = first.GetNumAtoms() + second.GetNumAtoms()

    combined = rdkit_util.merge_molecules([first, second])
    assert combined.GetNumAtoms() == expected_atoms
def find_pockets(self, protein_file, ligand_file):
    """Find list of suitable binding pockets on protein.

    Returns
    -------
    tuple
        `(pockets, pocket_atoms_map, pocket_coords)` where the first two come
        from `merge_overlapping_boxes` and `pocket_coords` holds, per pocket,
        an `(n_atoms, 3)` array of the protein coordinates of its atoms.
    """
    protein_coords = rdkit_util.load_molecule(
        protein_file, add_hydrogens=False, calc_charges=False)[0]
    # NOTE(review): the ligand is loaded but its coordinates are never used.
    _ligand_coords = rdkit_util.load_molecule(
        ligand_file, add_hydrogens=False, calc_charges=False)[0]

    boxes = get_all_boxes(protein_coords, self.pad)
    pockets, pocket_atoms_map = merge_overlapping_boxes(
        boxes_to_atoms(protein_coords, boxes), boxes)

    def _gather(atom_indices):
        # Copy the coordinates of the listed atoms into a fresh float array.
        block = np.zeros((len(atom_indices), 3))
        for row, atom in enumerate(atom_indices):
            block[row] = protein_coords[atom]
        return block

    pocket_coords = [_gather(pocket_atoms_map[pocket]) for pocket in pockets]
    return pockets, pocket_atoms_map, pocket_coords
def test_load_molecule2(self):
    """Loading the 1jld ligand yields both coordinates and a molecule."""
    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    loaded = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    coords, molecule = loaded
    assert coords is not None
    assert molecule is not None
def test_write_molecule(self):
    """Test that a molecule written to SDF reloads with the same atoms.

    The reloaded molecule (`mol2`) is compared against the original; the
    previous version compared `mol` with itself, so the per-atom check could
    never fail. The file is written into a temporary directory so nothing is
    left behind when an assertion fails.
    """
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=False)

    with tempfile.TemporaryDirectory() as tmp:
        outfile = os.path.join(tmp, "mol.sdf")
        rdkit_util.write_molecule(mol, outfile)
        _, mol2 = rdkit_util.load_molecule(
            outfile, calc_charges=False, add_hydrogens=False)

    assert_equal(mol.GetNumAtoms(), mol2.GetNumAtoms())
    for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
        assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """
    Compute neighbor list for complex.

    Parameters
    ----------
    mol_pdb_file: str
        Ligand file passed to `rdkit_util.load_molecule`.
    protein_pdb_file: str
        Protein file passed to `rdkit_util.load_molecule`.

    Returns
    -------
    tuple
        `(system_coords, system_neighbor_list)` for the merged coordinates.
    """
    ligand_xyz, _ = rdkit_util.load_molecule(mol_pdb_file)
    protein_xyz, _ = rdkit_util.load_molecule(protein_pdb_file)
    merged_xyz = rdkit_util.merge_molecules_xyz(ligand_xyz, protein_xyz)
    neighbors = compute_neighbor_list(
        merged_xyz, self.neighbor_cutoff, self.max_num_neighbors, None)
    return (merged_xyz, neighbors)
def find_all_pockets(self, protein_file):
    """Find list of binding pockets on protein.

    Parameters
    ----------
    protein_file: str
        Protein to load in.

    Returns
    -------
    The result of `box_utils.get_face_boxes` on the protein coordinates,
    using `self.pad` as the padding argument.
    """
    protein_xyz = rdkit_util.load_molecule(protein_file)[0]
    face_boxes = box_utils.get_face_boxes(protein_xyz, self.pad)
    return face_boxes
def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """
    Compute neighbor list for complex.

    Parameters
    ----------
    mol_pdb_file: Str
        Filename for ligand pdb file.
    protein_pdb_file: Str
        Filename for protein pdb file.

    Returns
    -------
    tuple
        `(system_coords, system_neighbor_list)` for the merged coordinates.
    """
    ligand_xyz, _ = rdkit_util.load_molecule(mol_pdb_file)
    protein_xyz, _ = rdkit_util.load_molecule(protein_pdb_file)
    merged_xyz = rdkit_util.merge_molecules_xyz([ligand_xyz, protein_xyz])
    neighbors = compute_neighbor_list(
        merged_xyz, self.neighbor_cutoff, self.max_num_neighbors, None)
    return (merged_xyz, neighbors)
def test_get_xyz_from_mol(self):
    """Coordinates re-extracted from the molecule match the loaded ones."""
    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    loaded_xyz, molecule = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    extracted_xyz = rdkit_util.get_xyz_from_mol(molecule)
    assert_true(np.all(loaded_xyz == extracted_xyz))
def test_convert_protein_to_pdbqt(self):
    """Test a protein in a PDB can be converted to PDBQT.

    The molecule reloaded from the PDBQT file is compared against the
    original protein. The previous version compared `pdbqt_mol` with itself,
    so neither the atom-count nor the per-atom assertion could ever fail.
    """
    from rdkit import Chem

    _, mol = rdkit_util.load_molecule(
        self.protein_file, calc_charges=False, add_hydrogens=False)
    with tempfile.TemporaryDirectory() as tmp:
        outfile = os.path.join(tmp, "mol.pdbqt")
        # Write a plain PDB first; the converter then works on that file.
        writer = Chem.PDBWriter(outfile)
        writer.write(mol)
        writer.close()
        pdbqt_utils.convert_protein_to_pdbqt(mol, outfile)
        _, pdbqt_mol = rdkit_util.load_molecule(
            outfile, add_hydrogens=False, calc_charges=False)

    assert pdbqt_mol.GetNumAtoms() == mol.GetNumAtoms()
    for atom1, atom2 in zip(pdbqt_mol.GetAtoms(), mol.GetAtoms()):
        assert atom1.GetAtomicNum() == atom2.GetAtomicNum()
def _featurize_complex(self, frag1_pdb_file, frag2_pdb_file):
    """Featurize fragments and complex.

    Parameters
    ----------
    frag1_pdb_file: string
        Location of frag1_pdb_file.
    frag2_pdb_file: string
        Location of frag2_pdb_file.

    Returns
    -------
    retval: tuple
        Tuple containing coordinates, neighbor list, and atomic number for
        fragment 1, fragment 2, and complex. When either fragment cannot be
        loaded, a tuple of nine `None` values is returned instead.
    """
    try:
        frag1_mol = rdkit_util.load_molecule(
            frag1_pdb_file, add_hydrogens=False, calc_charges=False)[1]
        frag2_mol = rdkit_util.load_molecule(
            frag2_pdb_file, add_hydrogens=False, calc_charges=False)[1]
    except Exception:
        # Best effort: any loading error marks the complex as failed. Unlike a
        # bare `except:`, KeyboardInterrupt and SystemExit still propagate.
        frag1_mol = None
        frag2_mol = None

    if not (frag1_mol and frag2_mol):
        print("failed to featurize")
        return (None,) * 9

    frag1_coords, frag1_neighbor_list, frag1_z = (
        self.frag1_featurizer._featurize(frag1_mol))
    frag2_coords, frag2_neighbor_list, frag2_z = (
        self.frag2_featurizer._featurize(frag2_mol))
    complex_mol = Chem.rdmolops.CombineMols(frag1_mol, frag2_mol)
    complex_coords, complex_neighbor_list, complex_z = (
        self.complex_featurizer._featurize(complex_mol))

    return (frag1_coords, frag1_neighbor_list, frag1_z,
            frag2_coords, frag2_neighbor_list, frag2_z,
            complex_coords, complex_neighbor_list, complex_z)
def test_merge_molecules_xyz(self):
    """Merging coordinates with themselves stacks two identical copies."""
    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    coords, _ = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    stacked = rdkit_util.merge_molecules_xyz(coords, coords)

    n_atoms = len(coords)
    for row, atom_xyz in enumerate(coords):
        in_first_copy = np.all(atom_xyz == stacked[row])
        in_second_copy = np.all(atom_xyz == stacked[row + n_atoms])
        assert_true(in_first_copy)
        assert_true(in_second_copy)
def test_load_molecule(self):
    """Loaded ligand is an ndarray plus an RDKit Mol for every flag combination."""
    # adding hydrogens and charges is tested in dc.utils
    from rdkit.Chem.AllChem import Mol

    flag_grid = [(hydrogens, charges)
                 for hydrogens in (True, False)
                 for charges in (True, False)]
    for add_hydrogens, calc_charges in flag_grid:
        coords, molecule = rdkit_util.load_molecule(
            self.ligand_file, add_hydrogens, calc_charges)
        expected_shape = (molecule.GetNumAtoms(), 3)
        self.assertIsInstance(coords, np.ndarray)
        self.assertIsInstance(molecule, Mol)
        self.assertEqual(coords.shape, expected_shape)
def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Featurize the ligand, the protein and their merged complex.

    Both files are loaded with the loader defaults, merged into one system,
    stripped of hydrogens and passed to `self.featurize_mol`.

    Returns
    -------
    tuple
        Coordinates, neighbor list and `z` for fragment 1, fragment 2 and the
        complex (nine entries).
    """
    ligand_xyz, ligand = rdkit_util.load_molecule(mol_pdb_file)
    protein_xyz, protein = rdkit_util.load_molecule(protein_pdb_file)

    # The complex is built from the fragments before hydrogens are stripped.
    complex_mol = rdkit_util.merge_molecules(ligand, protein)
    complex_xyz = rdkit_util.get_xyz_from_mol(complex_mol)

    ligand_xyz, ligand = self._strip_hydrogens(ligand_xyz, ligand)
    protein_xyz, protein = self._strip_hydrogens(protein_xyz, protein)
    complex_xyz, complex_mol = self._strip_hydrogens(complex_xyz, complex_mol)

    ligand_xyz, ligand_nbrs, ligand_z = self.featurize_mol(
        ligand_xyz, ligand, self.frag1_num_atoms)
    protein_xyz, protein_nbrs, protein_z = self.featurize_mol(
        protein_xyz, protein, self.frag2_num_atoms)
    complex_xyz, complex_nbrs, complex_z = self.featurize_mol(
        complex_xyz, complex_mol, self.complex_num_atoms)

    return (ligand_xyz, ligand_nbrs, ligand_z,
            protein_xyz, protein_nbrs, protein_z,
            complex_xyz, complex_nbrs, complex_z)
def test_get_face_boxes_for_protein(self):
    """Tests that binding pockets are detected."""
    here = os.path.dirname(os.path.realpath(__file__))
    protein_path = os.path.join(here, "1jld_protein.pdb")
    protein_xyz, _ = rdkit_util.load_molecule(protein_path)

    face_boxes = box_utils.get_face_boxes(protein_xyz)
    assert isinstance(face_boxes, list)
    # Every detected pocket must be a CoordinateBox.
    for candidate in face_boxes:
        assert isinstance(candidate, box_utils.CoordinateBox)
def test_write_molecule(self):
    """Test that a molecule written to SDF reloads with the same atoms.

    The reloaded molecule (`mol2`) is compared against the original; the
    previous version compared `mol` with itself, so the per-atom check could
    never fail. The file is written into a temporary directory so nothing is
    left behind when an assertion fails.
    """
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=False)

    with tempfile.TemporaryDirectory() as tmp:
        outfile = os.path.join(tmp, "mol.sdf")
        rdkit_util.write_molecule(mol, outfile)
        _, mol2 = rdkit_util.load_molecule(
            outfile, calc_charges=False, add_hydrogens=False)

    assert_equal(mol.GetNumAtoms(), mol2.GetNumAtoms())
    for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
        assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
def test_get_xyz_from_mol(self):
    """Coordinates re-extracted from the molecule match the loaded ones."""
    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    loaded_xyz, molecule = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    extracted_xyz = rdkit_util.get_xyz_from_mol(molecule)
    assert np.all(loaded_xyz == extracted_xyz)
def test_write_molecule(self):
    """Test that a molecule written to SDF reloads with the same atoms.

    The reloaded molecule (`mol2`) is compared against the original; the
    previous version compared `mol` with itself, so the per-atom check could
    never fail.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=False)

    with tempfile.TemporaryDirectory() as tmp:
        outfile = os.path.join(tmp, "mol.sdf")
        rdkit_util.write_molecule(mol, outfile)
        _, mol2 = rdkit_util.load_molecule(
            outfile, calc_charges=False, add_hydrogens=False)

    assert mol.GetNumAtoms() == mol2.GetNumAtoms()
    for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
        assert atom1.GetAtomicNum() == atom2.GetAtomicNum()
def test_merge_molecules_xyz(self):
    """Merging coordinates with themselves stacks two identical copies."""
    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    coords, _ = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    stacked = rdkit_util.merge_molecules_xyz([coords, coords])

    n_atoms = len(coords)
    for row, atom_xyz in enumerate(coords):
        in_first_copy = np.all(atom_xyz == stacked[row])
        in_second_copy = np.all(atom_xyz == stacked[row + n_atoms])
        assert in_first_copy
        assert in_second_copy
def test_compute_charges(self):
    """Test that computing charges gives at least one atom a nonzero charge.

    `GetProp` returns the property as a string, so it is converted to a
    float before the comparison. Comparing the raw string with `0` was
    always true and made the test pass for any molecule with atoms.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=True)
    rdkit_util.compute_charges(mol)

    # Read the property on every atom so a missing charge still raises.
    charges = [
        float(atom.GetProp(str("_GasteigerCharge"))) for atom in mol.GetAtoms()
    ]
    has_a_charge = any(charge != 0 for charge in charges)
    assert_true(has_a_charge)
def test_boxes_to_atoms(self, postfix_directory):
    """Test that mapping of protein atoms to boxes is meaningful."""
    protein_path = os.path.join(postfix_directory, "PfATP4.pdb")
    protein_xyz, _ = rdkit_util.load_molecule(protein_path)

    all_boxes = dc.dock.binding_pocket.get_all_boxes(protein_xyz)
    atoms_by_box = dc.dock.binding_pocket.boxes_to_atoms(protein_xyz, all_boxes)
    assert isinstance(atoms_by_box, dict)

    # Every atom assigned to a box must lie inside that box on all three axes.
    for box, members in atoms_by_box.items():
        (x_lo, x_hi), (y_lo, y_hi), (z_lo, z_hi) = box
        for member in members:
            position = protein_xyz[member]
            assert x_lo <= position[0] <= x_hi
            assert y_lo <= position[1] <= y_hi
            assert z_lo <= position[2] <= z_hi
def test_compute_charges(self):
    """Test that computing charges gives at least one atom a nonzero charge.

    `GetProp` returns the property as a string, so it is converted to a
    float before the comparison. Comparing the raw string with `0` was
    always true and made the test pass for any molecule with atoms.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=True)
    rdkit_util.compute_charges(mol)

    # Read the property on every atom so a missing charge still raises.
    charges = [
        float(atom.GetProp(str("_GasteigerCharge"))) for atom in mol.GetAtoms()
    ]
    has_a_charge = any(charge != 0 for charge in charges)
    assert has_a_charge
def test_boxes_to_atoms(self):
    """Test that mapping of protein atoms to boxes is meaningful."""
    here = os.path.dirname(os.path.realpath(__file__))
    protein_path = os.path.join(here, "1jld_protein.pdb")
    protein_xyz, _ = rdkit_util.load_molecule(protein_path)

    all_boxes = dc.dock.binding_pocket.get_all_boxes(protein_xyz)
    atoms_by_box = dc.dock.binding_pocket.boxes_to_atoms(protein_xyz, all_boxes)
    assert isinstance(atoms_by_box, dict)

    # Every atom assigned to a box must lie inside that box on all three axes.
    for box, members in atoms_by_box.items():
        (x_lo, x_hi), (y_lo, y_hi), (z_lo, z_hi) = box
        for member in members:
            position = protein_xyz[member]
            assert x_lo <= position[0] <= x_hi
            assert y_lo <= position[1] <= y_hi
            assert z_lo <= position[2] <= z_hi
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
    """Outputs a hydrogenated pdb and a pdbqt with partial charges.

    The input is loaded with hydrogens and charges added, then written twice
    through `rdkit_util.write_molecule`: once to `hyd_output` and once to
    `pdbqt_output`.

    TODO(rbharath): Can do a bit of refactoring between this function and
    pdbqt_to_pdb.

    Parameters
    ----------
    input_file: String
        Path to input file.
    input_format: String
        Name of input format. Not used by this function.
    hyd_output: String, optional
        Destination of the hydrogenated molecule (passed through `str`).
    pdbqt_output: String, optional
        Destination of the pdbqt file (passed through `str`).
    protein: bool, optional
        Forwarded as `is_protein`; also enables the final rewrite pass.
    verbose: bool, optional
        Print progress messages for the two write steps.
    """
    _, molecule = rdkit_util.load_molecule(
        input_file, add_hydrogens=True, calc_charges=True)

    if verbose:
        print("Create pdb with hydrogens added")
    rdkit_util.write_molecule(molecule, str(hyd_output), is_protein=protein)

    if verbose:
        print("Create a pdbqt file from the hydrogenated pdb above.")
    rdkit_util.write_molecule(molecule, str(pdbqt_output), is_protein=protein)

    if not protein:
        return

    print("Removing ROOT/ENDROOT/TORSDOF")
    with open(pdbqt_output) as handle:
        contents = handle.readlines()
    # NOTE(review): every line is kept; nothing is actually filtered out.
    kept = list(contents)
    with open(pdbqt_output, "w") as handle:
        handle.writelines(kept)
def test_add_hydrogens_to_mol(self):
    """Adding hydrogens never lowers the hydrogen count of the ligand."""

    def _count_hydrogens(molecule):
        # Atomic number 1 identifies hydrogen atoms.
        return sum(1 for atom in molecule.GetAtoms()
                   if atom.GetAtomicNum() == 1)

    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    _, ligand = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    hydrogens_before = _count_hydrogens(ligand)

    ligand = rdkit_util.add_hydrogens_to_mol(ligand)
    hydrogens_after = _count_hydrogens(ligand)
    assert_true(hydrogens_after >= hydrogens_before)
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
    """Outputs a hydrogenated pdb and a pdbqt with partial charges.

    The input is loaded with hydrogens and charges added, then written twice
    through `rdkit_util.write_molecule`: once to `hyd_output` and once to
    `pdbqt_output`.

    TODO(rbharath): Can do a bit of refactoring between this function and
    pdbqt_to_pdb.

    Parameters
    ----------
    input_file: String
        Path to input file.
    input_format: String
        Name of input format. Not used by this function.
    hyd_output: String, optional
        Destination of the hydrogenated molecule (passed through `str`).
    pdbqt_output: String, optional
        Destination of the pdbqt file (passed through `str`).
    protein: bool, optional
        Forwarded as `is_protein`; also enables the final rewrite pass.
    verbose: bool, optional
        Log progress messages for the two write steps.
    """
    _, molecule = rdkit_util.load_molecule(
        input_file, add_hydrogens=True, calc_charges=True)

    if verbose:
        logging.info("Create pdb with hydrogens added")
    rdkit_util.write_molecule(molecule, str(hyd_output), is_protein=protein)

    if verbose:
        logging.info("Create a pdbqt file from the hydrogenated pdb above.")
    rdkit_util.write_molecule(molecule, str(pdbqt_output), is_protein=protein)

    if not protein:
        return

    logging.info("Removing ROOT/ENDROOT/TORSDOF")
    with open(pdbqt_output) as handle:
        contents = handle.readlines()
    # NOTE(review): every line is kept; nothing is actually filtered out.
    kept = list(contents)
    with open(pdbqt_output, "w") as handle:
        handle.writelines(kept)
def test_get_all_boxes(self):
    """Tests that binding pockets are detected."""
    here = os.path.dirname(os.path.realpath(__file__))
    protein_path = os.path.join(here, "1jld_protein.pdb")
    protein_xyz, _ = rdkit_util.load_molecule(protein_path)

    all_boxes = dc.dock.binding_pocket.get_all_boxes(protein_xyz)
    assert isinstance(all_boxes, list)
    # Pocket is of form ((x_min, x_max), (y_min, y_max), (z_min, z_max))
    for candidate in all_boxes:
        assert len(candidate) == 3
        for axis in range(3):
            assert len(candidate[axis]) == 2
        x_range, y_range, z_range = candidate
        for lower, upper in (x_range, y_range, z_range):
            assert lower < upper
def test_get_all_boxes(self, postfix_directory):
    """Tests that binding pockets are detected."""
    print("Test_All_Boxes")
    protein_path = os.path.join(postfix_directory, "PfATP4.pdb")
    protein_xyz, _ = rdkit_util.load_molecule(protein_path)

    all_boxes = dc.dock.binding_pocket.get_all_boxes(protein_xyz)
    assert isinstance(all_boxes, list)
    # Pocket is of form ((x_min, x_max), (y_min, y_max), (z_min, z_max))
    for candidate in all_boxes:
        assert len(candidate) == 3
        for axis in range(3):
            assert len(candidate[axis]) == 2
        x_range, y_range, z_range = candidate
        for lower, upper in (x_range, y_range, z_range):
            assert lower < upper
def test_add_hydrogens_to_mol(self):
    """Adding hydrogens never lowers the hydrogen count of the ligand."""

    def _count_hydrogens(molecule):
        # Atomic number 1 identifies hydrogen atoms.
        return sum(1 for atom in molecule.GetAtoms()
                   if atom.GetAtomicNum() == 1)

    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    _, ligand = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    hydrogens_before = _count_hydrogens(ligand)

    ligand = rdkit_util.add_hydrogens_to_mol(ligand)
    hydrogens_after = _count_hydrogens(ligand)
    assert_true(hydrogens_after >= hydrogens_before)
def find_pockets(self, macromolecule_file):
    """Find list of suitable binding pockets on protein.

    The macromolecule coordinates are turned into face boxes by
    `box_utils.get_face_boxes` (padded by `self.pad`), and overlapping boxes
    are then merged by `box_utils.merge_overlapping_boxes`.

    Parameters
    ----------
    macromolecule_file: str
        Location of the macromolecule file to load

    Returns
    -------
    List of pockets. Each pocket is a `CoordinateBox`
    """
    macromolecule_xyz, _ = rdkit_util.load_molecule(
        macromolecule_file, add_hydrogens=False, calc_charges=False)
    face_boxes = box_utils.get_face_boxes(macromolecule_xyz, self.pad)
    return box_utils.merge_overlapping_boxes(face_boxes)
def featurize(self, protein_file, pockets):
    """
    Count, per pocket, how many of its atoms fall in each residue type.

    Parameters
    ----------
    protein_file: str
        Location of PDB file. Loaded through `rdkit_util` for coordinates and
        through MDTraj for atom names.
    pockets: list[CoordinateBox]
        List of `dc.utils.CoordinateBox` objects.

    Returns
    -------
    A numpy array of shape `(len(pockets), n_residues)`, where `n_residues`
    is `len(BindingPocketFeaturizer.residues)`.
    """
    import mdtraj

    protein_xyz = rdkit_util.load_molecule(
        protein_file, add_hydrogens=False, calc_charges=False)[0]
    atoms_by_pocket = boxes_to_atoms(protein_xyz, pockets)
    trajectory = mdtraj.load(protein_file)

    residue_names = BindingPocketFeaturizer.residues
    n_residues = len(residue_names)
    column_of = {name: column for column, name in enumerate(residue_names)}

    counts = np.zeros((len(pockets), n_residues))
    for row, pocket in enumerate(pockets):
        for atom_index in atoms_by_pocket[pocket]:
            # atom_name is of format RESX-ATOMTYPE
            # where X is a 1 to 4 digit number
            atom_name = str(trajectory.top.atom(atom_index))
            residue = atom_name[:3]
            if residue not in column_of:
                logger.info("Warning: Non-standard residue in PDB file")
                continue
            # Parsed as in the original flow; the value itself is not used.
            _atomtype = atom_name.split("-")[1]
            counts[row, column_of[residue]] += 1
    return counts
def test_apply_pdbfixer(self):
    """Applying pdbfixer with hydrogenation never lowers the hydrogen count."""

    def _count_hydrogens(molecule):
        # Atomic number 1 identifies hydrogen atoms.
        return sum(1 for atom in molecule.GetAtoms()
                   if atom.GetAtomicNum() == 1)

    here = os.path.dirname(os.path.realpath(__file__))
    sdf_path = os.path.join(here, "../../dock/tests/1jld_ligand.sdf")
    _, ligand = rdkit_util.load_molecule(
        sdf_path, calc_charges=False, add_hydrogens=False)
    hydrogens_before = _count_hydrogens(ligand)
    assert ligand is not None

    ligand = rdkit_util.apply_pdbfixer(
        ligand, hydrogenate=True, is_protein=False)
    assert ligand is not None
    hydrogens_after = _count_hydrogens(ligand)
    assert hydrogens_after >= hydrogens_before
def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Computes grid featurization of protein/ligand complex.

    The protein and ligand are loaded with charges computed, both coordinate
    sets are shifted by the ligand centroid, and `self.nb_rotations` rotated
    copies of the system are generated. Every feature named in
    `self.feature_types` is computed for each copy and concatenated.

    Parameters
    ----------
    mol_pdb_file: Str
        Filename for ligand pdb file.
    protein_pdb_file: Str
        Filename for protein pdb file.

    Returns
    -------
    The squeezed array of per-system features, or `None` when loading raises
    `MoleculeLoadException`.
    """
    try:
        started = time.time()
        protein_xyz, protein_rdk = load_molecule(
            protein_pdb_file, calc_charges=True, sanitize=self.sanitize)
        finished = time.time()
        log("TIMING: Loading protein coordinates took %0.3f s" %
            (finished - started), self.verbose)

        started = time.time()
        ligand_xyz, ligand_rdk = load_molecule(
            mol_pdb_file, calc_charges=True, sanitize=self.sanitize)
        finished = time.time()
        log("TIMING: Loading ligand coordinates took %0.3f s" %
            (finished - started), self.verbose)
    except MoleculeLoadException:
        logging.warning("Some molecules cannot be loaded by Rdkit. Skipping")
        return None

    started = time.time()
    centroid = compute_centroid(ligand_xyz)
    ligand_xyz = subtract_centroid(ligand_xyz, centroid)
    protein_xyz = subtract_centroid(protein_xyz, centroid)
    finished = time.time()
    log("TIMING: Centroid processing took %0.3f s" % (finished - started),
        self.verbose)

    # Distances are computed once, on the unrotated system, and reused for
    # every transformed copy below.
    pairwise_distances = compute_pairwise_distances(protein_xyz, ligand_xyz)

    transformed_systems = {(0, 0): [protein_xyz, ligand_xyz]}
    for rotation in range(self.nb_rotations):
        transformed_systems[(rotation + 1, 0)] = rotate_molecules(
            [protein_xyz, ligand_xyz])

    features_dict = {}
    for system_id, system in transformed_systems.items():
        system_protein_xyz, system_ligand_xyz = system
        feature_arrays = []
        for _is_flat, function_name in self.feature_types:
            feature_arrays.extend(
                self._compute_feature(
                    function_name,
                    system_protein_xyz,
                    protein_rdk,
                    system_ligand_xyz,
                    ligand_rdk,
                    pairwise_distances,
                ))

        if self.flatten:
            flat_parts = [part.flatten() for part in feature_arrays]
            features_dict[system_id] = np.concatenate(flat_parts)
        else:
            features_dict[system_id] = np.concatenate(feature_arrays, axis=-1)

    # TODO(rbharath): Is this squeeze OK?
    stacked = np.array(list(features_dict.values()))
    return np.squeeze(stacked)
def find_all_pockets(self, protein_file):
    """Find list of binding pockets on protein.

    Returns the result of `get_all_boxes` on the protein coordinates, using
    `self.pad` as the padding argument.
    """
    # protein_coords is (N, 3) tensor
    protein_coords, _ = rdkit_util.load_molecule(protein_file)
    all_boxes = get_all_boxes(protein_coords, self.pad)
    return all_boxes
def _transform(self, protein_pdb, ligand_file):
    """Computes featurization of protein/ligand complex.

    Loads the protein (unless ``self.ligand_only`` is set) and the ligand,
    translates both so the ligand centroid is at the origin, and then
    computes the featurization selected by ``self.feature_types``:

    * ``"ecfp"``: returns ``{(0, 0): ecfp_array}`` for the ligand alone,
      before any centring is done.
    * ``"splif"``: returns ``{(0, 0): splif_array}``.
    * ``"flat_combined"``: returns the result of
      ``self._compute_flat_features``.
    * ``"voxel_combined"``: returns a dict mapping
      ``(rotation_id, reflection_id)`` to a voxel tensor built from every
      entry of ``self.voxel_feature_types``. Id ``(0, 0)`` is the untouched
      system.

    The feature types are checked in the order listed above and the first
    match wins. If none of them is configured the method returns ``None``.

    Note that the protein coordinates are only defined when
    ``self.ligand_only`` is false; every path other than ``"ecfp"`` reads
    them, so those paths require the protein to have been loaded.

    Parameters
    ----------
    protein_pdb: Str
        Filename for protein pdb file.
    ligand_file: Str
        Filename for ligand file.
    """

    def log_elapsed(label, started):
        # Emits the "TIMING: <label> took <seconds> s" line used throughout.
        log("TIMING: %s took %0.3f s" % (label, time.time() - started),
            self.verbose)

    started = time.time()
    if not self.ligand_only:
        protein_xyz, protein_ob = rdkit_util.load_molecule(
            protein_pdb, calc_charges=True)
    log_elapsed("Loading protein coordinates", started)

    started = time.time()
    ligand_xyz, ligand_ob = rdkit_util.load_molecule(
        ligand_file, calc_charges=True)
    log_elapsed("Loading ligand coordinates", started)

    if "ecfp" in self.feature_types:
        ecfp_array = compute_ecfp_features(ligand_ob, self.ecfp_degree,
                                           self.ecfp_power)
        return ({(0, 0): ecfp_array})

    # Re-centre the system on the ligand centroid.
    started = time.time()
    centroid = compute_centroid(ligand_xyz)
    ligand_xyz = subtract_centroid(ligand_xyz, centroid)
    if not self.ligand_only:
        protein_xyz = subtract_centroid(protein_xyz, centroid)
    log_elapsed("Centroid processing", started)

    if "splif" in self.feature_types:
        splif_array = self._featurize_splif(protein_xyz, protein_ob,
                                            ligand_xyz, ligand_ob)
        return ({(0, 0): splif_array})

    if "flat_combined" in self.feature_types:
        return (self._compute_flat_features(protein_xyz, protein_ob,
                                            ligand_xyz, ligand_ob))

    # Everything below precomputes, once, the per-atom / per-pair feature
    # tables that are later scattered into voxels for each transformed copy.
    pairwise_distances = compute_pairwise_distances(protein_xyz, ligand_xyz)

    if "ecfp" in self.voxel_feature_types:
        started = time.time()
        protein_ecfp_dict, ligand_ecfp_dict = (featurize_binding_pocket_ecfp(
            protein_xyz,
            protein_ob,
            ligand_xyz,
            ligand_ob,
            pairwise_distances,
            cutoff=4.5,
            ecfp_degree=self.ecfp_degree))
        log_elapsed("ecfp voxel computataion", started)

    if "splif" in self.voxel_feature_types:
        started = time.time()
        splif_dicts = featurize_splif(protein_xyz, protein_ob, ligand_xyz,
                                      ligand_ob, self.contact_bins,
                                      pairwise_distances, self.ecfp_degree)
        log_elapsed("splif voxel computataion", started)

    if "hbond" in self.voxel_feature_types:
        started = time.time()
        hbond_list = compute_hydrogen_bonds(
            protein_xyz, protein_ob, ligand_xyz, ligand_ob,
            pairwise_distances, self.hbond_dist_bins,
            self.hbond_angle_cutoffs, self.ecfp_degree)
        log_elapsed("hbond voxel computataion", started)

    if "sybyl" in self.voxel_feature_types:
        started = time.time()
        protein_sybyl_dict, ligand_sybyl_dict = featurize_binding_pocket_sybyl(
            protein_xyz,
            protein_ob,
            ligand_xyz,
            ligand_ob,
            pairwise_distances,
            cutoff=7.0)
        log_elapsed("sybyl voxel computataion", started)

    if "pi_stack" in self.voxel_feature_types:
        started = time.time()
        protein_pi_t, protein_pi_parallel, ligand_pi_t, ligand_pi_parallel = (
            compute_pi_stack(protein_xyz, protein_ob, ligand_xyz, ligand_ob,
                             pairwise_distances))
        log_elapsed("pi_stack voxel computataion", started)

    if "cation_pi" in self.voxel_feature_types:
        started = time.time()
        protein_cation_pi, ligand_cation_pi = (
            compute_binding_pocket_cation_pi(protein_xyz, protein_ob,
                                             ligand_xyz, ligand_ob))
        log_elapsed("cation_pi voxel computataion", started)

    if "salt_bridge" in self.voxel_feature_types:
        started = time.time()
        salt_bridge_list = compute_salt_bridges(
            protein_xyz, protein_ob, ligand_xyz, ligand_ob,
            pairwise_distances)
        log_elapsed("salt_bridge voxel computataion", started)

    if "charge" in self.voxel_feature_types:
        started = time.time()
        protein_charge_dictionary = compute_charge_dictionary(protein_ob)
        ligand_charge_dictionary = compute_charge_dictionary(ligand_ob)
        log_elapsed("charge voxel computataion", started)

    # (0, 0) is the original system, (i + 1, 0) the i-th rotation and
    # (i + 1, j + 1) the j-th reflection of that rotation.
    transformed_systems = {}
    transformed_systems[(0, 0)] = [protein_xyz, ligand_xyz]
    for i in range(0, int(self.nb_rotations)):
        rotated_system = rotate_molecules([protein_xyz, ligand_xyz])
        transformed_systems[(i + 1, 0)] = rotated_system
        for j in range(0, int(self.nb_reflections)):
            reflected_system = self._reflect_molecule(rotated_system)
            transformed_systems[(i + 1, j + 1)] = reflected_system

    if "voxel_combined" in self.feature_types:
        features = {}
        for system_id, system in transformed_systems.items():
            protein_xyz = system[0]
            ligand_xyz = system[1]
            feature_tensors = []

            if "ecfp" in self.voxel_feature_types:
                ecfp_tensor = self._voxelize(
                    convert_atom_to_voxel,
                    hash_ecfp,
                    protein_xyz,
                    feature_dict=protein_ecfp_dict,
                    channel_power=self.ecfp_power)
                ecfp_tensor += self._voxelize(
                    convert_atom_to_voxel,
                    hash_ecfp,
                    ligand_xyz,
                    feature_dict=ligand_ecfp_dict,
                    channel_power=self.ecfp_power)
                feature_tensors.append(ecfp_tensor)
                print("Completed ecfp tensor")

            if "splif" in self.voxel_feature_types:
                feature_tensors += [
                    self._voxelize(
                        convert_atom_pair_to_voxel,
                        hash_ecfp_pair, (protein_xyz, ligand_xyz),
                        feature_dict=splif_dict,
                        channel_power=self.splif_power)
                    for splif_dict in splif_dicts
                ]
                print("Completed splif tensor")

            if "hbond" in self.voxel_feature_types:
                feature_tensors += [
                    self._voxelize(
                        convert_atom_pair_to_voxel,
                        None, (protein_xyz, ligand_xyz),
                        feature_list=hbond,
                        channel_power=0) for hbond in hbond_list
                ]
                print("Completed hbond tensor")

            if "sybyl" in self.voxel_feature_types:
                # NOTE(review): an unused
                # partial(hash_sybyl, sybyl_types=self.sybyl_types) used to
                # be built here while the bare hash_sybyl was passed on. The
                # call is kept as it was; confirm which hasher _voxelize
                # actually expects.
                sybyl_tensor = self._voxelize(
                    convert_atom_to_voxel,
                    hash_sybyl,
                    protein_xyz,
                    feature_dict=protein_sybyl_dict,
                    nb_channel=len(self.sybyl_types))
                sybyl_tensor += self._voxelize(
                    convert_atom_to_voxel,
                    hash_sybyl,
                    ligand_xyz,
                    feature_dict=ligand_sybyl_dict,
                    nb_channel=len(self.sybyl_types))
                feature_tensors.append(sybyl_tensor)
                print("Completed sybyl tensor")

            if "pi_stack" in self.voxel_feature_types:
                pi_parallel_tensor = self._voxelize(
                    convert_atom_to_voxel,
                    None,
                    protein_xyz,
                    feature_dict=protein_pi_parallel,
                    nb_channel=1)
                pi_parallel_tensor += self._voxelize(
                    convert_atom_to_voxel,
                    None,
                    ligand_xyz,
                    feature_dict=ligand_pi_parallel,
                    nb_channel=1)
                feature_tensors.append(pi_parallel_tensor)

                pi_t_tensor = self._voxelize(
                    convert_atom_to_voxel,
                    None,
                    protein_xyz,
                    feature_dict=protein_pi_t,
                    nb_channel=1)
                pi_t_tensor += self._voxelize(
                    convert_atom_to_voxel,
                    None,
                    ligand_xyz,
                    feature_dict=ligand_pi_t,
                    nb_channel=1)
                feature_tensors.append(pi_t_tensor)
                print("Completed pi_stack tensor")

            if "cation_pi" in self.voxel_feature_types:
                cation_pi_tensor = self._voxelize(
                    convert_atom_to_voxel,
                    None,
                    protein_xyz,
                    feature_dict=protein_cation_pi,
                    nb_channel=1)
                cation_pi_tensor += self._voxelize(
                    convert_atom_to_voxel,
                    None,
                    ligand_xyz,
                    feature_dict=ligand_cation_pi,
                    nb_channel=1)
                feature_tensors.append(cation_pi_tensor)
                print("Completed cation_pi tensor.")

            if "salt_bridge" in self.voxel_feature_types:
                salt_bridge_tensor = self._voxelize(
                    convert_atom_pair_to_voxel,
                    None, (protein_xyz, ligand_xyz),
                    feature_list=salt_bridge_list,
                    nb_channel=1)
                feature_tensors.append(salt_bridge_tensor)
                print("Completed salt_bridge tensor.")

            if "charge" in self.voxel_feature_types:
                charge_tensor = self._voxelize(
                    convert_atom_to_voxel,
                    None,
                    protein_xyz,
                    feature_dict=protein_charge_dictionary,
                    nb_channel=1,
                    dtype="np.float16")
                charge_tensor += self._voxelize(
                    convert_atom_to_voxel,
                    None,
                    ligand_xyz,
                    feature_dict=ligand_charge_dictionary,
                    nb_channel=1,
                    dtype="np.float16")
                feature_tensors.append(charge_tensor)
                # This message used to repeat the salt_bridge text.
                print("Completed charge tensor.")

            # Charges are fractional, so the stacked tensor is kept as
            # float16 whenever they are present; otherwise int8 is used.
            if "charge" in self.voxel_feature_types:
                feature_tensor = np.concatenate(
                    feature_tensors, axis=3).astype(np.float16)
            else:
                feature_tensor = np.concatenate(
                    feature_tensors, axis=3).astype(np.int8)

            if self.flatten:
                feature_tensor = np.squeeze(feature_tensor)
            features[system_id] = feature_tensor

        return (features)
def dock_ligands_to_receptors(docking_dir,
                              worker_pool=None,
                              exhaustiveness=None,
                              chosen_receptor=None,
                              restrict_box=True):
    """Dock every prepared ligand against each receptor under a directory.

    Each immediate sub-directory of ``docking_dir`` is treated as one
    receptor named after the directory. A receptor is skipped when
    ``chosen_receptor`` is given and differs from its name, or when
    ``<name>.pdbqt`` is missing from its directory. Ligands are the
    ``*_prepared.pdbqt`` files in that directory, in sorted order.

    Parameters
    ----------
    docking_dir: str
        Directory holding one sub-directory per receptor.
    worker_pool: optional
        Object with a ``map`` method. When given, the ligands are docked
        through ``worker_pool.map``; otherwise they are docked one by one
        and the time taken for each is printed.
    exhaustiveness: optional
        Forwarded unchanged to ``dock_ligand_to_receptor``.
    chosen_receptor: str, optional
        Restrict docking to the receptor with this name.
    restrict_box: bool
        When true, the first ligand whose path contains "CHEM" (or the empty
        string when there is none) is docked first against the whole-protein
        box; the centroid of that docked pose then becomes the centre of a
        20 x 20 x 20 box used for all ligands.
    """
    for globbed_dir in glob.glob(os.path.join(docking_dir, '*/')):
        subdir = globbed_dir.rstrip('/')
        receptor_name = os.path.basename(subdir)
        if not (chosen_receptor is None or chosen_receptor == receptor_name):
            continue
        print("receptor name = %s" % receptor_name)

        receptor_filename = os.path.join(subdir, "%s.pdbqt" % receptor_name)
        if not os.path.exists(receptor_filename):
            continue
        print("Examining %s" % receptor_filename)

        receptor_pdb = os.path.join(subdir, "%s.pdb" % receptor_name)
        receptor_coords = rdkit_util.load_molecule(receptor_pdb)[0]
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_coords)
        # Default search box: the extent of the protein plus a 5.0 margin.
        box_dims = mol_xyz_util.get_molecule_range(receptor_coords) + 5.0

        ligands = sorted(glob.glob(os.path.join(subdir, '*_prepared.pdbqt')))
        print("Num ligands = %d" % len(ligands))

        # Keyword arguments common to both the wide and the restricted box.
        common_kwargs = dict(
            receptor_filename=receptor_filename,
            subdir=subdir,
            exhaustiveness=exhaustiveness)
        dock_one = partial(
            dock_ligand_to_receptor,
            protein_centroid=protein_centroid,
            box_dims=box_dims,
            **common_kwargs)

        if restrict_box:
            active_ligand = next(
                (path for path in ligands if "CHEM" in path), "")
            print("Docking to %s first to ascertain centroid and box dimensions"
                  % active_ligand)
            docked_file = dock_one(active_ligand)
            docked_coords = rdkit_util.load_molecule(docked_file)[0]
            ligand_centroid = mol_xyz_util.get_molecule_centroid(docked_coords)
            print("Protein centroid = %s" % (str(protein_centroid)))
            print("Ligand centroid = %s" % (str(ligand_centroid)))
            # Re-bind the docking callable to a fixed-size box centred on
            # the docked pose.
            dock_one = partial(
                dock_ligand_to_receptor,
                protein_centroid=ligand_centroid,
                box_dims=np.array([20., 20., 20.]),
                **common_kwargs)
            print("Finished docking to %s, docking to remainder of ligands now."
                  % active_ligand)

        if worker_pool is not None:
            print("parallelizing docking over worker pool")
            worker_pool.map(dock_one, ligands)
            continue

        for ligand_file in ligands:
            tick = time.time()
            dock_one(ligand_file)
            print("took %f seconds to dock single ligand." %
                  (time.time() - tick))
def load_molecule(molecule_file, add_hydrogens=True, calc_charges=False):
    """Load a molecule by delegating to ``rdkit_util.load_molecule``.

    The three arguments are forwarded positionally, in the order they are
    declared here, and the delegate's result is returned unchanged.
    """
    forwarded_args = (molecule_file, add_hydrogens, calc_charges)
    return rdkit_util.load_molecule(*forwarded_args)
def generate_poses(self,
                   protein_file,
                   ligand_file,
                   centroid=None,
                   box_dims=None,
                   dry_run=False,
                   out_dir=None):
    """Generates the docked complex and outputs files for docked complex.

    The receptor and ligand are each written out as a ``.pdb`` and a
    ``.pdbqt`` file in ``out_dir``, a Vina configuration file is written
    next to them, and (unless ``dry_run`` is set) the Vina command in
    ``self.vina_cmd`` is run on it.

    Parameters
    ----------
    protein_file: str
        Receptor structure; handed to the preparation step as "pdb".
    ligand_file: str
        Ligand structure; handed to the preparation step as "sdf".
    centroid: optional
        Centre of the search box. Only honoured when ``box_dims`` is given
        as well; otherwise both values are recomputed.
    box_dims: optional
        Dimensions of the search box. Only honoured together with
        ``centroid``.
    dry_run: bool
        When true, all input files are prepared but Vina is not invoked.
    out_dir: str, optional
        Directory for all generated files. A fresh temporary directory is
        created when omitted; it is not removed afterwards.

    Returns
    -------
    Tuple ``(protein_hyd, out_pdbqt)``: the path of the prepared receptor
    pdb and the path Vina is told to write the docked poses to. With
    ``dry_run`` the second path is returned without Vina having been run.
    """
    if out_dir is None:
        out_dir = tempfile.mkdtemp()

    # Prepare receptor
    receptor_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s.pdb" % receptor_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % receptor_name)
    hydrogenate_and_compute_partial_charges(
        protein_file,
        "pdb",
        hyd_output=protein_hyd,
        pdbqt_output=protein_pdbqt,
        protein=True)

    # Get protein centroid and range
    # TODO(rbharath): Need to add some way to identify binding pocket, or this is
    # going to be extremely slow!
    if centroid is not None and box_dims is not None:
        protein_centroid = centroid
    elif not self.detect_pockets:
        # Search the whole protein: centre on it and pad its extent by 5.0.
        receptor_mol = rdkit_util.load_molecule(
            protein_hyd, calc_charges=False, add_hydrogens=False)
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])
        box_dims = protein_range + 5.0
    else:
        logger.info("About to find putative binding pockets")
        pockets, _, pocket_coords = self.pocket_finder.find_pockets(
            protein_file, ligand_file)
        # TODO(rbharath): Handle multiple pockets instead of arbitrarily selecting
        # first pocket.
        logger.info("Computing centroid and size of proposed pocket.")
        pocket_coord = pocket_coords[0]
        # Average over atoms (axis 0) to obtain one (x, y, z) centre. The
        # previous axis=1 averaged x/y/z per atom instead. Assumes
        # pocket_coord is (n_pocket_atoms, 3) -- TODO confirm against
        # find_pockets.
        protein_centroid = np.mean(pocket_coord, axis=0)
        pocket = pockets[0]
        (x_min, x_max), (y_min, y_max), (z_min, z_max) = pocket
        # NOTE(review): these are half-extents, whereas the whole-protein
        # branch uses the full range plus padding -- confirm which one
        # write_conf expects.
        x_box = (x_max - x_min) / 2.
        y_box = (y_max - y_min) / 2.
        z_box = (z_max - z_min) / 2.
        box_dims = (x_box, y_box, z_box)

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_hyd = os.path.join(out_dir, "%s.pdb" % ligand_name)
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)

    # TODO(rbharath): Generalize this so can support mol2 files as well.
    hydrogenate_and_compute_partial_charges(
        ligand_file,
        "sdf",
        hyd_output=ligand_hyd,
        pdbqt_output=ligand_pdbqt,
        protein=False)

    # Write Vina conf file
    conf_file = os.path.join(out_dir, "conf.txt")
    write_conf(
        protein_pdbqt,
        ligand_pdbqt,
        protein_centroid,
        box_dims,
        conf_file,
        exhaustiveness=self.exhaustiveness)

    # Define locations of log and output files
    log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
    out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
    # TODO(rbharath): Let user specify the number of poses required.
    if not dry_run:
        logger.info("About to call Vina")
        # NOTE(review): the command is assembled as a shell string, so
        # paths containing spaces or shell metacharacters will break or be
        # interpreted by the shell. Left unchanged because self.vina_cmd may
        # rely on shell parsing -- consider an argument list.
        call(
            "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                  log_file, out_pdbqt),
            shell=True)
    # TODO(rbharath): Convert the output pdbqt to a pdb file.

    # Return docked files
    return protein_hyd, out_pdbqt