def extract_active_site(protein_file, ligand_file, cutoff=4):
    """Extract an axis-aligned box around the active site of a protein.

    A protein atom is part of the pocket when its Euclidean distance to at
    least one ligand atom is strictly smaller than ``cutoff``.

    Parameters
    ----------
    protein_file:
        Protein input handed to ``rdkit_util.load_molecule`` (loaded without
        adding hydrogens).
    ligand_file:
        Ligand input handed to ``rdkit_util.load_molecule`` (loaded with
        hydrogens added and charges computed).
    cutoff: float, optional
        Distance threshold, in the units of the loaded coordinates
        (presumably Angstrom -- confirm against ``rdkit_util``).

    Returns
    -------
    tuple
        ``(box, pocket_atoms, pocket_coords)`` where ``box`` is
        ``((x_min, x_max), (y_min, y_max), (z_min, z_max))`` with integer
        bounds (floor of the minima, ceil of the maxima), ``pocket_atoms`` is
        the list of protein atom indices in ascending order, and
        ``pocket_coords`` is the matching ``(n_pocket_atoms, 3)`` array.

    Raises
    ------
    ValueError
        If no protein atom lies within ``cutoff`` of the ligand.
    """
    protein_coords = np.asarray(
        rdkit_util.load_molecule(protein_file, add_hydrogens=False)[0])
    # Keep an explicit (N, 3) shape so an empty protein still broadcasts.
    protein_coords = protein_coords.reshape(-1, 3)
    ligand_coords = rdkit_util.load_molecule(
        ligand_file, add_hydrogens=True, calc_charges=True)[0]

    # One vectorized distance computation per ligand atom instead of a
    # Python-level loop over every (ligand atom, protein atom) pair.
    in_pocket = np.zeros(len(protein_coords), dtype=bool)
    for lig_atom in ligand_coords:
        distances = np.linalg.norm(protein_coords - lig_atom, axis=1)
        in_pocket |= distances < cutoff

    pocket_atoms = [ind for ind, hit in enumerate(in_pocket) if hit]
    if not pocket_atoms:
        raise ValueError(
            "No protein atoms found within cutoff %s of the ligand." % cutoff)

    # Should be an array of size (n_pocket_atoms, 3)
    pocket_coords = protein_coords[pocket_atoms].astype(np.float64)

    lower = np.floor(pocket_coords.min(axis=0))
    upper = np.ceil(pocket_coords.max(axis=0))
    (x_min, x_max), (y_min, y_max), (z_min, z_max) = [
        (int(lo), int(hi)) for lo, hi in zip(lower, upper)
    ]
    return (((x_min, x_max), (y_min, y_max), (z_min, z_max)), pocket_atoms,
            pocket_coords)
def find_pockets(self, protein_file, ligand_file):
    """Locate candidate binding pockets on a protein.

    Parameters
    ----------
    protein_file:
        Protein input handed to ``rdkit_util.load_molecule``.
    ligand_file:
        Ligand input handed to ``rdkit_util.load_molecule``. It is loaded,
        but its coordinates are not used by the computation below.

    Returns
    -------
    tuple
        ``(pockets, pocket_atoms_map, pocket_coords)``: the first two come
        from ``merge_overlapping_boxes``; ``pocket_coords`` is a list with one
        ``(n_atoms, 3)`` array of protein coordinates per pocket, in the same
        order as ``pockets``.
    """
    protein_xyz = rdkit_util.load_molecule(
        protein_file, add_hydrogens=False, calc_charges=False)[0]
    # Loaded only for its side effects (e.g. failing on an unreadable file).
    _ = rdkit_util.load_molecule(
        ligand_file, add_hydrogens=False, calc_charges=False)[0]

    candidate_boxes = get_all_boxes(protein_xyz, self.pad)
    box_to_atoms = boxes_to_atoms(protein_xyz, candidate_boxes)
    pockets, pocket_atoms_map = merge_overlapping_boxes(
        box_to_atoms, candidate_boxes)

    def _gather_xyz(atom_indices):
        # Copy the coordinates of the given protein atoms into a fresh array.
        xyz = np.zeros((len(atom_indices), 3))
        for row, atom_index in enumerate(atom_indices):
            xyz[row] = protein_xyz[atom_index]
        return xyz

    pocket_coords = [_gather_xyz(pocket_atoms_map[pocket]) for pocket in pockets]
    return pockets, pocket_atoms_map, pocket_coords
def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Compute coordinates and a neighbor list for a ligand/protein complex.

    Parameters
    ----------
    mol_pdb_file:
        Ligand input handed to ``rdkit_util.load_molecule`` (presumably the
        path of a PDB file -- confirm against callers).
    protein_pdb_file:
        Protein input handed to ``rdkit_util.load_molecule`` (presumably the
        path of a PDB file -- confirm against callers).

    Returns
    -------
    tuple
        ``(system_coords, system_neighbor_list)``: the merged coordinates
        from ``rdkit_util.merge_molecules_xyz`` and the neighbor list built
        from them with ``self.neighbor_cutoff`` and
        ``self.max_num_neighbors``.
    """
    # Only the coordinates are needed; the molecule objects are discarded.
    ligand_xyz, _ligand_mol = rdkit_util.load_molecule(mol_pdb_file)
    receptor_xyz, _receptor_mol = rdkit_util.load_molecule(protein_pdb_file)

    complex_xyz = rdkit_util.merge_molecules_xyz(ligand_xyz, receptor_xyz)
    complex_neighbors = compute_neighbor_list(
        complex_xyz, self.neighbor_cutoff, self.max_num_neighbors, None)
    return (complex_xyz, complex_neighbors)
def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
    """Featurize both fragments of a complex and the merged system.

    Parameters
    ----------
    mol_pdb_file:
        First fragment, handed to ``rdkit_util.load_molecule``.
    protein_pdb_file:
        Second fragment, handed to ``rdkit_util.load_molecule``.

    Returns
    -------
    tuple
        Nine values: ``(coords, neighbor_list, z)`` as produced by
        ``self.featurize_mol`` for fragment 1, then fragment 2, then the
        merged system.
    """
    first_xyz, first_mol = rdkit_util.load_molecule(mol_pdb_file)
    second_xyz, second_mol = rdkit_util.load_molecule(protein_pdb_file)

    # The merged system is built from the fragments as loaded, i.e. before
    # either of them goes through ``_strip_hydrogens``.
    merged_mol = rdkit_util.merge_molecules(first_mol, second_mol)
    merged_xyz = rdkit_util.get_xyz_from_mol(merged_mol)

    first_xyz, first_mol = self._strip_hydrogens(first_xyz, first_mol)
    second_xyz, second_mol = self._strip_hydrogens(second_xyz, second_mol)
    merged_xyz, merged_mol = self._strip_hydrogens(merged_xyz, merged_mol)

    first_xyz, first_neighbors, first_z = self.featurize_mol(
        first_xyz, first_mol, self.frag1_num_atoms)
    second_xyz, second_neighbors, second_z = self.featurize_mol(
        second_xyz, second_mol, self.frag2_num_atoms)
    merged_xyz, merged_neighbors, merged_z = self.featurize_mol(
        merged_xyz, merged_mol, self.complex_num_atoms)

    return (first_xyz, first_neighbors, first_z,
            second_xyz, second_neighbors, second_z,
            merged_xyz, merged_neighbors, merged_z)
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
    """Write a hydrogenated pdb and a pdbqt with partial charges.

    The input is loaded once through ``rdkit_util.load_molecule`` with
    ``add_hydrogens=True`` and ``calc_charges=True``; the resulting molecule
    is then written to both ``hyd_output`` and ``pdbqt_output`` with
    ``rdkit_util.write_molecule``.

    For proteins the pdbqt file is post-processed to drop the ROOT, ENDROOT
    and TORSDOF records, as announced by the log message.

    TODO(rbharath): Can do a bit of refactoring between this function and
    pdbqt_to_pdb.

    Parameters
    ----------
    input_file: String
        Path to input file.
    input_format: String
        Name of input format. Currently not used by this function.
    hyd_output: String, optional
        Destination of the hydrogenated structure. The value is passed
        through ``str()``, so the default ``None`` becomes the literal file
        name ``"None"``; callers should always supply it.
    pdbqt_output: String, optional
        Destination of the pdbqt file; same caveat as ``hyd_output``.
    protein: bool, optional
        Forwarded as ``is_protein`` to ``write_molecule`` and enables the
        pdbqt post-processing.
    verbose: bool, optional
        Log progress messages for the two write steps.
    """
    mol = rdkit_util.load_molecule(
        input_file, add_hydrogens=True, calc_charges=True)[1]

    if verbose:
        logging.info("Create pdb with hydrogens added")
    rdkit_util.write_molecule(mol, str(hyd_output), is_protein=protein)

    if verbose:
        logging.info("Create a pdbqt file from the hydrogenated pdb above.")
    rdkit_util.write_molecule(mol, str(pdbqt_output), is_protein=protein)

    if protein:
        logging.info("Removing ROOT/ENDROOT/TORSDOF")
        with open(pdbqt_output) as f:
            pdbqt_lines = f.readlines()
        # Match on the record name at the start of the line so that atom
        # records which merely contain these letters are never dropped.
        torsion_records = ("ROOT", "ENDROOT", "TORSDOF")
        filtered_lines = [
            line for line in pdbqt_lines
            if not line.lstrip().startswith(torsion_records)
        ]
        with open(pdbqt_output, "w") as f:
            f.writelines(filtered_lines)
def find_all_pockets(self, protein_file):
    """Return every candidate pocket box for a protein.

    Parameters
    ----------
    protein_file:
        Protein input handed to ``rdkit_util.load_molecule``.

    Returns
    -------
    The result of ``get_all_boxes`` for the protein coordinates, using
    ``self.pad``.
    """
    loaded = rdkit_util.load_molecule(protein_file)
    # First element holds the coordinates, an (N, 3) tensor.
    protein_xyz = loaded[0]
    return get_all_boxes(protein_xyz, self.pad)
def generate_poses(self,
                   protein_file,
                   ligand_file,
                   centroid=None,
                   box_dims=None,
                   dry_run=False,
                   out_dir=None):
    """Generates the docked complex and outputs files for docked complex.

    Parameters
    ----------
    protein_file: str
        Receptor structure; a hydrogenated pdb and a pdbqt are written into
        ``out_dir`` under the receptor's base name.
    ligand_file: str
        Ligand structure; a hydrogenated pdb and a pdbqt are written into
        ``out_dir`` under the ligand's base name.
    centroid: optional
        Center of the search box. Only honored when ``box_dims`` is given as
        well; otherwise both values are computed here.
    box_dims: optional
        Size of the search box. Only honored together with ``centroid``.
    dry_run: bool, optional
        If True, all input files are prepared but Vina is not invoked, so
        the returned docked-pose path is not created by this call.
    out_dir: str, optional
        Output directory; a fresh temporary directory is created if omitted.

    Returns
    -------
    tuple
        ``(protein_hyd, out_pdbqt)``: path of the hydrogenated receptor pdb
        and path of the docked ligand pdbqt.
    """
    if out_dir is None:
        out_dir = tempfile.mkdtemp()

    # Prepare receptor
    receptor_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s.pdb" % receptor_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % receptor_name)
    hydrogenate_and_compute_partial_charges(
        protein_file,
        "pdb",
        hyd_output=protein_hyd,
        pdbqt_output=protein_pdbqt,
        protein=True)

    # Get protein centroid and range
    if centroid is not None and box_dims is not None:
        protein_centroid = centroid
    elif not self.detect_pockets:
        # Box the whole receptor, with a margin of 5.0 on the range.
        receptor_mol = rdkit_util.load_molecule(
            protein_hyd, calc_charges=False, add_hydrogens=False)
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])
        box_dims = protein_range + 5.0
    else:
        logger.info("About to find putative binding pockets")
        pockets, pocket_atoms_maps, pocket_coords = self.pocket_finder.find_pockets(
            protein_file, ligand_file)
        logger.info("Computing centroid and size of proposed pocket.")
        # Only the first proposed pocket is docked against.
        pocket_coord = pocket_coords[0]
        # pocket_coord holds one row per atom, so the xyz centroid is the
        # mean over rows (axis=0); axis=1 would give one value per atom.
        protein_centroid = np.mean(pocket_coord, axis=0)
        pocket = pockets[0]
        (x_min, x_max), (y_min, y_max), (z_min, z_max) = pocket
        # NOTE(review): these are half-extents, whereas the whole-protein
        # branch passes the full range -- confirm which one write_conf
        # expects.
        x_box = (x_max - x_min) / 2.
        y_box = (y_max - y_min) / 2.
        z_box = (z_max - z_min) / 2.
        box_dims = (x_box, y_box, z_box)

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_hyd = os.path.join(out_dir, "%s.pdb" % ligand_name)
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)
    hydrogenate_and_compute_partial_charges(
        ligand_file,
        "sdf",
        hyd_output=ligand_hyd,
        pdbqt_output=ligand_pdbqt,
        protein=False)

    # Write Vina conf file
    conf_file = os.path.join(out_dir, "conf.txt")
    write_conf(
        protein_pdbqt,
        ligand_pdbqt,
        protein_centroid,
        box_dims,
        conf_file,
        exhaustiveness=self.exhaustiveness)

    # Define locations of log and output files
    log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
    out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
    if not dry_run:
        logger.info("About to call Vina")
        # NOTE(review): the command is assembled as a shell string, so paths
        # containing spaces or shell metacharacters will break it.
        call(
            "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                  log_file, out_pdbqt),
            shell=True)

    # Return docked files
    return protein_hyd, out_pdbqt