def generate_poses(self,
                   protein_file,
                   ligand_file,
                   centroid=None,
                   box_dims=None,
                   dry_run=False,
                   out_dir=None):
    """Generates the docked complex and outputs files for docked complex.

    Prepares hydrogenated/charged copies of the protein and the ligand in
    `out_dir` (via `hydrogenate_and_compute_partial_charges`), writes a Vina
    configuration file and, unless `dry_run` is set, runs the executable
    stored in `self.vina_cmd`.

    Parameters
    ----------
    protein_file: str
      Path to the protein; handed to the preparation helper as "pdb".
    ligand_file: str
      Path to the ligand; handed to the preparation helper as "sdf".
    centroid: optional
      Center of the docking box. Only used when `box_dims` is given as
      well; otherwise both values are recomputed.
    box_dims: optional
      Size of the docking box. Only used when `centroid` is given as well.
    dry_run: bool, optional (default False)
      If `True`, all input files are written but Vina is not invoked.
    out_dir: str, optional
      Directory receiving every generated file. A fresh temporary
      directory is created when omitted.

    Returns
    -------
    Tuple `(protein_hyd, out_pdbqt)`: the path of the hydrogenated protein
    PDB and the path Vina is told to write the docked poses to. With
    `dry_run=True` the second file is not created by this method.
    """
    if out_dir is None:
        out_dir = tempfile.mkdtemp()

    # Prepare receptor
    receptor_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s.pdb" % receptor_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % receptor_name)
    hydrogenate_and_compute_partial_charges(
        protein_file,
        "pdb",
        hyd_output=protein_hyd,
        pdbqt_output=protein_pdbqt,
        protein=True)

    # Get protein centroid and range
    # TODO(rbharath): Need to add some way to identify binding pocket, or this
    # is going to be extremely slow!
    if centroid is not None and box_dims is not None:
        protein_centroid = centroid
    elif not self.detect_pockets:
        # Whole-protein docking: box is the protein extent plus 5.0 padding.
        receptor_mol = rdkit_util.load_molecule(
            protein_hyd, calc_charges=False, add_hydrogens=False)
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])
        box_dims = protein_range + 5.0
    else:
        logger.info("About to find putative binding pockets")
        pockets, _, pocket_coords = self.pocket_finder.find_pockets(
            protein_file, ligand_file)
        # TODO(rbharath): Handle multiple pockets instead of arbitrarily
        # selecting first pocket.
        logger.info("Computing centroid and size of proposed pocket.")
        # NOTE(review): axis=1 only yields an (x, y, z) centroid if the
        # coordinates are laid out as (3, n_atoms) -- confirm against
        # pocket_finder.find_pockets.
        protein_centroid = np.mean(pocket_coords[0], axis=1)
        (x_min, x_max), (y_min, y_max), (z_min, z_max) = pockets[0]
        box_dims = ((x_max - x_min) / 2.,
                    (y_max - y_min) / 2.,
                    (z_max - z_min) / 2.)

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_hyd = os.path.join(out_dir, "%s.pdb" % ligand_name)
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)
    # TODO(rbharath): Generalize this so can support mol2 files as well.
    hydrogenate_and_compute_partial_charges(
        ligand_file,
        "sdf",
        hyd_output=ligand_hyd,
        pdbqt_output=ligand_pdbqt,
        protein=False)

    # Write Vina conf file
    conf_file = os.path.join(out_dir, "conf.txt")
    write_conf(
        protein_pdbqt,
        ligand_pdbqt,
        protein_centroid,
        box_dims,
        conf_file,
        exhaustiveness=self.exhaustiveness)

    # Define locations of log and output files
    log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
    out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
    # TODO(rbharath): Let user specify the number of poses required.
    if not dry_run:
        logger.info("About to call Vina")
        # Pass an argument vector instead of a shell string so that paths
        # containing spaces or shell metacharacters reach Vina intact.
        # This expects self.vina_cmd to be the path of the executable.
        vina_argv = [
            self.vina_cmd, "--config", conf_file, "--log", log_file, "--out",
            out_pdbqt
        ]
        return_code = call(vina_argv)
        if return_code != 0:
            logger.warning("Vina exited with non-zero status %s", return_code)
    # TODO(rbharath): Convert the output pdbqt to a pdb file.

    # Return docked files
    return protein_hyd, out_pdbqt
def generate_poses(self,
                   molecular_complex,
                   centroid=None,
                   box_dims=None,
                   exhaustiveness=10,
                   num_modes=9,
                   num_pockets=None,
                   out_dir=None,
                   generate_scores=False):
    """Generates the docked complex and outputs files for docked complex.

    TODO: How can this work on Windows? We need to install a .msi file and
    invoke it correctly from Python for this to work.

    Parameters
    ----------
    molecular_complex: tuple
      A `(protein_file, ligand_file)` pair of file paths.
    centroid: np.ndarray, optional
      The centroid to dock against. Is computed if not specified. Only
      used when `box_dims` is given as well.
    box_dims: np.ndarray, optional
      Of shape `(3,)` holding the size of the box to dock. If not
      specified is set to size of molecular complex plus 5 angstroms.
      Only used when `centroid` is given as well.
    exhaustiveness: int, optional (default 10)
      Tells Autodock Vina how exhaustive it should be with pose
      generation.
    num_modes: int, optional (default 9)
      Tells Autodock Vina how many binding modes it should generate at
      each invocation.
    num_pockets: int, optional (default None)
      If specified, `self.pocket_finder` must be set. Will only generate
      poses for the first `num_pockets` returned by `self.pocket_finder`.
    out_dir: str, optional
      If specified, write generated poses to this directory. Otherwise a
      fresh temporary directory is created.
    generate_scores: bool, optional (default False)
      If `True`, the pose generator will return scores for complexes.
      This is used typically when invoking external docking programs
      that compute scores.

    Returns
    -------
    `docked_poses` when `generate_scores` is `False`, otherwise the tuple
    `(docked_poses, scores)`. `docked_poses` is a list of docked molecular
    complexes. Each entry in this list contains a `(protein_mol,
    ligand_mol)` pair of RDKit molecules. `scores` is a list of binding
    free energies predicted by Vina.

    Raises
    ------
    `ValueError` if `num_pockets` is set but `self.pocket_finder is None`,
    or if `molecular_complex` does not hold exactly two entries.
    """
    # Validate before touching the filesystem so a rejected call does not
    # leave an empty temporary directory behind.
    if num_pockets is not None and self.pocket_finder is None:
        raise ValueError(
            "If num_pockets is specified, pocket_finder must have been provided at construction time."
        )
    if len(molecular_complex) != 2:
        raise ValueError(
            "Autodock Vina can only dock protein-ligand complexes and not more general molecular complexes."
        )
    protein_file, ligand_file = molecular_complex

    if out_dir is None:
        out_dir = tempfile.mkdtemp()

    # Prepare protein
    protein_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s_hyd.pdb" % protein_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_name)
    protein_mol = rdkit_util.load_molecule(
        protein_file, calc_charges=True, add_hydrogens=True)
    # The Vina conf file references protein_pdbqt on every code path
    # (explicit box, whole protein, pocket finder), so the receptor files
    # must be written unconditionally, not only for whole-protein docking.
    rdkit_util.write_molecule(protein_mol[1], protein_hyd, is_protein=True)
    rdkit_util.write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

    # Get protein centroid and range
    if centroid is not None and box_dims is not None:
        centroids = [centroid]
        dimensions = [box_dims]
    elif self.pocket_finder is None:
        logger.info("Pockets not specified. Will use whole protein to dock")
        protein_centroid = geometry_utils.compute_centroid(protein_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(protein_mol[0])
        centroids = [protein_centroid]
        dimensions = [protein_range + 5.0]
    else:
        logger.info("About to find putative binding pockets")
        pockets = self.pocket_finder.find_pockets(protein_file)
        logger.info("%d pockets found in total", len(pockets))
        logger.info("Computing centroid and size of proposed pockets.")
        centroids, dimensions = [], []
        for pocket in pockets:
            (x_min, x_max) = pocket.x_range
            (y_min, y_max) = pocket.y_range
            (z_min, z_max) = pocket.z_range
            # TODO(rbharath): Does vina divide box dimensions by 2?
            centroids.append(pocket.center())
            dimensions.append(((x_max - x_min) / 2.,
                               (y_max - y_min) / 2.,
                               (z_max - z_min) / 2.))

    if num_pockets is not None:
        logger.info(
            "num_pockets = %d so selecting this many pockets for docking.",
            num_pockets)
        centroids = centroids[:num_pockets]
        dimensions = dimensions[:num_pockets]

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)
    ligand_mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=True, add_hydrogens=True)
    rdkit_util.write_molecule(ligand_mol[1], ligand_pdbqt)

    # These paths do not depend on the pocket; each Vina run overwrites
    # them and the poses are loaded right after the run.
    conf_file = os.path.join(out_dir, "conf.txt")
    log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
    out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)

    docked_complexes = []
    all_scores = []
    for i, (protein_centroid, box_dims) in enumerate(
            zip(centroids, dimensions)):
        logger.info("Docking in pocket %d/%d", i + 1, len(centroids))
        logger.info("Docking with center: %s", str(protein_centroid))
        logger.info("Box dimensions: %s", str(box_dims))

        # Write Vina conf file
        vina_utils.write_vina_conf(
            protein_pdbqt,
            ligand_pdbqt,
            protein_centroid,
            box_dims,
            conf_file,
            num_modes=num_modes,
            exhaustiveness=exhaustiveness)

        logger.info("About to call Vina")
        # Argument vector instead of a shell string: paths with spaces or
        # shell metacharacters are passed to Vina unmodified. This expects
        # self.vina_cmd to be the path of the executable.
        vina_argv = [
            self.vina_cmd, "--config", conf_file, "--log", log_file, "--out",
            out_pdbqt
        ]
        return_code = call(vina_argv)
        if return_code != 0:
            logger.warning("Vina exited with non-zero status %s", return_code)

        ligands, scores = vina_utils.load_docked_ligands(out_pdbqt)
        docked_complexes.extend((protein_mol[1], ligand) for ligand in ligands)
        all_scores.extend(scores)

    if generate_scores:
        return docked_complexes, all_scores
    return docked_complexes
def dock_ligands_to_receptors(docking_dir,
                              worker_pool=None,
                              exhaustiveness=None,
                              chosen_receptor=None,
                              restrict_box=True):
    """Dock all prepared ligands against the receptor of each subdirectory.

    Every immediate subdirectory `<name>` of `docking_dir` is treated as
    one receptor. It is processed only if `<name>/<name>.pdbqt` exists;
    the receptor geometry is read from `<name>/<name>.pdb` and the ligands
    are the files matching `<name>/*_prepared.pdbqt`.

    Parameters
    ----------
    docking_dir: str
      Directory whose subdirectories hold one receptor each.
    worker_pool: optional
      Object with a `map(func, iterable)` method. If given, the ligands
      are docked through it; otherwise they are docked one after another.
    exhaustiveness: optional
      Forwarded unchanged to `dock_ligand_to_receptor`.
    chosen_receptor: str, optional
      If given, only the subdirectory with this name is processed.
    restrict_box: bool, optional (default True)
      If `True`, the first ligand whose path contains "CHEM" is docked
      against the whole-protein box first, and the centroid of that docked
      pose plus a fixed 20 x 20 x 20 box is used for all ligands. If no
      such ligand exists, the whole-protein box is used instead.

    Returns
    -------
    None. Results are whatever `dock_ligand_to_receptor` writes.
    """
    for subdir in glob.glob(os.path.join(docking_dir, '*/')):
        # Strip the trailing separator so basename() yields the directory
        # name; also cover os.sep for platforms where it is not '/'.
        subdir = subdir.rstrip('/' + os.sep)
        receptor_name = os.path.basename(subdir)
        if chosen_receptor is not None and chosen_receptor != receptor_name:
            continue
        print("receptor name = %s" % receptor_name)
        receptor_filename = os.path.join(subdir, "%s.pdbqt" % receptor_name)
        if not os.path.exists(receptor_filename):
            continue
        print("Examining %s" % receptor_filename)

        # Default box: whole protein extent plus 5.0 padding.
        receptor_mol = rdkit_util.load_molecule(
            os.path.join(subdir, "%s.pdb" % receptor_name))
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])
        box_dims = protein_range + 5.0

        ligands = sorted(glob.glob(os.path.join(subdir, '*_prepared.pdbqt')))
        print("Num ligands = %d" % len(ligands))

        dock_one = partial(
            dock_ligand_to_receptor,
            receptor_filename=receptor_filename,
            protein_centroid=protein_centroid,
            box_dims=box_dims,
            subdir=subdir,
            exhaustiveness=exhaustiveness)

        if restrict_box:
            # NOTE(review): the match is made on the full path, so a "CHEM"
            # in a directory name also counts -- confirm this is intended.
            active_ligand = next((lig for lig in ligands if "CHEM" in lig),
                                 None)
            if active_ligand is None:
                # Previously an empty file name was docked here; without a
                # reference ligand the whole-protein box is the only option.
                print("No ligand containing 'CHEM' found in %s; "
                      "docking against the whole-protein box." % subdir)
            else:
                print("Docking to %s first to ascertain centroid and box dimensions"
                      % active_ligand)
                # dock_ligand_to_receptor's return value is used as the path
                # of the docked pose file.
                out_pdb_qt = dock_one(active_ligand)
                docked_ligand = rdkit_util.load_molecule(out_pdb_qt)
                ligand_centroid = mol_xyz_util.get_molecule_centroid(
                    docked_ligand[0])
                print("Protein centroid = %s" % (str(protein_centroid)))
                print("Ligand centroid = %s" % (str(ligand_centroid)))
                box_dims = np.array([20., 20., 20.])
                dock_one = partial(
                    dock_ligand_to_receptor,
                    receptor_filename=receptor_filename,
                    protein_centroid=ligand_centroid,
                    box_dims=box_dims,
                    subdir=subdir,
                    exhaustiveness=exhaustiveness)
                print("Finished docking to %s, docking to remainder of ligands now."
                      % active_ligand)

        if worker_pool is None:
            for ligand_file in ligands:
                start = time.time()
                dock_one(ligand_file)
                print("took %f seconds to dock single ligand." %
                      (time.time() - start))
        else:
            print("parallelizing docking over worker pool")
            worker_pool.map(dock_one, ligands)
def generate_poses(self,
                   protein_file,
                   ligand_file,
                   centroid=None,
                   box_dims=None,
                   dry_run=False,
                   out_dir=None):
    """Generates the docked complex and outputs files for docked complex.

    The protein and the ligand are run through
    `hydrogenate_and_compute_partial_charges`, which is given output paths
    inside `out_dir`. A Vina configuration file is then written and,
    unless `dry_run` is set, the executable in `self.vina_cmd` is run.

    Parameters
    ----------
    protein_file: str
      Path to the protein; handed to the preparation helper as "pdb".
    ligand_file: str
      Path to the ligand; handed to the preparation helper as "sdf".
    centroid: optional
      Center of the docking box. Ignored unless `box_dims` is also given.
    box_dims: optional
      Size of the docking box. Ignored unless `centroid` is also given.
    dry_run: bool, optional (default False)
      If `True`, prepare all files but do not invoke Vina.
    out_dir: str, optional
      Directory receiving every generated file. Defaults to a newly
      created temporary directory.

    Returns
    -------
    Tuple `(protein_hyd, out_pdbqt)`: path of the hydrogenated protein PDB
    and the path passed to Vina as `--out`. The latter is not created by
    this method when `dry_run=True`.
    """
    if out_dir is None:
        out_dir = tempfile.mkdtemp()

    # Prepare receptor
    receptor_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s.pdb" % receptor_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % receptor_name)
    hydrogenate_and_compute_partial_charges(
        protein_file,
        "pdb",
        hyd_output=protein_hyd,
        pdbqt_output=protein_pdbqt,
        protein=True)

    # Get protein centroid and range
    # TODO(rbharath): Need to add some way to identify binding pocket, or this
    # is going to be extremely slow!
    if centroid is not None and box_dims is not None:
        protein_centroid = centroid
    elif not self.detect_pockets:
        # No pocket detection: dock against the whole protein, using its
        # extent plus 5.0 padding as the box.
        receptor_mol = rdkit_util.load_molecule(
            protein_hyd, calc_charges=False, add_hydrogens=False)
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])
        box_dims = protein_range + 5.0
    else:
        logger.info("About to find putative binding pockets")
        pockets, _, pocket_coords = self.pocket_finder.find_pockets(
            protein_file, ligand_file)
        # TODO(rbharath): Handle multiple pockets instead of arbitrarily
        # selecting first pocket.
        logger.info("Computing centroid and size of proposed pocket.")
        # NOTE(review): averaging over axis=1 gives an (x, y, z) centroid
        # only for coordinates shaped (3, n_atoms) -- verify the layout
        # returned by pocket_finder.find_pockets.
        protein_centroid = np.mean(pocket_coords[0], axis=1)
        (x_min, x_max), (y_min, y_max), (z_min, z_max) = pockets[0]
        box_dims = ((x_max - x_min) / 2.,
                    (y_max - y_min) / 2.,
                    (z_max - z_min) / 2.)

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_hyd = os.path.join(out_dir, "%s.pdb" % ligand_name)
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)
    # TODO(rbharath): Generalize this so can support mol2 files as well.
    hydrogenate_and_compute_partial_charges(
        ligand_file,
        "sdf",
        hyd_output=ligand_hyd,
        pdbqt_output=ligand_pdbqt,
        protein=False)

    # Write Vina conf file
    conf_file = os.path.join(out_dir, "conf.txt")
    write_conf(
        protein_pdbqt,
        ligand_pdbqt,
        protein_centroid,
        box_dims,
        conf_file,
        exhaustiveness=self.exhaustiveness)

    # Define locations of log and output files
    log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
    out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
    # TODO(rbharath): Let user specify the number of poses required.
    if not dry_run:
        logger.info("About to call Vina")
        # Run Vina from an argument list rather than an interpolated shell
        # string, so file names with spaces or shell metacharacters are
        # not re-split or interpreted. This expects self.vina_cmd to be
        # the path of the executable.
        vina_argv = [
            self.vina_cmd, "--config", conf_file, "--log", log_file, "--out",
            out_pdbqt
        ]
        return_code = call(vina_argv)
        if return_code != 0:
            logger.warning("Vina exited with non-zero status %s", return_code)
    # TODO(rbharath): Convert the output pdbqt to a pdb file.

    # Return docked files
    return protein_hyd, out_pdbqt
def dock_ligands_to_receptors(docking_dir,
                              worker_pool=None,
                              exhaustiveness=None,
                              chosen_receptor=None,
                              restrict_box=True):
    """Dock all prepared ligands against the receptor of each subdirectory.

    Each immediate subdirectory `<name>` of `docking_dir` represents one
    receptor and is skipped unless `<name>/<name>.pdbqt` exists. Receptor
    coordinates come from `<name>/<name>.pdb`; ligands are the sorted
    matches of `<name>/*_prepared.pdbqt`.

    Parameters
    ----------
    docking_dir: str
      Directory whose subdirectories hold one receptor each.
    worker_pool: optional
      Object exposing `map(func, iterable)`. When provided, docking of
      the ligands is dispatched through it instead of a local loop.
    exhaustiveness: optional
      Forwarded unchanged to `dock_ligand_to_receptor`.
    chosen_receptor: str, optional
      Restrict processing to the subdirectory with this name.
    restrict_box: bool, optional (default True)
      If `True`, dock the first ligand whose path contains "CHEM" against
      the whole-protein box, then dock every ligand in a fixed
      20 x 20 x 20 box centered on that docked pose. Falls back to the
      whole-protein box when no such ligand is present.

    Returns
    -------
    None. Results are whatever `dock_ligand_to_receptor` writes.
    """
    for subdir in glob.glob(os.path.join(docking_dir, '*/')):
        # Drop the trailing separator (either '/' or the platform's
        # os.sep) so that basename() returns the directory name.
        subdir = subdir.rstrip('/' + os.sep)
        receptor_name = os.path.basename(subdir)
        if chosen_receptor is not None and chosen_receptor != receptor_name:
            continue
        print("receptor name = %s" % receptor_name)
        receptor_filename = os.path.join(subdir, "%s.pdbqt" % receptor_name)
        if not os.path.exists(receptor_filename):
            continue
        print("Examining %s" % receptor_filename)

        # Whole-protein box: molecule extent plus 5.0 padding.
        receptor_mol = rdkit_util.load_molecule(
            os.path.join(subdir, "%s.pdb" % receptor_name))
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])
        box_dims = protein_range + 5.0

        ligands = sorted(glob.glob(os.path.join(subdir, '*_prepared.pdbqt')))
        print("Num ligands = %d" % len(ligands))

        dock_one = partial(
            dock_ligand_to_receptor,
            receptor_filename=receptor_filename,
            protein_centroid=protein_centroid,
            box_dims=box_dims,
            subdir=subdir,
            exhaustiveness=exhaustiveness)

        if restrict_box:
            # NOTE(review): "CHEM" is searched in the full path, directory
            # components included -- confirm this is intended.
            active_ligand = next((lig for lig in ligands if "CHEM" in lig),
                                 None)
            if active_ligand is None:
                # The original code went on to dock an empty file name in
                # this situation; keep the whole-protein box instead.
                print("No ligand containing 'CHEM' found in %s; "
                      "docking against the whole-protein box." % subdir)
            else:
                print(
                    "Docking to %s first to ascertain centroid and box dimensions"
                    % active_ligand)
                # The value returned by dock_ligand_to_receptor is loaded
                # as the docked pose file.
                out_pdb_qt = dock_one(active_ligand)
                docked_ligand = rdkit_util.load_molecule(out_pdb_qt)
                ligand_centroid = mol_xyz_util.get_molecule_centroid(
                    docked_ligand[0])
                print("Protein centroid = %s" % (str(protein_centroid)))
                print("Ligand centroid = %s" % (str(ligand_centroid)))
                box_dims = np.array([20., 20., 20.])
                dock_one = partial(
                    dock_ligand_to_receptor,
                    receptor_filename=receptor_filename,
                    protein_centroid=ligand_centroid,
                    box_dims=box_dims,
                    subdir=subdir,
                    exhaustiveness=exhaustiveness)
                print(
                    "Finished docking to %s, docking to remainder of ligands now."
                    % active_ligand)

        if worker_pool is None:
            for ligand_file in ligands:
                start = time.time()
                dock_one(ligand_file)
                print("took %f seconds to dock single ligand." %
                      (time.time() - start))
        else:
            print("parallelizing docking over worker pool")
            worker_pool.map(dock_one, ligands)