def test_load_docked_ligand(self):
    """Check that docked ligands and their scores load consistently.

    Loads the poses stored at ``self.docked_ligands`` and verifies that
    nine ligands and nine scores come back, that every score is negative,
    and that every ligand has at least one nonzero coordinate.
    """
    expected_poses = 9
    poses, energies = vina_utils.load_docked_ligands(self.docked_ligands)

    assert len(poses) == expected_poses
    assert len(energies) == expected_poses

    for pose, energy in zip(poses, energies):
        coords = rdkit_utils.get_xyz_from_mol(pose)
        # The score is a binding free energy, so it is expected to be negative.
        assert energy < 0
        # An all-zero coordinate array would mean the pose was not parsed.
        assert np.count_nonzero(coords) > 0
def generate_poses(self,
                   molecular_complex,
                   centroid=None,
                   box_dims=None,
                   exhaustiveness=10,
                   num_modes=9,
                   num_pockets=None,
                   out_dir=None,
                   generate_scores=False):
    """Generates the docked complex and outputs files for docked complex.

    TODO: How can this work on Windows? We need to install a .msi file and
    invoke it correctly from Python for this to work.

    Parameters
    ----------
    molecular_complex: tuple or list
        A representation of a molecular complex, given as a
        `(protein_file, ligand_file)` pair of file paths.
    centroid: np.ndarray, optional
        The centroid to dock against. Only used when `box_dims` is also
        specified; otherwise the docking box is computed from the pocket
        finder (if set) or from the whole protein.
    box_dims: np.ndarray, optional
        Of shape `(3,)` holding the size of the box to dock. Only used when
        `centroid` is also specified. If no pocket finder is set, the box
        defaults to the size of the protein plus 5 angstroms.
    exhaustiveness: int, optional (default 10)
        Tells Autodock Vina how exhaustive it should be with pose
        generation.
    num_modes: int, optional (default 9)
        Tells Autodock Vina how many binding modes it should generate at
        each invocation.
    num_pockets: int, optional (default None)
        If specified, `self.pocket_finder` must be set. Will only generate
        poses for the first `num_pockets` returned by `self.pocket_finder`.
    out_dir: str, optional
        If specified, write generated poses to this directory. Otherwise a
        fresh temporary directory is created.
    generate_scores: bool, optional (default False)
        If `True`, the pose generator will return scores for complexes.
        This is used typically when invoking external docking programs
        that compute scores.

    Returns
    -------
    If `generate_scores` is `True`, a tuple of `(docked_poses, scores)`;
    otherwise only `docked_poses`. `docked_poses` is a list of docked
    molecular complexes. Each entry in this list contains a
    `(protein_mol, ligand_mol)` pair of RDKit molecules. `scores` is a
    list of binding free energies predicted by Vina.

    Raises
    ------
    `ValueError` if `num_pockets` is set but `self.pocket_finder is None`,
    or if `molecular_complex` holds more than two entries.
    """
    if out_dir is None:
        out_dir = tempfile.mkdtemp()

    if num_pockets is not None and self.pocket_finder is None:
        raise ValueError(
            "If num_pockets is specified, pocket_finder must have been provided at construction time."
        )

    # Parse complex
    if len(molecular_complex) > 2:
        raise ValueError(
            "Autodock Vina can only dock protein-ligand complexes and not more general molecular complexes."
        )

    (protein_file, ligand_file) = molecular_complex

    # Prepare protein
    protein_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s_hyd.pdb" % protein_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_name)
    protein_mol = rdkit_util.load_molecule(
        protein_file, calc_charges=True, add_hydrogens=True)
    # The Vina conf file references protein_pdbqt on every code path, so the
    # prepared protein must be written regardless of how the docking box is
    # chosen (explicit centroid, pocket finder, or whole protein).
    rdkit_util.write_molecule(protein_mol[1], protein_hyd, is_protein=True)
    rdkit_util.write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

    # Get protein centroid and range
    if centroid is not None and box_dims is not None:
        centroids = [centroid]
        dimensions = [box_dims]
    elif self.pocket_finder is None:
        logger.info("Pockets not specified. Will use whole protein to dock")
        protein_centroid = geometry_utils.compute_centroid(protein_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(protein_mol[0])
        # Pad the protein extent by 5 so the box encloses the whole protein.
        whole_protein_box = protein_range + 5.0
        centroids, dimensions = [protein_centroid], [whole_protein_box]
    else:
        logger.info("About to find putative binding pockets")
        pockets = self.pocket_finder.find_pockets(protein_file)
        logger.info("%d pockets found in total", len(pockets))
        logger.info("Computing centroid and size of proposed pockets.")
        centroids, dimensions = [], []
        for pocket in pockets:
            pocket_centroid = pocket.center()
            (x_min, x_max) = pocket.x_range
            (y_min, y_max) = pocket.y_range
            (z_min, z_max) = pocket.z_range
            # TODO(rbharath): Does vina divide box dimensions by 2?
            x_box = (x_max - x_min) / 2.
            y_box = (y_max - y_min) / 2.
            z_box = (z_max - z_min) / 2.
            centroids.append(pocket_centroid)
            dimensions.append((x_box, y_box, z_box))

    if num_pockets is not None:
        logger.info(
            "num_pockets = %d so selecting this many pockets for docking.",
            num_pockets)
        centroids = centroids[:num_pockets]
        dimensions = dimensions[:num_pockets]

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)
    ligand_mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=True, add_hydrogens=True)
    rdkit_util.write_molecule(ligand_mol[1], ligand_pdbqt)

    # Locations of the conf, log and output files. These do not depend on
    # the pocket, so each docking run overwrites the previous run's files;
    # results are loaded before the next run starts.
    conf_file = os.path.join(out_dir, "conf.txt")
    log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
    out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)

    docked_complexes = []
    all_scores = []
    num_boxes = len(centroids)
    for i, (protein_centroid, pocket_dims) in enumerate(
            zip(centroids, dimensions)):
        logger.info("Docking in pocket %d/%d", i + 1, num_boxes)
        logger.info("Docking with center: %s", protein_centroid)
        logger.info("Box dimensions: %s", pocket_dims)

        # Write Vina conf file
        vina_utils.write_vina_conf(
            protein_pdbqt,
            ligand_pdbqt,
            protein_centroid,
            pocket_dims,
            conf_file,
            num_modes=num_modes,
            exhaustiveness=exhaustiveness)

        logger.info("About to call Vina")
        # NOTE(review): the command is a shell string with unquoted paths,
        # so an out_dir containing spaces or shell metacharacters will
        # break the invocation. Kept as-is because self.vina_cmd may itself
        # carry arguments -- confirm before switching to a list-form call.
        return_code = call(
            "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                  log_file, out_pdbqt),
            shell=True)
        if return_code != 0:
            # out_pdbqt is reused between pockets, so a failed run could
            # otherwise go unnoticed and reload stale poses.
            logger.warning("Vina exited with non-zero status %d", return_code)

        ligands, scores = vina_utils.load_docked_ligands(out_pdbqt)
        docked_complexes += [(protein_mol[1], ligand) for ligand in ligands]
        all_scores += scores

    if generate_scores:
        return docked_complexes, all_scores
    return docked_complexes