예제 #1
0
    def test_load_docked_ligand(self):
        """Check that docked ligands load together with scores and coordinates."""
        poses, energies = docking_utils.load_docked_ligands(
            self.docked_ligands)
        assert len(poses) == 9
        assert len(energies) == 9

        for pose, energy in zip(poses, energies):
            coords = rdkit_utils.get_xyz_from_mol(pose)
            # Each score is a binding free energy, so it must be negative.
            assert energy < 0
            # The loaded pose must carry at least one non-zero coordinate.
            assert np.count_nonzero(coords) > 0
예제 #2
0
    def generate_poses(
            self,
            molecular_complex: Tuple[str, str],
            centroid: Optional[np.ndarray] = None,
            box_dims: Optional[np.ndarray] = None,
            exhaustiveness: int = 10,
            num_modes: int = 9,
            num_pockets: Optional[int] = None,
            out_dir: Optional[str] = None,
            generate_scores: bool = True,
            **kwargs) -> Union[Tuple[DOCKED_POSES, np.ndarray], DOCKED_POSES]:
        """Generates the docked complex and outputs files for docked complex.

        Writes a GNINA configuration file, runs the GNINA executable stored
        in `self.gnina_cmd`, and loads the poses and scores it produced.

        Parameters
        ----------
        molecular_complex: Tuple[str, str]
            A representation of a molecular complex. This tuple is
            (protein_file, ligand_file). The protein must be a `.pdb` file
            and the ligand must be a `.sdf` file.
        centroid: np.ndarray, optional (default None)
            Not read by this implementation; accepted so the signature
            matches the other pose generators.
        box_dims: np.ndarray, optional (default None)
            Not read by this implementation; accepted so the signature
            matches the other pose generators.
        exhaustiveness: int (default 10)
            Tells GNINA how exhaustive it should be with pose
            generation.
        num_modes: int (default 9)
            Tells GNINA how many binding modes it should generate at
            each invocation.
        num_pockets: int, optional (default None)
            Not read by this implementation; accepted so the signature
            matches the other pose generators.
        out_dir: str, optional
            If specified, write generated poses to this directory. The
            directory is created when missing. A temporary directory is
            used when not specified.
        generate_scores: bool, optional (default True)
            If `True`, the pose generator will return scores for complexes.
            This is used typically when invoking external docking programs
            that compute scores.
        kwargs:
            Forwarded to `write_gnina_conf`. Any args supported by GNINA as
            documented at https://github.com/gnina/gnina#usage

        Returns
        -------
        Tuple[`docked_poses`, `scores`] or `docked_poses`
            Tuple of `(docked_poses, scores)` or `docked_poses`. `docked_poses`
            is a list of docked molecular complexes. Each entry in this list
            contains a `(protein_mol, ligand_mol)` pair of RDKit molecules.
            `scores` is an array of binding affinities (kcal/mol),
            CNN pose scores, and CNN affinities predicted by GNINA.

        Raises
        ------
        `ValueError` if `molecular_complex` does not hold exactly two entries,
        if the protein file is not a `.pdb` file, or if the ligand file is
        not a `.sdf` file.
        """
        # Validate the input first so that bad arguments do not leave a
        # freshly created directory behind.
        if len(molecular_complex) != 2:
            raise ValueError(
                "GNINA can only dock protein-ligand complexes and not more general molecular complexes."
            )

        protein_file, ligand_file = molecular_complex

        # check filetypes
        if not protein_file.endswith('.pdb'):
            raise ValueError('Protein file must be in .pdb format.')
        if not ligand_file.endswith('.sdf'):
            raise ValueError('Ligand file must be in .sdf format.')

        if out_dir is None:
            out_dir = tempfile.mkdtemp()
        os.makedirs(out_dir, exist_ok=True)

        # Index 1 of the loaded protein is what gets paired with each ligand
        # pose below (presumably the RDKit molecule -- confirm against
        # `load_molecule`).
        protein_mol = load_molecule(protein_file,
                                    calc_charges=True,
                                    add_hydrogens=True)
        ligand_name = os.path.basename(ligand_file).split(".")[0]

        # Define locations of log and output files
        log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
        out_file = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
        logger.info("About to call GNINA.")

        # Write GNINA conf file
        conf_file = os.path.join(out_dir, "conf.txt")
        write_gnina_conf(protein_filename=protein_file,
                         ligand_filename=ligand_file,
                         conf_filename=conf_file,
                         num_modes=num_modes,
                         exhaustiveness=exhaustiveness,
                         **kwargs)

        # Run GNINA
        args = [
            self.gnina_cmd, "--config", conf_file, "--log", log_file, "--out",
            out_file
        ]
        process = Popen(args, stdout=PIPE, stderr=PIPE)
        _, stderr = process.communicate()
        if process.returncode != 0:
            # Surface the failure instead of discarding it; the loaders below
            # will still raise if the expected output files are missing.
            logger.warning("GNINA exited with return code %d: %s",
                           process.returncode,
                           stderr.decode(errors="replace").strip())

        # read output and log
        ligands, _ = load_docked_ligands(out_file)
        docked_complexes = [(protein_mol[1], ligand) for ligand in ligands]
        scores = read_gnina_log(log_file)

        if generate_scores:
            return docked_complexes, scores
        return docked_complexes
예제 #3
0
    def generate_poses(
            self,
            molecular_complex: Tuple[str, str],
            centroid: Optional[np.ndarray] = None,
            box_dims: Optional[np.ndarray] = None,
            exhaustiveness: int = 10,
            num_modes: int = 9,
            num_pockets: Optional[int] = None,
            out_dir: Optional[str] = None,
            generate_scores: Optional[bool] = False,
            **kwargs) -> Union[Tuple[DOCKED_POSES, List[float]], DOCKED_POSES]:
        """Generates the docked complex and outputs files for docked complex.

        Docking is performed through the `vina` Python package, once per
        docking box (either the user-supplied box, the whole protein, or each
        pocket returned by `self.pocket_finder`).

        Parameters
        ----------
        molecular_complex: Tuple[str, str]
            A representation of a molecular complex. This tuple is
            (protein_file, ligand_file). The protein should be a pdb file
            and the ligand should be an sdf file.
        centroid: np.ndarray, optional
            The centroid to dock against. Only used when `box_dims` is also
            given; otherwise it is computed.
        box_dims: np.ndarray, optional
            A numpy array of shape `(3,)` holding the size of the box to dock.
            Only used when `centroid` is also given. If not used and no
            pocket finder is set, the box is the protein range plus 5
            angstroms.
        exhaustiveness: int, optional (default 10)
            Tells Autodock Vina how exhaustive it should be with pose
            generation. A higher value of exhaustiveness implies more
            computation effort for the docking experiment.
        num_modes: int, optional (default 9)
            Tells Autodock Vina how many binding modes it should generate at
            each invocation.
        num_pockets: int, optional (default None)
            If specified, `self.pocket_finder` must be set. Will only
            generate poses for the first `num_pockets` returned by
            `self.pocket_finder`.
        out_dir: str, optional
            If specified, write generated poses to this directory. The
            directory is created when missing. A temporary directory is
            used when not specified.
        generate_scores: bool, optional (default False)
            If `True`, the pose generator will return scores for complexes.
            This is used typically when invoking external docking programs
            that compute scores.
        kwargs:
            The kwargs - cpu, min_rmsd, max_evals, energy_range supported by
            VINA are as documented in
            https://autodock-vina.readthedocs.io/en/latest/vina.html
            Defaults used here: cpu=0, min_rmsd=1.0, max_evals=0,
            energy_range=3.0.

        Returns
        -------
        Tuple[`docked_poses`, `scores`] or `docked_poses`
            Tuple of `(docked_poses, scores)` or `docked_poses`. `docked_poses`
            is a list of docked molecular complexes. Each entry in this list
            contains a `(protein_mol, ligand_mol)` pair of RDKit molecules.
            `scores` is a list of binding free energies predicted by Vina.

        Raises
        ------
        `ValueError` if `num_pockets` is set but `self.pocket_finder is None`,
        or if `molecular_complex` does not hold exactly two entries.
        `ImportError` if the `vina` package is not installed.
        """
        cpu = kwargs.get("cpu", 0)
        min_rmsd = kwargs.get("min_rmsd", 1.0)
        max_evals = kwargs.get("max_evals", 0)
        energy_range = kwargs.get("energy_range", 3.0)

        try:
            from vina import Vina
        except ModuleNotFoundError as err:
            raise ImportError(
                "This function requires vina to be installed") from err

        if num_pockets is not None and self.pocket_finder is None:
            raise ValueError(
                "If num_pockets is specified, pocket_finder must have been provided at construction time."
            )

        # Parse complex
        if len(molecular_complex) != 2:
            raise ValueError(
                "Autodock Vina can only dock protein-ligand complexes and not more general molecular complexes."
            )

        protein_file, ligand_file = molecular_complex

        if out_dir is None:
            out_dir = tempfile.mkdtemp()
        # A caller-supplied directory may not exist yet.
        os.makedirs(out_dir, exist_ok=True)

        # Prepare protein
        protein_name = os.path.basename(protein_file).split(".")[0]
        protein_hyd = os.path.join(out_dir, "%s_hyd.pdb" % protein_name)
        protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_name)
        # Index 0 of the loaded molecule feeds the centroid/range helpers and
        # index 1 is written to disk (presumably coordinates and the RDKit
        # molecule respectively -- confirm against `load_molecule`).
        protein_mol = load_molecule(protein_file,
                                    calc_charges=True,
                                    add_hydrogens=True)
        write_molecule(protein_mol[1], protein_hyd, is_protein=True)
        write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

        # Get protein centroid and range
        if centroid is not None and box_dims is not None:
            centroids = [centroid]
            dimensions = [box_dims]
        elif self.pocket_finder is None:
            logger.info("Pockets not specified. Will use whole protein to dock")
            centroids = [compute_centroid(protein_mol[0])]
            dimensions = [compute_protein_range(protein_mol[0]) + 5.0]
        else:
            logger.info("About to find putative binding pockets")
            pockets = self.pocket_finder.find_pockets(protein_file)
            logger.info("%d pockets found in total" % len(pockets))
            logger.info("Computing centroid and size of proposed pockets.")
            centroids, dimensions = [], []
            for pocket in pockets:
                (x_min, x_max) = pocket.x_range
                (y_min, y_max) = pocket.y_range
                (z_min, z_max) = pocket.z_range
                # TODO(rbharath): Does vina divide box dimensions by 2?
                x_box = (x_max - x_min) / 2.
                y_box = (y_max - y_min) / 2.
                z_box = (z_max - z_min) / 2.
                centroids.append(pocket.center())
                dimensions.append(np.array((x_box, y_box, z_box)))

        if num_pockets is not None:
            logger.info(
                "num_pockets = %d so selecting this many pockets for docking."
                % num_pockets)
            centroids = centroids[:num_pockets]
            dimensions = dimensions[:num_pockets]

        # Prepare ligand
        ligand_name = os.path.basename(ligand_file).split(".")[0]
        ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)

        ligand_mol = load_molecule(ligand_file,
                                   calc_charges=True,
                                   add_hydrogens=True)
        write_molecule(ligand_mol[1], ligand_pdbqt)

        # These paths do not depend on the pocket; every pocket overwrites the
        # same files, which are read back before the next iteration.
        conf_file = os.path.join(out_dir, "conf.txt")
        out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)

        docked_complexes = []
        all_scores = []
        vpg = Vina(sf_name='vina',
                   cpu=cpu,
                   seed=0,
                   no_refine=False,
                   verbosity=1)
        for i, (protein_centroid,
                pocket_dims) in enumerate(zip(centroids, dimensions)):
            logger.info("Docking in pocket %d/%d" % (i + 1, len(centroids)))
            logger.info("Docking with center: %s" % str(protein_centroid))
            logger.info("Box dimensions: %s" % str(pocket_dims))
            # Write Vina conf file
            write_vina_conf(protein_pdbqt,
                            ligand_pdbqt,
                            protein_centroid,
                            pocket_dims,
                            conf_file,
                            num_modes=num_modes,
                            exhaustiveness=exhaustiveness)

            logger.info("About to call Vina")

            vpg.set_receptor(protein_pdbqt)
            vpg.set_ligand_from_file(ligand_pdbqt)

            vpg.compute_vina_maps(center=protein_centroid,
                                  box_size=pocket_dims)
            vpg.dock(exhaustiveness=exhaustiveness,
                     n_poses=num_modes,
                     min_rmsd=min_rmsd,
                     max_evals=max_evals)
            vpg.write_poses(out_pdbqt,
                            n_poses=num_modes,
                            energy_range=energy_range,
                            overwrite=True)

            ligands, scores = load_docked_ligands(out_pdbqt)
            docked_complexes += [(protein_mol[1], ligand)
                                 for ligand in ligands]
            all_scores += scores

        if generate_scores:
            return docked_complexes, all_scores
        return docked_complexes
예제 #4
0
    def generate_poses(
        self,
        molecular_complex: Tuple[str, str],
        centroid: Optional[np.ndarray] = None,
        box_dims: Optional[np.ndarray] = None,
        exhaustiveness: int = 10,
        num_modes: int = 9,
        num_pockets: Optional[int] = None,
        out_dir: Optional[str] = None,
        generate_scores: Optional[bool] = False
    ) -> Union[Tuple[DOCKED_POSES, List[float]], DOCKED_POSES]:
        """Generates the docked complex and outputs files for docked complex.

        Docking is performed by invoking the Vina executable stored in
        `self.vina_cmd`, once per docking box (either the user-supplied box,
        the whole protein, or each pocket returned by `self.pocket_finder`).

        TODO: How can this work on Windows? We need to install a .msi file and
        invoke it correctly from Python for this to work.

        Parameters
        ----------
        molecular_complex: Tuple[str, str]
            A representation of a molecular complex. This tuple is
            (protein_file, ligand_file). The protein should be a pdb file
            and the ligand should be an sdf file.
        centroid: np.ndarray, optional
            The centroid to dock against. Only used when `box_dims` is also
            given; otherwise it is computed.
        box_dims: np.ndarray, optional
            A numpy array of shape `(3,)` holding the size of the box to dock.
            Only used when `centroid` is also given. If not used and no
            pocket finder is set, the box is the protein range plus 5
            angstroms.
        exhaustiveness: int, optional (default 10)
            Tells Autodock Vina how exhaustive it should be with pose
            generation.
        num_modes: int, optional (default 9)
            Tells Autodock Vina how many binding modes it should generate at
            each invocation.
        num_pockets: int, optional (default None)
            If specified, `self.pocket_finder` must be set. Will only
            generate poses for the first `num_pockets` returned by
            `self.pocket_finder`.
        out_dir: str, optional
            If specified, write generated poses to this directory. The
            directory is created when missing. A temporary directory is
            used when not specified.
        generate_scores: bool, optional (default False)
            If `True`, the pose generator will return scores for complexes.
            This is used typically when invoking external docking programs
            that compute scores.

        Returns
        -------
        Tuple[`docked_poses`, `scores`] or `docked_poses`
            Tuple of `(docked_poses, scores)` or `docked_poses`. `docked_poses`
            is a list of docked molecular complexes. Each entry in this list
            contains a `(protein_mol, ligand_mol)` pair of RDKit molecules.
            `scores` is a list of binding free energies predicted by Vina.

        Raises
        ------
        `ValueError` if `num_pockets` is set but `self.pocket_finder is None`,
        or if `molecular_complex` does not hold exactly two entries.
        """
        if num_pockets is not None and self.pocket_finder is None:
            raise ValueError(
                "If num_pockets is specified, pocket_finder must have been provided at construction time."
            )

        # Parse complex
        if len(molecular_complex) != 2:
            raise ValueError(
                "Autodock Vina can only dock protein-ligand complexes and not more general molecular complexes."
            )

        protein_file, ligand_file = molecular_complex

        if out_dir is None:
            out_dir = tempfile.mkdtemp()
        # A caller-supplied directory may not exist yet.
        os.makedirs(out_dir, exist_ok=True)

        # Prepare protein
        protein_name = os.path.basename(protein_file).split(".")[0]
        protein_hyd = os.path.join(out_dir, "%s_hyd.pdb" % protein_name)
        protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_name)
        # Index 0 of the loaded molecule feeds the centroid/range helpers and
        # index 1 is written to disk (presumably coordinates and the RDKit
        # molecule respectively -- confirm against `load_molecule`).
        protein_mol = load_molecule(protein_file,
                                    calc_charges=True,
                                    add_hydrogens=True)
        write_molecule(protein_mol[1], protein_hyd, is_protein=True)
        write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

        # Get protein centroid and range
        if centroid is not None and box_dims is not None:
            centroids = [centroid]
            dimensions = [box_dims]
        elif self.pocket_finder is None:
            logger.info("Pockets not specified. Will use whole protein to dock")
            protein_centroid = compute_centroid(protein_mol[0])
            protein_range = compute_protein_range(protein_mol[0])
            centroids, dimensions = [protein_centroid], [protein_range + 5.0]
        else:
            logger.info("About to find putative binding pockets")
            pockets = self.pocket_finder.find_pockets(protein_file)
            logger.info("%d pockets found in total" % len(pockets))
            logger.info("Computing centroid and size of proposed pockets.")
            centroids, dimensions = [], []
            for pocket in pockets:
                (x_min, x_max) = pocket.x_range
                (y_min, y_max) = pocket.y_range
                (z_min, z_max) = pocket.z_range
                # TODO(rbharath): Does vina divide box dimensions by 2?
                x_box = (x_max - x_min) / 2.
                y_box = (y_max - y_min) / 2.
                z_box = (z_max - z_min) / 2.
                centroids.append(pocket.center())
                dimensions.append((x_box, y_box, z_box))

        if num_pockets is not None:
            logger.info(
                "num_pockets = %d so selecting this many pockets for docking."
                % num_pockets)
            centroids = centroids[:num_pockets]
            dimensions = dimensions[:num_pockets]

        # Prepare ligand
        ligand_name = os.path.basename(ligand_file).split(".")[0]
        ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)

        ligand_mol = load_molecule(ligand_file,
                                   calc_charges=True,
                                   add_hydrogens=True)
        write_molecule(ligand_mol[1], ligand_pdbqt)

        # These paths do not depend on the pocket; every pocket overwrites the
        # same files, which are read back before the next iteration.
        conf_file = os.path.join(out_dir, "conf.txt")
        log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
        out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)

        # Passing a list together with shell=True on POSIX runs only the first
        # element as the command, which is why the list form used to fail
        # there. Running the list without a shell works on POSIX and keeps
        # paths containing spaces or shell metacharacters intact. The Windows
        # invocation is left as it was.
        use_shell = platform.system() == 'Windows'
        args = [
            self.vina_cmd, "--config", conf_file, "--log", log_file, "--out",
            out_pdbqt
        ]

        docked_complexes = []
        all_scores = []
        for i, (protein_centroid,
                pocket_dims) in enumerate(zip(centroids, dimensions)):
            logger.info("Docking in pocket %d/%d" % (i + 1, len(centroids)))
            logger.info("Docking with center: %s" % str(protein_centroid))
            logger.info("Box dimensions: %s" % str(pocket_dims))
            # Write Vina conf file
            write_vina_conf(protein_pdbqt,
                            ligand_pdbqt,
                            protein_centroid,
                            pocket_dims,
                            conf_file,
                            num_modes=num_modes,
                            exhaustiveness=exhaustiveness)

            logger.info("About to call Vina")
            return_code = call(args, shell=use_shell)
            if return_code != 0:
                # Surface the failure instead of discarding it; the loader
                # below will still raise if the output file is missing.
                logger.warning("Vina exited with return code %d", return_code)

            ligands, scores = load_docked_ligands(out_pdbqt)
            docked_complexes += [(protein_mol[1], ligand)
                                 for ligand in ligands]
            all_scores += scores

        if generate_scores:
            return docked_complexes, all_scores
        return docked_complexes