Ejemplo n.º 1
0
 def test_write_gnina_conf(self):
     """Check that `write_gnina_conf` creates the requested config file.

     The config is written inside a temporary directory rather than the
     current working directory, so the test cannot overwrite or delete an
     unrelated `conf.txt`, and cleanup happens even when the call or the
     assertion fails.
     """
     import tempfile

     with tempfile.TemporaryDirectory() as tmp_dir:
         conf_path = os.path.join(tmp_dir, 'conf.txt')
         docking_utils.write_gnina_conf(
             'protein.pdb',
             'ligand.sdf',
             conf_path,
         )
         assert os.path.exists(conf_path)
Ejemplo n.º 2
0
    def generate_poses(
            self,
            molecular_complex: Tuple[str, str],
            centroid: Optional[np.ndarray] = None,
            box_dims: Optional[np.ndarray] = None,
            exhaustiveness: int = 10,
            num_modes: int = 9,
            num_pockets: Optional[int] = None,
            out_dir: Optional[str] = None,
            generate_scores: bool = True,
            **kwargs) -> Union[Tuple[DOCKED_POSES, np.ndarray], DOCKED_POSES]:
        """Generates the docked complex and outputs files for docked complex.

        Parameters
        ----------
        molecular_complex: Tuple[str, str]
          A representation of a molecular complex. This tuple is
          (protein_file, ligand_file). The protein file must end in
          `.pdb` and the ligand file in `.sdf`.
        centroid: np.ndarray, optional (default None)
          Currently not used by this method.
        box_dims: np.ndarray, optional (default None)
          Currently not used by this method.
        exhaustiveness: int (default 10)
          Tells GNINA how exhaustive it should be with pose
          generation.
        num_modes: int (default 9)
          Tells GNINA how many binding modes it should generate at
          each invocation.
        num_pockets: int, optional (default None)
          Currently not used by this method.
        out_dir: str, optional
          If specified, write generated poses to this directory (it is
          created if missing). Otherwise a fresh temporary directory is
          created with `tempfile.mkdtemp()`.
        generate_scores: bool, optional (default True)
          If `True`, the pose generator will return scores for complexes.
          This is used typically when invoking external docking programs
          that compute scores.
        kwargs:
          Forwarded to `write_gnina_conf`. Any args supported by GNINA as
          documented https://github.com/gnina/gnina#usage

        Returns
        -------
        Tuple[`docked_poses`, `scores`] or `docked_poses`
          Tuple of `(docked_poses, scores)` or `docked_poses`. `docked_poses`
          is a list of docked molecular complexes. Each entry in this list
          contains a `(protein_mol, ligand_mol)` pair of RDKit molecules.
          `scores` is an array of binding affinities (kcal/mol),
          CNN pose scores, and CNN affinities predicted by GNINA.

        Raises
        ------
        ValueError
          If `molecular_complex` does not hold exactly two entries, or if
          the protein/ligand files do not have the `.pdb`/`.sdf` extension.

        """
        # Validate the inputs before touching the filesystem so that a bad
        # call does not leave a stray temporary directory behind.
        if len(molecular_complex) != 2:
            raise ValueError(
                "GNINA can only dock protein-ligand complexes and not more general molecular complexes."
            )

        (protein_file, ligand_file) = molecular_complex

        # check filetypes
        if not protein_file.endswith('.pdb'):
            raise ValueError('Protein file must be in .pdb format.')
        if not ligand_file.endswith('.sdf'):
            raise ValueError('Ligand file must be in .sdf format.')

        if out_dir is None:
            out_dir = tempfile.mkdtemp()
        else:
            os.makedirs(out_dir, exist_ok=True)

        protein_mol = load_molecule(protein_file,
                                    calc_charges=True,
                                    add_hydrogens=True)
        # Everything before the first "." of the ligand's base name is used
        # as the prefix of the log and output file names.
        ligand_name = os.path.basename(ligand_file).split(".")[0]

        # Define locations of log and output files
        log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
        out_file = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
        logger.info("About to call GNINA.")

        # Write GNINA conf file
        conf_file = os.path.join(out_dir, "conf.txt")
        write_gnina_conf(protein_filename=protein_file,
                         ligand_filename=ligand_file,
                         conf_filename=conf_file,
                         num_modes=num_modes,
                         exhaustiveness=exhaustiveness,
                         **kwargs)

        # Run GNINA
        args = [
            self.gnina_cmd, "--config", conf_file, "--log", log_file, "--out",
            out_file
        ]
        process = Popen(args, stdout=PIPE, stderr=PIPE)
        _, stderr = process.communicate()
        if process.returncode != 0:
            # Surface the failure instead of silently discarding it; the
            # reads below are still attempted, as before.
            logger.warning("GNINA exited with return code %d: %s",
                           process.returncode,
                           stderr.decode(errors="replace").strip())

        # read output and log
        ligands, _ = load_docked_ligands(out_file)
        # NOTE(review): element 1 of `load_molecule`'s result is presumably
        # the RDKit molecule -- confirm against `load_molecule`.
        docked_complexes = [(protein_mol[1], ligand) for ligand in ligands]
        scores = read_gnina_log(log_file)

        if generate_scores:
            return docked_complexes, scores
        return docked_complexes