Esempio n. 1
0
    def test_pdbqt_to_pdb(self):
        """Check that converting a PDBQT file to PDB keeps the atoms intact.

        The protein in ``self.protein_file`` is written to a temporary
        directory as both PDB and PDBQT. The PDBQT file is then read twice:
        once through ``pdbqt_utils.pdbqt_to_pdb`` (parsed with RDKit from the
        resulting PDB block) and once directly through
        ``rdkit_utils.load_molecule``. Both molecules must have the same
        number of atoms and the same atomic number at every index.
        """
        _, protein = rdkit_utils.load_molecule(self.protein_file,
                                               calc_charges=False,
                                               add_hydrogens=False)
        with tempfile.TemporaryDirectory() as workdir:
            pdb_path = os.path.join(workdir, "mol.pdb")
            pdbqt_path = os.path.join(workdir, "mol.pdbqt")

            # Write the protein in both formats; only the PDBQT file is read
            # back below.
            rdkit_utils.write_molecule(protein, pdb_path, is_protein=True)
            rdkit_utils.write_molecule(protein, pdbqt_path, is_protein=True)

            converted_block = pdbqt_utils.pdbqt_to_pdb(pdbqt_path)
            from rdkit import Chem
            pdb_mol = Chem.MolFromPDBBlock(converted_block,
                                           sanitize=False,
                                           removeHs=False)

            _, pdbqt_mol = rdkit_utils.load_molecule(pdbqt_path,
                                                     add_hydrogens=False,
                                                     calc_charges=False)

        assert pdb_mol.GetNumAtoms() == pdbqt_mol.GetNumAtoms()
        # Atom counts match, so pairing the atoms visits every index once.
        for converted_atom, reloaded_atom in zip(pdb_mol.GetAtoms(),
                                                 pdbqt_mol.GetAtoms()):
            assert converted_atom.GetAtomicNum() == reloaded_atom.GetAtomicNum()
Esempio n. 2
0
  def test_write_molecule(self):
    """Round-trip a ligand through an SDF file and compare the atoms.

    Loads ``1jld_ligand.sdf``, writes it to a temporary ``mol.sdf`` with
    ``rdkit_utils.write_molecule``, reloads that file, and checks that the
    reloaded molecule has the same number of atoms and the same atomic
    number at every index as the original.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_utils.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=False)

    with tempfile.TemporaryDirectory() as tmp:
      outfile = os.path.join(tmp, "mol.sdf")
      rdkit_utils.write_molecule(mol, outfile)

      _, mol2 = rdkit_utils.load_molecule(
          outfile, calc_charges=False, add_hydrogens=False)

    assert mol.GetNumAtoms() == mol2.GetNumAtoms()
    # Compare the original against the *reloaded* molecule; comparing `mol`
    # with itself would make this check vacuous.
    for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
      assert atom1.GetAtomicNum() == atom2.GetAtomicNum()
Esempio n. 3
0
    def generate_poses(
        self,
        molecular_complex: Tuple[str, str],
        centroid: Optional[np.ndarray] = None,
        box_dims: Optional[np.ndarray] = None,
        exhaustiveness: int = 10,
        num_modes: int = 9,
        num_pockets: Optional[int] = None,
        out_dir: Optional[str] = None,
        generate_scores: bool = False
    ) -> Union[Tuple[DOCKED_POSES, List[float]], DOCKED_POSES]:
        """Generates the docked complex and outputs files for docked complex.

        The protein and ligand are loaded, written to `out_dir` as PDBQT
        files, and the executable `self.vina_cmd` is invoked once per docking
        box. The docked ligands are read back from the output file that Vina
        writes.

        TODO: How can this work on Windows? We need to install a .msi file and
        invoke it correctly from Python for this to work.

        Parameters
        ----------
        molecular_complex: Tuple[str, str]
            A representation of a molecular complex. This tuple is
            (protein_file, ligand_file).
        centroid: np.ndarray, optional
            The centroid to dock against. Only used when `box_dims` is also
            given; otherwise the docking boxes are computed from the protein
            or from `self.pocket_finder`.
        box_dims: np.ndarray, optional
            A numpy array of shape `(3,)` holding the size of the box to dock.
            Only used when `centroid` is also given. If no box is given and
            there is no pocket finder, the box is set to the size of the
            protein plus 5 angstroms.
        exhaustiveness: int, optional (default 10)
            Tells Autodock Vina how exhaustive it should be with pose
            generation.
        num_modes: int, optional (default 9)
            Tells Autodock Vina how many binding modes it should generate at
            each invocation.
        num_pockets: int, optional (default None)
            If specified, `self.pocket_finder` must be set. Will only
            generate poses for the first `num_pockets` returned by
            `self.pocket_finder`.
        out_dir: str, optional
            If specified, write generated poses to this directory. Otherwise
            a fresh temporary directory is created (and not removed here).
        generate_scores: bool, optional (default False)
            If `True`, the pose generator will return scores for complexes.
            This is used typically when invoking external docking programs
            that compute scores.

        Returns
        -------
        Tuple[`docked_poses`, `scores`] or `docked_poses`
            Tuple of `(docked_poses, scores)` or `docked_poses`. `docked_poses`
            is a list of docked molecular complexes. Each entry in this list
            contains a `(protein_mol, ligand_mol)` pair of RDKit molecules.
            `scores` is a list of binding free energies predicted by Vina.

        Raises
        ------
        ValueError
            If `num_pockets` is set but `self.pocket_finder is None`, or if
            `molecular_complex` does not hold exactly two entries.
        """
        # Validate the inputs before touching the filesystem so that invalid
        # calls do not leave a stray temporary directory behind.
        if num_pockets is not None and self.pocket_finder is None:
            raise ValueError(
                "If num_pockets is specified, pocket_finder must have been provided at construction time."
            )

        # Parse complex
        if len(molecular_complex) != 2:
            raise ValueError(
                "Autodock Vina can only dock protein-ligand complexes and not more general molecular complexes."
            )

        (protein_file, ligand_file) = molecular_complex

        if out_dir is None:
            out_dir = tempfile.mkdtemp()

        # Prepare protein
        protein_name = os.path.basename(protein_file).split(".")[0]
        protein_hyd = os.path.join(out_dir, "%s_hyd.pdb" % protein_name)
        protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_name)
        # load_molecule returns a sequence whose first entry is the
        # coordinates and whose second entry is the molecule object.
        protein_mol = load_molecule(protein_file,
                                    calc_charges=True,
                                    add_hydrogens=True)
        write_molecule(protein_mol[1], protein_hyd, is_protein=True)
        write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

        # Get the docking boxes: one (centroid, dimensions) pair per box.
        if centroid is not None and box_dims is not None:
            centroids = [centroid]
            dimensions = [box_dims]
        elif self.pocket_finder is None:
            logger.info("Pockets not specified. Will use whole protein to dock")
            centroids = [compute_centroid(protein_mol[0])]
            dimensions = [compute_protein_range(protein_mol[0]) + 5.0]
        else:
            logger.info("About to find putative binding pockets")
            pockets = self.pocket_finder.find_pockets(protein_file)
            logger.info("%d pockets found in total", len(pockets))
            logger.info("Computing centroid and size of proposed pockets.")
            centroids, dimensions = [], []
            for pocket in pockets:
                (x_min, x_max), (y_min, y_max), (z_min, z_max) = (
                    pocket.x_range, pocket.y_range, pocket.z_range)
                # TODO(rbharath): Does vina divide box dimensions by 2?
                centroids.append(pocket.center())
                dimensions.append(((x_max - x_min) / 2.,
                                   (y_max - y_min) / 2.,
                                   (z_max - z_min) / 2.))

        if num_pockets is not None:
            logger.info(
                "num_pockets = %d so selecting this many pockets for docking.",
                num_pockets)
            centroids = centroids[:num_pockets]
            dimensions = dimensions[:num_pockets]

        # Prepare ligand
        ligand_name = os.path.basename(ligand_file).split(".")[0]
        ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)

        ligand_mol = load_molecule(ligand_file,
                                   calc_charges=True,
                                   add_hydrogens=True)
        write_molecule(ligand_mol[1], ligand_pdbqt)

        # The conf, log and output paths are the same for every box; each
        # iteration overwrites them and reads the output back immediately.
        conf_file = os.path.join(out_dir, "conf.txt")
        log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
        out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
        # Pass the command as an argument list WITHOUT a shell. With
        # `shell=True` a list only works on Windows (on POSIX just the first
        # element is executed), and an interpolated command string breaks on
        # paths containing spaces or shell metacharacters.
        vina_args = [
            self.vina_cmd, "--config", conf_file, "--log", log_file, "--out",
            out_pdbqt
        ]

        docked_complexes = []
        all_scores = []
        for i, (box_centroid, box_size) in enumerate(zip(centroids,
                                                          dimensions)):
            logger.info("Docking in pocket %d/%d", i + 1, len(centroids))
            logger.info("Docking with center: %s", box_centroid)
            logger.info("Box dimensions: %s", box_size)
            # Write Vina conf file
            write_vina_conf(protein_pdbqt,
                            ligand_pdbqt,
                            box_centroid,
                            box_size,
                            conf_file,
                            num_modes=num_modes,
                            exhaustiveness=exhaustiveness)

            logger.info("About to call Vina")
            # FIXME: We should use `subprocess.run` instead of `call`
            return_code = call(vina_args)
            if return_code != 0:
                # Surface the failure; loading the output below may then fail
                # or pick up a file left over from a previous box.
                logger.warning("Vina exited with non-zero status %d",
                               return_code)
            ligands, scores = load_docked_ligands(out_pdbqt)
            docked_complexes += [(protein_mol[1], ligand)
                                 for ligand in ligands]
            all_scores += scores

        if generate_scores:
            return docked_complexes, all_scores
        else:
            return docked_complexes
Esempio n. 4
0
    def generate_poses(
            self,
            molecular_complex: Tuple[str, str],
            centroid: Optional[np.ndarray] = None,
            box_dims: Optional[np.ndarray] = None,
            exhaustiveness: int = 10,
            num_modes: int = 9,
            num_pockets: Optional[int] = None,
            out_dir: Optional[str] = None,
            generate_scores: Optional[bool] = False,
            **kwargs) -> Union[Tuple[DOCKED_POSES, List[float]], DOCKED_POSES]:
        """Dock a ligand against a protein with the `vina` Python package.

        The protein and ligand are loaded, written to `out_dir` as PDBQT
        files, and docked once per docking box. The poses written by Vina are
        read back and paired with the protein molecule.

        Parameters
        ----------
        molecular_complex: Tuple[str, str]
            A representation of a molecular complex. This tuple is
            (protein_file, ligand_file). The protein should be a pdb file
            and the ligand should be an sdf file.
        centroid: np.ndarray, optional
            The centroid to dock against. Only used when `box_dims` is also
            given; otherwise the docking boxes are computed.
        box_dims: np.ndarray, optional
            A numpy array of shape `(3,)` holding the size of the box to dock.
            Only used when `centroid` is also given. If no box is given and
            there is no pocket finder, the box is set to the size of the
            protein plus 5 angstroms.
        exhaustiveness: int, optional (default 10)
            Tells Autodock Vina how exhaustive it should be with pose
            generation. A higher value of exhaustiveness implies more
            computation effort for the docking experiment.
        num_modes: int, optional (default 9)
            Tells Autodock Vina how many binding modes it should generate at
            each invocation.
        num_pockets: int, optional (default None)
            If specified, `self.pocket_finder` must be set. Will only
            generate poses for the first `num_pockets` returned by
            `self.pocket_finder`.
        out_dir: str, optional
            If specified, write generated poses to this directory. Otherwise
            a fresh temporary directory is created.
        generate_scores: bool, optional (default False)
            If `True`, the pose generator will return scores for complexes.
            This is used typically when invoking external docking programs
            that compute scores.
        kwargs:
            The kwargs - cpu, min_rmsd, max_evals, energy_range supported by
            VINA are as documented in
            https://autodock-vina.readthedocs.io/en/latest/vina.html
            Defaults used here: cpu=0, min_rmsd=1.0, max_evals=0,
            energy_range=3.0. Other keys are ignored.

        Returns
        -------
        Tuple[`docked_poses`, `scores`] or `docked_poses`
            Tuple of `(docked_poses, scores)` or `docked_poses`. `docked_poses`
            is a list of docked molecular complexes. Each entry in this list
            contains a `(protein_mol, ligand_mol)` pair of RDKit molecules.
            `scores` is a list of binding free energies predicted by Vina.

        Raises
        ------
        ImportError
            If the `vina` package is not installed.
        ValueError
            If `num_pockets` is set but `self.pocket_finder is None`, or if
            `molecular_complex` has more than two entries.
        """
        # Optional Vina tuning knobs, each with its fallback value.
        cpu = kwargs.get("cpu", 0)
        min_rmsd = kwargs.get("min_rmsd", 1.0)
        max_evals = kwargs.get("max_evals", 0)
        energy_range = kwargs.get("energy_range", 3.0)

        try:
            from vina import Vina
        except ModuleNotFoundError:
            raise ImportError("This function requires vina to be installed")

        if out_dir is None:
            out_dir = tempfile.mkdtemp()

        if num_pockets is not None and self.pocket_finder is None:
            raise ValueError(
                "If num_pockets is specified, pocket_finder must have been provided at construction time."
            )

        # Only (protein, ligand) pairs are supported.
        if len(molecular_complex) > 2:
            raise ValueError(
                "Autodock Vina can only dock protein-ligand complexes and not more general molecular complexes."
            )

        protein_file, ligand_file = molecular_complex

        # Write the protein with hydrogens as PDB and as PDBQT.
        protein_stem = os.path.basename(protein_file).split(".")[0]
        protein_hyd = os.path.join(out_dir, "%s_hyd.pdb" % protein_stem)
        protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_stem)
        protein_mol = load_molecule(protein_file,
                                    calc_charges=True,
                                    add_hydrogens=True)
        write_molecule(protein_mol[1], protein_hyd, is_protein=True)
        write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

        # Work out the docking boxes: one (centroid, dimensions) pair each.
        if centroid is not None and box_dims is not None:
            centroids = [centroid]
            dimensions = [box_dims]
        elif self.pocket_finder is None:
            logger.info(
                "Pockets not specified. Will use whole protein to dock")
            centroids = [compute_centroid(protein_mol[0])]
            dimensions = [compute_protein_range(protein_mol[0]) + 5.0]
        else:
            logger.info("About to find putative binding pockets")
            pockets = self.pocket_finder.find_pockets(protein_file)
            logger.info("%d pockets found in total" % len(pockets))
            logger.info("Computing centroid and size of proposed pockets.")
            centroids, dimensions = [], []
            for pocket in pockets:
                axis_ranges = (pocket.x_range, pocket.y_range, pocket.z_range)
                # TODO(rbharath: Does vina divide box dimensions by 2?
                half_extents = [(hi - lo) / 2. for (lo, hi) in axis_ranges]
                centroids.append(pocket.center())
                dimensions.append(np.array(half_extents))

        if num_pockets is not None:
            logger.info(
                "num_pockets = %d so selecting this many pockets for docking."
                % num_pockets)
            centroids = centroids[:num_pockets]
            dimensions = dimensions[:num_pockets]

        # Write the ligand with hydrogens as PDBQT.
        ligand_stem = os.path.basename(ligand_file).split(".")[0]
        ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_stem)

        ligand_mol = load_molecule(ligand_file,
                                   calc_charges=True,
                                   add_hydrogens=True)
        write_molecule(ligand_mol[1], ligand_pdbqt)

        docked_complexes = []
        all_scores = []
        vpg = Vina(sf_name='vina',
                   cpu=cpu,
                   seed=0,
                   no_refine=False,
                   verbosity=1)
        total_boxes = len(centroids)
        for box_number, (box_center, box_size) in enumerate(zip(
                centroids, dimensions),
                                                            start=1):
            logger.info("Docking in pocket %d/%d" % (box_number, total_boxes))
            logger.info("Docking with center: %s" % str(box_center))
            logger.info("Box dimensions: %s" % str(box_size))
            # A conf file is still written for every box, although the
            # docking itself is driven through the Python calls below.
            conf_file = os.path.join(out_dir, "conf.txt")
            write_vina_conf(protein_pdbqt,
                            ligand_pdbqt,
                            box_center,
                            box_size,
                            conf_file,
                            num_modes=num_modes,
                            exhaustiveness=exhaustiveness)

            # Poses for this box are written here and read back right away.
            out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_stem)
            logger.info("About to call Vina")

            vpg.set_receptor(protein_pdbqt)
            vpg.set_ligand_from_file(ligand_pdbqt)

            vpg.compute_vina_maps(center=box_center, box_size=box_size)
            vpg.dock(exhaustiveness=exhaustiveness,
                     n_poses=num_modes,
                     min_rmsd=min_rmsd,
                     max_evals=max_evals)
            vpg.write_poses(out_pdbqt,
                            n_poses=num_modes,
                            energy_range=energy_range,
                            overwrite=True)

            ligands, scores = load_docked_ligands(out_pdbqt)
            docked_complexes.extend(
                (protein_mol[1], ligand) for ligand in ligands)
            all_scores.extend(scores)

        if generate_scores:
            return docked_complexes, all_scores
        return docked_complexes