Esempio n. 1
0
  def test_pdbqt_to_pdb(self):
    """Check that a written PDBQT file converts back to an equivalent PDB.

    The protein is written as both PDB and PDBQT. The PDBQT file is then
    converted to a PDB block and parsed, and also loaded directly; both
    resulting molecules must have the same atom count and the same atomic
    number at each atom index.
    """
    # Function-scope import so this block does not depend on a
    # module-level `import tempfile`.
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(current_dir,
                                "../../dock/tests/1jld_protein.pdb")
    _, mol = rdkit_util.load_molecule(
        protein_file, calc_charges=False, add_hydrogens=False)

    # A private temporary directory replaces the fixed /tmp paths: it cannot
    # collide with a concurrent test run, and it is removed even when an
    # assertion or a conversion step fails.
    with tempfile.TemporaryDirectory() as tmp:
      out_pdb = os.path.join(tmp, "mol.pdb")
      out_pdbqt = os.path.join(tmp, "mol.pdbqt")

      rdkit_util.write_molecule(mol, out_pdb)
      rdkit_util.write_molecule(mol, out_pdbqt, is_protein=True)

      pdb_block = rdkit_util.pdbqt_to_pdb(out_pdbqt)
      pdb_mol = Chem.MolFromPDBBlock(pdb_block, sanitize=False, removeHs=False)

      _, pdbqt_mol = rdkit_util.load_molecule(
          out_pdbqt, add_hydrogens=False, calc_charges=False)

    assert_equal(pdb_mol.GetNumAtoms(), pdbqt_mol.GetNumAtoms())
    for atom1, atom2 in zip(pdb_mol.GetAtoms(), pdbqt_mol.GetAtoms()):
      assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
Esempio n. 2
0
    def test_pdbqt_to_pdb(self):
        """Check that a written PDBQT file converts back to an equivalent PDB.

        The protein is written as both PDB and PDBQT. The PDBQT file is then
        converted to a PDB block and parsed, and also loaded directly; both
        resulting molecules must have the same atom count and the same
        atomic number at each atom index.
        """
        # Function-scope imports so this block does not depend on
        # module-level imports being present.
        import tempfile
        from rdkit import Chem

        current_dir = os.path.dirname(os.path.realpath(__file__))
        protein_file = os.path.join(current_dir,
                                    "../../dock/tests/1jld_protein.pdb")
        _, mol = rdkit_util.load_molecule(protein_file,
                                          calc_charges=False,
                                          add_hydrogens=False)

        # A private temporary directory replaces the fixed /tmp paths: it
        # cannot collide with a concurrent test run, and it is removed even
        # when an assertion or a conversion step fails.
        with tempfile.TemporaryDirectory() as tmp:
            out_pdb = os.path.join(tmp, "mol.pdb")
            out_pdbqt = os.path.join(tmp, "mol.pdbqt")

            rdkit_util.write_molecule(mol, out_pdb)
            rdkit_util.write_molecule(mol, out_pdbqt, is_protein=True)

            pdb_block = rdkit_util.pdbqt_to_pdb(out_pdbqt)
            pdb_mol = Chem.MolFromPDBBlock(pdb_block,
                                           sanitize=False,
                                           removeHs=False)

            _, pdbqt_mol = rdkit_util.load_molecule(out_pdbqt,
                                                    add_hydrogens=False,
                                                    calc_charges=False)

        assert_equal(pdb_mol.GetNumAtoms(), pdbqt_mol.GetNumAtoms())
        for atom1, atom2 in zip(pdb_mol.GetAtoms(), pdbqt_mol.GetAtoms()):
            assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
Esempio n. 3
0
    def test_pdbqt_to_pdb(self):
        """Round-trip a protein through PDBQT and back into a PDB block.

        The molecule loaded from `self.protein_file` is written as PDB and
        PDBQT inside a temporary directory. The PDBQT file is converted to a
        PDB block and parsed, and is also loaded directly; the two results
        must agree on atom count and on the atomic number at every index.
        """
        _, protein = rdkit_util.load_molecule(self.protein_file,
                                              calc_charges=False,
                                              add_hydrogens=False)
        with tempfile.TemporaryDirectory() as workdir:
            pdb_path = os.path.join(workdir, "mol.pdb")
            pdbqt_path = os.path.join(workdir, "mol.pdbqt")

            # Both files are written with the protein flag set.
            for target in (pdb_path, pdbqt_path):
                rdkit_util.write_molecule(protein, target, is_protein=True)

            converted_block = pdbqt_utils.pdbqt_to_pdb(pdbqt_path)
            from rdkit import Chem
            converted = Chem.MolFromPDBBlock(converted_block,
                                             sanitize=False,
                                             removeHs=False)

            _, reloaded = rdkit_util.load_molecule(pdbqt_path,
                                                   add_hydrogens=False,
                                                   calc_charges=False)

        atom_count = converted.GetNumAtoms()
        assert atom_count == reloaded.GetNumAtoms()
        for idx in range(atom_count):
            lhs = converted.GetAtoms()[idx].GetAtomicNum()
            rhs = reloaded.GetAtoms()[idx].GetAtomicNum()
            assert lhs == rhs
Esempio n. 4
0
def prepare_ligand(args):
    """Write a ligand to SDF and generate its prepared PDB/PDBQT files.

    Parameters
    ----------
    args: sequence
      Read positionally: `args[0]` is the molecule name, `args[1]` the
      molecule handed to `rdkit_util.write_molecule`, and `args[2]` the
      directory all files are written into.

    Returns
    -------
    None. The SDF file is always (re)written; the prepared files are only
    generated when `<name>_prepared.pdbqt` does not already exist.
    """
    name = args[0]
    ligand = args[1]
    out_dir = args[2]

    sdf_path = os.path.join(out_dir, str(name) + ".sdf")
    rdkit_util.write_molecule(ligand, sdf_path)

    hyd_path = os.path.join(out_dir, "%s_prepared.pdb" % name)
    pdbqt_path = os.path.join(out_dir, "%s_prepared.pdbqt" % name)

    # Skip the preparation step when its output is already on disk.
    if not os.path.exists(pdbqt_path):
        hydrogenate_and_compute_partial_charges(sdf_path,
                                                "sdf",
                                                hyd_output=hyd_path,
                                                pdbqt_output=pdbqt_path,
                                                verbose=False,
                                                protein=False)
Esempio n. 5
0
def prepare_ligand(args):
  """Write a ligand to SDF and generate its prepared PDB/PDBQT files.

  Parameters
  ----------
  args: sequence
    Read positionally: `args[0]` is the molecule name, `args[1]` the
    molecule handed to `rdkit_util.write_molecule`, and `args[2]` the
    directory all files are written into.

  Returns
  -------
  None. The SDF file is always (re)written; the prepared files are only
  generated when `<name>_prepared.pdbqt` does not already exist.
  """
  name = args[0]
  ligand = args[1]
  out_dir = args[2]

  sdf_path = os.path.join(out_dir, str(name) + ".sdf")
  rdkit_util.write_molecule(ligand, sdf_path)

  hyd_path = os.path.join(out_dir, "%s_prepared.pdb" % name)
  pdbqt_path = os.path.join(out_dir, "%s_prepared.pdbqt" % name)

  # Skip the preparation step when its output is already on disk.
  if not os.path.exists(pdbqt_path):
    hydrogenate_and_compute_partial_charges(
        sdf_path,
        "sdf",
        hyd_output=hyd_path,
        pdbqt_output=pdbqt_path,
        verbose=False,
        protein=False)
Esempio n. 6
0
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
    """Outputs a hydrogenated pdb and a pdbqt with partial charges.

    Loads `input_file` with hydrogens added and charges calculated, then
    writes the same molecule to two outputs:

    -) `hyd_output`: the hydrogenated molecule.
    -) `pdbqt_output`: the molecule in pdbqt form.

    When `protein` is True, ROOT / ENDROOT / TORSDOF records are stripped
    from the pdbqt file afterwards.

    TODO(rbharath): Can do a bit of refactoring between this function and
    pdbqt_to_pdb.

    Parameters
    ----------
    input_file: String
      Path to input file.
    input_format: String
      Name of input format. Not read by this function; kept so existing
      callers continue to work.
    hyd_output: String, optional
      Path of the hydrogenated output file. Converted with `str()` before
      being passed to `rdkit_util.write_molecule`.
    pdbqt_output: String, optional
      Path of the pdbqt output file. Converted with `str()` before being
      passed to `rdkit_util.write_molecule`.
    protein: bool, optional (default True)
      Forwarded as `is_protein` to `rdkit_util.write_molecule`; also
      enables the ROOT/ENDROOT/TORSDOF clean-up of the pdbqt file.
    verbose: bool, optional (default True)
      If True, progress messages are printed.
    """
    mol = rdkit_util.load_molecule(input_file,
                                   add_hydrogens=True,
                                   calc_charges=True)[1]
    if verbose:
        print("Create pdb with hydrogens added")
    rdkit_util.write_molecule(mol, str(hyd_output), is_protein=protein)
    if verbose:
        print("Create a pdbqt file from the hydrogenated pdb above.")
    rdkit_util.write_molecule(mol, str(pdbqt_output), is_protein=protein)

    if not protein:
        return

    if verbose:
        print("Removing ROOT/ENDROOT/TORSDOF")
    # Records are matched on the leading keyword so that only the
    # torsion-tree lines are dropped. Previously every line was copied back
    # unchanged, so nothing was actually removed.
    torsion_records = ("ROOT", "ENDROOT", "TORSDOF")
    with open(pdbqt_output) as f:
        filtered_lines = [
            line for line in f if not line.startswith(torsion_records)
        ]
    with open(pdbqt_output, "w") as f:
        f.writelines(filtered_lines)
Esempio n. 7
0
  def test_write_molecule(self):
    """Check that a ligand written to SDF reloads with the same atoms.

    The ligand is loaded, written to an SDF file, and loaded again. The
    reloaded molecule must have the same atom count and the same atomic
    number at each atom index as the original.
    """
    # Function-scope import so this block does not depend on a
    # module-level `import tempfile`.
    import tempfile

    current_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")
    _, mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=False, add_hydrogens=False)

    # A private temporary directory replaces the fixed /tmp path: it cannot
    # collide with a concurrent test run and is removed even on failure.
    with tempfile.TemporaryDirectory() as tmp:
      outfile = os.path.join(tmp, "mol.sdf")
      rdkit_util.write_molecule(mol, outfile)

      _, mol2 = rdkit_util.load_molecule(
          outfile, calc_charges=False, add_hydrogens=False)

    assert_equal(mol.GetNumAtoms(), mol2.GetNumAtoms())
    # Compare the original against the reloaded molecule. Previously both
    # atoms were taken from `mol`, so this check could never fail.
    for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
      assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
Esempio n. 8
0
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
  """Outputs a hydrogenated pdb and a pdbqt with partial charges.

  Loads `input_file` with hydrogens added and charges calculated, then
  writes the same molecule to two outputs:

  -) `hyd_output`: the hydrogenated molecule.
  -) `pdbqt_output`: the molecule in pdbqt form.

  When `protein` is True, ROOT / ENDROOT / TORSDOF records are stripped
  from the pdbqt file afterwards.

  TODO(rbharath): Can do a bit of refactoring between this function and
  pdbqt_to_pdb.

  Parameters
  ----------
  input_file: String
    Path to input file.
  input_format: String
    Name of input format. Not read by this function; kept so existing
    callers continue to work.
  hyd_output: String, optional
    Path of the hydrogenated output file. Converted with `str()` before
    being passed to `rdkit_util.write_molecule`.
  pdbqt_output: String, optional
    Path of the pdbqt output file. Converted with `str()` before being
    passed to `rdkit_util.write_molecule`.
  protein: bool, optional (default True)
    Forwarded as `is_protein` to `rdkit_util.write_molecule`; also enables
    the ROOT/ENDROOT/TORSDOF clean-up of the pdbqt file.
  verbose: bool, optional (default True)
    If True, progress messages are logged.
  """
  mol = rdkit_util.load_molecule(
      input_file, add_hydrogens=True, calc_charges=True)[1]
  if verbose:
    logging.info("Create pdb with hydrogens added")
  rdkit_util.write_molecule(mol, str(hyd_output), is_protein=protein)
  if verbose:
    logging.info("Create a pdbqt file from the hydrogenated pdb above.")
  rdkit_util.write_molecule(mol, str(pdbqt_output), is_protein=protein)

  if not protein:
    return

  if verbose:
    logging.info("Removing ROOT/ENDROOT/TORSDOF")
  # Records are matched on the leading keyword so that only the
  # torsion-tree lines are dropped. Previously every line was copied back
  # unchanged, so nothing was actually removed.
  torsion_records = ("ROOT", "ENDROOT", "TORSDOF")
  with open(pdbqt_output) as f:
    filtered_lines = [
        line for line in f if not line.startswith(torsion_records)
    ]
  with open(pdbqt_output, "w") as f:
    f.writelines(filtered_lines)
Esempio n. 9
0
    def test_write_molecule(self):
        """Check that a ligand written to SDF reloads with the same atoms.

        The ligand is loaded, written to an SDF file inside a temporary
        directory, and loaded again. The reloaded molecule must have the
        same atom count and the same atomic number at each atom index as
        the original.
        """
        current_dir = os.path.dirname(os.path.realpath(__file__))
        ligand_file = os.path.join(current_dir,
                                   "../../dock/tests/1jld_ligand.sdf")
        _, mol = rdkit_util.load_molecule(ligand_file,
                                          calc_charges=False,
                                          add_hydrogens=False)

        with tempfile.TemporaryDirectory() as tmp:
            outfile = os.path.join(tmp, "mol.sdf")
            rdkit_util.write_molecule(mol, outfile)

            _, mol2 = rdkit_util.load_molecule(outfile,
                                               calc_charges=False,
                                               add_hydrogens=False)

        assert mol.GetNumAtoms() == mol2.GetNumAtoms()
        # Compare the original against the reloaded molecule. Previously
        # both atoms were taken from `mol`, so this check could never fail.
        for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
            assert atom1.GetAtomicNum() == atom2.GetAtomicNum()
Esempio n. 10
0
    def test_write_molecule(self):
        """Check that a ligand written to SDF reloads with the same atoms.

        The ligand is loaded, written to an SDF file, and loaded again. The
        reloaded molecule must have the same atom count and the same atomic
        number at each atom index as the original.
        """
        # Function-scope import so this block does not depend on a
        # module-level `import tempfile`.
        import tempfile

        current_dir = os.path.dirname(os.path.realpath(__file__))
        ligand_file = os.path.join(current_dir,
                                   "../../dock/tests/1jld_ligand.sdf")
        _, mol = rdkit_util.load_molecule(ligand_file,
                                          calc_charges=False,
                                          add_hydrogens=False)

        # A private temporary directory replaces the fixed /tmp path: it
        # cannot collide with a concurrent test run and is removed even on
        # failure.
        with tempfile.TemporaryDirectory() as tmp:
            outfile = os.path.join(tmp, "mol.sdf")
            rdkit_util.write_molecule(mol, outfile)

            _, mol2 = rdkit_util.load_molecule(outfile,
                                               calc_charges=False,
                                               add_hydrogens=False)

        assert_equal(mol.GetNumAtoms(), mol2.GetNumAtoms())
        # Compare the original against the reloaded molecule. Previously
        # both atoms were taken from `mol`, so this check could never fail.
        for atom1, atom2 in zip(mol.GetAtoms(), mol2.GetAtoms()):
            assert_equal(atom1.GetAtomicNum(), atom2.GetAtomicNum())
Esempio n. 11
0
  def generate_poses(self,
                     molecular_complex,
                     centroid=None,
                     box_dims=None,
                     exhaustiveness=10,
                     num_modes=9,
                     num_pockets=None,
                     out_dir=None,
                     generate_scores=False):
    """Generates the docked complex and outputs files for docked complex.

    TODO: How can this work on Windows? We need to install a .msi file and invoke it correctly from Python for this to work.

    Parameters
    ----------
    molecular_complex: tuple
      A `(protein_file, ligand_file)` pair of file paths.
    centroid: np.ndarray, optional
      The centroid to dock against. Only honoured when `box_dims` is
      given as well; otherwise the centroid is computed.
    box_dims: np.ndarray, optional
      Of shape `(3,)` holding the size of the box to dock. Only honoured
      when `centroid` is given as well. If not specified and no pocket
      finder is set, it is the protein's coordinate range plus 5.0.
    exhaustiveness: int, optional (default 10)
      Tells Autodock Vina how exhaustive it should be with pose
      generation.
    num_modes: int, optional (default 9)
      Tells Autodock Vina how many binding modes it should generate at
      each invocation.
    num_pockets: int, optional (default None)
      If specified, `self.pocket_finder` must be set. Will only
      generate poses for the first `num_pockets` returned by
      `self.pocket_finder`.
    out_dir: str, optional
      If specified, write generated poses to this directory. Otherwise a
      fresh temporary directory is created (and not removed).
    generate_scores: bool, optional (default False)
      If `True`, the pose generator will return scores for complexes.
      This is used typically when invoking external docking programs
      that compute scores.

    Returns
    -------
    If `generate_scores` is `True`, a tuple `(docked_poses, scores)`;
    otherwise only `docked_poses`. `docked_poses` is a list of
    `(protein_mol, ligand_mol)` pairs, one per docked ligand pose.
    `scores` is the list of scores read back from Vina's output by
    `vina_utils.load_docked_ligands`.

    Raises
    ------
    `ValueError` if `num_pockets` is set but `self.pocket_finder is None`,
    or if `molecular_complex` has more than two entries.
    """
    if out_dir is None:
      out_dir = tempfile.mkdtemp()

    if num_pockets is not None and self.pocket_finder is None:
      raise ValueError(
          "If num_pockets is specified, pocket_finder must have been provided at construction time."
      )

    # Parse complex
    if len(molecular_complex) > 2:
      raise ValueError(
          "Autodock Vina can only dock protein-ligand complexes and not more general molecular complexes."
      )

    (protein_file, ligand_file) = molecular_complex

    # Prepare protein
    protein_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s_hyd.pdb" % protein_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_name)
    protein_mol = rdkit_util.load_molecule(
        protein_file, calc_charges=True, add_hydrogens=True)
    # The Vina config written below always references `protein_pdbqt`, so
    # the receptor files are written unconditionally. Previously they were
    # only written when no pocket finder and no explicit box were given.
    rdkit_util.write_molecule(protein_mol[1], protein_hyd, is_protein=True)
    rdkit_util.write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

    # Get protein centroid and range
    if centroid is not None and box_dims is not None:
      centroids = [centroid]
      dimensions = [box_dims]
    elif self.pocket_finder is None:
      logger.info("Pockets not specified. Will use whole protein to dock")
      protein_centroid = geometry_utils.compute_centroid(protein_mol[0])
      protein_range = mol_xyz_util.get_molecule_range(protein_mol[0])
      centroids, dimensions = [protein_centroid], [protein_range + 5.0]
    else:
      logger.info("About to find putative binding pockets")
      pockets = self.pocket_finder.find_pockets(protein_file)
      logger.info("%d pockets found in total", len(pockets))
      logger.info("Computing centroid and size of proposed pockets.")
      centroids, dimensions = [], []
      for pocket in pockets:
        (x_min, x_max), (y_min, y_max), (
            z_min, z_max) = pocket.x_range, pocket.y_range, pocket.z_range
        # TODO(rbharath: Does vina divide box dimensions by 2?
        x_box = (x_max - x_min) / 2.
        y_box = (y_max - y_min) / 2.
        z_box = (z_max - z_min) / 2.
        centroids.append(pocket.center())
        dimensions.append((x_box, y_box, z_box))

    if num_pockets is not None:
      logger.info(
          "num_pockets = %d so selecting this many pockets for docking.",
          num_pockets)
      centroids = centroids[:num_pockets]
      dimensions = dimensions[:num_pockets]

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)

    ligand_mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=True, add_hydrogens=True)
    rdkit_util.write_molecule(ligand_mol[1], ligand_pdbqt)

    docked_complexes = []
    all_scores = []
    for i, (pocket_centroid, pocket_dims) in enumerate(
        zip(centroids, dimensions)):
      logger.info("Docking in pocket %d/%d", i + 1, len(centroids))
      logger.info("Docking with center: %s", str(pocket_centroid))
      logger.info("Box dimensions: %s", str(pocket_dims))
      # Write Vina conf file
      conf_file = os.path.join(out_dir, "conf.txt")
      vina_utils.write_vina_conf(
          protein_pdbqt,
          ligand_pdbqt,
          pocket_centroid,
          pocket_dims,
          conf_file,
          num_modes=num_modes,
          exhaustiveness=exhaustiveness)

      # Define locations of log and output files. These names do not depend
      # on the pocket index, so each iteration overwrites the previous
      # pocket's conf/log/output files after its poses have been loaded.
      log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
      out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
      logger.info("About to call Vina")
      # NOTE(review): the command is a shell string built from unquoted
      # paths, so paths containing spaces or shell metacharacters will
      # break it. Left as-is because `self.vina_cmd` may itself rely on
      # shell parsing -- confirm before switching to an argument list.
      call(
          "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                log_file, out_pdbqt),
          shell=True)
      ligands, scores = vina_utils.load_docked_ligands(out_pdbqt)
      docked_complexes += [(protein_mol[1], ligand) for ligand in ligands]
      all_scores += scores

    if generate_scores:
      return docked_complexes, all_scores
    else:
      return docked_complexes