Beispiel #1
0
    def generate_poses(self,
                       protein_file,
                       ligand_file,
                       centroid=None,
                       box_dims=None,
                       dry_run=False,
                       out_dir=None):
        """Prepare a receptor/ligand pair and dock it by invoking Vina.

        Both inputs are run through `hydrogenate_and_compute_partial_charges`,
        a Vina configuration file is written with `write_conf`, and, unless
        `dry_run` is set, the command in `self.vina_cmd` is executed through
        the shell.

        Parameters
        ----------
        protein_file: str
            Path to the receptor; processed with the "pdb" format tag.
        ligand_file: str
            Path to the ligand; processed with the "sdf" format tag.
        centroid: optional
            Center of the docking box. Only honoured when `box_dims` is also
            given; otherwise it is ignored and a center is computed.
        box_dims: optional
            Size of the docking box. Only honoured when `centroid` is also
            given; otherwise it is recomputed.
        dry_run: bool, optional (default False)
            If true, every file is prepared but Vina is not called.
        out_dir: str, optional
            Directory that receives all generated files. A fresh temporary
            directory is created when omitted.

        Returns
        -------
        tuple
            `(protein_hyd, out_pdbqt)`: the path handed to the preparation
            helper as the hydrogenated receptor output, and the path given
            to Vina via `--out`. With `dry_run` the latter is not produced
            by this method.
        """
        if out_dir is None:
            out_dir = tempfile.mkdtemp()

        # Receptor: output names are derived from the input's base name
        # (the text before its first "."), then the helper writes both files.
        receptor_stem = os.path.basename(protein_file).partition(".")[0]
        protein_hyd = os.path.join(out_dir, "%s.pdb" % receptor_stem)
        protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % receptor_stem)
        hydrogenate_and_compute_partial_charges(
            protein_file,
            "pdb",
            hyd_output=protein_hyd,
            pdbqt_output=protein_pdbqt,
            protein=True)

        # Choose the docking box.
        # TODO(rbharath): Need to add some way to identify binding pocket, or this is
        # going to be extremely slow!
        caller_supplied_box = centroid is not None and box_dims is not None
        if caller_supplied_box:
            protein_centroid = centroid
        elif self.detect_pockets:
            logger.info("About to find putative binding pockets")
            pockets, _, pocket_coords = self.pocket_finder.find_pockets(
                protein_file, ligand_file)
            # TODO(rbharath): Handle multiple pockets instead of arbitrarily selecting
            # first pocket.
            logger.info("Computing centroid and size of proposed pocket.")
            first_pocket_coords = pocket_coords[0]
            # NOTE(review): axis=1 averages within each row. If rows are atoms
            # and columns are x/y/z this is not a 3-vector centroid -- confirm
            # the layout returned by the pocket finder.
            protein_centroid = np.mean(first_pocket_coords, axis=1)
            (x_lo, x_hi), (y_lo, y_hi), (z_lo, z_hi) = pockets[0]
            # Half of each pocket extent is used as the box size.
            box_dims = ((x_hi - x_lo) / 2.,
                        (y_hi - y_lo) / 2.,
                        (z_hi - z_lo) / 2.)
        else:
            # No pocket detection: the box spans the whole receptor plus a
            # 5.0 margin added to its range.
            receptor_mol = rdkit_util.load_molecule(
                protein_hyd, calc_charges=False, add_hydrogens=False)
            # First element of the loaded result (presumably the coordinates
            # -- confirm against rdkit_util.load_molecule).
            receptor_geom = receptor_mol[0]
            protein_centroid = mol_xyz_util.get_molecule_centroid(
                receptor_geom)
            box_dims = mol_xyz_util.get_molecule_range(receptor_geom) + 5.0

        # Ligand: same preparation as the receptor, with the "sdf" format tag.
        ligand_stem = os.path.basename(ligand_file).partition(".")[0]
        ligand_hyd = os.path.join(out_dir, "%s.pdb" % ligand_stem)
        ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_stem)
        # TODO(rbharath): Generalize this so can support mol2 files as well.
        hydrogenate_and_compute_partial_charges(
            ligand_file,
            "sdf",
            hyd_output=ligand_hyd,
            pdbqt_output=ligand_pdbqt,
            protein=False)

        # Vina configuration file.
        conf_file = os.path.join(out_dir, "conf.txt")
        write_conf(
            protein_pdbqt,
            ligand_pdbqt,
            protein_centroid,
            box_dims,
            conf_file,
            exhaustiveness=self.exhaustiveness)

        # Log and output locations are computed even for a dry run because
        # the output path is part of the return value.
        log_file = os.path.join(out_dir, "%s_log.txt" % ligand_stem)
        out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_stem)
        # TODO(rbharath): Let user specify the number of poses required.
        if dry_run:
            return protein_hyd, out_pdbqt

        logger.info("About to call Vina")
        vina_invocation = "%s --config %s --log %s --out %s" % (
            self.vina_cmd, conf_file, log_file, out_pdbqt)
        call(vina_invocation, shell=True)
        # TODO(rbharath): Convert the output pdbqt to a pdb file.

        return protein_hyd, out_pdbqt
Beispiel #2
0
  def generate_poses(self,
                     molecular_complex,
                     centroid=None,
                     box_dims=None,
                     exhaustiveness=10,
                     num_modes=9,
                     num_pockets=None,
                     out_dir=None,
                     generate_scores=False):
    """Generates the docked complex and outputs files for docked complex.

    TODO: How can this work on Windows? We need to install a .msi file and invoke it correctly from Python for this to work.

    Parameters
    ----------
    molecular_complex: tuple
      A `(protein_file, ligand_file)` pair of file paths. Complexes with
      more than two entries are rejected.
    centroid: np.ndarray, optional
      The centroid to dock against. Only used when `box_dims` is also
      given; otherwise it is computed.
    box_dims: np.ndarray, optional
      Of shape `(3,)` holding the size of the box to dock. Only used when
      `centroid` is also given. Otherwise it is set to the range of the
      protein plus 5 when no pocket finder is configured, or to half the
      extent of each pocket returned by `self.pocket_finder`.
    exhaustiveness: int, optional (default 10)
      Tells Autodock Vina how exhaustive it should be with pose
      generation.
    num_modes: int, optional (default 9)
      Tells Autodock Vina how many binding modes it should generate at
      each invocation.
    num_pockets: int, optional (default None)
      If specified, `self.pocket_finder` must be set. Will only
      generate poses for the first `num_pockets` docking boxes.
    out_dir: str, optional
      If specified, write generated files to this directory. A temporary
      directory is created otherwise.
    generate_scores: bool, optional (default False)
      If `True`, the pose generator will return scores for complexes.
      This is used typically when invoking external docking programs
      that compute scores.

    Returns
    -------
    If `generate_scores` is `True`, a tuple `(docked_poses, scores)`;
    otherwise only `docked_poses`. `docked_poses` is a list of
    `(protein_mol, ligand_mol)` pairs, one per pose loaded from Vina's
    output across all docking boxes. `scores` is the list of scores
    loaded alongside those poses (binding free energies predicted by
    Vina).

    Raises
    ------
    `ValueError` if `num_pockets` is set but `self.pocket_finder is None`,
    or if `molecular_complex` has more than two entries.
    """
    if out_dir is None:
      out_dir = tempfile.mkdtemp()

    if num_pockets is not None and self.pocket_finder is None:
      raise ValueError(
          "If num_pockets is specified, pocket_finder must have been provided at construction time."
      )

    # Parse complex
    if len(molecular_complex) > 2:
      raise ValueError(
          "Autodock Vina can only dock protein-ligand complexes and not more general molecular complexes."
      )

    (protein_file, ligand_file) = molecular_complex

    # Prepare protein
    protein_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s_hyd.pdb" % protein_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_name)
    protein_mol = rdkit_util.load_molecule(
        protein_file, calc_charges=True, add_hydrogens=True)
    # Write the receptor files unconditionally: protein_pdbqt is handed to
    # write_vina_conf for every docking box below, so it must exist whether
    # the box came from the caller, the pocket finder, or the whole protein.
    rdkit_util.write_molecule(protein_mol[1], protein_hyd, is_protein=True)
    rdkit_util.write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

    # Collect the docking boxes as parallel lists of centers and sizes.
    if centroid is not None and box_dims is not None:
      centroids = [centroid]
      dimensions = [box_dims]
    elif self.pocket_finder is None:
      logger.info("Pockets not specified. Will use whole protein to dock")
      whole_centroid = geometry_utils.compute_centroid(protein_mol[0])
      whole_range = mol_xyz_util.get_molecule_range(protein_mol[0])
      centroids, dimensions = [whole_centroid], [whole_range + 5.0]
    else:
      logger.info("About to find putative binding pockets")
      pockets = self.pocket_finder.find_pockets(protein_file)
      logger.info("%d pockets found in total" % len(pockets))
      logger.info("Computing centroid and size of proposed pockets.")
      centroids, dimensions = [], []
      for pocket in pockets:
        (x_min, x_max), (y_min, y_max), (
            z_min, z_max) = pocket.x_range, pocket.y_range, pocket.z_range
        # TODO(rbharath: Does vina divide box dimensions by 2?
        pocket_dims = ((x_max - x_min) / 2.,
                       (y_max - y_min) / 2.,
                       (z_max - z_min) / 2.)
        centroids.append(pocket.center())
        dimensions.append(pocket_dims)

    if num_pockets is not None:
      logger.info("num_pockets = %d so selecting this many pockets for docking."
                  % num_pockets)
      centroids = centroids[:num_pockets]
      dimensions = dimensions[:num_pockets]

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)

    ligand_mol = rdkit_util.load_molecule(
        ligand_file, calc_charges=True, add_hydrogens=True)
    rdkit_util.write_molecule(ligand_mol[1], ligand_pdbqt)

    docked_complexes = []
    all_scores = []
    for i, (dock_centroid, dock_dims) in enumerate(
        zip(centroids, dimensions)):
      logger.info("Docking in pocket %d/%d" % (i + 1, len(centroids)))
      logger.info("Docking with center: %s" % str(dock_centroid))
      logger.info("Box dimensions: %s" % str(dock_dims))
      # Write Vina conf file. The conf, log and output paths are the same
      # on every iteration, so each box overwrites the previous files; the
      # poses are loaded into memory before the next iteration runs.
      conf_file = os.path.join(out_dir, "conf.txt")
      vina_utils.write_vina_conf(
          protein_pdbqt,
          ligand_pdbqt,
          dock_centroid,
          dock_dims,
          conf_file,
          num_modes=num_modes,
          exhaustiveness=exhaustiveness)

      # Define locations of log and output files
      log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
      out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
      logger.info("About to call Vina")
      call(
          "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                log_file, out_pdbqt),
          shell=True)
      ligands, scores = vina_utils.load_docked_ligands(out_pdbqt)
      docked_complexes += [(protein_mol[1], ligand) for ligand in ligands]
      all_scores += scores

    if generate_scores:
      return docked_complexes, all_scores
    else:
      return docked_complexes
Beispiel #3
0
def dock_ligands_to_receptors(docking_dir,
                              worker_pool=None,
                              exhaustiveness=None,
                              chosen_receptor=None,
                              restrict_box=True):
  """Dock every prepared ligand against the receptor in each subdirectory.

  Each immediate subdirectory of `docking_dir` is treated as one receptor
  named after the subdirectory. It is expected to contain `<name>.pdbqt`,
  `<name>.pdb` and ligand files matching `*_prepared.pdbqt`. Subdirectories
  without `<name>.pdbqt` are skipped.

  Parameters
  ----------
  docking_dir: str
    Directory whose subdirectories hold the receptors and their ligands.
  worker_pool: optional
    Object exposing `map(func, iterable)`. When given, ligands are docked
    through it; otherwise they are docked one by one and timed.
  exhaustiveness: optional
    Forwarded unchanged to `dock_ligand_to_receptor`.
  chosen_receptor: str, optional
    If given, only the subdirectory with this name is processed.
  restrict_box: bool, optional (default True)
    If true, the first ligand whose path contains "CHEM" is docked with
    the whole-protein box first; the centroid of that result then becomes
    the center of a fixed 20 x 20 x 20 box used for all ligands. When no
    such ligand exists the whole-protein box is kept.

  Returns
  -------
  None
  """
  subdirs = glob.glob(os.path.join(docking_dir, '*/'))
  for subdir in subdirs:
    subdir = subdir.rstrip('/')
    receptor_name = os.path.basename(subdir)
    if chosen_receptor is not None and chosen_receptor != receptor_name:
      continue
    print("receptor name = %s" % receptor_name)
    receptor_filename = os.path.join(subdir, "%s.pdbqt" % receptor_name)
    if not os.path.exists(receptor_filename):
      continue

    print("Examining %s" % receptor_filename)

    # Default docking box: centered on the receptor, range padded by 5.0.
    receptor_mol = rdkit_util.load_molecule(
        os.path.join(subdir, "%s.pdb" % receptor_name))
    protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
    protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])

    box_dims = protein_range + 5.0

    ligands = sorted(glob.glob(os.path.join(subdir, '*_prepared.pdbqt')))
    print("Num ligands = %d" % len(ligands))

    dock_ligand_to_receptor_partial = partial(
        dock_ligand_to_receptor,
        receptor_filename=receptor_filename,
        protein_centroid=protein_centroid,
        box_dims=box_dims,
        subdir=subdir,
        exhaustiveness=exhaustiveness)

    if restrict_box:
      # The match is made against the full ligand path, as before.
      active_ligand = next((lig for lig in ligands if "CHEM" in lig), None)
      if active_ligand is None:
        # Previously an empty string was docked here as if it were a
        # ligand file; keep the whole-protein box instead.
        print("No ligand containing CHEM found in %s; "
              "docking with the whole-protein box." % subdir)
      else:
        print("Docking to %s first to ascertain centroid and box dimensions" %
              active_ligand)

        # The docking call's return value is loaded as a molecule file
        # (presumably the docked-pose path -- confirm against
        # dock_ligand_to_receptor).
        out_pdb_qt = dock_ligand_to_receptor_partial(active_ligand)
        docked_ligand = rdkit_util.load_molecule(out_pdb_qt)
        ligand_centroid = mol_xyz_util.get_molecule_centroid(docked_ligand[0])
        print("Protein centroid = %s" % (str(protein_centroid)))
        print("Ligand centroid = %s" % (str(ligand_centroid)))
        box_dims = np.array([20., 20., 20.])
        dock_ligand_to_receptor_partial = partial(
            dock_ligand_to_receptor,
            receptor_filename=receptor_filename,
            protein_centroid=ligand_centroid,
            box_dims=box_dims,
            subdir=subdir,
            exhaustiveness=exhaustiveness)

        print("Finished docking to %s, docking to remainder of ligands now." %
              active_ligand)

    if worker_pool is None:
      for ligand_file in ligands:
        start = time.time()
        dock_ligand_to_receptor_partial(ligand_file)
        print("took %f seconds to dock single ligand." % (time.time() - start))
    else:
      print("parallelizing docking over worker pool")

      worker_pool.map(dock_ligand_to_receptor_partial, ligands)
Beispiel #4
0
  def generate_poses(self,
                     protein_file,
                     ligand_file,
                     centroid=None,
                     box_dims=None,
                     dry_run=False,
                     out_dir=None):
    """Write docking inputs for one protein/ligand pair and run Vina on them.

    The protein and ligand are each processed by
    `hydrogenate_and_compute_partial_charges`, a configuration file is
    produced by `write_conf`, and `self.vina_cmd` is invoked through the
    shell unless `dry_run` is true.

    Parameters
    ----------
    protein_file: str
      Receptor input path, handled with the "pdb" format tag.
    ligand_file: str
      Ligand input path, handled with the "sdf" format tag.
    centroid: optional
      Docking-box center. Used only together with `box_dims`; if either is
      missing both are computed here.
    box_dims: optional
      Docking-box size. Used only together with `centroid`.
    dry_run: bool, optional (default False)
      When true, stop after writing the input files; Vina is not run.
    out_dir: str, optional
      Output directory; defaults to a newly created temporary directory.

    Returns
    -------
    tuple
      `(protein_hyd, out_pdbqt)`: the hydrogenated-receptor output path
      and the path passed to Vina as `--out`.
    """
    if out_dir is None:
      out_dir = tempfile.mkdtemp()

    # Receptor preparation. File names reuse the input's base name up to
    # its first ".".
    receptor_base = os.path.basename(protein_file).split(".", 1)[0]
    protein_hyd = os.path.join(out_dir, "%s.pdb" % receptor_base)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % receptor_base)
    hydrogenate_and_compute_partial_charges(protein_file,
                                            "pdb",
                                            hyd_output=protein_hyd,
                                            pdbqt_output=protein_pdbqt,
                                            protein=True)

    # Docking box selection.
    # TODO(rbharath): Need to add some way to identify binding pocket, or this is
    # going to be extremely slow!
    if centroid is None or box_dims is None:
      if self.detect_pockets:
        logger.info("About to find putative binding pockets")
        pocket_result = self.pocket_finder.find_pockets(protein_file,
                                                        ligand_file)
        pockets, _unused_atom_maps, pocket_coords = pocket_result
        # TODO(rbharath): Handle multiple pockets instead of arbitrarily selecting
        # first pocket.
        logger.info("Computing centroid and size of proposed pocket.")
        leading_coords = pocket_coords[0]
        # NOTE(review): the mean is taken along axis 1; verify this matches
        # the coordinate layout produced by the pocket finder.
        protein_centroid = np.mean(leading_coords, axis=1)
        leading_pocket = pockets[0]
        (x_lo, x_hi), (y_lo, y_hi), (z_lo, z_hi) = leading_pocket
        # Each box side is half the pocket's extent along that axis.
        half_x = (x_hi - x_lo) / 2.
        half_y = (y_hi - y_lo) / 2.
        half_z = (z_hi - z_lo) / 2.
        box_dims = (half_x, half_y, half_z)
      else:
        # Whole-receptor box: molecule range plus 5.0.
        loaded_receptor = rdkit_util.load_molecule(protein_hyd,
                                                   calc_charges=False,
                                                   add_hydrogens=False)
        protein_centroid = mol_xyz_util.get_molecule_centroid(
            loaded_receptor[0])
        receptor_extent = mol_xyz_util.get_molecule_range(loaded_receptor[0])
        box_dims = receptor_extent + 5.0
    else:
      protein_centroid = centroid

    # Ligand preparation.
    ligand_base = os.path.basename(ligand_file).split(".", 1)[0]
    ligand_hyd = os.path.join(out_dir, "%s.pdb" % ligand_base)
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_base)

    # TODO(rbharath): Generalize this so can support mol2 files as well.
    hydrogenate_and_compute_partial_charges(ligand_file,
                                            "sdf",
                                            hyd_output=ligand_hyd,
                                            pdbqt_output=ligand_pdbqt,
                                            protein=False)

    # Vina configuration.
    conf_file = os.path.join(out_dir, "conf.txt")
    write_conf(protein_pdbqt,
               ligand_pdbqt,
               protein_centroid,
               box_dims,
               conf_file,
               exhaustiveness=self.exhaustiveness)

    # Log and result paths; the result path is returned even on a dry run.
    log_file = os.path.join(out_dir, "%s_log.txt" % ligand_base)
    out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_base)
    docked_files = (protein_hyd, out_pdbqt)
    # TODO(rbharath): Let user specify the number of poses required.
    if dry_run:
      return docked_files

    logger.info("About to call Vina")
    vina_args = (self.vina_cmd, conf_file, log_file, out_pdbqt)
    call("%s --config %s --log %s --out %s" % vina_args, shell=True)
    # TODO(rbharath): Convert the output pdbqt to a pdb file.

    return docked_files
def dock_ligands_to_receptors(docking_dir,
                              worker_pool=None,
                              exhaustiveness=None,
                              chosen_receptor=None,
                              restrict_box=True):
    """Dock all prepared ligands found next to each receptor.

    Every immediate subdirectory of `docking_dir` represents one receptor
    whose name is the subdirectory name. The subdirectory must contain
    `<name>.pdbqt` (otherwise it is skipped), `<name>.pdb`, and the ligands
    as files matching `*_prepared.pdbqt`.

    Parameters
    ----------
    docking_dir: str
        Directory containing one subdirectory per receptor.
    worker_pool: optional
        Object with a `map(func, iterable)` method used to dock the ligands.
        When omitted, ligands are docked sequentially and each run is timed.
    exhaustiveness: optional
        Passed through to `dock_ligand_to_receptor`.
    chosen_receptor: str, optional
        Restrict processing to the subdirectory with this name.
    restrict_box: bool, optional (default True)
        If true, dock the first ligand whose path contains "CHEM" using the
        whole-protein box, then dock all ligands in a fixed 20 x 20 x 20 box
        centered on that result. If no such ligand exists, the whole-protein
        box is used for every ligand.

    Returns
    -------
    None
    """
    subdirs = glob.glob(os.path.join(docking_dir, '*/'))
    for subdir in subdirs:
        subdir = subdir.rstrip('/')
        receptor_name = os.path.basename(subdir)
        if chosen_receptor is not None and chosen_receptor != receptor_name:
            continue
        print("receptor name = %s" % receptor_name)
        receptor_filename = os.path.join(subdir, "%s.pdbqt" % receptor_name)
        if not os.path.exists(receptor_filename):
            continue

        print("Examining %s" % receptor_filename)

        # Whole-protein box: receptor centroid, receptor range plus 5.0.
        receptor_mol = rdkit_util.load_molecule(
            os.path.join(subdir, "%s.pdb" % receptor_name))
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])

        box_dims = protein_range + 5.0

        ligands = sorted(glob.glob(os.path.join(subdir, '*_prepared.pdbqt')))
        print("Num ligands = %d" % len(ligands))

        dock_ligand_to_receptor_partial = partial(
            dock_ligand_to_receptor,
            receptor_filename=receptor_filename,
            protein_centroid=protein_centroid,
            box_dims=box_dims,
            subdir=subdir,
            exhaustiveness=exhaustiveness)

        if restrict_box:
            # The search runs over the full ligand path, as it always has.
            active_ligand = next(
                (lig for lig in ligands if "CHEM" in lig), None)
            if active_ligand is None:
                # Without a reference ligand there is nothing to center the
                # small box on. Previously "" was docked as a ligand file.
                print("No ligand containing CHEM found in %s; "
                      "docking with the whole-protein box." % subdir)
            else:
                print(
                    "Docking to %s first to ascertain centroid and box dimensions"
                    % active_ligand)

                # The value returned by the docking call is loaded as a
                # molecule file (presumably the docked-pose path -- confirm
                # against dock_ligand_to_receptor).
                out_pdb_qt = dock_ligand_to_receptor_partial(active_ligand)
                docked_ligand = rdkit_util.load_molecule(out_pdb_qt)
                ligand_centroid = mol_xyz_util.get_molecule_centroid(
                    docked_ligand[0])
                print("Protein centroid = %s" % (str(protein_centroid)))
                print("Ligand centroid = %s" % (str(ligand_centroid)))
                box_dims = np.array([20., 20., 20.])
                dock_ligand_to_receptor_partial = partial(
                    dock_ligand_to_receptor,
                    receptor_filename=receptor_filename,
                    protein_centroid=ligand_centroid,
                    box_dims=box_dims,
                    subdir=subdir,
                    exhaustiveness=exhaustiveness)

                print(
                    "Finished docking to %s, docking to remainder of ligands now."
                    % active_ligand)

        if worker_pool is None:
            for ligand_file in ligands:
                start = time.time()
                dock_ligand_to_receptor_partial(ligand_file)
                print("took %f seconds to dock single ligand." %
                      (time.time() - start))
        else:
            print("parallelizing docking over worker pool")

            worker_pool.map(dock_ligand_to_receptor_partial, ligands)