Esempio n. 1
0
def extract_active_site(protein_file, ligand_file, cutoff=4):
    """Extract the axis-aligned bounding box of a ligand's binding site.

    A protein atom belongs to the pocket when it lies strictly closer than
    ``cutoff`` to at least one ligand atom.

    Parameters
    ----------
    protein_file: str
      Path handed to ``rdkit_util.load_molecule``; loaded without adding
      hydrogens.
    ligand_file: str
      Path handed to ``rdkit_util.load_molecule``; loaded with hydrogens
      added and charges computed.
    cutoff: float, optional
      Distance threshold, in the same units as the loaded coordinates.

    Returns
    -------
    tuple
      ``(box, pocket_atoms, pocket_coords)`` where ``box`` is
      ``((x_min, x_max), (y_min, y_max), (z_min, z_max))`` with integer
      bounds (floor of the minimum, ceil of the maximum), ``pocket_atoms``
      is the list of protein atom indices in ascending order, and
      ``pocket_coords`` is an ``(n_pocket_atoms, 3)`` float array whose rows
      line up with ``pocket_atoms``.

    Raises
    ------
    ValueError
      If no protein atom lies within ``cutoff`` of the ligand.
    """
    protein_coords = np.asarray(
        rdkit_util.load_molecule(protein_file, add_hydrogens=False)[0],
        dtype=float).reshape(-1, 3)
    ligand_coords = np.asarray(
        rdkit_util.load_molecule(ligand_file,
                                 add_hydrogens=True,
                                 calc_charges=True)[0],
        dtype=float).reshape(-1, 3)

    # One vectorised distance computation per ligand atom keeps memory at
    # O(n_protein_atoms) while avoiding the per-pair Python loop.
    in_pocket = np.zeros(len(protein_coords), dtype=bool)
    for lig_atom in ligand_coords:
        distances = np.linalg.norm(protein_coords - lig_atom, axis=1)
        in_pocket |= distances < cutoff

    # flatnonzero yields ascending indices, so the output order is
    # deterministic.
    pocket_atoms = np.flatnonzero(in_pocket).tolist()
    if not pocket_atoms:
        raise ValueError(
            "No protein atoms found within cutoff %s of the ligand." % cutoff)

    # Should be an array of size (n_pocket_atoms, 3)
    pocket_coords = protein_coords[pocket_atoms]

    lower = np.floor(pocket_coords.min(axis=0))
    upper = np.ceil(pocket_coords.max(axis=0))
    box = tuple((int(lo), int(hi)) for lo, hi in zip(lower, upper))
    return (box, pocket_atoms, pocket_coords)
Esempio n. 2
0
 def find_pockets(self, protein_file, ligand_file):
     """Propose binding pockets on a protein and collect their atom coordinates.

     Parameters
     ----------
     protein_file: str
       Path handed to ``rdkit_util.load_molecule`` for the protein.
     ligand_file: str
       Path handed to ``rdkit_util.load_molecule`` for the ligand.

     Returns
     -------
     tuple
       ``(pockets, pocket_atoms_map, pocket_coords)``: the first two come
       from ``merge_overlapping_boxes``; ``pocket_coords`` holds one
       ``(n_atoms, 3)`` array per pocket, in the same order as ``pockets``.
     """
     load_options = dict(add_hydrogens=False, calc_charges=False)
     protein_xyz = rdkit_util.load_molecule(protein_file, **load_options)[0]
     # The ligand is still loaded (so a bad ligand file fails here), but its
     # coordinates are not used by the pocket search below.
     rdkit_util.load_molecule(ligand_file, **load_options)[0]

     candidate_boxes = get_all_boxes(protein_xyz, self.pad)
     box_to_atoms = boxes_to_atoms(protein_xyz, candidate_boxes)
     pockets, pocket_atoms_map = merge_overlapping_boxes(
         box_to_atoms, candidate_boxes)

     def _gather(atom_indices):
         # Copy the coordinates of the listed protein atoms into a new array.
         xyz = np.zeros((len(atom_indices), 3))
         for row, atom_index in enumerate(atom_indices):
             xyz[row] = protein_xyz[atom_index]
         return xyz

     pocket_coords = [_gather(pocket_atoms_map[pocket]) for pocket in pockets]
     return pockets, pocket_atoms_map, pocket_coords
Esempio n. 3
0
    def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
        """
        Compute merged coordinates and a neighbor list for a complex.

        Parameters
        ----------
        mol_pdb_file: str
          File handed to ``rdkit_util.load_molecule`` for the ligand.
        protein_pdb_file: str
          File handed to ``rdkit_util.load_molecule`` for the protein.

        Returns
        -------
        tuple
          ``(system_coords, system_neighbor_list)``: the output of
          ``rdkit_util.merge_molecules_xyz`` (ligand first, protein second)
          and the neighbor list computed on it with ``self.neighbor_cutoff``
          and ``self.max_num_neighbors``.
        """
        # Only the coordinates are needed; the molecule objects are discarded.
        ligand_xyz, _ = rdkit_util.load_molecule(mol_pdb_file)
        receptor_xyz, _ = rdkit_util.load_molecule(protein_pdb_file)

        complex_xyz = rdkit_util.merge_molecules_xyz(ligand_xyz, receptor_xyz)
        complex_neighbors = compute_neighbor_list(
            complex_xyz, self.neighbor_cutoff, self.max_num_neighbors, None)

        return (complex_xyz, complex_neighbors)
Esempio n. 4
0
    def _featurize_complex(self, mol_pdb_file, protein_pdb_file):
        """Featurize both fragments of a complex and the merged system.

        Parameters
        ----------
        mol_pdb_file: str
          File handed to ``rdkit_util.load_molecule`` for the first fragment.
        protein_pdb_file: str
          File handed to ``rdkit_util.load_molecule`` for the second fragment.

        Returns
        -------
        tuple
          Nine values: ``(coords, neighbor_list, z)`` as returned by
          ``self.featurize_mol`` for fragment 1, then fragment 2, then the
          merged system.
        """
        ligand_xyz, ligand_mol = rdkit_util.load_molecule(mol_pdb_file)
        receptor_xyz, receptor_mol = rdkit_util.load_molecule(protein_pdb_file)

        # The merged system is built from the molecules as loaded, before any
        # hydrogens are stripped.
        complex_mol = rdkit_util.merge_molecules(ligand_mol, receptor_mol)
        complex_xyz = rdkit_util.get_xyz_from_mol(complex_mol)

        # Strip hydrogens from all three before featurizing any of them.
        ligand_xyz, ligand_mol = self._strip_hydrogens(ligand_xyz, ligand_mol)
        receptor_xyz, receptor_mol = self._strip_hydrogens(
            receptor_xyz, receptor_mol)
        complex_xyz, complex_mol = self._strip_hydrogens(
            complex_xyz, complex_mol)

        ligand_xyz, ligand_nbrs, ligand_z = self.featurize_mol(
            ligand_xyz, ligand_mol, self.frag1_num_atoms)
        receptor_xyz, receptor_nbrs, receptor_z = self.featurize_mol(
            receptor_xyz, receptor_mol, self.frag2_num_atoms)
        complex_xyz, complex_nbrs, complex_z = self.featurize_mol(
            complex_xyz, complex_mol, self.complex_num_atoms)

        return (ligand_xyz, ligand_nbrs, ligand_z,
                receptor_xyz, receptor_nbrs, receptor_z,
                complex_xyz, complex_nbrs, complex_z)
Esempio n. 5
0
def hydrogenate_and_compute_partial_charges(input_file,
                                            input_format,
                                            hyd_output=None,
                                            pdbqt_output=None,
                                            protein=True,
                                            verbose=True):
    """Outputs a hydrogenated pdb and a pdbqt with partial charges.

    The molecule is loaded once with hydrogens added and charges computed,
    then written to both output paths with ``rdkit_util.write_molecule``.
    For proteins, the pdbqt file is rewritten afterwards without any
    ROOT/ENDROOT/TORSDOF records.

    TODO(rbharath): Can do a bit of refactoring between this function and
    pdbqt_to_pdb.

    Parameters
    ----------
    input_file: String
      Path to input file.
    input_format: String
      Name of input format. Not used by this function; kept for callers.
    hyd_output: String
      Path for the hydrogenated molecule file (passed through ``str()``).
    pdbqt_output: String
      Path for the pdbqt file (passed through ``str()`` when writing).
    protein: bool
      Forwarded as ``is_protein`` to the writer; when True the pdbqt file is
      also post-processed as described above.
    verbose: bool
      When True, log a message before each of the two writes.
    """
    mol = rdkit_util.load_molecule(input_file,
                                   add_hydrogens=True,
                                   calc_charges=True)[1]
    if verbose:
        logging.info("Create pdb with hydrogens added")
    rdkit_util.write_molecule(mol, str(hyd_output), is_protein=protein)
    if verbose:
        logging.info("Create a pdbqt file from the hydrogenated pdb above.")
    rdkit_util.write_molecule(mol, str(pdbqt_output), is_protein=protein)

    if protein:
        logging.info("Removing ROOT/ENDROOT/TORSDOF")
        with open(pdbqt_output) as f:
            pdbqt_lines = f.readlines()
        # Match on the record name at the start of the line so ordinary
        # records that merely contain these letters are never dropped.
        torsion_tree_records = ("ROOT", "ENDROOT", "TORSDOF")
        filtered_lines = [
            line for line in pdbqt_lines
            if not line.startswith(torsion_tree_records)
        ]
        with open(pdbqt_output, "w") as f:
            f.writelines(filtered_lines)
Esempio n. 6
0
 def find_all_pockets(self, protein_file):
     """Return every candidate pocket box for the given protein.

     The protein is loaded with ``rdkit_util.load_molecule`` and its
     coordinates are passed, together with ``self.pad``, to
     ``get_all_boxes``, whose result is returned unchanged.
     """
     loaded = rdkit_util.load_molecule(protein_file)
     # First element holds the coordinates (presumably an (N, 3) array --
     # confirm against rdkit_util.load_molecule).
     protein_xyz = loaded[0]
     return get_all_boxes(protein_xyz, self.pad)
Esempio n. 7
0
  def generate_poses(self,
                     protein_file,
                     ligand_file,
                     centroid=None,
                     box_dims=None,
                     dry_run=False,
                     out_dir=None):
    """Generates the docked complex and outputs files for docked complex.

    Parameters
    ----------
    protein_file: str
      Path to the receptor file. The part of its basename before the first
      "." names the prepared receptor files.
    ligand_file: str
      Path to the ligand file. The part of its basename before the first
      "." names the prepared ligand, log and docked-output files.
    centroid: optional
      Centre of the search box. Only honoured when ``box_dims`` is also
      given; otherwise both are recomputed.
    box_dims: optional
      Dimensions of the search box. Only honoured when ``centroid`` is also
      given; otherwise both are recomputed.
    dry_run: bool
      When True, every input file is prepared but the Vina command is not
      run.
    out_dir: str, optional
      Directory for all generated files. A new temporary directory is
      created when None.

    Returns
    -------
    tuple
      ``(protein_hyd, out_pdbqt)``: path of the hydrogenated receptor file
      and path of the docked-ligand pdbqt file. With ``dry_run=True`` the
      second file is not produced by this method.
    """
    if out_dir is None:
      out_dir = tempfile.mkdtemp()

    # Prepare receptor
    receptor_name = os.path.basename(protein_file).split(".")[0]
    protein_hyd = os.path.join(out_dir, "%s.pdb" % receptor_name)
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % receptor_name)
    hydrogenate_and_compute_partial_charges(
        protein_file,
        "pdb",
        hyd_output=protein_hyd,
        pdbqt_output=protein_pdbqt,
        protein=True)
    # Get protein centroid and range
    if centroid is not None and box_dims is not None:
      protein_centroid = centroid
    else:
      if not self.detect_pockets:
        # Dock against the whole receptor: centre on it and pad its range.
        receptor_mol = rdkit_util.load_molecule(
            protein_hyd, calc_charges=False, add_hydrogens=False)
        protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])
        box_dims = protein_range + 5.0
      else:
        logger.info("About to find putative binding pockets")
        pockets, pocket_atoms_maps, pocket_coords = self.pocket_finder.find_pockets(
            protein_file, ligand_file)
        logger.info("Computing centroid and size of proposed pocket.")
        # Only the first proposed pocket is used.
        pocket_coord = pocket_coords[0]
        # Average over atoms (rows) to obtain a single (x, y, z) centre;
        # averaging over axis=1 would give one number per atom instead.
        protein_centroid = np.mean(pocket_coord, axis=0)
        pocket = pockets[0]
        (x_min, x_max), (y_min, y_max), (z_min, z_max) = pocket
        # NOTE(review): these are half of each pocket extent -- confirm that
        # write_conf expects half-extents rather than full box sizes.
        x_box = (x_max - x_min) / 2.
        y_box = (y_max - y_min) / 2.
        z_box = (z_max - z_min) / 2.
        box_dims = (x_box, y_box, z_box)

    # Prepare ligand
    ligand_name = os.path.basename(ligand_file).split(".")[0]
    ligand_hyd = os.path.join(out_dir, "%s.pdb" % ligand_name)
    ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)

    hydrogenate_and_compute_partial_charges(
        ligand_file,
        "sdf",
        hyd_output=ligand_hyd,
        pdbqt_output=ligand_pdbqt,
        protein=False)
    # Write Vina conf file
    conf_file = os.path.join(out_dir, "conf.txt")
    write_conf(
        protein_pdbqt,
        ligand_pdbqt,
        protein_centroid,
        box_dims,
        conf_file,
        exhaustiveness=self.exhaustiveness)

    # Define locations of log and output files
    log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
    out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
    if not dry_run:
      logger.info("About to call Vina")
      # NOTE(review): the command is a shell string, so paths containing
      # spaces or shell metacharacters will be misparsed; consider passing
      # an argument list once the format of self.vina_cmd is confirmed.
      call(
          "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                log_file, out_pdbqt),
          shell=True)

    # Return docked files
    return protein_hyd, out_pdbqt