Example #1
0
def align(inputs):
    """Superpose one protein onto a reference and overwrite its PDB file.

    The arguments arrive packed in a single tuple.

    :param tuple inputs: ``(ref_pdb, ref_mol, other_pdb, input_dir)``.
        ``ref_pdb`` and ``other_pdb`` are file stems; the files read are
        ``<input_dir>/<stem>.pdb``. ``ref_mol`` is compared against the second
        ``:``-separated field of each reference ligand identifier; when it is
        falsy no binding site is used for the superposition.
    :return: the RMSD reported by the superposition, or ``999`` if any step
        failed (bad input tuple, unreadable file, ligand not found, ...)
    """
    try:
        ref_pdb, ref_mol, other_pdb, input_dir = inputs

        ref = Protein.from_file(os.path.join(input_dir, f"{ref_pdb}.pdb"))

        other_path = os.path.join(input_dir, f"{other_pdb}.pdb")
        other = Protein.from_file(other_path)

        if ref_mol:
            # First ligand whose name field matches; StopIteration (no match)
            # is handled by the sentinel return below.
            ref_mol_obj = next(
                lig for lig in ref.ligands
                if lig.identifier.split(":")[1] == ref_mol
            )
            ref_bind_site = Protein.BindingSiteFromMolecule(
                protein=ref, molecule=ref_mol_obj, distance=12)
        else:
            ref_bind_site = None

        chain_superposition = Protein.ChainSuperposition()
        # other chains already stripped, so the first chain is the only one
        rms, _ = chain_superposition.superpose(ref.chains[0],
                                               other.chains[0],
                                               binding_site1=ref_bind_site)

        # NOTE(review): presumably superpose() moves `other` in place, which is
        # why it is written back over its own input file - confirm.
        with io.MoleculeWriter(other_path) as w:
            w.write(other)

        return rms

    except Exception:
        # Sentinel RMSD for "alignment failed". Only Exception is caught so
        # that KeyboardInterrupt / SystemExit still stop a long batch run.
        return 999
Example #2
0
    def get_ensemble(self, nrotations, charged=False):
        """
        Run the hotspot calculation for every protein of the ensemble.

        :param nrotations: passed to the runner and used in the name of the
            per-protein result directory
        :param charged: passed through to the runner
        :return: list of expected "out.zip" paths, one per protein path
        """
        ligand_path = self.find_largest_ligand()
        ligand = MoleculeReader(ligand_path)[0]
        protein = Protein.from_file(
            join(dirname(ligand_path), 'protein.mol2'))
        # NOTE(review): this binding site is built but not used further in
        # this method.
        binding_site = Protein.BindingSiteFromMolecule(protein=protein,
                                                       molecule=ligand,
                                                       distance=6.5)

        protein_paths = self.get_protein_paths()
        print(protein_paths)
        print(self.ensemble_name, len(protein_paths))

        runner = ParalleliselRunner(
            protein_paths, nrotations, charged, data_source='KLIFS')
        luigi.build([runner], local_scheduler=True, workers=30)

        # Results are expected next to each input protein file.
        results_dir = "fullsize_hotspots_{}".format(nrotations)
        return [
            join(dirname(path), results_dir, "out.zip")
            for path in protein_paths
        ]
Example #3
0
    def _align_proteins(reference, reference_chain, targets):
        """
        Align proteins by chain.

        Every target is reloaded from ``t.fname``. The chain to superpose is
        taken from the first residue of the binding site (distance=6) around
        the first ligand whose three-character code equals
        ``t.clustered_ligand``. Targets for which no chain can be determined,
        or whose superposition raises ``IndexError``, are reported and skipped.

        :param `ccdc.protein.Protein` reference: align to me
        :param str reference_chain: align to this chain
        :param list targets: list of `ccdc.protein.Protein`
        :return tup: list(:class:`ccdc.protein.Protein`) and list(:class:`ccdc.molecule.Molecule`)
        """
        print("Aligning proteins to {}, chain {}...".format(
            reference.identifier, reference_chain))
        aligned_prots = []
        aligned_ligands = []

        reference = Protein.from_file(reference.fname)
        reference.add_hydrogens()

        for t in tqdm(targets):
            prot = Protein.from_file(t.fname)
            prot.detect_ligand_bonds()
            prot.add_hydrogens()

            ligand_code = str(t.clustered_ligand)

            # Reset for every target: otherwise a target without a matching
            # ligand would reuse the previous target's chain (or hit a
            # NameError on the very first target).
            chain = None
            for lig in prot.ligands:
                if ligand_code != str(lig.identifier.split(":")[1][0:3]):
                    continue
                try:
                    bs = Protein.BindingSiteFromMolecule(protein=prot,
                                                         molecule=lig,
                                                         distance=6)
                    chain = bs.residues[0].identifier.split(":")[0]
                except Exception:
                    # Best effort: leave chain unset and report it below.
                    pass
                # Only the first matching ligand is considered.
                break

            if not chain:
                print("\n        {} failed! No chain detected".format(
                    t.identifier))
                # Skip just this target; the remaining ones are still aligned.
                continue

            try:
                binding_site_superposition = Protein.ChainSuperposition()
                binding_site_superposition.superpose(
                    reference[reference_chain], prot[chain])
                aligned_prots.append(prot)
                # Keep the matching ligands that sit on the superposed chain.
                for lig in prot.ligands:
                    id_fields = lig.identifier.split(":")
                    if (ligand_code == str(id_fields[1][0:3])
                            and chain == str(id_fields[0])):
                        aligned_ligands.append(lig)
            except IndexError:
                print("\n        {} failed!".format(t.identifier))
                continue

        return aligned_prots, aligned_ligands
Example #4
0
 def get_largest_binding_site(self):
     """
     Build the binding site around the largest ligand.

     The ligand file name comes from ``self.find_largest_ligand()`` and is
     read from ``self.lig_dir``; the protein is read from ``self.pdb_dir``
     using the same file name with "sdf" replaced by "pdb".
     :return: binding site created with distance=6.5 around that ligand
     """
     ligand_file = self.find_largest_ligand()

     ligand_path = join(self.lig_dir, ligand_file)
     ligand = MoleculeReader(ligand_path)[0]

     protein_path = join(self.pdb_dir, ligand_file.replace("sdf", "pdb"))
     protein = Protein.from_file(protein_path)

     return Protein.BindingSiteFromMolecule(protein=protein,
                                            molecule=ligand,
                                            distance=6.5)
    def run_one(self, target_chain, mol_writer):
        '''Superpose one target chain onto the reference chain and write it out.

        The formatted RMSD is stored back into ``target_chain['rmsd']``.

        :param target_chain: The target chain dictionary; the keys read here are
            'pdb_id', 'chain_id', 'identity' and 'similarity'
        :param mol_writer: Superposition file writer
        '''
        target_protein = self.load_target(target_chain['pdb_id'])
        chain_id = target_chain['chain_id']

        summary = 'Target protein {} chain {} identity {}% similarity {}%'.format(
            target_protein.identifier, target_protein[chain_id].identifier,
            target_chain['identity'], target_chain['similarity'])
        print(summary)

        superposer = Protein.ChainSuperposition()
        if self.args.sequence_alignment_tool is not None:
            superposer.settings.sequence_search_tool = self.args.sequence_search_tool
            superposer.settings.sequence_alignment_tool = self.args.sequence_alignment_tool
        superposer.settings.superposition_atoms = self.args.superposition_atoms

        # With a reference binding site the superposition is given that site;
        # otherwise the two chains alone are passed.
        if self.reference_binding_site is None:
            rmsd, transformation = superposer.superpose(
                self.reference_protein[self.reference_chain_id],
                target_protein[chain_id])
            rmsd_message = 'Chain RMSD is: {}'
        else:
            rmsd, transformation = superposer.superpose(
                self.reference_protein[self.reference_chain_id],
                target_protein[chain_id],
                self.reference_binding_site)
            rmsd_message = 'Binding site RMSD is: {}'

        target_chain['rmsd'] = '{:.4f}'.format(rmsd)
        print(rmsd_message.format(target_chain['rmsd']))
        print('Transformation matrix is: {}'.format(transformation))

        # Write either the whole protein or only the site around the
        # reference ligand.
        target_molecule = (
            target_protein
            if self.superposition_output == 'full_protein'
            else Protein.BindingSiteFromMolecule(
                target_protein, self.reference_ligand, self.args.radius_around_ligand
            )
        )

        entry_attributes = {
            'rmsd': '{}'.format(target_chain['rmsd']),
            'identity': '{}%'.format(target_chain['identity']),
            'similarity': '{}%'.format(target_chain['similarity']),
        }
        mol_writer.write_entry(
            Entry.from_molecule(target_molecule, **entry_attributes))
    def load_reference_ligand(self):
        '''Look up the reference ligand and build the binding site around it.

        :return: tuple ``(reference_ligand, reference_binding_site)``; both are
            ``None`` when ``self.args.reference_ligand`` is ``None``
        :raises IndexError: if no ligand of the reference protein has the
            requested identifier
        '''
        wanted_identifier = self.args.reference_ligand
        if wanted_identifier is None:
            return None, None

        # First ligand with an exactly matching identifier.
        for ligand in self.reference_protein.ligands:
            if ligand.identifier == wanted_identifier:
                break
        else:
            raise IndexError('Ligand not found for ligand identifier {}'.format(wanted_identifier))

        binding_site = Protein.BindingSiteFromMolecule(
            self.reference_protein, ligand, self.args.radius_around_ligand
        )
        print('Reference ligand {}'.format(ligand.identifier))

        return ligand, binding_site
Example #7
0
    def get_ensemble_binding_sites(self):
        """
        Build one binding site per structure in ``self.structures``, each
        defined by ``self.reference_ligand``.
        :return: list of CCDC binding site objects, in structure order
        """
        sites = []
        for index, structure in enumerate(self.structures):
            # PDBs don't have connect records; correct for that
            structure.detect_ligand_bonds()

            site = Protein.BindingSiteFromMolecule(structure,
                                                   self.reference_ligand)

            # Append the structure's position so each identifier is unique.
            unique_identifier = "{}_{}".format(site.identifier, str(index))
            site.identifier = unique_identifier

            sites.append(site)

        return sites
Example #8
0
def main():
    """Fetch the PDB entries for a het code and plot the residues binding STI.

    Steps, as performed below:

    1. Parse ``--het_code`` / ``--het`` and run a ``Pdbesearch`` for it.
    2. Call ``fetch_pdb`` for every entry in the first value of the search
       result.
    3. Read every ``.pdb`` file in the hard-coded ``directory`` and, for each
       ligand whose identifier contains 'STI', collect the residues of the
       binding site built with distance 6.
    4. Pass characters 2-4 of every residue identifier to ``create_plot``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--het_code', '--het', help='het_code pdb')
    args = parser.parse_args()

    P = Pdbesearch(args.het_code)
    sti_entries = P.run_search()
    list_of_entries = list(sti_entries.values())[0]

    for entry in list_of_entries:
        print(entry)
        fetch_pdb(entry)
    logging.info(list_of_entries)

    # NOTE(review): machine-specific path; presumably where fetch_pdb stores
    # its downloads - confirm.
    directory = r'C:\Users\amukhopadhyay\Documents\test_scripts'

    list_of_sti_binding_residues = []
    for filename in os.listdir(directory):
        if not filename.endswith(".pdb"):
            continue
        # os.listdir yields bare names; join with the directory so the file is
        # found regardless of the current working directory.
        protein_from_entry = Protein.from_file(
            os.path.join(directory, filename))
        for ligand in protein_from_entry.ligands:
            if 'STI' in ligand.identifier:
                list_of_sti_binding_residues.append(
                    Protein.BindingSiteFromMolecule(
                        protein_from_entry, ligand, 6.).residues)

    # Flatten once, after the loop, so an empty directory gives an empty list
    # instead of an undefined name.
    list_of_aa = [
        residue.identifier[2:5]
        for residues in list_of_sti_binding_residues
        for residue in residues
    ]

    create_plot(list_of_aa)
Example #9
0
        list_of_protein_objs.append(protein_from_entry)
        list_of_ligands = protein_from_entry.ligands
        list_of_ligands_identifiers = [
            ligands.identifier for ligands in list_of_ligands
        ]
        print(list_of_ligands_identifiers)
        sti_list = (list(
            filter(lambda x: 'STI' in x, list_of_ligands_identifiers)))

        sti_indices = [
            i for i, s in enumerate(list_of_ligands_identifiers) if 'STI' in s
        ]

        print(sti_indices)
        first_index_of_sti = sti_indices[0]
        binding_site = Protein.BindingSiteFromMolecule(
            protein_from_entry, list_of_ligands[first_index_of_sti], 6.)
        #print(binding_site.residues)
        binding_site_dict[pdb_id] = binding_site

# find global score of pdb entry list


def run_val_search(pdb_entry):
    """
    Build and print the validation query URL for one PDB entry.

    Check the PDBe search API documentation for more details.
    :param pdb_entry: value appended to ``base_url + validation_url``
    :return: None; the code visible here only prints the URL
    """
    # Assemble the complete query URL from its parts.
    endpoint = base_url + validation_url
    query_url = endpoint + pdb_entry
    print(query_url)
Example #10
0
        kr.get_subset()
        #kr.save_pdbs()

        #kr.find_largest_ligand()

        hot_paths = kr.get_ensemble(nrotations=nrot, charged=False)

        ref_kr = KLIFSReader(ensemble_directory=join(main_dir, 'CK2a1'),
                             ens_name='CK2a1')
        # # CK2_kr.save_pdbs()
        #
        #
        largest_ligand = ref_kr.find_largest_ligand()
        prot = Protein.from_file(join(dirname(largest_ligand), 'protein.mol2'))
        bs = Protein.BindingSiteFromMolecule(
            protein=prot,
            molecule=MoleculeReader(largest_ligand)[0],
            distance=7.0)

        s_paths_file = join(main_dir, "shrunk_hot_paths_{}.json".format(nrot))
        if exists(s_paths_file):
            with open(s_paths_file, "r") as f:
                s_paths_dict = json.load(f)
        else:
            s_paths_dict = {}

        ensemble = Ensemble(root_dir=join(main_dir, e))
        ensemble.reference_binding_site = bs
        #hot_paths = glob(join(ensemble.root_dir, '*', "fullsize_hotspots_{}".format(nrot), "out.zip"))
        s_paths = ensemble.shrink_hotspots(hotspot_paths=hot_paths,
                                           padding=2.0)
        s_paths_dict[e] = s_paths