Esempio n. 1
0
    def handle(self, *args, **options):
        """Update one PDB entry and process its pockets.

        Options read: ``verbose``, ``pdbs_dir``, ``entries_url``,
        ``entries_path``, ``tmp``, ``code`` and ``force``.

        An ``IOError`` raised while updating/processing is reported on
        ``self.stderr`` and swallowed; any other exception is re-raised as
        ``CommandError`` after its traceback has been printed.
        """
        if options["verbose"] == 1:
            import logging
            logging.basicConfig(level=logging.DEBUG)

        pdbs_dir = options["pdbs_dir"]
        pdbs = PDBs(pdbs_dir)
        pdbs.url_pdb_entries = options["entries_url"]

        # Only fetch the entries index when no local copy exists yet.
        entries_path = options["entries_path"]
        entries_missing = not os.path.exists(entries_path)
        if entries_missing:
            pdbs.download_pdb_entries()

        pdbio = PDBIO(pdbs_dir + "/", entries_path, options['tmp'])
        pdbio.init()

        try:
            code = options['code']
            pdbs.update_pdb(code)
            process_kwargs = {
                "force": options['force'],
                "pocket_path": pdbs.pdb_pockets_path(code),
                "pdb_path": pdbs.pdb_path(code),
            }
            pdbio.process_pdb(code, **process_kwargs)
        except IOError as ex:
            # I/O problems are reported but do not abort the command.
            traceback.print_exc()
            message = "error processing pockets from %s: %s" % (code, str(ex))
            self.stderr.write(message)
        except Exception as ex:
            traceback.print_exc()
            raise CommandError(ex)
Esempio n. 2
0
    def load_pdb_pocket(self, pdb, pdb_dir="/data/databases/pdb/"):
        """Return the pocket data of ``pdb``, computing it first if needed.

        When the pockets file of the entry does not exist, the PDB is updated,
        ``FPocket`` is run on it and the result is saved to that file. The
        file is then always read back as JSON, cached under
        ``self.pdb_data[pdb]["pockets"]`` and returned.

        :param pdb: PDB code of the entry.
        :param pdb_dir: root directory handed to ``PDBs``.
        :return: the parsed JSON content of the pockets file.
        """
        utils = PDBs(pdb_dir)
        pockets_path = utils.pdb_pockets_path(pdb)

        if not os.path.exists(pockets_path):
            utils.update_pdb(pdb)
            hunted = FPocket(utils.pdb_path(pdb)).hunt_pockets()
            mkdir(os.path.dirname(pockets_path))
            hunted.save(pockets_path)

        # The saved file is the single source of truth, even right after
        # a fresh FPocket run.
        with open(pockets_path) as handle:
            pockets = json.load(handle)

        entry = self.pdb_data[pdb]
        entry["pockets"] = pockets
        return entry["pockets"]
Esempio n. 3
0
    def handle(self, *args, **options):
        """Load PDB entries into the database.

        Options read: ``pdbs_dir``, ``entries_url``, ``entries_path`` (falls
        back to ``<pdbs_dir>/entries.idx`` when empty) and ``only_annotated``.

        The entries index is downloaded when it is missing or its ctime is
        more than 7 days old. Each candidate code is then skipped when it is
        obsolete, cannot be downloaded or already exists in the database;
        otherwise its entry is created and its data updated.

        :raises CommandError: when creating or updating an entry fails.
        """
        pdbs_utils = PDBs(pdb_dir=options['pdbs_dir'])
        pdbs_utils.url_pdb_entries = options["entries_url"]
        if not options['entries_path']:
            options['entries_path'] = options['pdbs_dir'] + "/entries.idx"
        entries_path = options["entries_path"]

        # os.path.getctime raises on a nonexistent file, so a missing index
        # (e.g. first run) must be treated as stale instead of crashing.
        # NOTE(review): presumably download_pdb_entries writes the index to
        # entries_path -- confirm against PDBs.
        if os.path.exists(entries_path):
            index_age = datetime.now() - datetime.fromtimestamp(
                os.path.getctime(entries_path))
            needs_download = index_age.days > 7
        else:
            needs_download = True
        if needs_download:
            pdbs_utils.download_pdb_entries()

        pdb2sql = PDB2SQL(options['pdbs_dir'], entries_path)
        pdb2sql.load_entries()
        if options["only_annotated"]:
            self.stderr.write("only_annotated option activated by default")
            from bioseq.models.Dbxref import Dbxref
            pdbs = [(x.accession.lower(),
                     pdbs_utils.pdb_path(x.accession.lower()))
                    for x in Dbxref.objects.filter(dbname="PDB")]
        else:
            pdbs = list(tqdm(iterpdbs(options['pdbs_dir'])))
        # 4zux 42 mer 2lo7("5my5","/data/databases/pdb/divided/my/pdb5my5.ent")
        # ("4zu4", "/data/databases/pdb/divided/zu/pdb4zu4.ent")

        with tqdm(pdbs) as pbar:
            for code, pdb_path in pbar:
                code = code.lower()

                if PDBsWS.is_obsolete(code):
                    self.stderr.write(f"{code} entry is obsolete")
                    continue

                try:
                    pdb_path = pdbs_utils.update_pdb(code)
                except Exception:
                    # Best effort: a failed download only skips this entry.
                    # KeyboardInterrupt/SystemExit are not Exception
                    # subclasses, so they still stop the command.
                    self.stderr.write("PDB %s could not be downloaded" % code)
                    continue

                if PDB.objects.filter(code=code).exists():
                    self.stderr.write("PDB %s already exists" % code)
                    continue

                pbar.set_description(code)
                try:
                    pdb2sql.create_pdb_entry(code, pdb_path)
                    pdb2sql.update_entry_data(code, pdb_path)
                except Exception as ex:
                    import traceback
                    traceback.print_exc()
                    # Chain the cause so the original error stays attached.
                    raise CommandError(ex) from ex
Esempio n. 4
0
class StructureVariant:
    """Relate positions of a reference sequence to residues of a PDB entry.

    The reference sequence is aligned against the sequence(s) taken from a
    PDB file (``load_msa``). Aligned positions can then be translated into
    PDB residues (``residues_from_pos`` / ``residues_from_aln_seq``) and
    annotated with binding-site and pocket data (``annotate_resid`` /
    ``annotate_residue_list``).
    """

    def __init__(self, pdb_dir="/data/databases/pdb/",
                 seqs_path="/tmp/seq.faa", aln_path="/tmp/msa.faa"):
        """
        :param pdb_dir: root directory handed to ``PDBs``.
        :param seqs_path: file the unaligned sequences are written to.
        :param aln_path: file the alignment is written to.

        The default work files are fixed paths, so instances using the
        defaults overwrite each other's files; pass distinct paths when
        several instances run at the same time.
        """
        self.utils = PDBs(pdb_dir)
        self.seqs_path = seqs_path
        self.aln_path = aln_path
        self.ref_seq = None
        self.pdbfile = None
        # Both are populated by load_msa().
        self.msa = None
        self.res_map = None
        # pdb code -> {"binding": {...}, "pockets": {...}}
        self.pdb_data = defaultdict(dict)

    def load_msa(self, input_sequence, pdb_code, pdb_chain=None):
        """Align the reference sequence with the sequence(s) of a PDB entry.

        Sets ``ref_seq``, ``pdbfile``, ``msa`` and ``res_map``.

        :param input_sequence: FASTA source read with ``bpio.read``.
        :param pdb_code: PDB code (lower-cased before use).
        :param pdb_chain: chain forwarded to ``PDBFile.seq`` and
            ``PDBFile.residues_map``.
        """
        pdb_code = pdb_code.lower()
        self.utils.update_pdb(pdb_code)
        self.ref_seq = bpio.read(input_sequence, "fasta")
        self.pdbfile = PDBFile(pdb_code, self.utils.pdb_path(pdb_code))
        with open(self.seqs_path, "w") as h:
            bpio.write(self.ref_seq, h, "fasta")
            bpio.write(self.pdbfile.seq(selected_chain=pdb_chain), h, "fasta")

        cmd = docker_wrap_command(
            f'mafft --quiet --localpair --maxiterate 1000 {self.seqs_path} > {self.aln_path} '
        )
        execute(cmd)

        self.msa = MSAMap.from_msa(self.aln_path)
        self.res_map = self.pdbfile.residues_map(pdb_chain)

    def residues_from_pos(self, pos):
        """Return one row per aligned sample that has a residue at ``pos``.

        :param pos: position in the reference sequence (reported as
            ``pos + 1`` in the rows, so presumably 0-based -- confirm).
        :return: list of dicts with keys ``pos``, ``ref``, ``alt``, ``pdb``,
            ``chain``, ``resid``, ``icode`` and ``pdb_pos``.
        """
        ref_id = self.ref_seq.id
        pos_data = []
        for sample in self.msa.samples():
            if sample == ref_id:
                continue
            # Sample ids are expected to start with "<pdb>_<chain>".
            pdb, chain = sample.split("_")[:2]
            if not self.msa.exists_pos(ref_id, pos, sample):
                continue
            msa_pos = self.msa.pos_seq_msa_map[ref_id][pos]
            sample_pos = self.msa.pos_from_seq(ref_id, pos, sample)
            residue = self.res_map[chain][sample_pos]
            pos_data.append({
                "pos": pos + 1,
                "ref": self.msa.seqs[ref_id][msa_pos],
                "alt": self.msa.seqs[sample][msa_pos],
                "pdb": pdb,
                "chain": chain,
                "resid": str(residue[1]),
                "icode": str(residue[2]),
                "pdb_pos": sample_pos,
            })
        return pos_data

    def residues_from_aln_seq(self, input_sequence, pdb_code, pdb_chain=None):
        """Align the inputs and list the PDB residues at variant positions.

        A position is reported when at least one of its alternatives is not
        a gap. Each position is expanded once: ``residues_from_pos`` already
        covers every sample, so calling it per alternative would only
        duplicate rows.

        :return: ``pandas.DataFrame`` built from the ``residues_from_pos``
            rows (empty when there is nothing to report).
        """
        self.load_msa(input_sequence, pdb_code, pdb_chain)
        # Variant keys look like "<ref>_<pos>"; process them by position.
        variants = sorted(self.msa.variants(self.ref_seq.id).items(),
                          key=lambda item: int(item[0].split("_")[1]))
        gap_code = self.msa.gap_code

        output = []
        for ref_pos, alt_samples in variants:
            if any(alt != gap_code for alt in alt_samples):
                pos = int(ref_pos.split("_")[1])
                output += self.residues_from_pos(pos)
        return pd.DataFrame(output)

    def annotate_resid(self, pdb: str, resid: str,
                       structure_annotator: "StructureAnnotator"):
        """Return the cached annotations of one residue of ``pdb``.

        Annotations of the entry are loaded on first use.

        :return: dict that may contain the keys ``binding`` and ``pockets``.
        """
        pdb = pdb.lower()
        if pdb not in self.pdb_data:
            self.load_pdb_ann(pdb, structure_annotator)

        resid = str(resid)
        annotations = self.pdb_data[pdb]
        data = {}
        for kind in ("binding", "pockets"):
            if resid in annotations[kind]:
                data[kind] = annotations[kind][resid]
        return data

    def load_pdb_ann(self, pdb, structure_annotator: "StructureAnnotator"):
        """Load binding-site and pocket annotations of ``pdb`` into the cache.

        ``self.pdb_data[pdb]`` is only written once both data sets have been
        loaded, so a failure leaves no half-filled entry behind and the next
        ``annotate_resid`` call retries the load.
        """
        binding_data = structure_annotator.load_pdb_binding_data(pdb)
        binding_dict = defaultdict(list)
        for site in binding_data:
            for site_res in site["site_residues"]:
                # Residue label = residue number + optional insertion code.
                res = str(site_res["residue_number"]) + (site_res.get(
                    "author_insertion_code", "") or "")
                # NOTE(review): binding_dict is keyed by residue labels, so
                # the `chem_comp_id in binding_dict` filter below looks like
                # it can hardly ever match -- confirm the intended ligand
                # filter. Kept unchanged to preserve the current output.
                binding_dict[res].append({
                    "site_id":
                    site["site_id"],
                    "details":
                    site["details"],
                    "ligands": [{
                        c: x[c]
                        for c in
                        ["chain_id", "author_residue_number", "chem_comp_id"]
                    } for x in site["site_residues"]
                                if x["chem_comp_id"] in binding_dict and (
                                    x["chem_comp_id"] != "HOH")]
                })

        pockets_data = structure_annotator.load_pdb_pocket(
            pdb, self.utils.pdb_dir)
        pockets_dict = defaultdict(list)
        for pocket in pockets_data:
            # set(): list each pocket once per residue.
            for residue in set(pocket["residues"]):
                pockets_dict[residue].append({
                    "pocket_num":
                    pocket["number"],
                    "druggabilitty":
                    pocket["properties"]['Druggability Score']
                })

        entry = self.pdb_data[pdb]
        entry["binding"] = binding_dict
        entry["pockets"] = dict(pockets_dict)

    def annotate_residue_list(self, df,
                              structure_annotator: "StructureAnnotator"):
        """Annotate every residue listed in ``df``.

        :param df: columns=["pdb", "chain", "resid", "alt", "ref", "pos"] or
            generated by residues_from_aln_seq
        :return: dict mapping ``"<pdb>_<chain>_<resid>_<alt>"`` to the
            result of ``annotate_resid`` for that row.
        """
        output = {}
        for _, r in df.iterrows():
            key = f'{r.pdb}_{r.chain}_{r.resid}_{r.alt}'
            output[key] = self.annotate_resid(r.pdb, str(r.resid),
                                              structure_annotator)
        return output