def pdbs_seq_for_modelling(self, out_fasta=None, pdbsIter=None, reuse_previours=None):
    """Write one FASTA with the sequences of every PDB present in the entries index.

    Sequences already present in *reuse_previours* (a FASTA whose record ids
    start with ``<pdbcode>_``) are copied verbatim instead of being re-extracted
    from the structure files.

    :param out_fasta: output path; defaults to ``<pdb_dir>/processed/seqs_from_pdb.fasta``.
    :param pdbsIter: iterable of ``(pdb_code, pdb_file_path)``; defaults to ``PDBs(self.pdb_dir)``.
    :param reuse_previours: optional FASTA file with previously extracted sequences.
    """
    if pdbsIter is None:  # 'is None', not '== None'
        pdbsIter = PDBs(self.pdb_dir)
    if not out_fasta:
        out_fasta = self.pdb_dir + "processed/seqs_from_pdb.fasta"
    # Codes listed in the entries index (lowercased for comparison).
    pdb_codes = {x.lower() for x in self.entries_df().IDCODE}
    reuse = defaultdict(list)
    if reuse_previours:
        for x in bpio.parse(reuse_previours, "fasta"):
            pdb = x.id.split("_")[0]
            reuse[pdb].append(x)
    reuse = dict(reuse)
    pdblist = list(pdbsIter)  # materialize so tqdm can show a total
    with open(out_fasta, "w") as out_fasta_handle:
        for pdb, pdb_file_path in tqdm(pdblist):
            if pdb in pdb_codes:
                if pdb in reuse:
                    # Copy previously extracted records as-is.
                    bpio.write(reuse[pdb], out_fasta_handle, "fasta")
                else:
                    self.seq_from_pdb(out_fasta_handle, pdb, pdb_file_path)
def handle(self, *args, **options):
    """Run fpocket over the selected PDB entries and persist the pockets.

    Processes a single structure when ``--pdb`` is given, otherwise every
    ``PDB`` row. IOErrors are reported per entry and processing continues;
    any other exception aborts the command.
    """
    tmp_dir = os.path.abspath(options['tmp'])
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    if options["pdb"]:
        queryset = PDB.objects.filter(code=options["pdb"])
    else:
        queryset = PDB.objects.all()

    utils = PDBs(options["pdbs_dir"])
    with tqdm(queryset, total=queryset.count()) as progress:
        for entry in progress:
            progress.set_description(entry.code)
            try:
                loader = FPocket2SQL()
                loader.create_or_get_pocket_properties()
                loader.load_pdb(entry.code)
                loader.run_fpocket(options['tmp'],
                                   pdb_path=utils.pdb_path(entry.code),
                                   pockets_path=utils.pdb_pockets_path(entry.code),
                                   force=options["force"])
                loader.load_pockets()
                # res.delete_dir()
            except IOError as ex:
                traceback.print_exc()
                self.stderr.write("error processing pockets from %s: %s" % (entry.code, str(ex)))
            except Exception as ex:
                traceback.print_exc()
                raise CommandError(ex)
def handle(self, *args, **options):
    """Download/update one PDB entry and process it (pockets included).

    Enables DEBUG logging when ``--verbose 1`` is passed. IOError is
    reported to stderr; any other exception aborts the command.
    """
    if options["verbose"] == 1:
        import logging
        logging.basicConfig(level=logging.DEBUG)

    pdb_utils = PDBs(options["pdbs_dir"])
    pdb_utils.url_pdb_entries = options["entries_url"]
    if not os.path.exists(options["entries_path"]):
        pdb_utils.download_pdb_entries()

    pdbio = PDBIO(options['pdbs_dir'] + "/", options['entries_path'], options['tmp'])
    pdbio.init()

    code = options['code']
    try:
        pdb_utils.update_pdb(code)
        pdbio.process_pdb(code,
                          force=options['force'],
                          pocket_path=pdb_utils.pdb_pockets_path(code),
                          pdb_path=pdb_utils.pdb_path(code))
    except IOError as ex:
        traceback.print_exc()
        self.stderr.write("error processing pockets from %s: %s" % (code, str(ex)))
    except Exception as ex:
        traceback.print_exc()
        raise CommandError(ex)
def __init__(self, pdb_dir="/data/databases/pdb/"):
    """Set up PDB helpers and default working paths.

    :param pdb_dir: root directory of the local PDB mirror.
    """
    self.utils = PDBs(pdb_dir)
    # No reference sequence / structure loaded yet.
    self.ref_seq = None
    self.pdbfile = None
    # Scratch files for the sequence set and its alignment.
    self.seqs_path = "/tmp/seq.faa"
    self.aln_path = "/tmp/msa.faa"
    # Per-PDB cached data, keyed by pdb code.
    self.pdb_data = defaultdict(dict)
def add_arguments(self, parser):
    """Register the command-line options for this command."""
    default_entries_url = PDBs().url_pdb_entries
    parser.add_argument('--pdbs_dir', default="data/pdb/")
    parser.add_argument('--entries_path', default=None)
    # store_false: the option is True unless the flag is passed.
    parser.add_argument('--only_annotated',
                        action='store_false',
                        help="by default only cross referenced pdbs are downloaded")
    parser.add_argument('--entries_url', default=default_entries_url)
def add_arguments(self, parser):
    """Register the command-line options for this command."""
    default_entries_url = PDBs().url_pdb_entries
    parser.add_argument('--code', required=True, help="4 letter PDB code")
    parser.add_argument('--tmp', default="data/tmp/load_pdb")
    parser.add_argument('--pdbs_dir', default="/data/databases/pdb/divided/")
    parser.add_argument('--entries_path', default="/data/databases/pdb/entries.idx")
    parser.add_argument('--entries_url', default=default_entries_url)
def add_arguments(self, parser):
    """Register the command-line options for this command."""
    default_entries_url = PDBs().url_pdb_entries
    parser.add_argument('--code', required=True, help="4 letter PDB code")
    parser.add_argument('--tmp', default="data/tmp/load_pdb")
    parser.add_argument('--pdbs_dir', default="/data/databases/pdb/")
    parser.add_argument('--entries_path', default="/data/databases/pdb/entries.idx")
    parser.add_argument('--entries_url', default=default_entries_url)
    parser.add_argument('--force', action="store_true")
    # 0 = quiet, 1 = DEBUG logging.
    parser.add_argument('--verbose', default=0, choices=[0, 1], type=int)
def handle(self, *args, **options):
    """Bulk-load PDB entries into the database.

    Refreshes the entries index when stale, then iterates either the
    cross-referenced PDBs (default, ``only_annotated``) or every local file.
    Obsolete entries, download failures and already-loaded codes are skipped;
    unexpected errors abort with CommandError.
    """
    pdbs_utils = PDBs(pdb_dir=options['pdbs_dir'])
    pdbs_utils.url_pdb_entries = options["entries_url"]
    if not options['entries_path']:
        options['entries_path'] = options['pdbs_dir'] + "/entries.idx"
    # Download the index when it is missing OR older than a week.
    # (Previously getctime was called unconditionally, raising if the
    # file did not exist yet.)
    if (not os.path.exists(options["entries_path"])
            or (datetime.now() - datetime.fromtimestamp(
                os.path.getctime(options["entries_path"]))).days > 7):
        pdbs_utils.download_pdb_entries()
    pdb2sql = PDB2SQL(options['pdbs_dir'], options['entries_path'])
    pdb2sql.load_entries()
    if options["only_annotated"]:
        self.stderr.write("only_annotated option activated by default")
        from bioseq.models.Dbxref import Dbxref
        pdbs = [(x.accession.lower(), pdbs_utils.pdb_path(x.accession.lower()))
                for x in Dbxref.objects.filter(dbname="PDB")]
    else:
        pdbs = list(tqdm(iterpdbs(options['pdbs_dir'])))
    # 4zux 42 mer 2lo7("5my5","/data/databases/pdb/divided/my/pdb5my5.ent")
    # ("4zu4", "/data/databases/pdb/divided/zu/pdb4zu4.ent")
    with tqdm(pdbs) as pbar:
        for code, pdb_path in pbar:
            code = code.lower()
            if PDBsWS.is_obsolete(code):
                self.stderr.write(f"{code} entry is obsolete")
                continue
            try:
                pdb_path = pdbs_utils.update_pdb(code)
            except KeyboardInterrupt:
                raise
            except Exception:
                # best-effort: report and move on to the next code
                self.stderr.write("PDB %s could not be downloaded" % code)
                continue
            if PDB.objects.filter(code=code).exists():
                self.stderr.write("PDB %s already exists" % code)
                continue
            pbar.set_description(code)
            try:
                pdb2sql.create_pdb_entry(code, pdb_path)
                pdb2sql.update_entry_data(code, pdb_path)
            except KeyboardInterrupt:
                raise
            except Exception as ex:
                import traceback
                traceback.print_exc()
                raise CommandError(ex)
def load_pdb_pocket(self, pdb, pdb_dir="/data/databases/pdb/"):
    """Return (and cache in ``self.pdb_data``) the fpocket results for *pdb*.

    If the serialized pockets file does not exist yet, the structure is
    downloaded and fpocket is run, saving its result; otherwise the saved
    file is reused. (The original re-ran fpocket on every call even when
    the result file was already present.)

    :param pdb: 4-letter PDB code.
    :param pdb_dir: root directory of the local PDB mirror.
    :returns: the deserialized pockets data (as stored in the JSON file).
    """
    utils = PDBs(pdb_dir)
    pockets_path = utils.pdb_pockets_path(pdb)
    if not os.path.exists(pockets_path):
        utils.update_pdb(pdb)
        fpocket = FPocket(utils.pdb_path(pdb))
        result = fpocket.hunt_pockets()
        mkdir(os.path.dirname(pockets_path))
        result.save(pockets_path)
    with open(pockets_path) as h:
        self.pdb_data[pdb]["pockets"] = json.load(h)
    return self.pdb_data[pdb]["pockets"]
def handle(self, *args, **options):
    """Process a single PDB entry with PDBIO.

    Refreshes the entries index when missing. IOError is reported to
    stderr; any other exception aborts the command.
    """
    pdb_utils = PDBs()
    pdb_utils.url_pdb_entries = options["entries_url"]
    if not os.path.exists(options["entries_path"]):
        pdb_utils.download_pdb_entries()

    pdbio = PDBIO(options['pdbs_dir'] + "/", options['entries_path'], options['tmp'])
    pdbio.init()

    code = options['code']
    try:
        pdbio.process_pdb(code)
    except IOError as ex:
        traceback.print_exc()
        self.stderr.write("error processing pockets from %s: %s" % (code, str(ex)))
    except Exception as ex:
        traceback.print_exc()
        raise CommandError(ex)
def handle(self, *args, **options):
    """Bulk-load PDB entries into the database.

    Iterates either the cross-referenced PDBs (default, ``only_annotated``)
    or every local file. Download failures and already-loaded codes are
    skipped; unexpected errors abort with CommandError.
    """
    # Renamed from 'pdbs' to avoid being shadowed by the entry list below.
    pdbs_utils = PDBs(pdb_dir=options['pdbs_dir'])
    pdbs_utils.url_pdb_entries = options["entries_url"]
    if not os.path.exists(options["entries_path"]):
        pdbs_utils.download_pdb_entries()
    pdb2sql = PDB2SQL(options['pdbs_dir'], options['entries_path'])
    pdb2sql.load_entries()
    if options["only_annotated"]:
        self.stderr.write("only_annotated option activated by default")
        from bioseq.models.Dbxref import Dbxref
        pdbs = [(x.accession.lower(), pdbs_utils.pdb_path(x.accession.lower()))
                for x in Dbxref.objects.filter(dbname="PDB")]
    else:
        pdbs = list(tqdm(iterpdbs(options['pdbs_dir'])))
    # 4zux 42 mer 2lo7("5my5","/data/databases/pdb/divided/my/pdb5my5.ent")
    # ("4zu4", "/data/databases/pdb/divided/zu/pdb4zu4.ent")
    with tqdm(pdbs) as pbar:
        for code, pdb_path in pbar:
            code = code.lower()
            try:
                pdb_path = pdb2sql.download(code)
            except KeyboardInterrupt:
                # The bare except previously swallowed Ctrl-C as well.
                raise
            except Exception:
                self.stderr.write("PDB %s could not be downloaded" % code)
                continue
            if PDB.objects.filter(code=code).exists():
                self.stderr.write("PDB %s already exists" % code)
                continue
            pbar.set_description(code)
            try:
                pdb2sql.create_pdb_entry(code, pdb_path)
                pdb2sql.update_entry_data(code, pdb_path)
            except Exception as ex:
                raise CommandError(ex)
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter) parser.add_argument("-i", "--data_path", default='/data/databases/pdb/') parser.add_argument( "-o", "--output_path", default='/data/databases/pdb/processed/domain_analisis') args = parser.parse_args() domains = defaultdict(lambda: []) for seq in bpio.parse(args.data_path + "/processed/domains.fasta", "fasta"): domains["_".join(seq.id.split("_")[0:2])].append(seq.id.split("_")) for (code, pdb_path) in tqdm(PDBs(pdb_dir=args.data_path)): pdb_model = PDB(code=code) pdb_model.save() p = PDBParser(PERMISSIVE=True, QUIET=True) try: for chain in p.get_structure(code, pdb_path).get_chains(): chains_dir = args.output_path + "/chains/" + code[1:3] + "/" mkdir(chains_dir) cs = ChainSplitter(chains_dir) process_chain(pdb_path, code, chain.id, pdb_model) for (_, _, res_start, res_end, dn, dn_start, dn_end) in domains[code + "_" + chain.id]: # 1r9d_A_2_787_PF02901.14_8_648
def old_or_inexistent(filepath, period=30): return not os.path.exists(filepath) or (( (time.time() - os.path.getatime(filepath)) / 60 / 60 / 24) > period) #os.environ["http_proxy"] = "http://proxy.fcen.uba.ar:8080" #os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080" mkdir("/data/pdb/") download_file("ftp://ftp.wwpdb.org/pub/pdb/derived_data/index/entries.idx", "/data/pdb/entries.idx", ovewrite=True) pdbs = PDBs("/data/pdb/") pdbs.download_pdb_seq_ses() pdbs.update_pdb_dir() mkdir("/data/pdb/processed/") pdbs.pdbs_seq_for_modelling() execute("makeblastdb -dbtype prot -in /data/pdb/processed/seqs_from_pdb.fasta") if old_or_inexistent("/data/uniprot/uniref/uniref90/uniref90.fasta"): mkdir("/data/uniprot/uniref/uniref90") download_file( "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz", "/data/uniprot/uniref/uniref90/uniref90.fasta.gz", ovewrite=True) execute("gunzip /data/uniprot/uniref/uniref90/uniref90.fasta.gz") if old_or_inexistent("/data/uniprot/uniref/uniref90/uniref90.fasta.pal"):
def main(argv=None):  # IGNORE:C0111
    '''Command line options.

    Parses CLI arguments, validates the required input files exist, then
    runs the PDB-property update pipeline (quaternary structure, CSA,
    free CYS/TYR, important Pfam domains, binding residues).
    '''
    if argv is None:
        argv = sys.argv
    else:
        # NOTE(review): extends the real sys.argv with the supplied args
        # instead of replacing it — standard generated-CLI template quirk.
        sys.argv.extend(argv)
    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-v", "--verbose", dest="verbose", action="count",
                        help="set verbosity level [default: %(default)s]")
    # parser.add_argument("-dir", "--structs_dir", required = True )
    parser.add_argument("-db", "--database_name", default='pdb')
    parser.add_argument("-host", "--db_host", default='127.0.0.1')
    parser.add_argument(
        "--csa", default='/data/databases/csa/csa.txt')
    parser.add_argument(
        "--hmm", default='/data/databases/pdb/pdb_seq_res.hmm')
    parser.add_argument(
        "--pdbs", default='/data/databases/pdb/')
    parser.add_argument(
        "--distances", default='/data/databases/pdb/processed/distances.tbl')

    args = parser.parse_args()

    # pdbs = PDBs()
    # pdbs.update('/data/pdb/divided/')

    # Opens the Mongo connection (side effect); host argument not wired in.
    BioMongoDB(args.database_name)  # args.db_host
    # update_quaternary()
    # # clusters cd hit
    # update_clusters()
    # # residues near ligands --> metal drug/cofactor

    # Fail fast when any required input file is missing.
    if not os.path.exists(args.csa):
        sys.stderr.write("%s not found. Download it from %s" % (
            args.csa,
            "http://www.ebi.ac.uk/thornton-srv/databases/CSA/downloads/CSA_2_0_121113.txt"
        ))
        sys.exit(1)
    if not os.path.exists(args.pdbs):
        sys.stderr.write("%s not found. Specify where is pdbs/divided directory" % (
            args.pdbs
        ))
        sys.exit(1)
    if not os.path.exists(args.distances):
        sys.stderr.write("%s not found. Run extended_domain.py script to create it." % (
            args.distances
        ))
        sys.exit(1)

    pdbUtils = PDBs(pdb_dir=args.pdbs)

    # Run each update step in sequence, announcing progress on stdout.
    print("Update Quaternary")
    update_quaternary(pdbUtils)
    print("Update CSA")
    update_csa(args.csa)
    print("Update CYS/TYR")
    free_cys_tyr(pdbUtils)
    print("Update Importan Pfam")
    important_pfam(args.hmm)
    print("Update Binding residues")
    update_binding_residues(args.distances)
    _log.info("update pdb properties finished!!")
parser.add_argument("-p", "--dbpass", required=True) parser.add_argument("-i", "--pdb_dir", default="/data/databases/pdb/") parser.add_argument("-db", "--dbname", default="pdbdb") parser.add_argument("-u", "--dbuser", default="root") args = parser.parse_args() from peewee import MySQLDatabase mysql_db = MySQLDatabase(args.dbname, user=args.dbuser, password=args.dbpass) mysql_db.close() sqldb.initialize(mysql_db) pdb_utils = PDBs(pdb_dir=args.pdb_dir) df = pdb_utils.entries_df() pdbs = list(pdb_utils) with tqdm(pdbs) as pbar: for (code, pdb_path) in pbar: mysql_db.connect(reuse_if_open=True) pbar.set_description(code) try: entry = df[df.IDCODE == code.upper()].iloc[0] except IndexError: continue pdb_model = PDB(code=code, experiment=str(entry.EXPERIMENT)) try: resolution = float(entry.RESOLUTION)
def add_arguments(self, parser):
    """Register the command-line options for this command."""
    default_entries_url = PDBs().url_pdb_entries
    parser.add_argument('--pdbs_dir', default="data/pdb/")
    parser.add_argument('--entries_path', default="data/pdb/entries.idx")
    # store_false: the option is True unless the flag is passed.
    parser.add_argument('--only_annotated', action='store_false')
    parser.add_argument('--entries_url', default=default_entries_url)
return self.pdbs_dir + pdb[1:3] + "/pdb" + pdb + ".ent" @staticmethod def sequence_from_residues(residues): return "".join([ protein_letters_3to1[res.get_resname()[0] + res.get_resname()[1:3].lower()] for res in residues ]) if __name__ == '__main__': from SNDG import init_log import argparse from SNDG.Structure.PDBs import PDBs parser = argparse.ArgumentParser(description='PDB Update utils') init_log() pdbs = PDBs(pdb_dir="/data/databases/pdb/") #os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080" # pdbs.download_pdb_seq_ses() pdbs.download_pdb_entries() pdbs.update_pdb_dir() # from SNDG.Structure.PDBs import PDBs # pdbs = PDBs(pdb_dir="/data/databases/pdb/") # pdbs.pdbs_seq_for_modelling("/data/databases/pdb/processed/seqs_from_pdb.fasta") #pepe = pdbs.entries_df() #print pepe
"--verbose", dest="verbose", action="count", help="set verbosity level [default: %(default)s]") parser.add_argument("-host", "--db_host", default='127.0.0.1') parser.add_argument("-db", "--db_name", default='tdr') parser.add_argument("--pdb_entries", default='/data/databases/pdb/entries.idx') parser.add_argument("--pdbs", default='/data/databases/pdb/') args = parser.parse_args() BioMongoDB(args.db_name) pdbUtils = PDBs(pdb_dir=args.pdbs) db = MongoClient(args.db_host)["pdb"] col_name = "pdb" if not os.path.exists(args.pdb_entries): sys.stderr.write("%s does not exists" % args.pdb_entries) sys.exit(1) """ collection = SeqCollection.objects(name=col_name) if len(collection): collection = collection.get() else: collection = SeqCollection(name=col_name, description="Protein Data Bank", organism="?") collection.save() """
# CLI entry point: subcommand-based PDB maintenance utility.
parser = argparse.ArgumentParser(description='PDB utils')
subparsers = parser.add_subparsers(help='commands',
                                   description='valid subcommands',
                                   dest='command')

# 'update' subcommand: refresh the local PDB mirror.
update_pdb = subparsers.add_parser('update', help='List contents')
update_pdb.add_argument('-i', '--pdbs_dir', help="pdbs_directory",
                        default="/data/databases/pdb/")
# update_pdb = subparsers.add_parser('getpdb', help='List contents')
# update_pdb.add_argument('-i', '--pdb_code', help="4 letter code", required=True)
# update_pdb.add_argument('-o', '--ouput_file', help="output file")

args = parser.parse_args()

if args.command == "update":
    # remember to configure ftp (proxy settings below if needed)
    pdbs = PDBs(pdb_dir=args.pdbs_dir)
    pdbs.download_pdb_entries()
    pdbs.update_pdb_dir()
    pdbs.download_pdb_seq_ses()
    sys.exit(0)

# os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080"
# pdbs.download_pdb_seq_ses()
# from SNDG.Structure.PDBs import PDBs
# pdbs = PDBs(pdb_dir="/data/databases/pdb/")
# pdbs.pdbs_seq_for_modelling("/data/databases/pdb/processed/seqs_from_pdb.fasta")
# pepe = pdbs.entries_df()
# print pepe
help='pdb files directory') parser.add_argument('--tmp_dir', default=mkdtemp(), help='temporal directory') parser.add_argument('--cpus', default=1, type=int, help='cpu cores to use') # parser.add_argument('--max_alns', default=3, type=int, help='max different templates to use') parser.add_argument('-t', "--templates_to_use", default=3, type=int, help='max amount of templates to use.') args = parser.parse_args() pdbs_dir = args.pdbs_dir + ("/" if args.pdbs_dir[-1] != "/" else "") mkdir(f'{pdbs_dir}/divided') pdb_utils = PDBs(pdbs_dir) # pbar = tqdm(args.alns) sys.stderr.write(str(args)) sys.stderr.write(f'reading alignment file\n') alns = [{ "aln_file": x, "templates2use": args.templates_to_use, "output_dir": args.output_dir, "tmp_dir": args.tmp_dir } for x in args.alns] mkdir(args.output_dir) assert os.path.exists( args.output_dir), f'"{args.output_dir}" could not be created' sys.stderr.write(f'processing alignment files\n')