def run(self):
    log_command(_log, 'gene',
                "/usr/bin/rsync" +
                " rsync://ftp.ncbi.nlm.nih.gov/gene/DATA/ASN_BINARY/All_Data.ags.gz" +
                " %s" % os.path.join(settings["DATADIR"], 'gene/'),
                timeout=24 * 60 * 60)
def run(self):
    in_path = mmcif_path(self._pdbid)
    out_path = dssp_mmcif_path(self._pdbid)
    if os.path.isfile(in_path):
        log_command(_log, 'dssp-from-mmcif',
                    "%s %s %s" % (MKDSSP, in_path, out_path))
def run(self):
    log_command(_log, 'pmc',
                "/usr/bin/rsync -rtv --delete" +
                " --include='articles*.tar.gz' --exclude='*'" +
                " rsync://ftp.ncbi.nih.gov/pub/pmc/" +
                " %s" % os.path.join(settings["DATADIR"], 'pmc/'),
                timeout=24 * 60 * 60)
def run(self):
    log_command(_log, 'unigene',
                "/usr/bin/rsync -rtv --delete" +
                " --include='*/' --include='*.data.gz' --exclude='*'" +
                " rsync://ftp.ebi.ac.uk/pub/databases/Unigene/" +
                " %s" % os.path.join(settings["DATADIR"], 'unigene/'),
                timeout=24 * 60 * 60)
def run(self):
    log_command(_log, 'prosite',
                "/usr/bin/rsync -rtv --delete --include='prosite*.dat'" +
                " --include='prosite*.doc' --exclude='*'" +
                " rsync://ftp.ebi.ac.uk/pub/databases/prosite/" +
                " %s" % os.path.join(settings["DATADIR"], 'prosite/'),
                timeout=24 * 60 * 60)
def run(self):
    log_command(_log, 'genbank',
                "/usr/bin/rsync -rtv --delete" +
                " --include='*.seq.gz' --exclude='*'" +
                " rsync://ftp.ncbi.nih.gov/genbank/" +
                " %s" % os.path.join(settings["DATADIR"], 'genbank/'),
                timeout=24 * 60 * 60)
def run(self):
    log_command(_log, 'enzyme',
                "/usr/bin/rsync" +
                " rsync://ftp.ebi.ac.uk/pub/databases/enzyme/release_with_updates/" +
                " %s" % os.path.join(settings["DATADIR"], 'enzyme/'),
                timeout=24 * 60 * 60)
def run(self): _log.info("[pdbfinder2] mkpdbfinder2 %s" % self._pdbid) log_command(_log, 'pdbfinder2', "python2 " + PDBFINDER2_SCRIPT + " -pdbftopdbf2" + " %s" % (self._pdbid.lower()), cwd="/srv/data/prog/pdbfinder2/" )
def run(self):
    log_command(_log, 'pfam',
                "/usr/bin/rsync -rtv --delete --include='Pfam-A.full.gz'" +
                " --include='Pfam-A.seed.gz' --exclude='*'" +
                " rsync://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/" +
                " %s" % os.path.join(settings["DATADIR"], 'pfam/'),
                timeout=24 * 60 * 60)
def run(self):
    # rsync filter rules do not expand braces, so '*g{p,b}ff.gz' would match
    # nothing; a character class matches both *gpff.gz and *gbff.gz.
    log_command(_log, 'refseq',
                "/usr/bin/rsync -rtv --delete" +
                " --include='*g[pb]ff.gz' --exclude='*'" +
                " rsync://ftp.ncbi.nih.gov/refseq/release/complete/" +
                " %s" % os.path.join(settings["DATADIR"], 'refseq/'),
                timeout=24 * 60 * 60)
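
# Editorial sketch (not part of the original module): the mirror jobs above
# and below all build the same shape of rsync command by string
# concatenation. A hypothetical helper like this one could factor that out;
# `log_command`, `_log` and `settings` are the module's own names, everything
# else in this sketch is an assumption.
def rsync_mirror(tag, source, subdir, includes=(), timeout=24 * 60 * 60):
    cmd = "/usr/bin/rsync -rtv --delete"
    for pattern in includes:
        cmd += " --include='%s'" % pattern
    if includes:
        # Anything not explicitly included is skipped.
        cmd += " --exclude='*'"
    cmd += " %s %s" % (source, os.path.join(settings["DATADIR"], subdir))
    log_command(_log, tag, cmd, timeout=timeout)

# Example: the refseq job above would become
#   rsync_mirror('refseq',
#                "rsync://ftp.ncbi.nih.gov/refseq/release/complete/",
#                'refseq/', includes=['*g[pb]ff.gz'])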
def run(self):
    in_path = pdb_flat_path(self._pdbid)
    if not os.path.isfile(in_path):
        return

    out_dir = pdbreport_path(self._pdbid)
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    log_path = os.path.join(out_dir, "log.log")
    txt_path = os.path.join(out_dir, "pdbout.txt")
    check_path = os.path.join(out_dir, "check.db")
    checkbz2_path = os.path.join(out_dir, "check.db.bz2")
    html_path = os.path.join(out_dir, "pdbout.html")
    index_path = os.path.join(out_dir, "index.html")
    ion_path = os.path.join(out_dir, "%s.ion" % self._pdbid)
    ionout_path = os.path.join(out_dir, "pdb%s_ION.OUT" % self._pdbid)
    # %Y%m%d: calendar date, matching the other whynot comment files.
    whynot_path = ("/srv/data/scratch/whynot2/comment/%sWC.txt"
                   % datetime.now().strftime("%Y%m%d"))

    if log_command(_log, 'pdbreport',
                   ". %s; %s %s" % (ccp4setup, whatcheck, in_path),
                   cwd=out_dir, timeout=5 * 60):
        if os.path.isfile(txt_path):
            log_command(_log, 'pdbreport', htmlgen, cwd=out_dir)
            if os.path.isfile(html_path) and valid_html(html_path):
                os.rename(html_path, index_path)
            else:
                _log.debug("validation failed")
                if os.path.isfile(log_path):
                    log_to_whynot(log_path, self._pdbid, whynot_path)

            if os.path.isfile(check_path):
                # Recompress check.db as check.db.bz2 in 1 KiB chunks.
                with BZ2File(checkbz2_path, 'wb') as g:
                    with open(check_path, 'rb') as f:
                        while True:
                            chunk = f.read(1024)
                            if len(chunk) <= 0:
                                break
                            g.write(chunk)
                os.remove(check_path)

            if os.path.isfile(ionout_path):
                os.rename(ionout_path, ion_path)
        else:
            _log.debug("validation failed")
            if os.path.isfile(log_path):
                log_to_whynot(log_path, self._pdbid, whynot_path)
    else:
        _log.error("[pdbreport] whatcheck timeout for %s" % self._pdbid)
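
# Editorial sketch (an assumption, not the original code): the chunked copy
# loop above also appears in the pdbfinder2 and uniprot jobs below. A small
# hypothetical helper could replace all three loops:
def stream_copy(src, dst, chunk_size=65536):
    """Copy one open file object to another in fixed-size chunks."""
    while True:
        chunk = src.read(chunk_size)
        if not chunk:
            break
        dst.write(chunk)
# The standard library's shutil.copyfileobj does the same job and could be
# used instead.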
def run(self): excludes = '' for pdbid in settings["FRAUD_IDS"]: excludes += " --exclude=\'%s\'" % pdbid log_command(_log, 'pdbredo', "/usr/bin/rsync -av --delete" + " rsync://rsync.pdb-redo.eu/pdb-redo/" + " %s" % os.path.join(settings["DATADIR"], 'pdb_redo/'), timeout=24*60*60 )
def run(self): excludes = '' for pdbid in settings["FRAUD_IDS"]: excludes += " --exclude=\'r%ssf.ent.gz\'" % pdbid log_command(_log, 'structure_factors', "/usr/bin/rsync -rtv --delete --port=33444" + " --include=\'r????sf.ent.gz\' --exclude=\'*\'" + excludes + " rsync.wwpdb.org::ftp_data/structures/divided/structure_factors/**/" + " %s" % os.path.join(settings["DATADIR"], 'structure_factors/'), timeout=24*60*60 )
def run(self):
    in_path = find_input(self._pdbid)
    out3_path = hssp3_path(self._pdbid)
    out1_path = hssp1_path(self._pdbid)
    err_path = "/srv/data/scratch/whynot2/hssp/%s.err" % self._pdbid

    if os.path.isfile(in_path):
        run_mkhssp(in_path, out3_path, "hssp", err_path)
        if os.path.isfile(out3_path):
            log_command(_log, 'hssp',
                        "%s %s %s" % (HSSPCONV, out3_path, out1_path))
            if os.path.isfile(err_path):
                os.remove(err_path)
def run(self):
    if not os.path.isfile(pdb_path(self._pdbid)):
        whynot_path = datetime.datetime.now().strftime(
            '/srv/data/scratch/whynot2/comment/%Y%m%d_pdbredo.txt')
        with open(whynot_path, 'a') as f:
            f.write('COMMENT: No PDB-format coordinate file available\n')
            f.write('PDB_REDO, %s\n' % self._pdbid)
    elif not os.path.isfile(pdb_flat_path(self._pdbid)):
        PdbExtractJob(pdb_path(self._pdbid)).run()

    _log.info("[pdbredo] running pdbredo for %s" % self._pdbid)

    three_days = 3 * 24 * 60 * 60
    if not log_command(_log, 'pdbredo',
                       "%s %s" % (redo_script, self._pdbid),
                       timeout=three_days):
        _log.error("[pdbredo] pdbredo timeout for %s" % self._pdbid)
        whynot_path = os.path.join(zata_dir, 'whynot.txt')
        with open(whynot_path, 'a') as f:
            f.write('COMMENT: PDB REDO script timed out\n'
                    'PDB_REDO, %s\n' % self._pdbid)

    tot_path = os.path.join(pdbredo_path(self._pdbid),
                            "%s_final_tot.pdb" % self._pdbid)
    if os.path.isfile(tot_path):
        link_path = os.path.join(settings["DATADIR"],
                                 "pdb_redo/flat/%s" % self._pdbid)
        # os.symlink raises if the link already exists, so only create it
        # when it is missing.
        if not os.path.islink(link_path):
            os.symlink(tot_path, link_path)
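
# Editorial sketch (an assumption): the guard above never refreshes an
# existing link, so a stale link keeps pointing at an old file. Replacing
# the link through a rename avoids both the stale link and the race with
# os.symlink on an existing path:
def replace_symlink(target, link_path):
    tmp_path = link_path + ".tmp"
    if os.path.lexists(tmp_path):
        os.remove(tmp_path)
    os.symlink(target, tmp_path)
    os.rename(tmp_path, link_path)  # replaces link_path atomically on POSIX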
def run(self):
    if self._src.lower() == 'pdb':
        struct_path = pdb_path(self._pdbid)
    elif self._src.lower() == 'redo':
        struct_path = final_path(self._pdbid)
    else:
        raise Exception("unknown structure type %s" % self._src)

    in_path = wilist_data_path(self._src, self._lis_type, self._pdbid)
    root_dir = os.path.join(settings["DATADIR"], "wi-lists/%s" % self._src)

    if os.path.isfile(in_path):
        os.environ["SCENES_SETTINGS"] = scene_settings
        log_command(_log, 'scene',
                    "%s %d %s %s %s %s %s" % (script, os.getpid(),
                                              struct_path, self._pdbid,
                                              self._src.upper(),
                                              commands[self._lis_type],
                                              in_path),
                    cwd=root_dir)
def run(self):
    dat_dir = os.path.join(pdbfinder2_dir, "data/")
    out_file = os.path.join(pdbfinder2_dir, "PDBFIND2.TXT")
    gz_file = os.path.join(pdbfinder2_dir, "PDBFIND2.TXT.gz")

    log_command(_log, 'pdbfinder2',
                "/srv/data/prog/pdbfinder2/mergepdbfinder2.pl" +
                " %s %s" % (out_file, dat_dir),
                cwd="/srv/data/prog/pdbfinder2/")

    _log.debug("[pdbfinder2] compressing %s" % out_file)
    with GzipFile(gz_file, 'wb') as g:
        with open(out_file, 'rb') as f:
            while True:
                chunk = f.read(1024)
                if len(chunk) <= 0:
                    break
                g.write(chunk)
def run(self):
    uniprot_dir = os.path.join(settings["DATADIR"], 'uniprot/')
    fasta_dir = os.path.join(settings["DATADIR"], 'fasta/')
    if not os.path.isdir(uniprot_dir):
        os.mkdir(uniprot_dir)

    for filename in ['uniprot_sprot.fasta.gz', 'uniprot_trembl.fasta.gz',
                     'uniprot_sprot.dat.gz', 'uniprot_trembl.dat.gz',
                     'README', 'reldate.txt']:
        log_command(_log, 'uniprot',
                    '/usr/bin/wget -q' +
                    ' ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/%s' % filename +
                    ' -N -P %s' % uniprot_dir,
                    timeout=24 * 60 * 60)

    #ftp = FTP('ftp.uniprot.org', timeout=3600)
    #ftp.login()
    #ftp.cwd('pub/databases/uniprot/current_release/knowledgebase/complete/')
    #for filename in ['uniprot_sprot.fasta.gz', 'uniprot_trembl.fasta.gz',
    #                 'uniprot_sprot.dat.gz', 'uniprot_trembl.dat.gz',
    #                 'README', 'reldate.txt']:
    #    with open(os.path.join(uniprot_dir, filename), 'wb') as f:
    #        _log.info("[uniprot] retrieve %s" % filename)
    #        ftp.retrbinary('RETR %s' % filename, f.write)
    #ftp.quit()

    # Decompress fastas:
    for filename in ['uniprot_sprot.fasta.gz', 'uniprot_trembl.fasta.gz']:
        fastaname = os.path.splitext(filename)[0]
        fasta_path = os.path.join(fasta_dir, fastaname)
        _log.info("[uniprot] extract to %s" % fasta_path)
        with open(fasta_path, 'wb') as f:
            with GzipFile(os.path.join(uniprot_dir, filename), 'rb') as g:
                while True:
                    chunk = g.read(65536)
                    if len(chunk) <= 0:
                        break
                    f.write(chunk)
def run(self):
    in_path = pdb_flat_path(self._pdbid)
    if not os.path.isfile(in_path):
        return

    out_dir = hbonds_path(self._pdbid)
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    _log.info("making hbonds for %s" % self._pdbid)

    logfilename = "%s.hb2.log" % self._pdbid
    # WHAT IF batch script, fed to the program on stdin.
    script = """GETMOL %s
Y
%s
HBONDS
HB2INI
DOLOG %s
0
HBONDS
HB2LIS
protein
0
protein
0
NOLOG
FULLST
Y
""" % (in_path, self._pdbid, logfilename)

    # Create the scratch dir before the try block so the finally clause
    # never references an unassigned name.
    tmpdir = tempfile.mkdtemp()
    try:
        if log_command(_log, 'hbonds', whatif, cwd=tmpdir, timeout=20,
                       strin=script):
            logfilepath = os.path.join(tmpdir, logfilename)
            out_path = os.path.join(out_dir, "%s.hb2.bz2" % self._pdbid)
            if os.path.isfile(logfilepath):
                # Skip the two header lines and normalise "->" to "-".
                with open(logfilepath, 'rb') as g:
                    with BZ2File(out_path, 'wb') as f:
                        for line in g.readlines()[2:]:
                            f.write(line.decode('ascii')
                                        .replace("->", "-")
                                        .encode('ascii'))
            else:
                _log.error("hb2 log not generated for %s" % self._pdbid)
        else:
            _log.error("[hbonds] whatif timeout for %s" % self._pdbid)
    finally:
        # Remove all whatif runtime files.
        if os.path.isdir(tmpdir):
            shutil.rmtree(tmpdir)
def run(self): log_command(_log, "pdbredo", "/srv/data/pdb_redo/alldata.csh")
def run(self):
    log_command(_log, 'bdb',
                ". %s; %s %s %s %s" % (ccp4_setup, script, bdb_dir,
                                       pdb_flat_path(self._pdbid),
                                       self._pdbid))
def run(self):
    log_command(_log, 'whynot',
                "%s %s %s" % (CRAWL, self._databank, self._path))
def run(self):
    in_path = pdb_path(self._pdbid)
    out_path = dssp_path(self._pdbid)
    if os.path.isfile(in_path):
        log_command(_log, 'dssp', "%s %s %s" % (MKDSSP, in_path, out_path))