def GetLocDef(predfile): #{{{
    """Parse a subcons prediction file and return (loc_def, loc_def_score).

    The file is expected to have a tab-separated header on line 1 and a
    tab-separated value row on line 2, with column 1 named "LOC_DEF" and
    columns 2+ holding per-location scores.  Returns (None, None) when the
    file is missing, empty, or not in the expected layout.
    """
    loc_def = None
    loc_def_score = None
    content = myfunc.ReadFile(predfile) if os.path.exists(predfile) else ""
    if content != "":
        lines = content.split("\n")
        if len(lines) >= 2:
            header = [x.strip() for x in lines[0].split("\t")]
            values = [x.strip() for x in lines[1].split("\t")]
            # header and value row must align and carry at least one score column
            if len(header) == len(values) and len(header) > 2:
                if header[1] == "LOC_DEF":
                    loc_def = values[1]
                    # map each location name to its score string
                    score_by_loc = dict(zip(header[2:], values[2:]))
                    if loc_def in score_by_loc:
                        loc_def_score = score_by_loc[loc_def]
    return (loc_def, loc_def_score)
def GetAverageNewRunTime(finished_seq_file, window=100): #{{{
    """Return the average running time (in seconds) of the most recent
    `window` 'newrun' tasks recorded in `finished_seq_file`.

    Each record is a tab-separated line with at least 7 fields; field 5
    (index 4) is the run source and field 6 (index 5) the run time.  The
    file is scanned from the end so the newest entries are counted first.
    Returns -1.0 when the file does not exist or holds no parsable
    'newrun' record.
    """
    logger = logging.getLogger(__name__)
    avg_newrun_time = -1.0
    if not os.path.exists(finished_seq_file):
        return avg_newrun_time
    # newest entries are at the end of the file; scan in reverse
    indexmap_content = myfunc.ReadFile(finished_seq_file).split("\n")
    cnt = 0
    sum_run_time = 0.0
    for line in reversed(indexmap_content):
        strs = line.split("\t")
        if len(strs) >= 7 and strs[4] == "newrun":
            try:
                sum_run_time += float(strs[5])
                cnt += 1
            except ValueError:
                # malformed runtime field: skip this record but keep scanning
                # (was a bare `except:` which also swallowed SystemExit etc.)
                logger.debug(
                    "bad format in finished_seq_file (%s) with line \"%s\"" % (
                        finished_seq_file, line))
        if cnt >= window:
            break
    if cnt > 0:
        avg_newrun_time = sum_run_time / float(cnt)
    return avg_newrun_time
def RunJob(infile, outpath, tmpdir, email, jobid, g_params): #{{{
    """Run a blastp search for `infile` against the hard-coded swissprot
    database and copy the result into the job's result folder.

    Writes a runjob.start tag file before launching; appends progress to
    g_params['runjob_log'] and failures to g_params['runjob_err'].
    NOTE(review): `blastall` and `myfunc` are module-level names not visible
    in this chunk; `email` appears unused in this function.
    """
    blastdb = "/data3/data/blastdb/swissprot"
    rootname = os.path.basename(os.path.splitext(infile)[0])
    # tag/log file locations inside the job output folder
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    tmp_outfile = "%s/query.result" % (tmpdir)
    resultpathname = jobid
    outpath_result = "%s/%s" % (outpath, resultpathname)
    outfile = "%s/query.result" % (outpath_result)
    tarball = "%s.tar.gz" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    isOK = True
    try:
        os.makedirs(outpath_result)
        isOK = True
    except OSError:
        # folder creation failed (e.g. already exists or no permission);
        # record it and skip the run
        msg = "Failed to create folder %s" % (outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
    if isOK:
        g_params['runjob_log'].append("tmpdir = %s" % (tmpdir))
        #cmd = [script_getseqlen, infile, "-o", tmp_outfile , "-printid"]
        # mark the job as started with a timestamp tag file
        datetime = time.strftime("%Y-%m-%d %H:%M:%S")
        rt_msg = myfunc.WriteFile(datetime, starttagfile)
        if rt_msg:
            g_params['runjob_err'].append(rt_msg)
        cmd = [
            blastall, "-i", infile, "-p", "blastp", "-o", tmp_outfile, "-d",
            blastdb
        ]
        g_params['runjob_log'].append(" ".join(cmd))
        try:
            myfunc.check_output(cmd)
        except subprocess.CalledProcessError, e:
            g_params['runjob_err'].append(str(e))
            # on failure, also capture the queue system's .out file if any
            suqoutfilelist = glob.glob("%s/*.sh.*.out" % (tmpdir))
            if len(suqoutfilelist) > 0:
                suqoutfile = suqoutfilelist[0]
                g_params['runjob_err'].append(myfunc.ReadFile(suqoutfile))
        # copy the result from the scratch dir into the job result folder
        if os.path.exists(tmp_outfile):
            cmd = ["cp", "-f", tmp_outfile, outfile]
            try:
                myfunc.check_output(cmd)
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e))
def GetRunTimeFromTimeFile(timefile, keyword=""):  # {{{
    """Extract a run time (seconds) from `timefile`.

    Lines are ';'-separated with the run time expected in the second field.
    With an empty `keyword` the first parsable line wins; otherwise every
    line containing `keyword` is considered and the last one wins.  Returns
    0.0 when the file is missing or no matching line parses.
    """
    runtime = 0.0
    if os.path.exists(timefile):
        for line in myfunc.ReadFile(timefile).split("\n"):
            # empty keyword matches every line; otherwise substring match
            if keyword == "" or line.find(keyword) != -1:
                ss2 = line.split(";")
                try:
                    runtime = float(ss2[1])
                    if keyword == "":
                        break
                except (IndexError, ValueError):
                    # was a bare `except:`; only the missing/unparsable
                    # second field can fail here.  Reset to 0.0 to keep the
                    # original last-match semantics.
                    runtime = 0.0
    return runtime
def main(args, g_params):
    """Command-line entry point of the TOPCONS2_OCTOPUS workflow.

    Parses arguments, validates paths, then for every FASTA entry in the
    input file: builds a sequence profile (pfam_scan, falling back to
    hmmscan/cdd and finally to a psiblast-based script), runs OCTOPUS and
    SPOCTOPUS in parallel, records timing, and collects the per-sequence
    topologies into two .topfa files under out_path.
    """
    parser = argparse.ArgumentParser(
        description='TOPCONS2_OCTOPUS workflow master script',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''\
Created 2015-05-05, updated 2018-02-16, Peters Christoph and Nanjiang Shu

Examples:
''')
    parser.add_argument(
        'inFile',
        metavar='inFile',
        help='Specify the input amino acid sequence file in FASTA format')
    parser.add_argument('out_path',
                        metavar='out_path',
                        help='Specify the outpath for result')
    parser.add_argument(
        'blastDir',
        metavar='blastDir',
        help='Specify the path for psiblast, which contains bin/blastpgp')
    parser.add_argument(
        'blastDB',
        metavar='blastDB',
        help='Specify the name of the blastdb, including the path')
    parser.add_argument(
        '-tmpdir',
        '--tmpdir',
        metavar='DIR',
        dest='TMPPATH',
        help=
        'Specify the directory where the temporary files will be written to')
    parser.add_argument('-debug',
                        '--debug',
                        action='store_true',
                        default=False,
                        dest='isDEBUG',
                        help='Output debug info')
    parser.add_argument('-RM',
                        '--remove-individual-files',
                        action='store_true',
                        default=False,
                        dest='isRemoveFile',
                        help='Delete result for individual sequences')
    args = parser.parse_args()
    g_params['DEBUG'] = args.isDEBUG
    g_params['REMOVE_IND_FILES'] = args.isRemoveFile
    inFile = os.path.abspath(args.inFile)
    out_path = os.path.abspath(args.out_path)
    blastDir = os.path.abspath(args.blastDir)
    blastDB = os.path.abspath(args.blastDB)
    # TMPPATH from the command line overrides the default in g_params
    if args.TMPPATH != None:
        g_params['TMPPATH'] = os.path.abspath(args.TMPPATH)
        if not os.access(g_params['TMPPATH'], os.W_OK):
            print >> sys.stderr, "Error. TMPPATH '%s' not writable. Exit." % (
                g_params['TMPPATH'])
            return 1
    if not os.access(out_path, os.W_OK):
        print >> sys.stderr, "Error. out_path '%s' not writable. Exit." % (
            out_path)
        return 1
    os.environ['TMPPATH'] = g_params['TMPPATH']
    DEBUG = g_params['DEBUG']
    TMPPATH = g_params['TMPPATH']
    if not os.path.exists(inFile):
        print >> sys.stderr, "inFile %s does not exist. Exit." % (inFile)
        sys.exit(1)
    if not os.path.exists(out_path):
        try:
            os.makedirs(out_path)
        except OSError:
            print >> sys.stderr, "Failed to create out_path %s. Exit." % (
                out_path)
            sys.exit(1)
    if not "BLASTDB" in os.environ:
        # this fixed the warning message of unset 'BLASTDB'
        try:
            blastdbpath = os.path.realpath(os.path.dirname(blastDB))
            os.environ['BLASTDB'] = blastdbpath
        except:
            pass
    # Set the working dir to the script location
    my_path = module_locator.module_path()
    os.chdir(my_path)
    inFile_rootname = os.path.basename(os.path.splitext(inFile)[0])
    # Timing remove from final version
    #print "Timing remove from final version"
    timingfile = "%s/%s" % (out_path, "time.txt")
    # aggregated topology output, one entry per input sequence
    topfile_OCTOPUS = "%s/%s.OCTOPUS.topfa" % (out_path, inFile_rootname)
    topfile_SPOCTOPUS = "%s/%s.SPOCTOPUS.topfa" % (out_path, inFile_rootname)
    fpout_OCTOPUS = open(topfile_OCTOPUS, "w")
    fpout_SPOCTOPUS = open(topfile_SPOCTOPUS, "w")
    with open(timingfile, "w") as timingFileOut:
        with open(inFile, "rU") as seqFile:
            for index, entry in enumerate(list(SeqIO.parse(seqFile,
                                                           "fasta"))):
                # Timing remove from final version
                # print "Timing remove from final version"
                start = time.time()
                #Create folders for tmp data and output
                used_pfam = "pfam"
                tmpDir = tempfile.mkdtemp(prefix="%s/seq_" % (TMPPATH) +
                                          str(index) + "_") + "/"
                os.chmod(tmpDir, 0755)
                tmpDir_pfam = tmpDir
                tmpDir_cdd = ""
                tmpDir_uniref = ""
                # file holding the query name expected by the predictors
                protnamefile = "%s/query.fa.txt" % (tmpDir)
                try:
                    fpout = open(protnamefile, "w")
                    print >> fpout, "query"
                    fpout.close()
                except IOError:
                    print >> sys.stderr, "Failed to write to protnamefile %s. "\
                        "Exit."%(protnamefile)
                    sys.exit(1)
                outDir = "%s%s%s/" % (out_path, os.sep, "seq_%d" % (index))
                if os.path.exists(tmpDir) is False:
                    os.mkdir(tmpDir)
                if os.path.exists(outDir) is False:
                    os.mkdir(outDir)
                # if os.path.exists(outDir + "Topcons/") is False:
                #     os.mkdir(outDir + "Topcons/")
                # outfile = "%s/%s"%(tmpDir, "query.fa")
                # write the sequence both into the tmp dir (for the
                # predictors) and the result dir (for the record)
                with open(tmpDir + "query.fa", "w") as outFile:
                    outFile.write(">query" + "\n" + str(entry.seq))
                with open(outDir + "seq.fa", "w") as outFile:
                    outFile.write(">query" + "\n" + str(entry.seq))
                startDir = os.getcwd()
                # At the same time the profiles can be created
                cmd = ["./fa2prfs_pfamscan_v2.sh", tmpDir_pfam, blastDir]
                cmdline = " ".join(cmd)
                rmsg = ""
                try:
                    print "cmdline: ", cmdline
                    rmsg = subprocess.check_call(cmd,
                                                 stderr=subprocess.STDOUT)
                except subprocess.CalledProcessError, e:
                    print "errmsg:", e
                    print "rmsg:", rmsg
                    pass
                query_seqdbfile = "%s/%s" % (tmpDir_pfam, "query.hits.db")
                # filesize of the hits db decides whether a fallback is needed
                filesize = 0
                try:
                    filesize = os.path.getsize(query_seqdbfile)
                except OSError:
                    filesize = -1
                    pass
                if DEBUG:
                    print "After fa2prfs_pfamscan_v2.sh filesize(%s)=%d" % (
                        query_seqdbfile, filesize)
                # In case we do not find a hit, we have to run hmmscan on the cdd database
                if filesize <= 0:
                    tmpDir_cdd = tempfile.mkdtemp(prefix="%s/seq_cdd_" %
                                                  (TMPPATH) + str(index) +
                                                  "_") + "/"
                    os.chmod(tmpDir_cdd, 0755)
                    with open(tmpDir_cdd + "query.fa", "w") as outFile:
                        outFile.write(">query" + "\n" + str(entry.seq))
                    used_pfam = "cdd"
                    cmd = ["./fa2prfs_hmmscan.sh", tmpDir_cdd, blastDir]
                    cmdline = " ".join(cmd)
                    try:
                        print "\ncmdline:", cmdline
                        rmsg = subprocess.check_call(cmd,
                                                     stderr=subprocess.STDOUT)
                    except subprocess.CalledProcessError, e:
                        print "errmsg:", e
                        print "rmsg:", rmsg
                        pass
                    tmpDir = tmpDir_cdd
                    query_seqdbfile = "%s/%s" % (tmpDir_cdd, "query.hits.db")
                    try:
                        filesize = os.path.getsize(query_seqdbfile)
                    except OSError:
                        filesize = -1
                        pass
                    if DEBUG:
                        print "After fa2prfs_hmmscan.sh filesize(%s)=%d" % (
                            query_seqdbfile, filesize)
                # In case we do not find a hit, we have to run the old script
                if filesize <= 0:
                    tmpDir_uniref = tempfile.mkdtemp(prefix="%s/seq_uniref_" %
                                                     (TMPPATH) + str(index) +
                                                     "_") + "/"
                    os.chmod(tmpDir_uniref, 0755)
                    with open(tmpDir_uniref + "query.fa", "w") as outFile:
                        outFile.write(">query" + "\n" + str(entry.seq))
                    used_pfam = "uniref"
                    cmd = [
                        "./fa2prfs_fallback_v2.sh", tmpDir_uniref, blastDir,
                        blastDB
                    ]
                    cmdline = " ".join(cmd)
                    try:
                        print "\ncmdline:", cmdline
                        rmsg = subprocess.check_call(cmd,
                                                     stderr=subprocess.STDOUT)
                    except subprocess.CalledProcessError, e:
                        print e
                        print rmsg
                        pass
                    tmpDir = tmpDir_uniref
                    query_seqdbfile = "%s/%s" % (tmpDir_uniref,
                                                 "query.hits.db")
                    try:
                        filesize = os.path.getsize(query_seqdbfile)
                    except OSError:
                        filesize = -1
                        pass
                    if DEBUG:
                        print "After fa2prfs_fallback_v2.sh filesize(%s)=%d" % (
                            query_seqdbfile, filesize)
                # Once the profile is created start all other predictors
                os.chdir(os.path.abspath("../predictors/spoctopus/"))
                outDir_SPOCTOPUS = outDir + os.sep + "SPOCTOPUS"
                if not os.path.exists(outDir_SPOCTOPUS):
                    os.makedirs(outDir_SPOCTOPUS)
                cmd = [
                    "./SPOCTOPUS.sh", protnamefile, tmpDir + "PSSM_PRF_FILES/",
                    tmpDir + "RAW_PRF_FILES/", outDir_SPOCTOPUS, "-N"
                ]  #output also the ANN result for SPOCTOPUS, changed 2016-01-26
                cmdline = " ".join(cmd)
                if DEBUG:
                    print "cmdline:", cmdline
                # launch SPOCTOPUS and OCTOPUS without waiting (Popen), so
                # the two predictors run concurrently
                p_spoctopus = subprocess.Popen(cmd)
                os.chdir(startDir)
                os.chdir(os.path.abspath("../predictors/spoctopus/"))
                outDir_OCTOPUS = outDir + os.sep + "OCTOPUS"
                if not os.path.exists(outDir_OCTOPUS):
                    os.makedirs(outDir_OCTOPUS)
                cmd = [
                    "./OCTOPUS.sh", protnamefile, tmpDir + "PSSM_PRF_FILES/",
                    tmpDir + "RAW_PRF_FILES/", outDir_OCTOPUS, "-N"
                ]  #output also the ANN result for OCTOPUS, changed 2016-01-26
                cmdline = " ".join(cmd)
                if DEBUG:
                    print "cmdline:", cmdline
                p_octopus = subprocess.Popen(cmd)
                os.chdir(startDir)
                p_spoctopus.communicate()  #now wait for OCTOPUS
                p_octopus.communicate()  #now wait for SPOCTOPUS
                count_pred = 2
                end = time.time()
                # count non-header lines of the hits db for the timing record
                lines = 0
                with open(tmpDir + "query.hits.db") as inFile:
                    for line in inFile:
                        if line.find(">") == -1:
                            lines += 1
                timingFileOut.write(
                    str(entry.id) + ";" + str(end - start) + ";" + used_pfam +
                    ";" + str(lines) + ";" + str(count_pred) + "\n")
                #Remove the tmpFolder
                if not DEBUG:  #debugging
                    if os.path.exists(tmpDir) is True:
                        p = subprocess.call(["rm", "-rf", tmpDir])
                    if os.path.exists(tmpDir_cdd) is True:
                        p = subprocess.call(["rm", "-rf", tmpDir_cdd])
                    if os.path.exists(tmpDir_uniref) is True:
                        p = subprocess.call(["rm", "-rf", tmpDir_uniref])
                    if os.path.exists(tmpDir_pfam) is True:
                        p = subprocess.call(["rm", "-rf", tmpDir_pfam])
                else:
                    print "tmpDir=%s" % (tmpDir)
                # post-process the raw topologies
                p = subprocess.call(["python", "correct_Topo.py", outDir])
                # append this sequence's topologies to the aggregated output
                topfile = "%s/%s/%s" % (outDir, "OCTOPUS", "query.top")
                if os.path.exists(topfile):
                    top = myfunc.ReadFile(topfile).strip()
                    if top:
                        fpout_OCTOPUS.write(">%s\n" % (entry.description))
                        fpout_OCTOPUS.write("%s\n" % (top))
                topfile = "%s/%s/%s" % (outDir, "SPOCTOPUS", "query.top")
                if os.path.exists(topfile):
                    top = myfunc.ReadFile(topfile).strip()
                    if top:
                        fpout_SPOCTOPUS.write(">%s\n" % (entry.description))
                        fpout_SPOCTOPUS.write("%s\n" % (top))
                if g_params['REMOVE_IND_FILES']:
                    shutil.rmtree(outDir)
def WriteTextResultFile(outfile, maplist, runtime_in_sec):#{{{
    """Write the plain-text TOPCONS2 result file for a whole job.

    `maplist` holds one tab-separated record per sequence:
    subfoldername, length, description, sequence.  For each sequence the
    predicted topology of every method, the Delta-G values and the TOPCONS
    reliability are appended to `outfile`.
    NOTE(review): relies on module-level `g_params`, `myfunc` and a
    `datetime` object exposing .now() — none visible in this chunk.
    """
    try:
        outpath_result = os.path.dirname(outfile)
        methodlist = ['TOPCONS', 'OCTOPUS', 'Philius', 'PolyPhobius',
                      'SCAMPI', 'SPOCTOPUS']
        fpout = open(outfile, "w")
        date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print >> fpout, "##############################################################################"
        print >> fpout, "TOPCONS2 result file"
        print >> fpout, "Generated from http://%s at %s"%(g_params['base_www_url'], date)
        print >> fpout, "Total request time: %.1f seconds."%(runtime_in_sec)
        print >> fpout, "##############################################################################"
        cnt = 0
        for line in maplist:
            strs = line.split('\t')
            subfoldername = strs[0]
            length = int(strs[1])
            desp = strs[2]
            seq = strs[3]
            print >> fpout, "Sequence number: %d"%(cnt+1)
            print >> fpout, "Sequence name: %s"%(desp)
            print >> fpout, "Sequence length: %d aa."%(length)
            print >> fpout, "Sequence:\n%s\n\n"%(seq)
            for i in xrange(len(methodlist)):
                method = methodlist[i]
                # each method stores its topology under a differently named
                # subfolder/file
                if method == "TOPCONS":
                    topfile = "%s/%s/%s/topcons.top"%(outpath_result,
                            subfoldername, "Topcons")
                elif method == "Philius":
                    topfile = "%s/%s/%s/query.top"%(outpath_result,
                            subfoldername, "philius")
                elif method == "SCAMPI":
                    topfile = "%s/%s/%s/query.top"%(outpath_result,
                            subfoldername, method+"_MSA")
                else:
                    topfile = "%s/%s/%s/query.top"%(outpath_result,
                            subfoldername, method)
                if os.path.exists(topfile):
                    top = myfunc.ReadFile(topfile)
                else:
                    top = ""
                if top == "":
                    top = "***No topology could be produced with this method topfile=%s***"%(topfile)
                print >> fpout, "%s predicted topology:\n%s\n\n"%(method, top)
            # keep only the numeric data lines of the Delta-G file
            dgfile = "%s/%s/dg.txt"%(outpath_result, subfoldername)
            dg_content = myfunc.ReadFile(dgfile)
            lines = dg_content.split("\n")
            dglines = []
            for line in lines:
                if line and line[0].isdigit():
                    dglines.append(line)
            if len(dglines)>0:
                print >> fpout, "\nPredicted Delta-G-values (kcal/mol) "\
                        "(left column=sequence position; right column=Delta-G)\n"
                print >> fpout, "\n".join(dglines)
            reliability_file = "%s/%s/Topcons/reliability.txt"%(outpath_result, subfoldername)
            reliability = myfunc.ReadFile(reliability_file)
            if reliability != "":
                print >> fpout, "\nPredicted TOPCONS reliability (left "\
                        "column=sequence position; right column=reliability)\n"
                print >> fpout, reliability
            print >> fpout, "##############################################################################"
            cnt += 1
    except IOError:
        print "Failed to write to file %s"%(outfile)
def RunJob(infile, outpath, tmpdir, email, jobid, g_params):#{{{
    """Run the TOPCONS2 workflow for the whole input FASTA file in one batch.

    Builds a sequence-index map, runs the external `runscript` on the input,
    copies the results into `outpath`, writes start/finish tag files,
    produces the combined text result and packs everything into a zip.
    NOTE(review): `runscript`, `blastdir`, `blastdb`, `myfunc` are
    module-level names not visible in this chunk; `email` appears unused.
    """
    rootname = os.path.basename(os.path.splitext(infile)[0])
    starttagfile = "%s/runjob.start"%(outpath)
    runjob_errfile = "%s/runjob.err"%(outpath)
    runjob_logfile = "%s/runjob.log"%(outpath)
    finishtagfile = "%s/runjob.finish"%(outpath)
    rmsg = ""
    resultpathname = jobid
    outpath_result = "%s/%s"%(outpath, resultpathname)
    tarball = "%s.tar.gz"%(resultpathname)
    zipfile = "%s.zip"%(resultpathname)
    tarball_fullpath = "%s.tar.gz"%(outpath_result)
    zipfile_fullpath = "%s.zip"%(outpath_result)
    # the finish tag is written only when the first sequence's topcons.top exists
    outfile = "%s/%s/Topcons/topcons.top"%(outpath_result, "seq_%d"%(0))
    resultfile_text = "%s/%s"%(outpath_result, "query.result.txt")
    tmp_outpath_result = "%s/%s"%(tmpdir, resultpathname)
    isOK = True
    try:
        os.makedirs(tmp_outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s"%(tmp_outpath_result)
        myfunc.WriteFile(msg+"\n", runjob_errfile, "a")
        isOK = False
    print "isOK =", isOK
    if isOK:
        # build "seq_N <tab> length <tab> description [<tab> sequence]" records
        tmp_mapfile = "%s/seqid_index_map.txt"%(tmp_outpath_result)
        maplist = []
        maplist_simple = []
        hdl = myfunc.ReadFastaByBlock(infile, method_seqid=0, method_seq=0)
        if hdl.failure:
            isOK = False
        else:
            recordList = hdl.readseq()
            cnt = 0
            while recordList != None:
                for rd in recordList:
                    maplist.append("%s\t%d\t%s\t%s"%("seq_%d"%cnt,
                        len(rd.seq), rd.description, rd.seq))
                    maplist_simple.append("%s\t%d\t%s"%("seq_%d"%cnt,
                        len(rd.seq), rd.description))
                    cnt += 1
                recordList = hdl.readseq()
            hdl.close()
        myfunc.WriteFile("\n".join(maplist_simple), tmp_mapfile)
    if isOK:
        # g_params['runjob_log'].append("tmpdir = %s"%(tmpdir))
        #cmd = [script_getseqlen, infile, "-o", tmp_outfile , "-printid"]
        # mark job start with a timestamp tag file
        datetime = time.strftime("%Y-%m-%d %H:%M:%S")
        rt_msg = myfunc.WriteFile(datetime, starttagfile)
        if rt_msg:
            g_params['runjob_err'].append(rt_msg)
        cmd = [runscript, infile, tmp_outpath_result, blastdir, blastdb ]
        g_params['runjob_log'].append(" ".join(cmd))
        begin_time = time.time()
        try:
            rmsg = subprocess.check_output(cmd)
        except subprocess.CalledProcessError, e:
            g_params['runjob_err'].append(str(e)+"\n")
            g_params['runjob_err'].append(rmsg + "\n")
            # also capture the queue system's .out file if present
            suqoutfilelist = glob.glob("%s/*.sh.*.out"%(tmpdir))
            if len(suqoutfilelist)>0:
                suqoutfile = suqoutfilelist[0]
                g_params['runjob_err'].append(myfunc.ReadFile(suqoutfile))
        end_time = time.time()
        runtime_in_sec = end_time - begin_time
        # copy results from the scratch folder into the job output folder
        if os.path.exists(tmp_outpath_result):
            cmd = ["cp","-rf", tmp_outpath_result, outpath]
            try:
                subprocess.check_output(cmd)
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e))
        if len(g_params['runjob_log']) > 0 :
            rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_log']), runjob_logfile, "a")
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)
        datetime = time.strftime("%Y-%m-%d %H:%M:%S")
        if os.path.exists(outfile):
            rt_msg = myfunc.WriteFile(datetime, finishtagfile)
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)
        # now write the text output to a single file
        WriteTextResultFile(resultfile_text, maplist, runtime_in_sec)
        # now making zip instead (for windows users)
        pwd = os.getcwd()
        os.chdir(outpath)
        # cmd = ["tar", "-czf", tarball, resultpathname]
        cmd = ["zip", "-rq", zipfile, resultpathname]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError, e:
            g_params['runjob_err'].append(str(e))
def RunJob(infile, outpath, tmpdir, email, jobid, g_params):  #{{{
    """Run the TOPCONS2 workflow sequence-by-sequence with an md5 cache.

    For each FASTA entry: reuse a cached result (symlink into
    `path_md5cache`) when available and not forced; otherwise queue the
    sequence.  Queued sequences are first run through SCAMPI to estimate the
    number of TM helices, then submitted to the external `runscript` one at
    a time in descending-numTM order.  Per-sequence results, timing and the
    md5 cache are updated as each run finishes.
    NOTE(review): `runscript`, `blastdir`, `blastdb`, `script_scampi`,
    `path_md5cache` and `myfunc` are module-level names not visible in this
    chunk; `email` appears unused here.
    """
    all_begin_time = time.time()
    rootname = os.path.basename(os.path.splitext(infile)[0])
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    rmsg = ""
    resultpathname = jobid
    outpath_result = "%s/%s" % (outpath, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    outfile = "%s/%s/Topcons/topcons.top" % (outpath_result, "seq_%d" % (0))
    resultfile_text = "%s/%s" % (outpath_result, "query.result.txt")
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    # one record appended per finished sequence (cached or newly run)
    finished_seq_file = "%s/finished_seqs.txt" % (outpath_result)
    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    isOK = True
    try:
        os.makedirs(tmp_outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (tmp_outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass
    try:
        os.makedirs(outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass
    if isOK:
        # truncate/create the bookkeeping file for finished sequences
        try:
            open(finished_seq_file, 'w').close()
        except:
            pass
        #first getting result from caches
        # ==================================
        maplist = []
        maplist_simple = []
        toRunDict = {}
        hdl = myfunc.ReadFastaByBlock(infile, method_seqid=0, method_seq=0)
        if hdl.failure:
            isOK = False
        else:
            # mark job start with a timestamp tag file
            datetime = time.strftime("%Y-%m-%d %H:%M:%S")
            rt_msg = myfunc.WriteFile(datetime, starttagfile)
            recordList = hdl.readseq()
            cnt = 0
            origpath = os.getcwd()
            while recordList != None:
                for rd in recordList:
                    isSkip = False
                    # temp outpath for the sequence is always seq_0, and I feed
                    # only one seq a time to the workflow
                    tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result,
                                                      "seq_%d" % 0)
                    outpath_this_seq = "%s/%s" % (outpath_result,
                                                  "seq_%d" % cnt)
                    subfoldername_this_seq = "seq_%d" % (cnt)
                    if os.path.exists(tmp_outpath_this_seq):
                        try:
                            shutil.rmtree(tmp_outpath_this_seq)
                        except OSError:
                            pass
                    maplist.append(
                        "%s\t%d\t%s\t%s" % ("seq_%d" % cnt, len(rd.seq),
                                            rd.description, rd.seq))
                    maplist_simple.append(
                        "%s\t%d\t%s" % ("seq_%d" % cnt, len(rd.seq),
                                        rd.description))
                    if not g_params['isForceRun']:
                        # look up this sequence in the md5-keyed result cache
                        md5_key = hashlib.md5(rd.seq).hexdigest()
                        subfoldername = md5_key[:2]
                        md5_link = "%s/%s/%s" % (path_md5cache, subfoldername,
                                                 md5_key)
                        if os.path.exists(md5_link):
                            # create a symlink to the cache
                            rela_path = os.path.relpath(
                                md5_link, outpath_result)  #relative path
                            os.chdir(outpath_result)
                            os.symlink(rela_path, subfoldername_this_seq)
                            if os.path.exists(outpath_this_seq):
                                runtime = 0.0  #in seconds
                                topfile = "%s/%s/topcons.top" % (
                                    outpath_this_seq, "Topcons")
                                top = myfunc.ReadFile(topfile).strip()
                                numTM = myfunc.CountTM(top)
                                posSP = myfunc.GetSPPosition(top)
                                if len(posSP) > 0:
                                    isHasSP = True
                                else:
                                    isHasSP = False
                                info_finish = [
                                    "seq_%d" % cnt,
                                    str(len(rd.seq)),
                                    str(numTM),
                                    str(isHasSP), "cached",
                                    str(runtime), rd.description
                                ]
                                myfunc.WriteFile("\t".join(info_finish) + "\n",
                                                 finished_seq_file,
                                                 "a",
                                                 isFlush=True)
                                isSkip = True
                    if not isSkip:
                        # first try to delete the outfolder if exists
                        if os.path.exists(outpath_this_seq):
                            try:
                                shutil.rmtree(outpath_this_seq)
                            except OSError:
                                pass
                        origIndex = cnt
                        numTM = 0
                        toRunDict[origIndex] = [rd.seq, numTM, rd.description
                                                ]  #init value for numTM is 0
                    cnt += 1
                recordList = hdl.readseq()
            hdl.close()
        myfunc.WriteFile("\n".join(maplist_simple) + "\n", mapfile)
        # run scampi single to estimate the number of TM helices and then run
        # the query sequences in the descending order of numTM
        torun_all_seqfile = "%s/%s" % (tmp_outpath_result, "query.torun.fa")
        dumplist = []
        for key in toRunDict:
            top = toRunDict[key][0]
            dumplist.append(">%s\n%s" % (str(key), top))
        myfunc.WriteFile("\n".join(dumplist) + "\n", torun_all_seqfile, "w")
        del dumplist
        topfile_scampiseq = "%s/%s" % (tmp_outpath_result,
                                       "query.torun.fa.topo")
        if os.path.exists(torun_all_seqfile):
            # run scampi to estimate the number of TM helices
            cmd = [
                script_scampi, torun_all_seqfile, "-outpath",
                tmp_outpath_result
            ]
            try:
                rmsg = subprocess.check_output(cmd)
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e) + "\n")
                pass
        if os.path.exists(topfile_scampiseq):
            # record the estimated numTM for every queued sequence
            (idlist_scampi, annolist_scampi,
             toplist_scampi) = myfunc.ReadFasta(topfile_scampiseq)
            for jj in xrange(len(idlist_scampi)):
                numTM = myfunc.CountTM(toplist_scampi[jj])
                try:
                    toRunDict[int(idlist_scampi[jj])][1] = numTM
                except (KeyError, ValueError, TypeError):
                    pass
        sortedlist = sorted(toRunDict.items(),
                            key=lambda x: x[1][1],
                            reverse=True)
        #format of sortedlist [(origIndex: [seq, numTM, description]), ...]
        # submit sequences one by one to the workflow according to orders in
        # sortedlist
        for item in sortedlist:
            # g_params['runjob_log'].append("tmpdir = %s"%(tmpdir))
            #cmd = [script_getseqlen, infile, "-o", tmp_outfile , "-printid"]
            origIndex = item[0]
            seq = item[1][0]
            description = item[1][2]
            outpath_this_seq = "%s/%s" % (outpath_result,
                                          "seq_%d" % origIndex)
            tmp_outpath_this_seq = "%s/%s" % (tmp_outpath_result,
                                              "seq_%d" % (0))
            if os.path.exists(tmp_outpath_this_seq):
                try:
                    shutil.rmtree(tmp_outpath_this_seq)
                except OSError:
                    pass
            # write a single-sequence FASTA file for this run
            seqfile_this_seq = "%s/%s" % (tmp_outpath_result,
                                          "query_%d.fa" % (origIndex))
            seqcontent = ">%d\n%s\n" % (origIndex, seq)
            myfunc.WriteFile(seqcontent, seqfile_this_seq, "w")
            if not os.path.exists(seqfile_this_seq):
                g_params['runjob_err'].append(
                    "failed to generate seq index %d" % (origIndex))
                continue
            cmd = [
                runscript, seqfile_this_seq, tmp_outpath_result, blastdir,
                blastdb
            ]
            g_params['runjob_log'].append(" ".join(cmd))
            begin_time = time.time()
            try:
                rmsg = subprocess.check_output(cmd)
                g_params['runjob_log'].append("workflow:\n" + rmsg + "\n")
            except subprocess.CalledProcessError, e:
                g_params['runjob_err'].append(str(e) + "\n")
                g_params['runjob_err'].append(rmsg + "\n")
                pass
            #suqoutfilelist = glob.glob("%s/*.sh.*.out"%(tmpdir))
            #if len(suqoutfilelist)>0:
            #    suqoutfile = suqoutfilelist[0]
            #g_params['runjob_err'].append(myfunc.ReadFile(suqoutfile))
            end_time = time.time()
            runtime_in_sec = end_time - begin_time
            if os.path.exists(tmp_outpath_this_seq):
                # move the scratch result (always seq_0) to seq_<origIndex>
                cmd = ["mv", "-f", tmp_outpath_this_seq, outpath_this_seq]
                isCmdSuccess = False
                try:
                    subprocess.check_output(cmd)
                    isCmdSuccess = True
                except subprocess.CalledProcessError, e:
                    msg = "Failed to run prediction for sequence No. %d\n" % (
                        origIndex)
                    g_params['runjob_err'].append(msg)
                    g_params['runjob_err'].append(str(e) + "\n")
                    pass
                timefile = "%s/time.txt" % (tmp_outpath_result)
                targetfile = "%s/time.txt" % (outpath_this_seq)
                if os.path.exists(timefile) and os.path.exists(
                        outpath_this_seq):
                    try:
                        shutil.move(timefile, targetfile)
                    except:
                        g_params['runjob_err'].append(
                            "Failed to move %s/time.txt" %
                            (tmp_outpath_result) + "\n")
                        pass
                if isCmdSuccess:
                    runtime = runtime_in_sec  #in seconds
                    topfile = "%s/%s/topcons.top" % (outpath_this_seq,
                                                     "Topcons")
                    top = myfunc.ReadFile(topfile).strip()
                    numTM = myfunc.CountTM(top)
                    posSP = myfunc.GetSPPosition(top)
                    if len(posSP) > 0:
                        isHasSP = True
                    else:
                        isHasSP = False
                    info_finish = [
                        "seq_%d" % origIndex,
                        str(len(seq)),
                        str(numTM),
                        str(isHasSP), "newrun",
                        str(runtime), description
                    ]
                    myfunc.WriteFile("\t".join(info_finish) + "\n",
                                     finished_seq_file,
                                     "a",
                                     isFlush=True)
                    # now write the text output for this seq
                    info_this_seq = "%s\t%d\t%s\t%s" % (
                        "seq_%d" % origIndex, len(seq), description, seq)
                    resultfile_text_this_seq = "%s/%s" % (outpath_this_seq,
                                                          "query.result.txt")
                    myfunc.WriteTOPCONSTextResultFile(resultfile_text_this_seq,
                                                      outpath_result,
                                                      [info_this_seq],
                                                      runtime_in_sec,
                                                      g_params['base_www_url'])
                    # create or update the md5 cache
                    # create cache only on the front-end
                    if g_params['base_www_url'].find("topcons.net") != -1:
                        md5_key = hashlib.md5(seq).hexdigest()
                        subfoldername = md5_key[:2]
                        md5_subfolder = "%s/%s" % (path_md5cache,
                                                   subfoldername)
                        md5_link = "%s/%s/%s" % (path_md5cache, subfoldername,
                                                 md5_key)
                        if os.path.exists(md5_link):
                            try:
                                os.unlink(md5_link)
                            except:
                                pass
                        subfolder_md5 = "%s/%s" % (path_md5cache,
                                                   subfoldername)
                        if not os.path.exists(subfolder_md5):
                            try:
                                os.makedirs(subfolder_md5)
                            except:
                                pass
                        rela_path = os.path.relpath(
                            outpath_this_seq, md5_subfolder)  #relative path
                        try:
                            os.chdir(md5_subfolder)
                            os.symlink(rela_path, md5_key)
                        except:
                            pass
def WriteSubconsTextResultFile(
        outfile,
        outpath_result,
        maplist,  #{{{
        runtime_in_sec,
        base_www_url,
        statfile=""):
    """Write the plain-text Subcons result file.

    `maplist` holds one tab-separated record per sequence: subfoldername,
    length, description, sequence.  Each sequence's prediction table is read
    from <outpath_result>/<subfolder>/plot/query_0_final.csv and rendered
    with tabulate.  NOTE(review): `FORMAT_DATETIME`, `myfunc` and `tabulate`
    are module-level names not visible in this chunk; `statfile` is opened
    but not written here.
    """
    try:
        fpout = open(outfile, "w")
        if statfile != "":
            fpstat = open(statfile, "w")
        date_str = time.strftime(FORMAT_DATETIME)
        print >> fpout, "##############################################################################"
        print >> fpout, "Subcons result file"
        print >> fpout, "Generated from %s at %s" % (base_www_url, date_str)
        print >> fpout, "Total request time: %.1f seconds." % (runtime_in_sec)
        print >> fpout, "##############################################################################"
        cnt = 0
        for line in maplist:
            strs = line.split('\t')
            subfoldername = strs[0]
            length = int(strs[1])
            desp = strs[2]
            seq = strs[3]
            seqid = myfunc.GetSeqIDFromAnnotation(desp)
            print >> fpout, "Sequence number: %d" % (cnt + 1)
            print >> fpout, "Sequence name: %s" % (desp)
            print >> fpout, "Sequence length: %d aa." % (length)
            print >> fpout, "Sequence:\n%s\n\n" % (seq)
            rstfile = "%s/%s/%s/query_0_final.csv" % (outpath_result,
                                                      subfoldername, "plot")
            if os.path.exists(rstfile):
                content = myfunc.ReadFile(rstfile).strip()
                lines = content.split("\n")
                if len(lines) >= 6:
                    # first row is the header; give the empty corner cell a name
                    header_line = lines[0].split("\t")
                    if header_line[0].strip() == "":
                        header_line[0] = "Method"
                    header_line = [x.strip() for x in header_line]
                    data_line = []
                    for i in xrange(1, len(lines)):
                        strs1 = lines[i].split("\t")
                        strs1 = [x.strip() for x in strs1]
                        data_line.append(strs1)
                    content = tabulate.tabulate(data_line, header_line,
                                                'plain')
            else:
                content = ""
            if content == "":
                content = "***No prediction could be produced with this method***"
            print >> fpout, "Prediction results:\n\n%s\n\n" % (content)
            print >> fpout, "##############################################################################"
            cnt += 1
    except IOError:
        print "Failed to write to file %s" % (outfile)
def WriteProQ3TextResultFile(
        outfile,
        query_para,
        modelFileList,  #{{{
        runtime_in_sec,
        base_www_url,
        proq3opt,
        statfile=""):
    """Write the plain-text ProQ3 result file.

    Emits a header, then one global-score row per model followed by the
    per-residue local scores.  Score files are looked up with fallbacks:
    <model>.<proq3|proq3d>.<quality>.global/.local, then
    <model>.proq3.<quality>..., then <model>.proq3.global/.local.
    NOTE(review): `FORMAT_DATETIME`, `ReadProQ3GlobalScore` and `myfunc` are
    module-level names not visible in this chunk; `fpstat`/`numTMPro` are
    set up but not written here.
    """
    try:
        fpout = open(outfile, "w")
        # query_para drives which score-file prefix to expect
        try:
            isDeepLearning = query_para['isDeepLearning']
        except KeyError:
            isDeepLearning = True
        if isDeepLearning:
            m_str = "proq3d"
        else:
            m_str = "proq3"
        try:
            method_quality = query_para['method_quality']
        except KeyError:
            method_quality = 'sscore'
        fpstat = None
        numTMPro = 0
        if statfile != "":
            fpstat = open(statfile, "w")
        numModel = len(modelFileList)
        date_str = time.strftime(FORMAT_DATETIME)
        print >> fpout, "##############################################################################"
        print >> fpout, "# ProQ3 result file"
        print >> fpout, "# Generated from %s at %s" % (base_www_url, date_str)
        print >> fpout, "# Options for Proq3: %s" % (str(proq3opt))
        print >> fpout, "# Total request time: %.1f seconds." % (
            runtime_in_sec)
        print >> fpout, "# Number of finished models: %d" % (numModel)
        print >> fpout, "##############################################################################"
        print >> fpout
        print >> fpout, "# Global scores"
        fpout.write("# %10s" % ("Model"))
        cnt = 0
        for i in xrange(numModel):
            modelfile = modelFileList[i]
            # try the configured score file first, then older naming schemes
            globalscorefile = "%s.%s.%s.global" % (modelfile, m_str,
                                                   method_quality)
            if not os.path.exists(globalscorefile):
                globalscorefile = "%s.proq3.%s.global" % (modelfile,
                                                          method_quality)
            if not os.path.exists(globalscorefile):
                globalscorefile = "%s.proq3.global" % (modelfile)
            (globalscore, itemList) = ReadProQ3GlobalScore(globalscorefile)
            if i == 0:
                # header row of score column names, taken from the first model
                for ss in itemList:
                    fpout.write(" %12s" % (ss))
                fpout.write("\n")
            try:
                if globalscore:
                    fpout.write("%2s %10s" % ("", "model_%d" % (i)))
                    for jj in xrange(len(itemList)):
                        fpout.write(" %12f" % (globalscore[itemList[jj]]))
                    fpout.write("\n")
                else:
                    # no parsable score: emit the model label only
                    print >> fpout, "%2s %10s" % ("", "model_%d" % (i))
            except:
                pass
        print >> fpout, "\n# Local scores"
        for i in xrange(numModel):
            modelfile = modelFileList[i]
            localscorefile = "%s.%s.%s.local" % (modelfile, m_str,
                                                 method_quality)
            if not os.path.exists(localscorefile):
                localscorefile = "%s.proq3.%s.local" % (modelfile,
                                                        method_quality)
            if not os.path.exists(localscorefile):
                localscorefile = "%s.proq3.local" % (modelfile)
            print >> fpout, "\n# Model %d" % (i)
            content = myfunc.ReadFile(localscorefile)
            print >> fpout, content
    except IOError:
        print "Failed to write to file %s" % (outfile)
def RunJob(modelfile, seqfile, outpath, tmpdir, email, jobid, g_params):  #{{{
    """Run the ProQ3 scoring pipeline for one job.

    Splits `modelfile` into individual PDB models, builds (or reuses) a
    sequence profile, scores every model with ScoreModel(), writes a combined
    text result and finally packages the result folder into a zip archive.

    Parameters:
        modelfile - PDB file possibly containing multiple models
        seqfile   - optional FASTA file; when non-empty, one shared profile is
                    built from it for all models, otherwise the sequence is
                    extracted from each model with pdb2aa_script
        outpath   - per-job output folder (tag/log/error files live here)
        tmpdir    - scratch folder for intermediate files
        email     - user email (unused in this variant)
        jobid     - job identifier; also the name of the result subfolder
        g_params  - global parameter dict; 'runjob_log'/'runjob_err' collect
                    messages, 'base_www_url' is reported in the result file
    """
    all_begin_time = time.time()
    rootname = os.path.basename(os.path.splitext(modelfile)[0])
    # tag files recording the job life cycle in outpath
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    rmsg = ""
    # optional per-query parameters serialized as JSON by the submitter
    query_parafile = "%s/query.para.txt" % (outpath)
    query_para = {}
    content = myfunc.ReadFile(query_parafile)
    if content != "":
        query_para = json.loads(content)
    resultpathname = jobid
    outpath_result = "%s/%s" % (outpath, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    # one line per scored model is appended to this file as models finish
    finished_model_file = "%s/finished_models.txt" % (outpath_result)
    timefile = "%s/time.txt" % (outpath_result)
    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    isOK = True
    # recreate the scratch result folder from a clean state
    if os.path.exists(tmp_outpath_result):
        shutil.rmtree(tmp_outpath_result)
    try:
        os.makedirs(tmp_outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (tmp_outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass
    # recreate the final result folder from a clean state
    if os.path.exists(outpath_result):
        shutil.rmtree(outpath_result)
    try:
        os.makedirs(outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass
    if isOK:
        # truncate/create the finished-models list; best-effort
        try:
            open(finished_model_file, 'w').close()
        except:
            pass
        #first getting result from caches
        # cache profiles for sequences, but do not cache predictions for models
        webserver_common.WriteDateTimeTagFile(starttagfile, runjob_logfile,
                                              runjob_errfile)
        # ==================================
        numModel = 0
        modelFileList = []
        if seqfile != "":
            # if the fasta sequence is supplied, all models should be using this sequence
            subfoldername_profile = "profile_%d" % (0)
            outpath_profile = "%s/%s" % (outpath_result, subfoldername_profile)
            CreateProfile(seqfile, outpath_profile, outpath_result,
                          tmp_outpath_result, timefile, runjob_errfile)
            # run proq3 for models
            modelList = myfunc.ReadPDBModel(modelfile)
            numModel = len(modelList)
            for ii in xrange(len(modelList)):
                model = modelList[ii]
                tmp_model_file = "%s/query_%d.pdb" % (tmp_outpath_result, ii)
                myfunc.WriteFile(model + "\n", tmp_model_file)
                profilename = "%s/%s" % (outpath_profile, "query.fasta")
                subfoldername_this_model = "model_%d" % (ii)
                outpath_this_model = "%s/%s" % (outpath_result,
                                                subfoldername_this_model)
                modelinfo = ScoreModel(query_para, tmp_model_file,
                                       outpath_this_model, profilename,
                                       outpath_result, tmp_outpath_result,
                                       timefile, runjob_errfile)
                myfunc.WriteFile("\t".join(modelinfo) + "\n",
                                 finished_model_file, "a")
                modelFileList.append(
                    "%s/%s" % (outpath_this_model, "query_%d.pdb" % (ii)))
        else:
            # no seqfile supplied, sequences are obtained from the model file
            modelList = myfunc.ReadPDBModel(modelfile)
            numModel = len(modelList)
            for ii in xrange(len(modelList)):
                model = modelList[ii]
                tmp_model_file = "%s/query_%d.pdb" % (tmp_outpath_result, ii)
                myfunc.WriteFile(model + "\n", tmp_model_file)
                subfoldername_this_model = "model_%d" % (ii)
                tmp_outpath_this_model = "%s/%s" % (tmp_outpath_result,
                                                    subfoldername_this_model)
                if not os.path.exists(tmp_outpath_this_model):
                    os.makedirs(tmp_outpath_this_model)
                tmp_seqfile = "%s/query.fasta" % (tmp_outpath_this_model)
                # extract the amino-acid sequence from the model PDB
                cmd = [pdb2aa_script, tmp_model_file]
                g_params['runjob_log'].append(" ".join(cmd))
                try:
                    rmsg = subprocess.check_output(cmd)
                    g_params['runjob_log'].append(
                        "extracting sequence from modelfile:\n" + rmsg + "\n")
                except subprocess.CalledProcessError, e:
                    g_params['runjob_err'].append(str(e) + "\n")
                    g_params['runjob_err'].append(rmsg + "\n")
                if rmsg != "":
                    myfunc.WriteFile(">seq\n" + rmsg.strip(), tmp_seqfile)
                # one profile per model in this branch
                subfoldername_profile = "profile_%d" % (ii)
                outpath_profile = "%s/%s" % (outpath_result,
                                             subfoldername_profile)
                CreateProfile(tmp_seqfile, outpath_profile, outpath_result,
                              tmp_outpath_result, timefile, runjob_errfile)
                outpath_this_model = "%s/%s" % (outpath_result,
                                                subfoldername_this_model)
                profilename = "%s/%s" % (outpath_profile, "query.fasta")
                modelinfo = ScoreModel(query_para, tmp_model_file,
                                       outpath_this_model, profilename,
                                       outpath_result, tmp_outpath_result,
                                       timefile, runjob_errfile)
                myfunc.WriteFile("\t".join(modelinfo) + "\n",
                                 finished_model_file, "a")
                modelFileList.append(
                    "%s/%s" % (outpath_this_model, "query_%d.pdb" % (ii)))
        all_end_time = time.time()
        all_runtime_in_sec = all_end_time - all_begin_time
        # flush the accumulated log lines, then write the finish tag
        if len(g_params['runjob_log']) > 0:
            rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_log']) + "\n",
                                      runjob_logfile, "a")
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)
        webserver_common.WriteDateTimeTagFile(finishtagfile, runjob_logfile,
                                              runjob_errfile)
        # now write the text output to a single file
        #statfile = "%s/%s"%(outpath_result, "stat.txt")
        statfile = ""
        dumped_resultfile = "%s/%s" % (outpath_result, "query.proq3.txt")
        proq3opt = GetProQ3Option(query_para)
        webserver_common.WriteProQ3TextResultFile(dumped_resultfile,
                                                  query_para, modelFileList,
                                                  all_runtime_in_sec,
                                                  g_params['base_www_url'],
                                                  proq3opt, statfile=statfile)
        # now making zip instead (for windows users)
        # note that zip rq will zip the real data for symbolic links
        os.chdir(outpath)
        # cmd = ["tar", "-czf", tarball, resultpathname]
        cmd = ["zip", "-rq", zipfile, resultpathname]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError, e:
            g_params['runjob_err'].append(str(e))
            pass
def DumpPredictionTOPCONS2(seqfile, path_result, outfile, isWriteDG, isWriteRel): #{{{ (seqidlist, seqannolist, seqlist) = myfunc.ReadFasta(seqfile) outfile_fa = "%s.fa" % (outfile) outfile_unfinished_fa = "%s.unfinished.fa" % (outfile) numseq = len(seqidlist) fpout = None try: fpout = open(outfile, "w") except IOError: print >> sys.stderr, "Failed to write to file \"%s\"" % (outfile) return 1 fpout_fa = None try: fpout_fa = open(outfile_fa, "w") except IOError: print >> sys.stderr, "Failed to write to file \"%s\"" % (outfile_fa) return 1 fpout_unfinished_fa = None try: fpout_unfinished_fa = open(outfile_unfinished_fa, "w") except IOError: print >> sys.stderr, "Failed to write to file \"%s\"" % ( outfile_unfinished_fa) return 1 methodlist = [ 'TOPCONS', 'OCTOPUS', 'Philius', 'PolyPhobius', 'SCAMPI', 'SPOCTOPUS', 'Homology' ] cntUnFinished = 0 for iseq in xrange(len(seqidlist)): seq = seqlist[iseq] length = len(seq) desp = seqannolist[iseq] if g_params['resultPathFormat'] == "md5": md5_key2 = hashlib.md5(seq + "\n").hexdigest() md5_key1 = hashlib.md5(seq).hexdigest() subdirname = "seq_%d" % (0) isFound = False for md5_key in [md5_key1, md5_key2]: dir1 = md5_key[:2] dir2 = md5_key[2:4] datapath_this_seq = "%s%s%s%s%s%s%s" % ( path_result, os.sep, dir1, os.sep, dir2, os.sep, md5_key) subdir = "%s/%s" % (datapath_this_seq, subdirname) if os.path.exists(subdir): break else: subdirname = "seq_%d" % (iseq) subdir = "%s/%s" % (path_result, subdirname) if g_params['verbose']: print "subdir = %s" % (subdir) rstfile = "%s/Topcons/topcons.top" % (subdir) if os.path.exists(rstfile): print >> fpout, "Sequence number: %d" % (iseq + 1) print >> fpout, "Sequence name: %s" % (desp) print >> fpout, "Sequence length: %d aa." 
% (length) print >> fpout, "Sequence:\n%s\n\n" % (seq) topo_consensus = "" for i in xrange(len(methodlist)): method = methodlist[i] seqid = "" seqanno = "" top = "" if method == "TOPCONS": topfile = "%s/%s/topcons.top" % (subdir, "Topcons") elif method == "Philius": topfile = "%s/%s/query.top" % (subdir, "philius") elif method == "SCAMPI": topfile = "%s/%s/query.top" % (subdir, method + "_MSA") else: topfile = "%s/%s/query.top" % (subdir, method) if os.path.exists(topfile): (seqid, seqanno, top) = myfunc.ReadSingleFasta(topfile) else: top = "" if top == "": #top = "***No topology could be produced with this method topfile=%s***"%(topfile) top = "***No topology could be produced with this method***" if method == "TOPCONS": topo_consensus = top if method == "Homology": showtext_homo = method if seqid != "": showtext_homo = seqid print >> fpout, "%s:\n%s\n\n" % (showtext_homo, top) else: print >> fpout, "%s predicted topology:\n%s\n\n" % (method, top) if isWriteDG: dgfile = "%s/dg.txt" % (subdir) dg_content = "" if os.path.exists(dgfile): dg_content = myfunc.ReadFile(dgfile) lines = dg_content.split("\n") dglines = [] for line in lines: if line and line[0].isdigit(): dglines.append(line) if len(dglines) > 0: print >> fpout, "\nPredicted Delta-G-values (kcal/mol) "\ "(left column=sequence position; right column=Delta-G)\n" print >> fpout, "\n".join(dglines) if isWriteRel: reliability_file = "%s/Topcons/reliability.txt" % (subdir) reliability = "" if os.path.exists(reliability_file): reliability = myfunc.ReadFile(reliability_file) if reliability != "": print >> fpout, "\nPredicted TOPCONS reliability (left "\ "column=sequence position; right column=reliability)\n" print >> fpout, reliability print >> fpout, "##############################################################################" # write the concensus prediction in FASTA format print >> fpout_fa, ">%s" % (desp) print >> fpout_fa, topo_consensus else: # write unfinished fpout_unfinished_fa.write(">%s\n%s\n" % 
(desp, seq)) cntUnFinished += 1 if cntUnFinished > 1: print >> sys.stderr, "%s out of %d sequences are with unfinished predictions, please check." % ( cntUnFinished, numseq) for fp in [fpout, fpout_fa, fpout_unfinished_fa]: if fp: try: fp.close() except IOError: pass return 0
def SubmitJobToQueue( jobid, datapath, outpath, nummodel, nummodel_this_user, email, #{{{ host_ip, base_www_url): myfunc.WriteFile("Entering SubmitJobToQueue()\n", g_params['debugfile'], "a") modelfile = "%s/query.pdb" % (datapath) seqfile = "%s/query.fa" % (datapath) if nummodel == -1: nummodel = myfunc.ReadFile(modelfile).count("\nENDMDL") if nummodel == 0: nummodel = 1 if nummodel_this_user == -1: nummodel_this_user = nummodel query_parafile = "%s/query.para.txt" % (outpath) query_para = {} content = myfunc.ReadFile(query_parafile) para_str = content if content != "": query_para = json.loads(content) try: name_software = query_para['name_software'] except KeyError: name_software = "proq3" runjob = "%s %s/run_job.py" % (python_exec, rundir) scriptfile = "%s/runjob,%s,%s,%s,%s,%d.sh" % ( outpath, name_software, jobid, host_ip, email, nummodel) code_str_list = [] code_str_list.append("#!/bin/bash") code_str_list.append("source %s/bin/activate" % (virt_env_path)) cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % ( runjob, modelfile, outpath, datapath, jobid) if email != "": cmdline += "-email \"%s\" " % (email) if os.path.exists(seqfile): cmdline += "-fasta \"%s\" " % (seqfile) if base_www_url != "": cmdline += "-baseurl \"%s\" " % (base_www_url) if g_params['isForceRun']: cmdline += "-force " code_str_list.append(cmdline) code = "\n".join(code_str_list) msg = "Write scriptfile %s" % (scriptfile) myfunc.WriteFile(msg + "\n", g_params['debugfile'], "a") myfunc.WriteFile(code, scriptfile) os.chmod(scriptfile, 0755) myfunc.WriteFile("Getting priority" + "\n", g_params['debugfile'], "a") priority = myfunc.GetSuqPriority(nummodel_this_user) if email in vip_user_list: priority = 999999999.0 myfunc.WriteFile("priority=%d\n" % (priority), g_params['debugfile'], "a") st1 = SubmitSuqJob(suq_basedir, datapath, outpath, priority, scriptfile) return st1
def RunJob(modelfile, seqfile, outpath, tmpdir, email, jobid, g_params):  #{{{
    """Run the full ProQ3 scoring pipeline for one job and deliver the result.

    Splits `modelfile` into individual PDB models, builds (or reuses) a
    sequence profile, scores every model with ScoreModel(), writes a combined
    text result, zips the result folder, and finally notifies the user by
    email (on the front-end node only).

    Parameters:
        modelfile - PDB file possibly containing multiple models
        seqfile   - optional FASTA file; when non-empty, one shared profile is
                    built from it for all models, otherwise the sequence is
                    extracted from each model with pdb2aa_script
        outpath   - per-job output folder (tag/log/error files live here)
        tmpdir    - scratch folder; removed on success
        email     - user email for the result notification
        jobid     - job identifier; also the name of the result subfolder
        g_params  - global parameter dict; 'runjob_log'/'runjob_err' collect
                    messages, 'base_www_url' identifies the front-end node

    Returns 0 on success, 1 when errors were recorded.
    """
    all_begin_time = time.time()
    rootname = os.path.basename(os.path.splitext(modelfile)[0])
    # tag files recording the job life cycle in outpath
    starttagfile = "%s/runjob.start" % (outpath)
    runjob_errfile = "%s/runjob.err" % (outpath)
    runjob_logfile = "%s/runjob.log" % (outpath)
    finishtagfile = "%s/runjob.finish" % (outpath)
    rmsg = ""
    # optional per-query parameters serialized as JSON by the submitter
    query_parafile = "%s/query.para.txt" % (outpath)
    query_para = {}
    content = myfunc.ReadFile(query_parafile)
    if content != "":
        query_para = json.loads(content)
    resultpathname = jobid
    outpath_result = "%s/%s" % (outpath, resultpathname)
    tarball = "%s.tar.gz" % (resultpathname)
    zipfile = "%s.zip" % (resultpathname)
    tarball_fullpath = "%s.tar.gz" % (outpath_result)
    zipfile_fullpath = "%s.zip" % (outpath_result)
    mapfile = "%s/seqid_index_map.txt" % (outpath_result)
    # one line per scored model is appended to this file as models finish
    finished_model_file = "%s/finished_models.txt" % (outpath_result)
    timefile = "%s/time.txt" % (outpath_result)
    tmp_outpath_result = "%s/%s" % (tmpdir, resultpathname)
    isOK = True
    # recreate the scratch result folder from a clean state
    if os.path.exists(tmp_outpath_result):
        shutil.rmtree(tmp_outpath_result)
    try:
        os.makedirs(tmp_outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (tmp_outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass
    # recreate the final result folder from a clean state
    if os.path.exists(outpath_result):
        shutil.rmtree(outpath_result)
    try:
        os.makedirs(outpath_result)
        isOK = True
    except OSError:
        msg = "Failed to create folder %s" % (outpath_result)
        myfunc.WriteFile(msg + "\n", runjob_errfile, "a")
        isOK = False
        pass
    if isOK:
        # truncate/create the finished-models list; best-effort
        try:
            open(finished_model_file, 'w').close()
        except:
            pass
        #first getting result from caches
        # cache profiles for sequences, but do not cache predictions for models
        webserver_common.WriteDateTimeTagFile(starttagfile, runjob_logfile,
                                              runjob_errfile)
        # ==================================
        numModel = 0
        modelFileList = []
        if seqfile != "":
            # if the fasta sequence is supplied, all models should be using this sequence
            subfoldername_profile = "profile_%d" % (0)
            outpath_profile = "%s/%s" % (outpath_result, subfoldername_profile)
            CreateProfile(seqfile, outpath_profile, outpath_result,
                          tmp_outpath_result, timefile, runjob_errfile)
            # run proq3 for models
            modelList = myfunc.ReadPDBModel(modelfile)
            numModel = len(modelList)
            for ii in range(len(modelList)):
                model = modelList[ii]
                tmp_model_file = "%s/query_%d.pdb" % (tmp_outpath_result, ii)
                myfunc.WriteFile(model + "\n", tmp_model_file)
                profilename = "%s/%s" % (outpath_profile, "query.fasta")
                subfoldername_this_model = "model_%d" % (ii)
                outpath_this_model = "%s/%s" % (outpath_result,
                                                subfoldername_this_model)
                modelinfo = ScoreModel(query_para, tmp_model_file,
                                       outpath_this_model, profilename,
                                       outpath_result, tmp_outpath_result,
                                       timefile, runjob_errfile)
                myfunc.WriteFile("\t".join(modelinfo) + "\n",
                                 finished_model_file, "a")
                modelFileList.append(
                    "%s/%s" % (outpath_this_model, "query_%d.pdb" % (ii)))
        else:
            # no seqfile supplied, sequences are obtained from the model file
            modelList = myfunc.ReadPDBModel(modelfile)
            numModel = len(modelList)
            for ii in range(len(modelList)):
                model = modelList[ii]
                tmp_model_file = "%s/query_%d.pdb" % (tmp_outpath_result, ii)
                myfunc.WriteFile(model + "\n", tmp_model_file)
                subfoldername_this_model = "model_%d" % (ii)
                tmp_outpath_this_model = "%s/%s" % (tmp_outpath_result,
                                                    subfoldername_this_model)
                if not os.path.exists(tmp_outpath_this_model):
                    os.makedirs(tmp_outpath_this_model)
                tmp_seqfile = "%s/query.fasta" % (tmp_outpath_this_model)
                # extract the amino-acid sequence from the model PDB
                cmd = [pdb2aa_script, tmp_model_file]
                g_params['runjob_log'].append(" ".join(cmd))
                try:
                    rmsg = subprocess.check_output(cmd)
                    g_params['runjob_log'].append(
                        "extracting sequence from modelfile:\n" + rmsg + "\n")
                except subprocess.CalledProcessError as e:
                    g_params['runjob_err'].append(str(e) + "\n")
                    g_params['runjob_err'].append(rmsg + "\n")
                if rmsg != "":
                    myfunc.WriteFile(">seq\n" + rmsg.strip(), tmp_seqfile)
                # one profile per model in this branch
                subfoldername_profile = "profile_%d" % (ii)
                outpath_profile = "%s/%s" % (outpath_result,
                                             subfoldername_profile)
                CreateProfile(tmp_seqfile, outpath_profile, outpath_result,
                              tmp_outpath_result, timefile, runjob_errfile)
                outpath_this_model = "%s/%s" % (outpath_result,
                                                subfoldername_this_model)
                profilename = "%s/%s" % (outpath_profile, "query.fasta")
                modelinfo = ScoreModel(query_para, tmp_model_file,
                                       outpath_this_model, profilename,
                                       outpath_result, tmp_outpath_result,
                                       timefile, runjob_errfile)
                myfunc.WriteFile("\t".join(modelinfo) + "\n",
                                 finished_model_file, "a")
                modelFileList.append(
                    "%s/%s" % (outpath_this_model, "query_%d.pdb" % (ii)))
        all_end_time = time.time()
        all_runtime_in_sec = all_end_time - all_begin_time
        # flush the accumulated log lines, then write the finish tag
        if len(g_params['runjob_log']) > 0:
            rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_log']) + "\n",
                                      runjob_logfile, "a")
            if rt_msg:
                g_params['runjob_err'].append(rt_msg)
        webserver_common.WriteDateTimeTagFile(finishtagfile, runjob_logfile,
                                              runjob_errfile)
        # now write the text output to a single file
        #statfile = "%s/%s"%(outpath_result, "stat.txt")
        statfile = ""
        dumped_resultfile = "%s/%s" % (outpath_result, "query.proq3.txt")
        proq3opt = GetProQ3Option(query_para)
        webserver_common.WriteProQ3TextResultFile(dumped_resultfile,
                                                  query_para, modelFileList,
                                                  all_runtime_in_sec,
                                                  g_params['base_www_url'],
                                                  proq3opt, statfile=statfile)
        # now making zip instead (for windows users)
        # note that zip rq will zip the real data for symbolic links
        os.chdir(outpath)
        # cmd = ["tar", "-czf", tarball, resultpathname]
        cmd = ["zip", "-rq", zipfile, resultpathname]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError as e:
            g_params['runjob_err'].append(str(e))
            pass
        # success requires both the finish tag and the zip archive
        isSuccess = False
        if (os.path.exists(finishtagfile) and os.path.exists(zipfile_fullpath)):
            isSuccess = True
            # move the queue runner's stdout file (if any) next to the result
            flist = glob.glob("%s/*.out" % (tmpdir))
            if len(flist) > 0:
                outfile_runscript = flist[0]
            else:
                outfile_runscript = ""
            if os.path.exists(outfile_runscript):
                shutil.move(outfile_runscript, outpath)
            # delete the tmpdir if succeeded
            shutil.rmtree(tmpdir)  #DEBUG, keep tmpdir
        else:
            isSuccess = False
            failedtagfile = "%s/runjob.failed" % (outpath)
            webserver_common.WriteDateTimeTagFile(failedtagfile,
                                                  runjob_logfile,
                                                  runjob_errfile)
        # send the result to email
        # do not sendmail at the cloud VM
        if (webserver_common.IsFrontEndNode(g_params['base_www_url'])
                and myfunc.IsValidEmailAddress(email)):
            from_email = "*****@*****.**"
            to_email = email
            subject = "Your result for ProQ3 JOBID=%s" % (jobid)
            if isSuccess:
                bodytext = """
Your result is ready at %s/pred/result/%s

Thanks for using ProQ3

""" % (g_params['base_www_url'], jobid)
            else:
                bodytext = """
We are sorry that your job with jobid %s is failed.

Please contact %s if you have any questions.

Attached below is the error message:
%s
""" % (jobid, contact_email, "\n".join(g_params['runjob_err']))
            g_params['runjob_log'].append("Sendmail %s -> %s, %s" %
                                          (from_email, to_email, subject))  #debug
            rtValue = myfunc.Sendmail(from_email, to_email, subject, bodytext)
            if rtValue != 0:
                g_params['runjob_err'].append(
                    "Sendmail to {} failed with status {}".format(
                        to_email, rtValue))
    if len(g_params['runjob_err']) > 0:
        rt_msg = myfunc.WriteFile("\n".join(g_params['runjob_err']) + "\n",
                                  runjob_errfile, "w")
        return 1
    return 0
def DumpPredictionTOPCONS2(seqfile, path_result, outfile, isWriteDG, isWriteRel):  #{{{
    """Dump TOPCONS2 predictions for all sequences in seqfile to a text report.

    For each sequence the per-method topologies (TOPCONS, OCTOPUS, Philius,
    PolyPhobius, SCAMPI, SPOCTOPUS, Homology) are collected from the
    per-sequence result subfolder `seq_<i>` and written to `outfile`; the
    consensus topology is also written in FASTA format to `outfile`.fa.

    Parameters:
        seqfile     - FASTA file with the query sequences
        path_result - root folder with seq_<i> result subfolders
        outfile     - output text report; also the stem of the .fa companion
        isWriteDG   - include predicted Delta-G values when available
        isWriteRel  - include TOPCONS reliability values when available

    Returns 0 on success, 1 when an output file cannot be opened.
    """
    (seqidlist, seqannolist, seqlist) = myfunc.ReadFasta(seqfile)
    outfile_fa = "%s.fa" % (outfile)
    fpout = None
    try:
        fpout = open(outfile, "w")
    except IOError:
        print >> sys.stderr, "Failed to write to file \"%s\"" % (outfile)
        return 1
    fpout_fa = None
    try:
        fpout_fa = open(outfile_fa, "w")
    except IOError:
        print >> sys.stderr, "Failed to write to file \"%s\"" % (outfile_fa)
        return 1
    methodlist = [
        'TOPCONS', 'OCTOPUS', 'Philius', 'PolyPhobius', 'SCAMPI',
        'SPOCTOPUS', 'Homology'
    ]
    for i in xrange(len(seqidlist)):
        subdirname = "seq_%d" % (i)
        subdir = "%s/%s" % (path_result, subdirname)
        seq = seqlist[i]
        length = len(seq)
        desp = seqannolist[i]
        print >> fpout, "Sequence number: %d" % (i + 1)
        print >> fpout, "Sequence name: %s" % (desp)
        print >> fpout, "Sequence length: %d aa." % (length)
        print >> fpout, "Sequence:\n%s\n\n" % (seq)
        topo_consensus = ""
        # fix: the inner loop previously reused `i` as its loop variable,
        # shadowing the outer sequence index; renamed to `jm`
        for jm in xrange(len(methodlist)):
            method = methodlist[jm]
            seqid = ""
            seqanno = ""
            top = ""
            # each method keeps its topology in a differently named file
            if method == "TOPCONS":
                topfile = "%s/%s/topcons.top" % (subdir, "Topcons")
            elif method == "Philius":
                topfile = "%s/%s/query.top" % (subdir, "philius")
            elif method == "SCAMPI":
                topfile = "%s/%s/query.top" % (subdir, method + "_MSA")
            else:
                topfile = "%s/%s/query.top" % (subdir, method)
            if os.path.exists(topfile):
                (seqid, seqanno, top) = myfunc.ReadSingleFasta(topfile)
            else:
                top = ""
            if top == "":
                #top = "***No topology could be produced with this method topfile=%s***"%(topfile)
                top = "***No topology could be produced with this method***"
            if method == "TOPCONS":
                topo_consensus = top
            if method == "Homology":
                showtext_homo = method
                if seqid != "":
                    showtext_homo = seqid
                print >> fpout, "%s:\n%s\n\n" % (showtext_homo, top)
            else:
                print >> fpout, "%s predicted topology:\n%s\n\n" % (method, top)
        if isWriteDG:
            dgfile = "%s/dg.txt" % (subdir)
            dg_content = ""
            if os.path.exists(dgfile):
                dg_content = myfunc.ReadFile(dgfile)
            lines = dg_content.split("\n")
            dglines = []
            for line in lines:
                # only numeric data lines, skip headers/comments
                if line and line[0].isdigit():
                    dglines.append(line)
            if len(dglines) > 0:
                print >> fpout, "\nPredicted Delta-G-values (kcal/mol) "\
                    "(left column=sequence position; right column=Delta-G)\n"
                print >> fpout, "\n".join(dglines)
        if isWriteRel:
            reliability_file = "%s/Topcons/reliability.txt" % (subdir)
            reliability = ""
            if os.path.exists(reliability_file):
                reliability = myfunc.ReadFile(reliability_file)
            if reliability != "":
                print >> fpout, "\nPredicted TOPCONS reliability (left "\
                    "column=sequence position; right column=reliability)\n"
                print >> fpout, reliability
        print >> fpout, "##############################################################################"
        # write the concensus prediction in FASTA format
        print >> fpout_fa, ">%s" % (desp)
        print >> fpout_fa, topo_consensus
    if fpout:
        try:
            fpout.close()
        except IOError:
            pass
    if fpout_fa:
        try:
            fpout_fa.close()
        except IOError:
            pass
    return 0