def SubmitJobToQueue(jobid, datapath, outpath, numseq, numseq_this_user, email,  #{{{
        host_ip, base_www_url):
    """Write the bash run script of a job and submit it to the suq queue.

    The script calls run_job.py on <datapath>/query.fa and is saved as
    <outpath>/runjob;boctopus2;<jobid>;<host_ip>;<email>;<numseq>.sh

    Args:
        jobid: job identifier, handed to run_job.py via -jobid.
        datapath: folder containing query.fa; also handed over as -tmpdir.
        outpath: folder receiving the script; also handed over as -outpath.
        numseq: number of query sequences; -1 means "count them in
            query.fa".
        numseq_this_user: sequence count used for the priority lookup;
            -1 means "use numseq".
        email: user e-mail; added as -email when non-empty and compared
            against vip_user_list.
        host_ip: only used as a component of the script file name.
        base_www_url: added as -baseurl when non-empty.

    Returns:
        Whatever SubmitSuqJob() returns.
    """
    # Function-scope import so the block works on Python 2 and Python 3.
    try:
        from shlex import quote as shell_quote  # Python >= 3.3
    except ImportError:
        from pipes import quote as shell_quote  # Python 2

    debugfile = g_params['debugfile']
    myfunc.WriteFile("Entering SubmitJobToQueue()\n", debugfile, "a")

    fafile = "%s/query.fa" % (datapath)
    if numseq == -1:
        numseq = myfunc.CountFastaSeq(fafile)
    if numseq_this_user == -1:
        numseq_this_user = numseq

    name_software = "boctopus2"
    runjob = "%s %s/run_job.py" % (python_exec, rundir)
    scriptfile = "%s/runjob;%s;%s;%s;%s;%d.sh" % (
        outpath, name_software, jobid, host_ip, email, numseq)

    cmdline = "%s %s -outpath %s -tmpdir %s -jobid %s " % (
        runjob, fafile, outpath, datapath, jobid)
    # email and base_www_url come from the user: quote them for the shell,
    # since plain double quotes still let bash expand $(...), `...` and ".
    if email != "":
        cmdline += "-email %s " % (shell_quote(email))
    if base_www_url != "":
        cmdline += "-baseurl %s " % (shell_quote(base_www_url))
    if g_params['isForceRun']:
        cmdline += "-force "
    code = "\n".join(["#!/bin/bash", cmdline])

    myfunc.WriteFile("Write scriptfile %s\n" % (scriptfile), debugfile, "a")
    myfunc.WriteFile(code, scriptfile)
    os.chmod(scriptfile, 0o755)  # 0o755 is valid on Python 2.6+ and 3

    myfunc.WriteFile("Getting priority" + "\n", debugfile, "a")
    # The looked-up priority is currently overridden by the fixed value
    # below; the call is kept so the override can simply be deleted later.
    priority = myfunc.GetSuqPriority(numseq_this_user)
    priority = 10  # quick fix debug 2017-09-18
    if email in vip_user_list:
        priority = 999999999.0
    myfunc.WriteFile("priority=%d\n" % (priority), debugfile, "a")

    return SubmitSuqJob(suq_basedir, datapath, outpath, priority, scriptfile)
def IndexFastaFile(infile, dbname, idtype):  #{{{
    """Index a FASTA file into <dbname>N.db files plus <dbname>.index.

    The input is read in chunks of BLOCK_SIZE and cut into records at every
    "\\n>" boundary; each record is handed to WriteIndexFasta().  Afterwards
    my_indexformatconvert.py is run on the index file and, when every
    sequence was indexed into the single file <dbname>0.db, infile is
    replaced by a relative symlink to that db file.

    Args:
        infile: path of the FASTA file to index.
        dbname: path prefix of the db and index files to create.
        idtype: passed through unchanged to WriteIndexFasta().

    Returns:
        1 when infile or the index file cannot be opened, otherwise 0
        (also when the indexed count disagrees with the sequence count).
    """
    path_of_dbname = os.path.dirname(dbname)
    if path_of_dbname != "" and not os.path.exists(path_of_dbname):
        os.system("mkdir -p %s" % path_of_dbname)

    try:
        fpin = open(infile, "rb")
    except IOError:
        sys.stderr.write("Failed to open file %s for read\n" % (infile))
        return 1

    indexfile = dbname + ".index"
    try:
        fpindex = open(indexfile, "wb")
    except IOError:
        fpin.close()  # do not leak the input handle on this error path
        msg = "Failed to open indexfile {} for write"
        sys.stderr.write(msg.format(indexfile) + "\n")
        return 1

    cntdbfile = 0
    record_offset = 0
    fpdb = None
    fpindex.write("DEF_DBNAME %s\n" % dbname)
    idSet = set([])

    buff = fpin.read(BLOCK_SIZE)
    brokenSeqWithAnnoLine = ""  # record cut in two by the block-wise read
    while buff:
        beg = 0
        end = 0
        while 1:
            if brokenSeqWithAnnoLine:
                # Finish the record left over from the previous block.  If
                # it already ends with a newline, the next record starts at
                # the first ">" of this block.
                if brokenSeqWithAnnoLine[-1] == "\n":
                    end = buff.find(">")
                else:
                    end = buff.find("\n>")
                if end >= 0:
                    seqWithAnno = brokenSeqWithAnnoLine + buff[0:end]
                    (fpdb, record_offset) = WriteIndexFasta(
                        seqWithAnno, fpdb, dbname, fpindex, cntdbfile,
                        record_offset, idSet, idtype)
                    brokenSeqWithAnnoLine = ""
                    beg = end
                else:
                    # The whole block still belongs to the same record.
                    brokenSeqWithAnnoLine += buff
                    break

            beg = buff.find(">", beg)
            end = buff.find("\n>", beg + 1)
            if beg >= 0:
                if end >= 0:
                    seqWithAnno = buff[beg:end]
                    (fpdb, record_offset) = WriteIndexFasta(
                        seqWithAnno, fpdb, dbname, fpindex, cntdbfile,
                        record_offset, idSet, idtype)
                    beg = end
                else:
                    # Record continues in the next block.
                    brokenSeqWithAnnoLine = buff[beg:]
                    break
            else:
                break

            # Start a new db file once the current one is large enough.
            # NOTE(review): checked after each complete record of the block;
            # confirm this is the intended nesting level.
            if record_offset > MAX_DBFILE_SIZE:
                fpdb.close()
                fpdb = None
                cntdbfile += 1
                record_offset = 0
        buff = fpin.read(BLOCK_SIZE)

    if brokenSeqWithAnnoLine:
        # Last record of the file (not followed by another ">").
        seqWithAnno = brokenSeqWithAnnoLine
        (fpdb, record_offset) = WriteIndexFasta(
            seqWithAnno, fpdb, dbname, fpindex, cntdbfile, record_offset,
            idSet, idtype)

    fpin.close()
    fpindex.close()
    if fpdb is not None:
        fpdb.close()

    # post processing
    numIndexedSeq = len(idSet)
    numSeq = myfunc.CountFastaSeq(infile)
    cmd = "%s/my_indexformatconvert.py -f %s.index" % (
        g_params['binpath'], dbname)
    os.system(cmd)

    if numIndexedSeq == numSeq and cntdbfile == 0:
        # The count was missing from this message before.
        print("%d sequences indexed successfully" % (numIndexedSeq))
        dbfile = "%s0.db" % (dbname)
        dbfile_base = os.path.basename(dbfile)
        dbfile_path = os.path.dirname(dbfile)
        infile_path = os.path.dirname(infile)
        if dbfile_path == "":
            dbfile_path = "."
        if infile_path == "":
            infile_path = "."
        relpath = os.path.relpath(dbfile_path, infile_path)
        # Replace the original FASTA file by a symlink to the single db file.
        cmd = "rm -f %s" % infile
        print(cmd)
        os.system(cmd)
        cmd = "ln -s %s%s%s %s" % (relpath, os.sep, dbfile_base, infile)
        print(cmd)
        os.system(cmd)
    else:
        sys.stderr.write("numIndexedSeq (%d) conflicts with numSeq (%d)\n" % (
            numIndexedSeq, numSeq))
    return 0
# NOTE(review): the statements down to the RunJob() call are the tail of a
# function whose def line is not visible here; they are left unchanged.
    # Create the output folder; give up with status 1 if mkdir fails.
    try:
        subprocess.check_output(["mkdir", "-p", outpath])
    except subprocess.CalledProcessError, e:
        print >> sys.stderr, e
        return 1
    # A temporary folder is mandatory; create it when it does not exist yet.
    if tmpdir == "":
        print >> sys.stderr, "tmpdir not set. exit"
        return 1
    elif not os.path.exists(tmpdir):
        try:
            subprocess.check_output(["mkdir", "-p", tmpdir])
        except subprocess.CalledProcessError, e:
            print >> sys.stderr, e
            return 1
    numseq = myfunc.CountFastaSeq(infile)
    # The debug log is written inside the output folder.
    g_params['debugfile'] = "%s/debug.log" % (outpath)
    return RunJob(infile, outpath, tmpdir, email, jobid, g_params)
#}}}
def InitGlobalParameter(): #{{{
    """Return a new dictionary holding the default global parameters."""
    g_params = {}
    g_params['isQuiet'] = True
    g_params['runjob_log'] = []
    g_params['runjob_err'] = []
    g_params['isForceRun'] = False
    g_params['base_www_url'] = ""
    return g_params
def _WriteImageCell(fpout, sourcedir, pfamid, ext, htmlname, outpath,
        extraExtList=()):
    """Write one <td> cell linking a thumbnail to its full-size image.

    The files <pfamid><ext>, <pfamid><e> for each e in extraExtList and
    thumb.<pfamid><ext> are copied with g_params['CP_EXE'] from sourcedir to
    <outpath>/<htmlname> when they exist.  The link markup is written
    whether or not the files were found.
    """
    targetdir = outpath + os.sep + htmlname
    imageName = pfamid + ext
    thumbName = 'thumb.' + pfamid + ext
    nameList = [imageName] + [pfamid + e for e in extraExtList] + [thumbName]

    fpout.write('<td>\n')
    for name in nameList:
        sourcefile = sourcedir + os.sep + name
        if os.path.exists(sourcefile):
            os.system("%s %s %s" % (g_params['CP_EXE'], sourcefile,
                targetdir + os.sep + name))
    # Links are relative to the HTML page, i.e. <htmlname>/<file name>.
    fpout.write("<a href=\"%s\" target=\"_blank\">\n" % (
        htmlname + os.sep + os.path.basename(imageName)))
    fpout.write("<img src=\"%s\">\n" % (
        htmlname + os.sep + os.path.basename(thumbName)))
    fpout.write("</a>\n")
    fpout.write('</td>\n')


def WriteHTMLTable(tablename, tabletitle, idList, pfamDefDict,  #{{{
        datapath, topomsapath, ordermsapath, htmlname, outpath, fpout):
    """Write a sortable HTML table with one row per Pfam family to fpout.

    Families whose <datapath>/<pfamid>.sorted.orig.topomsa.fa is missing or
    holds fewer than g_params['MIN_NUMSEQ'] sequences are skipped.  Image
    files referenced by a row are copied to <outpath>/<htmlname>.

    Args:
        tablename: name of the HTML anchor written before the table.
        tabletitle: heading written before the table.
        idList: Pfam IDs, one table row is attempted for each.
        pfamDefDict: maps Pfam ID to its definition; '-' is shown when
            an ID is missing.
        datapath, topomsapath, ordermsapath: accepted for compatibility
            but overridden by the values of the same name in g_params.
        htmlname: sub-folder of outpath used for the copied images and as
            prefix of the image links.
        outpath: folder in which the HTML page is located.
        fpout: writable file object receiving the HTML.
    """
    # The path arguments are replaced by the global settings (as before).
    ordermsapath = g_params['ordermsapath']
    datapath = g_params['datapath']
    topomsapath = g_params['topomsapath']
    treepath = g_params['treepath']

    hasOrderMSA = ordermsapath != "" and os.path.exists(ordermsapath)
    hasTopoMSA = topomsapath != "" and os.path.exists(topomsapath)

    fpout.write("<a name=\"%s\"></a><h4>%s</h4>\n" % (tablename, tabletitle))
    fpout.write("<table class=\"sortable\" border=1>\n")

    headerItemList = ["No.", "PfamID", "Definition", "numSeq", "numCluster",
            "Phylo Tree"]
    if hasOrderMSA:
        headerItemList.append("Topology MSA ordered according to phylo tree")
    if hasTopoMSA:
        headerItemList.append("Topology MSA grouped by topology comparison")

    fpout.write("<tr>\n")
    for item in headerItemList:
        fpout.write("<th>\n")
        fpout.write("%s\n" % item)
        fpout.write("</th>\n")
    fpout.write("</tr>\n")

    cntOutputID = 0
    for pfamid in idList:
        pfamURL = 'http://pfam.sanger.ac.uk/family/' + pfamid
        pfamDef = pfamDefDict.get(pfamid, '-')

        topomsafile = datapath + os.sep + pfamid + '.sorted.orig.topomsa.fa'
        if os.path.exists(topomsafile):
            numSeq = myfunc.CountFastaSeq(topomsafile)
        else:
            numSeq = -1
        if numSeq < g_params['MIN_NUMSEQ']:
            continue

        cntOutputID += 1
        fpout.write("<tr>\n")
        #---------------------------
        fpout.write('<td>\n')
        fpout.write('%d\n' % (cntOutputID))
        fpout.write('</td>\n')
        #---------------------------
        fpout.write('<td>\n')
        fpout.write('<a href=\"%s\" target=\"_blank\">%s</a>\n' % (
            pfamURL, pfamid))
        fpout.write('</td>\n')
        #---------------------------
        fpout.write('<td>\n')
        fpout.write('%s\n' % pfamDef)
        fpout.write('</td>\n')
        #---------------------------
        fpout.write('<td>\n')
        fpout.write('%d\n' % numSeq)
        fpout.write('</td>\n')
        #---------------------------
        clusteredmsafile = (datapath + os.sep + pfamid
                + '.clustered.orig.topomsa.fa')
        numCluster = GetNumCluster(clusteredmsafile)
        fpout.write('<td>\n')
        if numCluster == -1:
            fpout.write('%s\n' % "-")
        else:
            fpout.write('%d\n' % numCluster)
        fpout.write('</td>\n')
        #--------------------------- phylo tree (jpg, pdf and thumbnail)
        _WriteImageCell(fpout, treepath, pfamid, '-itol.jpg', htmlname,
                outpath, extraExtList=('-itol.pdf',))
        #---------------------------
        if hasOrderMSA:
            _WriteImageCell(fpout, ordermsapath, pfamid,
                    '.reordered.topomsa.png', htmlname, outpath)
        #---------------------------
        if hasTopoMSA:
            _WriteImageCell(fpout, topomsapath, pfamid,
                    '.sorted.orig.topomsa.png', htmlname, outpath)
        #---------------------------
        fpout.write("</tr>\n")
    fpout.write("</table>\n")
def SplitFasta(inFile):  #{{{
    """Split a FASTA file into pieces and return the number of sequences.

    The file is read in chunks of BLOCK_SIZE and cut into records at every
    "\\n>" boundary; each record is handed to OutputSplittedSeq(), which
    is given the running split/sequence counters and the current output
    handle and returns their updated values.

    When g_params['numsplit'] > 1, g_params['numseq_per_split'] is first
    recomputed as ceil(number of sequences / numsplit).

    Args:
        inFile: path of the FASTA file to split.

    Returns:
        The number of records passed to OutputSplittedSeq().
    """
    # The faster version
    if 'numsplit' in g_params or g_params['numseq_per_split'] > 1:
        g_params['isNameFileSequentially'] = True
    if 'numsplit' in g_params and g_params['numsplit'] > 1:
        numTotalSeq = myfunc.CountFastaSeq(inFile)
        g_params['numseq_per_split'] = int(
            ceil(numTotalSeq / float(g_params['numsplit'])))
        if g_params['verbose'] >= 1:
            # A space was missing between "to" and "be" in this message.
            msg = ("file %s (with %d sequences) is going to "
                   "be splitted into %d files")
            print(msg % (inFile, numTotalSeq, g_params['numsplit']))

    rootname = os.path.basename(os.path.splitext(inFile)[0])
    cntTotalSeq = 0
    fpout = None
    cntsplit = 0
    cntseq_of_split = 0
    brokenSeqWithAnnoLine = ""  # record cut in two by the block-wise read

    # "with" closes the input file even if OutputSplittedSeq() raises.
    with open(inFile, "r") as fpin:
        buff = fpin.read(BLOCK_SIZE)
        while buff:
            beg = 0
            end = 0
            while 1:
                if brokenSeqWithAnnoLine:
                    # Finish the record left over from the previous block.
                    # If it already ends with a newline, the next record
                    # starts at the first ">" of this block.
                    if brokenSeqWithAnnoLine[-1] == "\n":
                        end = buff.find(">")
                    else:
                        end = buff.find("\n>")
                    if end >= 0:
                        seqWithAnno = brokenSeqWithAnnoLine + buff[0:end]
                        (cntsplit, cntseq_of_split, fpout) = OutputSplittedSeq(
                            seqWithAnno, rootname, cntsplit, cntseq_of_split,
                            fpout)
                        brokenSeqWithAnnoLine = ""
                        cntTotalSeq += 1
                        beg = end
                    else:
                        # The whole block still belongs to the same record.
                        brokenSeqWithAnnoLine += buff
                        break

                beg = buff.find(">", beg)
                end = buff.find("\n>", beg + 1)
                if beg >= 0:
                    if end >= 0:
                        seqWithAnno = buff[beg:end]
                        (cntsplit, cntseq_of_split, fpout) = OutputSplittedSeq(
                            seqWithAnno, rootname, cntsplit, cntseq_of_split,
                            fpout)
                        cntTotalSeq += 1
                        beg = end
                    else:
                        # Record continues in the next block.
                        brokenSeqWithAnnoLine = buff[beg:]
                        break
                else:
                    break
            buff = fpin.read(BLOCK_SIZE)

    if brokenSeqWithAnnoLine:
        # Last record of the file (not followed by another ">").
        seqWithAnno = brokenSeqWithAnnoLine
        (cntsplit, cntseq_of_split, fpout) = OutputSplittedSeq(
            seqWithAnno, rootname, cntsplit, cntseq_of_split, fpout)
        cntTotalSeq += 1
    # NOTE(review): fpout is not closed here, as before; presumably
    # OutputSplittedSeq manages the output handles - verify.
    return cntTotalSeq