def FilterSignalPeptide(topofile, sigpepDict, outfile, isDeleteSeqWithSignalPeptide):
    """Write the topologies of topofile that still contain TM regions.

    For each record, sigpepDict is looked up by the record's seqid. When a
    signal peptide position is found, the record is either dropped (if
    isDeleteSeqWithSignalPeptide is true) or its topology is passed through
    lcmp.FilterSignalPeptideInTopology. A record is written only when the
    resulting topology is non-empty and myfunc.CountTM reports at least one
    TM region.

    Returns 1 when the topology file cannot be opened for reading, else 0.
    """
    reader = myfunc.ReadFastaByBlock(topofile)
    if reader.failure:
        return 1

    out = myfunc.myopen(outfile, sys.stdout, "w", False)
    block = reader.readseq()
    while block is not None:
        for record in block:
            try:
                sp_pos = sigpepDict[record.seqid]
            except KeyError:
                sp_pos = -1

            if sp_pos == -1:
                topology = record.seq
            elif isDeleteSeqWithSignalPeptide:
                # Sequences with a signal peptide are removed entirely.
                continue
            else:
                topology = lcmp.FilterSignalPeptideInTopology(record.seq, sp_pos)

            if topology == "" or myfunc.CountTM(topology) <= 0:
                continue
            out.write(">%s\n%s\n" % (record.description, topology))
        block = reader.readseq()

    reader.close()
    myfunc.myclose(out)
    return 0
def MSA2Seq_fasta(infile, outfile): #{{{
    """Strip alignment gaps from a FASTA-format MSA and write plain FASTA.

    infile is read in chunks of BLOCK_SIZE (a module-level constant) and
    parsed with myfunc.ReadFastaFromBuffer. For every record the characters
    "-", "." and " " are removed from the sequence before it is written to
    outfile (opened through myfunc.myopen with sys.stdout as the default).

    Returns 0 on success and 1 when an IOError occurs.
    """
    try:
        # Text mode: the chunks are concatenated with a str buffer below.
        fpin = open(infile, "r")
    except IOError:
        sys.stderr.write("Failed to read file %s\n" % infile)
        return 1

    fpout = None
    try:
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
        unprocessedBuffer = ""
        isEOFreached = False
        while True:
            buff = fpin.read(BLOCK_SIZE)
            if len(buff) < BLOCK_SIZE:
                isEOFreached = True
            buff = unprocessedBuffer + buff
            recordList = []
            # Text after the last complete record is handed back so it can be
            # prepended to the next chunk.
            unprocessedBuffer = myfunc.ReadFastaFromBuffer(
                buff, recordList, isEOFreached)
            for rd in recordList:
                anno = rd[1]
                seq = rd[2].replace("-", "").replace(".", "").replace(" ", "")
                fpout.write(">%s\n%s\n" % (anno, seq))
            if isEOFreached:
                break
    except IOError:
        sys.stderr.write("Failed to read file %s\n" % infile)
        return 1
    finally:
        # Release both handles on every path, including errors.
        fpin.close()
        if fpout is not None:
            myfunc.myclose(fpout)
    return 0
def MatchMSATopo_using_topofile(msafile, topofile, isIgnoreBadseq, #{{{
        method_match, outfile):
    """Match every sequence of an MSA to its topology read from topofile.

    Topologies are loaded with GetTopoDict and looked up by the seqid of
    each MSA record; a missing topology is reported on stderr and replaced
    by an empty string. The result of MatchSeqToTopo is written in FASTA
    form unless it equals "BADSEQ" and isIgnoreBadseq is true.

    Returns 1 when the MSA file cannot be opened for reading, else 0.
    """
    topologies = GetTopoDict(topofile)
    reader = myfunc.ReadFastaByBlock(msafile)
    if reader.failure:
        return 1

    out = myfunc.myopen(outfile, sys.stdout, "w", False)
    block = reader.readseq()
    while block is not None:
        for record in block:
            try:
                topo = topologies[record.seqid]
            except KeyError:
                sys.stderr.write("topo not found for ID %s\n" % (record.seqid))
                topo = ""
            matched = MatchSeqToTopo(record.seq, topo, method_match)
            if isIgnoreBadseq and matched == "BADSEQ":
                continue
            out.write(">%s\n" % (record.description))
            out.write("%s\n" % (matched))
        block = reader.readseq()

    myfunc.myclose(out)
    reader.close()
    return 0
def Stat2(lines, outfile):
    """Tabulate how often the digits 1 and 2 occur in each "TMMap" line.

    Only lines whose first field is "TMMap" are used. Field 4 (with any
    trailing ":" removed) is the number of TM helices; lines with numTM <= 1
    or numTM >= MAX_NUMTM are skipped. The remaining fields are integers
    that are concatenated into a digit string in which the occurrences of
    "1", of "2" and of both together are counted.

    The three histograms are written side by side to outfile, one row per
    index from 1 to MAX_NUMTM.
    """
    MAX_NUMTM = 14
    keys = [1, 2, 12]
    dt = dict((item, InitXY(MAX_NUMTM)) for item in keys)

    for line in lines:
        # Blank and whitespace-only lines have no fields to inspect.
        strs = line.split() if line else []
        if not strs or strs[0] != "TMMap":
            continue
        numTM = int(strs[4].rstrip(":"))
        if numTM <= 1 or numTM >= MAX_NUMTM:
            continue
        str_maparray = "".join("%d" % int(x) for x in strs[5:])
        num_1 = str_maparray.count("1")
        num_2 = str_maparray.count("2")
        dt[1][1][num_1] += 1
        dt[2][1][num_2] += 1
        dt[12][1][num_1 + num_2] += 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        fpout.write("#%2s %8s %3s %8s %3s %8s\n" % ("1x", "1y", "2x", "2y",
                                                     "12x", "12y"))
        for i in range(1, MAX_NUMTM + 1):
            for item in keys:
                (lx, ly) = dt[item]
                fpout.write("%3d %8d " % (lx[i], ly[i]))
            fpout.write("\n")
    finally:
        myfunc.myclose(fpout)
def main(g_params): #{{{
    """Append the mapped value of a second pair list to each input pair.

    Command line: INFILE, -gram FILE, -o/-outfile FILE, -q, -h. Both files
    are read with myfunc.ReadPairList. For every pair of INFILE one
    tab-separated line is written: the two fields of the pair followed by
    the value the -gram file maps the second field to, or "NA" when the
    second field is not a key of that map.

    Returns 1 on a usage error or when an input file fails myfunc.checkfile.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    gramfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-gram", "--gram"]:
                (gramfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(gramfile) != 0:
        return 1

    gramMapDict = dict((tup[0], tup[1])
                       for tup in myfunc.ReadPairList(gramfile))
    gi2taxidpairlist = myfunc.ReadPairList(infile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for tup in gi2taxidpairlist:
            fpout.write("%s\t%s\t%s\n" % (tup[0], tup[1],
                                           gramMapDict.get(tup[1], "NA")))
    finally:
        myfunc.myclose(fpout)
def main(g_params): #{{{
    """Print the result of myfunc.GetTMPosition for every topology.

    Command line: TOPOFILE (or -i FILE), -o FILE, -gapless, -q, -h. The
    topology file is read with myfunc.ReadFasta; with -gapless the
    characters "-" and "." are removed from each topology first. One line
    "<seqid> <positions>" is written per record.

    Returns 1 on a usage error or when the file cannot be processed.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    topofile = ""
    outfile = ""
    isGapLess = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            topofile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-i", "--i"]:
                # Options taking a value must not be the last argument.
                if i + 1 >= numArgv:
                    sys.stderr.write(
                        "Error! Option %s requires a value\n" % arg)
                    return 1
                if arg in ["-o", "--o"]:
                    outfile = argv[i + 1]
                else:
                    topofile = argv[i + 1]
                i += 2
            elif arg in ["-gapless", "--gapless"]:
                isGapLess = True
                i += 1
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            topofile = arg
            i += 1

    if topofile == "":
        sys.stderr.write("topofile not set. exit\n")
        return 1

    fpout = None
    try:
        (idList, annoList, seqList) = myfunc.ReadFasta(topofile)
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
        for (seqid, topo) in zip(idList, seqList):
            if isGapLess:
                topo = topo.replace("-", "").replace(".", "")
            posTMList = myfunc.GetTMPosition(topo)
            fpout.write("%s %s\n" % (seqid, posTMList))
    except (IOError, IndexError) as err:
        # Report the failure instead of silently producing partial output.
        sys.stderr.write("Failed to process %s: %s\n" % (topofile, err))
        return 1
    finally:
        if fpout is not None:
            myfunc.myclose(fpout)
def main(g_params):#{{{
    """Run FilterUniprotTableinfoByKeyword on INFILE.

    Command line: INFILE, -idwithanno FILE (required), -o FILE,
    -key KEYWORD (appended to g_params['keywordList'], repeatable),
    -case (sets g_params['isCaseSensitive']), -q, -h. The -idwithanno file
    is loaded with ReadSeqIDWithAnnoFile and passed on with the output
    handle.

    Returns 1 on a usage error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    idwithannofile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-idwithanno", "--idwithanno",
                         "-key", "--key"]:
                # Options taking a value must not be the last argument.
                if i + 1 >= numArgv:
                    sys.stderr.write(
                        "Error! Option %s requires a value\n" % arg)
                    return 1
                value = argv[i + 1]
                if arg in ["-o", "--o"]:
                    outfile = value
                elif arg in ["-idwithanno", "--idwithanno"]:
                    idwithannofile = value
                else:
                    g_params['keywordList'].append(value)
                i += 2
            elif arg in ["-case", "--case"]:
                g_params['isCaseSensitive'] = True
                i += 1
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if infile == "":
        sys.stderr.write("infile not set.exit\n")
        return 1
    if idwithannofile == "":
        sys.stderr.write("idwithannofile not set.exit\n")
        return 1

    seqid2AnnoDict = ReadSeqIDWithAnnoFile(idwithannofile)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        FilterUniprotTableinfoByKeyword(infile, seqid2AnnoDict, fpout)
    finally:
        myfunc.myclose(fpout)
def main(g_params): #{{{
    """Run CleanAASeq on every file given on the command line.

    Command line: FILE..., -l LISTFILE (whitespace-separated file names),
    -overwrite, -outpath DIR, -q, -h. Each file is passed to CleanAASeq
    together with the overwrite flag and the output handle.

    Returns 1 on a usage error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []
    isOverWrite = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outpath", "--outpath", "-l", "--l"]:
                # Options taking a value must not be the last argument.
                if i + 1 >= numArgv:
                    sys.stderr.write(
                        "Error! Option %s requires a value\n" % arg)
                    return 1
                if arg in ["-l", "--l"]:
                    fileListFile = argv[i + 1]
                # NOTE(review): the value of -outpath is accepted but never
                # used; outfile is never set either, so output always goes
                # to the default handle. Confirm the intended behavior.
                i += 2
            elif arg in ["-overwrite", "--overwrite"]:
                isOverWrite = 1
                i += 1
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        try:
            with open(fileListFile, "r") as fp:
                fileList += fp.read().split()
        except IOError:
            # Best effort: files given directly are still processed.
            sys.stderr.write("Failed to read idlistfile %s.\n" % fileListFile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for infile in fileList:
            CleanAASeq(infile, isOverWrite, fpout)
    finally:
        myfunc.myclose(fpout)
def main(g_params): #{{{#{{{
    """Parse the command line and run Getseqlen on the input file.

    Command line: INFILE, -i/-printid, -just-print-sum, -o/-outfile FILE,
    -bs/-block-size N, -h. The block size must be a positive integer.

    Returns 0 after printing help, 1 on a usage error or when the input
    file fails myfunc.checkfile, otherwise the status of Getseqlen.
    """
    # Check argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = arg
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 0
            elif arg in ["-i", "--i", "-printid", "--printid"]:
                isPrintID = True
                i += 1
            elif arg in ["-just-print-sum", "--just-print-sum"]:
                isJustPrintSum = True
                i += 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-bs", "--bs", "-block-size", "--block-size"]:
                BLOCK_SIZE, i = myfunc.my_getopt_int(sys.argv, i)
                # Zero is rejected as well, matching the error message.
                if BLOCK_SIZE <= 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE,
                           fpout)
    finally:
        myfunc.myclose(fpout)
    return status
def main(g_params):#{{{#{{{
    """Parse the command line and run Getseqlen on the input file.

    Command line: INFILE, -i/-printid, -just-print-sum, -o/-outfile FILE,
    -bs/-block-size N, -h. The block size must be a positive integer.

    Returns 0 after printing help, 1 on a usage error or when the input
    file fails myfunc.checkfile, otherwise the status of Getseqlen.
    """
    # Check argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = arg
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 0
            elif arg in ["-i", "--i", "-printid", "--printid"]:
                isPrintID = True
                i += 1
            elif arg in ["-just-print-sum", "--just-print-sum"]:
                isJustPrintSum = True
                i += 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-bs", "--bs", "-block-size", "--block-size"]:
                BLOCK_SIZE, i = myfunc.my_getopt_int(sys.argv, i)
                # Zero is rejected as well, matching the error message.
                if BLOCK_SIZE <= 0:
                    sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                    return 1
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE,
                           fpout)
    finally:
        myfunc.myclose(fpout)
    return status
def main(g_params): #{{{
    """Run BlastM9toPairlist on every input file.

    Command line: FILE..., -l LISTFILE, -o/-outfile FILE, -evalue X
    (g_params['evalue_th']), -seqidt X (g_params['seqidt_th']), -round N
    (g_params['iteration']), -h.

    Returns 1 on a usage error or when no input file is given.
    """
    # Check argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileList = []
    fileListFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-l", "--l"]:
                fileListFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-o", "--o", "-outfile"]:
                outfile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-evalue", "--evalue"]:
                g_params['evalue_th'], i = myfunc.my_getopt_float(sys.argv, i)
            elif arg in ["-seqidt", "--seqidt"]:
                g_params['seqidt_th'], i = myfunc.my_getopt_float(sys.argv, i)
            elif arg in ["-round", "--round"]:
                # Unpack (value, next index) like the other options; storing
                # the whole tuple left i unchanged and the loop never ended.
                g_params['iteration'], i = myfunc.my_getopt_int(sys.argv, i)
            else:
                sys.stderr.write("Error! Wrong argument: '%s'\n" % arg)
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile, delim="\n")

    if len(fileList) < 1:
        sys.stderr.write("No input set. exit\n")
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for infile in fileList:
            BlastM9toPairlist(infile, fpout)
    finally:
        myfunc.myclose(fpout)
def main(g_params): #{{{
    """Collect IDs from the command line and pass them to GIID2UniprotID.

    Command line: ID..., -l/-list LISTFILE, -o/-outfile FILE, -h. The IDs
    are joined with blanks into params['query']; the other request
    parameters ('from', 'to', 'format') are fixed.

    Returns 1 on a usage error or when no ID is given.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    idListFile = ""
    idList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(arg)
            i += 1
            isNonOptionArg = False
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l", "-list"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            idList.append(arg)
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) < 1:
        sys.stderr.write("No ID set. exit\n")
        return 1

    params = {
        'from': 'P_GI',
        'to': 'ID',  # to uniprot id
        'format': 'tab',
        'query': " ".join(idList),
    }

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        GIID2UniprotID(params, fpout)
    finally:
        myfunc.myclose(fpout)
def OutputPfamFastaFile(seqidList, pfamid, seqdbDict, hdl_seqdb, extra_desp_dict, outpath): #{{{
    """Write the sequences of seqidList to <outpath>/<pfamid><out_ext>.

    IDs containing "UniRef" are reduced to the part after the first "_".
    With g_params['isBigmem'] set, records are (annotation, sequence) pairs
    looked up in seqdbDict; otherwise they are fetched with
    hdl_seqdb.GetRecord. When extra_desp_dict has a non-empty entry for the
    ID it is prepended to the annotation. IDs that cannot be found are
    reported on stderr and skipped.

    With g_params['isGzip'] set, the finished file is compressed with
    "gzip -N -f"; subprocess.CalledProcessError is raised if gzip fails.
    """
    outfile = "%s%s%s%s" % (outpath, os.sep, pfamid, g_params['out_ext'])
    fpout = myfunc.myopen(outfile, None, "w", True)
    try:
        for seqid in seqidList:
            if seqid.find("UniRef") != -1:
                try:
                    seqid = seqid.split("_")[1]
                except IndexError:
                    pass  # no "_" in the ID: use it unchanged

            # An empty dictionary yields "" for every ID, i.e. no extra text.
            extraanno = extra_desp_dict.get(seqid, "")

            if g_params['isBigmem']:
                try:
                    (tmpanno, tmpseq) = seqdbDict[seqid]
                except KeyError:
                    sys.stderr.write(
                        "seqid %s not found in seqdb\n" % (seqid))
                    continue
                if extraanno != "":
                    tmpanno = "%s %s" % (extraanno, tmpanno)
                fpout.write(">%s\n%s\n" % (tmpanno, tmpseq))
            else:
                record = hdl_seqdb.GetRecord(seqid)
                if not record:
                    sys.stderr.write(
                        "seqid %s not found in seqdb\n" % (seqid))
                    continue
                if extraanno == "":
                    fpout.write("%s" % (record))
                else:
                    (tmpseqid, tmpanno, tmpseq) = \
                        myfunc.ExtractFromSeqWithAnno(record)
                    fpout.write(">%s %s\n%s\n" % (extraanno, tmpanno, tmpseq))
    finally:
        myfunc.myclose(fpout)

    if g_params['isGzip']:
        cmd = ["gzip", "-N", "-f", outfile]
        sys.stdout.write(" ".join(cmd) + "\n")
        # Close the null device once gzip has finished.
        with open(os.devnull, "w") as devnull:
            subprocess.check_call(cmd, stdout=devnull)
def main(g_params):
    """Parse the command line and run CatFasta2 on the input file.

    Command line: INFILE (or -i/--infile FILE), -b/--begin N, -e/--end N,
    -o/--outfile FILE, -h. begin and end are passed on unchanged.

    Returns 1 on a usage error or when the input file fails
    myfunc.checkfile, otherwise 0.
    """
    # Check argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999
    # NOTE(review): method is never changed by any option, so the CatFasta
    # branch below is currently unreachable.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            # The argument following "--" is the input file; it used to be
            # skipped without being stored.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-b", "--b", "--begin"]:
                begin, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ["-e", "--e", "--end"]:
                end, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ["-o", "--o", "--outfile"]:
                # Same helper module as the other options.
                outFile, i = myfunc.my_getopt_str(sys.argv, i)
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        if method == 1:
            CatFasta(inFile, begin, end, fpout)
        else:
            CatFasta2(inFile, begin, end, fpout)
    finally:
        myfunc.myclose(fpout)
    return 0
def main(g_params):
    """Parse the command line and run CatFasta2 on the input file.

    Command line: INFILE (or -i/--infile FILE), -b/--begin N, -e/--end N,
    -o/--outfile FILE, -h. begin and end are passed on unchanged.

    Returns 1 on a usage error or when the input file fails
    myfunc.checkfile, otherwise 0.
    """
    # Check argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999
    # NOTE(review): method is never changed by any option, so the CatFasta
    # branch below is currently unreachable.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            # The argument following "--" is the input file; it used to be
            # skipped without being stored.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-b", "--b", "--begin"]:
                begin, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ["-e", "--e", "--end"]:
                end, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ["-o", "--o", "--outfile"]:
                # Same helper module as the other options.
                outFile, i = myfunc.my_getopt_str(sys.argv, i)
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        if method == 1:
            CatFasta(inFile, begin, end, fpout)
        else:
            CatFasta2(inFile, begin, end, fpout)
    finally:
        myfunc.myclose(fpout)
    return 0
def main(g_params):#{{{
    """Run CountUniquePairInvertedInfo on every input file.

    Command line: FILE..., -l LISTFILE, -o/-outfile FILE, -outpath DIR,
    -q, -h. The Pfam definition file below DATADIR3 is loaded with
    ReadPfamDefFile and its pfamid dictionary is passed on for every file.

    Returns 1 on a usage error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    # NOTE(review): outpath can be set with -outpath but is not used below.
    outpath = "./"
    outfile = ""
    fileListFile = ""
    fileList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    (pfamidDefDict, clanidDefDict) = ReadPfamDefFile(pfamDefFile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for infile in fileList:
            CountUniquePairInvertedInfo(infile, pfamidDefDict, fpout)
    finally:
        myfunc.myclose(fpout)
def main(g_params): #{{{
    """Run FilterTopconsSingleResultUniprot on the allinfo file.

    Command line: ALLINFOFILE, -idwithanno FILE (required), -o FILE, -q,
    -h. The -idwithanno file is loaded with ReadSeqIDWithAnnoFile and
    passed on with the output handle.

    Returns 1 on a usage error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    allinfoFile = ""
    idwithannofile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            allinfoFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-idwithanno", "--idwithanno"]:
                # Options taking a value must not be the last argument.
                if i + 1 >= numArgv:
                    sys.stderr.write(
                        "Error! Option %s requires a value\n" % arg)
                    return 1
                if arg in ["-o", "--o"]:
                    outfile = argv[i + 1]
                else:
                    idwithannofile = argv[i + 1]
                i += 2
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            allinfoFile = arg
            i += 1

    if allinfoFile == "":
        sys.stderr.write("allinfoFile not set.exit\n")
        return 1
    if idwithannofile == "":
        sys.stderr.write("idwithannofile not set.exit\n")
        return 1

    seqid2AnnoDict = ReadSeqIDWithAnnoFile(idwithannofile)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        FilterTopconsSingleResultUniprot(allinfoFile, seqid2AnnoDict, fpout)
    finally:
        myfunc.myclose(fpout)
def main(): #{{{
    """Parse the command line and run RandFasta on the input file.

    Command line: INFILE (or -i/--infile FILE), -n N, -seed N,
    -o/--outfile FILE, -bs/-block-size N, -h.

    Returns 1 on a usage error or when the input file fails
    myfunc.checkfile.
    """
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            # The argument following "--" is the input file; it used to be
            # skipped without being stored.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-n", "--n"]:
                N, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ["-o", "--outfile"]:
                outFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-bs", "--block-size", "-block-size"]:
                # NOTE(review): this binds a local name only; the value is
                # validated but not passed to RandFasta. Confirm whether a
                # module-level BLOCK_SIZE was meant to be updated.
                BLOCK_SIZE, i = myfunc.my_getopt_int(sys.argv, i)
                if BLOCK_SIZE <= 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        RandFasta(inFile, N, rand_seed, fpout)
    finally:
        myfunc.myclose(fpout)
def ReWriteFasta(infile, outfile): #{{{
    """Copy every record of infile to outfile as ">description" + sequence.

    The input is read block by block with myfunc.ReadFastaByBlock. The
    output is opened only after the reader has been opened successfully, so
    a failing input neither truncates nor leaks the output file.

    Returns 1 when the input cannot be opened for reading, else 0.
    """
    hdl = myfunc.ReadFastaByBlock(infile, 0, 1)
    if hdl.failure:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        recordList = hdl.readseq()
        while recordList is not None:
            for rd in recordList:
                fpout.write(">%s\n%s\n" % (rd.description, rd.seq))
            recordList = hdl.readseq()
    finally:
        hdl.close()
        myfunc.myclose(fpout)
    return 0
def main():#{{{
    """Parse the command line and run RandFasta on the input file.

    Command line: INFILE (or -i/--infile FILE), -n N, -seed N,
    -o/--outfile FILE, -bs/-block-size N, -h.

    Returns 1 on a usage error or when the input file fails
    myfunc.checkfile.
    """
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            # The argument following "--" is the input file; it used to be
            # skipped without being stored.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-n", "--n"]:
                N, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ["-o", "--outfile"]:
                outFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ["-bs", "--block-size", "-block-size"]:
                # NOTE(review): this binds a local name only; the value is
                # validated but not passed to RandFasta. Confirm whether a
                # module-level BLOCK_SIZE was meant to be updated.
                BLOCK_SIZE, i = myfunc.my_getopt_int(sys.argv, i)
                if BLOCK_SIZE <= 0:
                    sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                    return 1
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        RandFasta(inFile, N, rand_seed, fpout)
    finally:
        myfunc.myclose(fpout)
def main(g_params):#{{{
    """Run GetPercentTMOfClan with the data of INFILE and the map file.

    Command line: INFILE, -map/-mapfile FILE, -o/-outfile FILE, -q, -h.
    The map file is read with myfunc.ReadFam2SeqidMap and INFILE with
    ReadPercentTM.

    Returns 1 on a usage error or when an input file fails
    myfunc.checkfile.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    mapfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-map", "--map", "-mapfile"]:
                (mapfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(mapfile) != 0:
        return 1

    clanid2pfamidDict = myfunc.ReadFam2SeqidMap(mapfile)
    pfamPercentTMDict = ReadPercentTM(infile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        GetPercentTMOfClan(pfamPercentTMDict, clanid2pfamidDict, fpout)
    finally:
        myfunc.myclose(fpout)
def main(g_params): #{{{
    """Parse the command line and run FilterUniprotIDMap on INFILE.

    Command line: INFILE, -o/-outfile FILE, -q, -h.

    Returns 1 on a usage error or when INFILE is unset or does not exist.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    outfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                # The option takes a value and must not be the last argument.
                if i + 1 >= numArgv:
                    sys.stderr.write(
                        "Error! Option %s requires a value\n" % arg)
                    return 1
                outfile = argv[i + 1]
                i += 2
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if infile == "":
        sys.stderr.write("infile not set. exit\n")
        return 1
    elif not os.path.exists(infile):
        sys.stderr.write("infile %s does not exist. exit\n" % (infile))
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        FilterUniprotIDMap(infile, fpout)
    finally:
        myfunc.myclose(fpout)
def main(): #{{{
    """Parse the command line and run MPA2MSA on the input file.

    Command line: INFILE, -o FILE, -of FORMAT (default "mfa"), -h.

    Returns 1 on a usage error or when INFILE fails myfunc.checkfile.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    output_format = "mfa"

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-of", "--of"]:
                output_format, i = myfunc.my_getopt_str(argv, i)
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile, "MSA file") != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        # detect the format of mpa files, the old format
        MPA2MSA(infile, output_format, fpout)
    finally:
        myfunc.myclose(fpout)
def GetSeqFromMSA(infile, outfile, hdl_seqdb):#{{{
    """Retrieve the full sequences of the IDs listed in infile.

    infile is either an ID list or a FASTA file, as reported by
    GetFileType. For a FASTA file the header lines (without the leading
    ">") are written to a temporary file that is handed to GetFullSeq, and
    when the first record has the id "target" its ungapped sequence is
    written to the output first. A one-line summary is printed to stdout.

    Returns 1 when infile does not exist, has an unrecognized type, or the
    output directory cannot be created; otherwise 0.
    """
    if not os.path.exists(infile):
        sys.stderr.write("infile %s does not exist.\n" % (infile))
        return 1

    # Determine the type before any output is opened, so that an
    # unrecognized input leaves nothing behind.
    filetype = GetFileType(infile)
    if filetype not in ("fasta", "idlist"):
        sys.stderr.write("Unrecognized infile type\n")
        return 1

    outdir = os.path.dirname(outfile)
    if outdir != "" and not os.path.exists(outdir):
        try:
            os.makedirs(outdir)
        except OSError as err:
            sys.stderr.write("Failed to create %s: %s\n" % (outdir, err))
            return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    tmpf = None
    try:
        if filetype == "fasta":
            # Collect the header lines in Python instead of through a shell
            # pipeline, so paths with blanks or shell metacharacters work.
            (fd, tmpf) = tempfile.mkstemp()
            with os.fdopen(fd, "w") as fptmp:
                with open(infile, "r") as fpin:
                    for line in fpin:
                        if line.startswith(">"):
                            fptmp.write(line[1:].rstrip("\n") + "\n")
            inputfile = tmpf
            try:
                with open(infile, "r") as fpin:
                    first_record = next(SeqIO.parse(fpin, "fasta"))
                if first_record.id == "target":
                    seq = str(first_record.seq).replace("-", "")
                    fpout.write(">%s\n" % (first_record.description))
                    fpout.write("%s\n" % (seq))
            except (IOError, ValueError, KeyError, StopIteration):
                # Best effort: the target record is optional.
                pass
        else:
            inputfile = infile

        (status, numseq, numRetrieved) = GetFullSeq(inputfile, hdl_seqdb,
                                                    fpout)
        if status == 1:
            msg = "%s retrieved %d out of %d sequences. Failed to read."
        else:
            msg = "%s retrieved %d out of %d sequences. Succeeded."
        sys.stdout.write(msg % (infile, numRetrieved, numseq) + "\n")
    finally:
        if tmpf is not None and os.path.exists(tmpf):
            os.remove(tmpf)
        myfunc.myclose(fpout)
    return 0
def main(g_params):#{{{
    """Parse the command line, then open and close the output file.

    Command line: INFILE, -o/-outfile FILE, -q, -h.

    NOTE(review): no processing happens between opening and closing the
    output; the function looks unfinished. Confirm the intended behavior.

    Returns 1 on a usage error or when INFILE fails myfunc.checkfile.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    myfunc.myclose(fpout)
def Stat1(lines, outfile):
    """Tabulate the lengths of runs of "1" in each "TMMap" line.

    Only lines whose first field is "TMMap" are used. Field 4 (with any
    trailing ":" removed) is the number of TM helices; lines with numTM <= 1
    or numTM >= MAX_NUMTM are skipped. The remaining integer fields are
    concatenated into a digit string, GetSegPos locates the segments of "1"
    and FilterSegPos filters them with the neighbour character "0".

    Each remaining segment (b, e) is counted by its length e - b in one of
    three histograms: 0.0 when b == 0, 1.0 when e == numTM, otherwise 0.5.
    The histograms are written side by side to outfile.
    """
    MAX_NUMTM = 14
    positions = [0.0, 0.5, 1.0]
    dt = dict((item, InitXY(MAX_NUMTM)) for item in positions)

    for line in lines:
        # Blank and whitespace-only lines have no fields to inspect.
        strs = line.split() if line else []
        if not strs or strs[0] != "TMMap":
            continue
        numTM = int(strs[4].rstrip(":"))
        if numTM <= 1 or numTM >= MAX_NUMTM:
            continue

        mapArray = [int(x) for x in strs[5:]]
        str_maparray = "".join("%d" % x for x in mapArray)
        posContList = GetSegPos(str_maparray, "1")
        posContList = FilterSegPos(posContList, str_maparray, "0")

        for (b, e) in posContList:
            if b == 0:
                pp = 0.0
            elif e == numTM:
                pp = 1.0
            else:
                pp = 0.5
            if DEBUG and (e - b) == 1:
                sys.stdout.write("Maparray %s %s\n" % (mapArray, posContList))
            dt[pp][1][e - b] += 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        fpout.write("#%2s %8s %3s %8s %3s %8s\n" % ("Nx", "Ny", "Ix", "Iy",
                                                     "Cx", "Cy"))
        for i in range(1, MAX_NUMTM + 1):
            for item in positions:
                (lx, ly) = dt[item]
                fpout.write("%3d %8d " % (lx[i], ly[i]))
            fpout.write("\n")
    finally:
        myfunc.myclose(fpout)
def MatchMSATopo_using_topodb(msafile, topodb, isIgnoreBadseq, #{{{
        method_match, outfile):
    """Match every sequence of an MSA to its topology from a database.

    The topology of each MSA record is fetched from topodb (a myfunc.MyDB)
    by seqid; a missing topology is reported on stderr and replaced by an
    empty string. The result of MatchSeqToTopo is written in FASTA form
    unless it equals "BADSEQ" and isIgnoreBadseq is true.

    Returns 1 when the database or the MSA file cannot be opened, else 0.
    """
    hdl_topo = myfunc.MyDB(topodb)
    if hdl_topo.failure:
        return 1
    hdl = myfunc.ReadFastaByBlock(msafile)
    if hdl.failure:
        # Do not leave the already opened database behind.
        hdl_topo.close()
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        recordList = hdl.readseq()
        while recordList is not None:
            for rd in recordList:
                topowithanno = hdl_topo.GetRecord(rd.seqid)
                if topowithanno is not None:
                    (topoid, topoanno, topo) = \
                        myfunc.ExtractFromSeqWithAnno(topowithanno)
                else:
                    print("topo not found for ID %s" % (rd.seqid),
                          file=sys.stderr)
                    topo = ""
                matchedtopo = MatchSeqToTopo(rd.seq, topo, method_match)
                if not (matchedtopo == "BADSEQ" and isIgnoreBadseq):
                    print(">%s" % (rd.description), file=fpout)
                    print("%s" % (matchedtopo), file=fpout)
            recordList = hdl.readseq()
    finally:
        myfunc.myclose(fpout)
        hdl.close()
        hdl_topo.close()
    return 0
def RemoveDupSeq(infile, g_outpath, method, isUseMD5):#{{{
    """Write the records of infile without duplicates.

    Duplicates are detected by seqid (method "id") or by sequence (method
    "seq"; with isUseMD5 the MD5 digest of the sequence is used as the key
    instead of the sequence itself). The first record of each key is kept.
    The output file is <outpath>/<basename of infile without extension>,
    where outpath is g_outpath or, when that is empty, the directory of
    infile.

    Returns 1 for an unknown method or when the output cannot be opened,
    -1 when the input cannot be read, and 0 on success.
    """
    import hashlib

    # An unknown method would leave the key undefined in the loop below.
    if method not in ("id", "seq"):
        sys.stderr.write(
            "Unrecognized method \"%s\", should be \"id\" or \"seq\"\n"
            % (method))
        return 1

    if g_outpath == "":
        outpath = myfunc.my_dirname(infile)
    else:
        outpath = g_outpath
    rootname = os.path.basename(os.path.splitext(infile)[0])
    outfile = "%s%s%s"%(outpath, os.sep, rootname)

    fpout = myfunc.myopen(outfile, None, "w", False)
    if fpout is None:
        return 1
    hdl = myfunc.ReadFastaByBlock(infile)
    if hdl.failure:
        myfunc.myclose(fpout)
        return -1

    try:
        myset = set()
        recordList = hdl.readseq()
        while recordList is not None:
            for rd in recordList:
                if method == "id":
                    key = rd.seqid
                elif isUseMD5:
                    # hashlib needs bytes; encode text sequences first.
                    if isinstance(rd.seq, bytes):
                        seq_bytes = rd.seq
                    else:
                        seq_bytes = rd.seq.encode("utf-8")
                    key = hashlib.md5(seq_bytes).digest()
                else:
                    key = rd.seq
                if key not in myset:
                    myset.add(key)
                    fpout.write(">%s\n%s\n"%(rd.description, rd.seq))
            recordList = hdl.readseq()
    finally:
        hdl.close()
        myfunc.myclose(fpout)
    return 0
def RemoveDupSeq(infile, g_outpath, method, isUseMD5): #{{{
    """Write the records of infile without duplicates.

    Duplicates are detected by seqid (method "id") or by sequence (method
    "seq"; with isUseMD5 the MD5 digest of the sequence is used as the key
    instead of the sequence itself). The first record of each key is kept.
    The output file is <outpath>/<basename of infile without extension>,
    where outpath is g_outpath or, when that is empty, the directory of
    infile.

    Returns 1 for an unknown method or when the output cannot be opened,
    -1 when the input cannot be read, and 0 on success.
    """
    import hashlib

    # An unknown method would leave the key undefined in the loop below.
    if method not in ("id", "seq"):
        sys.stderr.write(
            "Unrecognized method \"%s\", should be \"id\" or \"seq\"\n"
            % (method))
        return 1

    if g_outpath == "":
        outpath = myfunc.my_dirname(infile)
    else:
        outpath = g_outpath
    rootname = os.path.basename(os.path.splitext(infile)[0])
    outfile = "%s%s%s" % (outpath, os.sep, rootname)

    fpout = myfunc.myopen(outfile, None, "w", False)
    if fpout is None:
        return 1
    hdl = myfunc.ReadFastaByBlock(infile)
    if hdl.failure:
        myfunc.myclose(fpout)
        return -1

    try:
        myset = set()
        recordList = hdl.readseq()
        while recordList is not None:
            for rd in recordList:
                if method == "id":
                    key = rd.seqid
                elif isUseMD5:
                    # hashlib needs bytes; encode text sequences first.
                    if isinstance(rd.seq, bytes):
                        seq_bytes = rd.seq
                    else:
                        seq_bytes = rd.seq.encode("utf-8")
                    key = hashlib.md5(seq_bytes).digest()
                else:
                    key = rd.seq
                if key not in myset:
                    myset.add(key)
                    fpout.write(">%s\n%s\n" % (rd.description, rd.seq))
            recordList = hdl.readseq()
    finally:
        hdl.close()
        myfunc.myclose(fpout)
    return 0
def main(g_params):
    """Write the records of an MSA file in the order given by a list file.

    Command line: MSAFILE (or -msafile FILE), -orderlist FILE (required),
    -o FILE, -of/-outformat anno|fasta (default "fasta"), -h. The order
    list is read with ReadOrderList and the MSA with myfunc.ReadFasta. For
    every listed ID found in the MSA the annotation line is written,
    followed by the sequence unless the format is "anno"; IDs missing from
    the MSA are reported on stderr.

    Returns 1 on a usage error, otherwise 0.
    """
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    orderlistfile = ""
    msafile = ""
    outformat = "fasta" # fasta or anno

    value_options = ["-o", "--o", "-orderlist", "--orderlist", "-msafile",
                     "--msafile", "-of", "--of", "-outformat", "--outformat"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            msafile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in value_options:
                # Options taking a value must not be the last argument.
                if i + 1 >= numArgv:
                    print("Error! Option %s requires a value" % arg,
                          file=sys.stderr)
                    return 1
                value = sys.argv[i + 1]
                if arg in ["-o", "--o"]:
                    outFile = value
                elif arg in ["-orderlist", "--orderlist"]:
                    orderlistfile = value
                elif arg in ["-msafile", "--msafile"]:
                    msafile = value
                else:
                    outformat = value.lower()
                i += 2
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            msafile = arg
            i += 1

    if outformat not in ["anno", "fasta"]:
        print("Unrecognized outformat \"%s\","%(
            outformat) + " should be either \"anno\" or \"fasta\".",
            file=sys.stderr)
        return 1
    if orderlistfile == "":
        print("orderlist file not set. Exit", file=sys.stderr)
        return 1
    if msafile == "":
        print("msafile not set. Exit", file=sys.stderr)
        # Stop here instead of trying to read an empty file name.
        return 1

    orderList = ReadOrderList(orderlistfile)
    (idList, annoList, seqList) = myfunc.ReadFasta(msafile)

    if len(orderList) > 0 and len(idList) > 0:
        annoDict = dict(zip(idList, annoList))
        seqDict = {}
        if outformat != "anno":
            seqDict = dict(zip(idList, seqList))

        fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
        try:
            for sid in orderList:
                if sid in annoDict:
                    fpout.write(">%s\n" % annoDict[sid])
                    if outformat != "anno":
                        fpout.write("%s\n" % seqDict[sid])
                else:
                    print("seqid %s not in msafile %s"%(
                        sid, msafile), file=sys.stderr)
        finally:
            myfunc.myclose(fpout)

    return 0
def main(g_params):  #{{{
    """Parse the command line and generate sequence pairs within families.

    Family IDs come from positional arguments and/or the ``-l`` list file.
    The family-to-sequence map is read with ``myfunc.ReadFam2SeqidMap`` from
    ``-mapfile`` (a default under ``DATADIR3`` is used when not given).
    Depending on ``-method`` the work is delegated to
    ``GeneratePairWithinFam_m_0`` / ``_m_1`` / ``_m_2``.

    Args:
        g_params: global parameter dict. ``-q`` sets ``isQuiet``, ``-onlypdb``
            sets ``isOnlyPDB``; when ``-pdbtosp`` is given,
            ``uniprotidlist_with_pdb`` and ``uniprot2pdbMap`` are stored in it.

    Returns:
        0 on success, 1 on a usage or input error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    outfile_with_famid = ""
    outfile_with_pdb = ""
    outfile_fam2seqmap = ""
    idListFile = ""
    mapfile = None  # None = not given; resolved to the default after parsing
    restrictIDListFile = ""
    idList = []
    maxseq_for_fam = 200
    maxpair_for_fam = 300
    method = 0
    rand_seed = None
    pdbtospFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outwithfamid", "--outwithfamid"]:
                outfile_with_famid, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outfam2seqmap", "--outfam2seqmap"]:
                outfile_fam2seqmap, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outwithpdb", "--outwithpdb"]:
                outfile_with_pdb, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-tmprolist", "--tmprolist",
                         "-restrictlist", "--restrictlist"]:
                restrictIDListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-mapfile", "--mapfile"]:
                mapfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pdbtosp", "--pdbtosp"]:
                pdbtospFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-l", "--l"]:
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-maxseq", "--maxseq"]:
                maxseq_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-maxpair", "--maxpair"]:
                maxpair_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-m", "--m", "-method", "--method"]:
                method, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            elif arg in ["-onlypdb", "--onlypdb"]:
                g_params['isOnlyPDB'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (arg))
                return 1
        else:
            idList.append(arg)
            i += 1

    if method not in (0, 1, 2):
        # Previously an unknown method silently produced empty output.
        sys.stderr.write(
            "Error! Unsupported method %s, should be 0, 1 or 2\n" % (method))
        return 1

    if mapfile is None:
        mapfile = "%s%s%s" % (
            DATADIR3, os.sep,
            "wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.clanid2seqid"
        )

    if os.path.exists(idListFile):
        idList += myfunc.ReadIDList(idListFile)
    if len(idList) < 1:
        sys.stderr.write("no ID set. exit\n")
        return 1

    if myfunc.checkfile(mapfile, "idMapFile") != 0:
        return 1
    idMapDict = myfunc.ReadFam2SeqidMap(mapfile)

    # Read in pdbtosp map
    if pdbtospFile != "":
        (pdb2uniprotMap, uniprot2pdbMap) = myfunc.ReadPDBTOSP(pdbtospFile)
        g_params['uniprotidlist_with_pdb'] = set(uniprot2pdbMap.keys())
        g_params['uniprot2pdbMap'] = uniprot2pdbMap

    # .get(): the key may be absent when -onlypdb was not given and the
    # caller did not pre-populate g_params.
    if g_params.get('isOnlyPDB', False):
        if pdbtospFile == "":
            sys.stderr.write(
                "onlypdb is true but pdbtospFile is not set. exit.\n")
            return 1
        elif not g_params['uniprotidlist_with_pdb']:
            sys.stderr.write(
                "onlypdb is true but uniprotidlist_with_pdb is empty. exit.\n")
            return 1

    restrictIDSet = set()
    if restrictIDListFile != "":
        restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_withfamid = myfunc.myopen(outfile_with_famid, None, "w", False)
    fpout_withpdb = myfunc.myopen(outfile_with_pdb, None, "w", False)
    fpout_fam2seqmap = myfunc.myopen(outfile_fam2seqmap, None, "w", False)
    try:
        if method == 0:
            GeneratePairWithinFam_m_0(idList, idMapDict, restrictIDSet,
                                      maxseq_for_fam, rand_seed, fpout,
                                      fpout_withfamid)
        elif method == 1:
            GeneratePairWithinFam_m_1(idList, idMapDict, restrictIDSet,
                                      maxpair_for_fam, rand_seed, fpout,
                                      fpout_withfamid, fpout_fam2seqmap)
        else:  # method == 2, all to all
            GeneratePairWithinFam_m_2(idList, idMapDict, restrictIDSet, fpout,
                                      fpout_withfamid, fpout_withpdb)
    finally:
        myfunc.myclose(fpout)
        myfunc.myclose(fpout_withfamid)
        myfunc.myclose(fpout_withpdb)
        myfunc.myclose(fpout_fam2seqmap)
    return 0
def main(g_params):  #{{{
    """Rewrite an annotation FASTA file using sequences from a sequence db.

    Command-line options are read from ``sys.argv``:
        -outfile FILE  output file (stdout when not given)
        -seqdb FILE    sequence database read by ``GetSeqDict`` (required)
        -q             set ``g_params['isQuiet']``
        FILE           annotation file read by ``myfunc.ReadFasta`` (required)

    For every record of the annotation file whose ID is found in the sequence
    database, the annotation line, the database sequence and (when non-empty)
    the record's original content are written. IDs not found are reported on
    stderr and skipped.

    Args:
        g_params: global parameter dict, updated by ``-q``.

    Returns:
        0 on success, 1 on a usage or input error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    seqdbfile = ""
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outfile", "--outfile", "-seqdb", "--seqdb"]:
                if i + 1 >= numArgv:
                    # Previously this raised IndexError on argv[i + 1].
                    sys.stderr.write(
                        "Error! Option %s requires a value\n" % (arg))
                    return 1
                if arg in ["-outfile", "--outfile"]:
                    outfile = argv[i + 1]
                else:
                    seqdbfile = argv[i + 1]
                i += 2
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (arg))
                return 1
        else:
            infile = arg
            i += 1

    if infile == "":
        sys.stderr.write("annotation file not set\n")
        return 1
    elif not os.path.exists(infile):
        sys.stderr.write("annotation file %s does not exist\n" % (infile))
        return 1
    if seqdbfile == "":
        sys.stderr.write("seqdbfile file not set\n")
        return 1
    elif not os.path.exists(seqdbfile):
        sys.stderr.write("seqdbfile file %s does not exist\n" % (seqdbfile))
        return 1

    seqDict = GetSeqDict(seqdbfile)
    if seqDict == {}:
        sys.stderr.write("Failed to read seqdbfile %s\n" % (seqdbfile))
        return 1

    (idList, annoList, contentList) = myfunc.ReadFasta(infile)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for seqid, anno, content in zip(idList, annoList, contentList):
            # Keep the try body minimal: only the lookup can raise KeyError.
            try:
                seq = seqDict[seqid]
            except KeyError:
                sys.stderr.write("seqid %s not found in seqdb\n" % (seqid))
                continue
            fpout.write(">%s\n" % (anno))
            fpout.write("%s\n" % (seq))
            if content != "":
                fpout.write("%s\n" % (content))
    finally:
        myfunc.myclose(fpout)
    return 0
def main(g_params):  #{{{
    """Parse the command line and run ``UniprotTremblData2Table``.

    Command-line options are read from ``sys.argv``:
        -o FILE            output file (stdout when not given)
        -i FILE            data file; may also be a positional argument
        -keep_isoform      set ``g_params['filter_isoform']`` to False
        -keep_non_refpro   set ``g_params['filter_non_refpro']`` to False
        -keep_no_genename  set ``g_params['filter_no_genename']`` to False
        -oc VALUE          append VALUE to the restrict-OC list (repeatable)
        -q                 set ``g_params['isQuiet']``

    Args:
        g_params: global parameter dict, updated by the flags above.

    Returns:
        0 on success, 1 on a usage error or when the data file is missing.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    datafile = ""
    restrictOCList = []

    # flag option -> g_params key that is switched off
    keep_flags = {
        "-keep_isoform": 'filter_isoform',
        "--keep_isoform": 'filter_isoform',
        "-keep_non_refpro": 'filter_non_refpro',
        "--keep_non_refpro": 'filter_non_refpro',
        "-keep_no_genename": 'filter_no_genename',
        "--keep_no_genename": 'filter_no_genename',
    }
    value_options = ["-o", "--o", "-outfile", "--outfile", "-i", "--i",
                     "-oc", "--oc"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            datafile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in value_options:
                if i + 1 >= numArgv:
                    # Previously this raised IndexError on argv[i + 1].
                    sys.stderr.write(
                        "Error! Option %s requires a value\n" % (arg))
                    return 1
                value = argv[i + 1]
                if arg in ["-i", "--i"]:
                    datafile = value
                elif arg in ["-oc", "--oc"]:
                    restrictOCList.append(value)
                else:
                    outfile = value
                i += 2
            elif arg in keep_flags:
                g_params[keep_flags[arg]] = False
                i += 1
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (arg))
                return 1
        else:
            datafile = arg
            i += 1

    if not os.path.exists(datafile):
        sys.stderr.write(
            "datafile %s not set or not exists. Exit\n" % (datafile))
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        UniprotTremblData2Table(datafile, restrictOCList, fpout)
    finally:
        myfunc.myclose(fpout)
    return 0
def main(g_params):  #{{{
    """Classify each ID of a list as "euk", "gram+", "gram-" or "NA".

    Command-line options are read from ``sys.argv``:
        -o FILE      output file (stdout when not given)
        -euk FILE    ID list of the "euk" class
        -gram+ FILE  ID list of the "gram+" class
        -gram- FILE  ID list of the "gram-" class
        -q           set ``g_params['isQuiet']``
        FILE         ID list to classify

    One tab-separated line ``seqid, NCBI_TaxID, class`` is written per ID;
    the NCBI_TaxID column is always empty. The class lists are tested in the
    order euk, gram+, gram- and the first match wins.

    Args:
        g_params: global parameter dict, updated by ``-q``.

    Returns:
        0 on success, 1 on a usage error or when an input file fails
        ``myfunc.checkfile``.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    idListFile = ""
    euk = ""
    gram_pos = ""
    gram_neg = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idListFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            # "--outfile" added for consistency with the other option
            # spellings, which all accept both dash styles.
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-euk", "--euk"]:
                (euk, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-gram+", "--gram+"]:
                (gram_pos, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-gram-", "--gram-"]:
                (gram_neg, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (arg))
                return 1
        else:
            idListFile = arg
            i += 1

    for (path, label) in [(idListFile, "idListFile"), (euk, "euk"),
                          (gram_pos, "gram_pos"), (gram_neg, "gram_neg")]:
        if myfunc.checkfile(path, label) != 0:
            return 1

    idList = myfunc.ReadIDList(idListFile)
    # (class label, ID set), in priority order
    classSets = [
        ("euk", set(myfunc.ReadIDList(euk))),
        ("gram+", set(myfunc.ReadIDList(gram_pos))),
        ("gram-", set(myfunc.ReadIDList(gram_neg))),
    ]

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    NCBI_TaxID = ""
    try:
        for seqid in idList:
            cls = "NA"
            for (label, idSet) in classSets:
                if seqid in idSet:
                    cls = label
                    break
            fpout.write("%s\t%s\t%s\n" % (seqid, NCBI_TaxID, cls))
    finally:
        myfunc.myclose(fpout)
    return 0
print usage sys.exit(1) try: path_result = sys.argv[2] except: print usage sys.exit(1) outfile="" try: outfile = sys.argv[3] except: pass fpout = myfunc.myopen(outfile, sys.stdout, "w", False) hdl = myfunc.ReadLineByBlock(jobruntimeFile) if hdl.failure: sys.exit(1) lines = hdl.readlines() cnt=0 while lines != None: for line in lines: strs = line.split("\t") if len(strs) < 8: continue jobid = strs[0]
def CalculateQueue(resultdir, outfile):  # {{{
    """Rank the queued jobs under ``resultdir`` and write a status table.

    Every sub-folder of ``resultdir`` whose name is all digits or starts with
    "r_" or "rst_" is treated as a job folder. Its status comes from
    ``get_job_status``; the user is the content of the folder's "email" file,
    or of its "host" file when the email is empty or "N/A".

    Jobs with status "Queued" get a priority score

        score = queue_time_in_sec
                / ((rank + addition)**2 * max(length_seq, 100)**1.5)

    where ``rank`` is the 1-based position of the job among the same user's
    queued jobs ordered by descending queue time, and ``addition`` is the
    number of that user's "Running"/"Rerun" jobs. For jobs without a user
    (empty string) rank is 1 and addition is 0. Sequences shorter than 100
    are treated as length 100.

    The table lists the queued jobs by descending score, followed by all
    other jobs sorted by status with rank, time, score and counts set to 0.

    Args:
        resultdir: directory containing the job folders.
        outfile: output path passed to ``myfunc.myopen`` with stdout as the
            fallback stream.
    """
    # 1. get the list of working folders
    folder_nr_list = [
        folder for folder in os.listdir(resultdir)
        if os.path.isdir(resultdir + "/" + folder) and (
            folder.isdigit() or folder.startswith("r_")
            or folder.startswith("rst_"))
    ]

    # 2. gather information for queued jobs
    # Row layout of both tables:
    #   [status, user, queue_time_in_sec, length_seq,
    #    freq_in_queue, freq_running, score]
    job_table_in_queue = {}
    other_job_table = {}
    freq_user_in_queue = {}  # number of queued jobs per user
    freq_user_running = {}  # number of running jobs per user
    for folder in folder_nr_list:
        workdir = "%s/%s" % (resultdir, folder)
        status = get_job_status(workdir)
        email = ReadContent("%s/%s" % (workdir, "email"))
        host = ReadContent("%s/%s" % (workdir, "host"))
        if email and email != "N/A":
            user = email
        else:
            user = host

        if status in ["Queued"]:  # jobs in queue
            date_str = ReadContent("%s/%s" % (workdir, "date"))
            sequence = ReadContent("%s/%s" % (workdir, "sequence"))
            length_seq = len(sequence)
            # NOTE(review): the job is counted before its date is validated,
            # so a job ignored below still contributes to the user's queue
            # count. Kept as in the original; confirm this is intended.
            freq_user_in_queue[user] = freq_user_in_queue.get(user, 0) + 1
            try:
                date_submitted = datetime.datetime.strptime(
                    date_str, "%Y-%m-%d %H:%M:%S")
            except ValueError:
                sys.stderr.write("datefile = '%s'. date = '%s'\n" % (
                    "%s/date" % (workdir), date_str))
                sys.stderr.write("Ignore %s\n" % folder)
                continue
            queue_time = datetime.datetime.now() - date_submitted
            queue_time_in_sec = get_total_seconds(queue_time)
            job_table_in_queue[folder] = [status, user, queue_time_in_sec,
                                          length_seq]
        else:
            if status in ["Running", "Rerun"]:
                freq_user_running[user] = freq_user_running.get(user, 0) + 1
            other_job_table[folder] = [status, user, 0, 0, 0, 0, 0]

    # 3. add the per-user counts and group the queued jobs by user
    jobs_by_user = {}
    for (folder, row) in job_table_in_queue.items():
        user = row[1]
        if user != "":
            row.append(freq_user_in_queue[user])
            row.append(freq_user_running.get(user, 0))
        else:
            row.append(1)
            row.append(0)
        jobs_by_user.setdefault(user, []).append((folder, row))

    # 4. calculate the priority. Within each user the jobs are ranked by
    # queue time (longest waiting first); see the docstring for the formula.
    for (user, jobs) in jobs_by_user.items():
        jobs.sort(key=lambda x: x[1][2], reverse=True)
        for (idx, (folder_nr, row)) in enumerate(jobs):
            queue_time_in_sec = row[2]
            length_seq = row[3]
            rank = idx + 1
            addition = row[5]  # freq_running
            if user == "":
                rank = 1
                addition = 0
            score = queue_time_in_sec / (
                (rank + addition) ** 2 * max(length_seq, 100) ** 1.5)
            row.append(score)

    # now rank the job_table_in_queue again, this time by score
    sorted_job_table_in_queue = sorted(job_table_in_queue.items(),
                                       key=lambda x: x[1][6], reverse=True)
    sorted_other_job_table = sorted(other_job_table.items(),
                                    key=lambda x: x[1][0])  # sorted by status

    # write the result
    row_format = "%-6s %8s %4d %-30s %10.1f %10.1f %6d %6d\n"
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        fpout.write("#%-5s %8s %4s %-30s %10s %10s %6s %6s\n" % (
            "ID", "Status", "Rank", "User", "PD_time(s)", "Score",
            "Count_PD", "Count_R"))
        for (idx, (folder, row)) in enumerate(sorted_job_table_in_queue):
            fpout.write(row_format % (
                folder, row[0], idx + 1, row[1], row[2], row[6], row[4],
                row[5]))
        for (folder, row) in sorted_other_job_table:
            fpout.write(row_format % (
                folder, row[0], 0, row[1], row[2], row[6], row[4], row[5]))
    finally:
        myfunc.myclose(fpout)