def main(g_params): #{{{
    """Write each input pair together with the value gramfile maps it to.

    Options are read from ``sys.argv``:
      -o, --o, -outfile FILE   output file (handed to myfunc.myopen with
                               sys.stdout as the fallback argument)
      -gram, --gram FILE       pair-list file used as the lookup table
      -q, --q                  set g_params['isQuiet'] to True
      -h, --help               print help and return 1
    A bare argument, or the single argument following "--", is the input
    file.

    Both files are read with myfunc.ReadPairList.  For every input pair one
    tab-separated line "first, second, mapped" is written, where ``mapped``
    is the value gramfile associates with the pair's second field, or "NA"
    when there is none.

    Returns 1 on usage or file-check errors; otherwise falls off the end
    (returns None).
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    gramfile = ""

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            infile = arg
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-outfile"):
                outfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-gram", "--gram"):
                gramfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            pos += 1

    for required in (infile, gramfile):
        if myfunc.checkfile(required) != 0:
            return 1

    # Later entries win when gramfile repeats a key.
    gram_of = dict((pair[0], pair[1])
                   for pair in myfunc.ReadPairList(gramfile))
    input_pairs = myfunc.ReadPairList(infile)

    out = myfunc.myopen(outfile, sys.stdout, "w", False)
    for pair in input_pairs:
        out.write("%s\t%s\t%s\n" % (pair[0], pair[1],
                                    gram_of.get(pair[1], "NA")))
    myfunc.myclose(out)
def main():#{{{
    """Parse the command line and run FilterSignalPeptide.

    Options are read from ``sys.argv``:
      -o, --o, -outfile, --outfile FILE   output file
      -topo, --topo FILE                  topology file (required)
      -sig, --sig FILE                    signal-peptide file (required)
      -deleteseq, --deleteseq             pass True as the last argument of
                                          FilterSignalPeptide
      -q                                  accepted; has no effect here
      -h, --help                          print help and return 1

    This script takes no positional arguments: a bare argument, including
    the one following "--", is reported as an error.  (Previously the
    argument after "--" was silently discarded.)

    Returns 0 on success and 1 on any usage or file-check error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    topofile = ""
    sigpepfile = ""
    outfile = ""
    isDeleteSeqWithSignalPeptide = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # Positional arguments are not accepted, even after "--".
            print >> sys.stderr, "Error! Wrong argument:", arg
            return 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-topo", "--topo"]:
                topofile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-sig", "--sig"]:
                sigpepfile, i = myfunc.my_getopt_str(argv, i)
            elif arg == "-q":
                # Kept for command-line compatibility; nothing reads it.
                i += 1
            elif arg in ["-deleteseq", "--deleteseq"]:
                isDeleteSeqWithSignalPeptide = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            # Also reached for an empty-string argument.
            print >> sys.stderr, "Error! Wrong argument:", arg
            return 1

    if myfunc.checkfile(topofile, 'topofile') != 0:
        return 1
    if myfunc.checkfile(sigpepfile, 'sigpepfile') != 0:
        return 1

    sigpepDict = lcmp.ReadSignalPDict(sigpepfile)
    FilterSignalPeptide(topofile, sigpepDict, outfile,
                        isDeleteSeqWithSignalPeptide)
    return 0
def main(g_params):#{{{
    """Parse the command line and run GetPercentTMOfClan.

    Options are read from ``sys.argv``:
      -o, --o, -outfile FILE       output file (handed to myfunc.myopen
                                   with sys.stdout as the fallback argument)
      -map, --map, -mapfile FILE   map file read by myfunc.ReadFam2SeqidMap
      -q, --q                      set g_params['isQuiet'] to True
      -h, --help                   print help and return 1
    A bare argument, or the single argument following "--", is the input
    file, which is read by ReadPercentTM.

    Returns 1 on usage or file-check errors; otherwise falls off the end
    (returns None).
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    mapfile = ""

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            infile = arg
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-outfile"):
                outfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-map", "--map", "-mapfile"):
                mapfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            pos += 1

    for required in (infile, mapfile):
        if myfunc.checkfile(required) != 0:
            return 1

    clan_to_pfam = myfunc.ReadFam2SeqidMap(mapfile)
    percent_tm = ReadPercentTM(infile)

    out = myfunc.myopen(outfile, sys.stdout, "w", False)
    GetPercentTMOfClan(percent_tm, clan_to_pfam, out)
    myfunc.myclose(out)
def main(g_params): #{{{#{{{
    """Parse the command line and run Getseqlen on the input file.

    Options are read from ``sys.argv``:
      -i, --i, -printid, --printid          pass isPrintID=True
      -just-print-sum, --just-print-sum     pass isJustPrintSum=True
      -o, --o, -outfile, --outfile FILE     output file
      -bs, --bs, -block-size, --block-size N
                                            block size; must be > 0
                                            (default 100000)
      -h, --help                            print help and return 0
    A bare argument, or the single argument following "--", is the input
    file.

    ``g_params`` is accepted for interface compatibility and is not read.

    Returns 1 on usage or file-check errors, otherwise the status returned
    by Getseqlen.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = arg
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 0
            elif arg in ["-i", "--i", "-printid", "--printid"]:
                isPrintID = True
                i += 1
            elif arg in ["-just-print-sum", "--just-print-sum"]:
                isJustPrintSum = True
                i += 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-bs", "--bs", "-block-size", "--block-size"]:
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # The message demands a strictly positive size, so reject
                # zero as well (the old test "< 0" let it through).
                if BLOCK_SIZE <= 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            # Also reached for an empty-string argument; checkfile below
            # decides whether it is usable.
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE, fpout)
    myfunc.myclose(fpout)
    return status
def main(g_params):#{{{#{{{
    """Parse the command line and run Getseqlen on the input file.

    Options are read from ``sys.argv``:
      -i, --i, -printid, --printid          pass isPrintID=True
      -just-print-sum, --just-print-sum     pass isJustPrintSum=True
      -o, --o, -outfile, --outfile FILE     output file
      -bs, --bs, -block-size, --block-size N
                                            block size; must be > 0
                                            (default 100000)
      -h, --help                            print help and return 0
    A bare argument, or the single argument following "--", is the input
    file.

    ``g_params`` is accepted for interface compatibility and is not read.

    Returns 1 on usage or file-check errors, otherwise the status returned
    by Getseqlen.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = arg
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 0
            elif arg in ["-i", "--i", "-printid", "--printid"]:
                isPrintID = True
                i += 1
            elif arg in ["-just-print-sum", "--just-print-sum"]:
                isJustPrintSum = True
                i += 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-bs", "--bs", "-block-size", "--block-size"]:
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # The message demands a strictly positive size, so reject
                # zero as well (the old test "< 0" let it through).
                if BLOCK_SIZE <= 0:
                    print >> sys.stderr, "Error! BLOCK_SIZE should >0"
                    return 1
            else:
                print >> sys.stderr, ("Error! Wrong argument:%s" % arg)
                return 1
        else:
            # Also reached for an empty-string argument; checkfile below
            # decides whether it is usable.
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE, fpout)
    myfunc.myclose(fpout)
    return status
def main(g_params):
    """Parse the command line and write part of a FASTA file via CatFasta2.

    Options are read from ``sys.argv``:
      -i, --i, --infile FILE    input file
      -b, --b, --begin N        begin value passed on (default 0)
      -e, --e, --end N          end value passed on (default 999999999)
      -o, --o, --outfile FILE   output file
      -h, --help                print help and return 1
    A bare argument, or the single argument following "--", is also taken
    as the input file.  (Previously the argument after "--" was discarded.)

    ``g_params`` is accepted for interface compatibility and is not read.

    Returns 0 on success and 1 on usage or file-check errors.
    """
    # Check argv
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999
    # Hard-wired selector: 1 -> CatFasta, anything else -> CatFasta2.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-b", "--b", "--begin"]:
                begin, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-e", "--e", "--end"]:
                end, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-o", "--o", "--outfile"]:
                # Qualified like every other call in this function; the
                # bare name my_getopt_str was used here before.
                outFile, i = myfunc.my_getopt_str(argv, i)
            else:
                print >> sys.stderr, ("Error! Wrong argument:%s" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    if method == 1:
        CatFasta(inFile, begin, end, fpout)
    else:
        CatFasta2(inFile, begin, end, fpout)
    myfunc.myclose(fpout)
    return 0
def main(g_params):
    """Parse the command line and write part of a FASTA file via CatFasta2.

    Options are read from ``sys.argv``:
      -i, --i, --infile FILE    input file
      -b, --b, --begin N        begin value passed on (default 0)
      -e, --e, --end N          end value passed on (default 999999999)
      -o, --o, --outfile FILE   output file
      -h, --help                print help and return 1
    A bare argument, or the single argument following "--", is also taken
    as the input file.  (Previously the argument after "--" was discarded.)

    ``g_params`` is accepted for interface compatibility and is not read.

    Returns 0 on success and 1 on usage or file-check errors.
    """
    # Check argv
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999
    # Hard-wired selector: 1 -> CatFasta, anything else -> CatFasta2.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-b", "--b", "--begin"]:
                begin, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-e", "--e", "--end"]:
                end, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-o", "--o", "--outfile"]:
                # Qualified like every other call in this function; the
                # bare name my_getopt_str was used here before.
                outFile, i = myfunc.my_getopt_str(argv, i)
            else:
                print >> sys.stderr, ("Error! Wrong argument:%s" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    if method == 1:
        CatFasta(inFile, begin, end, fpout)
    else:
        CatFasta2(inFile, begin, end, fpout)
    myfunc.myclose(fpout)
    return 0
def main(): #{{{
    """Parse the command line and run RandFasta on the input file.

    Options are read from ``sys.argv``:
      -i, --infile FILE        input file
      -n, --n N                count passed to RandFasta (default 999999999)
      -seed, --seed N          random seed passed to RandFasta (default None)
      -o, --outfile FILE       output file
      -bs, --block-size, -block-size N
                               validated (must be > 0) but not otherwise
                               used by this function
      -h, --help               print help and return 1
    A bare argument, or the single argument following "--", is also taken
    as the input file.  (Previously the argument after "--" was discarded.)

    Returns 1 on usage or file-check errors; otherwise falls off the end
    (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-n", "--n"]:
                N, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-o", "--outfile"]:
                outFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-bs", "--block-size", "-block-size"]:
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # The message demands a strictly positive size, so reject
                # zero as well (the old test "< 0" let it through).
                if BLOCK_SIZE <= 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    RandFasta(inFile, N, rand_seed, fpout)
    myfunc.myclose(fpout)
def main():#{{{
    """Parse the command line and run RandFasta on the input file.

    Options are read from ``sys.argv``:
      -i, --infile FILE        input file
      -n, --n N                count passed to RandFasta (default 999999999)
      -seed, --seed N          random seed passed to RandFasta (default None)
      -o, --outfile FILE       output file
      -bs, --block-size, -block-size N
                               validated (must be > 0) but not otherwise
                               used by this function
      -h, --help               print help and return 1
    A bare argument, or the single argument following "--", is also taken
    as the input file.  (Previously the argument after "--" was discarded.)

    Returns 1 on usage or file-check errors; otherwise falls off the end
    (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-n", "--n"]:
                N, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-o", "--outfile"]:
                outFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-bs", "--block-size", "-block-size"]:
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # The message demands a strictly positive size, so reject
                # zero as well (the old test "< 0" let it through).
                if BLOCK_SIZE <= 0:
                    print >> sys.stderr, "Error! BLOCK_SIZE should >0"
                    return 1
            else:
                print >> sys.stderr, ("Error! Wrong argument:%s" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    RandFasta(inFile, N, rand_seed, fpout)
    myfunc.myclose(fpout)
def PrepareDataForPairaln(seqid1, seqid2, outpath): #{{{
    """Run selectPairaln.py to split out the alignment of one sequence pair.

    The source alignment is "<datapath>/<basename>.topoaln.fa", built from
    the module-level ``g_params``.  ``selectPairaln.py`` is looked up under
    the module-level ``rundir`` and invoked with -pairaln, -outpath, -ext
    and -split followed by the two sequence ids.

    Returns 1 when myfunc.checkfile rejects the source alignment.  In every
    other case, including a failing subprocess (whose error is printed to
    stdout), the function returns None.
    """
    ext_topoaln = ".topoaln.fa"
    topoalnfile = "%s/%s%s" % (g_params['datapath'], g_params['basename'],
                               ext_topoaln)
    if myfunc.checkfile(topoalnfile, "topoalnfile") != 0:
        return 1

    # 1 seqid1-seqid2.topoaln.fa
    # This path is computed but not used further by this function.
    pair_topoalnfile = "%s/%s_%s.topoaln.fa" % (outpath, seqid1, seqid2)

    script = "%s/selectPairaln.py" % (rundir)
    cmd = [script,
           "-pairaln", topoalnfile,
           "-outpath", outpath,
           "-ext", ext_topoaln,
           "-split", seqid1, seqid2]
    try:
        subprocess.check_output(cmd)
    except subprocess.CalledProcessError as e:
        print(e)
def main(): #{{{
    """Parse the command line and convert the input file with MPA2MSA.

    Options are read from ``sys.argv``:
      -o, --o FILE      output file (handed to myfunc.myopen with
                        sys.stdout as the fallback argument)
      -of, --of FORMAT  output format passed to MPA2MSA (default "mfa")
      -h, --help        print help and return 1
    A bare argument, or the single argument following "--", is the input
    file.

    Returns 1 on usage or file-check errors; otherwise falls off the end
    (returns None).
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    output_format = "mfa"

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            infile = arg
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o"):
                outfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-of", "--of"):
                output_format, pos = myfunc.my_getopt_str(args, pos)
            else:
                print >> sys.stderr, ("Error! Wrong argument:%s" % arg)
                return 1
        else:
            infile = arg
            pos += 1

    if myfunc.checkfile(infile, "MSA file") != 0:
        return 1

    out = myfunc.myopen(outfile, sys.stdout, "w", False)
    # detect the format of mpa files, the old format
    MPA2MSA(infile, output_format, out)
    myfunc.myclose(out)
def main(g_params): #{{{
    """Parse the command line and run SplitPfamFasta.

    Options are read from ``sys.argv``:
      -outpath, --outpath DIR   output directory; created if missing
      -q, --q                   set g_params['isQuiet'] to True
      -h, --help                print help and return 1
    A bare argument, or the single argument following "--", is the input
    file.

    The output directory is created with os.makedirs rather than by
    formatting the path into a shell command, so paths containing spaces or
    shell metacharacters work and a failure is reported instead of ignored.

    Returns 1 on usage errors, a failed file check, or when the output
    directory cannot be created; otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    if outpath == "":
        # NOTE(review): only a warning -- SplitPfamFasta is still called
        # with an empty outpath, as before.  Confirm that is intended.
        print >> sys.stderr, "outpath not set"
    elif not os.path.exists(outpath):
        try:
            os.makedirs(outpath)
        except OSError as e:
            print >> sys.stderr, "Failed to create outpath %s: %s" % (
                outpath, e)
            return 1

    SplitPfamFasta(infile, outpath)
def main(g_params):#{{{
    """Parse the command line, then open and immediately close the output.

    Options are read from ``sys.argv``:
      -o, --o, -outfile FILE   output file (handed to myfunc.myopen with
                               sys.stdout as the fallback argument)
      -q, --q                  set g_params['isQuiet'] to True
      -h, --help               print help and return 1
    A bare argument, or the single argument following "--", is the input
    file.

    Returns 1 on usage or file-check errors; otherwise falls off the end
    (returns None).
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            infile = arg
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-outfile"):
                outfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            pos += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    out = myfunc.myopen(outfile, sys.stdout, "w", False)
    # NOTE(review): nothing is written between open and close, and
    # goancDict merely receives myclose()'s result -- this looks like an
    # unfinished script; confirm before relying on it.
    goancDict = myfunc.myclose(out)
def main(): #{{{
    """Parse the command line and run ReWriteFasta(infile, outfile).

    Options are read from ``sys.argv``:
      -o, --o FILE   output file name passed to ReWriteFasta
      -h, --help     print help and return 1
    A bare argument, or the single argument following "--", is the input
    file.

    Returns 1 on usage or file-check errors, otherwise whatever
    ReWriteFasta returns.
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            infile = arg
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o"):
                outfile, pos = myfunc.my_getopt_str(args, pos)
            else:
                print >> sys.stderr, ("Error! Wrong argument:%s" % arg)
                return 1
        else:
            infile = arg
            pos += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    return ReWriteFasta(infile, outfile)
def main(g_params): #{{{
    """Parse the command line and match an MSA against topologies.

    Options are read from ``sys.argv``:
      -o, --o, -outfile FILE     output file name passed to the matcher
      -msa, --msa FILE           MSA file (required)
      -topo, --topo FILE         topology file
      -topodb, --topodb NAME     topology database; "<NAME>0.db" must exist
      -m, --m N                  matching method, 0 or 1 (default 1)
      -ig, --ig, -ignore-badseq, --ignore-badseq VALUE
                                 ignore bad sequences when VALUE starts with
                                 "y" or "Y" (default: ignore)
      -q, --q                    set g_params['isQuiet'] to True
      -h, --help                 print help and return 1
    No positional arguments are accepted; a bare argument, including the
    one following "--", is an error.

    When -topodb is given it takes precedence over -topo.

    Returns 1 on any usage or file error, otherwise the value returned by
    MatchMSATopo_using_topodb or MatchMSATopo_using_topofile.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    msafile = ""
    topofile = ""
    topodb = ""
    isIgnoreBadseq = True
    method_match = 1

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            print("Error! Wrong argument:", arg, file=sys.stderr)
            return 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-msa", "--msa"]:
                (msafile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-topo", "--topo"]:
                (topofile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-m", "--m"]:
                (method_match, i) = myfunc.my_getopt_int(argv, i)
                if method_match not in [0, 1]:
                    print("method_match %d not in [0,1]" % method_match,
                          file=sys.stderr)
                    return 1
            elif arg in ["-topodb", "--topodb"]:
                (topodb, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-ig", "--ig", "-ignore-badseq", "--ignore-badseq"]:
                (tmpss, i) = myfunc.my_getopt_str(argv, i)
                # Slice instead of indexing so an empty value means "no"
                # rather than raising IndexError.
                isIgnoreBadseq = tmpss[:1].lower() == "y"
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print("Error! Wrong argument:", arg, file=sys.stderr)
                return 1
        else:
            # Also reached for an empty-string argument.
            print("Error! Wrong argument:", arg, file=sys.stderr)
            return 1

    if myfunc.checkfile(msafile) != 0:
        return 1

    if topodb != "":
        if not os.path.exists(topodb + '0.db'):
            print("topodb %s does not exist" % (topodb), file=sys.stderr)
            return 1
        return MatchMSATopo_using_topodb(msafile, topodb, isIgnoreBadseq,
                                         method_match, outfile)

    if topofile != "":
        if not os.path.exists(topofile):
            print("topofile %s does not exist" % (topofile), file=sys.stderr)
            return 1
        return MatchMSATopo_using_topofile(msafile, topofile, isIgnoreBadseq,
                                           method_match, outfile)

    print("neither topofile nor topodb is set. exit", file=sys.stderr)
    return 1
else: print >> sys.stderr, "Error! Wrong argument:", argv[i] return 1 if outpath == "": print >> sys.stderr, "Error! outpath not set. Exit" print usage_short return 1 elif not os.path.exists(outpath): try: subprocess.check_output(["mkdir", "-p", outpath]) except subprocess.CalledProcessError, e: print e return 1 if myfunc.checkfile(pfamid2seqidFile, "pfamid2seqidFile") != 0: return 1 if myfunc.checkfile("%s0.db" % topodb, "topodb") != 0: return 1 if myfunc.checkfile("%s0.db" % seqdb, "seqdb") != 0: return 1 pfamid2seqidDict = myfunc.ReadFam2SeqidMap(pfamid2seqidFile) hdl_topo = myfunc.MyDB(topodb) if not hdl_topo.failure: idSet_topo = set(hdl_topo.indexedIDList) else: idSet_topo = set([])
def main(g_params):#{{{
    """Parse the job command line and make sure the output directory exists.

    Options are read from ``sys.argv``:
      -outpath, --outpath DIR    output directory (required; created with
                                 ``mkdir -p`` if missing)
      -tmpdir, --tmpdir DIR      stored in a local
      -jobid, --jobid ID         job id (required)
      -fasta, --fasta FILE       stored in a local
      -k, --k VALUE              stored in a local (default "no")
      -r, --r VALUE              stored in a local (default "yes")
      -t, --t N                  stored in a local (default None)
      -baseurl, --baseurl URL    stored in g_params['base_www_url']
      -email, --email ADDR       stored in a local
      -q, --q                    set g_params['isQuiet'] to True
      -force, --force            set g_params['isForceRun'] to True
      -h, --help                 print help and return 1
    A bare argument, or the single argument following "--", is the model
    file.

    Returns 1 on usage errors, a failed model-file check, or when the
    output directory cannot be created.  The code shown here stops after
    that step and otherwise falls off the end (returns None).
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outpath = ""
    modelfile = ""
    seqfile = ""
    tmpdir = ""
    email = ""
    jobid = ""
    isKeepFiles = "no"
    isRepack = "yes"
    targetlength = None

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            modelfile = arg
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                outpath, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-tmpdir", "--tmpdir"):
                tmpdir, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-jobid", "--jobid"):
                jobid, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-fasta", "--fasta"):
                seqfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-k", "--k"):
                isKeepFiles, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-r", "--r"):
                isRepack, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-t", "--t"):
                targetlength, pos = myfunc.my_getopt_int(args, pos)
            elif arg in ("-baseurl", "--baseurl"):
                base_url, pos = myfunc.my_getopt_str(args, pos)
                g_params['base_www_url'] = base_url
            elif arg in ("-email", "--email"):
                email, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            elif arg in ("-force", "--force"):
                g_params['isForceRun'] = True
                pos += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            modelfile = arg
            pos += 1

    if jobid == "":
        print >> sys.stderr, "%s: jobid not set. exit"%(sys.argv[0])
        return 1

    if myfunc.checkfile(modelfile, "modelfile") != 0:
        return 1

    if outpath == "":
        print >> sys.stderr, "outpath not set. exit"
        return 1
    if not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            print >> sys.stderr, e
            return 1
def main(g_params):#{{{
    """Parse the command line and prepare the input and program lists.

    Options are read from ``sys.argv``:
      -prog, --prog NAME          add NAME to the program list (repeatable)
      -proglist, --proglist FILE  file of program names, read with
                                  myfunc.ReadIDList and appended to the list
      -gzip, --gzip yes|no        set g_params['isGzip'] (True when the
                                  value starts with "y" or "Y")
      -num, --num N               set g_params['num_per_split']
      -outpath, --outpath DIR     output directory; created with
                                  ``mkdir -p`` if given and missing
      -q                          set g_params['isQuiet'] to True
      -h, --help                  print help and return 1
    A bare argument, or the single argument following "--", is the input
    file, read with ReadInputList and sorted ascending by each record's
    second field.

    When neither -prog nor -proglist is given, the module-level
    ``default_progList`` is used.

    Returns 1 on usage errors, a failed file check, an empty program list,
    or when the output directory cannot be created.  The code shown here
    stops after that step and otherwise falls off the end (returns None).
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    infile = ""
    progList = []
    progListFile = ""
    outpath = ""

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            infile = arg
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-prog", "--prog"):
                prog_name, pos = myfunc.my_getopt_str(args, pos)
                progList.append(prog_name)
            elif arg in ("-gzip", "--gzip"):
                answer, pos = myfunc.my_getopt_str(args, pos)
                first = answer.upper()[0]
                if first == "-":
                    # The next option was swallowed as the value.
                    print >> sys.stderr, ("Bad argument, -gzip should be"
                                          " followed by yes or no")
                    return 1
                g_params['isGzip'] = (first == "Y")
            elif arg in ("-num", "--num"):
                num_per_split, pos = myfunc.my_getopt_int(args, pos)
                g_params['num_per_split'] = num_per_split
            elif arg in ("-proglist", "--proglist"):
                progListFile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-outpath", "--outpath"):
                outpath, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-q",):
                g_params['isQuiet'] = True
                pos += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            pos += 1

    if myfunc.checkfile(infile,"infile") != 0:
        return 1

    # [(filename, numseq)]
    inputList = sorted(ReadInputList(infile), key=lambda rec: rec[1])
    rtname_infile = os.path.basename(os.path.splitext(infile)[0])

    # get progList
    if progListFile != "":
        listed = myfunc.ReadIDList(progListFile)
        if len(listed) == 0:
            print >> sys.stderr, "progListFile %s does not exist or empty"%(
                progListFile)
            return 1
        progList.extend(listed)
    elif len(progList) == 0:
        progList = default_progList

    if len(progList) == 0:
        print >> sys.stderr, "progList is empty. exit"
        return 1

    if outpath != "" and not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            print(e)
            return 1
def main(g_params): #{{{
    """Parse the job command line, prepare directories and call RunJob.

    Options are read from ``sys.argv``:
      -outpath, --outpath DIR    output directory (required)
      -tmpdir, --tmpdir DIR      temporary directory (required)
      -jobid, --jobid ID         job id (required)
      -fasta, --fasta FILE       sequence file passed to RunJob
      -baseurl, --baseurl URL    stored in g_params['base_www_url']
      -email, --email ADDR       e-mail address passed to RunJob
      -q, --q                    set g_params['isQuiet'] to True
      -force, --force            set g_params['isForceRun'] to True
      -h, --help                 print help and return 1
    A bare argument, or the single argument following "--", is the model
    file.

    Missing outpath/tmpdir directories are created with ``mkdir -p``;
    g_params['debugfile'] is set to "<outpath>/debug.log"; and the
    module-level ``path_profile_cache`` directory is created if absent.

    Returns 1 on any usage, file-check or directory-creation error,
    otherwise the value returned by RunJob.
    """

    def _mkdir_p(path):
        """Run ``mkdir -p path``; on failure print the error, return False."""
        try:
            subprocess.check_output(["mkdir", "-p", path])
        except subprocess.CalledProcessError as e:
            print(e, file=sys.stderr)
            return False
        return True

    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outpath = ""
    modelfile = ""
    seqfile = ""
    tmpdir = ""
    email = ""
    jobid = ""

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            modelfile = arg
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                outpath, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-tmpdir", "--tmpdir"):
                tmpdir, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-jobid", "--jobid"):
                jobid, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-fasta", "--fasta"):
                seqfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-baseurl", "--baseurl"):
                base_url, pos = myfunc.my_getopt_str(args, pos)
                g_params['base_www_url'] = base_url
            elif arg in ("-email", "--email"):
                email, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            elif arg in ("-force", "--force"):
                g_params['isForceRun'] = True
                pos += 1
            else:
                print("Error! Wrong argument:", arg, file=sys.stderr)
                return 1
        else:
            modelfile = arg
            pos += 1

    if jobid == "":
        print("%s: jobid not set. exit" % (sys.argv[0]), file=sys.stderr)
        return 1

    if myfunc.checkfile(modelfile, "modelfile") != 0:
        return 1

    if outpath == "":
        print("outpath not set. exit", file=sys.stderr)
        return 1
    if not os.path.exists(outpath) and not _mkdir_p(outpath):
        return 1

    if tmpdir == "":
        print("tmpdir not set. exit", file=sys.stderr)
        return 1
    if not os.path.exists(tmpdir) and not _mkdir_p(tmpdir):
        return 1

    g_params['debugfile'] = "%s/debug.log" % (outpath)

    if not os.path.exists(path_profile_cache):
        os.makedirs(path_profile_cache)

    return RunJob(modelfile, seqfile, outpath, tmpdir, email, jobid,
                  g_params)
def main(g_params):#{{{
    """Parse the job command line and make sure the output directory exists.

    Options are read from ``sys.argv``:
      -outpath, --outpath DIR    output directory (required; created with
                                 ``mkdir -p`` if missing)
      -tmpdir, --tmpdir DIR      stored in a local
      -jobid, --jobid ID         job id (required)
      -baseurl, --baseurl URL    stored in g_params['base_www_url']
      -email, --email ADDR       stored in a local
      -q, --q                    set g_params['isQuiet'] to True
      -h, --help                 print help and return 1
    A bare argument, or the single argument following "--", is the input
    file.

    Returns 1 on usage errors, a failed input-file check, or when the
    output directory cannot be created.  The code shown here stops after
    that step and otherwise falls off the end (returns None).
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""
    tmpdir = ""
    email = ""
    jobid = ""

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            infile = arg
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                outpath, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-tmpdir", "--tmpdir"):
                tmpdir, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-jobid", "--jobid"):
                jobid, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-baseurl", "--baseurl"):
                base_url, pos = myfunc.my_getopt_str(args, pos)
                g_params['base_www_url'] = base_url
            elif arg in ("-email", "--email"):
                email, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            pos += 1

    if jobid == "":
        print >> sys.stderr, "%s: jobid not set. exit"%(sys.argv[0])
        return 1

    if myfunc.checkfile(infile, "infile") != 0:
        return 1

    if outpath == "":
        print >> sys.stderr, "outpath not set. exit"
        return 1
    if not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            print >> sys.stderr, e
            return 1
def main(g_params): #{{{
    """Write GO information for the requested sequence ids.

    Options are read from ``sys.argv``:
      -o, --o, -outfile FILE        output file (handed to myfunc.myopen
                                    with sys.stdout as the fallback argument)
      -outpath, --outpath DIR       accepted; not used by this function
      -l, --l FILE                  file of ids, read with myfunc.ReadIDList
      -uniprotdb, --uniprotdb NAME  database name (required);
                                    "<NAME>0.db" must pass myfunc.checkfile
      -q, --q                       set g_params['isQuiet'] to True
      -h, --help                    print help and return 1
    Bare arguments, and the single argument following "--", are sequence
    ids.

    Every id with a record in the database gets its GO info written through
    WriteGOInfo; ids without a record are skipped.  The output handle is
    now also closed when the database cannot be opened or a lookup raises.

    Returns 1 on usage errors, a failed file check, or when the database
    cannot be opened; otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"
    outfile = ""
    idListFile = ""
    uniprotDBname = ""
    idList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outpath", "--outpath"]:
                # Parsed for command-line compatibility only.
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-uniprotdb", "--uniprotdb"]:
                (uniprotDBname, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            idList.append(arg)
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if uniprotDBname == "":
        print >> sys.stderr, "uniprotdb not set"
        return 1

    uniprotdbfile = "%s0.db" % uniprotDBname
    if myfunc.checkfile(uniprotdbfile, "uniprotdbfile") != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    hdl = myfunc.MyDB(uniprotDBname)
    if hdl.failure:
        # Do not leave the output handle open on this early exit.
        myfunc.myclose(fpout)
        return 1

    try:
        for seqid in idList:
            data = hdl.GetRecord(seqid)
            if data is not None:
                goinfo = GetGOInfoFromUniprotData(data)
                WriteGOInfo(seqid, goinfo, fpout)
    finally:
        hdl.close()
        myfunc.myclose(fpout)
def main(g_params): #{{{
    """Parse the command line and prepare the output directory.

    Options are read from ``sys.argv``:
      -o, --o, -outfile FILE       output file (required)
      -l, --l FILE                 file list, read with myfunc.ReadIDList
                                   and appended to the inputs
      -seqidttype, --seqidttype N  stored in g_params['seqidttype']
      -tableinfo, --tableinfo FILE stored in a local
      -topoaln, --topoaln FILE     must pass myfunc.checkfile
      -aapath, --aapath PATH       must pass myfunc.checkfile
      -thncls2, --thncls2 N        stored in a local (default 2)
      -thfrac2, --thfrac2 X        stored in a local (default 0.05)
      -pfamdef, --pfamdef FILE     stored in a local (default is a path
                                   under the module-level DATADIR3)
      -pdbtosp, --pdbtosp FILE     stored in a local
      -sprot, --sprot FILE         stored in a local
      -q, --q                      set g_params['isQuiet'] to True
      -h, --help                   print help and return 1
    Bare arguments, and the single argument following "--", are input
    files.

    The directory part of the output file (myfunc.my_dirname) is created
    with ``mkdir -p`` if missing.

    Returns 1 on usage errors, missing inputs, failed file checks, or when
    the directory cannot be created.  The code shown here stops after that
    step and otherwise falls off the end (returns None).
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    threshold_Fraction_Group_2 = 0.05
    threshold_NumSeq_Group_2 = 2
    tableinfoFile = ""
    pdbtospFile = ""
    sprotACListFile = ""
    threshold_g12_seqidt = 20.0
    topoalnFile = ""
    aapath = ""

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            fileList.append(arg)
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-outfile"):
                outfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-l", "--l"):
                fileListFile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-seqidttype", "--seqidttype"):
                seqidttype, pos = myfunc.my_getopt_int(args, pos)
                g_params['seqidttype'] = seqidttype
            elif arg in ("-tableinfo", "--tableinfo"):
                tableinfoFile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-topoaln", "--topoaln"):
                topoalnFile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-aapath", "--aapath"):
                aapath, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-thncls2", "--thncls2"):
                threshold_NumSeq_Group_2, pos = myfunc.my_getopt_int(
                    args, pos)
            elif arg in ("-thfrac2", "--thfrac2"):
                threshold_Fraction_Group_2, pos = myfunc.my_getopt_float(
                    args, pos)
            elif arg in ("-pfamdef", "--pfamdef"):
                pfamDefFile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-pdbtosp", "--pdbtosp"):
                pdbtospFile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-sprot", "--sprot"):
                sprotACListFile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                pos += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            fileList.append(arg)
            pos += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    if len(fileList) < 1:
        print >> sys.stderr, "No input set. exit"
        return 1

    if myfunc.checkfile(topoalnFile, "topoalnFile") != 0:
        return 1
    if myfunc.checkfile(aapath, "aapath") != 0:
        return 1

    if outfile == "":
        print >> sys.stderr, "outfile not set. Exit"
        return 1

    outpath = myfunc.my_dirname(outfile)
    if not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            print(e)
            return 1
outfile_selected_pair, "-split", "-outpath", outpath, "-ext", ext_topoaln ] print '\n', " ".join(cmd), '\n' try: subprocess.check_output(cmd) except subprocess.CalledProcessError, e: print e return 1 # draw pairwise topology alignment for li in selectedPairList: single_topoaln_file = "%s%s%s%s" % (outpath, os.sep, "%s_%s" % (li[0], li[1]), ext_topoaln) aafile = "%s%s%s%s" % (aapath, os.sep, li[3], ".fa") if myfunc.checkfile(single_topoaln_file, "single_topoaln_file") != 0: continue if myfunc.checkfile(aafile, "aafile") != 0: continue method_shrink = "2" method_plot = "mat" shrinkrateTM = "3" maxHoldLoop = "4" cmd = [ "%s/drawMSATopo.py" % (binpath), single_topoaln_file, "-aaseq", aafile, "-pdg", "yes", "-shrink", "yes", "-m-shrink", method_shrink, "-method", method_plot, "-text", "yes", "-krbias", "-shrinkrateTM", shrinkrateTM, "-max-hold-loop", maxHoldLoop ] try: subprocess.check_output(cmd)
def main(g_params): #{{{
    """Parse the command line and run SelectPairaln for the chosen pairs.

    Options are read from ``sys.argv``:
      -o, --o FILE              output file (handed to myfunc.myopen with
                                sys.stdout as the fallback argument)
      -pairaln, --pairaln FILE  pairwise alignment file (required)
      -outpath, --outpath DIR   stored in g_params['outpath']
      -ext, --ext EXT           stored in g_params['ext']
      -l, --l FILE              pair list, read with myfunc.ReadPairList
      -split, --split           set g_params['isSplit'] to True
      -q                        set g_params['isQuiet'] to True
      -h, --help                print help and return 1
    Bare arguments, and the single argument following "--", are collected;
    when at least two are given, the first two form one pair.

    With g_params['isSplit'] set, g_params['outpath'] must be non-empty and
    is created with ``mkdir -p`` if missing (a failing mkdir raises
    subprocess.CalledProcessError).

    Returns 1 on usage or file-check errors; otherwise falls off the end
    (returns None).
    """
    args = sys.argv
    argc = len(args)
    if argc < 2:
        PrintHelp()
        return 1

    outfile = ""
    pairalnFile = ""
    pair = []
    listfile = ""

    pos = 1
    take_literal = False  # True for the one argument right after "--"
    while pos < argc:
        arg = args[pos]
        if take_literal:
            pair.append(arg)
            take_literal = False
            pos += 1
        elif arg == "--":
            take_literal = True
            pos += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o"):
                outfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-pairaln", "--pairaln"):
                pairalnFile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-outpath", "--outpath"):
                out_dir, pos = myfunc.my_getopt_str(args, pos)
                g_params['outpath'] = out_dir
            elif arg in ("-ext", "--ext"):
                extension, pos = myfunc.my_getopt_str(args, pos)
                g_params['ext'] = extension
            elif arg in ("-l", "--l"):
                listfile, pos = myfunc.my_getopt_str(args, pos)
            elif arg in ("-q",):
                g_params['isQuiet'] = True
                pos += 1
            elif arg in ("-split", "--split"):
                g_params['isSplit'] = True
                pos += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            pair.append(arg)
            pos += 1

    if myfunc.checkfile(pairalnFile, "pairalnFile") != 0:
        return 1

    if g_params['isSplit']:
        split_dir = g_params['outpath']
        if split_dir == "":
            print >> sys.stderr, ("Error! outpath string is empty when 'split'"
                                  " is enabled. exit")
            return 1
        if not os.path.exists(split_dir):
            subprocess.check_call(["mkdir", "-p", split_dir])

    wanted = []
    if len(pair) >= 2:
        wanted.append((pair[0], pair[1]))
    if listfile != "":
        wanted += myfunc.ReadPairList(listfile)
    pairlistSet = set(wanted)
    del wanted

    out = myfunc.myopen(outfile, sys.stdout, "w", False)
    SelectPairaln(pairalnFile, pairlistSet, out)
    myfunc.myclose(out)
def main(g_params): #{{{
    """Split an id list into one file per class.

    Options are read from ``sys.argv``:
      -outpath, --outpath DIR   output directory; defaults to the directory
                                of the input file, or "." if that is empty
      -class, --class FILE      class file, read with ReadClassDict
                                (required)
      -q, --q                   set g_params['isQuiet'] to True
      -h, --help                print help and return 1
    A bare argument, or the single argument following "--", is the id-list
    file, read with myfunc.ReadIDList.

    For every class in the class list a file
    "<outpath>/<rootname>.<class><ext>" is opened for writing, where
    rootname and ext come from the input file name.  Each id is written to
    the file of its class; ids missing from the class dictionary are
    reported on stderr and skipped.  All output files are closed even if an
    error interrupts the run.

    Returns 1 on usage or file-check errors; otherwise falls off the end
    (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""
    classfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-class", "--class"]:
                (classfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(classfile, "Class File") != 0:
        return 1

    if outpath == "":
        outpath = os.path.dirname(infile)
        if outpath == "":
            outpath = "."

    (id2ClassDict, classList) = ReadClassDict(classfile)
    idList = myfunc.ReadIDList(infile)

    (rootpath, ext) = os.path.splitext(infile)
    rootname = os.path.basename(rootpath)

    fpoutList = {}
    try:
        for cls in classList:
            outfile = outpath + os.sep + rootname + ".%s" % cls + ext
            fpoutList[cls] = open(outfile, "w")

        for idd in idList:
            try:
                cls = id2ClassDict[idd]
            except KeyError:
                # Only a missing id is expected here; anything else should
                # surface rather than be swallowed.
                print >> sys.stderr, "id %s not in classDict" % idd
                continue
            fpoutList[cls].write("%s\n" % idd)
    finally:
        for fp in fpoutList.values():
            fp.close()
def main(g_params): #{{{
    """Command-line entry point: benchmark predicted topologies.

    Options are parsed from ``sys.argv``; positional arguments are not
    accepted.  The real topologies are read from ``-realtopo`` (FASTA), the
    optional sequences from ``-seqfile``, and the work is delegated to
    ``Benchmark``.

    g_params -- dict of run-time settings.  'mode', 'path_predtopo',
                'basename', 'isQuiet', 'isRMSP', 'isDEBUG', 'restrictIDset'
                and 'isRestrictIDList' may be written here.  When 'mode' is
                empty it is derived from 'path_predtopo' ("tps" if the path
                contains "topcons_single", "tp" if it contains "topcons").

    Returns 1 on a usage or input error; nothing is returned explicitly on
    success.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    real_topofile = ""
    seqfile = ""
    restrictIDListFile = ""
    outfile_wrong_predtopo = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            # this program takes no positional arguments
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-owrong", "--owrong"]:
                (outfile_wrong_predtopo, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-realtopo", "--realtopo"]:
                (real_topofile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqfile", "--seqfile"]:
                (seqfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mode", "--mode"]:
                (g_params['mode'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-path_predtopo", "--path_predtopo"]:
                (g_params['path_predtopo'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-basename", "--basename"]:
                (g_params['basename'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-restrictidlist", "--restrictidlist"]:
                (restrictIDListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-rmsp", "--rmsp"]:
                g_params['isRMSP'] = True
                i += 1
            elif argv[i] in ["-debug", "--debug"]:
                g_params['isDEBUG'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1

    if myfunc.checkfile(g_params['path_predtopo'], "path_predtopo") != 0:
        return 1
    if g_params['basename'] == "":
        print >> sys.stderr, "%s: basename not set. exit" % (argv[0])
        return 1
    if myfunc.checkfile(real_topofile, "real_topofile") != 0:
        return 1

    if restrictIDListFile != "":
        g_params['restrictIDset'] = set(myfunc.ReadIDList(restrictIDListFile))
        g_params['isRestrictIDList'] = True

    # Derive the mode from the prediction path when not given explicitly.
    if g_params['mode'] == "":
        if g_params['path_predtopo'].find("topcons_single") >= 0:
            g_params['mode'] = "tps"
        elif g_params['path_predtopo'].find("topcons") >= 0:
            g_params['mode'] = "tp"
        else:
            # The original referenced an undefined local name here, which
            # raised NameError instead of printing this message.
            print >> sys.stderr, "mode not set, and can not be recognized from path_predtopo=%s" % (
                g_params['path_predtopo'])
            return 1
    if not g_params['mode'] in ["tp", "tps"]:
        print >> sys.stderr, "Unrecognized mode = %s" % (g_params['mode'])
        return 1

    (real_idlist, real_annolist, real_topolist) = myfunc.ReadFasta(real_topofile)

    seqDict = {}
    if seqfile != "" and os.path.exists(seqfile):
        (seq_idlist, seq_annolist, seqlist) = myfunc.ReadFasta(seqfile)
        seqDict = dict(zip(seq_idlist, seqlist))

    if len(real_idlist) <= 0:
        print >> sys.stderr, "Failed to read real_topofile %s" % (
            real_topofile)
        return 1
    real_topodict = dict(zip(real_idlist, real_topolist))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_wrong = myfunc.myopen(outfile_wrong_predtopo, None, "w", False)
    try:
        # Partition the IDs by number of TM helices in the real topology.
        idSet_single = set([])
        idSet_multi = set([])
        for seqid in real_topodict:
            numTM = myfunc.CountTM(real_topodict[seqid])
            if numTM == 1:
                idSet_single.add(seqid)
            elif numTM > 1:
                idSet_multi.add(seqid)

        # Only "All_Alpha" is currently benchmarked; "Single" and "Multi"
        # remain supported by the selection code below.
        for TM_type in ["All_Alpha"]:
            if TM_type == "All_Alpha":
                sub_real_topodict = real_topodict
            else:
                sub_real_topodict = {}
                for seqid in real_topodict:
                    topo = real_topodict[seqid]
                    numTM = myfunc.CountTM(topo)
                    if TM_type == "Single" and numTM == 1:
                        sub_real_topodict[seqid] = topo
                    elif TM_type == "Multi" and numTM > 1:
                        sub_real_topodict[seqid] = topo
            Benchmark(sub_real_topodict, idSet_single, idSet_multi, TM_type,
                      fpout, fpout_wrong, seqDict)
    finally:
        myfunc.myclose(fpout)
        # fpout_wrong was previously left open; it is None when -owrong is
        # not given (myclose is called on such handles elsewhere in this
        # code base).
        myfunc.myclose(fpout_wrong)
def Itol_Tree(pfamid, datapath, outpath):
    """Upload the tree of one family to iTOL and export it as figures.

    pfamid   -- family identifier; used as tree name and as file name stem
    datapath -- directory holding the input files
                ``<pfamid>.TMpro.clustalo10.fasttree``,
                ``<pfamid>-taxonomy-file.txt`` and ``<pfamid>tms.txt``
    outpath  -- directory that receives ``<pfamid>-itol.eps`` and
                ``<pfamid>-itol.svg``; ``epstopdf`` and ``convert`` are then
                run on the EPS file to produce PDF and JPG versions

    Returns 1 when the tree file or the taxonomy file is missing.  Calls
    ``sys.exit(1)`` when the upload fails.  Nothing is returned explicitly
    on success.
    """
    itl = Itol.Itol()

    treefile = "%s%s%s%s"%(datapath , os.sep , pfamid , ".TMpro.clustalo10.fasttree")
    taxofile = "%s%s%s%s"%(datapath , os.sep , pfamid , "-taxonomy-file.txt")
    TMdeffile = "%s%s%s%s"%(datapath , os.sep , pfamid , "tms.txt")

    if not os.path.exists(treefile):
        print >> sys.stderr, "tree file %s does not exist. Ignore" %(treefile)
        return 1
    if myfunc.checkfile(taxofile, "taxofile") != 0:
        return 1

    # The number of lines in the taxonomy file is used as the leaf count.
    # Read it under ``with`` so the handle is not leaked.
    with open(taxofile, "r") as fp_taxo:
        numLeave = len(fp_taxo.readlines())
    fontsize = GetFontSize(numLeave)

    colordeffile = taxofile
    branchlabelfile = ""
    dataset1 = TMdeffile
    # Datasets 2-4 are currently not supplied; the empty paths make the
    # os.path.exists() tests below skip them.
    dataset2 = ""
    dataset3 = ""
    dataset4 = ""

    #===================================
    itl.add_variable('treeFile',treefile)
    itl.add_variable('treeName', pfamid)
    itl.add_variable('treeFormat','newick')
    if os.path.exists(colordeffile):
        itl.add_variable('colorDefinitionFile', colordeffile)
    if os.path.exists(branchlabelfile):
        itl.add_variable('branchLabelsFile', branchlabelfile)
    #===================================
    if os.path.exists(dataset1):
        itl.add_variable('dataset1File',dataset1)
        itl.add_variable('dataset1Label','numTM_and_repeat')
        itl.add_variable('dataset1Separator','comma')
        itl.add_variable('dataset1Type','domains')
        itl.add_variable('dataset1PreventOverlap','1')
        itl.add_variable('dataset1Color','#FF0000')
    #===================================
    if os.path.exists(dataset2):
        itl.add_variable('dataset2File', dataset2)
        itl.add_variable('dataset2Label', 'taxonomy')
        itl.add_variable('dataset2Separator','comma')
        itl.add_variable('dataset2Type','colorstrip')
        itl.add_variable('dataset2StripWidth','300')
        itl.add_variable('dataset2PreventOverlap','1')
        itl.add_variable('dataset2ColoringType','both')
        itl.add_variable('dataset2CirclesSpacing','100')
    #===================================
    if os.path.exists(dataset3):
        itl.add_variable('dataset3File', dataset3)
        itl.add_variable('dataset3Label', 'pfam')
        itl.add_variable('dataset3Separator','tab')
        itl.add_variable('dataset3Type','colorstrip')
    #===================================
    if os.path.exists(dataset4):
        itl.add_variable('dataset4File', dataset4)
        itl.add_variable('dataset4Label', 'cluster')
        itl.add_variable('dataset4Separator','comma')
        itl.add_variable('dataset4Type','colorstrip')
        itl.add_variable('dataset4StripWidth','200')
        itl.add_variable('dataset4PreventOverlap','1')
        itl.add_variable('dataset4ColoringType','both')
    #===================================

    # Submit the tree
    print('')
    print('Uploading the tree. This may take some time depending on how large the tree is and how much load there is on the itol server')
    good_upload = itl.upload()
    if good_upload == False:
        print('There was an error:'+itl.comm.upload_output)
        sys.exit(1)

    # Report what the server returned for the upload
    print('Tree ID: '+str(itl.comm.tree_id))
    print('iTOL output: '+str(itl.comm.upload_output))
    print('Tree Web Page URL: '+itl.get_webpage())
    print('Warnings: '+str(itl.comm.warnings))

    # Export the figures
    print('Exporting to pdf')
    itol_exporter = itl.get_itol_export()
    itol_exporter.set_export_param_value('format', 'eps')
    itol_exporter.set_export_param_value('displayMode',"circular")
    itol_exporter.set_export_param_value('showBS',"0")
    itol_exporter.set_export_param_value('fontSize',fontsize)
    itol_exporter.set_export_param_value('alignLabels',"1")
    itol_exporter.set_export_param_value('datasetList','dataset1,dataset2,dataset3,dataset4')

    epsfile = outpath + os.sep + pfamid + '-itol.eps'
    pdffile = outpath + os.sep + pfamid + '-itol.pdf'
    jpgfile = outpath + os.sep + pfamid + '-itol.jpg'
    svgfile = outpath + os.sep + pfamid + '-itol.svg'

    itol_exporter.export(epsfile)
    # The export format is a parameter of the exporter, so it has to be
    # switched before writing the SVG; previously the .svg file was
    # exported with the format still set to 'eps'.
    itol_exporter.set_export_param_value('format', 'svg')
    itol_exporter.export(svgfile)

    # NOTE(review): the paths are interpolated into shell commands
    # unquoted; this breaks for paths containing spaces or shell
    # metacharacters.
    os.system("epstopdf %s" % epsfile )
    os.system("convert %s %s" % (epsfile, jpgfile) )
    print("%s %s" % ('exported tree to ', pdffile))
def main(g_params): #{{{
    """Command-line entry point: GO-term based analysis of pair info files.

    Options are parsed from ``sys.argv``; positional arguments are the
    input pair-info files.  Each input file must contain ``.<class>.`` in
    its name for one of the comparison classes of the selected method
    (``-mp`` 1 or 3).  For every pair the common level-one ancestor GO
    terms of the two members are appended to its record, the extended
    records are written to ``*.pairinfowithGOterm.txt``, and per-GO-term
    comparison-class tables and numTM heat maps are written and plotted
    with the shell scripts under ``binpath``.

    g_params -- dict of run-time settings; 'pairwise_comparison_method' and
                'isQuiet' may be written here.

    Returns 1 on a usage or input error; nothing is returned explicitly on
    success.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"
    infileList = []
    gomapfile = "/data3/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.Family.nr100.filter.fragmented.uniq.pfam.goinfowithancestor.txt"
    gotermfile = "/data3/wk/MPTopo/pfamAna_refpro/GO_analysis/GO_term.txt"
    gotype = "function"

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            infileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-gomap", "--gomap"]:
                (gomapfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-goterm", "--goterm"]:
                (gotermfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mp", "--mp"]:
                (g_params['pairwise_comparison_method'], i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infileList.append(argv[i])
            i += 1

    if myfunc.checkfile(gomapfile, "GO map file") != 0:
        return 1
    if myfunc.checkfile(gotermfile, "GO Term file") != 0:
        return 1

    mp = g_params['pairwise_comparison_method']
    if mp == 1:
        cmpclassList = cmpclassList_method1
        plotCmpClassScript = "plotCmpClass_mp1_cmpsp_5.sh"
    elif mp == 3:
        cmpclassList = cmpclassList_method3
        plotCmpClassScript = "plotCmpClass_mp3.sh"
    else:
        print >> sys.stderr, "mp not in [1,3]. Exit"
        return 1
    numCmpClass = len(cmpclassList)

    # The output stem is derived from the first input file, so at least one
    # is required (previously this crashed with IndexError further down).
    if len(infileList) < 1:
        print >> sys.stderr, "no input file set. exit"
        return 1
    if len(infileList) < numCmpClass:
        # reported only; processing continues with the files given
        print >> sys.stderr, "input file less than len(cmpclassList)=%d" % (
            numCmpClass)

    goMapDict = ReadGOMap(gomapfile)
    # NOTE(review): the GO term table is read but not used below; the call
    # is kept in case ReadGOTerm has effects not visible here - confirm.
    goTermDict = ReadGOTerm(gotermfile)

    # Read each input file under the comparison class found in its name.
    pairinfoDict = {}
    for infile in infileList:
        tag = ""
        for cls in cmpclassList:
            if infile.find(".%s." % (cls)) != -1:
                tag = cls
                break
        if tag == "":
            print >> sys.stderr, "bad infile %s" % (infile)
            return 1
        pairinfoDict[tag] = ReadPairInfo(infile)

    # Append the common ancestor GO ids of both members to every record.
    for tag in pairinfoDict:
        for record in pairinfoDict[tag]:
            ancList1 = GetAncenstorGOList_LevelOne(record[0], goMapDict, gotype)
            ancList2 = GetAncenstorGOList_LevelOne(record[1], goMapDict, gotype)
            for goid in list(set(ancList1) & set(ancList2)):
                record.append(goid)

    # output pairinfo with GO common term
    stemname = os.path.splitext(infileList[0].replace(".pairinfo.txt", ""))[0]
    for tag in pairinfoDict:
        outfile = outpath + os.sep + stemname + ".%s" % (
            tag) + ".pairinfowithGOterm.txt"
        WritePairInfoWithGO(pairinfoDict[tag], outfile)
        print("%s output" % (outfile))

    tableCmpclassDict = {}
    tableNumTMHeatMapDict = {}
    for goid in SEL_GOID_SET:
        tableCmpclassDict[goid] = {}
        tableNumTMHeatMapDict[goid] = {}
        InitTableCmpClass(tableCmpclassDict[goid], numSeqIDTGroup, numCmpClass)
        InitTableNumTMHeatMap(tableNumTMHeatMapDict[goid],
                              classList_TableNumTMHeatMap, MAX_NUMTM)

    for tag in pairinfoDict:
        idxClass = GetClassIndex(tag, cmpclassList)
        for li in pairinfoDict[tag]:
            # fields from index 9 on are the GO ids appended above
            if len(li) > 9:
                seqidt = li[8]
                numTM1 = li[4]
                numTM2 = li[5]
                minNumTM = min(numTM1, numTM2)
                maxNumTM = max(numTM1, numTM2)
                idxGroup = GetSeqIDTGroupIndex(seqidt, seqIDTGroup)
                for goid in li[9:]:
                    tableCmpclassDict[goid]['freq'][idxGroup][idxClass] += 1
                    tableCmpclassDict[goid]['subsum'][idxGroup] += 1
                    dt = tableNumTMHeatMapDict[goid]['ALL']
                    dt['data'][minNumTM][maxNumTM] += 1
                    # Track the largest numTM seen.  The original
                    # incremented the stored value by one, which does not
                    # yield the maximum.
                    if maxNumTM > dt['maxNumTM']:
                        dt['maxNumTM'] = maxNumTM
                    dt['numPair'] += 1

    # write cmpclass
    stemname2 = os.path.splitext(os.path.basename(stemname))[0]
    print("stemname2= %s" % (stemname2))
    for goid in SEL_GOID_SET:
        data = tableCmpclassDict[goid]
        outfile = outpath + os.sep + stemname2 + "." + goid + ".cmpclass.txt"
        if WriteTable2D(data['freq'], data['subsum'], cmpclassList,
                        seqIDTGroup, outfile) == 0:
            xlabel = "Sequence identity"
            print("%s output" % (outfile))
            cmd = "%s/%s %s -xlabel \"%s\""\
                    " -outstyle eps -outpath %s -plot1 -multiplot" %(
                            binpath, plotCmpClassScript, outfile, xlabel,
                            outpath)
            os.system(cmd)

        outfile = outpath + os.sep + stemname2 + "." + goid + ".numTMHeatMap.ALL.txt"
        mtx = tableNumTMHeatMapDict[goid]['ALL']
        mode_norm = "norm_diag"
        print("%s numPair=%d" % (goid, mtx['numPair']))
        if WriteNumTMHeatMap(mtx['data'], mtx['maxNumTM'], mtx['numPair'],
                             mode_norm, outfile) == 0:
            cmd = "%s/plotNumTMHeatMap.sh %s" % (binpath, outfile)
            os.system(cmd)
def main(g_params):#{{{
    """Command-line entry point: validate the job arguments.

    Options are parsed from ``sys.argv``; the single positional argument is
    the input file.  A job id, an existing input file and an output path
    are required; the output path is created with ``mkdir -p`` when it does
    not exist.

    g_params -- dict of run-time settings; 'base_www_url' and 'isQuiet' may
                be written here.

    Returns 1 on a usage or input error; nothing is returned explicitly
    otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""
    tmpdir = ""
    email = ""
    jobid = ""

    i = 1
    isNonOptionArg=False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg == True:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-tmpdir", "--tmpdir"):
                (tmpdir, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-jobid", "--jobid"):
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-baseurl", "--baseurl"):
                (g_params['base_www_url'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-email", "--email"):
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            i += 1

    if jobid == "":
        print >> sys.stderr, "%s: jobid not set. exit"%(argv[0])
        return 1

    if myfunc.checkfile(infile, "infile"):
        return 1

    if outpath == "":
        print >> sys.stderr, "outpath not set. exit"
        return 1
    if not os.path.exists(outpath):
        # create the output directory, reporting a failed mkdir
        try:
            myfunc.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            print >> sys.stderr, e
            return 1
def main(g_params): #{{{
    """Command-line entry point: pick the SignalP record by taxonomy class.

    Options are parsed from ``sys.argv``; the positional argument is a
    tab-separated file with three columns per line (sequence id in the
    first, taxonomy class in the third).  Depending on the class
    ("Gram+"/"gram+", "Gram-"/"gram-", "Euk"/"euk") the record for the
    sequence id is looked up in the dictionary read from the ``-gram+``,
    ``-gram-`` or ``-euk`` file and written to the output.  Lines with
    another column count, an unknown class or an unknown id produce no
    output.

    g_params -- dict of run-time settings; 'isQuiet' and 'isDEBUG' may be
                written here, 'isDEBUG' is read.

    Returns 1 on a usage or input error; nothing is returned explicitly on
    success.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    gramPositiveFile = ""
    gramNegativeFile = ""
    eukFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-gram+", "--gram+"]:
                (gramPositiveFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-gram-", "--gram-"]:
                (gramNegativeFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-euk", "--euk"]:
                (eukFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-debug", "--debug"]:
                g_params['isDEBUG'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1

    if myfunc.checkfile(infile, "taxidwithtaxoFile") != 0:
        return 1
    if myfunc.checkfile(gramPositiveFile, "gramPositiveFile") != 0:
        return 1
    if myfunc.checkfile(gramNegativeFile, "gramNegativeFile") != 0:
        return 1
    if myfunc.checkfile(eukFile, "eukFile") != 0:
        return 1

    gramPositiveDict = ReadSignalPFile(gramPositiveFile)
    gramNegativeDict = ReadSignalPFile(gramNegativeFile)
    eukDict = ReadSignalPFile(eukFile)

    # taxonomy label (both spellings) -> dictionary to look the id up in
    taxo2Dict = {
        "Gram+": gramPositiveDict, "gram+": gramPositiveDict,
        "Gram-": gramNegativeDict, "gram-": gramNegativeDict,
        "Euk": eukDict, "euk": eukDict,
    }

    # Open the reader first so that a failure does not leave an opened
    # (and truncated) output file behind.
    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        return 1
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        lines = hdl.readlines()
        while lines is not None:
            for line in lines:
                strs = line.split("\t")
                if len(strs) != 3:
                    continue
                seqid = strs[0].strip()
                taxo = strs[2].strip()
                info = ""
                # Only the record lookup may legitimately miss.
                try:
                    if taxo in taxo2Dict:
                        info = taxo2Dict[taxo][seqid]
                except KeyError:
                    info = ""
                else:
                    # NOTE(review): the indentation of this debug print was
                    # lost in the source; it is assumed to apply to every
                    # taxonomy class, not only "euk" - confirm.
                    if g_params['isDEBUG']:
                        print >> sys.stderr, "%s: %s" % (seqid, taxo)
                if info != "":
                    fpout.write("%s\n" % info)
            lines = hdl.readlines()
    finally:
        hdl.close()
        myfunc.myclose(fpout)
def main(g_params): #{{{
    """Command-line entry point: annotate GO info with ancestor terms.

    Options are parsed from ``sys.argv``; the positional argument is the GO
    info file.  For every sequence a header line with the number of
    function/process/location terms is written, followed by one line per
    term holding the third field of the term record and the ancestor info
    looked up by its first field (empty when the term has no entry in the
    ancestor file).  With ``-wgoidlist`` the unique GO ids (first field of
    every term record) are additionally written to that file, one per line.

    g_params -- dict of run-time settings; 'isQuiet' is set by ``-q``.

    Returns 1 on a usage or input error; nothing is returned explicitly on
    success.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    goancfile = "%s%s%s" % (
        DATADIR3, os.sep,
        "wk/MPTopo/pfamAna_refpro/GO_analysis/GO_ancenstor.MF.txt")
    uniqGOIDListFile = ""
    # The options below are accepted on the command line, but their values
    # are not used in this function.
    outpath = "./"
    idListFile = ""
    uniprotDBname = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-wgoidlist", "--wgoidlist"]:
                (uniqGOIDListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-anc", "--anc"]:
                (goancfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-uniprotdb", "--uniprotdb"]:
                (uniprotDBname, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(goancfile, "ancestor file") != 0:
        return 1

    goCategoryList = ["function", "process", "location"]

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        GOinfoList = ReadGOInfo(infile)
        GOAncDict = ReadGOAnc(goancfile)

        if uniqGOIDListFile != "": #{{{
            # The set was previously used without being created, which
            # raised NameError as soon as -wgoidlist was given.
            goidSet = set([])
            for goinfo in GOinfoList:
                for item in goCategoryList:
                    for li in goinfo[item]:
                        goidSet.add(li[0])
            with open(uniqGOIDListFile, "w") as fp:
                for goid in goidSet:
                    fp.write("%s\n" % (goid))
        #}}}

        for goinfo in GOinfoList:
            nF = len(goinfo["function"])
            nP = len(goinfo["process"])
            nC = len(goinfo["location"])
            fpout.write("%s F %1d P %1d C %1d\n" % (goinfo['seqid'], nF, nP, nC))
            for item in goCategoryList:
                for li in goinfo[item]:
                    try:
                        goancinfo = GOAncDict[li[0]]
                    except KeyError:
                        goancinfo = ""
                    fpout.write("%s\t%s\n" % (li[2], goancinfo))
    finally:
        myfunc.myclose(fpout)
def main(g_params):#{{{
    """Command-line entry point: validate the arguments and run one job.

    Options are parsed from ``sys.argv``; the positional argument is the
    input file.  A lock file ``<path_result>/<jobid>/runjob.lock`` is
    opened and locked without blocking; if the lock cannot be taken the
    function gives up.  The output and temporary directories are created
    with ``mkdir -p`` when missing, then ``RunJob`` is called.

    g_params -- dict of run-time settings; 'base_www_url', 'isQuiet',
                'isForceRun', 'jobid', 'lockfile' and 'debugfile' may be
                written here.

    Returns 1 on a usage, locking or set-up error, otherwise the return
    value of ``RunJob``.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""
    tmpdir = ""
    email = ""
    jobid = ""

    i = 1
    isNonOptionArg=False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg == True:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-tmpdir", "--tmpdir"):
                (tmpdir, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-jobid", "--jobid"):
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-baseurl", "--baseurl"):
                (g_params['base_www_url'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-email", "--email"):
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            elif arg in ("-force", "--force"):
                g_params['isForceRun'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            i += 1

    if jobid == "":
        print >> sys.stderr, "%s: jobid not set. exit"%(argv[0])
        return 1
    g_params['jobid'] = jobid

    # create a lock file in the resultpath when run_job.py is running for this
    # job, so that daemon will not run on this folder
    lockname = "runjob.lock"
    lock_file = "%s/%s/%s"%(path_result, jobid, lockname)
    g_params['lockfile'] = lock_file
    fp = open(lock_file, 'w')
    try:
        fcntl.lockf(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        print >> sys.stderr, "Another instance of %s is running"%(progname)
        return 1

    if myfunc.checkfile(infile, "infile") != 0:
        return 1

    # Both working directories are mandatory and are created on demand,
    # output path first.
    requiredDirs = [
        (outpath, "outpath not set. exit"),
        (tmpdir, "tmpdir not set. exit"),
    ]
    for (dirpath, errmsg) in requiredDirs:
        if dirpath == "":
            print >> sys.stderr, errmsg
            return 1
        elif not os.path.exists(dirpath):
            cmd = ["mkdir", "-p", dirpath]
            (t_isCmdSuccess, t_runtime) = webcom.RunCmd(cmd, gen_logfile, gen_errfile)
            if not t_isCmdSuccess:
                return 1

    # The count itself is not used further in this function.
    numseq = myfunc.CountFastaSeq(infile)
    g_params['debugfile'] = "%s/debug.log"%(outpath)
    return RunJob(infile, outpath, tmpdir, email, jobid, g_params)
def main(g_params): #{{{
    """Command-line entry point: label IDs as euk / gram+ / gram- / NA.

    Options are parsed from ``sys.argv``; the positional argument is the ID
    list file.  Every ID is written to the output as
    ``<id>TAB<taxid>TAB<class>``, where the taxid column is always empty
    and the class is the first of "euk", "gram+", "gram-" whose ID list
    (``-euk``, ``-gram+``, ``-gram-``) contains the ID, or "NA" if none
    does.

    g_params -- dict of run-time settings; 'isQuiet' is set by ``-q``.

    Returns 1 on a usage or input error; nothing is returned explicitly on
    success.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    idListFile = ""
    euk = ""
    gram_pos = ""
    gram_neg = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg == True:
            idListFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-outfile"):
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-euk", "--euk"):
                (euk, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-gram+", "--gram+"):
                (gram_pos, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-gram-", "--gram-"):
                (gram_neg, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            idListFile = arg
            i += 1

    if myfunc.checkfile(idListFile, "idListFile") != 0:
        return 1
    if myfunc.checkfile(euk, "euk") != 0:
        return 1
    if myfunc.checkfile(gram_pos, "gram_pos") != 0:
        return 1
    if myfunc.checkfile(gram_neg, "gram_neg") != 0:
        return 1

    idList = myfunc.ReadIDList(idListFile)
    # (membership set, label) in order of precedence
    classTable = [
        (set(myfunc.ReadIDList(euk)), "euk"),
        (set(myfunc.ReadIDList(gram_pos)), "gram+"),
        (set(myfunc.ReadIDList(gram_neg)), "gram-"),
    ]

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    NCBI_TaxID = ""  # the taxid column is left empty
    for seqid in idList:
        cls = "NA"
        for (memberSet, label) in classTable:
            if seqid in memberSet:
                cls = label
                break
        print >> fpout, "%s\t%s\t%s" % (seqid, NCBI_TaxID, cls)
    myfunc.myclose(fpout)
def main(g_params): #{{{
    """Command-line entry point: distribute files over sub-folders.

    Options are parsed from ``sys.argv``; positional arguments are not
    accepted.  With ``-idlist`` (plus at least one ``-ext``) the IDs are
    passed to ``SplitToFolder_idlist``; with ``-filelist`` the non-empty
    lines of that file are passed to ``SplitToFolder_filelist``.  Both may
    be given.  ``-max`` sets the maximum number of files per folder
    (default 2000).

    g_params -- dict of run-time settings; 'isQuiet' is set by ``-q``.

    Returns 1 on a usage error, a failed ``workdir`` check, or when neither
    list is given.  Problems with the individual list files are only
    reported on stderr; nothing is returned explicitly in that case or on
    success.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    workdir = ""
    fileListFile = ""
    idListFile = ""
    extList = []
    maxfile_per_folder = 2000
    method = 0  # accepted via -method, not used in this function

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            # this program takes no positional arguments
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-idlist", "--idlist"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-filelist", "--filelist"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-workdir", "--workdir"]:
                (workdir, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-max", "--max"]:
                (maxfile_per_folder, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-method", "--method"]:
                (method, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-ext", "--ext"]:
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                extList.append(tmpstr)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1

    if myfunc.checkfile(workdir) != 0:
        return 1
    if idListFile == "" and fileListFile == "":
        print >> sys.stderr, "At least one of idListFile and fileListFile need to be set"
        return 1

    if idListFile != "":
        if os.path.exists(idListFile):
            idList = myfunc.ReadIDList(idListFile)
            if len(idList) <= 0:
                print >> sys.stderr, "No ID in idListFile %s" % (idListFile)
            elif len(extList) <= 0:
                print >> sys.stderr, "No extension set when idList is used."
            else:
                SplitToFolder_idlist(idList, workdir, extList,
                                     maxfile_per_folder)
        else:
            print >> sys.stderr, "idListFile %s does not exist" % (idListFile)

    if fileListFile != "":
        if os.path.exists(fileListFile):
            # Read under ``with`` so the handle is closed, and build a real
            # list of the non-empty lines so that len() is well defined.
            with open(fileListFile, "r") as fp:
                fileList = [line for line in fp.read().split("\n") if line]
            if len(fileList) <= 0:
                print >> sys.stderr, "No file in fileListFile %s" % (
                    fileListFile)
            else:
                SplitToFolder_filelist(fileList, workdir, maxfile_per_folder)
        else:
            print >> sys.stderr, "fileListFile %s does not exist" % (
                fileListFile)
def main(g_params): #{{{
    """Command-line entry point: generate sequence pairs within families.

    Options are parsed from ``sys.argv``; positional arguments and the
    entries of the ``-l`` file are family IDs.  The family-to-sequence map
    is read from ``-mapfile`` and, depending on ``-method`` (0, 1 or 2),
    one of the ``GeneratePairWithinFam_m_*`` functions writes the pairs to
    the output handles.

    g_params -- dict of run-time settings; 'isQuiet', 'isOnlyPDB',
                'uniprotidlist_with_pdb' and 'uniprot2pdbMap' may be written
                here, 'isOnlyPDB' and 'uniprotidlist_with_pdb' are read.

    Returns 1 on a usage or input error and 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    outfile_with_famid = ""
    outfile_with_pdb = ""
    outfile_fam2seqmap = ""
    idListFile = ""
    mapfile = "%s%s%s" % (
        DATADIR3, os.sep,
        "wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.clanid2seqid"
    )
    restrictIDListFile = ""
    idList = []
    maxseq_for_fam = 200
    maxpair_for_fam = 300
    method = 0
    rand_seed = None
    pdbtospFile = ""
    isOnlyPDB = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg == True:
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-outfile", "--outfile"):
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-outwithfamid", "--outwithfamid"):
                outfile_with_famid, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-outfam2seqmap", "--outfam2seqmap"):
                outfile_fam2seqmap, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-outwithpdb", "--outwithpdb"):
                outfile_with_pdb, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-tmprolist", "--tmprolist",
                         "-restrictlist", "--restrictlist"):
                restrictIDListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-mapfile", "--mapfile"):
                mapfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-pdbtosp", "--pdbtosp"):
                pdbtospFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-seed", "--seed"):
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-l", "--l"):
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-maxseq", "--maxseq"):
                maxseq_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-maxpair", "--maxpair"):
                maxpair_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-m", "--m", "-method", "--method"):
                method, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-q",):
                g_params['isQuiet'] = True
                i += 1
            elif arg in ("-onlypdb", "--onlypdb"):
                g_params['isOnlyPDB'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            idList.append(arg)
            i += 1

    # IDs from the list file are appended to those given on the command line
    if os.path.exists(idListFile):
        idList += myfunc.ReadIDList(idListFile)
    if len(idList) < 1:
        print >> sys.stderr, "no ID set. exit"
        return 1

    if myfunc.checkfile(mapfile, "idMapFile") != 0:
        return 1
    idMapDict = myfunc.ReadFam2SeqidMap(mapfile)

    # Read in pdbtosp map
    if pdbtospFile != "":
        (pdb2uniprotMap, uniprot2pdbMap) = myfunc.ReadPDBTOSP(pdbtospFile)
        g_params['uniprotidlist_with_pdb'] = set(uniprot2pdbMap.keys())
        g_params['uniprot2pdbMap'] = uniprot2pdbMap

    # "only PDB" mode needs a non-empty pdb-to-uniprot mapping
    if g_params['isOnlyPDB'] == True:
        if pdbtospFile == "":
            print >> sys.stderr, "onlypdb is true but pdbtospFile is not set. exit."
            return 1
        elif g_params['uniprotidlist_with_pdb'] == set():
            print >> sys.stderr, "onlypdb is true but uniprotidlist_with_pdb is empty. exit."
            return 1

    restrictIDSet = set()
    if restrictIDListFile != "":
        restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_withfamid = myfunc.myopen(outfile_with_famid, None, "w", False)
    fpout_withpdb = myfunc.myopen(outfile_with_pdb, None, "w", False)
    fpout_fam2seqmap = myfunc.myopen(outfile_fam2seqmap, None, "w", False)

    if method == 0:
        GeneratePairWithinFam_m_0(idList, idMapDict, restrictIDSet,
                                  maxseq_for_fam, rand_seed, fpout,
                                  fpout_withfamid)
    elif method == 1:
        GeneratePairWithinFam_m_1(idList, idMapDict, restrictIDSet,
                                  maxpair_for_fam, rand_seed, fpout,
                                  fpout_withfamid, fpout_fam2seqmap)
    elif method == 2:
        # all to all
        GeneratePairWithinFam_m_2(idList, idMapDict, restrictIDSet, fpout,
                                  fpout_withfamid, fpout_withpdb)

    for handle in (fpout, fpout_withfamid, fpout_withpdb, fpout_fam2seqmap):
        myfunc.myclose(handle)
    return 0
def main(g_params): #{{{ argv = sys.argv numArgv = len(argv) if numArgv < 2: PrintHelp() return 1 SPE_PAIR_LIST = [(2, 1), (2, 4), (2, 6), (2, 8), (3, 6), (3, 7), (4, 6), (4, 8), (4, 10), (5, 7), (5, 10), (6, 8), (6, 10), (6, 12), (7, 14), (8, 10), (8, 12), (10, 12), (10, 13), (11, 13), (12, 14)] outfile = "" infile = "" pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3) signalpFile = "%s/wk/MPTopo/pfamAna_refpro/pred_signalp/refpro20120604-celluar.selmaxlength-m1.nr100.signalp_list" % ( DATADIR3) #seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2clanid"%(DATADIR3) #seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2pfamid"%(DATADIR3) seqid2clanidMapFile = "" seqid2pfamidMapFile = "" tm_pfamidListFile = "" tm_clanidListFile = "" pfamid2seqidMapFile = "" clanid2seqidMapFile = "" dbname_predTM = "" pairlistwithpfamidFile = "" pfamtype = "" pairListFile = "" #classList_TableNumTMHeatMap = ["ALL", "RMSP"] classList_TableNumTMHeatMap = ["ALL"] i = 1 isNonOptionArg = False while i < numArgv: if isNonOptionArg == True: infile = argv[i] isNonOptionArg = False i += 1 elif argv[i] == "--": isNonOptionArg = True i += 1 elif argv[i][0] == "-": if argv[i] in ["-h", "--help"]: PrintHelp() return 1 elif argv[i] in ["-o", "--o", "-outfile"]: (outfile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-outpath", "--outpath"]: (g_params['outpath'], i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-l", "--l"]: (fileListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pfamdef", "--pfamdef"]: (pfamDefFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-signalp", "--signalp"]: (signalpFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-mp", "--mp"]: g_params[ 'pairwise_comparison_method'], i = myfunc.my_getopt_int( argv, i) elif argv[i] in ["-mindiffpair", 
"--mindiffpair"]: g_params['mindiffpair'], i = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-pfamtype", "--pfamtype"]: pfamtype, i = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-clanidlist", "--clanidlist"]: (tm_clanidListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pfamidlist", "--pfamidlist"]: (tm_pfamidListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seqid2clanid", "--seqid2clanid"]: (seqid2clanidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seqid2pfamid", "--seqid2pfamid"]: (seqid2pfamidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pfamid2seqid", "--pfamid2seqid"]: (pfamid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-clanid2seqid", "--clanid2seqid"]: (clanid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pairlistwithpfamid", "--pairlistwithpfamid"]: (pairlistwithpfamidFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-predTMdbname", "--predTMdbname"]: (dbname_predTM, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pairlist", "--pairlist"]: (pairListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-winsize", "--winsize"]: (g_params['winsize'], i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-outname", "--outname"]: (g_params['outname'], i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-q", "--q"]: g_params['isQuiet'] = True i += 1 elif argv[i] in ["-prokar", "--prokar"]: g_params['isOnlyAnaProkar'] = True i += 1 elif argv[i] in ["-eukar", "--eukar"]: g_params['isOnlyAnaEukar'] = True i += 1 else: print >> sys.stderr, "Error! 
Wrong argument:", argv[i] return 1 else: infile = argv[i] i += 1 if myfunc.checkfile( infile, "%s (line %d): infile" % (__file__, inspect.currentframe().f_lineno)) != 0: return 1 dirpath = myfunc.my_dirname(infile) # try to obtain Pfam family tag tag = "" if pfamtype != "": if pfamtype.upper().find("FAM") != -1: tag = ".Family" elif pfamtype.upper().find("DOM") != -1: tag = ".Domain" elif pfamtype.upper().find("REP") != -1: tag = ".Repeat" elif pfamtype.upper().find("MOT") != -1: tag = ".Motif" else: tag = "" else: if infile.find(".Family.") != -1: tag = ".Family" elif infile.find(".Domain.") != -1: tag = ".Domain" elif infile.find(".Repeat.") != -1: tag = ".Repeat" elif infile.find(".Motif.") != -1: tag = ".Motif" else: tag = "" if seqid2clanidMapFile == "": seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.seqid2clanid" % ( DATADIR3) if myfunc.checkfile( seqid2clanidMapFile, "%s (line %d): seqid2clanidMapFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if seqid2pfamidMapFile == "": seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.seqid2pfamid" % ( DATADIR3, tag) if myfunc.checkfile( seqid2pfamidMapFile, "%s (line %d): seqid2pfamidMapFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if pfamid2seqidMapFile == "": pfamid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.pfamid2seqid" % ( DATADIR3) if myfunc.checkfile( pfamid2seqidMapFile, "%s (line %d): pfamid2seqidMapFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if clanid2seqidMapFile == "": clanid2seqidMapFile = 
"%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.clanid2seqid" % ( DATADIR3, tag) if myfunc.checkfile( clanid2seqidMapFile, "%s (line %d): clanid2seqidMapFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if tm_pfamidListFile == "": tm_pfamidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.pfamidlist" % ( DATADIR3, tag) if myfunc.checkfile( tm_pfamidListFile, "%s (line %d): tm_pfamidListFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if tm_clanidListFile == "": tm_clanidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.clanidlist" % ( DATADIR3) if myfunc.checkfile( tm_clanidListFile, "%s (line %d): tm_clanidListFile" % (__file__, inspect.currentframe().f_lineno)): return 1 if dbname_predTM == "": dbname_predTM = "%s/wk/MPTopo/pfamAna_refpro/pred_topcons_single_method4/refpro20120604-celluar.selmaxlength-m1.topcons-single_topcons_single.m1.agree-44.RMSP" % ( DATADIR3) if myfunc.checkfile( "%s0.db" % (dbname_predTM), "%s (line %d): dbname_predTM" % (__file__, inspect.currentframe().f_lineno)): return 1 if g_params['isOnlyAnaProkar']: prokarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Prokaryota.seqidlist" % ( DATADIR3) g_params['prokarSeqIDSet'] = set(myfunc.ReadIDList(prokarseqidfile)) if len(g_params['prokarSeqIDSet']) < 1: return 1 if g_params['isOnlyAnaEukar']: eukarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Eukaryota.seqidlist" % ( DATADIR3) g_params['eukarSeqIDSet'] = set(myfunc.ReadIDList(eukarseqidfile)) if len(g_params['eukarSeqIDSet']) < 1: return 1 if pairlistwithpfamidFile == "": pairlistwithpfamidFile = "%s/../../Pfam-.maxpair100.pairlistwithpfamid" % ( dirpath) if myfunc.checkfile( pairlistwithpfamidFile, "%s 
(line %d): pairlistwithpfamidFile" % (__file__, inspect.currentframe().f_lineno)): return 1 pfamid_2_seqidpair_Dict = ReadPairListWithFamID(pairlistwithpfamidFile) usedPfamIDSet = set( pfamid_2_seqidpair_Dict.keys()) # pfamids used in pair selection if pairListFile != "": li = myfunc.ReadPairList(pairListFile) SPE_PAIR_LIST = [] for tup in li: SPE_PAIR_LIST.append((int(tup[0]), int(tup[1]))) (pfamidDefDict, clanidDefDict) = ReadPfamDefFile(pfamDefFile) signalpDict = lcmp.ReadSignalPDict(signalpFile) seqid2clanidDict = myfunc.ReadFam2SeqidMap(seqid2clanidMapFile) seqid2pfamidDict = myfunc.ReadFam2SeqidMap(seqid2pfamidMapFile) clanid2seqidDict = myfunc.ReadFam2SeqidMap(clanid2seqidMapFile) pfamid2seqidDict = myfunc.ReadFam2SeqidMap(pfamid2seqidMapFile) tm_pfamidList = myfunc.ReadIDList(tm_pfamidListFile) tm_clanidList = myfunc.ReadIDList(tm_clanidListFile) tm_pfamidSet = set(tm_pfamidList) tm_clanidSet = set(tm_clanidList) hdl_predTM = myfunc.MyDB(dbname_predTM) if not hdl_predTM.failure: idSet_TMpro = set(hdl_predTM.indexedIDList) else: idSet_TMpro = set([]) #classList_TableNumTMHeatMap = ["ALL", "RMSP", "RMDUP"] #alignrangeList = ['FULL_ALIGNED', 'all', 'PART_ALIGNED'] alignrangeList = ['FULL_ALIGNED'] if g_params['outpath'] != "" and not os.path.exists(g_params['outpath']): cmd = ["mkdir", "-p", g_params['outpath']] try: subprocess.check_call(cmd) except subprocess.CalledProcessError, e: print e return 1