def main(g_params):
    """Parse the command line and run the FASTA extraction routine.

    Options (read from ``sys.argv``):
        -h, --help               print help and return 1
        -i, --i, --infile FILE   input file
        -b, --b, --begin INT     ``begin`` value handed to the routine
        -e, --e, --end INT       ``end`` value handed to the routine
        -o, --o, --outfile FILE  output file name handed to myfunc.myopen
    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    Args:
        g_params: shared parameter container; not read by this function.

    Returns:
        0 on success, 1 on a usage error or when the input-file check fails.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999
    # No option changes this value, so CatFasta2 is the routine that runs.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # The argument right after "--" is positional even when it
            # starts with "-", so store it as the input file.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-b", "--b", "--begin"]:
                begin, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-e", "--e", "--end"]:
                end, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-o", "--o", "--outfile"]:
                # Qualified with myfunc like every other option branch.
                outFile, i = myfunc.my_getopt_str(argv, i)
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        if method == 1:
            CatFasta(inFile, begin, end, fpout)
        else:
            CatFasta2(inFile, begin, end, fpout)
    finally:
        # Release the output handle even if the extraction raises.
        myfunc.myclose(fpout)
    return 0
def main(g_params):
    """Parse the command line and run the FASTA extraction routine.

    Options (read from ``sys.argv``):
        -h, --help               print help and return 1
        -i, --i, --infile FILE   input file
        -b, --b, --begin INT     ``begin`` value handed to the routine
        -e, --e, --end INT       ``end`` value handed to the routine
        -o, --o, --outfile FILE  output file name handed to myfunc.myopen
    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    Args:
        g_params: shared parameter container; not read by this function.

    Returns:
        0 on success, 1 on a usage error or when the input-file check fails.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999
    # No option changes this value, so CatFasta2 is the routine that runs.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # The argument right after "--" is positional even when it
            # starts with "-", so store it as the input file.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-b", "--b", "--begin"]:
                begin, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-e", "--e", "--end"]:
                end, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-o", "--o", "--outfile"]:
                # Qualified with myfunc like every other option branch.
                outFile, i = myfunc.my_getopt_str(argv, i)
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        if method == 1:
            CatFasta(inFile, begin, end, fpout)
        else:
            CatFasta2(inFile, begin, end, fpout)
    finally:
        # Release the output handle even if the extraction raises.
        myfunc.myclose(fpout)
    return 0
def main():  #{{{
    """Parse the command line and call RandFasta on the input file.

    Options (read from ``sys.argv``):
        -h, --help                          print help and return 1
        -i, --infile FILE                   input file
        -n, --n INT                         ``N`` handed to RandFasta
        -seed, --seed INT                   random seed handed to RandFasta
        -o, --outfile FILE                  output file name for myfunc.myopen
        -bs, --block-size, -block-size INT  block size (validated only)
    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    Returns:
        0 on success, 1 on a usage error or when the input-file check fails.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # The argument right after "--" is positional even when it
            # starts with "-", so store it as the input file.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-n", "--n"]:
                N, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-o", "--outfile"]:
                outFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-bs", "--block-size", "-block-size"]:
                # NOTE(review): BLOCK_SIZE is bound as a local here and is
                # not passed to RandFasta; presumably a module-level setting
                # was intended -- confirm before adding ``global``.
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # Reject 0 as well, matching the ">0" wording of the message.
                if BLOCK_SIZE <= 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        RandFasta(inFile, N, rand_seed, fpout)
    finally:
        # Release the output handle even if RandFasta raises.
        myfunc.myclose(fpout)
    return 0
def main():#{{{
    """Parse the command line and call RandFasta on the input file.

    Options (read from ``sys.argv``):
        -h, --help                          print help and return 1
        -i, --infile FILE                   input file
        -n, --n INT                         ``N`` handed to RandFasta
        -seed, --seed INT                   random seed handed to RandFasta
        -o, --outfile FILE                  output file name for myfunc.myopen
        -bs, --block-size, -block-size INT  block size (validated only)
    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    Returns:
        0 on success, 1 on a usage error or when the input-file check fails.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # The argument right after "--" is positional even when it
            # starts with "-", so store it as the input file.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-i", "--infile"]:
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-n", "--n"]:
                N, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-o", "--outfile"]:
                outFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-bs", "--block-size", "-block-size"]:
                # NOTE(review): BLOCK_SIZE is bound as a local here and is
                # not passed to RandFasta; presumably a module-level setting
                # was intended -- confirm before adding ``global``.
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # Reject 0 as well, matching the ">0" wording of the message.
                if BLOCK_SIZE <= 0:
                    sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                    return 1
            else:
                # sys.stderr.write parses under both Python 2 and Python 3.
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        RandFasta(inFile, N, rand_seed, fpout)
    finally:
        # Release the output handle even if RandFasta raises.
        myfunc.myclose(fpout)
    return 0
def main(g_params):  #{{{
    """Parse the command line and build an iTOL tree for every given ID.

    Options (read from ``sys.argv``):
        -h, --help               print help and return 1
        -datapath, --datapath D  data directory (default ".")
        -method, --method INT    stored in ``g_params['method']``
        -l, --l FILE             file with IDs, read via myfunc.ReadIDList
        -outpath, --outpath D    output directory (default "./")
    Bare arguments, and the single argument following ``--``, are IDs.

    Args:
        g_params: parameter dictionary; ``g_params['method']`` selects
            Itol_Tree_m0 (0) or Itol_Tree_m1 (1).  Any other value makes the
            loop print the banner only.

    Returns:
        0 after processing, 1 on a usage error or when the output directory
        cannot be created.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    datapath = "."
    outpath = './'
    idList = []
    idListFile = ''

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            idList.append(arg)
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-datapath", "--datapath"]:
                # Same helper as the -method branch instead of raw
                # argv[i + 1] indexing.
                datapath, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-method", "--method"]:
                g_params['method'], i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-l", "--l"]:
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outpath", "--outpath"]:
                outpath, i = myfunc.my_getopt_str(argv, i)
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            idList.append(arg)
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) > 0:
        # Create the directory without going through a shell, so paths
        # containing spaces or shell metacharacters are handled literally.
        if not os.path.isdir(outpath):
            try:
                os.makedirs(outpath)
            except OSError as e:
                sys.stderr.write("Failed to create outpath %s: %s\n"
                                 % (outpath, e))
                return 1

        for cnt, pfamid in enumerate(idList):
            sys.stdout.write("%s %s %s %s\n" % (
                "================== ", cnt, pfamid, " ===================="))
            if g_params['method'] == 0:
                Itol_Tree_m0(pfamid, datapath, outpath)
            elif g_params['method'] == 1:
                Itol_Tree_m1(pfamid, datapath, outpath)
    return 0
def main(g_params):  #{{{#{{{
    """Parse the command line and call Getseqlen on the input file.

    Options (read from ``sys.argv``):
        -h, --help                               print help and return 0
        -i, --i, -printid, --printid             set the print-ID flag
        -just-print-sum, --just-print-sum        set the sum-only flag
        -o, --o, -outfile, --outfile FILE        output file for myfunc.myopen
        -bs, --bs, -block-size, --block-size N   block size (default 100000)
    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    Args:
        g_params: shared parameter container; not read by this function.

    Returns:
        The status returned by Getseqlen, 0 after printing help, or 1 on a
        usage error or failed input-file check.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = arg
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 0
            elif arg in ["-i", "--i", "-printid", "--printid"]:
                isPrintID = True
                i += 1
            elif arg in ["-just-print-sum", "--just-print-sum"]:
                isJustPrintSum = True
                i += 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-bs", "--bs", "-block-size", "--block-size"]:
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # Reject 0 as well, matching the ">0" wording of the message.
                if BLOCK_SIZE <= 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE,
                           fpout)
    finally:
        # Release the output handle even if Getseqlen raises.
        myfunc.myclose(fpout)
    return status
def main(g_params):  #{{{
    """Parse the command line and convert each input file to a pair list.

    Options (read from ``sys.argv``):
        -h, --help              print help and return 1
        -l, --l FILE            file listing input files, one per line
        -o, --o, -outfile FILE  output file name handed to myfunc.myopen
        -evalue, --evalue F     stored in ``g_params['evalue_th']``
        -seqidt, --seqidt F     stored in ``g_params['seqidt_th']``
        -round, --round INT     stored in ``g_params['iteration']``
    Bare arguments, and the single argument following ``--``, are input
    files.

    Args:
        g_params: parameter dictionary updated in place by the options above.

    Returns:
        0 after all files were handed to BlastM9toPairlist, 1 on a usage
        error or when no input file was given.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileList = []
    fileListFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-l", "--l"]:
                fileListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-o", "--o", "-outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-evalue", "--evalue"]:
                g_params['evalue_th'], i = myfunc.my_getopt_float(argv, i)
            elif arg in ["-seqidt", "--seqidt"]:
                g_params['seqidt_th'], i = myfunc.my_getopt_float(argv, i)
            elif arg in ["-round", "--round"]:
                # Unpack like every other option: without advancing ``i``
                # the loop would re-read this option forever, and the
                # parameter would hold a (value, index) pair.
                g_params['iteration'], i = myfunc.my_getopt_int(argv, i)
            else:
                sys.stderr.write("Error! Wrong argument: '%s'\n" % arg)
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile, delim="\n")

    if len(fileList) < 1:
        sys.stderr.write("No input set. exit\n")
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for infile in fileList:
            BlastM9toPairlist(infile, fpout)
    finally:
        # Release the output handle even if a conversion raises.
        myfunc.myclose(fpout)
    return 0
def main(g_params):#{{{#{{{
    """Parse the command line and call Getseqlen on the input file.

    Options (read from ``sys.argv``):
        -h, --help                               print help and return 0
        -i, --i, -printid, --printid             set the print-ID flag
        -just-print-sum, --just-print-sum        set the sum-only flag
        -o, --o, -outfile, --outfile FILE        output file for myfunc.myopen
        -bs, --bs, -block-size, --block-size N   block size (default 100000)
    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    Args:
        g_params: shared parameter container; not read by this function.

    Returns:
        The status returned by Getseqlen, 0 after printing help, or 1 on a
        usage error or failed input-file check.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = arg
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 0
            elif arg in ["-i", "--i", "-printid", "--printid"]:
                isPrintID = True
                i += 1
            elif arg in ["-just-print-sum", "--just-print-sum"]:
                isJustPrintSum = True
                i += 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-bs", "--bs", "-block-size", "--block-size"]:
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # Reject 0 as well, matching the ">0" wording of the message.
                if BLOCK_SIZE <= 0:
                    sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                    return 1
            else:
                # sys.stderr.write parses under both Python 2 and Python 3.
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE,
                           fpout)
    finally:
        # Release the output handle even if Getseqlen raises.
        myfunc.myclose(fpout)
    return status
def main(g_params):  #{{{
    """Parse the command line and build an iTOL tree for every tree file.

    Options (read from ``sys.argv``):
        -h, --help              print help and exit the process with status 0
        -outpath, --outpath D   output path handed to the tree routine
        -m, --m INT             method: 0 -> Itol_Tree_m0, 1 -> Itol_Tree_m1
    Bare arguments, and the single argument following ``--``, are tree files.
    A method other than 0 or 1 makes the loop print the banner only.

    Args:
        g_params: shared parameter container; not read by this function.

    Returns:
        0 after processing, 1 on a usage error or when no file was given.
    """
    argv = sys.argv
    outpath = ''
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    fileList = []
    method = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            fileList.append(arg)
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                sys.exit(0)
            elif arg in ["-outpath", "--outpath"]:
                outpath, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-m", "--m"]:
                method, i = myfunc.my_getopt_int(argv, i)
            else:
                # Stream writes parse under both Python 2 and Python 3.
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            fileList.append(arg)
            i += 1

    if len(fileList) < 1:
        sys.stderr.write("No input set\n")
        return 1

    for cnt, treefile in enumerate(fileList):
        sys.stdout.write("%s %s %s %s\n" % (
            "================== ", cnt, treefile, " ===================="))
        if method == 0:
            Itol_Tree_m0(treefile, outpath, method)
        elif method == 1:
            Itol_Tree_m1(treefile, outpath, method)
    return 0
def main(g_params):  #{{{
    """Parse the command line, load the pair list and call WriteHTML.

    Options (read from ``sys.argv``):
        -h, --help                 print help and return 1
        -outpath D                 output directory (created if missing)
        -datapath D                data directory (must exist)
        -treepath D                stored in ``g_params['treepath']``
        -maxperfamily INT          ``g_params['max_num_output_per_family']``
        -swissprot INT             stored in ``g_params['isSP_threshold']``
        -msapath D                 stored in ``g_params['msapath']``
        -seqlen FILE               sequence-length file
        -shortid2fullid FILE       ID mapping file
        -l FILE                    pair list file (required)
        -q                         sets ``g_params['isQuiet']``
    Every option also accepts the ``--`` prefixed spelling.  Positional
    arguments are rejected.

    Args:
        g_params: parameter dictionary updated in place.

    Returns:
        0 after WriteHTML ran, 1 on any usage or input error.

    Raises:
        subprocess.CalledProcessError: if ``mkdir -p outpath`` fails.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    datapath = ""
    pairListFile = ""
    seqlenFile = ""
    shortid2fullidFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # Positional arguments are not accepted, even after "--";
            # report the argument rather than failing silently.
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-datapath", "--datapath"]:
                (datapath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-treepath", "--treepath"]:
                (g_params['treepath'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-maxperfamily", "--maxperfamily"]:
                (g_params['max_num_output_per_family'],
                 i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-swissprot", "--swissprot"]:
                (g_params['isSP_threshold'],
                 i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-msapath", "--msapath"]:
                (g_params['msapath'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-seqlen", "--seqlen"]:
                (seqlenFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-shortid2fullid", "--shortid2fullid"]:
                (shortid2fullidFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (pairListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    # The pair list is needed for the final sort; without this guard a
    # missing -l option surfaced later as a NameError on pairInfoDict.
    if pairListFile == "":
        sys.stderr.write("pairListFile not set\n")
        return 1
    pairInfoDict = ReadPairInfo(pairListFile)
    if len(pairInfoDict) < 1:
        sys.stderr.write("no pair info read in. exit\n")
        return 1

    if datapath == "":
        sys.stderr.write("datapath not set\n")
        return 1
    elif not os.path.exists(datapath):
        sys.stderr.write("datapath %s does not exist\n" % (datapath))
        return 1

    if outpath == "":
        sys.stderr.write("outpath not set\n")
        return 1
    elif not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        subprocess.check_call(cmd)

    # Pick the copy command by operating system name.
    g_params['OS'] = os.uname()[0]
    if g_params['OS'].find('Linux') != -1:
        g_params['CP_EXE'] = "/bin/cp -uf"
    else:
        g_params['CP_EXE'] = "/bin/cp -f"

    # read seqlen file
    if seqlenFile != "":
        g_params['seqLenDict'] = myfunc.ReadSeqLengthDict(seqlenFile)
    if shortid2fullidFile != "":
        g_params['uniprotAC2FullSeqIDMap'] = myfunc.ReadID2IDMap(
            shortid2fullidFile)

    # Sort by descending order of numseq
    pairInfoList = sorted(pairInfoDict.items(),
                          key=lambda x: x[1]['numseq'],
                          reverse=True)
    # pairInfoList ['pfamid', {'numseq': 123, 'pairlist':[]}]
    WriteHTML(pairInfoList, datapath, outpath)
    return 0
def main(g_params):#{{{
    """Parse the command line and make sure the output directory exists.

    Options (read from ``sys.argv``; each also accepts the ``--`` spelling):
        -h, --help           print help and return 1
        -outpath D           output directory (required, created if missing)
        -email S, -host S, -baseurl S, -jobid S, -datapath D
        -nseq INT, -nseq-this-user INT
        -force               sets ``g_params['isForceRun']``
        -runlocal            sets ``g_params['isRunLocal']``
        -q                   sets ``g_params['isQuiet']``
    Positional arguments are rejected.

    Args:
        g_params: parameter dictionary; ``g_params['fperr']`` is the stream
            that receives error messages.

    Returns:
        1 on a usage error or when the directory cannot be created;
        otherwise the function falls off the end (None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    rmsg = ""
    outpath = ""
    # NOTE(review): the values below are parsed but not used further in the
    # code shown here.
    jobid = ""
    datapath = ""
    numseq = -1
    numseq_this_user = -1
    email = ""
    host_ip = ""
    base_www_url = ""

    fperr = g_params['fperr']

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # Positional arguments are not accepted, even after "--".
            fperr.write("Error! Wrong argument: %s\n" % arg)
            return 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-email", "--email"]:
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-host", "--host"]:
                (host_ip, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-nseq", "--nseq"]:
                (numseq, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-nseq-this-user", "--nseq-this-user"]:
                (numseq_this_user, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-baseurl", "--baseurl"]:
                (base_www_url, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-jobid", "--jobid"]:
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-datapath", "--datapath"]:
                (datapath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-force", "--force"]:
                g_params['isForceRun'] = True
                i += 1
            elif arg in ["-runlocal", "--runlocal"]:
                g_params['isRunLocal'] = True
                i += 1
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                fperr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            fperr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if outpath == "":
        fperr.write("outpath not set. exit\n")
        return 1
    elif not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        try:
            rmsg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            # check_output raised, so rmsg was never reassigned; the
            # command's captured output is carried by the exception.
            sys.stdout.write("%s\n" % e)
            sys.stdout.write("%s\n" % e.output)
            return 1
def main(g_params):  #{{{
    """Parse the command line and filter pairwise-comparison records.

    The input file is read in blocks of ``BLOCK_SIZE``; each block is parsed
    with lcmp.ReadPairCmpResultFromBuffer, annotated (table info, signal
    peptide, duplication), filtered with FilterPairCmpResult and written with
    lcmp.WritePairCmpRecord.  The two record counters are printed to standard
    output at the end.

    Options (each also accepts the ``--`` spelling unless noted):
        -h, --help                print help and exit the process
        -o FILE                   output file name handed to myfunc.myopen
        -cmpclass S               may repeat; collected into a list
        -signalp FILE, -dup/-dupfile FILE, -restrictidlist FILE
        -tableinfo FILE           required when seqidttype != 0
        -seqidttype INT, -min-seqidt F, -max-seqidt F
        -alignrange all|full|part
        -rmsp, -rmdup, -evodist   boolean switches stored in ``g_params``
        -debug y|n                stored in ``g_params['isDEBUG']``
        -seq2fammap FILE, -debug-unmapped-position X, -q (single dash only)
    A bare argument, or the single argument following ``--``, is the input
    file.

    Args:
        g_params: parameter dictionary, read and updated in place.

    Returns:
        0 on success, -1 on most errors, 1 for an invalid -alignrange value
        or when called without arguments.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    isQuiet = False  # set by -q; not read further in this function
    tableinfoFile = ""
    cmpclassList = []
    restrictIDListFile = ""
    signalpFile = ""
    dupFile = ""
    outfile = ""
    seq2famMapfile = ""  # set by -seq2fammap; not read further here

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                sys.exit()
            elif arg in ["-o", "--o"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-cmpclass", "--cmpclass"]:
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                cmpclassList.append(tmpstr)
            elif arg in ["-signalp", "--signalp"]:
                (signalpFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-restrictidlist", "--restrictidlist"]:
                (restrictIDListFile, i) = myfunc.my_getopt_str(argv, i)
                g_params['isRestrictIDListSet'] = True
            elif arg in ["-dup", "--dup", "-dupfile", "--dupfile"]:
                (dupFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-rmsp", "--rmsp"]:
                g_params['isRemoveSignalP'] = True
                i += 1
            elif arg in ["-rmdup", "--rmdup"]:
                g_params['isRemoveDup'] = True
                i += 1
            elif arg in ["-seq2fammap", "--seq2fammap"]:
                (seq2famMapfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-seqidttype", "--seqidttype"]:
                g_params['seqidttype'], i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-tableinfo", "--tableinfo"]:
                tableinfoFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-min-seqidt", "--min-seqidt"]:
                g_params['minSeqIDT'], i = myfunc.my_getopt_float(argv, i)
            elif arg in ["-max-seqidt", "--max-seqidt"]:
                g_params['maxSeqIDT'], i = myfunc.my_getopt_float(argv, i)
            elif arg in ["-evodist", "--evodist"]:
                g_params['isEvodist'] = True
                i += 1
            elif arg in ["-alignrange", "--alignrange"]:
                g_params['alignrange'], i = myfunc.my_getopt_str(argv, i)
                if g_params['alignrange'] not in ['all', 'full', 'part']:
                    sys.stderr.write(
                        "alignrange must be one of [all, full, part]\n")
                    return 1
                # Map the short spellings onto the internal constants.
                if g_params['alignrange'] == 'full':
                    g_params['alignrange'] = 'FULL_ALIGNED'
                elif g_params['alignrange'] == 'part':
                    g_params['alignrange'] = 'PART_ALIGNED'
            elif arg in ["-debug", "--debug"]:
                # Same helper as the sibling options instead of indexing
                # argv[i + 1] directly.
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                g_params['isDEBUG'] = tmpstr[:1].lower() == 'y'
            elif arg in ["-debug-unmapped-position",
                         "--debug-unmapped-position"]:
                # NOTE(review): this binds a local that is never read;
                # presumably a module-level flag was intended -- confirm.
                DEBUG_UNMAPPED_TM_POSITION = 1
                i += 2
            elif arg == "-q":
                isQuiet = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return -1
        else:
            infile = arg
            i += 1

    if infile == "":
        sys.stderr.write("infile not set. Exit.\n")
        return -1
    elif not os.path.exists(infile):
        sys.stderr.write("infile %s does not exists. Exit.\n" % infile)
        return -1

    try:
        fpin = open(infile, "rb")
    except IOError:
        sys.stderr.write("Failed to open input file %s\n" % (infile))
        return -1

    try:
        pairalnStat = {}
        if g_params['seqidttype'] != 0:
            if tableinfoFile == "" or not os.path.exists(tableinfoFile):
                sys.stderr.write("tableinfoFile must be set when seqidttype "
                                 "is set to 1 or 2\n")
                sys.stderr.write("but seqidttype = %d is set. Exit.\n"
                                 % g_params['seqidttype'])
                return -1
            pairalnStat = lcmp.ReadPairAlnTableInfo(tableinfoFile)

        signalpDict = {}
        if signalpFile != "":
            signalpDict = lcmp.ReadSignalPDict(signalpFile)
        if signalpDict != {}:
            g_params['isSignalPSet'] = True

        dupPairList = []
        if dupFile != "":
            dupPairList = lcmp.ReadDupPairList(dupFile)
        if len(dupPairList) > 0:
            g_params['isDupSet'] = True
        dupPairSet = set(dupPairList)

        restrictIDSet = set([])
        if restrictIDListFile != "":
            restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))

        rltyDict = {}
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
        try:
            unprocessedBuffer = ""
            cntTotalReadInRecord = 0
            cntTotalOutputRecord = 0
            while True:
                buff = fpin.read(BLOCK_SIZE)
                # An empty read marks end of file; the leftover buffer is
                # still processed once more before leaving the loop.
                isEOFreached = not buff
                buff = unprocessedBuffer + buff
                pairCmpRecordList = []
                unprocessedBuffer = lcmp.ReadPairCmpResultFromBuffer(
                    buff, pairCmpRecordList)

                AddTableInfo(pairCmpRecordList, pairalnStat)
                AddSignalPInfo(pairCmpRecordList, signalpDict)
                AddDupInfo(pairCmpRecordList, dupPairSet)

                cntTotalReadInRecord += len(pairCmpRecordList)
                pairCmpRecordList = FilterPairCmpResult(
                    pairCmpRecordList, cmpclassList, rltyDict, restrictIDSet)

                if len(pairCmpRecordList) > 0:
                    lcmp.WritePairCmpRecord(pairCmpRecordList,
                                            cntTotalOutputRecord, fpout)
                    cntTotalOutputRecord += len(pairCmpRecordList)
                if isEOFreached:
                    break

            sys.stdout.write("cntTotalReadInRecord = %s\n"
                             % cntTotalReadInRecord)
            sys.stdout.write("cntTotalOutputRecord = %s\n"
                             % cntTotalOutputRecord)
        finally:
            myfunc.myclose(fpout)
    finally:
        # Close the input on every exit path, including early returns.
        fpin.close()
    return 0
def main(g_params):#{{{
    """Parse the command line and validate the job inputs.

    Options (read from ``sys.argv``; each also accepts the ``--`` spelling):
        -h, --help      print help and return 1
        -outpath D      output directory (required, created if missing)
        -tmpdir D, -jobid S (required), -fasta FILE, -email S
        -k S            keep-files setting (default "no")
        -r S            repack setting (default "yes")
        -t INT          target length
        -baseurl S      stored in ``g_params['base_www_url']``
        -q              sets ``g_params['isQuiet']``
        -force          sets ``g_params['isForceRun']``
    A bare argument, or the single argument following ``--``, is the model
    file.

    Args:
        g_params: parameter dictionary updated in place.

    Returns:
        1 on a usage error, a failed model-file check or when the output
        directory cannot be created; otherwise the function falls off the
        end (None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    modelfile = ""
    # NOTE(review): the values below are parsed but not used further in the
    # code shown here.
    seqfile = ""
    tmpdir = ""
    email = ""
    jobid = ""
    isKeepFiles = "no"
    isRepack = "yes"
    targetlength = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            modelfile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-tmpdir", "--tmpdir"]:
                (tmpdir, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-jobid", "--jobid"]:
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-fasta", "--fasta"]:
                (seqfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-k", "--k"]:
                (isKeepFiles, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-r", "--r"]:
                (isRepack, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-t", "--t"]:
                (targetlength, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-baseurl", "--baseurl"]:
                (g_params['base_www_url'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-email", "--email"]:
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif arg in ["-force", "--force"]:
                g_params['isForceRun'] = True
                i += 1
            else:
                # Stream writes parse under both Python 2 and Python 3.
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            modelfile = arg
            i += 1

    if jobid == "":
        sys.stderr.write("%s: jobid not set. exit\n" % (sys.argv[0]))
        return 1

    if myfunc.checkfile(modelfile, "modelfile") != 0:
        return 1

    if outpath == "":
        sys.stderr.write("outpath not set. exit\n")
        return 1
    elif not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            sys.stderr.write("%s\n" % e)
            return 1
def main(g_params):#{{{
    """Parse the command line, read the input list and settle the program list.

    Options (read from ``sys.argv``; each also accepts the ``--`` spelling
    unless noted):
        -h, --help        print help and return 1
        -prog S           may repeat; collected into the program list
        -gzip yes|no      stored in ``g_params['isGzip']``
        -num INT          stored in ``g_params['num_per_split']``
        -proglist FILE    file with further program names
        -outpath D        output directory (created if missing)
        -q                (single dash only) sets ``g_params['isQuiet']``
    A bare argument, or the single argument following ``--``, is the input
    file.

    Args:
        g_params: parameter dictionary updated in place.

    Returns:
        1 on a usage error, a failed input check, an empty program list or
        when the output directory cannot be created; otherwise the function
        falls off the end (None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    progList = []
    progListFile = ""
    outpath = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-prog", "--prog"]:
                tmpstr, i = myfunc.my_getopt_str(argv, i)
                progList.append(tmpstr)
            elif arg in ["-gzip", "--gzip"]:
                tmpstr, i = myfunc.my_getopt_str(argv, i)
                answer = tmpstr.upper()
                # An empty value or another option in the value position is
                # a usage error; startswith avoids indexing an empty string.
                if not answer or answer.startswith("-"):
                    sys.stderr.write("Bad argument, -gzip should be"
                                     " followed by yes or no\n")
                    return 1
                g_params['isGzip'] = answer.startswith("Y")
            elif arg in ["-num", "--num"]:
                g_params['num_per_split'], i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-proglist", "--proglist"]:
                progListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outpath", "--outpath"]:
                outpath, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile, "infile") != 0:
        return 1

    inputList = ReadInputList(infile)  # [(filename, numseq)]
    inputList = sorted(inputList, key=lambda x: x[1], reverse=False)
    rtname_infile = os.path.basename(os.path.splitext(infile)[0])

    # get progList
    if len(progList) == 0 and progListFile == "":
        progList = default_progList
    elif progListFile != "":
        tmp_list = myfunc.ReadIDList(progListFile)
        if len(tmp_list) == 0:
            sys.stderr.write("progListFile %s does not exist or empty\n" % (
                progListFile))
            return 1
        progList += tmp_list

    if len(progList) == 0:
        sys.stderr.write("progList is empty. exit\n")
        return 1

    if outpath != "" and not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            sys.stdout.write("%s\n" % e)
            return 1
def main(g_params):#{{{
    """Parse the command line and select lines of the input file by ID.

    Options (read from ``sys.argv``):
        -h, --help                         print help and return 1
        -o, --o, -outfile FILE             output file for myfunc.myopen
        -l, --l FILE                       file with IDs, one entry per line
        -m, --m, -methodid, --methodid N   ``g_params['method_getid']``
        -se, --se, -selfield, --selfield N appended to
                                           ``g_params['sel_field_list']``;
                                           also sets method_getid to 3
        -i, --i FILE                       input file (required)
        -q, --q                            sets ``g_params['isQuiet']``
    Bare arguments, and the single argument following ``--``, are IDs.  An ID
    made of several whitespace-separated fields is stored as a tuple of its
    fields.

    Args:
        g_params: parameter dictionary updated in place.

    Returns:
        The status returned by SelectLineByID, or 1 on a usage error, a
        missing input file or an empty ID list.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    def to_id(fields, single):
        """Return ``single`` for a one-field ID, else the fields as a tuple."""
        if len(fields) == 1:
            return single
        return tuple(fields)

    outfile = ""
    idListFile = ""
    idList = []
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(to_id(arg.split(), arg))
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-m", "--m", "-methodid", "--methodid"]:
                (g_params['method_getid'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-se", "--se", "-selfield", "--selfield"]:
                # "-se" was spelled "-se," (comma inside the literal), so
                # the short option could never match.
                (tmpint, i) = myfunc.my_getopt_int(argv, i)
                g_params['sel_field_list'].append(tmpint)
                g_params['method_getid'] = 3
            elif arg in ["-i", "--i"]:
                (infile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            idList.append(to_id(arg.split(), arg))
            i += 1

    if infile == "":
        sys.stderr.write("infile not set\n")
        sys.stderr.write("%s\n" % usage_short)
        return 1
    elif not os.path.exists(infile):
        sys.stderr.write("infile %s does not exist\n" % (infile))
        return 1

    if idListFile != "":
        hdl = myfunc.ReadLineByBlock(idListFile)
        if not hdl.failure:
            lines = hdl.readlines()
            while lines is not None:
                for line in lines:
                    fields = line.split()
                    # Skip blank and whitespace-only lines instead of
                    # storing an empty tuple as an ID.
                    if fields:
                        idList.append(to_id(fields, fields[0]))
                lines = hdl.readlines()
            hdl.close()

    if len(idList) < 1:
        sys.stderr.write("ID not set\n")
        sys.stderr.write("%s\n" % usage_short)
        # Nothing can be selected without IDs; stop like the other usage
        # errors do.
        return 1

    idListSet = set(idList)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = SelectLineByID(infile, idListSet, fpout)
    finally:
        myfunc.myclose(fpout)
    return status
def main(g_params):  #{{{
    """Parse the command line and load the Pfam/clan data sets.

    File options default to paths under ``DATADIR3`` when not given on the
    command line; every resolved file is validated with myfunc.checkfile
    before the data are read.  ``-prokar`` / ``-eukar`` additionally load the
    matching sequence-ID list into ``g_params``.  A bare argument, or the
    single argument following ``--``, is the input file.

    Args:
        g_params: parameter dictionary, read and updated in place
            (``outpath``, ``outname``, ``winsize``, ``mindiffpair``,
            ``pairwise_comparison_method``, ``isQuiet``, ``isOnlyAnaProkar``,
            ``isOnlyAnaEukar``, ``prokarSeqIDSet``, ``eukarSeqIDSet``).

    Returns:
        1 on a usage error, a failed file check, an empty ID set or when the
        output directory cannot be created; otherwise the function falls off
        the end (None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    SPE_PAIR_LIST = [(2, 1), (2, 4), (2, 6), (2, 8), (3, 6), (3, 7), (4, 6),
                     (4, 8), (4, 10), (5, 7), (5, 10), (6, 8), (6, 10),
                     (6, 12), (7, 14), (8, 10), (8, 12), (10, 12), (10, 13),
                     (11, 13), (12, 14)]
    outfile = ""
    infile = ""
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    signalpFile = "%s/wk/MPTopo/pfamAna_refpro/pred_signalp/refpro20120604-celluar.selmaxlength-m1.nr100.signalp_list" % (
        DATADIR3)

    #seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2clanid"%(DATADIR3)
    #seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2pfamid"%(DATADIR3)
    seqid2clanidMapFile = ""
    seqid2pfamidMapFile = ""
    tm_pfamidListFile = ""
    tm_clanidListFile = ""
    pfamid2seqidMapFile = ""
    clanid2seqidMapFile = ""
    dbname_predTM = ""
    pairlistwithpfamidFile = ""
    pfamtype = ""
    pairListFile = ""
    fileListFile = ""  # set by -l; not read further in the code shown here

    #classList_TableNumTMHeatMap = ["ALL", "RMSP"]
    classList_TableNumTMHeatMap = ["ALL"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outpath", "--outpath"]:
                (g_params['outpath'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-signalp", "--signalp"]:
                (signalpFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-mp", "--mp"]:
                g_params['pairwise_comparison_method'], i = (
                    myfunc.my_getopt_int(argv, i))
            elif arg in ["-mindiffpair", "--mindiffpair"]:
                g_params['mindiffpair'], i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-pfamtype", "--pfamtype"]:
                pfamtype, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-clanidlist", "--clanidlist"]:
                (tm_clanidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pfamidlist", "--pfamidlist"]:
                (tm_pfamidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-seqid2clanid", "--seqid2clanid"]:
                (seqid2clanidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-seqid2pfamid", "--seqid2pfamid"]:
                (seqid2pfamidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pfamid2seqid", "--pfamid2seqid"]:
                (pfamid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-clanid2seqid", "--clanid2seqid"]:
                (clanid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pairlistwithpfamid", "--pairlistwithpfamid"]:
                (pairlistwithpfamidFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-predTMdbname", "--predTMdbname"]:
                (dbname_predTM, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pairlist", "--pairlist"]:
                (pairListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-winsize", "--winsize"]:
                (g_params['winsize'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-outname", "--outname"]:
                (g_params['outname'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif arg in ["-prokar", "--prokar"]:
                g_params['isOnlyAnaProkar'] = True
                i += 1
            elif arg in ["-eukar", "--eukar"]:
                g_params['isOnlyAnaEukar'] = True
                i += 1
            else:
                # Stream writes parse under both Python 2 and Python 3.
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    # Each check message carries the source line of the check itself.
    if myfunc.checkfile(
            infile, "%s (line %d): infile" %
        (__file__, inspect.currentframe().f_lineno)) != 0:
        return 1

    dirpath = myfunc.my_dirname(infile)

    # try to obtain Pfam family tag: from -pfamtype when given, otherwise
    # from the input file name; the first match in table order wins.
    tag = ""
    if pfamtype != "":
        pfamtypeUpper = pfamtype.upper()
        for key, value in [("FAM", ".Family"), ("DOM", ".Domain"),
                           ("REP", ".Repeat"), ("MOT", ".Motif")]:
            if pfamtypeUpper.find(key) != -1:
                tag = value
                break
    else:
        for value in [".Family", ".Domain", ".Repeat", ".Motif"]:
            if infile.find(value + ".") != -1:
                tag = value
                break

    if seqid2clanidMapFile == "":
        seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.seqid2clanid" % (
            DATADIR3)
    if myfunc.checkfile(
            seqid2clanidMapFile, "%s (line %d): seqid2clanidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if seqid2pfamidMapFile == "":
        seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.seqid2pfamid" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            seqid2pfamidMapFile, "%s (line %d): seqid2pfamidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if pfamid2seqidMapFile == "":
        pfamid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.pfamid2seqid" % (
            DATADIR3)
    if myfunc.checkfile(
            pfamid2seqidMapFile, "%s (line %d): pfamid2seqidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if clanid2seqidMapFile == "":
        clanid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.clanid2seqid" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            clanid2seqidMapFile, "%s (line %d): clanid2seqidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if tm_pfamidListFile == "":
        tm_pfamidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.pfamidlist" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            tm_pfamidListFile, "%s (line %d): tm_pfamidListFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if tm_clanidListFile == "":
        tm_clanidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.clanidlist" % (
            DATADIR3)
    if myfunc.checkfile(
            tm_clanidListFile, "%s (line %d): tm_clanidListFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if dbname_predTM == "":
        dbname_predTM = "%s/wk/MPTopo/pfamAna_refpro/pred_topcons_single_method4/refpro20120604-celluar.selmaxlength-m1.topcons-single_topcons_single.m1.agree-44.RMSP" % (
            DATADIR3)
    if myfunc.checkfile(
            "%s0.db" % (dbname_predTM), "%s (line %d): dbname_predTM" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if g_params['isOnlyAnaProkar']:
        prokarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Prokaryota.seqidlist" % (
            DATADIR3)
        g_params['prokarSeqIDSet'] = set(myfunc.ReadIDList(prokarseqidfile))
        if len(g_params['prokarSeqIDSet']) < 1:
            return 1
    if g_params['isOnlyAnaEukar']:
        eukarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Eukaryota.seqidlist" % (
            DATADIR3)
        g_params['eukarSeqIDSet'] = set(myfunc.ReadIDList(eukarseqidfile))
        if len(g_params['eukarSeqIDSet']) < 1:
            return 1

    if pairlistwithpfamidFile == "":
        pairlistwithpfamidFile = "%s/../../Pfam-.maxpair100.pairlistwithpfamid" % (
            dirpath)
    if myfunc.checkfile(
            pairlistwithpfamidFile, "%s (line %d): pairlistwithpfamidFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    pfamid_2_seqidpair_Dict = ReadPairListWithFamID(pairlistwithpfamidFile)
    usedPfamIDSet = set(
        pfamid_2_seqidpair_Dict.keys())  # pfamids used in pair selection

    # A pair list given on the command line replaces the built-in pairs.
    if pairListFile != "":
        li = myfunc.ReadPairList(pairListFile)
        SPE_PAIR_LIST = [(int(tup[0]), int(tup[1])) for tup in li]

    (pfamidDefDict, clanidDefDict) = ReadPfamDefFile(pfamDefFile)
    signalpDict = lcmp.ReadSignalPDict(signalpFile)

    seqid2clanidDict = myfunc.ReadFam2SeqidMap(seqid2clanidMapFile)
    seqid2pfamidDict = myfunc.ReadFam2SeqidMap(seqid2pfamidMapFile)

    clanid2seqidDict = myfunc.ReadFam2SeqidMap(clanid2seqidMapFile)
    pfamid2seqidDict = myfunc.ReadFam2SeqidMap(pfamid2seqidMapFile)

    tm_pfamidList = myfunc.ReadIDList(tm_pfamidListFile)
    tm_clanidList = myfunc.ReadIDList(tm_clanidListFile)

    tm_pfamidSet = set(tm_pfamidList)
    tm_clanidSet = set(tm_clanidList)

    hdl_predTM = myfunc.MyDB(dbname_predTM)
    if not hdl_predTM.failure:
        idSet_TMpro = set(hdl_predTM.indexedIDList)
    else:
        idSet_TMpro = set([])

    #classList_TableNumTMHeatMap = ["ALL", "RMSP", "RMDUP"]
    #alignrangeList = ['FULL_ALIGNED', 'all', 'PART_ALIGNED']
    alignrangeList = ['FULL_ALIGNED']

    if g_params['outpath'] != "" and not os.path.exists(g_params['outpath']):
        cmd = ["mkdir", "-p", g_params['outpath']]
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError as e:
            sys.stdout.write("%s\n" % e)
            return 1
def main():#{{{
    """Write randomly generated pairs of IDs, one pair per line.

    IDs are taken from the positional command-line arguments and, when
    -l/--listfile is given, from the list returned by myfunc.ReadIDList.
    Index pairs are produced by myfunc.GenerateRandomPair (method 0) or
    myfunc.GenerateRandomPair_no_repeat_use (method 1) and written as
    "id1 id2" through the handle returned by myfunc.myopen, which is given
    sys.stdout as its fallback stream.

    Returns:
        0 on success, 1 on a usage error. When called without arguments or
        with -h/--help the help text is printed and sys.exit() is called.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        sys.exit()

    max_numpair = 10*1000*1000
    isQuiet = False
    rand_seed = None
    idList = []
    idListFile = ""
    outfile = ""
    method = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # The argument right after "--" is always taken as an ID.
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                sys.exit()
            elif argv[i] in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-m", "--m", "-method", "--method"]:
                method, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-l", "--l", "-listfile", "--listfile"]:
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-maxpair", "--maxpair"]:
                max_numpair, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] == "-q":
                isQuiet = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            idList.append(argv[i])
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    numseqid = len(idList)
    if numseqid <= 0:
        print >> sys.stderr, "List file is empty."
        return 1
    elif numseqid < 2:
        print >> sys.stderr, "Too few items. At least 2 are required."
        return 1

    # Reject unknown methods up front; otherwise `pairlist` would be unbound
    # below and the output file would already have been opened (truncated).
    if method not in (0, 1):
        print >> sys.stderr, "Error! Unsupported method %d, must be 0 or 1" % (
            method)
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    if method == 0:
        pairlist = myfunc.GenerateRandomPair(numseqid, max_numpair, rand_seed)
    else:
        pairlist = myfunc.GenerateRandomPair_no_repeat_use(
            numseqid, max_numpair, rand_seed)

    # Each pair holds two indices into idList.
    for pair in pairlist:
        print >> fpout, "%s %s" % (idList[pair[0]], idList[pair[1]])
    myfunc.myclose(fpout)
    return 0
def main(g_params): #{{{ argv = sys.argv numArgv = len(argv) if numArgv < 2: PrintHelp() return 1 outpath = "" pairListFile = "" seqlenFile = "" shortid2fullidFile = "" seqid2pfamidMapFile = "" pfamDefFile = '/data3/data/pfam/pfam27.0/Pfam-A.clans.tsv' topodb = "" seqdb = "" pdb2spFile = "" i = 1 isNonOptionArg = False while i < numArgv: if isNonOptionArg == True: isNonOptionArg = False i += 1 return 1 elif argv[i] == "--": isNonOptionArg = True i += 1 elif argv[i][0] == "-": if argv[i] in ["-h", "--help"]: PrintHelp() return 1 elif argv[i] in ["-outpath", "--outpath"]: (outpath, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-topodb", "--topodb"]: (topodb, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pdb2sp", "-pdb2sp", "-pdbtosp", "--pdbtosp"]: (pdb2spFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seqdb", "--seqdb"]: (seqdb, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seqmsapath", "--seqmsapath"]: (g_params['seqmsapath'], i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-datapath", "--datapath"]: (g_params['datapath'], i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seq2pfam", "--seq2pfam"]: (seqid2pfamidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pfam2seq", "--pfam2seq"]: (pfamid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-description", "--description"]: (g_params['description'], i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pfamdef", "--pfamdef"]: (pfamDefFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-alignrange", "--alignrange"]: g_params['alignrange'], i = myfunc.my_getopt_str(argv, i) if not g_params['alignrange'] in ['all', 'full', 'part']: print >> sys.stderr, "alignrange must be one of [all, full, part]" return 1 else: if g_params['alignrange'] == 'full': g_params['alignrange'] = 'FULL_ALIGNED' elif g_params['alignrange'] == 'part': g_params['alignrange'] = 'PART_ALIGNED' elif argv[i] in ["-basename", "--basename"]: (g_params['basename'], i) = 
myfunc.my_getopt_str(argv, i) elif argv[i] in ["-treepath", "--treepath"]: (g_params['treepath'], i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-pairalnpath", "--pairalnpath"]: (g_params['pairalnpath'], i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-maxperfamily", "--maxperfamily"]: (g_params['max_num_output_per_family'], i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-min-seqidt", "--min-seqidt"]: g_params['minSeqIDT'], i = myfunc.my_getopt_float(argv, i) elif argv[i] in ["-max-seqidt", "--max-seqidt"]: g_params['maxSeqIDT'], i = myfunc.my_getopt_float(argv, i) elif argv[i] in ["-shortid2fullid", "--shortid2fullid"]: (shortid2fullidFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-debug", "--debug"]: if argv[i + 1][0].lower() == 'y': g_params['isDEBUG'] = True else: g_params['isDEBUG'] = False i += 2 elif argv[i] in ["-q", "--q"]: g_params['isQuiet'] = True i += 1 else: print >> sys.stderr, "Error! Wrong argument:", argv[i] return 1 else: print >> sys.stderr, "Error! Wrong argument:", argv[i] return 1 if g_params['basename'] == "": print >> sys.stderr, "basename not set. 
exit" return 1 if myfunc.checkfile(g_params['datapath'], "datapath") != 0: return 1 if myfunc.checkfile(seqid2pfamidMapFile, "seqid2pfamidMapFile") != 0: return 1 if myfunc.checkfile(pfamid2seqidMapFile, "pfamid2seqidMapFile") != 0: return 1 if myfunc.checkfile(topodb + "0.db", "topodb") != 0: return 1 if myfunc.checkfile(seqdb + "0.db", "seqdb") != 0: return 1 if myfunc.checkfile(g_params['seqmsapath'], "seqmsapath") != 0: return 1 if pdb2spFile != "": (g_params['pdb2uniprotMap'], g_params['uniprot2pdbMap']) = myfunc.ReadPDBTOSP(pdb2spFile) if g_params['datapath'] == "": print >> sys.stderr, "datapath not set" return 1 elif not os.path.exists(g_params['datapath']): print >> sys.stderr, "datapath %s does not exist" % ( g_params['datapath']) return 1 if outpath == "": print >> sys.stderr, "outpath not set" return 1 elif not os.path.exists(outpath): cmd = ["mkdir", "-p", outpath] subprocess.check_call(cmd) paircmpfile = "%s/%s.paircmp" % (g_params['datapath'], g_params['basename']) if myfunc.checkfile(paircmpfile, "paircmpfile") != 0: return 1 (g_params['pfamidDefDict'], g_params['clanidDefDict']) = lcmp.ReadPfamDefFile(pfamDefFile) g_params['seqid2pfamidDict'] = myfunc.ReadFam2SeqidMap(seqid2pfamidMapFile) g_params['pfamid2seqidDict'] = myfunc.ReadFam2SeqidMap(pfamid2seqidMapFile) tmpdir = tempfile.mkdtemp() if g_params['msapath'] == "": g_params['msapath'] = tmpdir if g_params['treepath'] == "": g_params['treepath'] = tmpdir if g_params['pairalnpath'] == "": g_params['pairalnpath'] = tmpdir pairCmpRecordList = [] unprocessedBuffer = "" cntTotalReadInRecord = 0 cntTotalOutputRecord = 0 isEOFreached = False try: fpin = open(paircmpfile, "r") except IOError: print >> sys.stderr, "Failed to open input file %s" % (paircmpfile) return 1 while 1: buff = fpin.read(myfunc.BLOCK_SIZE) if buff == "": isEOFreached = True buff = unprocessedBuffer + buff rdList = [] unprocessedBuffer = lcmp.ReadPairCmpResultFromBuffer(buff, rdList) rdList = FilterPairCmpResult(rdList) 
cntTotalReadInRecord += len(rdList) pairCmpRecordList += rdList if isEOFreached == True: break fpin.close() print "cntTotalReadInRecord =", cntTotalReadInRecord g_params['hdl_seqdb'] = myfunc.MyDB(seqdb) g_params['hdl_topodb'] = myfunc.MyDB(topodb) g_params['OS'] = os.uname()[0] if g_params['OS'].find('Linux') != -1: g_params['CP_EXE'] = "/bin/cp -uf" else: g_params['CP_EXE'] = "/bin/cp -f" if shortid2fullidFile != "": g_params['uniprotAC2FullSeqIDMap'] = myfunc.ReadID2IDMap( shortid2fullidFile) addname = "" if g_params['alignrange'] != 'all': addname += ".%s" % (g_params['alignrange']) dataTable = {} # structure of dataTable # dataTable[pfamid] = {'set_seqid':set(), 'difftopopair':[{'INV':[(id1,id2)]},{'TM2GAP':},{}} # first read in pairCmpRecordList AddAllSeqInPairCmp(dataTable, pairCmpRecordList, g_params['seqid2pfamidDict']) pairInfoFileList = [] for cmpclass in g_params['cmpClassList_mp3_cmpdup'][0:]: ss = "%s/%s_.cmpdup.FULL_ALIGNED.%s.pairinfo.txt" % ( g_params['datapath'], g_params['basename'], cmpclass) pairInfoFileList.append(ss) pairinfoList = ReadPairInfo_cmpclass(ss) AddPairInfo(dataTable, pairinfoList, cmpclass) # print "\n".join(pairInfoFileList) if g_params['isDEBUG']: #{{{ for pfamid in dataTable: print pfamid print "\tset_seqid" print dataTable[pfamid]['set_seqid'] print "\tdifftopopair" for cls in dataTable[pfamid]['difftopopair']: print "\t\t", cls for tup in dataTable[pfamid]['difftopopair'][cls]: print "\t\t\t", tup #}}} WriteHTML(dataTable, outpath) os.system("rm -rf %s" % (tmpdir))
def main(g_params): #{{{ argv = sys.argv numArgv = len(argv) if numArgv < 2: PrintHelp() return 1 outfile = "" fileListFile = "" fileList = [] pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3) threshold_Fraction_Group_2 = 0.05 threshold_NumSeq_Group_2 = 2 tableinfoFile = "" pdbtospFile = "" sprotACListFile = "" threshold_g12_seqidt = 20.0 topoalnFile = "" aapath = "" i = 1 isNonOptionArg = False while i < numArgv: if isNonOptionArg == True: fileList.append(argv[i]) isNonOptionArg = False i += 1 elif argv[i] == "--": isNonOptionArg = True i += 1 elif argv[i][0] == "-": if argv[i] in ["-h", "--help"]: PrintHelp() return 1 elif argv[i] in ["-o", "--o", "-outfile"]: (outfile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-l", "--l"]: (fileListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-seqidttype", "--seqidttype"]: (g_params['seqidttype'], i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-tableinfo", "--tableinfo"]: (tableinfoFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-topoaln", "--topoaln"]: (topoalnFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-aapath", "--aapath"]: (aapath, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-thncls2", "--thncls2"]: (threshold_NumSeq_Group_2, i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-thfrac2", "--thfrac2"]: (threshold_Fraction_Group_2, i) = myfunc.my_getopt_float(argv, i) elif argv[i] in ["-pfamdef", "--pfamdef"]: (pfamDefFile, i) = myfunc.my_getopt_str(argv, i) elif (argv[i] in ["-pdbtosp", "--pdbtosp"]): pdbtospFile, i = myfunc.my_getopt_str(argv, i) elif (argv[i] in ["-sprot", "--sprot"]): sprotACListFile, i = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-q", "--q"]: g_params['isQuiet'] = True i += 1 else: print >> sys.stderr, "Error! Wrong argument:", argv[i] return 1 else: fileList.append(argv[i]) i += 1 if fileListFile != "": fileList += myfunc.ReadIDList(fileListFile) if len(fileList) < 1: print >> sys.stderr, "No input set. 
exit" return 1 if myfunc.checkfile(topoalnFile, "topoalnFile") != 0: return 1 if myfunc.checkfile(aapath, "aapath") != 0: return 1 if outfile == "": print >> sys.stderr, "outfile not set. Exit" return 1 outpath = myfunc.my_dirname(outfile) if not os.path.exists(outpath): cmd = ["mkdir", "-p", outpath] try: subprocess.check_output(cmd) except subprocess.CalledProcessError, e: print e return 1
def main(g_params): #{{{
    """Compute the KR bias for every sequence and write two result files.

    Sequences and topologies are read with myfunc.ReadFasta from -seqfile
    and -topofile. For each sequence ID that also has a topology,
    CalKRBias(seq, topo, flank_win, max_dist) is evaluated and reported via
    WriteResult into "<rootname>.krlist.txt"; for the classes "IDT" and
    "INV" the bias value is additionally written to "<rootname>.krbias.txt".

    The output directory is the one given by -outpath (created if missing);
    without -outpath it is the directory of the sequence file.

    Args:
        g_params: dict of run-time settings; 'isDEBUG' is read, and
            'isQuiet' / 'isDEBUG' may be set from the command line.

    Returns:
        0 on success, 1 on a usage or input error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""  # empty means "use the directory of seqfile"
    idListFile = None
    idList = []
    seqfile = ""
    topofile = ""
    max_dist = 12  # maximal distance to the TM helix so that K, R residues are counted
    flank_win = 5  # flanking window of the TM helix, residues at position
                   # TMbeg-flank_win and TMend+flank_win are also counted

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-maxdist", "--maxdist"]:
                (max_dist, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-flankwin", "--flankwin"]:
                (flank_win, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-seqfile", "--seqfile"]:
                (seqfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-topofile", "--topofile"]:
                (topofile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-debug"]:
                g_params['isDEBUG'] = True
                i += 1
            else:
                print("Error! Wrong argument:", argv[i], file=sys.stderr)
                return 1
        else:
            idList.append(argv[i])
            i += 1

    # NOTE(review): idList / idListFile are accepted on the command line but
    # are not used to restrict the processed sequences - confirm intent.

    (idListSeq, annoListSeq, seqList) = myfunc.ReadFasta(seqfile)
    (idListTopo, annoListTopo, topoList) = myfunc.ReadFasta(topofile)
    if len(idListSeq) < 1 or len(idListTopo) < 1:
        print("No seq set", file=sys.stderr)
        return 1

    seqDict = dict(zip(idListSeq, seqList))
    topoDict = dict(zip(idListTopo, topoList))

    # The topology annotation line carries "<id> <cmpclass> ...". Headers
    # without a class token are skipped and fall back to "INV" below.
    cmpclassDict = {}
    for anno in annoListTopo:
        strs = anno.lstrip(">").split()
        if len(strs) >= 2:
            cmpclassDict[strs[0]] = strs[1]

    if outpath == "":
        outpath = os.path.dirname(seqfile)
        if outpath == "":
            outpath = "."
    elif not os.path.exists(outpath):
        os.makedirs(outpath)

    rootname = os.path.basename(os.path.splitext(seqfile)[0])
    outfile_kr_list = outpath + os.sep + rootname + ".krlist.txt"
    outfile_krbias = outpath + os.sep + rootname + ".krbias.txt"

    # Context managers close both files even if a helper raises.
    with open(outfile_kr_list, "w") as fpout_krlist, \
            open(outfile_krbias, "w") as fpout_krbias:
        for idd in idListSeq:
            if g_params['isDEBUG']:
                print("seqid: %s" % (idd))
            try:
                topo = topoDict[idd]
            except KeyError:
                print("no topo for %s" % idd, file=sys.stderr)
                continue
            try:
                seq = seqDict[idd]
            except KeyError:
                print("no seq for %s" % idd, file=sys.stderr)
                continue
            cmpclass = cmpclassDict.get(idd, "INV")
            (kr_bias, KR_pos_list, numTM) = CalKRBias(seq, topo, flank_win,
                                                      max_dist)
            WriteResult(idd, cmpclass, seq, numTM, kr_bias, KR_pos_list,
                        fpout_krlist)
            if cmpclass in ["IDT", "INV"]:
                fpout_krbias.write("%d\n" % kr_bias)
    return 0
def main(g_params): #{{{
    """Distribute files below a working directory into sub-folders.

    With -idlist the IDs read by myfunc.ReadIDList are combined with every
    -ext extension and passed to SplitToFolder_idlist; with -filelist the
    non-empty lines of the list file are passed to SplitToFolder_filelist.
    Both lists may be given in the same run and are handled independently.

    Args:
        g_params: dict of run-time settings; 'isQuiet' may be set from the
            command line.

    Returns:
        0 when every requested split was started, 1 on a usage error or
        when one of the list files was missing, empty or unusable.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    workdir = ""
    fileListFile = ""
    idListFile = ""
    extList = []
    maxfile_per_folder = 2000
    method = 0  # accepted for command-line compatibility; not used below

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # Positional arguments are not accepted by this script.
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-idlist", "--idlist"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-filelist", "--filelist"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-workdir", "--workdir"]:
                (workdir, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-max", "--max"]:
                (maxfile_per_folder, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-method", "--method"]:
                (method, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-ext", "--ext"]:
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                extList.append(tmpstr)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1

    if myfunc.checkfile(workdir) != 0:
        return 1

    if idListFile == "" and fileListFile == "":
        print >> sys.stderr, "At least one of idListFile and fileListFile need to be set"
        return 1

    # A failure in one list must not prevent the other from being handled,
    # so the status is accumulated instead of returning early.
    status = 0

    if idListFile != "":
        if os.path.exists(idListFile):
            idList = myfunc.ReadIDList(idListFile)
            if len(idList) <= 0:
                print >> sys.stderr, "No ID in idListFile %s" % (idListFile)
                status = 1
            elif len(extList) <= 0:
                print >> sys.stderr, "No extension set when idList is used."
                status = 1
            else:
                SplitToFolder_idlist(idList, workdir, extList,
                                     maxfile_per_folder)
        else:
            print >> sys.stderr, "idListFile %s does not exist" % (idListFile)
            status = 1

    if fileListFile != "":
        if os.path.exists(fileListFile):
            with open(fileListFile, "r") as fpin:
                # A real list (not a filter object) so len() always works.
                fileList = [x for x in fpin.read().split("\n") if x]
            if len(fileList) <= 0:
                print >> sys.stderr, "No file in fileListFile %s" % (
                    fileListFile)
                status = 1
            else:
                SplitToFolder_filelist(fileList, workdir, maxfile_per_folder)
        else:
            print >> sys.stderr, "fileListFile %s does not exist" % (
                fileListFile)
            status = 1

    return status
def main(g_params): #{{{
    """Match an MSA file against topologies from a database or a file.

    After option parsing the MSA file is validated with myfunc.checkfile.
    If -topodb is set (and "<topodb>0.db" exists) the result of
    MatchMSATopo_using_topodb is returned; otherwise, if -topo is set (and
    the file exists), the result of MatchMSATopo_using_topofile is returned.

    Args:
        g_params: dict of run-time settings; 'isQuiet' may be set from the
            command line.

    Returns:
        1 on a usage or input error, otherwise whatever the selected
        MatchMSATopo_* function returns.
    """
    args = sys.argv
    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return 1

    outfile = ""
    msafile = ""
    topofile = ""
    topodb = ""
    isIgnoreBadseq = True
    method_match = 1

    pos = 1
    after_separator = False
    while pos < nargs:
        arg = args[pos]
        if after_separator:
            # Nothing may follow "--": positional arguments are rejected.
            print("Error! Wrong argument:", arg, file=sys.stderr)
            return 1
        if arg == "--":
            after_separator = True
            pos += 1
            continue
        if arg[0] != "-":
            print("Error! Wrong argument:", arg, file=sys.stderr)
            return 1

        if arg in ("-h", "--help"):
            PrintHelp()
            return 1
        elif arg in ("-o", "--o", "-outfile"):
            outfile, pos = myfunc.my_getopt_str(args, pos)
        elif arg in ("-msa", "--msa"):
            msafile, pos = myfunc.my_getopt_str(args, pos)
        elif arg in ("-topo", "--topo"):
            topofile, pos = myfunc.my_getopt_str(args, pos)
        elif arg in ("-m", "--m"):
            method_match, pos = myfunc.my_getopt_int(args, pos)
            if method_match not in [0, 1]:
                print("method_match %d not in [0,1]" % method_match,
                      file=sys.stderr)
                return 1
        elif arg in ("-topodb", "--topodb"):
            topodb, pos = myfunc.my_getopt_str(args, pos)
        elif arg in ("-ig", "--ig", "-ignore-badseq", "--ignore-badseq"):
            answer, pos = myfunc.my_getopt_str(args, pos)
            # Any value starting with "y"/"Y" switches the flag on.
            isIgnoreBadseq = (answer[0].lower() == "y")
        elif arg in ("-q", "--q"):
            g_params['isQuiet'] = True
            pos += 1
        else:
            print("Error! Wrong argument:", arg, file=sys.stderr)
            return 1

    if myfunc.checkfile(msafile) != 0:
        return 1

    # The topology database takes precedence over the topology file.
    if topodb != "":
        if os.path.exists(topodb + '0.db'):
            return MatchMSATopo_using_topodb(msafile, topodb, isIgnoreBadseq,
                                             method_match, outfile)
        print("topodb %s does not exist" % (topodb), file=sys.stderr)
        return 1

    if topofile != "":
        if os.path.exists(topofile):
            return MatchMSATopo_using_topofile(msafile, topofile,
                                               isIgnoreBadseq, method_match,
                                               outfile)
        print("topofile %s does not exist" % (topofile), file=sys.stderr)
        return 1

    print("neither topofile nor topodb is set. exit", file=sys.stderr)
    return 1
def main(g_params): #{{{ argv = sys.argv numArgv = len(argv) if numArgv < 2: PrintHelp() return 1 outpath = "./" infileList = [] gomapfile = "/data3/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.Family.nr100.filter.fragmented.uniq.pfam.goinfowithancestor.txt" gotermfile = "/data3/wk/MPTopo/pfamAna_refpro/GO_analysis/GO_term.txt" anclevel = 2 gotype = "function" i = 1 isNonOptionArg = False while i < numArgv: if isNonOptionArg == True: infileList.append(argv[i]) isNonOptionArg = False i += 1 elif argv[i] == "--": isNonOptionArg = True i += 1 elif argv[i][0] == "-": if argv[i] in ["-h", "--help"]: PrintHelp() return 1 elif argv[i] in ["-outpath", "--outpath"]: (outpath, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-gomap", "--gomap"]: (gomapfile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-goterm", "--goterm"]: (gotermfile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-mp", "--mp"]: (g_params['pairwise_comparison_method'], i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-q", "--q"]: g_params['isQuiet'] = True i += 1 else: print >> sys.stderr, "Error! Wrong argument:", argv[i] return 1 else: infileList.append(argv[i]) i += 1 # print len(gomapfile), gomapfile # lines = open(gomapfile, "r").readlines() # print lines if myfunc.checkfile(gomapfile, "GO map file") != 0: return 1 if myfunc.checkfile(gotermfile, "GO Term file") != 0: return 1 cmpclassList = [] if g_params['pairwise_comparison_method'] == 1: cmpclassList = cmpclassList_method1 elif g_params['pairwise_comparison_method'] == 3: cmpclassList = cmpclassList_method3 else: print >> sys.stderr, "mp not in [1,3]. 
Exit" return 1 numCmpClass = len(cmpclassList) numInfile = len(infileList) if numInfile < len(cmpclassList): print >> sys.stderr, "input file less than len(cmpclassList)=%d" % ( len(cmpclassList)) goMapDict = ReadGOMap(gomapfile) goTermDict = ReadGOTerm(gotermfile) pairinfoDict = {} for infile in infileList: tag = "" for cls in cmpclassList: if infile.find(".%s." % (cls)) != -1: tag = cls break if tag == "": print >> sys.stderr, "bad infile %s" % (infile) return 1 pairinfoDict[tag] = ReadPairInfo(infile) for tag in pairinfoDict: pairinfo = pairinfoDict[tag] for j in xrange(len(pairinfo)): tup = pairinfo[j] ancList1 = GetAncenstorGOList_LevelOne(tup[0], goMapDict, gotype) ancList2 = GetAncenstorGOList_LevelOne(tup[1], goMapDict, gotype) common_ancList = list(set(ancList1) & set(ancList2)) for goid in common_ancList: pairinfo[j].append(goid) # output pairinfo with GO common term stemname = os.path.splitext(infileList[0].replace(".pairinfo.txt", ""))[0] for tag in pairinfoDict: outfile = outpath + os.sep + stemname + ".%s" % ( tag) + ".pairinfowithGOterm.txt" WritePairInfoWithGO(pairinfoDict[tag], outfile) print "%s output" % (outfile) tableCmpclassDict = {} tableNumTMHeatMapDict = {} for goid in SEL_GOID_SET: tableCmpclassDict[goid] = {} tableNumTMHeatMapDict[goid] = {} InitTableCmpClass(tableCmpclassDict[goid], numSeqIDTGroup, numCmpClass) InitTableNumTMHeatMap(tableNumTMHeatMapDict[goid], classList_TableNumTMHeatMap, MAX_NUMTM) for tag in pairinfoDict.keys(): cmpclass = tag idxClass = GetClassIndex(cmpclass, cmpclassList) pairinfo = pairinfoDict[tag] for li in pairinfo: if len(li) > 9: #print li seqidt = li[8] numTM1 = li[4] numTM2 = li[5] minNumTM = min(numTM1, numTM2) maxNumTM = max(numTM1, numTM2) for goid in li[9:]: idxGroup = GetSeqIDTGroupIndex(seqidt, seqIDTGroup) tableCmpclassDict[goid]['freq'][idxGroup][idxClass] += 1 tableCmpclassDict[goid]['subsum'][idxGroup] += 1 dt = tableNumTMHeatMapDict[goid]['ALL'] dt['data'][minNumTM][maxNumTM] += 1 if maxNumTM > 
dt['maxNumTM']: dt['maxNumTM'] += 1 dt['numPair'] += 1 # write cmpclass stemname2 = os.path.splitext(os.path.basename(stemname))[0] print "stemname2=", stemname2 for goid in SEL_GOID_SET: data = tableCmpclassDict[goid] outfile = outpath + os.sep + stemname2 + "." + goid + ".cmpclass.txt" if WriteTable2D(data['freq'], data['subsum'], cmpclassList, seqIDTGroup, outfile) == 0: xlabel = "Sequence identity" print "%s output" % (outfile) if g_params['pairwise_comparison_method'] == 1: cmd = "%s/plotCmpClass_mp1_cmpsp_5.sh %s -xlabel \"%s\""\ " -outstyle eps -outpath %s -plot1 -multiplot" %( binpath, outfile, xlabel, outpath) elif g_params['pairwise_comparison_method'] == 3: cmd = "%s/plotCmpClass_mp3.sh %s -xlabel \"%s\""\ " -outstyle eps -outpath %s -plot1 -multiplot" %( binpath, outfile, xlabel, outpath) os.system(cmd) data = tableNumTMHeatMapDict[goid] outfile = outpath + os.sep + stemname2 + "." + goid + ".numTMHeatMap.ALL.txt" mtx = data['ALL'] mode_norm = "norm_diag" print "%s numPair=%d" % (goid, mtx['numPair']) if WriteNumTMHeatMap(mtx['data'], mtx['maxNumTM'], mtx['numPair'], mode_norm, outfile) == 0: cmd = "%s/plotNumTMHeatMap.sh %s" % (binpath, outfile) os.system(cmd)
def main(g_params): #{{{ argv = sys.argv numArgv = len(argv) if numArgv < 2: PrintHelp() return 1 rmsg = "" outpath = "" jobid = "" datapath = "" numseq = -1 numseq_this_user = -1 email = "" host_ip = "" base_www_url = "" i = 1 isNonOptionArg = False while i < numArgv: if isNonOptionArg == True: print >> g_params['fperr'], "Error! Wrong argument:", argv[i] return 1 isNonOptionArg = False i += 1 elif argv[i] == "--": isNonOptionArg = True i += 1 elif argv[i][0] == "-": if argv[i] in ["-h", "--help"]: PrintHelp() return 1 elif argv[i] in ["-outpath", "--outpath"]: (outpath, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-email", "--email"]: (email, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-host", "--host"]: (host_ip, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-nseq", "--nseq"]: (numseq, i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-nseq-this-user", "--nseq-this-user"]: (numseq_this_user, i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-baseurl", "--baseurl"]: (base_www_url, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-jobid", "--jobid"]: (jobid, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-datapath", "--datapath"]: (datapath, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-force", "--force"]: g_params['isForceRun'] = True i += 1 elif argv[i] in ["-q", "--q"]: g_params['isQuiet'] = True i += 1 else: print >> g_params['fperr'], "Error! Wrong argument:", argv[i] return 1 else: print >> g_params['fperr'], "Error! Wrong argument:", argv[i] return 1 if outpath == "": print >> g_params['fperr'], "outpath not set. exit" return 1 elif not os.path.exists(outpath): cmd = ["mkdir", "-p", outpath] try: rmsg = subprocess.check_output(cmd, stderr=subprocess.STDOUT) except subprocess.CalledProcessError, e: print e print rmsg return 1
def main(g_params):#{{{ argv = sys.argv numArgv = len(argv) if numArgv < 2: PrintHelp() return 1 outfile = "" fileListFile = "" fileList = [] i = 1 isNonOptionArg=False while i < numArgv: if isNonOptionArg == True: fileList.append(argv[i]) isNonOptionArg = False i += 1 elif argv[i] == "--": isNonOptionArg = True i += 1 elif argv[i][0] == "-": if argv[i] in ["-h", "--help"]: PrintHelp() return 1 elif argv[i] in ["-o", "--o", "-outfile"]: (outfile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-nmax", "--nmax"]: (g_params['nmax'], i) = myfunc.my_getopt_int(argv, i) elif argv[i] in ["-l", "--l"] : (fileListFile, i) = myfunc.my_getopt_str(argv, i) elif argv[i] in ["-q", "--q"]: g_params['isQuiet'] = True i += 1 else: print >> sys.stderr, "Error! Wrong argument:", argv[i] return 1 else: fileList.append(argv[i]) i += 1 if fileListFile != "": fileList += myfunc.ReadIDList(fileListFile) if len(fileList) < 1: print >> sys.stderr, "%s: no input file is set. exit"%(sys.argv[0]) fpout = myfunc.myopen(outfile, sys.stdout, "w", False) pfamidset_all = set([]) pfamidset_output = set([]) nmax = g_params['nmax'] cnt_round = 0 while 1: cnt_round += 1 famid2seqidDict = {} for i in xrange(len(fileList)): hdl = myfunc.ReadLineByBlock(fileList[i]) if hdl.failure: continue lines = hdl.readlines() while lines != None: for line in lines: line = line.strip() if not line or line[0] == "#": continue strs = line.split() if len(strs) > 2: seqid = strs[0] pfamidlist = strs[2:] for pfamid in pfamidlist: if cnt_round == 1: pfamidset_all.add(pfamid) if pfamid in pfamidset_output: continue if not pfamid in famid2seqidDict: if len(famid2seqidDict) < nmax: famid2seqidDict[pfamid] = [] if pfamid in famid2seqidDict: famid2seqidDict[pfamid].append(seqid) else: msg="broken item in file %s: line \"%s\"" print >> sys.stderr, msg%(fileList[i], line) lines = hdl.readlines() hdl.close() for pfamid in famid2seqidDict: pfamidset_output.add(pfamid) seqidlist = famid2seqidDict[pfamid] seqidlist = 
myfunc.uniquelist(seqidlist) fpout.write("%s %d"%(pfamid, len(seqidlist))) for seqid in seqidlist: fpout.write(" %s"%(seqid)) fpout.write("\n") if len(pfamidset_output) == len(pfamidset_all): break else: print " %d / %d "%(len(pfamidset_output), len(pfamidset_all)) myfunc.myclose(fpout) if outfile != "": print "result output to %s"%(outfile) return 0
def main(g_params): #{{{
    """Generate sequence pairs within families for the given family IDs.

    Family IDs come from positional arguments and the optional -l list
    file. The family-to-sequence map is read from -mapfile with
    myfunc.ReadFam2SeqidMap, and pairs are produced by one of
    GeneratePairWithinFam_m_0 / _m_1 / _m_2 depending on -method. Any other
    method value writes nothing.

    Args:
        g_params: dict of run-time settings; 'isOnlyPDB' and
            'uniprotidlist_with_pdb' are read, and 'isQuiet', 'isOnlyPDB',
            'uniprotidlist_with_pdb' and 'uniprot2pdbMap' may be set here.

    Returns:
        0 on success, 1 on a usage or input error.
    """
    args = sys.argv
    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return 1

    outfile = ""
    outfile_with_famid = ""
    outfile_with_pdb = ""
    outfile_fam2seqmap = ""
    idListFile = ""
    mapfile = "%s%s%s" % (
        DATADIR3, os.sep,
        "wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.clanid2seqid"
    )
    restrictIDListFile = ""
    idList = []
    maxseq_for_fam = 200
    maxpair_for_fam = 300
    method = 0
    rand_seed = None
    pdbtospFile = ""
    isOnlyPDB = False

    pos = 1
    next_is_id = False
    while pos < nargs:
        arg = args[pos]
        if next_is_id:
            # The token right after "--" is always taken as an ID.
            idList.append(arg)
            next_is_id = False
            pos += 1
        elif arg == "--":
            next_is_id = True
            pos += 1
        elif arg[0] != "-":
            idList.append(arg)
            pos += 1
        elif arg in ("-h", "--help"):
            PrintHelp()
            return 1
        elif arg in ("-o", "--o", "-outfile", "--outfile"):
            (outfile, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ("-outwithfamid", "--outwithfamid"):
            (outfile_with_famid, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ("-outfam2seqmap", "--outfam2seqmap"):
            (outfile_fam2seqmap, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ("-outwithpdb", "--outwithpdb"):
            (outfile_with_pdb, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ("-tmprolist", "--tmprolist", "-restrictlist",
                     "--restrictlist"):
            (restrictIDListFile, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ("-mapfile", "--mapfile"):
            (mapfile, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ("-pdbtosp", "--pdbtosp"):
            (pdbtospFile, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ("-seed", "--seed"):
            (rand_seed, pos) = myfunc.my_getopt_int(args, pos)
        elif arg in ("-l", "--l"):
            (idListFile, pos) = myfunc.my_getopt_str(args, pos)
        elif arg in ("-maxseq", "--maxseq"):
            (maxseq_for_fam, pos) = myfunc.my_getopt_int(args, pos)
        elif arg in ("-maxpair", "--maxpair"):
            (maxpair_for_fam, pos) = myfunc.my_getopt_int(args, pos)
        elif arg in ("-m", "--m", "-method", "--method"):
            (method, pos) = myfunc.my_getopt_int(args, pos)
        elif arg in ("-q",):
            g_params['isQuiet'] = True
            pos += 1
        elif arg in ("-onlypdb", "--onlypdb"):
            g_params['isOnlyPDB'] = True
            pos += 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", arg
            return 1

    # A list file that does not exist is ignored.
    if os.path.exists(idListFile):
        idList.extend(myfunc.ReadIDList(idListFile))

    if len(idList) < 1:
        print >> sys.stderr, "no ID set. exit"
        return 1

    if myfunc.checkfile(mapfile, "idMapFile") != 0:
        return 1
    idMapDict = myfunc.ReadFam2SeqidMap(mapfile)

    # Read in pdbtosp map
    if pdbtospFile != "":
        (pdb2uniprotMap, uniprot2pdbMap) = myfunc.ReadPDBTOSP(pdbtospFile)
        g_params['uniprotidlist_with_pdb'] = set(uniprot2pdbMap.keys())
        g_params['uniprot2pdbMap'] = uniprot2pdbMap

    if g_params['isOnlyPDB'] == True:
        if pdbtospFile == "":
            print >> sys.stderr, "onlypdb is true but pdbtospFile is not set. exit."
            return 1
        if g_params['uniprotidlist_with_pdb'] == set([]):
            print >> sys.stderr, "onlypdb is true but uniprotidlist_with_pdb is empty. exit."
            return 1

    restrictIDSet = set([])
    if restrictIDListFile != "":
        restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))

    # Only the main output is given sys.stdout as fallback; the auxiliary
    # outputs are given None.
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_withfamid = myfunc.myopen(outfile_with_famid, None, "w", False)
    fpout_withpdb = myfunc.myopen(outfile_with_pdb, None, "w", False)
    fpout_fam2seqmap = myfunc.myopen(outfile_fam2seqmap, None, "w", False)

    if method == 0:
        GeneratePairWithinFam_m_0(idList, idMapDict, restrictIDSet,
                                  maxseq_for_fam, rand_seed, fpout,
                                  fpout_withfamid)
    elif method == 1:
        GeneratePairWithinFam_m_1(idList, idMapDict, restrictIDSet,
                                  maxpair_for_fam, rand_seed, fpout,
                                  fpout_withfamid, fpout_fam2seqmap)
    elif method == 2:  # all to all
        GeneratePairWithinFam_m_2(idList, idMapDict, restrictIDSet, fpout,
                                  fpout_withfamid, fpout_withpdb)

    for handle in (fpout, fpout_withfamid, fpout_withpdb, fpout_fam2seqmap):
        myfunc.myclose(handle)
    return 0
def main(g_params):#{{{
    """Parse the command line, select pairs per family and write alignments.

    Options are read from ``sys.argv``.  The positional argument (or the
    single argument following a literal "--") names the pair list file with
    family IDs; when several are given the last one wins.

    Args:
        g_params: dict of run-wide settings.  The flags ``-all``,
            ``-seqidttype``, ``-maxsel``, ``-verbose`` and ``-q`` store
            'isOutputAll', 'seqidttype', 'maxsel', 'verbose' and 'isQuiet'
            in it.  'verbose' and 'isOutputAll' are read after parsing
            ('maxsel' too, for a progress message), so they must already
            be present unless set on the command line.

    Returns:
        0 on success; 1 when help is printed, an argument is not
        recognised, or no pair list file was given.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    msapath = ""
    outname = ""
    pairlistwithfamid_file = ""
    isLocalAlignment = False
    msaext = ".msa.fa"

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # Only the one argument right after "--" is forced positional.
            pairlistwithfamid_file = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            # startswith() instead of arg[0] so that an empty-string
            # argument does not raise IndexError.
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outname", "--outname"):
                (outname, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-msapath", "--msapath"):
                (msapath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-msaext", "--msaext"):
                (msaext, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-localali", "--localali"):
                isLocalAlignment = True
                i += 1
            elif arg in ("-all", "--all"):
                g_params['isOutputAll'] = True
                i += 1
            elif arg in ("-seqidttype", "--seqidttype"):
                (g_params['seqidttype'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ("-maxsel", "--maxsel"):
                (g_params['maxsel'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ("-verbose", "--verbose"):
                (g_params['verbose'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ("-q",):
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (arg,))
                return 1
        else:
            pairlistwithfamid_file = arg
            i += 1

    if pairlistwithfamid_file == "":
        sys.stderr.write("pairlistwithfamid_file not set\n")
        return 1

    if outname == "":
        # Default output name: input path without its extension, tagged
        # ".local" for local alignments, plus ".selected".
        addname = ""
        if isLocalAlignment:
            addname = ".local"
        rootname = os.path.splitext(pairlistwithfamid_file)[0]
        outname = rootname + addname + ".selected"

    verbose = g_params['verbose']

    if verbose >= 1:
        print("Reading file %s" % (pairlistwithfamid_file,))
    # pairlistDict: {pfamid: [(id1, id2), (id1, id2), ...]}
    pairlistDict = ReadPairListWithFamID(pairlistwithfamid_file)

    # obtain pairwise alignment factor
    if verbose >= 1:
        print("Obtaining pairwise alignment factor")
    AddPairwiseAlignmentFactor(pairlistDict, msapath, msaext,
                               isLocalAlignment)

    if not g_params['isOutputAll']:
        if verbose >= 1:
            # Two spaces before the value reproduce the original output.
            print("Select pair list to limit to  %s" % (g_params['maxsel'],))
        pairlistDict = RandSelectPairList(pairlistDict)

    if verbose >= 1:
        print("Write out the alignment")
    WritePairAln(pairlistDict, msapath, msaext, outname)

    return 0