Example #1
0
def main(g_params):  #{{{
    """Parse the command line and run Build_seqid2pfamid on each input file.

    Recognised options: -h/--help, -outpath DIR, -e FLOAT (stored in
    g_params['evalue_threshold']), -l LISTFILE, -q (sets
    g_params['isQuiet']) and -overwrite/-force (sets
    g_params['isOverwrite']).  Every other argument that does not start
    with "-" is taken as an input file; the single argument that follows
    "--" is taken as an input file whatever its first character is.

    The myfunc.my_getopt_* helpers are used as returning the option value
    together with the updated argument index.

    Returns 1 after printing the help text or on a usage error; otherwise
    falls off the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    fileListFile = ""
    fileList = []

    i = 1
    isNonOptionArg = False  # True only for the one argument right after "--"
    while i < numArgv:
        if isNonOptionArg:
            fileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-e", "--e"]:
                (g_params['evalue_threshold'],
                 i) = myfunc.my_getopt_float(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-overwrite", "--overwrite", "-force", "--force"]:
                g_params['isOverwrite'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            fileList.append(argv[i])
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    if len(fileList) < 1:
        print >> sys.stderr, "%s: no input file is set. exit" % (sys.argv[0])
        return 1

    # The existence test must be given the path; calling os.path.exists()
    # without an argument raises TypeError as soon as -outpath is used.
    if outpath != "" and not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        subprocess.check_output(cmd)

    for infile in fileList:
        Build_seqid2pfamid(infile, outpath)
Example #2
0
def main(g_params):  #{{{
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    datapath = "."
    outpath = './'
    idList = []
    idListFile = ''

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            isNonOptionArg = False
            idList.append(sys.argv[i])
            i = i + 1
        elif sys.argv[i] == "--":
            isNonOptionArg = True
            i = i + 1
        elif sys.argv[i][0] == "-":
            if sys.argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif sys.argv[i] in ["-datapath", "--datapath"]:
                datapath = sys.argv[i + 1]
                i += 2
            elif argv[i] in ["-method", "--method"]:
                g_params['method'], i = myfunc.my_getopt_int(sys.argv, i)
            elif sys.argv[i] in ["-l", "--l"]:
                idListFile = sys.argv[i + 1]
                i = i + 2
            elif sys.argv[i] in ["-outpath", "--outpath"]:
                outpath = sys.argv[i + 1]
                i = i + 2
            else:
                print >> sys.stderr, ("Error! Wrong argument:%s" % sys.argv[i])
                return 1
        else:
            idList.append(sys.argv[i])
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)
    if len(idList) > 0:
        os.system("mkdir -p %s" % outpath)
        cnt = 0
        for pfamid in idList:
            print "================== ", cnt, pfamid, " ===================="
            if g_params['method'] == 0:
                Itol_Tree_m0(pfamid, datapath, outpath)
            elif g_params['method'] == 1:
                Itol_Tree_m1(pfamid, datapath, outpath)
            cnt += 1
Example #3
0
def main(g_params):  #{{{
    """Parse the command line and convert BLAST m9 files with BlastM9toPairlist.

    Options: -h/--help, -l LISTFILE, -o/-outfile FILE, -evalue FLOAT
    (g_params['evalue_th']), -seqidt FLOAT (g_params['seqidt_th']) and
    -round INT (g_params['iteration']).  Other arguments, and the single
    argument following "--", are collected as input files.

    The myfunc.my_getopt_* helpers are used as returning the option value
    together with the updated argument index.

    Returns 1 after printing help, on an unknown option or when no input is
    given; otherwise returns None.
    """
    # Check argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileList = []
    fileListFile = ""

    i = 1
    isNonOptionArg = False  # True only for the one argument right after "--"
    while i < numArgv:
        if isNonOptionArg:
            fileList.append(sys.argv[i])
            isNonOptionArg = False
            i += 1
        elif sys.argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif sys.argv[i][0] == "-":
            if sys.argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif sys.argv[i] in ["-l", "--l"]:
                fileListFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif sys.argv[i] in ["-o", "--o", "-outfile"]:
                outfile, i = myfunc.my_getopt_str(sys.argv, i)
            elif sys.argv[i] in ["-evalue", "--evalue"]:
                g_params['evalue_th'], i = myfunc.my_getopt_float(sys.argv, i)
            elif sys.argv[i] in ["-seqidt", "--seqidt"]:
                g_params['seqidt_th'], i = myfunc.my_getopt_float(sys.argv, i)
            elif sys.argv[i] in ["-round", "--round"]:
                # Unpack like every other option: without updating i the
                # loop would re-read "-round" forever, and the parameter
                # would hold the whole (value, index) pair.
                g_params['iteration'], i = myfunc.my_getopt_int(sys.argv, i)
            else:
                print >> sys.stderr, ("Error! Wrong argument: '%s'" %
                                      sys.argv[i])
                return 1
        else:
            fileList.append(sys.argv[i])
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile, delim="\n")
    if len(fileList) < 1:
        print >> sys.stderr, "No input set. exit"
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for infile in fileList:
            BlastM9toPairlist(infile, fpout)
    finally:
        # Release the output handle even when a conversion fails.
        myfunc.myclose(fpout)
Example #4
0
def main(g_params):  #{{{
    """Collect IDs from the command line and pass them to GIID2UniprotID.

    Options: -h/--help, -o/-outfile FILE, -l/-list LISTFILE.  Other
    arguments, and the single argument following "--", are taken as IDs.
    The IDs are joined with spaces into the 'query' entry of the request
    parameters, which ask for a 'P_GI' to 'ID' mapping in 'tab' format.

    Returns 1 after printing help, on an unknown option or when no ID is
    given; otherwise returns None.
    """
    args = sys.argv
    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return 1

    outfile = ""
    idListFile = ""
    idList = []

    pos = 1
    forceID = False  # set by "--": the next argument is an ID, not an option
    while pos < nargs:
        token = args[pos]
        if forceID:
            idList.append(token)
            forceID = False
            pos += 1
        elif token == "--":
            forceID = True
            pos += 1
        elif token[0] != "-":
            idList.append(token)
            pos += 1
        elif token in ("-h", "--help"):
            PrintHelp()
            return 1
        elif token in ("-o", "--o", "-outfile"):
            (outfile, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-l", "--l", "-list"):
            (idListFile, pos) = myfunc.my_getopt_str(args, pos)
        else:
            print >> sys.stderr, "Error! Wrong argument:", token
            return 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) < 1:
        print >> sys.stderr, "No ID set. exit"
        return 1

    params = {
        'from': 'P_GI',
        'to': 'ID',  # to uniprot id
        'format': 'tab',
        'query': " ".join(idList),
    }

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    GIID2UniprotID(params, fpout)
    myfunc.myclose(fpout)
def main(g_params):#{{{
    """Run CountUniquePairInvertedInfo on every input file.

    Options: -h/--help, -o/-outfile FILE, -outpath DIR (accepted but not
    used afterwards), -l LISTFILE, -q (sets g_params['isQuiet']).  Other
    arguments, and the single argument following "--", are input files.
    The Pfam definition file is read from a fixed location under DATADIR3.

    Returns 1 after printing help or on an unknown option, otherwise None.
    """
    args = sys.argv
    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return 1

    outpath = "./"
    outfile = ""
    fileListFile = ""
    fileList = []
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv"%(DATADIR3)

    pos = 1
    forceFile = False  # set by "--": the next argument is a file name
    while pos < nargs:
        token = args[pos]
        if forceFile:
            fileList.append(token)
            forceFile = False
            pos += 1
        elif token == "--":
            forceFile = True
            pos += 1
        elif token[0] != "-":
            fileList.append(token)
            pos += 1
        elif token in ("-h", "--help"):
            PrintHelp()
            return 1
        elif token in ("-o", "--o", "-outfile"):
            (outfile, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-outpath", "--outpath"):
            (outpath, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-l", "--l"):
            (fileListFile, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-q", "--q"):
            g_params['isQuiet'] = True
            pos += 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", token
            return 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    # Only the Pfam-ID part of the definitions is needed below.
    (pfamidDefDict, _clanidDefDict) = ReadPfamDefFile(pfamDefFile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    for infile in fileList:
        CountUniquePairInvertedInfo(infile, pfamidDefDict, fpout)

    myfunc.myclose(fpout)
def main(g_params):
    seqinfofile = "/data3/wk/MPTopo/pfamAna/pfam2-giid-refseqid-pfamid-description.txt"
    seqidlistfile = "/data3/wk/MPTopo/pfamAna/pairwise/all/pfamfullseq.selTM_uniq.seqidlist"
    seqInfoDict = {}
    seqInfoDict = ReadSeqDefInfo(seqinfofile)
    idList = myfunc.ReadIDList(seqidlistfile)

    print "#gi_id | refseq_id | pfamid | sequence_description"
    for idd in idList:
        print "%s | %s | %s | %s" % ( idd,
                seqInfoDict[idd]['refseqid'],seqInfoDict[idd]['pfamid'],
                seqInfoDict[idd]['seqdef'])
    return 0
Example #7
0
def main(g_params):  #{{{
    """Run ExcludeConsensus on every input file.

    Options: -h/--help, -o/-outfile FILE (accepted but not used
    afterwards), -outpath DIR, -l LISTFILE, -q (sets g_params['isQuiet']).
    Other arguments, and the single argument following "--", are input
    files.  A non-empty, not yet existing outpath is created with
    "mkdir -p".

    Returns 1 after printing help or on an unknown option, otherwise None.
    """
    args = sys.argv
    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return 1

    outpath = ""
    outfile = ""
    fileListFile = ""
    fileList = []

    pos = 1
    forceFile = False  # set by "--": the next argument is a file name
    while pos < nargs:
        token = args[pos]
        if forceFile:
            fileList.append(token)
            forceFile = False
            pos += 1
        elif token == "--":
            forceFile = True
            pos += 1
        elif token[0] != "-":
            fileList.append(token)
            pos += 1
        elif token in ("-h", "--help"):
            PrintHelp()
            return 1
        elif token in ("-o", "--o", "-outfile"):
            (outfile, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-outpath", "--outpath"):
            (outpath, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-l", "--l"):
            (fileListFile, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-q", "--q"):
            g_params['isQuiet'] = True
            pos += 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", token
            return 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    needsDir = outpath != "" and not os.path.exists(outpath)
    if needsDir:
        os.system("mkdir -p %s" % (outpath))

    for infile in fileList:
        ExcludeConsensus(infile, outpath)
def main(g_params):  #{{{
    """Run GetTreeListOrder on every tree file named on the command line.

    Options: -h/--help and -outpath DIR.  Other arguments, and the single
    argument following "--", are tree files.  No option sets fileListFile,
    so the list-file branch below is only reached with its initial value.

    Returns an empty tuple when called without arguments, 0 after printing
    help, 1 on an unknown option or when no input is given, and None after
    processing.
    """
    args = sys.argv
    outpath = ''
    fileList = []
    fileListFile = ''

    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return ()

    pos = 1
    forceFile = False  # set by "--": the next argument is a file name
    while pos < nargs:
        token = args[pos]
        if forceFile:
            forceFile = False
            fileList.append(token)
            pos += 1
        elif token == "--":
            forceFile = True
            pos += 1
        elif token[0] != "-":
            fileList.append(token)
            pos += 1
        elif token in ("-h", "--help"):
            PrintHelp()
            return 0
        elif token in ("-outpath", "--outpath"):
            (outpath, pos) = myfunc.my_getopt_str(args, pos)
        else:
            print(("Error! Wrong argument:%s" % token), file=sys.stderr)
            return 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    if len(fileList) <= 0:
        print("No input set. Exit", file=sys.stderr)
        return 1

    for cnt, treefile in enumerate(fileList):
        print("================== ", cnt, treefile,
              " ====================")
        GetTreeListOrder(treefile, outpath)
def SplitIDList(infile, nsplit, method, datapath, ext, outpath):#{{{
    idList = myfunc.ReadIDList(infile)
    rootname = os.path.basename(os.path.splitext(infile)[0])
    numID = len(idList)
    if numID <= 0:
        print >> sys.stderr, "no ID in the idlist file %s"%(infile)
        return 1

    if method == 0:
        nfile_per_split = int(ceil(numID / float(nsplit)))
        i = 0
        cntfile = 0
        while i < numID:
            outfile = outpath + os.sep + rootname + ".split_%d"%(cntfile)
            fpout = myfunc.myopen(outfile, None, "w", True)
            cntID_per_split = 0
            for j in xrange(i, i + nfile_per_split):
                if j < numID:
                    fpout.write("%s\n"%(idList[j]))
                    cntID_per_split += 1
            myfunc.myclose(fpout)
            print "split to %s \t %4d IDs"%(outfile, cntID_per_split)
            cntfile += 1
            i += nfile_per_split
    elif method in  [1,2]:
        sumFileSize = 0.0
        fsizeList = []
        for idd in idList:
            fname = datapath + os.sep + idd + ext
            if os.path.exists(fname):
                fsize = os.path.getsize(fname)
                if fsize > 0:
                    if method == 1:
                        sumFileSize += float(fsize)
                        fsizeList.append((idd, float(fsize)))
                    elif method == 2:
                        sumFileSize += float(fsize)*float(fsize)
                        fsizeList.append((idd, float(fsize)*fsize))
        sumfilesize_per_split = ceil(sumFileSize / float(nsplit))
        fsizeList = sorted(fsizeList, key = lambda x:x[1], reverse=True)
        print "sumfilesize_per_split = %g"% (sumfilesize_per_split)
        i = 0
        cntfile = 0
        numID = len(fsizeList)
        while i < numID:
            outfile = outpath + os.sep + rootname + ".split_%d"%(cntfile)
            fpout = myfunc.myopen(outfile, None, "w", True)
            j = 0
            cntID_per_split = 0
            sumFileSize = 0.0
            while sumFileSize <= sumfilesize_per_split:
                idx = i+j
                if i+j > numID -1:
                    break
                idd = fsizeList[idx][0]
                fsize = fsizeList[idx][1]
                sumFileSize += fsize
                fpout.write("%s\n"%(idd))
                cntID_per_split += 1
                j += 1
            myfunc.myclose(fpout)
            print "split to %s \t %4d IDs sumsize = %g"%(outfile, cntID_per_split, sumFileSize)
            cntfile += 1
            i += j
        return 1
Example #10
0
def main(g_params):  #{{{
    """Write the records of a FASTA file that match an ID list and/or e-value range.

    Options: -h/--help, -l/-list LISTFILE, -f/-fasta FILE, -o/-outfile FILE,
    -mine FLOAT and -maxe FLOAT (stored in g_params['min_evalue'] /
    g_params['max_evalue'], both also set g_params['isEvalueSet']).  Other
    arguments, and the single argument following "--", are sequence IDs.

    The FASTA file is read in blocks of g_params['BLOCK_SIZE'] characters.
    Each record delivered by myfunc.ReadFastaFromBuffer is used here as
    (id, annotation line, sequence).  A record is written when it passes
    the ID filter (if IDs were given) and, if an e-value range was set and
    its annotation mentions "evalue", when the parsed e-value is missing or
    lies inside [min_evalue, max_evalue].

    Returns 1 after printing help or on a usage error, otherwise None.
    """
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    idList = []
    idListFile = ""
    fastaFile = ""

    i = 1
    isNonOptionArg = False  # True only for the one argument right after "--"
    while i < numArgv:
        if isNonOptionArg:
            idList.append(sys.argv[i])
            isNonOptionArg = False
            i = i + 1
        elif sys.argv[i] == "--":
            isNonOptionArg = True
            i = i + 1
        elif sys.argv[i][0] == "-":
            if (sys.argv[i] in ["-h", "--help"]):
                PrintHelp()
                return 1
            elif (sys.argv[i] in ["-l", "--l", "-list", "--list"]):
                idListFile = sys.argv[i + 1]
                i = i + 2
            elif (sys.argv[i] in ["-f", "--f", "-fasta", "--fasta"]):
                fastaFile = sys.argv[i + 1]
                i = i + 2
            elif (sys.argv[i] in ["-o", "--o", "-outfile", "--outfile"]):
                outFile = sys.argv[i + 1]
                i = i + 2
            elif (sys.argv[i] in ["-mine", "--mine"]):
                g_params['min_evalue'] = float(sys.argv[i + 1])
                g_params['isEvalueSet'] = True
                i = i + 2
            elif (sys.argv[i] in ["-maxe", "--maxe"]):
                g_params['max_evalue'] = float(sys.argv[i + 1])
                g_params['isEvalueSet'] = True
                i = i + 2
            else:
                print(("Error! Wrong argument:%s" % sys.argv[i]),
                      file=sys.stderr)
                return 1
        else:
            idList.append(sys.argv[i])
            i += 1

    if fastaFile == "":
        print("Fatal!  fasta file not set. Exit.", file=sys.stderr)
        return 1
    elif not os.path.exists(fastaFile):
        print("Fatal! fasta file %s does not exist. Exit." % (fastaFile),
              file=sys.stderr)
        return 1

    if os.path.exists(idListFile):
        idList += myfunc.ReadIDList(idListFile)

    isIDSet = len(idList) > 0

    if not g_params['isEvalueSet'] and not isIDSet:
        print("Error! no ID nor evalue threshold is set. Exit",
              file=sys.stderr)
        return 1

    idListSet = set(idList)
    BLOCK_SIZE = g_params['BLOCK_SIZE']
    isEvalueSet = g_params['isEvalueSet']
    min_evalue = g_params['min_evalue']
    max_evalue = g_params['max_evalue']

    fpout = myfunc.myopen(filename=outFile,
                          default_fp=sys.stdout,
                          mode="w",
                          isRaise=False)
    try:
        # open() raises on failure, so no truthiness check of the handle is
        # needed; the with-block closes it on every exit path.
        with open(fastaFile, "r") as fpin:
            unprocessedBuffer = ""
            isEOFreached = False
            while not isEOFreached:
                buff = fpin.read(BLOCK_SIZE)
                # A short read means the end of the file was reached.
                if len(buff) < BLOCK_SIZE:
                    isEOFreached = True
                recordList = []
                unprocessedBuffer = myfunc.ReadFastaFromBuffer(
                    unprocessedBuffer + buff, recordList, isEOFreached)
                for r in recordList:
                    if isIDSet and r[0] not in idListSet:
                        continue
                    if isEvalueSet and r[1].lower().find('evalue') >= 0:
                        evalue = myfunc.GetEvalueFromAnnotation(r[1])
                        # Records whose e-value cannot be parsed are kept.
                        if (evalue is not None and
                                not (min_evalue <= evalue <= max_evalue)):
                            continue
                    fpout.write(">%s\n" % r[1])
                    fpout.write("%s\n" % r[2])
    finally:
        myfunc.myclose(fpout)
def main(g_params):#{{{
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []

    i = 1
    isNonOptionArg=False
    while i < numArgv:
        if isNonOptionArg == True:
            fileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-nmax", "--nmax"]:
                (g_params['nmax'], i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-l", "--l"] :
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            fileList.append(argv[i])
            i += 1


    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    if len(fileList) < 1:
        print >> sys.stderr, "%s: no input file is set. exit"%(sys.argv[0])

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    pfamidset_all = set([])
    pfamidset_output = set([])
    nmax = g_params['nmax']

    cnt_round = 0
    while 1:
        cnt_round += 1
        famid2seqidDict = {}
        for i in xrange(len(fileList)):
            hdl = myfunc.ReadLineByBlock(fileList[i])
            if hdl.failure:
                continue
            lines = hdl.readlines()
            while lines != None:
                for line in lines:
                    line = line.strip()
                    if not line or line[0] == "#":
                        continue
                    strs = line.split()
                    if len(strs) > 2:
                        seqid = strs[0]
                        pfamidlist = strs[2:]
                        for pfamid in pfamidlist:
                            if cnt_round == 1:
                                pfamidset_all.add(pfamid)
                            if pfamid in pfamidset_output:
                                continue
                            if not pfamid in famid2seqidDict:
                                if len(famid2seqidDict) < nmax:
                                    famid2seqidDict[pfamid] = []
                            if pfamid in famid2seqidDict:
                                famid2seqidDict[pfamid].append(seqid)
                    else:
                        msg="broken item in file %s: line \"%s\""
                        print >> sys.stderr, msg%(fileList[i], line)
                lines = hdl.readlines()
            hdl.close()

        for pfamid in famid2seqidDict:
            pfamidset_output.add(pfamid)
            seqidlist = famid2seqidDict[pfamid]
            seqidlist = myfunc.uniquelist(seqidlist)
            fpout.write("%s %d"%(pfamid, len(seqidlist)))
            for seqid in seqidlist:
                fpout.write(" %s"%(seqid))
            fpout.write("\n")

        if len(pfamidset_output) == len(pfamidset_all):
            break
        else:
            print " %d / %d "%(len(pfamidset_output), len(pfamidset_all))

    myfunc.myclose(fpout)
    if outfile != "":
        print "result output to %s"%(outfile)

    return 0
Example #12
0
def main(g_params):  #{{{
    """Distribute files into sub-folders of a working directory.

    Options: -h/--help, -idlist FILE, -filelist FILE, -workdir DIR,
    -max INT (files per folder, default 2000), -method INT (parsed but not
    used afterwards), -ext EXT (repeatable) and -q (sets
    g_params['isQuiet']).  Positional arguments are not accepted, not even
    after "--".

    With -idlist, SplitToFolder_idlist is called with the IDs, the working
    directory, the extensions and the per-folder limit; with -filelist,
    SplitToFolder_filelist is called with the non-empty lines of the list
    file.  Problems with the list files are reported on stderr without
    changing the return value.

    Returns 1 after printing help, on a usage error, when the workdir check
    fails or when neither list is given; otherwise returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    workdir = ""
    fileListFile = ""
    idListFile = ""
    extList = []
    maxfile_per_folder = 2000
    method = 0

    i = 1
    isNonOptionArg = False  # True only for the one argument right after "--"
    while i < numArgv:
        if isNonOptionArg:
            # This program takes no positional arguments.
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-idlist", "--idlist"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-filelist", "--filelist"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-workdir", "--workdir"]:
                (workdir, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-max", "--max"]:
                (maxfile_per_folder, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-method", "--method"]:
                (method, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-ext", "--ext"]:
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                extList.append(tmpstr)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1

    if myfunc.checkfile(workdir) != 0:
        return 1

    if idListFile == "" and fileListFile == "":
        print >> sys.stderr, "At least one of idListFile and fileListFile need to be set"
        return 1

    if idListFile != "":
        if os.path.exists(idListFile):
            idList = myfunc.ReadIDList(idListFile)
            if len(idList) <= 0:
                print >> sys.stderr, "No ID in idListFile %s" % (idListFile)
            elif len(extList) <= 0:
                print >> sys.stderr, "No extension set when idList is used."
            else:
                SplitToFolder_idlist(idList, workdir, extList,
                                     maxfile_per_folder)
        else:
            print >> sys.stderr, "idListFile %s does not exist" % (idListFile)

    if fileListFile != "":
        if os.path.exists(fileListFile):
            # Close the list file as soon as it has been read.
            with open(fileListFile, "r") as fpin:
                content = fpin.read()
            # Drop empty lines; a real list keeps len() working everywhere.
            fileList = [name for name in content.split("\n") if name]
            if len(fileList) <= 0:
                print >> sys.stderr, "No file in fileListFile %s" % (
                    fileListFile)
            else:
                SplitToFolder_filelist(fileList, workdir, maxfile_per_folder)
        else:
            print >> sys.stderr, "fileListFile %s does not exist" % (
                fileListFile)
def main(g_params):  #{{{
    """Label every ID of a list as "euk", "gram+", "gram-" or "NA".

    Options: -h/--help, -o/-outfile FILE, -euk FILE, -gram+ FILE,
    -gram- FILE, -q (sets g_params['isQuiet']).  The last positional
    argument (or the argument following "--") is the ID list file.

    For each ID one tab-separated line "<id>, <taxid>, <class>" is written;
    the taxid column is always empty here.  The class is decided by the
    first of the euk, gram+ and gram- ID sets that contains the ID.

    Returns 1 after printing help, on an unknown option or when one of the
    four input files fails myfunc.checkfile; otherwise returns None.
    """
    args = sys.argv
    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return 1

    outfile = ""
    idListFile = ""
    euk = ""
    gram_pos = ""
    gram_neg = ""

    pos = 1
    forceFile = False  # set by "--": the next argument is the ID list file
    while pos < nargs:
        token = args[pos]
        if forceFile:
            idListFile = token
            forceFile = False
            pos += 1
        elif token == "--":
            forceFile = True
            pos += 1
        elif token[0] != "-":
            idListFile = token
            pos += 1
        elif token in ("-h", "--help"):
            PrintHelp()
            return 1
        elif token in ("-o", "--o", "-outfile"):
            (outfile, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-euk", "--euk"):
            (euk, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-gram+", "--gram+"):
            (gram_pos, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-gram-", "--gram-"):
            (gram_neg, pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-q", "--q"):
            g_params['isQuiet'] = True
            pos += 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", token
            return 1

    # Validate the inputs in a fixed order; stop at the first failure.
    required = [(idListFile, "idListFile"), (euk, "euk"),
                (gram_pos, "gram_pos"), (gram_neg, "gram_neg")]
    for path, label in required:
        if myfunc.checkfile(path, label) != 0:
            return 1

    idList = myfunc.ReadIDList(idListFile)
    set_euk_idlist = set(myfunc.ReadIDList(euk))
    set_gram_pos_idlist = set(myfunc.ReadIDList(gram_pos))
    set_gram_neg_idlist = set(myfunc.ReadIDList(gram_neg))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    # Checked in this order; the first matching set decides the class.
    groups = [("euk", set_euk_idlist),
              ("gram+", set_gram_pos_idlist),
              ("gram-", set_gram_neg_idlist)]
    NCBI_TaxID = ""
    for seqid in idList:
        cls = "NA"
        for label, members in groups:
            if seqid in members:
                cls = label
                break
        print >> fpout, "%s\t%s\t%s" % (seqid, NCBI_TaxID, cls)
    myfunc.myclose(fpout)
Example #14
0
def main(g_params):  #{{{
    """Split an ID list into one file per class.

    Options: -h/--help, -outpath DIR, -class FILE, -q (sets
    g_params['isQuiet']).  The last positional argument (or the argument
    following "--") is the ID list file.  Without -outpath the output goes
    to the directory of the input file, or "." if it has none.

    For every class returned by ReadClassDict a file
    <outpath>/<rootname>.<class><ext> is created, where rootname and ext
    come from the input file name.  Each ID is appended to the file of its
    class; IDs without a class are reported on stderr and skipped.

    Returns 1 after printing help, on an unknown option or when an input
    file fails myfunc.checkfile; otherwise returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""
    classfile = ""

    i = 1
    isNonOptionArg = False  # True only for the one argument right after "--"
    while i < numArgv:
        if isNonOptionArg:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-class", "--class"]:
                (classfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1
    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(classfile, "Class File") != 0:
        return 1
    if outpath == "":
        outpath = os.path.dirname(infile)
        if outpath == "":
            outpath = "."
    (id2ClassDict, classList) = ReadClassDict(classfile)
    idList = myfunc.ReadIDList(infile)
    rootname = os.path.basename(os.path.splitext(infile)[0])
    ext = os.path.splitext(infile)[1]

    fpoutList = {}  # class name -> open output file
    try:
        for clsname in classList:
            outfile = outpath + os.sep + rootname + ".%s" % clsname + ext
            fpoutList[clsname] = open(outfile, "w")

        for idd in idList:
            # Catch only the missing-key case so that interrupts and real
            # errors are not reported as "not in classDict".
            # NOTE(review): assumes id2ClassDict raises KeyError for an
            # unknown ID, as a plain dict does - confirm in ReadClassDict.
            try:
                cls = id2ClassDict[idd]
            except KeyError:
                print >> sys.stderr, "id %s not in classDict" % idd
                continue
            fpoutList[cls].write("%s\n" % idd)
    finally:
        # Close whatever was opened, even if writing failed part-way.
        for fp in fpoutList.values():
            fp.close()
Example #15
0
def main(g_params):#{{{
    """Command-line driver for the Itol_Tree_* drawing routines.

    Options: -h/--help, -datapath DIR, -m/-method NAME (stored as a string
    in g_params['method']), -treefile FILE, -fastafile FILE, -l LISTFILE,
    -outpath DIR.  Other arguments, and the single argument following "--",
    are Pfam IDs.

    For every ID the routine selected by g_params['method'] ("0", "1",
    "sd1", "sd2" or "sd3") is called; other values do nothing for IDs.
    If a tree file is given and the method is "linear", Itol_Tree_linear is
    called with the tree file, the fasta file and the output path.

    Returns 1 after printing help or on an unknown option, otherwise None.
    """
    args = sys.argv
    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return 1

    datapath = "."
    outpath = './'
    idList = []
    idListFile = ''
    treefile = ""
    fastafile = ""

    pos = 1
    takeAsID = False  # set by "--": the next argument is an ID, not an option
    while pos < nargs:
        token = args[pos]
        if takeAsID:
            takeAsID = False
            idList.append(token)
            pos += 1
        elif token == "--":
            takeAsID = True
            pos += 1
        elif token[0] != "-":
            idList.append(token)
            pos += 1
        elif token in ("-h", "--help"):
            PrintHelp()
            return 1
        elif token in ("-datapath", "--datapath"):
            datapath = args[pos + 1]
            pos += 2
        elif token in ("-m", "--m", "-method", "--method"):
            g_params['method'], pos = myfunc.my_getopt_str(args, pos)
        elif token in ("-treefile", "--treefile"):
            treefile = args[pos + 1]
            pos += 2
        elif token in ("-fastafile", "--fastafile"):
            fastafile = args[pos + 1]
            pos += 2
        elif token in ("-l", "--l"):
            idListFile = args[pos + 1]
            pos += 2
        elif token in ("-outpath", "--outpath"):
            outpath = args[pos + 1]
            pos += 2
        else:
            print(("Error! Wrong argument:%s" % token), file=sys.stderr)
            return 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) > 0:
        os.system("mkdir -p %s"%outpath)
        for cnt, pfamid in enumerate(idList):
            print("================== ", cnt , pfamid, " ====================")
            method = g_params['method']
            if method == "0":
                Itol_Tree_m0(pfamid, datapath, outpath)
            elif method == "1":
                Itol_Tree_m1(pfamid, datapath, outpath)
            elif method == "sd1":
                Itol_Tree_m_sd1(pfamid, datapath, outpath)
            elif method == "sd2":
                Itol_Tree_m_sd2(pfamid, datapath, outpath)
            elif method == "sd3":
                Itol_Tree_m_sd3(pfamid, datapath, outpath)

    if treefile != "" and g_params['method'] == "linear":
        Itol_Tree_linear(treefile, fastafile, outpath)
def main(g_params):  #{{{
    """Write GO information for the requested sequence IDs.

    Arguments are read from ``sys.argv``.  Sequence IDs may be given as plain
    arguments, as the argument following ``--``, or in a list file (``-l``).
    Each ID is looked up in the database named by ``-uniprotdb``; every record
    that is found is converted with ``GetGOInfoFromUniprotData`` and written
    with ``WriteGOInfo`` to the handle returned by ``myfunc.myopen`` for the
    ``-o`` file (``sys.stdout`` is passed as its fallback argument).

    Args:
        g_params: dict of run-time settings; ``-q`` sets
            ``g_params['isQuiet']`` to True.

    Returns:
        1 on a usage or setup error (and after printing the help text);
        None when all IDs have been processed.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"  # parsed for -outpath but not otherwise used in this function
    outfile = ""
    idListFile = ""
    uniprotDBname = ""
    idList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # The argument right after "--" is always taken as an ID.
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-uniprotdb", "--uniprotdb"]:
                (uniprotDBname, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            idList.append(argv[i])
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if uniprotDBname == "":
        print >> sys.stderr, "uniprotdb not set"
        return 1
    # The database is checked through its first data file, "<name>0.db".
    uniprotdbfile = "%s0.db" % uniprotDBname
    if myfunc.checkfile(uniprotdbfile, "uniprotdbfile") != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    hdl = myfunc.MyDB(uniprotDBname)
    if hdl.failure:
        # Do not leave the output handle open when the database is unusable.
        myfunc.myclose(fpout)
        return 1

    try:
        for seqid in idList:
            data = hdl.GetRecord(seqid)
            if data is not None:
                goinfo = GetGOInfoFromUniprotData(data)
                WriteGOInfo(seqid, goinfo, fpout)
    finally:
        # Release both handles even if a lookup or write raises.
        hdl.close()
        myfunc.myclose(fpout)
def main():#{{{
    """Write random pairs drawn from a list of IDs.

    Arguments are read from ``sys.argv``.  IDs may be given as plain
    arguments, as the argument following ``--``, or in a list file (``-l``).
    Index pairs are produced by ``myfunc.GenerateRandomPair`` (method 0) or
    ``myfunc.GenerateRandomPair_no_repeat_use`` (method 1) and the two IDs of
    each pair are written, space-separated, one pair per line.

    Returns:
        0 on success, 1 on a usage error.  With fewer than two ``argv``
        entries, or with ``-h``/``--help``, the help text is printed and the
        process exits through ``sys.exit()``.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        sys.exit()

    max_numpair = 10*1000*1000
    isQuiet = False  # set by -q; not read anywhere else in this function
    rand_seed = None
    idList = []
    idListFile = ""
    outfile = ""
    method = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # The argument right after "--" is always taken as an ID.
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                sys.exit()
            elif argv[i] in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-m", "--m", "-method", "--method"]:
                method, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-l", "--l", "-listfile", "--listfile"]:
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-maxpair", "--maxpair"]:
                max_numpair, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] == "-q":
                isQuiet = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            idList.append(argv[i])
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    numseqid = len(idList)
    if numseqid <= 0:
        print >> sys.stderr, "List file is empty."
        return 1
    elif numseqid < 2:
        print >> sys.stderr, "Too few items. At least 2 are required."
        return 1

    # Reject unsupported methods before the output file is opened; otherwise
    # no generator is selected and the write loop fails with a NameError.
    if method not in (0, 1):
        print >> sys.stderr, "Error! Unsupported method %s. It must be 0 or 1." % (
                str(method))
        return 1

    if method == 0:
        pairlist = myfunc.GenerateRandomPair(numseqid, max_numpair,
                rand_seed)
    else:
        pairlist = myfunc.GenerateRandomPair_no_repeat_use(numseqid,
                max_numpair, rand_seed)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for pair in pairlist:
            print >> fpout, "%s %s" %(idList[pair[0]], idList[pair[1]])
    finally:
        myfunc.myclose(fpout)
    return 0
Example #18
0
def main(g_params):  #{{{
    # Entry point: parse sys.argv, validate the required inputs and make sure
    # the directory of the output file exists.  The snippet ends right after
    # the directory is created; the values collected here are consumed by
    # code that is not part of this block.
    # Returns 1 on any usage or setup error.
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    # Defaults; each may be overridden by the option named in the parser below.
    outfile = ""
    fileListFile = ""
    fileList = []
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    threshold_Fraction_Group_2 = 0.05
    threshold_NumSeq_Group_2 = 2
    tableinfoFile = ""
    pdbtospFile = ""
    sprotACListFile = ""

    threshold_g12_seqidt = 20.0

    topoalnFile = ""
    aapath = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            # The argument right after "--" is always taken as an input file.
            fileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqidttype", "--seqidttype"]:
                (g_params['seqidttype'], i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-tableinfo", "--tableinfo"]:
                (tableinfoFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-topoaln", "--topoaln"]:
                (topoalnFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-aapath", "--aapath"]:
                (aapath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-thncls2", "--thncls2"]:
                (threshold_NumSeq_Group_2, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-thfrac2", "--thfrac2"]:
                (threshold_Fraction_Group_2,
                 i) = myfunc.my_getopt_float(argv, i)
            elif argv[i] in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif (argv[i] in ["-pdbtosp", "--pdbtosp"]):
                pdbtospFile, i = myfunc.my_getopt_str(argv, i)
            elif (argv[i] in ["-sprot", "--sprot"]):
                sprotACListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            fileList.append(argv[i])
            i += 1

    # Inputs named in the list file are appended to those given directly.
    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)
    if len(fileList) < 1:
        print >> sys.stderr, "No input set. exit"
        return 1

    # topoalnFile, aapath and outfile are mandatory.
    if myfunc.checkfile(topoalnFile, "topoalnFile") != 0:
        return 1
    if myfunc.checkfile(aapath, "aapath") != 0:
        return 1
    if outfile == "":
        print >> sys.stderr, "outfile not set. Exit"
        return 1

    # Create the directory that will hold outfile if it does not exist yet.
    outpath = myfunc.my_dirname(outfile)
    if not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError, e:
            print e
            return 1
Example #19
0
        cmd = ["mkdir", "-p", outpath]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError, e:
            print e
            return 1

    # Pfam family / clan definitions.
    (pfamidDefDict, clanidDefDict) = lcmp.ReadPfamDefFile(pfamDefFile)
    # Read in pdbtosp map
    if pdbtospFile != "":
        (pdb2uniprotMap, uniprot2pdbMap) = myfunc.ReadPDBTOSP(pdbtospFile)
    else:
        (pdb2uniprotMap, uniprot2pdbMap) = ({}, {})
# Read in swissprot ac list
    if sprotACListFile != "":
        swissprotAcSet = set(myfunc.ReadIDList(sprotACListFile))
    else:
        swissprotAcSet = set([])

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    if tableinfoFile != "":
        pairalnStat = lcmp.ReadPairAlnTableInfo(tableinfoFile)
    # NOTE(review): pairalnStat is assigned only in the branch above.  Unless
    # it is initialised in code that is not visible in this fragment, the
    # comparison below raises NameError when tableinfoFile is empty - confirm.
    if pairalnStat != {}:
        # A second output, named after outfile, is opened for the pair list.
        outfile_pair_g12 = outfile + ".pair_group1_2.txt"
        fpout_pair = myfunc.myopen(outfile_pair_g12, sys.stdout, "w", False)
    else:
        fpout_pair = None

    selectedPairList = []  # select pairs for draw pairwise topology alignment
    # for each family, select the pair with highest sequence identity between
Example #20
0
def main(g_params):  #{{{
    """Report, for every input file, whether ``IsUniqueSeq`` finds it unique.

    Arguments are read from ``sys.argv``.  Input files may be given as plain
    arguments, as the argument following ``--``, or in a list file (``-l``).
    For each file one line ``<file>\\t<yes|no>`` is written to the output
    (``-o``; ``sys.stdout`` is passed to ``myfunc.myopen`` as the fallback).
    Files for which ``IsUniqueSeq`` returns a negative status are reported on
    stderr and skipped.

    Args:
        g_params: dict of run-time settings.  ``-m`` sets
            ``g_params['method']`` (must be "id" or "seq"), ``-md5 yes|no``
            sets ``g_params['isUseMD5']`` and ``-q`` sets
            ``g_params['isQuiet']``.

    Returns:
        1 on a usage error (also after printing the help text); None when
        all files have been processed.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # The argument right after "--" is always taken as an input file.
            fileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-m", "--m"]:
                (g_params['method'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-md5", "--md5"]:
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                if tmpstr.lower() == "yes":
                    g_params['isUseMD5'] = True
                elif tmpstr.lower() == "no":
                    g_params['isUseMD5'] = False
                else:
                    print >> sys.stderr, "Bad syntax. option -md5 must be followed by yes or no"
                    return 1
            elif argv[i] in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            fileList.append(argv[i])
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    # Both checks used to print the error and then carry on regardless;
    # stop here so that nothing runs with no input or an unknown method.
    if len(fileList) < 1:
        print >> sys.stderr, "%s: no input file is set. exit" % (sys.argv[0])
        return 1

    if g_params['method'] not in ["id", "seq"]:
        print >> sys.stderr, "%s: bad method \"%s\"" % (sys.argv[0],
                                                        g_params['method'])
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for infile in fileList:
            status = IsUniqueSeq(infile, g_params['method'],
                                 g_params['isUseMD5'])
            if status >= 0:
                # Only a status of exactly 1 is reported as "yes".
                if status == 1:
                    yes_or_no = "yes"
                else:
                    yes_or_no = "no"
                print >> fpout, "%s\t%s" % (infile, yes_or_no)
            else:
                print >> sys.stderr, "%s: Failed to read file %s" % (
                    sys.argv[0], infile)
    finally:
        myfunc.myclose(fpout)
Example #21
0
def main(g_params):  #{{{
    argv = sys.argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    outpath = "./"
    isQuiet = False
    tableinfoFile = ""
    cmpclassList = []
    restrictIDListFile = ""

    signalpFile = ""
    dupFile = ""
    outfile = ""
    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            infile = sys.argv[i]
            isNonOptionArg = False
            i += 1
        elif sys.argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif sys.argv[i][0] == "-":
            if sys.argv[i] in ["-h", "--help"]:
                PrintHelp()
                sys.exit()
            elif argv[i] in ["-o", "--o"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-cmpclass", "--cmpclass"]:
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                cmpclassList.append(tmpstr)
            elif argv[i] in ["-signalp", "--signalp"]:
                (signalpFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-restrictidlist", "--restrictidlist"]:
                (restrictIDListFile, i) = myfunc.my_getopt_str(argv, i)
                g_params['isRestrictIDListSet'] = True
            elif argv[i] in ["-dup", "--dup", "-dupfile", "--dupfile"]:
                (dupFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-rmsp", "--rmsp"]:
                g_params['isRemoveSignalP'] = True
                i += 1
            elif argv[i] in ["-rmdup", "--rmdup"]:
                g_params['isRemoveDup'] = True
                i += 1
            elif argv[i] in ["-seq2fammap", "--seq2fammap"]:
                (seq2famMapfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqidttype", "--seqidttype"]:
                g_params['seqidttype'], i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-tableinfo", "--tableinfo"]:
                tableinfoFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-min-seqidt", "--min-seqidt"]:
                g_params['minSeqIDT'], i = myfunc.my_getopt_float(argv, i)
            elif argv[i] in ["-max-seqidt", "--max-seqidt"]:
                g_params['maxSeqIDT'], i = myfunc.my_getopt_float(argv, i)
            elif argv[i] in ["-evodist", "--evodist"]:
                g_params['isEvodist'] = True
                i += 1
            elif argv[i] in ["-alignrange", "--alignrange"]:
                g_params['alignrange'], i = myfunc.my_getopt_str(argv, i)
                if not g_params['alignrange'] in ['all', 'full', 'part']:
                    print >> sys.stderr, "alignrange must be one of [all, full, part]"
                    return 1
                else:
                    if g_params['alignrange'] == 'full':
                        g_params['alignrange'] = 'FULL_ALIGNED'
                    elif g_params['alignrange'] == 'part':
                        g_params['alignrange'] = 'PART_ALIGNED'
            elif argv[i] in ["-debug", "--debug"]:
                if argv[i + 1][0].lower() == 'y':
                    g_params['isDEBUG'] = True
                else:
                    g_params['isDEBUG'] = False
                i += 2
            elif argv[i] in [
                    "-debug-unmapped-position", "--debug-unmapped-position"
            ]:
                DEBUG_UNMAPPED_TM_POSITION = 1
                i += 2
            elif sys.argv[i] == "-q":
                isQuiet = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", sys.argv[i]
                return -1
        else:
            infile = sys.argv[i]
            i += 1
    if infile == "":
        print >> sys.stderr, "infile not set. Exit."
        return -1
    elif not os.path.exists(infile):
        print >> sys.stderr, "infile %s does not exists. Exit." % infile

    try:
        fpin = open(infile, "rb")
    except IOError:
        print >> sys.stderr, "Failed to open input file %s" % (infile)
        return -1

    pairalnStat = {}
    if g_params['seqidttype'] != 0:
        if tableinfoFile == "" or not os.path.exists(tableinfoFile):
            print >> sys.stderr, "tableinfoFile must be set when seqidttype is set to 1 or 2"
            print >> sys.stderr, "but seqidttype = %d is set. Exit." % g_params[
                'seqidttype']
            return -1
        pairalnStat = lcmp.ReadPairAlnTableInfo(tableinfoFile)

    rootname = os.path.basename(os.path.splitext(infile)[0])

    binpath = os.path.dirname(sys.argv[0])

    signalpDict = {}
    if signalpFile != "":
        signalpDict = lcmp.ReadSignalPDict(signalpFile)
    if signalpDict != {}:
        g_params['isSignalPSet'] = True

    dupPairList = []
    if dupFile != "":
        dupPairList = lcmp.ReadDupPairList(dupFile)
    if len(dupPairList) > 0:
        g_params['isDupSet'] = True
    dupPairSet = set(dupPairList)

    restrictIDSet = set([])
    if restrictIDListFile != "":
        restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))

    rltyDict = {}
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    unprocessedBuffer = ""
    cntTotalReadInRecord = 0
    cntTotalOutputRecord = 0
    isEOFreached = False
    while 1:
        buff = fpin.read(BLOCK_SIZE)
        if buff == "":
            isEOFreached = True
        buff = unprocessedBuffer + buff
        pairCmpRecordList = []
        unprocessedBuffer = lcmp.ReadPairCmpResultFromBuffer(
            buff, pairCmpRecordList)

        AddTableInfo(pairCmpRecordList, pairalnStat)
        AddSignalPInfo(pairCmpRecordList, signalpDict)
        AddDupInfo(pairCmpRecordList, dupPairSet)

        cntTotalReadInRecord += len(pairCmpRecordList)
        pairCmpRecordList = FilterPairCmpResult(pairCmpRecordList,
                                                cmpclassList, rltyDict,
                                                restrictIDSet)

        if len(pairCmpRecordList) > 0:
            lcmp.WritePairCmpRecord(pairCmpRecordList, cntTotalOutputRecord,
                                    fpout)
            cntTotalOutputRecord += len(pairCmpRecordList)
        if isEOFreached == True:
            break
    fpin.close()

    print "cntTotalReadInRecord =", cntTotalReadInRecord
    print "cntTotalOutputRecord =", cntTotalOutputRecord
    myfunc.myclose(fpout)
    return 0
def main(g_params):  #{{{
    """Write UniProt table information for the requested IDs.

    Arguments are read from ``sys.argv``.  IDs may be given as plain
    arguments, as the argument following ``--``, or in a list file (``-l``).
    The data file (``-datafile``) is loaded with ``ReadUniprotInfo`` and the
    result is handed, together with the IDs and the output handle, to
    ``UnirefSeqid2UniprotTableInfo``.

    Args:
        g_params: dict of run-time settings; ``-q`` sets
            ``g_params['isQuiet']`` to True.

    Returns:
        0 on success, 1 on a usage or setup error (also after printing the
        help text).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    datafile = "/data3/data/uniprot/uniprot_trembl.tableinfo"
    idList = []
    idListFile = ""

    outfileOpts = ["-o", "--o", "-outfile", "--outfile"]
    datafileOpts = ["-datafile", "--datafile"]
    listfileOpts = ["-l", "--l", "-listfile", "--listfile"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # The argument right after "--" is always taken as an ID.
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in outfileOpts + datafileOpts + listfileOpts:
                # These options take a value; report a missing one instead
                # of failing with IndexError on argv[i + 1].
                if i + 1 >= numArgv:
                    print >> sys.stderr, "Error! Option %s requires a value" % (
                        argv[i])
                    return 1
                if argv[i] in outfileOpts:
                    outfile = argv[i + 1]
                elif argv[i] in datafileOpts:
                    datafile = argv[i + 1]
                else:
                    idListFile = argv[i + 1]
                i += 2
            elif argv[i] in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            idList.append(argv[i])
            i += 1

    if idListFile != "":
        if os.path.exists(idListFile):
            idList += myfunc.ReadIDList(idListFile)
        else:
            # Previously ignored without a word; IDs given directly on the
            # command line are still processed.
            print >> sys.stderr, "Warning! List file %s does not exist, ignored." % (
                idListFile)

    if (len(idList)) < 1:
        print >> sys.stderr, "id not set. Exit"
        return 1

    if not os.path.exists(datafile):
        print >> sys.stderr, "datafile %s not set or not exists. Exit" % (
            datafile)
        return 1

    uniprotInfoDict = ReadUniprotInfo(datafile)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        UnirefSeqid2UniprotTableInfo(idList, uniprotInfoDict, fpout)
    finally:
        myfunc.myclose(fpout)
    return 0
Example #23
0
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            fileList.append(argv[i])
            i += 1

    # Create the output directory when one is given and does not exist yet;
    # a failing mkdir is printed and re-raised.
    if outpath != "" and not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError, e:
            print e
            raise

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    # NOTE(review): no return follows either of the two messages below, so
    # execution continues with an empty fileList or an unsupported method -
    # confirm whether that is intended.
    if len(fileList) < 1:
        print >> sys.stderr, "%s: no input file is set. exit" % (sys.argv[0])

    if not g_params['method'] in ["id", "seq"]:
        print >> sys.stderr, "%s: bad method \"%s\"" % (sys.argv[0],
                                                        g_params['method'])

    # Process every input file in turn.
    for i in xrange(len(fileList)):
        RemoveDupSeq(fileList[i], outpath, g_params['method'],
                     g_params['isUseMD5'])


#}}}
Example #24
0
def main(g_params):  #{{{
    # Entry point: parse sys.argv, resolve and validate the input/mapping
    # files (falling back to defaults under DATADIR3 when an option is not
    # given), load the lookup tables and create the output directory.
    # The snippet ends after the directory is created; the tables loaded here
    # are consumed by code that is not part of this block.
    # Returns 1 on any usage or setup error.
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    # Default list of integer pairs; replaced by the content of -pairlist
    # when that option is given (see below).
    SPE_PAIR_LIST = [(2, 1), (2, 4), (2, 6), (2, 8), (3, 6), (3, 7), (4, 6),
                     (4, 8), (4, 10), (5, 7), (5, 10), (6, 8), (6, 10),
                     (6, 12), (7, 14), (8, 10), (8, 12), (10, 12), (10, 13),
                     (11, 13), (12, 14)]

    outfile = ""

    infile = ""
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    signalpFile = "%s/wk/MPTopo/pfamAna_refpro/pred_signalp/refpro20120604-celluar.selmaxlength-m1.nr100.signalp_list" % (
        DATADIR3)

    #seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2clanid"%(DATADIR3)
    #seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2pfamid"%(DATADIR3)
    # Empty strings mean "not given"; defaults are filled in after parsing.
    seqid2clanidMapFile = ""
    seqid2pfamidMapFile = ""
    tm_pfamidListFile = ""
    tm_clanidListFile = ""
    pfamid2seqidMapFile = ""
    clanid2seqidMapFile = ""
    dbname_predTM = ""
    pairlistwithpfamidFile = ""

    pfamtype = ""

    pairListFile = ""

    #classList_TableNumTMHeatMap = ["ALL", "RMSP"]
    classList_TableNumTMHeatMap = ["ALL"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            # The argument right after "--" is always taken as the input file.
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outpath", "--outpath"]:
                (g_params['outpath'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                # NOTE(review): fileListFile has no default above and is not
                # read anywhere in the visible part of this function.
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-signalp", "--signalp"]:
                (signalpFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mp", "--mp"]:
                g_params[
                    'pairwise_comparison_method'], i = myfunc.my_getopt_int(
                        argv, i)
            elif argv[i] in ["-mindiffpair", "--mindiffpair"]:
                g_params['mindiffpair'], i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-pfamtype", "--pfamtype"]:
                pfamtype, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-clanidlist", "--clanidlist"]:
                (tm_clanidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamidlist", "--pfamidlist"]:
                (tm_pfamidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqid2clanid", "--seqid2clanid"]:
                (seqid2clanidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqid2pfamid", "--seqid2pfamid"]:
                (seqid2pfamidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamid2seqid", "--pfamid2seqid"]:
                (pfamid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-clanid2seqid", "--clanid2seqid"]:
                (clanid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pairlistwithpfamid", "--pairlistwithpfamid"]:
                (pairlistwithpfamidFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-predTMdbname", "--predTMdbname"]:
                (dbname_predTM, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pairlist", "--pairlist"]:
                (pairListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-winsize", "--winsize"]:
                (g_params['winsize'], i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-outname", "--outname"]:
                (g_params['outname'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-prokar", "--prokar"]:
                g_params['isOnlyAnaProkar'] = True
                i += 1
            elif argv[i] in ["-eukar", "--eukar"]:
                g_params['isOnlyAnaEukar'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1

    # Each checkfile call below is labelled with this file's name and the
    # current line number so that a failing check can be located.
    if myfunc.checkfile(
            infile, "%s (line %d): infile" %
        (__file__, inspect.currentframe().f_lineno)) != 0:
        return 1

    dirpath = myfunc.my_dirname(infile)

    # try to obtain Pfam family tag
    # The tag comes from -pfamtype when given (case-insensitive substring
    # match), otherwise from a ".<Type>." component in the infile name.
    tag = ""
    if pfamtype != "":
        if pfamtype.upper().find("FAM") != -1:
            tag = ".Family"
        elif pfamtype.upper().find("DOM") != -1:
            tag = ".Domain"
        elif pfamtype.upper().find("REP") != -1:
            tag = ".Repeat"
        elif pfamtype.upper().find("MOT") != -1:
            tag = ".Motif"
        else:
            tag = ""
    else:
        if infile.find(".Family.") != -1:
            tag = ".Family"
        elif infile.find(".Domain.") != -1:
            tag = ".Domain"
        elif infile.find(".Repeat.") != -1:
            tag = ".Repeat"
        elif infile.find(".Motif.") != -1:
            tag = ".Motif"
        else:
            tag = ""

    # For every mapping/list file: use the default under DATADIR3 when the
    # option was not given, then verify the file with myfunc.checkfile.
    # Some default names embed the tag determined above.
    if seqid2clanidMapFile == "":
        seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.seqid2clanid" % (
            DATADIR3)
    if myfunc.checkfile(
            seqid2clanidMapFile, "%s (line %d): seqid2clanidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if seqid2pfamidMapFile == "":
        seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.seqid2pfamid" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            seqid2pfamidMapFile, "%s (line %d): seqid2pfamidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if pfamid2seqidMapFile == "":
        pfamid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.pfamid2seqid" % (
            DATADIR3)
    if myfunc.checkfile(
            pfamid2seqidMapFile, "%s (line %d): pfamid2seqidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if clanid2seqidMapFile == "":
        clanid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.clanid2seqid" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            clanid2seqidMapFile, "%s (line %d): clanid2seqidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if tm_pfamidListFile == "":
        tm_pfamidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.pfamidlist" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            tm_pfamidListFile, "%s (line %d): tm_pfamidListFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if tm_clanidListFile == "":
        tm_clanidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.clanidlist" % (
            DATADIR3)
    if myfunc.checkfile(
            tm_clanidListFile, "%s (line %d): tm_clanidListFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    # The predicted-TM database is checked through its "<name>0.db" file.
    if dbname_predTM == "":
        dbname_predTM = "%s/wk/MPTopo/pfamAna_refpro/pred_topcons_single_method4/refpro20120604-celluar.selmaxlength-m1.topcons-single_topcons_single.m1.agree-44.RMSP" % (
            DATADIR3)
    if myfunc.checkfile(
            "%s0.db" % (dbname_predTM), "%s (line %d): dbname_predTM" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    # Optional restriction to prokaryotic / eukaryotic sequence IDs; an
    # empty ID set is treated as an error.
    if g_params['isOnlyAnaProkar']:
        prokarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Prokaryota.seqidlist" % (
            DATADIR3)
        g_params['prokarSeqIDSet'] = set(myfunc.ReadIDList(prokarseqidfile))
        if len(g_params['prokarSeqIDSet']) < 1:
            return 1
    if g_params['isOnlyAnaEukar']:
        eukarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Eukaryota.seqidlist" % (
            DATADIR3)
        g_params['eukarSeqIDSet'] = set(myfunc.ReadIDList(eukarseqidfile))
        if len(g_params['eukarSeqIDSet']) < 1:
            return 1

    # The default pair list is looked up two directories above infile.
    if pairlistwithpfamidFile == "":
        pairlistwithpfamidFile = "%s/../../Pfam-.maxpair100.pairlistwithpfamid" % (
            dirpath)
    if myfunc.checkfile(
            pairlistwithpfamidFile, "%s (line %d): pairlistwithpfamidFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    pfamid_2_seqidpair_Dict = ReadPairListWithFamID(pairlistwithpfamidFile)
    usedPfamIDSet = set(
        pfamid_2_seqidpair_Dict.keys())  # pfamids used in pair selection

    # A user-supplied pair list replaces the built-in SPE_PAIR_LIST; both
    # members of every pair are converted to int.
    if pairListFile != "":
        li = myfunc.ReadPairList(pairListFile)
        SPE_PAIR_LIST = []
        for tup in li:
            SPE_PAIR_LIST.append((int(tup[0]), int(tup[1])))

    # Load the definition, signal-peptide and mapping tables.
    (pfamidDefDict, clanidDefDict) = ReadPfamDefFile(pfamDefFile)
    signalpDict = lcmp.ReadSignalPDict(signalpFile)

    seqid2clanidDict = myfunc.ReadFam2SeqidMap(seqid2clanidMapFile)
    seqid2pfamidDict = myfunc.ReadFam2SeqidMap(seqid2pfamidMapFile)

    clanid2seqidDict = myfunc.ReadFam2SeqidMap(clanid2seqidMapFile)
    pfamid2seqidDict = myfunc.ReadFam2SeqidMap(pfamid2seqidMapFile)

    tm_pfamidList = myfunc.ReadIDList(tm_pfamidListFile)
    tm_clanidList = myfunc.ReadIDList(tm_clanidListFile)

    tm_pfamidSet = set(tm_pfamidList)
    tm_clanidSet = set(tm_clanidList)

    # IDs indexed in the predicted-TM database; empty when it fails to open.
    hdl_predTM = myfunc.MyDB(dbname_predTM)
    if not hdl_predTM.failure:
        idSet_TMpro = set(hdl_predTM.indexedIDList)
    else:
        idSet_TMpro = set([])

    #classList_TableNumTMHeatMap = ["ALL", "RMSP", "RMDUP"]
    #alignrangeList = ['FULL_ALIGNED', 'all', 'PART_ALIGNED']
    alignrangeList = ['FULL_ALIGNED']

    # Create the output directory when one is set and does not exist yet.
    if g_params['outpath'] != "" and not os.path.exists(g_params['outpath']):
        cmd = ["mkdir", "-p", g_params['outpath']]
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError, e:
            print e
            return 1
def main(g_params):#{{{
    """Parse the command line, collect sequence pairs and call WriteHTML.

    Pairs come from positional arguments (each argument is split on
    whitespace into one record) and, when -l is given, from the entries
    returned by myfunc.ReadIDList for the pair list file.

    Args:
        g_params: dict of global settings; only 'isQuiet' is written here.

    Returns:
        1 on a usage or validation error (also after printing the help).
        On success the function falls through after WriteHTML (returns
        None), as before.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    datapath = ""
    pairListFile = ""
    pairList = []

    i = 1
    # Set by "--": the single argument that follows is taken as a pair even
    # if it starts with "-".
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            pairList.append(argv[i].split())
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-datapath", "--datapath"]:
                (datapath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (pairListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            pairList.append(argv[i].split())
            i += 1

    if pairListFile != "":
        pairList += [x.split() for x in myfunc.ReadIDList(pairListFile,
            delim="\n")]

    # Count the collected pairs, not the characters of the list file name:
    # otherwise pairs given only on the command line are rejected and an
    # empty list file goes unnoticed.
    numpair = len(pairList)
    if numpair < 1:
        print >> sys.stderr, "no pair set. exit"
        return 1
    if datapath == "":
        print >> sys.stderr, "datapath not set"
        return 1
    elif not os.path.exists(datapath):
        print >> sys.stderr, "datapath %s does not exist"%(datapath)
        return 1

    if outpath == "":
        print >> sys.stderr, "outpath not set"
        return 1
    elif not os.path.exists(outpath):
        # A failing mkdir raises subprocess.CalledProcessError to the caller.
        cmd = ["mkdir", "-p", outpath]
        subprocess.check_call(cmd)

    WriteHTML(pairList, datapath, outpath)
Example #26
0
def main(g_params):  #{{{
    """Parse the command line and benchmark predicted topologies.

    Reads the real topologies (and optionally the sequences) with
    myfunc.ReadFasta, splits the IDs into single- and multi-spanning sets
    by myfunc.CountTM, and calls Benchmark for the "All_Alpha" class.

    Args:
        g_params: dict of global settings. This function writes 'mode',
            'path_predtopo', 'basename', 'isQuiet', 'isRMSP', 'isDEBUG',
            'restrictIDset' and 'isRestrictIDList', and reads 'mode',
            'path_predtopo' and 'basename' (they must already be present).

    Returns:
        1 on a usage or validation error (also after printing the help).
        On success the function falls through and returns None, as before.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    real_topofile = ""
    seqfile = ""
    restrictIDListFile = ""
    outfile_wrong_predtopo = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # No positional arguments are accepted, not even after "--".
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-owrong", "--owrong"]:
                (outfile_wrong_predtopo, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-realtopo", "--realtopo"]:
                (real_topofile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqfile", "--seqfile"]:
                (seqfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mode", "--mode"]:
                (g_params['mode'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-path_predtopo", "--path_predtopo"]:
                (g_params['path_predtopo'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-basename", "--basename"]:
                (g_params['basename'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-restrictidlist", "--restrictidlist"]:
                (restrictIDListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-rmsp", "--rmsp"]:
                g_params['isRMSP'] = True
                i += 1
            elif argv[i] in ["-debug", "--debug"]:
                g_params['isDEBUG'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1

    if myfunc.checkfile(g_params['path_predtopo'], "path_predtopo") != 0:
        return 1
    if g_params['basename'] == "":
        print >> sys.stderr, "%s: basename not set. exit" % (argv[0])
        return 1
    if myfunc.checkfile(real_topofile, "real_topofile") != 0:
        return 1

    if restrictIDListFile != "":
        g_params['restrictIDset'] = set(myfunc.ReadIDList(restrictIDListFile))
        g_params['isRestrictIDList'] = True

    if g_params['mode'] == "":
        # Guess the mode from the prediction path. "topcons_single" must be
        # tested first because it also contains "topcons".
        if g_params['path_predtopo'].find("topcons_single") >= 0:
            g_params['mode'] = "tps"
        elif g_params['path_predtopo'].find("topcons") >= 0:
            g_params['mode'] = "tp"
        else:
            # The path lives in g_params; there is no local path_predtopo.
            print >> sys.stderr, "mode not set, and can not be recognized from path_predtopo=%s" % (
                g_params['path_predtopo'])
            return 1

    if g_params['mode'] not in ["tp", "tps"]:
        print >> sys.stderr, "Unrecognized mode = %s" % (g_params['mode'])
        return 1

    (real_idlist, real_annolist,
     real_topolist) = myfunc.ReadFasta(real_topofile)
    seqDict = {}
    if seqfile != "" and os.path.exists(seqfile):
        (seq_idlist, seq_annolist, seqlist) = myfunc.ReadFasta(seqfile)
        for i in xrange(len(seq_idlist)):
            seqDict[seq_idlist[i]] = seqlist[i]

    if len(real_idlist) <= 0:
        print >> sys.stderr, "Failed to read real_topofile %s" % (
            real_topofile)
        return 1

    real_topodict = {}
    for i in xrange(len(real_idlist)):
        real_topodict[real_idlist[i]] = real_topolist[i]

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_wrong = myfunc.myopen(outfile_wrong_predtopo, None, "w", False)

    idSet_single = set([])
    idSet_multi = set([])
    for seqid in real_topodict:
        topo = real_topodict[seqid]
        numTM = myfunc.CountTM(topo)
        if numTM == 1:
            idSet_single.add(seqid)
        elif numTM > 1:
            idSet_multi.add(seqid)

    # Only "All_Alpha" is benchmarked at present; the "Single"/"Multi"
    # branch below is kept so those classes can be re-enabled by extending
    # this list.
    for TM_type in ["All_Alpha"]:
        if TM_type == "All_Alpha":
            sub_real_topodict = real_topodict
        else:
            sub_real_topodict = {}
            for seqid in real_topodict:
                topo = real_topodict[seqid]
                numTM = myfunc.CountTM(topo)
                if TM_type == "Single" and numTM == 1:
                    sub_real_topodict[seqid] = topo
                elif TM_type == "Multi" and numTM > 1:
                    sub_real_topodict[seqid] = topo
        Benchmark(sub_real_topodict, idSet_single, idSet_multi, TM_type, fpout,
                  fpout_wrong, seqDict)

    myfunc.myclose(fpout)
    # The wrong-prediction file was previously never closed. myopen is
    # called with None as its second argument here, so guard against None
    # before closing.
    if fpout_wrong is not None:
        myfunc.myclose(fpout_wrong)
Example #27
0
def main(g_params):  #{{{
    """Print the file name resolved by ID2File for every requested ID.

    IDs come from positional arguments and, with -l, from the ID list
    file. Each ID is resolved with ID2File against datapath, the map read
    from "<datapath>/id2pathmap.txt" and the extension given by -ext;
    non-empty results are printed one per line to the -o file or stdout.

    Args:
        g_params: dict of global settings; only 'isQuiet' is written here.

    Returns:
        1 on a usage or validation error (also after printing the help);
        otherwise falls through and returns None.
    """
    argv = sys.argv
    argc = len(argv)
    if argc < 2:
        PrintHelp()
        return 1

    outfile, datapath, idListFile, ext = "", "", "", ""
    idList = []

    pos = 1
    # True right after "--": the next argument is an ID whatever it looks like.
    takeNextAsID = False
    while pos < argc:
        arg = argv[pos]
        if takeNextAsID:
            idList.append(arg)
            takeNextAsID = False
            pos += 1
            continue
        if arg == "--":
            takeNextAsID = True
            pos += 1
            continue
        if arg[0] != "-":
            idList.append(arg)
            pos += 1
            continue

        # From here on arg is an option.
        if arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        if arg in ["-o", "--o", "-outfile"]:
            (outfile, pos) = myfunc.my_getopt_str(argv, pos)
        elif arg in ["-ext", "--ext"]:
            (ext, pos) = myfunc.my_getopt_str(argv, pos)
        elif arg in ["-datapath", "--datapath"]:
            (datapath, pos) = myfunc.my_getopt_str(argv, pos)
        elif arg in ["-l", "--l"]:
            (idListFile, pos) = myfunc.my_getopt_str(argv, pos)
        elif arg in ["-q", "--q"]:
            g_params['isQuiet'] = True
            pos += 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", arg
            return 1

    # Validate the mandatory settings in a fixed order.
    if ext == "":
        print >> sys.stderr, "file extension not set. exit"
        return 1
    if datapath == "":
        print >> sys.stderr, "datapath not set. exit"
        return 1
    if not os.path.exists(datapath):
        print >> sys.stderr, "datapath %s does not exist. exit" % (datapath)
        return 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)
    if len(idList) < 1:
        print >> sys.stderr, "No input set. exit"
        return 1

    writeToFile = (outfile != "")
    if writeToFile:
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    else:
        fpout = sys.stdout

    mapFileName = datapath + os.sep + "id2pathmap.txt"
    id2pathMapDict = myfunc.ReadIDPathMapDict(mapFileName)
    for seqid in idList:
        resolved = ID2File(seqid, datapath, id2pathMapDict, ext)
        if resolved == "":
            # Nothing is printed for an ID that resolves to an empty name.
            continue
        print >> fpout, resolved

    if writeToFile:
        myfunc.myclose(fpout)
def main(g_params):#{{{
    """Parse the command line and filter a seqid-to-family map file.

    When -idlist1 and/or -idlist2 is given, the input file is passed to
    Filter_seqid2fam_map together with the ID sets read from those files.
    When neither is given, the input file is copied unchanged to the
    output.

    Args:
        g_params: dict of global settings; only 'isQuiet' is written here.

    Returns:
        1 on a usage or validation error (also after printing the help).
        On success the function falls through and returns None, as before.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    outfile = ""
    keyIDListFile = ""
    contentIDListFile = ""
    isKeyIDSet = False
    isContentIDSet = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            opt = argv[i]
            if opt in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif opt in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            elif opt in ["-o", "--o", "-idlist1", "--idlist1",
                    "-idlist2", "--idlist2"]:
                # These options take a value; report a missing one instead
                # of failing with IndexError on argv[i+1].
                if i + 1 >= numArgv:
                    print >> sys.stderr, "Error! Option %s requires a value"%(
                            opt)
                    return 1
                value = argv[i+1]
                if opt in ["-o", "--o"]:
                    outfile = value
                elif opt in ["-idlist1", "--idlist1"]:
                    keyIDListFile = value
                    isKeyIDSet = True
                else:
                    contentIDListFile = value
                    isContentIDSet = True
                i += 2
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1
    if infile == "":
        print >> sys.stderr, "infile not set. Exit"
        return 1
    elif not os.path.exists(infile):
        print >> sys.stderr, "infile %s does not exist. Exit"%(infile)
        return 1

    # Real (empty) sets rather than dicts, so both variables have the same
    # type whether or not a list file was supplied.
    keyIDSet = set()
    contentIDSet = set()
    if keyIDListFile != "":
        keyIDSet = set(myfunc.ReadIDList(keyIDListFile))
    if contentIDListFile != "":
        contentIDSet = set(myfunc.ReadIDList(contentIDListFile))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        # An ID list that was given but is empty is still passed on as an
        # (empty) filter; only when no list option is given at all is the
        # input passed through unfiltered.
        if isKeyIDSet or isContentIDSet:
            Filter_seqid2fam_map(infile, keyIDSet, contentIDSet, isKeyIDSet,
                    isContentIDSet, fpout)
        else:
            # Copy through fpout so that -o is honoured, and without
            # handing the file name to a shell (the former
            # os.system("cat ...") wrote to stdout only and broke on names
            # containing shell metacharacters).
            with open(infile, "r") as fpin:
                for line in fpin:
                    fpout.write(line)
    finally:
        myfunc.myclose(fpout)
Example #29
0
def main(g_params):  #{{{
    """Parse the command line and generate sequence pairs within families.

    Family IDs come from positional arguments and, with -l, from the ID
    list file. The family-to-sequence map is read with
    myfunc.ReadFam2SeqidMap and one of the GeneratePairWithinFam_m_0/1/2
    routines is run according to -method (0, 1 or 2).

    Args:
        g_params: dict of global settings. This function writes 'isQuiet',
            'isOnlyPDB', 'uniprotidlist_with_pdb' and 'uniprot2pdbMap', and
            reads 'isOnlyPDB' (which must already be present).

    Returns:
        0 on success, 1 on a usage or validation error (also after printing
        the help).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    outfile_with_famid = ""
    outfile_with_pdb = ""
    outfile_fam2seqmap = ""
    idListFile = ""
    mapfile = "%s%s%s" % (
        DATADIR3, os.sep,
        "wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.clanid2seqid"
    )
    restrictIDListFile = ""
    idList = []
    maxseq_for_fam = 200
    maxpair_for_fam = 300
    method = 0
    rand_seed = None
    pdbtospFile = ""

    i = 1
    # Set by "--": the single argument that follows is taken as an ID even
    # if it starts with "-".
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outwithfamid", "--outwithfamid"]:
                outfile_with_famid, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outfam2seqmap", "--outfam2seqmap"]:
                outfile_fam2seqmap, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outwithpdb", "--outwithpdb"]:
                outfile_with_pdb, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in [
                    "-tmprolist", "--tmprolist", "-restrictlist",
                    "--restrictlist"
            ]:
                restrictIDListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mapfile", "--mapfile"]:
                mapfile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pdbtosp", "--pdbtosp"]:
                pdbtospFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-l", "--l"]:
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-maxseq", "--maxseq"]:
                maxseq_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-maxpair", "--maxpair"]:
                maxpair_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-m", "--m", "-method", "--method"]:
                method, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-onlypdb", "--onlypdb"]:
                g_params['isOnlyPDB'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            idList.append(argv[i])
            i += 1

    # A list file that is unset or does not exist is skipped; the check
    # below then catches the case where no ID was given at all.
    if os.path.exists(idListFile):
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) < 1:
        print >> sys.stderr, "no ID set. exit"
        return 1
    if myfunc.checkfile(mapfile, "idMapFile") != 0:
        return 1
    # Reject an unsupported method up front; previously the output files
    # were opened and left empty and 0 was returned.
    if method not in [0, 1, 2]:
        print >> sys.stderr, "Unrecognized method = %d. exit" % (method)
        return 1

    idMapDict = myfunc.ReadFam2SeqidMap(mapfile)

    # Read in pdbtosp map
    if pdbtospFile != "":
        (pdb2uniprotMap, uniprot2pdbMap) =\
                myfunc.ReadPDBTOSP(pdbtospFile)
        g_params['uniprotidlist_with_pdb'] = set(uniprot2pdbMap.keys())
        g_params['uniprot2pdbMap'] = uniprot2pdbMap

    if g_params['isOnlyPDB'] == True:
        if pdbtospFile == "":
            print >> sys.stderr, "onlypdb is true but pdbtospFile is not set. exit."
            return 1
        elif g_params['uniprotidlist_with_pdb'] == set([]):
            print >> sys.stderr, "onlypdb is true but uniprotidlist_with_pdb is empty. exit."
            return 1

    restrictIDSet = set([])
    if restrictIDListFile != "":
        restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_withfamid = myfunc.myopen(outfile_with_famid, None, "w", False)
    fpout_withpdb = myfunc.myopen(outfile_with_pdb, None, "w", False)
    fpout_fam2seqmap = myfunc.myopen(outfile_fam2seqmap, None, "w", False)

    # Close every output even if a generator raises.
    try:
        if method == 0:
            GeneratePairWithinFam_m_0(idList, idMapDict, restrictIDSet,
                                      maxseq_for_fam, rand_seed, fpout,
                                      fpout_withfamid)
        elif method == 1:
            GeneratePairWithinFam_m_1(idList, idMapDict, restrictIDSet,
                                      maxpair_for_fam, rand_seed, fpout,
                                      fpout_withfamid, fpout_fam2seqmap)
        elif method == 2:  #all to all
            GeneratePairWithinFam_m_2(idList, idMapDict, restrictIDSet, fpout,
                                      fpout_withfamid, fpout_withpdb)
    finally:
        myfunc.myclose(fpout)
        myfunc.myclose(fpout_withfamid)
        myfunc.myclose(fpout_withpdb)
        myfunc.myclose(fpout_fam2seqmap)
    return 0
def main(g_params):#{{{
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    progList = []
    progListFile = ""
    outpath = ""

    i = 1
    isNonOptionArg=False
    while i < numArgv:
        if isNonOptionArg == True:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-prog", "--prog"]:
                tmpstr, i = myfunc.my_getopt_str(argv, i)
                progList.append(tmpstr)
            elif argv[i] in ["-gzip", "--gzip"]:
                tmpstr, i = myfunc.my_getopt_str(argv, i)
                if tmpstr.upper()[0] == "-":
                    print >> sys.stderr, "Bad argument, -gzip should be"\
                            " followed by yes or no"
                    return 1
                elif tmpstr.upper()[0] == "Y":
                    g_params['isGzip'] = True
                else:
                    g_params['isGzip'] = False
            elif argv[i] in ["-num", "--num"]:
                g_params['num_per_split'], i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-proglist", "--proglist"]:
                progListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outpath", "--outpath"]:
                outpath, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q"]:
                g_params['isQuiet'] = True; i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1

    if myfunc.checkfile(infile,"infile") != 0:
        return 1

    inputList = ReadInputList(infile) # [(filename, numseq)]
    inputList = sorted(inputList, key=lambda x:x[1], reverse=False)
    rtname_infile = os.path.basename(os.path.splitext(infile)[0])

# get progList
    if len(progList) == 0 and progListFile == "":
        progList = default_progList
    else:
        if progListFile != "":
            tmp_list = myfunc.ReadIDList(progListFile)
            if len(tmp_list) == 0:
                print >> sys.stderr, "progListFile %s does not exist or empty"%(
                        progListFile)
                return 1
            else:
                progList += tmp_list
        if len(progList) == 0:
            print >> sys.stderr, "progList is empty. exit"
            return 1

    if outpath != "" and not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError, e:
            print e
            return 1