Esempio n. 1
0
def main(g_params):  #{{{
    """Append a looked-up value to every pair read from the input pair list.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): input
      pair list, read with ``myfunc.ReadPairList``.
    * ``-gram``/``--gram FILE``: pair list used to build the lookup table
      (first field -> second field).
    * ``-o``/``--o``/``-outfile FILE``: output file, opened through
      ``myfunc.myopen(outfile, sys.stdout, "w", False)``.
    * ``-q``/``--q``: set ``g_params['isQuiet']`` to True.
    * ``-h``/``--help``: print the help text and return 1.

    For each input pair one tab-separated line is written: both fields,
    then the table entry for the second field, or ``NA`` when the table has
    no such key.

    Returns 1 on a usage error or when a file fails ``myfunc.checkfile``;
    otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    gramfile = ""
    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # Argument right after "--" is always the input file.
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-outfile"):
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-gram", "--gram"):
                (gramfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            else:
                # sys.stderr.write works on both Python 2 and Python 3.
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(gramfile) != 0:
        return 1

    gramMapDict = {tup[0]: tup[1] for tup in myfunc.ReadPairList(gramfile)}
    gi2taxidpairlist = myfunc.ReadPairList(infile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for tup in gi2taxidpairlist:
            fpout.write("%s\t%s\t%s\n" %
                        (tup[0], tup[1], gramMapDict.get(tup[1], "NA")))
    finally:
        # Release the output handle even when a write fails.
        myfunc.myclose(fpout)
def main():#{{{
    """Parse the command line and run ``FilterSignalPeptide``.

    Arguments are taken from ``sys.argv``:

    * ``-topo``/``--topo FILE``: topology file (required).
    * ``-sig``/``--sig FILE``: signal peptide file (required), loaded with
      ``lcmp.ReadSignalPDict``.
    * ``-o``/``--o``/``-outfile``/``--outfile FILE``: output file name,
      passed through to ``FilterSignalPeptide``.
    * ``-deleteseq``/``--deleteseq``: passed to ``FilterSignalPeptide`` as
      its last argument.
    * ``-q``: accepted; nothing in this function reads a quiet flag.
    * ``-h``/``--help``: print the help text and return 1.

    Positional arguments are rejected. Returns 0 on success, 1 on a usage
    error or when a required file fails ``myfunc.checkfile``.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    topofile = ""
    sigpepfile = ""
    outfile = ""
    isDeleteSeqWithSignalPeptide = False
    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # NOTE(review): the argument after "--" is consumed and ignored,
            # whereas a bare positional argument is an error -- confirm.
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ('-o', '--o', "-outfile", "--outfile"):
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ('-topo', '--topo'):
                topofile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ('-sig', '--sig'):
                sigpepfile, i = myfunc.my_getopt_str(argv, i)
            elif arg == "-q":
                i += 1
            elif arg in ("-deleteseq", "--deleteseq"):
                isDeleteSeqWithSignalPeptide = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if myfunc.checkfile(topofile, 'topofile') != 0:
        return 1
    if myfunc.checkfile(sigpepfile, 'sigpepfile') != 0:
        return 1

    sigpepDict = lcmp.ReadSignalPDict(sigpepfile)
    FilterSignalPeptide(topofile, sigpepDict, outfile,
                        isDeleteSeqWithSignalPeptide)

    return 0
def main(g_params):#{{{
    """Parse the command line and run ``GetPercentTMOfClan``.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): input
      file, loaded with ``ReadPercentTM``.
    * ``-map``/``--map``/``-mapfile FILE``: map file, loaded with
      ``myfunc.ReadFam2SeqidMap``.
    * ``-o``/``--o``/``-outfile FILE``: output file, opened through
      ``myfunc.myopen(outfile, sys.stdout, "w", False)``.
    * ``-q``/``--q``: set ``g_params['isQuiet']`` to True.
    * ``-h``/``--help``: print the help text and return 1.

    Returns 1 on a usage error or when a file fails ``myfunc.checkfile``;
    otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    mapfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-outfile"):
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-map", "--map", "-mapfile"):
                (mapfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(mapfile) != 0:
        return 1

    clanid2pfamidDict = myfunc.ReadFam2SeqidMap(mapfile)
    pfamPercentTMDict = ReadPercentTM(infile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        GetPercentTMOfClan(pfamPercentTMDict, clanid2pfamidDict, fpout)
    finally:
        # Release the output handle even when the computation fails.
        myfunc.myclose(fpout)
Esempio n. 4
0
def main(g_params):  #{{{#{{{
    """Parse the command line and run ``Getseqlen`` on the input file.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): input file.
    * ``-i``/``--i``/``-printid``/``--printid``: enable ID printing.
    * ``-just-print-sum``/``--just-print-sum``: enable sum-only mode.
    * ``-o``/``--o``/``-outfile``/``--outfile FILE``: output file, opened
      through ``myfunc.myopen(outfile, sys.stdout, "w", False)``.
    * ``-bs``/``--bs``/``-block-size``/``--block-size INT``: block size
      passed to ``Getseqlen`` (default 100000).
    * ``-h``/``--help``: print the help text and return 0.

    ``g_params`` is not used by this function.

    Returns the status from ``Getseqlen``, or 1 on a usage error or when
    the input file fails ``myfunc.checkfile``.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = arg
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 0
            elif arg in ("-i", "--i", "-printid", "--printid"):
                isPrintID = True
                i += 1
            elif arg in ("-just-print-sum", "--just-print-sum"):
                isJustPrintSum = True
                i += 1
            elif arg in ("-o", "--o", "-outfile", "--outfile"):
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-bs", "--bs", "-block-size", "--block-size"):
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # NOTE(review): the message says ">0" but 0 is accepted
                # here -- confirm whether 0 is a valid block size.
                if BLOCK_SIZE < 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE,
                           fpout)
    finally:
        # Release the output handle even when Getseqlen raises.
        myfunc.myclose(fpout)

    return status
Esempio n. 5
0
def main(g_params):#{{{#{{{
    """Parse the command line and run ``Getseqlen`` on the input file.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): input file.
    * ``-i``/``--i``/``-printid``/``--printid``: enable ID printing.
    * ``-just-print-sum``/``--just-print-sum``: enable sum-only mode.
    * ``-o``/``--o``/``-outfile``/``--outfile FILE``: output file, opened
      through ``myfunc.myopen(outfile, sys.stdout, "w", False)``.
    * ``-bs``/``--bs``/``-block-size``/``--block-size INT``: block size
      passed to ``Getseqlen`` (default 100000).
    * ``-h``/``--help``: print the help text and return 0.

    ``g_params`` is not used by this function.

    Returns the status from ``Getseqlen``, or 1 on a usage error or when
    the input file fails ``myfunc.checkfile``.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = arg
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 0
            elif arg in ("-i", "--i", "-printid", "--printid"):
                isPrintID = True
                i += 1
            elif arg in ("-just-print-sum", "--just-print-sum"):
                isJustPrintSum = True
                i += 1
            elif arg in ("-o", "--o", "-outfile", "--outfile"):
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-bs", "--bs", "-block-size", "--block-size"):
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # NOTE(review): the message says ">0" but 0 is accepted
                # here -- confirm whether 0 is a valid block size.
                if BLOCK_SIZE < 0:
                    # sys.stderr.write runs on Python 2 and Python 3 alike.
                    sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                    return 1
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE,
                           fpout)
    finally:
        # Release the output handle even when Getseqlen raises.
        myfunc.myclose(fpout)

    return status
Esempio n. 6
0
def main(g_params):
    """Parse the command line and run ``CatFasta2`` on the input file.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, the single argument after ``--``, or
      ``-i``/``--i``/``--infile FILE``): input file.
    * ``-b``/``--b``/``--begin INT``: begin value (default 0).
    * ``-e``/``--e``/``--end INT``: end value (default 999999999).
    * ``-o``/``--o``/``--outfile FILE``: output file, opened through
      ``myfunc.myopen(outFile, sys.stdout, "w", False)``.
    * ``-h``/``--help``: print the help text and return 1.

    ``g_params`` is not used by this function.

    Returns 0 on success, 1 on a usage error or when the input file fails
    ``myfunc.checkfile``.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999

    # Hard-coded implementation switch: 1 -> CatFasta, otherwise CatFasta2.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # The argument after "--" is the input file (it used to be
            # silently dropped).
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-i", "--i", "--infile"):
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-b", "--b", "--begin"):
                begin, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-e", "--e", "--end"):
                end, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-o", "--o", "--outfile"):
                # Qualified like every other call; the bare name was
                # not referenced through myfunc.
                outFile, i = myfunc.my_getopt_str(argv, i)
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        if method == 1:
            CatFasta(inFile, begin, end, fpout)
        else:
            CatFasta2(inFile, begin, end, fpout)
    finally:
        # Release the output handle even when processing fails.
        myfunc.myclose(fpout)
    return 0
Esempio n. 7
0
def main(g_params):
    """Parse the command line and run ``CatFasta2`` on the input file.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, the single argument after ``--``, or
      ``-i``/``--i``/``--infile FILE``): input file.
    * ``-b``/``--b``/``--begin INT``: begin value (default 0).
    * ``-e``/``--e``/``--end INT``: end value (default 999999999).
    * ``-o``/``--o``/``--outfile FILE``: output file, opened through
      ``myfunc.myopen(outFile, sys.stdout, "w", False)``.
    * ``-h``/``--help``: print the help text and return 1.

    ``g_params`` is not used by this function.

    Returns 0 on success, 1 on a usage error or when the input file fails
    ``myfunc.checkfile``.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999

    # Hard-coded implementation switch: 1 -> CatFasta, otherwise CatFasta2.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # The argument after "--" is the input file (it used to be
            # silently dropped).
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-i", "--i", "--infile"):
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-b", "--b", "--begin"):
                begin, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-e", "--e", "--end"):
                end, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-o", "--o", "--outfile"):
                # Qualified like every other call; the bare name was
                # not referenced through myfunc.
                outFile, i = myfunc.my_getopt_str(argv, i)
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        if method == 1:
            CatFasta(inFile, begin, end, fpout)
        else:
            CatFasta2(inFile, begin, end, fpout)
    finally:
        # Release the output handle even when processing fails.
        myfunc.myclose(fpout)
    return 0
Esempio n. 8
0
def main():  #{{{
    """Parse the command line and run ``RandFasta`` on the input file.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, the single argument after ``--``, or
      ``-i``/``--infile FILE``): input file.
    * ``-n``/``--n INT``: value passed to ``RandFasta`` as ``N``
      (default 999999999).
    * ``-seed``/``--seed INT``: random seed passed to ``RandFasta``
      (default None).
    * ``-o``/``--outfile FILE``: output file, opened through
      ``myfunc.myopen(outFile, sys.stdout, "w", False)``.
    * ``-bs``/``--block-size``/``-block-size INT``: validated but not
      forwarded to ``RandFasta``.
    * ``-h``/``--help``: print the help text and return 1.

    Returns 1 on a usage error or when the input file fails
    ``myfunc.checkfile``; otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # The argument after "--" is the input file (it used to be
            # silently dropped).
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-i", "--infile"):
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-n", "--n"):
                N, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-seed", "--seed"):
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-o", "--outfile"):
                outFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-bs", "--block-size", "-block-size"):
                # NOTE(review): BLOCK_SIZE is only range-checked here; it
                # is never passed on. Kept so existing invocations work.
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                if BLOCK_SIZE < 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1
    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        RandFasta(inFile, N, rand_seed, fpout)
    finally:
        # Release the output handle even when RandFasta raises.
        myfunc.myclose(fpout)
Esempio n. 9
0
def main():#{{{
    """Parse the command line and run ``RandFasta`` on the input file.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, the single argument after ``--``, or
      ``-i``/``--infile FILE``): input file.
    * ``-n``/``--n INT``: value passed to ``RandFasta`` as ``N``
      (default 999999999).
    * ``-seed``/``--seed INT``: random seed passed to ``RandFasta``
      (default None).
    * ``-o``/``--outfile FILE``: output file, opened through
      ``myfunc.myopen(outFile, sys.stdout, "w", False)``.
    * ``-bs``/``--block-size``/``-block-size INT``: validated but not
      forwarded to ``RandFasta``.
    * ``-h``/``--help``: print the help text and return 1.

    Returns 1 on a usage error or when the input file fails
    ``myfunc.checkfile``; otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # The argument after "--" is the input file (it used to be
            # silently dropped).
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-i", "--infile"):
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-n", "--n"):
                N, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-seed", "--seed"):
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-o", "--outfile"):
                outFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-bs", "--block-size", "-block-size"):
                # NOTE(review): BLOCK_SIZE is only range-checked here; it
                # is never passed on. Kept so existing invocations work.
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                if BLOCK_SIZE < 0:
                    # sys.stderr.write runs on Python 2 and Python 3 alike.
                    sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                    return 1
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1
    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        RandFasta(inFile, N, rand_seed, fpout)
    finally:
        # Release the output handle even when RandFasta raises.
        myfunc.myclose(fpout)
Esempio n. 10
0
def PrepareDataForPairaln(seqid1, seqid2, outpath):  #{{{
    """Run ``selectPairaln.py`` for one sequence pair.

    The source alignment is ``<datapath>/<basename>.topoaln.fa``, built from
    the module-level ``g_params``; the helper script is looked up under the
    module-level ``rundir`` and is called with ``-split seqid1 seqid2`` and
    ``outpath`` as its output directory.

    Returns 1 when the source alignment fails ``myfunc.checkfile``. A
    failing helper script is printed to stdout and not propagated; the
    visible code then falls off the end (returns None).
    """
    topoalnfile = "%s/%s.topoaln.fa" % (g_params['datapath'],
                                        g_params['basename'])
    if myfunc.checkfile(topoalnfile, "topoalnfile") != 0:
        return 1
    # 1 seqid1-seqid2.topoaln.fa
    ext_topoaln = ".topoaln.fa"
    # Not used in the visible body; presumably the file the helper script
    # is expected to produce -- TODO confirm.
    pair_topoalnfile = "%s/%s_%s.topoaln.fa" % (outpath, seqid1, seqid2)
    cmd = [
        "%s/selectPairaln.py" % (rundir), "-pairaln", topoalnfile, "-outpath",
        outpath, "-ext", ext_topoaln, "-split", seqid1, seqid2
    ]
    try:
        subprocess.check_output(cmd)
    except subprocess.CalledProcessError as e:
        # "as" and print(...) with one argument work on Python 2.6+ and 3.
        print(e)
Esempio n. 11
0
def main():  #{{{
    """Parse the command line and run ``MPA2MSA`` on the input file.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): input
      file.
    * ``-o``/``--o FILE``: output file, opened through
      ``myfunc.myopen(outfile, sys.stdout, "w", False)``.
    * ``-of``/``--of FORMAT``: output format passed to ``MPA2MSA``
      (default ``"mfa"``).
    * ``-h``/``--help``: print the help text and return 1.

    Returns 1 on a usage error or when the input file fails
    ``myfunc.checkfile``; otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    output_format = "mfa"

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o"):
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-of", "--of"):
                output_format, i = myfunc.my_getopt_str(argv, i)
            else:
                # sys.stderr.write runs on Python 2 and Python 3 alike.
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile, "MSA file") != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        MPA2MSA(infile, output_format, fpout)
    finally:
        # Release the output handle even when the conversion fails.
        myfunc.myclose(fpout)
Esempio n. 12
0
def main(g_params):  #{{{
    """Parse the command line and run ``SplitPfamFasta``.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): input
      file.
    * ``-outpath``/``--outpath DIR``: output directory (required); created
      when it does not exist.
    * ``-q``/``--q``: set ``g_params['isQuiet']`` to True.
    * ``-h``/``--help``: print the help text and return 1.

    Returns 1 on a usage error, when the input file fails
    ``myfunc.checkfile``, when no output directory is given, or when the
    directory cannot be created; otherwise falls off the end (returns
    None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if outpath == "":
        # Previously only reported; the split then ran with an empty
        # output directory.
        sys.stderr.write("outpath not set\n")
        return 1
    if not os.path.exists(outpath):
        # os.makedirs avoids building a shell command from the path.
        try:
            os.makedirs(outpath)
        except OSError as e:
            sys.stderr.write("%s\n" % e)
            return 1

    SplitPfamFasta(infile, outpath)
def main(g_params):#{{{
    # Parse sys.argv (positional input file, -o/--o/-outfile, -q/--q,
    # -h/--help), check the input file and open the output stream.
    # Returns 1 on a usage error or when myfunc.checkfile rejects the input.
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""

    i = 1
    isNonOptionArg=False
    while i < numArgv:
        if isNonOptionArg == True:
            # The argument right after "--" is taken as the input file.
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1
    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    # NOTE(review): the assignment below has no right-hand side in this
    # copy of the file; the intended expression is unknown -- TODO restore.
    goancDict = 

    myfunc.myclose(fpout)
Esempio n. 14
0
def main():  #{{{
    """Parse the command line and run ``ReWriteFasta``.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): input
      file.
    * ``-o``/``--o FILE``: output file name, passed to ``ReWriteFasta``.
    * ``-h``/``--help``: print the help text and return 1.

    Returns the value of ``ReWriteFasta(infile, outfile)``, or 1 on a usage
    error or when the input file fails ``myfunc.checkfile``.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o"):
                outfile, i = myfunc.my_getopt_str(argv, i)
            else:
                # sys.stderr.write runs on Python 2 and Python 3 alike.
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    return ReWriteFasta(infile, outfile)
Esempio n. 15
0
def main(g_params):  #{{{
    """Parse the command line and match an MSA against topologies.

    Arguments are taken from ``sys.argv``:

    * ``-msa``/``--msa FILE``: MSA file (required).
    * ``-topo``/``--topo FILE``: topology file.
    * ``-topodb``/``--topodb NAME``: topology database prefix; the file
      ``NAME + '0.db'`` must exist. Takes precedence over ``-topo``.
    * ``-o``/``--o``/``-outfile FILE``: output file name, passed through.
    * ``-m``/``--m INT``: matching method, 0 or 1 (default 1).
    * ``-ig``/``--ig``/``-ignore-badseq``/``--ignore-badseq VALUE``: a value
      starting with ``y`` (any case) enables ignoring bad sequences, any
      other value disables it (default enabled).
    * ``-q``/``--q``: set ``g_params['isQuiet']`` to True.
    * ``-h``/``--help``: print the help text and return 1.

    Positional arguments are rejected, including the one following ``--``.

    Returns the value of ``MatchMSATopo_using_topodb`` or
    ``MatchMSATopo_using_topofile``, or 1 on any usage or file error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    msafile = ""
    topofile = ""
    topodb = ""
    isIgnoreBadseq = True
    method_match = 1

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # No positional argument is accepted, not even after "--".
            print("Error! Wrong argument:", arg, file=sys.stderr)
            return 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-o", "--o", "-outfile"):
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-msa", "--msa"):
                (msafile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-topo", "--topo"):
                (topofile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-m", "--m"):
                (method_match, i) = myfunc.my_getopt_int(argv, i)
                if method_match not in [0, 1]:
                    print("method_match %d not in [0,1]" % method_match,
                          file=sys.stderr)
                    return 1
            elif arg in ("-topodb", "--topodb"):
                (topodb, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-ig", "--ig", "-ignore-badseq", "--ignore-badseq"):
                (tmpss, i) = myfunc.my_getopt_str(argv, i)
                # startswith tolerates an empty value (treated as "no").
                isIgnoreBadseq = tmpss.lower().startswith("y")
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            else:
                print("Error! Wrong argument:", arg, file=sys.stderr)
                return 1
        else:
            print("Error! Wrong argument:", arg, file=sys.stderr)
            return 1

    if myfunc.checkfile(msafile) != 0:
        return 1

    if topodb != "":
        if not os.path.exists(topodb + '0.db'):
            print("topodb %s does not exist" % (topodb), file=sys.stderr)
            return 1
        return MatchMSATopo_using_topodb(msafile, topodb, isIgnoreBadseq,
                                         method_match, outfile)
    if topofile != "":
        if not os.path.exists(topofile):
            print("topofile %s does not exist" % (topofile), file=sys.stderr)
            return 1
        return MatchMSATopo_using_topofile(msafile, topofile, isIgnoreBadseq,
                                           method_match, outfile)

    print("neither topofile nor topodb is set. exit", file=sys.stderr)
    return 1
Esempio n. 16
0
        else:
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1

    if outpath == "":
        print >> sys.stderr, "Error! outpath not set. Exit"
        print usage_short
        return 1
    elif not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError, e:
            print e
            return 1

    if myfunc.checkfile(pfamid2seqidFile, "pfamid2seqidFile") != 0:
        return 1

    if myfunc.checkfile("%s0.db" % topodb, "topodb") != 0:
        return 1

    if myfunc.checkfile("%s0.db" % seqdb, "seqdb") != 0:
        return 1

    pfamid2seqidDict = myfunc.ReadFam2SeqidMap(pfamid2seqidFile)

    hdl_topo = myfunc.MyDB(topodb)
    if not hdl_topo.failure:
        idSet_topo = set(hdl_topo.indexedIDList)
    else:
        idSet_topo = set([])
Esempio n. 17
0
def main(g_params):#{{{
    """Parse the command line and prepare the output directory for a job.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): model
      file (required).
    * ``-jobid``/``--jobid ID``: job identifier (required).
    * ``-outpath``/``--outpath DIR``: output directory (required); created
      with ``mkdir -p`` when it does not exist.
    * ``-tmpdir``, ``-fasta``, ``-email``, ``-k``, ``-r`` (strings) and
      ``-t`` (int): parsed into locals that the visible body does not use
      afterwards.
    * ``-baseurl``/``--baseurl URL``: stored in
      ``g_params['base_www_url']``.
    * ``-q``/``--q``: set ``g_params['isQuiet']`` to True.
    * ``-force``/``--force``: set ``g_params['isForceRun']`` to True.
    * ``-h``/``--help``: print the help text and return 1.

    Returns 1 on any usage, file or directory error; otherwise the visible
    body falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    modelfile = ""
    seqfile = ""
    tmpdir = ""
    email = ""
    jobid = ""
    isKeepFiles = "no"
    isRepack = "yes"
    targetlength = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            modelfile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-tmpdir", "--tmpdir"):
                (tmpdir, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-jobid", "--jobid"):
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-fasta", "--fasta"):
                (seqfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-k", "--k"):
                (isKeepFiles, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-r", "--r"):
                (isRepack, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-t", "--t"):
                (targetlength, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ("-baseurl", "--baseurl"):
                (g_params['base_www_url'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-email", "--email"):
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            elif arg in ("-force", "--force"):
                g_params['isForceRun'] = True
                i += 1
            else:
                # sys.stderr.write runs on Python 2 and Python 3 alike.
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            modelfile = arg
            i += 1

    if jobid == "":
        sys.stderr.write("%s: jobid not set. exit\n" % (sys.argv[0]))
        return 1

    if myfunc.checkfile(modelfile, "modelfile") != 0:
        return 1
    if outpath == "":
        sys.stderr.write("outpath not set. exit\n")
        return 1
    elif not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            sys.stderr.write("%s\n" % e)
            return 1
def main(g_params):#{{{
    """Parse the command line, load the input list and resolve the programs.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): input
      list file, read with ``ReadInputList``.
    * ``-prog``/``--prog NAME``: add one program name; may be repeated.
    * ``-proglist``/``--proglist FILE``: file of program names, read with
      ``myfunc.ReadIDList`` and appended to the ``-prog`` names.
    * ``-gzip``/``--gzip VALUE``: a value starting with ``y`` (any case)
      sets ``g_params['isGzip']`` to True, any other value to False; a
      value starting with ``-`` is rejected.
    * ``-num``/``--num INT``: stored in ``g_params['num_per_split']``.
    * ``-outpath``/``--outpath DIR``: output directory; created with
      ``mkdir -p`` when given and missing.
    * ``-q``: set ``g_params['isQuiet']`` to True.
    * ``-h``/``--help``: print the help text and return 1.

    When neither ``-prog`` nor ``-proglist`` is given, the module-level
    ``default_progList`` is used.

    Returns 1 on any usage, file or directory error; otherwise the visible
    body falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    progList = []
    progListFile = ""
    outpath = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith is safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-prog", "--prog"):
                tmpstr, i = myfunc.my_getopt_str(argv, i)
                progList.append(tmpstr)
            elif arg in ("-gzip", "--gzip"):
                tmpstr, i = myfunc.my_getopt_str(argv, i)
                # startswith tolerates an empty value (treated as "no").
                if tmpstr.startswith("-"):
                    sys.stderr.write("Bad argument, -gzip should be"
                                     " followed by yes or no\n")
                    return 1
                g_params['isGzip'] = tmpstr.upper().startswith("Y")
            elif arg in ("-num", "--num"):
                g_params['num_per_split'], i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-proglist", "--proglist"):
                progListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-outpath", "--outpath"):
                outpath, i = myfunc.my_getopt_str(argv, i)
            elif arg == "-q":
                g_params['isQuiet'] = True
                i += 1
            else:
                # sys.stderr.write runs on Python 2 and Python 3 alike.
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile, "infile") != 0:
        return 1

    inputList = ReadInputList(infile)  # [(filename, numseq)]
    inputList = sorted(inputList, key=lambda x: x[1], reverse=False)
    rtname_infile = os.path.basename(os.path.splitext(infile)[0])

    # get progList
    if len(progList) == 0 and progListFile == "":
        progList = default_progList
    else:
        if progListFile != "":
            tmp_list = myfunc.ReadIDList(progListFile)
            if len(tmp_list) == 0:
                sys.stderr.write("progListFile %s does not exist or empty\n"
                                 % (progListFile))
                return 1
            progList += tmp_list
        if len(progList) == 0:
            sys.stderr.write("progList is empty. exit\n")
            return 1

    if outpath != "" and not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            # Still reported on stdout, as before.
            print(e)
            return 1
Esempio n. 19
0
def main(g_params):  #{{{
    """Parse the command line, prepare the work directories and run the job.

    Arguments are taken from ``sys.argv``:

    * ``FILE`` (positional, or the single argument after ``--``): model
      file (required).
    * ``-jobid``/``--jobid ID``: job identifier (required).
    * ``-outpath``/``--outpath DIR`` and ``-tmpdir``/``--tmpdir DIR``:
      required directories; each is created with ``mkdir -p`` when missing.
    * ``-fasta``/``--fasta FILE`` and ``-email``/``--email ADDR``: passed
      on to ``RunJob``.
    * ``-baseurl``/``--baseurl URL``: stored in
      ``g_params['base_www_url']``.
    * ``-q``/``--q``: set ``g_params['isQuiet']`` to True.
    * ``-force``/``--force``: set ``g_params['isForceRun']`` to True.
    * ``-h``/``--help``: print the help text and return 1.

    Also sets ``g_params['debugfile']`` to ``<outpath>/debug.log`` and
    creates the module-level ``path_profile_cache`` directory when missing.

    Returns the value of ``RunJob``, or 1 on any usage, file or directory
    error.
    """
    args = sys.argv
    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return 1

    settings = {
        "outpath": "",
        "modelfile": "",
        "seqfile": "",
        "tmpdir": "",
        "email": "",
        "jobid": "",
    }
    # Flags that take one string value, mapped to the setting they fill.
    flag_to_setting = {
        "-outpath": "outpath", "--outpath": "outpath",
        "-tmpdir": "tmpdir", "--tmpdir": "tmpdir",
        "-jobid": "jobid", "--jobid": "jobid",
        "-fasta": "seqfile", "--fasta": "seqfile",
        "-email": "email", "--email": "email",
    }

    pos = 1
    next_is_model = False
    while pos < nargs:
        token = args[pos]
        if next_is_model:
            settings["modelfile"] = token
            next_is_model = False
            pos += 1
        elif token == "--":
            next_is_model = True
            pos += 1
        elif token[0] != "-":
            settings["modelfile"] = token
            pos += 1
        elif token in ("-h", "--help"):
            PrintHelp()
            return 1
        elif token in flag_to_setting:
            (settings[flag_to_setting[token]],
             pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-baseurl", "--baseurl"):
            (g_params['base_www_url'], pos) = myfunc.my_getopt_str(args, pos)
        elif token in ("-q", "--q"):
            g_params['isQuiet'] = True
            pos += 1
        elif token in ("-force", "--force"):
            g_params['isForceRun'] = True
            pos += 1
        else:
            print("Error! Wrong argument:", token, file=sys.stderr)
            return 1

    if settings["jobid"] == "":
        print("%s: jobid not set. exit" % (sys.argv[0]), file=sys.stderr)
        return 1

    if myfunc.checkfile(settings["modelfile"], "modelfile") != 0:
        return 1

    # Both directories are mandatory and handled the same way, in order.
    required_dirs = (
        (settings["outpath"], "outpath not set. exit"),
        (settings["tmpdir"], "tmpdir not set. exit"),
    )
    for directory, missing_msg in required_dirs:
        if directory == "":
            print(missing_msg, file=sys.stderr)
            return 1
        if not os.path.exists(directory):
            try:
                subprocess.check_output(["mkdir", "-p", directory])
            except subprocess.CalledProcessError as err:
                print(err, file=sys.stderr)
                return 1

    g_params['debugfile'] = "%s/debug.log" % (settings["outpath"])

    if not os.path.exists(path_profile_cache):
        os.makedirs(path_profile_cache)

    return RunJob(settings["modelfile"], settings["seqfile"],
                  settings["outpath"], settings["tmpdir"], settings["email"],
                  settings["jobid"], g_params)
Esempio n. 20
0
def main(g_params):#{{{
    """Parse the command line and make sure the output directory exists.

    Arguments are read from ``sys.argv``:

    * ``-outpath``, ``-tmpdir``, ``-jobid``, ``-email`` take one value each.
    * ``-baseurl`` stores its value in ``g_params['base_www_url']``.
    * ``-q`` sets ``g_params['isQuiet']`` to True.
    * ``-h`` / ``--help`` prints the help text and returns 1.
    * A bare argument, or the argument following ``--``, is the input file.

    Returns 1 on any usage or set-up error.  On success the function falls
    off the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""
    # tmpdir and email are parsed so that their values are consumed from
    # argv, but nothing in this block reads them afterwards.
    tmpdir = ""
    email = ""
    jobid = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        # startswith() instead of argv[i][0] so that an empty-string argument
        # is treated as a positional value rather than raising IndexError.
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-tmpdir", "--tmpdir"]:
                (tmpdir, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-jobid", "--jobid"]:
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-baseurl", "--baseurl"]:
                (g_params['base_www_url'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-email", "--email"]:
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
                return 1
        else:
            infile = argv[i]
            i += 1

    if jobid == "":
        sys.stderr.write("%s: jobid not set. exit\n" % (sys.argv[0],))
        return 1

    if myfunc.checkfile(infile, "infile") != 0:
        return 1
    if outpath == "":
        sys.stderr.write("outpath not set. exit\n")
        return 1
    elif not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            sys.stderr.write("%s\n" % (e,))
            return 1
def main(g_params):  #{{{
    """Write GO information for a list of sequence IDs from a UniProt DB.

    Arguments are read from ``sys.argv``:

    * ``-o`` / ``-outfile``: output file (``sys.stdout`` is passed to
      ``myfunc.myopen`` as the fallback stream).
    * ``-outpath``: parsed, but not used anywhere in this block.
    * ``-l``: file with sequence IDs, read with ``myfunc.ReadIDList``.
    * ``-uniprotdb``: database name (required); ``<name>0.db`` must exist.
    * ``-q`` sets ``g_params['isQuiet']`` to True.
    * Bare arguments, and the argument following ``--``, are sequence IDs.

    Returns 1 on a usage error or when the database cannot be opened.
    On success the function falls off the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"
    outfile = ""
    idListFile = ""
    uniprotDBname = ""
    idList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        # startswith() keeps an empty-string argument from raising IndexError.
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-uniprotdb", "--uniprotdb"]:
                (uniprotDBname, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
                return 1
        else:
            idList.append(argv[i])
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if uniprotDBname == "":
        sys.stderr.write("uniprotdb not set\n")
        return 1
    uniprotdbfile = "%s0.db" % uniprotDBname
    if myfunc.checkfile(uniprotdbfile, "uniprotdbfile") != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    hdl = myfunc.MyDB(uniprotDBname)
    if hdl.failure:
        # Do not leak the already opened output stream on this early exit.
        myfunc.myclose(fpout)
        return 1

    try:
        for seqid in idList:
            data = hdl.GetRecord(seqid)
            if data is not None:
                goinfo = GetGOInfoFromUniprotData(data)
                WriteGOInfo(seqid, goinfo, fpout)
    finally:
        # Release both handles even when a record fails to parse or write.
        hdl.close()
        myfunc.myclose(fpout)
Esempio n. 22
0
def main(g_params):  #{{{
    """Parse the command line, validate inputs and create the output folder.

    Arguments are read from ``sys.argv``.  Value options are ``-o``, ``-l``,
    ``-seqidttype`` (int, stored in ``g_params['seqidttype']``),
    ``-tableinfo``, ``-topoaln``, ``-aapath``, ``-thncls2`` (int),
    ``-thfrac2`` (float), ``-pfamdef``, ``-pdbtosp`` and ``-sprot``.
    ``-q`` sets ``g_params['isQuiet']``.  Bare arguments, and the argument
    following ``--``, are appended to the input file list, as are the
    entries of the ``-l`` list file.

    Returns 1 on a usage error, a missing input, or when the directory of
    the output file cannot be created.  On success the function falls off
    the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    # Several of the settings below are only parsed here; nothing in this
    # block reads them after the option loop.
    outfile = ""
    fileListFile = ""
    fileList = []
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    threshold_Fraction_Group_2 = 0.05
    threshold_NumSeq_Group_2 = 2
    tableinfoFile = ""
    pdbtospFile = ""
    sprotACListFile = ""

    threshold_g12_seqidt = 20.0

    topoalnFile = ""
    aapath = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            fileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        # startswith() keeps an empty-string argument from raising IndexError.
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqidttype", "--seqidttype"]:
                (g_params['seqidttype'], i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-tableinfo", "--tableinfo"]:
                (tableinfoFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-topoaln", "--topoaln"]:
                (topoalnFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-aapath", "--aapath"]:
                (aapath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-thncls2", "--thncls2"]:
                (threshold_NumSeq_Group_2, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-thfrac2", "--thfrac2"]:
                (threshold_Fraction_Group_2,
                 i) = myfunc.my_getopt_float(argv, i)
            elif argv[i] in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pdbtosp", "--pdbtosp"]:
                (pdbtospFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-sprot", "--sprot"]:
                (sprotACListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
                return 1
        else:
            fileList.append(argv[i])
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)
    if len(fileList) < 1:
        sys.stderr.write("No input set. exit\n")
        return 1

    if myfunc.checkfile(topoalnFile, "topoalnFile") != 0:
        return 1
    if myfunc.checkfile(aapath, "aapath") != 0:
        return 1
    if outfile == "":
        sys.stderr.write("outfile not set. Exit\n")
        return 1

    outpath = myfunc.my_dirname(outfile)
    if not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError as e:
            # Report on stderr like every other error in this function.
            sys.stderr.write("%s\n" % (e,))
            return 1
Esempio n. 23
0
        outfile_selected_pair, "-split", "-outpath", outpath, "-ext",
        ext_topoaln
    ]
    print '\n', " ".join(cmd), '\n'
    try:
        subprocess.check_output(cmd)
    except subprocess.CalledProcessError, e:
        print e
        return 1
# draw pairwise topology alignment
    for li in selectedPairList:
        single_topoaln_file = "%s%s%s%s" % (outpath, os.sep, "%s_%s" %
                                            (li[0], li[1]), ext_topoaln)
        aafile = "%s%s%s%s" % (aapath, os.sep, li[3], ".fa")

        if myfunc.checkfile(single_topoaln_file, "single_topoaln_file") != 0:
            continue
        if myfunc.checkfile(aafile, "aafile") != 0:
            continue
        method_shrink = "2"
        method_plot = "mat"
        shrinkrateTM = "3"
        maxHoldLoop = "4"
        cmd = [
            "%s/drawMSATopo.py" % (binpath), single_topoaln_file, "-aaseq",
            aafile, "-pdg", "yes", "-shrink", "yes", "-m-shrink",
            method_shrink, "-method", method_plot, "-text", "yes", "-krbias",
            "-shrinkrateTM", shrinkrateTM, "-max-hold-loop", maxHoldLoop
        ]
        try:
            subprocess.check_output(cmd)
Esempio n. 24
0
def main(g_params):  #{{{
    """Select pairwise alignments for the requested ID pairs.

    Arguments are read from ``sys.argv``:

    * ``-o``: output file (``sys.stdout`` is passed to ``myfunc.myopen`` as
      the fallback stream).
    * ``-pairaln``: pairwise alignment file (must pass ``myfunc.checkfile``).
    * ``-outpath`` / ``-ext``: stored in ``g_params['outpath']`` and
      ``g_params['ext']``.
    * ``-l``: file with ID pairs, read with ``myfunc.ReadPairList``.
    * ``-split`` sets ``g_params['isSplit']``; ``-q`` / ``--q`` sets
      ``g_params['isQuiet']``.
    * Bare arguments are collected; the first two form one ID pair.

    ``g_params`` must already hold ``'isSplit'`` and ``'outpath'`` entries,
    because both are read here even when the matching option is absent.

    Returns 1 on a usage error or when the split output directory cannot be
    created.  On success the function falls off the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    pairalnFile = ""
    pair = []
    listfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            pair.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        # startswith() keeps an empty-string argument from raising IndexError.
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pairaln", "--pairaln"]:
                pairalnFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outpath", "--outpath"]:
                g_params['outpath'], i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-ext", "--ext"]:
                g_params['ext'], i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                listfile, i = myfunc.my_getopt_str(argv, i)
            # "--q" added so the quiet flag has the same two spellings as
            # every other option in this parser.
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-split", "--split"]:
                g_params['isSplit'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
                return 1
        else:
            pair.append(argv[i])
            i += 1

    if myfunc.checkfile(pairalnFile, "pairalnFile") != 0:
        return 1

    if g_params['isSplit']:
        if g_params['outpath'] == "":
            sys.stderr.write("Error! outpath string is empty when 'split'"
                             " is enabled. exit\n")
            return 1
        elif not os.path.exists(g_params['outpath']):
            cmd = ["mkdir", "-p", g_params['outpath']]
            try:
                subprocess.check_call(cmd)
            except subprocess.CalledProcessError as e:
                # Report and return an error code instead of a traceback.
                sys.stderr.write("%s\n" % (e,))
                return 1

    pairlist = []
    if len(pair) >= 2:
        pairlist.append((pair[0], pair[1]))
    if listfile != "":
        pairlist += myfunc.ReadPairList(listfile)

    # De-duplicate the requested pairs.
    pairlistSet = set(pairlist)
    del pairlist

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        SelectPairaln(pairalnFile, pairlistSet, fpout)
    finally:
        myfunc.myclose(fpout)
Esempio n. 25
0
def main(g_params):  #{{{
    """Split an ID list file into one file per class.

    Arguments are read from ``sys.argv``:

    * ``-outpath``: output directory; defaults to the directory of the input
      file, or ``.`` when that is empty.
    * ``-class``: class definition file, read with ``ReadClassDict``.
    * ``-q`` sets ``g_params['isQuiet']`` to True.
    * A bare argument, or the argument following ``--``, is the input file.

    For each class an output file ``<outpath>/<rootname>.<class><ext>`` is
    opened for writing, where rootname and ext come from the input file
    name.  IDs without a class are reported on stderr and skipped.

    Returns 1 on a usage error or missing input file.  On success the
    function falls off the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""
    classfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        # startswith() keeps an empty-string argument from raising IndexError.
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-class", "--class"]:
                (classfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
                return 1
        else:
            infile = argv[i]
            i += 1
    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(classfile, "Class File") != 0:
        return 1
    if outpath == "":
        outpath = os.path.dirname(infile)
        if outpath == "":
            outpath = "."
    (id2ClassDict, classList) = ReadClassDict(classfile)
    idList = myfunc.ReadIDList(infile)
    (stem, ext) = os.path.splitext(infile)
    rootname = os.path.basename(stem)

    fpoutList = {}
    try:
        for cls in classList:
            outfile = outpath + os.sep + rootname + ".%s" % cls + ext
            fpoutList[cls] = open(outfile, "w")

        for idd in idList:
            try:
                cls = id2ClassDict[idd]
            except KeyError:
                # Only a missing ID is expected here; anything else should
                # surface instead of being silently swallowed.
                sys.stderr.write("id %s not in classDict\n" % idd)
                continue
            fpoutList[cls].write("%s\n" % idd)
    finally:
        # Close every file that was opened, even if a write failed.
        for fp in fpoutList.values():
            fp.close()
Esempio n. 26
0
def main(g_params):  #{{{
    """Benchmark predicted topologies against a real-topology FASTA file.

    Arguments are read from ``sys.argv``; positional arguments are not
    accepted and are reported as errors.

    * ``-o`` / ``-outfile``: result file (``sys.stdout`` is passed to
      ``myfunc.myopen`` as the fallback stream).
    * ``-owrong``: file passed to ``Benchmark`` as its second output stream.
    * ``-realtopo``: real topology file (required).
    * ``-seqfile``: optional sequence file in FASTA format.
    * ``-mode``, ``-path_predtopo``, ``-basename``: stored in ``g_params``
      under ``'mode'``, ``'path_predtopo'`` and ``'basename'``.
    * ``-restrictidlist``: ID list stored as ``g_params['restrictIDset']``.
    * ``-q``, ``-rmsp``, ``-debug``: set ``'isQuiet'``, ``'isRMSP'`` and
      ``'isDEBUG'`` in ``g_params``.

    When no mode is given it is derived from ``path_predtopo``
    ("topcons_single" -> "tps", "topcons" -> "tp").

    Returns 1 on a usage error or unreadable input.  On success the function
    falls off the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"
    outfile = ""
    real_topofile = ""
    seqfile = ""
    restrictIDListFile = ""
    outfile_wrong_predtopo = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # This tool takes no positional arguments, not even after "--".
            sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        # startswith() keeps an empty-string argument from raising IndexError.
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-owrong", "--owrong"]:
                (outfile_wrong_predtopo, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-realtopo", "--realtopo"]:
                (real_topofile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqfile", "--seqfile"]:
                (seqfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mode", "--mode"]:
                (g_params['mode'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-path_predtopo", "--path_predtopo"]:
                (g_params['path_predtopo'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-basename", "--basename"]:
                (g_params['basename'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-restrictidlist", "--restrictidlist"]:
                (restrictIDListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-rmsp", "--rmsp"]:
                g_params['isRMSP'] = True
                i += 1
            elif argv[i] in ["-debug", "--debug"]:
                g_params['isDEBUG'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
                return 1
        else:
            sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
            return 1

    if myfunc.checkfile(g_params['path_predtopo'], "path_predtopo") != 0:
        return 1
    if g_params['basename'] == "":
        sys.stderr.write("%s: basename not set. exit\n" % (argv[0],))
        return 1
    if myfunc.checkfile(real_topofile, "real_topofile") != 0:
        return 1

    if restrictIDListFile != "":
        g_params['restrictIDset'] = set(myfunc.ReadIDList(restrictIDListFile))
        g_params['isRestrictIDList'] = True

    if g_params['mode'] == "":
        # "topcons_single" must be tested first: it also contains "topcons".
        if g_params['path_predtopo'].find("topcons_single") >= 0:
            g_params['mode'] = "tps"
        elif g_params['path_predtopo'].find("topcons") >= 0:
            g_params['mode'] = "tp"
        else:
            # The path lives in g_params; there is no local "path_predtopo".
            sys.stderr.write(
                "mode not set, and can not be recognized from "
                "path_predtopo=%s\n" % (g_params['path_predtopo'],))
            return 1

    if g_params['mode'] not in ["tp", "tps"]:
        sys.stderr.write("Unrecognized mode = %s\n" % (g_params['mode'],))
        return 1

    (real_idlist, real_annolist,
     real_topolist) = myfunc.ReadFasta(real_topofile)
    seqDict = {}
    if seqfile != "" and os.path.exists(seqfile):
        (seq_idlist, seq_annolist, seqlist) = myfunc.ReadFasta(seqfile)
        seqDict = dict(zip(seq_idlist, seqlist))

    if len(real_idlist) <= 0:
        sys.stderr.write("Failed to read real_topofile %s\n" % (
            real_topofile,))
        return 1

    real_topodict = dict(zip(real_idlist, real_topolist))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_wrong = myfunc.myopen(outfile_wrong_predtopo, None, "w", False)

    try:
        # Partition the IDs by the number of TM helices in the real topology.
        idSet_single = set([])
        idSet_multi = set([])
        for seqid in real_topodict:
            numTM = myfunc.CountTM(real_topodict[seqid])
            if numTM == 1:
                idSet_single.add(seqid)
            elif numTM > 1:
                idSet_multi.add(seqid)

        # Only "All_Alpha" is enabled; add "Single" and/or "Multi" to the
        # list to benchmark those subsets separately.
        for TM_type in ["All_Alpha"]:
            if TM_type == "All_Alpha":
                sub_real_topodict = real_topodict
            else:
                sub_real_topodict = {}
                for seqid in real_topodict:
                    topo = real_topodict[seqid]
                    numTM = myfunc.CountTM(topo)
                    if TM_type == "Single" and numTM == 1:
                        sub_real_topodict[seqid] = topo
                    elif TM_type == "Multi" and numTM > 1:
                        sub_real_topodict[seqid] = topo
            Benchmark(sub_real_topodict, idSet_single, idSet_multi, TM_type,
                      fpout, fpout_wrong, seqDict)
    finally:
        myfunc.myclose(fpout)
        # The second stream is opened with None as its fallback, so it may
        # be None when -owrong was not given.
        if fpout_wrong is not None:
            myfunc.myclose(fpout_wrong)
Esempio n. 27
0
def Itol_Tree(pfamid, datapath, outpath):
    """Upload the tree of one Pfam family to iTOL and export images of it.

    Input files are looked up in *datapath*:

    * ``<pfamid>.TMpro.clustalo10.fasttree``: newick tree (required)
    * ``<pfamid>-taxonomy-file.txt``: colour definitions (required)
    * ``<pfamid>tms.txt``: dataset 1, added only when the file exists

    The tree is exported as EPS and SVG into *outpath*; the EPS file is then
    passed to the external ``epstopdf`` and ``convert`` commands.

    Returns 1 when the tree or taxonomy file is missing and None otherwise.
    Calls ``sys.exit(1)`` when the upload fails.
    """
    # Create the Itol class
    itl = Itol.Itol()

    # Set the tree file
    treefile = "%s%s%s%s" % (datapath, os.sep, pfamid,
                             ".TMpro.clustalo10.fasttree")
    taxofile = "%s%s%s%s" % (datapath, os.sep, pfamid,
                             "-taxonomy-file.txt")
    # NOTE(review): there is no separator between pfamid and "tms.txt";
    # confirm against the naming convention of the data files.
    TMdeffile = "%s%s%s%s" % (datapath, os.sep, pfamid,
                              "tms.txt")

    if not os.path.exists(treefile):
        sys.stderr.write("tree file %s does not exist. Ignore\n" % (treefile,))
        return 1
    if myfunc.checkfile(taxofile, "taxofile") != 0:
        return 1

    # The line count of the taxonomy file is what drives the font size.
    # Use a context manager so the handle is closed.
    with open(taxofile, "r") as fptaxo:
        numLeave = sum(1 for _ in fptaxo)

    fontsize = GetFontSize(numLeave)

    colordeffile = taxofile
    # Empty names mean "not provided": os.path.exists("") is False, so the
    # matching sections below are skipped.
    branchlabelfile = ""
    dataset1 = TMdeffile
    dataset2 = ""
    dataset3 = ""
    dataset4 = ""

    #===================================
    itl.add_variable('treeFile', treefile)
    itl.add_variable('treeName', pfamid)
    itl.add_variable('treeFormat', 'newick')
    if os.path.exists(colordeffile):
        itl.add_variable('colorDefinitionFile', colordeffile)
    if os.path.exists(branchlabelfile):
        itl.add_variable('branchLabelsFile', branchlabelfile)

    if os.path.exists(dataset1):
        itl.add_variable('dataset1File', dataset1)
        itl.add_variable('dataset1Label', 'numTM_and_repeat')
        itl.add_variable('dataset1Separator', 'comma')
        itl.add_variable('dataset1Type', 'domains')
        itl.add_variable('dataset1PreventOverlap', '1')
        itl.add_variable('dataset1Color', '#FF0000')

    #===================================
    if os.path.exists(dataset2):
        itl.add_variable('dataset2File', dataset2)
        itl.add_variable('dataset2Label', 'taxonomy')
        itl.add_variable('dataset2Separator', 'comma')
        itl.add_variable('dataset2Type', 'colorstrip')
        itl.add_variable('dataset2StripWidth', '300')
        itl.add_variable('dataset2PreventOverlap', '1')
        itl.add_variable('dataset2ColoringType', 'both')
        itl.add_variable('dataset2CirclesSpacing', '100')

    #===================================
    if os.path.exists(dataset3):
        itl.add_variable('dataset3File', dataset3)
        itl.add_variable('dataset3Label', 'pfam')
        itl.add_variable('dataset3Separator', 'tab')
        itl.add_variable('dataset3Type', 'colorstrip')

    #===================================
    if os.path.exists(dataset4):
        itl.add_variable('dataset4File', dataset4)
        itl.add_variable('dataset4Label', 'cluster')
        itl.add_variable('dataset4Separator', 'comma')
        itl.add_variable('dataset4Type', 'colorstrip')
        itl.add_variable('dataset4StripWidth', '200')
        itl.add_variable('dataset4PreventOverlap', '1')
        itl.add_variable('dataset4ColoringType', 'both')

    #===================================
    # Submit the tree
    print('')
    print('Uploading the tree.  This may take some time depending on how large the tree is and how much load there is on the itol server')
    good_upload = itl.upload()
    # Kept as "== False" so that a None result is still not treated as a
    # failure, exactly as before.
    if good_upload == False:
        print('There was an error:' + itl.comm.upload_output)
        sys.exit(1)

    # Read the tree ID
    print('Tree ID: ' + str(itl.comm.tree_id))

    # Read the iTOL API return statement
    print('iTOL output: ' + str(itl.comm.upload_output))

    # Website to be redirected to iTOL tree
    print('Tree Web Page URL: ' + itl.get_webpage())

    # Warnings associated with the upload
    print('Warnings: ' + str(itl.comm.warnings))

    # Export as EPS and SVG; PDF and JPG are derived from the EPS file.
    print('Exporting to pdf')
    itol_exporter = itl.get_itol_export()
    itol_exporter.set_export_param_value('format', 'eps')
    itol_exporter.set_export_param_value('displayMode', "circular")
    itol_exporter.set_export_param_value('showBS', "0")
    itol_exporter.set_export_param_value('fontSize', fontsize)
    itol_exporter.set_export_param_value('alignLabels', "1")
    itol_exporter.set_export_param_value('datasetList', 'dataset1,dataset2,dataset3,dataset4')
    epsfile = outpath + os.sep + pfamid + '-itol.eps'
    pdffile = outpath + os.sep + pfamid + '-itol.pdf'
    jpgfile = outpath + os.sep + pfamid + '-itol.jpg'
    svgfile = outpath + os.sep + pfamid + '-itol.svg'
    itol_exporter.export(epsfile)
    itol_exporter.export(svgfile)
    os.system("epstopdf %s" % epsfile)
    os.system("convert  %s %s" % (epsfile, jpgfile))
    print('exported tree to  %s' % (pdffile,))
def main(g_params):  #{{{
    """Annotate pair-info files with common GO terms and tabulate them.

    Arguments are read from ``sys.argv``:

    * ``-outpath``: output directory (default ``./``).
    * ``-gomap`` / ``-goterm``: GO map and GO term files.
    * ``-mp``: integer stored in ``g_params['pairwise_comparison_method']``;
      only 1 and 3 are accepted.
    * ``-q`` sets ``g_params['isQuiet']`` to True.
    * Bare arguments, and the argument following ``--``, are pair-info
      files.  Each name must contain ``.<class>.`` for one of the comparison
      classes of the selected method.

    For every input file the common GO ancestors of each pair are appended
    to the pair record and written to a ``*.pairinfowithGOterm.txt`` file.
    Per-GO-term comparison-class tables and numTM heat-map tables are then
    written and passed to the plotting shell scripts under ``binpath``.

    Returns 1 on a usage error or bad input.  On success the function falls
    off the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"
    infileList = []
    gomapfile = "/data3/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.Family.nr100.filter.fragmented.uniq.pfam.goinfowithancestor.txt"
    gotermfile = "/data3/wk/MPTopo/pfamAna_refpro/GO_analysis/GO_term.txt"
    gotype = "function"

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            infileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        # startswith() keeps an empty-string argument from raising IndexError.
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-gomap", "--gomap"]:
                (gomapfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-goterm", "--goterm"]:
                (gotermfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mp", "--mp"]:
                (g_params['pairwise_comparison_method'],
                 i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
                return 1
        else:
            infileList.append(argv[i])
            i += 1

    if myfunc.checkfile(gomapfile, "GO map file") != 0:
        return 1
    if myfunc.checkfile(gotermfile, "GO Term file") != 0:
        return 1

    # Pick the class list and the matching plot script together, so the
    # plotting step below can never see an unset command.
    method = g_params['pairwise_comparison_method']
    if method == 1:
        cmpclassList = cmpclassList_method1
        plotScript = "plotCmpClass_mp1_cmpsp_5.sh"
    elif method == 3:
        cmpclassList = cmpclassList_method3
        plotScript = "plotCmpClass_mp3.sh"
    else:
        sys.stderr.write("mp not in [1,3]. Exit\n")
        return 1

    numCmpClass = len(cmpclassList)

    if not infileList:
        # The output stem is derived from the first input file, so at least
        # one is required.
        sys.stderr.write("No input file set. Exit\n")
        return 1
    if len(infileList) < numCmpClass:
        # Reported only; processing continues with the files that were given.
        sys.stderr.write("input file less than len(cmpclassList)=%d\n" % (
            numCmpClass,))

    goMapDict = ReadGOMap(gomapfile)
    # The GO term table is loaded, but nothing below reads the result.
    goTermDict = ReadGOTerm(gotermfile)

    # Map each input file to the comparison class named in its file name.
    pairinfoDict = {}
    for infile in infileList:
        tag = ""
        for cls in cmpclassList:
            if infile.find(".%s." % (cls)) != -1:
                tag = cls
                break
        if tag == "":
            sys.stderr.write("bad infile %s\n" % (infile,))
            return 1
        pairinfoDict[tag] = ReadPairInfo(infile)

    # Append the common GO ancestors of both members to each pair record.
    for tag in pairinfoDict:
        for rec in pairinfoDict[tag]:
            ancList1 = GetAncenstorGOList_LevelOne(rec[0], goMapDict, gotype)
            ancList2 = GetAncenstorGOList_LevelOne(rec[1], goMapDict, gotype)
            rec.extend(list(set(ancList1) & set(ancList2)))

    # output pairinfo with GO common term
    stemname = os.path.splitext(infileList[0].replace(".pairinfo.txt", ""))[0]
    for tag in pairinfoDict:
        outfile = outpath + os.sep + stemname + ".%s" % (
            tag) + ".pairinfowithGOterm.txt"
        WritePairInfoWithGO(pairinfoDict[tag], outfile)
        print("%s output" % (outfile))

    tableCmpclassDict = {}
    tableNumTMHeatMapDict = {}
    for goid in SEL_GOID_SET:
        tableCmpclassDict[goid] = {}
        tableNumTMHeatMapDict[goid] = {}
        InitTableCmpClass(tableCmpclassDict[goid], numSeqIDTGroup, numCmpClass)
        InitTableNumTMHeatMap(tableNumTMHeatMapDict[goid],
                              classList_TableNumTMHeatMap, MAX_NUMTM)

    for tag in pairinfoDict:
        idxClass = GetClassIndex(tag, cmpclassList)
        for li in pairinfoDict[tag]:
            # Fields from index 9 onwards are the appended GO ids.
            if len(li) <= 9:
                continue
            seqidt = li[8]
            numTM1 = li[4]
            numTM2 = li[5]
            minNumTM = min(numTM1, numTM2)
            maxNumTM = max(numTM1, numTM2)
            idxGroup = GetSeqIDTGroupIndex(seqidt, seqIDTGroup)
            for goid in li[9:]:
                tableCmpclassDict[goid]['freq'][idxGroup][idxClass] += 1
                tableCmpclassDict[goid]['subsum'][idxGroup] += 1
                dt = tableNumTMHeatMapDict[goid]['ALL']
                dt['data'][minNumTM][maxNumTM] += 1
                if maxNumTM > dt['maxNumTM']:
                    # Record the new maximum itself; incrementing by one
                    # under-reports whenever the jump is larger than one.
                    dt['maxNumTM'] = maxNumTM
                dt['numPair'] += 1

    # write cmpclass
    stemname2 = os.path.splitext(os.path.basename(stemname))[0]
    print("stemname2= %s" % (stemname2,))
    for goid in SEL_GOID_SET:
        data = tableCmpclassDict[goid]
        outfile = outpath + os.sep + stemname2 + "." + goid + ".cmpclass.txt"
        if WriteTable2D(data['freq'], data['subsum'], cmpclassList,
                        seqIDTGroup, outfile) == 0:
            xlabel = "Sequence identity"
            print("%s output" % (outfile))
            cmd = "%s/%s %s -xlabel \"%s\""\
                " -outstyle eps  -outpath %s -plot1 -multiplot" % (
                    binpath, plotScript, outfile, xlabel, outpath)
            os.system(cmd)
        data = tableNumTMHeatMapDict[goid]
        outfile = outpath + os.sep + stemname2 + "." + goid + ".numTMHeatMap.ALL.txt"
        mtx = data['ALL']
        mode_norm = "norm_diag"
        print("%s numPair=%d" % (goid, mtx['numPair']))
        if WriteNumTMHeatMap(mtx['data'], mtx['maxNumTM'], mtx['numPair'],
                             mode_norm, outfile) == 0:
            cmd = "%s/plotNumTMHeatMap.sh %s" % (binpath, outfile)
            os.system(cmd)
def main(g_params):#{{{
    """Parse the command line and make sure the output directory exists.

    Arguments are read from ``sys.argv``:

    * ``-outpath``, ``-tmpdir``, ``-jobid``, ``-email`` take one value each.
    * ``-baseurl`` stores its value in ``g_params['base_www_url']``.
    * ``-q`` sets ``g_params['isQuiet']`` to True.
    * ``-h`` / ``--help`` prints the help text and returns 1.
    * A bare argument, or the argument following ``--``, is the input file.

    Returns 1 on any usage or set-up error.  On success the function falls
    off the end and returns None.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    infile = ""
    # tmpdir and email are parsed so that their values are consumed from
    # argv, but nothing in this block reads them afterwards.
    tmpdir = ""
    email = ""
    jobid = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        # startswith() instead of argv[i][0] so that an empty-string argument
        # is treated as a positional value rather than raising IndexError.
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-tmpdir", "--tmpdir"]:
                (tmpdir, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-jobid", "--jobid"]:
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-baseurl", "--baseurl"]:
                (g_params['base_www_url'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-email", "--email"]:
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % (argv[i],))
                return 1
        else:
            infile = argv[i]
            i += 1

    if jobid == "":
        sys.stderr.write("%s: jobid not set. exit\n" % (sys.argv[0],))
        return 1

    if myfunc.checkfile(infile, "infile"):
        return 1
    if outpath == "":
        sys.stderr.write("outpath not set. exit\n")
        return 1
    elif not os.path.exists(outpath):
        try:
            myfunc.check_output(["mkdir", "-p", outpath])
        except subprocess.CalledProcessError as e:
            sys.stderr.write("%s\n" % (e,))
            return 1
def main(g_params):  #{{{
    """Write the SignalP record of each sequence, picked by taxonomic class.

    The input file is read line by line; only lines with exactly three
    tab-separated fields are used. The first field is the sequence ID and
    the third the taxonomic class ("Gram+"/"gram+", "Gram-"/"gram-",
    "Euk"/"euk"). The class selects which of the three tables loaded with
    ReadSignalPFile is looked up by sequence ID; a non-empty hit is written
    as one line to the output (outfile, or stdout when no outfile is given).

    Options: -o/-outfile FILE, -gram+ FILE, -gram- FILE, -euk FILE, -q,
    -debug; a non-option argument is the input file.

    Returns 1 when help is printed, on an unknown option, when any of the
    four file checks fails or when the input reader reports failure.
    Otherwise the function ends without an explicit return value.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    gramPositiveFile = ""
    gramNegativeFile = ""
    eukFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # the argument that follows "--" is always the input file
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-gram+", "--gram+"]:
                (gramPositiveFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-gram-", "--gram-"]:
                (gramNegativeFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-euk", "--euk"]:
                (eukFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-debug", "--debug"]:
                g_params['isDEBUG'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1

    if myfunc.checkfile(infile, "taxidwithtaxoFile") != 0:
        return 1
    if myfunc.checkfile(gramPositiveFile, "gramPositiveFile") != 0:
        return 1
    if myfunc.checkfile(gramNegativeFile, "gramNegativeFile") != 0:
        return 1
    if myfunc.checkfile(eukFile, "eukFile") != 0:
        return 1

    gramPositiveDict = ReadSignalPFile(gramPositiveFile)
    gramNegativeDict = ReadSignalPFile(gramNegativeFile)
    eukDict = ReadSignalPFile(eukFile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    hdl = myfunc.ReadLineByBlock(infile)
    if hdl.failure:
        # the output handle is already open here; release it before bailing
        myfunc.myclose(fpout)
        return 1

    # try/finally so both handles are released even if a write fails
    try:
        lines = hdl.readlines()
        while lines is not None:
            for line in lines:
                strs = line.split("\t")
                if len(strs) == 3:
                    seqid = strs[0].strip()
                    taxo = strs[2].strip()
                    info = ""
                    try:
                        if taxo == "Gram+" or taxo == "gram+":
                            info = gramPositiveDict[seqid]
                        elif taxo == "Gram-" or taxo == "gram-":
                            info = gramNegativeDict[seqid]
                        elif taxo == "Euk" or taxo == "euk":
                            info = eukDict[seqid]

                        if g_params['isDEBUG']:
                            print >> sys.stderr, "%s: %s" % (seqid, taxo)
                    except KeyError:
                        # sequence has no record in the selected table
                        info = ""
                    if info != "":
                        fpout.write("%s\n" % info)

            lines = hdl.readlines()
    finally:
        hdl.close()
        myfunc.myclose(fpout)
Esempio n. 31
0
def main(g_params):  #{{{
    """Summarise GO annotations per sequence together with ancestor info.

    GO records are loaded from the input file with ReadGOInfo and the
    ancestor table from the -anc file with ReadGOAnc. For every record a
    header line with the number of "function", "process" and "location"
    terms is written, followed by one line per term holding the term's third
    element and its entry in the ancestor table (empty when the term ID is
    not in the table). Output goes to outfile, or stdout when none is given.

    With -wgoidlist FILE the unique GO IDs found in all records are also
    written to FILE, one per line.

    Options -outpath, -l and -uniprotdb are parsed so existing command lines
    keep working, but their values are not used in this function.

    Returns 1 when help is printed, on an unknown option or when a file
    check fails. Otherwise the function ends without an explicit return
    value.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"
    outfile = ""
    infile = ""
    goancfile = "%s%s%s" % (
        DATADIR3, os.sep,
        "wk/MPTopo/pfamAna_refpro/GO_analysis/GO_ancenstor.MF.txt")
    uniqGOIDListFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # the argument that follows "--" is always the input file
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-wgoidlist", "--wgoidlist"]:
                (uniqGOIDListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-anc", "--anc"]:
                (goancfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-uniprotdb", "--uniprotdb"]:
                (uniprotDBname, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(goancfile, "ancestor file") != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    GOinfoList = ReadGOInfo(infile)
    GOAncDict = ReadGOAnc(goancfile)

    if uniqGOIDListFile != "":  #{{{
        # goidSet was previously used here without being defined in this
        # function; collect the IDs in a local set instead.
        goidSet = set()
        for goinfo in GOinfoList:
            for item in ["function", "process", "location"]:
                for li in goinfo[item]:
                    goidSet.add(li[0])
        fp = open(uniqGOIDListFile, "w")
        try:
            for goid in goidSet:
                print >> fp, goid
        finally:
            # close the list file even if a write fails
            fp.close()
#}}}
    for goinfo in GOinfoList:
        nF = len(goinfo["function"])
        nP = len(goinfo["process"])
        nC = len(goinfo["location"])
        fpout.write("%s  F %1d  P %1d   C %1d\n" %
                    (goinfo['seqid'], nF, nP, nC))
        for item in ["function", "process", "location"]:
            for li in goinfo[item]:
                try:
                    goancinfo = GOAncDict[li[0]]
                except KeyError:
                    # term ID is not in the ancestor table
                    goancinfo = ""
                fpout.write("%s\t%s\n" % (li[2], goancinfo))

    myfunc.myclose(fpout)
Esempio n. 32
0
def main(g_params):#{{{
    """Parse the command line, take the per-job lock and start the job.

    Recognised options fill local settings (outpath, tmpdir, jobid, email)
    or g_params ('base_www_url', 'isQuiet', 'isForceRun'); any argument that
    does not start with "-" is taken as the input file. 'jobid', 'lockfile'
    and 'debugfile' are stored in g_params before RunJob is called.

    Returns 1 when help is printed, on an unknown option, when jobid,
    outpath or tmpdir is not set, when the lock cannot be obtained, when the
    input-file check fails or when a directory cannot be created. Otherwise
    returns whatever RunJob returns.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    outpath = ""
    tmpdir = ""
    email = ""
    jobid = ""

    isNonOptionArg = False
    i = 1
    while i < numArgv:
        arg = argv[i]

        # the argument that follows "--" is always read as the input file
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
            continue
        if arg == "--":
            isNonOptionArg = True
            i += 1
            continue
        if arg[0] != "-":
            infile = arg
            i += 1
            continue

        if arg in ("-h", "--help"):
            PrintHelp()
            return 1
        elif arg in ("-outpath", "--outpath"):
            outpath, i = myfunc.my_getopt_str(argv, i)
        elif arg in ("-tmpdir", "--tmpdir"):
            tmpdir, i = myfunc.my_getopt_str(argv, i)
        elif arg in ("-jobid", "--jobid"):
            jobid, i = myfunc.my_getopt_str(argv, i)
        elif arg in ("-baseurl", "--baseurl"):
            g_params['base_www_url'], i = myfunc.my_getopt_str(argv, i)
        elif arg in ("-email", "--email"):
            email, i = myfunc.my_getopt_str(argv, i)
        elif arg in ("-q", "--q"):
            g_params['isQuiet'] = True
            i += 1
        elif arg in ("-force", "--force"):
            g_params['isForceRun'] = True
            i += 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", arg
            return 1

    if jobid == "":
        print >> sys.stderr, "%s: jobid not set. exit"%(sys.argv[0])
        return 1

    g_params['jobid'] = jobid

    # A lock file is kept in the job's result folder while this script runs,
    # so that the daemon does not work on the same folder. The handle is
    # held in a local variable and therefore stays open until main returns.
    lock_file = "%s/%s/%s"%(path_result, jobid, "runjob.lock")
    g_params['lockfile'] = lock_file
    fpLock = open(lock_file, 'w')
    try:
        fcntl.lockf(fpLock, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        print >> sys.stderr, "Another instance of %s is running"%(progname)
        return 1

    if myfunc.checkfile(infile, "infile") != 0:
        return 1

    if outpath == "":
        print >> sys.stderr, "outpath not set. exit"
        return 1
    if not os.path.exists(outpath):
        (isCreated, runtime) = webcom.RunCmd(["mkdir", "-p", outpath],
                                             gen_logfile, gen_errfile)
        if not isCreated:
            return 1

    if tmpdir == "":
        print >> sys.stderr, "tmpdir not set. exit"
        return 1
    if not os.path.exists(tmpdir):
        (isCreated, runtime) = webcom.RunCmd(["mkdir", "-p", tmpdir],
                                             gen_logfile, gen_errfile)
        if not isCreated:
            return 1

    # the count itself is not used below; the call is kept as in the
    # original flow
    numseq = myfunc.CountFastaSeq(infile)
    g_params['debugfile'] = "%s/debug.log"%(outpath)
    return RunJob(infile, outpath, tmpdir, email, jobid, g_params)
def main(g_params):  #{{{
    """Label every ID of an ID list as euk, gram+, gram- or NA.

    The ID list and the three class lists (-euk, -gram+, -gram-) are read
    with myfunc.ReadIDList. For each ID one line "ID<TAB><TAB>class" is
    written to outfile (stdout when no outfile is given); the middle column
    is always empty. Class membership is tested in the order euk, gram+,
    gram-; an ID found in none of them gets "NA".

    Returns 1 when help is printed, on an unknown option or when one of the
    four file checks fails. Otherwise the function ends without an explicit
    return value.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    idListFile = ""
    euk = ""
    gram_pos = ""
    gram_neg = ""

    isNonOptionArg = False
    i = 1
    while i < numArgv:
        arg = argv[i]

        # the argument that follows "--" is always read as the ID list file
        if isNonOptionArg:
            idListFile = arg
            isNonOptionArg = False
            i += 1
            continue
        if arg == "--":
            isNonOptionArg = True
            i += 1
            continue
        if arg[0] != "-":
            idListFile = arg
            i += 1
            continue

        if arg in ("-h", "--help"):
            PrintHelp()
            return 1
        elif arg in ("-o", "--o", "-outfile"):
            outfile, i = myfunc.my_getopt_str(argv, i)
        elif arg in ("-euk", "--euk"):
            euk, i = myfunc.my_getopt_str(argv, i)
        elif arg in ("-gram+", "--gram+"):
            gram_pos, i = myfunc.my_getopt_str(argv, i)
        elif arg in ("-gram-", "--gram-"):
            gram_neg, i = myfunc.my_getopt_str(argv, i)
        elif arg in ("-q", "--q"):
            g_params['isQuiet'] = True
            i += 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", arg
            return 1

    if myfunc.checkfile(idListFile, "idListFile") != 0:
        return 1
    if myfunc.checkfile(euk, "euk") != 0:
        return 1
    if myfunc.checkfile(gram_pos, "gram_pos") != 0:
        return 1
    if myfunc.checkfile(gram_neg, "gram_neg") != 0:
        return 1

    idList = myfunc.ReadIDList(idListFile)
    eukIDSet = set(myfunc.ReadIDList(euk))
    gramPosIDSet = set(myfunc.ReadIDList(gram_pos))
    gramNegIDSet = set(myfunc.ReadIDList(gram_neg))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    # (label, members) pairs in lookup priority order
    classTable = [
        ("euk", eukIDSet),
        ("gram+", gramPosIDSet),
        ("gram-", gramNegIDSet),
    ]
    NCBI_TaxID = ""  # never filled in here; the column is written empty
    for seqid in idList:
        cls = "NA"
        for (label, members) in classTable:
            if seqid in members:
                cls = label
                break
        print >> fpout, "%s\t%s\t%s" % (seqid, NCBI_TaxID, cls)
    myfunc.myclose(fpout)
Esempio n. 34
0
def main(g_params):  #{{{
    """Distribute files into sub-folders of a working directory.

    The files are given either as an ID list (-idlist, combined with one or
    more -ext extensions) handled by SplitToFolder_idlist, or as a list of
    file names (-filelist) handled by SplitToFolder_filelist; both may be
    given in the same call. -max sets the value passed as the maximum
    number of files per folder (default 2000). -method is parsed but its
    value is not used in this function.

    No positional arguments are accepted: anything that is not a known
    option is reported as a wrong argument.

    Returns 1 when help is printed, on a wrong argument, when the workdir
    check fails or when neither list is given. Problems with the individual
    list files are reported on stderr only; the function then ends without
    an explicit return value, as before.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    workdir = ""
    fileListFile = ""
    idListFile = ""
    extList = []
    maxfile_per_folder = 2000
    method = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # an argument after "--" is rejected; the statements that used
            # to follow this return were unreachable and have been removed
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-idlist", "--idlist"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-filelist", "--filelist"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-workdir", "--workdir"]:
                (workdir, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-max", "--max"]:
                (maxfile_per_folder, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-method", "--method"]:
                (method, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-ext", "--ext"]:
                # -ext may be repeated; every value is collected
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                extList.append(tmpstr)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1

    if myfunc.checkfile(workdir) != 0:
        return 1

    if idListFile == "" and fileListFile == "":
        print >> sys.stderr, "At least one of idListFile and fileListFile need to be set"
        return 1

    if idListFile != "":
        if os.path.exists(idListFile):
            idList = myfunc.ReadIDList(idListFile)
            if len(idList) <= 0:
                print >> sys.stderr, "No ID in idListFile %s" % (idListFile)
            elif len(extList) <= 0:
                print >> sys.stderr, "No extension set when idList is used."
            else:
                SplitToFolder_idlist(idList, workdir, extList,
                                     maxfile_per_folder)
        else:
            print >> sys.stderr, "idListFile %s does not exist" % (idListFile)

    if fileListFile != "":
        if os.path.exists(fileListFile):
            # read the whole list, making sure the handle is closed again
            fpin = open(fileListFile, "r")
            try:
                content = fpin.read()
            finally:
                fpin.close()
            # drop empty lines; a real list is built so len() is valid
            fileList = [name for name in content.split("\n") if name]
            if len(fileList) <= 0:
                print >> sys.stderr, "No file in fileListFile %s" % (
                    fileListFile)
            else:
                SplitToFolder_filelist(fileList, workdir, maxfile_per_folder)
        else:
            print >> sys.stderr, "fileListFile %s does not exist" % (
                fileListFile)
Esempio n. 35
0
def main(g_params):  #{{{
    """Generate sequence pairs within families for the given family IDs.

    Family IDs come from positional arguments and/or an ID list file (-l).
    The family-to-sequence map is read from -mapfile with
    myfunc.ReadFam2SeqidMap, and the pairs are produced by one of
    GeneratePairWithinFam_m_0 / _m_1 / _m_2, selected with -m (0, 1 or 2;
    any other value produces no pairs). Pairs go to outfile (stdout when
    none is given); the -outwithfamid, -outwithpdb and -outfam2seqmap files
    are opened through myfunc.myopen and handed to the generators.

    With -pdbtosp the PDB/UniProt maps are loaded and stored in g_params
    ('uniprotidlist_with_pdb', 'uniprot2pdbMap'). -onlypdb sets
    g_params['isOnlyPDB'] and requires a non-empty -pdbtosp map.

    Returns 1 when help is printed, on an unknown option, when no ID is
    given, when the map-file check fails or when -onlypdb cannot be
    honoured; returns 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    outfile_with_famid = ""
    outfile_with_pdb = ""
    outfile_fam2seqmap = ""
    idListFile = ""
    mapfile = "%s%s%s" % (
        DATADIR3, os.sep,
        "wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.clanid2seqid"
    )
    restrictIDListFile = ""
    idList = []
    maxseq_for_fam = 200
    maxpair_for_fam = 300
    method = 0
    rand_seed = None
    pdbtospFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # the argument that follows "--" is always taken as an ID
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outwithfamid", "--outwithfamid"]:
                outfile_with_famid, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outfam2seqmap", "--outfam2seqmap"]:
                outfile_fam2seqmap, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outwithpdb", "--outwithpdb"]:
                outfile_with_pdb, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in [
                    "-tmprolist", "--tmprolist", "-restrictlist",
                    "--restrictlist"
            ]:
                restrictIDListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mapfile", "--mapfile"]:
                mapfile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pdbtosp", "--pdbtosp"]:
                pdbtospFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-l", "--l"]:
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-maxseq", "--maxseq"]:
                maxseq_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-maxpair", "--maxpair"]:
                maxpair_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-m", "--m", "-method", "--method"]:
                method, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-q", "--q"]:
                # "--q" is accepted as well, like in the sibling scripts
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-onlypdb", "--onlypdb"]:
                g_params['isOnlyPDB'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            idList.append(argv[i])
            i += 1

    if idListFile != "":
        if os.path.exists(idListFile):
            idList += myfunc.ReadIDList(idListFile)
        else:
            # keep going with the IDs from the command line, but say so
            print >> sys.stderr, "Warning! idListFile %s does not exist, ignored" % (
                idListFile)

    if len(idList) < 1:
        print >> sys.stderr, "no ID set. exit"
        return 1
    if myfunc.checkfile(mapfile, "idMapFile") != 0:
        return 1

    idMapDict = myfunc.ReadFam2SeqidMap(mapfile)

    # Read in pdbtosp map
    if pdbtospFile != "":
        (pdb2uniprotMap, uniprot2pdbMap) =\
                myfunc.ReadPDBTOSP(pdbtospFile)
        g_params['uniprotidlist_with_pdb'] = set(uniprot2pdbMap.keys())
        g_params['uniprot2pdbMap'] = uniprot2pdbMap

    if g_params['isOnlyPDB'] == True:
        if pdbtospFile == "":
            print >> sys.stderr, "onlypdb is true but pdbtospFile is not set. exit."
            return 1
        elif g_params['uniprotidlist_with_pdb'] == set([]):
            print >> sys.stderr, "onlypdb is true but uniprotidlist_with_pdb is empty. exit."
            return 1

    restrictIDSet = set([])
    if restrictIDListFile != "":
        restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_withfamid = myfunc.myopen(outfile_with_famid, None, "w", False)
    fpout_withpdb = myfunc.myopen(outfile_with_pdb, None, "w", False)
    fpout_fam2seqmap = myfunc.myopen(outfile_fam2seqmap, None, "w", False)

    if method == 0:
        GeneratePairWithinFam_m_0(idList, idMapDict, restrictIDSet,
                                  maxseq_for_fam, rand_seed, fpout,
                                  fpout_withfamid)
    elif method == 1:
        GeneratePairWithinFam_m_1(idList, idMapDict, restrictIDSet,
                                  maxpair_for_fam, rand_seed, fpout,
                                  fpout_withfamid, fpout_fam2seqmap)
    elif method == 2:  #all to all
        GeneratePairWithinFam_m_2(idList, idMapDict, restrictIDSet, fpout,
                                  fpout_withfamid, fpout_withpdb)

    myfunc.myclose(fpout)
    myfunc.myclose(fpout_withfamid)
    myfunc.myclose(fpout_withpdb)
    myfunc.myclose(fpout_fam2seqmap)
    return 0
Esempio n. 36
0
def main(g_params):  #{{{
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    SPE_PAIR_LIST = [(2, 1), (2, 4), (2, 6), (2, 8), (3, 6), (3, 7), (4, 6),
                     (4, 8), (4, 10), (5, 7), (5, 10), (6, 8), (6, 10),
                     (6, 12), (7, 14), (8, 10), (8, 12), (10, 12), (10, 13),
                     (11, 13), (12, 14)]

    outfile = ""

    infile = ""
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    signalpFile = "%s/wk/MPTopo/pfamAna_refpro/pred_signalp/refpro20120604-celluar.selmaxlength-m1.nr100.signalp_list" % (
        DATADIR3)

    #seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2clanid"%(DATADIR3)
    #seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.seqid2pfamid"%(DATADIR3)
    seqid2clanidMapFile = ""
    seqid2pfamidMapFile = ""
    tm_pfamidListFile = ""
    tm_clanidListFile = ""
    pfamid2seqidMapFile = ""
    clanid2seqidMapFile = ""
    dbname_predTM = ""
    pairlistwithpfamidFile = ""

    pfamtype = ""

    pairListFile = ""

    #classList_TableNumTMHeatMap = ["ALL", "RMSP"]
    classList_TableNumTMHeatMap = ["ALL"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outpath", "--outpath"]:
                (g_params['outpath'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-signalp", "--signalp"]:
                (signalpFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mp", "--mp"]:
                g_params[
                    'pairwise_comparison_method'], i = myfunc.my_getopt_int(
                        argv, i)
            elif argv[i] in ["-mindiffpair", "--mindiffpair"]:
                g_params['mindiffpair'], i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-pfamtype", "--pfamtype"]:
                pfamtype, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-clanidlist", "--clanidlist"]:
                (tm_clanidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamidlist", "--pfamidlist"]:
                (tm_pfamidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqid2clanid", "--seqid2clanid"]:
                (seqid2clanidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqid2pfamid", "--seqid2pfamid"]:
                (seqid2pfamidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamid2seqid", "--pfamid2seqid"]:
                (pfamid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-clanid2seqid", "--clanid2seqid"]:
                (clanid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pairlistwithpfamid", "--pairlistwithpfamid"]:
                (pairlistwithpfamidFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-predTMdbname", "--predTMdbname"]:
                (dbname_predTM, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pairlist", "--pairlist"]:
                (pairListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-winsize", "--winsize"]:
                (g_params['winsize'], i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-outname", "--outname"]:
                (g_params['outname'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-prokar", "--prokar"]:
                g_params['isOnlyAnaProkar'] = True
                i += 1
            elif argv[i] in ["-eukar", "--eukar"]:
                g_params['isOnlyAnaEukar'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1

    if myfunc.checkfile(
            infile, "%s (line %d): infile" %
        (__file__, inspect.currentframe().f_lineno)) != 0:
        return 1

    dirpath = myfunc.my_dirname(infile)

    # try to obtain Pfam family tag
    tag = ""
    if pfamtype != "":
        if pfamtype.upper().find("FAM") != -1:
            tag = ".Family"
        elif pfamtype.upper().find("DOM") != -1:
            tag = ".Domain"
        elif pfamtype.upper().find("REP") != -1:
            tag = ".Repeat"
        elif pfamtype.upper().find("MOT") != -1:
            tag = ".Motif"
        else:
            tag = ""
    else:
        if infile.find(".Family.") != -1:
            tag = ".Family"
        elif infile.find(".Domain.") != -1:
            tag = ".Domain"
        elif infile.find(".Repeat.") != -1:
            tag = ".Repeat"
        elif infile.find(".Motif.") != -1:
            tag = ".Motif"
        else:
            tag = ""

    if seqid2clanidMapFile == "":
        seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.seqid2clanid" % (
            DATADIR3)
    if myfunc.checkfile(
            seqid2clanidMapFile, "%s (line %d): seqid2clanidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if seqid2pfamidMapFile == "":
        seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.seqid2pfamid" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            seqid2pfamidMapFile, "%s (line %d): seqid2pfamidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if pfamid2seqidMapFile == "":
        pfamid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.pfamid2seqid" % (
            DATADIR3)
    if myfunc.checkfile(
            pfamid2seqidMapFile, "%s (line %d): pfamid2seqidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if clanid2seqidMapFile == "":
        clanid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.clanid2seqid" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            clanid2seqidMapFile, "%s (line %d): clanid2seqidMapFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if tm_pfamidListFile == "":
        tm_pfamidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.pfamidlist" % (
            DATADIR3, tag)
    if myfunc.checkfile(
            tm_pfamidListFile, "%s (line %d): tm_pfamidListFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if tm_clanidListFile == "":
        tm_clanidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.clanidlist" % (
            DATADIR3)
    if myfunc.checkfile(
            tm_clanidListFile, "%s (line %d): tm_clanidListFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if dbname_predTM == "":
        dbname_predTM = "%s/wk/MPTopo/pfamAna_refpro/pred_topcons_single_method4/refpro20120604-celluar.selmaxlength-m1.topcons-single_topcons_single.m1.agree-44.RMSP" % (
            DATADIR3)
    if myfunc.checkfile(
            "%s0.db" % (dbname_predTM), "%s (line %d): dbname_predTM" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    if g_params['isOnlyAnaProkar']:
        prokarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Prokaryota.seqidlist" % (
            DATADIR3)
        g_params['prokarSeqIDSet'] = set(myfunc.ReadIDList(prokarseqidfile))
        if len(g_params['prokarSeqIDSet']) < 1:
            return 1
    if g_params['isOnlyAnaEukar']:
        eukarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Eukaryota.seqidlist" % (
            DATADIR3)
        g_params['eukarSeqIDSet'] = set(myfunc.ReadIDList(eukarseqidfile))
        if len(g_params['eukarSeqIDSet']) < 1:
            return 1

    if pairlistwithpfamidFile == "":
        pairlistwithpfamidFile = "%s/../../Pfam-.maxpair100.pairlistwithpfamid" % (
            dirpath)
    if myfunc.checkfile(
            pairlistwithpfamidFile, "%s (line %d): pairlistwithpfamidFile" %
        (__file__, inspect.currentframe().f_lineno)):
        return 1

    pfamid_2_seqidpair_Dict = ReadPairListWithFamID(pairlistwithpfamidFile)
    usedPfamIDSet = set(
        pfamid_2_seqidpair_Dict.keys())  # pfamids used in pair selection

    if pairListFile != "":
        li = myfunc.ReadPairList(pairListFile)
        SPE_PAIR_LIST = []
        for tup in li:
            SPE_PAIR_LIST.append((int(tup[0]), int(tup[1])))

    (pfamidDefDict, clanidDefDict) = ReadPfamDefFile(pfamDefFile)
    signalpDict = lcmp.ReadSignalPDict(signalpFile)

    seqid2clanidDict = myfunc.ReadFam2SeqidMap(seqid2clanidMapFile)
    seqid2pfamidDict = myfunc.ReadFam2SeqidMap(seqid2pfamidMapFile)

    clanid2seqidDict = myfunc.ReadFam2SeqidMap(clanid2seqidMapFile)
    pfamid2seqidDict = myfunc.ReadFam2SeqidMap(pfamid2seqidMapFile)

    tm_pfamidList = myfunc.ReadIDList(tm_pfamidListFile)
    tm_clanidList = myfunc.ReadIDList(tm_clanidListFile)

    tm_pfamidSet = set(tm_pfamidList)
    tm_clanidSet = set(tm_clanidList)

    hdl_predTM = myfunc.MyDB(dbname_predTM)
    if not hdl_predTM.failure:
        idSet_TMpro = set(hdl_predTM.indexedIDList)
    else:
        idSet_TMpro = set([])

    #classList_TableNumTMHeatMap = ["ALL", "RMSP", "RMDUP"]
    #alignrangeList = ['FULL_ALIGNED', 'all', 'PART_ALIGNED']
    alignrangeList = ['FULL_ALIGNED']

    if g_params['outpath'] != "" and not os.path.exists(g_params['outpath']):
        cmd = ["mkdir", "-p", g_params['outpath']]
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError, e:
            print e
            return 1