Esempio n. 1
0
def main(g_params):
    """Parse ``sys.argv`` and run CatFasta/CatFasta2 on the input file.

    Recognised options:
      -h, --help           print help and return 1
      -i, --i, --infile    input file
      -b, --b, --begin     integer forwarded as ``begin`` (default 0)
      -e, --e, --end       integer forwarded as ``end`` (default 999999999)
      -o, --o, --outfile   output file name, handed to myfunc.myopen
                           together with sys.stdout as the default handle

    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    g_params is accepted for interface compatibility; it is not read here.

    Returns 0 on success, 1 on a usage error or when myfunc.checkfile
    rejects the input file.
    """
    # Check argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999

    # Hard-coded selector: 1 -> CatFasta, anything else -> CatFasta2.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            # The argument right after "--" is positional even when it
            # starts with "-", so it must be kept as the input file.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith() tolerates an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-i", "--i", "--infile"):
                inFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ("-b", "--b", "--begin"):
                begin, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ("-e", "--e", "--end"):
                end, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ("-o", "--o", "--outfile"):
                # The helper lives in myfunc like the other option readers.
                outFile, i = myfunc.my_getopt_str(sys.argv, i)
            else:
                # sys.stderr.write behaves the same on Python 2 and 3.
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        if method == 1:
            CatFasta(inFile, begin, end, fpout)
        else:
            CatFasta2(inFile, begin, end, fpout)
    finally:
        # Release the output handle even if the worker raises.
        myfunc.myclose(fpout)
    return 0
Esempio n. 2
0
def main(g_params):
    """Command-line entry point: read options and call CatFasta/CatFasta2.

    Options read from ``sys.argv``:
      -h, --help           show help, return 1
      -i, --i, --infile    input file
      -b, --b, --begin     integer ``begin`` value (default 0)
      -e, --e, --end       integer ``end`` value (default 999999999)
      -o, --o, --outfile   output file name passed to myfunc.myopen
                           (sys.stdout is supplied as the default handle)

    Any argument not starting with "-", and the one argument that follows
    ``--``, is used as the input file.

    g_params is not used by this function.

    Returns 0 after the worker function has run, 1 for a usage error or
    when myfunc.checkfile reports a problem with the input file.
    """
    # Check argv
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999

    # Fixed choice of implementation; only the value 1 selects CatFasta.
    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        token = argv[i]
        if isNonOptionArg:
            # "--" forces the next token to be positional; store it instead
            # of skipping over it.
            inFile = token
            isNonOptionArg = False
            i += 1
        elif token == "--":
            isNonOptionArg = True
            i += 1
        elif token[:1] == "-":  # slicing avoids IndexError on ""
            if token in ("-h", "--help"):
                PrintHelp()
                return 1
            elif token in ("-i", "--i", "--infile"):
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif token in ("-b", "--b", "--begin"):
                begin, i = myfunc.my_getopt_int(argv, i)
            elif token in ("-e", "--e", "--end"):
                end, i = myfunc.my_getopt_int(argv, i)
            elif token in ("-o", "--o", "--outfile"):
                # Must be qualified with myfunc, like every other reader.
                outFile, i = myfunc.my_getopt_str(argv, i)
            else:
                # Written with sys.stderr.write so it runs on Python 2 and 3.
                sys.stderr.write("Error! Wrong argument:%s\n" % token)
                return 1
        else:
            inFile = token
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        if method == 1:
            CatFasta(inFile, begin, end, fpout)
        else:
            CatFasta2(inFile, begin, end, fpout)
    finally:
        # Always hand the handle back, also when the worker fails.
        myfunc.myclose(fpout)
    return 0
Esempio n. 3
0
def main():  #{{{
    """Parse ``sys.argv`` and call RandFasta on the input file.

    Recognised options:
      -h, --help                      print help and return 1
      -i, --infile                    input file
      -n, --n                         integer forwarded as ``N``
                                      (default 999999999)
      -seed, --seed                   integer forwarded as the random seed
                                      (default None)
      -o, --outfile                   output file name, handed to
                                      myfunc.myopen with sys.stdout as the
                                      default handle
      -bs, --block-size, -block-size  positive integer (validated only)

    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    Returns 0 on success, 1 on a usage error or when myfunc.checkfile
    rejects the input file.
    """
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            # The argument after "--" is positional; keep it as the input
            # file rather than discarding it.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-i", "--infile"):
                inFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ("-n", "--n"):
                N, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ("-seed", "--seed"):
                rand_seed, i = myfunc.my_getopt_int(sys.argv, i)
            elif arg in ("-o", "--outfile"):
                outFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ("-bs", "--block-size", "-block-size"):
                # NOTE(review): this binds a local name that is not passed
                # to RandFasta; presumably a module-level BLOCK_SIZE was
                # meant to be updated - confirm before adding ``global``.
                BLOCK_SIZE, i = myfunc.my_getopt_int(sys.argv, i)
                # The message demands a strictly positive value, so zero
                # is rejected as well.
                if BLOCK_SIZE <= 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            inFile = arg
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1
    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        RandFasta(inFile, N, rand_seed, fpout)
    finally:
        # Close the output handle even if RandFasta raises.
        myfunc.myclose(fpout)
    # Explicit success code, matching the integer codes of the error paths.
    return 0
Esempio n. 4
0
def main():#{{{
    """Command-line entry point: read options and call RandFasta.

    Options read from ``sys.argv``:
      -h, --help                      show help, return 1
      -i, --infile                    input file
      -n, --n                         integer ``N`` (default 999999999)
      -seed, --seed                   integer random seed (default None)
      -o, --outfile                   output file name passed to
                                      myfunc.myopen (sys.stdout is supplied
                                      as the default handle)
      -bs, --block-size, -block-size  positive integer (validated only)

    Any argument not starting with "-", and the one argument that follows
    ``--``, is used as the input file.

    Returns 0 after RandFasta has run, 1 for a usage error or when
    myfunc.checkfile reports a problem with the input file.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        token = argv[i]
        if isNonOptionArg:
            # "--" forces the next token to be positional; store it as the
            # input file instead of skipping it.
            inFile = token
            isNonOptionArg = False
            i += 1
        elif token == "--":
            isNonOptionArg = True
            i += 1
        elif token[:1] == "-":  # slicing avoids IndexError on ""
            if token in ("-h", "--help"):
                PrintHelp()
                return 1
            elif token in ("-i", "--infile"):
                inFile, i = myfunc.my_getopt_str(argv, i)
            elif token in ("-n", "--n"):
                N, i = myfunc.my_getopt_int(argv, i)
            elif token in ("-seed", "--seed"):
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif token in ("-o", "--outfile"):
                outFile, i = myfunc.my_getopt_str(argv, i)
            elif token in ("-bs", "--block-size", "-block-size"):
                # NOTE(review): the value ends up in a local that RandFasta
                # never receives; presumably a module-level BLOCK_SIZE was
                # intended - confirm before declaring it ``global``.
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # Zero is not ">0" either, so reject it too.
                if BLOCK_SIZE <= 0:
                    sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                    return 1
            else:
                # sys.stderr.write runs unchanged on Python 2 and 3.
                sys.stderr.write("Error! Wrong argument:%s\n" % token)
                return 1
        else:
            inFile = token
            i += 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1
    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    try:
        RandFasta(inFile, N, rand_seed, fpout)
    finally:
        # Hand the handle back even when RandFasta fails.
        myfunc.myclose(fpout)
    # Explicit success code, in line with the integer error codes above.
    return 0
Esempio n. 5
0
def main(g_params):  #{{{
    """Parse ``sys.argv`` and run Itol_Tree_m0/Itol_Tree_m1 for each ID.

    Recognised options:
      -h, --help             print help and return 1
      -datapath, --datapath  value forwarded as ``datapath`` (default ".")
      -outpath, --outpath    output directory (default "./"); created when
                             at least one ID is processed
      -l, --l                file read with myfunc.ReadIDList; its entries
                             are appended to the ID list
      -method, --method      integer stored in g_params['method']

    Bare arguments, and the single argument following ``--``, are added
    to the ID list.

    For every ID a banner line is written to stdout and the function
    selected by g_params['method'] (0 or 1) is called; any other method
    value results in the banner only.

    Returns 1 on a usage error or when the output directory cannot be
    created, 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    datapath = "."
    outpath = './'
    idList = []
    idListFile = ''

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            idList.append(arg)
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-method", "--method"):
                g_params['method'], i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-datapath", "--datapath", "-l", "--l",
                         "-outpath", "--outpath"):
                # These options consume the next argument; make sure it
                # exists instead of failing with IndexError.
                if i + 1 >= numArgv:
                    sys.stderr.write(
                        "Error! Option %s requires a value\n" % arg)
                    return 1
                value = argv[i + 1]
                optname = arg.lstrip("-")
                if optname == "datapath":
                    datapath = value
                elif optname == "l":
                    idListFile = value
                else:
                    outpath = value
                i += 2
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            idList.append(arg)
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)
    if len(idList) > 0:
        # Create the directory through the os module rather than a shell
        # command line, so paths with spaces or shell metacharacters are
        # handled literally.
        if not os.path.isdir(outpath):
            try:
                os.makedirs(outpath)
            except OSError as e:
                # Another process may have created it in the meantime.
                if not os.path.isdir(outpath):
                    sys.stderr.write(
                        "Failed to create outpath %s: %s\n" % (outpath, e))
                    return 1
        for cnt, pfamid in enumerate(idList):
            # Same text as the former print statement, valid on Python 2
            # and 3.
            sys.stdout.write("%s %s %s %s\n" % (
                "================== ", cnt, pfamid, " ===================="))
            if g_params['method'] == 0:
                Itol_Tree_m0(pfamid, datapath, outpath)
            elif g_params['method'] == 1:
                Itol_Tree_m1(pfamid, datapath, outpath)
    return 0
Esempio n. 6
0
def main(g_params):  #{{{#{{{
    """Parse ``sys.argv`` and run Getseqlen on the input file.

    Recognised options:
      -h, --help                             print help and return 0
      -i, --i, -printid, --printid           flag, sets isPrintID
      -just-print-sum, --just-print-sum      flag, sets isJustPrintSum
      -o, --o, -outfile, --outfile           output file name, handed to
                                             myfunc.myopen with sys.stdout
                                             as the default handle
      -bs, --bs, -block-size, --block-size   positive integer forwarded as
                                             BLOCK_SIZE (default 100000)

    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    g_params is accepted for interface compatibility; it is not read here.

    Returns the status reported by Getseqlen, or 1 on a usage error or
    when myfunc.checkfile rejects the input file.
    """
    # Check argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = arg
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 0
            elif arg in ("-i", "--i", "-printid", "--printid"):
                isPrintID = True
                i += 1
            elif arg in ("-just-print-sum", "--just-print-sum"):
                isJustPrintSum = True
                i += 1
            elif arg in ("-o", "--o", "-outfile", "--outfile"):
                outfile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ("-bs", "--bs", "-block-size", "--block-size"):
                BLOCK_SIZE, i = myfunc.my_getopt_int(sys.argv, i)
                # The message demands a strictly positive value, so zero
                # is rejected as well.
                if BLOCK_SIZE <= 0:
                    print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                    return 1
            else:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE,
                           fpout)
    finally:
        # Close the output handle even if Getseqlen raises.
        myfunc.myclose(fpout)

    return status
Esempio n. 7
0
def main(g_params):  #{{{
    """Parse ``sys.argv`` and run BlastM9toPairlist on every input file.

    Recognised options:
      -h, --help                    print help and return 1
      -l, --l                       file read with myfunc.ReadIDList
                                    (newline-delimited); its entries are
                                    appended to the input file list
      -o, --o, -outfile, --outfile  output file name, handed to
                                    myfunc.myopen with sys.stdout as the
                                    default handle
      -evalue, --evalue             float stored in g_params['evalue_th']
      -seqidt, --seqidt             float stored in g_params['seqidt_th']
      -round, --round               integer stored in
                                    g_params['iteration']

    Bare arguments, and the single argument following ``--``, are added
    to the input file list.

    Returns 1 on a usage error or when no input file was given, 0 after
    all files have been processed.
    """
    # Check argv
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileList = []
    fileListFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-l", "--l"):
                fileListFile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ("-o", "--o", "-outfile", "--outfile"):
                outfile, i = myfunc.my_getopt_str(sys.argv, i)
            elif arg in ("-evalue", "--evalue"):
                g_params['evalue_th'], i = myfunc.my_getopt_float(sys.argv, i)
            elif arg in ("-seqidt", "--seqidt"):
                g_params['seqidt_th'], i = myfunc.my_getopt_float(sys.argv, i)
            elif arg in ("-round", "--round"):
                # The helper returns (value, next index); both must be
                # unpacked, otherwise ``i`` never advances and the loop
                # spins forever on this option.
                g_params['iteration'], i = myfunc.my_getopt_int(sys.argv, i)
            else:
                sys.stderr.write("Error! Wrong argument: '%s'\n" % arg)
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile, delim="\n")
    if len(fileList) < 1:
        sys.stderr.write("No input set. exit\n")
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for infile in fileList:
            BlastM9toPairlist(infile, fpout)
    finally:
        # Close the output handle even if a conversion raises.
        myfunc.myclose(fpout)
    return 0
Esempio n. 8
0
def main(g_params):#{{{#{{{
    """Command-line entry point: read options and call Getseqlen.

    Options read from ``sys.argv``:
      -h, --help                             show help, return 0
      -i, --i, -printid, --printid           flag, sets isPrintID
      -just-print-sum, --just-print-sum      flag, sets isJustPrintSum
      -o, --o, -outfile, --outfile           output file name passed to
                                             myfunc.myopen (sys.stdout is
                                             supplied as the default handle)
      -bs, --bs, -block-size, --block-size   positive integer forwarded as
                                             BLOCK_SIZE (default 100000)

    Any argument not starting with "-", and the one argument that follows
    ``--``, is used as the input file.

    g_params is not used by this function.

    Returns the status from Getseqlen, or 1 for a usage error or when
    myfunc.checkfile reports a problem with the input file.
    """
    # Check argv
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        token = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            infile = token
            i += 1
        elif token == "--":
            isNonOptionArg = True
            i += 1
        elif token[:1] == "-":  # slicing avoids IndexError on ""
            if token in ("-h", "--help"):
                PrintHelp()
                return 0
            elif token in ("-i", "--i", "-printid", "--printid"):
                isPrintID = True
                i += 1
            elif token in ("-just-print-sum", "--just-print-sum"):
                isJustPrintSum = True
                i += 1
            elif token in ("-o", "--o", "-outfile", "--outfile"):
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif token in ("-bs", "--bs", "-block-size", "--block-size"):
                BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
                # Zero is not ">0" either, so reject it too.
                if BLOCK_SIZE <= 0:
                    sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                    return 1
            else:
                # sys.stderr.write runs unchanged on Python 2 and 3.
                sys.stderr.write("Error! Wrong argument:%s\n" % token)
                return 1
        else:
            infile = token
            i += 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE,
                           fpout)
    finally:
        # Hand the handle back even when Getseqlen fails.
        myfunc.myclose(fpout)

    return status
Esempio n. 9
0
def main(g_params):  #{{{
    """Parse ``sys.argv`` and run Itol_Tree_m0/Itol_Tree_m1 per tree file.

    Recognised options:
      -h, --help           print help and exit the process with status 0
      -outpath, --outpath  value forwarded as ``outpath`` (default "")
      -m, --m              integer method selector (default 0)

    Bare arguments, and the single argument following ``--``, are added
    to the list of tree files.

    For every tree file a banner line is written to stdout and the
    function selected by ``method`` (0 or 1) is called; any other method
    value results in the banner only.

    g_params is accepted for interface compatibility; it is not read here.

    Returns 1 on a usage error or when no tree file was given, 0
    otherwise.
    """
    argv = sys.argv
    outpath = ''

    numArgv = len(argv)

    if numArgv < 2:
        PrintHelp()
        return 1

    fileList = []
    method = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            isNonOptionArg = False
            fileList.append(arg)
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                sys.exit(0)
            elif arg in ("-outpath", "--outpath"):
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-m", "--m"):
                (method, i) = myfunc.my_getopt_int(argv, i)
            else:
                sys.stderr.write("Error! Wrong argument:%s\n" % arg)
                return 1
        else:
            fileList.append(arg)
            i += 1

    if len(fileList) < 1:
        sys.stderr.write("No input set\n")
        return 1
    for cnt, treefile in enumerate(fileList):
        # Same text as the former print statement, valid on Python 2 and 3.
        sys.stdout.write("%s %s %s %s\n" % (
            "================== ", cnt, treefile, " ===================="))
        if method == 0:
            Itol_Tree_m0(treefile, outpath, method)
        elif method == 1:
            Itol_Tree_m1(treefile, outpath, method)
    return 0
def main(g_params):  #{{{
    """Parse ``sys.argv``, load the pair list and call WriteHTML.

    Recognised options:
      -h, --help                        print help and return 1
      -outpath, --outpath               output directory (required;
                                        created with ``mkdir -p`` if absent)
      -datapath, --datapath             existing data directory (required)
      -l, --l                           pair list file read by ReadPairInfo
      -treepath, --treepath             stored in g_params['treepath']
      -msapath, --msapath               stored in g_params['msapath']
      -maxperfamily, --maxperfamily     integer stored in
                                        g_params['max_num_output_per_family']
      -swissprot, --swissprot           integer stored in
                                        g_params['isSP_threshold']
      -seqlen, --seqlen                 file read with
                                        myfunc.ReadSeqLengthDict into
                                        g_params['seqLenDict']
      -shortid2fullid, --shortid2fullid file read with myfunc.ReadID2IDMap
                                        into
                                        g_params['uniprotAC2FullSeqIDMap']
      -q, --q                           sets g_params['isQuiet'] = True

    Positional arguments are not accepted.

    Also records os.uname()[0] in g_params['OS'] and a matching cp command
    line in g_params['CP_EXE'].

    Returns 1 on any usage or validation error, 0 after WriteHTML has
    been called.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    datapath = ""
    pairListFile = ""
    seqlenFile = ""
    shortid2fullidFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # This tool takes no positional arguments; report the offending
            # one instead of returning silently.
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-datapath", "--datapath"):
                (datapath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-treepath", "--treepath"):
                (g_params['treepath'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-maxperfamily", "--maxperfamily"):
                (g_params['max_num_output_per_family'],
                 i) = myfunc.my_getopt_int(argv, i)
            elif arg in ("-swissprot", "--swissprot"):
                (g_params['isSP_threshold'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ("-msapath", "--msapath"):
                (g_params['msapath'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-seqlen", "--seqlen"):
                (seqlenFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-shortid2fullid", "--shortid2fullid"):
                (shortid2fullidFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-l", "--l"):
                (pairListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    # Start from an empty mapping so a missing -l option is reported by
    # the check below instead of raising UnboundLocalError.
    pairInfoDict = {}
    if pairListFile != "":
        pairInfoDict = ReadPairInfo(pairListFile)

    if len(pairInfoDict) < 1:
        sys.stderr.write("no pair info read in. exit\n")
        return 1

    if datapath == "":
        sys.stderr.write("datapath not set\n")
        return 1
    elif not os.path.exists(datapath):
        sys.stderr.write("datapath %s does not exist\n" % (datapath))
        return 1

    if outpath == "":
        sys.stderr.write("outpath not set\n")
        return 1
    elif not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        subprocess.check_call(cmd)

    # "-u" is only passed to cp when uname reports Linux.
    g_params['OS'] = os.uname()[0]
    if g_params['OS'].find('Linux') != -1:
        g_params['CP_EXE'] = "/bin/cp -uf"
    else:
        g_params['CP_EXE'] = "/bin/cp -f"

    # read seqlen file
    if seqlenFile != "":
        g_params['seqLenDict'] = myfunc.ReadSeqLengthDict(seqlenFile)

    if shortid2fullidFile != "":
        g_params['uniprotAC2FullSeqIDMap'] = myfunc.ReadID2IDMap(
            shortid2fullidFile)
    #Sort by descending order of numseq
    pairInfoList = sorted(pairInfoDict.items(),
                          key=lambda x: x[1]['numseq'],
                          reverse=True)

    #pairInfoList ['pfamid', {'numseq': 123, 'pairlist':[]}]
    WriteHTML(pairInfoList, datapath, outpath)
    return 0
def main(g_params):#{{{
    """Parse ``sys.argv`` and make sure the output directory exists.

    Recognised options:
      -h, --help                         print help and return 1
      -outpath, --outpath                output directory (required;
                                         created with ``mkdir -p`` if absent)
      -email, --email                    string
      -host, --host                      string
      -nseq, --nseq                      integer
      -nseq-this-user, --nseq-this-user  integer
      -baseurl, --baseurl                string
      -jobid, --jobid                    string
      -datapath, --datapath              string
      -force, --force                    sets g_params['isForceRun'] = True
      -runlocal, --runlocal              sets g_params['isRunLocal'] = True
      -q, --q                            sets g_params['isQuiet'] = True

    Positional arguments are not accepted. Usage errors are written to
    the file object in g_params['fperr'].

    Returns 1 on a usage error, a missing outpath, or a failing mkdir;
    otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    rmsg = ""
    outpath = ""
    jobid = ""
    datapath = ""
    numseq = -1
    numseq_this_user = -1
    email = ""
    host_ip = ""
    base_www_url = ""
    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            # No positional arguments are accepted, not even after "--".
            g_params['fperr'].write("Error! Wrong argument: %s\n" % arg)
            return 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-email", "--email"):
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-host", "--host"):
                (host_ip, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-nseq", "--nseq"):
                (numseq, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ("-nseq-this-user", "--nseq-this-user"):
                (numseq_this_user, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ("-baseurl", "--baseurl"):
                (base_www_url, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-jobid", "--jobid"):
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-datapath", "--datapath"):
                (datapath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-force", "--force"):
                g_params['isForceRun'] = True
                i += 1
            elif arg in ("-runlocal", "--runlocal"):
                g_params['isRunLocal'] = True
                i += 1
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            else:
                g_params['fperr'].write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            g_params['fperr'].write("Error! Wrong argument: %s\n" % arg)
            return 1

    if outpath == "":
        g_params['fperr'].write("outpath not set. exit\n")
        return 1
    elif not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        try:
            rmsg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            # On failure the command's output is only available on the
            # exception; ``rmsg`` was never assigned by the failed call.
            rmsg = e.output
            sys.stdout.write("%s\n" % e)
            sys.stdout.write("%s\n" % rmsg)
            return 1
Esempio n. 12
0
def main(g_params):  #{{{
    """Parse ``sys.argv``, then filter pairwise comparison records.

    The input file is read in chunks of BLOCK_SIZE; each chunk is parsed
    with lcmp.ReadPairCmpResultFromBuffer, annotated (AddTableInfo,
    AddSignalPInfo, AddDupInfo), filtered with FilterPairCmpResult and
    written with lcmp.WritePairCmpRecord. Two record counters are written
    to stdout at the end.

    Recognised options:
      -h, --help                    print help and exit the process
      -o, --o                       output file name, handed to
                                    myfunc.myopen with sys.stdout as the
                                    default handle
      -cmpclass, --cmpclass         string, may be repeated
      -signalp, --signalp           file read by lcmp.ReadSignalPDict
      -restrictidlist               file read by myfunc.ReadIDList; also
                                    sets g_params['isRestrictIDListSet']
      -dup, -dupfile                file read by lcmp.ReadDupPairList
      -rmsp, -rmdup, -evodist       flags stored in g_params
                                    ('isRemoveSignalP', 'isRemoveDup',
                                    'isEvodist')
      -seq2fammap                   string (parsed, not used here)
      -seqidttype                   integer stored in
                                    g_params['seqidttype']
      -tableinfo                    file read by lcmp.ReadPairAlnTableInfo
                                    when seqidttype != 0
      -min-seqidt, -max-seqidt      floats stored in g_params
                                    ('minSeqIDT', 'maxSeqIDT')
      -alignrange                   one of all/full/part, stored in
                                    g_params['alignrange']
      -debug y|n                    stored in g_params['isDEBUG']
      -debug-unmapped-position      accepted; skips the following argument
      -q                            accepted
    (each option listed with one dash is accepted with two dashes as
    well, except -q)

    A bare argument, or the single argument following ``--``, is taken as
    the input file.

    Returns 0 on success, 1 for an invalid -alignrange value and -1 for
    every other error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    isQuiet = False
    tableinfoFile = ""
    cmpclassList = []
    restrictIDListFile = ""

    signalpFile = ""
    dupFile = ""
    outfile = ""
    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                sys.exit()
            elif arg in ("-o", "--o"):
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-cmpclass", "--cmpclass"):
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                cmpclassList.append(tmpstr)
            elif arg in ("-signalp", "--signalp"):
                (signalpFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-restrictidlist", "--restrictidlist"):
                (restrictIDListFile, i) = myfunc.my_getopt_str(argv, i)
                g_params['isRestrictIDListSet'] = True
            elif arg in ("-dup", "--dup", "-dupfile", "--dupfile"):
                (dupFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-rmsp", "--rmsp"):
                g_params['isRemoveSignalP'] = True
                i += 1
            elif arg in ("-rmdup", "--rmdup"):
                g_params['isRemoveDup'] = True
                i += 1
            elif arg in ("-seq2fammap", "--seq2fammap"):
                # NOTE(review): the value is parsed but never used below.
                (seq2famMapfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-seqidttype", "--seqidttype"):
                g_params['seqidttype'], i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-tableinfo", "--tableinfo"):
                tableinfoFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-min-seqidt", "--min-seqidt"):
                g_params['minSeqIDT'], i = myfunc.my_getopt_float(argv, i)
            elif arg in ("-max-seqidt", "--max-seqidt"):
                g_params['maxSeqIDT'], i = myfunc.my_getopt_float(argv, i)
            elif arg in ("-evodist", "--evodist"):
                g_params['isEvodist'] = True
                i += 1
            elif arg in ("-alignrange", "--alignrange"):
                g_params['alignrange'], i = myfunc.my_getopt_str(argv, i)
                if g_params['alignrange'] not in ('all', 'full', 'part'):
                    sys.stderr.write(
                        "alignrange must be one of [all, full, part]\n")
                    return 1
                elif g_params['alignrange'] == 'full':
                    g_params['alignrange'] = 'FULL_ALIGNED'
                elif g_params['alignrange'] == 'part':
                    g_params['alignrange'] = 'PART_ALIGNED'
            elif arg in ("-debug", "--debug"):
                # Needs a non-empty y/n value; without this check a
                # trailing "-debug" fails with IndexError.
                if i + 1 >= numArgv or argv[i + 1] == "":
                    sys.stderr.write(
                        "Error! Option %s requires a value\n" % arg)
                    return -1
                g_params['isDEBUG'] = (argv[i + 1][0].lower() == 'y')
                i += 2
            elif arg in ("-debug-unmapped-position",
                         "--debug-unmapped-position"):
                # NOTE(review): this only binds a local name and skips the
                # following argument; presumably a module-level flag was
                # meant - confirm before changing.
                DEBUG_UNMAPPED_TM_POSITION = 1
                i += 2
            elif arg == "-q":
                # NOTE(review): isQuiet is not read anywhere below.
                isQuiet = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return -1
        else:
            infile = arg
            i += 1
    if infile == "":
        sys.stderr.write("infile not set. Exit.\n")
        return -1
    elif not os.path.exists(infile):
        sys.stderr.write("infile %s does not exists. Exit.\n" % infile)
        # Stop here; there is nothing to open.
        return -1

    pairalnStat = {}
    if g_params['seqidttype'] != 0:
        if tableinfoFile == "" or not os.path.exists(tableinfoFile):
            sys.stderr.write(
                "tableinfoFile must be set when seqidttype is set to 1 or 2\n")
            sys.stderr.write("but seqidttype = %d is set. Exit.\n" %
                             g_params['seqidttype'])
            return -1
        pairalnStat = lcmp.ReadPairAlnTableInfo(tableinfoFile)

    signalpDict = {}
    if signalpFile != "":
        signalpDict = lcmp.ReadSignalPDict(signalpFile)
    if signalpDict != {}:
        g_params['isSignalPSet'] = True

    dupPairList = []
    if dupFile != "":
        dupPairList = lcmp.ReadDupPairList(dupFile)
    if len(dupPairList) > 0:
        g_params['isDupSet'] = True
    dupPairSet = set(dupPairList)

    restrictIDSet = set([])
    if restrictIDListFile != "":
        restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))

    # The input is opened only once all auxiliary files are loaded, so
    # the early returns above cannot leak the handle.
    try:
        fpin = open(infile, "rb")
    except IOError:
        sys.stderr.write("Failed to open input file %s\n" % (infile))
        return -1

    rltyDict = {}
    with fpin:
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
        try:
            unprocessedBuffer = ""
            cntTotalReadInRecord = 0
            cntTotalOutputRecord = 0
            while True:
                buff = fpin.read(BLOCK_SIZE)
                # An empty read marks end of file; the leftover buffer is
                # still parsed once more below before the loop ends.
                isEOFreached = not buff
                buff = unprocessedBuffer + buff
                pairCmpRecordList = []
                unprocessedBuffer = lcmp.ReadPairCmpResultFromBuffer(
                    buff, pairCmpRecordList)

                AddTableInfo(pairCmpRecordList, pairalnStat)
                AddSignalPInfo(pairCmpRecordList, signalpDict)
                AddDupInfo(pairCmpRecordList, dupPairSet)

                cntTotalReadInRecord += len(pairCmpRecordList)
                pairCmpRecordList = FilterPairCmpResult(pairCmpRecordList,
                                                        cmpclassList,
                                                        rltyDict,
                                                        restrictIDSet)

                if len(pairCmpRecordList) > 0:
                    lcmp.WritePairCmpRecord(pairCmpRecordList,
                                            cntTotalOutputRecord, fpout)
                    cntTotalOutputRecord += len(pairCmpRecordList)
                if isEOFreached:
                    break

            sys.stdout.write("cntTotalReadInRecord = %d\n" %
                             cntTotalReadInRecord)
            sys.stdout.write("cntTotalOutputRecord = %d\n" %
                             cntTotalOutputRecord)
        finally:
            # Close the output handle even if processing raises.
            myfunc.myclose(fpout)
    return 0
Esempio n. 13
0
def main(g_params):#{{{
    """Parse ``sys.argv`` and make sure the output directory exists.

    Recognised options:
      -h, --help           print help and return 1
      -outpath, --outpath  output directory (required; created with
                           ``mkdir -p`` if absent)
      -tmpdir, --tmpdir    string
      -jobid, --jobid      string (required)
      -fasta, --fasta      string
      -k, --k              string (default "no")
      -r, --r              string (default "yes")
      -t, --t              integer (default None)
      -baseurl, --baseurl  stored in g_params['base_www_url']
      -email, --email      string
      -q, --q              sets g_params['isQuiet'] = True
      -force, --force      sets g_params['isForceRun'] = True

    A bare argument, or the single argument following ``--``, is taken as
    the model file, which must pass myfunc.checkfile.

    Returns 1 on a usage error, a missing jobid/outpath, a rejected model
    file, or a failing mkdir; otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    modelfile = ""
    seqfile = ""
    tmpdir = ""
    email = ""
    jobid = ""
    isKeepFiles = "no"
    isRepack = "yes"
    targetlength = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            modelfile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-outpath", "--outpath"):
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-tmpdir", "--tmpdir"):
                (tmpdir, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-jobid", "--jobid"):
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-fasta", "--fasta"):
                (seqfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-k", "--k"):
                (isKeepFiles, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-r", "--r"):
                (isRepack, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-t", "--t"):
                (targetlength, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ("-baseurl", "--baseurl"):
                (g_params['base_www_url'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-email", "--email"):
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ("-q", "--q"):
                g_params['isQuiet'] = True
                i += 1
            elif arg in ("-force", "--force"):
                g_params['isForceRun'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            modelfile = arg
            i += 1

    if jobid == "":
        sys.stderr.write("%s: jobid not set. exit\n" % (sys.argv[0]))
        return 1

    if myfunc.checkfile(modelfile, "modelfile") != 0:
        return 1
    if outpath == "":
        sys.stderr.write("outpath not set. exit\n")
        return 1
    elif not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        # "as" form is accepted by Python 2.6+ and Python 3.
        except subprocess.CalledProcessError as e:
            sys.stderr.write("%s\n" % e)
            return 1
def main(g_params):#{{{
    """Parse ``sys.argv``, load the input and program lists, prepare outpath.

    Recognised options:
      -h, --help            print help and return 1
      -prog, --prog         program name, may be repeated
      -proglist, --proglist file read with myfunc.ReadIDList; its entries
                            are appended to the program list
      -gzip, --gzip         yes/no value; g_params['isGzip'] is True when
                            the value starts with "y" or "Y", False
                            otherwise
      -num, --num           integer stored in g_params['num_per_split']
      -outpath, --outpath   output directory, created with ``mkdir -p``
                            when set and absent
      -q                    sets g_params['isQuiet'] = True

    A bare argument, or the single argument following ``--``, is taken as
    the input file, which must pass myfunc.checkfile and is read with
    ReadInputList. When neither -prog nor -proglist is given,
    default_progList is used.

    Returns 1 on a usage error, a rejected input file, an empty program
    list, or a failing mkdir; otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    progList = []
    progListFile = ""
    outpath = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # safe for an empty argument
            if arg in ("-h", "--help"):
                PrintHelp()
                return 1
            elif arg in ("-prog", "--prog"):
                tmpstr, i = myfunc.my_getopt_str(argv, i)
                progList.append(tmpstr)
            elif arg in ("-gzip", "--gzip"):
                tmpstr, i = myfunc.my_getopt_str(argv, i)
                # An empty value is as invalid as another option; checking
                # it here avoids indexing into an empty string.
                if not tmpstr or tmpstr.startswith("-"):
                    sys.stderr.write("Bad argument, -gzip should be"
                                     " followed by yes or no\n")
                    return 1
                g_params['isGzip'] = (tmpstr[0].upper() == "Y")
            elif arg in ("-num", "--num"):
                g_params['num_per_split'], i = myfunc.my_getopt_int(argv, i)
            elif arg in ("-proglist", "--proglist"):
                progListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ("-outpath", "--outpath"):
                outpath, i = myfunc.my_getopt_str(argv, i)
            elif arg == "-q":
                g_params['isQuiet'] = True
                i += 1
            else:
                sys.stderr.write("Error! Wrong argument: %s\n" % arg)
                return 1
        else:
            infile = arg
            i += 1

    if myfunc.checkfile(infile, "infile") != 0:
        return 1

    inputList = ReadInputList(infile)  # [(filename, numseq)]
    inputList = sorted(inputList, key=lambda x: x[1], reverse=False)
    rtname_infile = os.path.basename(os.path.splitext(infile)[0])

    # get progList
    if len(progList) == 0 and progListFile == "":
        progList = default_progList
    else:
        if progListFile != "":
            tmp_list = myfunc.ReadIDList(progListFile)
            if len(tmp_list) == 0:
                sys.stderr.write(
                    "progListFile %s does not exist or empty\n" %
                    (progListFile))
                return 1
            else:
                progList += tmp_list
        if len(progList) == 0:
            sys.stderr.write("progList is empty. exit\n")
            return 1

    if outpath != "" and not os.path.exists(outpath):
        try:
            subprocess.check_output(["mkdir", "-p", outpath])
        # "as" form is accepted by Python 2.6+ and Python 3.
        except subprocess.CalledProcessError as e:
            # Report on stderr like every other error in this function.
            sys.stderr.write("%s\n" % e)
            return 1
Esempio n. 15
0
def main(g_params):#{{{
    """Select lines of an input file by ID, driven by sys.argv.

    IDs come from positional arguments and, optionally, from a list file
    given with -l.  An ID made of several whitespace-separated fields is
    stored as a tuple of those fields; a single-field ID is stored as a
    string.  "--" marks only the next argument as positional.

    Args:
        g_params: dict of run-time settings, updated in place.  The keys
            'method_getid', 'sel_field_list' (a list that is appended to)
            and 'isQuiet' are written here.

    Returns:
        1 after printing the help or on a usage error (unknown option,
        infile missing or not set, no ID supplied); otherwise the status
        returned by SelectLineByID.
    """
    def parse_id(text):
        # One token -> plain string, several tokens -> tuple.  Used for both
        # command-line IDs and list-file lines so they are normalised alike.
        fields = text.split()
        if len(fields) == 1:
            return fields[0]
        return tuple(fields)

    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    idListFile = ""
    idList = []
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(parse_id(arg))
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith() is safe for an empty argument
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-m", "--m", "-methodid", "--methodid"]:
                (g_params['method_getid'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-se", "--se", "-selfield", "--selfield"]:
                (tmpint, i) = myfunc.my_getopt_int(argv, i)
                g_params['sel_field_list'].append(tmpint)
                # selecting fields forces ID extraction method 3
                g_params['method_getid'] = 3
            elif arg in ["-i", "--i"]:
                (infile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            idList.append(parse_id(arg))
            i += 1

    if infile == "":
        print >> sys.stderr, "infile not set"
        print >> sys.stderr, usage_short
        return 1
    elif not os.path.exists(infile):
        print >> sys.stderr, "infile %s does not exist"%(infile)
        return 1

    if idListFile != "":
        hdl = myfunc.ReadLineByBlock(idListFile)
        if not hdl.failure:
            lines = hdl.readlines()
            while lines is not None:
                for line in lines:
                    if line:
                        idList.append(parse_id(line))
                lines = hdl.readlines()
            hdl.close()

    if len(idList) < 1:
        print >> sys.stderr, "ID not set"
        print >> sys.stderr, usage_short
        # nothing can be selected without IDs: treat as a usage error
        return 1

    idListSet = set(idList)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        status = SelectLineByID(infile, idListSet, fpout)
    finally:
        # release the output handle even if the selection raises
        myfunc.myclose(fpout)
    return status
Esempio n. 16
0
def main(g_params):  #{{{
    """Parse sys.argv and load the Pfam / clan / topology input data.

    Every input file that is not given on the command line falls back to a
    default path below DATADIR3 (or, for the pair list with Pfam IDs, a path
    relative to the directory of infile).  Each input is verified with
    myfunc.checkfile before anything is read.

    Args:
        g_params: dict of run-time settings, updated in place.  Keys written
            here: 'outpath', 'pairwise_comparison_method', 'mindiffpair',
            'winsize', 'outname', 'isQuiet', 'isOnlyAnaProkar',
            'isOnlyAnaEukar', 'prokarSeqIDSet', 'eukarSeqIDSet'.

    Returns:
        1 after printing the help, on an unknown option, when a required
        input fails myfunc.checkfile, when a requested Prokaryota/Eukaryota
        ID list is empty, or when the output directory cannot be created.
        Falls off the end (returns None) otherwise.
    """
    def check_input(path, label):
        # f_back is main()'s frame, so the message carries the line of the
        # call site together with the name of the variable being checked.
        lineno = inspect.currentframe().f_back.f_lineno
        return myfunc.checkfile(
            path, "%s (line %d): %s" % (__file__, lineno, label))

    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    SPE_PAIR_LIST = [(2, 1), (2, 4), (2, 6), (2, 8), (3, 6), (3, 7), (4, 6),
                     (4, 8), (4, 10), (5, 7), (5, 10), (6, 8), (6, 10),
                     (6, 12), (7, 14), (8, 10), (8, 12), (10, 12), (10, 13),
                     (11, 13), (12, 14)]

    outfile = ""
    infile = ""
    fileListFile = ""  # value of -l; parsed but not used in this function
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    signalpFile = "%s/wk/MPTopo/pfamAna_refpro/pred_signalp/refpro20120604-celluar.selmaxlength-m1.nr100.signalp_list" % (
        DATADIR3)

    # Empty string means "use the default path", resolved after parsing.
    seqid2clanidMapFile = ""
    seqid2pfamidMapFile = ""
    tm_pfamidListFile = ""
    tm_clanidListFile = ""
    pfamid2seqidMapFile = ""
    clanid2seqidMapFile = ""
    dbname_predTM = ""
    pairlistwithpfamidFile = ""
    pfamtype = ""
    pairListFile = ""

    classList_TableNumTMHeatMap = ["ALL"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith() is safe for an empty argument
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-outpath", "--outpath"]:
                (g_params['outpath'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-signalp", "--signalp"]:
                (signalpFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-mp", "--mp"]:
                (g_params['pairwise_comparison_method'],
                 i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-mindiffpair", "--mindiffpair"]:
                (g_params['mindiffpair'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-pfamtype", "--pfamtype"]:
                (pfamtype, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-clanidlist", "--clanidlist"]:
                (tm_clanidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pfamidlist", "--pfamidlist"]:
                (tm_pfamidListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-seqid2clanid", "--seqid2clanid"]:
                (seqid2clanidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-seqid2pfamid", "--seqid2pfamid"]:
                (seqid2pfamidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pfamid2seqid", "--pfamid2seqid"]:
                (pfamid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-clanid2seqid", "--clanid2seqid"]:
                (clanid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pairlistwithpfamid", "--pairlistwithpfamid"]:
                (pairlistwithpfamidFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-predTMdbname", "--predTMdbname"]:
                (dbname_predTM, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pairlist", "--pairlist"]:
                (pairListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-winsize", "--winsize"]:
                (g_params['winsize'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-outname", "--outname"]:
                (g_params['outname'], i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif arg in ["-prokar", "--prokar"]:
                g_params['isOnlyAnaProkar'] = True
                i += 1
            elif arg in ["-eukar", "--eukar"]:
                g_params['isOnlyAnaEukar'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = arg
            i += 1

    if check_input(infile, "infile") != 0:
        return 1

    dirpath = myfunc.my_dirname(infile)

    # Pfam type tag (".Family", ".Domain", ...) used in some default file
    # names: taken from -pfamtype when given, otherwise from the name of
    # infile.  The first match in the listed order wins; no match -> "".
    tag = ""
    if pfamtype != "":
        pfamtype_upper = pfamtype.upper()
        for (keyword, candidate) in [("FAM", ".Family"), ("DOM", ".Domain"),
                                     ("REP", ".Repeat"), ("MOT", ".Motif")]:
            if keyword in pfamtype_upper:
                tag = candidate
                break
    else:
        for candidate in [".Family", ".Domain", ".Repeat", ".Motif"]:
            if (candidate + ".") in infile:
                tag = candidate
                break

    # NOTE(review): only the seqid2pfamid, clanid2seqid and pfamidlist
    # defaults embed the tag; the seqid2clanid, pfamid2seqid and clanidlist
    # defaults do not.  Confirm this asymmetry is intended.
    if seqid2clanidMapFile == "":
        seqid2clanidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.seqid2clanid" % (
            DATADIR3)
    if check_input(seqid2clanidMapFile, "seqid2clanidMapFile") != 0:
        return 1

    if seqid2pfamidMapFile == "":
        seqid2pfamidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.seqid2pfamid" % (
            DATADIR3, tag)
    if check_input(seqid2pfamidMapFile, "seqid2pfamidMapFile") != 0:
        return 1

    if pfamid2seqidMapFile == "":
        pfamid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.nr100.filter.fragmented.pfamid2seqid" % (
            DATADIR3)
    if check_input(pfamid2seqidMapFile, "pfamid2seqidMapFile") != 0:
        return 1

    if clanid2seqidMapFile == "":
        clanid2seqidMapFile = "%s/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.nr100.filter.fragmented.clanid2seqid" % (
            DATADIR3, tag)
    if check_input(clanid2seqidMapFile, "clanid2seqidMapFile") != 0:
        return 1

    if tm_pfamidListFile == "":
        tm_pfamidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20%s.pfamidlist" % (
            DATADIR3, tag)
    if check_input(tm_pfamidListFile, "tm_pfamidListFile") != 0:
        return 1

    if tm_clanidListFile == "":
        tm_clanidListFile = "%s/data/pfam/pfam26.0/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.clanidlist" % (
            DATADIR3)
    if check_input(tm_clanidListFile, "tm_clanidListFile") != 0:
        return 1

    if dbname_predTM == "":
        dbname_predTM = "%s/wk/MPTopo/pfamAna_refpro/pred_topcons_single_method4/refpro20120604-celluar.selmaxlength-m1.topcons-single_topcons_single.m1.agree-44.RMSP" % (
            DATADIR3)
    # the database is checked through its first data file, "<dbname>0.db"
    if check_input("%s0.db" % (dbname_predTM), "dbname_predTM") != 0:
        return 1

    if g_params['isOnlyAnaProkar']:
        prokarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Prokaryota.seqidlist" % (
            DATADIR3)
        g_params['prokarSeqIDSet'] = set(myfunc.ReadIDList(prokarseqidfile))
        if len(g_params['prokarSeqIDSet']) < 1:
            return 1
    if g_params['isOnlyAnaEukar']:
        eukarseqidfile = "%s/data/uniprot/reference_proteome/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.Eukaryota.seqidlist" % (
            DATADIR3)
        g_params['eukarSeqIDSet'] = set(myfunc.ReadIDList(eukarseqidfile))
        if len(g_params['eukarSeqIDSet']) < 1:
            return 1

    if pairlistwithpfamidFile == "":
        pairlistwithpfamidFile = "%s/../../Pfam-.maxpair100.pairlistwithpfamid" % (
            dirpath)
    if check_input(pairlistwithpfamidFile, "pairlistwithpfamidFile") != 0:
        return 1

    pfamid_2_seqidpair_Dict = ReadPairListWithFamID(pairlistwithpfamidFile)
    usedPfamIDSet = set(
        pfamid_2_seqidpair_Dict.keys())  # pfamids used in pair selection

    if pairListFile != "":
        # a user-supplied pair list replaces the built-in SPE_PAIR_LIST
        SPE_PAIR_LIST = [(int(tup[0]), int(tup[1]))
                         for tup in myfunc.ReadPairList(pairListFile)]

    (pfamidDefDict, clanidDefDict) = ReadPfamDefFile(pfamDefFile)
    signalpDict = lcmp.ReadSignalPDict(signalpFile)

    seqid2clanidDict = myfunc.ReadFam2SeqidMap(seqid2clanidMapFile)
    seqid2pfamidDict = myfunc.ReadFam2SeqidMap(seqid2pfamidMapFile)

    clanid2seqidDict = myfunc.ReadFam2SeqidMap(clanid2seqidMapFile)
    pfamid2seqidDict = myfunc.ReadFam2SeqidMap(pfamid2seqidMapFile)

    tm_pfamidList = myfunc.ReadIDList(tm_pfamidListFile)
    tm_clanidList = myfunc.ReadIDList(tm_clanidListFile)

    tm_pfamidSet = set(tm_pfamidList)
    tm_clanidSet = set(tm_clanidList)

    hdl_predTM = myfunc.MyDB(dbname_predTM)
    if not hdl_predTM.failure:
        idSet_TMpro = set(hdl_predTM.indexedIDList)
    else:
        idSet_TMpro = set([])

    alignrangeList = ['FULL_ALIGNED']

    if g_params['outpath'] != "" and not os.path.exists(g_params['outpath']):
        cmd = ["mkdir", "-p", g_params['outpath']]
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError as e:
            # "as" form: accepted by Python 2.6+ and required by Python 3
            print(e)
            return 1
Esempio n. 17
0
def main():#{{{
    """Write random pairs drawn from a list of IDs, one pair per line.

    IDs are taken from positional arguments and, optionally, from the list
    file given with -l.  "--" marks only the next argument as positional.
    Pairs are produced by myfunc.GenerateRandomPair (method 0, the default)
    or myfunc.GenerateRandomPair_no_repeat_use (method 1) and written as
    "<id1> <id2>" to the output file, or to stdout when no -o is given.

    Returns:
        0 on success; 1 on an unknown option, when fewer than two IDs are
        supplied, or when the method is neither 0 nor 1.  Printing the help
        (no arguments, -h, --help) ends the process through sys.exit().
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        sys.exit()

    max_numpair = 10*1000*1000
    isQuiet = False
    rand_seed = None
    idList = []
    idListFile = ""
    outfile = ""
    method = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith() is safe for an empty argument
            if arg in ["-h", "--help"]:
                PrintHelp()
                sys.exit()
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-m", "--m", "-method", "--method"]:
                method, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-l", "--l", "-listfile", "--listfile"]:
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif arg in ["-maxpair", "--maxpair"]:
                max_numpair, i = myfunc.my_getopt_int(argv, i)
            elif arg in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif arg == "-q":
                isQuiet = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            idList.append(arg)
            i += 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    numseqid = len(idList)
    if numseqid <= 0:
        print >> sys.stderr, "List file is empty."
        return 1
    elif numseqid < 2:
        print >> sys.stderr, "Too few items. At least 2 are required."
        return 1

    # Reject unknown methods up front; otherwise pairlist would be unbound
    # when the output loop is reached.
    if method not in (0, 1):
        print >> sys.stderr, "Error! Unsupported method:", method
        return 1

    # Generate the pairs before opening the output so that a failure here
    # does not leave a truncated output file behind.
    if method == 0:
        pairlist = myfunc.GenerateRandomPair(numseqid, max_numpair,
                rand_seed)
    else:
        pairlist = myfunc.GenerateRandomPair_no_repeat_use(numseqid,
                max_numpair, rand_seed)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for pair in pairlist:
            # each pair holds two indices into idList
            print >> fpout, "%s %s" %(idList[pair[0]], idList[pair[1]])
    finally:
        myfunc.myclose(fpout)
    return 0
Esempio n. 18
0
def main(g_params):  #{{{
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""
    pairListFile = ""
    seqlenFile = ""
    shortid2fullidFile = ""
    seqid2pfamidMapFile = ""
    pfamDefFile = '/data3/data/pfam/pfam27.0/Pfam-A.clans.tsv'
    topodb = ""
    seqdb = ""
    pdb2spFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg == True:
            isNonOptionArg = False
            i += 1
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-topodb", "--topodb"]:
                (topodb, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pdb2sp", "-pdb2sp", "-pdbtosp", "--pdbtosp"]:
                (pdb2spFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqdb", "--seqdb"]:
                (seqdb, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seqmsapath", "--seqmsapath"]:
                (g_params['seqmsapath'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-datapath", "--datapath"]:
                (g_params['datapath'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seq2pfam", "--seq2pfam"]:
                (seqid2pfamidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfam2seq", "--pfam2seq"]:
                (pfamid2seqidMapFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-description", "--description"]:
                (g_params['description'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-alignrange", "--alignrange"]:
                g_params['alignrange'], i = myfunc.my_getopt_str(argv, i)
                if not g_params['alignrange'] in ['all', 'full', 'part']:
                    print >> sys.stderr, "alignrange must be one of [all, full, part]"
                    return 1
                else:
                    if g_params['alignrange'] == 'full':
                        g_params['alignrange'] = 'FULL_ALIGNED'
                    elif g_params['alignrange'] == 'part':
                        g_params['alignrange'] = 'PART_ALIGNED'
            elif argv[i] in ["-basename", "--basename"]:
                (g_params['basename'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-treepath", "--treepath"]:
                (g_params['treepath'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pairalnpath", "--pairalnpath"]:
                (g_params['pairalnpath'], i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-maxperfamily", "--maxperfamily"]:
                (g_params['max_num_output_per_family'],
                 i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-min-seqidt", "--min-seqidt"]:
                g_params['minSeqIDT'], i = myfunc.my_getopt_float(argv, i)
            elif argv[i] in ["-max-seqidt", "--max-seqidt"]:
                g_params['maxSeqIDT'], i = myfunc.my_getopt_float(argv, i)
            elif argv[i] in ["-shortid2fullid", "--shortid2fullid"]:
                (shortid2fullidFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-debug", "--debug"]:
                if argv[i + 1][0].lower() == 'y':
                    g_params['isDEBUG'] = True
                else:
                    g_params['isDEBUG'] = False
                i += 2
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", argv[i]
            return 1

    if g_params['basename'] == "":
        print >> sys.stderr, "basename not set. exit"
        return 1
    if myfunc.checkfile(g_params['datapath'], "datapath") != 0:
        return 1
    if myfunc.checkfile(seqid2pfamidMapFile, "seqid2pfamidMapFile") != 0:
        return 1
    if myfunc.checkfile(pfamid2seqidMapFile, "pfamid2seqidMapFile") != 0:
        return 1

    if myfunc.checkfile(topodb + "0.db", "topodb") != 0:
        return 1
    if myfunc.checkfile(seqdb + "0.db", "seqdb") != 0:
        return 1
    if myfunc.checkfile(g_params['seqmsapath'], "seqmsapath") != 0:
        return 1

    if pdb2spFile != "":
        (g_params['pdb2uniprotMap'],
         g_params['uniprot2pdbMap']) = myfunc.ReadPDBTOSP(pdb2spFile)

    if g_params['datapath'] == "":
        print >> sys.stderr, "datapath not set"
        return 1
    elif not os.path.exists(g_params['datapath']):
        print >> sys.stderr, "datapath %s does not exist" % (
            g_params['datapath'])
        return 1

    if outpath == "":
        print >> sys.stderr, "outpath not set"
        return 1
    elif not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        subprocess.check_call(cmd)

    paircmpfile = "%s/%s.paircmp" % (g_params['datapath'],
                                     g_params['basename'])
    if myfunc.checkfile(paircmpfile, "paircmpfile") != 0:
        return 1

    (g_params['pfamidDefDict'],
     g_params['clanidDefDict']) = lcmp.ReadPfamDefFile(pfamDefFile)

    g_params['seqid2pfamidDict'] = myfunc.ReadFam2SeqidMap(seqid2pfamidMapFile)
    g_params['pfamid2seqidDict'] = myfunc.ReadFam2SeqidMap(pfamid2seqidMapFile)

    tmpdir = tempfile.mkdtemp()
    if g_params['msapath'] == "":
        g_params['msapath'] = tmpdir
    if g_params['treepath'] == "":
        g_params['treepath'] = tmpdir
    if g_params['pairalnpath'] == "":
        g_params['pairalnpath'] = tmpdir

    pairCmpRecordList = []
    unprocessedBuffer = ""
    cntTotalReadInRecord = 0
    cntTotalOutputRecord = 0
    isEOFreached = False
    try:
        fpin = open(paircmpfile, "r")
    except IOError:
        print >> sys.stderr, "Failed to open input file %s" % (paircmpfile)
        return 1
    while 1:
        buff = fpin.read(myfunc.BLOCK_SIZE)
        if buff == "":
            isEOFreached = True
        buff = unprocessedBuffer + buff
        rdList = []
        unprocessedBuffer = lcmp.ReadPairCmpResultFromBuffer(buff, rdList)
        rdList = FilterPairCmpResult(rdList)
        cntTotalReadInRecord += len(rdList)
        pairCmpRecordList += rdList
        if isEOFreached == True:
            break
    fpin.close()

    print "cntTotalReadInRecord =", cntTotalReadInRecord

    g_params['hdl_seqdb'] = myfunc.MyDB(seqdb)
    g_params['hdl_topodb'] = myfunc.MyDB(topodb)

    g_params['OS'] = os.uname()[0]
    if g_params['OS'].find('Linux') != -1:
        g_params['CP_EXE'] = "/bin/cp -uf"
    else:
        g_params['CP_EXE'] = "/bin/cp -f"

    if shortid2fullidFile != "":
        g_params['uniprotAC2FullSeqIDMap'] = myfunc.ReadID2IDMap(
            shortid2fullidFile)

    addname = ""
    if g_params['alignrange'] != 'all':
        addname += ".%s" % (g_params['alignrange'])

    dataTable = {}
    # structure of dataTable
    #    dataTable[pfamid] = {'set_seqid':set(), 'difftopopair':[{'INV':[(id1,id2)]},{'TM2GAP':},{}}

    # first read in pairCmpRecordList
    AddAllSeqInPairCmp(dataTable, pairCmpRecordList,
                       g_params['seqid2pfamidDict'])

    pairInfoFileList = []
    for cmpclass in g_params['cmpClassList_mp3_cmpdup'][0:]:
        ss = "%s/%s_.cmpdup.FULL_ALIGNED.%s.pairinfo.txt" % (
            g_params['datapath'], g_params['basename'], cmpclass)
        pairInfoFileList.append(ss)
        pairinfoList = ReadPairInfo_cmpclass(ss)
        AddPairInfo(dataTable, pairinfoList, cmpclass)
#     print "\n".join(pairInfoFileList)
    if g_params['isDEBUG']:  #{{{
        for pfamid in dataTable:
            print pfamid
            print "\tset_seqid"
            print dataTable[pfamid]['set_seqid']
            print "\tdifftopopair"
            for cls in dataTable[pfamid]['difftopopair']:
                print "\t\t", cls
                for tup in dataTable[pfamid]['difftopopair'][cls]:
                    print "\t\t\t", tup  #}}}

    WriteHTML(dataTable, outpath)

    os.system("rm -rf %s" % (tmpdir))
Esempio n. 19
0
def main(g_params):  #{{{
    """Parse sys.argv, validate the inputs and prepare the output directory.

    Input file names are collected from positional arguments and from the
    list file given with -l.  "--" marks only the next argument as
    positional.  topoalnFile and aapath must pass myfunc.checkfile and
    outfile must be set; the directory of outfile is created when missing.

    Args:
        g_params: dict of run-time settings, updated in place.  Keys written
            here: 'seqidttype', 'isQuiet'.

    Returns:
        1 after printing the help, on an unknown option, when no input is
        given, when a required input is missing, or when the output
        directory cannot be created.  Falls off the end (returns None)
        otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    threshold_Fraction_Group_2 = 0.05
    threshold_NumSeq_Group_2 = 2
    tableinfoFile = ""
    pdbtospFile = ""
    sprotACListFile = ""

    threshold_g12_seqidt = 20.0

    topoalnFile = ""
    aapath = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith() is safe for an empty argument
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-seqidttype", "--seqidttype"]:
                (g_params['seqidttype'], i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-tableinfo", "--tableinfo"]:
                (tableinfoFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-topoaln", "--topoaln"]:
                (topoalnFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-aapath", "--aapath"]:
                (aapath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-thncls2", "--thncls2"]:
                (threshold_NumSeq_Group_2, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-thfrac2", "--thfrac2"]:
                (threshold_Fraction_Group_2,
                 i) = myfunc.my_getopt_float(argv, i)
            elif arg in ["-pfamdef", "--pfamdef"]:
                (pfamDefFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-pdbtosp", "--pdbtosp"]:
                (pdbtospFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-sprot", "--sprot"]:
                (sprotACListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            fileList.append(arg)
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)
    if len(fileList) < 1:
        print >> sys.stderr, "No input set. exit"
        return 1

    if myfunc.checkfile(topoalnFile, "topoalnFile") != 0:
        return 1
    if myfunc.checkfile(aapath, "aapath") != 0:
        return 1
    if outfile == "":
        print >> sys.stderr, "outfile not set. Exit"
        return 1

    # make sure the directory that will hold outfile exists
    outpath = myfunc.my_dirname(outfile)
    if not os.path.exists(outpath):
        cmd = ["mkdir", "-p", outpath]
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError as e:
            # "as" form: accepted by Python 2.6+ and required by Python 3
            print(e)
            return 1
Esempio n. 20
0
def main(g_params):  #{{{
    """Compute the KR bias of every sequence and write two result files.

    Sequences and topologies are read with myfunc.ReadFasta from -seqfile
    and -topofile.  For each sequence ID that also has a topology,
    CalKRBias is called and the result is written with WriteResult to
    "<rootname>.krlist.txt"; for comparison classes "IDT" and "INV" the
    bias value is also written to "<rootname>.krbias.txt".  rootname is the
    base name of seqfile without its extension.

    The comparison class of an ID is the second whitespace-separated field
    of its topology annotation line; IDs without one are treated as "INV".

    Output goes to the directory given with -outpath; without that option
    it goes to the directory of seqfile ("." when seqfile has none).

    Positional arguments and the -l list file are parsed but not used by
    this function.

    Args:
        g_params: dict of run-time settings; 'isQuiet' and 'isDEBUG' are set
            from the command line and 'isDEBUG' is read.

    Returns:
        1 after printing the help, on an unknown option, or when no
        sequence or no topology was read.  Falls off the end (returns None)
        on success.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = ""  # empty: not given, fall back to the directory of seqfile
    idListFile = None
    idList = []
    seqfile = ""
    topofile = ""
    max_dist = 12  # maximal distance to the TM helix so that K, R residues are counted
    flank_win = 5  # flanking window of the TM helix, residues at position
    #TMbeg-flank_win and TMend+flank_win are also counted

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):  # startswith() is safe for an empty argument
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-maxdist", "--maxdist"]:
                (max_dist, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-flankwin", "--flankwin"]:
                (flank_win, i) = myfunc.my_getopt_int(argv, i)
            elif arg in ["-seqfile", "--seqfile"]:
                (seqfile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-topofile", "--topofile"]:
                (topofile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-l", "--l"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            elif arg in ["-debug"]:
                g_params['isDEBUG'] = True
                i += 1
            else:
                print("Error! Wrong argument:", argv[i], file=sys.stderr)
                return 1
        else:
            idList.append(arg)
            i += 1

    (idListSeq, annoListSeq, seqList) = myfunc.ReadFasta(seqfile)
    (idListTopo, annoListTopo, topoList) = myfunc.ReadFasta(topofile)

    if len(idListSeq) < 1 or len(idListTopo) < 1:
        print("No seq set", file=sys.stderr)
        return 1

    seqDict = dict(zip(idListSeq, seqList))
    topoDict = dict(zip(idListTopo, topoList))

    # annotation line: "<id> <cmpclass> ..."; entries with fewer than two
    # fields are skipped and later fall back to the default class
    cmpclassDict = {}
    for anno in annoListTopo:
        strs = anno.lstrip(">").split()
        if len(strs) >= 2:
            cmpclassDict[strs[0]] = strs[1]

    if outpath == "":
        outpath = os.path.dirname(seqfile)
        if outpath == "":
            outpath = "."
    elif not os.path.isdir(outpath):
        os.makedirs(outpath)

    rootname = os.path.basename(os.path.splitext(seqfile)[0])
    outfile_kr_list = os.path.join(outpath, rootname + ".krlist.txt")
    outfile_krbias = os.path.join(outpath, rootname + ".krbias.txt")

    # "with" closes both files even if a record fails half-way through
    with open(outfile_kr_list, "w") as fpout_krlist, \
            open(outfile_krbias, "w") as fpout_krbias:
        for idd in idListSeq:
            if g_params['isDEBUG']:
                print("seqid: %s" % (idd))
            try:
                topo = topoDict[idd]
            except KeyError:
                print("no topo for %s" % idd, file=sys.stderr)
                continue
            try:
                seq = seqDict[idd]
            except KeyError:
                print("no seq for %s" % idd, file=sys.stderr)
                continue
            cmpclass = cmpclassDict.get(idd, "INV")
            (kr_bias, KR_pos_list, numTM) = CalKRBias(seq, topo, flank_win,
                                                      max_dist)
            WriteResult(idd, cmpclass, seq, numTM, kr_bias, KR_pos_list,
                        fpout_krlist)
            if cmpclass in ["IDT", "INV"]:
                fpout_krbias.write("%d\n" % kr_bias)
Esempio n. 21
0
def main(g_params):  #{{{
    """Parse the command line and dispatch to the SplitToFolder_* helpers.

    Recognised options (each is also accepted with a leading ``--``):
        -idlist FILE    ID list file, read with myfunc.ReadIDList
        -filelist FILE  text file holding one file name per line
        -workdir DIR    working directory handed to the SplitToFolder_* helpers
        -max INT        maxfile_per_folder (default 2000)
        -method INT     consumed for backward compatibility; not used here
        -ext STR        file extension, may be given several times
        -q              sets g_params['isQuiet'] to True

    Positional arguments (including anything after a bare ``--``) are
    rejected.

    Args:
        g_params: dict of run-time settings; only 'isQuiet' is written here.

    Returns:
        0 on success, 1 when the arguments are invalid or when any of the
        processing steps reported an error.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    workdir = ""
    fileListFile = ""
    idListFile = ""
    extList = []
    maxfile_per_folder = 2000

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # This tool takes no positional arguments, so anything that
            # follows a bare "--" is an error.
            print("Error! Wrong argument:", argv[i], file=sys.stderr)
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-idlist", "--idlist"]:
                (idListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-filelist", "--filelist"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-workdir", "--workdir"]:
                (workdir, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-max", "--max"]:
                (maxfile_per_folder, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-method", "--method"]:
                # Still consumed so that existing command lines keep working;
                # the value itself is not used by this function.
                (_method, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-ext", "--ext"]:
                (tmpstr, i) = myfunc.my_getopt_str(argv, i)
                extList.append(tmpstr)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print("Error! Wrong argument:", argv[i], file=sys.stderr)
                return 1
        else:
            # Also reached for an empty-string argument, which previously
            # raised IndexError on argv[i][0].
            print("Error! Wrong argument:", argv[i], file=sys.stderr)
            return 1

    if myfunc.checkfile(workdir) != 0:
        return 1

    if idListFile == "" and fileListFile == "":
        print("At least one of idListFile and fileListFile need to be set",
              file=sys.stderr)
        return 1

    # Both inputs are processed independently; a failure of one does not
    # prevent the other from running, but it is reflected in the exit status.
    status = 0

    if idListFile != "":
        if not os.path.exists(idListFile):
            print("idListFile %s does not exist" % (idListFile),
                  file=sys.stderr)
            status = 1
        else:
            idList = myfunc.ReadIDList(idListFile)
            if len(idList) <= 0:
                print("No ID in idListFile %s" % (idListFile),
                      file=sys.stderr)
                status = 1
            elif len(extList) <= 0:
                print("No extension set when idList is used.",
                      file=sys.stderr)
                status = 1
            else:
                SplitToFolder_idlist(idList, workdir, extList,
                                     maxfile_per_folder)

    if fileListFile != "":
        if not os.path.exists(fileListFile):
            print("fileListFile %s does not exist" % (fileListFile),
                  file=sys.stderr)
            status = 1
        else:
            # Close the handle deterministically and build a real list so
            # that len() works (filter() returns an iterator on Python 3).
            with open(fileListFile, "r") as fpin:
                fileList = [name for name in fpin.read().split("\n") if name]
            if len(fileList) <= 0:
                print("No file in fileListFile %s" % (fileListFile),
                      file=sys.stderr)
                status = 1
            else:
                SplitToFolder_filelist(fileList, workdir, maxfile_per_folder)

    return status
Esempio n. 22
0
def main(g_params):  #{{{
    """Parse the command line and run one of the MatchMSATopo_* routines.

    Recognised options (each is also accepted with a leading ``--``):
        -o, -outfile FILE   output file name passed to the matcher
        -msa FILE           MSA file (required, checked with myfunc.checkfile)
        -topo FILE          topology file
        -topodb NAME        topology database prefix; "<NAME>0.db" must exist
        -m INT              method_match, must be 0 or 1 (default 1)
        -ig, -ignore-badseq STR
                            "y..." enables isIgnoreBadseq, anything else
                            disables it (default enabled)
        -q                  sets g_params['isQuiet'] to True

    When both -topodb and -topo are given, -topodb takes precedence.
    Positional arguments (including anything after a bare ``--``) are
    rejected.

    Args:
        g_params: dict of run-time settings; only 'isQuiet' is written here.

    Returns:
        1 on any argument or file error, otherwise the return value of the
        selected MatchMSATopo_* function.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    msafile = ""
    topofile = ""
    topodb = ""
    isIgnoreBadseq = True
    method_match = 1

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # No positional arguments are supported, so anything after a
            # bare "--" is an error.
            print("Error! Wrong argument:", argv[i], file=sys.stderr)
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile", "--outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-msa", "--msa"]:
                (msafile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-topo", "--topo"]:
                (topofile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-m", "--m"]:
                (method_match, i) = myfunc.my_getopt_int(argv, i)
                if method_match not in [0, 1]:
                    print("method_match %d not in [0,1]" % method_match,
                          file=sys.stderr)
                    return 1
            elif argv[i] in ["-topodb", "--topodb"]:
                (topodb, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in [
                    "-ig", "--ig", "-ignore-badseq", "--ignore-badseq"
            ]:
                (tmpss, i) = myfunc.my_getopt_str(argv, i)
                # Slicing instead of indexing keeps an empty value from
                # raising IndexError; it is simply treated as "no".
                isIgnoreBadseq = tmpss[:1].lower() == "y"
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print("Error! Wrong argument:", argv[i], file=sys.stderr)
                return 1
        else:
            # Also reached for an empty-string argument, which previously
            # raised IndexError on argv[i][0].
            print("Error! Wrong argument:", argv[i], file=sys.stderr)
            return 1

    if myfunc.checkfile(msafile) != 0:
        return 1

    if topodb != "":
        if not os.path.exists(topodb + '0.db'):
            print("topodb %s does not exist" % (topodb), file=sys.stderr)
            return 1
        return MatchMSATopo_using_topodb(msafile, topodb, isIgnoreBadseq,
                                         method_match, outfile)

    if topofile != "":
        if not os.path.exists(topofile):
            print("topofile %s does not exist" % (topofile), file=sys.stderr)
            return 1
        return MatchMSATopo_using_topofile(msafile, topofile, isIgnoreBadseq,
                                           method_match, outfile)

    print("neither topofile nor topodb is set. exit", file=sys.stderr)
    return 1
def main(g_params):  #{{{
    """Annotate pair-info files with common GO terms and write summary tables.

    Recognised options (each is also accepted with a leading ``--``):
        -outpath DIR   output directory (default "./")
        -gomap FILE    GO map file, read with ReadGOMap
        -goterm FILE   GO term file, read with ReadGOTerm
        -mp INT        g_params['pairwise_comparison_method'], must be 1 or 3
        -q             sets g_params['isQuiet'] to True

    Every other argument is taken as an input pair-info file.  Each input
    file name must contain ".<class>." for one of the comparison classes of
    the selected method; that class becomes the tag of the file.

    Steps performed:
      1. extend every pair record with the GO ids shared by both members
         (as returned by GetAncenstorGOList_LevelOne);
      2. write "<stem>.<tag>.pairinfowithGOterm.txt" per tag;
      3. per GO id in SEL_GOID_SET, accumulate and write the comparison
         class table and the numTM heat map, and invoke the plotting shell
         scripts found under ``binpath``.

    Args:
        g_params: dict of run-time settings; reads
            'pairwise_comparison_method', writes it for -mp and writes
            'isQuiet' for -q.

    Returns:
        0 on success, 1 on argument or input errors.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"
    infileList = []
    gomapfile = "/data3/wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/Pfam-A-full.seqfrompfamfasta.percentTMpro_scampi.perTM75_nseq20.Family.nr100.filter.fragmented.uniq.pfam.goinfowithancestor.txt"
    gotermfile = "/data3/wk/MPTopo/pfamAna_refpro/GO_analysis/GO_term.txt"
    gotype = "function"

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # Only the single argument right after "--" is forced to be an
            # input file; option parsing resumes afterwards.
            infileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-gomap", "--gomap"]:
                (gomapfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-goterm", "--goterm"]:
                (gotermfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mp", "--mp"]:
                (g_params['pairwise_comparison_method'],
                 i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print("Error! Wrong argument:", argv[i], file=sys.stderr)
                return 1
        else:
            infileList.append(argv[i])
            i += 1

    if myfunc.checkfile(gomapfile, "GO map file") != 0:
        return 1
    if myfunc.checkfile(gotermfile, "GO Term file") != 0:
        return 1

    method = g_params['pairwise_comparison_method']
    if method == 1:
        cmpclassList = cmpclassList_method1
    elif method == 3:
        cmpclassList = cmpclassList_method3
    else:
        print("mp not in [1,3]. Exit", file=sys.stderr)
        return 1

    numCmpClass = len(cmpclassList)

    numInfile = len(infileList)
    if numInfile < numCmpClass:
        # Only a warning: a subset of the classes can still be processed.
        print("input file less than len(cmpclassList)=%d" % (numCmpClass),
              file=sys.stderr)
    if numInfile < 1:
        # infileList[0] is needed below to derive the output stem name.
        print("no input file is set. exit", file=sys.stderr)
        return 1

    goMapDict = ReadGOMap(gomapfile)
    goTermDict = ReadGOTerm(gotermfile)

    # Map each input file to its comparison class via the ".<class>." part
    # of its name.
    pairinfoDict = {}
    for infile in infileList:
        tag = ""
        for cls in cmpclassList:
            if infile.find(".%s." % (cls)) != -1:
                tag = cls
                break
        if tag == "":
            print("bad infile %s" % (infile), file=sys.stderr)
            return 1
        pairinfoDict[tag] = ReadPairInfo(infile)

    # Append the GO ids common to both members to every pair record.
    for pairinfo in pairinfoDict.values():
        for tup in pairinfo:
            ancList1 = GetAncenstorGOList_LevelOne(tup[0], goMapDict, gotype)
            ancList2 = GetAncenstorGOList_LevelOne(tup[1], goMapDict, gotype)
            for goid in set(ancList1) & set(ancList2):
                tup.append(goid)

    # output pairinfo with GO common term
    stemname = os.path.splitext(infileList[0].replace(".pairinfo.txt", ""))[0]
    for tag in pairinfoDict:
        outfile = outpath + os.sep + stemname + ".%s" % (
            tag) + ".pairinfowithGOterm.txt"
        WritePairInfoWithGO(pairinfoDict[tag], outfile)
        print("%s output" % (outfile))

    tableCmpclassDict = {}
    tableNumTMHeatMapDict = {}
    for goid in SEL_GOID_SET:
        tableCmpclassDict[goid] = {}
        tableNumTMHeatMapDict[goid] = {}
        InitTableCmpClass(tableCmpclassDict[goid], numSeqIDTGroup, numCmpClass)
        InitTableNumTMHeatMap(tableNumTMHeatMapDict[goid],
                              classList_TableNumTMHeatMap, MAX_NUMTM)

    for tag in pairinfoDict:
        idxClass = GetClassIndex(tag, cmpclassList)
        for li in pairinfoDict[tag]:
            if len(li) <= 9:
                # No common GO id was appended to this record.
                continue
            seqidt = li[8]
            numTM1 = li[4]
            numTM2 = li[5]
            minNumTM = min(numTM1, numTM2)
            maxNumTM = max(numTM1, numTM2)
            # The group index depends only on the record, not on the GO id.
            idxGroup = GetSeqIDTGroupIndex(seqidt, seqIDTGroup)
            # NOTE(review): assumes every appended GO id is a member of
            # SEL_GOID_SET; otherwise the lookups below raise KeyError.
            for goid in li[9:]:
                tableCmpclassDict[goid]['freq'][idxGroup][idxClass] += 1
                tableCmpclassDict[goid]['subsum'][idxGroup] += 1
                dt = tableNumTMHeatMapDict[goid]['ALL']
                dt['data'][minNumTM][maxNumTM] += 1
                if maxNumTM > dt['maxNumTM']:
                    # Record the largest value seen; the previous "+= 1"
                    # only crept towards it one step per pair.
                    dt['maxNumTM'] = maxNumTM
                dt['numPair'] += 1

    # write cmpclass
    stemname2 = os.path.splitext(os.path.basename(stemname))[0]
    print("stemname2=", stemname2)
    for goid in SEL_GOID_SET:
        data = tableCmpclassDict[goid]
        outfile = outpath + os.sep + stemname2 + "." + goid + ".cmpclass.txt"
        if WriteTable2D(data['freq'], data['subsum'], cmpclassList,
                        seqIDTGroup, outfile) == 0:
            xlabel = "Sequence identity"
            print("%s output" % (outfile))
            # method is guaranteed to be 1 or 3 by the check above.
            if method == 1:
                script = "plotCmpClass_mp1_cmpsp_5.sh"
            else:
                script = "plotCmpClass_mp3.sh"
            cmd = "%s/%s %s -xlabel \"%s\""\
                " -outstyle eps  -outpath %s -plot1 -multiplot" % (
                    binpath, script, outfile, xlabel, outpath)
            os.system(cmd)

        outfile = outpath + os.sep + stemname2 + "." + goid + ".numTMHeatMap.ALL.txt"
        mtx = tableNumTMHeatMapDict[goid]['ALL']
        mode_norm = "norm_diag"
        print("%s numPair=%d" % (goid, mtx['numPair']))
        if WriteNumTMHeatMap(mtx['data'], mtx['maxNumTM'], mtx['numPair'],
                             mode_norm, outfile) == 0:
            cmd = "%s/plotNumTMHeatMap.sh %s" % (binpath, outfile)
            os.system(cmd)

    return 0
Esempio n. 24
0
def main(g_params):  #{{{
    """Parse the job-related command line and make sure outpath exists.

    Recognised options (each is also accepted with a leading ``--``):
        -outpath DIR          output directory (required, created if missing)
        -email STR
        -host STR
        -nseq INT
        -nseq-this-user INT
        -baseurl STR
        -jobid STR
        -datapath DIR
        -force                sets g_params['isForceRun'] to True
        -q                    sets g_params['isQuiet'] to True

    Apart from -outpath, -force and -q the option values are only parsed;
    the code visible in this function does not use them further.
    Positional arguments (including anything after a bare ``--``) are
    rejected.

    Args:
        g_params: dict of run-time settings; reads 'fperr' (stream for error
            messages) and writes 'isForceRun' / 'isQuiet'.

    Returns:
        0 on success, 1 on argument errors or when outpath cannot be
        created.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    fperr = g_params['fperr']

    outpath = ""
    jobid = ""
    datapath = ""
    numseq = -1
    numseq_this_user = -1
    email = ""
    host_ip = ""
    base_www_url = ""
    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # No positional arguments are supported, so anything after a
            # bare "--" is an error.
            print("Error! Wrong argument:", argv[i], file=fperr)
            return 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outpath", "--outpath"]:
                (outpath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-email", "--email"]:
                (email, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-host", "--host"]:
                (host_ip, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-nseq", "--nseq"]:
                (numseq, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-nseq-this-user", "--nseq-this-user"]:
                (numseq_this_user, i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-baseurl", "--baseurl"]:
                (base_www_url, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-jobid", "--jobid"]:
                (jobid, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-datapath", "--datapath"]:
                (datapath, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-force", "--force"]:
                g_params['isForceRun'] = True
                i += 1
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print("Error! Wrong argument:", argv[i], file=fperr)
                return 1
        else:
            # Also reached for an empty-string argument, which previously
            # raised IndexError on argv[i][0].
            print("Error! Wrong argument:", argv[i], file=fperr)
            return 1

    if outpath == "":
        print("outpath not set. exit", file=fperr)
        return 1

    if not os.path.exists(outpath):
        # os.makedirs replaces spawning "mkdir -p": no external process is
        # needed and the failure reason is available in the exception (the
        # old code printed an always-empty rmsg on failure).
        try:
            os.makedirs(outpath, exist_ok=True)
        except OSError as e:
            print("Failed to create outpath %s: %s" % (outpath, e),
                  file=fperr)
            return 1

    return 0
def main(g_params):#{{{
    """Build a "family id -> sequence ids" table from seqid-to-pfamid files.

    Recognised options (each is also accepted with a leading ``--``):
        -o, -outfile FILE  output file (default: stdout)
        -nmax INT          g_params['nmax'], the maximum number of families
                           collected per pass over the input
        -l FILE            file with input file names (myfunc.ReadIDList)
        -q                 sets g_params['isQuiet'] to True

    Every other argument is taken as an input file.  In each input line
    with more than two whitespace-separated fields, field 0 is used as the
    sequence id and fields 2.. as family ids; empty lines and lines starting
    with "#" are skipped.

    The input files are re-read in rounds.  Each round collects at most
    nmax families that have not been written yet and writes one line
    "<famid> <count> <seqid> ..." per family, until every family seen in the
    first round has been written.

    Args:
        g_params: dict of run-time settings; reads 'nmax', writes 'nmax'
            for -nmax and 'isQuiet' for -q.

    Returns:
        0 on success, 1 on argument errors.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # Only the single argument right after "--" is forced to be an
            # input file; option parsing resumes afterwards.
            fileList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile", "--outfile"]:
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-nmax", "--nmax"]:
                (g_params['nmax'], i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-l", "--l"]:
                (fileListFile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print("Error! Wrong argument:", argv[i], file=sys.stderr)
                return 1
        else:
            fileList.append(argv[i])
            i += 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    if len(fileList) < 1:
        print("%s: no input file is set. exit" % (sys.argv[0]),
              file=sys.stderr)
        return 1

    nmax = g_params['nmax']
    if nmax < 1:
        # With nmax < 1 no family can ever be collected and the round loop
        # below would never terminate.
        print("nmax = %d, but it must be >= 1. exit" % (nmax),
              file=sys.stderr)
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    pfamidset_all = set()
    pfamidset_output = set()

    cnt_round = 0
    while True:
        cnt_round += 1
        famid2seqidDict = {}
        for infile in fileList:
            hdl = myfunc.ReadLineByBlock(infile)
            if hdl.failure:
                continue
            lines = hdl.readlines()
            while lines is not None:
                for line in lines:
                    line = line.strip()
                    if not line or line[0] == "#":
                        continue
                    strs = line.split()
                    if len(strs) <= 2:
                        msg = "broken item in file %s: line \"%s\""
                        print(msg % (infile, line), file=sys.stderr)
                        continue
                    seqid = strs[0]
                    for pfamid in strs[2:]:
                        if cnt_round == 1:
                            pfamidset_all.add(pfamid)
                        if pfamid in pfamidset_output:
                            continue
                        # Open a new family only while this round still has
                        # room; families already opened keep collecting.
                        if (pfamid not in famid2seqidDict
                                and len(famid2seqidDict) < nmax):
                            famid2seqidDict[pfamid] = []
                        if pfamid in famid2seqidDict:
                            famid2seqidDict[pfamid].append(seqid)
                lines = hdl.readlines()
            hdl.close()

        for pfamid in famid2seqidDict:
            pfamidset_output.add(pfamid)
            seqidlist = myfunc.uniquelist(famid2seqidDict[pfamid])
            fpout.write("%s %d" % (pfamid, len(seqidlist)))
            for seqid in seqidlist:
                fpout.write(" %s" % (seqid))
            fpout.write("\n")

        if len(pfamidset_output) == len(pfamidset_all):
            break
        if not famid2seqidDict:
            # No progress in this round (e.g. an input file became
            # unreadable after the first pass): stop instead of spinning.
            print("%s: %d families could not be collected. exit" % (
                sys.argv[0], len(pfamidset_all) - len(pfamidset_output)),
                file=sys.stderr)
            break
        print(" %d / %d " % (len(pfamidset_output), len(pfamidset_all)))

    myfunc.myclose(fpout)
    if outfile != "":
        print("result output to %s" % (outfile))

    return 0
Esempio n. 26
0
def main(g_params):  #{{{
    """Parse the command line and generate sequence pairs within families.

    Recognised options (each is also accepted with a leading ``--``):
        -o, -outfile FILE      main output (default: stdout)
        -outwithfamid FILE     extra output passed to the generators
        -outfam2seqmap FILE    extra output, used by method 1
        -outwithpdb FILE       extra output, used by method 2
        -tmprolist, -restrictlist FILE
                               ID list restricting the sequences used
        -mapfile FILE          family-to-seqid map (myfunc.ReadFam2SeqidMap)
        -pdbtosp FILE          PDB-to-UniProt map (myfunc.ReadPDBTOSP)
        -seed INT              rand_seed for methods 0 and 1
        -l FILE                file with IDs (myfunc.ReadIDList)
        -maxseq INT            maxseq_for_fam, method 0 (default 200)
        -maxpair INT           maxpair_for_fam, method 1 (default 300)
        -m, -method INT        0, 1 or 2 (default 0)
        -q                     sets g_params['isQuiet'] to True
        -onlypdb               sets g_params['isOnlyPDB'] to True

    Every other argument is taken as an ID.

    Args:
        g_params: dict of run-time settings; reads 'isOnlyPDB' and writes
            'isQuiet', 'isOnlyPDB', 'uniprotidlist_with_pdb' and
            'uniprot2pdbMap'.

    Returns:
        0 on success, 1 on argument or input errors.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    outfile_with_famid = ""
    outfile_with_pdb = ""
    outfile_fam2seqmap = ""
    idListFile = ""
    mapfile = "%s%s%s" % (
        DATADIR3, os.sep,
        "wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.clanid2seqid"
    )
    restrictIDListFile = ""
    idList = []
    maxseq_for_fam = 200
    maxpair_for_fam = 300
    method = 0
    rand_seed = None
    pdbtospFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        if isNonOptionArg:
            # Only the single argument right after "--" is forced to be an
            # ID; option parsing resumes afterwards.
            idList.append(argv[i])
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i].startswith("-"):
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile", "--outfile"]:
                outfile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outwithfamid", "--outwithfamid"]:
                outfile_with_famid, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outfam2seqmap", "--outfam2seqmap"]:
                outfile_fam2seqmap, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-outwithpdb", "--outwithpdb"]:
                outfile_with_pdb, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in [
                    "-tmprolist", "--tmprolist", "-restrictlist",
                    "--restrictlist"
            ]:
                restrictIDListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-mapfile", "--mapfile"]:
                mapfile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-pdbtosp", "--pdbtosp"]:
                pdbtospFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-seed", "--seed"]:
                rand_seed, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-l", "--l"]:
                idListFile, i = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-maxseq", "--maxseq"]:
                maxseq_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-maxpair", "--maxpair"]:
                maxpair_for_fam, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-m", "--m", "-method", "--method"]:
                method, i = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            elif argv[i] in ["-onlypdb", "--onlypdb"]:
                g_params['isOnlyPDB'] = True
                i += 1
            else:
                print("Error! Wrong argument:", argv[i], file=sys.stderr)
                return 1
        else:
            idList.append(argv[i])
            i += 1

    if method not in (0, 1, 2):
        # Previously an unknown method silently produced empty output.
        print("method %d not in [0, 1, 2]. exit" % (method), file=sys.stderr)
        return 1

    if idListFile != "":
        if os.path.exists(idListFile):
            idList += myfunc.ReadIDList(idListFile)
        else:
            # Not fatal: IDs may also have been given on the command line.
            print("Warning! idListFile %s does not exist, ignored" % (
                idListFile), file=sys.stderr)

    if len(idList) < 1:
        print("no ID set. exit", file=sys.stderr)
        return 1
    if myfunc.checkfile(mapfile, "idMapFile") != 0:
        return 1

    idMapDict = myfunc.ReadFam2SeqidMap(mapfile)

    # Read in pdbtosp map
    if pdbtospFile != "":
        (pdb2uniprotMap, uniprot2pdbMap) = myfunc.ReadPDBTOSP(pdbtospFile)
        g_params['uniprotidlist_with_pdb'] = set(uniprot2pdbMap.keys())
        g_params['uniprot2pdbMap'] = uniprot2pdbMap

    if g_params['isOnlyPDB']:
        if pdbtospFile == "":
            print("onlypdb is true but pdbtospFile is not set. exit.",
                  file=sys.stderr)
            return 1
        elif not g_params['uniprotidlist_with_pdb']:
            print("onlypdb is true but uniprotidlist_with_pdb is empty. exit.",
                  file=sys.stderr)
            return 1

    restrictIDSet = set()
    if restrictIDListFile != "":
        restrictIDSet = set(myfunc.ReadIDList(restrictIDListFile))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout_withfamid = myfunc.myopen(outfile_with_famid, None, "w", False)
    fpout_withpdb = myfunc.myopen(outfile_with_pdb, None, "w", False)
    fpout_fam2seqmap = myfunc.myopen(outfile_fam2seqmap, None, "w", False)

    # try/finally so the output handles are released even when a generator
    # raises.
    try:
        if method == 0:
            GeneratePairWithinFam_m_0(idList, idMapDict, restrictIDSet,
                                      maxseq_for_fam, rand_seed, fpout,
                                      fpout_withfamid)
        elif method == 1:
            GeneratePairWithinFam_m_1(idList, idMapDict, restrictIDSet,
                                      maxpair_for_fam, rand_seed, fpout,
                                      fpout_withfamid, fpout_fam2seqmap)
        else:  # method == 2, all to all
            GeneratePairWithinFam_m_2(idList, idMapDict, restrictIDSet, fpout,
                                      fpout_withfamid, fpout_withpdb)
    finally:
        myfunc.myclose(fpout)
        myfunc.myclose(fpout_withfamid)
        myfunc.myclose(fpout_withpdb)
        myfunc.myclose(fpout_fam2seqmap)
    return 0
Esempio n. 27
0
def main(g_params):#{{{
    """Parse the command line, then select and write pairwise alignments.

    Recognised options (each is also accepted with a leading ``--``,
    except -q):
        -outname STR      output name passed to WritePairAln
        -msapath DIR      passed to the alignment helpers
        -msaext STR       MSA file extension (default ".msa.fa")
        -localali         enables isLocalAlignment
        -all              sets g_params['isOutputAll'] to True
        -seqidttype INT   stored in g_params['seqidttype']
        -maxsel INT       stored in g_params['maxsel']
        -verbose INT      stored in g_params['verbose']
        -q                sets g_params['isQuiet'] to True

    Any other argument is taken as the pair list file (with family ids); if
    several are given, the last one wins.

    g_params is a dict of run-time settings; 'verbose' and 'isOutputAll'
    are read here and must already be present.

    Returns 1 on argument errors.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    msapath = ""
    outname = ""
    pairlistwithfamid_file = ""
    isLocalAlignment = False

    # NOTE(review): idListFile and idList are not used in the code visible
    # here.
    idListFile = None
    idList = []
    msaext = ".msa.fa"

    i = 1
    isNonOptionArg=False
    while i < numArgv:
        if isNonOptionArg == True:
            # Only the single argument right after "--" is forced to be the
            # input file; the flag is reset immediately.
            pairlistwithfamid_file = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-outname", "--outname"]:
                (outname, i)  = myfunc.my_getopt_str(argv,i)
            elif argv[i] in ["-msapath", "--msapath"]:
                (msapath,i)  = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-msaext", "--msaext"]:
                (msaext,i)  = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-localali", "--localali"]:
                isLocalAlignment = True; i += 1
            elif argv[i] in ["-all", "--all"]:
                g_params['isOutputAll'] = True; i += 1
            elif argv[i] in ["-seqidttype", "--seqidttype"]:
                (g_params['seqidttype'],i) = myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-maxsel", "--maxsel"]:
                (g_params['maxsel'],i) = myfunc.my_getopt_int(argv,i)
            elif argv[i] in ["-verbose", "--verbose"]:
                (g_params['verbose'],i) =  myfunc.my_getopt_int(argv, i)
            elif argv[i] in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            pairlistwithfamid_file = argv[i]
            i += 1

    if pairlistwithfamid_file == "":
        print >> sys.stderr, "pairlistwithfamid_file not set"
        return 1

    # Default output name: input file name without its extension, plus
    # ".local" for local alignments, plus ".selected".
    if outname == "":
        addname = ""
        if isLocalAlignment is True:
            addname = ".local"
        outname = os.path.splitext(pairlistwithfamid_file)[0] + addname + ".selected"

    verbose = g_params['verbose']

    if verbose >= 1:
        print "Reading file", pairlistwithfamid_file
    # pairlistDict {pfamid:[(id1,id2),(id1,id2),...]    }
    pairlistDict = ReadPairListWithFamID(pairlistwithfamid_file)
    #print pairlistDict
    # obtain pairwise alignment factor
    if verbose >= 1:
        print "Obtaining pairwise alignment factor"
    AddPairwiseAlignmentFactor(pairlistDict, msapath, msaext, isLocalAlignment)
    # Unless -all was given, reduce the pair list via RandSelectPairList
    # (presumably limited by g_params['maxsel'] -- confirm in that helper).
    if not g_params['isOutputAll']:
        if verbose >=1:
            print "Select pair list to limit to ", g_params['maxsel']
        pairlistDict = RandSelectPairList(pairlistDict)
    #print "selected pairlistDict = ", pairlistDict
    #print "pairlistDict_selected=",pairlistDict
    if verbose >= 1:
        print "Write out the alignment"
    WritePairAln(pairlistDict, msapath, msaext, outname)