Ejemplo n.º 1
0
def FilterSignalPeptide(topofile, sigpepDict, outfile,
        isDeleteSeqWithSignalPeptide):
    """Copy topologies from topofile to outfile, handling signal peptides.

    For every record read from topofile, sigpepDict is consulted with the
    record's seqid.  A missing entry or a value of -1 leaves the topology
    untouched.  Otherwise the record is either dropped (when
    isDeleteSeqWithSignalPeptide is true) or passed through
    lcmp.FilterSignalPeptideInTopology together with the looked-up position.
    Only non-empty topologies for which myfunc.CountTM reports more than
    zero are written, in FASTA layout.

    Returns 1 if the reader reports failure, otherwise 0.
    """
    reader = myfunc.ReadFastaByBlock(topofile)
    if reader.failure:
        return 1

    out = myfunc.myopen(outfile, sys.stdout, "w", False)
    # The loop ends as soon as readseq() hands back None.
    for block in iter(reader.readseq, None):
        for record in block:
            try:
                cleavage = sigpepDict[record.seqid]
            except KeyError:
                cleavage = -1

            if cleavage == -1:
                topo = record.seq
            elif isDeleteSeqWithSignalPeptide:
                topo = ""
            else:
                topo = lcmp.FilterSignalPeptideInTopology(record.seq,
                                                          cleavage)

            if topo != "" and myfunc.CountTM(topo) > 0:
                out.write(">%s\n" % (record.description))
                out.write("%s\n" % (topo))
    reader.close()
    myfunc.myclose(out)
    return 0
Ejemplo n.º 2
0
def MSA2Seq_fasta(infile, outfile):  #{{{
    """Write the sequences of a FASTA-style alignment without gap characters.

    infile is read in chunks of BLOCK_SIZE (a name this function takes from
    the enclosing module scope).  Each chunk is appended to the unparsed
    tail of the previous one and handed to myfunc.ReadFastaFromBuffer, which
    fills recordList and returns the new unparsed tail.  For every record,
    element 1 is used as the header line and element 2 as the sequence,
    with "-", "." and " " removed.

    Both file handles are released even when an IOError interrupts the
    work.

    Returns 0 on success and 1 when an IOError is raised.
    """
    fpin = None
    fpout = None
    try:
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
        fpin = open(infile, "rb")
        unprocessedBuffer = ""
        isEOFreached = False
        while not isEOFreached:
            buff = fpin.read(BLOCK_SIZE)
            # A short read means there is nothing left in the file.
            if len(buff) < BLOCK_SIZE:
                isEOFreached = True
            recordList = []
            unprocessedBuffer = myfunc.ReadFastaFromBuffer(
                unprocessedBuffer + buff, recordList, isEOFreached)
            for rd in recordList:
                anno = rd[1]
                seq = rd[2].replace("-", "").replace(".", "").replace(" ", "")
                fpout.write(">%s\n" % anno)
                fpout.write("%s\n" % seq)
    except IOError:
        # sys.stderr.write behaves the same on Python 2 and Python 3.
        sys.stderr.write("Failed to read file %s\n" % (infile,))
        return 1
    finally:
        if fpin is not None:
            fpin.close()
        if fpout is not None:
            myfunc.myclose(fpout)
    return 0
Ejemplo n.º 3
0
def MatchMSATopo_using_topofile(
        msafile,
        topofile,
        isIgnoreBadseq,  #{{{
        method_match,
        outfile):
    """Match each aligned sequence in msafile to its topology and write it.

    The topology table comes from GetTopoDict(topofile).  For every record
    of msafile the topology is looked up by seqid; when it is missing, a
    message goes to stderr and an empty topology is used instead.  The
    result of MatchSeqToTopo(seq, topo, method_match) is written in FASTA
    layout, except that a result of "BADSEQ" is skipped when isIgnoreBadseq
    is true.

    Returns 1 if the reader reports failure, otherwise 0.
    """
    topoDict = GetTopoDict(topofile)
    reader = myfunc.ReadFastaByBlock(msafile)
    if reader.failure:
        return 1

    out = myfunc.myopen(outfile, sys.stdout, "w", False)

    # The loop ends as soon as readseq() hands back None.
    for block in iter(reader.readseq, None):
        for record in block:
            try:
                topo = topoDict[record.seqid]
            except KeyError:
                sys.stderr.write(
                    "topo not found for ID %s" % (record.seqid) + "\n")
                topo = ""
            matched = MatchSeqToTopo(record.seq, topo, method_match)
            if matched == "BADSEQ" and isIgnoreBadseq:
                continue
            out.write(">%s" % (record.description) + "\n")
            out.write("%s" % (matched) + "\n")

    myfunc.myclose(out)
    reader.close()

    return 0
Ejemplo n.º 4
0
def Stat2(lines, outfile):
    """Tabulate how often the characters "1" and "2" occur in TMMap lines.

    Only lines whose first whitespace-separated field is "TMMap" are used.
    Field 4 (with a trailing ":" removed) is the TM count; lines with a
    count <= 1 or >= MAX_NUMTM are ignored.  The remaining fields are
    parsed as integers, concatenated into one string, and the occurrences
    of "1", "2" and their sum select which y-bin to increment in the three
    (x, y) tables created by InitXY.

    Empty and whitespace-only lines are skipped.

    The table is written to outfile through myfunc.myopen, one row per
    index from 1 to MAX_NUMTM.
    """
    MAX_NUMTM = 14
    dt = {}
    for item in [1, 12, 2]:
        dt[item] = InitXY(MAX_NUMTM)

    for line in lines:
        strs = line.split() if line else []
        # A whitespace-only line splits into no fields at all.
        if not strs or strs[0] != "TMMap":
            continue
        numTM = int(strs[4].rstrip(":"))
        if numTM <= 1 or numTM >= MAX_NUMTM:
            continue
        # Counting is done on the characters of the concatenated values.
        str_maparray = "".join(["%d" % int(x) for x in strs[5:]])
        num_1 = str_maparray.count("1")
        num_2 = str_maparray.count("2")
        dt[1][1][num_1] += 1
        dt[2][1][num_2] += 1
        dt[12][1][num_1 + num_2] += 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout.write("#%2s %8s %3s %8s %3s %8s\n" %
                ("1x", "1y", "2x", "2y", "12x", "12y"))
    # range() works on both Python 2 and Python 3.
    for i in range(1, MAX_NUMTM + 1):
        for item in [1, 2, 12]:
            (lx, ly) = dt[item]
            fpout.write("%3d %8d " % (lx[i], ly[i]))
        fpout.write("\n")
    myfunc.myclose(fpout)
Ejemplo n.º 5
0
def main(g_params):  #{{{
    """Annotate each pair of the input file with a value from the gram file.

    Command line (read from sys.argv):
        positional          input pair-list file
        -o/--o/-outfile     output file
        -gram/--gram        pair-list file used to build the lookup table
        -q/--q              sets g_params['isQuiet']
        -h/--help           print help
        --                  treat the next argument as the input file

    Each input pair is written as "first<TAB>second<TAB>value", where value
    is looked up by the second element in the gram table and is "NA" when
    absent.

    Returns 1 on usage errors or when a file check fails, 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    gramfile = ""
    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        # startswith() also copes with an empty-string argument.
        if isNonOptionArg or not arg.startswith("-"):
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-o", "--o", "-outfile"]:
            (outfile, i) = myfunc.my_getopt_str(argv, i)
        elif arg in ["-gram", "--gram"]:
            (gramfile, i) = myfunc.my_getopt_str(argv, i)
        elif arg in ["-q", "--q"]:
            g_params['isQuiet'] = True
            i += 1
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(gramfile) != 0:
        return 1

    gramMapDict = {tup[0]: tup[1] for tup in myfunc.ReadPairList(gramfile)}
    gi2taxidpairlist = myfunc.ReadPairList(infile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    for tup in gi2taxidpairlist:
        fpout.write("%s\t%s\t%s\n" %
                    (tup[0], tup[1], gramMapDict.get(tup[1], "NA")))
    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 6
0
def main(g_params):  #{{{
    """Print the TM positions of every topology in a FASTA topology file.

    Command line (read from sys.argv):
        positional, -i/--i  topology file
        -o/--o              output file
        -gapless/--gapless  remove "-" and "." from each topology first
        -q                  sets g_params['isQuiet']
        -h/--help           print help
        --                  treat the next argument as the topology file

    For each record one line "<seqid> <result of myfunc.GetTMPosition>" is
    written.

    Returns 1 on usage errors or when reading/processing fails (the
    failure is reported on stderr), 0 on success.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    topofile = ""
    outfile = ""
    isGapLess = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg or not arg.startswith("-"):
            topofile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-o", "--o", "-i", "--i"]:
            # These options take a value; refuse to run past the end of argv.
            if i + 1 >= numArgv:
                sys.stderr.write(
                    "Error! Option %s requires an argument\n" % arg)
                return 1
            if arg in ["-o", "--o"]:
                outfile = argv[i + 1]
            else:
                topofile = argv[i + 1]
            i += 2
        elif arg in ["-gapless", "--gapless"]:
            isGapLess = True
            i += 1
        elif arg in ["-q"]:
            g_params['isQuiet'] = True
            i += 1
        else:
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if topofile == "":
        sys.stderr.write("topofile not set. exit\n")
        return 1

    fpout = None
    try:
        (idList, annoList, seqList) = myfunc.ReadFasta(topofile)
        fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
        for idx, seqid in enumerate(idList):
            topo = seqList[idx]
            if isGapLess:
                topo = topo.replace("-", "").replace(".", "")
            posTMList = myfunc.GetTMPosition(topo)
            fpout.write("%s %s\n" % (seqid, posTMList))
    except (IOError, IndexError) as err:
        # Report the failure instead of silently producing partial output.
        sys.stderr.write("Failed to process topofile %s: %s\n" %
                         (topofile, err))
        return 1
    finally:
        if fpout is not None:
            myfunc.myclose(fpout)
    return 0
Ejemplo n.º 7
0
def main(g_params):#{{{
    """Parse the command line and run FilterUniprotTableinfoByKeyword.

    Command line (read from sys.argv):
        positional                  input file
        -o/--o                      output file
        -idwithanno/--idwithanno    file passed to ReadSeqIDWithAnnoFile
        -key/--key                  appended to g_params['keywordList']
        -case/--case                sets g_params['isCaseSensitive']
        -q                          sets g_params['isQuiet']
        -h/--help                   print help
        --                          treat the next argument as the input file

    Returns 1 on usage errors, 0 after the filter has run.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    idwithannofile = ""

    valueOptions = ["-o", "--o", "-idwithanno", "--idwithanno",
                    "-key", "--key"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg or not arg.startswith("-"):
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in valueOptions:
            # These options take a value; refuse to run past the end of argv.
            if i + 1 >= numArgv:
                sys.stderr.write(
                    "Error! Option %s requires an argument\n" % arg)
                return 1
            value = argv[i + 1]
            if arg in ["-o", "--o"]:
                outfile = value
            elif arg in ["-idwithanno", "--idwithanno"]:
                idwithannofile = value
            else:
                g_params['keywordList'].append(value)
            i += 2
        elif arg in ["-case", "--case"]:
            g_params['isCaseSensitive'] = True
            i += 1
        elif arg in ["-q"]:
            g_params['isQuiet'] = True
            i += 1
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if infile == "":
        sys.stderr.write("infile not set.exit\n")
        return 1
    if idwithannofile == "":
        sys.stderr.write("idwithannofile not set.exit\n")
        return 1

    seqid2AnnoDict = ReadSeqIDWithAnnoFile(idwithannofile)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    FilterUniprotTableinfoByKeyword(infile, seqid2AnnoDict, fpout)
    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 8
0
def main(g_params):  #{{{
    """Parse the command line and run CleanAASeq on every listed file.

    Command line (read from sys.argv):
        positional              file to process (may be repeated)
        -l/--l                  file whose whitespace-separated tokens are
                                added to the file list
        -overwrite/--overwrite  passes isOverWrite=1 to CleanAASeq
        -outpath/--outpath      accepted; the value is not used by this
                                function
        -q                      sets g_params['isQuiet']
        -h/--help               print help
        --                      treat the next argument as a file

    A list file that cannot be read is reported on stderr and processing
    continues with the files collected so far.

    Returns 1 on usage errors, 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileListFile = ""
    fileList = []
    isOverWrite = 0

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg or not arg.startswith("-"):
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-outpath", "--outpath", "-l", "--l"]:
            # These options take a value; refuse to run past the end of argv.
            if i + 1 >= numArgv:
                sys.stderr.write(
                    "Error! Option %s requires an argument\n" % arg)
                return 1
            if arg in ["-l", "--l"]:
                fileListFile = argv[i + 1]
            else:
                outpath = argv[i + 1]  # parsed but not used below
            i += 2
        elif arg in ["-overwrite", "--overwrite"]:
            isOverWrite = 1
            i += 1
        elif arg in ["-q"]:
            g_params['isQuiet'] = True
            i += 1
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if fileListFile != "":
        try:
            # The with-block closes the handle even if read() fails.
            with open(fileListFile, "r") as fp:
                fileList += fp.read().split()
        except IOError:
            msg = "Failed to read idlistfile {}."
            sys.stderr.write(msg.format(fileListFile) + "\n")

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    for fname in fileList:
        CleanAASeq(fname, isOverWrite, fpout)
    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 9
0
def main(g_params):  #{{{#{{{
    """Parse the command line and run Getseqlen on the input file.

    Command line (read from sys.argv):
        positional                      input file
        -i/--i/-printid/--printid       sets isPrintID
        -just-print-sum                 sets isJustPrintSum
        -o/--o/-outfile/--outfile       output file
        -bs/--bs/-block-size            block size, must be positive
        -h/--help                       print help (returns 0)
        --                              treat the next argument as the input

    Returns 1 on usage errors or when the input file check fails, otherwise
    the status returned by Getseqlen.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg or not arg.startswith("-"):
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 0
        elif arg in ["-i", "--i", "-printid", "--printid"]:
            isPrintID = True
            i += 1
        elif arg in ["-just-print-sum", "--just-print-sum"]:
            isJustPrintSum = True
            i += 1
        elif arg in ["-o", "--o", "-outfile", "--outfile"]:
            outfile, i = myfunc.my_getopt_str(argv, i)
        elif arg in ["-bs", "--bs", "-block-size", "--block-size"]:
            BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
            # The message promises ">0", so zero is rejected as well.
            if BLOCK_SIZE <= 0:
                print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                return 1
        else:
            print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
            return 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE, fpout)
    myfunc.myclose(fpout)

    return status
Ejemplo n.º 10
0
def main(g_params):#{{{#{{{
    """Parse the command line and run Getseqlen on the input file.

    Command line (read from sys.argv):
        positional                      input file
        -i/--i/-printid/--printid       sets isPrintID
        -just-print-sum                 sets isJustPrintSum
        -o/--o/-outfile/--outfile       output file
        -bs/--bs/-block-size            block size, must be positive
        -h/--help                       print help (returns 0)
        --                              treat the next argument as the input

    Returns 1 on usage errors or when the input file check fails, otherwise
    the status returned by Getseqlen.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    BLOCK_SIZE = 100000
    isPrintID = False
    isJustPrintSum = False

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg or not arg.startswith("-"):
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 0
        elif arg in ["-i", "--i", "-printid", "--printid"]:
            isPrintID = True
            i += 1
        elif arg in ["-just-print-sum", "--just-print-sum"]:
            isJustPrintSum = True
            i += 1
        elif arg in ["-o", "--o", "-outfile", "--outfile"]:
            outfile, i = myfunc.my_getopt_str(argv, i)
        elif arg in ["-bs", "--bs", "-block-size", "--block-size"]:
            BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
            # The message promises ">0", so zero is rejected as well.
            if BLOCK_SIZE <= 0:
                sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                return 1
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument:%s\n" % arg)
            return 1

    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    status = Getseqlen(infile, isPrintID, isJustPrintSum, BLOCK_SIZE, fpout)
    myfunc.myclose(fpout)

    return status
Ejemplo n.º 11
0
def main(g_params):  #{{{
    """Parse the command line and run BlastM9toPairlist on every input file.

    Command line (read from sys.argv):
        positional          input file (may be repeated)
        -l/--l              list file read with myfunc.ReadIDList
        -o/--o/-outfile     output file
        -evalue/--evalue    stored in g_params['evalue_th']
        -seqidt/--seqidt    stored in g_params['seqidt_th']
        -round/--round      stored in g_params['iteration']
        -h/--help           print help
        --                  treat the next argument as an input file

    Returns 1 on usage errors or when no input is given, 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    fileList = []
    fileListFile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg or not arg.startswith("-"):
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-l", "--l"]:
            fileListFile, i = myfunc.my_getopt_str(argv, i)
        elif arg in ["-o", "--o", "-outfile"]:
            outfile, i = myfunc.my_getopt_str(argv, i)
        elif arg in ["-evalue", "--evalue"]:
            g_params['evalue_th'], i = myfunc.my_getopt_float(argv, i)
        elif arg in ["-seqidt", "--seqidt"]:
            g_params['seqidt_th'], i = myfunc.my_getopt_float(argv, i)
        elif arg in ["-round", "--round"]:
            # Unpack like the other options: storing the whole return value
            # left `i` unchanged, so the loop never moved past this option.
            g_params['iteration'], i = myfunc.my_getopt_int(argv, i)
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument: '%s'\n" % arg)
            return 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile, delim="\n")
    if len(fileList) < 1:
        sys.stderr.write("No input set. exit\n")
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    for infile in fileList:
        BlastM9toPairlist(infile, fpout)

    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 12
0
def main(g_params):  #{{{
    """Collect IDs from the command line and pass them to GIID2UniprotID.

    Command line (read from sys.argv):
        positional          an ID (may be repeated)
        -l/--l/-list        file read with myfunc.ReadIDList
        -o/--o/-outfile     output file
        -h/--help           print help
        --                  treat the next argument as an ID

    The IDs are joined with spaces into params['query']; the other request
    parameters are fixed ('from': 'P_GI', 'to': 'ID', 'format': 'tab').

    Returns 1 on usage errors or when no ID is given, 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    idListFile = ""
    idList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        # startswith() also copes with an empty-string argument.
        if isNonOptionArg or not arg.startswith("-"):
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-o", "--o", "-outfile"]:
            (outfile, i) = myfunc.my_getopt_str(argv, i)
        elif arg in ["-l", "--l", "-list"]:
            (idListFile, i) = myfunc.my_getopt_str(argv, i)
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if idListFile != "":
        idList += myfunc.ReadIDList(idListFile)

    if len(idList) < 1:
        sys.stderr.write("No ID set. exit\n")
        return 1

    params = {
        'from': 'P_GI',
        'to': 'ID',  # to uniprot id
        'format': 'tab',
        'query': " ".join(idList),
    }
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    GIID2UniprotID(params, fpout)
    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 13
0
def OutputPfamFastaFile(seqidList, pfamid, seqdbDict, hdl_seqdb,
                        extra_desp_dict, outpath):  #{{{
    """Write the sequences of seqidList to <outpath>/<pfamid><out_ext>.

    Parameters:
        seqidList       IDs to write; for an ID containing "UniRef" the
                        second "_"-separated field is used when present
        pfamid          used to build the output file name
        seqdbDict       seqid -> (annotation, sequence); consulted when
                        g_params['isBigmem'] is true
        hdl_seqdb       object with GetRecord(seqid); consulted otherwise
        extra_desp_dict seqid -> text put in front of the annotation; an
                        empty dict disables this
        outpath         directory of the output file

    IDs missing from the sequence source are reported on stderr.  When
    g_params['isGzip'] is true the finished file is compressed with
    "gzip -N -f" and the command line is echoed to stdout.
    """
    outfile = "%s%s%s%s" % (outpath, os.sep, pfamid, g_params['out_ext'])
    fpout = myfunc.myopen(outfile, None, "w", True)
    isAddExtraDescription = len(extra_desp_dict) > 0

    for seqid in seqidList:
        if seqid.find("UniRef") != -1:
            try:
                seqid = seqid.split("_")[1]
            except IndexError:
                pass  # no "_" in the ID: keep it unchanged

        extraanno = ""
        if isAddExtraDescription:
            try:
                extraanno = extra_desp_dict[seqid]
            except KeyError:
                extraanno = ""

        if g_params['isBigmem']:
            try:
                (tmpanno, tmpseq) = seqdbDict[seqid]
            except KeyError:
                sys.stderr.write("seqid %s not found in seqdb\n" % (seqid))
                continue
            if extraanno != "":
                tmpanno = "%s %s" % (extraanno, tmpanno)
            fpout.write(">%s\n%s\n" % (tmpanno, tmpseq))
        else:
            record = hdl_seqdb.GetRecord(seqid)
            if not record:
                sys.stderr.write("seqid %s not found in seqdb\n" % (seqid))
                continue
            if extraanno == "":
                # Nothing to prepend: copy the stored record verbatim.
                fpout.write("%s" % (record))
            else:
                (tmpseqid, tmpanno,
                 tmpseq) = myfunc.ExtractFromSeqWithAnno(record)
                tmpanno = "%s %s" % (extraanno, tmpanno)
                fpout.write(">%s\n%s\n" % (tmpanno, tmpseq))
    myfunc.myclose(fpout)

    if g_params['isGzip']:
        cmd = ["gzip", "-N", "-f", outfile]
        # sys.stdout.write works on both Python 2 and Python 3.
        sys.stdout.write(" ".join(cmd) + "\n")
        # The with-block closes the devnull handle after gzip finishes.
        with open(os.devnull, "w") as devnull:
            subprocess.check_call(cmd, stdout=devnull)
Ejemplo n.º 14
0
def main(g_params):
    """Parse the command line and run CatFasta2 on a range of the input.

    Command line (read from sys.argv):
        positional, -i/--i/--infile  input file
        -b/--b/--begin               begin value (default 0)
        -e/--e/--end                 end value (default 999999999)
        -o/--o/--outfile             output file
        -h/--help                    print help
        --                           treat the next argument as the input

    `method` is fixed to 2 in this function, so CatFasta2 is always the
    routine that runs; the CatFasta branch is kept for method == 1.

    Returns 1 on usage errors or when the input file check fails, else 0.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999

    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg or not arg.startswith("-"):
            # The argument after "--" is the input file; it used to be
            # skipped without being stored.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-i", "--i", "--infile"]:
            inFile, i = myfunc.my_getopt_str(argv, i)
        elif arg in ["-b", "--b", "--begin"]:
            begin, i = myfunc.my_getopt_int(argv, i)
        elif arg in ["-e", "--e", "--end"]:
            end, i = myfunc.my_getopt_int(argv, i)
        elif arg in ["-o", "--o", "--outfile"]:
            # Qualified like the sibling calls above.
            outFile, i = myfunc.my_getopt_str(argv, i)
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument:%s\n" % arg)
            return 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)

    if method == 1:
        CatFasta(inFile, begin, end, fpout)
    else:
        CatFasta2(inFile, begin, end, fpout)

    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 15
0
def main(g_params):
    """Parse the command line and run CatFasta2 on a range of the input.

    Command line (read from sys.argv):
        positional, -i/--i/--infile  input file
        -b/--b/--begin               begin value (default 0)
        -e/--e/--end                 end value (default 999999999)
        -o/--o/--outfile             output file
        -h/--help                    print help
        --                           treat the next argument as the input

    `method` is fixed to 2 in this function, so CatFasta2 is always the
    routine that runs; the CatFasta branch is kept for method == 1.

    Returns 1 on usage errors or when the input file check fails, else 0.
    """
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    begin = 0
    end = 999999999

    method = 2

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        current = sys.argv[i]
        if isNonOptionArg or not current.startswith("-"):
            # The argument after "--" is the input file; it used to be
            # skipped without being stored.
            inFile = current
            isNonOptionArg = False
            i += 1
        elif current == "--":
            isNonOptionArg = True
            i += 1
        elif current in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif current in ["-i", "--i", "--infile"]:
            inFile, i = myfunc.my_getopt_str(sys.argv, i)
        elif current in ["-b", "--b", "--begin"]:
            begin, i = myfunc.my_getopt_int(sys.argv, i)
        elif current in ["-e", "--e", "--end"]:
            end, i = myfunc.my_getopt_int(sys.argv, i)
        elif current in ["-o", "--o", "--outfile"]:
            # Qualified like the sibling calls above.
            outFile, i = myfunc.my_getopt_str(sys.argv, i)
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument:%s\n" % current)
            return 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1

    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)

    if method == 1:
        CatFasta(inFile, begin, end, fpout)
    else:
        CatFasta2(inFile, begin, end, fpout)

    myfunc.myclose(fpout)
    return 0
def main(g_params):#{{{
    """Parse the command line and run CountUniquePairInvertedInfo per file.

    Command line (read from sys.argv):
        positional          input file (may be repeated)
        -l/--l              list file read with myfunc.ReadIDList
        -o/--o/-outfile     output file
        -outpath/--outpath  accepted; the value is not used by this function
        -q/--q              sets g_params['isQuiet']
        -h/--help           print help
        --                  treat the next argument as an input file

    The Pfam definition file is located under DATADIR3 and loaded with
    ReadPfamDefFile; only the first returned dictionary is passed on.

    Returns 1 on usage errors, 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outpath = "./"
    outfile = ""
    fileListFile = ""
    fileList = []

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        # startswith() also copes with an empty-string argument.
        if isNonOptionArg or not arg.startswith("-"):
            fileList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-o", "--o", "-outfile"]:
            (outfile, i) = myfunc.my_getopt_str(argv, i)
        elif arg in ["-outpath", "--outpath"]:
            (outpath, i) = myfunc.my_getopt_str(argv, i)
        elif arg in ["-l", "--l"]:
            (fileListFile, i) = myfunc.my_getopt_str(argv, i)
        elif arg in ["-q", "--q"]:
            g_params['isQuiet'] = True
            i += 1
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if fileListFile != "":
        fileList += myfunc.ReadIDList(fileListFile)

    # Built here, next to its only use.
    pfamDefFile = "%s/data/pfam/pfam26.0/Pfam-A.clans.tsv" % (DATADIR3)
    (pfamidDefDict, clanidDefDict) = ReadPfamDefFile(pfamDefFile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    for infile in fileList:
        CountUniquePairInvertedInfo(infile, pfamidDefDict, fpout)

    myfunc.myclose(fpout)
    return 0
def main(g_params):  #{{{
    """Parse the command line and run FilterTopconsSingleResultUniprot.

    Command line (read from sys.argv):
        positional                  allinfo file
        -o/--o                      output file
        -idwithanno/--idwithanno    file passed to ReadSeqIDWithAnnoFile
        -q                          sets g_params['isQuiet']
        -h/--help                   print help
        --                          treat the next argument as the allinfo
                                    file

    Returns 1 on usage errors, 0 after the filter has run.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    allinfoFile = ""
    idwithannofile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg or not arg.startswith("-"):
            allinfoFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-o", "--o", "-idwithanno", "--idwithanno"]:
            # These options take a value; refuse to run past the end of argv.
            if i + 1 >= numArgv:
                sys.stderr.write(
                    "Error! Option %s requires an argument\n" % arg)
                return 1
            if arg in ["-o", "--o"]:
                outfile = argv[i + 1]
            else:
                idwithannofile = argv[i + 1]
            i += 2
        elif arg in ["-q"]:
            g_params['isQuiet'] = True
            i += 1
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if allinfoFile == "":
        sys.stderr.write("allinfoFile not set.exit\n")
        return 1
    if idwithannofile == "":
        sys.stderr.write("idwithannofile not set.exit\n")
        return 1

    seqid2AnnoDict = ReadSeqIDWithAnnoFile(idwithannofile)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    FilterTopconsSingleResultUniprot(allinfoFile, seqid2AnnoDict, fpout)
    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 18
0
def main():  #{{{
    """Parse the command line and run RandFasta on the input file.

    Command line (read from sys.argv):
        positional, -i/--infile     input file
        -n/--n                      N passed to RandFasta (default 999999999)
        -seed/--seed                random seed passed to RandFasta
        -o/--outfile                output file
        -bs/--block-size            block size, must be positive
        -h/--help                   print help
        --                          treat the next argument as the input

    Returns 1 on usage errors or when the input file check fails, else 0.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg or not arg.startswith("-"):
            # The argument after "--" is the input file; it used to be
            # skipped without being stored.
            inFile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-i", "--infile"]:
            inFile, i = myfunc.my_getopt_str(argv, i)
        elif arg in ["-n", "--n"]:
            N, i = myfunc.my_getopt_int(argv, i)
        elif arg in ["-seed", "--seed"]:
            rand_seed, i = myfunc.my_getopt_int(argv, i)
        elif arg in ["-o", "--outfile"]:
            outFile, i = myfunc.my_getopt_str(argv, i)
        elif arg in ["-bs", "--block-size", "-block-size"]:
            # NOTE(review): BLOCK_SIZE is a local name here and is not
            # passed to RandFasta, so the option only gets validated.
            # Confirm whether a module-level value was meant to be set.
            BLOCK_SIZE, i = myfunc.my_getopt_int(argv, i)
            # The message promises ">0", so zero is rejected as well.
            if BLOCK_SIZE <= 0:
                print("Error! BLOCK_SIZE should >0", file=sys.stderr)
                return 1
        else:
            print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
            return 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1
    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    RandFasta(inFile, N, rand_seed, fpout)
    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 19
0
def ReWriteFasta(infile, outfile):  #{{{
    """Copy every record of infile to outfile in two-line FASTA layout.

    Each record is written as ">" + description on one line followed by
    the sequence on the next.  The output is opened only after the reader
    has been created successfully, so a failed read neither creates the
    output file nor leaves its handle open.

    Returns 1 if the reader reports failure, otherwise 0.
    """
    hdl = myfunc.ReadFastaByBlock(infile, 0, 1)
    if hdl.failure:
        return 1
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    recordList = hdl.readseq()
    # readseq() returning None ends the loop.
    while recordList is not None:
        for rd in recordList:
            fpout.write(">%s\n" % rd.description)
            fpout.write("%s\n" % rd.seq)
        recordList = hdl.readseq()
    hdl.close()
    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 20
0
def main():#{{{
    """Parse the command line and run RandFasta on the input file.

    Command line (read from sys.argv):
        positional, -i/--infile     input file
        -n/--n                      N passed to RandFasta (default 999999999)
        -seed/--seed                random seed passed to RandFasta
        -o/--outfile                output file
        -bs/--block-size            block size, must be positive
        -h/--help                   print help
        --                          treat the next argument as the input

    Returns 1 on usage errors or when the input file check fails, else 0.
    """
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    inFile = ""
    N = 999999999
    rand_seed = None

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        current = sys.argv[i]
        if isNonOptionArg or not current.startswith("-"):
            # The argument after "--" is the input file; it used to be
            # skipped without being stored.
            inFile = current
            isNonOptionArg = False
            i += 1
        elif current == "--":
            isNonOptionArg = True
            i += 1
        elif current in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif current in ["-i", "--infile"]:
            inFile, i = myfunc.my_getopt_str(sys.argv, i)
        elif current in ["-n", "--n"]:
            N, i = myfunc.my_getopt_int(sys.argv, i)
        elif current in ["-seed", "--seed"]:
            rand_seed, i = myfunc.my_getopt_int(sys.argv, i)
        elif current in ["-o", "--outfile"]:
            outFile, i = myfunc.my_getopt_str(sys.argv, i)
        elif current in ["-bs", "--block-size", "-block-size"]:
            # NOTE(review): BLOCK_SIZE is a local name here and is not
            # passed to RandFasta, so the option only gets validated.
            # Confirm whether a module-level value was meant to be set.
            BLOCK_SIZE, i = myfunc.my_getopt_int(sys.argv, i)
            # The message promises ">0", so zero is rejected as well.
            if BLOCK_SIZE <= 0:
                sys.stderr.write("Error! BLOCK_SIZE should >0\n")
                return 1
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument:%s\n" % current)
            return 1

    if myfunc.checkfile(inFile, "Input file") != 0:
        return 1
    fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
    RandFasta(inFile, N, rand_seed, fpout)
    myfunc.myclose(fpout)
    return 0
def main(g_params):#{{{
    """Parse the command line and run GetPercentTMOfClan.

    Command line (read from sys.argv):
        positional              input file, loaded with ReadPercentTM
        -map/--map/-mapfile     map file, loaded with
                                myfunc.ReadFam2SeqidMap
        -o/--o/-outfile         output file
        -q/--q                  sets g_params['isQuiet']
        -h/--help               print help
        --                      treat the next argument as the input file

    Returns 1 on usage errors or when a file check fails, 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""
    mapfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        # startswith() also copes with an empty-string argument.
        if isNonOptionArg or not arg.startswith("-"):
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif arg in ["-o", "--o", "-outfile"]:
            (outfile, i) = myfunc.my_getopt_str(argv, i)
        elif arg in ["-map", "--map", "-mapfile"]:
            (mapfile, i) = myfunc.my_getopt_str(argv, i)
        elif arg in ["-q", "--q"]:
            g_params['isQuiet'] = True
            i += 1
        else:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write("Error! Wrong argument: %s\n" % arg)
            return 1

    if myfunc.checkfile(infile) != 0:
        return 1
    if myfunc.checkfile(mapfile) != 0:
        return 1

    clanid2pfamidDict = myfunc.ReadFam2SeqidMap(mapfile)
    pfamPercentTMDict = ReadPercentTM(infile)

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    GetPercentTMOfClan(pfamPercentTMDict, clanid2pfamidDict, fpout)
    myfunc.myclose(fpout)
    return 0
Ejemplo n.º 22
0
def main(g_params):  #{{{
    """Command-line entry point: parse sys.argv and run FilterUniprotIDMap.

    Recognised arguments:
        -h, --help                          call PrintHelp() and return 1
        -o, --o, -outfile, --outfile FILE   output file (myfunc.myopen is
                                            given sys.stdout as its default)
        -q                                  set g_params['isQuiet'] to True
        --                                  take the next argument as infile
        anything else not starting with "-" is the input file (last one wins)

    Returns 1 on a usage error or when the input file is unset or missing,
    0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    infile = ""
    outfile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-o", "--o", "-outfile", "--outfile"]:
                # the option needs a value; report it instead of letting
                # argv[i + 1] raise IndexError when it is the last argument
                if i + 1 >= numArgv:
                    print >> sys.stderr, "Error! option %s requires a value" % (arg)
                    return 1
                outfile = argv[i + 1]
                i += 2
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            i += 1

    if infile == "":
        print >> sys.stderr, "infile not set. exit"
        return 1
    elif not os.path.exists(infile):
        print >> sys.stderr, "infile %s does not exist. exit" % (infile)
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        FilterUniprotIDMap(infile, fpout)
    finally:
        # release the output handle even if the filter raises
        myfunc.myclose(fpout)
    return 0
Ejemplo n.º 23
0
def main():  #{{{
    """Command-line entry point: parse sys.argv and run MPA2MSA.

    Recognised arguments:
        -h, --help        call PrintHelp() and return 1
        -o, --o FILE      output file (myfunc.myopen is given sys.stdout as
                          its default handle)
        -of, --of FORMAT  output format, "mfa" unless overridden
        --                take the next argument as the input file
        anything else not starting with "-" is the input file (last one wins)

    Returns 1 on a usage error or when myfunc.checkfile rejects the input;
    otherwise falls off the end (returns None).
    """
    args = sys.argv
    nargs = len(args)
    if nargs < 2:
        PrintHelp()
        return 1

    infile = ""
    outfile = ""
    output_format = "mfa"

    pos = 1
    take_next_as_infile = False
    while pos < nargs:
        token = args[pos]

        # value following a bare "--"
        if take_next_as_infile:
            infile = token
            take_next_as_infile = False
            pos = pos + 1
            continue

        if token == "--":
            take_next_as_infile = True
            pos = pos + 1
            continue

        # positional argument
        if token[0] != "-":
            infile = token
            pos += 1
            continue

        # options
        if token in ("-h", "--help"):
            PrintHelp()
            return 1
        if token in ("-o", "--o"):
            outfile, pos = myfunc.my_getopt_str(args, pos)
        elif token in ("-of", "--of"):
            output_format, pos = myfunc.my_getopt_str(args, pos)
        else:
            print >> sys.stderr, ("Error! Wrong argument:%s" % token)
            return 1

    if myfunc.checkfile(infile, "MSA file") != 0:
        return 1

    out_handle = myfunc.myopen(outfile, sys.stdout, "w", False)
    MPA2MSA(infile, output_format, out_handle)
    myfunc.myclose(out_handle)
def GetSeqFromMSA(infile, outfile, hdl_seqdb):#{{{
    if not os.path.exists(infile):
        print >> sys.stderr, "infile %s does not exist."%(infile)
        return 1

    outdir = os.path.dirname(outfile)
    if outdir != "" and not os.path.exists(outdir):
        os.system("mkdir -p %s"%(outdir))

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    filetype = GetFileType(infile)
    inputfile = ""
    if filetype == "fasta":
        tmpf = tempfile.mktemp()
        os.system("grep '^>' %s | sed 's/>//' > %s"%(infile, tmpf))
        inputfile = tmpf
        try:
            first_record = SeqIO.parse(open(infile, "rU"), "fasta").next()
            if first_record.id == "target":
                seq = (first_record.seq._data).replace("-","")
                fpout.write(">%s\n"%(first_record.description))
                fpout.write("%s\n"%(seq))
        except (IOError,ValueError,KeyError):
            pass
        
    elif filetype == "idlist":
        inputfile = infile
    else:
        print >> sys.stderr, "Unrecognized infile type"
        return 1

    (status, numseq, numRetrieved) = GetFullSeq(inputfile, hdl_seqdb, fpout)
    if status == 1:
        msg = "%s retrieved %d out of %d sequences. Failed to read."
    else:
        msg = "%s retrieved %d out of %d sequences. Succeeded."
    print msg%(infile, numRetrieved, numseq)

    if filetype == "fasta":
        os.system("rm -f %s"%(tmpf))

    myfunc.myclose(fpout)

    return 0
# Command-line entry point.  Parses sys.argv for:
#   -h, --help               call PrintHelp() and return 1
#   -o, --o, -outfile FILE   output file (myfunc.myopen gets sys.stdout as default)
#   -q, --q                  set g_params['isQuiet'] to True
#   --                       take the next argument as the input file
# Any other argument not starting with "-" is the input file (last one wins).
# Returns 1 on a usage error or when myfunc.checkfile rejects the input file.
def main(g_params):#{{{
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    infile = ""

    i = 1
    isNonOptionArg=False
    while i < numArgv:
        if isNonOptionArg == True:
            # previous argument was "--": this one is the input file
            infile = argv[i]
            isNonOptionArg = False
            i += 1
        elif argv[i] == "--":
            isNonOptionArg = True
            i += 1
        elif argv[i][0] == "-":
            if argv[i] in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif argv[i] in ["-o", "--o", "-outfile"]:
                # my_getopt_str returns the option value and the next index
                (outfile, i) = myfunc.my_getopt_str(argv, i)
            elif argv[i] in ["-q", "--q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", argv[i]
                return 1
        else:
            infile = argv[i]
            i += 1
    if myfunc.checkfile(infile) != 0:
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)

    # NOTE(review): the right-hand side of the assignment below is missing,
    # so this function does not parse as written; the processing step that
    # should use infile and fpout has to be restored from the original script.
    goancDict = 

    myfunc.myclose(fpout)
Ejemplo n.º 26
0
def Stat1(lines, outfile):
    MAX_NUMTM = 14
    dt = {}
    for item in [0.0, 0.5, 1.0]:
        dt[item] = InitXY(MAX_NUMTM)
    cnt = 0
    for line in lines:
        if line:
            strs = line.split()
            if strs[0] == "TMMap":
                cnt += 1
                numTM = int(strs[4].rstrip(":"))
                if numTM <= 1 or numTM >= MAX_NUMTM:
                    continue
                mapArray = [int(x) for x in strs[5:]]
                st = 1
                str_maparray_list = ["%d" % x for x in mapArray]
                str_maparray = "".join(str_maparray_list)
                posContList = GetSegPos(str_maparray, "%d" % st)
                neighbour_char = "0"
                posContList = FilterSegPos(posContList, str_maparray,
                                           neighbour_char)
                if len(posContList) >= 1:
                    for (b, e) in posContList:
                        if b == 0:
                            pp = 0.0
                        elif e == numTM:
                            pp = 1.0
                        else:
                            pp = 0.5
                            if DEBUG and (e - b) == 1:
                                print "Maparray", mapArray, posContList
                        dt[pp][1][e - b] += 1
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    fpout.write("#%2s %8s %3s %8s %3s %8s\n" %
                ("Nx", "Ny", "Ix", "Iy", "Cx", "Cy"))
    for i in xrange(1, MAX_NUMTM + 1):
        for item in [0.0, 0.5, 1.0]:
            (lx, ly) = dt[item]
            fpout.write("%3d %8d " % (lx[i], ly[i]))
        fpout.write("\n")
    myfunc.myclose(fpout)
Ejemplo n.º 27
0
def MatchMSATopo_using_topodb(
        msafile,
        topodb,
        isIgnoreBadseq,  #{{{
        method_match,
        outfile):
    """Match every sequence of an MSA file to its topology from a database.

    For each record read from msafile the topology is looked up in topodb
    by rd.seqid; when it is missing a message is printed to stderr and an
    empty topology is used.  The result of MatchSeqToTopo is written in
    FASTA form (">description" line, then the matched topology).

    Parameters:
        msafile         FASTA file read block-wise via myfunc.ReadFastaByBlock
        topodb          database opened via myfunc.MyDB
        isIgnoreBadseq  when true, records whose match is "BADSEQ" are not
                        written
        method_match    passed through to MatchSeqToTopo
        outfile         output path (myfunc.myopen is given sys.stdout as
                        its default handle)

    Returns 1 when either input cannot be opened, 0 otherwise.
    """
    hdl_topo = myfunc.MyDB(topodb)
    if hdl_topo.failure:
        return 1

    hdl = myfunc.ReadFastaByBlock(msafile)
    if hdl.failure:
        # do not leave the already opened database handle behind
        hdl_topo.close()
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        recordList = hdl.readseq()
        while recordList is not None:
            for rd in recordList:
                topowithanno = hdl_topo.GetRecord(rd.seqid)
                if topowithanno is not None:
                    (topoid, topoanno,
                     topo) = myfunc.ExtractFromSeqWithAnno(topowithanno)
                else:
                    print("topo not found for ID %s" % (rd.seqid),
                          file=sys.stderr)
                    topo = ""
                matchedtopo = MatchSeqToTopo(rd.seq, topo, method_match)
                if not (matchedtopo == "BADSEQ" and isIgnoreBadseq):
                    print(">%s" % (rd.description), file=fpout)
                    print("%s" % (matchedtopo), file=fpout)
            recordList = hdl.readseq()
    finally:
        # release all three handles even if matching raises
        myfunc.myclose(fpout)
        hdl.close()
        hdl_topo.close()

    return 0
Ejemplo n.º 28
0
def RemoveDupSeq(infile, g_outpath, method, isUseMD5):#{{{
    """Copy infile to <outpath>/<rootname>, keeping the first of each duplicate.

    Parameters:
        infile     FASTA file, read block-wise via myfunc.ReadFastaByBlock
        g_outpath  output directory; "" means myfunc.my_dirname(infile)
        method     "id"  - records are duplicates when rd.seqid is equal
                   "seq" - records are duplicates when rd.seq is equal
        isUseMD5   with method "seq", compare MD5 digests of the sequences
                   instead of the sequences themselves

    The output file name is the input base name without its extension.

    Returns 0 on success, 1 when method is not recognised, the output would
    overwrite the input or the output cannot be opened, -1 when the input
    cannot be opened.
    """
    if method not in ["id", "seq"]:
        # otherwise "key" below would never be assigned
        print >> sys.stderr, "Unrecognized method \"%s\", should be either \"id\" or \"seq\"."%(method,)
        return 1

    if g_outpath == "":
        outpath = myfunc.my_dirname(infile)
    else:
        outpath = g_outpath
    rootname = os.path.basename(os.path.splitext(infile)[0])

    outfile = "%s%s%s"%(outpath, os.sep, rootname)

    # An input without extension written to its own folder would be
    # truncated by opening the output before a single record is read.
    if os.path.abspath(outfile) == os.path.abspath(infile):
        print >> sys.stderr, "Output file %s is the same as the input file. Exit."%(outfile)
        return 1

    fpout = myfunc.myopen(outfile, None, "w", False)
    if fpout is None:
        return 1

    hdl = myfunc.ReadFastaByBlock(infile)
    if hdl.failure:
        myfunc.myclose(fpout)
        return -1

    isKeyedBySeq = (method == "seq")
    myset = set([])

    try:
        recordList = hdl.readseq()
        while recordList is not None:
            for rd in recordList:
                if not isKeyedBySeq:
                    key = rd.seqid
                elif isUseMD5:
                    key = md5.new(rd.seq).digest()
                else:
                    key = rd.seq
                if key not in myset:
                    myset.add(key)
                    fpout.write(">%s\n%s\n"%(rd.description, rd.seq))
            recordList = hdl.readseq()
    finally:
        hdl.close()
        myfunc.myclose(fpout)
    return 0
Ejemplo n.º 29
0
def RemoveDupSeq(infile, g_outpath, method, isUseMD5):  #{{{
    """Write the records of infile to <outpath>/<rootname> without duplicates.

    Parameters:
        infile     FASTA file, read block-wise via myfunc.ReadFastaByBlock
        g_outpath  output directory; "" means myfunc.my_dirname(infile)
        method     "id" compares rd.seqid, "seq" compares rd.seq
        isUseMD5   with method "seq", compare MD5 digests of the sequences
                   rather than the sequences themselves

    Only the first record of every group of duplicates is written.  The
    output file name is the input base name with its extension removed.

    Returns 0 on success, 1 for an unknown method, an output path that is
    the input file itself or an output that cannot be opened, and -1 when
    the input cannot be opened.
    """
    if method not in ["id", "seq"]:
        # without this check "key" would be unbound for the first record
        print >> sys.stderr, "Unrecognized method \"%s\", should be either \"id\" or \"seq\"." % (method,)
        return 1

    if g_outpath == "":
        outpath = myfunc.my_dirname(infile)
    else:
        outpath = g_outpath
    rootname = os.path.basename(os.path.splitext(infile)[0])

    outfile = "%s%s%s" % (outpath, os.sep, rootname)

    # Opening the output truncates it; refuse to do that to the input
    # (happens for an extension-less infile written to its own folder).
    if os.path.abspath(outfile) == os.path.abspath(infile):
        print >> sys.stderr, "Output file %s is the same as the input file. Exit." % (outfile)
        return 1

    fpout = myfunc.myopen(outfile, None, "w", False)
    if fpout is None:
        return 1

    hdl = myfunc.ReadFastaByBlock(infile)
    if hdl.failure:
        myfunc.myclose(fpout)
        return -1

    seenKeys = set([])

    try:
        recordList = hdl.readseq()
        while recordList is not None:
            for rd in recordList:
                if method == "id":
                    key = rd.seqid
                elif isUseMD5:
                    key = md5.new(rd.seq).digest()
                else:
                    key = rd.seq
                if key not in seenKeys:
                    seenKeys.add(key)
                    fpout.write(">%s\n%s\n" % (rd.description, rd.seq))
            recordList = hdl.readseq()
    finally:
        hdl.close()
        myfunc.myclose(fpout)
    return 0
Ejemplo n.º 30
0
def main(g_params):
    """Command-line entry point: write MSA records in the order of an ID list.

    Recognised arguments (every option except help takes one value):
        -h, --help                           call PrintHelp() and return 1
        -o, --o FILE                         output file (myfunc.myopen is
                                             given sys.stdout as its default)
        -orderlist, --orderlist FILE         ID order, read by ReadOrderList
        -msafile, --msafile FILE             MSA file, read by myfunc.ReadFasta
        -of, --of, -outformat, --outformat   "fasta" (default) or "anno";
                                             the value is lower-cased
        --                                   take the next argument as msafile
        anything else not starting with "-" is the MSA file

    With "anno" only the ">annotation" lines are written, with "fasta" the
    sequence follows each annotation.  IDs of the order list that are not
    in the MSA file are reported on stderr.  Nothing is written when either
    the order list or the MSA file yields no entries.

    Returns 1 on a usage error, 0 otherwise.  g_params is not used here.
    """
    numArgv = len(sys.argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outFile = ""
    orderlistfile = ""
    msafile = ""
    outformat = "fasta"  # fasta or anno

    valueOptions = [
        "-o", "--o",
        "-orderlist", "--orderlist",
        "-msafile", "--msafile",
        "-of", "--of", "-outformat", "--outformat",
    ]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = sys.argv[i]
        if isNonOptionArg:
            msafile = arg
            isNonOptionArg = False
            i = i + 1
        elif arg == "--":
            isNonOptionArg = True
            i = i + 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            if arg not in valueOptions:
                print(("Error! Wrong argument:%s" % arg), file=sys.stderr)
                return 1
            # report a missing value instead of failing with IndexError
            if i + 1 >= numArgv:
                print("Error! option %s requires a value" % arg,
                      file=sys.stderr)
                return 1
            value = sys.argv[i + 1]
            if arg in ["-o", "--o"]:
                outFile = value
            elif arg in ["-orderlist", "--orderlist"]:
                orderlistfile = value
            elif arg in ["-msafile", "--msafile"]:
                msafile = value
            else:
                outformat = value.lower()
            i = i + 2
        else:
            msafile = arg
            i += 1

    if outformat not in ["anno", "fasta"]:
        print("Unrecognized outformat \"%s\","%(
                outformat) + " should be either \"anno\" or \"fasta\".", file=sys.stderr)
        return 1

    if orderlistfile == "":
        print("orderlist file not set. Exit", file=sys.stderr)
        return 1
    if msafile == "":
        print("msafile not set. Exit", file=sys.stderr)
        # stop here: continuing would try to read a file named ""
        return 1

    orderList = ReadOrderList(orderlistfile)
    (idList, annoList, seqList) = myfunc.ReadFasta(msafile)

    if len(orderList) > 0 and len(idList) > 0:
        isWriteSeq = (outformat != "anno")
        annoDict = dict(zip(idList, annoList))
        # sequences are only needed (and only indexed) for fasta output
        seqDict = dict(zip(idList, seqList)) if isWriteSeq else {}

        fpout = myfunc.myopen(outFile, sys.stdout, "w", False)
        try:
            for sid in orderList:
                if sid in annoDict:
                    fpout.write(">%s\n"%annoDict[sid])
                    if isWriteSeq:
                        fpout.write("%s\n"%seqDict[sid])
                else:
                    print("seqid %s not in msafile %s"%(
                            sid, msafile), file=sys.stderr)
        finally:
            myfunc.myclose(fpout)

    return 0
Ejemplo n.º 31
0
def main(g_params):  #{{{
    """Command-line entry point: generate sequence pairs within families.

    Positional arguments (and the argument after "--") are collected as
    IDs; IDs from the ``-l`` list file are appended when that file exists.
    The family map (``-mapfile``) is read with myfunc.ReadFam2SeqidMap and,
    when ``-pdbtosp`` is given, the PDB map with myfunc.ReadPDBTOSP, whose
    results are stored in g_params.  Depending on ``-m`` (0, 1 or 2) one of
    GeneratePairWithinFam_m_0/1/2 is called; any other value calls none.

    Returns 1 on a usage error, when no ID is given, when the map file is
    rejected by myfunc.checkfile or when ``-onlypdb`` is set without usable
    PDB information; 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    # current value of every option that takes an argument
    opt = {
        "outfile": "",
        "outfile_with_famid": "",
        "outfile_with_pdb": "",
        "outfile_fam2seqmap": "",
        "idListFile": "",
        "mapfile": "%s%s%s" % (
            DATADIR3, os.sep,
            "wk/MPTopo/pfamAna_refpro/pfammap_from_uniprot/refpro20120604-celluar.selmaxlength-m1.nr100.filter.fragmented.clanid2seqid"
        ),
        "restrictIDListFile": "",
        "pdbtospFile": "",
        "maxseq_for_fam": 200,
        "maxpair_for_fam": 300,
        "method": 0,
        "rand_seed": None,
    }

    # option spelling -> key in opt (string valued options)
    strOptKey = {}
    for (names, key) in [
            (["-o", "--o", "-outfile", "--outfile"], "outfile"),
            (["-outwithfamid", "--outwithfamid"], "outfile_with_famid"),
            (["-outfam2seqmap", "--outfam2seqmap"], "outfile_fam2seqmap"),
            (["-outwithpdb", "--outwithpdb"], "outfile_with_pdb"),
            (["-tmprolist", "--tmprolist", "-restrictlist",
              "--restrictlist"], "restrictIDListFile"),
            (["-mapfile", "--mapfile"], "mapfile"),
            (["-pdbtosp", "--pdbtosp"], "pdbtospFile"),
            (["-l", "--l"], "idListFile"),
    ]:
        for name in names:
            strOptKey[name] = key

    # option spelling -> key in opt (integer valued options)
    intOptKey = {}
    for (names, key) in [
            (["-seed", "--seed"], "rand_seed"),
            (["-maxseq", "--maxseq"], "maxseq_for_fam"),
            (["-maxpair", "--maxpair"], "maxpair_for_fam"),
            (["-m", "--m", "-method", "--method"], "method"),
    ]:
        for name in names:
            intOptKey[name] = key

    # option spelling -> key in g_params that is switched on
    flagKey = {
        "-q": 'isQuiet',
        "-onlypdb": 'isOnlyPDB',
        "--onlypdb": 'isOnlyPDB',
    }

    idList = []
    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            idList.append(arg)
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg[0] == "-":
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in strOptKey:
                opt[strOptKey[arg]], i = myfunc.my_getopt_str(argv, i)
            elif arg in intOptKey:
                opt[intOptKey[arg]], i = myfunc.my_getopt_int(argv, i)
            elif arg in flagKey:
                g_params[flagKey[arg]] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            idList.append(arg)
            i += 1

    if os.path.exists(opt["idListFile"]):
        idList += myfunc.ReadIDList(opt["idListFile"])

    if len(idList) < 1:
        print >> sys.stderr, "no ID set. exit"
        return 1
    if myfunc.checkfile(opt["mapfile"], "idMapFile") != 0:
        return 1

    idMapDict = myfunc.ReadFam2SeqidMap(opt["mapfile"])

    # Read in pdbtosp map
    pdbtospFile = opt["pdbtospFile"]
    if pdbtospFile != "":
        (pdb2uniprotMap, uniprot2pdbMap) = myfunc.ReadPDBTOSP(pdbtospFile)
        g_params['uniprotidlist_with_pdb'] = set(uniprot2pdbMap.keys())
        g_params['uniprot2pdbMap'] = uniprot2pdbMap

    if g_params['isOnlyPDB'] == True:
        if pdbtospFile == "":
            print >> sys.stderr, "onlypdb is true but pdbtospFile is not set. exit."
            return 1
        elif g_params['uniprotidlist_with_pdb'] == set([]):
            print >> sys.stderr, "onlypdb is true but uniprotidlist_with_pdb is empty. exit."
            return 1

    restrictIDSet = set([])
    if opt["restrictIDListFile"] != "":
        restrictIDSet = set(myfunc.ReadIDList(opt["restrictIDListFile"]))

    # only the main output falls back to stdout; the others get None
    fpout = myfunc.myopen(opt["outfile"], sys.stdout, "w", False)
    fpout_withfamid = myfunc.myopen(opt["outfile_with_famid"], None, "w", False)
    fpout_withpdb = myfunc.myopen(opt["outfile_with_pdb"], None, "w", False)
    fpout_fam2seqmap = myfunc.myopen(opt["outfile_fam2seqmap"], None, "w", False)

    method = opt["method"]
    if method == 0:
        GeneratePairWithinFam_m_0(idList, idMapDict, restrictIDSet,
                                  opt["maxseq_for_fam"], opt["rand_seed"],
                                  fpout, fpout_withfamid)
    elif method == 1:
        GeneratePairWithinFam_m_1(idList, idMapDict, restrictIDSet,
                                  opt["maxpair_for_fam"], opt["rand_seed"],
                                  fpout, fpout_withfamid, fpout_fam2seqmap)
    elif method == 2:  #all to all
        GeneratePairWithinFam_m_2(idList, idMapDict, restrictIDSet, fpout,
                                  fpout_withfamid, fpout_withpdb)

    for handle in [fpout, fpout_withfamid, fpout_withpdb, fpout_fam2seqmap]:
        myfunc.myclose(handle)
    return 0
Ejemplo n.º 32
0
def main(g_params):  #{{{
    """Command-line entry point: replace annotation-file content by sequences.

    Recognised arguments:
        -h, --help               call PrintHelp() and return 1
        -outfile, --outfile FILE output file (myfunc.myopen is given
                                 sys.stdout as its default handle)
        -seqdb, --seqdb FILE     sequence database, read by GetSeqDict
        -q                       set g_params['isQuiet'] to True
        --                       take the next argument as the input file
        anything else not starting with "-" is the annotation file

    For every record of the annotation file (read by myfunc.ReadFasta) whose
    ID is found in the sequence database, the annotation line, the sequence
    and - when not empty - the record's original content are written.  IDs
    that are not found are reported on stderr.

    Returns 1 on a usage error, a missing file or an empty sequence
    database; 0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    seqdbfile = ""
    infile = ""

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            infile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in ["-outfile", "--outfile", "-seqdb", "--seqdb"]:
                # report a missing value instead of failing with IndexError
                if i + 1 >= numArgv:
                    print >> sys.stderr, "Error! option %s requires a value" % (arg)
                    return 1
                if arg in ["-outfile", "--outfile"]:
                    outfile = argv[i + 1]
                else:
                    seqdbfile = argv[i + 1]
                i += 2
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            infile = arg
            i += 1

    if infile == "":
        print >> sys.stderr, "annotation file not set"
        return 1
    elif not os.path.exists(infile):
        print >> sys.stderr, "annotation file %s does not exist" % (infile)
        return 1
    if seqdbfile == "":
        print >> sys.stderr, "seqdbfile file not set"
        return 1
    elif not os.path.exists(seqdbfile):
        print >> sys.stderr, "seqdbfile file %s does not exist" % (seqdbfile)
        return 1

    seqDict = GetSeqDict(seqdbfile)
    if seqDict == {}:
        print >> sys.stderr, "Failed to read seqdbfile %s" % (seqdbfile)
        return 1

    (idList, annoList, contentList) = myfunc.ReadFasta(infile)
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        for idx in xrange(len(idList)):
            seqid = idList[idx]
            # keep the try body to the lookup so that only a missing ID is
            # reported as "not found"
            try:
                seq = seqDict[seqid]
            except KeyError:
                print >> sys.stderr, "seqid %s not found in seqdb" % (seqid)
                continue
            fpout.write(">%s\n" % (annoList[idx]))
            fpout.write("%s\n" % (seq))
            if contentList[idx] != "":
                fpout.write("%s\n" % (contentList[idx]))
    finally:
        myfunc.myclose(fpout)
    return 0
def main(g_params):  #{{{
    """Command-line entry point: parse sys.argv and run UniprotTremblData2Table.

    Recognised arguments:
        -h, --help                          call PrintHelp() and return 1
        -o, --o, -outfile, --outfile FILE   output file (myfunc.myopen is
                                            given sys.stdout as its default)
        -i, --i FILE                        data file
        -keep_isoform, --keep_isoform       g_params['filter_isoform'] = False
        -keep_non_refpro, --keep_non_refpro g_params['filter_non_refpro'] = False
        -keep_no_genename, --keep_no_genename
                                            g_params['filter_no_genename'] = False
        -oc, --oc VALUE                     appended to the restriction list
                                            passed to UniprotTremblData2Table;
                                            may be repeated
        -q                                  set g_params['isQuiet'] to True
        --                                  take the next argument as data file
        anything else not starting with "-" is the data file

    Returns 1 on a usage error or when the data file does not exist,
    0 otherwise.
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    outfile = ""
    datafile = ""
    restrictOCList = []

    # flags that switch a g_params filter off
    keepFlags = {
        "-keep_isoform": 'filter_isoform',
        "--keep_isoform": 'filter_isoform',
        "-keep_non_refpro": 'filter_non_refpro',
        "--keep_non_refpro": 'filter_non_refpro',
        "-keep_no_genename": 'filter_no_genename',
        "--keep_no_genename": 'filter_no_genename',
    }
    valueOptions = ["-o", "--o", "-outfile", "--outfile", "-i", "--i",
                    "-oc", "--oc"]

    i = 1
    isNonOptionArg = False
    while i < numArgv:
        arg = argv[i]
        if isNonOptionArg:
            datafile = arg
            isNonOptionArg = False
            i += 1
        elif arg == "--":
            isNonOptionArg = True
            i += 1
        elif arg.startswith("-"):
            if arg in ["-h", "--help"]:
                PrintHelp()
                return 1
            elif arg in valueOptions:
                # report a missing value instead of failing with IndexError
                if i + 1 >= numArgv:
                    print >> sys.stderr, "Error! option %s requires a value" % (arg)
                    return 1
                value = argv[i + 1]
                if arg in ["-i", "--i"]:
                    datafile = value
                elif arg in ["-oc", "--oc"]:
                    restrictOCList.append(value)
                else:
                    outfile = value
                i += 2
            elif arg in keepFlags:
                g_params[keepFlags[arg]] = False
                i += 1
            elif arg in ["-q"]:
                g_params['isQuiet'] = True
                i += 1
            else:
                print >> sys.stderr, "Error! Wrong argument:", arg
                return 1
        else:
            datafile = arg
            i += 1

    if not os.path.exists(datafile):
        print >> sys.stderr, "datafile %s not set or not exists. Exit" % (
            datafile)
        return 1

    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        UniprotTremblData2Table(datafile, restrictOCList, fpout)
    finally:
        # release the output handle even if the conversion raises
        myfunc.myclose(fpout)
    return 0
def main(g_params):  #{{{
    """Command-line entry point: label every ID as euk, gram+, gram- or NA.

    Recognised arguments:
        -h, --help               call PrintHelp() and return 1
        -o, --o, -outfile FILE   output file (myfunc.myopen is given
                                 sys.stdout as its default handle)
        -euk, --euk FILE         ID list checked first ("euk")
        -gram+, --gram+ FILE     ID list checked second ("gram+")
        -gram-, --gram- FILE     ID list checked third ("gram-")
        -q, --q                  set g_params['isQuiet'] to True
        --                       take the next argument as the ID list file
        anything else not starting with "-" is the ID list file

    One tab-separated line "<id> <empty tax id> <class>" is written per ID.
    Returns 1 on a usage error or when myfunc.checkfile rejects a file;
    otherwise falls off the end (returns None).
    """
    argv = sys.argv
    numArgv = len(argv)
    if numArgv < 2:
        PrintHelp()
        return 1

    # values of the string options, keyed by a short name
    paths = {"outfile": "", "euk": "", "gram_pos": "", "gram_neg": ""}
    optionKey = {
        "-o": "outfile", "--o": "outfile", "-outfile": "outfile",
        "-euk": "euk", "--euk": "euk",
        "-gram+": "gram_pos", "--gram+": "gram_pos",
        "-gram-": "gram_neg", "--gram-": "gram_neg",
    }
    idListFile = ""

    pos = 1
    nextIsIDListFile = False
    while pos < numArgv:
        token = argv[pos]
        if nextIsIDListFile:
            idListFile = token
            nextIsIDListFile = False
            pos += 1
        elif token == "--":
            nextIsIDListFile = True
            pos += 1
        elif token[0] != "-":
            idListFile = token
            pos += 1
        elif token in ["-h", "--help"]:
            PrintHelp()
            return 1
        elif token in optionKey:
            (paths[optionKey[token]], pos) = myfunc.my_getopt_str(argv, pos)
        elif token in ["-q", "--q"]:
            g_params['isQuiet'] = True
            pos += 1
        else:
            print >> sys.stderr, "Error! Wrong argument:", token
            return 1

    # every input must pass checkfile, in this order
    for (path, label) in [(idListFile, "idListFile"),
                          (paths["euk"], "euk"),
                          (paths["gram_pos"], "gram_pos"),
                          (paths["gram_neg"], "gram_neg")]:
        if myfunc.checkfile(path, label) != 0:
            return 1

    idList = myfunc.ReadIDList(idListFile)
    # (class label, IDs of that class), tested in this order
    classTable = [
        ("euk", set(myfunc.ReadIDList(paths["euk"]))),
        ("gram+", set(myfunc.ReadIDList(paths["gram_pos"]))),
        ("gram-", set(myfunc.ReadIDList(paths["gram_neg"]))),
    ]

    fpout = myfunc.myopen(paths["outfile"], sys.stdout, "w", False)

    NCBI_TaxID = ""
    for seqid in idList:
        for (label, members) in classTable:
            if seqid in members:
                cls = label
                break
        else:
            # not a member of any of the three lists
            cls = "NA"
        print >> fpout, "%s\t%s\t%s" % (seqid, NCBI_TaxID, cls)
    myfunc.myclose(fpout)
Ejemplo n.º 35
0
    print usage
    sys.exit(1)

# Second command-line argument: result path (mandatory; print usage and
# exit with status 1 when it is missing).
try:
    path_result = sys.argv[2]
except:
    print usage
    sys.exit(1)

# Third command-line argument: output file (optional; stays "" when absent).
outfile=""
try:
    outfile = sys.argv[3]
except:
    pass

# myfunc.myopen is given sys.stdout as the default handle for an empty name.
fpout = myfunc.myopen(outfile, sys.stdout, "w", False)



# NOTE(review): jobruntimeFile is assigned above this fragment (not visible
# here), presumably from sys.argv[1] -- confirm.
hdl = myfunc.ReadLineByBlock(jobruntimeFile)
if hdl.failure:
    sys.exit(1)

# Process the file block by block; hdl.readlines() is compared against None
# to detect the end of the input.
lines = hdl.readlines()
cnt=0
while lines != None:
    for line in lines:
        # records are tab-separated; lines with fewer than 8 fields are skipped
        strs = line.split("\t")
        if len(strs) < 8:
            continue
        # NOTE(review): the rest of the loop body (including the next
        # hdl.readlines() call) is cut off in this fragment.
        jobid = strs[0]
Ejemplo n.º 36
0
def CalculateQueue(resultdir, outfile):  # {{{
    """Rank the queued jobs found under resultdir and write a job table.

    A job is a sub-folder of resultdir whose name is all digits or starts
    with "r_" or "rst_".  Its status comes from get_job_status; the user is
    the content of its "email" file unless that is empty or "N/A", in which
    case the content of its "host" file is used.

    Every job is described by a list that is indexed positionally:
        [0] status
        [1] user
        [2] queue_time_in_sec  (now - content of the "date" file)
        [3] length_seq         (length of the content of the "sequence" file)
        [4] freq_in_queue      (number of queued jobs of the user)
        [5] freq_running       (number of "Running"/"Rerun" jobs of the user)
        [6] score
    For jobs that are not "Queued", entries [2]..[6] are all 0.

    Score of a queued job, with rank being the 1-based position of the job
    among the queued jobs of the same user ordered by descending queue time:
        score = queue_time_in_sec
                / ((rank + freq_running)**2 * max(length_seq, 100)**1.5)
    For jobs without a user ("") rank is 1 and freq_running is ignored.
    Sequences shorter than 100 are treated as 100 (per the original note:
    the run time of such targets is relatively similar).

    Output: a header line, the queued jobs by descending score (Rank 1..n),
    then all other jobs sorted by status with Rank 0.

    Parameters:
        resultdir  folder holding the job folders
        outfile    output path (myfunc.myopen is given sys.stdout as its
                   default handle)
    Returns None.
    """
    # 1. get the list of working folders
    folder_nr_list = []
    for folder in os.listdir(resultdir):
        if os.path.isdir(resultdir + "/" + folder) and (
            folder.isdigit() or folder[0:2] == "r_" or folder[:4] == "rst_"
        ):
            folder_nr_list.append(folder)

    # 2. gather information for all jobs
    job_table_in_queue = {}
    other_job_table = {}
    freq_user_in_queue = {}  # number of queued jobs per user
    freq_user_running = {}  # number of running jobs per user
    for folder in folder_nr_list:
        workdir = "%s/%s" % (resultdir, folder)
        status = get_job_status(workdir)
        email = ReadContent("%s/%s" % (workdir, "email"))
        host = ReadContent("%s/%s" % (workdir, "host"))
        if email and email != "N/A":
            user = email
        else:
            user = host

        if status != "Queued":
            if status in ["Running", "Rerun"]:
                freq_user_running[user] = freq_user_running.get(user, 0) + 1
            # queue time, length, both frequencies and score are 0 here
            other_job_table[folder] = [status, user, 0, 0, 0, 0, 0]
            continue

        date_str = ReadContent("%s/%s" % (workdir, "date"))
        sequence = ReadContent("%s/%s" % (workdir, "sequence"))
        length_seq = len(sequence)
        # NOTE(review): the job is counted for its user before the date is
        # validated, so a job ignored below still adds to Count_PD of the
        # user's other jobs -- kept as in the original, confirm the intent.
        freq_user_in_queue[user] = freq_user_in_queue.get(user, 0) + 1

        try:
            date_submitted = datetime.datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            print >>sys.stderr, "datefile = '%s'. date = '%s'" % ("%s/date" % (workdir), date_str)
            print >>sys.stderr, "Ignore %s" % folder
            continue
        queue_time = datetime.datetime.now() - date_submitted
        queue_time_in_sec = get_total_seconds(queue_time)

        job_table_in_queue[folder] = [status, user, queue_time_in_sec, length_seq]

    # 3. add the per-user frequencies and group the queued jobs by user in
    # the same pass (instead of rescanning all jobs once per user)
    jobs_by_user = {}
    for folder in job_table_in_queue:
        record = job_table_in_queue[folder]
        user = record[1]
        if user != "":
            freq_in_queue = freq_user_in_queue[user]
        else:
            freq_in_queue = 1
        record.append(freq_in_queue)
        record.append(freq_user_running.get(user, 0))
        jobs_by_user.setdefault(user, {})[folder] = record

    # 4. calculate the score of every queued job (formula in the docstring)
    for user in jobs_by_user:
        # in descending order by queue_time_in_sec
        sorted_sub_table = sorted(jobs_by_user[user].items(), key=lambda x: x[1][2], reverse=True)
        for (i, (folder_nr, record)) in enumerate(sorted_sub_table):
            queue_time_in_sec = record[2]
            length_seq = record[3]
            if user == "":
                rank = 1
                addition = 0
            else:
                rank = i + 1
                addition = record[5]
            score = queue_time_in_sec / ((rank + addition) ** 2 * max(length_seq, 100) ** 1.5)
            record.append(score)

    # queued jobs by descending score, all other jobs by status
    sorted_job_table_in_queue = sorted(job_table_in_queue.items(), key=lambda x: x[1][6], reverse=True)
    sorted_other_job_table = sorted(other_job_table.items(), key=lambda x: x[1][0])

    # 5. write the result
    row_format = "%-6s %8s %4d %-30s %10.1f %10.1f %6d %6d"
    fpout = myfunc.myopen(outfile, sys.stdout, "w", False)
    try:
        print >> fpout, "#%-5s %8s %4s %-30s %10s %10s %6s %6s" % (
            "ID",
            "Status",
            "Rank",
            "User",
            "PD_time(s)",
            "Score",
            "Count_PD",
            "Count_R",
        )
        for (i, (folder, record)) in enumerate(sorted_job_table_in_queue):
            print >> fpout, row_format % (
                folder,
                record[0],
                i + 1,
                record[1],
                record[2],
                record[6],
                record[4],
                record[5],
            )
        for (folder, record) in sorted_other_job_table:
            # jobs that are not queued have no rank
            print >> fpout, row_format % (
                folder,
                record[0],
                0,
                record[1],
                record[2],
                record[6],
                record[4],
                record[5],
            )
    finally:
        myfunc.myclose(fpout)