Example #1
0
def OnSeq(Label, Seq):
    """Write one sequence and its taxonomy line, or count why it was skipped.

    The accession and taxonomy string are extracted from Label via utax2.
    The taxonomy string is split on ',' and each field is expected to have
    the form "<level char>:<name>".  The kept fields are re-joined with ';'
    (plus a trailing ';') and written to fTax as "<acc>\\t<taxonomy>", while
    the sequence itself is written to fFa under the accession (with any
    underscores removed).

    Module-level counters updated:
        SeqCount          incremented for every call.
        BadLevelCount     incremented (and the sequence skipped) when the
                          number of fields differs from RequiredLevelCount.
        MissingRankCount  incremented when a rank gap is detected; the
                          taxonomy is truncated just before the gap and the
                          sequence is still written.
        EmptyTaxCount     incremented (and the sequence skipped) when no
                          field was kept.

    Args:
        Label: sequence label understood by utax2.GetAccFromLabel and
            utax2.GetTaxFromLabel.
        Seq: sequence passed unchanged to fasta.WriteSeq.
    """
    global MissingRankCount
    global EmptyTaxCount
    global BadLevelCount
    global SeqCount

    SeqCount += 1

    Acc = utax2.GetAccFromLabel(Label)
    Acc = Acc.replace("_", "")
    TaxStr = utax2.GetTaxFromLabel(Label)

    Fields = TaxStr.split(',')
    if len(Fields) != RequiredLevelCount:
        BadLevelCount += 1
        return

    MotFields = []
    LastRankIndex = -1
    for Field in Fields:
        assert Field[1] == ':', "Malformed taxonomy field %r in %r" % (Field, TaxStr)
        LevelChar = Field[0]
        RankIndex = utax2.LevelCharToRankIndex(LevelChar)
        # Consecutiveness is only enforced once the previous rank index is
        # above 1; earlier ranks are allowed to skip.  The initial -1
        # sentinel never satisfies "> 1", so no separate check is needed.
        if LastRankIndex > 1 and RankIndex != LastRankIndex + 1:
            MissingRankCount += 1
            break
        LastRankIndex = RankIndex
        MotFields.append(LevelChar + ":" + Field[2:])

    if not MotFields:
        EmptyTaxCount += 1
        return

    MotTaxStr = ";".join(MotFields) + ";"

    fasta.WriteSeq(fFa, Seq, Acc)

    # file.write() instead of the Python-2-only "print >> f" statement; the
    # bytes written are the same and this also runs under Python 3.
    fTax.write(Acc + "\t" + MotTaxStr + "\n")
Example #2
0
def OnSeq(Label, Seq):
    """Re-label one sequence as "acc<TAB>species<TAB>genus<TAB>family".

    The accession and the family/genus/species names are looked up from
    Label through utax2.  Sequences whose genus or species name is empty
    are skipped and counted in the module-level Missing counter.  An empty
    family name is written as "NA".  Any "f:", "g:" and "s:" substrings are
    removed from the respective names before the sequence is written to
    standard output via fasta.WriteSeq.

    Args:
        Label: sequence label understood by utax2.
        Seq: sequence passed unchanged to fasta.WriteSeq.
    """
    global Missing

    accession = utax2.GetAccFromLabel(Label)
    family_name = utax2.GetNameFromLabel(Label, 'f')
    genus_name = utax2.GetNameFromLabel(Label, 'g')
    species_name = utax2.GetNameFromLabel(Label, 's')

    # Both genus and species are mandatory; family is optional.
    if not (genus_name != "" and species_name != ""):
        Missing += 1
        return

    if family_name == "":
        family_name = "NA"
    else:
        family_name = family_name.replace("f:", "")
    genus_name = genus_name.replace("g:", "")
    species_name = species_name.replace("s:", "")

    columns = [accession, species_name, genus_name, family_name]
    fasta.WriteSeq(sys.stdout, Seq, "\t".join(columns))