def OnSeq(Label, Seq):
    """Write one sequence and its reformatted taxonomy line.

    The accession and the taxonomy annotation are extracted from Label via
    utax2. The taxonomy is split on ',' into "<level char>:<name>" fields,
    which are re-joined with ';' (plus a trailing ';'). The sequence is
    written to fFa under the accession, and "accession<TAB>taxonomy" is
    written as one line to fTax.

    Args:
        Label: Sequence label, handed to utax2.GetAccFromLabel and
            utax2.GetTaxFromLabel.
        Seq: Sequence data, passed through unchanged to fasta.WriteSeq.

    Returns:
        None. Results are written to the module-level files fFa and fTax.

    Module-level counters updated:
        SeqCount: incremented on every call.
        BadLevelCount: incremented (and the record skipped) when the number
            of taxonomy fields differs from RequiredLevelCount.
        MissingRankCount: incremented when a rank index is not the
            successor of the previous one; the taxonomy is truncated at
            that point but the record is still written.
        EmptyTaxCount: incremented (and the record skipped) when no
            taxonomy field was kept.

    Raises:
        AssertionError: if the second character of a field is not ':'.
    """
    global MissingRankCount
    global EmptyTaxCount
    global BadLevelCount
    global SeqCount

    SeqCount += 1

    # Underscores are removed from the accession before it is used as the
    # output label.
    Acc = utax2.GetAccFromLabel(Label)
    Acc = Acc.replace("_", "")
    TaxStr = utax2.GetTaxFromLabel(Label)
    Fields = TaxStr.split(',')
    if len(Fields) != RequiredLevelCount:
        BadLevelCount += 1
        return

    Parts = []
    LastRankIndex = -1
    for Field in Fields:
        # Every field is expected to look like "<level char>:<name>".
        assert Field[1] == ':'
        LevelChar = Field[0]
        RankIndex = utax2.LevelCharToRankIndex(LevelChar)
        # Consecutive ranks are only enforced once the previous rank index
        # is above 1 (NOTE(review): reason for this threshold is not
        # visible here -- confirm against utax2's rank numbering).
        if LastRankIndex > 1 and RankIndex != LastRankIndex + 1:
            MissingRankCount += 1
            # Stop at the gap; fields collected so far are still emitted.
            break
        LastRankIndex = RankIndex
        Parts.append(LevelChar + ":" + Field[2:])

    if not Parts:
        EmptyTaxCount += 1
        return

    MotTaxStr = ";".join(Parts) + ";"

    fasta.WriteSeq(fFa, Seq, Acc)
    # write() with an explicit newline emits the same bytes as the former
    # Python-2-only "print >> fTax" statement and also runs on Python 3.
    fTax.write(Acc + "\t" + MotTaxStr + "\n")
def OnSeq(Label, Seq):
    """Re-label a sequence with accession, species, genus and family.

    The accession and the family ('f'), genus ('g') and species ('s') names
    are looked up from Label through utax2. Records whose genus or species
    name is empty are counted in the module-level Missing counter and
    skipped. An empty family name is replaced by "NA". Every occurrence of
    "f:", "g:" and "s:" is removed from the respective name, and the
    sequence is written to sys.stdout with the tab-separated label
    "accession<TAB>species<TAB>genus<TAB>family".

    Args:
        Label: Sequence label, handed to the utax2 lookup helpers.
        Seq: Sequence data, passed through unchanged to fasta.WriteSeq.

    Returns:
        None.
    """
    global Missing

    Accession = utax2.GetAccFromLabel(Label)
    RawFamily = utax2.GetNameFromLabel(Label, 'f')
    RawGenus = utax2.GetNameFromLabel(Label, 'g')
    RawSpecies = utax2.GetNameFromLabel(Label, 's')

    # Genus and species are both mandatory; family is optional.
    if "" in (RawGenus, RawSpecies):
        Missing += 1
        return

    FamilyName = "NA" if RawFamily == "" else RawFamily
    FamilyName = FamilyName.replace("f:", "")
    GenusName = RawGenus.replace("g:", "")
    SpeciesName = RawSpecies.replace("s:", "")

    OutLabel = "\t".join((Accession, SpeciesName, GenusName, FamilyName))
    fasta.WriteSeq(sys.stdout, Seq, OutLabel)