def OnSeq(Label, Seq):
    """Emit one taxonomy line and one FASTA record for a labelled sequence.

    The taxonomy line written to the module-level ``fTax`` handle has the
    form ``<Acc><TAB><c>__<name>; <c>__<name>; ...`` where the level
    characters and names are the two parallel sequences returned by
    ``utax2.TaxToVecs`` for the taxonomy that ``utax2.GetTaxFromLabel``
    extracts from ``Label``.

    ``Acc`` is the text of ``Label`` before the first ``;``, unless the
    module-level flag ``KeepTaxAnnot`` is true, in which case the whole
    label is used. The same identifier is used for the FASTA record written
    to the module-level ``fFa`` handle.

    Args:
        Label: FASTA label of the sequence.
        Seq: Sequence passed through unchanged to ``fasta.WriteSeq``.

    Raises:
        AssertionError: if ``utax2.TaxToVecs`` returns sequences of
            different lengths.
    """
    Tax = utax2.GetTaxFromLabel(Label)
    Acc = Label if KeepTaxAnnot else Label.split(";")[0]

    Chars, Names = utax2.TaxToVecs(Tax)
    assert len(Names) == len(Chars)

    Ranks = [Char + "__" + Name for Char, Name in zip(Chars, Names)]
    OutLine = Acc + "\t" + "; ".join(Ranks)

    # file.write instead of the Python-2-only "print >>" statement, so the
    # function produces the same output under Python 2 and Python 3.
    fTax.write(OutLine + "\n")

    # Acc already equals Label when KeepTaxAnnot is set, so one call covers
    # both cases.
    fasta.WriteSeq(fFa, Seq, Acc)
def OnSeq(Label, Seq):
    """Write a ``REF_``-prefixed FASTA record and its taxonomy line.

    The accession from ``fasta.GetAccFromLabel(Label)`` is upper-cased and
    prefixed with ``REF_`` to form the new label. The taxonomy string is
    built by calling ``AppendRank`` for each rank from most specific to
    least specific: species (only when the source taxonomy contains
    ``,s:``), genus, family, order, class, phylum, and finally the top
    rank, which is ``d`` when the source taxonomy contains ``d:`` and ``k``
    otherwise.

    Example of the target layout (from the original author's note):
    NR_117221.1 species:Mycobacterium arosiense;genus:Mycobacterium;family:Mycobacteriaceae;order:Corynebacteriales;class:Actinobacteria;phylum:Actinobacteria;superkingdom:Bacteria;

    Args:
        Label: FASTA label of the sequence.
        Seq: Sequence passed through unchanged to ``fasta.WriteSeq``.
    """
    Acc = fasta.GetAccFromLabel(Label)
    TaxStr = utax2.GetTaxFromLabel(Label)

    TopRank = 'd' if TaxStr.find("d:") >= 0 else 'k'

    Ranks = ['g', 'f', 'o', 'c', 'p', TopRank]
    # Species is optional: it is appended only when present after a comma.
    if TaxStr.find(",s:") > 0:
        Ranks.insert(0, 's')

    Tax = ""
    for Rank in Ranks:
        Tax = AppendRank(Tax, TaxStr, Rank)

    NewLabel = "REF_" + Acc.upper()
    fasta.WriteSeq(fFa, Seq, NewLabel)

    # file.write instead of the Python-2-only "print >>" statement, so the
    # function produces the same output under Python 2 and Python 3.
    TaxLine = "%s\t%s" % (NewLabel, Tax)
    fTax.write(TaxLine + "\n")
def OnSeq(Label, Seq):
    """Filter a sequence by taxonomy completeness and write it out.

    Increments the module-level ``SeqCount`` for every call. The accession
    from ``utax2.GetAccFromLabel`` has its underscores removed. The
    taxonomy from ``utax2.GetTaxFromLabel`` is split on commas into
    ``<level char>:<name>`` fields.

    * If the number of fields differs from ``RequiredLevelCount``, the
      sequence is dropped and ``BadLevelCount`` is incremented.
    * Fields are copied in order. Once a rank with index greater than 1 has
      been accepted, the next field must have exactly the following rank
      index; otherwise ``MissingRankCount`` is incremented and the taxonomy
      is truncated at that point.
    * If nothing was copied, the sequence is dropped and ``EmptyTaxCount``
      is incremented.

    Surviving sequences are written to ``fFa`` and a line
    ``<Acc><TAB><l>:<name>;<l>:<name>;...;`` is written to ``fTax``.

    Args:
        Label: FASTA label of the sequence.
        Seq: Sequence passed through unchanged to ``fasta.WriteSeq``.

    Raises:
        AssertionError: if the second character of a field is not ``:``.
        IndexError: if a field is shorter than two characters.
    """
    global MissingRankCount
    global EmptyTaxCount
    global BadLevelCount
    global SeqCount

    SeqCount += 1

    Acc = utax2.GetAccFromLabel(Label).replace("_", "")
    TaxStr = utax2.GetTaxFromLabel(Label)
    Fields = TaxStr.split(',')
    if len(Fields) != RequiredLevelCount:
        BadLevelCount += 1
        return

    Parts = []
    LastRankIndex = -1  # -1 means no field has been accepted yet
    for Field in Fields:
        assert Field[1] == ':'
        LevelChar = Field[0]
        RankIndex = utax2.LevelCharToRankIndex(LevelChar)
        # The contiguity check only applies after a rank with index > 1;
        # this also covers the initial -1 sentinel.
        if LastRankIndex > 1 and RankIndex != LastRankIndex + 1:
            MissingRankCount += 1
            break
        LastRankIndex = RankIndex
        Parts.append(LevelChar + ":" + Field[2:])

    if not Parts:
        EmptyTaxCount += 1
        return

    MotTaxStr = ";".join(Parts) + ";"
    fasta.WriteSeq(fFa, Seq, Acc)

    # file.write instead of the Python-2-only "print >>" statement, so the
    # function produces the same output under Python 2 and Python 3.
    fTax.write(Acc + "\t" + MotTaxStr + "\n")
def OnSeq(Label, Seq):
    """Relabel a sequence as ``Acc<TAB>Species<TAB>Genus<TAB>Family``.

    The accession and the family, genus and species names are taken from
    ``Label`` via ``utax2``. A sequence whose genus or species name is the
    empty string is skipped and counted in the module-level ``Missing``
    counter. An empty family name is reported as ``NA``. Every occurrence
    of the ``f:``, ``g:`` and ``s:`` markers is removed from the respective
    name before the record is written to standard output.

    Args:
        Label: FASTA label of the sequence.
        Seq: Sequence passed through unchanged to ``fasta.WriteSeq``.
    """
    global Missing

    Acc = utax2.GetAccFromLabel(Label)
    Family = utax2.GetNameFromLabel(Label, 'f')
    Genus = utax2.GetNameFromLabel(Label, 'g')
    Species = utax2.GetNameFromLabel(Label, 's')

    # Genus and species are mandatory; family is optional.
    if "" in (Genus, Species):
        Missing += 1
        return

    FamilyName = "NA" if Family == "" else Family.replace("f:", "")
    GenusName = Genus.replace("g:", "")
    SpeciesName = Species.replace("s:", "")

    NewLabel = "\t".join([Acc, SpeciesName, GenusName, FamilyName])
    fasta.WriteSeq(sys.stdout, Seq, NewLabel)
def OnSeq(Label, Seq):
    """Write an upper-cased-accession FASTA record and its taxonomy line.

    The taxonomy string is built by calling ``AppendRank`` for each rank
    from least specific to most specific: the top rank (``d`` when the
    source taxonomy contains ``d:``, otherwise ``k``), then phylum, class,
    order, family, genus and species.

    Example of the target layout (from the original author's note):
    DQ200983.1.1404.B Bacteria;Actinobacteria;Actinobacteria;Frankiales;Geodermatophilaceae;Blastococcus;Blastococcus jejuensis;

    Args:
        Label: FASTA label of the sequence.
        Seq: Sequence passed through unchanged to ``fasta.WriteSeq``.
    """
    Acc = fasta.GetAccFromLabel(Label)
    TaxStr = utax2.GetTaxFromLabel(Label)

    TopRank = 'd' if TaxStr.find("d:") >= 0 else 'k'

    Tax = ""
    for Rank in (TopRank, 'p', 'c', 'o', 'f', 'g', 's'):
        Tax = AppendRank(Tax, TaxStr, Rank)

    Acc = Acc.upper()
    fasta.WriteSeq(fFa, Seq, Acc)

    # file.write instead of the Python-2-only "print >>" statement, so the
    # function produces the same output under Python 2 and Python 3.
    TaxLine = "%s\t%s" % (Acc, Tax)
    fTax.write(TaxLine + "\n")