Ejemplo n.º 1
0
def OnSeq(Label, Seq):
    """Write one sequence and its taxonomy line to the output files.

    The taxonomy string is taken from Label with utax2.GetTaxFromLabel and
    split into two parallel vectors (rank characters and names) with
    utax2.TaxToVecs.  One line is printed to fTax in the form

        <id> TAB <char>__<name>; <char>__<name>; ...

    and the sequence is written to fFa under the same identifier.

    The identifier is the full Label when the module-level KeepTaxAnnot
    flag is true; otherwise it is the part of Label before the first ';'.

    Raises AssertionError if the two vectors differ in length.
    """
    Tax = utax2.GetTaxFromLabel(Label)
    SeqId = Label if KeepTaxAnnot else Label.split(";")[0]

    Chars, Names = utax2.TaxToVecs(Tax)
    assert len(Names) == len(Chars)

    # Build every "<char>__<name>" entry first and join once, instead of
    # growing the line with += and special-casing the last separator.
    Entries = [Char + "__" + Name for Char, Name in zip(Chars, Names)]
    print >> fTax, SeqId + "\t" + "; ".join(Entries)

    # SeqId already equals Label when KeepTaxAnnot is set, so a single call
    # covers both cases.
    fasta.WriteSeq(fFa, Seq, SeqId)
Ejemplo n.º 2
0
def OnSeq(Label, Seq):
    """Write a sequence under a "REF_<ACCESSION>" label plus its taxonomy.

    The accession comes from fasta.GetAccFromLabel and the taxonomy string
    from utax2.GetTaxFromLabel.  The taxonomy line is assembled by calling
    AppendRank once per rank, from the lowest rank up to the top rank, and
    is printed to fTax as "<new label> TAB <taxonomy>".  The sequence is
    written to fFa under the new label.

    NOTE(review): AppendRank is defined outside this block; presumably it
    appends the named rank from TaxStr to Tax -- confirm.
    """
    Accession = fasta.GetAccFromLabel(Label)
    TaxStr = utax2.GetTaxFromLabel(Label)

    # The top rank is 'd' when the taxonomy contains "d:", otherwise 'k'.
    TopRank = 'd' if "d:" in TaxStr else 'k'

    # Example line (presumably the intended output layout -- confirm):
    # NR_117221.1     species:Mycobacterium arosiense;genus:Mycobacterium;family:Mycobacteriaceae;order:Corynebacteriales;class:Actinobacteria;phylum:Actinobacteria;superkingdom:Bacteria;

    RankOrder = ['g', 'f', 'o', 'c', 'p', TopRank]
    # The species rank is only emitted when ",s:" occurs past position 0.
    if TaxStr.find(",s:") > 0:
        RankOrder.insert(0, 's')

    Tax = ""
    for Rank in RankOrder:
        Tax = AppendRank(Tax, TaxStr, Rank)

    NewLabel = "REF_" + Accession.upper()
    fasta.WriteSeq(fFa, Seq, NewLabel)

    print >> fTax, "%s\t%s" % (NewLabel, Tax)
Ejemplo n.º 3
0
def OnSeq(Label, Seq):
    """Validate a label's taxonomy and write the sequence and taxonomy line.

    Updates the module-level counters SeqCount, BadLevelCount,
    MissingRankCount and EmptyTaxCount.

    The accession (from utax2.GetAccFromLabel, with underscores removed)
    is used as the output label.  The taxonomy string is split on ','.  If
    the number of fields differs from RequiredLevelCount, the sequence is
    counted in BadLevelCount and skipped.  Each field must have ':' as its
    second character.  Fields are copied as "<char>:<name>" until a rank
    gap is found; the fields collected so far are still written.

    Output: the sequence goes to fFa, and "<acc> TAB <f1>;<f2>;...;" is
    printed to fTax.
    """
    global MissingRankCount
    global EmptyTaxCount
    global BadLevelCount
    global SeqCount

    SeqCount += 1

    Acc = utax2.GetAccFromLabel(Label).replace("_", "")
    TaxStr = utax2.GetTaxFromLabel(Label)

    Fields = TaxStr.split(',')
    if len(Fields) != RequiredLevelCount:
        BadLevelCount += 1
        return

    Parts = []
    PrevRankIndex = -1
    for Field in Fields:
        assert Field[1] == ':'
        LevelChar = Field[0]
        RankIndex = utax2.LevelCharToRankIndex(LevelChar)
        # A gap is only checked once the previous rank index is above 1;
        # the initial -1 sentinel therefore never triggers it.
        if PrevRankIndex > 1 and RankIndex != PrevRankIndex + 1:
            MissingRankCount += 1
            break
        PrevRankIndex = RankIndex
        Parts.append(LevelChar + ":" + Field[2:])

    if not Parts:
        EmptyTaxCount += 1
        return

    fasta.WriteSeq(fFa, Seq, Acc)

    # Every field, including the last, is terminated by ';'.
    print >> fTax, Acc + "\t" + ";".join(Parts) + ";"
Ejemplo n.º 4
0
def OnSeq(Label, Seq):
    """Write a sequence to stdout with a tab-separated taxonomy label.

    The new label is "<acc> TAB <species> TAB <genus> TAB <family>".  The
    names come from utax2.GetNameFromLabel for ranks 's', 'g' and 'f',
    with any "s:", "g:" and "f:" substrings removed.

    If the genus or species name is an empty string, the sequence is
    skipped and the module-level Missing counter is incremented.  An empty
    family name is replaced by "NA".
    """
    global Missing

    Acc = utax2.GetAccFromLabel(Label)
    Family = utax2.GetNameFromLabel(Label, 'f')
    Genus = utax2.GetNameFromLabel(Label, 'g')
    Species = utax2.GetNameFromLabel(Label, 's')

    # Genus and species are both required; family is optional.
    if "" in (Genus, Species):
        Missing += 1
        return

    FamilyName = "NA" if Family == "" else Family.replace("f:", "")
    GenusName = Genus.replace("g:", "")
    SpeciesName = Species.replace("s:", "")

    Columns = [Acc, SpeciesName, GenusName, FamilyName]
    fasta.WriteSeq(sys.stdout, Seq, "\t".join(Columns))
Ejemplo n.º 5
0
def OnSeq(Label, Seq):
    """Write a sequence under its upper-cased accession plus its taxonomy.

    The accession comes from fasta.GetAccFromLabel and the taxonomy string
    from utax2.GetTaxFromLabel.  The taxonomy line is assembled by calling
    AppendRank once per rank, from the top rank down to species, and is
    printed to fTax as "<accession> TAB <taxonomy>".  The sequence is
    written to fFa under the same accession.

    NOTE(review): AppendRank is defined outside this block; presumably it
    appends the named rank from TaxStr to Tax -- confirm.
    """
    Accession = fasta.GetAccFromLabel(Label)
    TaxStr = utax2.GetTaxFromLabel(Label)

    # The top rank is 'd' when the taxonomy contains "d:", otherwise 'k'.
    TopRank = 'd' if "d:" in TaxStr else 'k'

    # Example line (presumably the intended output layout -- confirm):
    # DQ200983.1.1404.B       Bacteria;Actinobacteria;Actinobacteria;Frankiales;Geodermatophilaceae;Blastococcus;Blastococcus jejuensis;
    Tax = ""
    for Rank in (TopRank, 'p', 'c', 'o', 'f', 'g', 's'):
        Tax = AppendRank(Tax, TaxStr, Rank)

    Accession = Accession.upper()
    fasta.WriteSeq(fFa, Seq, Accession)

    print >> fTax, "%s\t%s" % (Accession, Tax)