Example #1
0
def OnSeq(Label, Seq):
    """Write one reference sequence and its taxonomy line.

    The accession taken from Label is upper-cased and prefixed with "REF_"
    to form the new label.  The sequence is written to fFa under that label
    and a "label<TAB>taxonomy" line is printed to fTax.

    The taxonomy string is built by calling AppendRank once per rank letter
    in the order s (only when ",s:" occurs past index 0 of the taxonomy
    string), g, f, o, c, p, and finally the domain letter, which is 'd' when
    "d:" occurs in the taxonomy string and 'k' otherwise.
    """
    # Example record:
    # NR_117221.1     species:Mycobacterium arosiense;genus:Mycobacterium;family:Mycobacteriaceae;order:Corynebacteriales;class:Actinobacteria;phylum:Actinobacteria;superkingdom:Bacteria;
    Accession = fasta.GetAccFromLabel(Label)
    Lineage = utax2.GetTaxFromLabel(Label)

    DomainLetter = 'k' if Lineage.find("d:") < 0 else 'd'

    RankLetters = ['g', 'f', 'o', 'c', 'p', DomainLetter]
    if Lineage.find(",s:") > 0:
        # Species is emitted first, and only when the lineage carries one.
        RankLetters.insert(0, 's')

    Tax = ""
    for RankLetter in RankLetters:
        Tax = AppendRank(Tax, Lineage, RankLetter)

    NewLabel = "REF_" + Accession.upper()
    fasta.WriteSeq(fFa, Seq, NewLabel)

    print >> fTax, "%s\t%s" % (NewLabel, Tax)
Example #2
0
def FixQueryLabel(Label):
    """Return Label rewritten as "<accession>;tax=<field>,<field>,...;".

    Each comma-separated field of the label's taxonomy is re-emitted as its
    first character, a ':' and everything from its third character onward,
    so whatever character sat at index 1 of the field is replaced by ':'.
    An empty field raises IndexError.
    """
    Accession = fasta.GetAccFromLabel(Label)
    Lineage = fasta.GetTaxFromLabel(Label)

    Prefix = Accession + ";tax="
    Pieces = [Item[0] + ":" + Item[2:] for Item in Lineage.split(',')]
    return Prefix + ",".join(Pieces) + ";"
Example #3
0
def OnSeq(Label, Seq):
    """Record the taxonomy of one sequence under its bare accession.

    The accession taken from Label is upper-cased and a leading "REF_"
    prefix, if present, is removed before the taxonomy string is stored in
    the global AccToTax dict.  While the global DomainRank still holds the
    placeholder '?', it is set to the first character of this sequence's
    taxonomy string.

    Seq is accepted for the callback signature but is not used.
    """
    global AccToTax
    global DomainRank

    Acc = fasta.GetAccFromLabel(Label)
    Tax = fasta.GetTaxFromLabel(Label)

    Acc = Acc.upper()
    Prefix = "REF_"
    if Acc.startswith(Prefix):
        # Remove only the leading prefix.  str.replace() would also delete
        # any later "REF_" inside the accession and corrupt the lookup key.
        Acc = Acc[len(Prefix):]
    AccToTax[Acc] = Tax

    if DomainRank == '?':
        DomainRank = Tax[0]
Example #4
0
def OnSeq(Label, Seq):
    """Store one sequence's taxonomy and, once, fix up the domain rank.

    The taxonomy string from Label is stored in the global AccToTax dict
    under the upper-cased accession.  If the global DomainRank is still the
    placeholder '?', it becomes the first character of the taxonomy string
    and replaces the first character of the global Ranks; the second rank
    is then asserted to be 'p'.

    Seq is accepted for the callback signature but is not used.
    """
    global AccToTax, DomainRank, Ranks

    Accession = fasta.GetAccFromLabel(Label)
    Lineage = fasta.GetTaxFromLabel(Label)
    AccToTax[Accession.upper()] = Lineage

    # The domain rank only needs to be resolved once.
    if DomainRank != '?':
        return

    DomainRank = Lineage[0]
    Ranks = DomainRank + Ranks[1:]
    assert Ranks[1] == 'p'
def OnSeq(Label, Seq):
    """Write one sequence under its upper-cased accession plus its taxonomy.

    The sequence is written to fFa labelled with the upper-cased accession
    and an "accession<TAB>taxonomy" line is printed to fTax.

    The taxonomy string is built by calling AppendRank once per rank letter
    in the order domain, p, c, o, f, g, s.  The domain letter is 'd' when
    "d:" occurs in the taxonomy string and 'k' otherwise.
    """
    # Example record:
    # DQ200983.1.1404.B       Bacteria;Actinobacteria;Actinobacteria;Frankiales;Geodermatophilaceae;Blastococcus;Blastococcus jejuensis;
    Accession = fasta.GetAccFromLabel(Label)
    Lineage = utax2.GetTaxFromLabel(Label)

    DomainLetter = 'k' if Lineage.find("d:") < 0 else 'd'

    Tax = ""
    for RankLetter in (DomainLetter, 'p', 'c', 'o', 'f', 'g', 's'):
        Tax = AppendRank(Tax, Lineage, RankLetter)

    Accession = Accession.upper()
    fasta.WriteSeq(fFa, Seq, Accession)

    print >> fTax, "%s\t%s" % (Accession, Tax)
Example #6
0
def DoPredFile(FileName, NameToCountA, NameToCountB):
    """Score a tab-separated prediction file and return the tallies.

    Every line of FileName must contain exactly two tab-separated fields:
    the query label and the predicted taxonomy.  A trailing ';' on the
    prediction is dropped, anything up to and including "tax=" is
    discarded, and a prediction of "*" means "no prediction".

    Reads the module-level globals Rank (the rank at which names are
    compared) and REPORT (when true, one tab-separated line per query is
    printed to stdout).

    Args:
        FileName: path of the prediction file.
        NameToCountA: not used by this function; kept so existing callers
            keep working.
        NameToCountB: passed to sortdict.GetCount together with the query
            name; a count above zero marks the query as known.

    Returns:
        The tuple (Known, Novel, TP, TN, FN, OC, MC, NC) where
          Known / Novel - queries whose name has a count > 0 / otherwise,
          TP - predicted name equals the query name,
          TN - no prediction and count == 0,
          FN - no prediction and count != 0,
          OC - a different name predicted and count == 0,
          MC - a different name predicted and count != 0,
          NC - queries that received a non-empty predicted name.

    Raises:
        AssertionError: a line does not have two fields, or the query name
            at Rank is empty.
    """
    TP = TN = FN = OC = MC = NC = 0
    Known = Novel = 0

    # The context manager closes the file on every exit path.
    with open(FileName) as f:
        for Line in f:
            # Strip only the line terminator; slicing off the last
            # character would eat real data on an unterminated final line.
            Fields = Line.rstrip("\r\n").split('\t')
            assert len(Fields) == 2, \
                "expected 2 tab-separated fields, got %r" % Line

            QueryLabel = Fields[0]
            Pred = Fields[1]
            if Pred.endswith(';'):
                Pred = Pred[:-1]

            QueryName = utax2.GetNameFromLabel(QueryLabel, Rank)
            assert QueryName != ""

            if "tax=" in Pred:
                Pred = Pred.split("tax=")[1]

            if Pred == "*":
                PredName = ""
            else:
                PredName = utax2.GetNameFromTaxStr(Pred, Rank)
            if PredName != "":
                NC += 1

            Count = sortdict.GetCount(NameToCountB, QueryName)
            IsKnown = (Count > 0)
            if IsKnown:
                Known += 1
            else:
                Novel += 1

            # A correct prediction for a name with no count is treated as
            # a fatal inconsistency.
            if PredName == QueryName and not IsKnown:
                die.Die("QueryName=%s, PredName=%s >%s" %
                        (QueryName, PredName, QueryLabel))

            if PredName == QueryName:
                XX = "TP"
                TP += 1
            elif PredName == "":
                if Count == 0:
                    XX = "TN"
                    TN += 1
                else:
                    XX = "FN"
                    FN += 1
            else:
                if Count == 0:
                    XX = "OC"
                    OC += 1
                else:
                    XX = "MC"
                    MC += 1

            if REPORT:
                Acc = fasta.GetAccFromLabel(QueryLabel)
                k = "known" if IsKnown else "novel"
                PredNameStr = PredName if PredName != "" else "-"
                s = "\t".join(
                    [Acc, XX, k, QueryName, PredNameStr, str(Count)])
                # Single-argument print(...) emits the same text under
                # Python 2's print statement and Python 3's print function.
                print(s)

    return Known, Novel, TP, TN, FN, OC, MC, NC
Example #7
0
        assert Ranks[1] == 'p'


# Run OnSeq over the FASTA file first (presumably once per sequence -- confirm
# against fasta.ReadSeqsOnSeq); the loop below looks accessions up in the
# AccToTax dict that OnSeq fills in.
fasta.ReadSeqsOnSeq(FastaFileName, OnSeq)

# NOTE(review): f is not closed anywhere in the visible part of the script.
f = open(FileName)
while 1:
    Line = f.readline()
    # readline() returns an empty string only at end of file.
    if len(Line) == 0:
        break

# Example input line (tab-separated; the taxonomy field ends with ';'):
# gi_1018196556     Bacteria;Proteobacteria;Gammaproteobacteria;Oceanospirillales;Halomonadaceae;   96.58   292     91.03
    # [:-1] drops the last character, assumed to be the newline.
    # NOTE(review): a final line without a newline would lose a real character.
    Fields = Line[:-1].split('\t')

    Label = Fields[0]
    Acc = fasta.GetAccFromLabel(Label)
    Acc = Acc.upper()

    # Raises KeyError if the accession is not a key of AccToTax.
    # QTax is not used in the visible part of the loop.
    QTax = AccToTax[Acc]

    Tax = Fields[1]
    Fields2 = Tax.split(";")
    # The last element of the split is skipped; it is empty when the field
    # ends with ';' as in the example line above.
    n = len(Fields2) - 1
    assert n <= len(Ranks)
    # Build Pred as comma-separated "<rank letter>:<name>" pairs, taking the
    # rank letter for position i from Ranks[i].
    Pred = ""
    for i in range(0, n):
        if i > 0:
            Pred += ","
        Rank = Ranks[i]
        Pred += Rank + ":" + Fields2[i]