# Example no. 1
0
def OnSeq(Label, Seq):
    """Re-label one sequence and hand it to ``fasta.WriteSeq`` on stdout.

    The accession, family, genus and species are extracted from ``Label``
    through the ``utax2`` helpers. The new label is the tab-separated
    string ``accession, species, genus, family`` with any ``f:``, ``g:``
    and ``s:`` substrings removed from the names.

    A record whose genus or species is empty is not written; it only
    increments the module-level ``Missing`` counter. An empty family is
    written as ``"NA"``.

    Args:
        Label: sequence label to parse.
        Seq: sequence data, passed through unchanged to ``fasta.WriteSeq``.
    """
    global Missing

    accession = utax2.GetAccFromLabel(Label)
    family_name = utax2.GetNameFromLabel(Label, 'f')
    genus_name = utax2.GetNameFromLabel(Label, 'g')
    species_name = utax2.GetNameFromLabel(Label, 's')

    # Genus and species are mandatory; count the record and skip it.
    if "" in (genus_name, species_name):
        Missing += 1
        return

    # Family is optional: fall back to a placeholder when absent.
    family_name = "NA" if family_name == "" else family_name.replace("f:", "")
    genus_name = genus_name.replace("g:", "")
    species_name = species_name.replace("s:", "")

    relabeled = "\t".join([accession, species_name, genus_name, family_name])
    fasta.WriteSeq(sys.stdout, Seq, relabeled)
# Example no. 2
0
def DoPredFile(FileName, NameToCountA, NameToCountB):
    """Tally prediction outcomes from a two-column, tab-separated file.

    Each line holds a query label and a prediction string. The query name
    and the predicted name are both resolved at the module-level ``Rank``
    via ``utax2``. A query is "known" when ``sortdict.GetCount`` returns a
    count greater than zero for its name in ``NameToCountB``, and "novel"
    otherwise.

    Every line is put into exactly one of these categories:

    * TP -- predicted name equals the query name.
    * TN -- no predicted name and the count is zero.
    * FN -- no predicted name and the count is non-zero.
    * OC -- a different name was predicted and the count is zero.
    * MC -- a different name was predicted and the count is non-zero.

    NC is incremented independently for every line that has a non-empty
    predicted name.

    When the module-level ``REPORT`` flag is true, one tab-separated line
    (accession, category, known/novel, query name, predicted name or "-",
    count) is printed per input line.

    Args:
        FileName: path of the prediction file to read.
        NameToCountA: not used by this function; kept so existing callers
            continue to work.
        NameToCountB: mapping handed to ``sortdict.GetCount`` to look up
            the count for each query name.

    Returns:
        The tuple ``(Known, Novel, TP, TN, FN, OC, MC, NC)``.

    Raises:
        AssertionError: if a line does not have exactly two tab-separated
            fields, or the query label has no name at ``Rank``.
    """
    TP = 0
    TN = 0
    FN = 0
    OC = 0
    MC = 0
    NC = 0
    Known = 0
    Novel = 0

    # The context manager guarantees the handle is closed, including when
    # an assertion or die.Die aborts the loop part-way through the file.
    with open(FileName) as f:
        for Line in f:
            # Strip only line-ending characters. Slicing off the last
            # character unconditionally would eat real data when the final
            # line of the file has no trailing newline.
            Fields = Line.rstrip("\r\n").split('\t')
            assert len(Fields) == 2, \
                "expected 2 tab-separated fields, got %d" % len(Fields)

            QueryLabel = Fields[0]
            Pred = Fields[1]
            if Pred.endswith(';'):
                Pred = Pred[:-1]

            QueryName = utax2.GetNameFromLabel(QueryLabel, Rank)
            assert QueryName != "", \
                "no name at the requested rank in label >%s" % QueryLabel

            # Keep only the text after a "tax=" marker when one is present.
            if "tax=" in Pred:
                Pred = Pred.split("tax=")[1]

            # "*" is treated as "no prediction".
            if Pred == "*":
                PredName = ""
            else:
                PredName = utax2.GetNameFromTaxStr(Pred, Rank)
            if PredName != "":
                NC += 1

            Count = sortdict.GetCount(NameToCountB, QueryName)
            IsKnown = (Count > 0)
            if IsKnown:
                Known += 1
            else:
                Novel += 1

            # A correct prediction for a name with no count is reported as
            # a fatal inconsistency.
            if PredName == QueryName and not IsKnown:
                die.Die("QueryName=%s, PredName=%s >%s" %
                        (QueryName, PredName, QueryLabel))

            if PredName == QueryName:
                XX = "TP"
                TP += 1
            elif PredName == "":
                if Count == 0:
                    XX = "TN"
                    TN += 1
                else:
                    XX = "FN"
                    FN += 1
            else:
                if Count == 0:
                    XX = "OC"
                    OC += 1
                else:
                    XX = "MC"
                    MC += 1

            if REPORT:
                Acc = fasta.GetAccFromLabel(QueryLabel)
                if IsKnown:
                    k = "known"
                else:
                    k = "novel"

                PredNameStr = "-"
                if PredName != "":
                    PredNameStr = PredName
                s = "\t".join(
                    [Acc, XX, k, QueryName, PredNameStr, str(Count)])
                # Parenthesised single-argument form prints identically on
                # Python 2 and is also valid Python 3 syntax.
                print(s)

    return Known, Novel, TP, TN, FN, OC, MC, NC