global DomainRank global Ranks Acc = fasta.GetAccFromLabel(Label) Tax = fasta.GetTaxFromLabel(Label) Acc = Acc.upper() AccToTax[Acc] = Tax if DomainRank == '?': DomainRank = Tax[0] Ranks = DomainRank + Ranks[1:] assert Ranks[1] == 'p' fasta.ReadSeqsOnSeq(FastaFileName, OnSeq) f = open(FileName) while 1: Line = f.readline() if len(Line) == 0: break # gi_1018196556 Bacteria;Proteobacteria;Gammaproteobacteria;Oceanospirillales;Halomonadaceae; 96.58 292 91.03 Fields = Line[:-1].split('\t') Label = Fields[0] Acc = fasta.GetAccFromLabel(Label) Acc = Acc.upper() QTax = AccToTax[Acc]
return Tax + RankName + ":" + Name[2:] + ";" def OnSeq(Label, Seq): Acc = fasta.GetAccFromLabel(Label) TaxStr = utax2.GetTaxFromLabel(Label) k = 'k' if TaxStr.find("d:") >= 0: k = 'd' # NR_117221.1 species:Mycobacterium arosiense;genus:Mycobacterium;family:Mycobacteriaceae;order:Corynebacteriales;class:Actinobacteria;phylum:Actinobacteria;superkingdom:Bacteria; Tax = "" if TaxStr.find(",s:") > 0: Tax = AppendRank(Tax, TaxStr, 's') Tax = AppendRank(Tax, TaxStr, 'g') Tax = AppendRank(Tax, TaxStr, 'f') Tax = AppendRank(Tax, TaxStr, 'o') Tax = AppendRank(Tax, TaxStr, 'c') Tax = AppendRank(Tax, TaxStr, 'p') Tax = AppendRank(Tax, TaxStr, k) Acc = Acc.upper() NewLabel = "REF_" + Acc fasta.WriteSeq(fFa, Seq, NewLabel) print >> fTax, "%s\t%s" % (NewLabel, Tax) fasta.ReadSeqsOnSeq(InputFileName, OnSeq)