def test_parse_taxonomy_table(self):
    """
    Check parse_taxonomy_table against a hand-built OTU-to-taxonomy mapping.

    The taxonomy fixture is parsed and, for each OTU ID listed below, the
    parsed taxonomy string is compared with the hand-written expectation.

    :return: Returns OK if every listed OTU matches, otherwise raises error.
    """
    taxa_data = ut.parse_taxonomy_table("phylotoast/test/test_taxa.txt")

    # Expected taxonomy strings, keyed by OTU ID.
    hand_calc = {
        "018AP132": "k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Neisseriales; f__Neisseriaceae; g__Neisseria; s__HOT.018",
        "057BE024": "k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Streptococcaceae; g__Streptococcus; s__HOT.057",
        "083BS091": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae_[XIVa]; g__Lachnoanaerobaculum; s__HOT.083",
        "105_3039": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Peptostreptococcaceae_[XI]; g__Eubacterium_[XI][G-1]; s__infirmum",
        "122_8622": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megasphaera; s__micronuciformis",
        "130Snoxi": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__noxia",
        "139EW076": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__dianae",
        "151_K168": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__sputigena",
        "214DE081": "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichia; s__shahii",
        "220FB074": "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichiaceae_[G-1]; s__HOT.220",
        "222_7816": "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichia; s__wadei",
    }

    failure_msg = "Taxonomy file was not accurately parsed into (OTU, taxonomy) dict."
    for otu_id, expected_taxonomy in hand_calc.items():
        self.assertEqual(taxa_data[otu_id], expected_taxonomy, msg=failure_msg)
def main():
    """Write an assigned-taxonomy file for every sequence in a representative set.

    Command-line options come from ``handle_program_options()``. The
    ID-to-taxonomy table is loaded with ``util.parse_taxonomy_table`` and the
    FASTA representative set with ``SeqIO``. One tab-separated line per
    sequence is written to ``args.assigned_taxonomy_fp``: the sequence ID, its
    taxonomy, the constant ``0.0`` and the sequence ID again.

    Exits through ``sys.exit`` with a message when the taxonomy file cannot be
    opened, or when a representative sequence has no entry in the taxonomy
    table.
    """
    args = handle_program_options()

    # Fail early, with a readable message, if the taxonomy file is unreadable.
    try:
        with open(args.id_to_taxonomy_fp):
            pass
    except IOError as ioe:
        sys.exit(
            '\nError mapping sequences to assigned taxonomy filepath:{}\n'
            .format(ioe)
        )

    # input the ID to Taxonomy table and the rep set
    taxids = util.parse_taxonomy_table(args.id_to_taxonomy_fp)
    rep_set = SeqIO.to_dict(SeqIO.parse(args.rep_set_fp, 'fasta'))

    # Validate before opening the output: opening with 'w' truncates any
    # existing file, and a lookup failure midway would leave a partial result.
    missing = [taxid for taxid in rep_set if taxid not in taxids]
    if missing:
        sys.exit(
            '\nError: no taxonomy entry found for sequence ID(s): {}\n'
            .format(', '.join(missing))
        )

    # write out the assigned taxonomy file
    with open(args.assigned_taxonomy_fp, 'w') as outF:
        for taxid in rep_set:
            outF.write(
                '{0}\t{1}\t{2}\t{0}\n'.format(taxid, taxids[taxid], 0.0)
            )

    if args.verbose:
        # Single-argument print(...) behaves the same with or without
        # print_function, so the output is unchanged on Python 2.
        print('Taxonomy written to: {}'.format(args.assigned_taxonomy_fp))
        print('{} OTU records written'.format(len(rep_set)))
def main():
    """Translate between OTU IDs and OTU names using a taxonomy table.

    Command-line options come from ``handle_program_options()``. The taxonomy
    table is loaded with ``util.parse_taxonomy_table``; names are produced by
    passing the ``"; "``-split taxonomy string to ``otuc.otu_name``.

    Forward mode (default): ``args.otu_id_fp`` is read as tab-separated rows
    whose first field is an OTU ID. Rows starting with ``#`` are copied
    through unchanged; in every other row the ID is replaced by its OTU name.

    Reverse mode (``args.reverse_lookup``): ``args.otu_id_fp`` holds one OTU
    name per line, and the matching OTU ID is written for each. If several
    IDs map to the same name, the last one encountered in the taxonomy table
    is used.

    Blank input lines are ignored in both modes. When an ID or name is not
    found in the taxonomy table, an error is printed and processing stops.
    Exits through ``sys.exit`` if the input file cannot be opened.
    """
    args = handle_program_options()

    try:
        with open(args.otu_id_fp):
            pass
    except IOError as ioe:
        sys.exit("\nError with file containing OTUIDs/BIOM format:{}\n".format(ioe))

    with open(args.otu_id_fp, "rU") as otuF:
        # Every raw line still carries its newline, so emptiness must be
        # tested after stripping; blank lines have no ID or name to look up.
        stripped_lines = [line.strip() for line in otuF]
    stripped_lines = [line for line in stripped_lines if line]

    taxa = util.parse_taxonomy_table(args.taxonomy_fp)

    with open(args.output_fp, "w") as outF:
        if args.reverse_lookup:
            # Build the name -> ID map once instead of rescanning every taxon
            # per query. Later entries overwrite earlier ones, so a name
            # shared by several IDs resolves to the last one iterated.
            name_to_id = {}
            for otu_id, fulltaxa in taxa.items():
                name_to_id[otuc.otu_name(fulltaxa.split("; "))] = otu_id

            for otu_name in stripped_lines:
                if otu_name not in name_to_id:
                    # Stop rather than emit an unbound or stale ID.
                    print("Error: OTU name {} not found in supplied taxonomy file.".format(otu_name))
                    return
                outF.write("{}\n".format(name_to_id[otu_name]))
            return

        for line in stripped_lines:
            entry = line.split("\t")

            # check for comments in BIOM files
            if entry[0].startswith("#"):
                outF.write("{}\n".format("\t".join(entry)))
                continue

            ID = entry[0]
            if ID not in taxa:
                print("Error: OTU ID {} not found in supplied taxonomy file.".format(ID))
                return
            named_ID = otuc.otu_name(taxa[ID].split("; "))

            # write out to file
            outF.write("{}\t{}\n".format(named_ID, "\t".join(entry[1:])))
def test_parse_taxonomy_table(self):
    """
    Verify that parse_taxonomy_table reproduces hand-written taxonomy strings.

    :return: Returns OK when each expected OTU entry equals the parsed one,
             otherwise raises error.
    """
    parsed = ut.parse_taxonomy_table("phylotoast/test/test_taxa.txt")

    # Reference mapping written by hand from the fixture file.
    hand_calc = dict([
        ("018AP132", "k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Neisseriales; f__Neisseriaceae; g__Neisseria; s__HOT.018"),
        ("057BE024", "k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Streptococcaceae; g__Streptococcus; s__HOT.057"),
        ("083BS091", "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae_[XIVa]; g__Lachnoanaerobaculum; s__HOT.083"),
        ("105_3039", "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Peptostreptococcaceae_[XI]; g__Eubacterium_[XI][G-1]; s__infirmum"),
        ("122_8622", "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megasphaera; s__micronuciformis"),
        ("130Snoxi", "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__noxia"),
        ("139EW076", "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__dianae"),
        ("151_K168", "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__sputigena"),
        ("214DE081", "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichia; s__shahii"),
        ("220FB074", "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichiaceae_[G-1]; s__HOT.220"),
        ("222_7816", "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichia; s__wadei"),
    ])

    for otu in hand_calc:
        observed = parsed[otu]
        expected = hand_calc[otu]
        self.assertEqual(
            observed,
            expected,
            msg="Taxonomy file was not accurately parsed into (OTU, taxonomy) dict.",
        )
def main():
    """Replace OTU IDs with OTU names in a tab-separated (BIOM-style) file.

    Command-line options come from ``handle_program_options()``. Each
    non-blank line of ``args.otu_id_fp`` is stripped and split on tabs; the
    first field is treated as an OTU ID. Rows whose first field starts with
    ``#`` are copied to ``args.output_fp`` unchanged. For every other row the
    ID is looked up in the table returned by ``util.parse_taxonomy_table``,
    converted with ``otuc.otu_name`` (called on the ``'; '``-split taxonomy
    string), and written in place of the ID, followed by the remaining fields.

    Blank input lines are ignored. If an ID is missing from the taxonomy
    table, an error is printed and processing stops. Exits through
    ``sys.exit`` if the input file cannot be opened.
    """
    args = handle_program_options()

    try:
        with open(args.otu_id_fp):
            pass
    except IOError as ioe:
        sys.exit('\nError with file containing OTUIDs/BIOM format:{}\n'.format(ioe))

    with open(args.otu_id_fp, 'rU') as otuF:
        # Skip blank lines: they would yield an empty first field, and
        # indexing its first character would raise IndexError.
        rows = [line.strip().split('\t') for line in otuF if line.strip()]

    taxa = util.parse_taxonomy_table(args.taxonomy_fp)

    with open(args.output_fp, 'w') as outF:
        for fields in rows:
            # check for comments in BIOM files
            if fields[0].startswith('#'):
                outF.write('{}\n'.format('\t'.join(fields)))
                continue

            ID = fields[0]
            if ID not in taxa:
                # Single-argument print(...) prints identically on Python 2.
                print('Error: OTU ID {} not found in supplied taxonomy file; stopping...'.format(ID))
                return
            named_ID = otuc.otu_name(taxa[ID].split('; '))

            # write out to file
            outF.write('{}\t{}\n'.format(named_ID, '\t'.join(fields[1:])))