コード例 #1
0
    def test_parse_taxonomy_table(self):
        """
        Check parse_taxonomy_table against a hand-built OTU -> taxonomy map.

        The fixture file is parsed once; every OTU ID in the hand-built map
        must then be present in the parsed result with exactly the expected
        taxonomy string.

        :return: Returns OK if test goals were achieved, otherwise raises
                 error.
        """
        parsed = ut.parse_taxonomy_table("phylotoast/test/test_taxa.txt")
        failure_msg = "Taxonomy file was not accurately parsed into (OTU, taxonomy) dict."

        # Expected taxonomy string for each OTU ID checked by this test.
        expected_taxa = {
            "018AP132":
                "k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Neisseriales; f__Neisseriaceae; g__Neisseria; s__HOT.018",
            "057BE024":
                "k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Streptococcaceae; g__Streptococcus; s__HOT.057",
            "083BS091":
                "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae_[XIVa]; g__Lachnoanaerobaculum; s__HOT.083",
            "105_3039":
                "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Peptostreptococcaceae_[XI]; g__Eubacterium_[XI][G-1]; s__infirmum",
            "122_8622":
                "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megasphaera; s__micronuciformis",
            "130Snoxi":
                "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__noxia",
            "139EW076":
                "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__dianae",
            "151_K168":
                "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__sputigena",
            "214DE081":
                "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichia; s__shahii",
            "220FB074":
                "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichiaceae_[G-1]; s__HOT.220",
            "222_7816":
                "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichia; s__wadei",
        }

        for otu_id, expected in expected_taxa.items():
            self.assertEqual(parsed[otu_id], expected, msg=failure_msg)
コード例 #2
0
def main():
    """
    Write an assigned-taxonomy table for the sequences of a representative set.

    Options are obtained from handle_program_options(). Every sequence ID in
    the FASTA file ``args.rep_set_fp`` is looked up in the taxonomy table
    parsed from ``args.id_to_taxonomy_fp``, and one tab-separated line of the
    form ``<id> <taxonomy> 0.0 <id>`` is written to
    ``args.assigned_taxonomy_fp``. With ``args.verbose`` set, a short summary
    is printed afterwards.

    Exits through sys.exit() with a message when the taxonomy file cannot be
    opened, or when a sequence ID has no entry in the taxonomy table.
    """
    args = handle_program_options()

    # Probe the taxonomy file up front so an unreadable path is reported with
    # a short message.
    try:
        with open(args.id_to_taxonomy_fp):
            pass
    except IOError as ioe:
        sys.exit(
            '\nError mapping sequences to assigned taxonomy filepath:{}\n'
            .format(ioe)
        )

    # input the ID to Taxonomy table and the rep set
    taxids = util.parse_taxonomy_table(args.id_to_taxonomy_fp)
    rep_set = SeqIO.to_dict(SeqIO.parse(args.rep_set_fp, 'fasta'))

    # Validate before the output file is opened: a missing ID used to raise a
    # bare KeyError in the middle of writing, leaving a truncated file behind.
    missing = [seq_id for seq_id in rep_set if seq_id not in taxids]
    if missing:
        sys.exit(
            '\nError: {} sequence ID(s) from the rep set are missing from the '
            'taxonomy table (first missing ID: {})\n'
            .format(len(missing), missing[0])
        )

    # write out the assigned taxonomy file
    with open(args.assigned_taxonomy_fp, 'w') as outF:
        for taxid in rep_set:
            outF.write(
                '{0}\t{1}\t{2}\t{0}\n'.format(taxid, taxids[taxid], 0.0)
            )

    if args.verbose:
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2 and is also valid Python 3 syntax.
        print('Taxonomy written to: {}'.format(args.assigned_taxonomy_fp))
        print('{} OTU records written'.format(len(rep_set)))
コード例 #3
0
def main():
    """
    Translate between OTU IDs and OTU names using a taxonomy table.

    Options are obtained from handle_program_options(). The input file
    ``args.otu_id_fp`` is read line by line (blank lines are ignored):

    * Without ``args.reverse_lookup`` each line is split on tabs. Lines whose
      first field starts with ``#`` are copied to the output unchanged; for
      every other line the first field (an OTU ID) is replaced by the name
      returned by ``otuc.otu_name`` and the remaining fields are kept.
    * With ``args.reverse_lookup`` each line is treated as an OTU name and the
      matching OTU ID from the taxonomy table is written, one per line.

    Results go to ``args.output_fp``. If an ID (or, in reverse mode, a name)
    cannot be found in the taxonomy table, an error is printed and processing
    stops; lines already written stay in the output file.
    """
    args = handle_program_options()

    # Probe the input file up front so an unreadable path is reported with a
    # short message.
    try:
        with open(args.otu_id_fp):
            pass
    except IOError as ioe:
        sys.exit("\nError with file containing OTUIDs/BIOM format:{}\n".format(ioe))

    with open(args.otu_id_fp, "rU") as otuF:
        # Drop blank lines here. The old ``if line:`` test never fired because
        # lines from readlines() keep their newline, and in the tab-split
        # branch an empty line became [""], which crashed on entry[0][0].
        stripped_lines = [line.strip() for line in otuF]
    stripped_lines = [line for line in stripped_lines if line]

    if args.reverse_lookup:
        otu_ids = stripped_lines
    else:
        otu_ids = [line.split("\t") for line in stripped_lines]
    taxa = util.parse_taxonomy_table(args.taxonomy_fp)

    # The OTU name of each taxonomy entry does not depend on the query, so it
    # is computed once here rather than once per input line.
    named_taxa = []
    if args.reverse_lookup:
        named_taxa = [
            (otu_id, otuc.otu_name(fulltaxa.split("; ")))
            for otu_id, fulltaxa in taxa.items()
        ]

    with open(args.output_fp, "w") as outF:
        for entry in otu_ids:
            if isinstance(entry, list):
                # check for comments in BIOM files
                if entry[0].startswith("#"):
                    outF.write("{}\n".format("\t".join(entry)))
                    continue
                ID = entry[0]
            # instead of a BIOM file, a line-by-line list of OTU IDs
            else:
                ID = entry

            # for looking up OTUIDs
            if args.reverse_lookup:
                # Reset for every entry: previously a name without a match
                # silently reused the ID found for the preceding entry (or
                # raised NameError on the very first one).
                taxa_id = None
                for otu_id, otuname in named_taxa:
                    if otuname == ID:
                        # No break: as before, the last matching entry wins.
                        taxa_id = otu_id
                if taxa_id is None:
                    print("Error: OTU name {} not found in supplied taxonomy file.".format(ID))
                    return
            # for looking up OTU name
            else:
                if ID in taxa:
                    named_ID = otuc.otu_name(taxa[ID].split("; "))
                else:
                    print("Error: OTU ID {} not found in supplied taxonomy file.".format(ID))
                    return

            # write out to file
            out_str = "{}\t{}\n"
            if isinstance(entry, list):
                outF.write(out_str.format(named_ID, "\t".join(entry[1:])))
            elif args.reverse_lookup:
                outF.write("{}\n".format(taxa_id))
            else:
                # NOTE(review): unreachable as written -- without
                # reverse_lookup every entry is a list. Kept for parity with
                # the original structure.
                outF.write(out_str.format(ID, named_ID))
コード例 #4
0
    def test_parse_taxonomy_table(self):
        """
        Verify parse_taxonomy_table on the bundled taxonomy fixture.

        Each OTU ID in the reference mapping below must map, in the parsed
        result, to exactly the taxonomy string recorded for it here.

        :return: Returns OK if test goals were achieved, otherwise raises
                 error.
        """
        # Reference OTU ID -> taxonomy string pairs, written out by hand.
        reference = {
            "018AP132": "k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Neisseriales; f__Neisseriaceae; g__Neisseria; s__HOT.018",
            "057BE024": "k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Streptococcaceae; g__Streptococcus; s__HOT.057",
            "083BS091": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae_[XIVa]; g__Lachnoanaerobaculum; s__HOT.083",
            "105_3039": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Peptostreptococcaceae_[XI]; g__Eubacterium_[XI][G-1]; s__infirmum",
            "122_8622": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megasphaera; s__micronuciformis",
            "130Snoxi": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__noxia",
            "139EW076": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__dianae",
            "151_K168": "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Selenomonas; s__sputigena",
            "214DE081": "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichia; s__shahii",
            "220FB074": "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichiaceae_[G-1]; s__HOT.220",
            "222_7816": "k__Bacteria; p__Fusobacteria; c__Fusobacteria; o__Fusobacteriales; f__Leptotrichiaceae; g__Leptotrichia; s__wadei",
        }
        mismatch_msg = "Taxonomy file was not accurately parsed into (OTU, taxonomy) dict."

        observed = ut.parse_taxonomy_table("phylotoast/test/test_taxa.txt")
        for otu, taxonomy in reference.items():
            self.assertEqual(observed[otu], taxonomy, msg=mismatch_msg)
コード例 #5
0
def main():
    """
    Replace OTU IDs in a tab-separated file with their OTU names.

    Options are obtained from handle_program_options(). Each non-blank line
    of ``args.otu_id_fp`` is split on tabs. Lines whose first field starts
    with ``#`` are copied to ``args.output_fp`` unchanged; for every other
    line the first field (an OTU ID) is looked up in the taxonomy table parsed
    from ``args.taxonomy_fp`` and replaced by the name returned by
    ``otuc.otu_name``, with the remaining fields kept as they are.

    If an OTU ID is missing from the taxonomy table an error is printed and
    processing stops; lines already written stay in the output file.
    """
    args = handle_program_options()

    # Probe the input file up front so an unreadable path is reported with a
    # short message.
    try:
        with open(args.otu_id_fp):
            pass
    except IOError as ioe:
        sys.exit('\nError with file containing OTUIDs/BIOM format:{}\n'.format(ioe))

    with open(args.otu_id_fp, 'rU') as otuF:
        stripped_lines = [line.strip() for line in otuF]
    # Skip blank lines: an empty line used to become [''] and crash with
    # IndexError on entry[0][0] below.
    otu_ids = [line.split('\t') for line in stripped_lines if line]
    taxa = util.parse_taxonomy_table(args.taxonomy_fp)

    with open(args.output_fp, 'w') as outF:
        for entry in otu_ids:
            if isinstance(entry, list):
                # check for comments in BIOM files
                if entry[0].startswith('#'):
                    outF.write('{}\n'.format('\t'.join(entry)))
                    continue
                ID = entry[0]
            # instead of a BIOM file, a line-by-line list of OTU IDs
            else:
                # NOTE(review): unreachable as written -- split() always
                # yields a list, so single-column lines take the branch above.
                ID = entry

            if ID not in taxa:
                print('Error: OTU ID {} not found in supplied taxonomy file; stopping...'.format(ID))
                return
            named_ID = otuc.otu_name(taxa[ID].split('; '))

            # write out to file
            out_str = '{}\t{}\n'
            if isinstance(entry, list):
                outF.write(out_str.format(named_ID, '\t'.join(entry[1:])))
            else:
                outF.write(out_str.format(ID, named_ID))