def test_make_otu_table_with_sample_metadata(self):
    """make_otu_table should attach sample metadata taken from a mapping file

    Also checks that a KeyError is raised when the mapping file lacks the
    metadata for one of the samples.
    """
    # Want to make sure that the order of the sample IDs in the OTU
    # map and the order of the IDs in the mapping file do not matter
    #
    # NOTE(review): one OTU per entry, fields joined by tabs; the separators
    # were reconstructed from the expected counts below -- confirm against
    # the upstream fixture.
    otu_map_lines = [
        '0\tABC_0\tDEF_1',
        '1\tABC_1',
        'x\tGHI_2\tGHI_3\tGHI_77',
        'z\tDEF_3\tXYZ_1',
    ]
    sample_ids = ['ABC', 'DEF', 'GHI', 'XYZ']
    data = [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]]

    map_data, map_header, _ = parse_mapping_file(StringIO(MAPPING_FILE))
    sample_metadata = mapping_file_to_dict(map_data, map_header)
    # Expected metadata follows the column order used for the table.
    sample_md = [sample_metadata[sample_id] for sample_id in sample_ids]

    obs = make_otu_table(otu_map_lines, sample_metadata=sample_metadata)
    exp = Table(data, ['0', '1', 'x', 'z'], sample_ids,
                sample_metadata=sample_md, input_is_dense=True)
    self.assertEqual(obs, exp)

    # Test with a mapping file that is missing a sample's metadata,
    # make sure it raises the KeyError
    map_data, map_header, _ = parse_mapping_file(
        StringIO(MAPPING_FILE_MISSING_SAMPLE))
    sample_metadata = mapping_file_to_dict(map_data, map_header)
    with self.assertRaises(KeyError):
        make_otu_table(otu_map_lines, sample_metadata=sample_metadata)
def main():
    """Script entry point: build an OTU table and write it to the output path.

    Reads the command-line options, optionally parses a taxonomy file and a
    list of OTU IDs to exclude, passes them with the OTU map to
    make_otu_table and writes the returned value to opts.output_biom_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    exclude_otus_fp = opts.exclude_otus_fp

    # The output is opened before any input is parsed (as before), so an
    # unwritable path still fails immediately; try/finally guarantees the
    # handle is flushed and closed.
    outfile = open(opts.output_biom_fp, 'w')
    try:
        otu_to_taxonomy = None
        if opts.taxonomy_fname:
            # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
            with open(opts.taxonomy_fname, 'U') as taxonomy_f:
                otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        ids_to_exclude = []
        if exclude_otus_fp:
            # The file extension decides how the exclusion list is parsed.
            if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
                read_ids = get_seq_ids_from_fasta_file
            else:
                read_ids = get_seq_ids_from_seq_id_file
            # assumes the readers return a materialized collection -- TODO confirm
            with open(exclude_otus_fp, 'U') as exclude_f:
                ids_to_exclude = read_ids(exclude_f)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            biom_otu_table = make_otu_table(otu_map_f,
                                            otu_to_taxonomy,
                                            ids_to_exclude)
        outfile.write(biom_otu_table)
    finally:
        outfile.close()
def _call_cleanup(self,
                  input_fp,
                  output_dir,
                  params,
                  job_prefix,
                  poll_directly,
                  suppress_submit_jobs):
    """ Called as the last step in __call__.

    When poll_directly is true, builds a table from
    <output_dir>/observation_map.txt (plus the optional metadata file named
    by params['observation_metadata_fp']) and writes it to
    <output_dir>/observation_table.biom. Otherwise does nothing.
    """
    if not poll_directly:
        # can't construct the final biom file if not polling
        # directly as the final observation map won't have been created yet
        return

    observation_metadata = None
    metadata_fp = params['observation_metadata_fp']
    if metadata_fp is not None:
        # assumes the parser reads the handle eagerly -- TODO confirm
        with open(metadata_fp, 'U') as metadata_f:
            observation_metadata = parse_observation_metadata(metadata_f)

    biom_fp = join(output_dir, 'observation_table.biom')
    with open(join(output_dir, 'observation_map.txt'), 'U') as map_f:
        biom_table = make_otu_table(map_f, observation_metadata)
    write_biom_table(biom_table, biom_fp)
def main():
    """Script entry point: build an OTU table, optionally with sample metadata.

    Reads the command-line options, optionally parses a taxonomy file, an
    OTU exclusion list and a mapping file, passes them with the OTU map to
    make_otu_table and hands the result to write_biom_table.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    exclude_otus_fp = opts.exclude_otus_fp

    otu_to_taxonomy = None
    if opts.taxonomy_fname:
        # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
        with open(opts.taxonomy_fname, 'U') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

    ids_to_exclude = []
    if exclude_otus_fp:
        # The file extension decides how the exclusion list is parsed.
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            read_ids = get_seq_ids_from_fasta_file
        else:
            read_ids = get_seq_ids_from_seq_id_file
        # assumes the readers return a materialized collection -- TODO confirm
        with open(exclude_otus_fp, 'U') as exclude_f:
            ids_to_exclude = read_ids(exclude_f)

    sample_metadata = None
    if opts.mapping_fp is not None:
        with open(opts.mapping_fp, 'U') as map_f:
            mapping_data, mapping_header, mapping_comments = \
                parse_mapping_file(map_f)
        sample_metadata = assemble_sample_metadata(mapping_data,
                                                   mapping_header,
                                                   mapping_comments)

    with open(opts.otu_map_fp, 'U') as otu_map_f:
        biom_otu_table = make_otu_table(otu_map_f,
                                        otu_to_taxonomy=otu_to_taxonomy,
                                        otu_ids_to_exclude=ids_to_exclude,
                                        sample_metadata=sample_metadata)
    write_biom_table(biom_otu_table, opts.output_biom_fp)
def main():
    """Script entry point: build an OTU table, optionally with sample metadata.

    Reads the command-line options, optionally parses a taxonomy file, an
    OTU exclusion list and a mapping file (converted with
    mapping_file_to_dict), passes them with the OTU map to make_otu_table
    and hands the result to write_biom_table.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    exclude_otus_fp = opts.exclude_otus_fp

    otu_to_taxonomy = None
    if opts.taxonomy_fname:
        # Closed like the mapping/OTU-map handles below.
        # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
        with open(opts.taxonomy_fname, 'U') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

    ids_to_exclude = []
    if exclude_otus_fp:
        # The file extension decides how the exclusion list is parsed.
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            read_ids = get_seq_ids_from_fasta_file
        else:
            read_ids = get_seq_ids_from_seq_id_file
        # assumes the readers return a materialized collection -- TODO confirm
        with open(exclude_otus_fp, 'U') as exclude_f:
            ids_to_exclude = read_ids(exclude_f)

    sample_metadata = None
    if opts.mapping_fp is not None:
        with open(opts.mapping_fp, 'U') as map_f:
            mapping_data, mapping_header, mapping_comments = \
                parse_mapping_file(map_f)
        sample_metadata = mapping_file_to_dict(mapping_data, mapping_header)

    with open(opts.otu_map_fp, 'U') as otu_map_f:
        biom_otu_table = make_otu_table(otu_map_f,
                                        otu_to_taxonomy=otu_to_taxonomy,
                                        otu_ids_to_exclude=ids_to_exclude,
                                        sample_metadata=sample_metadata)
    write_biom_table(biom_otu_table, opts.output_biom_fp)
def test_make_otu_table_taxonomy(self):
    """make_otu_table should work with taxonomy"""
    # NOTE(review): one OTU per entry, fields joined by tabs; the separators
    # were reconstructed from the expected counts below -- confirm against
    # the upstream fixture.
    otu_map_lines = [
        '0\tABC_0\tDEF_1',
        '1\tABC_1',
        'x\tGHI_2\tGHI_3\tGHI_77',
        'z\tDEF_3\tXYZ_1',
    ]
    taxonomy = {
        '0': ['Bacteria', 'Firmicutes'],
        'x': ['Bacteria', 'Bacteroidetes'],
    }

    obs = make_otu_table(otu_map_lines, taxonomy)

    data = [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]]
    # OTUs absent from the taxonomy dict are expected to carry ['None'].
    obs_md = [
        {'taxonomy': ['Bacteria', 'Firmicutes']},
        {'taxonomy': ['None']},
        {'taxonomy': ['Bacteria', 'Bacteroidetes']},
        {'taxonomy': ['None']},
    ]
    exp = Table(data, ['0', '1', 'x', 'z'], ['ABC', 'DEF', 'GHI', 'XYZ'],
                observation_metadata=obs_md, input_is_dense=True)
    self.assertEqual(obs, exp)
def _generate_biom_output(self,
                          observation_map_fp,
                          output_biom_fp,
                          observation_metadata_fp):
    """Build a table from an observation map and write it to output_biom_fp.

    observation_map_fp: path of the observation map passed to make_otu_table
    output_biom_fp: path handed to write_biom_table
    observation_metadata_fp: path of a file parsed with parse_taxonomy, or
     None to build the table without observation metadata
    """
    observation_metadata = None
    if observation_metadata_fp is not None:
        # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
        with open(observation_metadata_fp, "U") as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f)

    with open(observation_map_fp, "U") as observation_map_f:
        biom_table = make_otu_table(observation_map_f, observation_metadata)
    write_biom_table(biom_table, output_biom_fp)
def test_make_otu_table_no_taxonomy(self):
    """make_otu_table should work without tax (new-style OTU table)"""
    # NOTE(review): one OTU per entry, fields joined by tabs; the separators
    # were reconstructed from the expected "data" matrix below -- confirm
    # against the upstream fixture.
    otu_map_lines = [
        '0\tABC_0\tDEF_1',
        '1\tABC_1',
        'x\tGHI_2\tGHI_3\tGHI_77',
        'z\tDEF_3\tXYZ_1',
    ]
    obs = make_otu_table(otu_map_lines, constructor=DenseOTUTable)
    exp = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "x", "metadata": null}, {"id": "z", "metadata": null}], "format": "Biological Observation Matrix 0.9dev", "data": [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:49:15.978315", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    # Both sides go through parse_biom_table and the parsed tables are
    # compared, rather than the raw strings.
    self.assertEqual(parse_biom_table(obs.split('\n')),
                     parse_biom_table(exp.split('\n')))
def test_make_otu_table_taxonomy(self):
    """make_otu_table should work with taxonomy"""
    # NOTE(review): one OTU per entry, fields joined by tabs; the separators
    # were reconstructed from the expected "data" matrices below -- confirm
    # against the upstream fixture.
    otu_map_lines = [
        '0\tABC_0\tDEF_1',
        '1\tABC_1',
        'x\tGHI_2\tGHI_3\tGHI_77',
        'z\tDEF_3\tXYZ_1',
    ]

    # Single taxonomic assignment.
    taxonomy = {'0': 'Bacteria;Firmicutes',
                'x': 'Bacteria;Bacteroidetes'}
    obs = make_otu_table(otu_map_lines, taxonomy, constructor=DenseOTUTable)
    exp = """{"rows": [{"id": "0", "metadata": {"taxonomy": ["Bacteria", "Firmicutes"]}}, {"id": "1", "metadata": {"taxonomy": ["None"]}}, {"id": "x", "metadata": {"taxonomy": ["Bacteria", "Bacteroidetes"]}}, {"id": "z", "metadata": {"taxonomy": ["None"]}}], "format": "Biological Observation Matrix 0.9dev", "data": [[1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.0, 0.0], [0.0, 1.0, 0.0, 1.0]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:19:30.961477", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    self.assertEqual(parse_biom_table(obs.split('\n')),
                     parse_biom_table(exp.split('\n')))

    # Multiple taxonomic assignments.
    taxonomy = {'0': ['Bacteria;Firmicutes'],
                'x': ['Bacteria;Bacteroidetes', 'Bacteria;Acidobacteria']}
    obs = make_otu_table(otu_map_lines, taxonomy, constructor=DenseOTUTable)
    exp = """{"rows": [{"id": "0", "metadata": {"taxonomy": [["Bacteria", "Firmicutes"]]}}, {"id": "1", "metadata": {"taxonomy": ["None"]}}, {"id": "x", "metadata": {"taxonomy": [["Bacteria", "Bacteroidetes"], ["Bacteria", "Acidobacteria"]]}}, {"id": "z", "metadata": {"taxonomy": ["None"]}}], "format": "Biological Observation Matrix 0.9dev", "data": [[1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.0, 0.0], [0.0, 1.0, 0.0, 1.0]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:19:30.961477", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    self.assertEqual(parse_biom_table(obs.split('\n')),
                     parse_biom_table(exp.split('\n')))
def _generate_biom_output(self,
                          observation_map_fp,
                          output_biom_fp,
                          observation_metadata_fp):
    """Build a table from an observation map and write it to output_biom_fp.

    observation_map_fp: path of the observation map passed to make_otu_table
    output_biom_fp: path the value returned by make_otu_table is written to
    observation_metadata_fp: path of a file parsed with parse_taxonomy, or
     None to build the table without observation metadata
    """
    observation_metadata = None
    if observation_metadata_fp is not None:
        # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
        with open(observation_metadata_fp, "U") as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f)

    # The output is opened before the table is built (as before); the with
    # blocks close both handles even if make_otu_table raises.
    with open(output_biom_fp, "w") as biom_table_f:
        with open(observation_map_fp, "U") as observation_map_f:
            biom_table_f.write(
                make_otu_table(observation_map_f, observation_metadata))
def test_make_otu_table_no_taxonomy(self):
    """make_otu_table should work without tax (new-style OTU table)"""
    # NOTE(review): the fixture's tab/newline separators were reconstructed
    # from the expected "data" matrix below -- confirm against the upstream
    # fixture.
    otu_map = ("0\tABC_0\tDEF_1\n"
               "1\tABC_1\n"
               "x\tGHI_2\tGHI_3\tGHI_77\n"
               "z\tDEF_3\tXYZ_1")
    otu_map_lines = otu_map.split('\n')

    obs = make_otu_table(otu_map_lines, constructor=DenseOTUTable)
    exp = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "x", "metadata": null}, {"id": "z", "metadata": null}], "format": "Biological Observation Matrix 0.9dev", "data": [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:49:15.978315", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""

    # Compare parsed tables, not raw strings.
    obs_table = parse_biom_table(obs.split('\n'))
    exp_table = parse_biom_table(exp.split('\n'))
    self.assertEqual(obs_table, exp_table)
def _generate_biom_output(self,
                          observation_map_fp,
                          output_biom_fp,
                          observation_metadata_fp):
    """Build a table from an observation map and write it to output_biom_fp.

    observation_map_fp: path of the observation map passed to make_otu_table
    output_biom_fp: path handed to write_biom_table
    observation_metadata_fp: path of a file parsed with parse_taxonomy, or
     None to build the table without observation metadata
    """
    if observation_metadata_fp is None:
        observation_metadata = None
    else:
        # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
        with open(observation_metadata_fp, 'U') as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f)

    with open(observation_map_fp, 'U') as observation_map_f:
        biom_table = make_otu_table(observation_map_f, observation_metadata)
    write_biom_table(biom_table, output_biom_fp)
def test_make_otu_table_taxonomy(self):
    """make_otu_table should work with taxonomy"""
    # NOTE(review): the fixture's tab/newline separators were reconstructed
    # from the expected "data" matrix below -- confirm against the upstream
    # fixture.
    otu_map_lines = [
        '0\tABC_0\tDEF_1',
        '1\tABC_1',
        'x\tGHI_2\tGHI_3\tGHI_77',
        'z\tDEF_3\tXYZ_1',
    ]
    taxonomy = {'0': 'Bacteria;Firmicutes',
                'x': 'Bacteria;Bacteroidetes'}

    obs = make_otu_table(otu_map_lines, taxonomy, constructor=DenseOTUTable)
    exp = """{"rows": [{"id": "0", "metadata": {"taxonomy": ["Bacteria", "Firmicutes"]}}, {"id": "1", "metadata": {"taxonomy": ["None"]}}, {"id": "x", "metadata": {"taxonomy": ["Bacteria", "Bacteroidetes"]}}, {"id": "z", "metadata": {"taxonomy": ["None"]}}], "format": "Biological Observation Matrix 0.9dev", "data": [[1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.0, 0.0], [0.0, 1.0, 0.0, 1.0]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:19:30.961477", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    # Compare parsed tables, not raw strings.
    self.assertEqual(parse_biom_table(obs.split('\n')),
                     parse_biom_table(exp.split('\n')))
def test_make_otu_table_no_taxonomy(self):
    """make_otu_table should work without tax (new-style OTU table)"""
    # NOTE(review): one OTU per entry, fields joined by tabs; the separators
    # were reconstructed from the expected counts below -- confirm against
    # the upstream fixture.
    otu_map_lines = [
        '0\tABC_0\tDEF_1',
        '1\tABC_1',
        'x\tGHI_2\tGHI_3\tGHI_77',
        'z\tDEF_3\tXYZ_1',
    ]

    obs = make_otu_table(otu_map_lines)

    # Rows are the OTUs 0, 1, x, z; columns are the samples ABC, DEF, GHI, XYZ.
    data = [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]]
    exp = Table(data, ['0', '1', 'x', 'z'], ['ABC', 'DEF', 'GHI', 'XYZ'],
                input_is_dense=True)
    self.assertEqual(obs, exp)
def _generate_biom_output(self,
                          observation_map_fp,
                          output_biom_fp,
                          observation_metadata_fp):
    """Build a table from an observation map and write it to output_biom_fp.

    observation_map_fp: path of the observation map passed to make_otu_table
    output_biom_fp: path the value returned by make_otu_table is written to
    observation_metadata_fp: path of a file parsed with parse_taxonomy, or
     None to build the table without observation metadata
    """
    observation_metadata = None
    # Identity test instead of "!= None".
    if observation_metadata_fp is not None:
        # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
        with open(observation_metadata_fp, 'U') as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f)

    # The output is opened before the table is built (as before); the with
    # blocks close both handles even if make_otu_table raises.
    with open(output_biom_fp, 'w') as biom_table_f:
        with open(observation_map_fp, 'U') as observation_map_f:
            biom_table_f.write(
                make_otu_table(observation_map_f, observation_metadata))
def _generate_biom_output(self,
                          observation_map_fp,
                          output_biom_fp,
                          observation_metadata_fp):
    """Build a table from an observation map and write it to output_biom_fp.

    observation_map_fp: path of the observation map passed to make_otu_table
    output_biom_fp: path the value returned by make_otu_table is written to
    observation_metadata_fp: path of a file parsed with
     parse_taxonomy(..., parse_all_fields=True), or None to build the table
     without observation metadata
    """
    observation_metadata = None
    # Identity test instead of "!= None".
    if observation_metadata_fp is not None:
        # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
        with open(observation_metadata_fp, 'U') as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f,
                                                  parse_all_fields=True)

    # The output is opened before the table is built (as before); the with
    # blocks close both handles even if make_otu_table raises.
    with open(output_biom_fp, 'w') as biom_table_f:
        with open(observation_map_fp, 'U') as observation_map_f:
            biom_table_f.write(
                make_otu_table(observation_map_f, observation_metadata))
def test_make_otu_table_no_taxonomy(self):
    """make_otu_table should work without tax (new-style OTU table)"""
    otu_to_seqid = {
        '0': ['ABC_0', 'DEF_1'],
        '1': ['ABC_1'],
        'x': ['GHI_2', 'GHI_3', 'GHI_77'],
        'z': ['DEF_3', 'XYZ_1'],
    }
    obs = make_otu_table(otu_to_seqid, legacy=False)
    # NOTE(review): the line breaks of the expected table were reconstructed
    # (version comment, header row, one row per OTU) -- confirm against the
    # upstream fixture.
    exp = '\n'.join([
        '# QIIME v%s OTU table' % __version__,
        'OTU ID\tABC\tDEF\tGHI\tXYZ',
        '0\t1\t1\t0\t0',
        '1\t1\t0\t0\t0',
        'x\t0\t0\t3\t0',
        'z\t0\t1\t0\t1',
    ])
    self.assertEqual(obs, exp)
def test_make_otu_table_taxonomy(self):
    """make_otu_table should work with tax (new-style OTU table)"""
    otu_to_seqid = {
        '0': ['ABC_0', 'DEF_1'],
        '1': ['ABC_1'],
        'x': ['GHI_2', 'GHI_3', 'GHI_77'],
        'z': ['DEF_3', 'XYZ_1'],
    }
    taxonomy = {'0': 'Bacteria;Firmicutes',
                'x': 'Bacteria;Bacteroidetes'}
    obs = make_otu_table(otu_to_seqid, taxonomy, legacy=False)
    # NOTE(review): the line breaks of the expected table were reconstructed
    # (version comment, header row, one row per OTU) -- confirm against the
    # upstream fixture.
    exp = '\n'.join([
        '# QIIME v%s OTU table' % __version__,
        'OTU ID\tABC\tDEF\tGHI\tXYZ\tConsensus Lineage',
        '0\t1\t1\t0\t0\tBacteria;Firmicutes',
        '1\t1\t0\t0\t0\tNone',
        'x\t0\t0\t3\t0\tBacteria;Bacteroidetes',
        'z\t0\t1\t0\t1\tNone',
    ])
    self.assertEqual(obs, exp)
def _call_cleanup(self,
                  input_fp,
                  output_dir,
                  params,
                  job_prefix,
                  poll_directly,
                  suppress_submit_jobs):
    """ Called as the last step in __call__.

    When poll_directly is true, builds a table from
    <output_dir>/observation_map.txt (plus the optional metadata file named
    by params["observation_metadata_fp"]) and writes the value returned by
    make_otu_table to <output_dir>/observation_table.biom. Otherwise does
    nothing.
    """
    if not poll_directly:
        # can't construct the final biom file if not polling
        # directly as the final observation map won't have been created yet
        return

    observation_metadata = None
    metadata_fp = params["observation_metadata_fp"]
    # Identity test instead of "!= None".
    if metadata_fp is not None:
        # assumes the parser reads the handle eagerly -- TODO confirm
        with open(metadata_fp, "U") as metadata_f:
            observation_metadata = parse_observation_metadata(metadata_f)

    biom_fp = join(output_dir, "observation_table.biom")
    # The output is opened before the table is built (as before); the with
    # blocks close both handles even if make_otu_table raises.
    with open(biom_fp, "w") as biom_f:
        with open(join(output_dir, "observation_map.txt"), "U") as map_f:
            biom_f.write(make_otu_table(map_f, observation_metadata))
def main():
    """Script entry point: build an OTU table with optional metadata/counts.

    Reads the command-line options, optionally parses a taxonomy file, a
    two-column counts file, an OTU exclusion list and a mapping file, passes
    them with the OTU map to make_otu_table and hands the result to
    write_biom_table.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    exclude_otus_fp = opts.exclude_otus_fp

    otu_to_taxonomy = None
    if opts.taxonomy_fname:
        # Closed like the other input handles below.
        # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
        with open(opts.taxonomy_fname, 'U') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

    seq_counts = None
    if opts.counts_fname:
        seq_counts = {}
        with open(opts.counts_fname, 'U') as counts_f:
            for line in counts_f:
                # Blank lines (e.g. a trailing newline) carry no key/value
                # pair and would otherwise break the unpacking below.
                if not line.strip():
                    continue
                # Exactly two whitespace-separated fields are required; the
                # value is stored as the string read from the file.
                (key, val) = line.split()
                seq_counts[key] = val

    ids_to_exclude = []
    if exclude_otus_fp:
        # The file extension decides how the exclusion list is parsed.
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            read_ids = get_seq_ids_from_fasta_file
        else:
            read_ids = get_seq_ids_from_seq_id_file
        # assumes the readers return a materialized collection -- TODO confirm
        with open(exclude_otus_fp, 'U') as exclude_f:
            ids_to_exclude = read_ids(exclude_f)

    sample_metadata = None
    if opts.mapping_fp is not None:
        with open(opts.mapping_fp, 'U') as map_f:
            mapping_data, mapping_header, mapping_comments = \
                parse_mapping_file(map_f)
        sample_metadata = mapping_file_to_dict(mapping_data, mapping_header)

    with open(opts.otu_map_fp, 'U') as otu_map_f:
        biom_otu_table = make_otu_table(otu_map_f,
                                        otu_to_taxonomy=otu_to_taxonomy,
                                        otu_ids_to_exclude=ids_to_exclude,
                                        sample_metadata=sample_metadata,
                                        seq_counts=seq_counts)
    write_biom_table(biom_otu_table, opts.output_biom_fp)
def test_make_otu_table_taxonomy(self):
    """make_otu_table should work with taxonomy"""
    # NOTE(review): the fixture's tab/newline separators were reconstructed
    # from the expected counts below -- confirm against the upstream fixture.
    otu_map = ("0\tABC_0\tDEF_1\n"
               "1\tABC_1\n"
               "x\tGHI_2\tGHI_3\tGHI_77\n"
               "z\tDEF_3\tXYZ_1")
    otu_map_lines = otu_map.split('\n')
    taxonomy = {'0': ['Bacteria', 'Firmicutes'],
                'x': ['Bacteria', 'Bacteroidetes']}

    obs = make_otu_table(otu_map_lines, taxonomy)

    otu_ids = ['0', '1', 'x', 'z']
    sample_ids = ['ABC', 'DEF', 'GHI', 'XYZ']
    data = [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]]
    # OTUs absent from the taxonomy dict are expected to carry ['None'].
    obs_md = [{'taxonomy': ['Bacteria', 'Firmicutes']},
              {'taxonomy': ['None']},
              {'taxonomy': ['Bacteria', 'Bacteroidetes']},
              {'taxonomy': ['None']}]
    exp = Table(data, otu_ids, sample_ids,
                observation_metadata=obs_md, input_is_dense=True)
    self.assertEqual(obs, exp)
def main():
    """Script entry point: build an OTU table and write it to a file or stdout.

    Reads the command-line options, loads the OTU map with fields_to_dict,
    optionally parses a taxonomy file and removes excluded OTUs, then writes
    the value returned by make_otu_table to opts.output_fp (stdout when no
    output path is given).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    exclude_otus_fp = opts.exclude_otus_fp

    # The output is opened before any input is parsed (as before), so an
    # unwritable path still fails immediately.
    if opts.output_fp:
        outfile = open(opts.output_fp, 'w')
    else:
        outfile = stdout

    try:
        otu_to_taxonomy = None
        if opts.taxonomy_fname:
            # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
            with open(opts.taxonomy_fname, 'U') as taxonomy_f:
                otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        # assumes fields_to_dict returns a materialized mapping -- TODO confirm
        with open(opts.otu_map_fp, 'U') as otu_map_f:
            otu_to_seqid = fields_to_dict(otu_map_f)

        if exclude_otus_fp:
            # assumes remove_otus reads the handle eagerly -- TODO confirm
            with open(exclude_otus_fp, 'U') as exclude_f:
                otu_to_seqid = remove_otus(otu_to_seqid, exclude_f)

        outfile.write(make_otu_table(otu_to_seqid, otu_to_taxonomy))
    finally:
        # Never close the interpreter's stdout; only a file we opened.
        if outfile is not stdout:
            outfile.close()
def main():
    """Script entry point: build an OTU table and write it to the output path.

    Reads the command-line options, optionally parses a taxonomy file and a
    list of OTU IDs to exclude, passes them with the OTU map to
    make_otu_table and writes the returned value to opts.output_biom_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    exclude_otus_fp = opts.exclude_otus_fp

    # Nothing is written until the table exists, but the output is still
    # opened first (as before) so an unwritable path fails immediately; the
    # with block closes it on every exit path.
    with open(opts.output_biom_fp, 'w') as outfile:
        if opts.taxonomy_fname:
            # assumes parse_taxonomy consumes the handle eagerly -- TODO confirm
            with open(opts.taxonomy_fname, 'U') as taxonomy_f:
                otu_to_taxonomy = parse_taxonomy(taxonomy_f)
        else:
            otu_to_taxonomy = None

        ids_to_exclude = []
        if exclude_otus_fp:
            # assumes the readers return a materialized collection -- TODO confirm
            with open(exclude_otus_fp, 'U') as exclude_f:
                # The file extension decides how the list is parsed.
                if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
                    ids_to_exclude = get_seq_ids_from_fasta_file(exclude_f)
                else:
                    ids_to_exclude = get_seq_ids_from_seq_id_file(exclude_f)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            biom_otu_table = make_otu_table(otu_map_f,
                                            otu_to_taxonomy,
                                            ids_to_exclude)
        outfile.write(biom_otu_table)