def main():
    """Normalize an OTU table by per-OTU copy number and write it as BIOM JSON.

    Steps, all driven by the parsed command-line options:

    1. Load the OTU table from ``opts.input_otu_fp`` (classic tab-delimited
       when ``opts.input_format_classic`` is set, BIOM otherwise).
    2. Load the copy-number table from ``opts.input_count_fp`` (opened with
       gzip when the file name ends in ``.gz``).
    3. Keep only the OTUs for which ``count_table.sampleExists(otu_id)`` is
       true.
    4. Attach each OTU's rounded copy number as observation metadata under
       the key ``opts.metadata_identifer`` and call
       ``normObservationByMetadata`` with that key.
    5. Write the resulting table to ``opts.output_otu_fp``.

    Raises:
        ValueError: if a copy number cannot be converted to an integer, or
            is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # --- OTU table -------------------------------------------------------
    if opts.input_format_classic:
        with open(opts.input_otu_fp, 'U') as otu_fh:
            otu_table = parse_classic_table_to_rich_table(
                otu_fh, None, None, None, DenseOTUTable)
    else:
        input_ext = path.splitext(opts.input_otu_fp)[1]
        if input_ext != '.biom':
            sys.stderr.write("\nOTU table does not have '.biom' extension! If loading causes error consider using '-f' option to load tab-delimited OTU table!\n\n")
        with open(opts.input_otu_fp, 'U') as otu_fh:
            otu_table = parse_biom_table(otu_fh)

    # --- copy-number table ------------------------------------------------
    ext = path.splitext(opts.input_count_fp)[1]
    if ext == '.gz':
        count_fh = gzip.open(opts.input_count_fp, 'rb')
    else:
        count_fh = open(opts.input_count_fp, 'U')
    try:
        count_table = parse_biom_table(count_fh)
    finally:
        # try/finally works for both plain and gzip handles.
        count_fh.close()

    # Need to only keep data relevant to our otu list.  Note that OTU IDs are
    # looked up as *sample* IDs of the count table.
    ob_id = count_table.ObservationIds[0]
    filtered_otus = []
    filtered_values = []
    for observation in otu_table.iterObservations():
        otu_id = str(observation[1])
        if count_table.sampleExists(otu_id):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.observationData(otu_id))

    filtered_otu_table = table_factory(filtered_values,
                                       otu_table.SampleIds,
                                       filtered_otus,
                                       constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        value = count_table.getValueByIds(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            value = int(round(float(value)))
        except ValueError:
            # Report the OTU ID, as the message text promises (the original
            # interpolated the offending value instead).
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s. Must be int-able." % otu_id)
        if value < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")
        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: value}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(
        opts.metadata_identifer)

    make_output_dir_for_file(opts.output_otu_fp)
    # Serialize before opening so a serialization error cannot leave a
    # truncated output file behind.
    biom_json = normalized_table.getBiomFormatJsonString('PICRUST')
    with open(opts.output_otu_fp, 'w') as out_fh:
        out_fh.write(biom_json)
def test_write_summarize_taxa(self):
    """write_summarize_taxa functions as expected

    Writes the taxa summary once in classic format and once in BIOM format,
    then compares the file contents against the expected table.
    """
    # Classic format.
    write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp1)
    # Register the file as soon as it has been written, so it is still
    # queued for removal when an assertion below fails (files_to_remove is
    # presumably consumed by the test case's teardown).
    self.files_to_remove.append(self.tmp_fp1)
    with open(self.tmp_fp1) as classic_fh:
        obs = classic_fh.read()
    exp = "\n".join(["Taxon\tfoo\tbar\tfoobar", "a;b;c\t0\t1\t2", "d;e;f\t3\t4\t5\n"])
    self.assertEqual(obs, exp)

    # BIOM format.
    write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp2,
                         file_format="biom")
    self.files_to_remove.append(self.tmp_fp2)
    # The expected BIOM table is built from the classic expectation above.
    exp = parse_classic_table_to_rich_table(exp.split("\n"), None, None, None,
                                            SparseTaxonTable)
    with open(self.tmp_fp2) as biom_fh:
        obs = biom_fh.read()
    obs = parse_biom_table(obs)
    self.assertEqual(obs, exp)
def test_format_summarize_taxa(self):
    """format_summarize_taxa yields the expected classic and BIOM output.

    Also checks that an unrecognized ``file_format`` raises ValueError.
    """
    # Classic format: the yielded chunks, concatenated, must equal the
    # expected tab-delimited text.
    expected_lines = ["Taxon\tfoo\tbar\tfoobar", "a;b;c\t0\t1\t2", "d;e;f\t3\t4\t5\n"]
    expected_text = "\n".join(expected_lines)
    classic_chunks = format_summarize_taxa(self.taxa_summary, self.taxa_header)
    observed_text = "".join(classic_chunks)
    self.assertEqual(observed_text, expected_text)

    # BIOM format: turn the expected classic text into a table and compare
    # it with the table parsed from the observed BIOM string.
    expected_table = parse_classic_table_to_rich_table(
        expected_text.split("\n"), None, None, None, SparseTaxonTable)
    biom_chunks = format_summarize_taxa(
        self.taxa_summary, self.taxa_header, file_format="biom")
    observed_table = parse_biom_table("".join(biom_chunks))
    self.assertEqual(observed_table, expected_table)

    # Bad file_format argument.  list() forces the generator to run.
    with self.assertRaises(ValueError):
        bad_chunks = format_summarize_taxa(
            self.taxa_summary, self.taxa_header, file_format="foo")
        list(bad_chunks)
def test_write_summarize_taxa_transposed_output(self):
    """write_summarize_taxa_transposed_output functions as expected

    Same as the non-transposed test, but with ``transposed_output=True``:
    the expected classic text has one row per sample and one column per
    taxon.
    """
    # Classic format.
    write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp1,
                         transposed_output=True)
    # Register the file as soon as it has been written, so it is still
    # queued for removal when an assertion below fails (files_to_remove is
    # presumably consumed by the test case's teardown).
    self.files_to_remove.append(self.tmp_fp1)
    with open(self.tmp_fp1) as classic_fh:
        obs = classic_fh.read()
    exp = "\n".join(["SampleID\ta;b;c\td;e;f", "foo\t0\t3\nbar\t1\t4", "foobar\t2\t5\n"])
    self.assertEqual(obs, exp)

    # BIOM format.
    write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp2,
                         transposed_output=True, file_format="biom")
    self.files_to_remove.append(self.tmp_fp2)
    # The expected BIOM table is built from the classic expectation above.
    exp = parse_classic_table_to_rich_table(exp.split("\n"), None, None, None,
                                            SparseTaxonTable)
    with open(self.tmp_fp2) as biom_fh:
        obs = biom_fh.read()
    obs = parse_biom_table(obs)
    self.assertEqual(obs, exp)
def test_write_summarize_taxa(self):
    """write_summarize_taxa functions as expected

    Writes the taxa summary once in classic format and once in BIOM format,
    then compares the file contents against the expected table.
    """
    # Classic format.
    write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp1)
    # Queue the file for removal right after it is written; doing this only
    # after the assertion would skip it whenever the assertion fails
    # (files_to_remove is presumably consumed by the teardown).
    self.files_to_remove.append(self.tmp_fp1)
    with open(self.tmp_fp1) as classic_fh:
        obs = classic_fh.read()
    exp = '\n'.join(['Taxon\tfoo\tbar\tfoobar', 'a;b;c\t0\t1\t2', 'd;e;f\t3\t4\t5\n'])
    self.assertEqual(obs, exp)

    # BIOM format.
    write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp2,
                         file_format='biom')
    self.files_to_remove.append(self.tmp_fp2)
    # The expected BIOM table is derived from the classic expectation above.
    exp = parse_classic_table_to_rich_table(exp.split('\n'), None, None, None,
                                            SparseTaxonTable)
    with open(self.tmp_fp2) as biom_fh:
        obs = biom_fh.read()
    obs = parse_biom_table(obs)
    self.assertEqual(obs, exp)
def test_write_summarize_taxa_transposed_output(self):
    """write_summarize_taxa_transposed_output functions as expected

    Same as the non-transposed test, but with ``transposed_output=True``:
    the expected classic text has one row per sample and one column per
    taxon.
    """
    # Classic format.
    write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp1,
                         transposed_output=True)
    # Queue the file for removal right after it is written; doing this only
    # after the assertion would skip it whenever the assertion fails
    # (files_to_remove is presumably consumed by the teardown).
    self.files_to_remove.append(self.tmp_fp1)
    with open(self.tmp_fp1) as classic_fh:
        obs = classic_fh.read()
    exp = '\n'.join(['SampleID\ta;b;c\td;e;f', 'foo\t0\t3\nbar\t1\t4', 'foobar\t2\t5\n'])
    self.assertEqual(obs, exp)

    # BIOM format.
    write_summarize_taxa(self.taxa_summary, self.taxa_header, self.tmp_fp2,
                         transposed_output=True, file_format='biom')
    self.files_to_remove.append(self.tmp_fp2)
    # The expected BIOM table is derived from the classic expectation above.
    exp = parse_classic_table_to_rich_table(exp.split('\n'), None, None, None,
                                            SparseTaxonTable)
    with open(self.tmp_fp2) as biom_fh:
        obs = biom_fh.read()
    obs = parse_biom_table(obs)
    self.assertEqual(obs, exp)
def test_format_summarize_taxa(self):
    """format_summarize_taxa yields the expected classic and BIOM output.

    Also checks that an unrecognized ``file_format`` raises ValueError.
    """
    # Classic format: concatenating the yielded chunks must reproduce the
    # expected tab-delimited text.
    expected_rows = ['Taxon\tfoo\tbar\tfoobar', 'a;b;c\t0\t1\t2', 'd;e;f\t3\t4\t5\n']
    expected_text = '\n'.join(expected_rows)
    classic_pieces = format_summarize_taxa(self.taxa_summary, self.taxa_header)
    observed_text = ''.join(classic_pieces)
    self.assertEqual(observed_text, expected_text)

    # BIOM format: convert the expected classic text to a table and compare
    # it with the table parsed from the observed BIOM string.
    expected_table = parse_classic_table_to_rich_table(
        expected_text.split('\n'), None, None, None, SparseTaxonTable)
    biom_pieces = format_summarize_taxa(
        self.taxa_summary, self.taxa_header, file_format='biom')
    observed_table = parse_biom_table(''.join(biom_pieces))
    self.assertEqual(observed_table, expected_table)

    # Bad file_format argument.  list() forces the generator to run.
    with self.assertRaises(ValueError):
        bad_pieces = format_summarize_taxa(
            self.taxa_summary, self.taxa_header, file_format='foo')
        list(bad_pieces)
def _drop_unknown():
    """Filter the OTU table in ``file`` down to IDs listed in ``_copy_fname``.

    ``file``, ``_copy_fname`` and ``addtag`` are not defined in this function;
    they are taken from the surrounding scope.

    The first tab-separated field of every row of the gzip-compressed
    ``_copy_fname`` forms the set of known IDs.  ``file`` is parsed as a BIOM
    table (falling back to the classic tab-delimited parser when that fails),
    observations whose ID is not in the set are filtered out, and the result
    is written to ``file + "_tmp.biom"``.  Finally the original is renamed to
    ``addtag(file, "unfiltered")`` and the filtered table takes its place.
    """
    import os
    import gzip
    import json
    from biom.table import DenseOTUTable
    from biom.parse import (
        OBS_META_TYPES,
        parse_biom_table,
        parse_classic_table_to_rich_table
    )

    # Close the gzip handle deterministically instead of leaking it.
    with gzip.open(_copy_fname) as copy_fh:
        idx = set(row.strip().split('\t')[0] for row in copy_fh)

    def filter_func(values, otu_id, metadata):
        # Only the observation ID matters; values and metadata are ignored.
        return str(otu_id) in idx

    tmpfile = file + "_tmp.biom"
    with open(file) as f, open(tmpfile, 'w') as f_out:
        try:
            table = parse_biom_table(f)
        except Exception:
            # Deliberately broad: any failure to parse as BIOM triggers the
            # classic-format fallback.  The failed attempt may already have
            # advanced the file position, so rewind before re-parsing;
            # otherwise the fallback could see a truncated or empty stream.
            f.seek(0)
            table = parse_classic_table_to_rich_table(
                f, None, None, OBS_META_TYPES['taxonomy'], DenseOTUTable)
        table = table.filterObservations(filter_func)
        json.dump(table.getBiomFormatObject("AnADAMA"), f_out)

    # Keep the original under a tagged name, then move the filtered table
    # into its place.
    os.rename(file, addtag(file, "unfiltered"))
    os.rename(tmpfile, file)
def main():
    """Normalize an OTU table by per-OTU copy number and write it as BIOM.

    Steps, all driven by the parsed command-line options:

    1. Load the OTU table from ``opts.input_otu_fp`` (classic tab-delimited
       when ``opts.input_format_classic`` is set, BIOM otherwise).
    2. Locate the copy-number (trait) table: ``opts.input_count_fp`` when
       given, otherwise ``16S_<gg_version>_precalculated.tab.gz`` under
       ``<picrust project dir>/picrust/data``.  It is read as BIOM when
       ``opts.load_precalc_file_in_biom`` is set, else converted with
       ``convert_precalc_to_biom`` restricted to the OTU table's IDs.
    3. Keep only the OTUs for which ``count_table.sampleExists(otu_id)`` is
       true.
    4. Attach each OTU's rounded copy number as observation metadata under
       the key ``opts.metadata_identifer`` and call
       ``normObservationByMetadata`` with that key.
    5. Transfer observation and sample metadata from the original table and
       write the result to ``opts.output_otu_fp``.

    Raises:
        ValueError: if the BIOM table cannot be parsed, a copy number cannot
            be converted to an integer, or a copy number is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # --- OTU table -------------------------------------------------------
    if opts.input_format_classic:
        with open(opts.input_otu_fp, 'U') as otu_fh:
            otu_table = parse_classic_table_to_rich_table(
                otu_fh, None, None, None, DenseOTUTable)
    else:
        try:
            with open(opts.input_otu_fp, 'U') as otu_fh:
                otu_table = parse_biom_table(otu_fh)
        except ValueError:
            raise ValueError(
                "Error loading OTU table! If not in BIOM format use '-f' option.\n")

    ids_to_load = otu_table.ObservationIds

    # --- copy-number (trait) table ---------------------------------------
    if opts.input_count_fp is None:
        # precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = '_'.join(
            ['16S', opts.gg_version, 'precalculated.tab.gz'])
        input_count_table = join(get_picrust_project_dir(), 'picrust', 'data',
                                 precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        # Single-argument form prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print("Loading trait table:  %s" % (input_count_table,))

    ext = path.splitext(input_count_table)[1]
    if ext == '.gz':
        count_table_fh = gzip.open(input_count_table, 'rb')
    else:
        count_table_fh = open(input_count_table, 'U')
    try:
        if opts.load_precalc_file_in_biom:
            count_table = parse_biom_table(count_table_fh.read())
        else:
            count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)
    finally:
        # try/finally works for both plain and gzip handles.
        count_table_fh.close()

    # Need to only keep data relevant to our otu list.  Note that OTU IDs are
    # looked up as *sample* IDs of the count table.
    ob_id = count_table.ObservationIds[0]
    filtered_otus = []
    filtered_values = []
    for observation in otu_table.iterObservations():
        otu_id = str(observation[1])
        if count_table.sampleExists(otu_id):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.observationData(otu_id))

    filtered_otu_table = table_factory(filtered_values,
                                       otu_table.SampleIds,
                                       filtered_otus,
                                       constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        value = count_table.getValueByIds(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            value = int(round(float(value)))
        except ValueError:
            # Report the OTU ID, as the message text promises (the original
            # interpolated the offending value instead).
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s. Must be int-able." % otu_id)
        if value < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")
        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: value}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(
        opts.metadata_identifer)

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(
        otu_table, normalized_table, 'ObservationMetadata')
    # The original bound this result to an otherwise unused name, so the
    # written table only carried sample metadata if the helper mutates its
    # argument in place.  Rebinding makes the output correct either way.
    # NOTE(review): assumes transfer_sample_metadata returns the table, like
    # transfer_observation_metadata above - confirm.
    normalized_table = transfer_sample_metadata(
        otu_table, normalized_table, 'SampleMetadata')

    make_output_dir_for_file(opts.output_otu_fp)
    # Serialize before opening so a formatting error cannot leave a
    # truncated output file behind.
    biom_text = format_biom_table(normalized_table)
    with open(opts.output_otu_fp, 'w') as out_fh:
        out_fh.write(biom_text)
def main():
    """Normalize an OTU table by per-OTU copy number and write it as BIOM.

    Driven entirely by the parsed command-line options:

    * The OTU table is read from ``opts.input_otu_fp`` - with the classic
      tab-delimited parser when ``opts.input_format_classic`` is set, with
      the BIOM parser otherwise.
    * The copy-number (trait) table comes from ``opts.input_count_fp`` when
      given, otherwise from ``16S_<gg_version>_precalculated.tab.gz`` under
      ``<picrust project dir>/picrust/data``.  It is parsed as BIOM when
      ``opts.load_precalc_file_in_biom`` is set, else converted with
      ``convert_precalc_to_biom`` restricted to the OTU table's IDs.
    * Only OTUs for which ``count_table.sampleExists(otu_id)`` is true are
      kept.
    * Each kept OTU gets its rounded copy number as observation metadata
      under ``opts.metadata_identifer``; ``normObservationByMetadata`` is
      then called with that key.
    * Observation and sample metadata are transferred from the original
      table and the result is written to ``opts.output_otu_fp``.

    Raises:
        ValueError: if the BIOM table cannot be parsed, a copy number cannot
            be converted to an integer, or a copy number is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Load the OTU table, closing the input handle once parsing is done.
    if opts.input_format_classic:
        with open(opts.input_otu_fp, 'U') as otu_fh:
            otu_table = parse_classic_table_to_rich_table(
                otu_fh, None, None, None, DenseOTUTable)
    else:
        try:
            with open(opts.input_otu_fp, 'U') as otu_fh:
                otu_table = parse_biom_table(otu_fh)
        except ValueError:
            raise ValueError(
                "Error loading OTU table! If not in BIOM format use '-f' option.\n")

    ids_to_load = otu_table.ObservationIds

    # Work out which trait table to load.
    if opts.input_count_fp is None:
        # precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = '_'.join(
            ['16S', opts.gg_version, 'precalculated.tab.gz'])
        input_count_table = join(get_picrust_project_dir(), 'picrust', 'data',
                                 precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        # Single-argument form prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print("Loading trait table:  %s" % (input_count_table,))

    ext = path.splitext(input_count_table)[1]
    if ext == '.gz':
        count_table_fh = gzip.open(input_count_table, 'rb')
    else:
        count_table_fh = open(input_count_table, 'U')
    try:
        if opts.load_precalc_file_in_biom:
            count_table = parse_biom_table(count_table_fh.read())
        else:
            count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)
    finally:
        # try/finally works for both plain and gzip handles.
        count_table_fh.close()

    # Need to only keep data relevant to our otu list.  Note that OTU IDs are
    # looked up as *sample* IDs of the count table.
    ob_id = count_table.ObservationIds[0]
    filtered_otus = []
    filtered_values = []
    for observation in otu_table.iterObservations():
        otu_id = str(observation[1])
        if count_table.sampleExists(otu_id):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.observationData(otu_id))

    filtered_otu_table = table_factory(filtered_values,
                                       otu_table.SampleIds,
                                       filtered_otus,
                                       constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        value = count_table.getValueByIds(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            value = int(round(float(value)))
        except ValueError:
            # Report the OTU ID, as the message text promises (the original
            # interpolated the offending value instead).
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s. Must be int-able." % otu_id)
        if value < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")
        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: value}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(
        opts.metadata_identifer)

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(
        otu_table, normalized_table, 'ObservationMetadata')
    # The original bound this result to an otherwise unused name, so the
    # written table only carried sample metadata if the helper mutates its
    # argument in place.  Rebinding makes the output correct either way.
    # NOTE(review): assumes transfer_sample_metadata returns the table, like
    # transfer_observation_metadata above - confirm.
    normalized_table = transfer_sample_metadata(
        otu_table, normalized_table, 'SampleMetadata')

    make_output_dir_for_file(opts.output_otu_fp)
    # Serialize before opening so a formatting error cannot leave a
    # truncated output file behind.
    biom_text = format_biom_table(normalized_table)
    with open(opts.output_otu_fp, 'w') as out_fh:
        out_fh.write(biom_text)