def test_make_sample_node_table(self):
    """Sample node table is built correctly from a biom table + mapping."""
    # Case 1: sample ids in the biom table match the mapping file exactly.
    mf_dict = parse_mapping_file_to_dict(MF_LINES.split("\n"))[0]
    header = "#NodeID\tNodeType\tAbundance\tTimePt\tStudy\tTreatment\tDiet"
    expected_rows = [
        "s1\tsample\t148.0\t1\ta\tpre\thf",
        "s2\tsample\t156.0\t2\ta\tpre\tlf",
        "s3\tsample\t164.0\t3\ta\tpre\thf",
        "s4\tsample\t172.0\t4\ta\tpost\tlf",
        "s5\tsample\t180.0\t5\ta\tpost\tmf",
    ]
    observed = make_sample_node_table(parse_biom_table(BIOM_STRING_1), mf_dict)
    self.assertEqual(observed, [header] + expected_rows)
    # Case 2: biom sample ids are a strict subset of the mapping file's.
    observed = make_sample_node_table(parse_biom_table(BIOM_STRING_2), mf_dict)
    self.assertEqual(observed, [header] + expected_rows[2:])
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) if opts.verbose: print "Loading sequencing depth table: ",opts.input_seq_depth_file scaling_factors = {} for sample_id,depth in parse_seq_count_file(open(opts.input_seq_depth_file,'U')): scaling_factors[sample_id]=depth ext=path.splitext(opts.input_count_table)[1] if opts.verbose: print "Loading count table: ", opts.input_count_table if (ext == '.gz'): genome_table = parse_biom_table(gzip.open(opts.input_count_table,'rb')) else: genome_table = parse_biom_table(open(opts.input_count_table,'U')) if opts.verbose: print "Scaling the metagenome..." scaled_metagenomes = scale_metagenomes(genome_table,scaling_factors) if opts.verbose: print "Writing results to output file: ",opts.output_metagenome_table make_output_dir_for_file(opts.output_metagenome_table) open(opts.output_metagenome_table,'w').write(format_biom_table(scaled_metagenomes))
def test_sort_otu_table(self):
    """sort_otu_table orders samples as requested."""
    sample_order = ['NA', 'Key', 'Fing']
    observed = sort_otu_table(parse_biom_table(self.otu_table1), sample_order)
    self.assertEqual(observed, parse_biom_table(self.age_sorted_otu_table1))
def test_make_edge_table(self):
    """Edge table lists each nonzero (sample, OTU) pair with its abundance."""
    header = '#Sample\tOTU\tAbundance'

    # Abundances in the fixtures follow: value = sample_num + 5*(otu_num-1),
    # so the expected rows can be generated instead of listed by hand.
    def expected_rows(sample_nums, otu_nums):
        return ['s%s\to%s\t%s' % (s, o, float(s + 5 * (o - 1)))
                for s in sample_nums for o in otu_nums]

    obs_out = make_edge_table(parse_biom_table(BIOM_STRING_3))
    exp_out = [header] + expected_rows([3, 4, 5], range(1, 9))
    self.assertEqual(set(obs_out), set(exp_out))

    # A table with an all-zero row (o5) and column (s1): those entries
    # must not appear in the edge table.
    obs_out = make_edge_table(parse_biom_table(BIOM_STRING_6))
    exp_out = [header] + expected_rows([2, 3, 4, 5], [1, 2, 3, 4, 6, 7, 8])
    self.assertEqual(set(obs_out), set(exp_out))
def generate_full_otu_table(study, study_input_dir, zip_fname, files_to_remove,
                            biom_files, output_dir):
    """Merge one or more OTU tables into a single per-study .biom file.

    study -- study identifier used in the output filename
    study_input_dir, zip_fname -- retained for interface compatibility
      (previously used by the commented-out zip step)
    files_to_remove -- list of filepaths scheduled for cleanup; the merged
      table path is appended and the list returned
    biom_files -- list of .biom filepaths; the first is the merge base
    output_dir -- directory for the merged table

    Fix: all file handles are explicitly closed, and the redundant
    `join(full_biom_table_fp)` call was removed.
    """
    base_fh = open(biom_files[0], 'U')
    try:
        master = parse_biom_table(base_fh)
    finally:
        base_fh.close()
    # Fold any remaining tables into the first (no-op for a single file).
    for input_fp in biom_files[1:]:
        fh = open(input_fp, 'U')
        try:
            master = master.merge(parse_biom_table(fh))
        finally:
            fh.close()
    # write full biom-table
    full_biom_table_fname = 'study_%s_closed_reference_otu_table.biom' % \
        (str(study))
    full_biom_table_fp = join(output_dir, full_biom_table_fname)
    # The merged table is an intermediate artifact; schedule it for removal.
    files_to_remove.append(full_biom_table_fp)
    biom_f = open(full_biom_table_fp, 'w')
    try:
        biom_f.write(format_biom_table(master))
    finally:
        biom_f.close()
    # zip the full biom-table file
    #cmd_call='cd %s; tar rzvf %s %s' % (study_input_dir,zip_fname,
    #                                    full_biom_table_fname)
    #system(cmd_call)
    return files_to_remove
def test_sort_otu_table_error(self):
    """sort_otu_table raises appropriate errors for bad sample id lists."""
    # A repeated sample id in the requested order -> ValueError.
    self.assertRaises(ValueError, sort_otu_table,
                      parse_biom_table(self.otu_table1),
                      ['NA', 'Key', 'Fing', 'Key'])
    # An order missing some of the table's samples -> KeyError.
    self.assertRaises(KeyError, sort_otu_table,
                      parse_biom_table(self.otu_table1),
                      ['NA', 'Key'])
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) if opts.limit_to_function: limit_to_functions = opts.limit_to_function.split(',') if opts.verbose: print "Limiting output to only functions:",limit_to_functions else: limit_to_functions = [] if opts.verbose: print "Loading otu table: ",opts.input_otu_table otu_table = parse_biom_table(open(opts.input_otu_table,'U')) ext=path.splitext(opts.input_count_table)[1] if opts.verbose: print "Loading count table: ", opts.input_count_table if (ext == '.gz'): genome_table = parse_biom_table(gzip.open(opts.input_count_table,'rb')) else: genome_table = parse_biom_table(open(opts.input_count_table,'U')) if opts.verbose: print "Predicting the metagenome..." partitioned_metagenomes = partition_metagenome_contributions(otu_table,genome_table,limit_to_functions=limit_to_functions) output_text = "\n".join(["\t".join(map(str,i)) for i in partitioned_metagenomes]) if opts.verbose: print "Writing results to output file: ",opts.output_metagenome_table make_output_dir_for_file(opts.output_metagenome_table) open(opts.output_metagenome_table,'w').write(output_text)
def test_suppress_md5(self):
    """TableSummarizer suppresses md5 under each supported configuration."""
    t = TableSummarizer()
    # md5 is suppressed when suppress_md5=True, or when no raw lines are
    # supplied (second element of the table tuple is None) -- and when both.
    configs = [
        (self.biom1_lines, True),
        (None, False),
        (None, True),
    ]
    for lines, suppress in configs:
        actual = t(table=(parse_biom_table(self.biom1_lines), lines),
                   qualitative=False,
                   suppress_md5=suppress)
        self.assertEqual(actual['biom_summary'],
                         self.summary_suppress_md5_lines)
def test_make_sample_node_table(self):
    '''make_sample_node_table produces one row per biom sample.'''
    mf_dict = parse_mapping_file_to_dict(MF_LINES.split('\n'))[0]
    all_rows = [
        '#NodeID\tNodeType\tAbundance\tTimePt\tStudy\tTreatment\tDiet',
        's1\tsample\t148.0\t1\ta\tpre\thf',
        's2\tsample\t156.0\t2\ta\tpre\tlf',
        's3\tsample\t164.0\t3\ta\tpre\thf',
        's4\tsample\t172.0\t4\ta\tpost\tlf',
        's5\tsample\t180.0\t5\ta\tpost\tmf'
    ]
    # Sample ids in the biom table equal those in the mapping file.
    obs = make_sample_node_table(parse_biom_table(BIOM_STRING_1), mf_dict)
    self.assertEqual(obs, all_rows)
    # Sample ids in the biom table are a subset of the mapping file's:
    # only s3-s5 appear, under the same header.
    obs = make_sample_node_table(parse_biom_table(BIOM_STRING_2), mf_dict)
    self.assertEqual(obs, all_rows[:1] + all_rows[3:])
def main():
    """Normalize an OTU table by marker-gene (16S) copy number (PICRUSt).

    Reads an OTU table (classic tab-delimited or .biom) and a copy-number
    count table (optionally gzipped), divides each OTU's counts by its copy
    number, and writes the normalized table as JSON .biom.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    input_ext=path.splitext(opts.input_otu_fp)[1]
    if opts.input_format_classic:
        # Classic (tab-delimited) OTU table; parse into a dense rich table.
        otu_table=parse_classic_table_to_rich_table(open(opts.input_otu_fp,'U'),None,None,None,DenseOTUTable)
    else:
        if input_ext != '.biom':
            # Warn but continue: the file may still parse as .biom.
            sys.stderr.write("\nOTU table does not have '.biom' extension! If loading causes error consider using '-f' option to load tab-delimited OTU table!\n\n")
        otu_table = parse_biom_table(open(opts.input_otu_fp,'U'))
    # Copy-number table may optionally be gzipped (detected by extension).
    ext=path.splitext(opts.input_count_fp)[1]
    if (ext == '.gz'):
        count_table = parse_biom_table(gzip.open(opts.input_count_fp,'rb'))
    else:
        count_table = parse_biom_table(open(opts.input_count_fp,'U'))
    #Need to only keep data relevant to our otu list
    ids=[]
    for x in otu_table.iterObservations():
        # x is a (values, id, metadata) tuple; keep the observation id.
        ids.append(str(x[1]))
    ob_id=count_table.ObservationIds[0]
    filtered_otus=[]
    filtered_values=[]
    for x in ids:
        # Keep only OTUs that have a copy-number entry; OTUs appear as
        # *samples* of the count table.
        if count_table.sampleExists(x):
            filtered_otus.append(x)
            filtered_values.append(otu_table.observationData(x))
    #filtered_values = map(list,zip(*filtered_values))
    filtered_otu_table=table_factory(filtered_values,otu_table.SampleIds,filtered_otus, constructor=DenseOTUTable)
    # Attach each OTU's copy number as observation metadata under
    # opts.metadata_identifer, validating it is a positive integer.
    copy_numbers_filtered={}
    for x in filtered_otus:
        value = count_table.getValueByIds(ob_id,x)
        try:
            #data can be floats so round them and make them integers
            value = int(round(float(value)))
        except ValueError:
            raise ValueError,\
                "Invalid type passed as copy number for OTU ID %s. Must be int-able." % (value)
        if value < 1:
            raise ValueError, "Copy numbers must be greater than or equal to 1."
        copy_numbers_filtered[x]={opts.metadata_identifer:value}
    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)
    # Divide each OTU's counts by its copy-number metadata value.
    normalized_table = filtered_otu_table.normObservationByMetadata(opts.metadata_identifer)
    make_output_dir_for_file(opts.output_otu_fp)
    open(opts.output_otu_fp,'w').write(\
        normalized_table.getBiomFormatJsonString('PICRUST'))
def convertBiomFileToStampProfile(self, file_name, output_name, metadata_name):
    """Convert a BIOM table to a STAMP profile file.

    Function adapted from PICRUSt by Morgan Langill.
    https://github.com/mlangill/get_mgrast_data/blob/master/biom_to_stamp.py

    file_name -- input BIOM file (optionally gzipped; must use '.gz' ext)
    output_name -- output STAMP profile filepath
    metadata_name -- observation metadata key to expand into hierarchy
      levels, or None / '<observation ids>' for no metadata

    Fixes: the original called metadata_name.split() *before* the None
    check, so passing None raised AttributeError instead of taking the
    no-metadata branch; the observation metadata list was mutated in place
    when padding; file handles were never closed.
    """
    # allow file to be optionally gzipped (must use extension '.gz')
    ext = splitext(file_name)[1]
    if ext == '.gz':
        in_fh = gzip.open(file_name, 'rb')
    else:
        in_fh = open(file_name, 'U')
    try:
        table = parse_biom_table(in_fh)
    finally:
        in_fh.close()

    # Strip any trailing "(...)" annotation -- but only after guarding
    # against None (BUG FIX: original stripped first and crashed on None).
    if metadata_name is not None:
        metadata_name = metadata_name.split('(')[0].rstrip()
    if metadata_name is None or metadata_name == '<observation ids>':
        max_len_metadata = 0
    elif table.observation_metadata and metadata_name in table.observation_metadata[0]:
        # figure out the longest list within the given metadata
        max_len_metadata = max(len(p[metadata_name])
                               for p in table.observation_metadata)
    else:
        QtGui.QMessageBox.information(
            self, 'Unrecognized metadata file',
            "'" + metadata_name + "' was not found in the BIOM table.",
            QtGui.QMessageBox.Ok)
        return

    # Header: one simple label per metadata level ('Level_1', 'Level_2',
    # ...), the observation-id column, then the sample ids.
    header = ['Level_' + str(i + 1) for i in range(max_len_metadata)]
    header.append('Observation Ids')
    header.extend(table.sample_ids)

    fout = open(output_name, 'w')
    try:
        fout.write("\t".join(header) + '\n')
        # One output row per observation (row in the table).
        for obs_vals, obs_id, obs_metadata in table.iter(axis='observation'):
            row = []
            if max_len_metadata > 0:
                # Copy so padding doesn't mutate the table's metadata.
                row = list(obs_metadata[metadata_name])
                # add blanks if the metadata doesn't fill each level
                if len(row) < max_len_metadata:
                    row.extend(['unclassified'] * (max_len_metadata - len(row)))
            # Add the observation id as the last "Level"; numeric ids get an
            # 'ID' prefix.
            if isNumber(obs_id):
                row.append('ID' + obs_id)
            else:
                row.append(obs_id)
            # Add count data to the row.
            row.extend(map(str, obs_vals))
            fout.write("\t".join(row) + '\n')
    finally:
        fout.close()
def test_make_otu_table_no_taxonomy(self):
    """make_otu_table should work without tax (new-style OTU table)"""
    # OTU map: tab-delimited, one OTU per line (otu id, then member seq ids
    # whose prefixes are the sample ids, e.g. ABC_0 -> sample ABC).
    otu_map_lines = """0	ABC_0	DEF_1
1	ABC_1
x	GHI_2	GHI_3	GHI_77
z	DEF_3	XYZ_1""".split('\n')
    obs = make_otu_table(otu_map_lines,constructor=DenseOTUTable)
    # Expected table as raw biom JSON. Compare parsed tables rather than
    # strings -- presumably so generated fields (e.g. "date") don't cause
    # spurious mismatches; confirm against Table.__eq__ semantics.
    exp = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "x", "metadata": null}, {"id": "z", "metadata": null}], "format": "Biological Observation Matrix 0.9dev", "data": [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:49:15.978315", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    self.assertEqual(parse_biom_table(obs.split('\n')),
                     parse_biom_table(exp.split('\n')))
def test_sort_otu_table_by_mapping_field_some_values_differ(self):
    """ sort_otu_table fns when some values differ"""
    sorted_table = sort_otu_table_by_mapping_field(
        parse_biom_table(self.otu_table1),
        parse_mapping_file(self.mapping_f2),
        sort_field="Nothing")
    self.assertEqual(sorted_table,
                     parse_biom_table(self.nothing_sorted_otu_table1))
def test_sort_otu_table_by_mapping_field_some_values_same(self):
    """ sort_otu_table_by_mapping_field fns when all values are the same"""
    sorted_table = sort_otu_table_by_mapping_field(
        parse_biom_table(self.otu_table1),
        parse_mapping_file(self.mapping_f2),
        sort_field="Name")
    self.assertEqual(sorted_table,
                     parse_biom_table(self.name_sorted_otu_table1))
def test_sort_otu_table_by_mapping_field_all_values_differ(self):
    """ sort_otu_table_by_mapping_field fns when all values differ"""
    sorted_table = sort_otu_table_by_mapping_field(
        parse_biom_table(self.otu_table1),
        parse_mapping_file(self.mapping_f2),
        sort_field="Age")
    self.assertEqual(sorted_table,
                     parse_biom_table(self.age_sorted_otu_table1))
def iter_prediction_expectation_pairs(obs_dir_fp, exp_dir_fp, file_name_field_order, file_name_delimiter, verbose=False): """Iterate pairs of observed, expected biom file names""" input_files = sorted(listdir(obs_dir_fp)) for file_number, f in enumerate(input_files): if verbose: print "\nExamining file {0} of {1}: {2}".format( file_number + 1, len(input_files), f) if 'accuracy_metrics' in f: print "%s is an Accuracy file...skipping" % str(f) continue #filename_components_list = f.split(file_name_delimiter) #Get predicted traits filename_metadata = get_metadata_from_filename(f,file_name_field_order,\ file_name_delimiter,verbose=verbose) if filename_metadata.get('file_type', None) == 'predict_traits': if verbose: #print "Found a prediction file" print "\tLoading .biom format observation table:", f try: obs_table =\ parse_biom_table(open(join(obs_dir_fp,f),'U')) except ValueError: print 'Failed, skipping...' continue # raise RuntimeError(\ # "Could not parse predicted trait file: %s. Is it a .biom formatted file?" %(f)) else: continue # Get paired observation file exp_filename = file_name_delimiter.join([ 'exp_biom_traits', filename_metadata['holdout_method'], filename_metadata['distance'], filename_metadata['organism'] ]) exp_filepath = join(exp_dir_fp, exp_filename) if verbose: print "\tLooking for the expected trait file matching %s here: %s" % ( f, exp_filepath) try: exp_table =\ parse_biom_table(open(exp_filepath,"U")) except IOError, e: if strict: raise IOError(e) else: if verbose: print "Missing expectation file....skipping!" continue yield obs_table, exp_table, f
def main():
    """Predict metagenome content from an OTU table and a gene count table.

    Optionally writes per-sample weighted NSTI accuracy metrics, then
    writes the predicted metagenome (.biom, or tab-delimited with
    --format_tab_delimited).
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    if opts.verbose:
        print "Loading otu table: ",opts.input_otu_table
    otu_table = parse_biom_table(open(opts.input_otu_table,'U'))
    ext=path.splitext(opts.input_count_table)[1]
    if opts.verbose:
        print "Loading count table: ", opts.input_count_table
    if (ext == '.gz'):
        # Count table may optionally be gzipped (detected by extension).
        genome_table = parse_biom_table(gzip.open(opts.input_count_table,'rb'))
    else:
        genome_table = parse_biom_table(open(opts.input_count_table,'U'))
    make_output_dir_for_file(opts.output_metagenome_table)
    if opts.accuracy_metrics:
        # Calculate accuracy metrics
        #unweighted_nsti = calc_nsti(otu_table,genome_table,weighted=False)
        #print "Unweighted NSTI:", unweighted_nsti
        # calc_nsti returns (sample ids, per-sample NSTI values).
        weighted_nsti = calc_nsti(otu_table,genome_table,weighted=True)
        samples= weighted_nsti[0]
        nstis = list(weighted_nsti[1])
        #print "Samples:",samples
        #print "NSTIs:",nstis
        samples_and_nstis = zip(samples,nstis)
        #print "Samples and NSTIs:",samples_and_nstis
        # One tab-delimited line per sample, after a header line.
        lines = ["#Sample\tMetric\tValue\n"]
        #print weighted_nsti
        for sample,nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti))
            lines.append(line)
        if opts.verbose:
            for l in sorted(lines):
                print l
        if opts.verbose:
            print "Writing accuracy information to file:", opts.accuracy_metrics
        # Lines are sorted so output order is deterministic.
        open(opts.accuracy_metrics,'w').writelines(sorted(lines))
    if opts.verbose:
        print "Predicting the metagenome..."
    predicted_metagenomes = predict_metagenomes(otu_table,genome_table)
    if opts.verbose:
        print "Writing results to output file: ",opts.output_metagenome_table
    make_output_dir_for_file(opts.output_metagenome_table)
    if(opts.format_tab_delimited):
        open(opts.output_metagenome_table,'w').write(predicted_metagenomes.delimitedSelf())
    else:
        open(opts.output_metagenome_table,'w').write(format_biom_table(predicted_metagenomes))
def test_make_otu_table_no_taxonomy(self):
    """make_otu_table should work without tax (new-style OTU table)"""
    # OTU map: tab-delimited, one OTU per line (otu id, then member seq ids
    # whose prefixes are the sample ids).
    otu_map_lines = """0	ABC_0	DEF_1
1	ABC_1
x	GHI_2	GHI_3	GHI_77
z	DEF_3	XYZ_1""".split('\n')
    obs = make_otu_table(otu_map_lines, constructor=DenseOTUTable)
    # Expected result as raw biom JSON. Parsed tables are compared rather
    # than strings -- presumably so generated fields (e.g. "date") don't
    # cause spurious mismatches; confirm against Table equality semantics.
    exp = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "x", "metadata": null}, {"id": "z", "metadata": null}], "format": "Biological Observation Matrix 0.9dev", "data": [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:49:15.978315", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    self.assertEqual(parse_biom_table(obs.split('\n')),
                     parse_biom_table(exp.split('\n')))
def test_make_otu_table_taxonomy(self):
    """make_otu_table should work with taxonomy"""
    # OTU map: tab-delimited, one OTU per line (otu id, then member seq ids).
    otu_map_lines = """0	ABC_0	DEF_1
1	ABC_1
x	GHI_2	GHI_3	GHI_77
z	DEF_3	XYZ_1""".split('\n')
    # Taxonomy strings are provided for a subset of OTUs; the rest should
    # come out as ["None"] in the observation metadata.
    taxonomy = {'0':'Bacteria;Firmicutes', 'x':'Bacteria;Bacteroidetes'}
    obs = make_otu_table(otu_map_lines, taxonomy,constructor=DenseOTUTable)
    # Expected result as raw biom JSON; parsed tables are compared rather
    # than strings -- presumably so generated fields (e.g. "date") don't
    # cause spurious mismatches; confirm against Table equality semantics.
    exp = """{"rows": [{"id": "0", "metadata": {"taxonomy": ["Bacteria", "Firmicutes"]}}, {"id": "1", "metadata": {"taxonomy": ["None"]}}, {"id": "x", "metadata": {"taxonomy": ["Bacteria", "Bacteroidetes"]}}, {"id": "z", "metadata": {"taxonomy": ["None"]}}], "format": "Biological Observation Matrix 0.9dev", "data": [[1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.0, 0.0], [0.0, 1.0, 0.0, 1.0]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:19:30.961477", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    self.assertEqual(parse_biom_table(obs.split('\n')),
                     parse_biom_table(exp.split('\n')))
def test_sort_otu_table_by_mapping_field_all_values_differ(self):
    """ sort_otu_table_by_mapping_field fns when all values differ"""
    table = parse_biom_table(self.otu_table1)
    mapping = parse_mapping_file(self.mapping_f2)
    observed = sort_otu_table_by_mapping_field(table, mapping,
                                               sort_field="Age")
    self.assertEqual(observed, parse_biom_table(self.age_sorted_otu_table1))
def test_sort_otu_table_by_mapping_field_some_values_differ(self):
    """ sort_otu_table fns when some values differ"""
    table = parse_biom_table(self.otu_table1)
    mapping = parse_mapping_file(self.mapping_f2)
    observed = sort_otu_table_by_mapping_field(table, mapping,
                                               sort_field="Nothing")
    self.assertEqual(observed,
                     parse_biom_table(self.nothing_sorted_otu_table1))
def test_sort_otu_table_by_mapping_field_some_values_same(self):
    """ sort_otu_table_by_mapping_field fns when all values are the same"""
    table = parse_biom_table(self.otu_table1)
    mapping = parse_mapping_file(self.mapping_f2)
    observed = sort_otu_table_by_mapping_field(table, mapping,
                                               sort_field="Name")
    self.assertEqual(observed, parse_biom_table(self.name_sorted_otu_table1))
def main():
    """Print a BIOM table to stdout as a STAMP-style tab-delimited profile.

    Expands the observation metadata named by --metadata into 'Level_N'
    hierarchy columns, with the observation id as the final level.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    min_args = 1
    if len(args) < min_args:
        option_parser.error('A BIOM file must be provided.')
    file_name = args[0]
    #allow file to be optionally gzipped (must use extension '.gz')
    ext=splitext(file_name)[1]
    if (ext == '.gz'):
        table = parse_biom_table(gzip.open(file_name,'rb'))
    else:
        table = parse_biom_table(open(file_name,'U'))
    metadata_name=opts.metadata
    if metadata_name is None:
        # No metadata requested: only the observation-id "level" is emitted.
        max_len_metadata=0
    elif table.ObservationMetadata and metadata_name in table.ObservationMetadata[0]:
        #figure out the longest list within the given metadata
        max_len_metadata = max(len(p[metadata_name]) for p in table.ObservationMetadata)
    else:
        raise ValueError("'"+metadata_name+"' was not found in the BIOM table. Please try changing --metadata to a valid metadata field.")
    #make the header line
    header=[]
    #make simple labels for each level in the metadata (e.g. 'Level_1', 'Level_2', etc.) "+1" for the observation id as well.
    for i in range(max_len_metadata+1):
        header.append('Level_'+ str(i+1))
    #add the sample ids to the header line
    header.extend(table.SampleIds)
    print "\t".join(header)
    #now process each observation (row in the table)
    for obs_vals,obs_id,obs_metadata in table.iterObservations():
        row=[]
        if max_len_metadata >0:
            row=obs_metadata[metadata_name]
            #Add blanks if the metadata doesn't fill each level
            if len(row) < max_len_metadata:
                for i in range(max_len_metadata - len(row)):
                    row.append('')
        #Add the observation id as the last "Level"
        row.append(obs_id)
        #Add count data to the row
        row.extend(map(str,obs_vals))
        print "\t".join(row)
def main():
    """Merge two or more .biom tables into one and write the result.

    The first input table is the merge base; each subsequent table is
    folded into it with Table.merge.

    Fix: file handles are now closed via context managers (the originals
    were opened and never closed).
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    input_fps = opts.input_fps
    with open(input_fps[0], 'U') as f:
        master = parse_biom_table(f)
    for input_fp in input_fps[1:]:
        with open(input_fp, 'U') as f:
            master = master.merge(parse_biom_table(f))
    with open(opts.output_fp, 'w') as out_f:
        out_f.write(format_biom_table(master))
def test_classic_to_biom(self):
    """Correctly converts classic to biom."""
    result = self.cmd(table=parse_biom_table(self.classic_lines1),
                      to_json=True, table_type='OTU table')
    self.assertEqual(result.keys(), ['table'])
    # Round-trip the converted table through JSON and sanity-check it.
    converted = parse_biom_table(result['table'][0].to_json('testing'))
    self.assertEqual(type(converted), Table)
    self.assertEqual(len(converted.sample_ids), 9)
    self.assertEqual(len(converted.observation_ids), 14)
    self.assertEqual(converted.sample_metadata, None)
    self.assertNotEqual(converted.observation_metadata, None)
def test_biom_to_classic(self):
    """Correctly converts biom to classic."""
    result = self.cmd(table=parse_biom_table(self.biom_lines1),
                      to_tsv=True, header_key='taxonomy')
    self.assertEqual(result.keys(), ['table'])
    self.assertEqual(result['table'][0], classic1)
    # A custom output_metadata_id is honored as the TSV metadata column name.
    result = self.cmd(table=parse_biom_table(self.biom_lines1),
                      to_tsv=True, header_key='taxonomy',
                      output_metadata_id='foo')
    self.assertEqual(result.keys(), ['table'])
    header_line = result['table'][0].split('\n')[1]
    self.assertEqual(header_line.split('\t')[-1], 'foo')
def test_get_shared_otus(self):
    """get_shared_otus returns expected OTUs for several thresholds."""
    # (input table string, min count, min fraction, expected result)
    cases = [
        (self.get_shared_otus_1_1_input, 1, 1,
         self.get_shared_otus_1_1_result),
        (self.get_shared_otus_2_06_input, 2, 0.6,
         self.get_shared_otus_2_06_result),
        (self.get_shared_otus_5_09_input, 5, 0.9,
         self.get_shared_otus_5_09_result),
    ]
    for table_str, min_count, fraction, expected in cases:
        observed = get_shared_otus([parse_biom_table(table_str)],
                                   min_count, fraction)
        self.assertEqual(expected, observed)
def test_classic_to_biom(self):
    """Correctly converts classic to biom."""
    result = self.cmd(table=parse_biom_table(self.classic_lines1),
                      to_json=True)
    self.assertEqual(result.keys(), ['table'])
    # Re-parse the JSON output and sanity-check the resulting Table.
    converted = parse_biom_table(result['table'][0])
    self.assertEqual(type(converted), Table)
    self.assertEqual(len(converted.sample_ids), 9)
    self.assertEqual(len(converted.observation_ids), 14)
    self.assertEqual(converted.sample_metadata, None)
    self.assertNotEqual(converted.observation_metadata, None)
def test_make_otu_table_taxonomy(self):
    """make_otu_table should work with taxonomy"""
    # OTU map: tab-delimited, one OTU per line (otu id, then member seq ids).
    otu_map_lines = """0	ABC_0	DEF_1
1	ABC_1
x	GHI_2	GHI_3	GHI_77
z	DEF_3	XYZ_1""".split('\n')
    # Taxonomy is provided for a subset of OTUs; the rest should come out
    # as ["None"] in the observation metadata.
    taxonomy = {'0': 'Bacteria;Firmicutes', 'x': 'Bacteria;Bacteroidetes'}
    obs = make_otu_table(otu_map_lines, taxonomy, constructor=DenseOTUTable)
    # Expected result as raw biom JSON; parsed tables are compared rather
    # than strings -- presumably so generated fields (e.g. "date") don't
    # cause spurious mismatches; confirm against Table equality semantics.
    exp = """{"rows": [{"id": "0", "metadata": {"taxonomy": ["Bacteria", "Firmicutes"]}}, {"id": "1", "metadata": {"taxonomy": ["None"]}}, {"id": "x", "metadata": {"taxonomy": ["Bacteria", "Bacteroidetes"]}}, {"id": "z", "metadata": {"taxonomy": ["None"]}}], "format": "Biological Observation Matrix 0.9dev", "data": [[1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.0, 0.0], [0.0, 1.0, 0.0, 1.0]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:19:30.961477", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    self.assertEqual(parse_biom_table(obs.split('\n')),
                     parse_biom_table(exp.split('\n')))
def setUp(self):
    """Parse the module-level biom fixture strings once per test."""
    self.otu_table1 = parse_biom_table(otu_table1)
    self.otu_table_with_taxonomy = parse_biom_table(otu_table_with_taxonomy)
    self.genome_table1 = parse_biom_table(genome_table1)
    self.genome_table2 = parse_biom_table(genome_table2)
    self.predicted_metagenome_table1 = parse_biom_table(predicted_metagenome_table1)
    # These two are kept as raw strings (not parsed) by design.
    self.predicted_gene_partition_table = predicted_gene_partition_table
    self.predicted_gene_partition_table_with_taxonomy =\
      predicted_gene_partition_table_with_taxonomy
    #Examples of BIOM format value,id,metadata tuples
    #as returned when iterating over a table
    #metadata are defined at the bottom of this file.
    self.metadata_example = [(700.0,"Gene1",example_metadata1),\
      (250.0,"Gene2",example_metadata2),(0.0,"Gene3",example_metadata3)]
def test_split_otu_table_on_sample_metadata_extra_mapping_entries(self):
    """ split_otu_table_on_sample_metadata functions as expected with extra mapping data """
    observed = [(id_, parse_biom_table(t))
                for id_, t in split_otu_table_on_sample_metadata(
                    self.otu_table_f1, self.mapping_f2, "Treatment")]
    expected = [(id_, parse_biom_table(t)) for id_, t in otu_table_exp1]
    # Order of the split results is not guaranteed; compare sorted pairs.
    observed.sort()
    expected.sort()
    for obs_pair, exp_pair in zip(observed, expected):
        self.assertEqual(obs_pair, exp_pair,
                         "OTU tables are not equal:\n%s\n%s" %
                         (format_biom_table(obs_pair[1]),
                          format_biom_table(exp_pair[1])))
def test_split_otu_table_on_sample_metadata_extra_mapping_entries(self):
    """ split_otu_table_on_sample_metadata functions as expected with extra mapping data """
    raw = split_otu_table_on_sample_metadata(self.otu_table_f1,
                                             self.mapping_f2, "Treatment")
    # Result ordering is not guaranteed; compare the sorted pair lists.
    actual = sorted((id_, parse_biom_table(e)) for id_, e in raw)
    exp = sorted((id_, parse_biom_table(e)) for id_, e in otu_table_exp1)
    for a, e in zip(actual, exp):
        self.assertEqual(a, e, "OTU tables are not equal:\n%s\n%s" %
                         (format_biom_table(a[1]), format_biom_table(e[1])))
def load_otus(conn, table, params):
    """Bulk-load a biom table's observations into the database.

    conn -- open database connection supporting `lock table` and a COPY-style
      bulk load via bulk_load() -- NOTE(review): looks PostgreSQL-specific;
      confirm.
    table -- biom table input accepted by parse_biom_table
    params -- dict with keys 'table_id', 'study_id', 'ref', 'trim',
      'similarity' describing the observation_tables row to insert.
    """
    table = parse_biom_table(table)
    cur = conn.cursor()
    print "loading observations..."
    print "locking..."
    # Both tables are locked up front so the insert + bulk load commit
    # atomically with respect to other loaders.
    cur.execute('lock table observations')
    cur.execute('lock table observation_tables')
    # NOTE(review): SQL is built via string interpolation, so the params
    # values must come from a trusted source; consider parameterized queries.
    cur.execute("insert into observation_tables values ('%s','%s','%s','%s',%f)" % (\
        params['table_id'], params['study_id'], params['ref'], params['trim'],
        params['similarity']))
    tableid = params['table_id']
    study = params['study_id']
    print "writing observations..."
    # Stage rows into a CSV file, then bulk-load it in one pass.
    obs_fname = '%s_observations.csv' % study
    f = open(obs_fname, 'w')
    for values, sid, md in table.iterSamples(conv_to_np=False):
        # Sample ids are namespaced by study ("study::sample").
        study_sample = "%s::%s" % (study,sid)
        # `values` behaves like a sparse mapping of (row, col_idx) -> value,
        # where col_idx indexes into ObservationIds -- TODO confirm against
        # the biom-format version in use.
        for (row, c_idx), val in values.items():
            f.write("%s,%s,%s,%f\n" % (tableid, study_sample,
                                       table.ObservationIds[c_idx], val))
    f.close()
    bulk_load("observations", obs_fname, cur)
    print "committing..."
    conn.commit()
    # The staged CSV is only needed for the load; clean it up.
    os.remove(obs_fname)
def getResult(self, data_path, tree_path=None): """Returns distance matrix from (indcidence matrix and optionally tree). Parameters: data_path: path to data file, matrix (samples = cols, taxa = rows) in tab-delimited text format tree_path: path or object. if method is phylogenetic, must supply tree_path. if path, path to Newick-format tree file where taxon ids match taxon ids in the input data file. returns 2d dist matrix, list of sample names ordered as in dist mtx """ #if it's a phylogenetic metric, read the tree if self.IsPhylogenetic: tree = self.getTree(tree_path) else: tree = None otu_table = parse_biom_table(open(data_path,'U')) if isinstance(otu_table, DenseTable): otumtx = otu_table._data.T else: otumtx = asarray([v for v in otu_table.iterSampleData()]) # get the 2d dist matrix from beta diversity analysis if self.IsPhylogenetic: return (self.Metric(otumtx, otu_table.ObservationIds, tree, otu_table.SampleIds), list(otu_table.SampleIds)) else: return self.Metric(otumtx), list(otu_table.SampleIds)
def test_qualitative(self):
    """ TableSummarizer functions as expected with qualitative=True """
    summarizer = TableSummarizer()
    result = summarizer(
        table=(parse_biom_table(self.biom1_lines),
               self.biom1_lines.split("\n")),
        qualitative=True)
    self.assertEqual(result["biom_summary"], self.summary_qualitative_lines)
def main():
    """Filter an OTU table's observations by taxonomy metadata.

    Keeps observations matching any of the comma-separated positive taxa
    and discards those matching any negative taxa, then writes the
    filtered table.

    Fixes: removed the redundant `else: x = None` branches; the output
    file is no longer opened (and truncated) before filtering succeeds;
    handles are closed via context managers.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    with open(opts.input_otu_table_fp, 'U') as in_f:
        input_table = parse_biom_table(in_f)

    # Comma-separated taxa lists; None means "no constraint" on that side.
    positive_taxa = (opts.positive_taxa.split(',')
                     if opts.positive_taxa is not None else None)
    negative_taxa = (opts.negative_taxa.split(',')
                     if opts.negative_taxa is not None else None)

    filter_fn = get_otu_ids_from_taxonomy_f(positive_taxa,
                                            negative_taxa,
                                            opts.metadata_field)
    output_table = input_table.filterObservations(filter_fn)

    with open(opts.output_otu_table_fp, 'w') as out_f:
        out_f.write(format_biom_table(output_table))
def split_otu_table_on_taxonomy_to_files(otu_table_fp, level, output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir

    otu_table_fp -- filepath to the input .biom OTU table
    level -- taxonomic level (depth) to split at
    output_dir -- directory for the per-taxon .biom files (created if needed)
    md_identifier -- observation metadata key holding the taxonomy
    md_processor -- callable(obs_md, md_identifier, level) -> bin label

    Returns the list of output filepaths written.
    """
    results = []
    otu_table = parse_biom_table(open(otu_table_fp,'U'))
    create_dir(output_dir)
    def split_f(obs_md):
        # Translate metadata-processing failures into user-facing messages
        # describing the likely input-format cause.
        try:
            result = md_processor(obs_md,md_identifier,level)
        except KeyError:
            raise KeyError,\
             "Metadata identifier (%s) is not associated with all (or any) observerations. You can modify the key with the md_identifier parameter." % md_identifier
        except TypeError:
            raise TypeError,\
             "Can't correctly process the metadata string. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_as_string."
        except AttributeError:
            raise AttributeError,\
             "Metadata category not found. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_identifier \"Consensus Lineage\"."
        return result
    # One sub-table (and one output .biom file) per metadata bin.
    for bin, sub_otu_table in otu_table.binObservationsByMetadata(split_f):
        output_fp = '%s/otu_table_%s.biom' % (output_dir,bin)
        output_f = open(output_fp,'w')
        output_f.write(format_biom_table(sub_otu_table))
        output_f.close()
        results.append(output_fp)
    return results
def setUp(self):
    """Set up data for use in unit tests."""
    self.cmd = MetadataAdder()
    # Raw biom JSON fixture (string) and its parsed Table form.
    self.biom_lines1 = biom1
    self.biom_table1 = parse_biom_table(self.biom_lines1)
    # Sample / observation metadata fixtures as lists of lines.
    self.sample_md_lines1 = sample_md1.split('\n')
    self.obs_md_lines1 = obs_md1.split('\n')
def split_otu_table_on_taxonomy_to_files(otu_table_fp, level, output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir

    otu_table_fp -- filepath to the input .biom OTU table
    level -- taxonomic level (depth) to split at
    output_dir -- directory for the per-taxon .biom files (created if needed)
    md_identifier -- observation metadata key holding the taxonomy
    md_processor -- callable(obs_md, md_identifier, level) -> bin label

    Returns the list of output filepaths written.
    """
    results = []
    otu_table = parse_biom_table(open(otu_table_fp, 'U'))
    create_dir(output_dir)
    def split_f(obs_md):
        # Translate metadata-processing failures into user-facing messages
        # describing the likely input-format cause.
        try:
            result = md_processor(obs_md, md_identifier, level)
        except KeyError:
            raise KeyError,\
             "Metadata identifier (%s) is not associated with all (or any) observerations. You can modify the key with the md_identifier parameter." % md_identifier
        except TypeError:
            raise TypeError,\
             "Can't correctly process the metadata string. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_as_string."
        except AttributeError:
            raise AttributeError,\
             "Metadata category not found. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_identifier \"Consensus Lineage\"."
        return result
    # One sub-table (and one output .biom file) per metadata bin.
    for bin, sub_otu_table in otu_table.binObservationsByMetadata(split_f):
        output_fp = '%s/otu_table_%s.biom' % (output_dir, bin)
        output_f = open(output_fp, 'w')
        output_f.write(format_biom_table(sub_otu_table))
        output_f.close()
        results.append(output_fp)
    return results
def load_category_files(category_files): """Loads the category tables as biom files INPUTS: category_files -- a dictionary that associates the mapping category (key) with the file path to the otu_table summarizing that OUTPUTS: category_tables -- a dictionary that associates the mapping category with the summarized otu table for the category. """ category_tables = {} watch_count = 0 watch_list = [] for (category, category_file) in category_files.iteritems(): if isfile(category_file): cat_table = parse_biom_table(open(category_file, 'U')) category_tables[category] = cat_table else: watch_list.append('The summarized OTU table file cannot be found ' 'for %s. \n%s is not in the file path.' % (category, category_file)) watch_count = watch_count + 1 if watch_count > 0: print 'The following category files could not be found: \n%s' \ % '\n'.join(watch_list) if watch_count == len(category_files): raise ValueError('No files could be found for any of the supplied ' 'categories. \n%s' % '\n'.join(watch_list)) return category_tables
def add_counts_to_mapping(biom_lines, mapping_lines, otu_counts, output_fp):
    """Counts the number of seqs/OTUs per sample and add its to the mapping file

    Inputs:
        biom_lines: BIOM-format table lines
        mapping_lines: mapping file lines
        otu_counts: passed through as binary_counts to the stats computation
        output_fp: path where the augmented mapping file is written
    """
    biom = parse_biom_table(biom_lines)
    map_data, headers, comments = parse_mapping_file(mapping_lines)

    # Only counts_per_sample is consumed here; the summary stats are unused.
    (min_count, max_count, median_count, mean_count,
     counts_per_sample) = compute_counts_per_sample_stats(
        biom, binary_counts=otu_counts)

    # Insert the new column just before the final header column.
    insert_at = len(headers) - 1
    headers.insert(insert_at, "NumIndividuals")
    for row in map_data:
        row.insert(insert_at, str(counts_per_sample[row[0]]))

    write_corrected_mapping(output_fp, headers, comments, map_data)
def main():
    """Collapse a BIOM table's observations by a metadata category/level."""
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.level <= 0:
        # FIX: was `parser.error(...)` -- no name `parser` exists in this
        # scope, so the check itself raised NameError. The parser object is
        # named `option_parser`.
        option_parser.error("level must be greater than zero!")

    # Build the collapse function for the requested category and level.
    collapse_f = make_collapse_f(opts.metadata_category, opts.level,
                                 opts.ignore)
    table = parse_biom_table(open(opts.input_fp))

    # one_to_many: a single observation may map to several categories.
    result = table.collapseObservationsByMetadata(
        collapse_f, one_to_many=True, norm=False,
        one_to_many_md_key=opts.metadata_category)

    f = open(opts.output_fp, 'w')
    if (opts.format_tab_delimited):
        f.write(
            result.delimitedSelf(header_key=opts.metadata_category,
                                 header_value=opts.metadata_category,
                                 metadata_formatter=lambda s: '; '.join(s)))
    else:
        f.write(result.getBiomFormatJsonString('picrust %s - categorize_by_function'\
                                               % __version__))
    f.close()
def getResult(self, data_path, tree_path=None):
    """Returns distance matrix from (indcidence matrix and optionally tree).

    Parameters:

    data_path: path to data file, matrix (samples = cols, taxa = rows)
    in tab-delimited text format

    tree_path: path or object.
    if method is phylogenetic, must supply tree_path.
    if path, path to
    Newick-format tree file where taxon ids match
    taxon ids in the input data file.

    returns 2d dist matrix, list of sample names ordered as in dist mtx
    """
    # Phylogenetic metrics need the tree; non-phylogenetic ones don't.
    tree = self.getTree(tree_path) if self.IsPhylogenetic else None

    otu_table = parse_biom_table(open(data_path, 'U'))
    if isinstance(otu_table, DenseTable):
        otumtx = otu_table._data.T
    else:
        otumtx = asarray([v for v in otu_table.iterSampleData()])

    sample_names = list(otu_table.SampleIds)

    # Run the beta-diversity metric to get the 2d distance matrix.
    if self.IsPhylogenetic:
        dists = self.Metric(otumtx, otu_table.ObservationIds, tree,
                            otu_table.SampleIds)
        return (dists, sample_names)
    return self.Metric(otumtx), sample_names
def main():
    """Drive simsam over ranges of sample sizes and dissimilarities."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    create_dir(output_dir)

    # Load the OTU table.
    otu_table_fp = opts.otu_table
    otu_table_f = open(otu_table_fp, 'U')
    otu_table = parse_biom_table(otu_table_f)
    otu_table_f.close()

    # Load the tree.
    tree_f = open(opts.tree_file, 'U')
    tree = DndParser(tree_f)
    tree_f.close()

    # The mapping file is optional.
    mapping_fp = opts.mapping_fp
    if mapping_fp:
        mapping_f = open(mapping_fp, 'U')
        input_map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        mapping_f = None
        input_map_basename = None

    input_table_basename = splitext(split(otu_table_fp)[1])[0]

    simsam_range_to_files(
        otu_table,
        tree,
        simulated_sample_sizes=[int(n) for n in opts.num.split(',')],
        dissimilarities=[float(d) for d in opts.dissim.split(',')],
        output_dir=output_dir,
        mapping_f=mapping_f,
        output_table_basename=input_table_basename,
        output_map_basename=input_map_basename)
def calc_shared_phylotypes(infile, reference_sample=None):
    """Calculates number of shared phylotypes for each pair of sample.

    infile: otu table filehandle

    reference_sample: if set, will use this sample name to calculate shared
                      OTUs between reference sample, and pair of samples.
                      Useful, e.g. when the reference sample is the Donor in
                      a transplant study
    """
    otu_table = parse_biom_table(infile)
    sample_ids = otu_table.SampleIds
    n = len(sample_ids)
    result_array = zeros((n, n), dtype=int)

    # Fill the symmetric matrix one lower-triangle cell at a time.
    for i in range(n):
        for j in range(i + 1):
            if reference_sample:
                shared = _calc_shared_phylotypes_multiple(
                    otu_table,
                    [sample_ids[i], sample_ids[j], reference_sample])
            else:
                shared = _calc_shared_phylotypes_pairwise(
                    otu_table, sample_ids[i], sample_ids[j])
            result_array[i, j] = result_array[j, i] = shared

    return format_distance_matrix(sample_ids, result_array) + "\n"
def create_fused_data_matrix(biom_fname, map_fname, foi_fname, is_fasta=False):
    """Fuse OTU/k-mer counts with environmental data into one matrix.

    @biom_fname - path to the BIOM file containing the interesting counts
        (shall they be k-mers or OTUs, up to you!)
    @map_fname - path to the mapping file with the environmental factors
    @foi_fname - path to the tab delimited text file containing two columns:
        the first one contains the keys to the important environmental
        factors, and the second column contains whether the variable is
        continuous (e.g. temperature) or discrete (male/female)
    @is_fasta - when True, counts were supposed to be read as k-mers from a
        FASTA file; that path was never implemented (see below)

    @data_matrix (return) - dense matrix containing both OTU/k-mer data AND
        environmental data
    @site_names (return) - names of the different samples
    @variable_names (return) - names of the different variables used
    @environmental_param (return) - names of the environmental parameters
        (as used in the foi_fname)
    @hashtable_env (return) - correspondences between discrete variables and
        class affected (for instance male->1, female->0)
    """
    if is_fasta:
        # FIX: the FASTA branch never defined data_matrix, site_names or
        # variable_names (the k-mer counting step was only a comment), so the
        # merge_matrices() call below always crashed with a NameError.
        # Fail explicitly with a clear message instead.
        raise NotImplementedError(
            "k-mer counting from a FASTA file is not implemented; "
            "provide a BIOM file and call with is_fasta=False.")

    biom_table = parse_biom_table(open(biom_fname, 'U'))
    # Convert the biom table to a dense numpy array.
    data_matrix, site_names, variable_names = biom_table_to_array(biom_table)
    (env_table, site_names_env, environmental_param,
     is_continuous, hashtable_env) = read_environment_table(
        open(map_fname, 'U'), open(foi_fname, 'U'))

    # Keep only the sites common to both the mapping and biom files, making
    # sure each site from one file is aligned with the matching site from
    # the other.
    complete_matrix, complete_sites, complete_variables = merge_matrices(
        data_matrix, env_table, site_names, site_names_env, variable_names,
        environmental_param)
    return (complete_matrix, complete_sites, variable_names,
            environmental_param, hashtable_env)
def test_format_tep_file_lines(self): """ format_tep_file_lines: this converts files into tep lines """ # set variables prefs_dict1 = {'sample_coloring': {'TEST1': {'column': 'TEST1', 'colors': (('red', (0, 100, 100)), ('blue', (240, 100, 100)))}}} test_biom2 = parse_biom_table(biom2) # test with prefs file exp1 = ['>>tre\n', "['(tax1:0.00000043418318065054,((tax2:0.01932550067944402081,tax3:0.08910446960529855298):0.00000043418318065054,tax4:0.17394765077611337722):0.00000043418318065054,tax5:0.00000043418318065054):0.0;']", '\n', '>>otm\n#OTU ID\tOTU Metadata\n', u'tax1\tk__Bacteria;p__Proteobacteria;', '\n', u'tax2\tk__Bacteria;p__Cyanobacteria;', '\n', '>>osm\n', '# Constructed from biom file\n#OTU ID\tsam1\tsam2\tConsensus Lineage\ntax1\t7.0\t4.0\tk__Bacteria;p__Proteobacteria\ntax2\t1.0\t2.0\tk__Bacteria;p__Cyanobacteria', '\n>>sam\n', "['#SampleID\\tcol1\\tcol0\\tDescription', 'sam1\\tv1_3\\tv0_3\\td1', 'sam2\\taval\\tanother\\td2']", '\n>>pre\n', '0,100,100,\n', '240,100,100,\n', '>defaultTEST1:TEST1\n'] obs1 = format_tep_file_lines(test_biom2, StringIO(example_mapping_file2.split('\n')), StringIO(example_tree.split('\n')), prefs_dict1) self.assertEqual(obs1,exp1) # test without prefs file exp2 = ['>>tre\n', "['(tax1:0.00000043418318065054,((tax2:0.01932550067944402081,tax3:0.08910446960529855298):0.00000043418318065054,tax4:0.17394765077611337722):0.00000043418318065054,tax5:0.00000043418318065054):0.0;']", '\n', '>>otm\n#OTU ID\tOTU Metadata\n', u'tax1\tk__Bacteria;p__Proteobacteria;', '\n', u'tax2\tk__Bacteria;p__Cyanobacteria;', '\n', '>>osm\n', '# Constructed from biom file\n#OTU ID\tsam1\tsam2\tConsensus Lineage\ntax1\t7.0\t4.0\tk__Bacteria;p__Proteobacteria\ntax2\t1.0\t2.0\tk__Bacteria;p__Cyanobacteria', '\n>>sam\n', "['#SampleID\\tcol1\\tcol0\\tDescription', 'sam1\\tv1_3\\tv0_3\\td1', 'sam2\\taval\\tanother\\td2']"] obs2 = format_tep_file_lines(test_biom2, StringIO(example_mapping_file2.split('\n')), StringIO(example_tree.split('\n')), {}) 
self.assertEqual(obs2,exp2)
def test_default(self):
    """ TableSummarizer functions as expected """
    summarizer = TableSummarizer()
    # The command takes the parsed table paired with its raw lines.
    table_pair = (parse_biom_table(self.biom1_lines),
                  self.biom1_lines.split("\n"))
    result = summarizer(table=table_pair, qualitative=False)
    self.assertEqual(result["biom_summary"], self.summary_default_lines)
def readBIOM(fileName):
    """Parse a BIOM file into a TaxonNode tree, accumulating counts at leaves.

    fileName: path to a BIOM-format file whose observations carry a
        "taxonomy" metadata list.

    Returns the root TaxonNode; root.sampleIDs holds the table's sample ids
    and root.maxdepth the deepest taxonomy level seen.
    """
    f = open(fileName, "r")
    table = parse_biom_table(f)
    f.close()

    root = TaxonNode()
    root.sampleIDs = list(table._sample_ids)
    for obs in table.iter(axis='observation'):
        counts = obs[0]
        taxonomy = obs[2]["taxonomy"]
        root.maxdepth = max(root.maxdepth, len(taxonomy) - 1)

        # Walk (and extend as needed) the tree along the taxonomy path.
        node = root
        for taxon in taxonomy:
            existing = node.getNode(taxon)
            # FIX: identity comparison (was `== None` / `!= None`).
            if existing is None:
                node = node.addNode(taxon, None, None)
            else:
                node = existing

        # FIX: accumulate counts once, at the leaf node. The old code tested
        # `taxon == taxonomy[-1]` inside the loop, which also fired at any
        # intermediate rank whose name equalled the leaf's name.
        if taxonomy:
            if node.counts is not None:
                # Elementwise sum; a list comprehension stays a list on both
                # Python 2 and 3 (map() returns an iterator on Python 3).
                node.counts = [sum(pair) for pair in zip(node.counts, counts)]
            else:
                node.counts = counts
    return root
def main():
    """Sort an OTU table's samples by one of three strategies."""
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_table_data = parse_biom_table(open(opts.input_otu_table, 'U'))

    # Three sorting modes: by a mapping-file field, by an explicit
    # sample-id list, or (default) by natural-sorted sample ids.
    if opts.sort_field and opts.mapping_fp:
        mapping_data = parse_mapping_file(open(opts.mapping_fp, 'U'))
        result = sort_otu_table_by_mapping_field(otu_table_data,
                                                 mapping_data,
                                                 opts.sort_field)
    elif opts.sorted_sample_ids_fp:
        sorted_ids = sample_ids_from_f(open(opts.sorted_sample_ids_fp, 'U'))
        result = sort_otu_table(otu_table_data, sorted_ids)
    else:
        result = sort_otu_table(
            otu_table_data,
            natsort_case_insensitive(otu_table_data.SampleIds))

    # format and write the otu table
    out_f = open(opts.output_fp, 'w')
    out_f.write(format_biom_table(result))
    out_f.close()
def calc_shared_phylotypes(infile, reference_sample=None):
    """Calculates number of shared phylotypes for each pair of sample.

    infile: otu table filehandle

    reference_sample: if set, will use this sample name to calculate shared
                      OTUs between reference sample, and pair of samples.
                      Useful, e.g. when the reference sample is the Donor in
                      a transplant study
    """
    otu_table = parse_biom_table(infile)
    samples = otu_table.SampleIds
    num_samples = len(samples)
    result_array = zeros((num_samples, num_samples), dtype=int)

    # Compute each lower-triangle entry and mirror it to keep the matrix
    # symmetric.
    for i, samp1_id in enumerate(samples):
        for j, samp2_id in enumerate(samples[:i + 1]):
            if reference_sample:
                count = _calc_shared_phylotypes_multiple(
                    otu_table, [samp1_id, samp2_id, reference_sample])
            else:
                count = _calc_shared_phylotypes_pairwise(
                    otu_table, samp1_id, samp2_id)
            result_array[i, j] = count
            result_array[j, i] = count

    return format_distance_matrix(samples, result_array) + "\n"
def main():
    """Filter a distance matrix down to a chosen set of sample ids."""
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    output_f = open(opts.output_distance_matrix, 'w')

    # Decide which sample ids to keep, from whichever source was supplied.
    if opts.otu_table_fp:
        otu_table = parse_biom_table(open(opts.otu_table_fp, 'U'))
        samples_to_keep = otu_table.SampleIds
    elif opts.sample_id_fp:
        samples_to_keep = get_seqs_to_keep_lookup_from_seq_id_file(
            open(opts.sample_id_fp, 'U'))
    elif opts.mapping_fp and opts.valid_states:
        samples_to_keep = sample_ids_from_metadata_description(
            open(opts.mapping_fp, 'U'), opts.valid_states)
    else:
        option_parser.error(
            'must pass either --sample_id_fp, -t, or -m and -s')

    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep these samples specified. So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    filtered = filter_samples_from_distance_matrix(
        parse_distmat(open(opts.input_distance_matrix, 'U')),
        samples_to_keep,
        negate=not opts.negate)
    output_f.write(filtered)
    output_f.close()