def test_sort_otu_table(self):
    """sort_otu_table returns the expected table for a given ID ordering"""
    requested_order = ['NA', 'Key', 'Fing']
    input_table = parse_biom_table_str(self.otu_table1)
    observed = sort_otu_table(input_table, requested_order)
    # the expected result is the fixture stored as age_sorted_otu_table1
    self.assertEqual(observed,
                     parse_biom_table_str(self.age_sorted_otu_table1))
def test_sort_otu_table_error(self):
    """sort_otu_table raises on invalid ID orderings"""
    # an ordering that lists 'Key' twice must raise ValueError
    repeated_ids = ['NA', 'Key', 'Fing', 'Key']
    self.assertRaises(ValueError, sort_otu_table,
                      parse_biom_table_str(self.otu_table1), repeated_ids)

    # an ordering that leaves out 'Fing' must raise KeyError
    incomplete_ids = ['NA', 'Key']
    self.assertRaises(KeyError, sort_otu_table,
                      parse_biom_table_str(self.otu_table1), incomplete_ids)
def test_sort_otu_table_by_mapping_field_all_values_differ(self):
    """sort_otu_table_by_mapping_field works when all values differ"""
    table = parse_biom_table_str(self.otu_table1)
    mapping = parse_mapping_file(self.mapping_f2)
    observed = sort_otu_table_by_mapping_field(table, mapping,
                                               sort_field="Age")
    self.assertEqual(observed,
                     parse_biom_table_str(self.age_sorted_otu_table1))
def test_sort_otu_table_by_mapping_field_some_values_differ(self):
    """sort_otu_table_by_mapping_field works when some values differ"""
    table = parse_biom_table_str(self.otu_table1)
    mapping = parse_mapping_file(self.mapping_f2)
    observed = sort_otu_table_by_mapping_field(table, mapping,
                                               sort_field="Nothing")
    self.assertEqual(observed,
                     parse_biom_table_str(self.nothing_sorted_otu_table1))
def test_sort_otu_table_by_mapping_field_some_values_same(self):
    """sort_otu_table_by_mapping_field works when some values are the same"""
    table = parse_biom_table_str(self.otu_table1)
    mapping = parse_mapping_file(self.mapping_f2)
    observed = sort_otu_table_by_mapping_field(table, mapping,
                                               sort_field="Name")
    self.assertEqual(observed,
                     parse_biom_table_str(self.name_sorted_otu_table1))
def setUp(self):
    """Parse the BIOM fixture strings and store them on the test case."""
    to_table = parse_biom_table_str

    self.otu_table1 = to_table(otu_table1)
    self.otu_table_with_taxonomy = to_table(otu_table_with_taxonomy)
    self.genome_table1 = to_table(genome_table1)
    self.genome_table2 = to_table(genome_table2)
    self.predicted_metagenome_table1 = to_table(predicted_metagenome_table1)

    # the gene partition fixtures are stored as-is, without parsing
    self.predicted_gene_partition_table = predicted_gene_partition_table
    self.predicted_gene_partition_table_with_taxonomy = \
        predicted_gene_partition_table_with_taxonomy
def setUp(self):
    """Parse the BIOM fixture strings and store them on the test case."""
    # (attribute name, raw BIOM string) pairs, parsed in this order
    parsed_fixtures = (
        ('otu_table1', otu_table1),
        ('otu_table_with_taxonomy', otu_table_with_taxonomy),
        ('genome_table1', genome_table1),
        ('genome_table2', genome_table2),
        ('predicted_metagenome_table1', predicted_metagenome_table1),
    )
    for attr_name, raw_biom in parsed_fixtures:
        setattr(self, attr_name, parse_biom_table_str(raw_biom))

    # the gene partition fixtures are kept unparsed
    self.predicted_gene_partition_table = predicted_gene_partition_table
    self.predicted_gene_partition_table_with_taxonomy = \
        predicted_gene_partition_table_with_taxonomy
def test_sort_otu_table_by_mapping_field_error(self):
    """sort_otu_table_by_mapping_field raises KeyError for samples in the
    otu table that are missing from the mapping file"""
    bad_table = parse_biom_table_str(self.otu_table1_bad_sampleID)
    mapping = parse_mapping_file(self.mapping_f2)
    self.assertRaises(KeyError, sort_otu_table_by_mapping_field,
                      bad_table, mapping, sort_field="Age")
def test_compare_treatment_dists(self):
    """Tests that the entire library functions as expected.

    Builds an 8 OTU x 13 sample BIOM table, a mapping dict keyed by sample
    id and a tree over O1..O8, then checks the output of
    compare_treatment_dists for the 'Diet' category against known values.
    """
    bt = parse_biom_table_str('{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","type": "OTU table","generated_by": "test_code","date": "2013-01-30T14:47:00.868638","matrix_type": "sparse","matrix_element_type": "float","shape": [8, 13],"data": [[0,0,100.0],[0,1,12.0],[0,2,45.0],[0,7,24.0],[0,8,67.0],[0,9,132.0],[0,10,991.0],[0,11,21.0],[0,12,5.0],[1,0,54.0],[1,1,989.0],[1,2,200.0],[1,3,425.0],[1,4,2.0],[1,5,4.0],[1,11,52.0],[1,12,13.0],[2,0,4.0],[2,3,11.0],[2,4,11.0],[2,5,100.0],[2,6,491.0],[2,7,55.0],[2,8,98.0],[2,9,54.0],[2,10,104.0],[3,0,45.0],[3,1,78.0],[3,2,52.0],[3,3,14.0],[3,11,1000.0],[3,12,141.0],[4,0,92.0],[4,1,46.0],[4,2,49.0],[4,3,770.0],[4,4,14.0],[4,7,1.0],[4,8,55.0],[4,9,255.0],[4,12,14.0],[5,0,67.0],[5,1,92.0],[5,2,800.0],[5,4,13.0],[5,5,17.0],[5,6,29.0],[5,7,27.0],[5,8,25.0],[5,9,221.0],[5,10,228.0],[5,11,9.0],[5,12,9.0],[6,0,150.0],[6,1,149.0],[6,2,11.0],[6,3,35.0],[6,4,899.0],[6,5,766.0],[6,6,348.0],[6,7,680.0],[6,8,496.0],[6,9,467.0],[6,10,13.0],[6,11,327.0],[6,12,855.0],[7,0,300.0],[7,1,45.0],[7,2,78.0],[7,3,22.0],[7,4,361.0],[7,5,271.0],[7,6,531.0],[7,7,256.0],[7,8,251.0],[7,10,70.0],[7,11,250.0],[7,12,31.0]],"rows": [{"id": "O1", "metadata": null},{"id": "O2", "metadata": null},{"id": "O3", "metadata": null},{"id": "O4", "metadata": null},{"id": "O5", "metadata": null},{"id": "O6", "metadata": null},{"id": "O7", "metadata": null},{"id": "O8", "metadata": null}],"columns": [{"id": "a1", "metadata": null},{"id": "a2", "metadata": null},{"id": "a3", "metadata": null},{"id": "b1", "metadata": null},{"id": "b2", "metadata": null},{"id": "b3", "metadata": null},{"id": "c1", "metadata": null},{"id": "c2", "metadata": null},{"id": "c3", "metadata": null},{"id": "c4", "metadata": null},{"id": "d1", "metadata": null},{"id": "d2", "metadata": null},{"id": "d3", "metadata": null}]}')
    # mapping data: sample id -> {category: value}
    mf = {
        'a1': {'Diet': 'LF', 'HSID': 'a', 'Pref': '1'},
        'a2': {'Diet': 'LF', 'HSID': 'a', 'Pref': '1'},
        'a3': {'Diet': 'HF', 'HSID': 'a', 'Pref': '1'},
        'b1': {'Diet': 'HF', 'HSID': 'b', 'Pref': '4'},
        'b2': {'Diet': 'HF', 'HSID': 'b', 'Pref': '5'},
        'b3': {'Diet': 'HF', 'HSID': 'b', 'Pref': '5'},
        'c1': {'Diet': 'LF', 'HSID': 'c', 'Pref': '5'},
        'c2': {'Diet': 'LF', 'HSID': 'c', 'Pref': '5'},
        'c3': {'Diet': 'LF', 'HSID': 'c', 'Pref': '5'},
        'c4': {'Diet': 'LF', 'HSID': 'd', 'Pref': '2'},
        'd1': {'Diet': 'HF', 'HSID': 'd', 'Pref': '2'},
        'd2': {'Diet': 'HF', 'HSID': 'd', 'Pref': '1'},
        'd3': {'Diet': 'HF', 'HSID': 'd', 'Pref': '1'}}
    tr = DndParser('(((O1:0.06,O2:0.1)A:0.031,(O3:0.001,O4:0.01)B:0.2)AB:0.4,((O5:0.03,O6:0.02)C:0.13,(O7:0.01,O8:0.005)D:0.1)CD:0.3)root;')

    # test known quantity
    # (a nested-list `sids` assignment used to precede this one; it was
    # overwritten before ever being read, so only the flat list is kept)
    sids = ['a1', 'a2', 'c1', 'd1', 'd2', 'd3']
    exp_out = (['LF', 'HF'],
               array([[0.11071343, 0.07019723],
                      [0., 0.06787341]]),
               array([[0.01296343, 0.00657256],
                      [0., 0.00562879]]),
               array([[0.0763829, 0.0015125],
                      [0.0724499, 0.00154609]]))
    obs_out = compare_treatment_dists(sids, 'Diet', mf, bt,
                                      'unweighted_unifrac', tr)
    self.assertEqual(exp_out[0], obs_out[0])
    for i, j in zip(obs_out[1:], exp_out[1:]):
        # have to use allclose because of tiny float errors
        self.assertTrue(allclose(i, j))
def test_plot_rank_abundance_graphs_dense(self):
    """plot_rank_abundance_graphs works with any number of samples (DenseOTUTable)"""
    self.otu_table = parse_biom_table_str(otu_table_dense)
    self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                prefix="test_plot_rank_abundance",
                                suffix="/")
    create_dir(self.dir)
    self._dirs_to_remove.append(self.dir)

    # an empty sample name, then an invalid one, must both be rejected
    for bad_sample_name in ('', 'Invalid_sample_name'):
        self.assertRaises(ValueError, plot_rank_abundance_graphs,
                          bad_sample_name, self.otu_table, self.dir)

    file_type = "pdf"

    # two samples
    plot_rank_abundance_graphs('S3,S5', self.otu_table, self.dir,
                               file_type=file_type)
    two_sample_plot = abspath(
        self.dir + "rank_abundance_cols_0_2." + file_type)
    self.assertTrue(exists(two_sample_plot))
    self.files_to_remove.append(two_sample_plot)

    # all samples
    plot_rank_abundance_graphs('*', self.otu_table, self.dir,
                               file_type=file_type)
    all_sample_plot = abspath(
        self.dir + "rank_abundance_cols_0_1_2." + file_type)
    self.files_to_remove.append(all_sample_plot)
    self.assertTrue(exists(all_sample_plot))
def test_plot_rank_abundance_graphs_dense(self):
    """plot_rank_abundance_graphs works with any number of samples (DenseOTUTable)"""
    self.otu_table = parse_biom_table_str(otu_table_dense)
    self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                prefix="test_plot_rank_abundance",
                                suffix="/")
    create_dir(self.dir)
    self._dirs_to_remove.append(self.dir)
    scratch_fp = get_tmp_filename(tmp_dir=self.dir)

    # an empty sample name, then an invalid one, must both be rejected
    for bad_sample_name in ('', 'Invalid_sample_name'):
        self.assertRaises(ValueError, plot_rank_abundance_graphs,
                          scratch_fp, bad_sample_name, self.otu_table)

    file_type = "pdf"

    # two samples
    two_sample_plot = abspath(
        self.dir + "rank_abundance_cols_0_2." + file_type)
    plot_rank_abundance_graphs(two_sample_plot, 'S3,S5', self.otu_table,
                               file_type=file_type)
    self.assertTrue(exists(two_sample_plot))
    self.files_to_remove.append(two_sample_plot)

    # all samples
    all_sample_plot = abspath(
        self.dir + "rank_abundance_cols_0_1_2." + file_type)
    plot_rank_abundance_graphs(all_sample_plot, '*', self.otu_table,
                               file_type=file_type)
    self.files_to_remove.append(all_sample_plot)
    self.assertTrue(exists(all_sample_plot))
def test_longitudinal_otu_table_conversion_wrapper(self):
    """longitudinal_otu_table_conversion_wrapper works"""
    # The mapping rows are listed explicitly (one tab-delimited string per
    # line) rather than split out of a multi-line literal, so the fixture
    # does not depend on the literal's embedded line breaks surviving.
    # The trailing empty string mirrors what splitting a newline-terminated
    # block on '\n' yields.
    mapping_lines = [
        "#SampleID\tindividual\ttimepoint_zero\ttimepoint",
        "AT0\tA\t1\t0",
        "AT1\tA\t0\t1",
        "AT2\tA\t0\t2",
        "BT0\tB\t1\t0",
        "BT1\tB\t0\t1",
        "BT2\tB\t0\t2",
        "",
    ]
    category_mapping = parse_mapping_file(mapping_lines)
    otu_table = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "2", "metadata": null}, {"id": "3", "metadata": null}, {"id": "4", "metadata": null}], "format": "Biological Observation Matrix 1.0.0", "data": [[0, 0, 1.0], [0, 1, 2.0], [0, 2, 3.0], [1, 3, 1.0], [1, 4, 2.0], [1, 5, 3.0], [2, 0, 1.0], [2, 1, 2.0], [2, 2, 3.0], [2, 4, 1.0], [2, 5, 2.0], [3, 0, 2.0], [3, 1, 4.0], [3, 2, 6.0], [3, 4, 1.0], [3, 5, 2.0], [4, 0, 3.0], [4, 1, 2.0], [4, 2, 1.0], [4, 3, 6.0], [4, 4, 4.0], [4, 5, 2.0]], "columns": [{"id": "AT0", "metadata": null}, {"id": "AT1", "metadata": null}, {"id": "AT2", "metadata": null}, {"id": "BT0", "metadata": null}, {"id": "BT1", "metadata": null}, {"id": "BT2", "metadata": null}], "generated_by": "BIOM-Format 1.0.0-dev", "matrix_type": "sparse", "shape": [5, 6], "format_url": "http://biom-format.org", "date": "2012-08-01T09:14:03.574451", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    otu_table = parse_biom_table_str(otu_table)
    new_otu_table = longitudinal_otu_table_conversion_wrapper(
        otu_table, category_mapping, 'individual', 'timepoint_zero')

    # inspect the converted table through its string form, line by line
    new_otu_table = str(new_otu_table).split('\n')
    self.assertEqual(new_otu_table[0], "# Constructed from biom file")
    data_line1 = new_otu_table[2].split('\t')
    self.assertFloatEqual(float(data_line1[0]), 0.0)
    # sets the reference to 0
    self.assertFloatEqual(float(data_line1[1]), 0.0)
    # subtracts values from same individual from the reference
    self.assertFloatEqual(float(data_line1[2]), 0.05714286)
    # sets to ignore number when not observed across a person
    self.assertFloatEqual(float(data_line1[4]), 999999999.0)
def setUp(self):
    """Parse the BIOM fixture strings and store them on the test case."""
    to_table = parse_biom_table_str

    self.otu_table1 = to_table(otu_table1)
    self.otu_table1_with_metadata = to_table(otu_table1_with_metadata)
    self.genome_table1 = to_table(genome_table1)
    self.genome_table1_with_metadata = to_table(genome_table1_with_metadata)
    self.genome_table2 = to_table(genome_table2)
    self.predicted_metagenome_table1 = to_table(predicted_metagenome_table1)
    self.predicted_metagenome_table1_with_metadata = to_table(
        predicted_metagenome_table1_with_metadata)
def setUp(self):
    """Store a small BIOM table both as a raw string and as a parsed table."""
    # The BIOM string below encodes this 5 OTU x 3 sample table:
    #
    #   #OTU ID  S1  S2  S3
    #   0        1   0   2
    #   1        1   2   0
    #   2        1   0   0
    #   3        1   0   2
    #   4        1   1   2
    raw_biom = '{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "2", "metadata": null}, {"id": "3", "metadata": null}, {"id": "4", "metadata": null}], "format": "Biological Observation Matrix v0.9", "data": [[0, 0, 1.0], [0, 2, 2.0], [1, 0, 1.0], [1, 1, 2.0], [2, 0, 1.0], [3, 0, 1.0], [3, 2, 2.0], [4, 0, 1.0], [4, 1, 1.0], [4, 2, 2.0]], "columns": [{"id": "S1", "metadata": null}, {"id": "S2", "metadata": null}, {"id": "S3", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2583", "matrix_type": "sparse", "shape": [5, 3], "format_url": "http://www.qiime.org/svn_documentation/documentation/biom_format.html", "date": "2011-12-22T01:06:31.645277", "type": "OTU table", "id": null, "matrix_element_type": "float"}'
    self.biom_as_string = raw_biom
    self.otu_table = parse_biom_table_str(raw_biom)
def setUp(self):
    """Store a small BIOM table both as a raw string and as a parsed table."""
    # Classic-format view of the table encoded by the BIOM string
    # (rows are OTUs 0-4, columns are samples S1-S3):
    #
    #   #OTU ID  S1  S2  S3
    #   0        1   0   2
    #   1        1   2   0
    #   2        1   0   0
    #   3        1   0   2
    #   4        1   1   2
    biom_json = '{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "2", "metadata": null}, {"id": "3", "metadata": null}, {"id": "4", "metadata": null}], "format": "Biological Observation Matrix v0.9", "data": [[0, 0, 1.0], [0, 2, 2.0], [1, 0, 1.0], [1, 1, 2.0], [2, 0, 1.0], [3, 0, 1.0], [3, 2, 2.0], [4, 0, 1.0], [4, 1, 1.0], [4, 2, 2.0]], "columns": [{"id": "S1", "metadata": null}, {"id": "S2", "metadata": null}, {"id": "S3", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2583", "matrix_type": "sparse", "shape": [5, 3], "format_url": "http://www.qiime.org/svn_documentation/documentation/biom_format.html", "date": "2011-12-22T01:06:31.645277", "type": "OTU table", "id": null, "matrix_element_type": "float"}'
    self.biom_as_string = biom_json
    self.otu_table = parse_biom_table_str(self.biom_as_string)
def convert_biom_to_precalc(biom_in):
    """Converts a biom file into a PICRUSt precalculated tab-delimited file

    biom_in: a BIOM table given either as a str/unicode (handed to
     parse_biom_table_str) or as any other object (handed to
     parse_biom_table).

    Returns one string in which lines are joined by newlines and fields by
    tabs:
     - a header line: '#OTU_IDs', the observation ids, then one
       'metadata_<name>' column per sample metadata key
     - one 'metadata_<name>' line per observation metadata key
     - one line per sample: the sample id, its counts, then its sample
       metadata values
    """
    # isinstance, unlike an exact type() comparison, also routes subclasses
    # of str/unicode to the string parser
    if isinstance(biom_in, (str, unicode)):
        biom_table = parse_biom_table_str(biom_in)
    else:
        biom_table = parse_biom_table(biom_in)

    col_ids = biom_table.ObservationIds
    row_ids = biom_table.SampleIds

    lines = []
    header = ['#OTU_IDs'] + list(col_ids)

    col_metadata_names = []
    # peek at metadata for Samples (e.g. NSTI) so we can set the header
    if biom_table.SampleMetadata:
        col_metadata_names = biom_table.SampleMetadata[0].keys()

    # add the metadata names to the header
    for col_metadata_name in col_metadata_names:
        header.append('metadata_' + col_metadata_name)

    lines.append([str(field) for field in header])

    row_metadata_names = []
    # peek at metadata for observations (e.g. KEGG_Pathways)
    if biom_table.ObservationMetadata:
        row_metadata_names = biom_table.ObservationMetadata[0].keys()

    for metadata_name in row_metadata_names:
        metadata_line = ['metadata_' + metadata_name]

        # do the observation metadata now
        for col_id in col_ids:
            metadata = biom_table.ObservationMetadata[
                biom_table.getObservationIndex(col_id)]
            metadata_line.append(
                biom_meta_to_string(metadata[metadata_name]))
        lines.append([str(field) for field in metadata_line])

    # transpose the actual count data
    transposed_table = biom_table._data.T
    for idx, count in enumerate(transposed_table):
        # a list comprehension (not map) so the concatenation does not
        # depend on map() returning a list
        line = [row_ids[idx]] + [str(value) for value in count]

        # add the metadata values to the end of the row now
        for meta_name in col_metadata_names:
            line.append(biom_table.SampleMetadata[idx][meta_name])
        lines.append(line)

    return "\n".join("\t".join(map(str, x)) for x in lines)
def convert_biom_to_precalc(biom_in):
    """Converts a biom file into a PICRUSt precalculated tab-delimited file

    biom_in: a BIOM table given either as a str/unicode (handed to
     parse_biom_table_str) or as any other object (handed to
     parse_biom_table).

    Returns one string in which lines are joined by newlines and fields by
    tabs:
     - a header line: '#OTU_IDs', the observation ids, then one
       'metadata_<name>' column per sample metadata key
     - one 'metadata_<name>' line per observation metadata key
     - one line per sample: the sample id, its counts, then its sample
       metadata values
    """
    # isinstance, unlike an exact type() comparison, also routes subclasses
    # of str/unicode to the string parser
    if isinstance(biom_in, (str, unicode)):
        biom_table = parse_biom_table_str(biom_in)
    else:
        biom_table = parse_biom_table(biom_in)

    col_ids = biom_table.ObservationIds
    row_ids = biom_table.SampleIds

    lines = []
    header = ['#OTU_IDs'] + list(col_ids)

    col_metadata_names = []
    # peek at metadata for Samples (e.g. NSTI) so we can set the header
    if biom_table.SampleMetadata:
        col_metadata_names = biom_table.SampleMetadata[0].keys()

    # add the metadata names to the header
    for col_metadata_name in col_metadata_names:
        header.append('metadata_' + col_metadata_name)

    lines.append([str(field) for field in header])

    row_metadata_names = []
    # peek at metadata for observations (e.g. KEGG_Pathways)
    if biom_table.ObservationMetadata:
        row_metadata_names = biom_table.ObservationMetadata[0].keys()

    for metadata_name in row_metadata_names:
        metadata_line = ['metadata_' + metadata_name]

        # do the observation metadata now
        for col_id in col_ids:
            metadata = biom_table.ObservationMetadata[
                biom_table.getObservationIndex(col_id)]
            metadata_line.append(
                biom_meta_to_string(metadata[metadata_name]))
        lines.append([str(field) for field in metadata_line])

    # transpose the actual count data
    transposed_table = biom_table._data.T
    for idx, count in enumerate(transposed_table):
        # a list comprehension (not map) so the concatenation does not
        # depend on map() returning a list
        line = [row_ids[idx]] + [str(value) for value in count]

        # add the metadata values to the end of the row now
        for meta_name in col_metadata_names:
            line.append(biom_table.SampleMetadata[idx][meta_name])
        lines.append(line)

    return "\n".join("\t".join(map(str, x)) for x in lines)
def test_plot_rank_abundance_graphs_filetype(self):
    """plot_rank_abundance_graphs works with all filetypes"""
    self.otu_table = parse_biom_table_str(otu_table_sparse)
    self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                prefix="test_plot_rank_abundance",
                                suffix="/")
    create_dir(self.dir)
    self._dirs_to_remove.append(self.dir)

    # every supported filetype should produce a file with that extension
    supported_types = ['pdf', 'svg', 'png', 'eps']
    for file_type in supported_types:
        plot_rank_abundance_graphs('S3', self.otu_table, self.dir,
                                   file_type=file_type)
        plot_fp = abspath(self.dir + "rank_abundance_cols_0." + file_type)
        self.files_to_remove.append(plot_fp)
        self.assertTrue(exists(plot_fp))
def test_plot_rank_abundance_graphs_filetype(self):
    """plot_rank_abundance_graphs works with all filetypes"""
    self.otu_table = parse_biom_table_str(otu_table_sparse)
    self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                prefix="test_plot_rank_abundance",
                                suffix="/")
    create_dir(self.dir)
    self._dirs_to_remove.append(self.dir)

    # every supported filetype should produce the requested output file
    supported_types = ['pdf', 'svg', 'png', 'eps']
    for file_type in supported_types:
        plot_fp = abspath(self.dir + "rank_abundance_cols_0." + file_type)
        plot_rank_abundance_graphs(plot_fp, 'S3', self.otu_table,
                                   file_type=file_type)
        self.files_to_remove.append(plot_fp)
        self.assertTrue(exists(plot_fp))
def test_plot_rank_abundance_graphs_sparse(self):
    """plot_rank_abundance_graphs works with any number of samples (SparseOTUTable)"""
    self.otu_table = parse_biom_table_str(otu_table_sparse)
    self.dir = mkdtemp(dir=self.tmp_dir,
                       prefix="test_plot_rank_abundance",
                       suffix="/")
    self._dirs_to_remove.append(self.dir)
    # only the path is needed; close the descriptor mkstemp hands back
    scratch_fd, scratch_fp = mkstemp(dir=self.dir)
    close(scratch_fd)

    # an empty sample name, then an invalid one, must both be rejected
    for bad_sample_name in ('', 'Invalid_sample_name'):
        self.assertRaises(ValueError, plot_rank_abundance_graphs,
                          scratch_fp, bad_sample_name, self.otu_table)

    file_type = "pdf"

    # two samples
    two_sample_plot = abspath(
        self.dir + "rank_abundance_cols_0_2." + file_type)
    plot_rank_abundance_graphs(two_sample_plot, 'S3,S5', self.otu_table,
                               file_type=file_type)
    self.assertTrue(exists(two_sample_plot))
    self.files_to_remove.append(two_sample_plot)

    # all samples
    all_sample_plot = abspath(
        self.dir + "rank_abundance_cols_0_1_2." + file_type)
    plot_rank_abundance_graphs(all_sample_plot, '*', self.otu_table,
                               file_type=file_type)
    self.files_to_remove.append(all_sample_plot)
    self.assertTrue(exists(all_sample_plot))
def test_compare_treatment_dists(self):
    """Tests that the entire library functions as expected.

    Builds an 8 OTU x 13 sample BIOM table, a mapping dict keyed by sample
    id and a tree over O1..O8, then checks the output of
    compare_treatment_dists for the 'Diet' category against known values.
    """
    bt = parse_biom_table_str(
        '{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","type": "OTU table","generated_by": "test_code","date": "2013-01-30T14:47:00.868638","matrix_type": "sparse","matrix_element_type": "float","shape": [8, 13],"data": [[0,0,100.0],[0,1,12.0],[0,2,45.0],[0,7,24.0],[0,8,67.0],[0,9,132.0],[0,10,991.0],[0,11,21.0],[0,12,5.0],[1,0,54.0],[1,1,989.0],[1,2,200.0],[1,3,425.0],[1,4,2.0],[1,5,4.0],[1,11,52.0],[1,12,13.0],[2,0,4.0],[2,3,11.0],[2,4,11.0],[2,5,100.0],[2,6,491.0],[2,7,55.0],[2,8,98.0],[2,9,54.0],[2,10,104.0],[3,0,45.0],[3,1,78.0],[3,2,52.0],[3,3,14.0],[3,11,1000.0],[3,12,141.0],[4,0,92.0],[4,1,46.0],[4,2,49.0],[4,3,770.0],[4,4,14.0],[4,7,1.0],[4,8,55.0],[4,9,255.0],[4,12,14.0],[5,0,67.0],[5,1,92.0],[5,2,800.0],[5,4,13.0],[5,5,17.0],[5,6,29.0],[5,7,27.0],[5,8,25.0],[5,9,221.0],[5,10,228.0],[5,11,9.0],[5,12,9.0],[6,0,150.0],[6,1,149.0],[6,2,11.0],[6,3,35.0],[6,4,899.0],[6,5,766.0],[6,6,348.0],[6,7,680.0],[6,8,496.0],[6,9,467.0],[6,10,13.0],[6,11,327.0],[6,12,855.0],[7,0,300.0],[7,1,45.0],[7,2,78.0],[7,3,22.0],[7,4,361.0],[7,5,271.0],[7,6,531.0],[7,7,256.0],[7,8,251.0],[7,10,70.0],[7,11,250.0],[7,12,31.0]],"rows": [{"id": "O1", "metadata": null},{"id": "O2", "metadata": null},{"id": "O3", "metadata": null},{"id": "O4", "metadata": null},{"id": "O5", "metadata": null},{"id": "O6", "metadata": null},{"id": "O7", "metadata": null},{"id": "O8", "metadata": null}],"columns": [{"id": "a1", "metadata": null},{"id": "a2", "metadata": null},{"id": "a3", "metadata": null},{"id": "b1", "metadata": null},{"id": "b2", "metadata": null},{"id": "b3", "metadata": null},{"id": "c1", "metadata": null},{"id": "c2", "metadata": null},{"id": "c3", "metadata": null},{"id": "c4", "metadata": null},{"id": "d1", "metadata": null},{"id": "d2", "metadata": null},{"id": "d3", "metadata": null}]}'
    )
    # mapping data: sample id -> {category: value}
    mf = {
        'a1': {'Diet': 'LF', 'HSID': 'a', 'Pref': '1'},
        'a2': {'Diet': 'LF', 'HSID': 'a', 'Pref': '1'},
        'a3': {'Diet': 'HF', 'HSID': 'a', 'Pref': '1'},
        'b1': {'Diet': 'HF', 'HSID': 'b', 'Pref': '4'},
        'b2': {'Diet': 'HF', 'HSID': 'b', 'Pref': '5'},
        'b3': {'Diet': 'HF', 'HSID': 'b', 'Pref': '5'},
        'c1': {'Diet': 'LF', 'HSID': 'c', 'Pref': '5'},
        'c2': {'Diet': 'LF', 'HSID': 'c', 'Pref': '5'},
        'c3': {'Diet': 'LF', 'HSID': 'c', 'Pref': '5'},
        'c4': {'Diet': 'LF', 'HSID': 'd', 'Pref': '2'},
        'd1': {'Diet': 'HF', 'HSID': 'd', 'Pref': '2'},
        'd2': {'Diet': 'HF', 'HSID': 'd', 'Pref': '1'},
        'd3': {'Diet': 'HF', 'HSID': 'd', 'Pref': '1'}}
    tr = DndParser(
        '(((O1:0.06,O2:0.1)A:0.031,(O3:0.001,O4:0.01)B:0.2)AB:0.4,((O5:0.03,O6:0.02)C:0.13,(O7:0.01,O8:0.005)D:0.1)CD:0.3)root;'
    )

    # test known quantity
    # (a nested-list `sids` assignment used to precede this one; it was
    # overwritten before ever being read, so only the flat list is kept)
    sids = ['a1', 'a2', 'c1', 'd1', 'd2', 'd3']
    exp_out = (['LF', 'HF'],
               array([[0.11071343, 0.07019723],
                      [0., 0.06787341]]),
               array([[0.01296343, 0.00657256],
                      [0., 0.00562879]]),
               array([[0.0763829, 0.0015125],
                      [0.0724499, 0.00154609]]))
    obs_out = compare_treatment_dists(sids, 'Diet', mf, bt,
                                      'unweighted_unifrac', tr)
    self.assertEqual(exp_out[0], obs_out[0])
    for i, j in zip(obs_out[1:], exp_out[1:]):
        # have to use allclose because of tiny float errors
        self.assertTrue(allclose(i, j))
def setUp(self):
    """Runs before each test: parse the precalculated-input BIOM fixture"""
    parsed_table = parse_biom_table_str(precalc_in_biom)
    self.precalc_in_biom = parsed_table
def setUp(self):
    """Parse the two genome table fixtures and store them on the test case."""
    to_table = parse_biom_table_str
    self.genome_table1 = to_table(genome_table1)
    self.genome_table2 = to_table(genome_table2)
def setUp(self):
    """Parse the BIOM fixture strings and store them on the test case."""
    to_table = parse_biom_table_str

    # Datasets for metagenome prediction
    self.otu_table1 = to_table(otu_table1)
    self.otu_table1_with_metadata = to_table(otu_table1_with_metadata)
    self.genome_table1 = to_table(genome_table1)
    self.genome_table1_with_metadata = to_table(genome_table1_with_metadata)
    self.genome_table2 = to_table(genome_table2)
    self.predicted_metagenome_table1 = to_table(predicted_metagenome_table1)
    self.predicted_metagenome_table1_with_metadata = to_table(
        predicted_metagenome_table1_with_metadata)

    # Datasets for variance estimation during metagenome prediction
    self.zero_variance_table1 = to_table(zero_variance_table1)
    self.variance_table1_var_by_otu = to_table(variance_table1_var_by_otu)
    self.variance_table1_var_by_gene = to_table(variance_table1_var_by_gene)
    self.variance_table1_one_gene_one_otu = to_table(
        variance_table1_one_gene_one_otu)
    self.predicted_metagenome_table1_zero_variance = to_table(
        predicted_metagenome_table1_zero_variance)
    self.predicted_metagenome_variance_table1_one_gene_one_otu = to_table(
        predicted_metagenome_variance_table1_one_gene_one_otu)
    # NOTE(review): this attribute is built from the plain
    # predicted_metagenome_table1 fixture, not a "one_gene_one_otu" one --
    # confirm that this is intended
    self.predicted_metagenome_table1_one_gene_one = to_table(
        predicted_metagenome_table1)

    # Datasets for testing confidence intervals
    self.predicted_metagenome_table1_one_gene_one_otu_upper_CI = to_table(
        predicted_metagenome_table1_one_gene_one_otu_upper_CI)
    self.predicted_metagenome_table1_one_gene_one_otu_lower_CI = to_table(
        predicted_metagenome_table1_one_gene_one_otu_lower_CI)
def _warn_removed_samples(removed_samples):
    """Print a warning listing mapping-file samples absent from the OTU table."""
    if removed_samples:
        print("Warning, the following samples were in the category mapping file " +
              "but not the OTU table and will be ignored: ")
        for sample_id in removed_samples:
            print(sample_id + '\n')


def main():
    """Run the statistical test selected on the command line.

    Reads its options via parse_command_line_parameters(**script_info),
    parses the category mapping file and the OTU table(s), runs the test
    named by opts.test and writes the newline-joined result lines to
    opts.output_fp.

    opts.otu_table_fp may be a single BIOM file or a directory:
     - single file: the table is tested directly; for
       'longitudinal_correlation' and 'paired_T' it is first converted with
       longitudinal_otu_table_conversion_wrapper (and the converted table is
       written to opts.converted_otu_table_output_fp when that is set).
     - directory: every '*biom' file in it is parsed and passed to
       test_wrapper_multiple.

    Raises ValueError when no category is given for a test other than
    'paired_T', or when 'longitudinal_correlation'/'paired_T' is requested
    on a directory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    # NOTE(review): assumes parse_mapping_file consumes the handle fully
    # before returning, so it is safe to close it right away -- confirm
    with open(opts.category_mapping_fp, 'U') as mapping_f:
        category_mapping = parse_mapping_file(mapping_f)
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance
    # named otu_filter so the builtin filter() is not shadowed
    otu_filter = opts.filter
    test = opts.test
    category = opts.category
    if not category and test != 'paired_T':
        raise ValueError('a category in the category mapping file must be' +
                         ' specified with the -c option for this test')
    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)
    otu_include_fp = opts.otu_include_fp
    otu_table_fp = opts.otu_table_fp

    # The otu_include handle is passed on to the test wrappers, so it has to
    # stay open until they return; the finally clause below closes it.
    otu_include_f = None
    if otu_include_fp and otu_include_fp != 'None':
        otu_include_f = open(otu_include_fp)
    try:
        otu_include = otu_include_f

        if not isdir(otu_table_fp):
            # if single file, process normally
            with open(otu_table_fp, 'U') as otu_table_f:
                try:
                    otu_table = parse_biom_table(otu_table_f)
                except AttributeError:
                    # NOTE(review): as before, the fallback hands the file
                    # object (not its text) to the string parser -- confirm
                    otu_table = parse_biom_table_str(otu_table_f)

            # synchronize the mapping file with the otu table
            category_mapping, removed_samples = sync_mapping_to_otu_table(
                otu_table, category_mapping)
            _warn_removed_samples(removed_samples)

            if test == 'longitudinal_correlation' or test == 'paired_T':
                converted_otu_table = \
                    longitudinal_otu_table_conversion_wrapper(
                        otu_table, category_mapping, individual_column,
                        reference_sample_column)
                if conv_output_fp:
                    with open(conv_output_fp, 'w') as conv_f:
                        conv_f.write(format_biom_table(converted_otu_table))
                if test == 'longitudinal_correlation':
                    # set the otu_include list to all of the OTUs, this
                    # effectively deactivates the filter for correlation,
                    # because the filtered OTU_list is rewritten with the
                    # otu_include list in the test_wrapper
                    if not otu_include:
                        otu_include = set(otu_table.ObservationIds)
                    output = test_wrapper(
                        'correlation', converted_otu_table,
                        category_mapping, category, threshold, otu_filter,
                        otu_include, 999999999.0, True)
                elif test == 'paired_T':
                    output = test_wrapper(
                        'paired_T', converted_otu_table,
                        category_mapping, category, threshold,
                        otu_filter, otu_include, 999999999.0, True,
                        individual_column, reference_sample_column)
            else:
                output = test_wrapper(
                    test, otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    otu_table_relative_abundance=relative_abundance)
        else:
            if test != 'longitudinal_correlation' and test != 'paired_T':
                # if directory, get aggregated results
                parsed_otu_tables = []
                for path in glob('%s/*biom' % otu_table_fp):
                    with open(path, 'U') as table_f:
                        parsed_otu_tables.append(parse_biom_table(table_f))

                # synchronize the mapping file with the otu table
                # checks with just the first OTU table and assumes that all
                # otu tables have the same collection of samples
                category_mapping, removed_samples = \
                    sync_mapping_to_otu_table(parsed_otu_tables[0],
                                              category_mapping)
                _warn_removed_samples(removed_samples)

                output = test_wrapper_multiple(
                    test, parsed_otu_tables,
                    category_mapping, category, threshold, otu_filter,
                    otu_include,
                    otu_table_relative_abundance=relative_abundance)
            else:
                raise ValueError("the longitudinal_correlation and paired_T options cannot be run on a directory")
    finally:
        if otu_include_f is not None:
            otu_include_f.close()

    with open(output_fp, 'w') as of:
        of.write('\n'.join(output))
def setUp(self):
    """Parse the BIOM fixture strings and store them on the test case."""
    to_table = parse_biom_table_str

    # Datasets for metagenome prediction
    self.otu_table1 = to_table(otu_table1)
    self.otu_table1_with_metadata = to_table(otu_table1_with_metadata)
    self.genome_table1 = to_table(genome_table1)
    self.genome_table1_with_metadata = to_table(genome_table1_with_metadata)
    self.genome_table2 = to_table(genome_table2)
    self.predicted_metagenome_table1 = to_table(predicted_metagenome_table1)
    self.predicted_metagenome_table1_with_metadata = to_table(
        predicted_metagenome_table1_with_metadata)

    # Datasets for variance estimation during metagenome prediction
    self.zero_variance_table1 = to_table(zero_variance_table1)
    self.variance_table1_var_by_otu = to_table(variance_table1_var_by_otu)
    self.variance_table1_var_by_gene = to_table(variance_table1_var_by_gene)
    self.variance_table1_one_gene_one_otu = to_table(
        variance_table1_one_gene_one_otu)
    self.predicted_metagenome_table1_zero_variance = to_table(
        predicted_metagenome_table1_zero_variance)
    self.predicted_metagenome_variance_table1_one_gene_one_otu = to_table(
        predicted_metagenome_variance_table1_one_gene_one_otu)
    # NOTE(review): this attribute is built from the plain
    # predicted_metagenome_table1 fixture, not a "one_gene_one_otu" one --
    # confirm that this is intended
    self.predicted_metagenome_table1_one_gene_one = to_table(
        predicted_metagenome_table1)

    # Datasets for testing confidence intervals
    self.predicted_metagenome_table1_one_gene_one_otu_upper_CI = to_table(
        predicted_metagenome_table1_one_gene_one_otu_upper_CI)
    self.predicted_metagenome_table1_one_gene_one_otu_lower_CI = to_table(
        predicted_metagenome_table1_one_gene_one_otu_lower_CI)
def test_make_new_otu_counts(self):
    """make_new_otu_counts works"""
    # The mapping rows are listed explicitly (one tab-delimited string per
    # line) rather than split out of a multi-line literal, so the fixture
    # does not depend on the literal's embedded line breaks surviving.
    # The trailing empty string mirrors what splitting a newline-terminated
    # block on '\n' yields.
    mapping_lines = [
        "#SampleID\tindividual\ttimepoint_zero\ttimepoint",
        "AT0\tA\t1\t0",
        "AT1\tA\t0\t1",
        "AT2\tA\t0\t2",
        "BT0\tB\t1\t0",
        "BT1\tB\t0\t1",
        "BT2\tB\t0\t2",
        "",
    ]
    mapping_data, header, comments = parse_mapping_file(mapping_lines)
    samples_from_subject, sample_to_subtract = \
        get_sample_individual_info(mapping_data, header, 'individual',
                                   'timepoint_zero')
    # input table: 4 OTUs x 7 samples; sample 'S1' has no row in the
    # mapping lines above
    otu_table_str = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "2", "metadata": null}, {"id": "3", "metadata": null}], "format": "Biological Observation Matrix v0.9", "data": [[0, 0, 0.5], [0, 1, 0.29999999999999999], [0, 2, 99.0], [0, 3, 0.20000000000000001], [1, 2, 99.0], [1, 4, 0.40000000000000002], [1, 5, 0.5], [1, 6, 0.59999999999999998], [2, 0, 0.10000000000000001], [2, 1, 0.40000000000000002], [2, 2, 99.0], [2, 3, 0.69999999999999996], [2, 4, 0.5], [2, 5, 0.59999999999999998], [2, 6, 0.80000000000000004], [3, 1, 0.10000000000000001], [3, 2, 99.0], [3, 4, 0.40000000000000002]], "columns": [{"id": "AT0", "metadata": null}, {"id": "AT1", "metadata": null}, {"id": "S1", "metadata": null}, {"id": "AT2", "metadata": null}, {"id": "BT0", "metadata": null}, {"id": "BT1", "metadata": null}, {"id": "BT2", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2570", "matrix_type": "sparse", "shape": [4, 7], "format_url": "http://www.qiime.org/svn_documentation/documentation/biom_format.html", "date": "2011-12-21T21:35:19.499263", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    otu_table = parse_biom_table_str(otu_table_str)
    converted_otu_table_object = make_new_otu_counts(
        otu_table, sample_to_subtract, samples_from_subject)
    # expected table: 4 OTUs x 6 samples (no 'S1' column)
    expected_otu_table_str = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "2", "metadata": null}, {"id": "3", "metadata": null}], "format": "Biological Observation Matrix 0.9.0-dev", "data": [[0, 1, -0.20000000000000001], [0, 2, 
-0.29999999999999999], [0, 3, 999999999.0], [0, 4, 999999999.0], [0, 5, 999999999.0], [1, 0, 999999999.0], [1, 1, 999999999.0], [1, 2, 999999999.0], [1, 4, 0.10000000000000001], [1, 5, 0.20000000000000001], [2, 1, 0.29999999999999999], [2, 2, 0.59999999999999998], [2, 4, 0.10000000000000001], [2, 5, 0.29999999999999999], [3, 1, 0.10000000000000001], [3, 4, -0.40000000000000002], [3, 5, -0.40000000000000002]], "columns": [{"id": "AT0", "metadata": null}, {"id": "AT1", "metadata": null}, {"id": "AT2", "metadata": null}, {"id": "BT0", "metadata": null}, {"id": "BT1", "metadata": null}, {"id": "BT2", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2570", "matrix_type": "sparse", "shape": [4, 6], "format_url": "http://biom-format.org", "date": "2011-12-21T21:43:06.809380", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    expected_otu_table_object = parse_biom_table_str(
        expected_otu_table_str)

    # ids and metadata must match exactly
    self.assertEqual(converted_otu_table_object.ObservationIds,
                     expected_otu_table_object.ObservationIds)
    self.assertEqual(converted_otu_table_object.SampleIds,
                     expected_otu_table_object.SampleIds)
    self.assertEqual(converted_otu_table_object.SampleMetadata,
                     expected_otu_table_object.SampleMetadata)
    self.assertEqual(converted_otu_table_object.ObservationMetadata,
                     expected_otu_table_object.ObservationMetadata)
    # the data entries are compared with assertFloatEqual after sorting
    self.assertFloatEqual(
        sorted(converted_otu_table_object._data.items()),
        sorted(expected_otu_table_object._data.items()))
def _print_removed_samples_warning(removed_samples):
    """Print the samples that were dropped from the mapping file, if any."""
    if not removed_samples:
        return
    print("Warning, the following samples were in the category mapping file " +
          "but not the OTU table and will be ignored: ")
    for sample_id in removed_samples:
        print(sample_id + '\n')


def main():
    """Run the requested OTU/category test and write the result lines.

    Reads the command line options described by ``script_info``, parses the
    category mapping file and either a single OTU table or a directory of
    ``*biom`` tables, runs the selected test and writes the newline-joined
    output to ``opts.output_fp``.

    For the 'longitudinal_correlation' and 'paired_T' tests the OTU table is
    first converted with ``longitudinal_otu_table_conversion_wrapper``; the
    converted table is also written out when
    ``opts.converted_otu_table_output_fp`` is given.

    Raises:
        ValueError: if no category is given for a test other than 'paired_T',
            if a longitudinal test is requested on a directory, or if the
            directory contains no ``*biom`` files.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance
    # Named otu_filter (not ``filter``) so the builtin is not shadowed.
    otu_filter = opts.filter
    test = opts.test
    category = opts.category
    otu_table_fp = opts.otu_table_fp
    is_longitudinal = test in ('longitudinal_correlation', 'paired_T')

    with open(opts.category_mapping_fp, 'U') as mapping_f:
        category_mapping = parse_mapping_file(mapping_f)

    if not category and test != 'paired_T':
        raise ValueError('a category in the category mapping file must be' +
                         ' specified with the -c option for this test')

    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)

    # The include-list handle is handed to the test wrappers as an open file,
    # so it is kept open until they return and closed in the finally below.
    otu_include_fp = opts.otu_include_fp
    otu_include_f = None
    if otu_include_fp and otu_include_fp != 'None':
        otu_include_f = open(otu_include_fp)
    otu_include = otu_include_f

    try:
        if not isdir(otu_table_fp):
            # Single file: process normally.
            with open(otu_table_fp, 'U') as otu_table_f:
                try:
                    otu_table = parse_biom_table(otu_table_f)
                except AttributeError:
                    # NOTE(review): the fallback receives the file object,
                    # not its text, exactly as before -- confirm that
                    # parse_biom_table_str accepts this.
                    otu_table = parse_biom_table_str(otu_table_f)

            # Synchronize the mapping file with the OTU table.
            category_mapping, removed_samples = sync_mapping_to_otu_table(
                otu_table, category_mapping)
            _print_removed_samples_warning(removed_samples)

            if is_longitudinal:
                converted_otu_table = \
                    longitudinal_otu_table_conversion_wrapper(
                        otu_table, category_mapping, individual_column,
                        reference_sample_column)
                if conv_output_fp:
                    with open(conv_output_fp, 'w') as conv_f:
                        conv_f.write(format_biom_table(converted_otu_table))

                if test == 'longitudinal_correlation':
                    # Set the otu_include list to all of the OTUs; this
                    # effectively deactivates the filter for correlation,
                    # because the filtered OTU list is rewritten with the
                    # otu_include list in the test_wrapper.
                    if not otu_include:
                        otu_include = set(otu_table.ObservationIds)
                    output = test_wrapper(
                        'correlation', converted_otu_table,
                        category_mapping, category, threshold, otu_filter,
                        otu_include, 999999999.0, True)
                else:
                    # test == 'paired_T'
                    output = test_wrapper(
                        'paired_T', converted_otu_table,
                        category_mapping, category, threshold, otu_filter,
                        otu_include, 999999999.0, True,
                        individual_column, reference_sample_column)
            else:
                output = test_wrapper(
                    test, otu_table, category_mapping, category, threshold,
                    otu_filter, otu_include,
                    otu_table_relative_abundance=relative_abundance)
        else:
            if is_longitudinal:
                raise ValueError(
                    "the longitudinal_correlation and paired_T options cannot be run on a directory")

            # Directory: aggregate results over every *biom table in it.
            otu_table_paths = glob('%s/*biom' % otu_table_fp)
            if not otu_table_paths:
                raise ValueError(
                    "no *biom files were found in the directory %s"
                    % otu_table_fp)

            parsed_otu_tables = []
            for path in otu_table_paths:
                with open(path, 'U') as table_f:
                    parsed_otu_tables.append(parse_biom_table(table_f))

            # Synchronize the mapping file with the OTU tables. Only the
            # first table is checked; all tables are assumed to contain the
            # same collection of samples.
            category_mapping, removed_samples = sync_mapping_to_otu_table(
                parsed_otu_tables[0], category_mapping)
            _print_removed_samples_warning(removed_samples)

            output = test_wrapper_multiple(
                test, parsed_otu_tables, category_mapping, category,
                threshold, otu_filter, otu_include,
                otu_table_relative_abundance=relative_abundance)
    finally:
        if otu_include_f is not None:
            otu_include_f.close()

    with open(output_fp, 'w') as of:
        of.write('\n'.join(output))