def main():
    """Build the bipartite network tables and write them to the output dir.

    Steps, in order:
      1. Parse the command line described by ``script_info``.
      2. Check that every field requested for observation colors, shapes
         and sizes is either a shared option or one of the ``md_fields``.
      3. Check that every field requested for sample colors, shapes and
         sizes is either a shared option or a key of the mapping file
         metadata.
      4. Create the output directory (refusing to reuse an existing one).
      5. Load the BIOM table, build the sample node, OTU node, node
         attribute and edge tables, and write each one to its own file.

    Validation failures are reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.biom_fp
    map_fp = opts.map_fp
    output_dir = opts.output_dir
    scolors = opts.scolors.split(',')
    ocolors = opts.ocolors.split(',')
    sshapes = opts.sshapes.split(',')
    oshapes = opts.oshapes.split(',')
    ssizes = opts.ssizes.split(',')
    osizes = opts.osizes.split(',')
    md_fields = opts.md_fields.split(',')

    # check that the otu fields asked for are available
    shared_options = ['NodeType', 'Abundance']
    otu_options = md_fields + shared_options
    if not all(i in otu_options for i in ocolors + oshapes + osizes):
        option_parser.error('The fields specified for observation colors, '
                            'sizes, or shapes are not in either the shared '
                            'options (NodeType,Abundance) or the supplied '
                            'md_fields. These fields must be a subset of the '
                            'union of these sets. Have you passed ocolors, '
                            'osizes or oshapes that are not in the md_fields?')

    # Parse the mapping file once and reuse it for both the validation and
    # the table building below. [1] is comments, don't need.
    pmf = parse_mapping_file_to_dict(map_fp)[0]
    if not pmf:
        # Without at least one sample there are no metadata keys to check.
        option_parser.error('The mapping file contains no samples, so the '
                            'sample fields cannot be validated.')

    # check that the sample fields asked for are available. mapping file
    # elements should all have same metadata keys, so the first sample is
    # representative. Materialise the keys as a list so the concatenation
    # below works whether .keys() returns a list or a view.
    sopts = list(next(iter(pmf.values())).keys())
    sample_options = sopts + shared_options
    if not all(i in sample_options for i in scolors + sshapes + ssizes):
        option_parser.error('The fields specified for sample colors, sizes, '
                            'or shapes are not in either the shared options '
                            '(NodeType,Abundance) or the supplied mapping '
                            'file. These fields must be a subset of the union '
                            'of these sets. Have you passed scolors, ssizes '
                            'or sshapes that are not in the mapping file '
                            'headers?')

    # actual computation begins
    try:
        create_dir(output_dir, fail_on_exist=True)
    except OSError:
        option_parser.error('Directory already exists. Will not overwrite.')

    bt = load_table(otu_table_fp)
    sample_node_table = make_sample_node_table(bt, pmf)
    otu_node_table = make_otu_node_table(bt, opts.observation_md_header_key,
                                         md_fields)
    node_attr_table = make_node_attr_table(otu_node_table, sample_node_table,
                                           scolors, ocolors, ssizes, osizes,
                                           sshapes, oshapes)
    edge_table = make_edge_table(bt)

    # Each table goes to its own fixed-name file inside output_dir.
    outputs = (
        (sample_node_table, 'SampleNodeTable.txt'),
        (otu_node_table, 'OTUNodeTable.txt'),
        (node_attr_table, 'NodeAttrTable.txt'),
        (edge_table, 'EdgeTable.txt'),
    )
    for table, filename in outputs:
        _write_table(table, os.path.join(output_dir, filename))
def main():
    """Build the bipartite network tables and write them to the output dir.

    Steps, in order:
      1. Parse the command line described by ``script_info``.
      2. Check that every field requested for observation colors, shapes
         and sizes is either a shared option or one of the ``md_fields``.
      3. Check that every field requested for sample colors, shapes and
         sizes is either a shared option or a key of the mapping file
         metadata.
      4. Create the output directory (refusing to reuse an existing one).
      5. Parse the BIOM table, build the sample node, OTU node, node
         attribute and edge tables, and write each one to its own file.

    Validation failures are reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.biom_fp
    map_fp = opts.map_fp
    output_dir = opts.output_dir
    taxonomy_key = opts.observation_md_header_key
    scolors = opts.scolors.split(',')
    ocolors = opts.ocolors.split(',')
    sshapes = opts.sshapes.split(',')
    oshapes = opts.oshapes.split(',')
    ssizes = opts.ssizes.split(',')
    osizes = opts.osizes.split(',')
    md_fields = opts.md_fields.split(',')

    # check that the otu fields asked for are available
    shared_options = ['NodeType', 'Abundance']
    otu_options = md_fields + shared_options
    if not all(i in otu_options for i in ocolors + oshapes + osizes):
        option_parser.error(
            'The fields specified for observation colors, sizes, or '
            'shapes are not in either the shared options (NodeType,Abundance)'
            ' or the supplied md_fields. These fields must be a subset of '
            'the union of these sets. Have you passed ocolors, osizes or '
            'oshapes that are not in the md_fields?')

    # Parse the mapping file once and reuse it for both the validation and
    # the table building below. [1] is comments, don't need.
    pmf = parse_mapping_file_to_dict(map_fp)[0]
    if not pmf:
        # Without at least one sample there are no metadata keys to check.
        option_parser.error('The mapping file contains no samples, so the '
                            'sample fields cannot be validated.')

    # check that the sample fields asked for are available. mapping file
    # elements should all have same metadata keys, so the first sample is
    # representative. Materialise the keys as a list so the concatenation
    # below works whether .keys() returns a list or a view.
    sopts = list(next(iter(pmf.values())).keys())
    sample_options = sopts + shared_options
    if not all(i in sample_options for i in scolors + sshapes + ssizes):
        option_parser.error(
            'The fields specified for sample colors, sizes, or '
            'shapes are not in either the shared options (NodeType,Abundance)'
            ' or the supplied mapping file. These fields must be a subset of '
            'the union of these sets. Have you passed scolors, ssizes or '
            'sshapes that are not in the mapping file headers?')

    # actual computation begins
    try:
        create_dir(output_dir, fail_on_exist=True)
    except OSError:
        option_parser.error('Directory already exists. Will not overwrite.')

    # Close the BIOM file handle as soon as parsing is done instead of
    # leaving it to the garbage collector.
    # NOTE(review): assumes parse_biom_table reads the handle eagerly - confirm.
    with open(otu_table_fp) as biom_f:
        bt = parse_biom_table(biom_f)

    sample_node_table = make_sample_node_table(bt, pmf)
    otu_node_table = make_otu_node_table(bt, taxonomy_key, md_fields)
    node_attr_table = make_node_attr_table(otu_node_table, sample_node_table,
                                           scolors, ocolors, ssizes, osizes,
                                           sshapes, oshapes)
    edge_table = make_edge_table(bt)

    # Each table goes to its own fixed-name file inside output_dir.
    outputs = (
        (sample_node_table, 'SampleNodeTable.txt'),
        (otu_node_table, 'OTUNodeTable.txt'),
        (node_attr_table, 'NodeAttrTable.txt'),
        (edge_table, 'EdgeTable.txt'),
    )
    for table, filename in outputs:
        _write_table(table, os.path.join(output_dir, filename))
def test_make_otu_node_table(self):
    '''Test that make_otu_node_table makes accurate calculations.

    Covers taxonomy metadata parsed from BIOM_STRING_1 and BIOM_STRING_3
    (md_fields shorter than, longer than, and equal in length to the
    taxonomy), then metadata from BIOM_STRING_4 (dict) and BIOM_STRING_5
    (defaultdict), including the ValueError raised for an unknown field.
    '''
    md_key = 'taxonomy'

    # test when length of md_fields and length of split taxonomy wouldn't
    # agree when md type is list or string.
    bt1 = parse_biom_table(BIOM_STRING_1)

    # fewer md_fields than taxonomy levels: only the first level is kept
    md_fields = ['k']
    obs = make_otu_node_table(bt1, md_key, md_fields)
    exp = [
        '#NodeID\tNodeType\tAbundance\tk',
        'o1\totu\t15.0\tk__Bacteria',
        'o2\totu\t40.0\tk__Bacteria',
        'o3\totu\t65.0\tk__Bacteria',
        'o4\totu\t90.0\tk__Bacteria',
        'o5\totu\t115.0\tk__Bacteria',
        'o6\totu\t140.0\tk__Bacteria',
        'o7\totu\t165.0\tk__Bacteria',
        'o8\totu\t190.0\tk__Bacteria']
    self.assertEqual(obs, exp)

    # more md_fields than taxonomy levels: extra columns hold 'Other'
    md_fields = ['k', '1', '2', '3', '4', '5', '6']
    obs = make_otu_node_table(bt1, md_key, md_fields)
    exp = [
        '#NodeID\tNodeType\tAbundance\tk\t1\t2\t3\t4\t5\t6',
        'o1\totu\t15.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae\tOther\tOther',
        'o2\totu\t40.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1\tOther\tOther',
        'o3\totu\t65.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1\tOther\tOther',
        'o4\totu\t90.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2\tOther\tOther',
        'o5\totu\t115.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2\tOther\tOther',
        'o6\totu\t140.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae\tOther\tOther',
        'o7\totu\t165.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae\tOther\tOther',
        'o8\totu\t190.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae\tOther\tOther']
    # This comparison was previously missing, so the padded case was
    # computed but never actually checked.
    self.assertEqual(obs, exp)

    # test when the length of the md_fields is correct
    md_fields = ['k', 'p', 'c', 'o', 'f']
    obs_bt1 = make_otu_node_table(bt1, md_key, md_fields)
    exp_bt1 = [
        '#NodeID\tNodeType\tAbundance\tk\tp\tc\to\tf',
        'o1\totu\t15.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae',
        'o2\totu\t40.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o3\totu\t65.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o4\totu\t90.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o5\totu\t115.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o6\totu\t140.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o7\totu\t165.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o8\totu\t190.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae']
    self.assertEqual(obs_bt1, exp_bt1)

    # same fields against a second table (BIOM_STRING_3)
    md_fields = ['k', 'p', 'c', 'o', 'f']
    bt2 = parse_biom_table(BIOM_STRING_3)
    obs_bt2 = make_otu_node_table(bt2, md_key, md_fields)
    exp_bt2 = [
        '#NodeID\tNodeType\tAbundance\tk\tp\tc\to\tf',
        'o1\totu\t12.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae',
        'o2\totu\t27.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o3\totu\t42.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o4\totu\t57.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o5\totu\t72.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o6\totu\t87.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o7\totu\t102.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o8\totu\t117.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae']
    self.assertEqual(obs_bt2, exp_bt2)

    # test when the md is of type dict and fields are correct
    bt = parse_biom_table(BIOM_STRING_4)
    md_fields = ['kingdom', 'phylum', 'class', 'order', 'family']
    obs_bt = make_otu_node_table(bt, md_key, md_fields)
    exp_bt = [
        '#NodeID\tNodeType\tAbundance\tkingdom\tphylum\tclass\torder\tfamily',
        'o1\totu\t15.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae',
        'o2\totu\t40.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o3\totu\t65.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o4\totu\t90.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o5\totu\t115.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o6\totu\t140.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o7\totu\t165.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o8\totu\t190.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae']
    self.assertEqual(obs_bt, exp_bt)

    # test that it raises an error when md_fields not found in md_dict
    md_fields = ['KINGDOM', 'phylum', 'class', 'order', 'family']
    self.assertRaises(
        ValueError, make_otu_node_table, bt, md_key, md_fields)

    # The dict and defaultdict cases below expect exactly the same lines
    # when md_fields is a subset of the metadata keys.
    exp_subset = [
        '#NodeID\tNodeType\tAbundance\tphylum\tclass\torder\tfamily',
        'o1\totu\t15.0\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae',
        'o2\totu\t40.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o3\totu\t65.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o4\totu\t90.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o5\totu\t115.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o6\totu\t140.0\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o7\totu\t165.0\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o8\totu\t190.0\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae']

    # test that it doesn't error when md_fields is subset of md_dict
    md_fields = ['phylum', 'class', 'order', 'family']
    obs_bt = make_otu_node_table(bt, md_key, md_fields)
    self.assertEqual(obs_bt, exp_subset)

    # test when the md is type defaultdict
    bt = parse_biom_table(BIOM_STRING_5)
    # test that it raises an error when md_fields not found in md_dict
    md_fields = ['KINGDOM', 'phylum', 'class', 'order', 'family']
    self.assertRaises(
        ValueError, make_otu_node_table, bt, md_key, md_fields)
    # test that it doesn't error when md_fields is subset of md_dict
    md_fields = ['phylum', 'class', 'order', 'family']
    obs_bt = make_otu_node_table(bt, md_key, md_fields)
    self.assertEqual(obs_bt, exp_subset)
def test_node_attr_table(self):
    '''Check make_node_attr_table for shared and for mixed attributes.'''
    biom_table = parse_biom_table(BIOM_STRING_1)
    mapping = parse_mapping_file_to_dict(MF_LINES.split('\n'))[0]
    sample_lines = make_sample_node_table(biom_table, mapping)
    taxonomy_key = 'taxonomy'
    ranks = ['k', 'p', 'c', 'o', 'f']

    # Case 1: color, size and shape all come from the shared attributes
    # NodeType and Abundance.
    otu_lines = make_otu_node_table(biom_table, taxonomy_key, ranks)
    observed = make_node_attr_table(
        otu_lines, sample_lines,
        ['NodeType'],               # sample colors
        ['NodeType'],               # otu colors
        ['Abundance'],              # sample sizes
        ['Abundance', 'NodeType'],  # otu sizes
        ['NodeType', 'Abundance'],  # sample shapes
        ['NodeType'])               # otu shapes
    expected = [
        '#NodeID\tNodeType\tAbundance\tColor\tSize\tShape',
        's1\tsample\t148.0\tsample\t148.0\tsample_148.0',
        's2\tsample\t156.0\tsample\t156.0\tsample_156.0',
        's3\tsample\t164.0\tsample\t164.0\tsample_164.0',
        's4\tsample\t172.0\tsample\t172.0\tsample_172.0',
        's5\tsample\t180.0\tsample\t180.0\tsample_180.0',
        'o1\totu\t15.0\totu\t15.0_otu\totu',
        'o2\totu\t40.0\totu\t40.0_otu\totu',
        'o3\totu\t65.0\totu\t65.0_otu\totu',
        'o4\totu\t90.0\totu\t90.0_otu\totu',
        'o5\totu\t115.0\totu\t115.0_otu\totu',
        'o6\totu\t140.0\totu\t140.0_otu\totu',
        'o7\totu\t165.0\totu\t165.0_otu\totu',
        'o8\totu\t190.0\totu\t190.0_otu\totu',
    ]
    # The expected lines were worked out by hand, so their order may not
    # match; comparing as sets checks the content only.
    self.assertEqual(set(observed), set(expected))

    # Case 2: a mix of shared attributes and attributes that exist only
    # for samples (Diet, Treatment) or only for otus (k, p, o).
    otu_lines = make_otu_node_table(biom_table, taxonomy_key, ranks)
    observed = make_node_attr_table(
        otu_lines, sample_lines,
        ['NodeType', 'Diet'],        # sample colors
        ['k', 'p'],                  # otu colors
        ['Abundance'],               # sample sizes
        ['Abundance', 'NodeType'],   # otu sizes
        ['Treatment', 'Abundance'],  # sample shapes
        ['NodeType', 'o'])           # otu shapes
    expected = [
        '#NodeID\tNodeType\tAbundance\tColor\tSize\tShape',
        's1\tsample\t148.0\tsample_hf\t148.0\tpre_148.0',
        's2\tsample\t156.0\tsample_lf\t156.0\tpre_156.0',
        's3\tsample\t164.0\tsample_hf\t164.0\tpre_164.0',
        's4\tsample\t172.0\tsample_lf\t172.0\tpost_172.0',
        's5\tsample\t180.0\tsample_mf\t180.0\tpost_180.0',
        'o1\totu\t15.0\tk__Bacteria_p__Firmicutes\t15.0_otu\totu_o__Clostridiales',
        'o2\totu\t40.0\tk__Bacteria_p__Firmicutes\t40.0_otu\totu_o__Clostridiales',
        'o3\totu\t65.0\tk__Bacteria_p__Firmicutes\t65.0_otu\totu_o__Clostridiales',
        'o4\totu\t90.0\tk__Bacteria_p__Firmicutes\t90.0_otu\totu_o__Clostridiales',
        'o5\totu\t115.0\tk__Bacteria_p__Firmicutes\t115.0_otu\totu_o__Clostridiales',
        'o6\totu\t140.0\tk__Bacteria_p__Firmicutes\t140.0_otu\totu_o__Clostri3',
        'o7\totu\t165.0\tk__Bacteria_p__Firmicutes\t165.0_otu\totu_o__Clostri3',
        'o8\totu\t190.0\tk__Bacteria_p__Firmicutes\t190.0_otu\totu_o__Clostridiales',
    ]
    # Hand-computed again, so compare content rather than order.
    self.assertEqual(set(observed), set(expected))
def test_make_otu_node_table(self):
    '''Test that make_otu_node_table makes accurate calculations.

    Covers taxonomy metadata parsed from BIOM_STRING_1 and BIOM_STRING_3
    (md_fields shorter than, longer than, and equal in length to the
    taxonomy), then metadata from BIOM_STRING_4 (dict) and BIOM_STRING_5
    (defaultdict), including the ValueError raised for an unknown field.
    '''
    md_key = 'taxonomy'

    # test when length of md_fields and length of split taxonomy wouldn't
    # agree when md type is list or string.
    bt1 = parse_biom_table(BIOM_STRING_1)

    # fewer md_fields than taxonomy levels: only the first level is kept
    md_fields = ['k']
    obs = make_otu_node_table(bt1, md_key, md_fields)
    exp = [
        '#NodeID\tNodeType\tAbundance\tk',
        'o1\totu\t15.0\tk__Bacteria',
        'o2\totu\t40.0\tk__Bacteria',
        'o3\totu\t65.0\tk__Bacteria',
        'o4\totu\t90.0\tk__Bacteria',
        'o5\totu\t115.0\tk__Bacteria',
        'o6\totu\t140.0\tk__Bacteria',
        'o7\totu\t165.0\tk__Bacteria',
        'o8\totu\t190.0\tk__Bacteria'
    ]
    self.assertEqual(obs, exp)

    # more md_fields than taxonomy levels: extra columns hold 'Other'
    md_fields = ['k', '1', '2', '3', '4', '5', '6']
    obs = make_otu_node_table(bt1, md_key, md_fields)
    exp = [
        '#NodeID\tNodeType\tAbundance\tk\t1\t2\t3\t4\t5\t6',
        'o1\totu\t15.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae\tOther\tOther',
        'o2\totu\t40.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1\tOther\tOther',
        'o3\totu\t65.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1\tOther\tOther',
        'o4\totu\t90.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2\tOther\tOther',
        'o5\totu\t115.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2\tOther\tOther',
        'o6\totu\t140.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae\tOther\tOther',
        'o7\totu\t165.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae\tOther\tOther',
        'o8\totu\t190.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae\tOther\tOther'
    ]
    # This comparison was previously missing, so the padded case was
    # computed but never actually checked.
    self.assertEqual(obs, exp)

    # test when the length of the md_fields is correct
    md_fields = ['k', 'p', 'c', 'o', 'f']
    obs_bt1 = make_otu_node_table(bt1, md_key, md_fields)
    exp_bt1 = [
        '#NodeID\tNodeType\tAbundance\tk\tp\tc\to\tf',
        'o1\totu\t15.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae',
        'o2\totu\t40.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o3\totu\t65.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o4\totu\t90.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o5\totu\t115.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o6\totu\t140.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o7\totu\t165.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o8\totu\t190.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae'
    ]
    self.assertEqual(obs_bt1, exp_bt1)

    # same fields against a second table (BIOM_STRING_3)
    md_fields = ['k', 'p', 'c', 'o', 'f']
    bt2 = parse_biom_table(BIOM_STRING_3)
    obs_bt2 = make_otu_node_table(bt2, md_key, md_fields)
    exp_bt2 = [
        '#NodeID\tNodeType\tAbundance\tk\tp\tc\to\tf',
        'o1\totu\t12.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae',
        'o2\totu\t27.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o3\totu\t42.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o4\totu\t57.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o5\totu\t72.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o6\totu\t87.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o7\totu\t102.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o8\totu\t117.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae'
    ]
    self.assertEqual(obs_bt2, exp_bt2)

    # test when the md is of type dict and fields are correct
    bt = parse_biom_table(BIOM_STRING_4)
    md_fields = ['kingdom', 'phylum', 'class', 'order', 'family']
    obs_bt = make_otu_node_table(bt, md_key, md_fields)
    exp_bt = [
        '#NodeID\tNodeType\tAbundance\tkingdom\tphylum\tclass\torder\tfamily',
        'o1\totu\t15.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae',
        'o2\totu\t40.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o3\totu\t65.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o4\totu\t90.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o5\totu\t115.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o6\totu\t140.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o7\totu\t165.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o8\totu\t190.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae'
    ]
    self.assertEqual(obs_bt, exp_bt)

    # test that it raises an error when md_fields not found in md_dict
    md_fields = ['KINGDOM', 'phylum', 'class', 'order', 'family']
    self.assertRaises(ValueError, make_otu_node_table, bt, md_key,
                      md_fields)

    # The dict and defaultdict cases below expect exactly the same lines
    # when md_fields is a subset of the metadata keys.
    exp_subset = [
        '#NodeID\tNodeType\tAbundance\tphylum\tclass\torder\tfamily',
        'o1\totu\t15.0\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae',
        'o2\totu\t40.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o3\totu\t65.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1',
        'o4\totu\t90.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o5\totu\t115.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2',
        'o6\totu\t140.0\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o7\totu\t165.0\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae',
        'o8\totu\t190.0\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae'
    ]

    # test that it doesn't error when md_fields is subset of md_dict
    md_fields = ['phylum', 'class', 'order', 'family']
    obs_bt = make_otu_node_table(bt, md_key, md_fields)
    self.assertEqual(obs_bt, exp_subset)

    # test when the md is type defaultdict
    bt = parse_biom_table(BIOM_STRING_5)
    # test that it raises an error when md_fields not found in md_dict
    md_fields = ['KINGDOM', 'phylum', 'class', 'order', 'family']
    self.assertRaises(ValueError, make_otu_node_table, bt, md_key,
                      md_fields)
    # test that it doesn't error when md_fields is subset of md_dict
    md_fields = ['phylum', 'class', 'order', 'family']
    obs_bt = make_otu_node_table(bt, md_key, md_fields)
    self.assertEqual(obs_bt, exp_subset)
def test_make_otu_node_table(self):
    """Test that make_otu_node_table makes accurate calculations.

    Covers taxonomy metadata parsed from BIOM_STRING_1 and BIOM_STRING_3
    (md_fields shorter than, longer than, and equal in length to the
    taxonomy), then metadata from BIOM_STRING_4 (dict) and BIOM_STRING_5
    (defaultdict), including the ValueError raised for an unknown field.
    """
    md_key = "taxonomy"

    # test when length of md_fields and length of split taxonomy wouldn't
    # agree when md type is list or string.
    bt1 = parse_biom_table(BIOM_STRING_1)

    # fewer md_fields than taxonomy levels: only the first level is kept
    md_fields = ["k"]
    obs = make_otu_node_table(bt1, md_key, md_fields)
    exp = [
        "#NodeID\tNodeType\tAbundance\tk",
        "o1\totu\t15.0\tk__Bacteria",
        "o2\totu\t40.0\tk__Bacteria",
        "o3\totu\t65.0\tk__Bacteria",
        "o4\totu\t90.0\tk__Bacteria",
        "o5\totu\t115.0\tk__Bacteria",
        "o6\totu\t140.0\tk__Bacteria",
        "o7\totu\t165.0\tk__Bacteria",
        "o8\totu\t190.0\tk__Bacteria",
    ]
    self.assertEqual(obs, exp)

    # more md_fields than taxonomy levels: extra columns hold 'Other'
    md_fields = ["k", "1", "2", "3", "4", "5", "6"]
    obs = make_otu_node_table(bt1, md_key, md_fields)
    exp = [
        "#NodeID\tNodeType\tAbundance\tk\t1\t2\t3\t4\t5\t6",
        "o1\totu\t15.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae\tOther\tOther",
        "o2\totu\t40.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1\tOther\tOther",
        "o3\totu\t65.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1\tOther\tOther",
        "o4\totu\t90.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2\tOther\tOther",
        "o5\totu\t115.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2\tOther\tOther",
        "o6\totu\t140.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae\tOther\tOther",
        "o7\totu\t165.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae\tOther\tOther",
        "o8\totu\t190.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae\tOther\tOther",
    ]
    # This comparison was previously missing, so the padded case was
    # computed but never actually checked.
    self.assertEqual(obs, exp)

    # test when the length of the md_fields is correct
    md_fields = ["k", "p", "c", "o", "f"]
    obs_bt1 = make_otu_node_table(bt1, md_key, md_fields)
    exp_bt1 = [
        "#NodeID\tNodeType\tAbundance\tk\tp\tc\to\tf",
        "o1\totu\t15.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae",
        "o2\totu\t40.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1",
        "o3\totu\t65.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1",
        "o4\totu\t90.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2",
        "o5\totu\t115.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2",
        "o6\totu\t140.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae",
        "o7\totu\t165.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae",
        "o8\totu\t190.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae",
    ]
    self.assertEqual(obs_bt1, exp_bt1)

    # same fields against a second table (BIOM_STRING_3)
    md_fields = ["k", "p", "c", "o", "f"]
    bt2 = parse_biom_table(BIOM_STRING_3)
    obs_bt2 = make_otu_node_table(bt2, md_key, md_fields)
    exp_bt2 = [
        "#NodeID\tNodeType\tAbundance\tk\tp\tc\to\tf",
        "o1\totu\t12.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae",
        "o2\totu\t27.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1",
        "o3\totu\t42.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1",
        "o4\totu\t57.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2",
        "o5\totu\t72.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2",
        "o6\totu\t87.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae",
        "o7\totu\t102.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae",
        "o8\totu\t117.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae",
    ]
    self.assertEqual(obs_bt2, exp_bt2)

    # test when the md is of type dict and fields are correct
    bt = parse_biom_table(BIOM_STRING_4)
    md_fields = ["kingdom", "phylum", "class", "order", "family"]
    obs_bt = make_otu_node_table(bt, md_key, md_fields)
    exp_bt = [
        "#NodeID\tNodeType\tAbundance\tkingdom\tphylum\tclass\torder\tfamily",
        "o1\totu\t15.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae",
        "o2\totu\t40.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1",
        "o3\totu\t65.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1",
        "o4\totu\t90.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2",
        "o5\totu\t115.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2",
        "o6\totu\t140.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae",
        "o7\totu\t165.0\tk__Bacteria\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae",
        "o8\totu\t190.0\tk__Bacteria\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae",
    ]
    self.assertEqual(obs_bt, exp_bt)

    # test that it raises an error when md_fields not found in md_dict
    md_fields = ["KINGDOM", "phylum", "class", "order", "family"]
    self.assertRaises(ValueError, make_otu_node_table, bt, md_key, md_fields)

    # The dict and defaultdict cases below expect exactly the same lines
    # when md_fields is a subset of the metadata keys.
    exp_subset = [
        "#NodeID\tNodeType\tAbundance\tphylum\tclass\torder\tfamily",
        "o1\totu\t15.0\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae",
        "o2\totu\t40.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1",
        "o3\totu\t65.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos1",
        "o4\totu\t90.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2",
        "o5\totu\t115.0\tp__Firmicutes\tc__Clostridia\to__Clostridiales\tf__Lachnos2",
        "o6\totu\t140.0\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae",
        "o7\totu\t165.0\tp__Firmicutes\tc__Clostridia\to__Clostri3\tf__Lachnospiraceae",
        "o8\totu\t190.0\tp__Firmicutes\tc__Clost5\to__Clostridiales\tf__Lachnospiraceae",
    ]

    # test that it doesn't error when md_fields is subset of md_dict
    md_fields = ["phylum", "class", "order", "family"]
    obs_bt = make_otu_node_table(bt, md_key, md_fields)
    self.assertEqual(obs_bt, exp_subset)

    # test when the md is type defaultdict
    bt = parse_biom_table(BIOM_STRING_5)
    # test that it raises an error when md_fields not found in md_dict
    md_fields = ["KINGDOM", "phylum", "class", "order", "family"]
    self.assertRaises(ValueError, make_otu_node_table, bt, md_key, md_fields)
    # test that it doesn't error when md_fields is subset of md_dict
    md_fields = ["phylum", "class", "order", "family"]
    obs_bt = make_otu_node_table(bt, md_key, md_fields)
    self.assertEqual(obs_bt, exp_subset)
def test_node_attr_table(self):
    """Check make_node_attr_table for shared and for mixed attributes."""
    table = parse_biom_table(BIOM_STRING_1)
    mapping_lines = MF_LINES.split("\n")
    mapping = parse_mapping_file_to_dict(mapping_lines)[0]
    sample_rows = make_sample_node_table(table, mapping)
    tax_key = "taxonomy"
    tax_fields = ["k", "p", "c", "o", "f"]

    # First scenario: every color/size/shape field is one of the shared
    # attributes (NodeType, Abundance).
    otu_rows = make_otu_node_table(table, tax_key, tax_fields)
    sample_color, otu_color = ["NodeType"], ["NodeType"]
    sample_size, otu_size = ["Abundance"], ["Abundance", "NodeType"]
    sample_shape, otu_shape = ["NodeType", "Abundance"], ["NodeType"]
    got = make_node_attr_table(otu_rows, sample_rows,
                               sample_color, otu_color,
                               sample_size, otu_size,
                               sample_shape, otu_shape)
    want = [
        "#NodeID\tNodeType\tAbundance\tColor\tSize\tShape",
        "s1\tsample\t148.0\tsample\t148.0\tsample_148.0",
        "s2\tsample\t156.0\tsample\t156.0\tsample_156.0",
        "s3\tsample\t164.0\tsample\t164.0\tsample_164.0",
        "s4\tsample\t172.0\tsample\t172.0\tsample_172.0",
        "s5\tsample\t180.0\tsample\t180.0\tsample_180.0",
        "o1\totu\t15.0\totu\t15.0_otu\totu",
        "o2\totu\t40.0\totu\t40.0_otu\totu",
        "o3\totu\t65.0\totu\t65.0_otu\totu",
        "o4\totu\t90.0\totu\t90.0_otu\totu",
        "o5\totu\t115.0\totu\t115.0_otu\totu",
        "o6\totu\t140.0\totu\t140.0_otu\totu",
        "o7\totu\t165.0\totu\t165.0_otu\totu",
        "o8\totu\t190.0\totu\t190.0_otu\totu",
    ]
    # Expected lines were derived by hand and may be ordered differently,
    # so only set equality is checked.
    self.assertEqual(set(got), set(want))

    # Second scenario: shared attributes mixed with sample-only fields
    # (Diet, Treatment) and otu-only fields (k, p, o).
    otu_rows = make_otu_node_table(table, tax_key, tax_fields)
    sample_color, otu_color = ["NodeType", "Diet"], ["k", "p"]
    sample_size, otu_size = ["Abundance"], ["Abundance", "NodeType"]
    sample_shape, otu_shape = ["Treatment", "Abundance"], ["NodeType", "o"]
    got = make_node_attr_table(otu_rows, sample_rows,
                               sample_color, otu_color,
                               sample_size, otu_size,
                               sample_shape, otu_shape)
    want = [
        "#NodeID\tNodeType\tAbundance\tColor\tSize\tShape",
        "s1\tsample\t148.0\tsample_hf\t148.0\tpre_148.0",
        "s2\tsample\t156.0\tsample_lf\t156.0\tpre_156.0",
        "s3\tsample\t164.0\tsample_hf\t164.0\tpre_164.0",
        "s4\tsample\t172.0\tsample_lf\t172.0\tpost_172.0",
        "s5\tsample\t180.0\tsample_mf\t180.0\tpost_180.0",
        "o1\totu\t15.0\tk__Bacteria_p__Firmicutes\t15.0_otu\totu_o__Clostridiales",
        "o2\totu\t40.0\tk__Bacteria_p__Firmicutes\t40.0_otu\totu_o__Clostridiales",
        "o3\totu\t65.0\tk__Bacteria_p__Firmicutes\t65.0_otu\totu_o__Clostridiales",
        "o4\totu\t90.0\tk__Bacteria_p__Firmicutes\t90.0_otu\totu_o__Clostridiales",
        "o5\totu\t115.0\tk__Bacteria_p__Firmicutes\t115.0_otu\totu_o__Clostridiales",
        "o6\totu\t140.0\tk__Bacteria_p__Firmicutes\t140.0_otu\totu_o__Clostri3",
        "o7\totu\t165.0\tk__Bacteria_p__Firmicutes\t165.0_otu\totu_o__Clostri3",
        "o8\totu\t190.0\tk__Bacteria_p__Firmicutes\t190.0_otu\totu_o__Clostridiales",
    ]
    # Hand-derived again: compare content, not order.
    self.assertEqual(set(got), set(want))