def test_make_non_empty_sample_lists(self):
    """Test that sample lists are created correctly.

    Calls ``make_non_empty_sample_lists`` with the fixture mapping data
    (``self.mheaders`` and ``self.mdata``), first splitting on two fields
    and then on one, and compares the returned sample groups and value
    groups against the expected ones, pairwise and in order.
    """
    def check_groups(observed, expected):
        # zip() stops at the shorter of its inputs, so without an explicit
        # length check a missing (or extra) group would go unnoticed and
        # the test could pass without comparing anything.
        observed = list(observed)
        if len(observed) != len(expected):
            raise AssertionError('Expected %d groups, observed %d.' %
                                 (len(expected), len(observed)))
        for obs, exp in zip(observed, expected):
            assert_array_equal(obs, exp)

    # Split on the combination of two fields.
    fields = ['color', 'temp']
    obs_sgs, obs_vgs = make_non_empty_sample_lists(fields, self.mheaders,
                                                   self.mdata)
    exp_sgs = [
        array(['s4'], dtype='|S5'),
        array(['s1'], dtype='|S5'),
        array(['s0'], dtype='|S5'),
        array(['s3'], dtype='|S5'),
        array(['s2'], dtype='|S5'),
    ]
    exp_vgs = [('blue', '0'), ('blue', 'cold'), ('blue', 'hot'),
               ('cyan', 'hot'), ('green', 'cold')]
    check_groups(obs_sgs, exp_sgs)
    check_groups(obs_vgs, exp_vgs)

    # Split on a single field.
    fields = ['color']
    obs_sgs, obs_vgs = make_non_empty_sample_lists(fields, self.mheaders,
                                                   self.mdata)
    exp_sgs = [
        array(['s0', 's1', 's4'], dtype='|S5'),
        array(['s3'], dtype='|S5'),
        array(['s2'], dtype='|S5'),
    ]
    exp_vgs = [('blue', ), ('cyan', ), ('green', )]
    check_groups(obs_sgs, exp_sgs)
    check_groups(obs_vgs, exp_vgs)
def test_make_non_empty_sample_lists(self):
    """Test that sample lists are created correctly.

    Each case below pairs a list of mapping-file fields with the sample
    groups and value groups that ``make_non_empty_sample_lists`` is
    expected to return for the fixture data (``self.mheaders`` and
    ``self.mdata``). Observed and expected groups are compared pairwise,
    in order.
    """
    # (fields, expected sample groups, expected value groups)
    cases = [
        (['color', 'temp'],
         [array(['s4'], dtype='|S5'),
          array(['s1'], dtype='|S5'),
          array(['s0'], dtype='|S5'),
          array(['s3'], dtype='|S5'),
          array(['s2'], dtype='|S5')],
         [('blue', '0'), ('blue', 'cold'), ('blue', 'hot'),
          ('cyan', 'hot'), ('green', 'cold')]),
        (['color'],
         [array(['s0', 's1', 's4'], dtype='|S5'),
          array(['s3'], dtype='|S5'),
          array(['s2'], dtype='|S5')],
         [('blue',), ('cyan',), ('green',)]),
    ]

    for fields, exp_sgs, exp_vgs in cases:
        obs_sgs, obs_vgs = make_non_empty_sample_lists(fields,
                                                       self.mheaders,
                                                       self.mdata)
        for observed, expected in ((obs_sgs, exp_sgs), (obs_vgs, exp_vgs)):
            observed = list(observed)
            # zip() silently truncates to the shorter input; check the
            # group count first so a short (or empty) result cannot pass.
            if len(observed) != len(expected):
                raise AssertionError(
                    'Fields %r: expected %d groups, observed %d.' %
                    (fields, len(expected), len(observed)))
            for i, j in zip(observed, expected):
                assert_array_equal(i, j)
def main():
    """Split a BIOM table, and optionally its mapping file, by field values.

    Reads the command-line options described by ``script_info``, loads the
    BIOM table and the mapping file, restricts both to the samples they
    share, and then writes one ``.biom`` file per group returned by
    ``make_non_empty_sample_lists``. Unless mapping-file output is
    suppressed, a matching ``.txt`` mapping file is written for each group.

    Output files are placed in the requested output directory and named
    after the input file's base name followed by
    ``__<field>_<value>_..._<field>_<value>__``.

    Raises:
        ValueError: if the mapping file and the BIOM table share no
            samples, or if any requested field is not a mapping-file
            header.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    biom_table_fp = opts.biom_table_fp
    mapping_fp = opts.mapping_fp
    fields = opts.fields.split(',')
    output_dir = opts.output_dir
    suppress_mf = opts.suppress_mapping_file_output

    bt = load_table(biom_table_fp)
    mdata, mheaders, mcomments = parse_mapping_file(mapping_fp)
    mdata = array(mdata)

    # Check that the biom file and the mapping file have matching sample
    # names. Samples that do not appear in both are discarded.
    shared_set = set(mdata[:, 0]).intersection(bt.ids(axis='sample'))
    shared_samples = list(shared_set)
    if not shared_samples:
        raise ValueError('Mapping file and biom table share no samples.')
    if len(shared_samples) != len(mdata[:, 0]):
        # Some mapping-file samples are missing from the table: subset
        # both, preserving the order of the samples in the biom table.
        ss_bt_order = [s for s in bt.ids(axis='sample') if s in shared_set]
        bt = bt.filter(ss_bt_order, axis='sample', inplace=True)
        mdata = subset_mapping_data(mdata, shared_samples)

    # Check that every requested field is a header in the mapping data.
    if not all(field in mheaders for field in fields):
        raise ValueError('One or more of the specified fields was not found '
                         'in the mapping file.')

    # Create the output directory and the base names of the output files.
    create_dir(output_dir)
    mf_base_name = join(output_dir, splitext(split(mapping_fp)[1])[0])
    bt_base_name = join(output_dir, splitext(split(biom_table_fp)[1])[0])

    # Write one table (and mapping file) per group of samples.
    sample_groups, value_groups = make_non_empty_sample_lists(fields,
                                                              mheaders,
                                                              mdata)
    for sg, vg in zip(sample_groups, value_groups):
        # e.g. fields ['color', 'temp'] with values ('blue', 'hot') give
        # the suffix '__color_blue_temp_hot__'.
        nb = ('__' +
              ''.join('%s_%s_' % (f, v) for f, v in zip(fields, vg)) +
              '_')

        tmp_mf_data = subset_mapping_data(mdata, sg)
        tmp_mf_str = format_mapping_file(mheaders, tmp_mf_data, mcomments)
        write_biom_table(bt.filter(sg, axis='sample', inplace=False),
                         bt_base_name + nb + '.biom')

        if not suppress_mf:
            # The context manager closes the file even if the write fails.
            with open(mf_base_name + nb + '.txt', 'w') as o:
                o.writelines(tmp_mf_str)