Beispiel #1
0
 def test_make_non_empty_sample_lists(self):
     """Test that sample lists are created correctly.

     Exercises make_non_empty_sample_lists with a two-field and then a
     single-field grouping. In each case the observed sample groups and
     value groups must match the expected ones both element by element
     and in number.
     """
     def check_groups(observed, expected):
         # zip() stops at the shorter input, so a result with missing
         # (or extra) groups would slip through an element-wise loop.
         # Pin the number of groups first to close that gap.
         observed = list(observed)
         assert_array_equal(len(observed), len(expected))
         for obs, exp in zip(observed, expected):
             assert_array_equal(obs, exp)

     # Grouping on two fields: one group per (color, temp) combination.
     fields = ['color', 'temp']
     obs_sgs, obs_vgs = make_non_empty_sample_lists(fields, self.mheaders,
                                                    self.mdata)
     exp_sgs = [
         array(['s4'], dtype='|S5'),
         array(['s1'], dtype='|S5'),
         array(['s0'], dtype='|S5'),
         array(['s3'], dtype='|S5'),
         array(['s2'], dtype='|S5'),
     ]
     exp_vgs = [('blue', '0'), ('blue', 'cold'), ('blue', 'hot'),
                ('cyan', 'hot'), ('green', 'cold')]
     check_groups(obs_sgs, exp_sgs)
     check_groups(obs_vgs, exp_vgs)

     # Grouping on a single field: one group per color.
     fields = ['color']
     obs_sgs, obs_vgs = make_non_empty_sample_lists(fields, self.mheaders,
                                                    self.mdata)
     exp_sgs = [
         array(['s0', 's1', 's4'], dtype='|S5'),
         array(['s3'], dtype='|S5'),
         array(['s2'], dtype='|S5'),
     ]
     exp_vgs = [('blue', ), ('cyan', ), ('green', )]
     check_groups(obs_sgs, exp_sgs)
     check_groups(obs_vgs, exp_vgs)
Beispiel #2
0
 def test_make_non_empty_sample_lists(self):
     """Test that sample lists are created correctly.

     Runs make_non_empty_sample_lists for a two-field grouping and for a
     single-field grouping, and verifies that the returned sample groups
     and value groups equal the expected ones, including their count.
     """
     def assert_groups_equal(observed, expected):
         # A bare zip() loop would silently ignore groups that are
         # missing from (or surplus in) the observed result, because
         # zip() truncates to the shorter input. Check the count first.
         observed = list(observed)
         assert_array_equal(len(observed), len(expected))
         for obs_item, exp_item in zip(observed, expected):
             assert_array_equal(obs_item, exp_item)

     # Two fields: every (color, temp) combination that has samples.
     fields = ['color', 'temp']
     obs_sgs, obs_vgs = make_non_empty_sample_lists(fields, self.mheaders,
                                                    self.mdata)
     exp_sgs = [array(['s4'], dtype='|S5'),
                array(['s1'], dtype='|S5'),
                array(['s0'], dtype='|S5'),
                array(['s3'], dtype='|S5'),
                array(['s2'], dtype='|S5')]
     exp_vgs = [('blue', '0'),
                ('blue', 'cold'),
                ('blue', 'hot'),
                ('cyan', 'hot'),
                ('green', 'cold')]
     assert_groups_equal(obs_sgs, exp_sgs)
     assert_groups_equal(obs_vgs, exp_vgs)

     # One field: samples grouped by color only.
     fields = ['color']
     obs_sgs, obs_vgs = make_non_empty_sample_lists(fields, self.mheaders,
                                                    self.mdata)
     exp_sgs = [array(['s0', 's1', 's4'], dtype='|S5'),
                array(['s3'], dtype='|S5'),
                array(['s2'], dtype='|S5')]
     exp_vgs = [('blue',), ('cyan',), ('green',)]
     assert_groups_equal(obs_sgs, exp_sgs)
     assert_groups_equal(obs_vgs, exp_vgs)
Beispiel #3
0
def main():
    """Split a BIOM table, and optionally its mapping file, by metadata fields.

    Reads the command-line options, loads the BIOM table and the mapping
    file, restricts both to the samples they have in common, and then writes
    one ``.biom`` file (plus one ``.txt`` mapping file unless suppressed) per
    non-empty combination of values of the requested fields.

    Output files are named ``<input base name>__<field>_<value>_...__`` with
    the matching extension and are placed in the output directory.

    Raises:
        ValueError: if the mapping file and the BIOM table share no samples,
            or if a requested field is not a mapping file header.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    biom_table_fp = opts.biom_table_fp
    mapping_fp = opts.mapping_fp
    fields = opts.fields.split(',')
    output_dir = opts.output_dir
    suppress_mf = opts.suppress_mapping_file_output

    bt = load_table(biom_table_fp)
    mdata, mheaders, mcomments = parse_mapping_file(mapping_fp)
    mdata = array(mdata)

    # Check that the biom file and mapping file have matching sample names
    # and discard the samples that do not appear in both. The first mapping
    # column holds the sample ids.
    shared_set = set(mdata[:, 0]).intersection(bt.ids(axis='sample'))
    if not shared_set:
        raise ValueError('Mapping file and biom table share no samples.')
    if len(shared_set) != len(mdata[:, 0]):
        # Preserve the order of the samples in the biom table. Membership is
        # tested against the set so this stays linear in the sample count.
        ss_bt_order = [s for s in bt.ids(axis='sample') if s in shared_set]
        bt = bt.filter(ss_bt_order, axis='sample', inplace=True)
        mdata = subset_mapping_data(mdata, list(shared_set))

    # Every requested field must be a header of the mapping file.
    if not all(field in mheaders for field in fields):
        raise ValueError('One or more of the specified fields was not found '
                         'in the mapping file.')

    # Create the output directory and the base names of the output files.
    create_dir(output_dir)
    mf_base_name = join(output_dir, splitext(split(mapping_fp)[1])[0])
    bt_base_name = join(output_dir, splitext(split(biom_table_fp)[1])[0])

    sample_groups, value_groups = make_non_empty_sample_lists(fields, mheaders,
                                                              mdata)

    for sg, vg in zip(sample_groups, value_groups):
        # Suffix of the form '__field1_value1_field2_value2__'.
        nb = '__%s_' % ''.join('%s_%s_' % (f, v) for f, v in zip(fields, vg))

        write_biom_table(bt.filter(sg, axis='sample', inplace=False),
                         bt_base_name + nb + '.biom')

        if not suppress_mf:
            # Only build the per-group mapping file when it will be written.
            tmp_mf_data = subset_mapping_data(mdata, sg)
            tmp_mf_str = format_mapping_file(mheaders, tmp_mf_data, mcomments)
            # The context manager closes the file even if the write fails.
            with open(mf_base_name + nb + '.txt', 'w') as o:
                o.writelines(tmp_mf_str)
Beispiel #4
0
def main():
    """Split a BIOM table, and optionally its mapping file, by metadata fields.

    Parses the command-line options, loads the BIOM table and the mapping
    file, keeps only the samples present in both, and writes one ``.biom``
    file per non-empty combination of values of the requested fields. A
    matching ``.txt`` mapping file is written for each group unless mapping
    file output is suppressed.

    Each output name is the input file's base name followed by
    ``__<field>_<value>_...__`` and the extension, inside the output
    directory.

    Raises:
        ValueError: if the mapping file and the BIOM table share no samples,
            or if a requested field is not a mapping file header.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    biom_table_fp = opts.biom_table_fp
    mapping_fp = opts.mapping_fp
    fields = opts.fields.split(',')
    output_dir = opts.output_dir
    suppress_mf = opts.suppress_mapping_file_output

    bt = load_table(biom_table_fp)
    mdata, mheaders, mcomments = parse_mapping_file(mapping_fp)
    mdata = array(mdata)

    # Sample ids live in the first mapping column; keep only those that the
    # biom table also contains.
    mapping_ids = mdata[:, 0]
    shared_set = set(mapping_ids).intersection(bt.ids(axis='sample'))
    if not shared_set:
        raise ValueError('Mapping file and biom table share no samples.')
    if len(shared_set) != len(mapping_ids):
        # We want to preserve the order of the samples in the biom table.
        # Looking ids up in the set avoids a list scan per sample.
        ss_bt_order = [s for s in bt.ids(axis='sample') if s in shared_set]
        bt = bt.filter(ss_bt_order, axis='sample', inplace=True)
        mdata = subset_mapping_data(mdata, list(shared_set))

    # Check that the requested fields are headers in the mapping data.
    if not all(field in mheaders for field in fields):
        raise ValueError('One or more of the specified fields was not found '
                         'in the mapping file.')

    # Create output directory and create base names.
    create_dir(output_dir)
    mf_base_name = join(output_dir, splitext(split(mapping_fp)[1])[0])
    bt_base_name = join(output_dir, splitext(split(biom_table_fp)[1])[0])

    sample_groups, value_groups = make_non_empty_sample_lists(fields, mheaders,
                                                              mdata)

    for sg, vg in zip(sample_groups, value_groups):
        # Builds e.g. '__color_blue_temp_hot__' from the field/value pairs.
        pairs = ''.join('%s_%s_' % (f, v) for f, v in zip(fields, vg))
        nb = '__' + pairs + '_'

        write_biom_table(bt.filter(sg, axis='sample', inplace=False),
                         bt_base_name + nb + '.biom')

        if not suppress_mf:
            # Skip subsetting/formatting entirely when no file is wanted.
            tmp_mf_data = subset_mapping_data(mdata, sg)
            tmp_mf_str = format_mapping_file(mheaders, tmp_mf_data, mcomments)
            # 'with' guarantees the handle is closed if the write raises.
            with open(mf_base_name + nb + '.txt', 'w') as o:
                o.writelines(tmp_mf_str)