def main():
    """Average collated alpha diversity data and pass it on for binning.

    Reads every file in --alpha_fps, keys its lines by the file's basename
    (the metric name) and, for collated input, averages the values at the
    requested rarefaction depth via mean_alpha.  Exits through the option
    parser with mean_alpha's message on bad input.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth
    number_of_bins = opts.number_of_bins
    collated_input = opts.collated_input

    # if using collated data, make sure they specify a depth
    if collated_input:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            # context manager closes the handle; default text mode replaces
            # the 'U' flag, which was removed in Python 3.11
            with open(single_alpha_fp) as alpha_fh:
                alpha_dict[splitext(basename(single_alpha_fp))[0]] = \
                    alpha_fh.readlines()

        # format the collated data
        try:
            metrics, alpha_sample_ids, alpha_data = mean_alpha(
                alpha_dict, depth)
        except ValueError as e:  # see mean_alpha for the possible exceptions
            # str(e) instead of e.message: the .message attribute does not
            # exist on Python 3 exceptions
            option_parser.error(str(e))
def main():
    """Add alpha diversity values to a mapping file.

    Parses alpha diversity data (collated when --depth is given, a single
    pre-computed matrix otherwise), merges it into the mapping file as
    binned columns and writes the result to --output_mapping_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth

    # make sure the number of bins is an integer
    try:
        number_of_bins = int(opts.number_of_bins)
    except ValueError:
        # raise-as-call syntax: the Python 2 "raise E, msg" statement is a
        # SyntaxError on Python 3
        raise ValueError('The number of bins must be an integer, not %s'
                         % opts.number_of_bins)

    # if using collated data, make sure they specify a depth
    if depth is not None:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            # context manager avoids leaking the handle; default text mode
            # replaces the 'U' flag, removed in Python 3.11
            with open(single_alpha_fp) as alpha_fh:
                alpha_dict[splitext(basename(single_alpha_fp))[0]] = \
                    alpha_fh.readlines()

        # format the collated data
        metrics, alpha_sample_ids, alpha_data = mean_alpha(alpha_dict,
                                                           depth)

    # when not using collated data, the user can only specify one input file
    else:
        if len(alpha_fps) > 1:
            option_parser.error('A comma-separated list of files should only be'
                ' passed with the --alpha_fps option when using collated alpha '
                'diversity data and also selecting a rarefaction depth with the'
                ' --depth option.')
        else:
            with open(alpha_fps[0]) as matrix_fh:
                metrics, alpha_sample_ids, alpha_data = parse_matrix(matrix_fh)

    # parse the data from the files
    with open(mapping_fp) as mapping_fh:
        mapping_file_data, mapping_file_headers, comments = \
            parse_mapping_file(mapping_fh)

    # add the alpha diversity data to the mapping file
    out_mapping_file_data, out_mapping_file_headers = \
        add_alpha_diversity_values_to_mapping_file(metrics, alpha_sample_ids,
        alpha_data, mapping_file_headers, mapping_file_data, number_of_bins,
        binning_method, missing_value_name)

    # format the new data and write it down
    lines = format_mapping_file(out_mapping_file_headers, out_mapping_file_data)
    # with-block guarantees the output file is flushed and closed even if
    # writelines raises
    with open(output_mapping_fp, 'w') as fd_out:
        fd_out.writelines(lines)
# Exemple #3 (0 votes) -- scraped page marker, kept as a comment so the file parses
def main():
    """Add alpha diversity values to a mapping file (collated-input variant).

    With --collated_input, averages the collated alpha diversity files at
    --depth via mean_alpha; otherwise parses a single alpha diversity
    matrix.  The values are binned into the mapping file and written to
    --output_mapping_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth
    number_of_bins = opts.number_of_bins
    collated_input = opts.collated_input

    # if using collated data, make sure they specify a depth
    if collated_input:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            # context manager closes the handle; default text mode replaces
            # the 'U' flag, removed in Python 3.11
            with open(single_alpha_fp) as alpha_fh:
                alpha_dict[splitext(basename(single_alpha_fp))[0]] = \
                    alpha_fh.readlines()

        # format the collated data
        try:
            metrics, alpha_sample_ids, alpha_data = mean_alpha(
                alpha_dict, depth)
        except ValueError as e:  # see mean_alpha for the possible exceptions
            # str(e) instead of e.message: the .message attribute does not
            # exist on Python 3 exceptions
            option_parser.error(str(e))

    # when not using collated data, the user can only specify one input file
    else:
        if len(alpha_fps) > 1:
            option_parser.error(
                'A comma-separated list of files should only be'
                ' passed with the --alpha_fps option when using collated alpha '
                'diversity data and also selecting a rarefaction depth with the'
                ' --depth option.')
        else:
            with open(alpha_fps[0]) as matrix_fh:
                metrics, alpha_sample_ids, alpha_data = parse_matrix(matrix_fh)

    # parse the data from the files
    with open(mapping_fp) as mapping_fh:
        mapping_file_data, mapping_file_headers, comments = \
            parse_mapping_file(mapping_fh)

    # add the alpha diversity data to the mapping file
    out_mapping_file_data, out_mapping_file_headers = \
        add_alpha_diversity_values_to_mapping_file(metrics, alpha_sample_ids,
                                                   alpha_data, mapping_file_headers, mapping_file_data, number_of_bins,
                                                   binning_method, missing_value_name)

    # format the new data and write it down
    lines = format_mapping_file(out_mapping_file_headers,
                                out_mapping_file_data)
    # with-block guarantees the output file is flushed and closed even if
    # writelines raises
    with open(output_mapping_fp, 'w') as fd_out:
        fd_out.writelines(lines)
    def test_mean_alpha(self):
        """checks data is being correctly averaged"""

        # regular use-cases for this function
        expected_data = [[9.441785, 82.93],
                         [0.42877, 5.2006], [9.625995, 8.18]]
        expected_metrics = ['PD_whole_tree_even_310', 'chao1_even_310']
        expected_sample_ids = ['s1', 's2', 's3']

        o_metrics, o_sample_ids, o_data = mean_alpha(
            self.collated_alpha_dict_a, 310)

        # assertEqual instead of the deprecated assertEquals alias
        # (removed in Python 3.12)
        self.assertEqual(o_metrics, expected_metrics)
        self.assertEqual(o_sample_ids, expected_sample_ids)
        self.assertEqual(o_data, expected_data)

        expected_data = [[12.508435, 11.6105],
                         [0.42877, 8.42], [11.58785, 1.0]]
        expected_metrics = ['PD_whole_tree_even_610', 'chao1_even_610']

        o_metrics, o_sample_ids, o_data = mean_alpha(
            self.collated_alpha_dict_a, 610)

        self.assertEqual(o_metrics, expected_metrics)
        self.assertEqual(o_sample_ids, expected_sample_ids)
        self.assertEqual(o_data, expected_data)

        # a depth of None should default to the highest depth available
        o_metrics, o_sample_ids, o_data = mean_alpha(
            self.collated_alpha_dict_a,
            None)
        self.assertEqual(o_metrics, expected_metrics)
        self.assertEqual(o_sample_ids, expected_sample_ids)
        self.assertEqual(o_data, expected_data)

        # non-existent depth
        with self.assertRaises(ValueError):
            mean_alpha(self.collated_alpha_dict_b, 111111)

        # files with non-matching sample ids should raise an exception
        with self.assertRaises(ValueError):
            mean_alpha(self.collated_alpha_dict_b, 310)

        # input types that should not be processed; the return value is
        # irrelevant, only the raised exception matters
        with self.assertRaises(AssertionError):
            mean_alpha([1, 2, 3], 5)

        with self.assertRaises(AssertionError):
            mean_alpha({'a': 'b'}, -1.4)
# Exemple #5 (0 votes) -- scraped page marker, kept as a comment so the file parses
    def test_mean_alpha(self):
        """checks data is being correctly averaged"""

        # regular use-cases for this function
        expected_data = [[9.441785, 82.93],
                         [0.42877, 5.2006], [9.625995, 8.18]]
        expected_metrics = ['PD_whole_tree_even_310', 'chao1_even_310']
        expected_sample_ids = ['s1', 's2', 's3']

        o_metrics, o_sample_ids, o_data = mean_alpha(
            self.collated_alpha_dict_a, 310)

        # assertEqual instead of the deprecated assertEquals alias
        # (removed in Python 3.12)
        self.assertEqual(o_metrics, expected_metrics)
        self.assertEqual(o_sample_ids, expected_sample_ids)
        self.assertEqual(o_data, expected_data)

        expected_data = [[12.508435, 11.6105],
                         [0.42877, 8.42], [11.58785, 1.0]]
        expected_metrics = ['PD_whole_tree_even_610', 'chao1_even_610']

        o_metrics, o_sample_ids, o_data = mean_alpha(
            self.collated_alpha_dict_a, 610)

        self.assertEqual(o_metrics, expected_metrics)
        self.assertEqual(o_sample_ids, expected_sample_ids)
        self.assertEqual(o_data, expected_data)

        # a depth of None should default to the highest depth available
        o_metrics, o_sample_ids, o_data = mean_alpha(
            self.collated_alpha_dict_a,
            None)
        self.assertEqual(o_metrics, expected_metrics)
        self.assertEqual(o_sample_ids, expected_sample_ids)
        self.assertEqual(o_data, expected_data)

        # non-existent depth
        with self.assertRaises(ValueError):
            mean_alpha(self.collated_alpha_dict_b, 111111)

        # files with non-matching sample ids should raise an exception
        with self.assertRaises(ValueError):
            mean_alpha(self.collated_alpha_dict_b, 310)

        # input types that should not be processed; the return value is
        # irrelevant, only the raised exception matters
        with self.assertRaises(AssertionError):
            mean_alpha([1, 2, 3], 5)

        with self.assertRaises(AssertionError):
            mean_alpha({'a': 'b'}, -1.4)