def main():
    """Parse the command line and load collated alpha diversity data.

    Options are read with ``parse_command_line_parameters(**script_info)``.
    When ``opts.collated_input`` is set, every path in ``opts.alpha_fps`` is
    read into a dictionary keyed by the file's base name (extension
    stripped) and handed to ``mean_alpha`` together with ``opts.depth``.

    A ``ValueError`` raised by ``mean_alpha`` is reported through
    ``option_parser.error`` instead of propagating as a traceback.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    # NOTE(review): the options below are read but not consumed anywhere in
    # the visible body of this function; they are kept so the set of option
    # attributes accessed stays unchanged.
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth
    number_of_bins = opts.number_of_bins
    collated_input = opts.collated_input

    # collated data comes as several files that are combined by mean_alpha
    if collated_input:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            key = splitext(basename(single_alpha_fp))[0]
            # the context manager guarantees the handle is closed, even if
            # reading fails part-way through
            with open(single_alpha_fp, "U") as alpha_f:
                alpha_dict[key] = alpha_f.readlines()

        # format the collated data
        try:
            metrics, alpha_sample_ids, alpha_data = mean_alpha(alpha_dict,
                                                               depth)
        except ValueError as e:
            # see mean_alpha for the possible exceptions; str(e) is used
            # because the ``message`` attribute is deprecated
            option_parser.error(str(e))
def main():
    """Add alpha diversity values to a mapping file and write the result.

    Steps performed:

    1. Parse the command line with ``parse_command_line_parameters``.
    2. Convert ``opts.number_of_bins`` to ``int``.
    3. Load the alpha diversity data: with ``mean_alpha`` when a depth was
       given (collated input, one entry per file), otherwise with
       ``parse_matrix`` from the single input file.
    4. Parse the mapping file, merge the alpha diversity values into it with
       ``add_alpha_diversity_values_to_mapping_file`` and write the formatted
       lines to ``opts.output_mapping_fp``.

    Raises:
        ValueError: if ``opts.number_of_bins`` cannot be converted to an
            integer.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth

    # make sure the number of bins is an integer
    try:
        number_of_bins = int(opts.number_of_bins)
    except ValueError:
        raise ValueError('The number of bins must be an integer, not %s'
                         % opts.number_of_bins)

    # a depth is only meaningful for collated data, so its presence selects
    # the collated code path
    if depth is not None:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            key = splitext(basename(single_alpha_fp))[0]
            # close every input file as soon as its lines have been read
            with open(single_alpha_fp, 'U') as alpha_f:
                alpha_dict[key] = alpha_f.readlines()

        # format the collated data
        metrics, alpha_sample_ids, alpha_data = mean_alpha(alpha_dict, depth)

    # when not using collated data, the user can only specify one input file
    else:
        if len(alpha_fps) > 1:
            option_parser.error('A comma-separated list of files should only be'
                                ' passed with the --alpha_fps option when using collated alpha '
                                'diversity data and also selecting a rarefaction depth with the'
                                ' --depth option.')
        else:
            with open(alpha_fps[0], 'U') as alpha_f:
                metrics, alpha_sample_ids, alpha_data = parse_matrix(alpha_f)

    # parse the data from the files
    with open(mapping_fp, 'U') as mapping_f:
        mapping_file_data, mapping_file_headers, comments = \
            parse_mapping_file(mapping_f)

    # add the alpha diversity data to the mapping file
    out_mapping_file_data, out_mapping_file_headers = \
        add_alpha_diversity_values_to_mapping_file(
            metrics, alpha_sample_ids, alpha_data, mapping_file_headers,
            mapping_file_data, number_of_bins, binning_method,
            missing_value_name)

    # format the new data and write it down
    lines = format_mapping_file(out_mapping_file_headers,
                                out_mapping_file_data)
    # the context manager closes the output even if writing fails
    with open(output_mapping_fp, 'w') as fd_out:
        fd_out.writelines(lines)
def main():
    """Add alpha diversity values to a mapping file and write the result.

    Steps performed:

    1. Parse the command line with ``parse_command_line_parameters``.
    2. Load the alpha diversity data: with ``mean_alpha`` when
       ``opts.collated_input`` is set (one dictionary entry per input file),
       otherwise with ``parse_matrix`` from the single input file.
    3. Parse the mapping file, merge the alpha diversity values into it with
       ``add_alpha_diversity_values_to_mapping_file`` and write the formatted
       lines to ``opts.output_mapping_fp``.

    A ``ValueError`` raised by ``mean_alpha`` and the use of several input
    files without collated input are both reported through
    ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth
    number_of_bins = opts.number_of_bins
    collated_input = opts.collated_input

    # collated data comes as several files that are combined by mean_alpha
    if collated_input:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            key = splitext(basename(single_alpha_fp))[0]
            # close every input file as soon as its lines have been read
            with open(single_alpha_fp, 'U') as alpha_f:
                alpha_dict[key] = alpha_f.readlines()

        # format the collated data
        try:
            metrics, alpha_sample_ids, alpha_data = mean_alpha(
                alpha_dict, depth)
        except ValueError as e:
            # see mean_alpha for the possible exceptions; str(e) is used
            # because the ``message`` attribute is deprecated
            option_parser.error(str(e))

    # when not using collated data, the user can only specify one input file
    else:
        if len(alpha_fps) > 1:
            option_parser.error(
                'A comma-separated list of files should only be'
                ' passed with the --alpha_fps option when using collated alpha '
                'diversity data and also selecting a rarefaction depth with the'
                ' --depth option.')
        else:
            with open(alpha_fps[0], 'U') as alpha_f:
                metrics, alpha_sample_ids, alpha_data = parse_matrix(alpha_f)

    # parse the data from the files
    with open(mapping_fp, 'U') as mapping_f:
        mapping_file_data, mapping_file_headers, comments = \
            parse_mapping_file(mapping_f)

    # add the alpha diversity data to the mapping file
    out_mapping_file_data, out_mapping_file_headers = \
        add_alpha_diversity_values_to_mapping_file(
            metrics, alpha_sample_ids, alpha_data, mapping_file_headers,
            mapping_file_data, number_of_bins, binning_method,
            missing_value_name)

    # format the new data and write it down
    lines = format_mapping_file(out_mapping_file_headers,
                                out_mapping_file_data)
    # the context manager closes the output even if writing fails
    with open(output_mapping_fp, 'w') as fd_out:
        fd_out.writelines(lines)
def test_mean_alpha(self):
    """Check that mean_alpha averages collated data correctly.

    Covers explicit depths, the default depth (``None``), a depth that is
    not present, inputs with non-matching sample ids, and invalid argument
    types.
    """
    # regular use-cases for this function
    expected_data = [[9.441785, 82.93], [0.42877, 5.2006], [9.625995, 8.18]]
    expected_metrics = ['PD_whole_tree_even_310', 'chao1_even_310']
    expected_sample_ids = ['s1', 's2', 's3']

    o_metrics, o_sample_ids, o_data = mean_alpha(
        self.collated_alpha_dict_a, 310)

    self.assertEqual(o_metrics, expected_metrics)
    self.assertEqual(o_sample_ids, expected_sample_ids)
    self.assertEqual(o_data, expected_data)

    expected_data = [[12.508435, 11.6105], [0.42877, 8.42], [11.58785, 1.0]]
    expected_metrics = ['PD_whole_tree_even_610', 'chao1_even_610']

    o_metrics, o_sample_ids, o_data = mean_alpha(
        self.collated_alpha_dict_a, 610)

    self.assertEqual(o_metrics, expected_metrics)
    self.assertEqual(o_sample_ids, expected_sample_ids)
    self.assertEqual(o_data, expected_data)

    # should default to the highest depth, so the expectations for depth 610
    # set above are reused here
    o_metrics, o_sample_ids, o_data = mean_alpha(
        self.collated_alpha_dict_a, None)
    self.assertEqual(o_metrics, expected_metrics)
    self.assertEqual(o_sample_ids, expected_sample_ids)
    self.assertEqual(o_data, expected_data)

    # non-existent depth
    with self.assertRaises(ValueError):
        mean_alpha(self.collated_alpha_dict_b, 111111)

    # files with non-matching sample ids should raise an exception
    with self.assertRaises(ValueError):
        mean_alpha(self.collated_alpha_dict_b, 310)

    # input types that should not be processed
    with self.assertRaises(AssertionError):
        mean_alpha([1, 2, 3], 5)

    with self.assertRaises(AssertionError):
        mean_alpha({'a': 'b'}, -1.4)