def generate_alpha_rarefaction_data_from_point_in_omega(
        biom_object, metrics, sequences, iterations, tree_object=None):
    """generate alpha rarefaction data from a biom table

    Inputs:
    biom_object: OTU table to be rarefied and used to compute alpha diversity
    metrics: list of metrics, phylogenetic or non phylogenetic
    sequences: maximum number of sequences for the rarefaction plots
    iterations: number of repetitions per rarefaction
    tree_object: tree to perform the phylogenetic operations, default is None

    Output:
    alpha_rarefaction_data: dictionary where the keys are alpha diversity
    metrics and the values are tuples; in these tuples the first element is a
    list of column headers for an alpha diversity file, the second element is
    a list of row headers for an alpha diversity file and the third element is
    a list of lists containing the alpha diversity data computed at multiple
    rarefaction depths and as many iterations as specified.
    """
    # The minimum depth is defined by the size of the maximum depth.
    # NOTE(review): if `sequences` is an int and true division is not enabled
    # (Python 2 without `from __future__ import division`) the quotient is
    # already floored before ceil() sees it -- confirm which rounding is
    # intended before changing it.
    steps = 4
    min_depth = int(ceil(sequences / steps))

    # get a rarefied biom with the proper identifiers
    rarefied_bioms_list = get_rarefactions(biom_object, min_depth, sequences,
                                           iterations, steps)

    alpha_rs = {}
    alpha_filenames = []

    # rarefy all the biom objects and get the alpha diversity values
    for rarefied_biom in rarefied_bioms_list:
        # this tag contains data about the iteration and the depth
        identifier = 'alpha_rare_%s_%s' % (str(rarefied_biom[0]),
                                           str(rarefied_biom[1]))
        alpha_values = single_object_alpha(rarefied_biom[2], metrics,
                                           tree_object)
        alpha_rs[identifier] = (rarefied_biom[0], rarefied_biom[1],
                                alpha_values.split('\n'))
        alpha_filenames.append(identifier)

    # use the first rarefaction in the list as the reference for the metric
    # and sample identifiers
    ref_rare = single_object_alpha(rarefied_bioms_list[0][2], metrics,
                                   tree_object=tree_object).split('\n')
    all_metrics, all_samples, _ = parse_matrix(ref_rare)

    # Parse every rarefaction exactly once; the parsed tables do not depend on
    # the metric, so there is no need to re-parse them inside the metric loop.
    # NOTE(review): assumes make_output_row does not modify its arguments --
    # confirm against its definition.
    parsed_tables = [(filename, parse_matrix(alpha_rs[filename][2]))
                     for filename in alpha_filenames]
    num_samples = len(all_samples)

    # build a dictionary with the data for each of the metrics specified
    metrics_data = {}
    for metric in all_metrics:
        metrics_data[metric] = [
            make_output_row(f_metrics, metric, f_samples, f_data, filename,
                            num_samples, all_samples)
            for filename, (f_metrics, f_samples, f_data) in parsed_tables]

    # now format the dictionary to make it compatible with make_averages
    return _format_rarefactions(metrics_data, all_samples)
def generate_alpha_rarefaction_data_from_point_in_omega(
        biom_object, metrics, sequences, iterations, tree_object=None):
    """generate alpha rarefaction data from a biom table

    Inputs:
    biom_object: OTU table to be rarefied and used to compute alpha diversity
    metrics: list of metrics, phylogenetic or non phylogenetic
    sequences: maximum number of sequences for the rarefaction plots
    iterations: number of repetitions per rarefaction
    tree_object: tree to perform the phylogenetic operations, default is None

    Output:
    alpha_rarefaction_data: dictionary where the keys are alpha diversity
    metrics and the values are tuples; in these tuples the first element is a
    list of column headers for an alpha diversity file, the second element is
    a list of row headers for an alpha diversity file and the third element is
    a list of lists containing the alpha diversity data computed at multiple
    rarefaction depths and as many iterations as specified.
    """
    # The minimum depth is defined by the size of the maximum depth.
    # NOTE(review): with integer operands and no true division (Python 2
    # default) the division floors first, which makes ceil() a no-op here;
    # verify the intended rounding before touching this expression.
    steps = 4
    min_depth = int(ceil(sequences / steps))

    # get a rarefied biom with the proper identifiers
    rarefied_bioms_list = get_rarefactions(biom_object, min_depth, sequences,
                                           iterations, steps)

    # identifier -> (first element, second element, alpha diversity lines);
    # alpha_filenames keeps the identifiers in rarefaction order
    alpha_rs = {}
    alpha_filenames = []
    for rarefied_biom in rarefied_bioms_list:
        # this tag is built from the first two elements of each rarefaction
        identifier = 'alpha_rare_%s_%s' % (str(rarefied_biom[0]),
                                           str(rarefied_biom[1]))
        alpha_lines = single_object_alpha(rarefied_biom[2], metrics,
                                          tree_object).split('\n')
        alpha_rs[identifier] = (rarefied_biom[0], rarefied_biom[1],
                                alpha_lines)
        alpha_filenames.append(identifier)

    # the first rarefaction provides the reference metrics and sample ids
    ref_rare = single_object_alpha(rarefied_bioms_list[0][2], metrics,
                                   tree_object=tree_object).split('\n')
    all_metrics, all_samples, _ = parse_matrix(ref_rare)
    total_samples = len(all_samples)

    # The parsed form of a rarefaction is the same for every metric, so parse
    # each one a single time up front and not once per metric.
    # NOTE(review): relies on make_output_row leaving its inputs untouched --
    # confirm against its definition.
    parsed_by_name = {}
    for filename in alpha_filenames:
        if filename not in parsed_by_name:
            parsed_by_name[filename] = parse_matrix(alpha_rs[filename][2])

    # build a dictionary with the data for each of the metrics specified
    metrics_data = {}
    for metric in all_metrics:
        per_metric_data = []
        for filename in alpha_filenames:
            f_metrics, f_samples, f_data = parsed_by_name[filename]
            per_metric_data.append(
                make_output_row(f_metrics, metric, f_samples, f_data,
                                filename, total_samples, all_samples))
        metrics_data[metric] = per_metric_data

    # now format the dictionary to make it compatible with make_averages
    alpha_rarefaction_data = _format_rarefactions(metrics_data, all_samples)
    return alpha_rarefaction_data
def main():
    """Build one collated output file per metric from a directory of inputs.

    Reads the command line options described by `script_info`, parses every
    non-hidden file in the input directory with parse_matrix, and for each
    metric found in the example file writes one output (via
    write_output_file) holding one row per input file.

    When no example file is given, the input file whose parsed name has the
    smallest second field (sequences per sample) is used as the example.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if len(args) != 0:
        # report through the parser returned above; the bare name `parser`
        # is not defined in this function
        option_parser.error(
            "Positional argument detected. make sure all" +
            ' parameters are identified.' +
            '\ne.g.: include the \"-m\" in \"-m MINIMUM_LENGTH\"')

    input_dir = opts.input_path
    output_dir = opts.output_path
    example_filepath = opts.example_path

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # hidden files (leading '.') are not inputs
    file_names = [fname for fname in os.listdir(input_dir)
                  if not fname.startswith('.')]

    if example_filepath is None:
        if not file_names:
            # without this the lookup below fails with a bare IndexError
            option_parser.error('No input files found in %s' % input_dir)

        # table row is base_name, seqs_per_sam, iters, ext
        file_name_table = [parse_rarefaction_fname(fname)
                           for fname in file_names]
        # first row with the smallest seqs/sam, then map back to file name
        smallest_row = min(file_name_table, key=operator.itemgetter(1))
        example_fname = file_names[file_name_table.index(smallest_row)]
        example_filepath = os.path.join(input_dir, example_fname)

    with open(example_filepath, 'U') as f:
        all_metrics, all_samples, _ = parse_matrix(f)
    num_cols = len(all_samples)

    # make the table 1 row at a time
    # we're building a rarefaction by sample mtx from
    # a sample by metric matrix
    # each metric is one output file
    for metric in all_metrics:
        metric_file_data = []
        for fname in file_names:
            # f_ here refers to the input file currently being processed
            # to distinguish from the output file we're building
            with open(os.path.join(input_dir, fname), 'U') as f:
                f_metrics, f_samples, f_data = parse_matrix(f)
            metric_file_data.append(
                make_output_row(f_metrics, metric, f_samples,
                                f_data, fname, num_cols, all_samples))

        write_output_file(metric_file_data, output_dir, metric, all_samples)
def main():
    """Add alpha diversity values to a mapping file and write the result.

    Reads the command line options described by `script_info`. When a depth
    is given, every file in --alpha_fps is read and summarised with
    mean_alpha at that depth; otherwise exactly one alpha diversity file is
    expected and parsed with parse_matrix. The resulting values are merged
    into the mapping file by add_alpha_diversity_values_to_mapping_file and
    the formatted output is written to the output mapping file path.

    Raises:
    ValueError: if the number of bins cannot be converted to an integer.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth

    # make sure the number of bins is an integer
    try:
        number_of_bins = int(opts.number_of_bins)
    except ValueError:
        raise ValueError('The number of bins must be an integer, not %s'
                         % opts.number_of_bins)

    # a depth is only given when the input is collated data
    if depth is not None:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            key = splitext(basename(single_alpha_fp))[0]
            with open(single_alpha_fp, 'U') as alpha_f:
                alpha_dict[key] = alpha_f.readlines()

        # format the collated data
        metrics, alpha_sample_ids, alpha_data = mean_alpha(alpha_dict, depth)

    # when not using collated data, the user can only specify one input file
    else:
        if len(alpha_fps) > 1:
            option_parser.error(
                'A comma-separated list of files should only be'
                ' passed with the --alpha_fps option when using collated alpha '
                'diversity data and also selecting a rarefaction depth with the'
                ' --depth option.')
        else:
            with open(alpha_fps[0], 'U') as alpha_f:
                metrics, alpha_sample_ids, alpha_data = parse_matrix(alpha_f)

    # parse the data from the files
    # NOTE(review): assumes parse_mapping_file consumes the handle before
    # returning -- confirm against its definition.
    with open(mapping_fp, 'U') as mapping_f:
        mapping_file_data, mapping_file_headers, _ = parse_mapping_file(
            mapping_f)

    # add the alpha diversity data to the mapping file
    out_mapping_file_data, out_mapping_file_headers = \
        add_alpha_diversity_values_to_mapping_file(
            metrics, alpha_sample_ids, alpha_data, mapping_file_headers,
            mapping_file_data, number_of_bins, binning_method,
            missing_value_name)

    # format the new data and write it down
    lines = format_mapping_file(out_mapping_file_headers,
                                out_mapping_file_data)
    with open(output_mapping_fp, 'w') as fd_out:
        fd_out.writelines(lines)
def main():
    """Add alpha diversity values to a mapping file and write the result.

    Reads the command line options described by `script_info`. With
    --collated_input every file in --alpha_fps is read and summarised with
    mean_alpha at the requested depth; a ValueError from mean_alpha is
    reported through the option parser. Without it exactly one alpha
    diversity file is expected and parsed with parse_matrix. The resulting
    values are merged into the mapping file by
    add_alpha_diversity_values_to_mapping_file and the formatted output is
    written to the output mapping file path.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth
    number_of_bins = opts.number_of_bins
    collated_input = opts.collated_input

    # collated data: several files, summarised at the requested depth
    if collated_input:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            key = splitext(basename(single_alpha_fp))[0]
            with open(single_alpha_fp, 'U') as alpha_f:
                alpha_dict[key] = alpha_f.readlines()

        # format the collated data
        try:
            metrics, alpha_sample_ids, alpha_data = mean_alpha(
                alpha_dict, depth)
        except ValueError as e:
            # see mean_alpha for the possible exceptions; str(e) is used
            # because the `message` attribute is deprecated
            option_parser.error(str(e))

    # when not using collated data, the user can only specify one input file
    else:
        if len(alpha_fps) > 1:
            option_parser.error(
                'A comma-separated list of files should only be'
                ' passed with the --alpha_fps option when using collated alpha '
                'diversity data and also selecting a rarefaction depth with the'
                ' --depth option.')
        else:
            with open(alpha_fps[0], 'U') as alpha_f:
                metrics, alpha_sample_ids, alpha_data = parse_matrix(alpha_f)

    # parse the data from the files
    # NOTE(review): assumes parse_mapping_file consumes the handle before
    # returning -- confirm against its definition.
    with open(mapping_fp, 'U') as mapping_f:
        mapping_file_data, mapping_file_headers, _ = parse_mapping_file(
            mapping_f)

    # add the alpha diversity data to the mapping file
    out_mapping_file_data, out_mapping_file_headers = \
        add_alpha_diversity_values_to_mapping_file(
            metrics, alpha_sample_ids, alpha_data, mapping_file_headers,
            mapping_file_data, number_of_bins, binning_method,
            missing_value_name)

    # format the new data and write it down
    lines = format_mapping_file(out_mapping_file_headers,
                                out_mapping_file_data)
    with open(output_mapping_fp, 'w') as fd_out:
        fd_out.writelines(lines)
def single_object_beta(self, otu_table, metric, tree_string,
                       missing_sams=None):
    """ running single_object_beta should give same result using rowids

    For every known metric (non phylogenetic and phylogenetic) the full
    distance matrix is computed and compared against the output obtained one
    sample at a time, both without and with full_tree=True.

    otu_table: table passed through to single_object_beta
    metric: not used; the check iterates over every known metric. Kept so
        existing callers do not break.
    tree_string: tree passed through to single_object_beta
    missing_sams: sample ids skipped in the row-by-row checks, default None
        (nothing skipped)
    """
    if missing_sams is None:
        missing_sams = []

    metrics = list_known_nonphylogenetic_metrics()
    metrics.extend(list_known_phylogenetic_metrics())

    # new metrics that don't trivially parallelize must be dealt with
    # carefully
    for unparallelized in ('binary_dist_chisq', 'dist_chisq', 'dist_gower',
                           'dist_hellinger'):
        warnings.filterwarnings(
            'ignore',
            'dissimilarity %s is not parallelized, calculating the whole'
            ' matrix...' % unparallelized)
    warnings.filterwarnings(
        'ignore', 'unifrac had no information for sample M*')

    def check_rows_against_full(current_metric, sams, dmtx, full_tree):
        # compute the matrix one sample at a time and compare each value to
        # the matching entry of the full matrix
        for sample_id in sams:
            if sample_id in missing_sams:
                continue
            rows = sample_id
            r_out = single_object_beta(otu_table, current_metric,
                                       tree_string, rowids=rows,
                                       full_tree=full_tree)
            col_sams, row_sams, row_dmtx = parse_matrix(r_out)
            num_rows = len(rows.split(','))

            self.assertEqual(row_dmtx.shape, (num_rows, len(sams)))

            # make sure rows same as full
            for j in range(num_rows):
                for k in range(len(sams)):
                    row_v1 = row_dmtx[j, k]
                    full_v1 = dmtx[sams.index(row_sams[j]),
                                   sams.index(col_sams[k])]
                    npt.assert_almost_equal(row_v1, full_v1)

    for current_metric in metrics:
        # do it
        beta_out = single_object_beta(otu_table, current_metric, tree_string,
                                      rowids=None, full_tree=False)
        sams, dmtx = parse_distmat(beta_out)

        # do it by rows
        check_rows_against_full(current_metric, sams, dmtx, full_tree=False)

        # full tree run:
        if 'full_tree' in str(current_metric).lower():
            continue

        # do it by rows with full tree; rowids must be the single sample so
        # that the one-row shape asserted in the check can hold
        check_rows_against_full(current_metric, sams, dmtx, full_tree=True)

        # do it with full tree
        r_out = single_object_beta(otu_table, current_metric, tree_string,
                                   rowids=None, full_tree=True)
        sams_ft, dmtx_ft = parse_distmat(r_out)
        self.assertEqual(sams_ft, sams)
        npt.assert_almost_equal(dmtx_ft, dmtx)
def single_file_beta(self, otu_table_string, tree_string,
                     missing_sams=None, use_metric_list=False):
    """ running single_file_beta should give same result using --rows

    Writes the OTU table and tree to temporary files, then for every known
    metric compares the full distance matrix written by single_file_beta
    against the output produced one sample at a time, both without and with
    full_tree=True.

    otu_table_string: contents written to the temporary OTU table file
    tree_string: contents written to the temporary tree file
    missing_sams: sample ids skipped in the row-by-row checks, default None
        (nothing skipped)
    use_metric_list: when True each metric is passed wrapped in a list
    """
    if missing_sams is None:
        missing_sams = []

    # setup
    fd, input_path = mkstemp(suffix='.txt')
    os.close(fd)
    in_fname = os.path.split(input_path)[1]
    with open(input_path, 'w') as f:
        f.write(otu_table_string)

    fd, tree_path = mkstemp(suffix='.tre')
    os.close(fd)
    with open(tree_path, 'w') as f:
        f.write(tree_string)

    metrics = list_known_nonphylogenetic_metrics()
    metrics.extend(list_known_phylogenetic_metrics())
    output_dir = mkdtemp()

    # new metrics that don't trivially parallelize must be dealt with
    # carefully
    for unparallelized in ('binary_dist_chisq', 'dist_chisq', 'dist_gower',
                           'dist_hellinger'):
        warnings.filterwarnings(
            'ignore',
            'dissimilarity %s is not parallelized, calculating the whole'
            ' matrix...' % unparallelized)
    warnings.filterwarnings(
        'ignore', 'unifrac had no information for sample M*')

    self.files_to_remove.extend([input_path, tree_path])
    self.folders_to_remove.append(output_dir)
    os.mkdir(output_dir + '/ft/')

    def check_rows_against_full(metric_arg, row_outname, out_dir, sams,
                                dmtx, **beta_kwargs):
        # write the matrix one sample at a time and compare each value to
        # the matching entry of the full matrix
        for sample_id in sams:
            if sample_id in missing_sams:
                continue
            rows = sample_id
            single_file_beta(input_path, metric_arg, tree_path, out_dir,
                             rowids=rows, **beta_kwargs)
            # close the handle once parsed so handles do not pile up over
            # metrics x samples
            with open(row_outname) as row_f:
                col_sams, row_sams, row_dmtx = parse_matrix(row_f)
            num_rows = len(rows.split(','))

            self.assertEqual(row_dmtx.shape, (num_rows, len(sams)))

            # make sure rows same as full
            for j in range(num_rows):
                for k in range(len(sams)):
                    row_v1 = row_dmtx[j, k]
                    full_v1 = dmtx[sams.index(row_sams[j]),
                                   sams.index(col_sams[k])]
                    npt.assert_almost_equal(row_v1, full_v1)

    for metric in metrics:
        metric_arg = [metric] if use_metric_list else metric
        full_outname = output_dir + '/' + metric + '_' + in_fname
        ft_outname = output_dir + '/ft/' + metric + '_' + in_fname

        # do it
        single_file_beta(input_path, metric_arg, tree_path, output_dir,
                         rowids=None)
        with open(full_outname) as full_f:
            sams, dmtx = parse_distmat(full_f)

        # do it by rows (output goes to the same path as the full matrix)
        check_rows_against_full(metric_arg, full_outname, output_dir, sams,
                                dmtx)

        # full tree run:
        if 'full_tree' in str(metric).lower():
            continue

        # do it by rows with full tree
        check_rows_against_full(metric_arg, ft_outname, output_dir + '/ft/',
                                sams, dmtx, full_tree=True)

        # do it with full tree
        single_file_beta(input_path, metric_arg, tree_path,
                         output_dir + '/ft/', rowids=None, full_tree=True)
        with open(ft_outname) as ft_f:
            sams_ft, dmtx_ft = parse_distmat(ft_f)
        self.assertEqual(sams_ft, sams)
        npt.assert_almost_equal(dmtx_ft, dmtx)
def single_object_beta(self, otu_table, metric, tree_string,
                       missing_sams=None):
    """ running single_object_beta should give same result using rowids

    For every known metric (non phylogenetic and phylogenetic) the full
    distance matrix is computed and compared against the output obtained one
    sample at a time, both without and with full_tree=True.

    otu_table: table passed through to single_object_beta
    metric: not used; the check iterates over every known metric. Kept so
        existing callers do not break.
    tree_string: tree passed through to single_object_beta
    missing_sams: sample ids skipped in the row-by-row checks, default None
        (nothing skipped)
    """
    if missing_sams is None:
        missing_sams = []

    metrics = list_known_nonphylogenetic_metrics()
    metrics.extend(list_known_phylogenetic_metrics())

    # new metrics that don't trivially parallelize must be dealt with
    # carefully
    warnings.filterwarnings(
        'ignore', 'dissimilarity binary_dist_chisq is'
        ' not parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_chisq is not'
        ' parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_gower is not'
        ' parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_hellinger is'
        ' not parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'unifrac had no information for'
        ' sample M*')

    for each_metric in metrics:
        # do it
        beta_out = single_object_beta(otu_table, each_metric, tree_string,
                                      rowids=None, full_tree=False)
        sams, dmtx = parse_distmat(beta_out)
        num_sams = len(sams)

        # first pass: by rows; second pass: by rows with full tree, skipped
        # for metrics whose name already contains 'full_tree'
        for use_full_tree in (False, True):
            if use_full_tree and 'full_tree' in str(each_metric).lower():
                break

            for sample_id in sams:
                if sample_id in missing_sams:
                    continue
                rows = sample_id
                # rowids is the single sample in both passes so that the
                # one-row shape asserted below can hold
                r_out = single_object_beta(otu_table, each_metric,
                                           tree_string, rowids=rows,
                                           full_tree=use_full_tree)
                col_sams, row_sams, row_dmtx = parse_matrix(r_out)
                num_rows = len(rows.split(','))

                self.assertEqual(row_dmtx.shape, (num_rows, num_sams))

                # make sure rows same as full
                for j in range(num_rows):
                    for k in range(num_sams):
                        row_v1 = row_dmtx[j, k]
                        full_v1 = dmtx[sams.index(row_sams[j]),
                                       sams.index(col_sams[k])]
                        self.assertFloatEqual(row_v1, full_v1)
        else:
            # reached only when the full tree pass was not skipped:
            # do it with full tree
            r_out = single_object_beta(otu_table, each_metric, tree_string,
                                       rowids=None, full_tree=True)
            sams_ft, dmtx_ft = parse_distmat(r_out)
            self.assertEqual(sams_ft, sams)
            self.assertFloatEqual(dmtx_ft, dmtx)
try: metrics, alpha_sample_ids, alpha_data = mean_alpha(alpha_dict, depth) except ValueError, e: # see mean_alpha for the possible exceptions option_parser.error(e.message) # when not using collated data, the user can only specify one input file else: if len(alpha_fps) > 1: option_parser.error( "A comma-separated list of files should only be" " passed with the --alpha_fps option when using collated alpha " "diversity data and also selecting a rarefaction depth with the" " --depth option." ) else: metrics, alpha_sample_ids, alpha_data = parse_matrix(open(alpha_fps[0], "U")) # parse the data from the files mapping_file_data, mapping_file_headers, comments = parse_mapping_file(open(mapping_fp, "U")) # add the alpha diversity data to the mapping file out_mapping_file_data, out_mapping_file_headers = add_alpha_diversity_values_to_mapping_file( metrics, alpha_sample_ids, alpha_data, mapping_file_headers, mapping_file_data, number_of_bins, binning_method, missing_value_name, )
def single_file_beta(self, otu_table_string, tree_string,
                     missing_sams=None, use_metric_list=False):
    """ running single_file_beta should give same result using --rows

    Writes the OTU table and tree to temporary files, then for every known
    metric compares the full distance matrix written by single_file_beta
    against the output produced one sample at a time, both without and with
    full_tree=True.

    otu_table_string: contents written to the temporary OTU table file
    tree_string: contents written to the temporary tree file
    missing_sams: sample ids skipped in the row-by-row checks, default None
        (nothing skipped)
    use_metric_list: when True each metric is passed wrapped in a list
    """
    if missing_sams is None:
        missing_sams = []

    # setup
    fd, input_path = mkstemp(suffix='.txt')
    close(fd)
    in_fname = os.path.split(input_path)[1]
    with open(input_path, 'w') as table_f:
        table_f.write(otu_table_string)

    fd, tree_path = mkstemp(suffix='.tre')
    close(fd)
    with open(tree_path, 'w') as tree_f:
        tree_f.write(tree_string)

    metrics = list_known_nonphylogenetic_metrics()
    metrics.extend(list_known_phylogenetic_metrics())
    output_dir = mkdtemp()

    # new metrics that don't trivially parallelize must be dealt with
    # carefully
    warnings.filterwarnings(
        'ignore', 'dissimilarity binary_dist_chisq is'
        ' not parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_chisq is not'
        ' parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_gower is not'
        ' parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_hellinger is'
        ' not parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'unifrac had no information for'
        ' sample M*')

    self.files_to_remove.extend([input_path, tree_path])
    self.folders_to_remove.append(output_dir)
    ft_dir = output_dir + '/ft/'
    os.mkdir(ft_dir)

    def read_parsed(parser, path):
        # parse a result file and close its handle straight away, so handles
        # do not accumulate across metrics and samples
        with open(path) as result_f:
            return parser(result_f)

    def assert_rows_match_full(row_sams, col_sams, row_dmtx, rows, sams,
                               dmtx):
        # every value of the row-wise output must equal the matching entry
        # of the full matrix
        num_rows = len(rows.split(','))
        self.assertEqual(row_dmtx.shape, (num_rows, len(sams)))
        for j in range(num_rows):
            for k in range(len(sams)):
                row_v1 = row_dmtx[j, k]
                full_v1 = dmtx[sams.index(row_sams[j]),
                               sams.index(col_sams[k])]
                assert_almost_equal(row_v1, full_v1)

    for metric in metrics:
        metric_arg = [metric] if use_metric_list else metric
        out_name = output_dir + '/' + metric + '_' + in_fname
        ft_out_name = ft_dir + metric + '_' + in_fname

        # do it
        single_file_beta(input_path, metric_arg, tree_path, output_dir,
                         rowids=None)
        sams, dmtx = read_parsed(parse_distmat, out_name)

        # do it by rows (output goes to the same path as the full matrix)
        for rows in sams:
            if rows in missing_sams:
                continue
            single_file_beta(input_path, metric_arg, tree_path, output_dir,
                             rowids=rows)
            col_sams, row_sams, row_dmtx = read_parsed(parse_matrix,
                                                       out_name)
            assert_rows_match_full(row_sams, col_sams, row_dmtx, rows, sams,
                                   dmtx)

        # full tree run:
        if 'full_tree' in str(metric).lower():
            continue

        # do it by rows with full tree
        for rows in sams:
            if rows in missing_sams:
                continue
            single_file_beta(input_path, metric_arg, tree_path, ft_dir,
                             rowids=rows, full_tree=True)
            col_sams, row_sams, row_dmtx = read_parsed(parse_matrix,
                                                       ft_out_name)
            assert_rows_match_full(row_sams, col_sams, row_dmtx, rows, sams,
                                   dmtx)

        # do it with full tree
        single_file_beta(input_path, metric_arg, tree_path, ft_dir,
                         rowids=None, full_tree=True)
        sams_ft, dmtx_ft = read_parsed(parse_distmat, ft_out_name)
        self.assertEqual(sams_ft, sams)
        assert_almost_equal(dmtx_ft, dmtx)
try: metrics, alpha_sample_ids, alpha_data = mean_alpha( alpha_dict, depth) except ValueError, e: # see mean_alpha for the possible exceptions option_parser.error(e.message) # when not using collated data, the user can only specify one input file else: if len(alpha_fps) > 1: option_parser.error( 'A comma-separated list of files should only be' ' passed with the --alpha_fps option when using collated alpha ' 'diversity data and also selecting a rarefaction depth with the' ' --depth option.') else: metrics, alpha_sample_ids, alpha_data = parse_matrix( open(alpha_fps[0], 'U')) # parse the data from the files mapping_file_data, mapping_file_headers, comments = parse_mapping_file( open(mapping_fp, 'U')) # add the alpha diversity data to the mapping file out_mapping_file_data, out_mapping_file_headers = \ add_alpha_diversity_values_to_mapping_file(metrics, alpha_sample_ids, alpha_data, mapping_file_headers, mapping_file_data, number_of_bins, binning_method, missing_value_name) # format the new data and write it down lines = format_mapping_file(out_mapping_file_headers, out_mapping_file_data) fd_out = open(output_mapping_fp, 'w')