def test_natsort_case_insensitive(self):
    """natsort should perform numeric comparisons on strings and is _not_ case-sensitive"""
    # mixed-case names that combine letters and digits
    names = ['sample1', 'sample2', 'sample11', 'sample12', 'SAmple1',
             'Sample2']

    # orderings expected from each of the two sorters
    case_sensitive_order = ['SAmple1', 'Sample2', 'sample1', 'sample2',
                            'sample11', 'sample12']
    case_insensitive_order = ['sample1', 'SAmple1', 'sample2', 'Sample2',
                              'sample11', 'sample12']

    self.assertEqual(natsort(names), case_sensitive_order)
    self.assertEqual(natsort_case_insensitive(names),
                     case_insensitive_order)

    # natsort must give the same answer when the input arrives reversed
    names.reverse()
    self.assertEqual(natsort(names), case_sensitive_order)

    # single characters: digits, then upper case, then lower case
    self.assertEqual(natsort(list('cbaA321')), list('123Aabc'))

    # (input, expected) pairs for the case-insensitive sorter:
    # letters only, integer strings, float strings, tuples of strings
    insensitive_cases = [
        (list('cdBa'), list('aBcd')),
        (['11', '2', '1', '0'], ['0', '1', '2', '11']),
        (['1.11', '1.12', '1.00', '0.009'],
         ['0.009', '1.00', '1.11', '1.12']),
        ([('11', 'A'), ('2', 'B'), ('1', 'C'), ('0', 'D')],
         [('0', 'D'), ('1', 'C'), ('2', 'B'), ('11', 'A')]),
    ]
    for unsorted_items, expected in insensitive_cases:
        self.assertEqual(natsort_case_insensitive(unsorted_items), expected)
def main():
    """Write how many samples carry each value of one mapping-file category.

    The mapping file named by ``opts.input_fp`` is parsed, the samples are
    tallied by their value in the ``opts.category`` column, and one
    ``value<TAB>count`` line per value is written in natsort order. Samples
    with an empty value are reported as ``***UNSPECIFIED***``.

    Output goes to ``opts.output_fp`` when it is set, otherwise to stdout.
    A category that is not a mapping-file header is reported through
    ``option_parser.error``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    map_data, header, comments = parse_mapping_file(opts.input_fp)

    if opts.category not in header:
        option_parser.error(
            "%s doesn't appear to exist in the mapping file!" % opts.category)

    # count the samples per category value
    result = defaultdict(int)
    cat_idx = header.index(opts.category)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    # use stdout or the user supplied file path
    if output_fp:
        fd = open(output_fp, 'w')
    else:
        fd = stdout

    try:
        for cat_val in natsort(result):
            if not cat_val:
                fd.write("***UNSPECIFIED***\t%d\n" % result[cat_val])
            else:
                fd.write("%s\t%d\n" % (cat_val, result[cat_val]))
    finally:
        # Close only the handle opened here: closing the shared stdout
        # would break any later output from the process.
        if fd is not stdout:
            fd.close()
def main():
    """Write how many samples carry each value of one mapping-file category.

    The mapping file named by ``opts.mapping_file`` is parsed, the samples
    are tallied by their value in the ``opts.category`` column, and one
    ``value<TAB>count`` line per value is written in natsort order. Samples
    with an empty value are reported as ``***UNSPECIFIED***``.

    Output goes to ``opts.output_fp`` when it is set, otherwise to stdout.
    A category that is not a mapping-file header is reported through
    ``option_parser.error``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    map_data, header, comments = parse_mapping_file(opts.mapping_file)

    if opts.category not in header:
        option_parser.error(
            "%s doesn't appear to exist in the mapping file!" % opts.category)

    # count the samples per category value
    result = defaultdict(int)
    cat_idx = header.index(opts.category)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    # use stdout or the user supplied file path
    opened_here = bool(output_fp)
    fd = open(output_fp, 'w') if opened_here else stdout

    try:
        for cat_val in natsort(result):
            if not cat_val:
                fd.write("***UNSPECIFIED***\t%d\n" % result[cat_val])
            else:
                fd.write("%s\t%d\n" % (cat_val, result[cat_val]))
    finally:
        # stdout is shared with the rest of the process and must stay open;
        # only a file opened by this function is closed.
        if opened_here:
            fd.close()
def combine_sample_dicts(sample_dicts):
    """Merge per-sample OTU counts into a single OTU-by-sample matrix.

    sample_dicts: list of dicts, one per sample, each {otu_id: num_seqs}

    Returns the tuple (otu_mtx, otu_ids):
    * otu_ids lists every OTU id used as a key in any sample dict, in
      natsort / human-sort order
    * otu_mtx has one row per entry of otu_ids and one column per sample,
      columns in the order of sample_dicts
    * an OTU missing from a sample keeps a count of 0 in that column, and an
      OTU that only appears with abundance 0 still gets its (all zero) row
    """
    # unique OTU ids across all samples
    unique_ids = set([otu_id
                      for sample in sample_dicts
                      for otu_id in sample.keys()])
    all_otu_ids = natsort(list(unique_ids))

    # resolve each OTU id to its row once, rather than calling
    # all_otu_ids.index() for every count
    row_of = dict((otu_id, row) for row, otu_id in enumerate(all_otu_ids))

    # otus (rows) by samples (cols)
    otu_mtx = zeros((len(all_otu_ids), len(sample_dicts)), int)
    for col, sample in enumerate(sample_dicts):
        for otu_id, count in sample.items():
            otu_mtx[row_of[otu_id], col] = count

    return otu_mtx, all_otu_ids
def combine_sample_dicts(sample_dicts):
    """Build one OTU table out of a list of per-sample count dicts.

    sample_dicts is a list of dicts, each one {otu_id: num_seqs}.

    The result is the tuple (otu_mtx, otu_ids):
    * otu_ids holds every OTU id that is a key of any sample dict, ordered
      with natsort / human sort
    * otu_mtx rows follow otu_ids and its columns follow sample_dicts
    * OTUs that are only ever listed with abundance 0 are kept as rows of
      zeros, and an OTU absent from a sample counts as 0 there
    """
    # gather the distinct OTU ids over all samples
    seen_ids = set()
    for counts in sample_dicts:
        for otu_id in counts.keys():
            seen_ids.add(otu_id)
    all_otu_ids = natsort(list(seen_ids))

    # map each OTU id to its row up front (avoids all_otu_ids.index())
    indices = {}
    for row, otu_id in enumerate(all_otu_ids):
        indices[otu_id] = row

    n_rows = len(all_otu_ids)
    n_cols = len(sample_dicts)
    # otus (rows) by samples (cols)
    otu_mtx = numpy.zeros((n_rows, n_cols), int)

    for col, counts in enumerate(sample_dicts):
        for otu_id, abundance in counts.items():
            otu_mtx[indices[otu_id], col] = abundance

    return otu_mtx, all_otu_ids
def parse_smp_unifrac_distances(lines):
    """Parse tab-delimited (person, time point, value) lines into a matrix.

    lines: sequence of strings. The first element is treated as a header
        and skipped. Every other element is split on tabs: field 0 is a
        personal identifier, field 1 a time point and field 2 the value.
        Time points and values must be convertible to float.

    Returns the tuple (unique_personal_ids, unique_time_points, output_data):
    * unique_personal_ids: the distinct identifiers in natsort order
    * unique_time_points: the distinct time-point strings, sorted by their
      float value
    * output_data: zeros-initialised array with one row per identifier and
      one column per time point; combinations without a line stay 0 and,
      when a combination is repeated, the last line wins

    Input without any data lines yields empty id/time-point lists.
    """
    # split every data line once and reuse the fields below
    rows = [line.split('\t') for line in lines[1:]]

    unique_personal_ids = natsort(list(set([row[0] for row in rows])))

    # sort the time-points numerically, keeping their original spelling
    unique_time_points = sorted(set([row[1] for row in rows]), key=float)

    # resolve ids and time points to matrix positions up front so that each
    # data line is placed with two dictionary lookups
    row_of = dict((person_id, index)
                  for index, person_id in enumerate(unique_personal_ids))
    col_of = dict((time_point, index)
                  for index, time_point in enumerate(unique_time_points))

    output_data = zeros([len(unique_personal_ids), len(unique_time_points)])
    for fields in rows:
        output_data[row_of[fields[0]]][col_of[fields[1]]] = float(fields[2])

    return unique_personal_ids, unique_time_points, output_data
def test_natsort(self):
    """natsort should perform numeric comparisons on strings"""
    # names mixing letters and digits, already in natural order
    ordered = ['sample1', 'sample2', 'sample11', 'sample12']

    samples = list(ordered)
    self.assertEqual(natsort(samples), ordered)

    # the reversed input must come back in the same natural order
    samples.reverse()
    self.assertEqual(natsort(samples), ordered)

    # (input, expected) pairs: letters with digits, letters only,
    # integer strings, float strings, tuples of strings
    cases = [
        (list('cba321'), list('123abc')),
        (list('cdba'), list('abcd')),
        (['11', '2', '1', '0'], ['0', '1', '2', '11']),
        (['1.11', '1.12', '1.00', '0.009'],
         ['0.009', '1.00', '1.11', '1.12']),
        ([('11', 'A'), ('2', 'B'), ('1', 'C'), ('0', 'D')],
         [('0', 'D'), ('1', 'C'), ('2', 'B'), ('11', 'A')]),
    ]
    for unsorted_items, expected in cases:
        self.assertEqual(natsort(unsorted_items), expected)
def main():
    """Plot PC1 against PC2 with one convex hull per category value.

    Reads the coordinates file (``opts.coordinates_fp``) and the mapping
    file (``opts.mapping_file_fp``), groups the samples by their value in
    the ``opts.category`` column and, for every value with at least three
    samples, plots the samples' first two coordinates together with their
    convex hull. The figure is saved to ``opts.output_fp``.

    A category that is not a mapping-file header is reported through
    ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    coordinates_fp = opts.coordinates_fp
    mapping_file_fp = opts.mapping_file_fp
    category_header_name = opts.category
    output_fp = opts.output_fp

    # parse inside ``with`` so both input files are closed right away
    with open(coordinates_fp, 'U') as coords_f:
        coords_headers, coords_data, coords_eigenvalues, coords_percents = \
            parse_coords(coords_f)
    with open(mapping_file_fp, 'U') as mapping_f:
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)

    # fail with a usage error rather than a bare ValueError from .index()
    if category_header_name not in mapping_headers:
        option_parser.error(
            "%s doesn't appear to exist in the mapping file!" %
            category_header_name)

    category_header_index = mapping_headers.index(category_header_name)
    category_names = list(
        set([line[category_header_index] for line in mapping_data]))

    xtitle = 'PC1 (%.0f%%)' % round(coords_percents[0])
    ytitle = 'PC2 (%.0f%%)' % round(coords_percents[1])

    main_figure = plt.figure()
    main_axes = main_figure.add_subplot(1, 1, 1, axisbg='white')
    plt.xlabel(xtitle)
    plt.ylabel(ytitle)
    main_axes.tick_params(axis='y')
    main_axes.tick_params(axis='x')

    # sort the data!!! that way you can match make_3d_plots.py
    for index, category in enumerate(natsort(category_names)):
        sample_ids_list = [line[0] for line in mapping_data
                           if line[category_header_index] == category]

        # the color is tied to the position in the sorted category list, so
        # skipped categories still consume their color
        qiime_color = get_qiime_hex_string_color(index)

        # categories with fewer than three samples are not plotted
        if len(sample_ids_list) < 3:
            continue

        indices = [coords_headers.index(sample_id)
                   for sample_id in sample_ids_list]
        points = coords_data[indices, :2]

        hull = ConvexHull(points)
        main_axes.plot(points[:, 0], points[:, 1], 'o', color=qiime_color)
        for simplex in hull.simplices:
            main_axes.plot(points[simplex, 0], points[simplex, 1], 'k-')
        main_axes.plot(points[hull.vertices, 0], points[hull.vertices, 1],
                       '--', lw=2, color=qiime_color)

    main_figure.savefig(output_fp)
def color_groups(groups, colors, data_color_order):
    """Assign a data color to every group that has no color yet.

    Groups are visited in natsort order. Groups already present in colors
    (the special colors) are left untouched; every other group takes the
    next entry of data_color_order, starting again from the first entry once
    the last one has been handed out.

    Modifies colors in-place and returns nothing.
    """
    palette_size = len(data_color_order)
    next_slot = 0
    for group in natsort(groups):
        if group in colors:
            continue
        # wrap around once the palette is exhausted
        if next_slot == palette_size:
            next_slot = 0
        colors[group] = data_color_order[next_slot]
        next_slot += 1
def main():
    """Print how many samples carry each value of one mapping-file category.

    The mapping file named by ``opts.input_fp`` is parsed, the samples are
    tallied by their value in the ``opts.category`` column, and one
    ``value<TAB>count`` line per value is printed in natsort order. Samples
    with an empty value are reported as ``***UNSPECIFIED***``.

    A category that is not a mapping-file header is reported through
    ``option_parser.error``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    map_data, header, comments = parse_mapping_file(opts.input_fp)

    if opts.category not in header:
        option_parser.error(
            "%s doesn't appear to exist in the mapping file!" % opts.category)

    # count the samples per category value
    result = defaultdict(int)
    cat_idx = header.index(opts.category)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    # print(...) with a single pre-formatted string emits the same text
    # whether print is a statement (Python 2) or a function (Python 3)
    for cat_val in natsort(result):
        if not cat_val:
            print("***UNSPECIFIED***\t%d" % result[cat_val])
        else:
            print("%s\t%d" % (cat_val, result[cat_val]))
def iter_color_groups(mapping, prefs):
    """Yield the color groups of every category listed in prefs.

    The entries of prefs are visited in natsort order of their keys. For
    each entry this yields the tuple
    (labelname, groups, colors, data_colors, data_color_order), where
    labelname is the entry's "column" value, groups comes from
    group_by_field and the remaining three values from get_group_colors.

    See get_group_colors for details of algorithm.
    """
    for pref_name in natsort(prefs.keys()):
        pref = prefs[pref_name]
        column = pref["column"]

        # Always hand get_group_colors a private copy (or a fresh dict) so
        # the caller's preferences are never mutated
        if "colors" not in pref:
            colors = {}
        elif isinstance(pref["colors"], dict):
            colors = pref["colors"].copy()
        else:
            colors = pref["colors"][:]

        # Define groups and associate appropriate colors to each group
        groups = group_by_field(mapping, column)
        colors, data_colors, data_color_order = get_group_colors(groups, colors)

        yield column, groups, colors, data_colors, data_color_order
def iter_color_groups(mapping, prefs):
    """Generate color-group information for each entry of prefs.

    Walks the keys of prefs in natsort order and yields, per entry,
    (labelname, groups, colors, data_colors, data_color_order): labelname is
    the entry's 'column' value, groups is the result of group_by_field for
    that column, and the last three items are what get_group_colors returns.

    See get_group_colors for details of algorithm.
    """
    for key in natsort(prefs.keys()):
        settings = prefs[key]
        labelname = settings['column']

        # start from a copy of the preferred colors so they can be mutated
        colors = {}
        if 'colors' in settings:
            preferred = settings['colors']
            if isinstance(preferred, dict):
                colors = preferred.copy()
            else:
                colors = preferred[:]

        # Define groups and associate appropriate colors to each group
        groups = group_by_field(mapping, labelname)
        group_colors = get_group_colors(groups, colors)
        colors, data_colors, data_color_order = group_colors

        yield labelname, groups, colors, data_colors, data_color_order
def make_html(rarefaction_legend_mat, rarefaction_data_mat, xaxisvals,
              imagetype, mapping_lookup, output_type="file_creation",
              all_plots=None):
    """Fill the module-level HTML template with legend and data-table rows.

    rarefaction_legend_mat: dict keyed by metric; each value holds
        'groups' ({category: {group: {'groupcolor': ..,
        'groupsamples': ..}}}) and 'samples' ({sample: {'color': ..}})
    rarefaction_data_mat: {category: {group: {metric: {'ave': [..],
        'err': [..]}}}}; the lists are indexed like xaxisvals
    xaxisvals: x-axis values, one data-table row per value and group
    imagetype: image file extension (without the leading dot)
    mapping_lookup: maps '<category>-<group>' to the identifier used in the
        generated element ids and image names
    output_type: 'file_creation' makes the page load images from
        ./html_plots/; 'memory' embeds the images of all_plots as base64
        data URIs
    all_plots: for 'memory', an iterable of dicts {plot path: object with a
        ``buf`` attribute holding the image bytes}

    Returns the filled-in HTML string. Raises ValueError for any other
    output_type (previously this surfaced as an UnboundLocalError).
    """
    option_row = '<option value="%s">%s</option>'
    group_row = '<tr id="%s" name="%s" style="display: none;"><td class="data" onmouseover="document.body.style.cursor=\'pointer\'" onmouseout="document.body.style.cursor=\'default\'" onclick="toggle(%s)" id="%s" name="%s">▶</td><td><input name="%s" type="checkbox" checked="True" onclick="show_hide_category(this)"></td><td style="color:%s">■ </td><td class="data"><b>%s</b></td></tr>'
    sample_row = '<tr id="%s" name="%s" style="display: none;"><td class="data" align="right">∟</td><td></td><td style="color:%s">◆</td><td class="data" align="left"><b>%s</b></td></tr>'

    legend_td = [
        '<b>Legend</b><div STYLE="border: thin black solid; height: 300px; width: 200px; font-size: 12px; overflow: auto;"><table>'
    ]
    metric_select_html = []
    category_select_html = []
    data_table_html = []
    metrics = []
    category_colors = {}
    first_metric = True

    # iterate the legend dictionary
    for m in natsort(rarefaction_legend_mat):
        # Create the metric select box options
        metric_select_html.append(option_row % (m, m))
        metrics.append(m)
        legend_groups = rarefaction_legend_mat[m]['groups']
        legend_samples = rarefaction_legend_mat[m]['samples']

        # iterate through the categories in the legend dictionary
        for category in natsort(legend_groups):
            category_groups = legend_groups[category]

            # The category select box is filled from the first metric only
            if first_metric:
                cat_links = [mapping_lookup[category + '-' + i]
                             for i in category_groups]
                category_select_html.append(
                    option_row % (category + '$#!' + '$#!'.join(cat_links),
                                  category))

            # create the html formatted rows for each category and group
            for group in natsort(category_groups):
                group_info = category_groups[group]
                group_key = m + mapping_lookup[category + '-' + group]
                category_colors[group] = group_info['groupcolor']

                # sort once; the order serves both the group row and the
                # per-sample rows
                samples = natsort(group_info['groupsamples'])
                sample_list = ['\'' + sample + '\'' for sample in samples]

                legend_td.append(group_row % (
                    m + category, m + category, "'" + group_key + "'",
                    group_key, ','.join(sample_list),
                    group_key + '_raw.' + imagetype,
                    group_info['groupcolor'], group))

                for sample in samples:
                    sample = str(sample)
                    legend_td.append(sample_row % (
                        group_key + '_raw', group_key,
                        legend_samples[sample]['color'], sample))

        first_metric = False

    # iterate through the data dictionary and format the rows for the html
    # data table
    for category in rarefaction_data_mat:
        data_table_html.append(
            '<tr name="%s" style="display: none;"><td class="headers">%s</td><td class="headers">Seqs/Sample</td>'
            % (category, category))
        for j in metrics:
            data_table_html.append(
                '<td class="headers">%s Ave.</td><td class="headers">%s Err.</td>'
                % (j, j))
        data_table_html.append('</tr>')

        for g in natsort(rarefaction_data_mat[category]):
            group_data = rarefaction_data_mat[category][g]
            for i, xaxisval in enumerate(xaxisvals):
                data_table_html.append(
                    '<tr name="%s" style="display: none;">' % (category))
                data_table_html.append(
                    '<td class="data" bgcolor="%s">%s</td><td class="data">%s</td>'
                    % (category_colors[g], g, xaxisval))
                for m in metrics:
                    data_table_html.append(
                        '<td class="data">%s</td><td class="data">%s</td>'
                        % (group_data[m]['ave'][i], group_data[m]['err'][i]))
                data_table_html.append('</tr>')

    legend_td.append('</table></div></div>')

    # Create the table that contains the plots and table
    plot_html = '\n'.join(legend_td)

    # The two output types differ only in the leading javascript and in how
    # the three image sources are resolved
    if output_type == "file_creation":
        plots_js = ''
        image_sources = (
            "img.setAttribute('src',\"./html_plots/\"+SelObject.value+array[i]+'_ave'+imagetype)",
            "img.setAttribute('src',\"./html_plots/\"+metric+array[i]+'_ave'+imagetype)",
            "img.setAttribute('src',\"./html_plots/\"+arguments[0]+'_raw'+imagetype)",
        )
    elif output_type == "memory":
        plots_html = ['all_plots = {}']
        for elements in all_plots:
            for k, v in elements.items():
                # the path is compatible with the javascript, see
                # make_averages
                plots_html.append(
                    'all_plots["%s"] = "%s"' %
                    (k, "data:image/png;base64," +
                     urllib.quote(base64.b64encode(v.buf))))
        plots_js = '\n'.join(plots_html)
        image_sources = (
            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+SelObject.value+array[i]+'_ave'+imagetype])",
            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+metric+array[i]+'_ave'+imagetype])",
            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+arguments[0]+'_raw'+imagetype])",
        )
    else:
        raise ValueError("Unsupported output_type: %r" % (output_type,))

    # insert the formatted rows into the html string at the bottom of this
    # file
    html_output = HTML % ((plots_js,) + image_sources + (
        '.' + imagetype,
        '\n'.join(metric_select_html),
        '\n'.join(category_select_html),
        plot_html,
        '\n'.join(data_table_html)))

    return html_output
def main():
    """Plot PC1 against PC2 with one convex hull per category value.

    Reads the coordinates file (``opts.coordinates_fp``) and the mapping
    file (``opts.mapping_file_fp``), groups the samples by their value in
    the ``opts.category`` column and, for every value with at least three
    samples, plots the samples' first two coordinates together with their
    convex hull on a black background. The figure is saved to
    ``opts.output_fp`` and a legend is produced with ``make_legend`` from
    the output path's base name and extension.

    With ``opts.chuck_norris_joke`` set, a joke fetched with curl is printed
    instead and the script exits.

    A category that is not a mapping-file header is reported through
    ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    chuck_norris_joke = opts.chuck_norris_joke
    coordinates_fp = opts.coordinates_fp
    mapping_file_fp = opts.mapping_file_fp
    category_header_name = opts.category
    output_fp = opts.output_fp

    # have a swell day Yoshiki from the future
    if chuck_norris_joke:
        import json

        o, e, _ = qiime_system_call('curl http://api.icndb.com/jokes/random')
        # The response is remote, untrusted text: parse it as JSON data
        # instead of executing it as Python source.
        joke = json.loads(o.strip())
        print(joke['value']['joke'])
        exit(0)

    # parse inside ``with`` so both input files are closed right away
    with open(coordinates_fp, 'U') as coords_f:
        coords_headers, coords_data, coords_eigenvalues, coords_percents =\
            parse_coords(coords_f)
    with open(mapping_file_fp, 'U') as mapping_f:
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)

    # fail with a usage error rather than a bare ValueError from .index()
    if category_header_name not in mapping_headers:
        option_parser.error(
            "%s doesn't appear to exist in the mapping file!" %
            category_header_name)

    category_header_index = mapping_headers.index(category_header_name)
    category_names = list(set([line[category_header_index]
                               for line in mapping_data]))

    main_figure = plt.figure()
    main_axes = main_figure.add_subplot(1, 1, 1, axisbg='black')
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    main_axes.tick_params(axis='y', colors='none')
    main_axes.tick_params(axis='x', colors='none')

    # sort the data!!! that way you can match make_3d_plots.py
    sorted_categories = natsort(category_names)

    # Labels and colors are collected together so that the legend only lists
    # the categories that were drawn, each next to its own color.
    plotted_categories = []
    colors_used = []
    for index, category in enumerate(sorted_categories):
        sample_ids_list = [line[0] for line in mapping_data
                           if line[category_header_index] == category]

        # the color is tied to the position in the sorted category list, so
        # skipped categories still consume their color
        qiime_color = get_qiime_hex_string_color(index)

        # categories with fewer than three samples are not plotted
        if len(sample_ids_list) < 3:
            continue

        plotted_categories.append(category)
        colors_used.append(qiime_color)

        indices = [coords_headers.index(sample_id)
                   for sample_id in sample_ids_list]
        points = coords_data[indices, :2]

        hull = ConvexHull(points)
        main_axes.plot(points[:, 0], points[:, 1], 'o', color=qiime_color)
        for simplex in hull.simplices:
            main_axes.plot(points[simplex, 0], points[simplex, 1], 'w-')
        main_axes.plot(points[hull.vertices, 0], points[hull.vertices, 1],
                       '--', lw=2, color=qiime_color)

    main_figure.savefig(output_fp)

    name, dotted_extension = splitext(output_fp)
    extension = dotted_extension.replace('.', '')

    make_legend(plotted_categories, colors_used, 0, 0, 'black', 'white',
                name, extension, 80)
def make_plots(
    background_color,
    label_color,
    rares,
    ymax,
    xmax,
    output_dir,
    resolution,
    imagetype,
    groups,
    colors,
    data_colors,
    metric_name,
    labelname,
    rarefaction_data_mat,
    rarefaction_legend_mat,
    sample_dict,
    sample_data_colors,
    sample_colors,
    mapping_lookup,
    output_type="file_creation",
):
    """Generate the rarefaction plots of one category for one metric.

    For every group (in natsort order) the formatted average and error
    values of ``rares`` are appended to
    ``rarefaction_data_mat[labelname][group][metric_name]`` (lists under
    "ave" and "err", one entry per x-axis value), and the per-group raw plot
    is produced with save_single_rarefaction_plots. Afterwards the category
    average plot is produced with save_single_ave_rarefaction_plots.

    ``rares`` must provide the keys "headers", "xaxis", "series" and
    "error"; a group or position missing from "series"/"error" propagates
    as the lookup error it causes.

    Returns
      * output_type "file_creation":
        (rarefaction_data_mat, rarefaction_legend_mat)
      * output_type "memory": (rarefaction_data_mat, rarefaction_legend_mat,
        all_plots_single, all_plots_ave), where all_plots_single collects
        the per-group plot objects
      * any other output_type: None, after updating rarefaction_data_mat
        in place
    """
    # Get the alpha rare data
    raredata = rares

    # generate the filepath for the image file
    file_path = os.path.join(
        output_dir, splitext(split(raredata["headers"][0])[1])[0])

    all_plots_single = []

    # Sort and iterate through the groups
    for i in natsort(groups):
        for j in range(len(raredata["xaxis"])):
            # Add context to the data dictionary, which will be used in the
            # html. setdefault creates the group/metric entries on first
            # use and reuses them afterwards.
            group_entry = rarefaction_data_mat[labelname].setdefault(i, {})
            metric_entry = group_entry.setdefault(
                metric_name, {"ave": [], "err": []})
            metric_entry["ave"].append("%10.3f" % raredata["series"][i][j])
            metric_entry["err"].append("%10.3f" % raredata["error"][i][j])

        # Create raw plots for each group in a category. The arguments are
        # rebuilt per group because rarefaction_legend_mat is replaced by
        # each call's return value.
        single_plot_args = (
            sample_dict, imagetype, metric_name, sample_data_colors,
            sample_colors, output_dir, background_color, label_color,
            resolution, ymax, xmax, rarefaction_legend_mat, groups[i],
            labelname, i, mapping_lookup, output_type,
        )
        if output_type == "file_creation":
            rarefaction_legend_mat = save_single_rarefaction_plots(
                *single_plot_args)
        elif output_type == "memory":
            rarefaction_legend_mat, rare_plot_for_all = \
                save_single_rarefaction_plots(*single_plot_args)
            all_plots_single.append(rare_plot_for_all)

    categories = [k for k in groups]

    # Create the rarefaction average plot and get updated legend information
    ave_plot_args = (
        raredata["xaxis"], raredata["series"], raredata["error"], xmax, ymax,
        categories, labelname, imagetype, resolution, data_colors, colors,
        file_path, background_color, label_color, rarefaction_legend_mat,
        metric_name, mapping_lookup, output_type,
    )
    if output_type == "file_creation":
        rarefaction_legend_mat = save_single_ave_rarefaction_plots(
            *ave_plot_args)
        return rarefaction_data_mat, rarefaction_legend_mat
    elif output_type == "memory":
        rarefaction_legend_mat, all_plots_ave = \
            save_single_ave_rarefaction_plots(*ave_plot_args)
        return (rarefaction_data_mat, rarefaction_legend_mat,
                all_plots_single, all_plots_ave)
def make_html(
    rarefaction_legend_mat,
    rarefaction_data_mat,
    xaxisvals,
    imagetype,
    mapping_lookup,
    output_type="file_creation",
    all_plots=None,
):
    """Fill the module-level HTML template with legend and data-table rows.

    rarefaction_legend_mat: dict keyed by metric; each value holds
        "groups" ({category: {group: {"groupcolor": ..,
        "groupsamples": ..}}}) and "samples" ({sample: {"color": ..}})
    rarefaction_data_mat: {category: {group: {metric: {"ave": [..],
        "err": [..]}}}}; the lists are indexed like xaxisvals
    xaxisvals: x-axis values, one data-table row per value and group
    imagetype: image file extension (without the leading dot)
    mapping_lookup: maps "<category>-<group>" to the identifier used in the
        generated element ids and image names
    output_type: "file_creation" makes the page load images from
        ./html_plots/; "memory" embeds the images of all_plots as base64
        data URIs
    all_plots: for "memory", an iterable of dicts {plot path: object with a
        ``buf`` attribute holding the image bytes}

    Returns the filled-in HTML string. Raises ValueError for any other
    output_type (previously this surfaced as an UnboundLocalError).
    """
    option_row = '<option value="%s">%s</option>'
    group_row = '<tr id="%s" name="%s" style="display: none;"><td class="data" onmouseover="document.body.style.cursor=\'pointer\'" onmouseout="document.body.style.cursor=\'default\'" onclick="toggle(%s)" id="%s" name="%s">▶</td><td><input name="%s" type="checkbox" checked="True" onclick="show_hide_category(this)"></td><td style="color:%s">■ </td><td class="data"><b>%s</b></td></tr>'
    sample_row = '<tr id="%s" name="%s" style="display: none;"><td class="data" align="right">∟</td><td></td><td style="color:%s">◆</td><td class="data" align="left"><b>%s</b></td></tr>'

    legend_td = [
        '<b>Legend</b><div STYLE="border: thin black solid; height: 300px; width: 200px; font-size: 12px; overflow: auto;"><table>'
    ]
    metric_select_html = []
    category_select_html = []
    data_table_html = []
    metrics = []
    category_colors = {}
    first_metric = True

    # iterate the legend dictionary
    for m in natsort(rarefaction_legend_mat):
        # Create the metric select box options
        metric_select_html.append(option_row % (m, m))
        metrics.append(m)
        legend_groups = rarefaction_legend_mat[m]["groups"]
        legend_samples = rarefaction_legend_mat[m]["samples"]

        # iterate through the categories in the legend dictionary
        for category in natsort(legend_groups):
            category_groups = legend_groups[category]

            # The category select box is filled from the first metric only
            if first_metric:
                cat_links = [
                    mapping_lookup[category + "-" + i] for i in category_groups
                ]
                category_select_html.append(
                    option_row
                    % (category + "$#!" + "$#!".join(cat_links), category)
                )

            # create the html formatted rows for each category and group
            for group in natsort(category_groups):
                group_info = category_groups[group]
                group_key = m + mapping_lookup[category + "-" + group]
                category_colors[group] = group_info["groupcolor"]

                # sort once; the order serves both the group row and the
                # per-sample rows
                samples = natsort(group_info["groupsamples"])
                sample_list = ["'" + sample + "'" for sample in samples]

                legend_td.append(
                    group_row
                    % (
                        m + category,
                        m + category,
                        "'" + group_key + "'",
                        group_key,
                        ",".join(sample_list),
                        group_key + "_raw." + imagetype,
                        group_info["groupcolor"],
                        group,
                    )
                )

                for sample in samples:
                    sample = str(sample)
                    legend_td.append(
                        sample_row
                        % (
                            group_key + "_raw",
                            group_key,
                            legend_samples[sample]["color"],
                            sample,
                        )
                    )

        first_metric = False

    # iterate through the data dictionary and format the rows for the html
    # data table
    for category in rarefaction_data_mat:
        data_table_html.append(
            '<tr name="%s" style="display: none;"><td class="headers">%s</td><td class="headers">Seqs/Sample</td>'
            % (category, category)
        )
        for j in metrics:
            data_table_html.append(
                '<td class="headers">%s Ave.</td><td class="headers">%s Err.</td>'
                % (j, j)
            )
        data_table_html.append("</tr>")

        for g in natsort(rarefaction_data_mat[category]):
            group_data = rarefaction_data_mat[category][g]
            for i, xaxisval in enumerate(xaxisvals):
                data_table_html.append(
                    '<tr name="%s" style="display: none;">' % (category)
                )
                data_table_html.append(
                    '<td class="data" bgcolor="%s">%s</td><td class="data">%s</td>'
                    % (category_colors[g], g, xaxisval)
                )
                for m in metrics:
                    data_table_html.append(
                        '<td class="data">%s</td><td class="data">%s</td>'
                        % (group_data[m]["ave"][i], group_data[m]["err"][i])
                    )
                data_table_html.append("</tr>")

    legend_td.append("</table></div></div>")

    # Create the table that contains the plots and table
    plot_html = "\n".join(legend_td)

    # The two output types differ only in the leading javascript and in how
    # the three image sources are resolved
    if output_type == "file_creation":
        plots_js = ""
        image_sources = (
            "img.setAttribute('src',\"./html_plots/\"+SelObject.value+array[i]+'_ave'+imagetype)",
            "img.setAttribute('src',\"./html_plots/\"+metric+array[i]+'_ave'+imagetype)",
            "img.setAttribute('src',\"./html_plots/\"+arguments[0]+'_raw'+imagetype)",
        )
    elif output_type == "memory":
        plots_html = ["all_plots = {}"]
        for elements in all_plots:
            for k, v in elements.items():
                # the path is compatible with the javascript, see
                # make_averages
                plots_html.append(
                    'all_plots["%s"] = "%s"'
                    % (
                        k,
                        "data:image/png;base64,"
                        + urllib.quote(base64.b64encode(v.buf)),
                    )
                )
        plots_js = "\n".join(plots_html)
        image_sources = (
            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+SelObject.value+array[i]+'_ave'+imagetype])",
            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+metric+array[i]+'_ave'+imagetype])",
            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+arguments[0]+'_raw'+imagetype])",
        )
    else:
        raise ValueError("Unsupported output_type: %r" % (output_type,))

    # insert the formatted rows into the html string at the bottom of this
    # file
    html_output = HTML % (
        (plots_js,)
        + image_sources
        + (
            "." + imagetype,
            "\n".join(metric_select_html),
            "\n".join(category_select_html),
            plot_html,
            "\n".join(data_table_html),
        )
    )

    return html_output
def make_averages(
    color_prefs,
    data,
    background_color,
    label_color,
    rares,
    output_dir,
    resolution,
    imagetype,
    ymax,
    suppress_webpage,
    std_type,
    output_type="file_creation",
):
    """Take the rarefaction files, make the plots and format the html output.

    color_prefs: color preferences handed to iter_color_groups
    data: dict whose "map" entry is the mapping file as a list of rows, the
        first row being the header
    rares: dict {rarefaction file name: (col_headers, comments,
        rarefaction_fn, rarefaction_data)}; the text before the first "."
        of the file name is used as the metric name
    ymax: y-axis maximum; a false value means it is derived from the
        average data of each metric
    suppress_webpage: when true only the average plots are generated and no
        html is built
    output_type: "file_creation" writes plots and average tables below
        output_dir; "memory" collects the plot objects and uses the
        artificial "plot/..." paths expected by the javascript

    Returns the html string produced by make_html, or None when
    suppress_webpage is true.
    """
    user_ymax = bool(ymax)

    if output_type == "file_creation":
        # The average plots are generated whether or not the web page is
        # suppressed, so their directory must always be defined here.
        ave_output_dir = os.path.join(output_dir, "average_plots")
        if not suppress_webpage:
            # in this option the path must include the output directory
            all_output_dir = os.path.join(output_dir, "html_plots")
            # Create the directories, where plots and data will be written
            create_dir(all_output_dir)
    elif output_type == "memory":
        # this is rather an artificial path to work with the javascript code
        all_output_dir = "plot/html_plots"
        ave_output_dir = "plot/average_plots"

    ave_data_file_path = os.path.join(output_dir, "average_tables")
    if output_type == "file_creation":
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}
    rare_num = 0

    # this is a fix for the issue of writing field values as the filenames
    mapping_lookup = {}
    for i, column in enumerate(data["map"][0]):
        for j, row in enumerate(data["map"][1:]):
            mapping_lookup["%s-%s" % (column, row[i])] = \
                "col_%s_row_%s" % (str(i), str(j))

    all_plots = []

    # Iterate through the rarefaction files
    for r in natsort(rares):
        metric_name = r.split(".")[0]

        # convert the rarefaction data into variables
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # Here we only need to perform these steps once, since the data is
        # the same for all rarefaction files
        if rare_num == 0:
            # Remove samples from the mapping file, which contain no data
            # after rarefaction
            updated_mapping = []
            for j in data["map"]:
                # Add the mapping header
                if j[0] == "SampleID":
                    updated_mapping.append(j)
                # Determine if the sample exists in the rarefaction file
                for i in col_headers[3:]:
                    if j[0] == i:
                        updated_mapping.append(j)

            # Get the groups and colors for the updated mapping file
            groups_and_colors = list(
                iter_color_groups(updated_mapping, color_prefs))

            # sample ids kept in the mapping, for constant-time membership
            # tests below
            mapped_sample_ids = set([row[0] for row in updated_mapping])

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = get_rarefaction_data(
            rarefaction_data, col_headers)

        # Create dictionary variables and get the colors for each Sample
        rarefaction_legend_mat[metric_name] = {"groups": {}}
        sample_colors = None
        for entry in groups_and_colors:
            labelname = entry[0]

            # Create a legend dictionary for html output
            rarefaction_legend_mat[metric_name]["groups"][labelname] = {}

            # If this is the first time iterating through the rarefaction
            # data create a data dictionary for html output
            if rare_num == 0:
                rarefaction_data_mat[labelname] = {}

            # If the labelname is SampleID, use the colors assigned
            if labelname == "SampleID":
                sample_colors = entry[2]
                sample_data_colors = entry[3]

        rare_num = 1

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            samples_and_colors = list(iter_color_groups(
                updated_mapping,
                {"SampleID": {"column": "SampleID",
                              "colors": (("red", (0, 100, 100)),
                                         ("blue", (240, 100, 100)))}},
            ))
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Create a dictionary containing the samples:
        # {sample id: {seqs per sample: [values]}}
        sample_dict = {}
        for i, sid in enumerate(sampleIDs):
            if sid in mapped_sample_ids:
                sample_dict[sid] = {}
                for j, seq in enumerate(seqs_per_samp):
                    sample_dict[sid].setdefault(seq, []).append(
                        rare_mat_trans[i][j])

        # convert xvals to float
        xaxisvals = [float(x) for x in set(seqs_per_samp)]
        xaxisvals.sort()

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(
            rare_mat_trans, seqs_per_samp, sampleIDs)

        # calculate the max xval: one more step past the largest value
        xmax = max(xaxisvals) + (xaxisvals[len(xaxisvals) - 1] -
                                 xaxisvals[len(xaxisvals) - 2])

        if not user_ymax:
            ymax = 0
            for entry in groups_and_colors:
                labelname, groups, colors, data_colors = entry[:4]
                ave_file_path = os.path.join(ave_data_file_path, metric_name)

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                    data_colors, groups, std_type)

                # write out the rarefaction average data
                if output_type == "file_creation":
                    with open(ave_file_path + labelname + ".txt",
                              "w") as table_f:
                        table_f.writelines(rare_lines)

                # take the formatted rarefaction averages and format the
                # results
                rares_data = parse_rarefaction_data(
                    "".join(rare_lines[:]).split("\n"))

                # determine the ymax based on the average data
                # multiple the ymax, since the dots can end up on the border
                series_max = max(
                    [max(v) for v in rares_data["series"].values()])
                error_max = max(
                    [max(e) for e in rares_data["error"].values()])
                new_ymax = (series_max + error_max) * 1.15
                if isnan(new_ymax):
                    new_ymax = series_max * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        iterator_num = 0

        # iterate through the groups
        for entry in groups_and_colors:
            labelname, groups, colors, data_colors = entry[:4]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                data_colors, groups, std_type)

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data(
                "".join(rare_lines[:]).split("\n"))

            if not suppress_webpage:
                legend_for_metric = rarefaction_legend_mat[metric_name]

                # the per-sample legend colors are filled in once per metric
                if iterator_num == 0:
                    legend_for_metric["samples"] = {}
                    for o in sample_dict:
                        # Add values to the legend dictionary
                        legend_for_metric["samples"][o] = {
                            "color":
                                sample_data_colors[sample_colors[o]].toHex()
                        }
                    iterator_num = 1

                # Iterate through the groups and create the legend
                # dictionary of samples and their colors
                for g in groups:
                    legend_for_metric["groups"][labelname][g] = {
                        "groupsamples": groups[g],
                        "groupcolor": data_colors[colors[g]].toHex(),
                    }

                # Create the individual category average plots
                plot_args = (
                    background_color, label_color, rares_data, ymax, xmax,
                    all_output_dir, resolution, imagetype, groups, colors,
                    data_colors, metric_name, labelname,
                    rarefaction_data_mat, rarefaction_legend_mat,
                    sample_dict, sample_data_colors, sample_colors,
                    mapping_lookup, output_type,
                )
                if output_type == "file_creation":
                    rarefaction_data_mat, rarefaction_legend_mat = \
                        make_plots(*plot_args)
                elif output_type == "memory":
                    (rarefaction_data_mat, rarefaction_legend_mat,
                     all_plots_single, all_plots_ave) = make_plots(
                         *plot_args)

            # generate the filepath for the image file
            file_path = os.path.join(
                ave_output_dir,
                splitext(split(rares_data["headers"][0])[1])[0])

            # Create the average plots (also when the web page is
            # suppressed)
            categories = [k for k in groups]
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data["xaxis"], rares_data["series"],
                rares_data["error"], xmax, ymax, categories, labelname,
                imagetype, resolution, data_colors, colors, file_path,
                background_color, label_color, metric_name, output_type)

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if not suppress_webpage:
        # format the html output
        html_output = make_html(
            rarefaction_legend_mat, rarefaction_data_mat, xaxisvals,
            imagetype, mapping_lookup, output_type, all_plots)
    else:
        html_output = None

    return html_output
def get_group_colors(groups, colors, data_colors=data_colors,
                     data_color_order=data_color_order):
    """Figures out group colors for a specific series based on prefs.

    Algorithm is as follows:

    - For each name, color pair we know about:
        - Check if the name is one of the groups (exact match)
        - If it isn't, assume it's a prefix and pull out all the matching
          groups
        - If the color is just a string, set everything to the color with
          that name
        - Otherwise, assume that either it's a new color we're adding, or
          that it's a range for gradient coloring.
        - If it's a new color, create it and add it to added_data_colors.
        - If it's a gradient, make up all the new colors and add them to
          added_data_colors

    The current method for gradient coloring of columns (should perhaps
    replace with more general method) is to pass in any of the following:

    'colors':(('white', (0,0,100)),('red',(0,100,100)))

    makes gradient between white and red, applies to all samples

    'colors':{'RK':(('white',(0,0,100)),('red',(0,100,100))),
              'NF':(('white',(120,0,100)),('green',(120,100,100)))
             }
    pulls the combination samples starting with RK, colors with
    first gradient, then pulls the combination samples starting
    with NF, colors with the next gradient.

    Side effects: when colors is a dict it is edited in place (prefix keys
    are removed and replaced by the matching group names), and the
    data_colors / data_color_order objects that were passed in (by default
    the module-level ones) are updated in place with any new colors. When
    colors is not a dict, fresh data_colors / data_color_order objects are
    built and the ones passed in are left untouched.

    Return values are:
    - colors: dict of {group_value:color_name}
    - data_colors: dict of {color_name:color_object}
    - data_color_order: order in which the data colors are used/written.
    """
    added_data_colors = {}
    if isinstance(colors, dict):
        # assume we're getting some of the colors out of a dict
        if colors:
            # sorted() snapshots the items, so colors may be edited below
            for k, v in sorted(colors.items()):
                if k not in groups:
                    # assume is prefix
                    k_matches = [g for g in groups if g.startswith(k)]
                    if isinstance(v, str):
                        # just set everything to this color
                        for m in k_matches:
                            colors[m] = v
                    else:
                        # assume is new color or range
                        first, second = v
                        if isinstance(first, str):
                            # new named color?
                            if first not in data_colors:
                                added_data_colors[first] = Color(first, second)
                            for m in k_matches:
                                colors[m] = first
                        else:
                            # new color range?
                            start_color, end_color = map(get_color,
                                                         [first, second])
                            # the gradient endpoints come from the colors'
                            # coordinates, as in the tuple case further down
                            start_hsv = start_color.Coords
                            end_hsv = end_color.Coords
                            num_colors = len(k_matches)
                            curr_data_colors = color_dict_to_objects(
                                make_color_dict(start_color, start_hsv,
                                                end_color, end_hsv,
                                                num_colors))
                            curr_colors = {}
                            color_groups(k_matches, curr_colors,
                                         natsort(curr_data_colors))
                            colors.update(curr_colors)
                            added_data_colors.update(curr_data_colors)
                    # the prefix itself is not a group, so drop it
                    del colors[k]
                elif not isinstance(v, str):
                    # assume val is new color
                    color = get_color(v)
                    if color.Name not in data_colors:
                        added_data_colors[color.Name] = color
                    colors[k] = color.Name
            # handle any leftover groups
            color_groups(groups, colors, data_color_order)
            # add new colors
            data_colors.update(added_data_colors)
            if added_data_colors:
                # NOTE(review): this appends ONE entry made of all new color
                # names joined together; with more than one new color that
                # entry is presumably not a key of data_colors -- confirm
                # whether extend(natsort(...)) was intended.
                data_color_order.append(''.join(natsort(added_data_colors)))
        else:
            # handle case where no prefs is used
            color_groups(groups, colors, data_color_order)
    else:
        # handle the case where colors is a tuple for gradients
        start_color, end_color = map(get_color, colors)
        start_hsv = start_color.Coords
        end_hsv = end_color.Coords
        num_colors = len(groups)
        data_colors = color_dict_to_objects(
            make_color_dict(start_color, start_hsv, end_color, end_hsv,
                            num_colors))
        data_color_order = list(natsort(data_colors.keys()))
        colors = {}
        color_groups(groups, colors, data_color_order)

    return colors, data_colors, data_color_order
def make_plots(background_color, label_color, rares, ymax, xmax,
               output_dir, resolution, imagetype, groups, colors, data_colors,
               metric_name, labelname, rarefaction_data_mat,
               rarefaction_legend_mat, sample_dict, sample_data_colors,
               sample_colors, mapping_lookup, output_type="file_creation"):
    '''This is the main function for generating the rarefaction plots and html
    file.

    For every group (in natural sort order) the formatted average and error
    values of each x-axis point are appended to
    rarefaction_data_mat[labelname][group][metric_name]['ave' / 'err'],
    and save_single_rarefaction_plots is called once for that group. Finally
    save_single_ave_rarefaction_plots is called once for the whole category.

    rarefaction_data_mat is updated in place; rarefaction_legend_mat is
    replaced by whatever the plotting helpers return.

    Returns:
        output_type == "file_creation":
            (rarefaction_data_mat, rarefaction_legend_mat)
        output_type == "memory":
            (rarefaction_data_mat, rarefaction_legend_mat,
             all_plots_single, all_plots_ave)
        any other output_type: nothing is plotted and None is returned.
    '''
    # Get the alpha rare data
    raredata = rares

    # generate the filepath for the image file
    file_path = os.path.join(output_dir,
                             splitext(split(raredata['headers'][0])[1])[0])

    all_plots_single = []

    # Sort and iterate through the groups
    for group in natsort(groups):
        for point in range(len(raredata['xaxis'])):
            # Add context to the data dictionary, which will be used in the
            # html; the nested entries are created the first time a
            # group/metric pair is seen
            metric_data = rarefaction_data_mat[labelname] \
                .setdefault(group, {}) \
                .setdefault(metric_name, {'ave': [], 'err': []})
            metric_data['ave'].append(
                '%10.3f' % (raredata['series'][group][point]))
            metric_data['err'].append(
                '%10.3f' % (raredata['error'][group][point]))

        # Create raw plots for each group in a category
        fpath = output_dir
        if output_type == "file_creation":
            rarefaction_legend_mat = save_single_rarefaction_plots(
                sample_dict, imagetype, metric_name,
                sample_data_colors, sample_colors,
                fpath, background_color,
                label_color, resolution, ymax, xmax,
                rarefaction_legend_mat, groups[group],
                labelname, group, mapping_lookup, output_type)
        elif output_type == "memory":
            rarefaction_legend_mat, rare_plot_for_all = \
                save_single_rarefaction_plots(
                    sample_dict, imagetype, metric_name,
                    sample_data_colors, sample_colors,
                    fpath, background_color,
                    label_color, resolution, ymax, xmax,
                    rarefaction_legend_mat, groups[group],
                    labelname, group, mapping_lookup, output_type)
            all_plots_single.append(rare_plot_for_all)

    categories = list(groups)

    # Create the rarefaction average plot and get updated legend information
    if output_type == "file_creation":
        rarefaction_legend_mat = save_single_ave_rarefaction_plots(
            raredata['xaxis'], raredata['series'], raredata['error'],
            xmax, ymax, categories,
            labelname, imagetype, resolution, data_colors,
            colors, file_path, background_color, label_color,
            rarefaction_legend_mat, metric_name, mapping_lookup, output_type)
        return rarefaction_data_mat, rarefaction_legend_mat
    elif output_type == "memory":
        rarefaction_legend_mat, all_plots_ave = \
            save_single_ave_rarefaction_plots(
                raredata['xaxis'], raredata['series'], raredata['error'],
                xmax, ymax, categories,
                labelname, imagetype, resolution, data_colors,
                colors, file_path, background_color, label_color,
                rarefaction_legend_mat, metric_name, mapping_lookup,
                output_type)
        return rarefaction_data_mat, rarefaction_legend_mat, \
            all_plots_single, all_plots_ave
def make_mage_output(groups, colors, coord_header, coords, pct_var,
                     background_color, label_color, data_colors,
                     taxa=None, custom_axes=None, name='',
                     radius=None, alpha=.75, num_coords=10, scaled=False,
                     coord_scale=1.05, edges=None, coords_low=None,
                     coords_high=None, ellipsoid_prefs=None,
                     user_supplied_edges=False, ball_scale=1.0,
                     arrow_colors={'line_color': 'white', 'head_color': 'red'}):
    """Convert groups, colors, coords and percent var into mage format

    Builds the kinemage as a list of strings in this order: header lines
    (@kinemage, @dimension, @dimminmax, @master), one color definition per
    entry of data_colors, one @group per entry of groups (ball list,
    optional ellipsoids, label list), optional taxa, the axes group and,
    if requested, the edges.

    groups: {group_name: sample ids}; groups are written in natural sort
        order, and ids without an entry in coord_header are skipped.
    colors: {group_name: color name} used on the ball/label lists.
    coord_header, coords: sample ids and their coordinates, zipped together
        row by row. coords must support .max(0) / .min(0).
    pct_var: per-axis values shown on the PC axis labels; also used as the
        scaling factors when scaled is True.
    custom_axes: names of axes that come before the PC axes; they are
        labelled without a percentage.
    radius: ball radius; computed with auto_radius(coords) when None. Either
        way it is multiplied by ball_scale.
    num_coords: maximum number of dimensions to write; lowered when coords
        has fewer columns.
    coords_low, coords_high: ellipsoids are only written when both are given.
    arrow_colors: passed through to make_edges_output; it is not modified
        here.

    Returns the list of output strings.
    """
    result = []

    # Scale the coords and generate header labels
    if scaled:
        scalars = pct_var
        if custom_axes:
            # create a dummy vector of ones to avoid scaling custom axes
            custom_scalars = scalars[0] * np.ones(len(custom_axes))
            scalars = np.append(custom_scalars, scalars)
        coords = scale_pc_data_matrix(coords, scalars)
        if coords_low is not None:
            coords_low = scale_pc_data_matrix(coords_low, scalars)
        if coords_high is not None:
            coords_high = scale_pc_data_matrix(coords_high, scalars)
        header_suffix = '_scaled'
    else:
        header_suffix = '_unscaled'

    if radius is None:
        radius = float(auto_radius(coords)) * float(ball_scale)
    else:
        radius = float(radius) * float(ball_scale)

    maxes = coords.max(0)[:num_coords]
    mins = coords.min(0)[:num_coords]
    pct_var = pct_var[:num_coords]  # scale from fraction

    # check that we didn't get fewer dimensions than we wanted
    if len(mins) < num_coords:
        num_coords = len(mins)
    min_maxes = flatten(zip(mins, maxes))

    if custom_axes:
        axis_names = ['PC%s' % (i + 1)
                      for i in range(num_coords - len(custom_axes))]
        axis_names = custom_axes + axis_names
    else:
        axis_names = ['PC%s' % (i + 1) for i in range(num_coords)]

    # Write the header information
    result.append('@kinemage {%s}' % (name + header_suffix))
    result.append('@dimension ' +
                  ' '.join(['{%s}' % axis_name for axis_name in axis_names]))
    result.append('@dimminmax ' + ' '.join(map(str, min_maxes)))
    result.append('@master {points}')
    result.append('@master {labels}')
    if edges:
        result.append('@master {edges}')
    if taxa is not None:
        result.append('@master {taxa_points}')
        result.append('@master {taxa_labels}')

    for _, data_color in sorted(data_colors.items()):
        result.append(data_color.toMage())

    if background_color == 'white':
        result.append('@whitebackground')
        result.append('@hsvcolor {black} 0.0 0.0 0.0')
    else:
        result.append('@hsvcolor {white} 180.0 0.0 100.0')

    # Write the groups, colors and coords
    coord_dict = dict(zip(coord_header, coords))
    if coords_low is not None:
        coord_low_dict = dict(zip(coord_header, coords_low))
    if coords_high is not None:
        coord_high_dict = dict(zip(coord_header, coords_high))

    for group_name in natsort(groups):
        ids = groups[group_name]
        result.append('@group {%s (n=%s)} collapsible' %
                      (group_name, len(ids)))

        color = colors[group_name]
        coord_lines = []
        for id_ in sorted(ids):
            if id_ in coord_dict:
                coord_lines.append(
                    '{%s} %s' %
                    (id_, ' '.join(map(str, coord_dict[id_][:num_coords]))))

        # create list of balls, one for each sample
        # NOTE(review): a single space is emitted before "master=" here and
        # in the label list below -- confirm against expected kinemage output
        result.append(
            '@balllist color=%s radius=%s alpha=%s dimension=%s '
            'master={points} nobutton' % (color, radius, alpha, num_coords))
        result.append('\n'.join(coord_lines))

        # make ellipsoids if low and high coord bounds were received
        if (coords_low is not None) and (coords_high is not None):
            # create one trianglelist for each sample to define ellipsoids
            result += make_mage_ellipsoids(ids, coord_dict, coord_low_dict,
                                           coord_high_dict, color,
                                           ellipsoid_prefs)

        # create list of labels
        result.append(
            '@labellist color=%s radius=%s alpha=%s dimension=%s '
            'master={labels} nobutton' % (color, radius, alpha, num_coords))
        result.append('\n'.join(coord_lines))

    if taxa is not None:
        result += make_mage_taxa(taxa, num_coords, pct_var,
                                 scaled=scaled, scalars=None, radius=radius)

    # Write the axes on the bottom of the graph
    result.append('@group {axes} collapsible')
    state = 'on'
    axis_mins = mins * coord_scale
    axis_maxes = maxes * coord_scale

    if not custom_axes:
        custom_axes = []
    # draw each axis
    for i in range(num_coords):
        # only the first three axes are switched on
        if i == 3:
            state = 'off'
        result.append('@vectorlist {%s line} dimension=%s %s' %
                      (axis_names[i], num_coords, state))

        result.append(' '.join(map(str, axis_mins)) + ' ' + label_color)
        end = axis_mins.copy()
        end[i] = axis_maxes[i]
        result.append(' '.join(map(str, end)) + ' ' + label_color)
        end[i] *= coord_scale  # add scale factor to offset labels a little

        # custom axes come first, no "percent variance" shown
        if i < len(custom_axes):
            result.append('@labellist {%s} dimension=%s %s' %
                          (axis_names[i], num_coords, state))
            result.append(('{%s}' % (axis_names[i])) +
                          ' '.join(map(str, end)) + ' ' + label_color)
        # if all custom axes have been drawn, draw normal PC axes
        else:
            pct = pct_var[i - len(custom_axes)]
            result.append('@labellist {%s (%0.2g%%)} dimension=%s %s' %
                          (axis_names[i], pct, num_coords, state))
            result.append(('{%s (%0.2g%%)}' % (axis_names[i], pct)) +
                          ' '.join(map(str, end)) + ' ' + label_color)

    # Write edges if requested
    if edges:
        result += make_edges_output(coord_dict, edges, num_coords,
                                    label_color, arrow_colors=arrow_colors,
                                    user_supplied_edges=user_supplied_edges)
    return result
def make_averages(color_prefs, data, background_color, label_color, rares,
                  output_dir, resolution, imagetype, ymax, suppress_webpage,
                  std_type, output_type="file_creation"):
    '''This is the main function, which takes the rarefaction files, calls the
    functions to make plots and formatting the output html.

    data['map'] is the mapping file as a list of rows whose first row is the
    header. rares maps each rarefaction file name to a
    (col_headers, comments, rarefaction_fn, rarefaction_data) tuple; the
    files are processed in natural sort order and the text before the first
    '.' of the name is used as the metric name.

    ymax: when truthy it is used as the y-axis maximum for every plot;
        otherwise a maximum is derived per metric from the averaged data.
    suppress_webpage: when true, only the average plots are produced and no
        html is generated.
    output_type: "file_creation" writes under output_dir (creating the
        needed directories); "memory" uses fixed 'plot/...' paths and
        collects the plot objects instead. Other values are not supported.

    Returns the value of make_html, or None when suppress_webpage is true.
    '''
    user_ymax = bool(ymax)

    # The plot paths depend on the output type only. They are needed even
    # when the webpage is suppressed, because the average plots are always
    # generated.
    if output_type == "file_creation":
        # in this option the path must include the output directory
        all_output_dir = os.path.join(output_dir, 'html_plots')
        ave_output_dir = os.path.join(output_dir, 'average_plots')
        # Create the directories, where plots and data will be written
        if not suppress_webpage:
            create_dir(all_output_dir)
    elif output_type == 'memory':
        # this is rather an artificial path to work with the javascript code
        all_output_dir = 'plot/html_plots'
        ave_output_dir = 'plot/average_plots'

    ave_data_file_path = os.path.join(output_dir, 'average_tables')
    if output_type == "file_creation":
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}
    rare_num = 0

    # this is a fix for the issue of writing field values as the filenames
    mapping_lookup = {}
    for col_idx, column in enumerate(data['map'][0]):
        for row_idx, row in enumerate(data['map'][1:]):
            mapping_lookup['%s-%s' % (column, row[col_idx])] = \
                'col_%s_row_%s' % (str(col_idx), str(row_idx))

    all_plots = []

    # Iterate through the rarefaction files
    for r in natsort(rares):
        metric_name = r.split('.')[0]

        # convert the rarefaction data into variables
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # Here we only need to perform these steps once, since the data is
        # the same for all rarefaction files
        if rare_num == 0:
            # Remove samples from the mapping file, which contain no data
            # after rarefaction
            updated_mapping = []
            for map_row in data['map']:
                # Add the mapping header
                if map_row[0] == 'SampleID':
                    updated_mapping.append(map_row)
                # Determine if the sample exists in the rarefaction file
                for sample_header in col_headers[3:]:
                    if map_row[0] == sample_header:
                        updated_mapping.append(map_row)

            # Get the groups and colors for the updated mapping file
            groups_and_colors = list(
                iter_color_groups(updated_mapping, color_prefs))

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = \
            get_rarefaction_data(rarefaction_data, col_headers)

        # Create dictionary variables and get the colors for each Sample
        rarefaction_legend_mat[metric_name] = {'groups': {}}
        sample_colors = None
        for entry in groups_and_colors:
            labelname = entry[0]
            # Create a legend dictionary for html output
            rarefaction_legend_mat[metric_name]['groups'][labelname] = {}
            # If this is the first time iterating through the rarefaction
            # data create a data dictionary for html output
            if rare_num == 0:
                rarefaction_data_mat[labelname] = {}
            # If the labelname is SampleID, use the colors assigned
            if labelname == 'SampleID':
                sample_colors = entry[2]
                sample_data_colors = entry[3]

        rare_num = 1

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            samples_and_colors = list(iter_color_groups(
                updated_mapping,
                {'SampleID': {'column': 'SampleID',
                              'colors': (('red', (0, 100, 100)),
                                         ('blue', (240, 100, 100)))}}))
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Create a dictionary containing the samples: for every sample that
        # is still in the mapping, the values observed at each depth
        mapped_ids = set(map_row[0] for map_row in updated_mapping)
        sample_dict = {}
        for sample_idx, sid in enumerate(sampleIDs):
            if sid in mapped_ids:
                sample_dict[sid] = {}
                for depth_idx, seq in enumerate(seqs_per_samp):
                    sample_dict[sid].setdefault(seq, []).append(
                        rare_mat_trans[sample_idx][depth_idx])

        # convert xvals to float
        xaxisvals = sorted(float(x) for x in set(seqs_per_samp))

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(rare_mat_trans, seqs_per_samp,
                                           sampleIDs)

        # calculate the max xval: one more step beyond the last depth.
        # The indices are spelled out with len() on purpose: with a single
        # depth both resolve to the same element (step of 0) instead of
        # raising an IndexError.
        xmax = max(xaxisvals) + (xaxisvals[len(xaxisvals) - 1] -
                                 xaxisvals[len(xaxisvals) - 2])

        if not user_ymax:
            ymax = 0
            # NOTE(review): the average tables are only written inside this
            # branch, i.e. not when the caller supplies ymax -- confirm that
            # this is intended.
            for entry in groups_and_colors:
                labelname = entry[0]
                groups = entry[1]
                colors = entry[2]
                data_colors = entry[3]
                ave_file_path = os.path.join(ave_data_file_path, metric_name)

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                    data_colors, groups, std_type)

                # write out the rarefaction average data
                if output_type == "file_creation":
                    table_fp = ave_file_path + labelname + '.txt'
                    with open(table_fp, 'w') as table_file:
                        table_file.writelines(rare_lines)

                # take the formatted rarefaction averages and format the
                # results
                rares_data = parse_rarefaction_data(
                    ''.join(rare_lines).split('\n'))

                # determine the ymax based on the average data
                # multiple the ymax, since the dots can end up on the border
                max_series = max(
                    [max(v) for v in rares_data['series'].values()])
                max_error = max(
                    [max(e) for e in rares_data['error'].values()])
                new_ymax = (max_series + max_error) * 1.15
                if isnan(new_ymax):
                    # fall back to the averages alone
                    new_ymax = max_series * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        # iterate through the groups
        for group_idx, entry in enumerate(groups_and_colors):
            labelname = entry[0]
            groups = entry[1]
            colors = entry[2]
            data_colors = entry[3]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                data_colors, groups, std_type)

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data(
                ''.join(rare_lines).split('\n'))

            if not suppress_webpage:
                # the per-sample legend is the same for every category, so it
                # is built once per metric
                if group_idx == 0:
                    sample_legend = {}
                    rarefaction_legend_mat[metric_name]['samples'] = \
                        sample_legend
                    for sid in sample_dict:
                        # Add values to the legend dictionary
                        sample_legend[sid] = {
                            'color':
                            sample_data_colors[sample_colors[sid]].toHex()}

                # Iterate through the groups and create the legend
                # dictionary of samples and their colors
                group_legend = \
                    rarefaction_legend_mat[metric_name]['groups'][labelname]
                for g in groups:
                    group_legend[g] = {
                        'groupsamples': groups[g],
                        'groupcolor': data_colors[colors[g]].toHex()}

                # Create the individual category average plots
                if output_type == "file_creation":
                    rarefaction_data_mat, rarefaction_legend_mat = make_plots(
                        background_color, label_color,
                        rares_data, ymax, xmax, all_output_dir,
                        resolution, imagetype, groups, colors,
                        data_colors, metric_name, labelname,
                        rarefaction_data_mat, rarefaction_legend_mat,
                        sample_dict, sample_data_colors,
                        sample_colors, mapping_lookup, output_type)
                elif output_type == "memory":
                    rarefaction_data_mat, rarefaction_legend_mat, \
                        all_plots_single, all_plots_ave = make_plots(
                            background_color, label_color,
                            rares_data, ymax, xmax, all_output_dir,
                            resolution, imagetype, groups, colors,
                            data_colors, metric_name, labelname,
                            rarefaction_data_mat, rarefaction_legend_mat,
                            sample_dict, sample_data_colors,
                            sample_colors, mapping_lookup, output_type)

            # generate the filepath for the image file
            file_path = os.path.join(
                ave_output_dir,
                splitext(split(rares_data['headers'][0])[1])[0])

            # Create the average plots (done whether or not the webpage is
            # suppressed)
            categories = list(groups)
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data['xaxis'], rares_data['series'],
                rares_data['error'], xmax, ymax, categories,
                labelname, imagetype, resolution, data_colors,
                colors, file_path, background_color, label_color,
                metric_name, output_type)

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if not suppress_webpage:
        # format the html output
        html_output = make_html(rarefaction_legend_mat,
                                rarefaction_data_mat, xaxisvals, imagetype,
                                mapping_lookup, output_type, all_plots)
    else:
        html_output = None

    return html_output