def test_parse_rarefaction_data(self):
    """parse_rarefaction_data turns formatted average lines into a dict."""
    # Expected result; note that the 'xmax:' input line has no counterpart
    # in the expected dictionary.
    self.data = {
        'headers': ['PD_whole_tree.txt', 'Antibiotics'],
        'error': {
            'NA': [0.099969643842700004],
            'Y': [0.105669693476, 1.08546135424, 1.5626248357999999],
            'N': [0.101173002974],
        },
        'options': ['Y', 'NA', 'N'],
        'xaxis': [10.0, 310.0, 610.0, 910.0, 1210.0, 1510.0,
                  1810.0, 2110.0, 2410.0, 2710.0, 3010.0],
        'series': {
            'NA': [0.88581050485400004],
            'Y': [0.918845147059, 7.1758656176500004, 9.9186072941199992],
            'N': [0.92636763785999998],
        },
        'color': {'NA': '#00ff00', 'Y': '#ff0000', 'N': '#0000ff'},
    }

    # The x-axis record is one logical line; it is split across two adjacent
    # literals only to keep the source line short.
    xaxis_line = (
        'xaxis: 10.0\t310.0\t610.0\t910.0\t1210.0\t1510.0\t1810.0\t2110.0'
        '\t2410.0\t2710.0\t3010.0\t'
    )

    self.rarefaction_series_data = [
        '# PD_whole_tree.txt',
        '# Antibiotics',
        xaxis_line,
        'xmax: 3310.0',
        '>> Y',
        'color #ff0000',
        'series 0.918845147059\t7.17586561765\t9.91860729412\t',
        'error 0.105669693476\t1.08546135424\t1.5626248358\t',
        '>> NA',
        'color #00ff00',
        'series 0.885810504854\t',
        'error 0.0999696438427\t',
        '>> N',
        'color #0000ff',
        'series 0.92636763786\t',
        'error 0.101173002974',
    ]

    observed = parse_rarefaction_data(self.rarefaction_series_data)
    self.assertEqual(observed, self.data)
def test_parse_rarefaction_data(self):
    """Check the dictionary parse_rarefaction_data builds from average lines."""
    # Per-category values expected back from the parser.
    expected_error = {
        'NA': [0.099969643842700004],
        'Y': [0.105669693476, 1.08546135424, 1.5626248357999999],
        'N': [0.101173002974],
    }
    expected_series = {
        'NA': [0.88581050485400004],
        'Y': [0.918845147059, 7.1758656176500004, 9.9186072941199992],
        'N': [0.92636763785999998],
    }
    expected_color = {'NA': '#00ff00', 'Y': '#ff0000', 'N': '#0000ff'}
    expected_xaxis = [10.0, 310.0, 610.0, 910.0, 1210.0, 1510.0,
                      1810.0, 2110.0, 2410.0, 2710.0, 3010.0]

    self.data = {
        'headers': ['PD_whole_tree.txt', 'Antibiotics'],
        'error': expected_error,
        'options': ['Y', 'NA', 'N'],
        'xaxis': expected_xaxis,
        'series': expected_series,
        'color': expected_color,
    }

    # Input records: two '#' header lines, the x-axis and xmax records, then
    # one '>>' section (color / series / error) per category.
    header_lines = ['# PD_whole_tree.txt', '# Antibiotics']
    axis_lines = [
        'xaxis: 10.0\t310.0\t610.0\t910.0\t1210.0\t1510.0\t1810.0\t2110.0'
        '\t2410.0\t2710.0\t3010.0\t',
        'xmax: 3310.0',
    ]
    category_lines = [
        '>> Y',
        'color #ff0000',
        'series 0.918845147059\t7.17586561765\t9.91860729412\t',
        'error 0.105669693476\t1.08546135424\t1.5626248358\t',
        '>> NA',
        'color #00ff00',
        'series 0.885810504854\t',
        'error 0.0999696438427\t',
        '>> N',
        'color #0000ff',
        'series 0.92636763786\t',
        'error 0.101173002974',
    ]
    self.rarefaction_series_data = header_lines + axis_lines + category_lines

    parsed = parse_rarefaction_data(self.rarefaction_series_data)
    self.assertEqual(parsed, self.data)
def make_averages(
    color_prefs,
    data,
    background_color,
    label_color,
    rares,
    output_dir,
    resolution,
    imagetype,
    ymax,
    suppress_webpage,
    std_type,
    output_type="file_creation",
):
    """Build the average rarefaction tables/plots and, optionally, the HTML.

    This is the main driver: for every rarefaction file in ``rares`` it
    parses the data, averages it per category, writes/collects the plots and
    finally formats the HTML output.

    Parameters
    ----------
    color_prefs
        Coloring preferences, forwarded to ``iter_color_groups``.
    data
        Mapping with a ``'map'`` entry: a sequence of rows whose first row is
        the header and whose first column is compared against sample IDs.
    background_color, label_color, resolution, imagetype
        Forwarded unchanged to the plotting helpers.
    rares
        Mapping from a string key (the text before the first ``'.'`` is used
        as the metric name) to a 4-item sequence
        ``(col_headers, comments, rarefaction_fn, rarefaction_data)``.
        Keys are processed in ``natsort`` order.
    output_dir
        Base directory for ``html_plots``, ``average_plots`` and
        ``average_tables`` when ``output_type`` is ``"file_creation"``.
    ymax
        If truthy, used as the y-axis maximum for every metric. Otherwise a
        maximum is derived per metric as 1.15 * (max series + max error).
    suppress_webpage
        If true, the per-category plots and the HTML page are skipped; only
        the average plots are produced and ``None`` is returned.
    std_type
        Forwarded to ``save_rarefaction_data``.
    output_type
        ``"file_creation"`` (write to ``output_dir``) or ``"memory"``
        (collect plots in memory under artificial ``plot/...`` paths).

    Returns
    -------
    The value returned by ``make_html``, or ``None`` when
    ``suppress_webpage`` is true.
    """
    # A truthy ymax supplied by the caller wins over any computed value.
    user_ymax = bool(ymax)

    if output_type == "file_creation":
        # in this option the path must include the output directory
        all_output_dir = os.path.join(output_dir, "html_plots")
        ave_output_dir = os.path.join(output_dir, "average_plots")
        # The paths are bound even when the web page is suppressed, because
        # the average plots are still written below; only the directory for
        # the per-category HTML plots is skipped in that case.
        if not suppress_webpage:
            create_dir(all_output_dir)
    elif output_type == "memory":
        # this is rather an artificial path to work with the javascript code
        all_output_dir = "plot/html_plots"
        ave_output_dir = "plot/average_plots"

    ave_data_file_path = os.path.join(output_dir, "average_tables")

    if output_type == "file_creation":
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}
    first_file = True

    # this is a fix for the issue of writing field values as the filenames
    mapping_lookup = {}
    for i, column in enumerate(data["map"][0]):
        for j, row in enumerate(data["map"][1:]):
            mapping_lookup["%s-%s" % (column, row[i])] = "col_%s_row_%s" % (i, j)

    all_plots = []

    # Iterate through the rarefaction files
    for r in natsort(rares):
        metric_name = r.split(".")[0]

        # convert the rarefaction data into variables
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # These steps are only performed once; the result is reused for all
        # subsequent rarefaction files.
        if first_file:
            # Remove samples from the mapping file, which contain no data
            # after rarefaction
            updated_mapping = []
            for map_row in data["map"]:
                # Add the mapping header
                if map_row[0] == "SampleID":
                    updated_mapping.append(map_row)
                # Determine if the sample exists in the rarefaction file
                for header in col_headers[3:]:
                    if map_row[0] == header:
                        updated_mapping.append(map_row)

            # Get the groups and colors for the updated mapping file
            groups_and_colors = list(
                iter_color_groups(updated_mapping, color_prefs)
            )

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = get_rarefaction_data(
            rarefaction_data, col_headers
        )

        # Create dictionary variables and get the colors for each Sample
        rarefaction_legend_mat[metric_name] = {"groups": {}}
        sample_colors = None
        for group_info in groups_and_colors:
            labelname = group_info[0]
            # Create a legend dictionary for html output
            rarefaction_legend_mat[metric_name]["groups"][labelname] = {}
            # On the first rarefaction file also create the data dictionary
            # for html output
            if first_file:
                rarefaction_data_mat[labelname] = {}
            # If the labelname is SampleID, use the colors assigned
            if labelname == "SampleID":
                sample_colors = group_info[2]
                sample_data_colors = group_info[3]
        first_file = False

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            samples_and_colors = list(
                iter_color_groups(
                    updated_mapping,
                    {
                        "SampleID": {
                            "column": "SampleID",
                            "colors": (
                                ("red", (0, 100, 100)),
                                ("blue", (240, 100, 100)),
                            ),
                        }
                    },
                )
            )
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Create a dictionary containing the samples. The set of mapped IDs
        # is built once per file instead of once per sample.
        mapped_sample_ids = set(map_row[0] for map_row in updated_mapping)
        sample_dict = {}
        for i, sid in enumerate(sampleIDs):
            if sid in mapped_sample_ids:
                sample_dict[sid] = {}
                for j, seq in enumerate(seqs_per_samp):
                    sample_dict[sid].setdefault(seq, []).append(
                        rare_mat_trans[i][j]
                    )

        # convert xvals to float
        xaxisvals = sorted(float(x) for x in set(seqs_per_samp))

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(rare_mat_trans, seqs_per_samp, sampleIDs)

        # calculate the max xval: one extra step past the last depth. With a
        # single depth, len(xaxisvals) - 2 is -1, so the step is 0.
        xmax = max(xaxisvals) + (xaxisvals[-1] - xaxisvals[len(xaxisvals) - 2])

        if not user_ymax:
            # NOTE(review): the average tables are only written on this code
            # path (no user-supplied ymax) - confirm that this is intended.
            ymax = 0
            for group_info in groups_and_colors:
                labelname, groups, colors, data_colors = group_info[:4]
                ave_file_path = os.path.join(ave_data_file_path, metric_name)

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave,
                    xaxisvals,
                    xmax,
                    labelname,
                    colors,
                    r,
                    data_colors,
                    groups,
                    std_type,
                )

                # write out the rarefaction average data; the context
                # manager guarantees the file is flushed and closed
                if output_type == "file_creation":
                    with open(ave_file_path + labelname + ".txt", "w") as table:
                        table.writelines(rare_lines)

                # take the formatted rarefaction averages and format the
                # results
                rares_data = parse_rarefaction_data("".join(rare_lines).split("\n"))

                # determine the ymax based on the average data; it is scaled
                # up since the dots can end up on the border
                max_series = max([max(v) for v in rares_data["series"].values()])
                max_error = max([max(e) for e in rares_data["error"].values()])
                new_ymax = (max_series + max_error) * 1.15
                if isnan(new_ymax):
                    # fall back to the series alone when the error is NaN
                    new_ymax = max_series * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        # iterate through the groups
        for index, group_info in enumerate(groups_and_colors):
            labelname, groups, colors, data_colors = group_info[:4]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave,
                xaxisvals,
                xmax,
                labelname,
                colors,
                r,
                data_colors,
                groups,
                std_type,
            )

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data("".join(rare_lines).split("\n"))

            if not suppress_webpage:
                legend = rarefaction_legend_mat[metric_name]

                # The per-sample legend is filled once per metric, on the
                # first group.
                if index == 0:
                    legend["samples"] = {}
                    for o in sample_dict:
                        legend["samples"][o] = {
                            "color": sample_data_colors[sample_colors[o]].toHex()
                        }

                # Iterate through the groups and create the legend dictionary
                for g in groups:
                    legend["groups"][labelname][g] = {
                        "groupsamples": groups[g],
                        "groupcolor": data_colors[colors[g]].toHex(),
                    }

                # Create the individual category average plots
                if output_type in ("file_creation", "memory"):
                    plot_results = make_plots(
                        background_color,
                        label_color,
                        rares_data,
                        ymax,
                        xmax,
                        all_output_dir,
                        resolution,
                        imagetype,
                        groups,
                        colors,
                        data_colors,
                        metric_name,
                        labelname,
                        rarefaction_data_mat,
                        rarefaction_legend_mat,
                        sample_dict,
                        sample_data_colors,
                        sample_colors,
                        mapping_lookup,
                        output_type,
                    )
                    if output_type == "file_creation":
                        rarefaction_data_mat, rarefaction_legend_mat = plot_results
                    else:
                        # in memory mode the plots themselves come back too
                        (
                            rarefaction_data_mat,
                            rarefaction_legend_mat,
                            all_plots_single,
                            all_plots_ave,
                        ) = plot_results

            # generate the filepath for the image file
            file_path = os.path.join(
                ave_output_dir, splitext(split(rares_data["headers"][0])[1])[0]
            )

            # Create the average plots (done with or without the web page)
            categories = list(groups)
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data["xaxis"],
                rares_data["series"],
                rares_data["error"],
                xmax,
                ymax,
                categories,
                labelname,
                imagetype,
                resolution,
                data_colors,
                colors,
                file_path,
                background_color,
                label_color,
                metric_name,
                output_type,
            )

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if suppress_webpage:
        return None

    # format the html output
    return make_html(
        rarefaction_legend_mat,
        rarefaction_data_mat,
        xaxisvals,
        imagetype,
        mapping_lookup,
        output_type,
        all_plots,
    )
def make_averages(color_prefs, data, background_color, label_color, rares,
                  output_dir, resolution, imagetype, ymax, suppress_webpage,
                  std_type, output_type="file_creation"):
    """Build the average rarefaction tables/plots and, optionally, the HTML.

    This is the main driver: for every rarefaction file in ``rares`` it
    parses the data, averages it per category, writes/collects the plots and
    finally formats the HTML output.

    Parameters
    ----------
    color_prefs
        Coloring preferences, forwarded to ``iter_color_groups``.
    data
        Mapping with a ``'map'`` entry: a sequence of rows whose first row is
        the header and whose first column is compared against sample IDs.
    background_color, label_color, resolution, imagetype
        Forwarded unchanged to the plotting helpers.
    rares
        Mapping from a string key (the text before the first ``'.'`` is used
        as the metric name) to a 4-item sequence
        ``(col_headers, comments, rarefaction_fn, rarefaction_data)``.
        Keys are processed in ``natsort`` order.
    output_dir
        Base directory for ``html_plots``, ``average_plots`` and
        ``average_tables`` when ``output_type`` is ``"file_creation"``.
    ymax
        If truthy, used as the y-axis maximum for every metric. Otherwise a
        maximum is derived per metric as 1.15 * (max series + max error).
    suppress_webpage
        If true, the per-category plots and the HTML page are skipped; only
        the average plots are produced and ``None`` is returned.
    std_type
        Forwarded to ``save_rarefaction_data``.
    output_type
        ``"file_creation"`` (write to ``output_dir``) or ``"memory"``
        (collect plots in memory under artificial ``plot/...`` paths).

    Returns
    -------
    The value returned by ``make_html``, or ``None`` when
    ``suppress_webpage`` is true.
    """
    # A truthy ymax supplied by the caller wins over any computed value.
    user_ymax = bool(ymax)

    if output_type == "file_creation":
        # in this option the path must include the output directory
        all_output_dir = os.path.join(output_dir, 'html_plots')
        ave_output_dir = os.path.join(output_dir, 'average_plots')
        # The paths are bound even when the web page is suppressed, because
        # the average plots are still written below; only the directory for
        # the per-category HTML plots is skipped in that case.
        if not suppress_webpage:
            create_dir(all_output_dir)
    elif output_type == 'memory':
        # this is rather an artificial path to work with the javascript code
        all_output_dir = 'plot/html_plots'
        ave_output_dir = 'plot/average_plots'

    ave_data_file_path = os.path.join(output_dir, 'average_tables')

    if output_type == "file_creation":
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}
    first_file = True

    # this is a fix for the issue of writing field values as the filenames
    mapping_lookup = {}
    for i, column in enumerate(data['map'][0]):
        for j, row in enumerate(data['map'][1:]):
            mapping_lookup['%s-%s' % (column, row[i])] = \
                'col_%s_row_%s' % (i, j)

    all_plots = []

    # Iterate through the rarefaction files
    for r in natsort(rares):
        metric_name = r.split('.')[0]

        # convert the rarefaction data into variables
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # These steps are only performed once; the result is reused for all
        # subsequent rarefaction files.
        if first_file:
            # Remove samples from the mapping file, which contain no data
            # after rarefaction
            updated_mapping = []
            for map_row in data['map']:
                # Add the mapping header
                if map_row[0] == 'SampleID':
                    updated_mapping.append(map_row)
                # Determine if the sample exists in the rarefaction file
                for header in col_headers[3:]:
                    if map_row[0] == header:
                        updated_mapping.append(map_row)

            # Get the groups and colors for the updated mapping file
            groups_and_colors = list(
                iter_color_groups(updated_mapping, color_prefs))

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = get_rarefaction_data(
            rarefaction_data, col_headers)

        # Create dictionary variables and get the colors for each Sample
        rarefaction_legend_mat[metric_name] = {'groups': {}}
        sample_colors = None
        for group_info in groups_and_colors:
            labelname = group_info[0]
            # Create a legend dictionary for html output
            rarefaction_legend_mat[metric_name]['groups'][labelname] = {}
            # On the first rarefaction file also create the data dictionary
            # for html output
            if first_file:
                rarefaction_data_mat[labelname] = {}
            # If the labelname is SampleID, use the colors assigned
            if labelname == 'SampleID':
                sample_colors = group_info[2]
                sample_data_colors = group_info[3]
        first_file = False

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            default_prefs = {
                'SampleID': {
                    'column': 'SampleID',
                    'colors': (('red', (0, 100, 100)),
                               ('blue', (240, 100, 100))),
                },
            }
            samples_and_colors = list(
                iter_color_groups(updated_mapping, default_prefs))
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Create a dictionary containing the samples. The set of mapped IDs
        # is built once per file instead of once per sample.
        mapped_sample_ids = set(map_row[0] for map_row in updated_mapping)
        sample_dict = {}
        for i, sid in enumerate(sampleIDs):
            if sid in mapped_sample_ids:
                sample_dict[sid] = {}
                for j, seq in enumerate(seqs_per_samp):
                    sample_dict[sid].setdefault(seq, []).append(
                        rare_mat_trans[i][j])

        # convert xvals to float
        xaxisvals = sorted(float(x) for x in set(seqs_per_samp))

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(
            rare_mat_trans, seqs_per_samp, sampleIDs)

        # calculate the max xval: one extra step past the last depth. With a
        # single depth, len(xaxisvals) - 2 is -1, so the step is 0.
        xmax = max(xaxisvals) + \
            (xaxisvals[-1] - xaxisvals[len(xaxisvals) - 2])

        if not user_ymax:
            # NOTE(review): the average tables are only written on this code
            # path (no user-supplied ymax) - confirm that this is intended.
            ymax = 0
            for group_info in groups_and_colors:
                labelname, groups, colors, data_colors = group_info[:4]
                ave_file_path = os.path.join(ave_data_file_path, metric_name)

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                    data_colors, groups, std_type)

                # write out the rarefaction average data; the context
                # manager guarantees the file is flushed and closed
                if output_type == "file_creation":
                    table_path = ave_file_path + labelname + '.txt'
                    with open(table_path, 'w') as table:
                        table.writelines(rare_lines)

                # take the formatted rarefaction averages and format the
                # results
                rares_data = parse_rarefaction_data(
                    ''.join(rare_lines).split('\n'))

                # determine the ymax based on the average data; it is scaled
                # up since the dots can end up on the border
                max_series = max(
                    [max(v) for v in rares_data['series'].values()])
                max_error = max(
                    [max(e) for e in rares_data['error'].values()])
                new_ymax = (max_series + max_error) * 1.15
                if isnan(new_ymax):
                    # fall back to the series alone when the error is NaN
                    new_ymax = max_series * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        # iterate through the groups
        for index, group_info in enumerate(groups_and_colors):
            labelname, groups, colors, data_colors = group_info[:4]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                data_colors, groups, std_type)

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data(
                ''.join(rare_lines).split('\n'))

            if not suppress_webpage:
                legend = rarefaction_legend_mat[metric_name]

                # The per-sample legend is filled once per metric, on the
                # first group.
                if index == 0:
                    legend['samples'] = {}
                    for o in sample_dict:
                        legend['samples'][o] = {
                            'color':
                                sample_data_colors[sample_colors[o]].toHex(),
                        }

                # Iterate through the groups and create the legend dictionary
                for g in groups:
                    legend['groups'][labelname][g] = {
                        'groupsamples': groups[g],
                        'groupcolor': data_colors[colors[g]].toHex(),
                    }

                # Create the individual category average plots
                if output_type in ("file_creation", "memory"):
                    plot_results = make_plots(
                        background_color, label_color, rares_data, ymax,
                        xmax, all_output_dir, resolution, imagetype, groups,
                        colors, data_colors, metric_name, labelname,
                        rarefaction_data_mat, rarefaction_legend_mat,
                        sample_dict, sample_data_colors, sample_colors,
                        mapping_lookup, output_type)
                    if output_type == "file_creation":
                        rarefaction_data_mat, rarefaction_legend_mat = \
                            plot_results
                    else:
                        # in memory mode the plots themselves come back too
                        (rarefaction_data_mat, rarefaction_legend_mat,
                         all_plots_single, all_plots_ave) = plot_results

            # generate the filepath for the image file
            file_path = os.path.join(
                ave_output_dir,
                splitext(split(rares_data['headers'][0])[1])[0])

            # Create the average plots (done with or without the web page)
            categories = list(groups)
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data['xaxis'], rares_data['series'],
                rares_data['error'], xmax, ymax, categories, labelname,
                imagetype, resolution, data_colors, colors, file_path,
                background_color, label_color, metric_name, output_type)

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if suppress_webpage:
        return None

    # format the html output
    return make_html(rarefaction_legend_mat, rarefaction_data_mat, xaxisvals,
                     imagetype, mapping_lookup, output_type, all_plots)