def test_iter_color_groups(self):
    """iter_color_groups should iterate over color groups correctly."""
    first_group = list(iter_color_groups(self.mapping, self.prefs))[0]
    (obs_label, obs_groups, obs_colors,
     obs_data_colors, obs_data_color_order) = first_group[:5]
    exp_data_colors = color_dict_to_objects(self.data_color_hsv)

    self.assertEqual(obs_label, self.labelname)
    self.assertEqual(obs_groups, self.dict)
    self.assertEqual(obs_colors, self.colors)
    self.assertEqual(obs_data_colors.keys(), exp_data_colors.keys())

    # The color objects cannot be compared directly because they get
    # different ids on every color_dict_to_objects call, so compare the
    # hex value of each one instead.
    for color_name in exp_data_colors:
        self.assertEqual(obs_data_colors[color_name].toHex(),
                         exp_data_colors[color_name].toHex())

    self.assertEqual(obs_data_color_order, self.data_color_order)
def test_iter_color_groups(self):
    """iter_color_groups should iterate over color groups correctly."""
    head = list(iter_color_groups(self.mapping, self.prefs))[0]
    label, groups, colors, color_objs, color_order = (
        head[k] for k in range(5))
    expected_objs = color_dict_to_objects(self.data_color_hsv)

    self.assertEqual(label, self.labelname)
    self.assertEqual(groups, self.dict)
    self.assertEqual(colors, self.colors)
    self.assertEqual(color_objs.keys(), expected_objs.keys())

    # Walk the color objects one by one: color_dict_to_objects hands out
    # objects with different ids each time, so only their hex strings are
    # comparable.
    for key in expected_objs:
        observed_hex = color_objs[key].toHex()
        expected_hex = expected_objs[key].toHex()
        self.assertEqual(observed_hex, expected_hex)

    self.assertEqual(color_order, self.data_color_order)
def make_3d_plots(coord_header, coords, pct_var, mapping, prefs,
                  background_color, label_color,
                  taxa=None, custom_axes=None,
                  edges=None, coords_low=None, coords_high=None,
                  ellipsoid_prefs=None,
                  user_supplied_edges=False, ball_scale=1.0,
                  arrow_colors=None):
    """Makes 3d plots given coords, mapping file, and prefs.

    Added quick-and-dirty hack for gradient coloring of columns, should
    replace with more general method. Current solution is to pass in any
    of the following:

    'colors':(('white', (0,100,100)),('red',(100,100,100)))

    makes gradient between white and red, applies to all samples

    'colors':{'RK':(('white',(0,0,100)),('red',(0,100,100))),
              'NF':(('white',(120,0,100)),('green',(120,100,100)))
             }
    pulls the combination samples starting with RK, colors with
    first gradient, then pulls the combination samples starting
    with NF, colors with the next gradient.

    For every color grouping produced by iter_color_groups(mapping, prefs)
    make_mage_output is called twice, first with scaled=False and then
    with scaled=True; all remaining arguments are forwarded unchanged.

    arrow_colors defaults to
    {'line_color': 'white', 'head_color': 'red'} when None is given.

    Returns a list holding the concatenated make_mage_output results.
    """
    # A fresh dict per call: a dict literal in the signature would be shared
    # (and possibly mutated) across every call that relies on the default.
    if arrow_colors is None:
        arrow_colors = {'line_color': 'white', 'head_color': 'red'}

    result = []

    # Materialize the groupings first so grouping is complete before any
    # output is generated.
    groups_and_colors = list(iter_color_groups(mapping, prefs))

    for group_info in groups_and_colors:
        labelname, groups, colors, data_colors = group_info[:4]

        # Emit the unscaled view first, then the scaled one.
        for scaled in (False, True):
            result.extend(make_mage_output(
                groups, colors, coord_header, coords,
                pct_var, background_color, label_color, data_colors,
                taxa, custom_axes, name=labelname,
                scaled=scaled, edges=edges,
                coords_low=coords_low, coords_high=coords_high,
                ellipsoid_prefs=ellipsoid_prefs,
                user_supplied_edges=user_supplied_edges,
                ball_scale=ball_scale, arrow_colors=arrow_colors))

    return result
def make_3d_plots(coord_header, coords, pct_var, mapping, prefs,
                  background_color, label_color,
                  taxa=None, custom_axes=None,
                  edges=None, coords_low=None, coords_high=None,
                  ellipsoid_prefs=None, user_supplied_edges=False):
    """Makes 3d plots given coords, mapping file, and prefs.

    Added quick-and-dirty hack for gradient coloring of columns, should
    replace with more general method. Current solution is to pass in any
    of the following:

    'colors':(('white', (0,100,100)),('red',(100,100,100)))

    makes gradient between white and red, applies to all samples

    'colors':{'RK':(('white',(0,0,100)),('red',(0,100,100))),
              'NF':(('white',(120,0,100)),('green',(120,100,100)))
             }
    pulls the combination samples starting with RK, colors with
    first gradient, then pulls the combination samples starting
    with NF, colors with the next gradient.

    Returns the list of everything make_mage_output produced: for each
    color grouping an unscaled block followed by a scaled block.
    """
    color_groupings = list(iter_color_groups(mapping, prefs))

    # Keyword arguments that are identical for the scaled/unscaled calls
    shared_kwargs = dict(edges=edges,
                         coords_low=coords_low,
                         coords_high=coords_high,
                         ellipsoid_prefs=ellipsoid_prefs,
                         user_supplied_edges=user_supplied_edges)

    mage_lines = []
    for grouping in color_groupings:
        labelname, groups, colors, data_colors, _color_order = (
            grouping[k] for k in range(5))

        for use_scaled in (False, True):
            block = make_mage_output(groups, colors, coord_header, coords,
                                     pct_var, background_color, label_color,
                                     data_colors, taxa, custom_axes,
                                     name=labelname, scaled=use_scaled,
                                     **shared_kwargs)
            mage_lines.extend(block)

    return mage_lines
def generate_3d_plots_invue(prefs, data, dir_path, filename, intp_pts,
                            polyh_pts, offset):
    """Make files to be imported to inVUE

    http://sourceforge.net/projects/invue/

    Every key of prefs must be present in the first row of data['map'].
    The color groupings from iter_color_groups are handed to
    make_3d_plots_invue together with intp_pts, polyh_pts and offset, and
    its results are written below dir_path:

    * <filename>_<label>_<group>.txt  one file per label/group
    * <filename>_<label>.txt          one file per label
    * tetraVertices.txt               tab-separated rows of the third
                                      value returned by make_3d_plots_invue

    Raises ValueError if a prefs column is missing from the mapping header.
    """
    # Validating existence of all columns
    map_header = data['map'][0]
    for col in prefs:
        if col not in map_header:
            # Call syntax works on Python 2 and 3; the message is a single
            # literal so no source indentation leaks into the text.
            raise ValueError(
                'Column given "%s" does not exist in mapping file' % col)

    # Split matrix by labelname, groups & give colors
    groups_and_colors = list(iter_color_groups(data['map'], prefs))

    smp_lbl, smp_lbl_grp, polypts = make_3d_plots_invue(
        data, groups_and_colors, intp_pts, polyh_pts, offset)

    # Write one file per group and one full file per label. The with
    # blocks make sure the handles are closed even if formatting fails.
    for lbl in smp_lbl:
        for grp in smp_lbl_grp[lbl]:
            # writing individual files
            ind_path = "%s/%s_%s_%s.txt" % (dir_path, filename, lbl, grp)
            smp = smp_lbl_grp[lbl][grp]
            with open(ind_path, 'w') as outfile:
                outfile.write(format_coords(smp['headrs'], smp['coords'],
                                            [], [], False))

        # writing full file
        full_path = "%s/%s_%s.txt" % (dir_path, filename, lbl)
        with open(full_path, 'w') as outfile:
            outfile.write(format_coords(smp_lbl[lbl]['headrs'],
                                        smp_lbl[lbl]['coords'],
                                        [], [], False))

    # Writing tetraVertices.txt
    vertices_path = "%s/tetraVertices.txt" % (dir_path)
    with open(vertices_path, 'w') as outfile:
        outfile.write('\n'.join(['\t'.join(map(str, row))
                                 for row in polypts]))
        outfile.write('\n')
def generate_2d_plots(prefs, data, html_dir_path, data_dir_path, filename,
                      background_color, label_color, generate_scree):
    """Generate interactive 2D scatterplots

    For every color grouping yielded by iter_color_groups(data['map'],
    prefs) a fresh data directory is created under data_dir_path and
    draw_pcoa_graph is called for the axis pairs (1, 2), (3, 2) and (1, 3).
    A RADIO entry is collected for every grouping, but only the first
    grouping contributes a TABLE_HTML section. When generate_scree is true
    a scree plot section is appended. The final HTML is written to
    html_dir_path using a name derived from filename.

    If data contains 'support_pcoas', data['coord'] is replaced in place by
    the summarized coordinates returned from summarize_pcoas.
    """
    coord_tups = [("1", "2"), ("3", "2"), ("1", "3")]
    mapping = data['map']
    out_table = ''
    sample_location = {}
    radiobuttons = []

    # Iterate through prefs and generate html files for each colorby option
    groups_and_colors = list(iter_color_groups(mapping, prefs))

    for group_idx, group_info in enumerate(groups_and_colors):
        (labelname, groups, colors,
         data_colors, data_color_order) = group_info[:5]

        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_data = {}
        plot_label = labelname

        if 'support_pcoas' in data:
            (matrix_average, matrix_low, matrix_high,
             eigval_average, m_names) = summarize_pcoas(
                data['coord'], data['support_pcoas'],
                method=data['ellipsoid_method'])
            data['coord'] = (m_names, matrix_average,
                             data['coord'][2], data['coord'][3])
            # Use a dedicated index name here: reusing the outer loop
            # variable would break the "first grouping only" test below.
            for sample_idx, sample_name in enumerate(m_names):
                sample_location[sample_name] = sample_idx
        else:
            matrix_average = None
            matrix_low = None
            matrix_high = None

        # change coord_tups if you want more than one PCoA plot
        # (i.e. involving PC3)
        for coord_tup in coord_tups:
            if isarray(matrix_low) and isarray(matrix_high) and \
                    isarray(matrix_average):
                coord_1r = asarray(matrix_low)
                coord_2r = asarray(matrix_high)
                mat_ave = asarray(matrix_average)
            else:
                coord_1r = None
                coord_2r = None
                mat_ave = None
                sample_location = None

            coord_1, coord_2 = coord_tup
            img_data[coord_tup] = draw_pcoa_graph(
                plot_label, data_file_dir_path, data_file_link,
                coord_1, coord_2, coord_1r, coord_2r, mat_ave,
                sample_location, data, prefs, groups, colors,
                background_color, label_color,
                data_colors, data_color_order,
                generate_eps=True)

        radiobuttons.append(RADIO % (data_file_link, labelname))

        # only create first table!
        if group_idx == 0:
            out_table += TABLE_HTML % (labelname,
                                       "<br>".join(img_data[("1", "2")]),
                                       "<br>".join(img_data[("3", "2")]),
                                       "<br>".join(img_data[("1", "3")]))

    if generate_scree:
        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_src, download_link = draw_scree_graph(
            data_file_dir_path, data_file_link, background_color,
            label_color, generate_eps=True, data=data)

        out_table += SCREE_TABLE_HTML % (
            "<br>".join((img_src, download_link)))

    out_table = "\n".join(radiobuttons) + out_table
    outfile = create_html_filename(filename, '.html')
    outfile = os.path.join(html_dir_path, outfile)

    write_html_file(out_table, outfile)
def main():
    """Script entry point: validate the inputs, then build the outputs.

    Steps, in order:
    1. parse and validate the distance matrix, the mapping file, the
       background color and the optional prefs file; any failure is
       reported through option_parser.error
    2. resolve color prefs and the list of mapping fields to analyse
    3. compute within/between group distances
    4. unless HTML output is suppressed, draw the histograms, write the
       main HTML page and copy histograms.js next to it
    5. when opts.monte_carlo_iters > 0, run the Monte Carlo analyses
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Some code for error checking of input args:

    # Check if distance_matrix_file is valid.
    # `except Exception` (not a bare except) so that Ctrl-C and SystemExit
    # are not reported as "invalid distance matrix".
    try:
        with open(opts.distance_matrix_file, "U") as dmat_file:
            d_header, d_mat = parse_distmat(dmat_file)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix file.  Please supply a valid distance matrix file using the -d option."
        )

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and hollow.")

    # Check if map_fname is valid:
    try:
        with open(opts.map_fname, "U") as map_file:
            mapping, m_header, m_comments = parse_mapping_file(map_file)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping file.  Please supply a valid mapping file using the -m option."
        )

    # make sure background_color is valid
    if opts.background_color not in ["black", "white"]:
        option_parser.error(
            "'%s' is not a valid background color.  Please pass in either 'black' or 'white' using the -k option."
            % (opts.background_color)
        )

    # make sure prefs file is valid if it exists
    prefs = None
    if opts.prefs_path is not None:
        try:
            with open(opts.prefs_path, "U") as prefs_handle:
                prefs_file = prefs_handle.read()
        except IOError:
            option_parser.error(
                "Provided prefs file, '%s', does not exist.  Please pass in a valid prefs file with the -p option."
                % (opts.prefs_path)
            )
        prefs = parse_prefs_file(prefs_file)

    (color_prefs, color_data, background_color, label_color, ball_scale,
     arrow_colors) = sample_color_prefs_and_map_data_from_options(opts)

    # list of labelname, groups, colors, data_colors, data_color_order
    groups_and_colors = list(
        iter_color_groups(mapping=color_data["map"], prefs=color_prefs))

    # dict mapping labelname to list of: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir() + "/qiime/support_files/"

    # Field precedence: command line, then prefs file, then auto-detection
    fields = opts.fields
    if fields is not None:
        # split on commas, drop surrounding whitespace and stray quotes
        fields = [field.strip().strip('"').strip("'")
                  for field in fields.split(",")]
    elif prefs is not None:
        fields = prefs.get("FIELDS", None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    # Check that all provided fields are valid:
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error(
                    "The field, %s, is not in the provided mapping file.  Please supply correct fields (using the -f option or providing a 'FIELDS' list in the prefs file) corresponding to fields in mapping file."
                    % (f)
                )

    within_distances, between_distances, dmat = group_distances(
        mapping_file=opts.map_fname,
        dmatrix_file=opts.distance_matrix_file,
        fields=fields,
        dir_prefix=get_random_directory_name(output_dir=opts.dir_path,
                                             prefix="distances"),
    )

    if not opts.suppress_html_output:
        # histograms output path
        histograms_path = path.join(opts.dir_path, "histograms")
        try:
            mkdir(histograms_path)
        except OSError:  # raised if dir exists
            pass

        # draw all histograms
        distances_dict, label_to_histogram_filename = draw_all_histograms(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            histogram_dir=histograms_path,
            field_to_color_prefs=field_to_colors,
            background_color=background_color,
        )

        # Get relative path to histogram files.
        label_to_histogram_filename_relative = _make_relative_paths(
            label_to_histogram_filename, opts.dir_path)

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename + "_distance_histograms.html"
        make_main_html(
            distances_dict=distances_dict,
            label_to_histogram_filename=label_to_histogram_filename_relative,
            root_outdir=opts.dir_path,
            outfile_name=outfile_name,
            title="Distance Histograms",
        )

        # Handle saving web resources locally.
        # javascript file
        javascript_path = path.join(opts.dir_path, "js")
        try:
            mkdir(javascript_path)
        except OSError:  # raised if dir exists
            pass

        # Read the bundled script first so a missing source file does not
        # leave an empty histograms.js behind; both handles are closed.
        with open(qiime_dir + "js/histograms.js") as js_in:
            js_source = js_in.read()
        with open(javascript_path + "/histograms.js", "w") as js_out:
            js_out.write(js_source)

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        # Do Monte Carlo for all fields
        monte_carlo_group_distances(
            mapping_file=opts.map_fname,
            dmatrix_file=opts.distance_matrix_file,
            prefs=prefs,
            dir_prefix=opts.dir_path,
            fields=fields,
            default_iters=monte_carlo_iters,
        )

        # Do Monte Carlo for within and between fields
        monte_carlo_group_distances_within_between(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            dir_prefix=opts.dir_path,
            num_iters=monte_carlo_iters,
        )
def generate_2d_plots(prefs, data, html_dir_path, data_dir_path, filename,
                      background_color, label_color, generate_scree):
    """Generate interactive 2D scatterplots

    For every color grouping yielded by iter_color_groups(data['map'],
    prefs) a fresh data directory is created under data_dir_path,
    draw_pcoa_graph is called for the axis pairs (1, 2), (3, 2) and (1, 3)
    and a TABLE_HTML section is appended. When generate_scree is true a
    scree plot section follows. The resulting HTML is written to
    html_dir_path using a name derived from filename.

    If data contains 'support_pcoas', data['coord'] is replaced in place by
    the summarized coordinates returned from summarize_pcoas.
    """
    coord_tups = [("1", "2"), ("3", "2"), ("1", "3")]
    mapping = data['map']
    out_table = ''
    sample_location = {}

    # Iterate through prefs and generate html files for each colorby option
    for group_info in list(iter_color_groups(mapping, prefs)):
        (labelname, groups, colors,
         data_colors, data_color_order) = group_info[:5]

        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_data = {}
        plot_label = labelname

        # `in` instead of dict.has_key, which no longer exists in Python 3
        if 'support_pcoas' in data:
            (matrix_average, matrix_low, matrix_high,
             eigval_average, m_names) = summarize_pcoas(
                data['coord'], data['support_pcoas'],
                method=data['ellipsoid_method'])
            data['coord'] = (m_names, matrix_average,
                             data['coord'][2], data['coord'][3])
            # Position of each sample name within m_names
            for sample_idx, sample_name in enumerate(m_names):
                sample_location[sample_name] = sample_idx
        else:
            matrix_average = None
            matrix_low = None
            matrix_high = None

        for coord_tup in coord_tups:
            if isarray(matrix_low) and isarray(matrix_high) and \
                    isarray(matrix_average):
                coord_1r = asarray(matrix_low)
                coord_2r = asarray(matrix_high)
                mat_ave = asarray(matrix_average)
            else:
                coord_1r = None
                coord_2r = None
                mat_ave = None
                sample_location = None

            coord_1, coord_2 = coord_tup
            img_data[coord_tup] = draw_pcoa_graph(
                plot_label, data_file_dir_path, data_file_link,
                coord_1, coord_2, coord_1r, coord_2r, mat_ave,
                sample_location, data, prefs, groups, colors,
                background_color, label_color,
                data_colors, data_color_order,
                generate_eps=True)

        out_table += TABLE_HTML % (labelname,
                                   "<br>".join(img_data[("1", "2")]),
                                   "<br>".join(img_data[("3", "2")]),
                                   "<br>".join(img_data[("1", "3")]))

    if generate_scree:
        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_src, download_link = draw_scree_graph(
            data_file_dir_path, data_file_link, background_color,
            label_color, generate_eps=True, data=data)

        out_table += SCREE_TABLE_HTML % (
            "<br>".join((img_src, download_link)))

    outfile = create_html_filename(filename, '.html')
    outfile = os.path.join(html_dir_path, outfile)

    write_html_file(out_table, outfile)
def make_averages(
    color_prefs,
    data,
    background_color,
    label_color,
    rares,
    output_dir,
    resolution,
    imagetype,
    ymax,
    suppress_webpage,
    std_type,
    output_type="file_creation",
):
    """This is the main function, which takes the rarefaction files, calls the
    functions to make plots and formatting the output html.

    rares maps a rarefaction file name to a 4-tuple
    (col_headers, comments, rarefaction_fn, rarefaction_data); the part of
    the name before the first '.' is used as the metric name. data['map']
    is the mapping table with the header as first row.

    A truthy ymax is used as-is for every metric; otherwise ymax is
    recomputed per metric from the averaged series and their errors.
    NOTE(review): the average table files are only written while ymax is
    being computed, i.e. not when the caller supplies ymax -- confirm that
    this is intended.

    output_type "file_creation" creates the output directories below
    output_dir; "memory" uses fixed 'plot/...' paths and collects the plot
    objects instead.

    Returns the value of make_html, or None when suppress_webpage is true.
    """
    user_ymax = bool(ymax)

    ave_data_file_path = os.path.join(output_dir, "average_tables")

    if output_type == "file_creation":
        # The average plots are produced even when the web page is
        # suppressed, so their directory must always be defined here.
        ave_output_dir = os.path.join(output_dir, "average_plots")
        if not suppress_webpage:
            # in this option the path must include the output directory
            all_output_dir = os.path.join(output_dir, "html_plots")
            create_dir(all_output_dir)
        # Create the directories, where plots and data will be written
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)
    elif output_type == "memory":
        # this is rather an artificial path to work with the javascript code
        all_output_dir = "plot/html_plots"
        ave_output_dir = "plot/average_plots"

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}
    first_rare_file = True

    # this is a fix for the issue of writing field values as the filenames
    mapping_lookup = {}
    for i, column in enumerate(data["map"][0]):
        for j, row in enumerate(data["map"][1:]):
            mapping_lookup["%s-%s" % (column, row[i])] = \
                "col_%s_row_%s" % (str(i), str(j))

    all_plots = []

    # Iterate through the rarefaction files
    for r in natsort(rares):
        metric_name = r.split(".")[0]

        # convert the rarefaction data into variables
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # Here we only need to perform these steps once, since the data is
        # the same for all rarefaction files
        if first_rare_file:
            # Remove samples from the mapping file, which contain no data
            # after rarefaction
            updated_mapping = []
            for map_row in data["map"]:
                # Add the mapping header
                if map_row[0] == "SampleID":
                    updated_mapping.append(map_row)

                # Determine if the sample exists in the rarefaction file
                for header in col_headers[3:]:
                    if map_row[0] == header:
                        updated_mapping.append(map_row)

            # Get the groups and colors for the updated mapping file
            groups_and_colors = list(
                iter_color_groups(updated_mapping, color_prefs))

            # First column of every kept mapping row, for fast membership
            # tests below
            mapped_ids = set(map_row[0] for map_row in updated_mapping)

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = get_rarefaction_data(
            rarefaction_data, col_headers)

        rarefaction_legend_mat[metric_name] = {}

        # Create dictionary variables and get the colors for each Sample
        sample_colors = None
        rarefaction_legend_mat[metric_name]["groups"] = {}
        for group_info in groups_and_colors:
            labelname = group_info[0]

            # Create a legend dictionary for html output
            rarefaction_legend_mat[metric_name]["groups"][labelname] = {}

            # If this is the first time iterating through the rarefaction
            # data create a data dictionary for html output
            if first_rare_file:
                rarefaction_data_mat[labelname] = {}

            # If the labelname is SampleID, use the colors assigned
            if labelname == "SampleID":
                sample_colors = group_info[2]
                sample_data_colors = group_info[3]

        first_rare_file = False

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            samples_and_colors = list(iter_color_groups(
                updated_mapping,
                {"SampleID": {"column": "SampleID",
                              "colors": (("red", (0, 100, 100)),
                                         ("blue", (240, 100, 100)))}},
            ))
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Create a dictionary containing the samples:
        # sample id -> {sequences per sample -> list of values}
        sample_dict = {}
        for i, sid in enumerate(sampleIDs):
            if sid in mapped_ids:
                sample_dict[sid] = {}
                for j, seq in enumerate(seqs_per_samp):
                    sample_dict[sid].setdefault(seq, []).append(
                        rare_mat_trans[i][j])

        # convert xvals to float
        xaxisvals = sorted(float(x) for x in set(seqs_per_samp))

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(
            rare_mat_trans, seqs_per_samp, sampleIDs)

        # calculate the max xval: one more step beyond the largest value
        xmax = max(xaxisvals) + (xaxisvals[-1] - xaxisvals[-2])

        if not user_ymax:
            ymax = 0
            for group_info in groups_and_colors:
                labelname, groups, colors, data_colors = group_info[:4]

                ave_file_path = os.path.join(ave_data_file_path, metric_name)

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                    data_colors, groups, std_type)

                # write out the rarefaction average data
                if output_type == "file_creation":
                    with open(ave_file_path + labelname + ".txt",
                              "w") as ave_file:
                        ave_file.writelines(rare_lines)

                # take the formatted rarefaction averages and format the
                # results
                rares_data = parse_rarefaction_data(
                    "".join(rare_lines).split("\n"))

                # determine the ymax based on the average data
                # multiple the ymax, since the dots can end up on the border
                series_max = max(
                    [max(v) for v in rares_data["series"].values()])
                error_max = max(
                    [max(e) for e in rares_data["error"].values()])
                new_ymax = (series_max + error_max) * 1.15
                if isnan(new_ymax):
                    # fall back to the series alone when the errors are NaN
                    new_ymax = series_max * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        sample_legend_written = False

        # iterate through the groups
        for group_info in groups_and_colors:
            labelname, groups, colors, data_colors = group_info[:4]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                data_colors, groups, std_type)

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data(
                "".join(rare_lines).split("\n"))

            if not suppress_webpage:
                metric_legend = rarefaction_legend_mat[metric_name]

                # The per-sample legend is the same for every grouping, so
                # it is filled once per metric.
                if not sample_legend_written:
                    metric_legend["samples"] = {}
                    for sid in sample_dict:
                        metric_legend["samples"][sid] = {
                            "color":
                                sample_data_colors[sample_colors[sid]].toHex(),
                        }
                    sample_legend_written = True

                # Iterate through the groups and create the legend dictionary
                for g in groups:
                    metric_legend["groups"][labelname][g] = {
                        "groupsamples": groups[g],
                        "groupcolor": data_colors[colors[g]].toHex(),
                    }

                # Create the individual category average plots
                plot_args = (
                    background_color, label_color, rares_data, ymax, xmax,
                    all_output_dir, resolution, imagetype, groups, colors,
                    data_colors, metric_name, labelname,
                    rarefaction_data_mat, rarefaction_legend_mat,
                    sample_dict, sample_data_colors, sample_colors,
                    mapping_lookup, output_type,
                )
                if output_type == "file_creation":
                    rarefaction_data_mat, rarefaction_legend_mat = \
                        make_plots(*plot_args)
                elif output_type == "memory":
                    (rarefaction_data_mat, rarefaction_legend_mat,
                     all_plots_single, all_plots_ave) = make_plots(*plot_args)

            # generate the filepath for the image file
            file_path = os.path.join(
                ave_output_dir,
                splitext(split(rares_data["headers"][0])[1])[0])

            # Create the average plots (done with or without the web page)
            categories = list(groups)
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data["xaxis"], rares_data["series"],
                rares_data["error"], xmax, ymax, categories, labelname,
                imagetype, resolution, data_colors, colors, file_path,
                background_color, label_color, metric_name, output_type)

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if suppress_webpage:
        return None

    # format the html output
    return make_html(
        rarefaction_legend_mat, rarefaction_data_mat, xaxisvals, imagetype,
        mapping_lookup, output_type, all_plots)
def make_averages(color_prefs, data, background_color, label_color, rares,
                  output_dir, resolution, imagetype, ymax, suppress_webpage,
                  std_type, output_type="file_creation"):
    '''This is the main function, which takes the rarefaction files, calls the
    functions to make plots and formatting the output html.

    rares maps a rarefaction file name to a 4-tuple
    (col_headers, comments, rarefaction_fn, rarefaction_data); the text in
    front of the first '.' of the name is the metric name. data['map'] is
    the mapping table whose first row is the header.

    When ymax is falsy it is derived per metric from the averaged series
    plus their errors; a truthy ymax is applied unchanged.
    NOTE(review): the average table files are written only inside the
    ymax computation, so none are written for a user supplied ymax --
    confirm this is intended.

    With output_type "file_creation" the output directories are created
    under output_dir; with "memory" fixed 'plot/...' paths are used and
    the plots are gathered in a list.

    Returns what make_html returns, or None if suppress_webpage is true.
    '''
    user_ymax = bool(ymax)

    ave_data_file_path = os.path.join(output_dir, 'average_tables')

    if output_type == "file_creation":
        # Needed below even if no web page is generated; leaving it
        # undefined in that case made create_dir() fail.
        ave_output_dir = os.path.join(output_dir, 'average_plots')
        if not suppress_webpage:
            # in this option the path must include the output directory
            all_output_dir = os.path.join(output_dir, 'html_plots')
            create_dir(all_output_dir)
        # Create the directories, where plots and data will be written
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)
    elif output_type == 'memory':
        # this is rather an artificial path to work with the javascript code
        all_output_dir = 'plot/html_plots'
        ave_output_dir = 'plot/average_plots'

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}
    is_first_file = True

    # this is a fix for the issue of writing field values as the filenames
    mapping_lookup = {}
    for col_idx, column in enumerate(data['map'][0]):
        for row_idx, row in enumerate(data['map'][1:]):
            mapping_lookup['%s-%s' % (column, row[col_idx])] = \
                'col_%s_row_%s' % (str(col_idx), str(row_idx))

    all_plots = []

    # Iterate through the rarefaction files
    for r in natsort(rares):
        metric_name = r.split('.')[0]

        # convert the rarefaction data into variables
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # Here we only need to perform these steps once, since the data is
        # the same for all rarefaction files
        if is_first_file:
            # Remove samples from the mapping file, which contain no data
            # after rarefaction
            updated_mapping = []
            for map_row in data['map']:
                # Add the mapping header
                if map_row[0] == 'SampleID':
                    updated_mapping.append(map_row)

                # Determine if the sample exists in the rarefaction file
                for header in col_headers[3:]:
                    if map_row[0] == header:
                        updated_mapping.append(map_row)

            # Get the groups and colors for the updated mapping file
            groups_and_colors = list(
                iter_color_groups(updated_mapping, color_prefs))

            # Ids kept in the mapping, built once for the lookups below
            kept_ids = set(map_row[0] for map_row in updated_mapping)

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = \
            get_rarefaction_data(rarefaction_data, col_headers)

        rarefaction_legend_mat[metric_name] = {}

        # Create dictionary variables and get the colors for each Sample
        sample_colors = None
        rarefaction_legend_mat[metric_name]['groups'] = {}
        for color_group in groups_and_colors:
            labelname = color_group[0]

            # Create a legend dictionary for html output
            rarefaction_legend_mat[metric_name]['groups'][labelname] = {}

            # If this is the first time iterating through the rarefaction
            # data create a data dictionary for html output
            if is_first_file:
                rarefaction_data_mat[labelname] = {}

            # If the labelname is SampleID, use the colors assigned
            if labelname == 'SampleID':
                sample_colors = color_group[2]
                sample_data_colors = color_group[3]

        is_first_file = False

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            samples_and_colors = list(iter_color_groups(
                updated_mapping,
                {'SampleID': {'column': 'SampleID',
                              'colors': (('red', (0, 100, 100)),
                                         ('blue', (240, 100, 100)))}}))
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Create a dictionary containing the samples:
        # sample id -> {sequences per sample -> list of values}
        sample_dict = {}
        for samp_idx, sid in enumerate(sampleIDs):
            if sid in kept_ids:
                sample_dict[sid] = {}
                for seq_idx, seq in enumerate(seqs_per_samp):
                    sample_dict[sid].setdefault(seq, []).append(
                        rare_mat_trans[samp_idx][seq_idx])

        # convert xvals to float
        xaxisvals = sorted(float(x) for x in set(seqs_per_samp))

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(rare_mat_trans, seqs_per_samp,
                                           sampleIDs)

        # calculate the max xval: one more step beyond the largest value
        xmax = max(xaxisvals) + (xaxisvals[-1] - xaxisvals[-2])

        if not user_ymax:
            ymax = 0
            for color_group in groups_and_colors:
                labelname, groups, colors, data_colors = color_group[:4]

                ave_file_path = os.path.join(ave_data_file_path, metric_name)

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                    data_colors, groups, std_type)

                # write out the rarefaction average data
                if output_type == "file_creation":
                    with open(ave_file_path + labelname + '.txt',
                              'w') as ave_file:
                        ave_file.writelines(rare_lines)

                # take the formatted rarefaction averages and format the
                # results
                rares_data = parse_rarefaction_data(
                    ''.join(rare_lines).split('\n'))

                # determine the ymax based on the average data
                # multiple the ymax, since the dots can end up on the border
                top_series = max(
                    [max(v) for v in rares_data['series'].values()])
                top_error = max(
                    [max(e) for e in rares_data['error'].values()])
                new_ymax = (top_series + top_error) * 1.15
                if isnan(new_ymax):
                    # errors are NaN: use the series maximum only
                    new_ymax = top_series * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        samples_in_legend = False

        # iterate through the groups
        for color_group in groups_and_colors:
            labelname, groups, colors, data_colors = color_group[:4]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                data_colors, groups, std_type)

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data(
                ''.join(rare_lines).split('\n'))

            if not suppress_webpage:
                legend_for_metric = rarefaction_legend_mat[metric_name]

                # Per-sample colors do not depend on the grouping; add them
                # only once per metric.
                if not samples_in_legend:
                    legend_for_metric['samples'] = {}
                    for sid in sample_dict:
                        legend_for_metric['samples'][sid] = {
                            'color':
                                sample_data_colors[sample_colors[sid]].toHex(),
                        }
                    samples_in_legend = True

                # Iterate through the groups and create the legend dictionary
                for g in groups:
                    legend_for_metric['groups'][labelname][g] = {
                        'groupsamples': groups[g],
                        'groupcolor': data_colors[colors[g]].toHex(),
                    }

                # Create the individual category average plots
                plot_args = (background_color, label_color,
                             rares_data, ymax, xmax, all_output_dir,
                             resolution, imagetype, groups, colors,
                             data_colors, metric_name, labelname,
                             rarefaction_data_mat, rarefaction_legend_mat,
                             sample_dict, sample_data_colors,
                             sample_colors, mapping_lookup, output_type)
                if output_type == "file_creation":
                    rarefaction_data_mat, rarefaction_legend_mat = \
                        make_plots(*plot_args)
                elif output_type == "memory":
                    (rarefaction_data_mat, rarefaction_legend_mat,
                     all_plots_single, all_plots_ave) = make_plots(*plot_args)

            # generate the filepath for the image file
            file_path = os.path.join(
                ave_output_dir,
                splitext(split(rares_data['headers'][0])[1])[0])

            # Create the average plots (with or without the web page)
            categories = list(groups)
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data['xaxis'], rares_data['series'],
                rares_data['error'], xmax, ymax, categories,
                labelname, imagetype, resolution, data_colors,
                colors, file_path, background_color, label_color,
                metric_name, output_type)

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if suppress_webpage:
        return None

    # format the html output
    return make_html(rarefaction_legend_mat, rarefaction_data_mat,
                     xaxisvals, imagetype, mapping_lookup, output_type,
                     all_plots)
def generate_2d_plots(prefs, data, html_dir_path, data_dir_path, filename,
                      background_color, label_color, generate_scree):
    """Generate interactive 2D scatterplots and write them to an HTML file.

    For each color group produced by ``iter_color_groups(data['map'], prefs)``
    one plot is drawn with ``draw_pcoa_graph`` for each of the axis pairs
    (1, 2), (3, 2) and (1, 3), and a ``TABLE_HTML`` row is appended to the
    page.  When ``generate_scree`` is true a ``SCREE_TABLE_HTML`` row built
    from ``draw_scree_graph`` is appended as well.

    Parameters:
        prefs: color preferences, passed to ``iter_color_groups`` and
            ``draw_pcoa_graph``.
        data: dict; ``data['map']`` is always read.  If it contains
            ``'support_pcoas'``, then ``data['coord']``,
            ``data['support_pcoas']`` and ``data['ellipsoid_method']`` are
            passed to ``summarize_pcoas``.
        html_dir_path: directory in which the HTML file is written.
        data_dir_path: directory under which one temporary directory per
            color group (plus one for the scree plot) is created.
        filename: passed to ``create_html_filename`` to name the HTML file.
        background_color, label_color: forwarded to the drawing functions.
        generate_scree: if true, also add the scree plot.

    Returns:
        None.  The page is written with ``write_html_file``.

    Side effects:
        When ``'support_pcoas'`` is in ``data``, ``data['coord']`` is replaced
        in place with the summarized coordinates, once per color group.
    """
    coord_tups = [("1", "2"), ("3", "2"), ("1", "3")]
    mapping = data['map']
    out_table = ''

    # Maps sample name -> row index in the summarized matrices.  It is shared
    # by all color groups and only ever filled when support PCoAs exist.
    sample_location = {}

    # Iterate through prefs and generate html for each colorby option
    groups_and_colors = list(iter_color_groups(mapping, prefs))

    for group_info in groups_and_colors:
        labelname, groups, colors, data_colors, data_color_order = \
            group_info[:5]

        data_file_dir_path = mkdtemp(dir=data_dir_path)

        # Link of the form ./<parent dir name>/<temp dir name>
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        if 'support_pcoas' in data:
            # The fourth returned value (averaged eigenvalues) is not used.
            matrix_average, matrix_low, matrix_high, _, m_names = \
                summarize_pcoas(data['coord'], data['support_pcoas'],
                                method=data['ellipsoid_method'])
            data['coord'] = (m_names, matrix_average,
                             data['coord'][2], data['coord'][3])
            for position, sample_name in enumerate(m_names):
                sample_location[sample_name] = position
        else:
            matrix_average = matrix_low = matrix_high = None

        # The matrices do not change between axis pairs, so decide once per
        # color group whether the low/high/average data can be drawn.  A
        # separate name is used for the value handed to the plotting code so
        # that the shared sample_location dict is never replaced by None
        # (which would break the assignment above for the next color group).
        if isarray(matrix_low) and isarray(matrix_high) and \
                isarray(matrix_average):
            coord_1r = asarray(matrix_low)
            coord_2r = asarray(matrix_high)
            mat_ave = asarray(matrix_average)
            plot_sample_location = sample_location
        else:
            coord_1r = coord_2r = mat_ave = None
            plot_sample_location = None

        img_data = {}
        for coord_tup in coord_tups:
            coord_1, coord_2 = coord_tup
            img_data[coord_tup] = draw_pcoa_graph(
                labelname, data_file_dir_path,
                data_file_link, coord_1, coord_2,
                coord_1r, coord_2r, mat_ave,
                plot_sample_location,
                data, prefs, groups, colors,
                background_color, label_color,
                data_colors, data_color_order,
                generate_eps=True)

        out_table += TABLE_HTML % (labelname,
                                   "<br>".join(img_data[("1", "2")]),
                                   "<br>".join(img_data[("3", "2")]),
                                   "<br>".join(img_data[("1", "3")]))

    if generate_scree:
        data_file_dir_path = mkdtemp(dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_src, download_link = draw_scree_graph(
            data_file_dir_path, data_file_link, background_color,
            label_color, generate_eps=True, data=data)

        out_table += SCREE_TABLE_HTML % ("<br>".join((img_src, download_link)))

    outfile = create_html_filename(filename, '.html')
    outfile = os.path.join(html_dir_path, outfile)

    write_html_file(out_table, outfile)
def main():
    """Command-line entry point: draw distance histograms and run Monte Carlo.

    Steps, in order:
      1. Parse the command line with ``parse_command_line_parameters``.
      2. Validate the distance matrix (parseable, symmetric and hollow), the
         mapping file, the background color and the optional prefs file;
         every problem is reported through ``option_parser.error``.
      3. Build the field -> color information from ``iter_color_groups``.
      4. Decide which mapping fields to use: the -f option, else the
         'FIELDS' entry of the prefs file, else
         ``get_interesting_mapping_fields``; each field must be in the
         mapping header.
      5. Compute group distances and, unless ``opts.suppress_html_output`` is
         set, draw the histograms, write the main HTML page and copy
         ``histograms.js`` into ``<dir_path>/js``.
      6. If ``opts.monte_carlo_iters`` is greater than zero, run both Monte
         Carlo analyses.

    Returns:
        None.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Some code for error checking of input args:

    # Check if distance_matrix_file is valid.  Any parsing failure is reported
    # as an invalid file, but KeyboardInterrupt/SystemExit are deliberately
    # not intercepted (hence "except Exception" rather than a bare except).
    try:
        with open(opts.distance_matrix_file, 'U') as dm_file:
            d_header, d_mat = parse_distmat(dm_file)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix file. Please supply a valid distance matrix file using the -d option."
        )

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and "
                            "hollow.")

    # Check if map_fname is valid:
    try:
        with open(opts.map_fname, 'U') as map_file:
            mapping, m_header, m_comments = parse_mapping_file(map_file)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping file. Please supply a valid mapping file using the -m option."
        )

    # make sure background_color is valid
    if opts.background_color not in ['black', 'white']:
        option_parser.error(
            "'%s' is not a valid background color. Please pass in either 'black' or 'white' using the -k option."
            % (opts.background_color))

    # make sure prefs file is valid if it exists
    prefs = None
    if opts.prefs_path is not None:
        try:
            with open(opts.prefs_path, 'U') as prefs_handle:
                prefs_file = prefs_handle.read()
        except IOError:
            option_parser.error(
                "Provided prefs file, '%s', does not exist. Please pass in a valid prefs file with the -p option."
                % (opts.prefs_path))
        prefs = parse_prefs_file(prefs_file)

    color_prefs, color_data, background_color, label_color, ball_scale,\
        arrow_colors = sample_color_prefs_and_map_data_from_options(opts)

    # list of labelname, groups, colors, data_colors, data_color_order
    groups_and_colors = list(iter_color_groups(mapping=color_data['map'],
                                               prefs=color_prefs))

    # dict mapping labelname to list of: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir() + '/qiime/support_files/'

    fields = opts.fields
    if fields is not None:
        # Comma-separated list: trim whitespace first, then any surrounding
        # double and single quotes.
        fields = [field.strip().strip('"').strip("'")
                  for field in fields.split(',')]
    elif prefs is not None:
        fields = prefs.get('FIELDS', None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    # Check that all provided fields are valid:
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error(
                    "The field, %s, is not in the provided mapping file. Please supply correct fields (using the -f option or providing a 'FIELDS' list in the prefs file) corresponding to fields in mapping file."
                    % (f))

    within_distances, between_distances, dmat = \
        group_distances(mapping_file=opts.map_fname,
                        dmatrix_file=opts.distance_matrix_file,
                        fields=fields,
                        dir_prefix=get_random_directory_name(
                            output_dir=opts.dir_path,
                            prefix='distances'))

    if not opts.suppress_html_output:
        # histograms output path
        histograms_path = path.join(opts.dir_path, 'histograms')
        try:
            mkdir(histograms_path)
        except OSError:  # raised if dir exists
            pass

        # draw all histograms
        distances_dict, label_to_histogram_filename = \
            draw_all_histograms(single_field=within_distances,
                                paired_field=between_distances,
                                dmat=dmat,
                                histogram_dir=histograms_path,
                                field_to_color_prefs=field_to_colors,
                                background_color=background_color)

        # Get relative path to histogram files.
        label_to_histogram_filename_relative = \
            _make_relative_paths(label_to_histogram_filename, opts.dir_path)

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename + '_distance_histograms.html'
        make_main_html(
            distances_dict=distances_dict,
            label_to_histogram_filename=label_to_histogram_filename_relative,
            root_outdir=opts.dir_path,
            outfile_name=outfile_name,
            title='Distance Histograms')

        # Handle saving web resources locally.
        # javascript file
        javascript_path = path.join(opts.dir_path, 'js')
        try:
            mkdir(javascript_path)
        except OSError:  # raised if dir exists
            pass
        # Read the packaged script before opening the destination, so both
        # handles are closed deterministically and a missing source file does
        # not leave an empty histograms.js behind.
        with open(qiime_dir + 'js/histograms.js') as js_in:
            js_source = js_in.read()
        with open(javascript_path + '/histograms.js', 'w') as js_out:
            js_out.write(js_source)

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        # Do Monte Carlo for all fields
        monte_carlo_group_distances(mapping_file=opts.map_fname,
                                    dmatrix_file=opts.distance_matrix_file,
                                    prefs=prefs,
                                    dir_prefix=opts.dir_path,
                                    fields=fields,
                                    default_iters=monte_carlo_iters)

        # Do Monte Carlo for within and between fields
        monte_carlo_group_distances_within_between(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            dir_prefix=opts.dir_path,
            num_iters=monte_carlo_iters)