def make_figures():
    """
    Produce the five heat map figures, in order.

    Prepares the plot directory under MAIN_DIR first, then calls each
    figure-specific plotting function with its module-level taxa
    constant.  Every call forwards MAKE_SVG_TOO as ``make_svg_too``.

    :return: None
    """
    # Make the ./plots/ dir if needed
    elviz_utils.prepare_plot_dir(MAIN_DIR)

    svg_too = MAKE_SVG_TOO

    # Figure 1: the major players.
    make_heatmap_for_major_players(MAJOR_PLAYERS, make_svg_too=svg_too)

    # Figures 2 and 3: one family each, plotted by the same function.
    family_figures = (
        ('Methylophilaceae', METHYLOPHILACEAE),
        ('Methylococcaceae', METHYLOCOCCACEAE),
    )
    for family_name, family_taxa in family_figures:
        make_heatmap_for_particular_family_with_other(
            family=family_name,
            taxa_dict=family_taxa,
            make_svg_too=svg_too)

    # Figure 4: Burkholderiales gets a different kind of plot.
    heatmap_burkolderiales(BURKOLDERIALES, make_svg_too=svg_too)

    # Figure 5: the predators.
    make_heatmap_for_predators(PREDATORS, make_svg_too=svg_too)
def heatmap_from_taxa_dict(dataframe, taxa_dict,
                           title=False,
                           facet='rep',
                           annotate=False,
                           summarise_other=True,
                           main_dir='./',
                           cap_facet_labels=True,
                           plot_dir='./plots/mixed_taxonomy/',
                           size_spec=False,
                           aspect_spec=False,
                           check_totals_sum_to_1=True,
                           svg=False):
    """
    Make a plot using a taxa_dict.

    The taxa_dict is used to make a summary dataframe using
    aggregate_mixed_taxonomy(), and the result is plotted as a grid of
    heat maps that all share one color scale.

    :param dataframe: dataframe to source all data from
    :param taxa_dict: a dictionary with taxonomic levels as keys and
        names as values.  E.g. {'Phylum':['Bacteroidetes'],
        'Order':['Burkholderiales','Methylophilales', 'Methylococcales']}
    :param title: if truthy, add a supertitle built from taxa_dict.
    :param facet: The variable to facet the subplot columns by.
        'week' puts replicates on the x axis of each subplot; any other
        value (default 'rep') facets by replicate and puts weeks on the
        x axis.  The value is also embedded in the output file name.
    :param annotate: print numerical values inside each square?
        (Makes big plots *really* big; not recommended for default use.)
    :param summarise_other: include a bar for "other"? (Or just don't
        show.)  Passed through to aggregate_mixed_taxonomy().
    :param main_dir: main dir to consider "home", so notebooks can be run
        in remote directories.  Passed to aggregate_mixed_taxonomy().
    :param cap_facet_labels: if truthy, run the data through
        capitalize_some_column_names() and facet on the 'Replicate',
        'Week' and '$O_2$' columns instead of 'rep', 'week' and 'oxy'.
    :param plot_dir: path to save plots at; handed to
        elviz_utils.prepare_plot_dir().
    :param size_spec: manually specify the figure size (useful when
        default is ugly)
    :param aspect_spec: manually specify the figure aspect ratio (useful
        when default is ugly)
    :param check_totals_sum_to_1: passed through to
        aggregate_mixed_taxonomy().
    :param svg: if truthy, also save an .svg next to the .pdf.
    :return: saves and returns a seaborn FacetGrid of heat maps
    """
    # Cherry pick out the rows for the specified taxa.
    # If you give conflicting taxa as input, aggregate_mixed_taxonomy() will
    # throw an error.
    plot_data = aggregate_mixed_taxonomy(
        dataframe=dataframe,
        taxa_dict=taxa_dict,
        main_dir=main_dir,
        summarise_other=summarise_other,
        check_totals_sum_to_1=check_totals_sum_to_1)

    # Store the maximum abundance level.  Every sub-heat map is told to use
    # this same maximum so they aren't each on their own scale.
    max_abundance = plot_data['abundance sum'].max()

    # The data is separated by week and replicate.  The one not used as the
    # facet is used as the columns inside each subplot.
    facet_by_week = (facet == 'week')

    print('plot_data.head()')
    print(plot_data.head())

    def pivot_so_columns_are_plotting_variable(dataframe, groupby):
        return dataframe.pivot(index='taxonomic name',
                               columns=groupby,
                               values='abundance sum')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot
        :param groupby: column to group on
        :param xrotation: degrees to rotate x labels by
        :param kws: keyword arguments for plotting
        :return: None
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = pivot_so_columns_are_plotting_variable(
            dataframe=data, groupby=groupby)
        # Pass kwargs to heatmap.  cmap used to be 'Blue'.
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # `g` is the FacetGrid created further down; it exists by the time
        # map_dataframe() invokes this function.
        g.set_xticklabels(rotation=xrotation)

    # todo: add a label at the bottom like "replicate" or "week"
    # currently replicate is turned into facet_replicate but should just
    # make a label that says replicate.

    # Control plot aesthetics depending on facet option.  The size and
    # aspect depend on the number of rows per subplot.
    num_rows = len(plot_data['taxonomic name'].unique())
    if facet_by_week:
        xrotation = 0
        size = 2 * 0.2*num_rows
        aspect = 1
    else:  # (facet = "rep")
        xrotation = 90
        size = 1 + 0.22*num_rows
        aspect = 1.5  # aspect for each sub-plot, not a single tile
    space_for_cbar = 0.85

    if size_spec:
        size = size_spec
    if aspect_spec:
        aspect = aspect_spec

    print(plot_data.head())

    # Pick the column names to facet on.  Previously facet='week' either
    # raised a NameError (capitalized labels) or silently faceted by
    # replicate anyway (plain labels); both orientations are now set up.
    if cap_facet_labels:
        plot_data = capitalize_some_column_names(plot_data)
        row_var = '$O_2$'
        week_col, rep_col = 'Week', 'Replicate'
    else:
        row_var = 'oxy'
        week_col, rep_col = 'week', 'rep'

    if facet_by_week:
        facet_var, col_var = week_col, rep_col
    else:
        facet_var, col_var = rep_col, week_col

    # NOTE(review): newer seaborn releases renamed FacetGrid's `size`
    # argument to `height`; confirm which seaborn version this project pins.
    with sns.plotting_context(font_scale=8):
        g = sns.FacetGrid(plot_data,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.92, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax,
                        # NEED vmax = MAX ABUNDANCE or each plot will have
                        # its own color scale!
                        vmin=0, vmax=max_abundance,
                        annot=annotate,
                        groupby=col_var,
                        xrotation=xrotation)

    g.set_axis_labels(col_var)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # todo: add an x-label for each facet (I want only 1)

    # Add space so the colorbar doesn't overlap the plot.
    g.fig.subplots_adjust(right=space_for_cbar)

    # todo: still not enough room for
    # Order-Burkholderiales_Methylophilales_Methylococcales--
    # Phylum-Bacteroidetes--rep.pdf

    # Format the y strings in each subplot of the Seaborn grid.
    # The formatter is passed as a function object, so no () after its name.
    # Todo: make the 2nd argument a function
    y_label_formatter(g, italics_unless_other)

    supertitle = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        # TODO: they are currently being converted to LaTeX
        # add a supertitle, you bet.
        plt.subplots_adjust(top=0.80)
        g.fig.suptitle(supertitle, size=16)

    # Tight layout --> title and cbar overlap heat maps.  Boo.
    # NO: plt.tight_layout()
    g.fig.subplots_adjust(wspace=.05, hspace=.05)

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    print("plot directory: {}".format(plot_dir))
    filepath = plot_dir + supertitle
    filepath += "--{}".format(facet)
    if annotate:
        filepath += "--annotated"
    filepath += ".pdf"
    print(filepath)
    g.fig.savefig(filepath)
    if svg:
        # Swap the extension by slicing: str.rstrip() strips a *set of
        # characters*, not a suffix, so it is the wrong tool here.
        g.fig.savefig(filepath[:-len("pdf")] + "svg")

    return g
def heatmap_all_below(dataframe, taxa_dict, plot_dir, low_cutoff=0.001,
                      cap_facet_labels=True, title=False, svg=False):
    """
    Make a heatmap of all the taxa below the taxa specified in taxa_dict.

    :param dataframe: dataframe of data to harvest excerpts from
    :param taxa_dict: a dictionary with taxonomic levels as keys and
        names as values.  E.g. {'Order':['Burkholderiales']}
        Only the first key and the first name in its list are used.
    :param plot_dir: path to save plots to; handed to
        elviz_utils.prepare_plot_dir().
    :param low_cutoff: lowest abundance to include.  A taxa must be above
        this threshold in at least one sample to be included.
    :param cap_facet_labels: if truthy, run the data through
        capitalize_some_column_names() and facet on the 'Replicate',
        'Week' and '$O_2$' columns instead of 'rep', 'week' and 'oxy'.
    :param title: if truthy, add a supertitle that includes low_cutoff.
    :param svg: if truthy, also save an .svg next to the .pdf.
    :return: the seaborn FacetGrid that was saved
    """
    # TODO: this function has a lot of commonality with heatmap_from_taxa_dict
    # and could/should be factored.

    # grab the data for that taxa:
    # for now assume just 1 key and 1 value.
    taxa_level = list(taxa_dict.keys())[0]
    taxa_name = list(taxa_dict.values())[0][0]
    # Take an explicit copy: the frame is modified below (fillna, new
    # column), and doing that on a filtered slice triggers pandas'
    # chained-assignment warnings.
    dataframe = dataframe[dataframe[taxa_level] == taxa_name].copy()
    print(dataframe.head())

    # Columns to form a concatenated label from:
    label_cols = taxonomy_levels_below(taxa_level=taxa_level)
    print('label_cols: {}'.format(label_cols))

    # change nan cells to 'unknown'
    dataframe = dataframe.fillna('unknown')

    # Make a summary string representing the taxonomy for everything below.
    # E.g. calls label_from_taxa_colnames(['Comamonadaceae', 'Curvibacter'],
    # taxa_name) for a row when label_cols = ['Family', 'Genus'].
    # TODO: use the taxa_dict to get the columns to use!
    print("columns passed: {}".format(label_cols))
    print("Use those in {}".format(label_from_taxa_colnames))

    def name_string_for_row(row):
        """Build one row's label from its values in label_cols."""
        return label_from_taxa_colnames([row[col] for col in label_cols],
                                        taxa_name)

    # make a name_string per row.  It's something like
    # "Comamonadaceae, Curvibacter" or "other"
    dataframe['name_string'] = dataframe.apply(name_string_for_row, axis=1)
    print("dataframe.head() for name_string:")
    print(dataframe.head())

    # reduce to only name_string rows with at least one abundance > the
    # threshold set by low_cutoff so we don't have a zillion rows:
    # todo: allow high to change?
    dataframe = abundance_utils.filter_by_abundance(
        data=dataframe,
        abundance_column='fraction of reads',
        high=1,
        low=low_cutoff,
        taxonomy_column='name_string')

    # Plot as usual, using the stuff developed above.
    # todo: factor some of this??
    def pivot_so_columns_are_plotting_variable(dataframe, groupby):
        return dataframe.pivot(index='name_string',
                               columns=groupby,
                               values='fraction of reads')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot
        :param groupby: column to group on
        :param xrotation: degrees to rotate x labels by
        :param kws: keyword arguments passed on to sns.heatmap
        :return: None
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = pivot_so_columns_are_plotting_variable(
            dataframe=data, groupby=groupby)
        # Pass kwargs to heatmap.
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # `g` is the FacetGrid created further down; it exists by the time
        # map_dataframe() invokes this function.
        g.set_xticklabels(rotation=xrotation)

    # set some plotting parameters
    xrotation = 90

    # Calculate the size, aspect depending on the number of
    # rows per subplot
    num_rows = len(dataframe['name_string'].unique())
    size = 1 + 0.22*num_rows
    aspect = 1.5

    if cap_facet_labels:
        dataframe = capitalize_some_column_names(dataframe)
        facet_var = "Replicate"
        row_var = '$O_2$'
        col_var = "Week"
    else:
        facet_var = 'rep'
        row_var = 'oxy'
        col_var = 'week'

    # todo: this doesn't seem to be changing the font size.  Probably isn't
    # for other plotting calls either!
    # NOTE(review): newer seaborn releases renamed FacetGrid's `size`
    # argument to `height`; confirm which seaborn version this project pins.
    with sns.plotting_context(font_scale=40):
        g = sns.FacetGrid(dataframe,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)

    g.set_axis_labels(col_var)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # MUST SET VMAX or all of the subplots will be on
                        # their own color scale and you might not know it.
                        vmax=dataframe['fraction of reads'].max(),
                        annot=False,
                        groupby=col_var,
                        xrotation=xrotation)

    # modify labels
    # Todo: make the 2nd argument a function
    y_label_formatter(g, italics_unless_other)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # room for colorbar (cbar)
    g.fig.subplots_adjust(right=0.85)

    # add a supertitle, you bet.
    supertitle_base = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        plt.subplots_adjust(top=0.80)
        supertitle = \
            supertitle_base + '.  Min fraction of reads cutoff = {}'.format(
                low_cutoff)
        g.fig.suptitle(supertitle, size=15)

    # Also summarise # of taxa rows being grouped together.

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    filepath = plot_dir + supertitle_base
    filepath += "--min_{}".format(low_cutoff)
    filepath += "--{}".format('x-week')
    filepath += ".pdf"
    print(filepath)
    g.fig.savefig(filepath)
    if svg:
        # Swap the extension by slicing: str.rstrip() strips a *set of
        # characters*, not a suffix, so it is the wrong tool here.
        g.fig.savefig(filepath[:-len("pdf")] + "svg")

    return g
def plot_heatmap_genus(dataframe, high, low, oxy, rep, plot_dir):
    """
    Make a heatmap at Genus, using organisms within the specified
    abundance cutoffs.

    :param dataframe: dataframe to pass
    :param high: highest abundance to include genera for
    :param low: lowest abundance to include genera for
    :param oxy: oxygen tension, "Low" or "High"; the string 'all' keeps
        every oxygen level.
    :param rep: replicates to keep, as a collection accepted by
        ``Series.isin`` (values 1-4); the string 'all' keeps every
        replicate.
    :param plot_dir: directory to save plots in.
    :return: the seaborn FacetGrid that was saved
    """
    # get rid of oxygen levels and replicates if specified.
    # Compare by value: `is not 'all'` tests object identity, which is not
    # guaranteed for equal strings and is a SyntaxWarning on Python >= 3.8.
    if oxy != 'all':
        print("keep only {} oxygen samples".format(oxy))
        dataframe = dataframe[dataframe['oxy'] == oxy]
    # The isinstance guard keeps the test a plain bool even when rep is an
    # array-like whose != would compare element-wise.
    if not (isinstance(rep, str) and rep == 'all'):
        print("keep only replicate levels:", rep)
        dataframe = dataframe[dataframe['rep'].isin(rep)]

    dataframe = abundance_utils.filter_by_abundance(
        data=dataframe,
        abundance_column='fraction of reads',
        high=high,
        low=low)

    # assign() returns a new frame, so neither the caller's dataframe nor a
    # filtered slice of it is modified in place.
    dataframe = dataframe.assign(
        facet_replicate='replicate ' + dataframe['rep'].astype(str))

    # make height of the plot a function of the number of rows (Genera):
    num_data_rows = len(dataframe['Genus'].unique())
    plot_size = 2 + num_data_rows / 7

    # NOTE(review): the original tested `> 9` twice (.65, then .6), so .6
    # always won; that effective behavior is kept.  One of the thresholds
    # may have been meant to be larger -- confirm.
    if num_data_rows > 9:
        plot_aspect = .6
    elif num_data_rows > 6:
        plot_aspect = .85
    else:
        plot_aspect = 2

    def facet_heatmap(data, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe for one facet
        :param kws: keyword arguments passed on to sns.heatmap
        :return: None
        """
        facet_data = data.pivot(index='Genus', columns='week',
                                values='fraction of reads')
        # Pass kwargs to heatmap.  cmap used to be 'Blue'.
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)

    # NOTE(review): newer seaborn releases renamed FacetGrid's `size`
    # argument to `height`; confirm which seaborn version this project pins.
    with sns.plotting_context(font_scale=7):
        g = sns.FacetGrid(dataframe,
                          col='facet_replicate',
                          margin_titles=True,
                          size=plot_size,
                          aspect=plot_aspect)
        g.set_xticklabels(rotation=90)

    # Create a colorbar axes
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # specify vmax = max abundance seen or each will
                        # have its own scale (and you might not know it!)
                        vmax=dataframe['fraction of reads'].max(),
                        )

    g.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
    g.set_axis_labels('week')

    # Add space so the colorbar doesn't overlap the plot
    g.fig.subplots_adjust(right=.9)

    # add a supertitle, you bet.
    plt.subplots_adjust(top=0.80)
    supertitle = str(low) + ' < fraction of reads < ' + str(
        high) + ', {} oxygen'.format(oxy)
    g.fig.suptitle(supertitle, size=18)

    # write a filename and save.
    filename = oxy + "_oxygen--{0}_to_{1}_abundance".format(low, high)
    print('filename:', filename)

    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)

    # save figure
    g.savefig(plot_dir + filename + '.pdf')

    # Returned for consistency with the other heat map functions in this
    # file; callers that ignored the previous None are unaffected.
    return g