def heatmap_all_below(dataframe, taxa_dict, plot_dir, low_cutoff=0.001,
                      cap_facet_labels=True, title=False, svg=False):
    """
    Make a faceted heatmap of all the taxa below the taxa specified in
    taxa_dict, and save it as a .pdf (optionally also as a .svg).

    :param dataframe: dataframe of data to harvest excerpts from.  It is
        filtered into a private copy, so the caller's frame is not modified
        by this function's own fillna/column assignment.
    :param taxa_dict: a dictionary with taxonomic levels as keys and lists
        of names as values.  E.g. {'Order':['Burkholderiales']}.
        Only the first key and the first name under it are used for now.
    :param plot_dir: directory to save plots to; it is passed through
        elviz_utils.prepare_plot_dir() and the file name is appended to
        whatever that returns.
    :param low_cutoff: lowest abundance to include.  A taxa must be above
        this threshold in at least one sample to be included.
    :param cap_facet_labels: if True, run capitalize_some_column_names() on
        the data and facet on "Replicate" / "$O_2$" / "Week"; otherwise
        facet on the raw 'rep' / 'oxy' / 'week' columns.
    :param title: if True, add a supertitle describing taxa_dict and the
        low_cutoff used.
    :param svg: if True, also save a .svg next to the .pdf.
    :return: the seaborn FacetGrid that was drawn and saved.
    """
    # TODO: this function has a lot of commonality with heatmap_from_taxa_dict
    # and could/should be factored.

    # grab the data for that taxa:
    # for now assume just 1 key and 1 value.
    taxa_level = list(taxa_dict.keys())[0]
    taxa_name = list(taxa_dict.values())[0][0]

    # Take an explicit copy: the rows selected by the boolean mask get
    # modified below (fillna, new 'name_string' column) and doing that on an
    # un-copied selection triggers pandas' SettingWithCopyWarning.
    dataframe = dataframe[dataframe[taxa_level] == taxa_name].copy()
    print(dataframe.head())

    # Columns to form a concatenated label from:
    label_cols = taxonomy_levels_below(taxa_level=taxa_level)
    print('label_cols: {}'.format(label_cols))

    # change nan cells to 'unknown'
    dataframe = dataframe.fillna('unknown')

    # make a summary string representing the taxonomy for everything below
    def label_building_lambda(f, column_value_list, taxa_name):
        """
        Returns a lambda function to make row labels from.

        :param f: function to make a lambda out of.  It is called as
            f(list_of_row_values, taxa_name).
        :param column_value_list: column names whose values (for one row)
            are collected into the list passed to f.
        :param taxa_name: name of the parent taxa, passed through to f.
        :return: function taking a dataframe row and returning f's result.
        """
        print("columns passed: {}".format(column_value_list))
        print("Use those in {}".format(f))
        # Build a list of the row's values for the requested columns and
        # hand it to f, e.g. something like
        #   f(['Comamonadaceae', 'Curvibacter'], taxa_name)
        # for a row of a dataframe when the columns are ['Family', 'Genus'].
        return lambda row: f([row[col] for col in column_value_list],
                             taxa_name)

    # TODO: use the taxa_dict to get the columns to use!

    # make a name_string per row.  It's something like
    # "Comamonadaceae, Curvibacter" or "other"
    dataframe['name_string'] = dataframe.apply(
        label_building_lambda(f=label_from_taxa_colnames,
                              column_value_list=label_cols,
                              taxa_name=taxa_name),
        axis=1)
    print("dataframe.head() for name_string:")
    print(dataframe.head())

    # reduce to only name_string rows with at least one abundance > the
    # threshold set by low_cutoff so we don't have a zillion rows:
    # todo: allow high to change?
    dataframe = abundance_utils.filter_by_abundance(
        data=dataframe,
        abundance_column='fraction of reads',
        high=1,
        low=low_cutoff,
        taxonomy_column='name_string')

    # Plot as usual, using the stuff developed above.
    # todo: factor some of this??
    def pivot_so_columns_are_plotting_variable(dataframe, groupby):
        # rows = taxa labels, columns = the plotting variable (e.g. week)
        return dataframe.pivot(index='name_string',
                               columns=groupby,
                               values='fraction of reads')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe subset for one facet
        :param groupby: column to spread across the heatmap's x axis
        :param xrotation: rotation (degrees) for the x tick labels
        :param kws: remaining keyword arguments, forwarded to sns.heatmap
        :return: None
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = pivot_so_columns_are_plotting_variable(
            dataframe=data, groupby=groupby)
        # Pass kwargs to heatmap
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # `g` is the FacetGrid from the enclosing scope; it is assigned
        # before map_dataframe() invokes this function.
        g.set_xticklabels(rotation=xrotation)

    # set some plotting parameters
    xrotation = 90

    # Calculate the size, aspect depending on the number of
    # rows per subplot
    num_rows = len(dataframe['name_string'].unique())
    size = 1 + 0.22*num_rows
    aspect = 1.5

    if cap_facet_labels:
        dataframe = capitalize_some_column_names(dataframe)
        facet_var = "Replicate"
        row_var = '$O_2$'
        col_var = "Week"
    else:
        facet_var = 'rep'
        row_var = 'oxy'
        col_var = 'week'

    # todo: this doesn't seem to be changing the font size.  Probably isn't
    # for other plotting calls either!
    # NOTE(review): seaborn appears to apply font_scale only when a named
    # context (e.g. "notebook") is given as well -- confirm against the
    # seaborn docs before changing; fixing it will change every plot.
    with sns.plotting_context(font_scale=40):
        # NOTE(review): newer seaborn releases call this argument `height`
        # instead of `size` -- confirm against the pinned seaborn version.
        g = sns.FacetGrid(dataframe,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)
        g.set_axis_labels(col_var)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax,
                        vmin=0,
                        # MUST SET VMAX or all of the subplots will be on
                        # their own color scale and you might not know it.
                        vmax=dataframe['fraction of reads'].max(),
                        annot=False,
                        groupby=col_var,
                        xrotation=xrotation)

    # modify labels
    # Todo: make the 2nd argument a function
    y_label_formatter(g, italics_unless_other)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # room for colorbar (cbar)
    g.fig.subplots_adjust(right=0.85)

    # add a supertitle, you bet.
    supertitle_base = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        # Adjust this grid's own figure rather than whatever pyplot
        # currently considers the active figure.
        g.fig.subplots_adjust(top=0.80)
        supertitle = \
            supertitle_base + '. Min fraction of reads cutoff = {}'.format(
                low_cutoff)
        g.fig.suptitle(supertitle, size=15)

    # Also summarise # of taxa rows being grouped together.

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    # Build the extension-less stem once so the .pdf and .svg names are
    # derived the same way (str.rstrip("pdf") strips a *set of characters*,
    # not a suffix, so it is not a safe way to swap extensions).
    file_stem = plot_dir + supertitle_base
    file_stem += "--min_{}".format(low_cutoff)
    file_stem += "--{}".format('x-week')
    filepath = file_stem + ".pdf"
    print(filepath)
    g.fig.savefig(filepath)
    if svg:
        g.fig.savefig(file_stem + ".svg")

    return g
def plot_heatmap_genus(dataframe, high, low, oxy, rep, plot_dir):
    """
    Make a heatmap at Genus, using organisms within the specified abundance
    cutoffs, with one facet per replicate, and save it as a .pdf.

    :param dataframe: dataframe to plot from.  The columns 'oxy', 'rep',
        'Genus', 'week' and 'fraction of reads' are read here.
    :param high: highest abundance to include genera for
    :param low: lowest abundance to include genera for
    :param oxy: oxygen tension, "Low" or "High", or 'all' to skip the
        oxygen filter
    :param rep: replicates to keep (a list-like handed to Series.isin), or
        the string 'all' to skip the replicate filter
    :param plot_dir: directory to save plots in; passed through
        elviz_utils.prepare_plot_dir().
    :return: None
    """
    # get rid of oxygen levels and replicates if specified.
    # Compare by value: `is not 'all'` tests object identity, which only
    # works when the caller's string happens to be the same interned object.
    if oxy != 'all':
        print("keep only {} oxygen samples".format(oxy))
        dataframe = dataframe[dataframe['oxy'] == oxy]
    # rep may be list-like, so only treat it as the sentinel when it really
    # is the string 'all'.
    if not (isinstance(rep, str) and rep == 'all'):
        print("keep only replicate levels:", rep)
        dataframe = dataframe[dataframe['rep'].isin(rep)]

    dataframe = abundance_utils.filter_by_abundance(
        data=dataframe,
        abundance_column='fraction of reads',
        high=high,
        low=low)

    # assign() returns a new frame, so the label column is never written
    # onto a filtered selection of (or directly into) the caller's data.
    dataframe = dataframe.assign(
        facet_replicate='replicate ' + dataframe['rep'].astype(str))

    # make height of the plot a function of the number of rows (Genera):
    num_data_rows = len(dataframe['Genus'].unique())
    plot_size = 2 + num_data_rows / 7

    # Narrow the facets as the number of rows grows.
    # NOTE(review): the original had two consecutive `> 9` branches (.65
    # then .6), so .65 was never the final value.  The effective behaviour
    # (.6 above 9 rows) is kept; a higher second threshold was probably
    # intended -- confirm.
    plot_aspect = 2
    if num_data_rows > 6:
        plot_aspect = .85
    if num_data_rows > 9:
        plot_aspect = .6

    def facet_heatmap(data, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe subset for one facet
        :param kws: keyword arguments forwarded to sns.heatmap
        :return: None
        """
        facet_data = data.pivot(index='Genus', columns='week',
                                values='fraction of reads')
        # Pass kwargs to heatmap.  cmap used to be 'Blue'
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)

    # NOTE(review): seaborn appears to apply font_scale only when a named
    # context is given as well -- confirm before relying on it.
    with sns.plotting_context(font_scale=7):
        # NOTE(review): newer seaborn releases call this argument `height`
        # instead of `size` -- confirm against the pinned seaborn version.
        g = sns.FacetGrid(dataframe,
                          col='facet_replicate',
                          margin_titles=True,
                          size=plot_size,
                          aspect=plot_aspect)
        g.set_xticklabels(rotation=90)

    # Create a colorbar axes.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax,
                        vmin=0,
                        # specify vmax = max abundance seen or each will
                        # have its own scale (and you might not know it!)
                        vmax=dataframe['fraction of reads'].max(),
                        )

    g.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
    g.set_axis_labels('week')

    # Add space so the colorbar doesn't overlap the plot
    g.fig.subplots_adjust(right=.9)

    # add a supertitle, you bet.
    # Adjust this grid's own figure rather than whatever pyplot currently
    # considers the active figure.
    g.fig.subplots_adjust(top=0.80)
    supertitle = str(low) + ' < fraction of reads < ' + str(
        high) + ', {} oxygen'.format(oxy)
    g.fig.suptitle(supertitle, size=18)

    # write a filename and save.
    filename = oxy + "_oxygen--{0}_to_{1}_abundance".format(low, high)
    print('filename:', filename)

    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)

    # save figure
    g.savefig(plot_dir + filename + '.pdf')