コード例 #1
0
def make_figures():
    """
    Produce figures 1 through 5, one after another.

    The plot directory is prepared first via
    elviz_utils.prepare_plot_dir(MAIN_DIR); then each figure-specific
    plotting function is called with its module-level taxa constant.
    MAKE_SVG_TOO is forwarded to every plotting call as make_svg_too.

    :return: None
    """
    # Prepare the plot directory before anything is saved
    # (original note: makes the ./plots/ dir if needed).
    elviz_utils.prepare_plot_dir(MAIN_DIR)

    # Figure 1:
    make_heatmap_for_major_players(MAJOR_PLAYERS, make_svg_too=MAKE_SVG_TOO)

    # Figures 2 and 3 use the same plotting function, one family each.
    family_figures = (
        ('Methylophilaceae', METHYLOPHILACEAE),
        ('Methylococcaceae', METHYLOCOCCACEAE),
    )
    for family_name, family_taxa in family_figures:
        make_heatmap_for_particular_family_with_other(
            family=family_name,
            taxa_dict=family_taxa,
            make_svg_too=MAKE_SVG_TOO)

    # Figure 4: Burkolderiales gets a different kind of plot.
    heatmap_burkolderiales(BURKOLDERIALES, make_svg_too=MAKE_SVG_TOO)

    # Figure 5:
    make_heatmap_for_predators(PREDATORS, make_svg_too=MAKE_SVG_TOO)
コード例 #2
0
def heatmap_from_taxa_dict(dataframe, taxa_dict,
                           title=False,
                           facet='rep', annotate=False,
                           summarise_other=True,
                           main_dir='./',
                           cap_facet_labels=True,
                           plot_dir='./plots/mixed_taxonomy/',
                           size_spec=False,
                           aspect_spec=False,
                           check_totals_sum_to_1=True,
                           svg=False):
    """
    Make a faceted heat map using a taxa_dict.

    The taxa_dict is used to make a summary dataframe using
    aggregate_mixed_taxonomy(), and the result is plotted as a seaborn
    FacetGrid of heat maps that all share one color scale.

    :param dataframe: dataframe to source all data from
    :param taxa_dict: a dictionary with taxonomic levels as keys and
    names as values.  E.g. {'Phylum':['Bacteroidetes'],
    'Order':['Burkholderiales','Methylophilales', 'Methylococcales']}
    :param title: add a supertitle built from taxa_dict?
    :param facet: The variable to facet the subplots by.  'week' facets by
    week (replicates become the heat map columns); any other value is
    treated as 'rep' (weeks become the heat map columns).
    :param annotate: print numerical values inside each square?  (Makes big
    plots *really* big; not recommended for default use.)
    :param summarise_other: include a row for "other"?  (Or just don't show)
    :param main_dir: main dir to consider "home"; forwarded to
    aggregate_mixed_taxonomy().
    :param cap_facet_labels: use the column names produced by
    capitalize_some_column_names() ('Replicate', 'Week', '$O_2$') for the
    facet labels.  Only supported together with facet='rep'.
    :param plot_dir: directory to save plots in; passed through
    elviz_utils.prepare_plot_dir() and the returned value is used as the
    filename prefix.
    :param size_spec: manually specify the figure size (useful when default
    is ugly)
    :param aspect_spec: manually specify the figure aspect ratio (useful
    when default is ugly)
    :param check_totals_sum_to_1: forwarded to aggregate_mixed_taxonomy().
    :param svg: also save an .svg next to the .pdf?
    :raises ValueError: if cap_facet_labels is True and facet is not 'rep'.
    :return: the seaborn FacetGrid (the figure is also saved to disk)
    """
    # Fail fast with a clear message.  Previously this combination did all
    # of the aggregation work, printed a warning, and then died with a
    # NameError because the facet/row/column variables were never assigned.
    if cap_facet_labels and facet != 'rep':
        raise ValueError(
            "cap_facet_labels=True is only set up for facet='rep' "
            "(got facet={!r}); pass cap_facet_labels=False to facet by "
            "another variable.".format(facet))

    # Cherry pick out the rows for the specified taxa.
    # If you give conflicting taxa as input, aggregate_mixed_taxonomy() will
    # throw an error.
    plot_data = aggregate_mixed_taxonomy(
        dataframe=dataframe,
        taxa_dict=taxa_dict,
        main_dir=main_dir,
        summarise_other=summarise_other,
        check_totals_sum_to_1=check_totals_sum_to_1)

    # Store the maximum abundance level.  Every sub-heat map is told to use
    # this same maximum so they aren't each on their own color scale.
    max_abundance = plot_data['abundance sum'].max()

    print('plot_data.head()')
    print(plot_data.head())

    def pivot_so_columns_are_plotting_variable(dataframe, groupby):
        # One row per taxonomic name, one column per value of `groupby`.
        return dataframe.pivot(index='taxonomic name',
                               columns=groupby,
                               values='abundance sum')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot (the subset for one facet)
        :param groupby: column to use as the heat map columns
        :param xrotation: degrees to rotate x labels by
        :param kws: keyword arguments passed on to sns.heatmap
        :return: None
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = pivot_so_columns_are_plotting_variable(
            dataframe=data, groupby=groupby)
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # `g` is the FacetGrid created further down; it exists by the time
        # map_dataframe() calls this function.
        g.set_xticklabels(rotation=xrotation)

    # todo: add a label at the bottom like "replicate" or "week"

    # Control plot aesthetics depending on facet option.  The size depends
    # on the number of rows (taxa) in each subplot.
    num_rows = len(plot_data['taxonomic name'].unique())
    space_for_cbar = 0.85
    if facet == 'week':
        xrotation = 0
        size = 2 * 0.2*num_rows
        aspect = 1
    else:  # (facet = "rep")
        xrotation = 90
        size = 1 + 0.22*num_rows
        aspect = 1.5  # aspect for each sub-plot, not a single tile

    if size_spec:
        size = size_spec
    if aspect_spec:
        aspect = aspect_spec

    print(plot_data.head())

    # Pick the FacetGrid column/row variables and the heat map column
    # variable.
    if cap_facet_labels:
        # facet == 'rep' is guaranteed by the check at the top.
        plot_data = capitalize_some_column_names(plot_data)
        facet_var = "Replicate"
        row_var = '$O_2$'
        col_var = 'Week'
    else:
        row_var = 'oxy'
        # The variable not used as the facet becomes the columns inside
        # each subplot.  (Previously `facet` was ignored here and the grid
        # was always faceted by 'rep'.)
        if facet == 'week':
            facet_var, col_var = 'week', 'rep'
        else:
            facet_var, col_var = 'rep', 'week'

    # NOTE(review): newer seaborn releases renamed FacetGrid's `size`
    # argument to `height`; confirm against the seaborn version in use.
    with sns.plotting_context(font_scale=8):
        g = sns.FacetGrid(plot_data,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.92, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax,
                        # NEED vmax = MAX ABUNDANCE or each plot will have
                        # its own color scale!
                        vmin=0, vmax=max_abundance,
                        annot=annotate,
                        groupby=col_var,
                        xrotation=xrotation)

    g.set_axis_labels(col_var)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # todo: add an x-label for each facet (I want only 1)

    # Add space so the colorbar doesn't overlap the plot.
    g.fig.subplots_adjust(right=space_for_cbar)
    # todo: still not enough room for
    # Order-Burkholderiales_Methylophilales_Methylococcales--
    # Phylum-Bacteroidetes--rep.pdf

    # Format the y strings in each subplot of the Seaborn grid.  The
    # formatter function is passed uncalled (no parentheses).
    y_label_formatter(g, italics_unless_other)

    supertitle = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        # TODO: they are currently being converted to LaTeX
        # Adjust this grid's own figure rather than pyplot's "current"
        # figure, then add the supertitle.
        g.fig.subplots_adjust(top=0.80)
        g.fig.suptitle(supertitle, size=16)

    # Tight layout --> title and cbar overlap heat maps.  Boo.
    # NO: plt.tight_layout()
    g.fig.subplots_adjust(wspace=.05, hspace=.05)

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    print("plot directory: {}".format(plot_dir))
    basepath = plot_dir + supertitle + "--{}".format(facet)
    if annotate:
        basepath += "--annotated"
    filepath = basepath + ".pdf"
    print(filepath)
    g.fig.savefig(filepath)
    if svg:
        # Build the .svg name from the shared base instead of
        # str.rstrip("pdf"), which strips a character set, not a suffix.
        g.fig.savefig(basepath + ".svg")

    return g
コード例 #3
0
def heatmap_all_below(dataframe, taxa_dict, plot_dir, low_cutoff=0.001,
                      cap_facet_labels=True,
                      title=False, svg=False):
    """
    Make a heatmap of all the taxa below the taxa specified in taxa_dict.

    :param dataframe: dataframe of data to harvest excerpts from.  It is
    not modified; the function works on a filtered copy.
    :param taxa_dict: a dictionary with taxonomic levels as keys and
    names as values.  E.g. {'Order':['Burkholderiales']}.  Only the first
    key and the first name under it are used.
    :param plot_dir: directory to save plots in; passed through
    elviz_utils.prepare_plot_dir() and the returned value is used as the
    filename prefix.
    :param low_cutoff: lowest abundance to include.  A taxa must be above
    this threshold in at least one sample to be included.
    :param cap_facet_labels: use the column names produced by
    capitalize_some_column_names() ('Replicate', 'Week', '$O_2$') for the
    facet labels instead of 'rep', 'week', 'oxy'.
    :param title: add a supertitle describing taxa_dict and the cutoff?
    :param svg: also save an .svg next to the .pdf?
    :raises ValueError: if no row of dataframe matches the requested taxon.
    :return: the seaborn FacetGrid (the figure is also saved to disk)
    """
    # TODO: this function has a lot of commonality with heatmap_from_taxa_dict
    # and could/should be factored.

    # grab the data for that taxa:
    # for now assume just 1 key and 1 value.
    taxa_level = list(taxa_dict.keys())[0]
    taxa_name = list(taxa_dict.values())[0][0]
    # Take an explicit copy: the fillna and column assignment below would
    # otherwise act on a slice of the caller's frame and trigger pandas'
    # SettingWithCopyWarning.
    dataframe = dataframe[dataframe[taxa_level] == taxa_name].copy()
    if dataframe.empty:
        raise ValueError(
            "no rows found with {} == {!r}".format(taxa_level, taxa_name))
    print(dataframe.head())

    # Columns to form a concatenated label from:
    label_cols = taxonomy_levels_below(taxa_level=taxa_level)
    print('label_cols: {}'.format(label_cols))

    # change nan cells to 'unknown'
    dataframe = dataframe.fillna('unknown')

    print("columns passed: {}".format(label_cols))
    print("Use those in {}".format(label_from_taxa_colnames))

    def row_label(row):
        # Hand the row's values for the lower taxonomic levels (as a list)
        # to label_from_taxa_colnames, e.g. ['Comamonadaceae', 'Curvibacter']
        # when label_cols is ['Family', 'Genus'].
        return label_from_taxa_colnames([row[col] for col in label_cols],
                                        taxa_name)

    # TODO: use the taxa_dict to get the columns to use!
    # make a name_string per row.  It's something like
    # "Comamonadaceae, Curvibacter" or "other"
    dataframe['name_string'] = dataframe.apply(row_label, axis=1)
    print("dataframe.head() for name_string:")
    print(dataframe.head())

    # reduce to only name_string rows with at least one abundance > the
    # threshold set by low_cutoff so we don't have a zillion rows:
    # todo: allow high to change?
    dataframe = abundance_utils.filter_by_abundance(
        data=dataframe,
        abundance_column='fraction of reads',
        high=1,
        low=low_cutoff,
        taxonomy_column='name_string')

    # Plot as usual, using the stuff developed above.
    # todo: factor some of this??
    def pivot_so_columns_are_plotting_variable(dataframe, groupby):
        # One row per name_string, one column per value of `groupby`.
        return dataframe.pivot(index='name_string',
                               columns=groupby,
                               values='fraction of reads')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot (the subset for one facet)
        :param groupby: column to use as the heat map columns
        :param xrotation: degrees to rotate x labels by
        :param kws: keyword arguments passed on to sns.heatmap
        :return: None
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = pivot_so_columns_are_plotting_variable(
            dataframe=data, groupby=groupby)
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # `g` is the FacetGrid created further down; it exists by the time
        # map_dataframe() calls this function.
        g.set_xticklabels(rotation=xrotation)

    # set some plotting parameters
    xrotation = 90
    # Calculate the size depending on the number of rows per subplot
    num_rows = len(dataframe['name_string'].unique())
    size = 1 + 0.22*num_rows
    aspect = 1.5  # aspect for each sub-plot

    if cap_facet_labels:
        dataframe = capitalize_some_column_names(dataframe)
        facet_var = "Replicate"
        row_var = '$O_2$'
        col_var = "Week"
    else:
        facet_var = 'rep'
        row_var = 'oxy'
        col_var = 'week'

    # todo: this doesn't seem to be changing the font size.  Probably isn't
    # for other plotting calls either!
    # NOTE(review): newer seaborn releases renamed FacetGrid's `size`
    # argument to `height`; confirm against the seaborn version in use.
    with sns.plotting_context(font_scale=40):
        g = sns.FacetGrid(dataframe,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)

    g.set_axis_labels(col_var)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # MUST SET VMAX or all of the subplots will be on
                        # their own color scale and you might not know it.
                        vmax=dataframe['fraction of reads'].max(),
                        annot=False,
                        groupby=col_var,
                        xrotation=xrotation)

    # modify labels
    # Todo: make the 2nd argument a function
    y_label_formatter(g, italics_unless_other)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # room for colorbar (cbar)
    g.fig.subplots_adjust(right=0.85)

    # add a supertitle, you bet.
    supertitle_base = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        # Adjust this grid's own figure rather than pyplot's "current"
        # figure.
        g.fig.subplots_adjust(top=0.80)
        supertitle = \
            supertitle_base + '.  Min fraction of reads cutoff = {}'.format(
                low_cutoff)
        g.fig.suptitle(supertitle, size=15)

    # Also summarise # of taxa rows being grouped together.

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    basepath = plot_dir + supertitle_base
    basepath += "--min_{}".format(low_cutoff)
    basepath += "--{}".format('x-week')
    filepath = basepath + ".pdf"
    print(filepath)
    g.fig.savefig(filepath)

    if svg:
        # Build the .svg name from the shared base instead of
        # str.rstrip("pdf"), which strips a character set, not a suffix.
        g.fig.savefig(basepath + ".svg")

    return g
コード例 #4
0
def plot_heatmap_genus(dataframe, high, low, oxy, rep, plot_dir):
    """
    Make a heatmap at Genus, using organisms within the specified abundance
    cutoffs.

    :param dataframe: dataframe to source data from; it must have 'oxy',
    'rep', 'week', 'Genus' and 'fraction of reads' columns.
    :param high: highest abundance to include genera for
    :param low: lowest abundance to include genera for
    :param oxy: oxygen tension to keep (compared against the 'oxy' column),
    or 'all' to keep every oxygen level.  Must be a string: it is also
    used to build the title and filename.
    :param rep: list-like of replicates to keep (used with Series.isin),
    or 'all' to keep every replicate.
    :param plot_dir: directory to save plots in; passed through
    elviz_utils.prepare_plot_dir() and the returned value is used as the
    filename prefix.
    :return: None (the figure is saved as a pdf)
    """
    # get rid of oxygen levels and replicates if specified.
    # Compare by value: `x is not 'all'` tests object identity, which only
    # works by accident of string interning (and is a SyntaxWarning on
    # Python >= 3.8).  The isinstance guard keeps list/array values of
    # `rep` from being compared element-wise.
    keep_all_oxy = isinstance(oxy, str) and oxy == 'all'
    keep_all_reps = isinstance(rep, str) and rep == 'all'
    if not keep_all_oxy:
        print("keep only {} oxygen samples".format(oxy))
        dataframe = dataframe[dataframe['oxy'] == oxy]
    if not keep_all_reps:
        print("keep only replicate levels:", rep)
        dataframe = dataframe[dataframe['rep'].isin(rep)]
    dataframe = abundance_utils.filter_by_abundance(
        data=dataframe,
        abundance_column='fraction of reads',
        high=high, low=low)
    # assign() returns a new frame, so the label column is never written
    # onto a slice of the caller's data.
    dataframe = dataframe.assign(
        facet_replicate='replicate ' + dataframe['rep'].astype(str))

    # make height of the plot a function of the number of rows (Genera):
    num_data_rows = len(dataframe['Genus'].unique())
    plot_size = 2 + num_data_rows / 7
    # NOTE(review): the original had two consecutive `> 9` checks (.65 then
    # .6), so the .65 value could never take effect.  The effective
    # behavior is kept here; the second threshold was probably meant to be
    # larger - confirm the intended cutoff.
    if num_data_rows > 9:
        plot_aspect = .6
    elif num_data_rows > 6:
        plot_aspect = .85
    else:
        plot_aspect = 2

    def facet_heatmap(data, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot (the subset for one facet)
        :param kws: keyword arguments passed on to sns.heatmap
        :return: None
        """
        facet_data = data.pivot(index='Genus', columns='week',
                                values='fraction of reads')
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)

    # NOTE(review): newer seaborn releases renamed FacetGrid's `size`
    # argument to `height`; confirm against the seaborn version in use.
    with sns.plotting_context(font_scale=7):
        g = sns.FacetGrid(dataframe, col='facet_replicate',
                          margin_titles=True,
                          size=plot_size, aspect=plot_aspect)

    # Create a colorbar axes.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # specify vmax = max abundance seen or each will
                        # have its own scale (and you might not know it!)
                        vmax=dataframe['fraction of reads'].max(),
                        )

    # Rotate the x tick labels once the heat maps have been drawn; doing
    # this on the still-empty grid acted on placeholder ticks that
    # sns.heatmap replaces afterwards.
    g.set_xticklabels(rotation=90)

    g.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
    g.set_axis_labels('week')

    # Add space so the colorbar doesn't overlap the plot
    g.fig.subplots_adjust(right=.9)

    # add a supertitle, you bet.  Adjust this grid's own figure rather than
    # pyplot's "current" figure.
    g.fig.subplots_adjust(top=0.80)
    supertitle = str(low) + ' < fraction of reads < ' + str(
        high) + ', {} oxygen'.format(oxy)
    g.fig.suptitle(supertitle, size=18)

    # write a filename and save.
    filename = oxy + "_oxygen--{0}_to_{1}_abundance".format(low, high)
    print('filename:', filename)

    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)

    # save figure
    g.savefig(plot_dir + filename + '.pdf')