Python filter_by_abundance Examples

Programming Language: Python

Namespace/Package Name: abundance_utils

Method/Function: filter_by_abundance

Examples at hotexamples.com: 2

Python filter_by_abundance - 2 examples found. These are the top-rated real-world Python examples of abundance_utils.filter_by_abundance, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: abundance_plot_utils.py Project: JanetMatsen/elvizAnalysis

def heatmap_all_below(dataframe, taxa_dict, plot_dir, low_cutoff=0.001,
                      cap_facet_labels=True,
                      title=False, svg=False):
    """
    Make a faceted heatmap of all the taxa below the taxon named in taxa_dict.

    Rows of the heatmap are labels built from the taxonomic levels below
    the requested level; the plot is saved as a pdf (and optionally an svg).

    :param dataframe: dataframe of data to harvest excerpts from
    :param taxa_dict: a dictionary with taxonomic levels as keys and
    names as values.  E.g. {'Order':['Burkholderiales']}.  Only the first
    key and the first name in its list are used.
    :param plot_dir: path to save plots to; it is passed through
    elviz_utils.prepare_plot_dir before use.
    :param low_cutoff: lowest abundance to include.  A taxa must be above
    this threshold in at least one sample to be included.
    :param cap_facet_labels: if True, rename columns with
    capitalize_some_column_names and facet on 'Replicate' / '$O_2$' /
    'Week'; otherwise facet on 'rep' / 'oxy' / 'week'.
    :param title: if True, add a supertitle describing taxa_dict and the
    cutoff.
    :param svg: if True, also save an svg next to the pdf.
    :return: the seaborn FacetGrid that was drawn and saved.
    """
    # TODO: this function has a lot of commonality with heatmap_from_taxa_dict
    # and could/should be factored.

    # grab the data for that taxa:
    # for now assume just 1 key and 1 value.
    taxa_level = list(taxa_dict.keys())[0]
    taxa_name = list(taxa_dict.values())[0][0]
    # Take an explicit copy: the fillna and the 'name_string' assignment
    # below would otherwise operate on a filtered slice of the caller's
    # frame (chained assignment / SettingWithCopyWarning).
    dataframe = dataframe[dataframe[taxa_level] == taxa_name].copy()
    print(dataframe.head())

    # Columns to form a concatenated label from:
    label_cols = taxonomy_levels_below(taxa_level=taxa_level)
    print('label_cols: {}'.format(label_cols))

    # change nan cells to 'unknown'
    dataframe = dataframe.fillna('unknown')

    # make a summary string representing the taxonomy for everything below

    def label_building_lambda(f, column_value_list, taxa_name):
        """
        Returns a lambda function to make row labels from.

        E.g. with column_value_list = ['Family', 'Genus'] the returned
        function calls f(['Comamonadaceae', 'Curvibacter'], taxa_name)
        for a row holding those values.

        :param f: function to make a lambda out of.
        :param column_value_list: column names whose row values are
        collected into a list and passed to f.
        :param taxa_name: passed to f as its second argument.
        :return: function taking a dataframe row
        """
        print("columns passed: {}".format(column_value_list))
        print("Use those in {}".format(f))
        # Build a real list (not a generator) of the row's values.
        return lambda row: f([row[col] for col in column_value_list],
                             taxa_name)

    # TODO: use the taxa_dict to get the columns to use!
    # make a name_string per row.  It's something like
    # "Comamonadaceae, Curvibacter" or "other"
    dataframe['name_string'] = dataframe.apply(
        label_building_lambda(f=label_from_taxa_colnames,
                              column_value_list=label_cols,
                              taxa_name=taxa_name),
        axis=1)
    print("dataframe.head() for name_string:")
    print(dataframe.head())

    # reduce to only name_string rows with at least one abundance > the
    # threshold set by low_cutoff so we don't have a zillion rows:
    # todo: allow high to change?
    dataframe = \
        abundance_utils.filter_by_abundance(data=dataframe,
                                            abundance_column='fraction of '
                                                             'reads',
                                            high=1,
                                            low=low_cutoff,
                                            taxonomy_column='name_string')

    # Plot as usual, using the stuff developed above.
    # todo: factor some of this??
    def pivot_so_columns_are_plotting_variable(dataframe, groupby):
        """Pivot to name_string rows x groupby columns of read fractions."""
        return dataframe.pivot(index='name_string',
                               columns=groupby,
                               values='fraction of reads')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot
        :param groupby: column to group on
        :param xrotation: rotation applied to the grid's x tick labels
        :param kws: extra keyword arguments forwarded to sns.heatmap
        :return:
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = pivot_so_columns_are_plotting_variable(
            dataframe=data, groupby=groupby)
        # Pass kwargs to heatmap cmap.
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # `g` is the FacetGrid bound later in the enclosing function; it
        # exists by the time map_dataframe invokes this callback.
        g.set_xticklabels(rotation=xrotation)

    # set some plotting parameters
    xrotation = 90
    # Calculate the size, aspect depending on the number of
    #  rows per subplot
    num_rows = len(dataframe['name_string'].unique())
    size = 1 + 0.22*num_rows
    aspect = 1.5

    if cap_facet_labels:
        dataframe = capitalize_some_column_names(dataframe)
        facet_var = "Replicate"
        row_var = '$O_2$'
        col_var = "Week"
    else:
        facet_var = 'rep'
        row_var = 'oxy'
        col_var = 'week'

    # todo: this doesn't seem to be changing the font size.  Probably isn't
    # for other plotting calls either!
    # NOTE(review): seaborn appears to apply font_scale only when a named
    # context is also given -- confirm against the plotting_context docs.
    with sns.plotting_context(font_scale=40):
        g = sns.FacetGrid(dataframe,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)

    g.set_axis_labels(col_var)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # MUST SET VMAX or all of the subplots will be on
                        # their own color scale and you might not know it.
                        vmax=dataframe['fraction of reads'].max(),
                        annot=False,
                        groupby=col_var,
                        xrotation=xrotation)

    # modify labels
    # Todo: make the 2nd argument a function
    y_label_formatter(g, italics_unless_other)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # room for colorbar (cbar)
    g.fig.subplots_adjust(right=0.85)

    # add a supertitle, you bet.
    supertitle_base = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        plt.subplots_adjust(top=0.80)
        supertitle = \
            supertitle_base + '.  Min fraction of reads cutoff = {}'.format(
                low_cutoff)
        g.fig.suptitle(supertitle, size=15)

    # Also summarise # of taxa rows being grouped together.

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    # Keep the extension-less path so the svg name is derived by appending
    # a suffix rather than by stripping characters off the pdf name.
    base_path = plot_dir + supertitle_base
    base_path += "--min_{}".format(low_cutoff)
    base_path += "--{}".format('x-week')
    filepath = base_path + ".pdf"
    print(filepath)
    g.fig.savefig(filepath)

    if svg:
        g.fig.savefig(base_path + ".svg")

    return g

Example #2

Show file

File: abundance_plot_utils.py Project: JanetMatsen/elvizAnalysis

def plot_heatmap_genus(dataframe, high, low, oxy, rep, plot_dir):
    """
    Make a heatmap at Genus, using organisms within the specified abundance
    cutoffs.  One facet is drawn per replicate and the figure is saved as
    a pdf.

    :param dataframe: dataframe to pass
    :param high: highest abundance to include genera for
    :param low: lowest abundance to include genera for
    :param oxy: oxygen tension, "Low" or "High", or 'all' to skip the
    oxygen filter
    :param rep: replicates to keep, as a list-like passed to
    Series.isin (e.g. values from 1-4), or 'all' to skip the replicate
    filter
    :param plot_dir: directory to save plots in.
    :return:
    """
    # get rid of oxygen levels and replicates if specified.
    # Compare by value: `is not 'all'` tested object identity, which only
    # works when the interpreter happens to intern the string.
    if oxy != 'all':
        print("keep only {} oxygen samples".format(oxy))
        dataframe = dataframe[dataframe['oxy'] == oxy]
    # rep may be a list-like, so only compare it to 'all' when it is a
    # string (avoids element-wise comparison on array-like input).
    if not (isinstance(rep, str) and rep == 'all'):
        print("keep only replicate levels:", rep)
        dataframe = dataframe[dataframe['rep'].isin(rep)]
    dataframe = abundance_utils.filter_by_abundance(
        data=dataframe,
        abundance_column='fraction of reads',
        high=high, low=low)
    # Copy before adding a column so we never write into a filtered slice
    # of (or directly into) the caller's dataframe.
    dataframe = dataframe.copy()
    dataframe['facet_replicate'] = 'replicate ' + dataframe['rep'].astype(str)

    # make height of the plot a function of the number of rows (Genera):
    num_data_rows = len(dataframe['Genus'].unique())
    plot_size = 2 + num_data_rows / 7
    # NOTE(review): the original had two consecutive `> 9` checks (.65 then
    # .6), so .65 never took effect; a higher second threshold may have
    # been intended.  The effective values are preserved here.
    if num_data_rows > 9:
        plot_aspect = .6
    elif num_data_rows > 6:
        plot_aspect = .85
    else:
        plot_aspect = 2

    def facet_heatmap(data, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe for one facet; pivoted to Genus rows by
        week columns of 'fraction of reads'
        :param kws: extra keyword arguments forwarded to sns.heatmap
        :return:
        """

        facet_data = data.pivot(index='Genus', columns='week',
                                values='fraction of reads')
        # Pass kwargs to heatmap  cmap used to be 'Blue'
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)

    with sns.plotting_context(font_scale=7):
        g = sns.FacetGrid(dataframe, col='facet_replicate',
                          margin_titles=True,
                          size=plot_size, aspect=plot_aspect)
        g.set_xticklabels(rotation=90)

    # Create a colorbar axes
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # specify vmax = max abundance seen or each will
                        # have its own scale (and you might not know it!)
                        vmax=dataframe['fraction of reads'].max(),
                        )

    g.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
    g.set_axis_labels('week')

    # Add space so the colorbar doesn't overlap the plot
    g.fig.subplots_adjust(right=.9)

    # add a supertitle, you bet.
    plt.subplots_adjust(top=0.80)
    supertitle = str(low) + ' < fraction of reads < ' + str(
        high) + ', {} oxygen'.format(oxy)
    g.fig.suptitle(supertitle, size=18)

    # write a filename and save.
    filename = oxy + "_oxygen--{0}_to_{1}_abundance".format(low, high)
    print('filename:', filename)

    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)

    # save figure
    g.savefig(plot_dir + filename + '.pdf')