예제 #1
0
def taxa_abundance_bar_plot(taxa,
                            metadata=None,
                            level=1,
                            group=None,
                            by=None,
                            ax=None,
                            figsize=None,
                            width=0.8,
                            count=0,
                            exclude_samples=None,
                            include_samples=None,
                            exclude_taxa=None,
                            sort_by_names=False,
                            colors=None,
                            label_columns=None,
                            orders=None,
                            sample_names=None,
                            csv_file=None,
                            taxa_names=None,
                            sort_by_mean1=True,
                            sort_by_mean2=True,
                            sort_by_mean3=True,
                            show_others=True,
                            cmap_name='Accent',
                            legend_short=False,
                            artist_kwargs=None):
    """Create a bar plot showing relative taxa abundance.

    The input visualization may already contain metadata, but you can
    update it with the ``metadata`` option.

    By default, the method will create a bar for each sample. Use the
    ``group`` option to create a bar for each sample group.

    +----------------+-----------------------------------------------------+
    | q2-taxa plugin | Example                                             |
    +================+=====================================================+
    | QIIME 2 CLI    | qiime taxa barplot [OPTIONS]                        |
    +----------------+-----------------------------------------------------+
    | QIIME 2 API    | from qiime2.plugins.taxa.visualizers import barplot |
    +----------------+-----------------------------------------------------+

    Parameters
    ----------
    taxa : str or qiime2.Visualization
        Visualization file or object from the q2-taxa plugin.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object.
    level : int, default: 1
        Taxonomic level at which the features should be collapsed.
    group : str, optional
        Metadata column to be used for grouping the samples.
    by : list, optional
        Column name(s) to be used for sorting the samples. Using 'sample-id'
        will sort the samples by their name, in addition to other column
        name(s) that may have been provided. If multiple items are provided,
        sorting will occur by the order of the items.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto. If omitted, a new figure and
        Axes are created with ``figsize``.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    width : float, default: 0.8
        The width of the bars.
    count : int, default: 0
        The number of taxa to display. When 0, display all.
    exclude_samples : dict, optional
        Filtering logic used for sample exclusion.
        Format: {'col': ['item', ...], ...}.
    include_samples : dict, optional
        Filtering logic used for sample inclusion.
        Format: {'col': ['item', ...], ...}.
    exclude_taxa : list, optional
        The taxa names to be excluded when matched. Case insensitive;
        a column is dropped when any given name is a substring of it.
    sort_by_names : bool, default: False
        If true, sort the columns (i.e. species) to be displayed by name.
    colors : list, optional
        The bar colors.
    label_columns : list, optional
        The column names to be used as the x-axis labels.
    orders : dict, optional
        Dictionary of {column1: [element1, element2, ...], column2:
        [element1, element2...], ...} to indicate the order of items. Used to
        sort the samples by the user-specified order instead of ordering
        numerically or alphabetically.
    sample_names : list, optional
        List of sample IDs to be included.
    csv_file : str, optional
        Path of the .csv file to output the dataframe to.
    taxa_names : list, optional
        List of taxa names to be displayed.
    sort_by_mean1 : bool, default: True
        Sort taxa by their mean relative abundance before sample filtration.
    sort_by_mean2 : bool, default: True
        Sort taxa by their mean relative abundance after sample filtration by
        'include_samples' or 'exclude_samples'.
    sort_by_mean3 : bool, default: True
        Sort taxa by their mean relative abundance after sample filtration by
        'sample_names'.
    show_others : bool, default: True
        Include the 'Others' category.
    cmap_name : str, default: 'Accent'
        Name of the colormap passed to `matplotlib.pyplot.get_cmap()`. The
        colormap's ``colors`` attribute is used, so it must be a listed
        colormap (e.g. 'Accent', 'tab10', 'tab20'). Ignored when ``colors``
        is a list.
    legend_short : bool, default: False
        If true, only display the smallest taxa rank in the legend.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    Raises
    ------
    ValueError
        If the values given for a column in ``orders`` do not match the
        unique values found in that column.

    See Also
    --------
    taxa_abundance_box_plot

    Examples
    --------
    Below is a simple example showing taxonomic abundance at the kingdom
    level (i.e. ``level=1``), which is the default taxonomic rank.

    >>> qzv_file = f'{data_dir}/moving-pictures-tutorial/taxa-bar-plots.qzv'
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               figsize=(10, 7),
    ...                               artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-1.png

    We can change the taxonomic rank from kingdom to genus by setting
    ``level=6``. Note that I removed ``show_legend=True`` because
    otherwise there will be too many taxa to display on the legend.
    Note also that the colors are recycled in each bar.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               figsize=(10, 7),
    ...                               level=6)
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-2.png

    We can only show the top seven most abundant genera plus 'Others' with
    ``count=8``.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               figsize=(10, 7),
    ...                               level=6,
    ...                               count=8,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-3.png

    We can plot the figure and the legend separately.

    >>> fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(12, 7), gridspec_kw={'width_ratios': [9, 1]})
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               level=6,
    ...                               count=8)
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               level=6,
    ...                               count=8,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(legend_only=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-4.png

    We can use a different color map to display more unique genera (e.g. 20).

    >>> fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(12, 7), gridspec_kw={'width_ratios': [9, 1]})
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               level=6,
    ...                               count=20,
    ...                               cmap_name='tab20')
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               level=6,
    ...                               count=20,
    ...                               cmap_name='tab20',
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(legend_only=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-5.png

    We can sort the samples by the body-site column in metadata with
    ``by=['body-site']``. To check whether the sorting worked properly,
    we can change the x-axis tick labels to include each sample's
    body-site with ``label_columns``.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               by=['body-site'],
    ...                               label_columns=['body-site', 'sample-id'],
    ...                               figsize=(10, 7),
    ...                               level=6,
    ...                               count=8,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-6.png

    If you want to sort the samples in a certain order instead of ordering
    numerically or alphabetically, use the ``orders`` option.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               by=['body-site'],
    ...                               label_columns=['body-site', 'sample-id'],
    ...                               figsize=(10, 7),
    ...                               level=6,
    ...                               count=8,
    ...                               orders={'body-site': ['left palm', 'tongue', 'gut', 'right palm']},
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-7.png

    We can only display the 'gut' and 'tongue' samples with
    ``include_samples``.

    >>> fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(9, 7), gridspec_kw={'width_ratios': [9, 1]})
    >>> kwargs = dict(include_samples={'body-site': ['gut', 'tongue']},
    ...                 by=['body-site'],
    ...                 label_columns=['body-site', 'sample-id'],
    ...                 level=6,
    ...                 count=8)
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               **kwargs)
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               **kwargs,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(legend_only=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-8.png

    We can make multiple bar charts grouped by body-site. When making a
    grouped bar chart, it's important to include ``sort_by_mean2=False``
    in order to have the same bar colors for the same taxa across different
    groups.

    >>> fig, [ax1, ax2, ax3, ax4, ax5] = plt.subplots(1, 5, figsize=(16, 7), gridspec_kw={'width_ratios': [2, 2, 2, 2, 1]})
    >>> kwargs = dict(level=6, count=8, sort_by_mean2=False)
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               include_samples={'body-site': ['gut']},
    ...                               **kwargs,
    ...                               artist_kwargs=dict(title='gut'))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               include_samples={'body-site': ['left palm']},
    ...                               **kwargs,
    ...                               artist_kwargs=dict(title='left palm',
    ...                                                  hide_ylabel=True,
    ...                                                  hide_yticks=True))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax3,
    ...                               include_samples={'body-site': ['right palm']},
    ...                               **kwargs,
    ...                               artist_kwargs=dict(title='right palm',
    ...                                                  hide_ylabel=True,
    ...                                                  hide_yticks=True))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax4,
    ...                               include_samples={'body-site': ['tongue']},
    ...                               **kwargs,
    ...                               artist_kwargs=dict(title='tongue',
    ...                                                  hide_ylabel=True,
    ...                                                  hide_yticks=True))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax5,
    ...                               **kwargs,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(legend_only=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-9.png

    We can select specific samples with ``sample_names``. We can also
    manually set the x-axis tick labels with ``xticklabels``. Finally, you
    can pick specific colors for the bars.

    >>> fig, [ax1, ax2, ax3] = plt.subplots(1, 3, figsize=(10, 5))
    >>> kwargs = dict(level=6, count=3, legend_short=True, sample_names=['L2S382', 'L4S112'])
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               **kwargs,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper right',
    ...                                                  title="sample_names=['L2S382', 'L4S112']"))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               **kwargs,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper right',
    ...                                                  title="xticklabels=['A', 'B']",
    ...                                                  xticklabels=['A', 'B']))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax3,
    ...                               colors=['tab:blue', 'tab:orange', 'tab:gray'],
    ...                               **kwargs,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper right',
    ...                                                  title="colors=['tab:blue', 'tab:orange', 'tab:gray']"))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-10.png

    Finally, we can create a bar for each sample type.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               level=6,
    ...                               count=8,
    ...                               group='body-site',
    ...                               figsize=(10, 7),
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-11.png
    """
    # The per-level table is read while the temporary directory still
    # exists; afterwards only the in-memory dataframe is needed.
    with tempfile.TemporaryDirectory() as t:
        _parse_input(taxa, t)
        df = pd.read_csv(f'{t}/level-{level}.csv', index_col=0)

    # If provided, update the metadata.
    if metadata is not None:
        mf = dokdo.get_mf(metadata)
        cols = _get_mf_cols(df)
        df.drop(columns=cols, inplace=True)
        df = pd.concat([df, mf], axis=1, join='inner')

    # If provided, sort the samples by the user-specified order instead of
    # ordering numerically or alphabetically. To do this, we will first add a
    # new temporary column filled with the indices of the user-provided
    # list. This column will be used for sorting the samples later instead of
    # the original column. After sorting, the new column will be dropped from
    # the dataframe and the original column will replace its place.
    if isinstance(orders, dict):
        for k, v in orders.items():
            u = df[k].unique().tolist()

            if set(u) != set(v):
                message = (f"Target values {u} not matched with user-provided "
                           f"values {v} for metadata column `{k}`")
                raise ValueError(message)

            # Map each value to its position in the user-provided list.
            ranks = {x: i for i, x in enumerate(v)}
            df.rename(columns={k: f'@{k}'}, inplace=True)
            df[k] = df[f'@{k}'].map(ranks)

    # Expose the index as a column so that 'sample-id' can be used in `by`
    # and `label_columns`.
    df["sample-id"] = df.index

    # If provided, sort the samples for display in the x-axis.
    if isinstance(by, list):
        df = df.sort_values(by=by)

    # If sorting was performed by the user-specified order, remove the
    # temporary columns and then bring back the original column.
    if isinstance(orders, dict):
        for k in orders:
            df.drop(columns=[k], inplace=True)
            df.rename(columns={f'@{k}': k}, inplace=True)

    # If provided, exclude the specified taxa (case-insensitive substring
    # match against every column name).
    if isinstance(exclude_taxa, list):
        needles = [tax.lower() for tax in exclude_taxa]
        dropped = [col for col in df.columns
                   if any(needle in col.lower() for needle in needles)]
        df = df.drop(columns=dropped)

    # If provided, group the samples by the given metadata column.
    if group is not None:
        df = df.groupby(group)[taxa_cols(df)].agg('sum')

    # Remove the metadata columns.
    cols = _get_mf_cols(df)
    mf = df[cols]
    df = df.drop(columns=cols)

    if sort_by_mean1:
        df = _sort_by_mean(df)

    df, mf = _filter_samples(df, mf, exclude_samples, include_samples)

    if sort_by_mean2:
        df = _sort_by_mean(df)

    # If provided, only include the specified samples.
    if isinstance(sample_names, list):
        df = df.loc[sample_names]
        mf = mf.loc[sample_names]

        if sort_by_mean3:
            df = _sort_by_mean(df)

    # Convert counts to proportions.
    df = df.div(df.sum(axis=1), axis=0)

    df = _get_others_col(df, count, taxa_names, show_others)

    if sort_by_names:
        df = df.reindex(sorted(df.columns), axis=1)

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    if isinstance(colors, list):
        c = colors
    else:
        # plt.get_cmap() is used because matplotlib.cm.get_cmap() was
        # removed in Matplotlib 3.9.
        c = plt.get_cmap(cmap_name).colors

    # Express proportions as percentages.
    df = df * 100

    # If provided, output the dataframe as a .csv file. This happens before
    # the legend names are shortened, so the file keeps the full taxa names.
    if csv_file is not None:
        df.to_csv(csv_file)

    if legend_short:
        df.columns = [dokdo.pname(x) for x in df.columns]

    df.plot.bar(stacked=True,
                legend=False,
                ax=ax,
                width=width,
                color=c,
                linewidth=0)

    if label_columns is not None:
        f = lambda row: ' : '.join(row.values.astype(str))
        xticklabels = mf[label_columns].apply(f, axis=1).tolist()
    else:
        xticklabels = None

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied artist options take precedence over these defaults.
    artist_kwargs = {
        'xlabel': '',
        'ylabel': 'Relative abundance (%)',
        'xticklabels': xticklabels,
        **artist_kwargs
    }

    ax = _artist(ax, **artist_kwargs)

    return ax
예제 #2
0
def beta_3d_plot(pcoa_results,
                 metadata=None,
                 hue=None,
                 azim=-60,
                 elev=30,
                 s=80,
                 ax=None,
                 figsize=None,
                 hue_order=None,
                 artist_kwargs=None):
    """Draw the first three PCoA axes as a 3D scatter plot.

    +---------------------+---------------------------------------------------+
    | q2-diversity plugin | Example                                           |
    +=====================+===================================================+
    | QIIME 2 CLI         | qiime diversity pcoa [OPTIONS]                    |
    +---------------------+---------------------------------------------------+
    | QIIME 2 API         | from qiime2.plugins.diversity.methods import pcoa |
    +---------------------+---------------------------------------------------+

    Parameters
    ----------
    pcoa_results : str or qiime2.Artifact
        Artifact file or object corresponding to PCoAResults or
        PCoAResults % Properties('biplot'). A string is loaded with
        ``Artifact.load``.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object. When given, it is inner-joined with the
        sample coordinates on the sample index.
    hue : str, optional
        Grouping variable that will produce points with different colors.
    azim : int, default: -60
        Azimuthal viewing angle.
    elev : int, default: 30
        Elevation viewing angle.
    s : float, default: 80.0
        Marker size.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto. If omitted, a new figure with a
        single 3D Axes is created using ``figsize``.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    hue_order : list, optional
        Specify the order of categorical levels of the 'hue' semantic.
        Defaults to the order in which the levels first appear.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method. These
        override the defaults set by this function.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    See Also
    --------
    ordinate
    beta_2d_plot
    beta_scree_plot
    beta_parallel_plot
    addbiplot

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/moving-pictures-tutorial/unweighted_unifrac_pcoa_results.qza'
    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.beta_3d_plot(qza_file,
    ...                    metadata_file,
    ...                    'body-site',
    ...                    figsize=(6, 6),
    ...                    artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/beta_3d_plot-1.png

    We can control the camera angle with ``elev`` and ``azim``.

    >>> fig = plt.figure(figsize=(12, 6))
    >>> ax1 = fig.add_subplot(1, 2, 1, projection='3d')
    >>> ax2 = fig.add_subplot(1, 2, 2, projection='3d')
    >>> dokdo.beta_3d_plot(qza_file, metadata_file, ax=ax1, hue='body-site', elev=15)
    >>> dokdo.beta_3d_plot(qza_file, metadata_file, ax=ax2, hue='body-site', azim=70)
    >>> plt.tight_layout()

    .. image:: images/beta_3d_plot-2.png
    """
    # Accept either a path to an artifact or an already-loaded artifact.
    artifact = (Artifact.load(pcoa_results)
                if isinstance(pcoa_results, str) else pcoa_results)
    ordination = artifact.view(OrdinationResults)

    # Keep only the first three axes and give them fixed short names.
    coords = ordination.samples.iloc[:, :3]
    coords.columns = ['A1', 'A2', 'A3']

    props = ordination.proportion_explained

    if metadata is not None:
        sample_mf = dokdo.get_mf(metadata)
        coords = pd.concat([coords, sample_mf], axis=1, join='inner')

    if ax is None:
        figure = plt.figure(figsize=figsize)
        ax = figure.add_subplot(1, 1, 1, projection='3d')

    ax.view_init(azim=azim, elev=elev)

    if hue is not None:
        # One scatter call per level so that each gets its own color and
        # legend entry.
        levels = coords[hue].unique() if hue_order is None else hue_order
        for level in levels:
            subset = coords[coords[hue] == level]
            ax.scatter(subset['A1'], subset['A2'], subset['A3'],
                       label=level, s=s)
    else:
        ax.scatter(coords['A1'], coords['A2'], coords['A3'], s=s)

    overrides = {} if artist_kwargs is None else artist_kwargs

    # Defaults first, so that anything the caller supplies wins.
    artist_kwargs = {
        'xlabel': f'Axis 1 ({props[0]*100:.2f} %)',
        'ylabel': f'Axis 2 ({props[1]*100:.2f} %)',
        'zlabel': f'Axis 3 ({props[2]*100:.2f} %)',
        'hide_xticks': True,
        'hide_yticks': True,
        'hide_zticks': True,
        'legend_title': hue,
        **overrides
    }

    return _artist(ax, **artist_kwargs)
예제 #3
0
def alpha_diversity_plot(alpha_diversity,
                         metadata,
                         where,
                         ax=None,
                         figsize=None,
                         add_swarmplot=False,
                         order=None,
                         hide_nsizes=False,
                         artist_kwargs=None):
    """Create an alpha diversity plot.

    The alpha diversity values are inner-joined with the metadata on the
    sample index and drawn as a box plot, one box per level of ``where``.

    Parameters
    ----------
    alpha_diversity : str or qiime2.Artifact
        Artifact file or object with the semantic type
        `SampleData[AlphaDiversity]`.
    metadata : str or qiime2.Metadata
        Metadata file or object.
    where : str
        Column name to be used for the x-axis.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto. If omitted, a new figure and
        Axes are created with ``figsize``.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    add_swarmplot : bool, default: False
        Add a swarm plot on top of the box plot.
    order : list, optional
        Order to plot the categorical levels in.
    hide_nsizes : bool, default: False
        Hide sample size from x-axis labels.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method. These
        override the default axis labels set by this function.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/moving-pictures-tutorial/faith_pd_vector.qza'
    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.alpha_diversity_plot(qza_file, metadata_file, 'body-site')
    >>> plt.tight_layout()

    .. image:: images/alpha_diversity_plot.png
    """
    if isinstance(alpha_diversity, str):
        _alpha_diversity = Artifact.load(alpha_diversity)
    else:
        _alpha_diversity = alpha_diversity

    df = _alpha_diversity.view(pd.Series).to_frame()

    mf = dokdo.get_mf(metadata)
    df = pd.concat([df, mf], axis=1, join='inner')

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    # The diversity series is the first column after the join, so its name
    # is the metric name.
    metric = df.columns[0]

    boxprops = dict(color='white', edgecolor='black')

    d = {'x': where, 'y': metric, 'ax': ax, 'order': order, 'data': df}

    sns.boxplot(boxprops=boxprops, **d)

    if add_swarmplot:
        sns.swarmplot(**d)

    if not hide_nsizes:
        # Tick label text is always a string, so the counts are keyed by the
        # string form of each level; otherwise numeric columns would raise
        # KeyError on lookup.
        nsizes = df[where].astype(str).value_counts().to_dict()
        xtexts = [x.get_text() for x in ax.get_xticklabels()]
        # A level listed in `order` but absent from the data has size 0.
        xtexts = [f'{x} ({nsizes.get(x, 0)})' for x in xtexts]
        ax.set_xticklabels(xtexts)

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied artist options take precedence over these defaults.
    artist_kwargs = {'xlabel': where, 'ylabel': metric, **artist_kwargs}

    ax = _artist(ax, **artist_kwargs)

    return ax
예제 #4
0
def beta_parallel_plot(pcoa_results,
                       hue=None,
                       hue_order=None,
                       metadata=None,
                       count=5,
                       ax=None,
                       figsize=None,
                       artist_kwargs=None):
    """Create a parallel plot from PCoA results.

    +---------------------+---------------------------------------------------+
    | q2-diversity plugin | Example                                           |
    +=====================+===================================================+
    | QIIME 2 CLI         | qiime diversity pcoa [OPTIONS]                    |
    +---------------------+---------------------------------------------------+
    | QIIME 2 API         | from qiime2.plugins.diversity.methods import pcoa |
    +---------------------+---------------------------------------------------+

    Parameters
    ----------
    pcoa_results : str or qiime2.Artifact
        Artifact file or object corresponding to PCoAResults.
    hue : str, optional
        Grouping variable that will produce lines with different colors.
        When omitted, every sample is treated as its own class.
    hue_order : list, optional
        Specify the order of categorical levels of the 'hue' semantic.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object. Required if 'hue' is used.
    count : int, default: 5
        Number of principal components to be displayed.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto. If omitted, a new figure and
        Axes are created with ``figsize``.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method. These
        override the defaults set by this function.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    See Also
    --------
    ordinate
    beta_2d_plot
    beta_3d_plot
    beta_scree_plot

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/moving-pictures-tutorial/unweighted_unifrac_pcoa_results.qza'
    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.beta_parallel_plot(qza_file)
    >>> plt.tight_layout()

    .. image:: images/beta_parallel_plot-1.png

    We can group the lines by body-site.

    >>> dokdo.beta_parallel_plot(qza_file,
    ...                          metadata=metadata_file,
    ...                          hue='body-site',
    ...                          artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/beta_parallel_plot-2.png
    """
    if isinstance(pcoa_results, str):
        _pcoa_results = Artifact.load(pcoa_results)
    else:
        _pcoa_results = pcoa_results

    ordination_results = _pcoa_results.view(OrdinationResults)

    # Build the x-axis tick labels from the explained proportions (as %).
    props = ordination_results.proportion_explained * 100
    props = [f'Axis {i+1} ({x:.2f}%)' for i, x in enumerate(props[:count])]

    df = ordination_results.samples.copy().iloc[:, :count]

    # Without a hue, each sample ID is used as its own class label.
    if hue is None:
        col = df.index
    else:
        mf = dokdo.get_mf(metadata)
        col = mf[hue]

    df = df.assign(Target=col)

    # Reorder the rows so that classes are encountered in the requested
    # order when the lines are drawn.
    if isinstance(hue_order, list):
        d = {x: i for i, x in enumerate(hue_order)}
        df = df.iloc[df['Target'].map(d).argsort()]

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    # Pass `ax` explicitly; otherwise pandas draws on the current Axes and a
    # user-supplied Axes would be ignored. plt.get_cmap() is used because
    # matplotlib.cm.get_cmap() was removed in Matplotlib 3.9.
    pd.plotting.parallel_coordinates(df,
                                     'Target',
                                     ax=ax,
                                     color=plt.get_cmap('tab10').colors)

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied artist options take precedence over these defaults.
    artist_kwargs = {
        'xlabel': '',
        'ylabel': '',
        'xticklabels': props,
        'legend_title': hue,
        **artist_kwargs
    }

    ax = _artist(ax, **artist_kwargs)

    return ax
예제 #5
0
def prepare_lefse(table_file,
                  taxonomy_file,
                  metadata_file,
                  output_file,
                  class_col,
                  subclass_col=None,
                  subject_col=None,
                  where=None):
    """Write a tab-separated file that can be fed to the LEfSe tool.

    The steps performed are, in order:

    1) collapse the feature table at taxonomic level 6,
    2) convert the collapsed counts to relative frequencies,
    3) optionally filter the samples with ``where``,
    4) rewrite the feature names into a '|'-delimited form,
    5) place the class (and optional subclass/subject) metadata columns
       in front of the features, and
    6) write the transposed table to ``output_file`` without a header.

    Parameters
    ----------
    table_file : str
        Path to the table file with the 'FeatureTable[Frequency]' type.
    taxonomy_file : str
        Path to the taxonomy file with the 'FeatureData[Taxonomy]' type.
    metadata_file : str
        Path to the metadata file.
    output_file : str
        Path to the output file.
    class_col : str
        Metadata column used as 'Class' by LEfSe.
    subclass_col : str, optional
        Metadata column used as 'Subclass' by LEfSe.
    subject_col : str, optional
        Metadata column used as 'Subject' by LEfSe.
    where : str, optional
        SQLite 'WHERE' clause specifying sample metadata criteria.
    """
    collapsed = taxa.methods.collapse(table=Artifact.load(table_file),
                                      taxonomy=Artifact.load(taxonomy_file),
                                      level=6)

    relative = feature_table.methods.relative_frequency(
        table=collapsed.collapsed_table)

    if where is not None:
        filtered = feature_table.methods.filter_samples(
            table=relative.relative_frequency_table,
            metadata=Metadata.load(metadata_file),
            where=where)
        df = filtered.filtered_table.view(pd.DataFrame)
    else:
        df = relative.relative_frequency_table.view(pd.DataFrame)

    # Every character in the first string is turned into an underscore.
    underscore_table = str.maketrans('-[]() ', '______')

    def to_lefse_name(taxon):
        """Convert a ';'-delimited taxon string into a '|'-delimited one."""
        base, *lower_ranks = taxon.translate(underscore_table).split(';')
        pieces = [base]

        for i, rank in enumerate(lower_ranks, start=2):
            if rank == '__':
                # Completely empty rank: derive a placeholder from the
                # closest named ancestor.
                pieces.append(f'{base}_x__L{i}')
            elif rank.split('__')[1] == '':
                # Rank prefix without a name (e.g. 'g__').
                pieces.append(f'{base}_{rank}L{i}')
            else:
                pieces.append(rank)
                base = rank

        return '|'.join(pieces)

    df.columns = [to_lefse_name(name) for name in df.columns.to_list()]

    mf = dokdo.get_mf(metadata_file).replace(' ', '_', regex=True)
    unused_cols = mf.columns.to_list()
    df = pd.concat([df, mf], axis=1, join="inner")

    # Move the requested metadata columns to the front, in the order
    # class, subclass, subject; whatever metadata remains is dropped.
    leading_cols = [class_col]
    leading_cols += [c for c in (subclass_col, subject_col) if c is not None]

    for position, column in enumerate(leading_cols):
        df.insert(position, column, df.pop(column))
        unused_cols.remove(column)

    df.drop(columns=unused_cols, inplace=True)
    df.T.to_csv(output_file, header=False, sep='\t')
예제 #6
0
def heatmap(
    table, metadata=None, hue1=None, hue_order1=None,
    hue1_cmap='tab10', hue1_loc='upper right', hue2=None,
    hue_order2=None, hue2_cmap='Pastel1', hue2_loc='upper left',
    normalize=None, method='average', metric='euclidean',
    figsize=(10, 10), row_cluster=True, col_cluster=True, **kwargs
):
    """Create a hierarchically clustered heatmap of a feature table.

    Internally, this method uses the `seaborn.clustermap()` method to
    create a heatmap.

    Parameters
    ----------
    table : str or qiime2.Artifact
        Artifact file or object corresponding to FeatureTable[Frequency].
    metadata : str or qiime2.Metadata, optional
        Metadata file or object. Required when `hue1` or `hue2` is used.
    hue1 : str, optional
        First grouping variable that will produce labels with different
        colors.
    hue_order1 : list, optional
        Specify the order of categorical levels of the 'hue1' semantic.
    hue1_cmap : str, default: 'tab10'
        Name of the colormap passed to `matplotlib.pyplot.get_cmap()` for
        `hue1`.
    hue1_loc : str, default: 'upper right'
        Location of the legend for `hue1`.
    hue2 : str, optional
        Second grouping variable that will produce labels with different
        colors. Can only be used together with `hue1`.
    hue_order2 : list, optional
        Specify the order of categorical levels of the 'hue2' semantic.
    hue2_cmap : str, default: 'Pastel1'
        Name of the colormap passed to `matplotlib.pyplot.get_cmap()` for
        `hue2`.
    hue2_loc : str, default: 'upper left'
        Location of the legend for `hue2`.
    normalize : str, optional
        Normalize the feature table by adding a pseudocount of 1 and then
        taking the log10 of the table or performing centre log ratio
        transformation. Choices: {'log10', 'clr'}. Any other value leaves
        the table untouched.
    method : str, default: 'average'
        Linkage method to use for calculating clusters. See
        `scipy.cluster.hierarchy.linkage()` documentation for more
        information.
    metric : str, default: 'euclidean'
        Distance metric to use for the data. See
        `scipy.spatial.distance.pdist()` documentation for more options.
    figsize : tuple, default: (10, 10)
        Width, height in inches. Format: (float, float).
    row_cluster : bool, default: True
        If True, cluster the rows.
    col_cluster : bool, default: True
        If True, cluster the columns.
    kwargs : other keyword arguments
        All other keyword arguments are passed to `seaborn.clustermap()`.

    Returns
    -------
    seaborn.matrix.ClusterGrid
        A ClusterGrid instance.

    Raises
    ------
    TypeError
        If `table` is neither a path nor a qiime2.Artifact.
    ValueError
        If `hue2` is used without `hue1`, or if a hue argument is used
        without `metadata`.

    Examples
    --------
    Below is a simple example.

    >>> table_file = f'{data_dir}/moving-pictures-tutorial/table.qza'
    >>> dokdo.heatmap(table_file, normalize='log10')

    .. image:: images/heatmap-1.png

    We can color the samples by ``body-site``. For this example, we will
    use the centered log-ratio transformation.

    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.heatmap(table_file,
    ...               normalize='clr',
    ...               metadata=metadata_file,
    ...               hue1='body-site')

    .. image:: images/heatmap-2.png

    We can add an additional grouping variable ``subject``. Note that
    ``xticklabels`` and ``yticklabels`` are extra keyword arguments that
    are passed to the ``seaborn.clustermap`` method.

    >>> dokdo.heatmap(table_file,
    ...               normalize='clr',
    ...               metadata=metadata_file,
    ...               hue1='body-site',
    ...               hue2='subject',
    ...               xticklabels=False,
    ...               yticklabels=False)

    .. image:: images/heatmap-3.png
    """
    # Check the input type.
    if isinstance(table, str):
        table = Artifact.load(table)
    elif not isinstance(table, Artifact):
        raise TypeError(f'Incorrect feature table type: {type(table)}')

    # Reject inconsistent arguments before any expensive work is done, so
    # that no half-finished figure is left behind.
    if hue2 is not None and hue1 is None:
        raise ValueError("Argument 'hue2' was used without 'hue1'. "
                         "Use 'hue1' instead.")
    if hue1 is not None and metadata is None:
        raise ValueError("Argument 'metadata' is required when 'hue1' "
                         "or 'hue2' is used.")

    # Create the dataframe.
    df = table.view(pd.DataFrame)

    # If the metadata is provided, filter the samples accordingly.
    mf = None
    if metadata is not None:
        mf = dokdo.get_mf(metadata)
        # The inner join keeps only the samples present in both tables; the
        # metadata columns themselves are not wanted in the matrix.
        df = pd.concat([df, mf], axis=1, join='inner')
        df.drop(mf.columns, axis=1, inplace=True)
        # Drop the features that are absent from every remaining sample.
        df = df.loc[:, (df != 0).any(axis=0)]

    # If the hue argument(s) are used, get the row colors.
    lut1 = None
    lut2 = None
    row_colors = None
    if hue1 is not None:
        df, lut1, row_colors = _heatmap_hue_colors(
            df, mf, hue1, hue_order1, hue1_cmap)
    if hue2 is not None:
        df, lut2, colors2 = _heatmap_hue_colors(
            df, mf, hue2, hue_order2, hue2_cmap)
        row_colors = pd.concat([row_colors, colors2], axis=1)

    # Apply the appropriate normalization.
    if normalize == 'log10':
        df = df.apply(lambda x: np.log10(x + 1))
    elif normalize == 'clr':
        df = df.apply(lambda x: clr(x + 1), axis=1, result_type='broadcast')

    # Draw the heatmap.
    g = sns.clustermap(df, method=method, metric=metric, figsize=figsize,
                       row_cluster=row_cluster, col_cluster=col_cluster,
                       row_colors=row_colors, **kwargs)

    # If the hue argument(s) are used, add the legend(s).
    if hue1 is not None:
        handles = [Patch(facecolor=lut1[name]) for name in lut1]
        legend1 = plt.legend(handles, list(lut1), title=hue1,
                             bbox_to_anchor=(1, 1),
                             bbox_transform=plt.gcf().transFigure,
                             loc=hue1_loc)
    if hue2 is not None:
        handles = [Patch(facecolor=lut2[name]) for name in lut2]
        plt.legend(handles, list(lut2), title=hue2, bbox_to_anchor=(1, 1),
                   bbox_transform=plt.gcf().transFigure, loc=hue2_loc)
        # Creating the second legend replaces the first one on the Axes, so
        # the first legend is added back as a separate artist.
        plt.gca().add_artist(legend1)

    return g


def _heatmap_hue_colors(df, mf, hue, hue_order, cmap_name):
    """Return the (filtered) table, color lookup and row colors for a hue.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table with samples as rows.
    mf : pandas.DataFrame
        Sample metadata containing the column ``hue``.
    hue : str
        Metadata column used for coloring the rows.
    hue_order : list or None
        When given, only the samples whose ``hue`` value is listed are kept
        and the colors are assigned in this order.
    cmap_name : str
        Name of a colormap exposing a ``colors`` attribute.

    Returns
    -------
    tuple
        ``(table, lut, colors)`` where ``table`` is ``df`` restricted to the
        retained samples, ``lut`` maps each level to a color and ``colors``
        is the per-sample color Series.
    """
    # pyplot.get_cmap() is used rather than matplotlib.cm.get_cmap(), which
    # is no longer available in recent Matplotlib releases.
    palette = plt.get_cmap(cmap_name).colors
    merged = pd.concat([df, mf], axis=1, join='inner')

    if hue_order is None:
        keys = merged[hue].unique()
    else:
        keys = hue_order
        merged = merged[merged[hue].isin(hue_order)]

    lut = dict(zip(keys, palette[:len(keys)]))
    colors = merged[hue].map(lut)

    # Return a new frame instead of dropping in place: ``merged`` may be a
    # filtered slice, and an in-place drop on it triggers pandas warnings.
    return merged.drop(mf.columns, axis=1), lut, colors
예제 #7
0
def denoising_stats_plot(stats,
                         metadata,
                         where,
                         ax=None,
                         figsize=None,
                         pseudocount=False,
                         order=None,
                         hide_nsizes=False,
                         artist_kwargs=None):
    """Create a grouped box chart for denoising statistics from DADA2.

    +-----------------+---------------------------------------------------------+
    | q2-dada2 plugin | Example                                                 |
    +=================+=========================================================+
    | QIIME 2 CLI     | qiime dada2 denoise-paired [OPTIONS]                    |
    +-----------------+---------------------------------------------------------+
    | QIIME 2 API     | from qiime2.plugins.dada2.methods import denoise_paired |
    +-----------------+---------------------------------------------------------+

    Parameters
    ----------
    stats : str or qiime2.Artifact
        Artifact file or object from the q2-dada2 plugin.
    metadata : str or qiime2.Metadata
        Metadata file or object.
    where : str
        Column name of the sample metadata.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise a new figure is
        created.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    pseudocount : bool, default: False
        Add pseudocount to remove zeros.
    order : list, optional
        Order to plot the categorical levels in. Levels without any sample
        are labeled with a sample size of 0.
    hide_nsizes : bool, default: False
        Hide sample size from x-axis labels.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/atacama-soil-microbiome-tutorial/denoising-stats.qza'
    >>> metadata_file = f'{data_dir}/atacama-soil-microbiome-tutorial/sample-metadata.tsv'
    >>> dokdo.denoising_stats_plot(qza_file, metadata_file, 'transect-name', artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/denoising_stats_plot.png
    """
    with tempfile.TemporaryDirectory() as t:
        _parse_input(stats, t)

        # The row at index 1 of the file is skipped; it is not sample data.
        df1 = pd.read_table(f'{t}/stats.tsv', skiprows=[1], index_col=0)

    mf = dokdo.get_mf(metadata)

    # Keep only the samples that have both statistics and metadata.
    df2 = pd.concat([df1, mf], axis=1, join='inner')

    a = ['input', 'filtered', 'denoised', 'merged', 'non-chimeric', where]
    df3 = pd.melt(df2[a], id_vars=[where])

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    if pseudocount:
        df3['value'] = df3['value'] + 1

    sns.boxplot(x=where,
                y='value',
                data=df3,
                hue='variable',
                ax=ax,
                order=order)

    if not hide_nsizes:
        # Tick labels are always text, so the counts are keyed by the
        # string form of each level; otherwise non-string metadata values
        # could never be matched.
        nsizes = {str(k): v for k, v in df2[where].value_counts().items()}
        xtexts = [x.get_text() for x in ax.get_xticklabels()]
        # A level requested through ``order`` may have no sample at all.
        xtexts = [f'{x} ({nsizes.get(x, 0)})' for x in xtexts]
        ax.set_xticklabels(xtexts)

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied settings take precedence over the defaults.
    artist_kwargs = {'xlabel': where, 'ylabel': 'Read depth', **artist_kwargs}

    ax = _artist(ax, **artist_kwargs)

    return ax
예제 #8
0
def barplot(barplot_file,
            group,
            axis=0,
            figsize=(10, 10),
            level=1,
            count=0,
            items=None,
            by=None,
            label_columns=None,
            metadata=None,
            artist_kwargs=None,
            ylabel_fontsize=None,
            xaxis_repeated=False,
            cmap_name='Accent'):
    """Create a grouped abundance bar plot.

    Under the hood, this method essentially wraps the
    `taxa_abundance_bar_plot` method: one chart is drawn per level of
    ``group``, with a shared y-axis label on the left and a shared legend
    on the right.

    Parameters
    ----------
    barplot_file : str or qiime2.Visualization
        Visualization file or object from the q2-taxa plugin.
    group : str
        Metadata column.
    axis : int, default : 0
        By default, charts will be stacked vertically. Use 1 for horizontal
        stacking.
    figsize : tuple, default: (10, 10)
        Width, height in inches. Format: (float, float).
    level : int, default: 1
        Taxonomic level at which the features should be collapsed.
    count : int, default: 0
        The number of taxa to display. When 0, display all.
    items : list, optional
        Specify the order of charts.
    by : list, optional
        Column name(s) to be used for sorting the samples. Using 'index' will
        sort the samples by their name, in addition to other column name(s)
        that may have been provided. If multiple items are provided, sorting
        will occur by the order of the items.
    label_columns : list, optional
        The column names to be used as the x-axis labels.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.
    ylabel_fontsize : float or str, optional
        Sets the y-axis label font size.
    xaxis_repeated : bool, default: False
        If true, remove all x-axis tick labels except for the bottom subplot.
        Ignored if `axis=1`.
    cmap_name : str, default: 'Accent'
        Name of the colormap passed to `matplotlib.cm.get_cmap()`.

    See Also
    --------
    taxa_abundance_bar_plot

    Examples
    --------
    Below is a simple example.

    >>> barplot_file = f'{data_dir}/moving-pictures-tutorial/taxa-bar-plots.qzv'
    >>> dokdo.barplot(barplot_file,
    ...               'body-site',
    ...               axis=1,
    ...               figsize=(10, 6),
    ...               level=6,
    ...               count=8)

    .. image:: images/barplot-1.png

    We can draw the subplots vertically, which is particularly useful when the samples are matched.

    >>> dokdo.barplot(barplot_file,
    ...               'body-site',
    ...               axis=0,
    ...               figsize=(8, 10),
    ...               level=6,
    ...               count=8,
    ...               xaxis_repeated=True)

    .. image:: images/barplot-2.png
    """
    # Accept an already loaded visualization as well as a file path.
    if isinstance(barplot_file, Visualization):
        vis = barplot_file
    else:
        vis = Visualization.load(barplot_file)

    with tempfile.TemporaryDirectory() as t:
        vis.export_data(t)
        # This table is only used to look up the levels of ``group``.
        df = pd.read_csv(f'{t}/level-1.csv', index_col=0)

    if metadata is not None:
        # Replace the metadata embedded in the visualization with the
        # user-supplied one.
        mf = dokdo.get_mf(metadata)
        cols = _get_mf_cols(df)
        df.drop(columns=cols, inplace=True)
        df = pd.concat([df, mf], axis=1, join='inner')

    if items is None:
        _items = df[group].unique()
    else:
        _items = items

    n_items = len(_items)

    # The first and last grid columns are thin placeholders that host the
    # shared y-axis label and the shared legend, respectively.
    if axis == 0:
        nrows, ncols = n_items, 3
        width_ratios = [0.01, 1, 0.01]
    else:
        nrows, ncols = 1, n_items + 2
        width_ratios = [0.01] + [1] * n_items + [0.01]

    # squeeze=False always yields a 2-D array of Axes, so a single group
    # can be indexed in the same way as several groups.
    fig, axes = plt.subplots(nrows,
                             ncols,
                             figsize=figsize,
                             gridspec_kw=dict(width_ratios=width_ratios),
                             squeeze=False)

    if artist_kwargs is None:
        artist_kwargs = {}

    _artist_kwargs = {'hide_ytexts': True, **artist_kwargs}

    plot_kwargs = dict(sort_by_mean2=False,
                       level=level,
                       count=count,
                       by=by,
                       label_columns=label_columns,
                       metadata=metadata,
                       cmap_name=cmap_name)

    if axis == 0:
        chart_axes = axes[:, 1]
        hide_xtexts = [bool(xaxis_repeated)] * n_items
        if xaxis_repeated:
            # Only the bottom chart keeps its x-axis tick labels.
            hide_xtexts[-1] = False
    else:
        chart_axes = axes[0, 1:-1]
        hide_xtexts = None

    for i, ax in enumerate(chart_axes):
        chart_kwargs = {'title': _items[i]}
        if hide_xtexts is not None:
            chart_kwargs['hide_xtexts'] = hide_xtexts[i]
        # User-supplied settings take precedence over the defaults above.
        chart_kwargs.update(_artist_kwargs)

        taxa_abundance_bar_plot(barplot_file,
                                ax=ax,
                                include_samples={group: [_items[i]]},
                                artist_kwargs=chart_kwargs,
                                **plot_kwargs)

    def _side_axes(col):
        """Return one Axes spanning the whole grid column ``col``."""
        if axis != 0:
            return axes[0, col]
        gs = axes[0, col].get_gridspec()
        for side_ax in axes[:, col]:
            side_ax.remove()
        return fig.add_subplot(gs[:, col])

    # Add the shared y-axis label.
    axbig = _side_axes(0)
    axbig.set_ylabel('Relative abundance (%)', fontsize=ylabel_fontsize)
    axbig.xaxis.set_visible(False)
    plt.setp(axbig.spines.values(), visible=False)
    axbig.tick_params(left=False, labelleft=False)
    axbig.patch.set_visible(False)

    # Add the shared legend.
    axbig = _side_axes(-1)

    taxa_abundance_bar_plot(barplot_file,
                            ax=axbig,
                            legend_short=True,
                            artist_kwargs={
                                'legend_only': True,
                                'legend_loc': 'center left',
                                **_artist_kwargs
                            },
                            **plot_kwargs)

    plt.tight_layout()
예제 #9
0
def beta_2d_plot(pcoa_results,
                 metadata=None,
                 hue=None,
                 size=None,
                 style=None,
                 s=80,
                 alpha=None,
                 ax=None,
                 figsize=None,
                 hue_order=None,
                 style_order=None,
                 legend_type='brief',
                 artist_kwargs=None):
    """Draw the first two PCoA axes as a 2D scatter plot.

    +---------------------+---------------------------------------------------+
    | q2-diversity plugin | Example                                           |
    +=====================+===================================================+
    | QIIME 2 CLI         | qiime diversity pcoa [OPTIONS]                    |
    +---------------------+---------------------------------------------------+
    | QIIME 2 API         | from qiime2.plugins.diversity.methods import pcoa |
    +---------------------+---------------------------------------------------+

    Parameters
    ----------
    pcoa_results : str or qiime2.Artifact
        Artifact file or object corresponding to PCoAResults or
        PCoAResults % Properties('biplot').
    metadata : str or qiime2.Metadata, optional
        Metadata file or object. When given, only the samples present in
        both the ordination and the metadata are plotted.
    hue : str, optional
        Grouping variable that will produce points with different colors.
    size : str, optional
        Grouping variable that will produce points with different sizes.
    style : str, optional
        Grouping variable that will produce points with different markers.
    s : float, default: 80.0
        Marker size.
    alpha : float, optional
        Proportional opacity of the points.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise a new figure is
        created.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    hue_order : list, optional
        Specify the order of categorical levels of the 'hue' semantic.
    style_order : list, optional
        Specify the order of categorical levels of the 'style' semantic.
    legend_type : str, default: 'brief'
        Legend type as in seaborn.scatterplot ('brief' or 'full').
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method. They
        override the axis labels and tick settings chosen here.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    See Also
    --------
    ordinate
    beta_3d_plot
    beta_scree_plot
    beta_parallel_plot
    addbiplot

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/moving-pictures-tutorial/unweighted_unifrac_pcoa_results.qza'
    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.beta_2d_plot(qza_file)
    >>> plt.tight_layout()

    .. image:: images/beta_2d_plot-1.png

    We can color the datapoints with ``hue``. We can also change the
    style of datapoints with ``style``. If the variable of interest is
    numeric, we can use ``size`` to control the size of datapoints.
    Finally, we can combine all those groupings.

    >>> fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(2, 2, figsize=(8, 8))
    >>> artist_kwargs1 = dict(show_legend=True, title="hue='body-site'")
    >>> artist_kwargs2 = dict(show_legend=True, title="style='subject'")
    >>> artist_kwargs3 = dict(show_legend=True, title="size='days-since-experiment-start'")
    >>> artist_kwargs4 = dict(title="Combined groupings")
    >>> dokdo.beta_2d_plot(qza_file, metadata_file, ax=ax1, hue='body-site', artist_kwargs=artist_kwargs1)
    >>> dokdo.beta_2d_plot(qza_file, metadata_file, ax=ax2, style='subject', artist_kwargs=artist_kwargs2)
    >>> dokdo.beta_2d_plot(qza_file, metadata_file, ax=ax3, size='days-since-experiment-start', artist_kwargs=artist_kwargs3)
    >>> dokdo.beta_2d_plot(qza_file, metadata_file, ax=ax4, hue='body-site', style='subject', size='days-since-experiment-start', artist_kwargs=artist_kwargs4)
    >>> plt.tight_layout()

    .. image:: images/beta_2d_plot-2.png
    """
    # A path is loaded from disk; anything else is used as given.
    artifact = pcoa_results
    if isinstance(pcoa_results, str):
        artifact = Artifact.load(pcoa_results)

    ordination_results = artifact.view(OrdinationResults)
    props = ordination_results.proportion_explained

    # Sample coordinates along the first two axes.
    coords = ordination_results.samples.iloc[:, :2]
    coords.columns = ['A1', 'A2']

    if metadata is not None:
        points = pd.concat([coords, dokdo.get_mf(metadata)],
                           axis=1,
                           join='inner')
    else:
        points = coords

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    scatter_kwargs = dict(data=points,
                          x='A1',
                          y='A2',
                          hue=hue,
                          hue_order=hue_order,
                          style=style,
                          style_order=style_order,
                          size=size,
                          ax=ax,
                          s=s,
                          alpha=alpha,
                          legend=legend_type)
    sns.scatterplot(**scatter_kwargs)

    # Defaults first, so that user-supplied settings win.
    defaults = {
        'xlabel': f'Axis 1 ({props[0]*100:.2f} %)',
        'ylabel': f'Axis 2 ({props[1]*100:.2f} %)',
        'hide_xticks': True,
        'hide_yticks': True,
    }
    if artist_kwargs is None:
        artist_kwargs = {}
    artist_kwargs = {**defaults, **artist_kwargs}

    return _artist(ax, **artist_kwargs)
예제 #10
0
def taxa_abundance_box_plot(
    taxa, metadata=None, hue=None, hue_order=None,
    add_datapoints=False, level=1, by=None, ax=None,
    figsize=None, count=0, exclude_samples=None,
    include_samples=None, exclude_taxa=None, sort_by_names=False,
    sample_names=None, csv_file=None, size=5, pseudocount=False,
    taxa_names=None, brief_xlabels=False, show_means=False,
    meanprops=None, show_others=True, sort_by_mean=True,
    jitter=1, alpha=None, artist_kwargs=None
):
    """Create a taxa abundance box plot.

    +----------------+-----------------------------------------------------+
    | q2-taxa plugin | Example                                             |
    +================+=====================================================+
    | QIIME 2 CLI    | qiime taxa barplot [OPTIONS]                        |
    +----------------+-----------------------------------------------------+
    | QIIME 2 API    | from qiime2.plugins.taxa.visualizers import barplot |
    +----------------+-----------------------------------------------------+

    Parameters
    ----------
    taxa : str or qiime2.Visualization
        Visualization file or object from the q2-taxa plugin.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object. When given, it replaces the metadata
        columns stored in the visualization.
    hue : str, optional
        Grouping variable that will produce boxes with different colors.
    hue_order : list, optional
        Specify the order of categorical levels of the 'hue' semantic.
    add_datapoints : bool, default: False
        Show datapoints on top of the boxes.
    level : int, default: 1
        Taxonomic level at which the features should be collapsed.
    by : list, optional
        Column name(s) to be used for sorting the samples. Using 'sample-id'
        will sort the samples by their name, in addition to other column
        name(s) that may have been provided. If multiple items are provided,
        sorting will occur by the order of the items.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise a new figure and Axes
        are created with ``figsize``.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    count : int, default: 0
        The number of taxa to display. When 0, display all.
    exclude_samples : dict, optional
        Filtering logic used for sample exclusion.
        Format: {'col': ['item', ...], ...}.
    include_samples : dict, optional
        Filtering logic used for sample inclusion.
        Format: {'col': ['item', ...], ...}.
    exclude_taxa : list, optional
        The taxa names to be excluded when matched. Case insensitive.
        Matching is by substring and is applied to taxa columns only;
        metadata columns are never removed.
    sort_by_names : bool, default: False
        If true, sort the columns (i.e. species) to be displayed by name.
    sample_names : list, optional
        List of sample IDs to be included.
    csv_file : str, optional
        Path of the .csv file to output the dataframe to.
    size : float, default: 5.0
        Radius of the markers, in points.
    pseudocount : bool, default: False
        Add pseudocount to remove zeros.
    taxa_names : list, optional
        List of taxa names to be displayed.
    brief_xlabels : bool, default: False
        If true, only display the smallest taxa rank in the x-axis labels.
    show_means : bool, default: False
        Add means to the boxes.
    meanprops : dict, optional
        The meanprops argument as in matplotlib.pyplot.boxplot. Only used
        when ``show_means`` is true.
    show_others : bool, default: True
        Include the 'Others' category.
    sort_by_mean : bool, default: True
        Sort taxa by their mean relative abundance after sample filtration.
    jitter : float, default: 1
        Amount of jitter (only along the categorical axis) to apply.
    alpha : float, optional
        Proportional opacity of the points.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method. Values
        given here take precedence over the defaults set by this method
        (including ``legend_title``, which defaults to ``hue``).

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    See Also
    --------
    taxa_abundance_bar_plot
    addpairs

    Examples
    --------
    Below is a simple example showing taxonomic abundance at the phylum
    level (i.e. ``level=2``).

    >>> qzv_file = '/Users/sbslee/Desktop/dokdo/data/moving-pictures-tutorial/taxa-bar-plots.qzv'
    >>> dokdo.taxa_abundance_box_plot(qzv_file, level=2, figsize=(8, 7))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_box_plot-1.png

    We can control how many taxa to display with ``count``. Also, we can
    make the x-axis tick labels pretty with ``brief_xlabels``. We can
    manually set the x-axis tick labels with ``xticklabels``. Lastly, we
    can select specific taxa to display with ``taxa_names``.

    >>> fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(2, 2, figsize=(10, 10))
    >>> kwargs = {'level' : 2}
    >>> artist_kwargs1 = dict(title='count=4')
    >>> artist_kwargs2 = dict(title='brief_xlabels=True')
    >>> artist_kwargs3 = dict(xticklabels=['A', 'B', 'C', 'D'], title="xticklabels=['A', 'B', 'C', 'D']")
    >>> artist_kwargs4 = dict(title="taxa_names=[...]")
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax1, count=4, artist_kwargs=artist_kwargs1, **kwargs)
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax2, count=4, brief_xlabels=True, artist_kwargs=artist_kwargs2, **kwargs)
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax3, count=4, artist_kwargs=artist_kwargs3, **kwargs)
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax4, taxa_names=['k__Bacteria;p__Firmicutes', 'k__Bacteria;p__Proteobacteria'], artist_kwargs=artist_kwargs4, **kwargs)
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_box_plot-2.png

    We can group the boxes by a metadata column with ``hue``. For this
    plot, we will draw the y-axis in log scale with ``ylog``. To do
    this, we actually need to adjust the y-axis limits with ``ymin``
    and ``ymax``, and also add a pseudocount of 1 to remove 0s with
    ``pseudocount`` (because 0s cannot be shown in log scale). We will
    also add data points with ``add_datapoints=True``.

    >>> artist_kwargs = dict(ylog=True, ymin=0.05, ymax=200, show_legend=True)
    >>> dokdo.taxa_abundance_box_plot(qzv_file,
    ...                               level=2,
    ...                               figsize=(10, 7),
    ...                               hue='body-site',
    ...                               size=3,
    ...                               count=4,
    ...                               pseudocount=True,
    ...                               add_datapoints=True,
    ...                               artist_kwargs=artist_kwargs)
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_box_plot-3.png
    """
    # The per-level CSV is expected inside the temporary directory once
    # _parse_input() has processed the visualization; it is read before
    # the directory is cleaned up.
    with tempfile.TemporaryDirectory() as t:
        _parse_input(taxa, t)
        df = pd.read_csv(f'{t}/level-{level}.csv', index_col=0)

    # If provided, replace the embedded metadata columns with the new ones.
    if metadata is not None:
        mf = dokdo.get_mf(metadata)
        df = df.drop(columns=_get_mf_cols(df))
        df = pd.concat([df, mf], axis=1, join='inner')

    df["sample-id"] = df.index

    # If provided, sort the samples.
    if by:
        df = df.sort_values(by=by)

    # Identify the metadata columns up front so that taxa exclusion below
    # can never remove them (e.g. the column later used for ``hue``).
    mf_cols = _get_mf_cols(df)

    # If provided, exclude the specified taxa (case-insensitive substring).
    if isinstance(exclude_taxa, list):
        protected = set(mf_cols)
        patterns = [tax.lower() for tax in exclude_taxa]
        dropped = [
            col for col in df.columns
            if col not in protected
            and any(pattern in col.lower() for pattern in patterns)
        ]
        df = df.drop(columns=dropped)

    # Split the table into metadata (mf) and taxa counts (df).
    mf = df[mf_cols]
    df = df.drop(columns=mf_cols)

    df, mf = _filter_samples(df, mf, exclude_samples, include_samples)

    # If provided, only include the specified samples.
    if isinstance(sample_names, list):
        df = df.loc[sample_names]
        mf = mf.loc[sample_names]

    if sort_by_mean:
        df = _sort_by_mean(df)

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    # Add a pseudocount of 1 to every cell so that no zeros remain.
    if pseudocount:
        df = df + 1

    # Convert counts to per-sample proportions (each row sums to 1).
    df = df.div(df.sum(axis=1), axis=0)

    df = _get_others_col(df, count, taxa_names, show_others)

    if sort_by_names:
        df = df.reindex(sorted(df.columns), axis=1)

    # Express proportions as percentages.
    df = df * 100

    # Reshape to long format: one row per (sample, taxon) pair, carrying
    # the hue column along when grouping is requested.
    if hue is not None:
        df2 = pd.concat([df, mf[hue]], axis=1, join='inner')
        df2 = pd.melt(df2, id_vars=[hue])
    else:
        df2 = pd.melt(df)

    # Extra keyword arguments for the box plot; means are opt-in.
    box_kwargs = {}
    if show_means:
        box_kwargs['showmeans'] = True
        box_kwargs['meanprops'] = meanprops if meanprops else {
            'marker': 'x',
            'markerfacecolor': 'white',
            'markeredgecolor': 'white',
            'markersize': '10',
        }

    sns.boxplot(x='variable',
                y='value',
                hue=hue,
                hue_order=hue_order,
                data=df2,
                ax=ax,
                **box_kwargs)

    # Overlaying the strip plot adds a second set of legend entries, so
    # _artist() is asked to remove the duplicates in that case.
    remove_duplicates = bool(add_datapoints)

    if add_datapoints:
        # Alternative method: sns.swarmplot()
        sns.stripplot(x='variable',
                      y='value',
                      hue=hue,
                      hue_order=hue_order,
                      data=df2,
                      ax=ax,
                      color='black',
                      size=size,
                      dodge=True,
                      jitter=jitter,
                      alpha=alpha)

    # If provided, output the dataframe as a .csv file.
    if csv_file is not None:
        df3 = pd.concat([df, mf], axis=1, join='inner')
        df3.to_csv(csv_file)

    if brief_xlabels:
        xticklabels = [dokdo.pname(x.get_text())
                       for x in ax.get_xticklabels()]
    else:
        xticklabels = None

    if artist_kwargs is None:
        artist_kwargs = {}

    # Defaults first, then the caller's values so that they take precedence.
    artist_kwargs = {'xrot': 45,
                     'xha': 'right',
                     'xlabel': '',
                     'ylabel': 'Relative abundance (%)',
                     'xticklabels': xticklabels,
                     'remove_duplicates': remove_duplicates,
                     **artist_kwargs}

    # Default the legend title to the hue column, without clobbering a
    # title explicitly supplied by the caller.
    if hue is not None:
        artist_kwargs.setdefault('legend_title', hue)

    ax = _artist(ax, **artist_kwargs)

    return ax