def taxa_abundance_bar_plot(
    taxa, metadata=None, level=1, group=None, by=None, ax=None,
    figsize=None, width=0.8, count=0, exclude_samples=None,
    include_samples=None, exclude_taxa=None, sort_by_names=False,
    colors=None, label_columns=None, orders=None, sample_names=None,
    csv_file=None, taxa_names=None, sort_by_mean1=True,
    sort_by_mean2=True, sort_by_mean3=True, show_others=True,
    cmap_name='Accent', legend_short=False, artist_kwargs=None
):
    """Create a bar plot showing relative taxa abundance.

    The input visualization may already contain metadata, but you can
    update it with the ``metadata`` option.

    By default, the method will create a bar for each sample. Use the
    ``group`` option to create a bar for each sample group.

    +----------------+-----------------------------------------------------+
    | q2-taxa plugin | Example                                             |
    +================+=====================================================+
    | QIIME 2 CLI    | qiime taxa barplot [OPTIONS]                        |
    +----------------+-----------------------------------------------------+
    | QIIME 2 API    | from qiime2.plugins.taxa.visualizers import barplot |
    +----------------+-----------------------------------------------------+

    Parameters
    ----------
    taxa : str or qiime2.Visualization
        Visualization file or object from the q2-taxa plugin.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object.
    level : int, default: 1
        Taxonomic level at which the features should be collapsed.
    group : str, optional
        Metadata column to be used for grouping the samples.
    by : list, optional
        Column name(s) to be used for sorting the samples. Using
        'sample-id' will sort the samples by their name, in addition to
        other column name(s) that may have been provided. If multiple
        items are provided, sorting will occur by the order of the items.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise uses the current Axes.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    width : float, default: 0.8
        The width of the bars.
    count : int, default: 0
        The number of taxa to display. When 0, display all.
    exclude_samples : dict, optional
        Filtering logic used for sample exclusion.
        Format: {'col': ['item', ...], ...}.
    include_samples : dict, optional
        Filtering logic used for sample inclusion.
        Format: {'col': ['item', ...], ...}.
    exclude_taxa : list, optional
        The taxa names to be excluded when matched. Case insensitive.
    sort_by_names : bool, default: False
        If true, sort the columns (i.e. species) to be displayed by name.
    colors : list, optional
        The bar colors.
    label_columns : list, optional
        The column names to be used as the x-axis labels.
    orders : dict, optional
        Dictionary of {column1: [element1, element2, ...], column2:
        [element1, element2...], ...} to indicate the order of items. Used
        to sort the samples by the user-specified order instead of
        ordering numerically or alphabetically.
    sample_names : list, optional
        List of sample IDs to be included.
    csv_file : str, optional
        Path of the .csv file to output the dataframe to.
    taxa_names : list, optional
        List of taxa names to be displayed.
    sort_by_mean1 : bool, default: True
        Sort taxa by their mean relative abundance before sample
        filtration.
    sort_by_mean2 : bool, default: True
        Sort taxa by their mean relative abundance after sample filtration
        by 'include_samples' or 'exclude_samples'.
    sort_by_mean3 : bool, default: True
        Sort taxa by their mean relative abundance after sample filtration
        by 'sample_names'. Only applied when 'sample_names' is given.
    show_others : bool, default: True
        Include the 'Others' category.
    cmap_name : str, default: 'Accent'
        Name of the colormap passed to `matplotlib.pyplot.get_cmap()`.
        Ignored when ``colors`` is given.
    legend_short : bool, default: False
        If true, only display the smallest taxa rank in the legend.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    Raises
    ------
    ValueError
        If the values listed for a column in ``orders`` do not match the
        values found in that metadata column.

    See Also
    --------
    taxa_abundance_box_plot

    Examples
    --------
    Below is a simple example showing taxonomic abundance at the kingdom
    level (i.e. ``level=1``), which is the default taxonomic rank.

    >>> qzv_file = f'{data_dir}/moving-pictures-tutorial/taxa-bar-plots.qzv'
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               figsize=(10, 7),
    ...                               artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-1.png

    We can change the taxonomic rank from kingdom to genus by setting
    ``level=6``. Note that I removed ``show_legend=True`` because
    otherwise there will be too many taxa to display on the legend.
    Note also that the colors are recycled in each bar.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               figsize=(10, 7),
    ...                               level=6)
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-2.png

    We can only show the top seven most abundant genera plus 'Others' with
    ``count=8``.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               figsize=(10, 7),
    ...                               level=6,
    ...                               count=8,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-3.png

    We can plot the figure and the legend separately.

    >>> fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(12, 7), gridspec_kw={'width_ratios': [9, 1]})
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               level=6,
    ...                               count=8)
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               level=6,
    ...                               count=8,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(legend_only=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-4.png

    We can use a different color map to display more unique genera (e.g. 20).

    >>> fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(12, 7), gridspec_kw={'width_ratios': [9, 1]})
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               level=6,
    ...                               count=20,
    ...                               cmap_name='tab20')
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               level=6,
    ...                               count=20,
    ...                               cmap_name='tab20',
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(legend_only=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-5.png

    We can sort the samples by the body-site column in metadata with
    ``by=['body-site']``. To check whether the sorting worked properly,
    we can change the x-axis tick labels to include each sample's
    body-site with ``label_columns``.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               by=['body-site'],
    ...                               label_columns=['body-site', 'sample-id'],
    ...                               figsize=(10, 7),
    ...                               level=6,
    ...                               count=8,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-6.png

    If you want to sort the samples in a certain order instead of ordering
    numerically or alphabetically, use the ``orders`` option.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               by=['body-site'],
    ...                               label_columns=['body-site', 'sample-id'],
    ...                               figsize=(10, 7),
    ...                               level=6,
    ...                               count=8,
    ...                               orders={'body-site': ['left palm', 'tongue', 'gut', 'right palm']},
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-7.png

    We can only display the 'gut' and 'tongue' samples with
    ``include_samples``.

    >>> fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(9, 7), gridspec_kw={'width_ratios': [9, 1]})
    >>> kwargs = dict(include_samples={'body-site': ['gut', 'tongue']},
    ...               by=['body-site'],
    ...               label_columns=['body-site', 'sample-id'],
    ...               level=6,
    ...               count=8)
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               **kwargs)
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               **kwargs,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(legend_only=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-8.png

    We can make multiple bar charts grouped by body-site. When making a
    grouped bar chart, it's important to include ``sort_by_mean2=False``
    in order to have the same bar colors for the same taxa across
    different groups.

    >>> fig, [ax1, ax2, ax3, ax4, ax5] = plt.subplots(1, 5, figsize=(16, 7), gridspec_kw={'width_ratios': [2, 2, 2, 2, 1]})
    >>> kwargs = dict(level=6, count=8, sort_by_mean2=False)
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               include_samples={'body-site': ['gut']},
    ...                               **kwargs,
    ...                               artist_kwargs=dict(title='gut'))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               include_samples={'body-site': ['left palm']},
    ...                               **kwargs,
    ...                               artist_kwargs=dict(title='left palm',
    ...                                                  hide_ylabel=True,
    ...                                                  hide_yticks=True))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax3,
    ...                               include_samples={'body-site': ['right palm']},
    ...                               **kwargs,
    ...                               artist_kwargs=dict(title='right palm',
    ...                                                  hide_ylabel=True,
    ...                                                  hide_yticks=True))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax4,
    ...                               include_samples={'body-site': ['tongue']},
    ...                               **kwargs,
    ...                               artist_kwargs=dict(title='tongue',
    ...                                                  hide_ylabel=True,
    ...                                                  hide_yticks=True))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax5,
    ...                               **kwargs,
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(legend_only=True,
    ...                                                  legend_loc='upper left'))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-9.png

    We can select specific samples with ``sample_names``. We can also
    manually set the x-axis tick labels with ``xticklabels``. Finally, you
    can pick specific colors for the bars.

    >>> fig, [ax1, ax2, ax3] = plt.subplots(1, 3, figsize=(10, 5))
    >>> kwargs = dict(level=6, count=3, legend_short=True, sample_names=['L2S382', 'L4S112'])
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax1,
    ...                               **kwargs,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper right',
    ...                                                  title="sample_names=['L2S382', 'L4S112']"))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax2,
    ...                               **kwargs,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper right',
    ...                                                  title="xticklabels=['A', 'B']",
    ...                                                  xticklabels=['A', 'B']))
    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               ax=ax3,
    ...                               colors=['tab:blue', 'tab:orange', 'tab:gray'],
    ...                               **kwargs,
    ...                               artist_kwargs=dict(show_legend=True,
    ...                                                  legend_loc='upper right',
    ...                                                  title="colors=['tab:blue', 'tab:orange', 'tab:gray']"))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-10.png

    Finally, we can create a bar for each sample type.

    >>> dokdo.taxa_abundance_bar_plot(qzv_file,
    ...                               level=6,
    ...                               count=8,
    ...                               group='body-site',
    ...                               figsize=(10, 7),
    ...                               legend_short=True,
    ...                               artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_bar_plot-11.png
    """
    # The CSV must be read while the temporary directory still exists.
    with tempfile.TemporaryDirectory() as t:
        _parse_input(taxa, t)
        df = pd.read_csv(f'{t}/level-{level}.csv', index_col=0)

    # If provided, update the metadata.
    if metadata is not None:
        mf = dokdo.get_mf(metadata)
        df = df.drop(columns=_get_mf_cols(df))
        df = pd.concat([df, mf], axis=1, join='inner')

    # If provided, sort the samples by the user-specified order instead of
    # ordering numerically or alphabetically. To do this, we will first add a
    # new temporary column filled with the indices of the user-provided
    # list. This column will be used for sorting the samples later instead of
    # the original column. After sorting, the new column will be dropped from
    # the dataframe and the original column will replace its place.
    if isinstance(orders, dict):
        for k, v in orders.items():
            u = df[k].unique().tolist()

            if set(u) != set(v):
                message = (f"Target values {u} not matched with user-provided "
                           f"values {v} for metadata column `{k}`")
                raise ValueError(message)

            ranks = {item: i for i, item in enumerate(v)}
            df = df.rename(columns={k: f'@{k}'})
            df[k] = df[f'@{k}'].map(ranks)

    # Expose the index as a column so it can be used in `by` and
    # `label_columns`.
    df["sample-id"] = df.index

    # If provided, sort the samples for display in the x-axis.
    if isinstance(by, list):
        df = df.sort_values(by=by)

    # If sorting was performed by the user-specified order, remove the
    # temporary columns and then bring back the original column.
    if isinstance(orders, dict):
        for k in orders:
            df = df.drop(columns=[k])
            df = df.rename(columns={f'@{k}': k})

    # If provided, exclude the specified taxa (case-insensitive substring
    # match against the column names).
    if isinstance(exclude_taxa, list):
        targets = [tax.lower() for tax in exclude_taxa]
        dropped = [col for col in df.columns
                   if any(target in col.lower() for target in targets)]
        df = df.drop(columns=dropped)

    # If provided, group the samples by the given metadata column.
    if group is not None:
        df = df.groupby(group)[taxa_cols(df)].agg('sum')

    # Remove the metadata columns.
    cols = _get_mf_cols(df)
    mf = df[cols]
    df = df.drop(columns=cols)

    if sort_by_mean1:
        df = _sort_by_mean(df)

    df, mf = _filter_samples(df, mf, exclude_samples, include_samples)

    if sort_by_mean2:
        df = _sort_by_mean(df)

    # If provided, only include the specified samples.
    if isinstance(sample_names, list):
        df = df.loc[sample_names]
        mf = mf.loc[sample_names]

        if sort_by_mean3:
            df = _sort_by_mean(df)

    # Convert counts to proportions.
    df = df.div(df.sum(axis=1), axis=0)

    df = _get_others_col(df, count, taxa_names, show_others)

    if sort_by_names:
        df = df.reindex(sorted(df.columns), axis=1)

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    if isinstance(colors, list):
        c = colors
    else:
        # `matplotlib.cm.get_cmap` was removed in matplotlib 3.9;
        # `pyplot.get_cmap` is available in both old and new releases.
        c = plt.get_cmap(cmap_name).colors

    # Express the proportions as percentages.
    df = df * 100

    # If provided, output the dataframe as a .csv file.
    if csv_file is not None:
        df.to_csv(csv_file)

    if legend_short:
        df.columns = [dokdo.pname(x) for x in df.columns]

    df.plot.bar(stacked=True,
                legend=False,
                ax=ax,
                width=width,
                color=c,
                linewidth=0)

    if label_columns is not None:
        xticklabels = mf[label_columns].apply(
            lambda row: ' : '.join(row.values.astype(str)), axis=1).tolist()
    else:
        xticklabels = None

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied artist options override the defaults below.
    artist_kwargs = {'xlabel': '',
                     'ylabel': 'Relative abundance (%)',
                     'xticklabels': xticklabels,
                     **artist_kwargs}

    ax = _artist(ax, **artist_kwargs)

    return ax
def beta_3d_plot(pcoa_results, metadata=None, hue=None, azim=-60, elev=30,
                 s=80, ax=None, figsize=None, hue_order=None,
                 artist_kwargs=None):
    """Create a 3D scatter plot from PCoA results.

    +---------------------+---------------------------------------------------+
    | q2-diversity plugin | Example                                           |
    +=====================+===================================================+
    | QIIME 2 CLI         | qiime diversity pcoa [OPTIONS]                    |
    +---------------------+---------------------------------------------------+
    | QIIME 2 API         | from qiime2.plugins.diversity.methods import pcoa |
    +---------------------+---------------------------------------------------+

    Parameters
    ----------
    pcoa_results : str or qiime2.Artifact
        Artifact file or object corresponding to PCoAResults or
        PCoAResults % Properties('biplot').
    metadata : str or qiime2.Metadata, optional
        Metadata file or object. When given, only the samples present in
        both the PCoA results and the metadata are plotted.
    hue : str, optional
        Grouping variable that will produce points with different colors.
        The column is looked up in ``metadata``.
    azim : int, default: -60
        Azimuthal viewing angle.
    elev : int, default: 30
        Elevation viewing angle.
    s : float, default: 80.0
        Marker size.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise uses the current Axes.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    hue_order : list, optional
        Specify the order of categorical levels of the 'hue' semantic.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    See Also
    --------
    ordinate
    beta_2d_plot
    beta_scree_plot
    beta_parallel_plot
    addbiplot

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/moving-pictures-tutorial/unweighted_unifrac_pcoa_results.qza'
    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.beta_3d_plot(qza_file,
    ...                    metadata_file,
    ...                    'body-site',
    ...                    figsize=(6, 6),
    ...                    artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/beta_3d_plot-1.png

    We can control the camera angle with ``elev`` and ``azim``.

    >>> fig = plt.figure(figsize=(12, 6))
    >>> ax1 = fig.add_subplot(1, 2, 1, projection='3d')
    >>> ax2 = fig.add_subplot(1, 2, 2, projection='3d')
    >>> dokdo.beta_3d_plot(qza_file, metadata_file, ax=ax1, hue='body-site', elev=15)
    >>> dokdo.beta_3d_plot(qza_file, metadata_file, ax=ax2, hue='body-site', azim=70)
    >>> plt.tight_layout()

    .. image:: images/beta_3d_plot-2.png
    """
    if isinstance(pcoa_results, str):
        _pcoa_results = Artifact.load(pcoa_results)
    else:
        _pcoa_results = pcoa_results

    ordination_results = _pcoa_results.view(OrdinationResults)

    # Work on a copy of the first three axes so that renaming the columns
    # does not touch the ordination object's own table.
    df = ordination_results.samples.iloc[:, :3].copy()
    df.columns = ['A1', 'A2', 'A3']

    # Materialize the values as a plain list so that the integer lookups
    # below are positional, regardless of how the container is indexed.
    props = list(ordination_results.proportion_explained)

    if metadata is not None:
        mf = dokdo.get_mf(metadata)
        df = pd.concat([df, mf], axis=1, join='inner')

    if ax is None:
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(1, 1, 1, projection='3d')

    ax.view_init(azim=azim, elev=elev)

    if hue is None:
        ax.scatter(df['A1'], df['A2'], df['A3'], s=s)
    else:
        if hue_order is None:
            _hue_order = df[hue].unique()
        else:
            _hue_order = hue_order

        # One scatter call per level so that each level gets its own
        # color and legend entry.
        for label in _hue_order:
            a = df[df[hue] == label]
            ax.scatter(a['A1'], a['A2'], a['A3'], label=label, s=s)

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied artist options override the defaults below.
    artist_kwargs = {'xlabel': f'Axis 1 ({props[0]*100:.2f} %)',
                     'ylabel': f'Axis 2 ({props[1]*100:.2f} %)',
                     'zlabel': f'Axis 3 ({props[2]*100:.2f} %)',
                     'hide_xticks': True,
                     'hide_yticks': True,
                     'hide_zticks': True,
                     'legend_title': hue,
                     **artist_kwargs}

    ax = _artist(ax, **artist_kwargs)

    return ax
def alpha_diversity_plot(alpha_diversity, metadata, where, ax=None,
                         figsize=None, add_swarmplot=False, order=None,
                         hide_nsizes=False, artist_kwargs=None):
    """Create an alpha diversity plot.

    Parameters
    ----------
    alpha_diversity : str or qiime2.Artifact
        Artifact file or object with the semantic type
        `SampleData[AlphaDiversity]`.
    metadata : str or qiime2.Metadata
        Metadata file or object.
    where : str
        Column name to be used for the x-axis.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise uses the current Axes.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    add_swarmplot : bool, default: False
        Add a swarm plot on top of the box plot.
    order : list, optional
        Order to plot the categorical levels in.
    hide_nsizes : bool, default: False
        Hide sample size from x-axis labels.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/moving-pictures-tutorial/faith_pd_vector.qza'
    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.alpha_diversity_plot(qza_file, metadata_file, 'body-site')
    >>> plt.tight_layout()

    .. image:: images/alpha_diversity_plot.png
    """
    if isinstance(alpha_diversity, str):
        _alpha_diversity = Artifact.load(alpha_diversity)
    else:
        _alpha_diversity = alpha_diversity

    # Keep only the samples that have both a diversity value and metadata.
    df = _alpha_diversity.view(pd.Series).to_frame()
    mf = dokdo.get_mf(metadata)
    df = pd.concat([df, mf], axis=1, join='inner')

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    # The diversity metric is the first column, ahead of the metadata.
    metric = df.columns[0]

    boxprops = dict(color='white', edgecolor='black')

    d = {'x': where, 'y': metric, 'ax': ax, 'order': order, 'data': df}

    sns.boxplot(boxprops=boxprops, **d)

    if add_swarmplot:
        sns.swarmplot(**d)

    if not hide_nsizes:
        # Tick label texts are always strings, so key the counts by the
        # string form of each level. Levels without any sample (e.g. ones
        # listed only in `order`) are reported as 0 instead of failing.
        nsizes = {str(k): n for k, n in df[where].value_counts().items()}
        xtexts = [x.get_text() for x in ax.get_xticklabels()]
        xtexts = [f'{x} ({nsizes.get(x, 0)})' for x in xtexts]
        ax.set_xticklabels(xtexts)

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied artist options override the defaults below.
    artist_kwargs = {'xlabel': where,
                     'ylabel': metric,
                     **artist_kwargs}

    ax = _artist(ax, **artist_kwargs)

    return ax
def beta_parallel_plot(pcoa_results, hue=None, hue_order=None, metadata=None,
                       count=5, ax=None, figsize=None, artist_kwargs=None):
    """Create a parallel plot from PCoA results.

    +---------------------+---------------------------------------------------+
    | q2-diversity plugin | Example                                           |
    +=====================+===================================================+
    | QIIME 2 CLI         | qiime diversity pcoa [OPTIONS]                    |
    +---------------------+---------------------------------------------------+
    | QIIME 2 API         | from qiime2.plugins.diversity.methods import pcoa |
    +---------------------+---------------------------------------------------+

    Parameters
    ----------
    pcoa_results : str or qiime2.Artifact
        Artifact file or object corresponding to PCoAResults.
    hue : str, optional
        Grouping variable that will produce lines with different colors.
        When omitted, every sample is drawn as its own class.
    hue_order : list, optional
        Specify the order of categorical levels of the 'hue' semantic.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object. Required if 'hue' is used.
    count : int, default: 5
        Number of principal components to be displayed.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise uses the current Axes.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    See Also
    --------
    ordinate
    beta_2d_plot
    beta_3d_plot
    beta_scree_plot

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/moving-pictures-tutorial/unweighted_unifrac_pcoa_results.qza'
    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.beta_parallel_plot(qza_file)
    >>> plt.tight_layout()

    .. image:: images/beta_parallel_plot-1.png

    We can group the lines by body-site.

    >>> dokdo.beta_parallel_plot(qza_file,
    ...                          metadata=metadata_file,
    ...                          hue='body-site',
    ...                          artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/beta_parallel_plot-2.png
    """
    if isinstance(pcoa_results, str):
        _pcoa_results = Artifact.load(pcoa_results)
    else:
        _pcoa_results = pcoa_results

    ordination_results = _pcoa_results.view(OrdinationResults)

    # Tick labels: one per displayed axis, with its explained variance.
    props = ordination_results.proportion_explained * 100
    props = [f'Axis {i+1} ({x:.2f}%)' for i, x in enumerate(props[:count])]

    df = ordination_results.samples.copy().iloc[:, :count]

    if hue is None:
        col = df.index
    else:
        mf = dokdo.get_mf(metadata)
        col = mf[hue]

    df = df.assign(Target=col)

    # Reorder the rows so that the classes are drawn in the requested order.
    if isinstance(hue_order, list):
        d = {x: i for i, x in enumerate(hue_order)}
        df = df.iloc[df['Target'].map(d).argsort()]

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    # Pass `ax` explicitly; otherwise pandas draws onto the current Axes,
    # which is not necessarily the one supplied by the caller.
    # `matplotlib.cm.get_cmap` was removed in matplotlib 3.9;
    # `pyplot.get_cmap` is available in both old and new releases.
    pd.plotting.parallel_coordinates(df, 'Target', ax=ax,
                                     color=plt.get_cmap('tab10').colors)

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied artist options override the defaults below.
    artist_kwargs = {'xlabel': '',
                     'ylabel': '',
                     'xticklabels': props,
                     'legend_title': hue,
                     **artist_kwargs}

    ax = _artist(ax, **artist_kwargs)

    return ax
def _lefse_feature_name(name):
    """Convert a ';'-separated taxon string into a '|'-separated LEfSe name.

    The characters '-', '[', ']', '(', ')' and ' ' are replaced with '_'.
    Ranks that are missing ('__') or empty (e.g. 'g__') are given a
    placeholder built from the closest classified ancestor and the rank
    level, so that every level of the lineage has a non-empty name.
    """
    cleaned = name.translate(str.maketrans('-[]() ', '______'))
    ranks = cleaned.split(';')

    # `base` tracks the most recent classified rank.
    base = ranks[0]
    result = [base]

    for i, rank in enumerate(ranks[1:], start=2):
        parts = rank.split('__')
        if rank == '__':
            result.append(f'{base}_x__L{i}')
        elif len(parts) > 1 and parts[1] == '':
            result.append(f'{base}_{rank}L{i}')
        else:
            # Also covers ranks without a '__' prefix, which are kept
            # verbatim instead of raising IndexError.
            result.append(rank)
            base = rank

    return '|'.join(result)


def prepare_lefse(table_file, taxonomy_file, metadata_file, output_file,
                  class_col, subclass_col=None, subject_col=None, where=None):
    """Create a TSV file which can be used as input for the LEfSe tool.

    This command
    1) collapses the input feature table at the genus level,
    2) computes relative frequency of the features,
    3) performs sample filtration if requested,
    4) changes the format of feature names,
    5) adds the relevant metadata as 'Class', 'Subclass', and 'Subject', and
    6) writes a text file which can be used as input for LEfSe.

    Parameters
    ----------
    table_file : str
        Path to the table file with the 'FeatureTable[Frequency]' type.
    taxonomy_file : str
        Path to the taxonomy file with the 'FeatureData[Taxonomy]' type.
    metadata_file : str
        Path to the metadata file.
    output_file : str
        Path to the output file.
    class_col : str
        Metadata column used as 'Class' by LEfSe.
    subclass_col : str, optional
        Metadata column used as 'Subclass' by LEfSe.
    subject_col : str, optional
        Metadata column used as 'Subject' by LEfSe.
    where : str, optional
        SQLite 'WHERE' clause specifying sample metadata criteria.
    """
    collapsed = taxa.methods.collapse(table=Artifact.load(table_file),
                                      taxonomy=Artifact.load(taxonomy_file),
                                      level=6)

    relative = feature_table.methods.relative_frequency(
        table=collapsed.collapsed_table)

    if where is None:
        df = relative.relative_frequency_table.view(pd.DataFrame)
    else:
        filtered = feature_table.methods.filter_samples(
            table=relative.relative_frequency_table,
            metadata=Metadata.load(metadata_file),
            where=where)
        df = filtered.filtered_table.view(pd.DataFrame)

    df.columns = [_lefse_feature_name(x) for x in df.columns.to_list()]

    # Spaces in metadata values are replaced with underscores.
    mf = dokdo.get_mf(metadata_file)
    mf = mf.replace(' ', '_', regex=True)
    df = pd.concat([df, mf], axis=1, join="inner")

    # Move the requested metadata columns to the front, in the order
    # Class, Subclass, Subject, and discard every other metadata column.
    keep = [class_col]
    keep += [c for c in (subclass_col, subject_col) if c is not None]
    for position, col in enumerate(keep):
        df.insert(position, col, df.pop(col))
    df = df.drop(columns=[c for c in mf.columns if c not in keep])

    # Write the table transposed (one row per metadata field or feature),
    # without a header row.
    df.T.to_csv(output_file, header=False, sep='\t')
def _hue_row_colors(df, mf, hue, hue_order, cmap_name):
    """Return (filtered table, level-to-color map, per-sample colors).

    When ``hue_order`` is given, samples whose ``hue`` value is not listed
    are removed from the table.
    """
    # `matplotlib.cm.get_cmap` was removed in matplotlib 3.9;
    # `pyplot.get_cmap` is available in both old and new releases.
    palette = plt.get_cmap(cmap_name).colors
    labels = mf[hue].reindex(df.index)

    if hue_order is None:
        keys = labels.unique()
    else:
        keys = hue_order
        keep = labels.isin(hue_order)
        df = df[keep]
        labels = labels[keep]

    lut = dict(zip(keys, palette[:len(keys)]))

    return df, lut, labels.map(lut)


def heatmap(
    table, metadata=None, hue1=None, hue_order1=None,
    hue1_cmap='tab10', hue1_loc='upper right', hue2=None,
    hue_order2=None, hue2_cmap='Pastel1', hue2_loc='upper left',
    normalize=None, method='average', metric='euclidean',
    figsize=(10, 10), row_cluster=True, col_cluster=True, **kwargs
):
    """Create a hierarchically clustered heatmap of a feature table.

    Internally, this method uses the `seaborn.clustermap()` method to
    create a heatmap.

    Parameters
    ----------
    table : str or qiime2.Artifact
        Artifact file or object corresponding to FeatureTable[Frequency].
    metadata : str or qiime2.Metadata, optional
        Metadata file or object. Required if 'hue1' or 'hue2' is used.
    hue1 : str, optional
        First grouping variable that will produce labels with different
        colors.
    hue_order1 : list, optional
        Specify the order of categorical levels of the 'hue1' semantic.
    hue1_cmap : str, default: 'tab10'
        Name of the colormap passed to `matplotlib.pyplot.get_cmap()` for
        `hue1`.
    hue1_loc : str, default: 'upper right'
        Location of the legend for `hue1`.
    hue2 : str, optional
        Second grouping variable that will produce labels with different
        colors. Can only be used together with 'hue1'.
    hue_order2 : list, optional
        Specify the order of categorical levels of the 'hue2' semantic.
    hue2_cmap : str, default: 'Pastel1'
        Name of the colormap passed to `matplotlib.pyplot.get_cmap()` for
        `hue2`.
    hue2_loc : str, default: 'upper left'
        Location of the legend for `hue2`.
    normalize : str, optional
        Normalize the feature table by adding a pseudocount of 1 and then
        taking the log10 of the table or performing centre log ratio
        transformation. Choices: {'log10', 'clr'}.
    method : str, default: 'average'
        Linkage method to use for calculating clusters. See
        `scipy.cluster.hierarchy.linkage()` documentation for more
        information.
    metric : str, default: 'euclidean'
        Distance metric to use for the data. See
        `scipy.spatial.distance.pdist()` documentation for more options.
    figsize : tuple, default: (10, 10)
        Width, height in inches. Format: (float, float).
    row_cluster : bool, default: True
        If True, cluster the rows.
    col_cluster : bool, default: True
        If True, cluster the columns.
    kwargs : other keyword arguments
        All other keyword arguments are passed to `seaborn.clustermap()`.

    Returns
    -------
    seaborn.matrix.ClusterGrid
        A ClusterGrid instance.

    Raises
    ------
    TypeError
        If ``table`` is neither a path nor an Artifact.
    ValueError
        If 'hue2' is used without 'hue1', or a hue argument is used
        without ``metadata``.

    Examples
    --------
    Below is a simple example.

    >>> table_file = f'{data_dir}/moving-pictures-tutorial/table.qza'
    >>> dokdo.heatmap(table_file, normalize='log10')

    .. image:: images/heatmap-1.png

    We can color the samples by ``body-site``. For this example, we will
    use the centered log-ratio transformation.

    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.heatmap(table_file,
    ...               normalize='clr',
    ...               metadata=metadata_file,
    ...               hue1='body-site')

    .. image:: images/heatmap-2.png

    We can add an additional grouping variable ``subject``. Note that
    ``xticklabels`` and ``yticklabels`` are extra keyword arguments that
    are passed to the ``seaborn.clustermap`` method.

    >>> dokdo.heatmap(table_file,
    ...               normalize='clr',
    ...               metadata=metadata_file,
    ...               hue1='body-site',
    ...               hue2='subject',
    ...               xticklabels=False,
    ...               yticklabels=False)

    .. image:: images/heatmap-3.png
    """
    # Check the input type.
    if isinstance(table, str):
        table = Artifact.load(table)
    elif not isinstance(table, Artifact):
        raise TypeError(f'Incorrect feature table type: {type(table)}')

    # Validate the hue arguments up front, before any drawing is done.
    if hue2 is not None and hue1 is None:
        raise ValueError("Argument 'hue2' was used without 'hue1'. "
                         "Use 'hue1' instead.")
    if hue1 is not None and metadata is None:
        raise ValueError("Argument 'metadata' is required when 'hue1' "
                         "or 'hue2' is used.")

    # Create the dataframe.
    df = table.view(pd.DataFrame)

    # If the metadata is provided, filter the samples accordingly and then
    # discard the features that are zero in all of the remaining samples.
    if metadata is not None:
        mf = dokdo.get_mf(metadata)
        df = pd.concat([df, mf], axis=1, join='inner')
        df = df.drop(columns=mf.columns)
        df = df.loc[:, (df != 0).any(axis=0)]

    # If the hue argument(s) are used, get the row colors.
    lut1 = None
    lut2 = None
    row_colors = None

    if hue1 is not None:
        df, lut1, row_colors = _hue_row_colors(
            df, mf, hue1, hue_order1, hue1_cmap)

    if hue2 is not None:
        df, lut2, colors2 = _hue_row_colors(
            df, mf, hue2, hue_order2, hue2_cmap)
        row_colors = pd.concat([row_colors, colors2], axis=1)

    # Apply the appropriate normalization.
    if normalize == 'log10':
        df = df.apply(lambda x: np.log10(x + 1))
    elif normalize == 'clr':
        df = df.apply(lambda x: clr(x + 1), axis=1, result_type='broadcast')

    # Draw the heatmap.
    g = sns.clustermap(df,
                       method=method,
                       metric=metric,
                       figsize=figsize,
                       row_cluster=row_cluster,
                       col_cluster=col_cluster,
                       row_colors=row_colors,
                       **kwargs)

    # If the hue argument(s) are used, add the legend(s).
    if hue1 is not None:
        handles = [Patch(facecolor=lut1[name]) for name in lut1]
        legend1 = plt.legend(handles, list(lut1), title=hue1,
                             bbox_to_anchor=(1, 1),
                             bbox_transform=plt.gcf().transFigure,
                             loc=hue1_loc)

    if hue2 is not None:
        handles = [Patch(facecolor=lut2[name]) for name in lut2]
        plt.legend(handles, list(lut2), title=hue2,
                   bbox_to_anchor=(1, 1),
                   bbox_transform=plt.gcf().transFigure,
                   loc=hue2_loc)
        # The second `plt.legend` call replaces the first legend on the
        # Axes, so the first one is added back as a separate artist.
        plt.gca().add_artist(legend1)

    return g
def denoising_stats_plot(stats, metadata, where, ax=None, figsize=None,
                         pseudocount=False, order=None, hide_nsizes=False,
                         artist_kwargs=None):
    """Create a grouped box chart for denoising statistics from DADA2.

    +-----------------+---------------------------------------------------------+
    | q2-dada2 plugin | Example                                                 |
    +=================+=========================================================+
    | QIIME 2 CLI     | qiime dada2 denoise-paired [OPTIONS]                    |
    +-----------------+---------------------------------------------------------+
    | QIIME 2 API     | from qiime2.plugins.dada2.methods import denoise_paired |
    +-----------------+---------------------------------------------------------+

    Parameters
    ----------
    stats : str or qiime2.Artifact
        Artifact file or object from the q2-dada2 plugin.
    metadata : str or qiime2.Metadata
        Metadata file or object.
    where : str
        Column name of the sample metadata.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise uses the current Axes.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    pseudocount : bool, default: False
        Add pseudocount to remove zeros.
    order : list, optional
        Order to plot the categorical levels in. Levels that have no
        samples are labelled with a sample size of 0.
    hide_nsizes : bool, default: False
        Hide sample size from x-axis labels.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/atacama-soil-microbiome-tutorial/denoising-stats.qza'
    >>> metadata_file = f'{data_dir}/atacama-soil-microbiome-tutorial/sample-metadata.tsv'
    >>> dokdo.denoising_stats_plot(qza_file, metadata_file, 'transect-name', artist_kwargs=dict(show_legend=True))
    >>> plt.tight_layout()

    .. image:: images/denoising_stats_plot.png
    """
    with tempfile.TemporaryDirectory() as t:
        _parse_input(stats, t)
        # The second line of stats.tsv is skipped (skiprows=[1]) so that
        # only the header and the per-sample rows are parsed.
        df1 = pd.read_table(f'{t}/stats.tsv', skiprows=[1], index_col=0)

    mf = dokdo.get_mf(metadata)

    # Keep only the samples present in both the statistics and the metadata.
    df2 = pd.concat([df1, mf], axis=1, join='inner')

    steps = ['input', 'filtered', 'denoised', 'merged', 'non-chimeric']
    df3 = pd.melt(df2[steps + [where]], id_vars=[where])

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    if pseudocount:
        df3['value'] = df3['value'] + 1

    sns.boxplot(x=where, y='value', data=df3, hue='variable', ax=ax,
                order=order)

    if not hide_nsizes:
        # Tick labels are text, so the counts are keyed by str(level). The
        # .get() fallback covers levels requested through ``order`` that
        # have no samples; a plain lookup would raise KeyError for those.
        nsizes = {str(k): v for k, v in df2[where].value_counts().items()}
        xtexts = [x.get_text() for x in ax.get_xticklabels()]
        ax.set_xticklabels([f'{x} ({nsizes.get(x, 0)})' for x in xtexts])

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied keyword arguments take precedence over the defaults.
    artist_kwargs = {'xlabel': where,
                     'ylabel': 'Read depth',
                     **artist_kwargs}

    ax = _artist(ax, **artist_kwargs)

    return ax
def barplot(barplot_file, group, axis=0, figsize=(10, 10), level=1, count=0,
            items=None, by=None, label_columns=None, metadata=None,
            artist_kwargs=None, ylabel_fontsize=None, xaxis_repeated=False,
            cmap_name='Accent'):
    """Create a grouped abundance bar plot.

    Under the hood, this method essentially wraps the
    `taxa_abundance_bar_plot` method: one chart is drawn per level of
    ``group``, flanked by a narrow Axes for the shared y-axis label and
    another one for the shared legend.

    Parameters
    ----------
    barplot_file : str or qiime2.Visualization
        Visualization file or object from the q2-taxa plugin.
    group : str
        Metadata column.
    axis : int, default: 0
        By default, charts will be stacked vertically. Use 1 for horizontal
        stacking.
    figsize : tuple, default: (10, 10)
        Width, height in inches. Format: (float, float).
    level : int, default: 1
        Taxonomic level at which the features should be collapsed.
    count : int, default: 0
        The number of taxa to display. When 0, display all.
    items : list, optional
        Specify the order of charts.
    by : list, optional
        Column name(s) to be used for sorting the samples. Using
        'sample-id' will sort the samples by their name, in addition to
        other column name(s) that may have been provided. If multiple items
        are provided, sorting will occur by the order of the items.
    label_columns : list, optional
        The column names to be used as the x-axis labels.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.
    ylabel_fontsize : float or str, optional
        Sets the y-axis label font size.
    xaxis_repeated : bool, default: False
        If true, remove all x-axis tick labels except for the bottom
        subplot. Ignored if `axis=1`.
    cmap_name : str, default: 'Accent'
        Name of the colormap passed to `matplotlib.cm.get_cmap()`.

    See Also
    --------
    taxa_abundance_bar_plot

    Examples
    --------
    Below is a simple example.

    >>> barplot_file = f'{data_dir}/moving-pictures-tutorial/taxa-bar-plots.qzv'
    >>> dokdo.barplot(barplot_file,
    ...               'body-site',
    ...               axis=1,
    ...               figsize=(10, 6),
    ...               level=6,
    ...               count=8)

    .. image:: images/barplot-1.png

    We can draw the subplots vertically, which is particularly useful when
    the samples are matched.

    >>> dokdo.barplot(barplot_file,
    ...               'body-site',
    ...               axis=0,
    ...               figsize=(8, 10),
    ...               level=6,
    ...               count=8,
    ...               xaxis_repeated=True)

    .. image:: images/barplot-2.png
    """
    with tempfile.TemporaryDirectory() as t:
        # Accept an already-loaded Visualization as documented;
        # Visualization.load() only makes sense for a file path.
        if isinstance(barplot_file, Visualization):
            vis = barplot_file
        else:
            vis = Visualization.load(barplot_file)
        vis.export_data(t)
        # This table is only used to look up the levels of ``group``, so
        # the level-1 file is read regardless of ``level``.
        df = pd.read_csv(f'{t}/level-1.csv', index_col=0)

    if metadata is not None:
        mf = dokdo.get_mf(metadata)
        cols = _get_mf_cols(df)
        df.drop(columns=cols, inplace=True)
        df = pd.concat([df, mf], axis=1, join='inner')

    _items = df[group].unique() if items is None else items
    n_items = len(_items)

    # Layout: [y-label strip | chart(s) | legend strip].
    if axis == 0:
        nrows, ncols = n_items, 3
        width_ratios = [0.01, 1, 0.01]
    else:
        nrows, ncols = 1, n_items + 2
        width_ratios = [0.01] + [1] * n_items + [0.01]

    # squeeze=False keeps ``axes`` two-dimensional even for a single row,
    # so the 2-D indexing below also works when there is only one group.
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize,
                             gridspec_kw=dict(width_ratios=width_ratios),
                             squeeze=False)

    if artist_kwargs is None:
        artist_kwargs = {}

    _artist_kwargs = {'hide_ytexts': True, **artist_kwargs}

    plot_kwargs = dict(sort_by_mean2=False,
                       level=level,
                       count=count,
                       by=by,
                       label_columns=label_columns,
                       metadata=metadata,
                       cmap_name=cmap_name)

    if axis == 0:
        chart_axes = axes[:, 1]
        # With xaxis_repeated, only the bottom chart keeps its tick labels.
        hide_xtexts = [bool(xaxis_repeated)] * n_items
        if xaxis_repeated:
            hide_xtexts[-1] = False
    else:
        chart_axes = axes[0, 1:-1]
        hide_xtexts = None

    for i, ax in enumerate(chart_axes):
        chart_kwargs = {'title': _items[i]}
        if hide_xtexts is not None:
            chart_kwargs['hide_xtexts'] = hide_xtexts[i]
        # User-supplied keyword arguments take precedence.
        chart_kwargs.update(_artist_kwargs)
        taxa_abundance_bar_plot(barplot_file,
                                ax=ax,
                                include_samples={group: [_items[i]]},
                                artist_kwargs=chart_kwargs,
                                **plot_kwargs)

    # Add the shared y-axis label.
    if axis == 0:
        # Merge the first column into one tall Axes spanning all rows.
        gs = axes[0, 0].get_gridspec()
        for ax in axes[:, 0]:
            ax.remove()
        axbig = fig.add_subplot(gs[:, 0])
    else:
        axbig = axes[0, 0]

    axbig.set_ylabel('Relative abundance (%)', fontsize=ylabel_fontsize)
    axbig.xaxis.set_visible(False)
    plt.setp(axbig.spines.values(), visible=False)
    axbig.tick_params(left=False, labelleft=False)
    axbig.patch.set_visible(False)

    # Add the shared legend.
    if axis == 0:
        # Merge the last column into one tall Axes spanning all rows.
        gs = axes[0, -1].get_gridspec()
        for ax in axes[:, -1]:
            ax.remove()
        axbig = fig.add_subplot(gs[:, -1])
    else:
        axbig = axes[0, -1]

    taxa_abundance_bar_plot(barplot_file,
                            ax=axbig,
                            legend_short=True,
                            artist_kwargs={'legend_only': True,
                                           'legend_loc': 'center left',
                                           **_artist_kwargs},
                            **plot_kwargs)

    plt.tight_layout()
def beta_2d_plot(pcoa_results, metadata=None, hue=None, size=None,
                 style=None, s=80, alpha=None, ax=None, figsize=None,
                 hue_order=None, style_order=None, legend_type='brief',
                 artist_kwargs=None):
    """Create a 2D scatter plot from PCoA results.

    +---------------------+---------------------------------------------------+
    | q2-diversity plugin | Example                                           |
    +=====================+===================================================+
    | QIIME 2 CLI         | qiime diversity pcoa [OPTIONS]                    |
    +---------------------+---------------------------------------------------+
    | QIIME 2 API         | from qiime2.plugins.diversity.methods import pcoa |
    +---------------------+---------------------------------------------------+

    Parameters
    ----------
    pcoa_results : str or qiime2.Artifact
        Artifact file or object corresponding to PCoAResults or
        PCoAResults % Properties('biplot').
    metadata : str or qiime2.Metadata, optional
        Metadata file or object.
    hue : str, optional
        Grouping variable that will produce points with different colors.
    size : str, optional
        Grouping variable that will produce points with different sizes.
    style : str, optional
        Grouping variable that will produce points with different markers.
    s : float, default: 80.0
        Marker size.
    alpha : float, optional
        Proportional opacity of the points.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise uses the current Axes.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    hue_order : list, optional
        Specify the order of categorical levels of the 'hue' semantic.
    style_order : list, optional
        Specify the order of categorical levels of the 'style' semantic.
    legend_type : str, default: 'brief'
        Legend type as in seaborn.scatterplot ('brief' or 'full').
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    See Also
    --------
    ordinate
    beta_3d_plot
    beta_scree_plot
    beta_parallel_plot
    addbiplot

    Examples
    --------
    Below is a simple example.

    >>> qza_file = f'{data_dir}/moving-pictures-tutorial/unweighted_unifrac_pcoa_results.qza'
    >>> metadata_file = f'{data_dir}/moving-pictures-tutorial/sample-metadata.tsv'
    >>> dokdo.beta_2d_plot(qza_file)
    >>> plt.tight_layout()

    .. image:: images/beta_2d_plot-1.png

    We can color the datapoints with ``hue``. We can also change the
    style of datapoints with ``style``. If the variable of interest is
    numeric, we can use ``size`` to control the size of datapoints.
    Finally, we can combine all those groupings.

    >>> fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(2, 2, figsize=(8, 8))
    >>> artist_kwargs1 = dict(show_legend=True, title="hue='body-site'")
    >>> artist_kwargs2 = dict(show_legend=True, title="style='subject'")
    >>> artist_kwargs3 = dict(show_legend=True, title="size='days-since-experiment-start'")
    >>> artist_kwargs4 = dict(title="Combined groupings")
    >>> dokdo.beta_2d_plot(qza_file, metadata_file, ax=ax1, hue='body-site', artist_kwargs=artist_kwargs1)
    >>> dokdo.beta_2d_plot(qza_file, metadata_file, ax=ax2, style='subject', artist_kwargs=artist_kwargs2)
    >>> dokdo.beta_2d_plot(qza_file, metadata_file, ax=ax3, size='days-since-experiment-start', artist_kwargs=artist_kwargs3)
    >>> dokdo.beta_2d_plot(qza_file, metadata_file, ax=ax4, hue='body-site', style='subject', size='days-since-experiment-start', artist_kwargs=artist_kwargs4)
    >>> plt.tight_layout()

    .. image:: images/beta_2d_plot-2.png
    """
    # A string is treated as a path to load; anything else is used as is.
    artifact = (Artifact.load(pcoa_results)
                if isinstance(pcoa_results, str) else pcoa_results)

    ordination = artifact.view(OrdinationResults)

    # Only the first two ordination axes are plotted.
    coords = ordination.samples.iloc[:, :2]
    coords.columns = ['A1', 'A2']

    if metadata is not None:
        mf = dokdo.get_mf(metadata)
        # Inner join: keep samples found in both the ordination and metadata.
        plot_df = pd.concat([coords, mf], axis=1, join='inner')
    else:
        plot_df = coords

    explained = ordination.proportion_explained

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    sns.scatterplot(data=plot_df,
                    x='A1',
                    y='A2',
                    hue=hue,
                    hue_order=hue_order,
                    style=style,
                    style_order=style_order,
                    size=size,
                    ax=ax,
                    s=s,
                    alpha=alpha,
                    legend=legend_type)

    # Defaults first, so that caller-supplied values override them.
    defaults = {
        'xlabel': f'Axis 1 ({explained[0]*100:.2f} %)',
        'ylabel': f'Axis 2 ({explained[1]*100:.2f} %)',
        'hide_xticks': True,
        'hide_yticks': True,
    }
    overrides = {} if artist_kwargs is None else artist_kwargs
    artist_kwargs = {**defaults, **overrides}

    ax = _artist(ax, **artist_kwargs)

    return ax
def taxa_abundance_box_plot(
    taxa, metadata=None, hue=None, hue_order=None,
    add_datapoints=False, level=1, by=None, ax=None,
    figsize=None, count=0, exclude_samples=None,
    include_samples=None, exclude_taxa=None, sort_by_names=False,
    sample_names=None, csv_file=None, size=5, pseudocount=False,
    taxa_names=None, brief_xlabels=False, show_means=False,
    meanprops=None, show_others=True, sort_by_mean=True,
    jitter=1, alpha=None, artist_kwargs=None
):
    """Create a taxa abundance box plot.

    +----------------+-----------------------------------------------------+
    | q2-taxa plugin | Example                                             |
    +================+=====================================================+
    | QIIME 2 CLI    | qiime taxa barplot [OPTIONS]                        |
    +----------------+-----------------------------------------------------+
    | QIIME 2 API    | from qiime2.plugins.taxa.visualizers import barplot |
    +----------------+-----------------------------------------------------+

    Parameters
    ----------
    taxa : str or qiime2.Visualization
        Visualization file or object from the q2-taxa plugin.
    metadata : str or qiime2.Metadata, optional
        Metadata file or object.
    hue : str, optional
        Grouping variable that will produce boxes with different colors.
    hue_order : list, optional
        Specify the order of categorical levels of the 'hue' semantic.
    add_datapoints : bool, default: False
        Show datapoints on top of the boxes.
    level : int, default: 1
        Taxonomic level at which the features should be collapsed.
    by : list, optional
        Column name(s) to be used for sorting the samples. Using
        'sample-id' will sort the samples by their name, in addition to
        other column name(s) that may have been provided. If multiple items
        are provided, sorting will occur by the order of the items.
    ax : matplotlib.axes.Axes, optional
        Axes object to draw the plot onto, otherwise uses the current Axes.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).
    count : int, default: 0
        The number of taxa to display. When 0, display all.
    exclude_samples : dict, optional
        Filtering logic used for sample exclusion.
        Format: {'col': ['item', ...], ...}.
    include_samples : dict, optional
        Filtering logic used for sample inclusion.
        Format: {'col': ['item', ...], ...}.
    exclude_taxa : list, optional
        The taxa names to be excluded when matched. Case insensitive.
        Matching is by substring and only applies to taxa columns;
        metadata columns are never dropped.
    sort_by_names : bool, default: False
        If true, sort the columns (i.e. species) to be displayed by name.
    sample_names : list, optional
        List of sample IDs to be included.
    csv_file : str, optional
        Path of the .csv file to output the dataframe to.
    size : float, default: 5.0
        Radius of the markers, in points.
    pseudocount : bool, default: False
        Add pseudocount to remove zeros.
    taxa_names : list, optional
        List of taxa names to be displayed.
    brief_xlabels : bool, default: False
        If true, only display the smallest taxa rank in the x-axis labels.
    show_means : bool, default: False
        Add means to the boxes.
    meanprops : dict, optional
        The meanprops argument as in matplotlib.pyplot.boxplot.
    show_others : bool, default: True
        Include the 'Others' category.
    sort_by_mean : bool, default: True
        Sort taxa by their mean relative abundance after sample filtration.
    jitter : float, default: 1
        Amount of jitter (only along the categorical axis) to apply.
    alpha : float, optional
        Proportional opacity of the points.
    artist_kwargs : dict, optional
        Keyword arguments passed down to the _artist() method.

    Returns
    -------
    matplotlib.axes.Axes
        Axes object with the plot drawn onto it.

    See Also
    --------
    taxa_abundance_bar_plot
    addpairs

    Examples
    --------
    Below is a simple example showing taxonomic abundance at the phylum
    level (i.e. ``level=2``).

    >>> qzv_file = f'{data_dir}/moving-pictures-tutorial/taxa-bar-plots.qzv'
    >>> dokdo.taxa_abundance_box_plot(qzv_file, level=2, figsize=(8, 7))
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_box_plot-1.png

    We can control how many taxa to display with ``count``. Also, we can
    make the x-axis tick labels pretty with ``brief_xlabels``. We can
    manually set the x-axis tick labels with ``xticklabels``. Lastly, we
    can select specific taxa to display with ``taxa_names``.

    >>> fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(2, 2, figsize=(10, 10))
    >>> kwargs = {'level' : 2}
    >>> artist_kwargs1 = dict(title='count=4')
    >>> artist_kwargs2 = dict(title='brief_xlabels=True')
    >>> artist_kwargs3 = dict(xticklabels=['A', 'B', 'C', 'D'], title="xticklabels=['A', 'B', 'C', 'D']")
    >>> artist_kwargs4 = dict(title="taxa_names=[...]")
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax1, count=4, artist_kwargs=artist_kwargs1, **kwargs)
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax2, count=4, brief_xlabels=True, artist_kwargs=artist_kwargs2, **kwargs)
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax3, count=4, artist_kwargs=artist_kwargs3, **kwargs)
    >>> dokdo.taxa_abundance_box_plot(qzv_file, ax=ax4, taxa_names=['k__Bacteria;p__Firmicutes', 'k__Bacteria;p__Proteobacteria'], artist_kwargs=artist_kwargs4, **kwargs)
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_box_plot-2.png

    We can group the boxes by a metadata column with ``hue``. For this
    plot, we will draw the y-axis in log scale with ``ylog``. To do this,
    we actually need to adjust the y-axis limits with ``ymin`` and
    ``ymax``, and also add a pseudocount of 1 to remove 0s with
    ``pseudocount`` (because 0s cannot be shown in log scale). We will
    also add data points with ``add_datapoints=True``.

    >>> artist_kwargs = dict(ylog=True, ymin=0.05, ymax=200, show_legend=True)
    >>> dokdo.taxa_abundance_box_plot(qzv_file,
    ...                               level=2,
    ...                               figsize=(10, 7),
    ...                               hue='body-site',
    ...                               size=3,
    ...                               count=4,
    ...                               pseudocount=True,
    ...                               add_datapoints=True,
    ...                               artist_kwargs=artist_kwargs)
    >>> plt.tight_layout()

    .. image:: images/taxa_abundance_box_plot-3.png
    """
    with tempfile.TemporaryDirectory() as t:
        _parse_input(taxa, t)
        df = pd.read_csv(f'{t}/level-{level}.csv', index_col=0)

    # If provided, update the metadata.
    if metadata is not None:
        mf = dokdo.get_mf(metadata)
        cols = _get_mf_cols(df)
        df.drop(columns=cols, inplace=True)
        df = pd.concat([df, mf], axis=1, join='inner')

    # Expose the index as a column so that 'sample-id' can be used in ``by``.
    df["sample-id"] = df.index

    # If provided, sort the samples for display in the x-axis.
    if by:
        df = df.sort_values(by=by)

    # If provided, exclude the specified taxa. Metadata columns are left
    # out of the substring match so that a pattern cannot remove columns
    # such as 'sample-id' or the ``hue`` column, which are needed later.
    if isinstance(exclude_taxa, list):
        patterns = [tax.lower() for tax in exclude_taxa]
        protected = set(_get_mf_cols(df))
        dropped = [col for col in df.columns
                   if col not in protected
                   and any(p in col.lower() for p in patterns)]
        df = df.drop(columns=dropped)

    # Remove the metadata columns.
    cols = _get_mf_cols(df)
    mf = df[cols]
    df = df.drop(columns=cols)

    df, mf = _filter_samples(df, mf, exclude_samples, include_samples)

    # If provided, only include the specified samples.
    if isinstance(sample_names, list):
        df = df.loc[sample_names]
        mf = mf.loc[sample_names]

    if sort_by_mean:
        df = _sort_by_mean(df)

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    # Add a pseudocount.
    if pseudocount:
        df = df + 1

    # Convert counts to proportions.
    df = df.div(df.sum(axis=1), axis=0)

    df = _get_others_col(df, count, taxa_names, show_others)

    if sort_by_names:
        df = df.reindex(sorted(df.columns), axis=1)

    # Express the proportions as percentages.
    df = df * 100

    # Reshape to long format ('variable' = taxon, 'value' = abundance).
    if hue is not None:
        df2 = pd.concat([df, mf[hue]], axis=1, join='inner')
        df2 = pd.melt(df2, id_vars=[hue])
    else:
        df2 = pd.melt(df)

    if meanprops:
        _meanprops = meanprops
    else:
        _meanprops = {'marker': 'x',
                      'markerfacecolor': 'white',
                      'markeredgecolor': 'white',
                      'markersize': '10'}

    # Mean markers are only forwarded to seaborn when requested.
    d = {}

    if show_means:
        d['showmeans'] = True
        d['meanprops'] = _meanprops

    sns.boxplot(x='variable',
                y='value',
                hue=hue,
                hue_order=hue_order,
                data=df2,
                ax=ax,
                **d)

    # The flag is forwarded to _artist() whenever the strip plot is overlaid.
    remove_duplicates = bool(add_datapoints)

    if add_datapoints:
        # Alternative method: sns.swarmplot()
        sns.stripplot(x='variable',
                      y='value',
                      hue=hue,
                      hue_order=hue_order,
                      data=df2,
                      ax=ax,
                      color='black',
                      size=size,
                      dodge=True,
                      jitter=jitter,
                      alpha=alpha)

    # If provided, output the dataframe as a .csv file.
    if csv_file is not None:
        df3 = pd.concat([df, mf], axis=1, join='inner')
        df3.to_csv(csv_file)

    if brief_xlabels:
        xticklabels = [dokdo.pname(x.get_text())
                       for x in ax.get_xticklabels()]
    else:
        xticklabels = None

    if artist_kwargs is None:
        artist_kwargs = {}

    # User-supplied keyword arguments take precedence over the defaults.
    artist_kwargs = {'xrot': 45,
                     'xha': 'right',
                     'xlabel': '',
                     'ylabel': 'Relative abundance (%)',
                     'xticklabels': xticklabels,
                     'remove_duplicates': remove_duplicates,
                     **artist_kwargs}

    if hue is not None:
        artist_kwargs['legend_title'] = hue

    ax = _artist(ax, **artist_kwargs)

    return ax