예제 #1
0
def diversity_plot(metagenome,
                   x=None,
                   y=None,
                   hue=None,
                   col=None,
                   row=None,
                   output='boxplot.html',
                   points=True,
                   plot_kw={},
                   box_kw={},
                   scatter_kw={}):
    """Draw faceted boxplots of an alpha-diversity metric and save them.

    Args:
        metagenome: Object providing ``compute_alpha_diversity``,
            ``alpha_diversity``, ``metadata`` and ``figdir``.
        x: Column placed on the x axis of both layers.
        y: Value handed to ``compute_alpha_diversity`` and used as the
            y column of both layers.
        hue, col, row: Forwarded unchanged to ``BokehFacetGrid``.
        output: File name passed to the grid's ``save``.
        points: When true, a swarmplot layer is mapped after the boxplot.
        plot_kw: Extra keyword arguments for ``BokehFacetGrid``.
        box_kw: Extra keyword arguments for the boxplot layer.
        scatter_kw: Extra keyword arguments for the swarmplot layer.
    """
    metagenome.compute_alpha_diversity(y)

    # Diversity values and metadata factors side by side, one row per index.
    frames = [metagenome.alpha_diversity, metagenome.metadata.factor_data()]
    table = pd.concat(frames, axis=1)

    grid = BokehFacetGrid(data=table,
                          hue=hue,
                          row=row,
                          col=col,
                          outdir=metagenome.figdir,
                          **plot_kw)
    grid.map(boxplot, x=x, y=y, **box_kw)

    if points:
        hover_columns = metagenome.metadata.qual_vars
        grid.map(swarmplot, x=x, y=y, tooltips=hover_columns, **scatter_kw)

    grid.save(output)
예제 #2
0
def distr_cmd(mg, factors, diversity=[], otu_list=[]):
    """Plot diversity boxplots and/or per-OTU abundance distributions.

    Args:
        mg: Metagenome-like object; this function reads ``columns``,
            ``taxonomy``, ``figdir`` and calls ``get_column_format``.
        factors: Four-item sequence unpacked as ``(hue, x, row, col)``.
        diversity: Diversity metrics; one boxplot file is written per metric.
        otu_list: OTU identifiers to plot, or a single-item list holding the
            path of a text file with one OTU per line (its first line is
            skipped). Only OTUs present in ``mg.columns`` are kept, capped
            at 50.

    Returns:
        None. Returns early when no requested OTU is found in ``mg``.
    """
    (hue, x, row, col) = factors

    if diversity:
        for div in diversity:
            print(f'Diversity boxplot: {div}')
            diversity_plot(mg,
                           x=x,
                           y=div,
                           hue=hue,
                           points=False,
                           col=col,
                           row=row,
                           plot_kw={'height': 400},
                           output=f'boxplot_{div}.html')

    if not otu_list:
        return

    if len(otu_list) == 1 and Path(otu_list[0]).is_file():
        # Context manager so the handle is closed even if reading fails.
        with open(otu_list[0]) as handle:
            otu_list = handle.read().splitlines()[1:]

    otu_list = list(set(otu_list).intersection(set(mg.columns)))[:50]

    if not otu_list:
        return

    print(f'Feature boxplot ({len(otu_list)} otus (max=50))')

    data = (mg.get_column_format(
        tax=False).loc[(slice(None), otu_list), :].reset_index())

    # First letter of each rank name, computed once for every OTU label.
    rank_initials = [rank[0].lower() for rank in mg.taxonomy.ranks]

    repl = {}
    for otu in otu_list:
        lineage = mg.taxonomy.data.loc[otu].tolist()[:6]
        # zip() stops at the shorter sequence, so no explicit slicing needed.
        repl[otu] = ', '.join(
            '{}: {}'.format(*pair) for pair in zip(rank_initials, lineage))

    data = data.replace(repl)

    g = BokehFacetGrid(data=data,
                       hue=hue,
                       col='OTU',
                       col_wrap=5,
                       outdir=mg.figdir,
                       width='auto',
                       height=400)
    g.map(boxplot, x=x, y='value', tooltips=['group_size', 'not_null'])
    g.map(swarmplot, x=x, y='value', tooltips=data.columns)
    g.save('specific_otus_distribution.html')
예제 #3
0
def otu_topics_barplot(metagenome,
                       topics,
                       output='otu_topics_clustermap.html',
                       col=None,
                       row=None,
                       **kwargs):
    """Bar-plot the OTU weights above 0.01, one facet column per community.

    Args:
        metagenome: Object whose ``taxonomy.data`` is merged onto the weights
            and whose ``taxonomy.columns`` are used as tooltips.
        topics: Table that is stacked into long form; the stacked result is
            expected to expose ``OTU`` and ``community`` columns.
        output: Path of the figure; its parent is the grid's output directory
            and its name the saved file name.
        col: Accepted but not used; the facet column is always ``community``.
        row: Facet row forwarded to ``BokehFacetGrid``.
        **kwargs: Accepted but not used.
    """
    stacked = topics.stack()
    # Keep only weights strictly above 0.01.
    weights = stacked.where(lambda s: s > 0.01).dropna().rename('weight')

    annotated = weights.reset_index().merge(metagenome.taxonomy.data,
                                            left_on='OTU',
                                            right_index=True,
                                            how='left')
    data = annotated.reset_index(drop=True).sort_values(by='weight',
                                                        ascending=False)

    target = Path(output)
    grid = BokehFacetGrid(data=data,
                          outdir=target.parent,
                          row=row,
                          col='community',
                          sort=False)
    grid.map(barplot,
             x='OTU',
             y='weight',
             tooltips=metagenome.taxonomy.columns)
    grid.save(target.name)
예제 #4
0
def pairplot(data=None,
             cols=None,
             hue=None,
             tooltips=None,
             width=400,
             height=400,
             output=None):
    """Scatter every pair of columns against each other in a facet grid.

    Args:
        data: DataFrame holding the columns to compare.
        cols: Columns to pair up; defaults to all numeric columns of ``data``.
        hue: Optional column copied into the plotted table and used as hue.
        tooltips: Optional column label(s) of ``data`` appended to the
            plotted table and forwarded to ``BokehFacetGrid``.
        width: Accepted for API compatibility; not forwarded to the grid.
        height: Accepted for API compatibility; not forwarded to the grid.
        output: File name to save to; ``'pairplot.html'`` when omitted.
    """
    if cols is None:
        cols = data.select_dtypes('number').columns

    comb_data = []
    for col1, col2 in combinations(cols, 2):
        # Long format: one row per observation, tagged with the pair's names.
        tmp_data = data[[col1, col2]].assign(var1=col1, var2=col2)
        tmp_data.columns = ['value1', 'value2', 'var1', 'var2']

        if tooltips is not None:
            tmp_data = pd.concat([tmp_data, data[tooltips]], axis=1)
        if hue is not None:
            tmp_data[hue] = data[hue]

        comb_data.append(tmp_data)

    comb_data = pd.concat(comb_data)

    g = BokehFacetGrid(data=comb_data,
                       hue=hue,
                       row='var1',
                       col='var2',
                       tooltips=tooltips)
    g.map(scatter, x='value1', y='value2')

    # Previously the figure was written only when `output` was None, so an
    # explicit file name silently produced no file at all.
    g.save('pairplot.html' if output is None else output)
예제 #5
0
def tax_cmd(mg, factors=None, ranks=None, plot_bars=True, plot_heatmap=True):
    """Render taxonomic composition figures for each requested rank.

    Args:
        mg: Metagenome-like object; ``copy``, ``figdir``, ``group_taxa`` and
            ``get_column_format`` are used.
        factors: Four-item sequence unpacked as ``(hue, x, row, col)``.
        ranks: Iterable of taxonomic ranks; one set of figures per rank.
        plot_bars: Write a stacked barplot (``barplot-<rank>.html``).
        plot_heatmap: Write a clustermap (``clustermap-<rank>.html``).
    """
    hue, x, row, col = factors

    for rank in ranks:
        print(f'Plotting taxonomic composition for: {rank}')

        if plot_bars:
            # Work on a copy so grouping taxa does not touch `mg` itself.
            grouped = mg.copy()
            grouped.group_taxa(rank)

            bar_size = {'width': 1400, 'height': 800}
            taxa_stackplot(metagenome=grouped,
                           x=x,
                           hue=hue,
                           col=col,
                           row=row,
                           norm=True,
                           output=f'{mg.figdir}/barplot-{rank}.html',
                           plot_kw=bar_size)

        if plot_heatmap:
            snapshot = mg.copy()
            long_table = snapshot.get_column_format()
            grid = BokehFacetGrid(data=long_table,
                                  row=x,
                                  col=col,
                                  outdir=snapshot.figdir)
            grid.map(clustermap,
                     y=hue,
                     x=rank,
                     z='value',
                     cluster_samples=False)
            grid.save(f'clustermap-{rank}.html')
예제 #6
0
def lda_boxplot(data,
                metadata=None,
                taxonomy=None,
                x=None,
                row=None,
                col=None,
                rank='Genus',
                output='lda_plot.html',
                width=1400,
                top=10):
    """Plot per-topic sample values with the top-weighted features as tooltips.

    Args:
        data: Mapping-like object indexed with ``'features'`` and
            ``'samples'``; both entries are treated as pandas tables.
        metadata: Table concatenated column-wise with ``data['samples']``.
            Its columns are the melt ``id_vars`` and the swarmplot tooltips,
            so despite the ``None`` default it must be provided.
        taxonomy: Table indexed by feature; ``taxonomy.loc[features, rank]``
            supplies the label of each top feature (also required).
        x: Metadata column used as hue and for sizing the figure width.
        row: Passed as the ``x`` argument of the boxplot layer.
        col: Facet column forwarded to ``BokehFacetGrid``.
        rank: Taxonomy column used to label the top features.
        output: Path of the figure; its parent is the grid's output directory
            and its name the saved file name.
        width: Minimum figure width; enlarged when there are many groups.
        top: Number of highest-weight features kept per topic.
    """

    # Long table (topic, feature, weight), then the `top` largest weights of
    # each topic, re-indexed 0..top-1 within the topic.
    top_otu = (data['features'].stack().rename_axis(
        index=['topic', 'feature']).rename('weight').reset_index().groupby(
            'topic').apply(
                lambda x: x.nlargest(top, 'weight').reset_index(drop=True)))

    # to_numpy() assigns positionally, bypassing index alignment.
    top_otu[rank] = taxonomy.loc[top_otu.feature, rank].to_numpy()

    # Label such as "12% <rank value>" for every retained feature.
    top_otu['top_otus'] = (top_otu[[rank, 'weight']].apply(
        lambda x: f'{x.weight:.0%} {x[rank]}', axis=1))

    # One row per topic, one column per position (OTU_1, OTU_2, ...).
    top_otu = top_otu['top_otus'].unstack()
    top_otu.columns = ['OTU_{}'.format(x + 1) for x in top_otu.columns]

    data = pd.concat([data['samples'], metadata], axis=1)
    data = data.melt(id_vars=metadata.columns)
    # Attach the top-feature labels of the matching topic to every row.
    data = data.merge(top_otu, left_on='variable', right_index=True)

    # Widen the figure so every (variable, x level) pair gets 15 width units.
    idx_size = len(data.variable.unique()) * len(data[x].unique())
    width = max(width, idx_size * 15)

    g = BokehFacetGrid(data=data,
                       hue=x,
                       row='variable',
                       col=col,
                       width=width,
                       outdir=Path(output).parent)
    # NOTE(review): the boxplot layer uses x=row while the swarmplot layer
    # uses x='variable', and the facet row is hard-coded to 'variable' rather
    # than `row` -- confirm this asymmetry is intended.
    g.map(boxplot, x=row, y='value', tooltips=top_otu.columns)
    g.map(swarmplot, x='variable', y='value', tooltips=metadata.columns)
    g.save(Path(output).name)
예제 #7
0
def sample_topics_clustermap(metagenome,
                             topics,
                             output='sample_topics_clustermap.html',
                             row=None,
                             col=None,
                             **kwargs):
    """Draw a clustermap of topic weights per sample group.

    Args:
        metagenome: Object whose ``metadata.factor_data()`` and
            ``metadata.qual_vars`` provide the identifier columns.
        topics: Table concatenated column-wise with the metadata factors;
            its columns become the ``topics`` variable after melting.
        output: Path of the figure; its parent is the grid's output directory
            and its name the saved file name.
        row, col: Facet variables forwarded to ``BokehFacetGrid``.
        **kwargs: Accepted but not used.
    """
    factor_table = metagenome.metadata.factor_data()

    wide = pd.concat([topics, factor_table], axis=1)
    wide = wide.rename_axis(index='groups').reset_index()

    # Everything that is not an identifier column is melted into
    # (topics, weight) pairs.
    id_columns = list(metagenome.metadata.qual_vars) + ['groups']
    long_table = wide.melt(id_vars=id_columns,
                           var_name='topics',
                           value_name='weight')

    destination = Path(output)
    grid = BokehFacetGrid(data=long_table,
                          outdir=destination.parent,
                          row=row,
                          col=col)
    grid.map(clustermap,
             x='topics',
             y='groups',
             z='weight',
             standardize=False)
    grid.save(destination.name)
예제 #8
0
def otu_topics_clustermap(metagenome,
                          topics,
                          output='otu_topics_clustermap.html',
                          col=None,
                          row=None,
                          **kwargs):
    """Draw a clustermap of the ten heaviest OTUs of every topic.

    Args:
        metagenome: Object whose ``taxonomy.data`` is merged onto the weights.
        topics: Table with one row per topic and one column per OTU; the
            stacked result is expected to expose ``OTU`` and ``community``
            columns.
        output: Path of the figure; its parent is the grid's output directory
            and its name the saved file name.
        col, row: Facet variables forwarded to ``BokehFacetGrid``.
        **kwargs: Accepted but not used.
    """
    # Collect the top 10 OTUs of each topic, then de-duplicate while keeping
    # first-seen order. A list is required here: pandas no longer accepts a
    # set as a column indexer, and a set would give an arbitrary order.
    candidates = []
    for topic in topics.index:
        candidates.extend(topics.loc[topic].nlargest(10).index)
    top_otus = list(dict.fromkeys(candidates))

    data = (topics[top_otus].stack().rename('weight').reset_index().merge(
        metagenome.taxonomy.data, left_on='OTU', right_index=True,
        how='left').reset_index(drop=True))
    g = BokehFacetGrid(data=data, outdir=Path(output).parent, row=row, col=col)
    g.map(clustermap, x='community', y='OTU', z='weight', standardize=True)
    g.save(Path(output).name)
예제 #9
0
def stats_barplot(data,
                  x=None,
                  variables=['log10_p-adj', 'R2'],
                  hue=None,
                  threshold=0.05,
                  outdir='./',
                  output='stats_barplot.html',
                  plot_kw={},
                  bar_kw={}):
    """Bar-plot statistics, one facet row per entry of ``variables``.

    Args:
        data: Wide table; every column not listed in ``variables`` is kept as
            an identifier when melting.
        x: Column placed on the x axis.
        variables: Columns melted into ``variable``/``value`` pairs; also the
            order of the facet rows.
        hue: Hue column forwarded to ``BokehFacetGrid``.
        threshold: Accepted but not used by this function.
        outdir: Output directory forwarded to ``BokehFacetGrid``.
        output: File name passed to the grid's ``save``.
        plot_kw: Extra keyword arguments for ``BokehFacetGrid``.
        bar_kw: Extra keyword arguments for the barplot layer.
    """
    id_columns = [name for name in data.columns if name not in variables]
    long_table = data.melt(id_vars=id_columns)

    grid = BokehFacetGrid(data=long_table,
                          hue=hue,
                          row='variable',
                          row_order=variables,
                          outdir=outdir,
                          **plot_kw)
    grid.map(barplot,
             x=x,
             y='value',
             tooltips=long_table.columns,
             **bar_kw)

    grid.save(output)
예제 #10
0
def test_scatter_ellipse():
    """Render a scatter plot with ellipses on three differently oriented groups.

    500 correlated 2-D normal points are drawn and randomly labelled
    ``a``/``b``/``c``. All points are multiplied once by a 2x2 rotation
    matrix, group ``a`` once more and group ``b`` twice more, so each group
    ends up with its own orientation. The figure is saved as
    ``ellipse_test.html``.
    """
    n_points = 500
    mean = [3, 5]
    covariance = [[10, 5], [5, 4]]

    df = pd.DataFrame(
        np.random.multivariate_normal(mean, covariance, n_points))
    df.columns = ['x', 'y']
    df['fact'] = np.random.choice(['a', 'b', 'c'], n_points, replace=True)

    angle = 0.5
    rotation = np.array([[cos(angle), -sin(angle)],
                         [sin(angle), cos(angle)]])
    xy = ['x', 'y']

    # to_numpy() keeps the assignment positional rather than label-aligned.
    df[xy] = df[xy].dot(rotation).to_numpy()

    in_a = df.fact == 'a'
    df.loc[in_a, xy] = df.loc[in_a, xy].dot(rotation).to_numpy()

    in_b = df.fact == 'b'
    df.loc[in_b, xy] = df.loc[in_b, xy].dot(rotation).dot(rotation).to_numpy()

    grid = BokehFacetGrid(data=df, scale=1.5, hue='fact')
    grid.map(scatter, x='x', y='y', ellipse=True, s=5)
    grid.save('ellipse_test.html')
예제 #11
0
def test_stackplot_2():
    """Smoke-test `stackplot` with a two-column x axis and a `fact` hue."""
    simulated = sim()
    grid = BokehFacetGrid(data=simulated, width=800, hue='fact')
    x_columns = ['x', 'col']
    grid.map(stackplot, x=x_columns, y='y')
예제 #12
0
def taxa_stackplot(feature_table=None,
                   feature_info=None,
                   metagenome=None,
                   x='variable',
                   hue=None,
                   row=None,
                   col=None,
                   output='stackplot.html',
                   norm=True,
                   abd_thresh=0.01,
                   plot_kw={},
                   bar_kw={}):
    '''Stacked barplot of taxa by sample groups.

    Taxa whose mean value within a group falls below ``abd_thresh`` times
    the group's total are merged into a single "Others" category.

    Args:
        feature_table (pd.DataFrame): Used only when ``metagenome`` is falsy.
            It is transposed and merged with ``feature_info`` on the index.
        feature_info (pd.DataFrame): Used only when ``metagenome`` is falsy.
            Indexed like the transposed ``feature_table``; its columns are
            carried along as taxonomy columns.
        metagenome: When truthy, the table comes from
            ``metagenome.get_column_format()`` and the taxonomy columns from
            ``metagenome.taxonomy.columns``.
        x (str): Column for the x axis. It is combined with ``hue`` (when
            given) into the list passed to the stackplot as ``x``.
        hue (str): Optional second x-axis grouping column. The stack colours
            do not come from this argument: they always use the first
            taxonomy/feature column.
        row (str): Facet row forwarded to ``BokehFacetGrid``.
        col (str): Facet column forwarded to ``BokehFacetGrid``.
        output (str): Path of the figure; its parent is the grid's output
            directory and its name the saved file name.
        norm (bool): Accepted but not referenced in this function body.
        abd_thresh (float): Abundance threshold to group taxa into "others".
            Must be in ]0, 1[
        plot_kw (dict): Extra keyword arguments for ``BokehFacetGrid``.
        bar_kw (dict): Extra keyword arguments for the stackplot layer.

    Returns:
        None
    '''

    # From here on `x` is a list of the x-axis grouping columns.
    x = [xi for xi in [x, hue] if xi is not None]

    if metagenome:
        table = metagenome.get_column_format().reset_index()
        # After reset_index the first column identifies the sample and the
        # second the feature.
        tax_cols = [table.columns[1]] + list(metagenome.taxonomy.columns)
        sample_var = table.columns[0]
    else:
        sample_var = 'variable'
        tax_cols = ['feature'] + list(feature_info.columns)
        table = (feature_table.T.merge(
            feature_info, left_index=True, right_index=True).rename_axis(
                index='feature').reset_index().melt(id_vars=tax_cols))

    # `hue` is rebound: the stack is always coloured by the feature column.
    hue = tax_cols[0]
    groups = x + [xi for xi in [row, col] if xi is not None]

    # Set threshold for assigning low abundance OTUs to others
    taxa_means = table.groupby(groups + [hue])['value'].agg('mean')
    # NOTE(review): `Series.sum(level=...)` was deprecated in pandas 1.3 and
    # removed in 2.0 (replacement: groupby(level=...).sum()) -- confirm the
    # pandas version this project pins.
    sample_lims = taxa_means.sum(level=groups) * abd_thresh
    # Broadcast the per-(group, taxon) means back onto the rows of `table`.
    taxa_means = taxa_means.loc[list(zip(*[table[x] for x in groups + [hue]]))]

    # Same broadcast for the per-group limits, so both arrays line up
    # row-for-row with `table`.
    if len(groups) > 1:
        sample_lims = sample_lims.reindex(
            index=table[groups].apply(tuple, axis=1))
    else:
        sample_lims = sample_lims.reindex(index=table[groups[0]])

    in_others_cond = sample_lims.to_numpy() > taxa_means.to_numpy()

    # Overwrite every taxonomy column of the low-abundance rows with the
    # "Others" label.
    filler = 'Others (< {:.0%})'.format(abd_thresh)
    table.loc[in_others_cond, tax_cols] = filler

    # Total value per (sample, taxon) after relabelling.
    agg_values = table.groupby([sample_var, hue]).value.sum()

    # NOTE(review): this relies on `nth` returning a frame indexed by the
    # group keys and on its `dropna` argument; both changed in pandas 2.x --
    # confirm against the pinned pandas version.
    table = (
        table.groupby([sample_var,
                       hue]).nth(0,
                                 dropna='all')  # nth much faster than first()
        .assign(value=agg_values).reset_index())

    # Rank by total abundance
    hue_order = table.groupby(hue).value.sum().sort_values(
        ascending=False).index

    # Keep the "Others" category last regardless of its total.
    if filler in hue_order:
        hue_order = hue_order.drop(filler).append(pd.Index([filler]))

    g = BokehFacetGrid(data=table,
                       row=row,
                       col=col,
                       hue=hue,
                       hue_order=hue_order,
                       outdir=Path(output).parent,
                       **plot_kw)
    g.map(stackplot, x=x, y='value', **bar_kw)
    g.save(Path(output).name)
예제 #13
0
def test_barplot_2():
    """Smoke-test `barplot` on simulated data without a hue."""
    simulated = sim()
    grid = BokehFacetGrid(data=simulated, width=800)
    grid.map(barplot, x='x', y='y')
예제 #14
0
def test_barplot_1():
    """Smoke-test `barplot` with a `fact` hue and two tooltip columns."""
    simulated = sim()
    grid = BokehFacetGrid(data=simulated, hue='fact', width=800)
    hover_columns = ['fact', 'group']
    grid.map(barplot, x='x', y='y', tooltips=hover_columns)
예제 #15
0
def test_box_swarm():
    """Smoke-test a boxplot layer followed by a swarmplot layer."""
    simulated = sim()
    grid = BokehFacetGrid(data=simulated, hue='fact', width=800)
    hover_columns = ['fact', 'group']
    grid.map(boxplot, x='x', y='y', tooltips=hover_columns)
    grid.map(swarmplot, x='x', y='y')
예제 #16
0
def test_swarm_2():
    """Smoke-test `swarmplot` on simulated data without a hue."""
    simulated = sim()
    grid = BokehFacetGrid(data=simulated, width=800)
    grid.map(swarmplot, x='x', y='y')
예제 #17
0
def ordination_cmd(mg, factors=[], strata=[], method='pcoa', distance='bray'):
    """Ordinate the samples and save a scatter plot and a component boxplot.

    Args:
        mg: Metagenome-like object; ``metadata`` and ``figdir`` are used and
            the object itself is handed to ``ordinate``.
        factors: Candidate hue variables. Only the first is rendered; the
            others are reported in a warning and ignored.
        strata: Stratification variables, forwarded to ``ordinate``. The
            first two become the facet columns and rows; further ones are
            reported in a warning and not used for faceting.
        method: Used only to build the output file names.
        distance: Used only to build the output file names.

    Writes ``<method>-<distance>.html`` and
    ``<method>_<distance>_boxplot.html`` through the grid's ``save``.
    """
    # Copies: the mutable defaults are never handed out, and tuples work too.
    factors = list(factors)
    strata = list(strata)

    # The previous tuple-unpacking raised ValueError for empty `factors`
    # (the default) and for more than three strata.
    hue = factors[0] if factors else None
    col, row = (strata + [None, None])[:2]

    # Report only real variables; the old message could contain "None".
    ignored = [str(name) for name in strata[2:] + factors[1:]
               if name is not None]
    if ignored:
        extra = ','.join(ignored)
        warnings.warn(
            f'Could not render {extra} too many levels to render. Ignoring.',
            UserWarning)

    result = ordinate(mg, strata=strata, subsample=True)
    if not all(x is None for x in strata):
        # With stratification `result` maps each stratum to its own result.
        components = pd.concat(val['sample'] for val in result.values())
    else:
        components = result['sample']

    compo_names = components.columns[:2]
    other_meta = np.setdiff1d(mg.metadata.qual_vars, components.columns)
    components = pd.concat(
        [components, mg.metadata.factor_data(other_meta)],
        axis=1).dropna(subset=compo_names, how='any')

    # Hulls are drawn only for a small number of hue groups; without a hue
    # there is no grouping column to look up.
    if hue is None:
        hull = False
    else:
        hull = (len(components[hue].unique()) < 5)

    g = BokehFacetGrid(data=components.reset_index(),
                       hue=hue,
                       col=col,
                       row=row,
                       outdir=mg.figdir,
                       scale=1.5)
    g.map(scatter,
          x=components.columns[0],
          y=components.columns[1],
          hull=hull,
          s=10,
          tooltips=mg.metadata.qual_vars)
    g.save(f'{method}-{distance}.html')

    # Long format: one row per (sample, component) for the score boxplots.
    g = BokehFacetGrid(data=components.melt(
        id_vars=components.columns.drop(compo_names),
        value_name='score',
        var_name='component'),
                       col=col,
                       row=row,
                       outdir=mg.figdir,
                       hue=hue)
    g.map(boxplot, x='component', y='score')
    g.map(swarmplot, x='component', y='score', tooltips=components.columns[2:])
    g.save(f'{method}_{distance}_boxplot.html')