Esempi in Python per scale_color_manual, esempi in Python per plotnine.scale_color_manual

Esempio n. 1

0

Mostra file

File: plot.py Progetto: zuzannna/ts_tutorial

def customized_algorithm_plot(experiment_name='finite_simple_sanity',
                              data_path=_DEFAULT_DATA_PATH):
    """Simple plot of average instantaneous regret by agent, per timestep.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      p: ggplot plot
    """
    df = load_data(experiment_name, data_path)
    # Use the named aggregation 'mean' rather than the np.mean callable:
    # pandas resolves both to the same group-wise mean, but the string form
    # is the one pandas recommends for groupby aggregation.
    plt_df = (df.groupby(['t', 'agent'])
              .agg({'instant_regret': 'mean'})
              .reset_index())
    plt_df['agent_new_name'] = plt_df.agent.apply(rename_agent)

    # NOTE(review): labels and colors are matched to the agents purely by
    # position -- confirm that the order of the values produced by
    # rename_agent lines up with these two lists.
    custom_labels = ['Laplace TS', 'Langevin TS', 'TS', 'bootstrap TS']
    custom_colors = ["#E41A1C", "#377EB8", "#4DAF4A", "#984EA3"]

    p = (gg.ggplot(plt_df) +
         gg.aes('t', 'instant_regret', colour='agent_new_name') +
         gg.geom_line(size=1.25, alpha=0.75) +
         gg.xlab('time period (t)') +
         gg.ylab('per-period regret') +
         gg.scale_color_manual(
             name='agent', labels=custom_labels, values=custom_colors))
    return p

Esempio n. 2

0

Mostra file

File: pca_plot_helper.py Progetto: MarvinT/annorxiver

def generate_scatter_plots(
        data,
        x="pca1",
        y="pca2",
        nsample=200,
        random_state=100,
        selected_categories=('bioinformatics', 'neuroscience'),
        color_palette=('#a6cee3', '#1f78b4'),
        save_file_path="output/pca_plots/scatterplot_files/pca01_v_pca02.svg"):
    """Scatter two PCA components for a few categories and save the figure.

    Rows of ``data`` whose ``category`` is in ``selected_categories`` are
    kept; every category with more than ``nsample`` rows is down-sampled to
    ``nsample`` rows. The plot is saved, printed, and the current matplotlib
    figure is cleared.

    Args:
        data: DataFrame with a ``category`` column plus the ``x``/``y`` columns.
        x: column plotted on the x axis; its last character labels the axis.
        y: column plotted on the y axis; its last character labels the axis.
        nsample: maximum number of rows kept per category.
        random_state: seed passed to ``DataFrame.sample``.
        selected_categories: categories to show (any sequence).
        color_palette: one color per entry of ``selected_categories``,
            paired by position.
        save_file_path: where the figure is written.
    """
    # Immutable defaults avoid the shared-mutable-default pitfall; the
    # sequences are copied to lists here so callers may pass any iterable.
    categories = list(selected_categories)
    category_colors = dict(zip(categories, color_palette))

    # '@categories' lets pandas read the local list directly instead of
    # interpolating its repr into the query string.
    selected = data.query("category in @categories")
    # 'group' (not 'x') so the lambda does not shadow the x-axis parameter.
    sampled = (selected.groupby("category").apply(
        lambda group: group.sample(nsample, random_state=random_state)
        if len(group) > nsample else group).reset_index(drop=True))

    g = (p9.ggplot(sampled) +
         p9.aes(x=x, y=y, color="factor(category)") + p9.geom_point() +
         p9.scale_color_manual(category_colors) +
         p9.labs(x=f"PC{x[-1:]}",
                 y=f"PC{y[-1:]}",
                 title="PCA of BioRxiv (Word Dim: 300)",
                 color="Article Category") +
         p9.theme_seaborn(
             context="paper", style="ticks", font="Arial", font_scale=1.3) +
         p9.theme(figure_size=(6.66, 5), dpi=300))

    # NOTE(review): the theme sets dpi=300 but the file is saved at dpi=250;
    # kept as in the original -- confirm which resolution is intended.
    g.save(save_file_path, dpi=250)
    print(g)
    plt.clf()

Esempio n. 3

0

Mostra file

File: plot_results.py Progetto: tilman151/DomainAdaption_PHM2020

def mixed_linear_factors_plot(df, x_axis, factor):
    """Plot MSE against ``x_axis`` per level of ``factor`` with fitted lines.

    For every distinct value of ``df[factor]`` a mixed linear model
    ``mse ~ x_axis`` is fitted (one group per row index) and its intercept
    and slope are drawn as a line in the matching facet. The figure is saved
    as ``'<x_axis>_vs_<factor>_rmse.pdf'``.

    Side effects: sets ``plotnine.options.figure_size`` globally and rounds
    the ``percent_broken`` and ``percent_fail_runs`` columns of ``df`` in
    place.

    Args:
        df: DataFrame with ``mse``, ``method``, ``percent_broken`` and
            ``percent_fail_runs`` columns plus ``x_axis`` and ``factor``.
        x_axis: name of the column used as regressor and x axis.
        factor: name of the column used for faceting.
    """
    plotnine.options.figure_size = (10, 10)
    factor_steps = df[factor].unique()

    # Collect the coefficients as rows and build the frame once, so the
    # intercept/slope columns are float instead of inheriting the dtype of
    # the factor column (as the former zeros_like pre-allocation did).
    rows = []
    for step in factor_steps:
        factored_df = df[df[factor] == step]
        md = smf.mixedlm('mse ~ %s' % x_axis,
                         factored_df,
                         groups=factored_df.index.values)
        mdf = md.fit()
        rows.append({
            factor: step,
            'intercept': mdf.params['Intercept'],
            'slope': mdf.params[x_axis],
        })
    reg_lines = pd.DataFrame(rows, columns=[factor, 'intercept', 'slope'])

    # The builtin int replaces np.int, which was only an alias for it and
    # has been removed from NumPy (AttributeError on current releases).
    df['percent_broken'] = df['percent_broken'].round().astype(int)
    df['percent_fail_runs'] = df['percent_fail_runs'].round().astype(int)
    # Round the facet key the same way so the lines land in the right facet.
    reg_lines[factor] = reg_lines[factor].round().astype(int)

    plot = (
        plotnine.ggplot(df, plotnine.aes(x=x_axis, y='mse', color='method')) +
        plotnine.geom_jitter(width=2.5, show_legend=False) +
        plotnine.scale_color_manual(['#DB5F57'] * 4) +
        plotnine.facet_wrap(factor) +
        plotnine.geom_abline(
            plotnine.aes(intercept='intercept', slope='slope'),
            data=reg_lines) +
        plotnine.theme_classic(base_size=20))
    plot.save('%s_vs_%s_rmse.pdf' % (x_axis, factor))

Esempio n. 4

0

Mostra file

File: figures.py Progetto: Pinafore/publications

def round_2_plot():
    """Fetch the round 2 data if needed, then draw and save the accuracy plot.

    The JSON file is downloaded only when it is not present locally; its
    checksum is verified in either case before it is read.
    """
    already_downloaded = os.path.exists(round_2_df_path)
    if not already_downloaded:
        eprint(f'Downloading {round_2_df_url} to {round_2_df_path}')
        urlretrieve(round_2_df_url, round_2_df_path)
    verify_checksum(round_2_df_checksum, round_2_df_path)
    df = pd.read_json(round_2_df_path)

    strip_margin = {'t': 6, 'b': 6, 'l': 1, 'r': 5}
    # Components are added strictly in this order.
    components = [
        aes(x='char_percent', y='correct', color='Dataset'),
        facet_wrap('Guessing_Model', nrow=1),
        stat_summary_bin(fun_data=mean_no_se,
                         bins=20,
                         shape='.',
                         linetype='None',
                         size=0.5),
        scale_y_continuous(breaks=np.linspace(0, 1, 6)),
        scale_x_continuous(breaks=[0, .5, 1]),
        coord_cartesian(ylim=[0, 0.7]),
        ggtitle('Round 2 Attacks and Models'),
        xlab('Percent of Question Revealed'),
        ylab('Accuracy'),
        # Legend placement options are intentionally left at their defaults.
        theme(strip_text_x=element_text(margin=strip_margin)),
        scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
                           name='Questions'),
    ]
    p = ggplot(df)
    for component in components:
        p = p + component
    p.save('2019_tacl_trick/auto_fig/round_2_json.pdf', width=7.0, height=1.7)

Esempio n. 5

0

Mostra file

File: figures.py Progetto: Pinafore/publications

def round_1_plot():
    """Read the round 1 CSV, then draw and save the per-model accuracy plot."""
    df = pd.read_csv('2019_tacl_trick/data/round_1.csv')
    df['Model'] = df['Model'].astype(
        CategoricalDtype(['DAN', 'RNN', 'IR'], ordered=True))

    def pad_legend_entry(label):
        # Hack: trailing spaces widen this entry so the legend has the same
        # width across plots.
        padded = 'Round 1 - IR Adversarial    '
        return padded if label == 'Round 1 - IR Adversarial' else label

    df['Dataset'] = df['Dataset'].map(pad_legend_entry)

    facet_strip_theme = theme(
        strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5}))
    question_colors = scale_color_manual(
        values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
        name='Questions')

    p = (
        ggplot(df)
        + aes(x='x', y='y', color='Dataset')
        + facet_wrap('Model', nrow=1)
        + geom_point(size=1.0, shape='o')
        + scale_y_continuous(breaks=np.linspace(0, 1, 6), limits=[0, 0.6])
        + scale_x_continuous(breaks=[0, .5, 1])
        + xlab('Percent of Question Revealed')
        + ylab('Accuracy')
        + ggtitle('Round 1 Attacks and Models')
        + facet_strip_theme
        + question_colors
    )
    p.save('2019_tacl_trick/auto_fig/round_1_csv.pdf', width=7.0, height=1.7)

Esempio n. 6

0

Mostra file

File: figures.py Progetto: Pinafore/publications

def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Build per-task length histograms with a marked, labelled group mean.

    Args:
        len_df: DataFrame with 'Task', 'Method' and 'x' columns.
        legend_position: forwarded to ``theme(legend_position=...)``.
        legend_box: forwarded to ``theme(legend_box=...)``.

    Returns:
        The assembled ggplot object.
    """
    mean_len_df = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # The blank-named column only feeds the linetype legend entry.
    mean_len_df[' '] = 'Mean Length'

    histogram = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_value_text = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        mean_len_df,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False)
    mean_marker = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        mean_len_df,
        inherit_aes=False,
        color='black')
    legend_theme = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    figure = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + histogram
        + mean_value_text
        + mean_marker
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + legend_theme
    )
    return figure

Esempio n. 7

0

Mostra file

def accPlot(accsByNFeats):
    """Print an accuracy-versus-p line plot and return the plotted frame.

    Args:
        accsByNFeats: nested mapping ``{set: {p: accuracy}}``.

    Returns:
        DataFrame with columns 'p', 'acc' (float) and 'set', one row per
        (set, p) pair, indexed by ``str(p)``.
    """

    def single_row(set_name, p):
        # One-row frame for a single (set, p) accuracy value.
        return pd.DataFrame(
            {
                "p": p,
                "acc": accsByNFeats[set_name][p],
                "set": set_name
            },
            index=[str(p)])

    plotdata = [
        pd.concat([single_row(s, p) for p in accsByNFeats[s]], axis=0)
        for s in accsByNFeats
    ]
    ggd = pd.concat(plotdata)
    ggd['acc'] = ggd['acc'].astype(float)

    ggo = gg.ggplot(ggd, gg.aes(x='p', y='acc', color='set'))
    for component in (
            gg.geom_line(alpha=0.5),
            gg.geom_point(),
            gg.theme_bw(),
            gg.scale_x_log10(breaks=[10, 100, 1000, 10000]),
            gg.scale_color_manual(
                values=['darkgray', 'black', 'red', 'dodgerblue']),
            gg.ylab('Accuracy (5-fold CV)'),
    ):
        ggo += component
    print(ggo)
    return ggd

Esempio n. 8

0

Mostra file

File: plotting_constants.py Progetto: abhijeetdtu/unccalumni

class THEME:
    """Shared colors, scales and theme objects used when building plots."""

    # Plain color / loader settings.
    bgcolor = "#293241"
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palettes.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#50514f",
        "#f25f5c",
        "#ffe066",
        "#247ba0",
        "#70c1b3",
        "#c97c5d",
        "#b36a5e",
    ]
    colors_dark = [
        "#e07a5f",
        "#3d405b",
        "#81b29a",
        "#2b2d42",
        "#f77f00",
        "#6d597a",
    ]

    # Active theme: black-and-white base without the panel border.
    # (A dark variant that filled panel and plot backgrounds with `bgcolor`
    # is not in use.)
    mt = theme_bw() + theme(panel_border=element_blank())

    # Ready-made scales built from the light palette.
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient("#ce4257", "#aad576")

    # Integer tags.
    FILL = 1
    COLOR = 2

    # Figure size tuple.
    LONG_FIGURE = (10, 20)

Esempio n. 9

0

Mostra file

File: 1.umap.py Progetto: gwaygenomics/2018_05_30_ResistanceMechanisms_Kapoor

def plot_umap_cell_line(embedding_df, fig_file, cell_line_column, color_labels,
                        color_values):
    """Scatter the 'x'/'y' embedding colored by cell line and save it.

    Args:
        embedding_df: DataFrame with 'x' and 'y' columns and the color column.
        fig_file: output file name passed to ``save``.
        cell_line_column: name of the column mapped to point color.
        color_labels: legend labels for the manual color scale.
        color_values: colors for the manual color scale.

    Returns:
        The ggplot object that was saved.
    """
    points = gg.geom_point(
        gg.aes(color=cell_line_column), size=0.2, shape=".", alpha=0.2)
    cell_line_scale = gg.scale_color_manual(
        name="Cell Line", labels=color_labels, values=color_values)

    cell_line_gg = gg.ggplot(embedding_df, gg.aes(x="x", y="y"))
    cell_line_gg = cell_line_gg + points
    cell_line_gg = cell_line_gg + gg.theme_bw()
    cell_line_gg = cell_line_gg + cell_line_scale

    cell_line_gg.save(filename=fig_file, height=4, width=5, dpi=500)
    return cell_line_gg

Esempio n. 10

0

Mostra file

File: figures.py Progetto: Pinafore/publications

def create_confidence_plot(conf_df):
    """Build density plots of confidence per method, one facet row per task.

    Args:
        conf_df: DataFrame with 'x', 'Method' and 'Task' columns.

    Returns:
        The assembled ggplot object.
    """
    # The y axis is hidden entirely; the legend sits on top, laid out
    # horizontally and without a title.
    bare_y_axis_theme = theme(
        axis_text_y=element_blank(),
        axis_ticks_major_y=element_blank(),
        axis_title_y=element_blank(),
        legend_title=element_blank(),
        legend_position='top',
        legend_box='horizontal',
    )
    figure = ggplot(conf_df) + aes(x='x', color='Method', fill='Method')
    figure = figure + geom_density(alpha=.45)
    figure = figure + facet_wrap('Task', nrow=4)
    figure = figure + xlab('Confidence')
    figure = figure + scale_color_manual(values=COLORS)
    figure = figure + scale_fill_manual(values=COLORS)
    figure = figure + theme_fs()
    figure = figure + bare_y_axis_theme
    return figure

Esempio n. 11

0

Mostra file

File: LogisticReal.py Progetto: denniscwylie/maclearn

def accPlot(accsByNFeats):
    """Print a line plot of accuracy against p, one line per set.

    Args:
        accsByNFeats: nested mapping ``{set: {p: accuracy}}``.
    """
    plotdata = []
    for set_name in accsByNFeats:
        acc_by_p = accsByNFeats[set_name]
        # One single-row frame per p, stacked into one frame per set.
        rows = []
        for p in acc_by_p:
            rows.append(
                DataFrame({"p": p, "acc": acc_by_p[p], "set": set_name},
                          index=[str(p)]))
        plotdata.append(pd.concat(rows, axis=0))
    ggd = pd.concat(plotdata)
    ggd['acc'] = ggd['acc'].astype(float)

    ggo = gg.ggplot(ggd, gg.aes(x='p', y='acc', color='set'))
    ggo += gg.geom_line(alpha=0.5)
    ggo += gg.geom_point()
    ggo += gg.theme_bw()
    ggo += gg.scale_x_log10(breaks=[10, 100, 1000, 10000])
    palette = ['darkgray', 'black', 'red', 'dodgerblue']
    ggo += gg.scale_color_manual(values=palette)
    ggo += gg.ylab('Accuracy (5-fold CV)')
    print(ggo)

Esempio n. 12

0

Mostra file

File: figures.py Progetto: Pinafore/publications

def create_confidence_plot(conf_df):
    """Return per-task density plots of confidence, split by method.

    Args:
        conf_df: DataFrame with 'x', 'Method' and 'Task' columns.

    Returns:
        The assembled ggplot object.
    """
    components = [
        aes(x='x', color='Method', fill='Method'),
        geom_density(alpha=.45),
        facet_wrap('Task', nrow=4),
        xlab('Confidence'),
        scale_color_manual(values=COLORS),
        scale_fill_manual(values=COLORS),
        theme_fs(),
        # Hide the y axis; put an untitled horizontal legend on top.
        theme(axis_text_y=element_blank(),
              axis_ticks_major_y=element_blank(),
              axis_title_y=element_blank(),
              legend_title=element_blank(),
              legend_position='top',
              legend_box='horizontal'),
    ]
    figure = ggplot(conf_df)
    for component in components:
        figure = figure + component
    return figure

Esempio n. 13

0

Mostra file

class THEME:
    """Shared colors, scales and the dark-background theme for plots."""

    # Plain color / loader settings.
    bgcolor = "#293241"
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palette.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#2a9d8f",
        "#797d62",
        "#3a6ea5",
    ]

    # Theme: panel and plot backgrounds filled with `bgcolor`, black axis text.
    mt = theme(
        panel_background=element_rect(fill=bgcolor),
        plot_background=element_rect(fill=bgcolor),
        axis_text_x=element_text(color="black"),
        axis_text_y=element_text(color="black"),
        strip_margin_y=0.05,
        strip_margin_x=0.5,
    )

    # Ready-made scales built from the palette.
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient("#aad576", "#ce4257")

    # Integer tags.
    FILL = 1
    COLOR = 2

    # Figure size tuple.
    LONG_FIGURE = (10, 20)

Esempio n. 14

0

Mostra file

File: figures.py Progetto: Pinafore/publications

def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Return per-task length histograms annotated with each group's mean.

    Args:
        len_df: DataFrame with 'Task', 'Method' and 'x' columns.
        legend_position: forwarded to ``theme(legend_position=...)``.
        legend_box: forwarded to ``theme(legend_box=...)``.

    Returns:
        The assembled ggplot object.
    """
    mean_len_df = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # Constant column mapped to linetype so the mean marker gets a legend key.
    mean_len_df[' '] = 'Mean Length'

    components = [
        aes(x='x', fill='Method', y='..density..'),
        geom_histogram(binwidth=2, position='identity', alpha=.6),
        # Numeric mean printed just above the marker line.
        geom_text(aes(x='x', y=.22, label='x', color='Method'),
                  mean_len_df,
                  inherit_aes=False,
                  format_string='{:.1f}',
                  show_legend=False),
        # Vertical line at the mean.
        geom_segment(aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
                     mean_len_df,
                     inherit_aes=False,
                     color='black'),
        scale_linetype_manual(['dashed']),
        facet_wrap('Task'),
        xlim(0, 20),
        ylim(0, .23),
        xlab('Example Length'),
        ylab('Frequency'),
        scale_color_manual(values=COLORS),
        scale_fill_manual(values=COLORS),
        theme_fs(),
        theme(aspect_ratio=1,
              legend_title=element_blank(),
              legend_position=legend_position,
              legend_box=legend_box),
    ]

    figure = ggplot(len_df)
    for component in components:
        figure = figure + component
    return figure

Esempio n. 15

0

Mostra file

File: MaclearnUtilities.py Progetto: lzhangUT/McLearn2

def ggpca(x,
          y=None,
          center='col',
          scale='none',
          rlab=False,
          clab=False,
          cshow=None,
          rsize=4,
          csize=2,
          lsize=10,
          lnudge=0.03,
          ralpha=0.6,
          calpha=1.0,
          clightalpha=0,
          rname='sample',
          cname='variable',
          lname='',
          grid=True,
          printit=False,
          xsvd=None,
          invert1=False,
          invert2=False,
          colscale=None,
          **kwargs):
    """Draw a PCA biplot (rows and, optionally, columns) on PC1/PC2.

    Args:
        x: DataFrame to decompose; columns containing nulls are dropped.
        y: optional labels indexed like ``x``; used as the row classes.
        center, scale: forwarded to ``svdForPca`` when ``xsvd`` is None.
        rlab, clab: row / column labels. ``True`` uses ``x.index`` /
            ``x.columns``, ``False`` uses no label; any other value is used
            as given.
        cshow: number of columns to show (default: all). When smaller than
            the column count, the columns with the largest
            ``PC1**2 + PC2**2`` are kept.
        rsize, csize: point sizes for rows / columns.
        lsize, lnudge: text size and vertical nudge of the labels.
        ralpha, calpha: alpha for rows / columns.
        clightalpha: when > 0, columns outside the ``cshow`` selection are
            kept unlabelled at this alpha instead of being removed.
        rname, cname: class names for rows (when ``y`` is None) and columns.
        lname: title of the color legend.
        grid: when False, grid lines and panel background are blanked.
        printit: when True, the plot is printed before being returned.
        xsvd: precomputed decomposition; presumably the same structure
            ``svdForPca`` returns (row scores, singular values, column
            loadings) -- TODO confirm against ``svdForPca``.
        invert1, invert2: flip the sign of PC1 / PC2.
        colscale: explicit list of colors. When None and there are fewer
            than 8 classes a built-in palette is chosen.
        **kwargs: accepted but not used by this function.

    Returns:
        The ggplot object.
    """
    # Drop null-containing columns first so that the default column labels
    # and the default ``cshow`` describe the columns actually decomposed
    # (deriving the labels before the drop could leave them longer than the
    # loadings they annotate).
    x = x.loc[:, x.isnull().sum(axis=0) == 0]
    if cshow is None:
        cshow = x.shape[1]
    if rlab is not None and isinstance(rlab, bool):
        rlab = x.index if rlab else ''
    if clab is not None and isinstance(clab, bool):
        clab = x.columns if clab else ''
    if xsvd is None:
        xsvd = svdForPca(x, center, scale)
    # Scale rows and columns by the PC1 range so both fit one set of axes.
    rsf = np.max(xsvd[0].iloc[:, 0]) - np.min(xsvd[0].iloc[:, 0])
    csf = np.max(xsvd[2].iloc[0, :]) - np.min(xsvd[2].iloc[0, :])
    sizeRange = sorted([csize, rsize])
    alphaRange = sorted([calpha, ralpha])
    ggd = pd.DataFrame({
        'PC1': xsvd[0].iloc[:, 0] / rsf,
        'PC2': xsvd[0].iloc[:, 1] / rsf,
        'label': rlab,
        'size': rsize,
        'alpha': ralpha
    })
    cclass = []
    if cshow > 0:
        cdata = pd.DataFrame({
            'PC1': xsvd[2].iloc[0, :] / csf,
            'PC2': xsvd[2].iloc[1, :] / csf,
            'label': clab,
            'size': csize,
            'alpha': calpha
        })
        if cshow < x.shape[1]:
            cscores = cdata['PC1']**2 + cdata['PC2']**2
            keep = cscores.sort_values(ascending=False).head(cshow).index
            if clightalpha > 0:
                # Keep the remaining columns, but faint and unlabelled.
                cdata.loc[~cdata.index.isin(keep), 'label'] = ''
                cdata.loc[~cdata.index.isin(keep), 'alpha'] = clightalpha
                alphaRange = [
                    np.min([alphaRange[0], clightalpha]),
                    np.max([alphaRange[1], clightalpha])
                ]
            else:
                cdata = cdata.loc[cdata.index.isin(keep)]
        # Columns first, then rows: the class list below relies on this order.
        ggd = pd.concat([cdata, ggd])
        cclass = [cname] * cdata.shape[0]
    if invert1:
        ggd['PC1'] = -ggd['PC1']
    if invert2:
        ggd['PC2'] = -ggd['PC2']
    if y is not None:
        ggd['class'] = cclass + list(y.loc[x.index])
    else:
        ggd['class'] = cclass + ([rname] * x.shape[0])
    ggo = gg.ggplot(
        ggd,
        gg.aes(x='PC1',
               y='PC2',
               color='class',
               size='size',
               alpha='alpha',
               label='label'))
    ggo += gg.geom_hline(yintercept=0, color='lightgray')
    ggo += gg.geom_vline(xintercept=0, color='lightgray')
    ggo += gg.geom_point()
    ggo += gg.theme_bw()
    ggo += gg.geom_text(nudge_y=lnudge, size=lsize, show_legend=False)
    n_classes = len(ggd['class'].unique())
    if colscale is None and n_classes < 8:
        colscale = [
            'darkslategray', 'goldenrod', 'lightseagreen', 'orangered',
            'dodgerblue', 'darkorchid'
        ]
        # One color per class, with gray reserved for the last class.
        colscale = colscale[0:(n_classes - 1)] + ['gray']
        if len(colscale) == 2 and cshow > 0:
            colscale = ['black', 'darkgray']
        if len(colscale) == 2 and cshow == 0:
            colscale = ['black', 'red']
        if len(colscale) == 3:
            colscale = ['black', 'red', 'darkgray']
    if colscale is not None:
        ggo += gg.scale_color_manual(values=colscale, name=lname)
    else:
        # No palette was given or chosen (8 or more classes): keep the
        # default color scale rather than passing values=None to the manual
        # scale, and only set the legend title.
        ggo += gg.labs(color=lname)
    ggo += gg.scale_size_continuous(guide=False, range=sizeRange)
    ggo += gg.scale_alpha_continuous(guide=False, range=alphaRange)
    # Axis titles report each component's share of the summed squared
    # entries of xsvd[1].
    ggo += gg.xlab('PC1 (' +
                   str(np.round(100 * xsvd[1][0]**2 /
                                ((xsvd[1]**2).sum()), 1)) +
                   '% explained var.)')
    ggo += gg.ylab('PC2 (' +
                   str(np.round(100 * xsvd[1][1]**2 /
                                ((xsvd[1]**2).sum()), 1)) +
                   '% explained var.)')
    if not grid:
        ggo += gg.theme(panel_grid_minor=gg.element_blank(),
                        panel_grid_major=gg.element_blank(),
                        panel_background=gg.element_blank())
    ggo += gg.theme(axis_ticks=gg.element_blank(),
                    axis_text_x=gg.element_blank(),
                    axis_text_y=gg.element_blank())
    if printit:
        print(ggo)
    return ggo

Esempio n. 16

0

Mostra file

File: Human_experiment_lvl_sim.py Progetto: greenelab/simulate-expression-compendia

           y = "Similarity score (SVCCA)",
           title = "Similarity across varying numbers of partitions") \
    + theme(
            plot_background=element_rect(fill="white"),
            panel_background=element_rect(fill="white"),
            panel_grid_major_x=element_line(color="lightgrey"),
            panel_grid_major_y=element_line(color="lightgrey"),
            axis_line=element_line(color="grey"),
            legend_key=element_rect(fill='white', colour='white'),
            legend_title=element_text(family='sans-serif', size=15),
            legend_text=element_text(family='sans-serif', size=12),
            plot_title=element_text(family='sans-serif', size=15),
            axis_text=element_text(family='sans-serif', size=12),
            axis_title=element_text(family='sans-serif', size=15)
           ) \
    + scale_color_manual(['#1976d2', '#b3e5fc']) \

# Show the finished panel, then write it to both output files.
print(panel_A)
ggsave(plot=panel_A, filename=svcca_file, device="svg", dpi=300)
# NOTE(review): the variable name suggests a PNG target, yet device="svg" is
# passed here as well -- confirm which format is intended for this file.
ggsave(plot=panel_A, filename=svcca_png_file, device="svg", dpi=300)

# ### Uncorrected PCA

# In[14]:

# Keep only the partition counts selected by the indices in pca_ind.
lst_num_partitions = [lst_num_partitions[i] for i in pca_ind]

# Starts out as an empty frame.
all_data_df = pd.DataFrame()

# Get batch 1 data
partition_1_file = os.path.join(compendia_dir, "Partition_1_0.txt.xz")

Esempio n. 17

0

Mostra file

File: datavis.py Progetto: joholley/datavis

# Disabled export of the plotnine boxplot:
# ggbox.save('gse75386_gad1_boxplot.pdf', format='pdf', height=1, width=6)

# Close the current matplotlib figure before drawing with seaborn.
plt.close()
# plt.figure(figsize=(6, 1))
# Gad1 per class: a white boxplot with the individual points drawn in black
# on top of it.
sns.boxplot(data=gse75386, y='class', x='Gad1', color='white')
sns.stripplot(data=gse75386, y='class', x='Gad1', color='black')
# Disabled export of the seaborn figure:
# plt.savefig('gse75386_gad1_boxplot.pdf',
#             format='pdf', bbox_inches='tight')

# ------------------------------------------------------------------
# GSE75386 scatterplot: Cck against Gad1, colored by class
# ------------------------------------------------------------------
plt.close()
ggscat = (
    ggplot(gse75386, gg.aes(x='Gad1', y='Cck', color='class'))
    + gg.geom_point(alpha=0.75)
    + gg.scale_color_manual(
        values=['darkslategray', 'goldenrod', 'lightseagreen'])
)
print(ggscat)
# Export is left disabled:
# ggscat.save('gse75386_cck_vs_gad1.pdf', format='pdf',
#             height=5, width=7)


def binarize(x, column, brk):
    """Label each row 'high <column>' or 'low <column>' around a threshold.

    Args:
        x: DataFrame containing ``column``.
        column: name of the column to cut.
        brk: threshold; values strictly greater than it are 'high'.

    Returns:
        Series of labels sharing the index of ``x``.
    """
    high_label = 'high ' + column
    low_label = 'low ' + column
    labels = [
        high_label if is_above else low_label for is_above in x[column] > brk
    ]
    return pd.Series(labels, index=x.index)


# Add low/high label columns: Pvalb is cut at 5 and Gad1 at 6.
gse75386['Pvalb (cut)'] = binarize(gse75386, 'Pvalb', 5)
gse75386['Gad1 (cut)'] = binarize(gse75386, 'Gad1', 6)
# Preview the first rows (only displayed when run interactively).
gse75386.head()

Esempio n. 18

0

Mostra file

File: disc_model_analysis.py Progetto: zorrotrying/snorkeling

        (critical_val * x.aupr_std) / pd.np.sqrt(x.lf_num_len),
        'aupr_lower':
        lambda x: x.aupr_mean -
        (critical_val * x.aupr_std) / pd.np.sqrt(x.lf_num_len)
    }))
dev_set_stats_df

# In[9]:

# Mean AUROC per number of label functions, one line per model, with
# error bars spanning the lower/upper bounds.
(
    p9.ggplot(dev_set_stats_df,
              p9.aes(x="factor(lf_num)", y="auroc_mean", color="model"))
    + p9.geom_point()
    + p9.geom_line(p9.aes(group="model"))
    + p9.geom_errorbar(
        p9.aes(ymin="auroc_lower", ymax="auroc_upper", group="model"))
    + p9.theme_seaborn()
    + p9.labs(title="CtD Tune Set AUROC", color="Model")
    + p9.scale_color_manual({"disc_model": "blue", "gen_model": "orange"})
)

# In[10]:

# Same layout for the mean AUPR.
(
    p9.ggplot(dev_set_stats_df,
              p9.aes(x="factor(lf_num)", y="aupr_mean", color="model"))
    + p9.geom_point()
    + p9.geom_line(p9.aes(group="model"))
    + p9.geom_errorbar(
        p9.aes(ymin="aupr_lower", ymax="aupr_upper", group="model"))
    + p9.theme_seaborn()
    + p9.labs(title="CtD Tune Set AUPR", color="Model")
    + p9.scale_color_manual({"disc_model": "blue", "gen_model": "orange"})
)

# In[11]:

Esempio n. 19

0

Mostra file

File: analyze_color.py Progetto: movalencia/plantcv

def analyze_color(rgb_img, mask, hist_plot_type=None):
    """Analyze the color properties of an image object
    Inputs:
    rgb_img          = RGB image data
    mask             = Binary mask made from selected contours
    hist_plot_type   = 'None', 'all', 'rgb','lab' or 'hsv'
    
    Returns:
    analysis_image   = histogram output
    
    :param rgb_img: numpy.ndarray
    :param mask: numpy.ndarray
    :param hist_plot_type: str
    :return analysis_images: list
    """

    params.device += 1

    if len(np.shape(rgb_img)) < 3:
        fatal_error("rgb_img must be an RGB image")

    # Mask the input image
    masked = cv2.bitwise_and(rgb_img, rgb_img, mask=mask)
    # Extract the blue, green, and red channels
    b, g, r = cv2.split(masked)
    # Convert the BGR image to LAB
    lab = cv2.cvtColor(masked, cv2.COLOR_BGR2LAB)
    # Extract the lightness, green-magenta, and blue-yellow channels
    l, m, y = cv2.split(lab)
    # Convert the BGR image to HSV
    hsv = cv2.cvtColor(masked, cv2.COLOR_BGR2HSV)
    # Extract the hue, saturation, and value channels
    h, s, v = cv2.split(hsv)

    # Color channel dictionary
    channels = {
        "b": b,
        "g": g,
        "r": r,
        "l": l,
        "m": m,
        "y": y,
        "h": h,
        "s": s,
        "v": v
    }

    # Histogram plot types
    hist_types = {
        "ALL": ("b", "g", "r", "l", "m", "y", "h", "s", "v"),
        "RGB": ("b", "g", "r"),
        "LAB": ("l", "m", "y"),
        "HSV": ("h", "s", "v")
    }

    if hist_plot_type is not None and hist_plot_type.upper() not in hist_types:
        fatal_error(
            "The histogram plot type was " + str(hist_plot_type) +
            ', but can only be one of the following: None, "all", "rgb", "lab", or "hsv"!'
        )
    # Store histograms, plotting colors, and plotting labels
    histograms = {
        "b": {
            "label":
            "blue",
            "graph_color":
            "blue",
            "hist": [
                float(l[0]) for l in cv2.calcHist([channels["b"]], [0], mask,
                                                  [256], [0, 255])
            ]
        },
        "g": {
            "label":
            "green",
            "graph_color":
            "forestgreen",
            "hist": [
                float(l[0]) for l in cv2.calcHist([channels["g"]], [0], mask,
                                                  [256], [0, 255])
            ]
        },
        "r": {
            "label":
            "red",
            "graph_color":
            "red",
            "hist": [
                float(l[0]) for l in cv2.calcHist([channels["r"]], [0], mask,
                                                  [256], [0, 255])
            ]
        },
        "l": {
            "label":
            "lightness",
            "graph_color":
            "dimgray",
            "hist": [
                float(l[0]) for l in cv2.calcHist([channels["l"]], [0], mask,
                                                  [256], [0, 255])
            ]
        },
        "m": {
            "label":
            "green-magenta",
            "graph_color":
            "magenta",
            "hist": [
                float(l[0]) for l in cv2.calcHist([channels["m"]], [0], mask,
                                                  [256], [0, 255])
            ]
        },
        "y": {
            "label":
            "blue-yellow",
            "graph_color":
            "yellow",
            "hist": [
                float(l[0]) for l in cv2.calcHist([channels["y"]], [0], mask,
                                                  [256], [0, 255])
            ]
        },
        "h": {
            "label":
            "hue",
            "graph_color":
            "blueviolet",
            "hist": [
                float(l[0]) for l in cv2.calcHist([channels["h"]], [0], mask,
                                                  [256], [0, 255])
            ]
        },
        "s": {
            "label":
            "saturation",
            "graph_color":
            "cyan",
            "hist": [
                float(l[0]) for l in cv2.calcHist([channels["s"]], [0], mask,
                                                  [256], [0, 255])
            ]
        },
        "v": {
            "label":
            "value",
            "graph_color":
            "orange",
            "hist": [
                float(l[0]) for l in cv2.calcHist([channels["v"]], [0], mask,
                                                  [256], [0, 255])
            ]
        }
    }

    # Create list of bin labels for 8-bit data
    binval = np.arange(0, 256)
    bin_values = [l for l in binval]

    analysis_images = []
    # Create a dataframe of bin labels and histogram data
    dataset = pd.DataFrame({
        'bins': binval,
        'blue': histograms["b"]["hist"],
        'green': histograms["g"]["hist"],
        'red': histograms["r"]["hist"],
        'lightness': histograms["l"]["hist"],
        'green-magenta': histograms["m"]["hist"],
        'blue-yellow': histograms["y"]["hist"],
        'hue': histograms["h"]["hist"],
        'saturation': histograms["s"]["hist"],
        'value': histograms["v"]["hist"]
    })

    # Make the histogram figure using plotnine
    if hist_plot_type is not None:
        if hist_plot_type.upper() == 'RGB':
            df_rgb = pd.melt(dataset,
                             id_vars=['bins'],
                             value_vars=['blue', 'green', 'red'],
                             var_name='Color Channel',
                             value_name='Pixels')
            hist_fig = (ggplot(
                df_rgb, aes(x='bins', y='Pixels', color='Color Channel')) +
                        geom_line() +
                        scale_x_continuous(breaks=list(range(0, 256, 25))) +
                        scale_color_manual(['blue', 'green', 'red']))
            analysis_images.append(hist_fig)

        elif hist_plot_type.upper() == 'LAB':
            df_lab = pd.melt(
                dataset,
                id_vars=['bins'],
                value_vars=['lightness', 'green-magenta', 'blue-yellow'],
                var_name='Color Channel',
                value_name='Pixels')
            hist_fig = (ggplot(
                df_lab, aes(x='bins', y='Pixels', color='Color Channel')) +
                        geom_line() +
                        scale_x_continuous(breaks=list(range(0, 256, 25))) +
                        scale_color_manual(['yellow', 'magenta', 'dimgray']))
            analysis_images.append(hist_fig)

        elif hist_plot_type.upper() == 'HSV':
            df_hsv = pd.melt(dataset,
                             id_vars=['bins'],
                             value_vars=['hue', 'saturation', 'value'],
                             var_name='Color Channel',
                             value_name='Pixels')
            hist_fig = (ggplot(
                df_hsv, aes(x='bins', y='Pixels', color='Color Channel')) +
                        geom_line() +
                        scale_x_continuous(breaks=list(range(0, 256, 25))) +
                        scale_color_manual(['blueviolet', 'cyan', 'orange']))
            analysis_images.append(hist_fig)

        elif hist_plot_type.upper() == 'ALL':
            s = pd.Series([
                'blue', 'green', 'red', 'lightness', 'green-magenta',
                'blue-yellow', 'hue', 'saturation', 'value'
            ],
                          dtype="category")
            color_channels = [
                'blue', 'yellow', 'green', 'magenta', 'blueviolet', 'dimgray',
                'red', 'cyan', 'orange'
            ]
            df_all = pd.melt(dataset,
                             id_vars=['bins'],
                             value_vars=s,
                             var_name='Color Channel',
                             value_name='Pixels')
            hist_fig = (ggplot(
                df_all, aes(x='bins', y='Pixels', color='Color Channel')) +
                        geom_line() +
                        scale_x_continuous(breaks=list(range(0, 256, 25))) +
                        scale_color_manual(color_channels))
            analysis_images.append(hist_fig)

    # Hue values of zero are red but are also the value for pixels where hue is undefined
    # The hue value of a pixel will be undefined when the color values are saturated
    # Therefore, hue values of zero are excluded from the calculations below

    # Calculate the median hue value
    # The median is rescaled from the encoded 0-179 range to the 0-359 degree range
    hue_median = np.median(h[np.where(h > 0)]) * 2

    # Calculate the circular mean and standard deviation of the encoded hue values
    # The mean and standard-deviation are rescaled from the encoded 0-179 range to the 0-359 degree range
    hue_circular_mean = stats.circmean(h[np.where(h > 0)], high=179, low=0) * 2
    hue_circular_std = stats.circstd(h[np.where(h > 0)], high=179, low=0) * 2

    # Store into lists instead for pipeline and print_results
    # stats_dict = {'mean': circular_mean, 'std' : circular_std, 'median': median}

    # Plot or print the histogram
    if hist_plot_type is not None:
        if params.debug == 'print':
            hist_fig.save(
                os.path.join(params.debug_outdir,
                             str(params.device) + '_analyze_color_hist.png'))
        elif params.debug == 'plot':
            print(hist_fig)

    # Store into global measurements
    # RGB signal values are in an unsigned 8-bit scale of 0-255
    rgb_values = [i for i in range(0, 256)]
    # Hue values are in a 0-359 degree scale, every 2 degrees at the midpoint of the interval
    hue_values = [i * 2 + 1 for i in range(0, 180)]
    # Percentage values on a 0-100 scale (lightness, saturation, and value)
    percent_values = [round((i / 255) * 100, 2) for i in range(0, 256)]
    # Diverging values on a -128 to 127 scale (green-magenta and blue-yellow)
    diverging_values = [i for i in range(-128, 128)]
    # outputs.measurements['color_data'] = {
    #     'histograms': {
    #         'blue': {'signal_values': rgb_values, 'frequency': histograms["b"]["hist"]},
    #         'green': {'signal_values': rgb_values, 'frequency': histograms["g"]["hist"]},
    #         'red': {'signal_values': rgb_values, 'frequency': histograms["r"]["hist"]},
    #         'lightness': {'signal_values': percent_values, 'frequency': histograms["l"]["hist"]},
    #         'green-magenta': {'signal_values': diverging_values, 'frequency': histograms["m"]["hist"]},
    #         'blue-yellow': {'signal_values': diverging_values, 'frequency': histograms["y"]["hist"]},
    #         'hue': {'signal_values': hue_values, 'frequency': histograms["h"]["hist"]},
    #         'saturation': {'signal_values': percent_values, 'frequency': histograms["s"]["hist"]},
    #         'value': {'signal_values': percent_values, 'frequency': histograms["v"]["hist"]}
    #     },
    #     'color_features': {
    #         'hue_circular_mean': hue_circular_mean,
    #         'hue_circular_std': hue_circular_std,
    #         'hue_median': hue_median
    #     }
    # }
    outputs.add_observation(variable='blue_frequencies',
                            trait='blue frequencies',
                            method='plantcv.plantcv.analyze_color',
                            scale='frequency',
                            datatype=list,
                            value=histograms["b"]["hist"],
                            label=rgb_values)
    outputs.add_observation(variable='green_frequencies',
                            trait='green frequencies',
                            method='plantcv.plantcv.analyze_color',
                            scale='frequency',
                            datatype=list,
                            value=histograms["g"]["hist"],
                            label=rgb_values)
    outputs.add_observation(variable='red_frequencies',
                            trait='red frequencies',
                            method='plantcv.plantcv.analyze_color',
                            scale='frequency',
                            datatype=list,
                            value=histograms["r"]["hist"],
                            label=rgb_values)
    outputs.add_observation(variable='lightness_frequencies',
                            trait='lightness frequencies',
                            method='plantcv.plantcv.analyze_color',
                            scale='frequency',
                            datatype=list,
                            value=histograms["l"]["hist"],
                            label=percent_values)
    outputs.add_observation(variable='green-magenta_frequencies',
                            trait='green-magenta frequencies',
                            method='plantcv.plantcv.analyze_color',
                            scale='frequency',
                            datatype=list,
                            value=histograms["m"]["hist"],
                            label=diverging_values)
    outputs.add_observation(variable='blue-yellow_frequencies',
                            trait='blue-yellow frequencies',
                            method='plantcv.plantcv.analyze_color',
                            scale='frequency',
                            datatype=list,
                            value=histograms["y"]["hist"],
                            label=diverging_values)
    outputs.add_observation(variable='hue_frequencies',
                            trait='hue frequencies',
                            method='plantcv.plantcv.analyze_color',
                            scale='frequency',
                            datatype=list,
                            value=histograms["h"]["hist"],
                            label=hue_values)
    outputs.add_observation(variable='saturation_frequencies',
                            trait='saturation frequencies',
                            method='plantcv.plantcv.analyze_color',
                            scale='frequency',
                            datatype=list,
                            value=histograms["s"]["hist"],
                            label=percent_values)
    outputs.add_observation(variable='value_frequencies',
                            trait='value frequencies',
                            method='plantcv.plantcv.analyze_color',
                            scale='frequency',
                            datatype=list,
                            value=histograms["v"]["hist"],
                            label=percent_values)
    outputs.add_observation(variable='hue_circular_mean',
                            trait='hue circular mean',
                            method='plantcv.plantcv.analyze_color',
                            scale='degrees',
                            datatype=float,
                            value=hue_circular_mean,
                            label='degrees')
    outputs.add_observation(variable='hue_circular_std',
                            trait='hue circular standard deviation',
                            method='plantcv.plantcv.analyze_color',
                            scale='degrees',
                            datatype=float,
                            value=hue_median,
                            label='degrees')
    outputs.add_observation(variable='hue_median',
                            trait='hue median',
                            method='plantcv.plantcv.analyze_color',
                            scale='degrees',
                            datatype=float,
                            value=hue_median,
                            label='degrees')

    # Store images
    outputs.images.append(analysis_images)

    return analysis_images

Esempio n. 20

0

Mostra file

File: line_plot.py Progetto: wkostelecki/ezplot9

def line_plot(df,
              x,
              y,
              group=None,
              facet_x=None,
              facet_y=None,
              aggfun='sum',
              err=None,
              show_points=False,
              base_size=10,
              figure_size=(6, 3)):
    """Aggregate ``df`` and draw one or several y expressions as lines.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe; a copy is aggregated, the argument is not modified.
    x : str
        Quoted expression plotted on the x axis. The special value
        ``'.index'`` selects the dataframe index.
    y : str or list of str
        Quoted expression(s) plotted on the y axis. A one-element list is
        treated like a plain string; with several entries each expression
        becomes its own line and the legend is titled ``'Variable'``.
    group : str
        Quoted expression used as group (i.e. colour). Only allowed with a
        single y expression.
    facet_x : str
        Quoted expression used as facet.
    facet_y : str
        Quoted expression used as second facet; only applied together
        with ``facet_x``.
    aggfun : str or callable
        Aggregation handed to ``agg_data`` (e.g. sum, mean, median ...).
    err : str
        Quoted expression drawn as a shaded band of ``y - err`` to
        ``y + err``. Only allowed with a single y expression.
    show_points : bool
        Show/hide point markers on the lines.
    base_size : int
        Base size for ``theme_ez``.
    figure_size : tuple of int
        Figure size.

    Returns
    -------
    g : EZPlot
        EZPlot object.

    Raises
    ------
    ValueError
        If ``group`` or ``err`` is combined with more than one y column.
    """

    # group / err only make sense when exactly one y column is plotted
    several_ys = isinstance(y, list) and len(y) > 1
    if several_ys and group is not None:
        message = "groups can be specified only when a single y column is present"
        log.error(message)
        raise ValueError(message)
    if several_ys and err is not None:
        message = "err can be specified only when a single y column is present"
        log.error(message)
        raise ValueError(message)

    # a one-element list behaves exactly like a bare expression
    if isinstance(y, list) and len(y) == 1:
        y = y[0]

    # never touch the caller's frame
    dataframe = df.copy()

    # unname() yields a (name, expression) pair for every grouping input
    names, groups, variables = {}, {}, {}
    grouping_inputs = (('x', x), ('group', group), ('facet_x', facet_x),
                       ('facet_y', facet_y))
    for label, var in grouping_inputs:
        names[label], groups[label] = unname(var)

    # special case: plot against the index, labelled with the index name
    if x == '.index':
        groups['x'] = '.index'
        index_name = dataframe.index.name
        names['x'] = '' if index_name is None else index_name

    if isinstance(y, list):
        # one placeholder column (y_0, y_1, ...) per requested expression
        ys = ['y_{}'.format(i) for i in range(len(y))]
        for key, var in zip(ys, y):
            names[key], variables[key] = unname(var)

        wide = agg_data(dataframe,
                        variables,
                        groups,
                        aggfun,
                        fill_groups=True)

        # wide -> long: the placeholder columns become the 'group' column
        id_columns = [
            c for c in ['x', 'facet_x', 'facet_y'] if c in wide.columns
        ]
        gdata = pd.melt(wide, id_columns, var_name='group', value_name='y')
        gdata['group'] = gdata['group'].replace(
            {key: names[key]
             for key in ys})

        # from here on the melted variable acts as the group
        names['y'] = 'Value'
        names['group'] = 'Variable'
        group = 'Variable'
    else:
        names['y'], variables['y'] = unname(y)
        if err is not None:
            names['err'], variables['err'] = unname(err)

        gdata = agg_data(dataframe,
                         variables,
                         groups,
                         aggfun,
                         fill_groups=True)

    # keep only the known columns, in a fixed order
    preferred_order = ['x', 'y', 'err', 'group', 'facet_x', 'facet_y']
    gdata = gdata[[c for c in preferred_order if c in gdata.columns]]
    if err is not None:
        gdata['ymax'] = gdata['y'] + gdata['err']
        gdata['ymin'] = gdata['y'] - gdata['err']

    g = EZPlot(gdata)

    if group is not None:
        # one coloured line (and optional markers / band) per group
        g += p9.geom_line(
            p9.aes(x="x", y="y", group="factor(group)",
                   colour="factor(group)"))
        if show_points:
            g += p9.geom_point(p9.aes(x="x", y="y", colour="factor(group)"))
        if err is not None:
            g += p9.geom_ribbon(p9.aes(x="x",
                                       ymax="ymax",
                                       ymin="ymin",
                                       fill="factor(group)"),
                                alpha=0.2)
        g += p9.scale_color_manual(values=ez_colors(g.n_groups('group')))
        g += p9.scale_fill_manual(values=ez_colors(g.n_groups('group')))
    else:
        # ungrouped: a single line in the first palette colour
        g += p9.geom_line(p9.aes(x="x", y="y"),
                          group=1,
                          colour=ez_colors(1)[0])
        if show_points:
            g += p9.geom_point(p9.aes(x="x", y="y"),
                               group=1,
                               colour=ez_colors(1)[0])
        if err is not None:
            g += p9.geom_ribbon(p9.aes(x="x", ymax="ymax", ymin="ymin"),
                                group=1,
                                fill=ez_colors(1)[0],
                                alpha=0.2)

    # facets: wrap on facet_x alone, grid when both are given
    if facet_x is not None:
        if facet_y is None:
            g += p9.facet_wrap('~facet_x')
        else:
            g += p9.facet_grid('facet_y~facet_x')

    # x scale follows the column type reported by the plot object
    if g.column_is_timestamp('x'):
        g += p9.scale_x_datetime()
    elif g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

    g += p9.scale_y_continuous(labels=ez_labels)

    # axis labels
    g += (p9.xlab(names['x']) + p9.ylab(names['y']))

    # theme, with the group name as legend title
    legend_title = p9.element_text(text=names['group'], size=base_size)
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=legend_title)

    return g

Esempio n. 21

0

Mostra file

File: maps.py Progetto: hpv-information-centre/reportcompiler-ic-tools-python

def generate_map(data,
                 region,
                 value_field,
                 iso_field='iso',
                 scale_params=None,
                 plot_na_dots=False,
                 tolerance=None,
                 plot_size=8,
                 out_region_color='#f0f0f0',
                 na_color='#aaaaaa',
                 line_color='#666666',
                 projection=None):
    """
    Build a map plot of *value_field* for the countries of a region.

    :param pandas.DataFrame data: Data to be plotted.
    :param str region: Region to center the map around. Countries outside
        the chosen region will be obscured (filled with
        *out_region_color*). For ``'XWX'`` no country is treated as being
        outside the region.
    :param str value_field: Column of *data* with the values to be plotted.
        A column of dtype ``object`` is treated as discrete, anything else
        as continuous.
    :param str iso_field: Column of *data* with the ISO3 codes for each
        country.
    :param dict scale_params: Dictionary of parameters to be passed to the
        ggplot corresponding color scale (``scale_fill_gradient`` for
        continuous values, ``scale_fill_brewer`` for discrete ones).
    :param bool plot_na_dots: Whether to plot the dots for small countries
        if said country doesn't have data available.
    :param int tolerance: Coordinate tolerance for polygon simplification,
        a higher number will result in simpler polygons and faster
        rendering. Defaults to ``DEFAULT_TOLERANCES[projection][region]``.
    :param int plot_size: Size of the plot, which determines the relative sizes
        of the elements within.
    :param str out_region_color: Hex color of the countries that are out of the
        specified region.
    :param str na_color: Hex color of the countries with no data available.
    :param str line_color: Color of the country borders.
    :param str projection: Kind of map projection to be used in the map
        (a key of ``PROJECTION_DICT``). Defaults to ``'epsg4326'`` for
        region ``'XOX'`` and to ``'robinson'`` otherwise.
        Currently, Oceania (XOX) is only available in EPSG:4326 to enable
        wrapping.
    :returns: a dictionary with the keys ``'plot'`` (the ggplot-like map
        plot) and ``'ratio'`` (width divided by height of the region
        bounds, also used for the figure size)
    :rtype: dict
    :raises ValueError: if *projection* is not a key of ``PROJECTION_DICT``
        or *region* is not available for the chosen projection.
    """
    # Pick the default projection for the region when none was requested.
    if projection is None:
        if region == 'XOX':
            projection = 'epsg4326'
        else:
            projection = 'robinson'

    if projection not in PROJECTION_DICT.keys():
        raise ValueError('Projection "{}" not valid'.format(projection))

    if scale_params is None:
        scale_params = {}

    if region not in REGION_BOUNDS[projection]:
        raise ValueError(
            '"region" not available. Valid regions are: {}'.format(', '.join(
                REGION_BOUNDS[projection].keys())))

    if tolerance is None:
        tolerance = DEFAULT_TOLERANCES[projection][region]

    # Country polygons are read from the shapefile shipped next to this module.
    countries = GeoDataFrame.from_file(
        os.path.join(os.path.dirname(__file__), 'data/world-countries.shp'))

    # To plot Oceania we need the original EPSG:4326 to wrap around the 180º
    # longitude. In other cases transform to the desired projection.
    if region == 'XOX':
        countries.crs['lon_wrap'] = '180'  # Wrap around longitude 180º

        # Re-project only the Oceania rows with the wrapped CRS and refresh
        # their centroid coordinates.
        XOX_countries = countries['continent'] == 'XOX'
        countries[XOX_countries] = countries[XOX_countries].to_crs(
            countries.crs)
        centroids = countries[XOX_countries].apply(
            lambda row: row['geometry'].centroid, axis=1)
        countries.loc[XOX_countries, 'lon'] = [c.x for c in centroids]
        countries.loc[XOX_countries, 'lat'] = [c.y for c in centroids]
    else:
        # NOTE(review): for 'epsg4326' the 'lon'/'lat' columns are not
        # recomputed here; presumably the shapefile already provides them.
        if projection != 'epsg4326':
            countries = countries.to_crs(PROJECTION_DICT[projection])
            centroids = countries.apply(lambda row: row['geometry'].centroid,
                                        axis=1)
            countries['lon'] = [c.x for c in centroids]
            countries['lat'] = [c.y for c in centroids]

    countries['geometry'] = countries['geometry'].simplify(tolerance)

    # Region bounds are unpacked as (upper_left, lower_right) corners; they
    # give the axis limits and the width/height ratio of the figure.
    upper_left, lower_right = REGION_BOUNDS[projection][region]
    limits_x = [upper_left[0], lower_right[0]]
    limits_y = [lower_right[1], upper_left[1]]
    ratio = (limits_x[1] - limits_x[0]) / (limits_y[1] - limits_y[0])

    # Left join: every country is kept, countries without a row in *data*
    # end up with a null value_field.
    plot_data = pd.merge(countries,
                         data,
                         how='left',
                         left_on='iso',
                         right_on=iso_field)
    # Countries whose 'pol_area' is below DOT_THRESHOLD times the area of the
    # region's EPSG:4326 bounds are flagged to additionally get a dot.
    map_bounds = REGION_BOUNDS['epsg4326'][region]
    map_area = ((map_bounds[1][0] - map_bounds[0][0]) *
                (map_bounds[0][1] - map_bounds[1][1]))
    plot_data['plot_dot'] = (plot_data['pol_area'] < DOT_THRESHOLD * map_area)

    if not plot_na_dots:
        # Drop the dots of countries without data.
        plot_data['plot_dot'] &= ~pd.isnull(plot_data[value_field])

    # Split the countries into: in region with data, in region without data,
    # and out of region. For 'XWX' nothing is out of region.
    if region != 'XWX':
        in_region = ((~pd.isnull(plot_data[value_field])) &
                     (plot_data['continent'] == region))
        in_region_missing = ((pd.isnull(plot_data[value_field])) &
                             (plot_data['continent'] == region))
        out_region = plot_data['continent'] != region
    else:
        in_region = ~pd.isnull(plot_data[value_field])
        in_region_missing = pd.isnull(plot_data[value_field])
        out_region = np.repeat(False, len(plot_data))

    if plot_data[value_field].dtype == 'object':
        # Assume discrete values
        fill_scale = scale_fill_brewer(**scale_params, drop=False)
    else:
        # Assume continuous values
        fill_scale = scale_fill_gradient(**scale_params)

    plot_data_values = plot_data[in_region]
    plot_data_missing = plot_data[in_region_missing]
    plot_data_out_region = plot_data[out_region]

    # Subsets of each category that are drawn as dots.
    dots_region = plot_data_values[plot_data_values['plot_dot']]
    dots_region_missing = plot_data_missing[plot_data_missing['plot_dot']]
    dots_out_region = plot_data_out_region[plot_data_out_region['plot_dot']]

    # Layers: three polygon layers (values, missing, out of region) followed
    # by the matching three dot layers, then fixed axis limits and the theme.
    plt = (
        ggplot() + geom_map(plot_data_values,
                            aes(fill=value_field),
                            color=line_color,
                            size=0.3) +
        geom_map(
            plot_data_missing, aes(color='plot_dot'), fill=na_color,
            size=0.3) + geom_map(plot_data_out_region,
                                 fill=out_region_color,
                                 color=line_color,
                                 size=0.3) +
        geom_point(dots_region,
                   aes(x='lon', y='lat', fill=value_field),
                   size=3,
                   stroke=.1,
                   color=line_color) + geom_point(dots_region_missing,
                                                  aes(x='lon', y='lat'),
                                                  fill=na_color,
                                                  size=3,
                                                  stroke=.1,
                                                  color=line_color) +
        geom_point(dots_out_region,
                   aes(x='lon', y='lat'),
                   fill=out_region_color,
                   size=3,
                   stroke=.1,
                   color=line_color) +
        scale_x_continuous(breaks=[], limits=limits_x) +
        scale_y_continuous(breaks=[], limits=limits_y) + theme(
            figure_size=(plot_size * ratio, plot_size),
            panel_background=element_rect(fill='white', color='black'),
            #  panel_border=element_rect(fill='white',
            #                            color='black',
            #                            size=.1),
            legend_background=element_rect(
                fill="white", color='black', size=.5),
            legend_box_just='left') + xlab('') + ylab(''))

    # The fill scale is only added when at least one country has a value.
    if len(plot_data_values.index) > 0:
        plt += fill_scale

    # Colour scale for the 'plot_dot' mapping of the missing-data layer; it
    # appears to exist to provide the 'No data available' legend entry.
    plt += scale_color_manual(name=' ',
                              values=[line_color],
                              breaks=[False],
                              labels=['No data available'])

    if plot_data[value_field].dtype == 'object':
        plt += guides(fill=guide_legend(override_aes={'shape': None}))

    return {
        'plot': plt,
        'ratio': ratio,
    }

Esempio n. 22

0

Mostra file

File: tsne_shen.py Progetto: denniscwylie/maclearn

# Interactive mode on `plt` (presumably matplotlib.pyplot, imported above
# this snippet) so that showing the figure does not block the script.
plt.ion()


import RestrictedData
xnorms = RestrictedData.xnorms
annots = RestrictedData.annots

# Work on the 'shen' entries of the normalized data and of the annotations.
shen_x = xnorms['shen']
shen_annots = annots['shen']


# Two-dimensional t-SNE embedding; random_state is fixed for repeatability.
tsne_settings = dict(
    n_components=2,
    verbose=1,
    perplexity=10,
    method='barnes_hut',
    angle=0.5,
    init='pca',
    early_exaggeration=12,
    learning_rate=200,
    n_iter=1000,
    random_state=123,
)
tsne = TSNE(**tsne_settings)
tsneResults = tsne.fit_transform(shen_x.values)


# One row per sample: its label, its 'System' annotation (aligned to the
# sample order of the data) and the two embedding coordinates.
ggd = pd.DataFrame({
    'sample': shen_x.index,
    'system': shen_annots.reindex(shen_x.index)['System'],
    'coord1': tsneResults[:, 0],
    'coord2': tsneResults[:, 1],
})
plt.close()

# Manual colour palette passed to scale_color_manual for the 'system' values.
system_palette = ['firebrick', 'goldenrod', 'lightseagreen',
                  'darkorchid', 'darkslategray', 'dodgerblue']
ggo = (
    gg.ggplot(ggd, gg.aes(x='coord1', y='coord2', color='system', label='sample'))
    + gg.geom_point()
    + gg.geom_text(nudge_y=9, show_legend=False)
    + gg.scale_color_manual(values=system_palette)
    + gg.theme_bw()
    + gg.xlab('tSNE coordinate 1')
    + gg.ylab('tSNE coordinate 2')
)
print(ggo)

Esempio n. 23

0

Mostra file

File: figures.py Progetto: Pinafore/qb

    def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
        """Plot accuracy against the fraction of the question revealed.

        Reads ``self.char_plot_df`` for the model results and, in ``expo``
        mode, ``data/external/all_human_gameplay.json`` for human gameplay
        (unless ``self.no_humans`` is set or the file is absent).

        Args:
            expo: When False, return a moving-average plot of
                ``self.char_plot_df`` coloured by ``Guessing_Model``. When
                True, build the plot faceted by ``Guessing_Model`` and
                coloured by ``Dataset``, with human curves overlaid when
                available.
            no_models: Only used with ``expo``. Plot the human data alone
                (as points) instead of the model data.
            columns: Only used with ``expo``. Stack the facets in a single
                column instead of a single row.

        Returns:
            The assembled ggplot object.

        Raises:
            ValueError: If ``no_models`` is requested but no human gameplay
                data is available.
        """
        human_path = 'data/external/all_human_gameplay.json'

        def _cumulative_frame(correct_positions, wrong_positions, dataset, name):
            # Build one curve: answers sorted by position, with the running
            # count of correct answers divided by the total number of answers.
            positions = np.array(correct_positions + wrong_positions)
            outcomes = np.array(len(correct_positions) * [1] + len(wrong_positions) * [0])
            order = np.argsort(positions)
            sorted_outcomes = outcomes[order]
            frame = pd.DataFrame({
                'correct': sorted_outcomes.cumsum() / sorted_outcomes.shape[0],
                'char_percent': positions[order],
            })
            frame['Dataset'] = dataset
            # The leading space is part of the label used by the original code.
            frame['Guessing_Model'] = f' {name}'
            return frame

        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f'Setting limits to: {limits}')
        else:
            limits = [0, 1]

        if not expo:
            df = self.char_plot_df
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                # Previously this branch used a local `df` that was never
                # bound here, so saving always failed; save the plotted frame.
                df.to_json(self.save_df)
            return (
                ggplot(df)
                + aes(x='char_percent', y='correct', color='Guessing_Model')
                + stat_smooth(method='mavg', se=False, method_args={'window': 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        # Human gameplay curves; stays None when the file is absent or humans
        # are disabled.
        human_df = None
        if os.path.exists(human_path) and not self.no_humans:
            with open(human_path) as f:
                all_gameplay = json.load(f)
            frames = []
            for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
                if self.merge_humans:
                    name = 'Human'
                gameplay = all_gameplay[event]
                # The 'live' event gets no control ('Regular Test') curve.
                if event != 'live':
                    frames.append(_cumulative_frame(
                        gameplay['control_correct_positions'],
                        gameplay['control_wrong_positions'],
                        'Regular Test',
                        name,
                    ))

                frames.append(_cumulative_frame(
                    gameplay['adv_correct_positions'],
                    gameplay['adv_wrong_positions'],
                    'IR Adversarial',
                    name,
                ))

                if len(gameplay['advneural_correct_positions']) > 0:
                    frames.append(_cumulative_frame(
                        gameplay['advneural_correct_positions'],
                        gameplay['advneural_wrong_positions'],
                        'RNN Adversarial',
                        name,
                    ))

            human_df = pd.concat(frames)
            # Ordered categoricals fix the facet and legend order.
            human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
            human_dtype = CategoricalDtype(human_vals, ordered=True)
            human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
            dataset_dtype = CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
            human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

        if no_models:
            if human_df is None:
                # Previously an unbound-variable error; make the cause explicit.
                raise ValueError(
                    'no_models=True requires human gameplay data, but '
                    f'{human_path} is missing or humans are disabled'
                )
            p = ggplot(human_df) + geom_point(shape='.')
        else:
            df = self.char_plot_df
            # Drop the datasets of rounds that were not requested.
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                df.to_json(self.save_df)

            if human_df is not None:
                eprint('Loading human data')
                p = p + geom_line(data=human_df)

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        # Model curves: moving average or binned means; none in human-only mode.
        if no_models:
            chart = None
        elif self.mvg_avg_char:
            chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
        else:
            chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)

        p = (
            p + facet_conf
            + aes(x='char_percent', y='correct', color='Dataset')
        )
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, .5, 1])
            + coord_cartesian(ylim=limits)
            + xlab('Percent of Question Revealed')
            + ylab('Accuracy')
            + theme(
                #legend_position='top', legend_box_margin=0, legend_title=element_blank(),
                strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})
            )
            + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
        )
        if self.title != '':
            p += ggtitle(self.title)

        return p

Esempio n. 24

0

Mostra file

                       y='center',
                       ymin='low',
                       ymax='high',
                       group="group",
                       fill="group"),
                na_rm=True,
                alpha=0.2,
            )
            g += p9.geom_line(p9.aes(x="x",
                                     y='center',
                                     group="group",
                                     colour="group"),
                              na_rm=True)

            g += p9.scale_fill_manual(values=ez_colors(g.n_groups('group')))
            g += p9.scale_color_manual(values=ez_colors(g.n_groups('group')))

    # set facets
    if facet_x is not None and facet_y is None:
        g += p9.facet_wrap('~facet_x')
    if facet_x is not None and facet_y is not None:
        g += p9.facet_grid('facet_y~facet_x')

    # set x scale
    if g.column_is_timestamp('x'):
        g += p9.scale_x_datetime()
    elif g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

Esempio n. 25

0

Mostra file

    def plot_char_percent_vs_accuracy_smooth(
        self, expo=False, no_models=False, columns=False
    ):
        """Plot accuracy against the fraction of the question revealed.

        Args:
            expo: If true, build the faceted plot (one facet per
                ``Guessing_Model``, colored by ``Dataset``) and overlay human
                gameplay when ``data/external/all_human_gameplay.json`` exists
                and ``self.no_humans`` is false. If false, return a single
                moving-average plot colored by ``Guessing_Model``.
            no_models: With ``expo``, plot only the human gameplay points.
            columns: With ``expo``, lay the facets out in one column instead
                of one row.

        Returns:
            The assembled ggplot object.

        Raises:
            ValueError: If ``no_models`` is set but no human gameplay data
                was loaded (file missing or ``self.no_humans`` set).
        """
        human_gameplay_path = "data/external/all_human_gameplay.json"

        def cumulative_accuracy_frame(
            correct_positions, wrong_positions, dataset, name
        ):
            # Sort all answers by position; "correct" is the cumulative count
            # of correct answers divided by the total number of answers.
            positions = np.array(correct_positions + wrong_positions)
            results = np.array(
                len(correct_positions) * [1] + len(wrong_positions) * [0]
            )
            by_position = np.argsort(positions)
            sorted_results = results[by_position]
            frame = pd.DataFrame(
                {
                    "correct": sorted_results.cumsum() / sorted_results.shape[0],
                    "char_percent": positions[by_position],
                }
            )
            frame["Dataset"] = dataset
            frame["Guessing_Model"] = f" {name}"
            return frame

        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f"Setting limits to: {limits}")
        else:
            limits = [0, 1]

        if not expo:
            # The original referenced an unbound ``df`` here; the frame that
            # is plotted is the one that gets saved.
            df = self.char_plot_df
            if self.save_df is not None:
                eprint(f"Saving df to: {self.save_df}")
                df.to_json(self.save_df)
            return (
                ggplot(df)
                + aes(x="char_percent", y="correct", color="Guessing_Model")
                + stat_smooth(method="mavg", se=False, method_args={"window": 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        human_df = None
        if os.path.exists(human_gameplay_path) and not self.no_humans:
            # Only the load needs the file handle.
            with open(human_gameplay_path) as f:
                all_gameplay = json.load(f)

            frames = []
            for event, name in [
                ("parents", "Intermediate"),
                ("maryland", "Expert"),
                ("live", "National"),
            ]:
                if self.merge_humans:
                    name = "Human"
                gameplay = all_gameplay[event]

                # The "live" event is not read for control positions.
                if event != "live":
                    frames.append(
                        cumulative_accuracy_frame(
                            gameplay["control_correct_positions"],
                            gameplay["control_wrong_positions"],
                            "Regular Test",
                            name,
                        )
                    )

                frames.append(
                    cumulative_accuracy_frame(
                        gameplay["adv_correct_positions"],
                        gameplay["adv_wrong_positions"],
                        "IR Adversarial",
                        name,
                    )
                )

                if len(gameplay["advneural_correct_positions"]) > 0:
                    frames.append(
                        cumulative_accuracy_frame(
                            gameplay["advneural_correct_positions"],
                            gameplay["advneural_wrong_positions"],
                            "RNN Adversarial",
                            name,
                        )
                    )

            human_df = pd.concat(frames)
            # Ordered categoricals fix the facet and legend ordering.
            human_vals = sort_humans(list(human_df["Guessing_Model"].unique()))
            human_dtype = CategoricalDtype(human_vals, ordered=True)
            human_df["Guessing_Model"] = human_df["Guessing_Model"].astype(
                human_dtype
            )
            dataset_dtype = CategoricalDtype(
                ["Regular Test", "IR Adversarial", "RNN Adversarial"],
                ordered=True,
            )
            human_df["Dataset"] = human_df["Dataset"].astype(dataset_dtype)

        if no_models:
            if human_df is None:
                raise ValueError(
                    "no_models=True requires human gameplay data, "
                    "but none was loaded"
                )
            p = ggplot(human_df) + geom_point(shape=".")
            chart = None
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df["Dataset"] != "Round 1 - IR Adversarial"]
            if 2 not in self.rounds:
                df = df[df["Dataset"] != "Round 2 - IR Adversarial"]
                df = df[df["Dataset"] != "Round 2 - RNN Adversarial"]
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f"Saving df to: {self.save_df}")
                df.to_json(self.save_df)

            if human_df is not None:
                eprint("Loading human data")
                p = p + geom_line(data=human_df)

            if self.mvg_avg_char:
                chart = stat_smooth(
                    method="mavg", se=False, method_args={"window": 400}
                )
            else:
                chart = stat_summary_bin(
                    fun_data=mean_no_se,
                    bins=20,
                    shape=".",
                    linetype="None",
                    size=0.5,
                )

        if columns:
            facet_conf = facet_wrap("Guessing_Model", ncol=1)
        else:
            facet_conf = facet_wrap("Guessing_Model", nrow=1)

        p = p + facet_conf + aes(x="char_percent", y="correct", color="Dataset")
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, 0.5, 1])
            + coord_cartesian(ylim=limits)
            + xlab("Percent of Question Revealed")
            + ylab("Accuracy")
            + theme(
                # legend_position='top', legend_box_margin=0, legend_title=element_blank(),
                strip_text_x=element_text(margin={"t": 6, "b": 6, "l": 1, "r": 5})
            )
            + scale_color_manual(
                values=["#FF3333", "#66CC00", "#3333FF", "#FFFF33"],
                name="Questions",
            )
        )
        if self.title != "":
            p += ggtitle(self.title)

        return p

Esempio n. 26

0

Mostra file

    drop=True).append(edge_pred_df.query("precision==1"),
                      sort=True).reset_index(drop=True).dropna())

# In[6]:

# RGBA channels are scaled to the 0-1 range with plain Python: the ``pd.np``
# alias used previously was deprecated in pandas 1.0 and removed in 2.0.
color_map = {
    "Existing": mcolors.to_hex(tuple(c / 255 for c in (178, 223, 138, 255))),
    "Novel": mcolors.to_hex(tuple(c / 255 for c in (31, 120, 180, 255))),
}

# In[7]:

# Edges per precision bin as points joined by lines, one panel per relation,
# colored by whether the edge is already in Hetionet.
g = (
    p9.ggplot(
        binned_df,
        p9.aes(x="precision", y="edges", color="in_hetionet"),
    )
    + p9.geom_point()
    + p9.geom_line()
    + p9.scale_color_manual(
        values={label: color_map[label] for label in ("Existing", "Novel")}
    )
    + p9.facet_wrap("relation")
    + p9.scale_y_log10()
    + p9.theme_bw()
)
print(g)

# In[8]:

# Same data as dodged, flipped bars. theme_bw() is a complete theme and
# replaces any theme added before it, so it must precede the theme(...) call
# for figure_size and aspect_ratio to take effect.
g = (p9.ggplot(binned_df, p9.aes(x="precision", y="edges", fill="in_hetionet"))
     + p9.geom_bar(stat='identity', position='dodge') +
     p9.scale_fill_manual(values={
         "Existing": color_map["Existing"],
         "Novel": color_map["Novel"]
     }) + p9.coord_flip() + p9.facet_wrap("relation") + p9.scale_y_log10() +
     p9.theme_bw() + p9.theme(figure_size=(12, 8), aspect_ratio=9))
print(g)

# In[9]:

Esempio n. 27

0

Mostra file

#######################################################################################################################
#######################################################################################################################
# The code below produces Figure 7 (in parallel): ####
######################################################
list_of_ggplots = approach.run_direct_simulation(
    params_for_global_min,
    parallel_flag=True,
)

# Only the process reporting rank 0 decorates and saves the figure.
if approach.get_my_rank() == 0:

    g = list_of_ggplots[0]
    import plotnine as p9
    from matplotlib import rc
    rc('text', usetex=True)

    # Axis titles and legend labels are LaTeX strings (usetex is enabled above).
    g = g + p9.xlab("$E_{tot}$")
    g = g + p9.ylab("$[S^{**}]$")
    g = g + p9.scale_color_manual(
        values=["red", "blue"],
        labels=["High [$S^{**}$]", "Low [$S^{**}$]"],
    )
    g.save(
        filename="./Figure_7.png",
        format="png",
        width=8,
        height=5,
        units='in',
        verbose=False,
    )

    print("")

approach.generate_report()
#######################################################################################################################
#######################################################################################################################

Esempio n. 28

0

Mostra file

File: color_correction.py Progetto: danforthcenter/plantcv

def quick_color_check(target_matrix, source_matrix, num_chips):
    """ Quickly plot target matrix values against source matrix values to determine
    over saturated color chips or other issues.

    The plot has one panel per color channel (red, green, blue), with each chip
    drawn as a labelled point and a linear fit through the points. Depending on
    params.debug the plot is saved to params.debug_outdir ('print') or printed
    ('plot'); nothing is returned.

    Inputs:
    source_matrix      = a 22x4 matrix containing the average red value, average green value, and
                             average blue value for each color chip of the source image
    target_matrix      = a 22x4 matrix containing the average red value, average green value, and
                             average blue value for each color chip of the target image
    num_chips          = number of color card chips included in the matrices (integer)

    :param source_matrix: numpy.ndarray
    :param target_matrix: numpy.ndarray
    :param num_chips: int
    """
    # Imports
    from plotnine import ggplot, geom_point, geom_smooth, theme_seaborn, facet_grid, geom_label, scale_x_continuous, \
        scale_y_continuous, scale_color_manual, aes
    import pandas as pd

    # Extract and organize matrix info. Columns 1, 2 and 3 are read as red,
    # green and blue; the k:k+1 slices keep each channel as a column vector so
    # it can be column-stacked below.
    tr = target_matrix[:num_chips, 1:2]
    tg = target_matrix[:num_chips, 2:3]
    tb = target_matrix[:num_chips, 3:4]
    sr = source_matrix[:num_chips, 1:2]
    sg = source_matrix[:num_chips, 2:3]
    sb = source_matrix[:num_chips, 3:4]

    # Create columns of color labels, one label per chip
    red = ['red'] * num_chips
    blue = ['blue'] * num_chips
    green = ['green'] * num_chips

    # Make a column of chip numbers, repeated once per color channel
    chip = np.arange(0, num_chips).reshape((num_chips, 1))
    chips = np.vstack((chip, chip, chip))

    # Combine info (stacked in blue, green, red order)
    color_data_r = np.column_stack((sr, tr, red))
    color_data_g = np.column_stack((sg, tg, green))
    color_data_b = np.column_stack((sb, tb, blue))
    all_color_data = np.vstack((color_data_b, color_data_g, color_data_r))

    # Create a dataframe with headers
    dataset = pd.DataFrame({'source': all_color_data[:, 0], 'target': all_color_data[:, 1],
                            'color': all_color_data[:, 2]})

    # Add chip numbers to the dataframe; stacking numbers with labels produced
    # a string array, so the numeric columns are cast back to float here
    dataset['chip'] = chips
    dataset = dataset.astype({'color': str, 'chip': str, 'target': float, 'source': float})

    # Make the plot
    p1 = ggplot(dataset, aes(x='target', y='source', color='color', label='chip')) + \
        geom_point(show_legend=False, size=2) + \
        geom_smooth(method='lm', size=.5, show_legend=False) + \
        theme_seaborn() + facet_grid('.~color') + \
        geom_label(angle=15, size=7, nudge_y=-.25, nudge_x=.5, show_legend=False) + \
        scale_x_continuous(limits=(-5, 270)) + scale_y_continuous(limits=(-5, 275)) + \
        scale_color_manual(values=['blue', 'green', 'red'])

    # Save or display the plot according to the debug mode
    if params.debug is not None:
        if params.debug == 'print':
            p1.save(os.path.join(params.debug_outdir, 'color_quick_check.png'))
        elif params.debug == 'plot':
            print(p1)

Esempio n. 29

0

Mostra file

def density_plot(df,
                 x,
                 group=None,
                 facet_x=None,
                 facet_y=None,
                 position='overlay',
                 sort_groups=True,
                 base_size=10,
                 figure_size=(6, 3),
                 **stat_kwargs):
    '''
    Draw a one-dimensional density plot, optionally split by group and facets

    Parameters
    ----------
    df : pd.DataFrame
      input dataframe (a copy is used, the input is not modified)
    x : str
      quoted expression to be plotted on the x axis; the special value
      `.index` plots the dataframe index
    group : str
      quoted expression to be used as group (ie color and fill)
    facet_x : str
      quoted expression to be used as facet
    facet_y : str
      quoted expression to be used as facet
    position : str
      either `stack` or `overlay`
    sort_groups : bool
      if True, the fill legend is drawn in reversed order
    base_size : int
      base size for theme_ez
    figure_size : tuple of int
      figure size
    stat_kwargs : kwargs
      keyword arguments forwarded to the density stat

    Returns
    -------
    g : EZPlot
      EZplot object

    Raises
    ------
    NotImplementedError
      if `position` is neither `overlay` nor `stack`

    '''

    if position not in ('overlay', 'stack'):
        log.error("position not recognized")
        raise NotImplementedError("position not recognized")

    # work on a copy so the caller's dataframe is left alone
    dataframe = df.copy()

    # split every aesthetic into its (eventual) display name and expression
    names = {}
    groups = {}
    variables = {}

    aesthetics = {
        'x': x,
        'group': group,
        'facet_x': facet_x,
        'facet_y': facet_y,
    }
    for label, var in aesthetics.items():
        names[label], groups[label] = unname(var)

    # special case: plotting the index, labelled with the index name if any
    if x == '.index':
        groups['x'] = '.index'
        index_name = dataframe.index.name
        names['x'] = '' if index_name is None else index_name

    # aggregate, then keep the known columns in a fixed order
    gdata = agg_data(dataframe, variables, groups, None, fill_groups=False)
    ordered_columns = ('x', 'group', 'facet_x', 'facet_y')
    gdata = gdata[[c for c in ordered_columns if c in gdata.columns]]

    # start plotting
    g = EZPlot(gdata)

    # one color per group
    colors = ez_colors(g.n_groups('group'))

    # density layer: per-group colors when grouped, a single color otherwise
    if group is not None:
        grouped_aes = p9.aes(x="x",
                             group="factor(group)",
                             colour="factor(group)",
                             fill="factor(group)")
        g += p9.geom_density(grouped_aes,
                             stat=p9.stats.stat_density(**stat_kwargs),
                             **POSITION_KWARGS[position])
        g += p9.scale_fill_manual(values=colors, reverse=False)
        g += p9.scale_color_manual(values=colors, reverse=False)
    else:
        g += p9.geom_density(p9.aes(x="x"),
                             stat=p9.stats.stat_density(**stat_kwargs),
                             colour=ez_colors(1)[0],
                             fill=ez_colors(1)[0],
                             **POSITION_KWARGS[position])

    # facets: wrap on facet_x alone, grid when both are given
    if facet_x is not None:
        if facet_y is None:
            g += p9.facet_wrap('~facet_x')
        else:
            g += p9.facet_grid('facet_y~facet_x')

    # x scale depends on the column type
    if g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

    # y scale
    g += p9.scale_y_continuous(labels=ez_labels)

    # axis labels
    g += p9.xlab(names['x']) + p9.ylab('Density')

    # theme, with the group name as legend title
    legend_title = p9.element_text(text=names['group'], size=base_size)
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=legend_title)

    if sort_groups:
        g += p9.guides(fill=p9.guide_legend(reverse=True))

    return g

Esempio n. 30

0

Mostra file

def plot_xbs(df, group, var, n_side=9, n_delta=6):
    r"""Construct Xbar and S chart

    Construct an Xbar and S chart to assess the state of statistical control of
    a dataset.

    The per-group mean (X) and standard deviation (S) of `var` are plotted
    against `group` in two facets ("Mean" and "Variability"), together with
    horizontal guidelines for the center and the lower/upper control limits
    (LCL/UCL). Each point is colored by a sign code ("-2", "-1", "0", "+1",
    "+2") and shaped by the first pattern it matches, in this order: below
    limit, above limit, low run, high run, increasing run, decreasing run.

    Args:
        df (DataFrame): Data to analyze
        group (str): Variable for grouping
        var (str): Variable to study

    Keyword args:
        n_side (int): Number of consecutive runs above/below centerline to flag
        n_delta (int): Number of consecutive runs increasing/decreasing to flag

    Returns:
        plotnine object: Xbar and S chart

    Examples::

        import grama as gr
        DF = gr.Intention()

        from grama.data import df_shewhart
        (
            df_shewhart
            >> gr.tf_mutate(idx=DF.index // 10)
            >> gr.pt_xbs("idx", "tensile_strength")
        )

    """
    ## Prepare the data
    DF = Intention()
    # One row per group: mean (X), standard deviation (S) and n of `var`
    # (n comes from nfcn(DF.index) -- presumably the group size; confirm)
    df_batched = (df >> tf_group_by(group) >> tf_summarize(
        X=mean(DF[var]),
        S=sd(DF[var]),
        n=nfcn(DF.index),
    ) >> tf_ungroup())

    # Single-row summary: averages of X, S and n across all groups
    df_stats = (df_batched >> tf_summarize(
        X_center=mean(DF.X),
        S_biased=mean(DF.S),
        n=mean(DF.n),
    ))
    # The average n is used for every constant below
    n = df_stats.n[0]
    # NOTE(review): c_sd, B3 and B4 are defined elsewhere; they look like the
    # usual control-chart correction/limit factors as a function of n -- confirm
    df_stats["S_center"] = df_stats.S_biased / c_sd(n)
    # Limits for the mean: center -/+ 3 * S_center / sqrt(n)
    df_stats["X_LCL"] = df_stats.X_center - 3 * df_stats.S_center / sqrt(n)
    df_stats["X_UCL"] = df_stats.X_center + 3 * df_stats.S_center / sqrt(n)
    # Limits for the standard deviation: S_center scaled by B3(n) / B4(n)
    df_stats["S_LCL"] = B3(n) * df_stats.S_center
    df_stats["S_UCL"] = B4(n) * df_stats.S_center

    ## Reshape for plotting
    # Column names such as "X_LCL" are split on "_" into _var ("X"/"S") and
    # _stat ("LCL"/"center"/"UCL"); these rows feed the horizontal guidelines
    df_stats_long = (df_stats >> tf_pivot_longer(
        columns=["X_LCL", "X_center", "X_UCL", "S_LCL", "S_center", "S_UCL"],
        names_to=["_var", "_stat"],
        names_sep="_",
        values_to="_value",
    ))
    # Fake group value to avoid issue with discrete group variable
    df_stats_long[group] = [df_batched[group].values[0]
                            ] * df_stats_long.shape[0]

    df_batched_long = (
        # One row per (group, _var) with the observed statistic in _value
        df_batched >> tf_pivot_longer(
            columns=["X", "S"],
            names_to="_var",
            values_to="_value",
        )
        ## Flag patterns
        # Join the matching LCL / center / UCL columns onto each row by _var
        >> tf_left_join(
            df_stats >> tf_pivot_longer(
                columns=[
                    "X_LCL", "X_center", "X_UCL", "S_LCL", "S_center", "S_UCL"
                ],
                names_to=["_var", ".value"],
                names_sep="_",
            ),
            by="_var",
        ) >> tf_group_by("_var") >> tf_mutate(
            outlier_below=(DF._value < DF.LCL),  # Outside control limits
            outlier_above=(DF.UCL < DF._value),
            below=consec(DF._value < DF.center, i=n_side),  # Below mean
            above=consec(DF.center < DF._value, i=n_side),  # Above mean
        ) >> tf_mutate(
            # Trends are tested with both the forward (lead) and backward
            # (lag) difference, each with i=n_delta - 1
            decreasing=consec((lead(DF._value) - DF._value) < 0, i=n_delta - 1)
            |  # Decreasing
            consec((DF._value - lag(DF._value)) < 0, i=n_delta - 1),
            increasing=consec(0 < (lead(DF._value) - DF._value), i=n_delta - 1)
            |  # Increasing
            consec(0 < (DF._value - lag(DF._value)), i=n_delta - 1),
        ) >> tf_mutate(
            # sign drives the point color, glyph the point shape; in both the
            # first matching case wins, so limit violations take priority
            sign=case_when([DF.outlier_below, "-2"], [DF.outlier_above, "+2"],
                           [DF.below | DF.decreasing, "-1"],
                           [DF.above | DF.increasing, "+1"], [True, "0"]),
            glyph=case_when(
                [DF.outlier_below, "Below Limit"],
                [DF.outlier_above, "Above Limit"],
                [DF.below, "Low Run"],
                [DF.above, "High Run"],
                [DF.increasing, "Increasing Run"],
                [DF.decreasing, "Decreasing Run"],
                [True, "None"],
            )) >> tf_ungroup())

    ## Visualize
    # Note: `+` binds tighter than `>>`, so the whole layer chain is assembled
    # first and the data is piped into the result
    return (df_batched_long >> ggplot(aes(x=group)) + geom_hline(
        data=df_stats_long,
        mapping=aes(yintercept="_value", linetype="_stat"),
    ) + geom_line(aes(y="_value", group="_var"), size=0.2) + geom_point(
        aes(y="_value", color="sign", shape="glyph"),
        size=3,
    ) + scale_color_manual(values={
        "-2": "blue",
        "-1": "darkturquoise",
        "0": "black",
        "+1": "salmon",
        "+2": "red"
    }, ) + scale_shape_manual(
        name="Patterns",
        values={
            "Below Limit": "s",
            "Above Limit": "s",
            "Low Run": "X",
            "High Run": "X",
            "Increasing Run": "^",
            "Decreasing Run": "v",
            "None": "."
        },
    ) + scale_linetype_manual(
        name="Guideline",
        values=dict(LCL="dashed", UCL="dashed", center="solid"),
    # guides(color=None): presumably suppresses the color legend -- confirm
    ) + guides(color=None) + facet_grid(
        "_var~.",
        scales="free_y",
        labeller=labeller(dict(X="Mean", S="Variability")),
    ) + labs(
        x="Group variable ({})".format(group),
        y="Value ({})".format(var),
    ))

Esempio n. 31

0

Mostra file

File: explore_expression_data.py Progetto: greenelab/generic-expression-patterns

# Scatter of the columns "1" and "2" of the UMAP-encoded data, colored by
# sample group, assembled as one expression.
fig = (
    pn.ggplot(normalized_all_data_UMAPencoded_df, pn.aes(x="1", y="2"))
    + pn.geom_point(pn.aes(color="sample group"), alpha=0.4)
    + pn.labs(
        x="UMAP 1",
        y="UMAP 2",
        title="Gene expression data in gene space",
    )
    + pn.theme_bw()
    + pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family="sans-serif", size=15),
        legend_text=pn.element_text(family="sans-serif", size=12),
        plot_title=pn.element_text(family="sans-serif", size=15),
        axis_text=pn.element_text(family="sans-serif", size=12),
        axis_title=pn.element_text(family="sans-serif", size=15),
    )
    + pn.scale_color_manual(["#bdbdbd", "red", "blue"])
    # Legend keys are drawn fully opaque even though the points are translucent
    + pn.guides(colour=pn.guide_legend(override_aes={"alpha": 1}))
    + pn.scales.xlim(9, 10)
)
print(fig)
# -

# Based on a UMAP of the normalized gene expression data, it looks like there isn't a clear separation between WT and mutant samples, though there are only 2 samples per group so this type of clustering observation is limited.
#
# **Takeaway:**
#
# In trying to understand why there are these flat-tops to some of the volcano plots and why some volcano plots are completely flat, we found:
# 1. This behavior is _not_ a result of how we are plotting in python (there was some speculation about there being an issue with the numpy library used)
# 2. The latent space shifting we're doing seems to roughly preserve differences between groups (as seen in [this notebook](https://github.com/greenelab/simulate-expression-compendia/blob/master/Pseudo_experiments/create_heatmap.ipynb) where the structure of the samples is preserved but there is a different set of related genes that are DE. More information can be found in Figure 3D in [this paper](https://academic.oup.com/gigascience/article/9/11/giaa117/5952607)), but this signal can be muddled/noisy depending on where the experiment was shifted to (i.e. the representation that is found in that location can cause the experiment to have a more compressed difference between groups) as seen in the heatmaps. The heatmap of the two simulation experiments shows that some experiments have a more noisy distinction between groups (WT vs mutant) whereas the other simulation experiment has a more distinct difference where the within grouping is cleaner. This definitely points to the need to understand how this simulation process is working and how biology is represented in the latent space. This will definitely be a project for the future. For now we at least have an explanation for why we are observing these shapes in the volcano plots.

Esempio n. 32

0

Mostra file

File: buzz_example.py Progetto: NPSDC/qb

def plot_empirical_buzz():
    """Plot the empirical probability of winning by buzzing position.

    Reads the Protobowl play log and the mapped QANTA dataset, keeps plays on
    questions in the "buzztest" fold, and takes the ten most played questions.
    For each of them, and for all of them pooled, it accumulates the fraction
    of opponent buzzes that were correct up to each position; the win
    probability is one minus that fraction. These curves are drawn together
    with the weights returned by ``CurveScore.get_weight`` and the chart is
    saved to ``output/empirical_buzz.pdf``. Nothing is returned.
    """
    proto_df = pd.read_hdf(
        "data/external/datasets/protobowl/protobowl-042818.log.h5")
    dataset = read_json(QANTA_MAPPED_DATASET_PATH)
    questions = {q["qanta_id"]: q for q in dataset["questions"]}
    folds = {
        q["proto_id"]: q["fold"]
        for q in questions.values() if q["proto_id"] is not None
    }
    # Plays on questions without a known fold get None
    proto_df["fold"] = proto_df["qid"].map(folds.get)
    proto_df["n"] = 1
    buzztest_df = proto_df[proto_df.fold == "buzztest"]
    # After count(), every column holds the number of non-null entries per
    # question; sorting puts the most played questions first
    play_counts = (
        buzztest_df.groupby("qid").count().reset_index().sort_values(
            "fold", ascending=False))
    qid_to_counts = {r.qid: r.n for r in play_counts.itertuples()}
    popular_questions = play_counts.qid.tolist()

    # Reference curve sampled at 100 positions in [0, 1]
    curve = CurveScore()
    x = np.linspace(0, 1, 100)
    y = [curve.get_weight(n) for n in x]
    curve_df = pd.DataFrame({"buzzing_position": x, "result": y})
    curve_df["qid"] = "Expected Wins Curve Score"
    curve_df["source"] = "Curve Score | Average"

    proto_ids = popular_questions[:10]
    frames = []
    for proto_id in proto_ids:
        plays = buzztest_df[buzztest_df.qid == proto_id].sort_values(
            "buzzing_position")
        # Copy the filtered slice before writing to it so pandas does not
        # raise SettingWithCopyWarning
        plays = plays[plays.result != "prompt"].copy()
        plays["result"] = plays["result"].astype(int)
        frames.append(plays)
    sample_df = pd.concat(frames)

    # Running opponent-correct counts, per question ...
    rows = []
    for qid, group_df in sample_df.groupby("qid"):
        n_opp_correct = 0
        n_opp_total = 0
        n = qid_to_counts[qid]
        # Anchor each question's curve at position 0
        rows.append({
            "buzzing_position": 0,
            "n_opp_correct": 0,
            "n_opp_total": 1,
            "qid": f"Question with {n} Plays",
            "source": "Single Question",
            "n_plays": n,
        })
        for r in group_df.itertuples():
            if r.result == 1:
                n_opp_correct += 1
            n_opp_total += 1
            rows.append({
                "buzzing_position": r.buzzing_position,
                "n_opp_correct": n_opp_correct,
                "n_opp_total": n_opp_total,
                "qid": f"Question with {n} Plays",
                "source": "Single Question",
                "n_plays": n,
            })
    # ... and pooled over all sampled questions
    n_opp_correct = 0
    n_opp_total = 0
    for r in sample_df.sort_values("buzzing_position").itertuples():
        if r.result == 1:
            n_opp_correct += 1
        n_opp_total += 1
        rows.append({
            "buzzing_position": r.buzzing_position,
            "n_opp_correct": n_opp_correct,
            "n_opp_total": n_opp_total,
            "qid": "Average of Most Played",
            "source": "Curve Score | Average",
        })

    df = pd.DataFrame(rows)
    df["p_opp_correct"] = df["n_opp_correct"] / df["n_opp_total"]
    df["p_win"] = 1 - df["p_opp_correct"]
    df["result"] = df["p_win"]

    def order(c):
        # Legend order: expected curve, pooled average, then single questions
        # by decreasing play count; anything else goes last
        if c.startswith("Expected"):
            return -1000
        elif c.startswith("Average"):
            return -999
        elif c.startswith("Question with"):
            return -int(c.split()[2])
        else:
            return 1000

    categories = list(set(df.qid.tolist()) | set(curve_df.qid.tolist()))
    categories = sorted(categories, key=order)
    categories = pd.CategoricalDtype(categories, ordered=True)
    df["qid"] = df["qid"].astype(categories)
    cmap = plt.get_cmap("tab20")
    colors = [matplotlib.colors.to_hex(c) for c in cmap.colors]
    # Rows with fewer than five accumulated buzzes are not plotted
    filter_df = df[df.n_opp_total > 4]
    chart = (p9.ggplot(
        filter_df,
        p9.aes(x="buzzing_position", y="result", color="qid"),
    ) + p9.geom_line(
        p9.aes(linetype="source"),
        data=filter_df[filter_df.source.map(lambda s: s.startswith("Curve"))],
        size=2,
    ) + p9.geom_line(
        p9.aes(linetype="source"),
        data=filter_df[filter_df.source.map(
            lambda s: not s.startswith("Curve"))],
        size=0.5,
    ) + p9.geom_line(
        p9.aes(x="buzzing_position", y="result", linetype="source"),
        data=curve_df,
        size=2,
    ) + p9.labs(
        x="Position in Question (%)",
        y="Empirical Probability of Winning",
        linetype="Data Type",
        color="Data Source",
    ) + p9.guides(size=False) + p9.scale_color_manual(values=colors) +
             theme_pedroai() + p9.theme(legend_position="right"))
    chart.save("output/empirical_buzz.pdf")

Esempio n. 33

0

Mostra file

File: analyze_color.py Progetto: danforthcenter/plantcv

def analyze_color(rgb_img, mask, hist_plot_type=None):
    """Analyze the color properties of an image object

    Computes a 256-bin histogram over the masked pixels for each of the blue,
    green, red, lightness, green-magenta, blue-yellow, hue, saturation, and
    value channels, optionally plots a subset of them, and records the
    histograms plus hue summary statistics through outputs.add_observation.

    Inputs:
    rgb_img          = RGB image data
    mask             = Binary mask made from selected contours
    hist_plot_type   = None, 'all', 'rgb', 'lab' or 'hsv' (case-insensitive)

    Returns:
    analysis_images  = list of histogram figures (empty when hist_plot_type is None)

    :param rgb_img: numpy.ndarray
    :param mask: numpy.ndarray
    :param hist_plot_type: str
    :return analysis_images: list
    """

    params.device += 1

    if len(np.shape(rgb_img)) < 3:
        fatal_error("rgb_img must be an RGB image")

    # Mask the input image
    masked = cv2.bitwise_and(rgb_img, rgb_img, mask=mask)
    # Extract the blue, green, and red channels
    blue, green, red = cv2.split(masked)
    # Convert the BGR image to LAB and extract the lightness, green-magenta, and blue-yellow channels
    lightness, green_magenta, blue_yellow = cv2.split(cv2.cvtColor(masked, cv2.COLOR_BGR2LAB))
    # Convert the BGR image to HSV and extract the hue, saturation, and value channels
    hue, saturation, value = cv2.split(cv2.cvtColor(masked, cv2.COLOR_BGR2HSV))

    # Color channel dictionary
    channels = {"b": blue, "g": green, "r": red,
                "l": lightness, "m": green_magenta, "y": blue_yellow,
                "h": hue, "s": saturation, "v": value}

    # Histogram plot types: (dataframe columns to plot, line colors).
    # NOTE(review): each color list appears to follow the alphabetical order of the channel labels,
    # presumably because plotnine assigns manual colors to sorted categories - confirm before reordering.
    plot_specs = {
        "ALL": (['blue', 'green', 'red', 'lightness', 'green-magenta',
                 'blue-yellow', 'hue', 'saturation', 'value'],
                ['blue', 'yellow', 'green', 'magenta', 'blueviolet',
                 'dimgray', 'red', 'cyan', 'orange']),
        "RGB": (['blue', 'green', 'red'], ['blue', 'green', 'red']),
        "LAB": (['lightness', 'green-magenta', 'blue-yellow'], ['yellow', 'magenta', 'dimgray']),
        "HSV": (['hue', 'saturation', 'value'], ['blueviolet', 'cyan', 'orange']),
    }

    if hist_plot_type is not None and hist_plot_type.upper() not in plot_specs:
        fatal_error("The histogram plot type was " + str(hist_plot_type) +
                    ', but can only be one of the following: None, "all", "rgb", "lab", or "hsv"!')

    # (channel key, label, plotting color) in the order the channels are reported
    channel_info = (
        ("b", "blue", "blue"),
        ("g", "green", "forestgreen"),
        ("r", "red", "red"),
        ("l", "lightness", "dimgray"),
        ("m", "green-magenta", "magenta"),
        ("y", "blue-yellow", "yellow"),
        ("h", "hue", "blueviolet"),
        ("s", "saturation", "cyan"),
        ("v", "value", "orange"),
    )

    # Store histograms, plotting colors, and plotting labels
    # NOTE(review): OpenCV documents the upper histogram bound as exclusive, so the [0, 255] range
    # may leave pixels with value 255 uncounted - confirm whether [0, 256] was intended.
    histograms = {
        key: {"label": label, "graph_color": graph_color,
              "hist": [float(bin_count[0])
                       for bin_count in cv2.calcHist([channels[key]], [0], mask, [256], [0, 255])]}
        for key, label, graph_color in channel_info
    }

    # Bin labels for 8-bit data
    binval = np.arange(0, 256)

    analysis_images = []
    # Create a dataframe of bin labels and histogram data (one column per channel label)
    dataset = pd.DataFrame({'bins': binval,
                            **{entry["label"]: entry["hist"] for entry in histograms.values()}})

    # Make the histogram figure using plotnine
    plot_spec = None if hist_plot_type is None else plot_specs.get(hist_plot_type.upper())
    if plot_spec is not None:
        value_vars, line_colors = plot_spec
        df_long = pd.melt(dataset, id_vars=['bins'], value_vars=value_vars,
                          var_name='Color Channel', value_name='Pixels')
        hist_fig = (ggplot(df_long, aes(x='bins', y='Pixels', color='Color Channel'))
                    + geom_line()
                    + scale_x_continuous(breaks=list(range(0, 256, 25)))
                    + scale_color_manual(line_colors)
                    )
        analysis_images.append(hist_fig)

    # Hue values of zero are red but are also the value for pixels where hue is undefined
    # The hue value of a pixel will be undefined when the color values are saturated
    # Therefore, hue values of zero are excluded from the calculations below
    nonzero_hue = hue[np.where(hue > 0)]

    # Calculate the median hue value
    # The median is rescaled from the encoded 0-179 range to the 0-359 degree range
    hue_median = np.median(nonzero_hue) * 2

    # Calculate the circular mean and standard deviation of the encoded hue values
    # The mean and standard-deviation are rescaled from the encoded 0-179 range to the 0-359 degree range
    hue_circular_mean = stats.circmean(nonzero_hue, high=179, low=0) * 2
    hue_circular_std = stats.circstd(nonzero_hue, high=179, low=0) * 2

    # Plot or print the histogram
    if hist_plot_type is not None:
        if params.debug == 'print':
            hist_fig.save(os.path.join(params.debug_outdir, str(params.device) + '_analyze_color_hist.png'))
        elif params.debug == 'plot':
            print(hist_fig)

    # Store into global measurements
    # RGB signal values are in an unsigned 8-bit scale of 0-255
    rgb_values = list(range(0, 256))
    # Hue values are in a 0-359 degree scale, every 2 degrees at the midpoint of the interval
    # NOTE(review): this yields 180 labels while the hue histogram above has 256 bins - confirm intended.
    hue_values = [i * 2 + 1 for i in range(0, 180)]
    # Percentage values on a 0-100 scale (lightness, saturation, and value)
    percent_values = [round((i / 255) * 100, 2) for i in range(0, 256)]
    # Diverging values on a -128 to 127 scale (green-magenta and blue-yellow)
    diverging_values = list(range(-128, 128))

    # Bin labels reported alongside each channel's frequencies
    frequency_labels = {"b": rgb_values, "g": rgb_values, "r": rgb_values,
                        "l": percent_values, "m": diverging_values, "y": diverging_values,
                        "h": hue_values, "s": percent_values, "v": percent_values}
    for key, bin_labels in frequency_labels.items():
        channel_label = histograms[key]["label"]
        outputs.add_observation(variable=channel_label + '_frequencies', trait=channel_label + ' frequencies',
                                method='plantcv.plantcv.analyze_color', scale='frequency', datatype=list,
                                value=histograms[key]["hist"], label=bin_labels)

    outputs.add_observation(variable='hue_circular_mean', trait='hue circular mean',
                            method='plantcv.plantcv.analyze_color', scale='degrees', datatype=float,
                            value=hue_circular_mean, label='degrees')
    # Report the circular standard deviation itself (previously the median was stored here by mistake)
    outputs.add_observation(variable='hue_circular_std', trait='hue circular standard deviation',
                            method='plantcv.plantcv.analyze_color', scale='degrees', datatype=float,
                            value=hue_circular_std, label='degrees')
    outputs.add_observation(variable='hue_median', trait='hue median',
                            method='plantcv.plantcv.analyze_color', scale='degrees', datatype=float,
                            value=hue_median, label='degrees')

    # Store images
    outputs.images.append(analysis_images)

    return analysis_images

Esempio n. 34

0

Mostra file

File: viz_recount2_compendium.py Progetto: greenelab/generic-expression-patterns

# Plot: scatter of the two UMAP columns, colored by dataset
fig = (
    ggplot(input_data_UMAPencoded_df, aes(x='1', y='2'))
    # Semi-transparent points so overlapping samples remain visible
    + geom_point(aes(color='dataset'), alpha=0.2)
    + labs(x='UMAP 1', y='UMAP 2', title='UMAP of normalized compendium')
    + theme_bw()
    + theme(
        legend_title_align="center",
        plot_background=element_rect(fill='white'),
        legend_key=element_rect(fill='white', colour='white'),
        legend_title=element_text(family='sans-serif', size=15),
        legend_text=element_text(family='sans-serif', size=12),
        plot_title=element_text(family='sans-serif', size=15),
        axis_text=element_text(family='sans-serif', size=12),
        axis_title=element_text(family='sans-serif', size=15),
    )
    # Draw the legend keys fully opaque even though the points are translucent
    + guides(colour=guide_legend(override_aes={'alpha': 1}))
    + scale_color_manual(['#ff6666', '#add8e6'])
)

print(fig)


# **Observations:**
# * There looks to be a good amount of variance in the compendium overall.
# * Using a split of 25% seems to get a similar distribution of data between training and validation sets.
# * Remember, the dataset is in 17K dimensional space, which will make the small clusters difficult to represent during training
# 
# Overall, having so many features in our dataset points to the need for more samples to represent the structure in the compendium. For now, we are limited by memory to selecting only a subset of recount2, but perhaps this will be updated in a future iteration.

Esempio n. 35

0

Mostra file

def scatter_plot(df,
                 x,
                 y,
                 group=None,
                 facet_x=None,
                 facet_y=None,
                 base_size=10,
                 figure_size=(6, 3),
                 **kwargs):
    '''
    Aggregate the data in df and draw it as a scatter plot.

    Parameters
    ----------
    df : pd.DataFrame
      input dataframe (copied, never modified)
    x : str
      quoted expression to be plotted on the x axis; the special value
      '.index' uses the dataframe index
    y : str
      quoted expression to be plotted on the y axis
    group : str
      quoted expression to be used as group (ie color)
    facet_x : str
      quoted expression to be used as facet
    facet_y : str
      quoted expression to be used as facet (only applied together with facet_x)
    base_size : int
      base size for theme_ez
    figure_size : tuple of int
      figure size
    **kwargs:
      additional kwargs passed to geom_point

    Returns
    -------
    g : EZPlot
      EZplot object

    '''

    # work on a copy so the caller's dataframe is left untouched
    dataframe = df.copy()

    # split every expression into its (eventual) display name and the
    # expression itself; x/group/facets are grouping keys, y is the variable
    names = {}
    groups = {}
    variables = {}

    grouping_inputs = {
        'x': x,
        'group': group,
        'facet_x': facet_x,
        'facet_y': facet_y,
    }
    for key, expression in grouping_inputs.items():
        names[key], groups[key] = unname(expression)
    names['y'], variables['y'] = unname(y)

    # special case: plot against the dataframe index
    if x == '.index':
        groups['x'] = '.index'
        index_name = dataframe.index.name
        names['x'] = '' if index_name is None else index_name

    # aggregate, then keep only the known columns in a fixed order
    gdata = agg_data(dataframe, variables, groups, None, fill_groups=True)
    ordered_columns = [
        column for column in ['x', 'y', 'group', 'facet_x', 'facet_y']
        if column in gdata.columns
    ]
    gdata = gdata[ordered_columns]

    # add group_x column ("<group>_<x>")
    if group is not None:
        group_as_text = gdata['group'].astype('str')
        x_as_text = gdata['x'].astype(str)
        gdata['group_x'] = group_as_text + '_' + x_as_text

    g = EZPlot(gdata)

    # points: one color per group, or a single default color without groups
    if group is not None:
        grouped_aes = p9.aes(x="x",
                             y="y",
                             group="factor(group)",
                             color="factor(group)")
        g += p9.geom_point(grouped_aes, **kwargs)
        g += p9.scale_color_manual(values=ez_colors(g.n_groups('group')))
    else:
        g += p9.geom_point(p9.aes(x="x", y="y"),
                           colour=ez_colors(1)[0],
                           **kwargs)

    # facets: wrap on facet_x alone, grid when both are given
    if facet_x is not None:
        if facet_y is None:
            g += p9.facet_wrap('~facet_x')
        else:
            g += p9.facet_grid('facet_y~facet_x')

    # x scale first, then y scale: datetime, discrete or continuous
    axis_scales = (
        ('x', p9.scale_x_datetime, p9.scale_x_discrete,
         p9.scale_x_continuous),
        ('y', p9.scale_y_datetime, p9.scale_y_discrete,
         p9.scale_y_continuous),
    )
    for axis, datetime_scale, discrete_scale, continuous_scale in axis_scales:
        if g.column_is_timestamp(axis):
            g += datetime_scale()
        elif g.column_is_categorical(axis):
            g += discrete_scale()
        else:
            g += continuous_scale(labels=ez_labels)

    # axis labels (combined first, then added to the plot in one step)
    axis_labels = p9.xlab(names['x']) + p9.ylab(names['y'])
    g += axis_labels

    # theme; the legend title carries the group's display name
    legend_title = p9.element_text(text=names['group'], size=base_size)
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=legend_title)

    return g

Esempio n. 36

0

Mostra file

File: color_correction.py Progetto: sarahmathew/plantcv

def quick_color_check(target_matrix, source_matrix, num_chips):
    """ Quickly plot target matrix values against source matrix values to determine
    over saturated color chips or other issues.

    The plot has one facet per color channel; each point is labeled with its chip
    number. Depending on params.debug the plot is saved ('print') or displayed ('plot').

    Inputs:
    source_matrix      = a 22x4 matrix containing the average red value, average green value, and
                             average blue value for each color chip of the source image
                             (columns 1, 2, and 3 are read as red, green, and blue)
    target_matrix      = a 22x4 matrix containing the average red value, average green value, and
                             average blue value for each color chip of the target image
                             (columns 1, 2, and 3 are read as red, green, and blue)
    num_chips          = number of color card chips included in the matrices (integer)

    :param source_matrix: numpy.ndarray
    :param target_matrix: numpy.ndarray
    :param num_chips: int
    """
    # Imports
    from plotnine import ggplot, geom_point, geom_smooth, theme_seaborn, facet_grid, geom_label, scale_x_continuous, \
        scale_y_continuous, scale_color_manual, aes
    import pandas as pd

    # Extract and organize matrix info (slices keep each channel as a column vector)
    tr = target_matrix[:num_chips, 1:2]
    tg = target_matrix[:num_chips, 2:3]
    tb = target_matrix[:num_chips, 3:4]
    sr = source_matrix[:num_chips, 1:2]
    sg = source_matrix[:num_chips, 2:3]
    sb = source_matrix[:num_chips, 3:4]

    # Create columns of color labels
    red = ['red'] * num_chips
    blue = ['blue'] * num_chips
    green = ['green'] * num_chips

    # Make a column of chip numbers, repeated once per color channel
    chip = np.arange(0, num_chips).reshape((num_chips, 1))
    # np.vstack replaces np.row_stack, a deprecated alias of the same function
    chips = np.vstack((chip, chip, chip))

    # Combine info: (source, target, color label) rows, stacked blue, green, red
    color_data_r = np.column_stack((sr, tr, red))
    color_data_g = np.column_stack((sg, tg, green))
    color_data_b = np.column_stack((sb, tb, blue))
    all_color_data = np.vstack((color_data_b, color_data_g, color_data_r))

    # Create a dataframe with headers
    dataset = pd.DataFrame({
        'source': all_color_data[:, 0],
        'target': all_color_data[:, 1],
        'color': all_color_data[:, 2]
    })

    # Add chip numbers to the dataframe
    dataset['chip'] = chips
    # Stacking numbers with the color labels produced strings, so restore the numeric columns
    dataset = dataset.astype({
        'color': str,
        'chip': str,
        'target': float,
        'source': float
    })

    # Make the plot
    p1 = ggplot(dataset, aes(x='target', y='source', color='color', label='chip')) + \
        geom_point(show_legend=False, size=2) + \
        geom_smooth(method='lm', size=.5, show_legend=False) + \
        theme_seaborn() + facet_grid('.~color') + \
        geom_label(angle=15, size=7, nudge_y=-.25, nudge_x=.5, show_legend=False) + \
        scale_x_continuous(limits=(-5, 270)) + scale_y_continuous(limits=(-5, 275)) + \
        scale_color_manual(values=['blue', 'green', 'red'])

    # Autoincrement the device counter
    params.device += 1

    # Save or display the plot depending on the debug mode
    if params.debug is not None:
        if params.debug == 'print':
            p1.save(os.path.join(params.debug_outdir, 'color_quick_check.png'))
        elif params.debug == 'plot':
            print(p1)

Esempio n. 37

0

Mostra file

File: biorxiv_missing_method_section_experiment.py Progetto: rando2/annorxiver

# Scatter of the pca1/pca2 columns for every row, colored by category
g = (
    p9.ggplot(biorxiv_pca_method_section_df)
    + p9.aes(x="pca1", y="pca2", color="category")
    + p9.geom_point()
    + p9.theme_bw()
    + p9.labs(title="TSNE Methods Section (300 dim)")
)
print(g)

# ## Neuroscience Methods Section

# In[6]:

# Fixed colors for the two values of the "section" column, shared by both plots below
section_colors = {
    "has_methods": "#d8b365",
    "no_methods": "#5ab4ac",
}

# Neuroscience rows only, one facet per section value
g = (
    p9.ggplot(biorxiv_pca_method_section_df.query("category=='neuroscience'"))
    + p9.aes(x="pca1", y="pca2", color="section")
    + p9.geom_point(position=p9.position_dodge(width=0.2))
    + p9.facet_wrap("section")
    + p9.theme_bw()
    + p9.theme(subplots_adjust={'wspace': 0.10})
    + p9.scale_color_manual(section_colors)
    + p9.labs(title="Neuroscience Methods Section")
)
g.save("output/pca/neuroscience_missing_methods.png", dpi=500)
print(g)

# In[7]:

# Same data without faceting, so both section values overlap in one panel
g = (
    p9.ggplot(biorxiv_pca_method_section_df.query("category=='neuroscience'"))
    + p9.aes(x="pca1", y="pca2", color="section")
    + p9.geom_point(position=p9.position_dodge(width=0.2))
    + p9.theme_bw()
    + p9.scale_color_manual(section_colors)
    + p9.labs(title="Neuroscience Methods Section")
)
g.save("output/pca/neuroscience_missing_methods_overlapped.png", dpi=500)
print(g)