Exemple #1
0
def cell_cycle_phase_barplot(adata, palette='Set2'):
    """Draw a horizontal bar chart of cell counts per cell cycle phase.

    See also: cell_cycle_phase_pieplot for the matplotlib pie chart


    Parameters
    -----------
    adata: AnnData
        The AnnData object being used for the analysis. Its ``obs`` table
        must contain a ``cell_cycle_phase`` column (the original docstring
        states it is produced by `tl.annotate_cell_cycle`).
    palette: str
        Name of the qualitative ColorBrewer palette used to fill the bars.

    Returns
    -----------
    A plotnine barplot with the total counts of cells in each phase of the
    cell cycle.

    """
    phase_order = ['G1 post-mitotic', 'G1 pre-replication', 'S/G2/M']

    # Work on a copy so the caller's ``adata.obs`` is left untouched.
    obs = adata.obs.copy()
    obs['cell_cycle_phase'] = pd.Categorical(obs['cell_cycle_phase'],
                                             categories=phase_order)

    # Only the grid lines along the count axis are kept.
    grid_theme = theme(panel_grid_major_y=element_blank(),
                       panel_grid_minor_y=element_blank(),
                       panel_grid_major_x=element_line(size=1.5),
                       panel_grid_minor_x=element_line(size=1.5))

    components = [
        geom_bar(),
        coord_flip(),
        guides(fill=False),
        labs(y='', x='Cell cycle phase'),
        theme_light(),
        grid_theme,
        scale_fill_brewer(type='qual', palette=palette),
    ]

    bar_chart = ggplot(obs, aes('cell_cycle_phase', fill='cell_cycle_phase'))
    for component in components:
        bar_chart = bar_chart + component

    return bar_chart
Exemple #2
0
def plot_revigo(
    rev,
    outline=2,
    expand_points=(1.05, 1.2),
    figure_size=(8, 8),
    font_size=8,
    point_size=3,
    point_alpha=0.7,
    palette='RdPu',
    dispensability_cutoff=1.,
    show_all_labels=False,
    text_column='name',
    term_size_limit=None,
):
    """Scatter plot of REVIGO-style terms in semantic similarity space.

    Parameters
    ----------
    rev : DataFrame
        Table with the columns read by this function: ``plot_X``,
        ``plot_Y``, ``neglog10``, ``frequency``, ``eliminated``,
        ``dispensability``, the column named by ``text_column`` and, when
        ``term_size_limit`` is given, ``term_size``.
    outline : float
        Line width of the white stroke drawn around each text label.
    expand_points : tuple
        Forwarded to the text-adjustment options as ``expand_points``.
    figure_size : tuple
        Figure size passed to the plotnine theme.
    font_size : float
        Font size of the labels.
    point_size : float
        Currently unused; point sizes come from the ``frequency`` column
        through the size scale.
    point_alpha : float
        Transparency of the points.
    palette : str
        Sequential palette name for the fill scale.
    dispensability_cutoff : float
        Only terms with dispensability strictly below this value are
        labelled (ignored when ``show_all_labels`` is true).
    show_all_labels : bool
        Label every row of ``rev`` instead of the filtered subset.
    text_column : str
        Column of ``rev`` holding the label text.
    term_size_limit : float or None
        When given, only terms with ``term_size`` strictly below it are
        labelled (ignored when ``show_all_labels`` is true).

    Returns
    -------
    The plotnine ggplot object.
    """

    import plotnine as p9
    import matplotlib.patheffects as path_effects

    # White halo behind the text; its width is the ``outline`` argument
    # (previously hard-coded to 2, which left the parameter without effect).
    text_outline = [
        path_effects.Stroke(linewidth=outline, foreground='white'),
        path_effects.Normal(),
    ]

    if show_all_labels:
        lbl_df = rev
    else:
        lbl_df = rev[(rev.eliminated == 0)
                     & (rev.dispensability < dispensability_cutoff)]
        if term_size_limit is not None:
            lbl_df = lbl_df[lbl_df.term_size < term_size_limit]

    g = (p9.ggplot(p9.aes(x='plot_X', y='plot_Y'), data=rev) +
         p9.geom_point(p9.aes(fill='neglog10', size='frequency'),
                       color='black',
                       alpha=point_alpha) +
         p9.geom_text(p9.aes(label=text_column),
                      data=lbl_df,
                      size=font_size,
                      adjust_text={
                          'expand_points': expand_points,
                          'arrowprops': {
                              'arrowstyle': '-'
                          },
                          # All points (not only the labelled ones) are
                          # handed to the text adjustment.
                          'x': rev.plot_X.values,
                          'y': rev.plot_Y.values
                      },
                      path_effects=text_outline) + p9.theme_bw() +
         p9.scale_fill_distiller(type='seq', palette=palette, direction=1) +
         p9.labs(x='Semantic similarity space',
                 y='',
                 fill='-log10(adj. p-value)',
                 size='Term frequency') +
         p9.scale_size_continuous(range=(2, 7), trans='log10') +
         p9.theme(figure_size=figure_size,
                  axis_text_x=p9.element_blank(),
                  axis_text_y=p9.element_blank(),
                  axis_ticks=p9.element_blank()))

    return g
Exemple #3
0
def plot_violin_plots(
    par_id: str,
    dims: List[str],
    draws: Dict,
    log_scale_variables: List[str],
    units: Dict[str, str],
    confidence_intervals,
    measurements,
):
    """Build a violin plot of the sampled distribution of one parameter.

    :param par_id: Name of the parameter plotted
    :param dims: Dimensions of the parameter
    :param draws: pd.Dataframe of parameter distribution
    indexed by dimensions and contains the population samples
    :param log_scale_variables: Parameters that are log-distributed
    :param units: Dictionary of units for each parameter
    :param confidence_intervals: Mapping from parameter name to a table
    with ``lower_ci`` and ``upper_ci`` columns, drawn as error bars
    :param measurements: Mapping from parameter name to a table with a
    ``measurement`` column, drawn as points when it is not empty
    :return: The plotnine plot (nothing is written to disk here)
    """
    unit_label = units[par_id]
    multi_dim = len(dims) > 1
    # With several dimensions the x axis is the "experiments" column and
    # the second dimension is used for faceting further down.
    x = fill = "experiments" if multi_dim else dims[0]

    violins = p9.geom_violin(
        p9.aes(y=f"{par_id}", x=x, fill=fill),
        position="identity",
        color="None",
        size=0.5,
        alpha=0.7,
        weight=0.7,
        linetype="None",
    )
    plot = p9.ggplot(data=draws) + violins + p9.labels.ylab(
        f"{par_id} {unit_label}")

    if par_id in confidence_intervals:
        plot += p9.geoms.geom_errorbar(
            p9.aes(x=x, ymin="lower_ci", ymax="upper_ci"),
            data=confidence_intervals[par_id],
            width=0.1,
        )

    if par_id in measurements and len(measurements[par_id]) > 0:
        plot += p9.geoms.geom_point(
            p9.aes(y="measurement", x=x),
            data=measurements[par_id],
        )

    if multi_dim:
        facet_theme = p9.themes.theme(
            panel_spacing_y=0.05,
            panel_spacing_x=0.35,
            axis_title=p9.element_text(size=10),
            axis_text=p9.element_text(size=11),
            axis_text_y=p9.element_text(size=8, angle=45),
            axis_title_x=p9.element_blank(),
            axis_text_x=p9.element_blank(),
        )
        plot += p9.facet_wrap(f"~{dims[1]}") + facet_theme
    elif len(dims) == 1:
        plot += p9.themes.theme(axis_text_x=p9.element_text(angle=70), )

    if par_id in log_scale_variables:
        plot += p9.scale_y_log10()

    return plot
def plot_metrics_comparison_lineplot_grid(dataframe,
                                          models_labels,
                                          metrics_labels,
                                          figure_size=(14, 4)):
    """
    Build a grid of line plots comparing metrics across thresholds.

    The plot reads the columns 'threshold', 'value', 'variable',
    'iterations' and 'model' of `dataframe`; rows of the grid are
    iterations and columns are models. `metrics_labels` and
    `models_labels` map raw keys to the text shown in the legend and in
    the column strips. Returns the plotnine plot.
    """

    def two_decimals(breaks):
        # Tick labels printed with 2 decimal digits.
        return ['{:.2f}'.format(b) for b in breaks]

    def metric_names(keys):
        # Legend entries renamed through the metrics mapping.
        return [metrics_labels[k] for k in keys]

    def row_strip(value):
        return f'iters = {value}'

    def col_strip(value):
        return f'{models_labels[value]}'

    mapping = p9.aes(x='threshold',
                     y='value',
                     group='variable',
                     color='variable',
                     shape='variable')

    # A little space is left on both sides of each axis.
    x_scale = p9.scale_x_discrete(name='Threshold', expand=(0, 0.2))
    y_scale = p9.scale_y_continuous(name='Value',
                                    expand=(0, 0.05),
                                    labels=two_decimals)

    shape_scale = p9.scale_shape_discrete(name='Metric', labels=metric_names)
    # Color-blind friendly palette for the metrics.
    color_scale = p9.scale_color_brewer(name='Metric',
                                        labels=metric_names,
                                        type='qual',
                                        palette='Set2')

    grid = p9.facet_grid('iterations ~ model',
                         labeller=p9.labeller(rows=row_strip,
                                              cols=col_strip))

    # No y axis title, small tick labels, untitled legend on top with a
    # reduced margin, and the requested figure size.
    look = p9.theme(
        axis_title_y=p9.element_blank(),
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    return (p9.ggplot(dataframe, mapping) + p9.geom_point() +
            p9.geom_line() + x_scale + y_scale + shape_scale + color_scale +
            grid + look)
def theme_energinet() -> p9.themes.theme:
    """Return a plain plotnine theme using the Energinet font family.

    White panel and legend backgrounds, black axis lines, and no plot
    background, legend keys, grid lines or axis ticks.
    """
    white_fill = "white"
    settings = dict(
        text=p9.element_text(family=endktheme.style.font_family()),
        axis_line=p9.element_line(color="black"),
        plot_background=p9.element_blank(),
        panel_background=p9.element_rect(fill=white_fill),
        legend_background=p9.element_rect(fill=white_fill),
        legend_key=p9.element_blank(),
        panel_grid=p9.element_blank(),
        axis_ticks=p9.element_blank(),
    )
    return p9.theme(**settings)
Exemple #6
0
    def _plot_theme(grid_axis='both', grid_lines='both', theme='bw'):
        """Internal function provides consistent theme across plots.
        Currently a slightly modified version of theme_bw() with configurable grid lines.

        Args:
            grid_axis: controls the axis on which to draw grid lines
                - Accepts: None, 'x', 'y', 'both'
                - Default: 'both'
            grid_lines: controls whether major or minor grid lines are drawn
                - Accepts: None, 'major', 'minor', 'both'
                - Default: 'both'
            theme:
                - Accepts: 'bw', 'classic', 'gray', 'grey', 'seaborn', '538', 'dark', 'matplotlib', 'minimal', 'xkcd', 'light'
                - Default: 'bw'
        Returns:
            A theme object to be added to a plotnine.ggplot() object.
        Raises:
            AssertionError: if any argument is outside its accepted values.
        """

        import plotnine as gg

        assert grid_axis in [None, 'x', 'y', 'both'], \
            'invalid grid_axis: {!r}'.format(grid_axis)
        assert grid_lines in [None, 'major', 'minor', 'both'], \
            'invalid grid_lines: {!r}'.format(grid_lines)
        assert theme in [
            'bw', 'classic', 'gray', 'grey', 'seaborn', '538', 'dark',
            'matplotlib', 'minimal', 'xkcd', 'light'
        ], 'invalid theme: {!r}'.format(theme)

        drop_grid = set()

        if grid_axis is None or grid_lines is None:
            # No grid at all.
            drop_grid.update(['panel_grid_major', 'panel_grid_minor'])
        else:
            kept_axes = ('x', 'y') if grid_axis == 'both' else (grid_axis,)

            # Hide every grid line of the axis that was not requested.
            for axis in ('x', 'y'):
                if axis not in kept_axes:
                    drop_grid.add('panel_grid_major_' + axis)
                    drop_grid.add('panel_grid_minor_' + axis)

            # On the axes that stay, hide the line kind that was not
            # requested. The previous code re-dropped the already hidden
            # axis here, so 'major'/'minor' never had any effect.
            if grid_lines == 'major':
                unwanted = 'minor'
            elif grid_lines == 'minor':
                unwanted = 'major'
            else:
                unwanted = None
            if unwanted is not None:
                for axis in kept_axes:
                    drop_grid.add('panel_grid_{}_{}'.format(unwanted, axis))

        grid_opt = {name: gg.element_blank() for name in sorted(drop_grid)}

        base_theme = getattr(gg, 'theme_' + theme)()
        return base_theme + gg.theme(
            panel_border=gg.element_blank(),
            axis_line=gg.element_line(color="black"),
            **grid_opt)
Exemple #7
0
def create_confidence_plot(conf_df):
    """Density plot of the 'x' column per 'Method', one facet per 'Task'.

    The y axis text, ticks and title are hidden and the untitled legend
    is laid out horizontally on top. Returns the plotnine plot.
    """
    hidden = element_blank
    layout = theme(
        axis_text_y=hidden(),
        axis_ticks_major_y=hidden(),
        axis_title_y=hidden(),
        legend_title=hidden(),
        legend_position='top',
        legend_box='horizontal',
    )

    density_plot = ggplot(conf_df) + aes(x='x', color='Method', fill='Method')
    density_plot = density_plot + geom_density(alpha=.45)
    density_plot = density_plot + facet_wrap('Task', nrow=4)
    density_plot = density_plot + xlab('Confidence')
    density_plot = density_plot + scale_color_manual(values=COLORS)
    density_plot = density_plot + scale_fill_manual(values=COLORS)
    density_plot = density_plot + theme_fs() + layout
    return density_plot
Exemple #8
0
def setup_heatmap0(df: pd.DataFrame, format_string, axis_text):
    """Build a square-tiled heatmap with the cell values printed on top.

    Reads the 'row', 'col', 'scale' (tile fill) and 'value' (printed
    label) columns of `df`. `format_string` is forwarded to the text
    layer; a truthy `axis_text` shows bold axis labels, otherwise they
    are hidden. Returns the plotnine plot.
    """
    # https://stackoverflow.com/a/62161556/819272
    # Plotnine does not support changing the position of any axis.
    if axis_text:
        axis_labels = p9.element_text(face='bold')
    else:
        axis_labels = p9.element_blank()

    tiles = p9.geom_tile(p9.aes(fill='scale'))
    values = p9.geom_text(p9.aes(label='value'),
                          format_string=format_string,
                          size=7)
    # Green-yellow-red gradient, grey for missing values, no legend.
    fill_scale = p9.scale_fill_gradientn(
        colors=['#63BE7B', '#FFEB84', '#F8696B'],
        na_value='#CCCCCC',
        guide=False)
    bare_theme = p9.theme(axis_text=axis_labels,
                          axis_ticks=p9.element_blank(),
                          axis_title=p9.element_blank(),
                          panel_grid=p9.element_blank())

    heatmap = p9.ggplot(df, p9.aes(y='row', x='col')) + p9.coord_equal()
    heatmap = heatmap + tiles + values
    heatmap = (heatmap + p9.scale_y_discrete(drop=False) +
               p9.scale_x_discrete(drop=False))
    return heatmap + fill_scale + bare_theme
def plot_preprocessing_boxplot_bymodel(dataframe,
                                       models_labels,
                                       metrics_labels,
                                       groups_labels,
                                       figure_size=(14, 4)):
    """
    Build dodged boxplots of metric values, one facet row per model.

    The plot reads the columns 'variable', 'value', 'group' and 'model'
    of `dataframe`. `metrics_labels`, `groups_labels` and `models_labels`
    map raw keys to the text shown on the x axis, in the legend and in
    the row strips. Returns the plotnine plot.
    """

    def metric_names(keys):
        return [metrics_labels[k] for k in keys]

    def group_names(keys):
        return [groups_labels[k] for k in keys]

    def two_decimals(breaks):
        return ['{:.2f}'.format(b) for b in breaks]

    def model_strip(value):
        return f'{models_labels[value]}'

    x_scale = p9.scale_x_discrete(name='Metric', labels=metric_names)
    y_scale = p9.scale_y_continuous(name='Value',
                                    expand=(0, 0.05),
                                    labels=two_decimals)
    # Color-blind friendly palette for the groups.
    fill_scale = p9.scale_fill_brewer(name='Group',
                                      labels=group_names,
                                      type='qual',
                                      palette='Set2')
    # One row per model, each with its own y range.
    rows = p9.facet_grid('model ~ .',
                         scales='free_y',
                         labeller=p9.labeller(rows=model_strip))
    # No axis titles, small tick labels, untitled legend on top with a
    # reduced margin, and the requested figure size.
    look = p9.theme(
        axis_title_x=p9.element_blank(),
        axis_title_y=p9.element_blank(),
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    base = p9.ggplot(dataframe,
                     p9.aes(x='variable', y='value', fill='group'))
    return (base + p9.geom_boxplot(position='dodge') + x_scale + y_scale +
            fill_scale + rows + look)
Exemple #10
0
 def getErrorPlot(self, msg="Error Occured"):
     """Return a placeholder plot that shows `msg` as white text.

     The plot has a single text label, the shared THEME color scale and
     base theme, no axis text and no grid lines.
     """
     message_frame = DataFrame({"x": [10], "y": [2], "label": [msg]})
     blank_canvas = theme(
         figure_size=(20, 4),
         axis_text=element_blank(),
         panel_grid_major=element_blank(),
         panel_grid_minor=element_blank(),
     )
     error_plot = ggplot(message_frame, aes(x="x", y="y", label="label"))
     error_plot = error_plot + geom_text(color="white")
     error_plot = error_plot + THEME.cat_colors_lines + THEME.mt
     return error_plot + blank_canvas
Exemple #11
0
def plot_downstream(clwe, table, output, ylim):
    """Save a dodged bar chart of accuracy per language and refinement.

    Reads the CSV located by `data_file(table)`, keeps the rows whose
    'clwe' column equals `clwe`, and writes the figure to
    `output_file(output)`. `ylim` is the visible y range.
    """
    refine_order = ['Original', '+retrofit', '+synthetic']
    language_order = ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG']

    scores = pd.read_csv(data_file(table))
    scores = scores[scores.clwe == clwe]
    # Fixed category orders control bar and legend ordering.
    scores = scores.assign(
        refine=pd.Categorical(scores['refine'], refine_order),
        language=pd.Categorical(scores['language'], language_order),
    )

    look = p9.theme(
        plot_background=p9.element_rect(fill='white'),
        panel_grid_major_y=p9.element_line(),
        axis_text_x=p9.element_text(margin={'t': 10}),
        axis_text_y=p9.element_text(margin={'r': 8}),
        legend_position=(.7, .9),
        legend_direction='horizontal',
        legend_title=p9.element_blank(),
        legend_text=p9.element_text(size=FONT_SIZE),
        legend_box_margin=0,
        figure_size=(12, 3)
    )

    chart = (
        p9.ggplot(scores, p9.aes(x='language', y='accuracy', fill='refine'))
        + p9.geom_bar(position='dodge', stat='identity', width=.8)
        + p9.coord_cartesian(ylim=ylim)
        + p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
        + p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
        + look
    )
    chart.save(filename=output_file(output))
Exemple #12
0
def plot_pointplot(plot_df, y_axis_label="", use_log10=False,
                   limits=(0, 3.2)):
    """
    Plots the pointplot
    Arguments:
        plot_df - the dataframe that contains the odds ratio and lemmas
            (columns read: lemma, odds_ratio, lower_odds, upper_odds)
        y_axis_label - the label for the y axis
        use_log10 - use log10 for the y axis?
        limits - (low, high) limits of the odds-ratio axis; only applied
            when use_log10 is false
    Returns:
        the plotnine plot
    """
    # Lemmas are ordered by increasing odds ratio along the discrete axis.
    lemma_order = plot_df.sort_values("odds_ratio",
                                      ascending=True).lemma.tolist()

    if use_log10:
        y_scale = p9.scale_y_log10()
    else:
        y_scale = p9.scale_y_continuous(limits=limits)

    graph = (
        p9.ggplot(plot_df, p9.aes(x="lemma", y="odds_ratio")) +
        p9.geom_pointrange(p9.aes(ymin="lower_odds", ymax="upper_odds"),
                           position=p9.position_dodge(width=1),
                           size=0.3,
                           color="#253494") +
        p9.scale_x_discrete(limits=lemma_order) +
        y_scale +
        # Reference line at an odds ratio of 1.
        p9.geom_hline(p9.aes(yintercept=1), linetype='--', color='grey') +
        p9.coord_flip() + p9.theme_seaborn(
            context='paper', style="ticks", font_scale=1, font='Arial') +
        p9.theme(
            # 640 x 480
            figure_size=(6.66, 5),
            panel_grid_minor=p9.element_blank(),
            axis_title=p9.element_text(size=12),
            axis_text_x=p9.element_text(size=10)) +
        p9.labs(x=None, y=y_axis_label))
    return graph
Exemple #13
0
def theme_cognoma(fontsize_mult=1):
    """Return the Cognoma plotnine theme, built on top of theme_bw.

    `fontsize_mult` scales the base size (14), the axis text size (12)
    and the axis title size (13).
    """
    dark_gray = "#4d4d4d"
    title_size = 13 * fontsize_mult

    overrides = gg.theme(
        line=gg.element_line(color=dark_gray),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=dark_gray),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=dark_gray),
        panel_grid=gg.element_line(color="#b3b3b3"),
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=dark_gray),
        axis_text=gg.element_text(size=12 * fontsize_mult, color=dark_gray),
        axis_title_x=gg.element_text(size=title_size, color=dark_gray),
        axis_title_y=gg.element_text(size=title_size, color=dark_gray),
    )
    base = gg.theme_bw(base_size=14 * fontsize_mult)
    return base + overrides
Exemple #14
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Histogram of the 'x' column per 'Method', one facet per 'Task'.

    The mean of each (Task, Method) group is marked with a dashed
    vertical segment and printed above it with one decimal. The legend
    placement is controlled by `legend_position` and `legend_box`.
    Returns the plotnine plot.
    """
    group_means = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # A constant column named ' ' yields a linetype legend entry reading
    # 'Mean Length' under a blank title.
    group_means[' '] = 'Mean Length'

    bars = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_labels = geom_text(aes(x='x', y=.22, label='x', color='Method'),
                            group_means,
                            inherit_aes=False,
                            format_string='{:.1f}',
                            show_legend=False)
    mean_markers = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        group_means,
        inherit_aes=False,
        color='black')
    layout = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    length_plot = ggplot(len_df) + aes(x='x', fill='Method', y='..density..')
    length_plot = length_plot + bars + mean_labels + mean_markers
    length_plot = length_plot + scale_linetype_manual(['dashed'])
    length_plot = length_plot + facet_wrap('Task') + xlim(0, 20) + ylim(0, .23)
    length_plot = length_plot + xlab('Example Length') + ylab('Frequency')
    length_plot = (length_plot + scale_color_manual(values=COLORS) +
                   scale_fill_manual(values=COLORS))
    length_plot = length_plot + theme_fs() + layout

    return length_plot
Exemple #15
0
def plot_breakdown(cip_df: pd.DataFrame):
    """Stacked bar plot of increasing and decreasing stocks per sector in the specified df

    Columns whose name starts with 'bin_' are ignored; the remaining
    columns are summed per row and a stock counts as 'up' when that sum
    is >= 0. Returns None when no stock could be matched to a sector,
    otherwise the result of plot_as_inline_html_data for the plot.
    """
    bin_columns = [name for name in cip_df.columns if name.startswith('bin_')]
    totals = cip_df.drop(columns=bin_columns).sum(axis='columns')
    df = pd.DataFrame(totals, columns=['sum'])
    df = df.merge(stocks_by_sector(), left_index=True, right_on='asx_code')

    if len(df) == 0:  # no stock in cip_df have a sector? ie. ETF?
        return None

    assert set(df.columns) == {'sum', 'asx_code', 'sector_name'}
    df['increasing'] = ['up' if total >= 0.0 else 'down' for total in df['sum']]

    # value_counts orders sectors from most to fewest stocks; that order
    # becomes the category order of the bars.
    sector_order = df['sector_name'].value_counts().index.tolist()
    df = df.assign(
        sector_name_cat=pd.Categorical(df['sector_name'],
                                       categories=sector_order))

    mapping = p9.aes(x='factor(sector_name_cat)', fill='factor(increasing)')
    layout = p9.theme(axis_text_y=p9.element_text(size=7),
                      subplots_adjust={"left": 0.2, 'right': 0.85},
                      legend_title=p9.element_blank())
    plot = (p9.ggplot(df, mapping)
            + p9.geom_bar()
            + p9.labs(x="Sector", y="Number of stocks")
            + layout
            + p9.coord_flip())
    return plot_as_inline_html_data(plot)
class THEME:
    """Shared colors, scales and base theme for the dashboard plots."""

    # Background and loader settings.
    bgcolor = "#293241"
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palettes.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#50514f",
        "#f25f5c",
        "#ffe066",
        "#247ba0",
        "#70c1b3",
        "#c97c5d",
        "#b36a5e",
    ]
    colors_dark = [
        "#e07a5f",
        "#3d405b",
        "#81b29a",
        "#2b2d42",
        "#f77f00",
        "#6d597a",
    ]

    # Base theme: black-and-white without the panel border.
    mt = theme_bw() + theme(panel_border=element_blank())

    # Ready-made scales built on the light palette, plus a two-color
    # fill gradient.
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient("#ce4257", "#aad576")

    # Integer flags; presumably select between fill and color scales —
    # confirm against callers.
    FILL = 1
    COLOR = 2

    LONG_FIGURE = (10, 20)
Exemple #17
0
def gen_fig():
    """Save one PDF per question showing length and buzzing position over edits.

    Loads a pickled pair (all_questions, all_buzzes) from `data_dir`. For
    every entry with at least 5 buzzes that are not all 'NULL', plots the
    question length (in whitespace-separated words) and the buzzing
    position against the edit iteration, and writes `<index>.pdf` into
    `fig_dir`. A 'NULL' buzz is plotted at the question length.
    """
    X_title = 'Edit Iteration'
    Y_title = 'Position in Question'
    C_title = 'Type'

    # NOTE(review): pickle.load can execute arbitrary code; only point
    # data_dir at files from a trusted source.
    # The file is opened in a context manager so the handle is closed
    # (the previous code never closed it).
    with open(data_dir, 'rb') as handle:
        stuff = pickle.load(handle)
    all_questions, all_buzzes = stuff[0], stuff[1]

    for k, (questions, buzzes) in enumerate(zip(all_questions, all_buzzes)):
        if len(buzzes) < 5:
            continue
        if all(x == 'NULL' for x in buzzes):
            continue
        print(k)
        length_buzzing_positions = {X_title: [], Y_title: [], C_title: []}
        for i, (q, b) in enumerate(zip(questions, buzzes)):
            length = len(q.split())
            if b == 'NULL':
                b = length
            # Two rows per edit iteration: the length, then the buzz.
            length_buzzing_positions[X_title].extend([i, i])
            length_buzzing_positions[Y_title].extend([length, b])
            length_buzzing_positions[C_title].extend(
                ['Question Length', 'Buzzing Position'])

        df = pd.DataFrame(length_buzzing_positions)
        p = (ggplot(df) +
             geom_path(aes(x=X_title, y=Y_title, color=C_title), size=2) +
             theme(
                 legend_title=element_blank(),
                 legend_position='top',
             ))
        p.save(os.path.join(fig_dir, '{}.pdf'.format(k)))
Exemple #18
0
    def __init__(self, *args, **kwargs):
        """See main class docstring."""
        p9.theme_matplotlib.__init__(self, *args, **kwargs)

        grid_gray = '#D9D9D9'  # gray used in themes.theme_matplotlib
        line_width = 0.7

        # Gray border and major grid, no axis lines, ticks or minor grid.
        overrides = p9.theme(
            panel_border=p9.element_rect(color=grid_gray, size=line_width),
            axis_line=p9.element_blank(),
            axis_ticks_length=0,
            axis_ticks=p9.element_blank(),
            panel_grid_major=p9.element_line(color=grid_gray,
                                             size=line_width),
            panel_grid_minor=p9.element_blank(),
            panel_ontop=True,  # plot panel on top of grid
        )
        self.add_theme(overrides, inplace=True)
Exemple #19
0
    def __init__(self, args, display_title='Analysis'):
        """Load the fusion position table and define the shared plot theme.

        The position file path is the 'fusion_pos' entry returned by
        Mkref_fusion.parse_genomeDir for `args.fusion_genomeDir`.
        """
        super().__init__(args, display_title)

        genome_info = Mkref_fusion.parse_genomeDir(args.fusion_genomeDir)
        fusion_pos_file = genome_info['fusion_pos']
        self.pos_dict = Count_fusion.read_pos_file(fusion_pos_file)

        # Theme keyword arguments: thick black axis lines, black axis
        # text, and no grid, border or panel background.
        self.p9_theme = dict(
            axis_line_x=p9.element_line(size=2, colour="black"),
            axis_line_y=p9.element_line(size=2, colour="black"),
            panel_grid_major=p9.element_blank(),
            panel_grid_minor=p9.element_blank(),
            panel_border=p9.element_blank(),
            panel_background=p9.element_blank(),
            axis_text_x=p9.element_text(colour="black"),
            axis_text_y=p9.element_text(colour="black"),
        )
def plot_distributions_bar_plot_grid(dataframe, figure_size=(14, 4)):
    """
    Build dodged count bars per threshold, one facet column per iteration.

    The plot reads the columns 'threshold', 'value' (0 or 1) and
    'iterations' of `dataframe`; each bar is annotated with its count.
    Returns the plotnine plot.
    """

    def stability_names(keys):
        # Legend text for the two values of the fill variable.
        return [{0: 'Stable', 1: 'Unstable'}[k] for k in keys]

    def iteration_strip(value):
        return f'iters = {value}'

    counts = p9.geom_text(p9.aes(label='stat(count)'),
                          stat='count',
                          position=p9.position_dodge(0.9),
                          size=7,
                          va='bottom')
    # Expansion is (mul_bottom, add_bottom, mul_top, add_top): only extra
    # room on top, for the count labels.
    y_scale = p9.scale_y_continuous(name='Count', expand=(0, 0, 0, 500))
    fill_scale = p9.scale_fill_manual(values={0: '#009e73', 1: '#d55e00'},
                                      labels=stability_names)
    columns = p9.facet_grid('. ~ iterations',
                            labeller=p9.labeller(cols=iteration_strip))
    # No y axis title, small tick labels, untitled legend on top with a
    # reduced margin, and the requested figure size.
    look = p9.theme(
        axis_title_y=p9.element_blank(),
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    base = p9.ggplot(dataframe, p9.aes(x='threshold', fill='value'))
    return (base + p9.geom_bar(position='dodge') + counts +
            p9.scale_x_discrete(name='Threshold') + y_scale + fill_scale +
            columns + look)
Exemple #21
0
def plot_restaurants_per_neighborhood(filepath, restaurant_data_file,
                                      pittsburgh_shapefile):
    """Save a map of neighborhoods shaded by their number of restaurants.

    Reads the restaurant CSV at `filepath + restaurant_data_file` (it
    must have 'longitude' and 'latitude' columns) and the neighborhood
    shapefile (with 'hood', 'hood_no' and 'geometry'), counts the
    restaurants falling in each neighborhood and writes the map to
    "restaurant_map.png" in the current directory.
    """
    restaurants = pd.read_csv(filepath + restaurant_data_file)
    points = gpd.points_from_xy(restaurants.longitude, restaurants.latitude)
    restaurant_locations = gpd.GeoDataFrame(
        restaurants, geometry=points).filter(items=["geometry"])

    # import Pittsburgh neighborhood shapefile
    neighborhood_polygons = gpd.read_file(pittsburgh_shapefile).filter(
        items=["hood", "hood_no", "geometry"])

    # spatial join to figure out which neighborhood each restaurant is in
    located = gpd.sjoin(restaurant_locations,
                        neighborhood_polygons,
                        how="inner",
                        op="intersects")

    # After count(), the 'hood' column holds the rows per neighborhood.
    per_hood = located.groupby("hood_no").count().reset_index()
    per_hood = per_hood.filter(items=["hood_no", "hood"])
    per_hood = per_hood.rename(columns={"hood": "num_restaurants"})

    # Left merge keeps neighborhoods without any restaurant.
    restaurants_per_shape = gpd.GeoDataFrame(
        pd.merge(neighborhood_polygons, per_hood, how="left"))

    bare_axes = p.theme(
        panel_background=p.element_rect(fill="white"),
        axis_text_x=p.element_blank(),
        axis_text_y=p.element_blank(),
        axis_ticks_major_x=p.element_blank(),
        axis_ticks_major_y=p.element_blank(),
    )
    restaurant_map = p.ggplot(restaurants_per_shape)
    restaurant_map = restaurant_map + p.geom_map(
        p.aes(fill="num_restaurants"))
    restaurant_map = restaurant_map + p.scale_colour_gradient(low="white",
                                                              high="black")
    restaurant_map = restaurant_map + bare_axes
    restaurant_map = restaurant_map + p.scale_fill_gradient(
        low="#efefef", high="#073763", name="# Restaurants")

    restaurant_map.save("restaurant_map.png")
def scatter_plot2(df1, df2, xcol, ycol, domain, color1='black', color2='red', xname=None, yname=None, log=False, width=6, height=6, clamp=True, tickCount=5):
    """Overlay two scatter plots of `ycol` against `xcol` with rug marks.

    `domain` is a (low, high) pair used as the limits of both axes. With
    `clamp`, values above `domain[1]` are set to `domain[1]` on copies of
    the inputs. `xname` / `yname` default to the column names, `log`
    switches both axes to log10, and `width` / `height` give the figure
    size. `tickCount` is not used by this function. A dashed diagonal
    and dashed rules at the upper bound are drawn. Returns the plot.
    """
    assert len(domain) == 2

    point_size = 1.5
    dash_pattern = (0, (6, 2))
    upper = domain[1]

    xname = xcol if xname is None else xname
    yname = ycol if yname is None else yname

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    def clamped(frame):
        # Work on a deep copy so the caller's data is not modified.
        frame = frame.copy(deep=True)
        for column in (xcol, ycol):
            frame.loc[frame[column] > upper, column] = upper
        return frame

    if clamp:  # clamp overflowing values if required
        df1 = clamped(df1)
        df2 = clamped(df2)

    if log:
        x_scale, y_scale = p9.scale_x_log10, p9.scale_y_log10
    else:
        x_scale, y_scale = p9.scale_x_continuous, p9.scale_y_continuous

    components = [
        p9.aes(x=xcol, y=ycol),
        # points of both data sets
        p9.geom_point(size=point_size, na_rm=True, color=color1, alpha=0.5),
        p9.geom_point(size=point_size, na_rm=True, data=df2, color=color2,
                      alpha=0.5),
        p9.labs(x=xname, y=yname),
        # rug plots on the top and right sides
        p9.geom_rug(na_rm=True, sides="tr", color=color1, alpha=0.05),
        p9.geom_rug(na_rm=True, sides="tr", data=df2, color=color2,
                    alpha=0.05),
        x_scale(limits=domain, labels=ax_formatter),
        y_scale(limits=domain, labels=ax_formatter),
        p9.theme_bw(),
        p9.theme(panel_grid_major=p9.element_line(color='#666666',
                                                  alpha=0.5)),
        p9.theme(panel_grid_minor=p9.element_blank()),
        p9.theme(figure_size=(width, height)),
        p9.theme(text=p9.element_text(size=24, color="black")),
        # reference lines: diagonal, vertical rule, horizontal rule
        p9.geom_abline(intercept=0, slope=1, linetype=dash_pattern),
        p9.geom_vline(xintercept=upper, linetype=dash_pattern),
        p9.geom_hline(yintercept=upper, linetype=dash_pattern),
    ]

    scatter = p9.ggplot(df1)
    for component in components:
        scatter += component

    return scatter
Exemple #23
0
def theme_cognoma(fontsize_mult=1):
    """Return the Cognoma plotnine theme, built on top of theme_bw.

    `fontsize_mult` scales the base size (14), the axis text size (12)
    and the axis title size (13).
    """
    import plotnine as gg

    accent = "#4d4d4d"

    def gray_text(points):
        # Dark gray text at `points` scaled by the multiplier.
        return gg.element_text(size=points * fontsize_mult, color=accent)

    customizations = gg.theme(
        line=gg.element_line(color=accent),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=accent),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=accent),
        panel_grid=gg.element_line(color="#b3b3b3"),
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=accent),
        axis_text=gray_text(12),
        axis_title_x=gray_text(13),
        axis_title_y=gray_text(13),
    )
    return gg.theme_bw(base_size=14 * fontsize_mult) + customizations
Exemple #24
0
def create_confidence_plot(conf_df):
    """Density plot of the ``x`` column per ``Method``, faceted by ``Task``.

    ``conf_df`` must provide the columns ``x``, ``Method`` and ``Task``.
    Colours come from the module-level ``COLORS``; the y axis text, ticks
    and title are hidden and the legend is placed horizontally on top.
    """
    components = (
        aes(x='x', color='Method', fill='Method'),
        geom_density(alpha=.45),
        facet_wrap('Task', nrow=4),
        xlab('Confidence'),
        scale_color_manual(values=COLORS),
        scale_fill_manual(values=COLORS),
        theme_fs(),
        theme(
            axis_text_y=element_blank(),
            axis_ticks_major_y=element_blank(),
            axis_title_y=element_blank(),
            legend_title=element_blank(),
            legend_position='top',
            legend_box='horizontal',
        ),
    )

    density_plot = ggplot(conf_df)
    for component in components:
        density_plot = density_plot + component
    return density_plot
Exemple #25
0
 def __init__(self, base_size=11, base_family='DejaVu Sans'):
     """Initialise as ``theme_light`` and apply this theme's overrides in place."""
     theme_light.__init__(self, base_size, base_family)

     frame_color = '#838383'
     light_grey = '#DDDDDD'
     overrides = theme(
         axis_ticks=element_line(color=light_grey, size=0.5),
         panel_border=element_rect(fill='None', color=frame_color, size=1),
         strip_background=element_rect(
             fill=light_grey, color=frame_color, size=1),
         strip_text_x=element_text(color='black'),
         strip_text_y=element_text(color='black', angle=-90),
         legend_key=element_blank(),
     )
     self.add_theme(overrides, inplace=True)
Exemple #26
0
    def getErrorPlot(self, msg="Error Occured"):
        """
        Build a plotnine plot that shows only an error message. Meant for
        displaying error messages across dashboards.

        parameters:
        - msg: the message to be displayed when an error occurs
        """
        message_frame = DataFrame({"x": [10], "y": [2], "label": [msg]})

        # A single text label positioned from the one-row frame above
        error_plot = ggplot(message_frame, aes(x="x", y="y", label="label"))
        error_plot = error_plot + geom_text(color="Black")
        error_plot = error_plot + THEME.cat_colors_lines
        error_plot = error_plot + THEME.mt
        error_plot = error_plot + theme(
            figure_size=(20, 4),
            axis_text=element_blank(),
            panel_grid_major=element_blank(),
            panel_grid_minor=element_blank(),
        )
        return error_plot
Exemple #27
0
Fichier : plot.py Projet : NPSDC/qb
 def __init__(self, base_size=11, base_family="DejaVu Sans"):
     """Set up ``theme_light`` defaults, then merge the overrides below in place."""
     theme_light.__init__(self, base_size, base_family)

     # Keyword order matches the themeables' original declaration order
     overrides = {
         "axis_ticks": element_line(color="#DDDDDD", size=0.5),
         "panel_border": element_rect(fill="None", color="#838383", size=1),
         "strip_background": element_rect(
             fill="#DDDDDD", color="#838383", size=1
         ),
         "strip_text_x": element_text(color="black"),
         "strip_text_y": element_text(color="black", angle=-90),
         "legend_key": element_blank(),
     }
     self.add_theme(theme(**overrides), inplace=True)
Exemple #28
0
def ikuya_sys_plot():
    """Plot accuracy against percent of question revealed, one colour per model.

    Combines the records in ``2019_tacl_trick/data/ikuya_cdf.json`` with the
    frame returned by ``load_ikuya_nips()``, relabels the ``model`` column
    through ``relabel`` and saves the point plot as a 3.5 x 2.5 PDF.
    """
    nips_df = load_ikuya_nips()
    with open('2019_tacl_trick/data/ikuya_cdf.json') as f:
        cdf_records = json.load(f)

    df = pd.concat([pd.DataFrame(cdf_records), nips_df])

    # Fix the legend order of the relabelled models
    model_dtype = CategoricalDtype(
        ['Regular Test', 'IR Adversarial', 'RNN Adversarial'],
        ordered=True)
    df['model'] = df['model'].map(relabel)
    df['model'] = df['model'].astype(model_dtype)

    mapping = aes(x='x', y='y', color='model', xmin='x', xmax='x')
    accuracy_axis = scale_y_continuous(
        breaks=np.linspace(0, 1, 6), limits=[0, 1])
    legend_layout = theme(
        legend_position=(.335, .7),
        legend_background=element_blank(),
        legend_box_margin=0,
        legend_title=element_blank())

    p = (
        ggplot(df)
        + mapping
        + geom_point(size=1.0, shape='.')
        + xlab('Percent of Question Revealed')
        + ylab('Accuracy')
        + accuracy_axis
        + legend_layout
    )
    p.save('2019_tacl_trick/auto_fig/ikuya_cdf.pdf', width=3.5, height=2.5)
Exemple #29
0
def plot_breakdown(ld: LazyDictionary) -> p9.ggplot:
    """Stacked bar plot of increasing and decreasing stocks per sector.

    Reads ``ld["cip_df"]`` and ``ld["stocks_by_sector"]``. Every column of
    ``cip_df`` whose name starts with ``bin_`` is dropped, the remaining
    columns are summed per row, and a row counts as "up" when that sum is
    >= 0.0 and as "down" otherwise.

    Returns the themed plot, or None when no row of ``cip_df`` has a
    matching entry in ``stocks_by_sector``.
    """
    cip_df = ld["cip_df"]

    bin_columns = [name for name in cip_df.columns if name.startswith("bin_")]
    row_totals = cip_df.drop(columns=bin_columns).sum(axis="columns")
    df = pd.DataFrame(row_totals, columns=["sum"])

    # stocks_by_sector is expected to be indexed by asx_code and to carry
    # the columns asx_code and sector_name, e.g.
    #   14D -> (14D, Industrials), 1AD -> (1AD, Health Care)
    ss = ld["stocks_by_sector"]
    df = df.merge(ss, left_index=True, right_index=True)

    # Nothing in cip_df has a sector (e.g. only ETFs): no plot to draw
    if df.shape[0] == 0:
        return None

    assert set(df.columns) == {"sum", "asx_code", "sector_name"}

    df["increasing"] = df["sum"].map(
        lambda total: "up" if total >= 0.0 else "down")

    # Category order follows value_counts(), i.e. by how often each sector
    # occurs in the merged frame
    sector_names = df["sector_name"].value_counts().index.tolist()
    df = df.assign(
        sector_name_cat=pd.Categorical(df["sector_name"],
                                       categories=sector_names))

    mapping = p9.aes(x="factor(sector_name_cat)", fill="factor(increasing)")
    plot = p9.ggplot(df, mapping) + p9.geom_bar() + p9.coord_flip()

    return user_theme(
        plot,
        x_axis_label="Sector",
        y_axis_label="Number of stocks",
        subplots_adjust={"left": 0.2, "right": 0.85},
        legend_title=p9.element_blank(),
        asxtrade_want_fill_d=True,
    )
Exemple #30
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Density histogram of ``x`` per ``Method``, faceted by ``Task``.

    The per-(Task, Method) mean of ``len_df`` is drawn on each facet as a
    dashed vertical segment with the mean value printed above it.

    Parameters
    ----------
    len_df : DataFrame
        Must provide the columns ``x``, ``Method`` and ``Task``.
    legend_position, legend_box :
        Passed straight through to ``theme``.
    """
    mean_len_df = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # The blank-named column gives the mean segments their own legend entry
    mean_len_df[' '] = 'Mean Length'

    histogram = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_labels = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        mean_len_df,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False,
    )
    mean_rules = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        mean_len_df,
        inherit_aes=False,
        color='black',
    )
    legend_layout = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    length_plot = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + histogram
        + mean_labels
        + mean_rules
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + legend_layout
    )
    return length_plot
Exemple #31
0
def theme_tufte(base_size=11, base_family='serif', lines=True, ticks=True):
    """
    Theme inspired by Chapter 6, "Data-Ink Maximization and Graphical
    Design", of Edward Tufte's "The Visual Display of Quantitative
    Information".

    Parameters
    ----------
    base_size : int, optional
        Base font size, forwarded to ``theme_bw``. Default is 11.
    base_family : str, optional
        Base font family, forwarded to ``theme_bw``. Default is 'serif'.
    lines : bool, optional
        Keep the axis lines. When False they are blanked. Default is True.
    ticks : bool, optional
        Keep the axis ticks. When False they are blanked. Default is True.

    Returns
    -------
    Plotnine theme.

    """
    tufte = p9.theme_bw(base_size=base_size, base_family=base_family)
    tufte = tufte + p9.theme(
        legend_background=p9.element_blank(),
        legend_key=p9.element_blank(),
        panel_background=p9.element_blank(),
        strip_background=p9.element_blank(),
        plot_background=p9.element_rect(fill='white'),
        axis_line=p9.element_line(size=0.5),
        axis_ticks=p9.element_line(size=0.5),
        panel_grid=p9.element_blank(),
    )

    # Blank whichever optional elements the caller switched off
    for keep, themeable in ((ticks, 'axis_ticks'), (lines, 'axis_line')):
        if not keep:
            tufte = tufte + p9.theme(**{themeable: p9.element_blank()})

    return tufte
Exemple #32
0
def rel_plot(sbs, variant, jitter=0.01):
    """Jittered log-log scatter of ``ratio`` against ``base`` for one variant.

    Parameters
    ----------
    sbs : DataFrame
        Must provide the columns ``variant``, ``base``, ``ratio``,
        ``dataset``, ``session_index`` and ``base_session_index``.
    variant :
        Only rows whose ``variant`` equals this value are plotted.
    jitter : float, optional
        Jitter width and height. Default is 0.01.

    Returns
    -------
    The plot, with a y = 1/x reference curve, dashed horizontal rules at
    0.9 and 1.1 and dashed vertical rules at 0.1 and 0.3.
    """
    xcol = "base"
    ycol = "ratio"

    selected = sbs[sbs.variant == variant]
    session_pairs = selected[["session_index", "base_session_index"]].apply(
        tuple, axis=1)
    plotdata = selected.assign(
        x=selected[xcol],
        y=selected[ycol],
        sbs_index=selected.index.values,
        session_text=session_pairs.map(lambda tup: f"{tup[0]} vs. {tup[1]}"),
    )

    # Reference curve y = 1 / x
    curve_x = np.geomspace(0.02, 1, num=5)
    diag_df = pd.DataFrame({"x": curve_x, "y": 1 / curve_x})

    rule_style = {"linetype": "dashed", "alpha": 0.7}
    points = geom_jitter(
        aes(x="x", y="y", fill="dataset", color="dataset"),
        width=jitter,
        height=jitter,
        alpha=0.6,
        size=1.0,
    )
    layout = theme(
        figure_size=(8, 5),
        legend_position="top",
        subplots_adjust={"hspace": 0.5},
        legend_title=element_blank(),
        legend_box_margin=-1,
        legend_margin=0.0,
        axis_text=element_text(size=12, margin={"t": 0.2, "l": -0.3}),
        legend_text=element_text(size=11),
        axis_title=element_text(
            size=12, margin={"r": -0.2, "b": 0.0, "l": 0, "t": 0.0}),
    )

    scatterplot = (
        ggplot(plotdata)
        + points
        + geom_line(aes(x="x", y="y"), data=diag_df)
        + ylab(ycol)
        + geom_hline(aes(yintercept=1.1), **rule_style)
        + geom_hline(aes(yintercept=0.9), **rule_style)
        + geom_vline(aes(xintercept=0.1), **rule_style)
        + geom_vline(aes(xintercept=0.3), **rule_style)
        + xlab(xcol)
        + layout
        + scale_x_log10(labels=make_labeler(brief_format),
                        breaks=[0.01, 0.1, 0.3, 1.0])
        + scale_y_log10(labels=make_labeler(brief_format),
                        breaks=[0.5, 0.9, 1.1, 2.0, 3.0, 6, 12])
    )

    return scatterplot
Exemple #33
0
def ologram_merge_stats(inputfiles=None,
                        pdf_width=None,
                        pdf_height=None,
                        output=None,
                        labels=None):
    """Merge several tab-separated statistics tables into one heatmap PDF.

    Each input table is read with pandas, tagged with a dataset label and
    concatenated. The merged table is drawn as a tile plot (feature vs.
    dataset) filled by ``summed_bp_overlaps_log2_fold_change``, with points
    coloured by -log10 of ``summed_bp_overlaps_pvalue``, and saved through
    ``save_as_pdf_pages``.

    Parameters
    ----------
    inputfiles :
        Iterable of objects exposing ``.name`` that ``pd.read_csv`` can read.
        Each table must contain the columns ``feature_type``,
        ``summed_bp_overlaps_pvalue`` and
        ``summed_bp_overlaps_log2_fold_change``.
    pdf_width, pdf_height : number, optional
        Page size. When None, 0.6 per dataset (capped at 100) and 0.6 per
        feature (capped at 500) are used respectively.
    output :
        Object exposing ``.name``; the PDF is written to that path.
    labels : str, optional
        Comma-separated dataset labels, one per input file. When None, the
        name of each file's enclosing directory is used instead.

    Notes
    -----
    Problems are reported through ``message(..., type="ERROR")``.
    NOTE(review): presumably that call aborts the program - confirm.
    """
    # -------------------------------------------------------------------------
    # Check user provided labels
    # -------------------------------------------------------------------------

    if labels is not None:

        labels = labels.split(",")

        for elmt in labels:
            if not re.search("^[A-Za-z0-9_]+$", elmt):
                # NOTE(review): this message names --more-bed-labels while the
                # other messages in this function refer to --labels - confirm.
                message(
                    "Only alphanumeric characters and '_' allowed for --more-bed-labels",
                    type="ERROR")
        if len(labels) != len(inputfiles):
            message("--labels: the number of labels should be"
                    " the same as the number of input files ", type="ERROR")

        if len(labels) != len(set(labels)):
            message("Redundant labels not allowed.", type="ERROR")

    # -------------------------------------------------------------------------
    # Loop over input files
    # -------------------------------------------------------------------------

    df_list = list()
    df_label = list()

    for pos, infile in enumerate(inputfiles):
        message("Reading file : " + infile.name)
        # Read the dataset into a temporary dataframe
        df_tmp = pd.read_csv(infile, sep='\t', header=0, index_col=None)
        # Change name of 'feature_type' column.
        df_tmp = df_tmp.rename(index=str, columns={"feature_type": "Feature"})
        # Assign the name of the dataset to a new column

        if labels is None:
            # Default label: name of the directory that contains the file
            file_short_name = os.path.basename(os.path.normpath(os.path.dirname(infile.name)))
            df_label += [file_short_name]
        else:
            file_short_name = labels[pos]
            df_label += [labels[pos]]

        df_tmp = df_tmp.assign(**{"dataset": [file_short_name] * df_tmp.shape[0]})
        # P-values equal to 0 or -1 are changed to 1e-320 and NaN respectively
        df_tmp.loc[df_tmp['summed_bp_overlaps_pvalue'] == 0, 'summed_bp_overlaps_pvalue'] = 1e-320
        df_tmp.loc[df_tmp['summed_bp_overlaps_pvalue'] == -1, 'summed_bp_overlaps_pvalue'] = np.nan
        # Compute -log10(pval)
        df_tmp = df_tmp.assign(**{"-log_10(pval)": -np.log10(df_tmp.summed_bp_overlaps_pvalue)})

        # Which p-values are significant ?
        # TODO: For now, draws all p-values. Add Benjamini-Hochberg correction, and distinguish between NaN and 0.
        # NaN compares False here, so rows whose p-value was -1 get no point.
        df_tmp = df_tmp.assign(**{"pval_signif": df_tmp.summed_bp_overlaps_pvalue > 0})

        # Add the df to the list to be subsequently merged
        df_list += [df_tmp]



    # Labels must be unique, whether user-supplied or derived from directories
    if len(set(df_label)) < len(df_label):
        message('Enclosing directories are ambiguous and cannot be used as labels. You may use "--labels".',
                type="ERROR")

    # -------------------------------------------------------------------------
    # Concatenate dataframes (row bind)
    # -------------------------------------------------------------------------

    message("Merging dataframes.")
    df_merged = pd.concat(df_list, axis=0)

    # -------------------------------------------------------------------------
    # Plotting
    # -------------------------------------------------------------------------

    message("Plotting")
    my_plot = ggplot(data=df_merged,
                     mapping=aes(y='Feature', x='dataset'))
    my_plot += geom_tile(aes(fill = 'summed_bp_overlaps_log2_fold_change'))
    my_plot += scale_fill_gradient2()
    my_plot += labs(fill = "log2(fold change) for summed bp overlaps")

    # Points for p-val. Must be after geom_tile()
    my_plot += geom_point(data = df_merged.loc[df_merged['pval_signif']],
        mapping = aes(x='dataset',y='Feature',color = '-log_10(pval)'), size=4, shape ='D', inherit_aes = False)
    my_plot += scale_color_gradientn(colors = ["#160E00","#FFB025","#FFE7BD"])
    my_plot += labs(color = "-log10(p-value)")

    # Theming
    my_plot += theme_bw()
    my_plot += theme(panel_grid_major=element_blank(),
                     axis_text_x=element_text(rotation=90),
                     panel_border=element_blank(),
                     axis_ticks=element_blank())

    # -------------------------------------------------------------------------
    # Page size
    # -------------------------------------------------------------------------

    message("Saving")
    nb_ft = len(list(df_merged['Feature'].unique()))
    nb_datasets = len(list(df_merged['dataset'].unique()))

    # The caps below apply only to automatically computed sizes
    if pdf_width is None:
        panel_width = 0.6
        pdf_width = panel_width * nb_datasets

        if pdf_width > 100:
            pdf_width = 100
            message("Setting --pdf-width to 100 (limit)")

    if pdf_height is None:
        panel_height = 0.6
        pdf_height = panel_height * nb_ft

        if pdf_height > 500:
            pdf_height = 500
            message("Setting --pdf-height to 500 (limit)")

    message("Page width set to " + str(pdf_width))
    message("Page height set to " + str(pdf_height))
    figsize = (pdf_width, pdf_height)

    # -------------------------------------------------------------------------
    # Turn warnings off. Both pandas and plotnine use warnings for deprecated
    # functions. I need to turn them off although I'm not really satisfied with
    # this solution...
    # -------------------------------------------------------------------------

    # Emits a DeprecationWarning; called inside the catch_warnings block below,
    # where the "ignore" filter is active.
    def fxn():
        warnings.warn("deprecated", DeprecationWarning)

    # -------------------------------------------------------------------------
    # Saving
    # -------------------------------------------------------------------------

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fxn()

        message("Saving diagram to file : " + output.name)
        message("Be patient. This may be long for large datasets.")

        # NOTE : We must manually specify figure size with save_as_pdf_pages
        save_as_pdf_pages(filename=output.name,
                          plots=[my_plot + theme(figure_size=figsize)],
                          width=pdf_width,
                          height=pdf_height)
Exemple #34
0
    theme2 = theme_gray()
    theme3 = theme1 + theme2
    assert theme3 == theme2


def test_add_empty_theme_element():
    # Adding an element that sets no properties leaves the theme unchanged
    red_axis_theme = theme_gray() + theme(
        axis_line_x=element_line(color='red'))
    with_empty_element = red_axis_theme + theme(axis_line_x=element_line())
    assert red_axis_theme == with_empty_element


# Shared theme elements used by the tests in this module.
l1 = element_line(color='red', size=1, linewidth=1, linetype='solid')
l2 = element_line(color='blue', size=2, linewidth=2)
# Same properties as l2, plus the linetype that l1 sets
l3 = element_line(color='blue', size=2, linewidth=2, linetype='solid')
blank = element_blank()


def test_add_element_heirarchy():
    # parent themeable modifies child themeable
    theme1 = theme_gray() + theme(axis_line_x=l1)  # child
    theme2 = theme1 + theme(axis_line=l2)          # parent
    theme3 = theme1 + theme(axis_line_x=l3)        # child, for comparison
    assert theme2.themeables['axis_line_x'] == \
        theme3.themeables['axis_line_x']

    theme1 = theme_gray() + theme(axis_line_x=l1)  # child
    theme2 = theme1 + theme(line=l2)               # grand-parent
    theme3 = theme1 + theme(axis_line_x=l3)        # child, for comparison
    assert theme2.themeables['axis_line_x'] == \
        theme3.themeables['axis_line_x']