def plot_metrics_comparison_lineplot_grid(dataframe,
                                          models_labels,
                                          metrics_labels,
                                          figure_size=(14, 4)):
    """
    Build a facet grid of line plots of metric values against thresholds.

    The plot reads the columns ``threshold`` (x), ``value`` (y) and
    ``variable`` (grouping, colour and marker shape) and is faceted with
    ``iterations`` on the rows and ``model`` on the columns.

    Args:
        dataframe: Data handed to ``p9.ggplot``; must provide the columns
            named above.
        models_labels: Lookup (indexed with ``[]``) from a ``model`` facet
            value to the text shown in the column strip.
        metrics_labels: Lookup (indexed with ``[]``) from a ``variable``
            value to the text shown in the legend.
        figure_size: Passed through to the theme as ``figure_size``.

    Returns:
        The assembled plotnine plot.
    """

    def metric_names(breaks):
        # Legend entries: swap each raw metric key for its display name.
        return [metrics_labels[key] for key in breaks]

    def two_decimals(breaks):
        # Tick labels printed with exactly two decimal digits.
        return ['{:.2f}'.format(tick) for tick in breaks]

    def iterations_strip(level):
        return f'iters = {level}'

    def model_strip(level):
        return f'{models_labels[level]}'

    # The metric column drives grouping, colour and marker shape at once.
    mapping = p9.aes(x='threshold',
                     y='value',
                     group='variable',
                     color='variable',
                     shape='variable')

    theme_settings = dict(
        # No y axis title.
        axis_title_y=p9.element_blank(),
        # Small tick labels on both axes.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, untitled, with a small box margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        # Overall figure size.
        figure_size=figure_size,
    )

    components = (
        # Markers plus connecting lines.
        p9.geom_point(),
        p9.geom_line(),
        # Named x axis with a little padding left and right.
        p9.scale_x_discrete(name='Threshold', expand=(0, 0.2)),
        # Named y axis with padding above and below and 2-digit tick labels.
        p9.scale_y_continuous(name='Value',
                              expand=(0, 0.05),
                              labels=two_decimals),
        # Shape and colour legends share the same title and entry names.
        p9.scale_shape_discrete(name='Metric', labels=metric_names),
        # Qualitative ColorBrewer 'Set2' palette (picked by the original
        # author with colour-blind readers in mind).
        p9.scale_color_brewer(name='Metric',
                              labels=metric_names,
                              type='qual',
                              palette='Set2'),
        # One panel per (iterations, model) pair with relabelled strips.
        p9.facet_grid('iterations ~ model',
                      labeller=p9.labeller(rows=iterations_strip,
                                           cols=model_strip)),
        p9.theme(**theme_settings),
    )

    plot = p9.ggplot(dataframe, mapping)
    for component in components:
        plot = plot + component
    return plot
def plot_preprocessing_boxplot_bymodel(dataframe,
                                       models_labels,
                                       metrics_labels,
                                       groups_labels,
                                       figure_size=(14, 4)):
    """
    Build per-model box plots of metric values, split by group.

    The plot reads the columns ``variable`` (x), ``value`` (y) and ``group``
    (fill) and draws one facet row per ``model`` value, each row with its own
    y scale.

    Args:
        dataframe: Data handed to ``p9.ggplot``; must provide the columns
            named above.
        models_labels: Lookup (indexed with ``[]``) from a ``model`` facet
            value to the text shown in the row strip.
        metrics_labels: Lookup (indexed with ``[]``) from a ``variable``
            value to its x tick label.
        groups_labels: Lookup (indexed with ``[]``) from a ``group`` value to
            its legend entry.
        figure_size: Passed through to the theme as ``figure_size``.

    Returns:
        The assembled plotnine plot.
    """

    def metric_names(breaks):
        # x tick labels: display name of each metric.
        return [metrics_labels[key] for key in breaks]

    def group_names(breaks):
        # Legend entries: display name of each group.
        return [groups_labels[key] for key in breaks]

    def two_decimals(breaks):
        # Tick labels printed with exactly two decimal digits.
        return ['{:.2f}'.format(tick) for tick in breaks]

    def model_strip(level):
        return f'{models_labels[level]}'

    theme_settings = dict(
        # No axis titles at all.
        axis_title_x=p9.element_blank(),
        axis_title_y=p9.element_blank(),
        # Small tick labels on both axes.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, untitled, with a small box margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        # Overall figure size.
        figure_size=figure_size,
    )

    components = (
        # Side-by-side boxes for the groups within each metric.
        p9.geom_boxplot(position='dodge'),
        # Named x axis with relabelled metrics.
        p9.scale_x_discrete(name='Metric', labels=metric_names),
        # Named y axis with padding above and below and 2-digit tick labels.
        p9.scale_y_continuous(name='Value',
                              expand=(0, 0.05),
                              labels=two_decimals),
        # Group fill colours from the qualitative ColorBrewer 'Set2' palette
        # (picked by the original author with colour-blind readers in mind).
        p9.scale_fill_brewer(name='Group',
                             labels=group_names,
                             type='qual',
                             palette='Set2'),
        # One row per model, each free to pick its own y range.
        p9.facet_grid('model ~ .',
                      scales='free_y',
                      labeller=p9.labeller(rows=model_strip)),
        p9.theme(**theme_settings),
    )

    plot = p9.ggplot(dataframe, p9.aes(x='variable', y='value', fill='group'))
    for component in components:
        plot = plot + component
    return plot
def plot_distributions_bar_plot_grid(dataframe, figure_size=(14, 4)):
    """
    Build count bar plots per threshold, one facet column per iteration.

    The plot reads the columns ``threshold`` (x) and ``value`` (fill) and is
    faceted by ``iterations``. The fill keys ``0`` and ``1`` are coloured and
    shown in the legend as "Stable" and "Unstable"; every bar is annotated
    with its count.

    Args:
        dataframe: Data handed to ``p9.ggplot``; must provide the columns
            named above.
        figure_size: Passed through to the theme as ``figure_size``.

    Returns:
        The assembled plotnine plot.
    """
    # Colour and legend text for the two fill keys.
    fill_colors = {0: '#009e73', 1: '#d55e00'}
    stability_names = {0: 'Stable', 1: 'Unstable'}

    def legend_names(breaks):
        return [stability_names[key] for key in breaks]

    def iterations_strip(level):
        return f'iters = {level}'

    # Count annotation sitting just above each dodged bar.
    bar_annotations = p9.geom_text(p9.aes(label='stat(count)'),
                                   stat='count',
                                   position=p9.position_dodge(0.9),
                                   size=7,
                                   va='bottom')

    theme_settings = dict(
        # No y axis title.
        axis_title_y=p9.element_blank(),
        # Small tick labels on both axes.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, untitled, with a small box margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        # Overall figure size.
        figure_size=figure_size,
    )

    components = (
        # Side-by-side bars for the fill keys.
        p9.geom_bar(position='dodge'),
        bar_annotations,
        # Named x axis.
        p9.scale_x_discrete(name='Threshold'),
        # Named y axis; expansion is (mul_bottom, add_bottom, mul_top,
        # add_top), i.e. only extra head room at the top.
        p9.scale_y_continuous(name='Count', expand=(0, 0, 0, 500)),
        # Fixed bar colours and legend entry names.
        p9.scale_fill_manual(values=fill_colors, labels=legend_names),
        # One column per iterations value with relabelled strips.
        p9.facet_grid('. ~ iterations',
                      labeller=p9.labeller(cols=iterations_strip)),
        p9.theme(**theme_settings),
    )

    plot = p9.ggplot(dataframe, p9.aes(x='threshold', fill='value'))
    for component in components:
        plot = plot + component
    return plot
Example #4
0
def plot_xbs(df, group, var, n_side=9, n_delta=6):
    r"""Construct Xbar and S chart

    Construct an Xbar and S chart to assess the state of statistical control of
    a dataset.

    The data are summarized per value of ``group`` (mean ``X`` and standard
    deviation ``S`` of ``var``). A centerline and lower/upper control limits
    are derived for both summaries, and every group is marked according to
    whether it lies outside the limits or is flagged by the run checks
    (above/below the centerline, increasing/decreasing). The two summaries are
    drawn in separate facet rows labelled "Mean" and "Variability", each with
    its own y scale.

    Args:
        df (DataFrame): Data to analyze
        group (str): Variable for grouping
        var (str): Variable to study

    Keyword args:
        n_side (int): Number of consecutive runs above/below centerline to flag
        n_delta (int): Number of consecutive runs increasing/decreasing to flag

    Returns:
        plotnine object: Xbar and S chart

    Examples::

        import grama as gr
        DF = gr.Intention()

        from grama.data import df_shewhart
        (
            df_shewhart
            >> gr.tf_mutate(idx=DF.index // 10)
            >> gr.pt_xbs("idx", "tensile_strength")
        )

    """
    ## Prepare the data
    DF = Intention()
    # One row per group: mean (X) and standard deviation (S) of `var`, plus
    # n from nfcn(DF.index) -- presumably the group's row count; confirm
    # against nfcn.
    df_batched = (df >> tf_group_by(group) >> tf_summarize(
        X=mean(DF[var]),
        S=sd(DF[var]),
        n=nfcn(DF.index),
    ) >> tf_ungroup())

    # Single-row frame of overall statistics: averages of the per-group
    # means, standard deviations and n values.
    df_stats = (df_batched >> tf_summarize(
        X_center=mean(DF.X),
        S_biased=mean(DF.S),
        n=mean(DF.n),
    ))
    # Average of the per-group n values, used in every limit formula below.
    # NOTE(review): this is not an integer when groups differ in size --
    # confirm c_sd / B3 / B4 are meant to accept that.
    n = df_stats.n[0]
    # Centerline for S: the averaged S rescaled by c_sd(n) (presumably a bias
    # correction factor for the sample standard deviation -- confirm).
    df_stats["S_center"] = df_stats.S_biased / c_sd(n)
    # Limits for the mean chart: center -/+ 3 * S_center / sqrt(n).
    df_stats["X_LCL"] = df_stats.X_center - 3 * df_stats.S_center / sqrt(n)
    df_stats["X_UCL"] = df_stats.X_center + 3 * df_stats.S_center / sqrt(n)
    # Limits for the S chart: S_center scaled by the B3(n) / B4(n) factors.
    df_stats["S_LCL"] = B3(n) * df_stats.S_center
    df_stats["S_UCL"] = B4(n) * df_stats.S_center

    ## Reshape for plotting
    # Long form of the guidelines: each "<chart>_<stat>" column name is split
    # on "_" into `_var` (X or S) and `_stat` (LCL, center or UCL).
    df_stats_long = (df_stats >> tf_pivot_longer(
        columns=["X_LCL", "X_center", "X_UCL", "S_LCL", "S_center", "S_UCL"],
        names_to=["_var", "_stat"],
        names_sep="_",
        values_to="_value",
    ))
    # Fake group value to avoid issue with discrete group variable
    df_stats_long[group] = [df_batched[group].values[0]
                            ] * df_stats_long.shape[0]

    # Long form of the per-group summaries (one row per group and chart),
    # extended with the pattern flags used for colouring and marker shapes.
    df_batched_long = (
        df_batched >> tf_pivot_longer(
            columns=["X", "S"],
            names_to="_var",
            values_to="_value",
        )
        ## Flag patterns
        # Attach the guideline values of the matching chart (`_var`) to every
        # row. The ".value" name spec presumably spreads the second part of
        # each column name into LCL / center / UCL columns, which the DF.LCL,
        # DF.center and DF.UCL references below rely on.
        >> tf_left_join(
            df_stats >> tf_pivot_longer(
                columns=[
                    "X_LCL", "X_center", "X_UCL", "S_LCL", "S_center", "S_UCL"
                ],
                names_to=["_var", ".value"],
                names_sep="_",
            ),
            by="_var",
        # The flags are computed within each chart separately (grouped by
        # `_var`). `consec` is defined elsewhere; presumably it marks members
        # of runs of at least `i` consecutive True values -- confirm.
        ) >> tf_group_by("_var") >> tf_mutate(
            outlier_below=(DF._value < DF.LCL),  # Outside control limits
            outlier_above=(DF.UCL < DF._value),
            below=consec(DF._value < DF.center, i=n_side),  # Below mean
            above=consec(DF.center < DF._value, i=n_side),  # Above mean
        # Trend flags use differences to the next (lead) and to the previous
        # (lag) row and combine both with `|`; the run length passed to
        # `consec` is n_delta - 1.
        ) >> tf_mutate(
            decreasing=consec((lead(DF._value) - DF._value) < 0, i=n_delta - 1)
            |  # Decreasing
            consec((DF._value - lag(DF._value)) < 0, i=n_delta - 1),
            increasing=consec(0 < (lead(DF._value) - DF._value), i=n_delta - 1)
            |  # Increasing
            consec(0 < (DF._value - lag(DF._value)), i=n_delta - 1),
        # Collapse the flags into a colour key (`sign`) and a marker key
        # (`glyph`). Presumably the first matching case wins, so limit
        # violations take priority over runs -- confirm against case_when.
        ) >> tf_mutate(
            sign=case_when([DF.outlier_below, "-2"], [DF.outlier_above, "+2"],
                           [DF.below | DF.decreasing, "-1"],
                           [DF.above | DF.increasing, "+1"], [True, "0"]),
            glyph=case_when(
                [DF.outlier_below, "Below Limit"],
                [DF.outlier_above, "Above Limit"],
                [DF.below, "Low Run"],
                [DF.above, "High Run"],
                [DF.increasing, "Increasing Run"],
                [DF.decreasing, "Decreasing Run"],
                [True, "None"],
            )) >> tf_ungroup())

    ## Visualize
    # Layers: horizontal guidelines (line type by `_stat`), a thin line
    # through the per-group values, and points coloured by `sign` and shaped
    # by `glyph`. guides(color=None) is applied to the colour guide --
    # presumably to hide its legend, since `glyph` carries the same
    # information; confirm. Facet rows are the two charts with free y scales.
    return (df_batched_long >> ggplot(aes(x=group)) + geom_hline(
        data=df_stats_long,
        mapping=aes(yintercept="_value", linetype="_stat"),
    ) + geom_line(aes(y="_value", group="_var"), size=0.2) + geom_point(
        aes(y="_value", color="sign", shape="glyph"),
        size=3,
    ) + scale_color_manual(values={
        "-2": "blue",
        "-1": "darkturquoise",
        "0": "black",
        "+1": "salmon",
        "+2": "red"
    }, ) + scale_shape_manual(
        name="Patterns",
        values={
            "Below Limit": "s",
            "Above Limit": "s",
            "Low Run": "X",
            "High Run": "X",
            "Increasing Run": "^",
            "Decreasing Run": "v",
            "None": "."
        },
    ) + scale_linetype_manual(
        name="Guideline",
        values=dict(LCL="dashed", UCL="dashed", center="solid"),
    ) + guides(color=None) + facet_grid(
        "_var~.",
        scales="free_y",
        labeller=labeller(dict(X="Mean", S="Variability")),
    ) + labs(
        x="Group variable ({})".format(group),
        y="Value ({})".format(var),
    ))
Example #5
0
from plotnine.data import mtcars


def number_to_word(n):
    """
    Spell out a single-digit number, e.g. ``3`` or ``'3'`` -> ``'three'``.

    Args:
        n: Anything ``int()`` accepts (number or numeric string). Other
            values are tolerated and simply echoed back as text.

    Returns:
        str: The English word for values that convert to 0..9, otherwise
        ``str(n)`` unchanged.
    """
    words = (
        'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
        'nine'
    )
    try:
        index = int(n)
    except (TypeError, ValueError, OverflowError):
        # Not convertible to an integer: fall back to the plain text form
        # instead of raising from inside a labeller.
        return str(n)

    # Explicit range check: a negative index would otherwise wrap around
    # (-1 -> 'nine') instead of taking the fallback.
    if 0 <= index < len(words):
        return words[index]
    return str(n)


# Labeller fixtures used by the tests in this module: single-line strips,
# with the named labelling functions chosen per margin.
labeller_cols_both = labeller(
    rows='label_value', cols='label_both', multi_line=False)

# Same single-line layout with 'label_both' on both margins; the `am` and
# `gear` variables are additionally routed through number_to_word.
labeller_towords = labeller(
    rows='label_both',
    cols='label_both',
    multi_line=False,
    **dict.fromkeys(('am', 'gear'), number_to_word))

# Base scatter plot that every test adds its facet specification to.
g = (
    ggplot(mtcars, aes(x='wt', y='mpg'))
    + geom_point()
)


def test_label_value():
    """Facet the base plot by ``gear`` with the 'label_value' labeller.

    NOTE(review): comparing the plot to a string presumably goes through the
    test-suite's overridden plot equality (baseline comparison) -- confirm in
    the test configuration.
    """
    faceted = g + facet_wrap('~ gear', labeller='label_value')

    assert faceted == 'label_value'
Example #6
0
     p9.facet_grid("lambda_d ~ lambda_c", labeller="label_both", scales="free")
     + p9.geom_point() + p9.theme(figure_size=(12, 12)))
print(g)

# Sweep lambda_c and lambda_d over the same 5-point linear grid from 1e-6
# to 1e-3.
lambda_c_grid = np.linspace(1e-6, 1e-3, num=5)
lambda_d_grid = np.linspace(1e-6, 1e-3, num=5)

# Run the sweep on the dataset subset. The result is plotted below through
# its dim1, dim2, journal, lambda_c and lambda_d columns.
plot_df = run_saucie_param_grid(full_paper_dataset_subset,
                                lambda_c_grid=lambda_c_grid,
                                lambda_d_grid=lambda_d_grid)

# Scatter of dim1 vs dim2 filled by journal, one panel per
# (lambda_d, lambda_c) pair. Strip labels re-parse the facet value as a
# float and print it in scientific notation with three decimals.
g = (p9.ggplot(plot_df) + p9.aes(x="dim1", y="dim2", fill="journal") +
     p9.facet_grid(
         "lambda_d ~ lambda_c",
         labeller=p9.labeller(
             cols=lambda s: f"lambda_c: {float(s):.3e}",
             rows=lambda s: f"lambda_d: {float(s):.3e}",
         ),
         scales="free",
     ) + p9.geom_point() + p9.theme(figure_size=(12, 12)))
# Write the figure at 500 dpi.
# NOTE(review): assumes output/figures/ already exists -- confirm.
g.save("output/figures/saucie_hyperparam_lambda_cd.png", dpi=500)
print(g)

# Grids for the next sweep: three learning rates on a linear grid from 1e-6
# to 1e-3, and a list of step counts.
learning_rate_grid = np.linspace(1e-6, 1e-3, num=3)
# NOTE(review): 10000 appears twice in this list -- possibly a typo for a
# different final value; confirm.
steps_grid = [1000, 3000, 5000, 10000, 10000]

plot_df = run_saucie_param_grid(
    full_paper_dataset_subset,
    lambda_c_grid=[1.000e-3],
    lambda_d_grid=[1.000e-3],
    steps_grid=steps_grid,
    learning_rate_grid=learning_rate_grid,
Example #7
0
from plotnine import ggplot, aes, geom_point
from plotnine import facet_grid, facet_wrap
from plotnine import labeller, as_labeller
from plotnine.data import mtcars


def number_to_word(n):
    """
    Spell out a single-digit number, e.g. ``3`` or ``'3'`` -> ``'three'``.

    Args:
        n: Anything ``int()`` accepts (number or numeric string). Other
            values are tolerated and simply echoed back as text.

    Returns:
        str: The English word for values that convert to 0..9, otherwise
        ``str(n)`` unchanged.
    """
    words = ('zero', 'one', 'two', 'three', 'four',
             'five', 'six', 'seven', 'eight', 'nine')
    try:
        index = int(n)
    except (TypeError, ValueError, OverflowError):
        # Not convertible to an integer: fall back to the plain text form
        # instead of raising from inside a labeller.
        return str(n)

    # Explicit range check: a negative index would otherwise wrap around
    # (-1 -> 'nine') instead of taking the fallback.
    if 0 <= index < len(words):
        return words[index]
    return str(n)


# Labeller fixtures used by the tests in this module: single-line strips,
# with the named labelling functions chosen per margin.
labeller_cols_both = labeller(
    rows='label_value',
    cols='label_both',
    multi_line=False,
)

# Same single-line layout with 'label_both' on both margins; the `am` and
# `gear` variables are additionally routed through number_to_word.
labeller_towords = labeller(
    rows='label_both',
    cols='label_both',
    multi_line=False,
    **dict.fromkeys(('am', 'gear'), number_to_word),
)

# Base scatter plot that every test adds its facet specification to.
g = (
    ggplot(mtcars, aes(x='wt', y='mpg'))
    + geom_point()
)


def test_label_value():
    """Facet the base plot by ``gear`` with the 'label_value' labeller.

    NOTE(review): comparing the plot to a string presumably goes through the
    test-suite's overridden plot equality (baseline comparison) -- confirm in
    the test configuration.
    """
    faceted = g + facet_wrap('~ gear', labeller='label_value')

    assert faceted == 'label_value'


def test_label_both():