Exemple #1
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Build a faceted density histogram of ``x`` with per-group mean markers.

    One panel is drawn per ``Task``.  Within each panel the ``x`` column is
    binned (bin width 2) and filled by ``Method``.  The mean of every
    (Task, Method) group is overlaid as a dashed vertical segment topped by a
    one-decimal text label.

    Args:
        len_df: DataFrame providing the ``x``, ``Task`` and ``Method`` columns.
        legend_position: Passed through to ``theme(legend_position=...)``.
        legend_box: Passed through to ``theme(legend_box=...)``.

    Returns:
        The assembled ggplot object.
    """
    group_means = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # Constant column mapped to linetype below; presumably there so the mean
    # segment gets a legend entry labelled 'Mean Length'.
    group_means[' '] = 'Mean Length'

    base = ggplot(len_df) + aes(x='x', fill='Method', y='..density..')

    histogram = geom_histogram(binwidth=2, position='identity', alpha=.6)

    # The overlays use the summary frame, so they must not inherit the
    # histogram aesthetics (fill / ..density..) from the base plot.
    mean_labels = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        group_means,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False,
    )
    mean_markers = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        group_means,
        inherit_aes=False,
        color='black',
    )

    appearance = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    return (
        base
        + histogram
        + mean_labels
        + mean_markers
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + appearance
    )
def test_scale_linetype_strings_tuples():
    # scale_linetype_manual given plain linetype name strings, one per level
    # of the "lt" column ("A", "B", "C").
    data = pd.DataFrame(
        dict(
            x=[0, 1, 0, 1, 0, 1],
            y=[0, 1, 0, 2, 0, 3],
            lt=list("AABBCC"),
        )
    )

    lines = geom_line(
        aes(x="x", y="y", ymax="y+1", linetype="lt", group="lt")
    )
    linetypes = scale_linetype_manual(values=['solid', 'dashed', 'dotted'])

    plot = ggplot(data) + lines + linetypes

    # NOTE(review): comparing a ggplot with a string is presumably the test
    # suite's baseline-image comparison hook -- confirm against conftest.
    assert plot == "scale_linetype_manual_strings"
def test_scale_linetype_manual_tuples():
    # scale_linetype_manual must also accept tuples as the mapped values;
    # that path needs its own test.
    data = pd.DataFrame(
        dict(
            x=[0, 1, 0, 1, 0, 1],
            y=[0, 1, 0, 2, 0, 3],
            lt=list("AABBCC"),
        )
    )

    # One tuple per level of "lt".
    # NOTE(review): each looks like (offset, (on, off, ...)) dash spec -- confirm.
    dash_specs = (
        (2, (5, 3, 1, 3)),
        (1, (10, 2)),
        (1, (1, 2)),
    )

    lines = geom_line(
        aes(x="x", y="y", ymax="y+1", linetype="lt", group="lt")
    )

    plot = ggplot(data) + lines + scale_linetype_manual(values=dash_specs)

    # NOTE(review): comparing a ggplot with a string is presumably the test
    # suite's baseline-image comparison hook -- confirm against conftest.
    assert plot == "scale_linetype_manual_tuples"
Exemple #4
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Plot the per-task distribution of ``x`` by method, marking group means.

    Args:
        len_df: DataFrame providing the ``x``, ``Task`` and ``Method`` columns.
        legend_position: Passed through to ``theme(legend_position=...)``.
        legend_box: Passed through to ``theme(legend_box=...)``.

    Returns:
        A ggplot object: density histograms of ``x`` (bin width 2) filled by
        ``Method`` and faceted by ``Task``, with each (Task, Method) mean drawn
        as a dashed vertical segment and a one-decimal text label.
    """
    per_group_mean = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # Constant column used only as the linetype mapping of the mean segment.
    per_group_mean[' '] = 'Mean Length'

    # Components are listed in the exact order they are added to the plot.
    components = [
        aes(x='x', fill='Method', y='..density..'),
        geom_histogram(binwidth=2, position='identity', alpha=.6),
        # Mean value printed just above the top of its marker segment.
        geom_text(
            aes(x='x', y=.22, label='x', color='Method'),
            per_group_mean,
            inherit_aes=False,
            format_string='{:.1f}',
            show_legend=False,
        ),
        # Vertical marker at the mean (x == xend).
        geom_segment(
            aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
            per_group_mean,
            inherit_aes=False,
            color='black',
        ),
        scale_linetype_manual(['dashed']),
        facet_wrap('Task'),
        xlim(0, 20),
        ylim(0, .23),
        xlab('Example Length'),
        ylab('Frequency'),
        scale_color_manual(values=COLORS),
        scale_fill_manual(values=COLORS),
        theme_fs(),
        theme(
            aspect_ratio=1,
            legend_title=element_blank(),
            legend_position=legend_position,
            legend_box=legend_box,
        ),
    ]

    figure = ggplot(len_df)
    for component in components:
        figure = figure + component

    return figure
Exemple #5
0
def plot_xbs(df, group, var, n_side=9, n_delta=6):
    r"""Construct Xbar and S chart

    Construct an Xbar and S chart to assess the state of statistical control of
    a dataset. The data are summarized per level of `group`; a centerline and
    lower/upper control limits are computed for both the group means (X) and
    the group standard deviations (S); points outside the limits and runs
    above/below the centerline or steadily increasing/decreasing are flagged
    with distinct colors and shapes.

    Args:
        df (DataFrame): Data to analyze
        group (str): Variable for grouping
        var (str): Variable to study

    Keyword args:
        n_side (int): Number of consecutive runs above/below centerline to flag
        n_delta (int): Number of consecutive runs increasing/decreasing to flag

    Returns:
        plotnine object: Xbar and S chart

    Examples::

        import grama as gr
        DF = gr.Intention()

        from grama.data import df_shewhart
        (
            df_shewhart
            >> gr.tf_mutate(idx=DF.index // 10)
            >> gr.pt_xbs("idx", "tensile_strength")
        )

    """
    DF = Intention()
    # Limit/centerline columns, named "<series>_<statistic>" so they can be
    # split on "_" when pivoting. A fresh list is passed to each pivot call.
    limit_columns = (
        "X_LCL", "X_center", "X_UCL",
        "S_LCL", "S_center", "S_UCL",
    )

    ## Prepare the data: one row per group with its mean, sd and count
    df_batched = (
        df
        >> tf_group_by(group)
        >> tf_summarize(
            X=mean(DF[var]),
            S=sd(DF[var]),
            n=nfcn(DF.index),
        )
        >> tf_ungroup()
    )

    ## Centerlines and control limits (single-row frame)
    df_stats = df_batched >> tf_summarize(
        X_center=mean(DF.X),
        S_biased=mean(DF.S),
        n=mean(DF.n),
    )
    n = df_stats.n[0]
    # NOTE(review): c_sd(n) is presumably the bias-correction constant for
    # the sample standard deviation -- confirm against its definition.
    df_stats["S_center"] = df_stats.S_biased / c_sd(n)
    half_width = 3 * df_stats.S_center / sqrt(n)
    df_stats["X_LCL"] = df_stats.X_center - half_width
    df_stats["X_UCL"] = df_stats.X_center + half_width
    df_stats["S_LCL"] = B3(n) * df_stats.S_center
    df_stats["S_UCL"] = B4(n) * df_stats.S_center

    ## Reshape for plotting: one row per (series, statistic) guideline
    df_stats_long = df_stats >> tf_pivot_longer(
        columns=list(limit_columns),
        names_to=["_var", "_stat"],
        names_sep="_",
        values_to="_value",
    )
    # Fake group value to avoid issue with discrete group variable
    placeholder_group = df_batched[group].values[0]
    df_stats_long[group] = [placeholder_group] * df_stats_long.shape[0]

    # Same limits, but one row per series with LCL / center / UCL columns,
    # ready to be joined onto the per-group observations.
    df_limits = df_stats >> tf_pivot_longer(
        columns=list(limit_columns),
        names_to=["_var", ".value"],
        names_sep="_",
    )

    df_joined = (
        df_batched
        >> tf_pivot_longer(
            columns=["X", "S"],
            names_to="_var",
            values_to="_value",
        )
        >> tf_left_join(df_limits, by="_var")
    )

    ## Flag patterns, separately within the X series and the S series
    df_batched_long = (
        df_joined
        >> tf_group_by("_var")
        >> tf_mutate(
            # Outside control limits
            outlier_below=(DF._value < DF.LCL),
            outlier_above=(DF.UCL < DF._value),
            # Runs on one side of the centerline
            below=consec(DF._value < DF.center, i=n_side),
            above=consec(DF.center < DF._value, i=n_side),
        )
        >> tf_mutate(
            # Monotone runs, checked with both forward and backward differences
            decreasing=(
                consec((lead(DF._value) - DF._value) < 0, i=n_delta - 1)
                | consec((DF._value - lag(DF._value)) < 0, i=n_delta - 1)
            ),
            increasing=(
                consec(0 < (lead(DF._value) - DF._value), i=n_delta - 1)
                | consec(0 < (DF._value - lag(DF._value)), i=n_delta - 1)
            ),
        )
        >> tf_mutate(
            # Color code: +/-2 for limit violations, +/-1 for runs, 0 otherwise
            sign=case_when(
                [DF.outlier_below, "-2"],
                [DF.outlier_above, "+2"],
                [DF.below | DF.decreasing, "-1"],
                [DF.above | DF.increasing, "+1"],
                [True, "0"],
            ),
            # Shape code naming the detected pattern
            glyph=case_when(
                [DF.outlier_below, "Below Limit"],
                [DF.outlier_above, "Above Limit"],
                [DF.below, "Low Run"],
                [DF.above, "High Run"],
                [DF.increasing, "Increasing Run"],
                [DF.decreasing, "Decreasing Run"],
                [True, "None"],
            ),
        )
        >> tf_ungroup()
    )

    ## Visualize
    sign_colors = {
        "-2": "blue",
        "-1": "darkturquoise",
        "0": "black",
        "+1": "salmon",
        "+2": "red",
    }
    pattern_shapes = {
        "Below Limit": "s",
        "Above Limit": "s",
        "Low Run": "X",
        "High Run": "X",
        "Increasing Run": "^",
        "Decreasing Run": "v",
        "None": ".",
    }
    guideline_styles = dict(LCL="dashed", UCL="dashed", center="solid")

    chart = (
        ggplot(aes(x=group))
        + geom_hline(
            data=df_stats_long,
            mapping=aes(yintercept="_value", linetype="_stat"),
        )
        + geom_line(aes(y="_value", group="_var"), size=0.2)
        + geom_point(
            aes(y="_value", color="sign", shape="glyph"),
            size=3,
        )
        + scale_color_manual(values=sign_colors)
        + scale_shape_manual(name="Patterns", values=pattern_shapes)
        + scale_linetype_manual(name="Guideline", values=guideline_styles)
        + guides(color=None)
        + facet_grid(
            "_var~.",
            scales="free_y",
            labeller=labeller(dict(X="Mean", S="Variability")),
        )
        + labs(
            x="Group variable ({})".format(group),
            y="Value ({})".format(var),
        )
    )

    # The data are piped into the fully assembled plot specification.
    return df_batched_long >> chart
    "2020ML": "#33a02c",
    "2020": "#1f78b4",
}

g = (
    p9.ggplot(publish_rate_df.rename(index=str, columns={"label": "Label"})) +
    p9.aes(
        x="pub_month",
        y="rate",
        fill="Label",
        group="Label",
        color="Label",
        linetype="Label",
        shape="Label",
    ) + p9.geom_point(size=2) + p9.geom_line() +
    p9.scale_linetype_manual(["solid", "solid", "solid"]) +
    p9.scale_color_manual(
        [color_mapper["2020"], color_mapper["2020ML"], color_mapper["2018"]]) +
    p9.scale_fill_manual(
        [color_mapper["2020"], color_mapper["2020ML"], color_mapper["2018"]]) +
    p9.scale_shape_manual(["o", "o", "o"])
    # plot the x axis titles
    + p9.geom_vline(xintercept=[2.5, 14.5, 26.5, 38.5, 50.5, 62.5, 74.5]) +
    p9.geom_text(label="2014", x=8.5, y=0, color="black", size=13) +
    p9.geom_text(label="2015", x=20.5, y=0, color="black", size=13) +
    p9.geom_text(label="2016", x=32.5, y=0, color="black", size=13) +
    p9.geom_text(label="2017", x=44.5, y=0, color="black", size=13) +
    p9.geom_text(label="2018", x=56.5, y=0, color="black", size=13) +
    p9.geom_text(label="2019", x=68.5, y=0, color="black", size=13)
    # Plot the overall proportion published
    + p9.geom_hline(
    "2020ML": "#33a02c",
    "2020": "#1f78b4",
}

# In[21]:

g = (
    p9.ggplot(publish_rate_df.rename(index=str, columns={"label": "Label"})) +
    p9.aes(x="pub_month",
           y="rate",
           fill="Label",
           group="Label",
           color="Label",
           linetype="Label",
           shape="Label") + p9.geom_point(size=2) + p9.geom_line() +
    p9.scale_linetype_manual(['solid', 'solid', 'solid']) +
    p9.scale_color_manual(
        [color_mapper['2020'], color_mapper['2020ML'], color_mapper['2018']]) +
    p9.scale_fill_manual(
        [color_mapper['2020'], color_mapper['2020ML'], color_mapper['2018']]) +
    p9.scale_shape_manual(['o', 'o', 'o'])

    # plot the x axis titles
    + p9.geom_vline(xintercept=[2.5, 14.5, 26.5, 38.5, 50.5, 62.5, 74.5]) +
    p9.geom_text(label="2014", x=8.5, y=0, color="black") +
    p9.geom_text(label="2015", x=20.5, y=0, color="black") +
    p9.geom_text(label="2016", x=32.5, y=0, color="black") +
    p9.geom_text(label="2017", x=44.5, y=0, color="black") +
    p9.geom_text(label="2018", x=56.5, y=0, color="black") +
    p9.geom_text(label="2019", x=68.5, y=0, color="black")