def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Build a faceted density histogram of example lengths with mean markers.

    The histogram is drawn from the ``x`` column of ``len_df``, filled by
    ``Method`` and faceted by ``Task``.  For every (Task, Method) pair the
    mean of ``x`` is drawn as a dashed black vertical segment, with its value
    printed above the segment to one decimal place.

    Args:
        len_df: DataFrame with at least the columns ``x``, ``Task`` and
            ``Method``.
        legend_position: Forwarded to ``theme(legend_position=...)``.
        legend_box: Forwarded to ``theme(legend_box=...)``.

    Returns:
        The assembled ggplot object (not drawn or saved here).
    """
    # numeric_only=True: since pandas 2.0 a bare .mean() raises TypeError when
    # any non-key column is non-numeric; older versions dropped such columns
    # silently.  Only the numeric ``x`` mean is consumed by the plot below.
    mean_len_df = (
        len_df.groupby(['Task', 'Method'])
        .mean(numeric_only=True)
        .reset_index()
    )
    # Constant column with a blank name: it is mapped to ``linetype`` below so
    # the mean segments get a legend entry reading 'Mean Length'.
    mean_len_df[' '] = 'Mean Length'

    plot = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + geom_histogram(binwidth=2, position='identity', alpha=.6)
        # Mean value labels, placed just above the top of the mean segments.
        + geom_text(
            aes(x='x', y=.22, label='x', color='Method'),
            mean_len_df,
            inherit_aes=False,
            format_string='{:.1f}',
            show_legend=False,
        )
        # Vertical mean markers from the baseline up to y=.205.
        + geom_segment(
            aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
            mean_len_df,
            inherit_aes=False,
            color='black',
        )
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + theme(
            aspect_ratio=1,
            legend_title=element_blank(),
            legend_position=legend_position,
            legend_box=legend_box,
        )
    )
    return plot
def test_scale_linetype_strings_tuples():
    """scale_linetype_manual accepts named linetype strings as values."""
    # Three two-point lines, one per level of "lt", so that each manual
    # linetype value is used exactly once.
    data = pd.DataFrame({
        "x": [0, 1] * 3,
        "y": [0, 1, 0, 2, 0, 3],
        "lt": [level for level in ("A", "B", "C") for _ in range(2)],
    })
    line_layer = geom_line(
        aes(x="x", y="y", ymax="y+1", linetype="lt", group="lt")
    )
    linetype_scale = scale_linetype_manual(
        values=['solid', 'dashed', 'dotted']
    )

    plot = ggplot(data)
    plot += line_layer
    plot += linetype_scale

    # Comparison against a string: presumably resolved by the test suite's
    # ggplot equality hook to a named baseline -- confirm in the test config.
    assert plot == "scale_linetype_manual_strings"
def test_scale_linetype_manual_tuples():
    """scale_linetype_manual accepts tuples as mapping results.

    Tuple values take a different form from the named string linetypes, so
    they must be tested specifically.
    """
    # Three two-point lines, one per level of "lt", so that each manual
    # linetype value is used exactly once.
    data = pd.DataFrame({
        "x": [0, 1] * 3,
        "y": [0, 1, 0, 2, 0, 3],
        "lt": [level for level in ("A", "B", "C") for _ in range(2)],
    })
    line_layer = geom_line(
        aes(x="x", y="y", ymax="y+1", linetype="lt", group="lt")
    )
    tuple_linetypes = (
        (2, (5, 3, 1, 3)),
        (1, (10, 2)),
        (1, (1, 2)),
    )
    linetype_scale = scale_linetype_manual(values=tuple_linetypes)

    plot = ggplot(data)
    plot += line_layer
    plot += linetype_scale

    # Comparison against a string: presumably resolved by the test suite's
    # ggplot equality hook to a named baseline -- confirm in the test config.
    assert plot == "scale_linetype_manual_tuples"
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Build a faceted density histogram of example lengths with mean markers.

    The histogram is drawn from the ``x`` column of ``len_df``, filled by
    ``Method`` and faceted by ``Task``.  For every (Task, Method) pair the
    mean of ``x`` is drawn as a dashed black vertical segment, with its value
    printed above the segment to one decimal place.

    Args:
        len_df: DataFrame with at least the columns ``x``, ``Task`` and
            ``Method``.
        legend_position: Forwarded to ``theme(legend_position=...)``.
        legend_box: Forwarded to ``theme(legend_box=...)``.

    Returns:
        The assembled ggplot object (not drawn or saved here).
    """
    # numeric_only=True: since pandas 2.0 a bare .mean() raises TypeError when
    # any non-key column is non-numeric; older versions dropped such columns
    # silently.  Only the numeric ``x`` mean is consumed by the plot below.
    mean_len_df = (
        len_df.groupby(['Task', 'Method'])
        .mean(numeric_only=True)
        .reset_index()
    )
    # Constant column with a blank name: it is mapped to ``linetype`` below so
    # the mean segments get a legend entry reading 'Mean Length'.
    mean_len_df[' '] = 'Mean Length'

    plot = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + geom_histogram(binwidth=2, position='identity', alpha=.6)
        # Mean value labels, placed just above the top of the mean segments.
        + geom_text(
            aes(x='x', y=.22, label='x', color='Method'),
            mean_len_df,
            inherit_aes=False,
            format_string='{:.1f}',
            show_legend=False,
        )
        # Vertical mean markers from the baseline up to y=.205.
        + geom_segment(
            aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
            mean_len_df,
            inherit_aes=False,
            color='black',
        )
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + theme(
            aspect_ratio=1,
            legend_title=element_blank(),
            legend_position=legend_position,
            legend_box=legend_box,
        )
    )
    return plot
def plot_xbs(df, group, var, n_side=9, n_delta=6):
    r"""Construct Xbar and S chart

    Construct an Xbar and S chart to assess the state of statistical control of
    a dataset.

    The data are summarized per level of ``group`` (mean ``X``, standard
    deviation ``S`` and size ``n`` of ``var``). Centerlines and lower/upper
    control limits are then computed from those per-group summaries, and each
    group is flagged when it falls outside the limits or belongs to a run
    (see ``n_side`` and ``n_delta``). The result is a two-facet chart: one
    facet for the group means and one for the group variability.

    Args:
        df (DataFrame): Data to analyze
        group (str): Variable for grouping
        var (str): Variable to study

    Keyword args:
        n_side (int): Number of consecutive runs above/below centerline to flag
        n_delta (int): Number of consecutive runs increasing/decreasing to flag

    Returns:
        plotnine object: Xbar and S chart

    Examples::

        import grama as gr
        DF = gr.Intention()

        from grama.data import df_shewhart
        (
            df_shewhart
            >> gr.tf_mutate(idx=DF.index // 10)
            >> gr.pt_xbs("idx", "tensile_strength")
        )

    """
    ## Prepare the data
    DF = Intention()
    # Per-group summaries: one row per level of `group`.
    # NOTE(review): nfcn(DF.index) presumably counts rows per group -- confirm.
    df_batched = (df >> tf_group_by(group) >> tf_summarize(
        X=mean(DF[var]),
        S=sd(DF[var]),
        n=nfcn(DF.index),
    ) >> tf_ungroup())

    # Overall statistics: a single row holding the averages of the per-group
    # summaries.
    df_stats = (df_batched >> tf_summarize(
        X_center=mean(DF.X),
        S_biased=mean(DF.S),
        n=mean(DF.n),
    ))
    # `n` is the *mean* group size.
    # NOTE(review): with unequal group sizes this can be non-integer -- confirm
    # that c_sd / B3 / B4 accept such a value.
    n = df_stats.n[0]
    # NOTE(review): c_sd, B3 and B4 are defined outside this block; presumably
    # the usual control-chart correction and limit factors -- confirm.
    df_stats["S_center"] = df_stats.S_biased / c_sd(n)
    # Limits for the group means: center -/+ 3 * S_center / sqrt(n).
    df_stats["X_LCL"] = df_stats.X_center - 3 * df_stats.S_center / sqrt(n)
    df_stats["X_UCL"] = df_stats.X_center + 3 * df_stats.S_center / sqrt(n)
    # Limits for the group standard deviations.
    df_stats["S_LCL"] = B3(n) * df_stats.S_center
    df_stats["S_UCL"] = B4(n) * df_stats.S_center

    ## Reshape for plotting
    # Column names such as "X_LCL" are split on "_" into `_var` ("X"/"S") and
    # `_stat` ("LCL"/"center"/"UCL"), one guideline value per row.
    df_stats_long = (df_stats >> tf_pivot_longer(
        columns=["X_LCL", "X_center", "X_UCL", "S_LCL", "S_center", "S_UCL"],
        names_to=["_var", "_stat"],
        names_sep="_",
        values_to="_value",
    ))
    # Fake group value to avoid issue with discrete group variable
    df_stats_long[group] = [df_batched[group].values[0]
                            ] * df_stats_long.shape[0]

    df_batched_long = (
        df_batched >> tf_pivot_longer(
            columns=["X", "S"],
            names_to="_var",
            values_to="_value",
        )
        ## Flag patterns
        # Join each observation with the LCL / center / UCL of its own chart
        # (matched on `_var`); the ".value" entry spreads the second name part
        # into the columns LCL, center and UCL that are read below.
        >> tf_left_join(
            df_stats >> tf_pivot_longer(
                columns=[
                    "X_LCL", "X_center", "X_UCL", "S_LCL", "S_center", "S_UCL"
                ],
                names_to=["_var", ".value"],
                names_sep="_",
            ),
            by="_var",
        ) >> tf_group_by("_var") >> tf_mutate(
            outlier_below=(DF._value < DF.LCL),  # Outside control limits
            outlier_above=(DF.UCL < DF._value),
            below=consec(DF._value < DF.center, i=n_side),  # Below mean
            above=consec(DF.center < DF._value, i=n_side),  # Above mean
        ) >> tf_mutate(
            # A run of n_delta points has n_delta - 1 successive differences,
            # hence i=n_delta - 1. Forward (lead) and backward (lag)
            # differences are OR-ed together.
            # NOTE(review): presumably so both ends of a run are flagged --
            # confirm against consec's semantics.
            decreasing=consec((lead(DF._value) - DF._value) < 0, i=n_delta - 1)
            |  # Decreasing
            consec((DF._value - lag(DF._value)) < 0, i=n_delta - 1),
            increasing=consec(0 < (lead(DF._value) - DF._value), i=n_delta - 1)
            |  # Increasing
            consec(0 < (DF._value - lag(DF._value)), i=n_delta - 1),
        ) >> tf_mutate(
            # `sign` drives the point color and `glyph` the point shape.
            # The cases are listed with limit violations ahead of run patterns.
            # NOTE(review): presumably the first matching case wins -- confirm
            # against case_when's semantics.
            sign=case_when([DF.outlier_below, "-2"], [DF.outlier_above, "+2"],
                           [DF.below | DF.decreasing, "-1"],
                           [DF.above | DF.increasing, "+1"], [True, "0"]),
            glyph=case_when(
                [DF.outlier_below, "Below Limit"],
                [DF.outlier_above, "Above Limit"],
                [DF.below, "Low Run"],
                [DF.above, "High Run"],
                [DF.increasing, "Increasing Run"],
                [DF.decreasing, "Decreasing Run"],
                [True, "None"],
            )) >> tf_ungroup())

    ## Visualize
    # `+` binds tighter than `>>`, so the whole plot specification is assembled
    # first and the long-format data is then piped into it.
    return (df_batched_long >> ggplot(aes(x=group)) + geom_hline(
        data=df_stats_long,
        mapping=aes(yintercept="_value", linetype="_stat"),
    ) + geom_line(aes(y="_value", group="_var"), size=0.2) + geom_point(
        aes(y="_value", color="sign", shape="glyph"),
        size=3,
    ) + scale_color_manual(values={
        "-2": "blue",
        "-1": "darkturquoise",
        "0": "black",
        "+1": "salmon",
        "+2": "red"
    }, ) + scale_shape_manual(
        name="Patterns",
        values={
            "Below Limit": "s",
            "Above Limit": "s",
            "Low Run": "X",
            "High Run": "X",
            "Increasing Run": "^",
            "Decreasing Run": "v",
            "None": "."
        },
    ) + scale_linetype_manual(
        name="Guideline",
        values=dict(LCL="dashed", UCL="dashed", center="solid"),
    ) + guides(color=None) + facet_grid(
        "_var~.",
        scales="free_y",
        labeller=labeller(dict(X="Mean", S="Variability")),
    ) + labs(
        x="Group variable ({})".format(group),
        y="Value ({})".format(var),
    ))
"2020ML": "#33a02c", "2020": "#1f78b4", } g = ( p9.ggplot(publish_rate_df.rename(index=str, columns={"label": "Label"})) + p9.aes( x="pub_month", y="rate", fill="Label", group="Label", color="Label", linetype="Label", shape="Label", ) + p9.geom_point(size=2) + p9.geom_line() + p9.scale_linetype_manual(["solid", "solid", "solid"]) + p9.scale_color_manual( [color_mapper["2020"], color_mapper["2020ML"], color_mapper["2018"]]) + p9.scale_fill_manual( [color_mapper["2020"], color_mapper["2020ML"], color_mapper["2018"]]) + p9.scale_shape_manual(["o", "o", "o"]) # plot the x axis titles + p9.geom_vline(xintercept=[2.5, 14.5, 26.5, 38.5, 50.5, 62.5, 74.5]) + p9.geom_text(label="2014", x=8.5, y=0, color="black", size=13) + p9.geom_text(label="2015", x=20.5, y=0, color="black", size=13) + p9.geom_text(label="2016", x=32.5, y=0, color="black", size=13) + p9.geom_text(label="2017", x=44.5, y=0, color="black", size=13) + p9.geom_text(label="2018", x=56.5, y=0, color="black", size=13) + p9.geom_text(label="2019", x=68.5, y=0, color="black", size=13) # Plot the overall proportion published + p9.geom_hline(
"2020ML": "#33a02c", "2020": "#1f78b4", } # In[21]: g = ( p9.ggplot(publish_rate_df.rename(index=str, columns={"label": "Label"})) + p9.aes(x="pub_month", y="rate", fill="Label", group="Label", color="Label", linetype="Label", shape="Label") + p9.geom_point(size=2) + p9.geom_line() + p9.scale_linetype_manual(['solid', 'solid', 'solid']) + p9.scale_color_manual( [color_mapper['2020'], color_mapper['2020ML'], color_mapper['2018']]) + p9.scale_fill_manual( [color_mapper['2020'], color_mapper['2020ML'], color_mapper['2018']]) + p9.scale_shape_manual(['o', 'o', 'o']) # plot the x axis titles + p9.geom_vline(xintercept=[2.5, 14.5, 26.5, 38.5, 50.5, 62.5, 74.5]) + p9.geom_text(label="2014", x=8.5, y=0, color="black") + p9.geom_text(label="2015", x=20.5, y=0, color="black") + p9.geom_text(label="2016", x=32.5, y=0, color="black") + p9.geom_text(label="2017", x=44.5, y=0, color="black") + p9.geom_text(label="2018", x=56.5, y=0, color="black") + p9.geom_text(label="2019", x=68.5, y=0, color="black")