def plot_forces(variables: pd.DataFrame) -> None:
    """Show KDE facets of the force variables in Streamlit.

    The "Height" and "Weight" columns are dropped and the remaining columns
    are melted to long form. Each former column name is split on whitespace
    into two parts, stored as "type" and "variable" respectively. A summary
    table is emitted through ``tables.describe_table`` and two faceted KDE
    charts are concatenated side by side: every type except 'Imb' on the
    left, and 'Imb' alone on the right.
    """
    long_forces = variables.drop(["Height", "Weight"], axis=1).melt()
    long_forces[["type", "variable"]] = long_forces["variable"].str.split(expand=True)
    tables.describe_table(
        long_forces, groupby=["variable", "type"], description="variables"
    )

    shared_row_args = dict(shorthand="variable", title=None, sort=forces_order)
    type_column = alt.Column("type", title=None)

    def _faceted_kde(subset, header):
        # Same facet pipeline for both panels; only the data slice and the
        # row header styling differ.
        faceted = plot_kde().facet(
            data=subset,
            row=alt.Row(header=header, **shared_row_args),
            column=type_column,
        )
        return faceted.resolve_scale(y="independent").properties(bounds="flush")

    left_panel = _faceted_kde(
        long_forces.query("type != 'Imb'"),
        alt.Header(labelAngle=0, labelAlign="left"),
    )
    # The right panel sets its row label font size to 0, so only the left
    # panel carries readable row labels.
    right_panel = _faceted_kde(
        long_forces.query("type == 'Imb'"),
        alt.Header(labelFontSize=0),
    )

    combined = (left_panel | right_panel).configure_facet(spacing=5)
    st.altair_chart(combined)
def show_continent_cases(df, label):
    """Draw an area chart of ``label`` with one facet row per continent.

    Args:
        df: Source data handed to ``modify_data`` and ``get_continent_values``.
        label: Name of the quantitative field to plot. Underscores are
            replaced by spaces for the row title and the tooltip title.

    The chart filters out rows whose ``location`` is 'World', gives each row
    an independent y scale, and is rendered with
    ``st.altair_chart(..., use_container_width=True)``.
    """
    # NOTE(review): the original bound this call's result to a local and then
    # overwrote it on the very next statement, so the returned value was never
    # used. The call is kept in case ``modify_data`` mutates ``df`` in place;
    # confirm whether ``get_continent_values`` should receive its result.
    modify_data(df)
    continent_df = get_continent_values(df, label)

    readable_label = label.replace('_', ' ')
    value_field = label + ':Q'

    chart = (
        alt.Chart(continent_df)
        .transform_filter(alt.datum.location != 'World')
        .mark_area()
        .encode(
            x=alt.X('date:T', title='Date'),
            y=alt.Y(value_field, title=None),
            color='continent:N',
            row=alt.Row('continent:N', title=readable_label.title()),
            tooltip=[
                alt.Tooltip('date:T'),
                alt.Tooltip('continent', title='continent'),
                alt.Tooltip(value_field, format=",.0f", title=readable_label),
            ],
        )
        .configure_header(
            titleColor='grey',
            titleFontSize=13,
            labelFontSize=12,
        )
        .configure_axis(labelFontSize=11, titleFontSize=13, titleColor='grey')
        .configure_axisX(labelAngle=-30)
        .configure_legend(titleFontSize=13, labelFontSize=12)
        .properties(height=60)
        .resolve_scale(y='independent')
        .interactive()
    )
    st.altair_chart(chart, use_container_width=True)
def ridge_plot(d, value, groupby, step=30, overlap=0.8, sort=None):
    """Build a ridgeline (overlapping area) chart of ``value`` per ``groupby``.

    Args:
        d: Data passed to ``alt.Chart``; also indexed as ``d[value]`` to get
            the min/max used for the fill colour domain.
        value: Name of the field that is binned and counted.
        groupby: Name of the field that defines the facet rows.
        step: Height of each row; also sets the y range.
        overlap: Fraction of ``step`` by which the y range extends above the
            row (the y range is ``[step, -step * overlap]``).
        sort: Optional explicit row order; wrapped in ``alt.SortArray`` when
            truthy, otherwise no sort is applied.

    Returns:
        The configured Altair chart (facet spacing 0, no view stroke).
    """
    mean_keys = [groupby, "mean_value"]

    # Data pipeline: per-group mean -> bin -> count per bin -> fill empty
    # bins with 0 so every group's area is drawn over the same bins.
    pipeline = alt.Chart(d).transform_joinaggregate(
        mean_value=f"mean({value})", groupby=[groupby]
    )
    pipeline = pipeline.transform_bin(["bin_max", "bin_min"], value)
    pipeline = pipeline.transform_aggregate(
        value="count()", groupby=mean_keys + ["bin_min", "bin_max"]
    )
    pipeline = pipeline.transform_impute(
        impute="value", groupby=mean_keys, key="bin_min", value=0
    )

    areas = pipeline.mark_area(
        interpolate="monotone", fillOpacity=0.8, stroke="lightgray", strokeWidth=0.5
    )

    x_channel = alt.X(
        "bin_min:Q",
        bin="binned",
        title='activation',
        axis=alt.Axis(format='%', labelFlush=False),
    )
    y_channel = alt.Y(
        "value:Q",
        scale=alt.Scale(range=[step, -step * overlap]),
        axis=None,
    )
    # The colour domain is given as [max, min], i.e. in reversed order.
    fill_channel = alt.Fill(
        "mean_value:Q",
        legend=None,
        scale=alt.Scale(
            domain=[d[value].max(), d[value].min()], scheme="redyellowblue"
        ),
    )
    row_channel = alt.Row(
        f"{groupby}:N",
        title=None,
        sort=alt.SortArray(sort) if sort else None,
        header=alt.Header(labelAngle=0, labelAlign="right", format="%B"),
    )

    ridges = areas.encode(x_channel, y_channel, fill_channel, row_channel)
    ridges = ridges.properties(bounds="flush", height=step)
    return ridges.configure_facet(spacing=0).configure_view(stroke=None)
def plot_shap_values(X: pd.DataFrame, model: dict) -> None:
    """Render a jittered strip plot of SHAP values for the top six features.

    Args:
        X: Feature matrix; its columns label the SHAP values.
        model: Mapping from target name to a fitted model accepted by
            ``shap.TreeExplainer``. Only the "EB mean force" entry is used.

    The six features with the largest mean absolute SHAP value are kept, one
    facet row each, ordered by that ranking. Points are coloured by the
    feature's z-score, clipped to [-0.5, 0.5] to match the colour domain.
    The chart is shown with ``st.altair_chart``; nothing is returned.
    """
    target = "EB mean force"

    explainer = shap.TreeExplainer(model[target], data=X)
    shap_values = pd.DataFrame(explainer.shap_values(X), columns=X.columns)

    # Rank features by mean |SHAP| and keep the six strongest.
    y_order = shap_values.abs().mean().nlargest(6).index.to_list()
    shap_values = shap_values[y_order].melt()

    # Melting the z-scores with the same column order yields rows in the
    # same order as the melted SHAP values above.
    top_features = X[y_order]
    z_scores = (top_features - top_features.mean()) / top_features.std()
    shap_values["Z-score"] = z_scores.melt()["value"].clip(-0.5, 0.5)

    # ``width`` is not defined in this function; it is read from the
    # enclosing (module) scope.
    stripplot = (
        alt.Chart(shap_values, height=20, width=width)
        # clip=True hides points falling outside the fixed x domain below.
        .mark_circle(size=100, clip=True)
        .encode(
            alt.Y(
                'jitter:Q',
                title=None,
                axis=alt.Axis(values=[0], ticks=False, grid=False, labels=False),
            ),
            alt.X('value', title="Shap value", scale=alt.Scale(domain=[-.4, .4])),
            alt.Color("Z-score", scale=alt.Scale(scheme="redblue", domain=[-0.5, 0.5])),
            alt.Row(
                'variable',
                title=None,
                sort=y_order,
                header=alt.Header(
                    labelAngle=0,
                    labelAlign='left',
                ),
            ),
        )
        # Box-Muller transform: normally distributed vertical jitter so
        # overlapping points spread out within each row.
        .transform_calculate(jitter='sqrt(-2*log(random()))*cos(2*PI*random())')
        .configure_facet(spacing=0)
        .configure_view(stroke=None)
    )
    st.altair_chart(stripplot)
def plot_error_dist(predictions: pd.DataFrame) -> None:
    """Show KDE facets of the "MAE" and "MAPE" columns per target in Streamlit.

    ``predictions`` is melted with "target" as the id column and "MAE" /
    "MAPE" as value columns. A summary table grouped by target and metric is
    emitted through ``tables.describe_table``, then one faceted KDE chart per
    metric is built and the two are concatenated side by side.
    """
    long_errors = predictions.melt(id_vars="target", value_vars=["MAE", "MAPE"])
    tables.describe_table(long_errors, groupby=["target", "variable"])

    shared_row_args = dict(shorthand="target", title=None, sort=forces_order)
    metric_column = alt.Column("variable", title=None)

    def _faceted_kde(subset, header):
        # Same facet pipeline for both metrics; only the data slice and the
        # row header styling differ.
        faceted = plot_kde().facet(
            data=subset,
            row=alt.Row(header=header, **shared_row_args),
            column=metric_column,
        )
        return faceted.resolve_scale(y="independent").properties(bounds="flush")

    mae_panel = _faceted_kde(
        long_errors.query("variable == 'MAE'"),
        alt.Header(labelAngle=0, labelAlign="left"),
    )
    # The MAPE panel sets its row label font size to 0, so only the MAE
    # panel carries readable row labels.
    mape_panel = _faceted_kde(
        long_errors.query("variable == 'MAPE'"),
        alt.Header(labelFontSize=0),
    )

    combined = (mae_panel | mape_panel).configure_facet(spacing=5)
    st.altair_chart(combined)
def plot_targets(targets: pd.DataFrame) -> None:
    """Show one KDE facet row per column of ``targets`` in Streamlit.

    The frame is melted to long form, summarised through
    ``tables.describe_table`` and plotted with ``plot_kde`` faceted by the
    "variable" column, each row having an independent y scale.
    """
    long_targets = targets.melt()
    tables.describe_table(long_targets, description="targets")

    kde_chart = plot_kde()
    variable_rows = alt.Row(
        "variable",
        title=None,
        header=alt.Header(labelAngle=0, labelAlign="left"),
    )

    faceted = kde_chart.facet(data=long_targets, row=variable_rows)
    faceted = faceted.configure_facet(spacing=5)
    faceted = faceted.resolve_scale(y="independent")
    faceted = faceted.properties(bounds="flush")

    st.altair_chart(faceted)
def grouped_single(df_name):
    """Build, save and return a grouped horizontal bar chart for a named dataset.

    ``df_name`` is evaluated as a Python expression to obtain the data, and
    is also parsed as text to derive three labels:

    * ``ht``  -- the name with brackets/quotes and the "grpd_", "coi3_",
      "coi4_" fragments removed, minus its last three characters.
    * ``pdt`` -- the same stripped name with "Head" removed.
    * ``pn``  -- what remains after also removing underscores, "grpd",
      ``ht`` and ``pdt``; that remainder is itself evaluated and converted
      to a string.

    The chart is written to ``outputs/grouped_<ht><pdt><pn>.html`` (with
    brackets, quotes, spaces and commas stripped from ``pn``) and returned.

    SECURITY NOTE(review): both ``eval`` calls execute text derived from
    ``df_name``. This is only safe if ``df_name`` never comes from untrusted
    input -- confirm against callers.
    """
    # Enables the 'dark' Altair theme globally, not just for this chart.
    alt.themes.enable('dark')
    # eval() resolves the name against this function's locals and the module
    # globals; see the security note in the docstring.
    df_match = pd.DataFrame(eval(df_name))
    # Strip brackets/quotes and known prefixes, then drop the last 3 chars.
    ht = re.sub(r"[\[\]\']", "", str(df_name)).replace("grpd_", "").replace("coi3_", "").replace("coi4_", "")[:-3]
    # Same stripping, but remove the literal "Head" instead of truncating.
    pdt = re.sub(r"[\[\]\']", "", str(df_name)).replace("grpd_", "").replace(
        "coi3_", "").replace("coi4_", "").replace("Head", "")
    # Whatever is left once underscores, "grpd", ht and pdt are removed.
    # NOTE(review): presumably this leaves the "coi3"/"coi4" fragment -- confirm.
    pn = re.sub(r"[\[\]\'_]", "", str(df_name)).replace("grpd", "").replace(ht, "").replace(pdt, "")
    # The remainder is evaluated as an expression and its value stringified.
    pn = str(eval(pn))
    # The frame is expected to have exactly four columns.
    df_match.columns = ['a', 'Explain', 'variable', 'value']
    # Drop the "percent_" fragment from the variable labels.
    df_match.variable = [i.replace("percent_", "") for i in df_match.variable]
    g = alt.Chart(df_match).mark_bar().encode(
        x=alt.X('value:Q', title="% of Occurences"),
        y=alt.Y('variable:O', axis=alt.Axis(title=None, titlePadding=20, offset=15, ticks=False, minExtent=60, labelLimit=100, domain=False)),
        color=alt.Color("variable:N", scale=alt.Scale(scheme="inferno"), legend=None),
        # Row title is looked up in the ht_dict / pdt_dict mappings, which are
        # defined outside this function.
        row=alt.Row("a:N", title=ht_dict[ht] + " " + pdt_dict[pdt]),
        tooltip=["Explain"]).configure(padding={
            "left": 15,
            "top": 25,
            "right": 75,
            "bottom": 25
        }).configure_axisX(labelFontSize=15, titleFontSize=20).configure_axisY(
            labelFontSize=15, titleFontSize=20).configure_title(
                align="center", anchor="middle", dx=50, fontSize=50).properties(title=re.sub(
                    r"[\[\]\']", "", str(pn)).replace(",", " "), width=725)
    # Output file name: brackets, quotes, spaces and commas are stripped from pn.
    g.save(
        str("outputs/grouped_" + ht + pdt +
            re.sub(r"[\[\]\' ]", "", str(pn)).replace(",", "") + ".html"))
    return g
def generate_ridgeline_plot(data, attribute):
    """Build a ridgeline chart of ``attribute`` with one row per species.

    ``attribute`` is binned and counted within each ``species`` group, empty
    bins are imputed as 0, and the counts are drawn as overlapping areas
    filled by the group's mean of ``attribute``. The chart title is taken
    from ``metadata_description[attribute]``.

    Returns:
        The configured Altair chart.
    """
    step = 40
    overlap = 1
    field_name = str(attribute)
    mean_keys = ['species', 'mean_attribute']

    # Data pipeline: per-species mean -> bin -> count per bin -> fill gaps.
    pipeline = alt.Chart(data).transform_joinaggregate(
        mean_attribute=f"mean({field_name})", groupby=['species']
    )
    pipeline = pipeline.transform_bin(['bin_max', 'bin_min'], field_name)
    pipeline = pipeline.transform_aggregate(
        value='count()', groupby=mean_keys + ['bin_min', 'bin_max']
    )
    pipeline = pipeline.transform_impute(
        impute='value', groupby=mean_keys, key='bin_min', value=0
    )

    areas = pipeline.mark_area(
        interpolate='monotone',
        fillOpacity=0.4,
        stroke='lightgray',
        strokeWidth=0.3,
    )

    x_channel = alt.X('bin_min:Q', bin='binned', title=field_name)
    y_channel = alt.Y(
        'value:Q',
        scale=alt.Scale(range=[step, -step * overlap]),
        axis=None,
    )
    # Fixed colour domain, listed from 30 down to 5.
    fill_channel = alt.Fill(
        'mean_attribute:Q',
        legend=None,
        scale=alt.Scale(domain=[30, 5], scheme='redyellowblue'),
    )
    row_channel = alt.Row(
        'species:O',
        title='Species',
        header=alt.Header(labelAngle=0, labelAlign='right'),
    )

    graph = areas.encode(x_channel, y_channel, fill_channel, row_channel)
    chart_title = 'Comparison: {}'.format(str(metadata_description[attribute]))
    graph = graph.properties(
        bounds='flush',
        title=chart_title,
        height=100,
        width=700,
    )
    graph = graph.configure_facet(spacing=0)
    graph = graph.configure_view(stroke=None)
    graph = graph.configure_title(anchor='end')
    return graph
def freq_missing() -> None:
    """Save a per-chapter histogram for a fixed set of search words.

    A ``Moby`` instance is built from ``ORIGINAL``, ``ORIGINAL_TEXT_PATH``
    and ``LIMITER``, and its ``freq`` method is queried with the search
    words and the tag "NNP". The resulting values are drawn as a bar chart
    (one facet row per word, record count per chapter bin over chapters
    1-135 in steps of 1) and written to
    ``{OUTPUT_DIR}/<sorted words joined by "_">_index_hist.html``.
    """
    log.info("collecting frequency counts")
    o = Moby(ORIGINAL, ORIGINAL_TEXT_PATH, LIMITER)

    search = {"jonah", "bildad", "pip", "sperm", "right", "greenland"}
    # Sorting makes the file name independent of set iteration order.
    title = "_".join(sorted(search))
    # NOTE(review): presumably a part-of-speech tag (proper noun) -- confirm
    # against Moby.freq.
    tag = "NNP"

    data = alt.Data(values=o.freq(search, tag))
    chart = alt.Chart(data, width=1000).mark_bar().encode(
        x=alt.X(
            title="Chapter",
            bin={"extent": [1, 135], "step": 1},
            field="chapter",
            type="quantitative",
        ),
        y=alt.Y(title="word count", aggregate="count", type="quantitative"),
        row=alt.Row(field="word", type="nominal"),
    )
    chart.save(f"{OUTPUT_DIR}/{title}_index_hist.html")