Ejemplo n.º 1
0
def plot_forces(variables: pd.DataFrame) -> None:
    """Show KDE facets of the force variables in the Streamlit app.

    ``Height`` and ``Weight`` are dropped and the remaining columns are melted
    to long form.  Each original column name is split on whitespace; the
    first piece is stored as ``type`` and the second as ``variable``
    (assumes every name has exactly two pieces -- TODO confirm).  A summary is
    emitted through ``tables.describe_table`` and two faceted charts are
    concatenated horizontally: every type except ``Imb`` on the left, ``Imb``
    alone on the right.
    """
    long_df = variables.drop(["Height", "Weight"], axis=1).melt()
    name_parts = long_df["variable"].str.split(expand=True)
    long_df[["type", "variable"]] = name_parts

    tables.describe_table(
        long_df, groupby=["variable", "type"], description="variables"
    )

    shared_row = {"shorthand": "variable", "title": None, "sort": forces_order}
    type_column = alt.Column("type", title=None)

    def kde_facet(query_expr, header_kwargs):
        # One faceted KDE chart for the rows selected by ``query_expr``.
        base = plot_kde()
        faceted = base.facet(
            data=long_df.query(query_expr),
            row=alt.Row(header=alt.Header(**header_kwargs), **shared_row),
            column=type_column,
        )
        independent = faceted.resolve_scale(y="independent")
        return independent.properties(bounds="flush")

    left = kde_facet("type != 'Imb'", {"labelAngle": 0, "labelAlign": "left"})
    # The right-hand chart repeats the same rows, so its row labels are
    # given a zero font size.
    right = kde_facet("type == 'Imb'", {"labelFontSize": 0})

    combined = (left | right).configure_facet(spacing=5)
    st.altair_chart(combined)
def show_continent_cases(df, label):
    """Render a per-continent area chart of the metric named by *label*.

    The data comes from ``get_continent_values(df, label)``; rows whose
    ``location`` is ``'World'`` are filtered out.  Each continent gets its own
    row with an independent y scale, and the chart is displayed through
    Streamlit at container width.
    """
    # NOTE(review): the original assigned this result to a local that was
    # overwritten on the very next line.  The call is kept in case
    # ``modify_data`` has side effects on ``df`` -- confirm and remove if not.
    modify_data(df)
    continent_df = get_continent_values(df, label)

    metric_field = label + ':Q'
    readable_label = label.replace('_', ' ')

    base = alt.Chart(continent_df)
    base = base.transform_filter(alt.datum.location != 'World')
    area = base.mark_area()

    hover = [
        alt.Tooltip('date:T'),
        alt.Tooltip('continent', title='continent'),
        alt.Tooltip(metric_field, format=",.0f", title=readable_label),
    ]
    encoded = area.encode(
        x=alt.X('date:T', title='Date'),
        y=alt.Y(metric_field, title=None),
        color='continent:N',
        row=alt.Row('continent:N', title=readable_label.title()),
        tooltip=hover,
    )

    themed = encoded.configure_header(
        titleColor='grey',
        titleFontSize=13,
        labelFontSize=12,
    )
    themed = themed.configure_axis(
        labelFontSize=11,
        titleFontSize=13,
        titleColor='grey',
    )
    themed = themed.configure_axisX(labelAngle=-30)
    themed = themed.configure_legend(titleFontSize=13, labelFontSize=12)

    chart = themed.properties(height=60)
    chart = chart.resolve_scale(y='independent')
    chart = chart.interactive()

    st.altair_chart(chart, use_container_width=True)
Ejemplo n.º 3
0
def ridge_plot(d, value, groupby, step=30, overlap=0.8, sort=None):
    """Build a ridgeline (overlapping area histogram) chart.

    Args:
        d: Data for the chart; must support ``d[value].max()`` / ``.min()``,
            which set the fill colour domain.
        value: Name of the field to bin and count.
        groupby: Name of the field that defines one ridge (facet row) each.
        step: Height of each row in pixels.
        overlap: Controls how far a ridge may rise above its own row; the y
            range is ``[step, -step * overlap]``.
        sort: Optional explicit row order; wrapped in ``alt.SortArray`` when
            truthy.

    Returns:
        The configured Altair chart (not rendered or saved here).
    """
    group_keys = [groupby, "mean_value"]

    # Per-group mean -> bin the value -> count per bin -> fill empty bins
    # with zero.
    counted = alt.Chart(d)
    counted = counted.transform_joinaggregate(
        mean_value=f"mean({value})", groupby=[groupby]
    )
    counted = counted.transform_bin(["bin_max", "bin_min"], value)
    counted = counted.transform_aggregate(
        value="count()", groupby=group_keys + ["bin_min", "bin_max"]
    )
    counted = counted.transform_impute(
        impute="value", groupby=group_keys, key="bin_min", value=0
    )

    ridges = counted.mark_area(
        interpolate="monotone",
        fillOpacity=0.8,
        stroke="lightgray",
        strokeWidth=0.5,
    )

    x_enc = alt.X(
        "bin_min:Q",
        bin="binned",
        title='activation',
        axis=alt.Axis(format='%', labelFlush=False),
    )
    y_enc = alt.Y(
        "value:Q",
        scale=alt.Scale(range=[step, -step * overlap]),
        axis=None,
    )
    # The domain is given max-first, i.e. reversed relative to the scheme.
    fill_scale = alt.Scale(
        domain=[d[value].max(), d[value].min()], scheme="redyellowblue"
    )
    fill_enc = alt.Fill("mean_value:Q", legend=None, scale=fill_scale)

    if sort:
        row_order = alt.SortArray(sort)
    else:
        row_order = None
    row_enc = alt.Row(
        f"{groupby}:N",
        title=None,
        sort=row_order,
        header=alt.Header(labelAngle=0, labelAlign="right", format="%B"),
    )

    chart = ridges.encode(x_enc, y_enc, fill_enc, row_enc)
    chart = chart.properties(bounds="flush", height=step)
    chart = chart.configure_facet(spacing=0)
    return chart.configure_view(stroke=None)
Ejemplo n.º 4
0
def plot_shap_values(X: pd.DataFrame, model: dict) -> None:
    """Display a jittered strip plot of SHAP values for the top features.

    SHAP values are computed with ``shap.TreeExplainer`` for the model stored
    under the ``"EB mean force"`` key.  The six features with the largest mean
    absolute SHAP value are kept, one facet row each, and every point is
    coloured by the feature's z-score clipped to ``[-0.5, 0.5]``.

    Args:
        X: Feature matrix; its columns label the SHAP values.
        model: Mapping from target name to a fitted model accepted by
            ``shap.TreeExplainer``.

    Returns:
        None.  The chart is rendered with ``st.altair_chart``.  The previous
        ``-> pd.DataFrame`` annotation was wrong: nothing is returned.
    """
    target = "EB mean force"

    explainer = shap.TreeExplainer(model[target], data=X)
    shap_values = pd.DataFrame(explainer.shap_values(X), columns=X.columns)

    # Rank features by mean |SHAP| and keep the six most influential.
    y_order = shap_values.abs().mean().nlargest(6).index.to_list()
    shap_values = shap_values[y_order].melt()

    # Melting X[y_order] yields rows in the same column order as the melted
    # SHAP frame, so the z-scores line up row for row (assumes X has a default
    # RangeIndex -- TODO confirm).  Clipping matches the colour-scale domain.
    top_features = X[y_order]
    z_scores = (top_features - top_features.mean()) / top_features.std()
    shap_values["Z-score"] = z_scores.melt()["value"].clip(-0.5, 0.5)

    # NOTE(review): ``width`` is not defined in this function; it is expected
    # to exist at module level.
    stripplot = alt.Chart(shap_values, height=20, width=width).mark_circle(
        size=100, clip=True).encode(
            alt.Y(
                'jitter:Q',
                title=None,
                axis=alt.Axis(values=[0],
                              ticks=False,
                              grid=False,
                              labels=False),
            ),
            alt.X('value',
                  title="Shap value",
                  scale=alt.Scale(domain=[-.4, .4])),
            alt.Color("Z-score",
                      scale=alt.Scale(scheme="redblue", domain=[-0.5, 0.5])),
            alt.Row(
                'variable',
                title=None,
                sort=y_order,
                header=alt.Header(
                    labelAngle=0,
                    labelAlign='left',
                ),
            ),
        ).transform_calculate(
            # Box-Muller transform: Gaussian vertical jitter so overlapping
            # points spread out within each row.
            jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
        ).configure_facet(spacing=0).configure_view(stroke=None)

    st.altair_chart(stripplot)
Ejemplo n.º 5
0
def plot_error_dist(predictions: pd.DataFrame) -> None:
    """Show KDE facets of the ``MAE`` and ``MAPE`` columns per target.

    ``predictions`` is melted to long form keyed by ``target`` and summarised
    through ``tables.describe_table``.  Two faceted charts (one per metric,
    rows ordered by ``forces_order``) are then concatenated horizontally and
    rendered in Streamlit.
    """
    long_df = predictions.melt(id_vars="target", value_vars=["MAE", "MAPE"])
    tables.describe_table(long_df, groupby=["target", "variable"])

    shared_row = {"shorthand": "target", "title": None, "sort": forces_order}
    metric_column = alt.Column("variable", title=None)

    def kde_facet(query_expr, header_kwargs):
        # One faceted KDE chart for the rows selected by ``query_expr``.
        base = plot_kde()
        faceted = base.facet(
            data=long_df.query(query_expr),
            row=alt.Row(header=alt.Header(**header_kwargs), **shared_row),
            column=metric_column,
        )
        independent = faceted.resolve_scale(y="independent")
        return independent.properties(bounds="flush")

    mae_chart = kde_facet(
        "variable == 'MAE'", {"labelAngle": 0, "labelAlign": "left"}
    )
    # The second chart repeats the same rows, so its row labels are given a
    # zero font size.
    mape_chart = kde_facet("variable == 'MAPE'", {"labelFontSize": 0})

    combined = (mae_chart | mape_chart).configure_facet(spacing=5)
    st.altair_chart(combined)
Ejemplo n.º 6
0
def plot_targets(targets: pd.DataFrame) -> None:
    """Show one KDE row per target column in the Streamlit app.

    The frame is melted to long form, summarised through
    ``tables.describe_table`` and faceted by the resulting ``variable``
    column, each row with its own y scale.
    """
    long_df = targets.melt()
    tables.describe_table(long_df, description="targets")

    row_header = alt.Header(labelAngle=0, labelAlign="left")
    row_facet = alt.Row("variable", title=None, header=row_header)

    chart = plot_kde().facet(data=long_df, row=row_facet)
    chart = chart.configure_facet(spacing=5)
    chart = chart.resolve_scale(y="independent")
    chart = chart.properties(bounds="flush")

    st.altair_chart(chart)
Ejemplo n.º 7
0
def grouped_single(df_name):
    """Build, save and return a faceted bar chart for a named data set.

    ``df_name`` is evaluated as a Python expression to obtain the data, and
    pieces of the name itself are used to look up titles in ``ht_dict`` and
    ``pdt_dict`` and to build the output file name under ``outputs/``.

    NOTE(review): this function calls ``eval`` on text derived from
    ``df_name``.  That is only safe if ``df_name`` always comes from trusted,
    in-project code -- confirm against callers.  Because ``eval`` is called
    without an explicit namespace it can also see this function's local
    variables, so renaming locals here may change what it resolves.

    Returns:
        The Altair chart that was written to disk.
    """

    # Switches the Altair theme globally, not just for this chart.
    alt.themes.enable('dark')

    # Resolve the name to the actual data object.
    df_match = pd.DataFrame(eval(df_name))
    # ht: the name without brackets/quotes and without the "grpd_", "coi3_"
    # and "coi4_" markers, minus its last three characters.
    # (presumably a heading-type key -- verify against ht_dict)
    ht = re.sub(r"[\[\]\']", "",
                str(df_name)).replace("grpd_",
                                      "").replace("coi3_",
                                                  "").replace("coi4_", "")[:-3]
    # pdt: the same cleaned name, but with "Head" removed instead of the
    # last three characters being cut off.
    pdt = re.sub(r"[\[\]\']", "", str(df_name)).replace("grpd_", "").replace(
        "coi3_", "").replace("coi4_", "").replace("Head", "")
    # pn: what is left of the name once brackets, quotes, underscores,
    # "grpd", ht and pdt are stripped out ...
    pn = re.sub(r"[\[\]\'_]", "",
                str(df_name)).replace("grpd", "").replace(ht,
                                                          "").replace(pdt, "")
    # ... which is then evaluated as an expression and stringified.
    pn = str(eval(pn))

    # Assumes the evaluated data has exactly four columns -- TODO confirm.
    df_match.columns = ['a', 'Explain', 'variable', 'value']
    # Drop the "percent_" marker so the axis labels are shorter.
    df_match.variable = [i.replace("percent_", "") for i in df_match.variable]

    g = alt.Chart(df_match).mark_bar().encode(
        x=alt.X('value:Q', title="% of Occurences"),
        y=alt.Y('variable:O',
                axis=alt.Axis(title=None,
                              titlePadding=20,
                              offset=15,
                              ticks=False,
                              minExtent=60,
                              labelLimit=100,
                              domain=False)),
        color=alt.Color("variable:N",
                        scale=alt.Scale(scheme="inferno"),
                        legend=None),
        row=alt.Row("a:N", title=ht_dict[ht] + " " + pdt_dict[pdt]),
        tooltip=["Explain"]).configure(padding={
            "left": 15,
            "top": 25,
            "right": 75,
            "bottom": 25
        }).configure_axisX(labelFontSize=15, titleFontSize=20).configure_axisY(
            labelFontSize=15, titleFontSize=20).configure_title(
                align="center", anchor="middle",
                dx=50, fontSize=50).properties(title=re.sub(
                    r"[\[\]\']", "", str(pn)).replace(",", "    "),
                                               width=725)
    # The file name is "grouped_" + ht + pdt + pn with brackets, quotes,
    # spaces and commas removed.  Assumes the "outputs" directory already
    # exists -- TODO confirm.
    g.save(
        str("outputs/grouped_" + ht + pdt +
            re.sub(r"[\[\]\' ]", "", str(pn)).replace(",", "") + ".html"))
    return g
Ejemplo n.º 8
0
def generate_ridgeline_plot(data, attribute):
    """Build a ridgeline plot: one overlapping histogram per species.

    ``attribute`` names the field that is binned and counted; each
    ``species`` becomes a facet row, filled according to the species' mean of
    that attribute.  The chart title is taken from
    ``metadata_description[attribute]``.

    Returns:
        The configured Altair chart.
    """
    step = 40
    overlap = 1

    field = str(attribute)
    species_keys = ['species', 'mean_attribute']

    # Per-species mean -> bin the attribute -> count per bin -> fill empty
    # bins with zero.
    counted = alt.Chart(data)
    counted = counted.transform_joinaggregate(
        mean_attribute=f"mean({field})",
        groupby=['species'],
    )
    counted = counted.transform_bin(['bin_max', 'bin_min'], field)
    counted = counted.transform_aggregate(
        value='count()',
        groupby=species_keys + ['bin_min', 'bin_max'],
    )
    counted = counted.transform_impute(
        impute='value',
        groupby=species_keys,
        key='bin_min',
        value=0,
    )

    ridges = counted.mark_area(
        interpolate='monotone',
        fillOpacity=0.4,
        stroke='lightgray',
        strokeWidth=0.3,
    )

    x_enc = alt.X('bin_min:Q', bin='binned', title=field)
    y_enc = alt.Y(
        'value:Q',
        scale=alt.Scale(range=[step, -step * overlap]),
        axis=None,
    )
    fill_enc = alt.Fill(
        'mean_attribute:Q',
        legend=None,
        scale=alt.Scale(domain=[30, 5], scheme='redyellowblue'),
    )
    row_enc = alt.Row(
        'species:O',
        title='Species',
        header=alt.Header(labelAngle=0, labelAlign='right'),
    )
    encoded = ridges.encode(x_enc, y_enc, fill_enc, row_enc)

    heading = 'Comparison: {}'.format(str(metadata_description[attribute]))
    graph = encoded.properties(
        bounds='flush',
        title=heading,
        height=100,
        width=700,
    )
    graph = graph.configure_facet(spacing=0)
    graph = graph.configure_view(stroke=None)
    graph = graph.configure_title(anchor='end')

    return graph
Ejemplo n.º 9
0
def freq_missing() -> None:
    """Write a per-chapter histogram for a fixed set of words to HTML.

    Frequencies come from ``Moby.freq`` called with the word set and the tag
    ``"NNP"`` (presumably a part-of-speech tag -- verify against ``Moby``).
    The chart has one row per word, with chapters binned one per step over
    the extent 1-135, and is saved under ``OUTPUT_DIR`` with a file name
    built from the sorted words joined by underscores.
    """
    log.info("collecting frequency counts")
    moby = Moby(ORIGINAL, ORIGINAL_TEXT_PATH, LIMITER)

    words = {"jonah", "bildad", "pip", "sperm", "right", "greenland"}
    file_stem = "_".join(sorted(words))
    pos_tag = "NNP"

    data = alt.Data(values=(moby.freq(words, pos_tag)))
    bars = alt.Chart(data, width=1000).mark_bar()

    chapter_axis = alt.X(
        title="Chapter",
        bin={"extent": [1, 135], "step": 1},
        field="chapter",
        type="quantitative",
    )
    count_axis = alt.Y(
        title="word count", aggregate="count", type="quantitative"
    )
    word_rows = alt.Row(field="word", type="nominal")

    chart = bars.encode(x=chapter_axis, y=count_axis, row=word_rows)
    chart.save(f"{OUTPUT_DIR}/{file_stem}_index_hist.html")