Example #1
0
def plot_forces(variables: pd.DataFrame) -> None:
    """Show faceted KDE plots of the force variables in Streamlit.

    The ``Height`` and ``Weight`` columns are dropped, the remaining columns
    are melted to long form, and each original column name is split on
    whitespace into a ``type`` part and a ``variable`` part. A summary is
    emitted through ``tables.describe_table``; then two faceted charts (every
    type except ``Imb`` on the left, ``Imb`` on the right) are concatenated
    horizontally and passed to ``st.altair_chart``.

    Parameters
    ----------
    variables : pd.DataFrame
        Wide table holding ``Height``, ``Weight`` and the force columns.
    """
    long_forces = variables.drop(["Height", "Weight"], axis=1).melt()
    long_forces[["type", "variable"]] = long_forces["variable"].str.split(
        expand=True)

    tables.describe_table(
        long_forces,
        groupby=["variable", "type"],
        description="variables",
    )

    shared_row_options = {
        "shorthand": "variable",
        "title": None,
        "sort": forces_order,
    }
    type_column = alt.Column("type", title=None)

    def _facet_kde(subset, header):
        # Both panels use the same row/column layout; only the data subset
        # and the row header styling differ.
        faceted = plot_kde().facet(
            data=subset,
            row=alt.Row(header=header, **shared_row_options),
            column=type_column,
        )
        return faceted.resolve_scale(y="independent").properties(
            bounds="flush")

    forces_plot = _facet_kde(
        long_forces.query("type != 'Imb'"),
        alt.Header(labelAngle=0, labelAlign="left"),
    )
    # The right-hand panel sets its row label font size to 0.
    imb_plot = _facet_kde(
        long_forces.query("type == 'Imb'"),
        alt.Header(labelFontSize=0),
    )

    combined = alt.hconcat(forces_plot, imb_plot).configure_facet(spacing=5)
    st.altair_chart(combined)
Example #2
0
def ridge_plot(d, value, groupby, step=30, overlap=0.8, sort=None):
    """Build a ridgeline (joy) plot of ``value`` with one row per group.

    Parameters
    ----------
    d
        Data passed to ``alt.Chart``; it is also indexed as ``d[value]`` to
        obtain the min/max used for the fill colour domain.
    value
        Name of the field that is binned and counted.
    groupby
        Name of the field that defines the rows.
    step
        Height of each row in pixels.
    overlap
        Fraction of ``step`` by which a row may extend into the row above.
    sort
        Optional explicit row order; when falsy the row ``sort`` is ``None``.

    Returns
    -------
    The configured Altair chart (facet spacing 0, no view stroke).
    """
    row_order = alt.SortArray(sort) if sort else None

    # Transform pipeline: per-group mean -> bin -> count per bin -> fill in
    # empty bins with 0.
    histogram = alt.Chart(d).transform_joinaggregate(
        mean_value=f"mean({value})", groupby=[groupby]
    )
    histogram = histogram.transform_bin(["bin_max", "bin_min"], value)
    histogram = histogram.transform_aggregate(
        value="count()",
        groupby=[groupby, "mean_value", "bin_min", "bin_max"],
    )
    histogram = histogram.transform_impute(
        impute="value",
        groupby=[groupby, "mean_value"],
        key="bin_min",
        value=0,
    )

    x_channel = alt.X(
        "bin_min:Q",
        bin="binned",
        title='activation',
        axis=alt.Axis(format='%', labelFlush=False),
    )
    # A negative upper range value lets an area spill over the row above.
    y_channel = alt.Y(
        "value:Q",
        scale=alt.Scale(range=[step, -step * overlap]),
        axis=None,
    )
    # The domain is given as [max, min], i.e. reversed.
    fill_channel = alt.Fill(
        "mean_value:Q",
        legend=None,
        scale=alt.Scale(
            domain=[d[value].max(), d[value].min()],
            scheme="redyellowblue",
        ),
    )
    row_channel = alt.Row(
        f"{groupby}:N",
        title=None,
        sort=row_order,
        header=alt.Header(labelAngle=0, labelAlign="right", format="%B"),
    )

    ridges = histogram.mark_area(
        interpolate="monotone",
        fillOpacity=0.8,
        stroke="lightgray",
        strokeWidth=0.5,
    ).encode(x_channel, y_channel, fill_channel, row_channel)

    ridges = ridges.properties(bounds="flush", height=step)
    return ridges.configure_facet(spacing=0).configure_view(stroke=None)
Example #3
0
def price_subplot(df,
                  color='Category',
                  color_sort_order=['Clothes', 'Accessory'],
                  color_scale='tableau10',
                  price_scale=alt.Scale()):
    """Jittered price strip plot with a median rule, faceted by year.

    Parameters
    ----------
    df
        Data for the chart; the encodings reference the fields ``Price``,
        ``Price-2019``, ``Product`` and ``Year``.
    color
        Field name used (as nominal) for the point colour.
    color_sort_order
        Optional sort order for the colour encoding; skipped when falsy.
    color_scale
        Colour scheme name given to ``alt.Scale(scheme=...)``.
    price_scale
        Scale applied to the ``Price`` y axis.

    Returns
    -------
    A chart faceted into columns by ``Year:O``; each facet layers a red rule
    at ``median(Price-2019)`` underneath the jittered points.
    """
    color_options = dict(scale=alt.Scale(scheme=color_scale))
    if color_sort_order:
        color_options.update(sort=color_sort_order)

    base = alt.Chart(df, width=50)

    jitter_axis = alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    points = base.mark_circle(size=12, opacity=0.7).encode(
        x=jitter_axis,
        y=alt.Y("Price:Q", axis=alt.Axis(), scale=price_scale),
        color=alt.Color(f'{color}:N', **color_options),
        tooltip=['Product', 'Price', 'Price-2019'],
    )
    # Generate Gaussian jitter with a Box-Muller transform
    points = points.transform_calculate(
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())')

    median_rule = base.mark_rule(color='red', size=2).encode(
        y=alt.Y("median(Price-2019):Q"))

    # The rule is the first layer, the points are drawn on top of it.
    layered = median_rule + points

    year_header = alt.Header(
        labelAngle=-90,
        titleOrient='top',
        labelOrient='bottom',
        labelAlign='right',
        labelPadding=3,
    )
    return layered.facet(column=alt.Column('Year:O', header=year_header))
Example #4
0
def dur_dist_plot(dur_dist, to_json_for_lab=None):
    """Violin-style density plot of ``duration`` with one column per cluster.

    Parameters
    ----------
    dur_dist
        Data for the chart; the fields ``duration`` and ``cluster`` are used.
    to_json_for_lab
        Optional data transformer. When given, it is registered under the
        name ``"json"`` before that transformer is enabled.

    Returns
    -------
    The configured Altair chart.

    Notes
    -----
    Enabling the ``"json"`` data transformer changes Altair's global state.
    """
    if to_json_for_lab is not None:
        alt.data_transformers.register("json", to_json_for_lab)
    alt.data_transformers.enable("json")

    densities = alt.Chart(dur_dist).transform_density(
        "duration",
        as_=["duration", "density"],
        extent=[0, 70],
        groupby=["cluster"],
    )

    # Centre-stacking the density produces the symmetric violin outline.
    density_axis = alt.X(
        "density:Q",
        stack="center",
        impute=None,
        title=None,
        axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
    )
    cluster_column = alt.Column(
        "cluster:N",
        header=alt.Header(
            titleOrient="bottom",
            labelOrient="bottom",
            labelPadding=0,
        ),
    )

    violins = densities.mark_area(orient="horizontal").encode(
        y="duration:Q",
        color="cluster:N",
        x=density_axis,
        column=cluster_column,
    )
    violins = violins.properties(width=100)
    return violins.configure_facet(spacing=0).configure_view(stroke=None)
Example #5
0
def altairPlot():
    """Return a jittered strip plot of IMDB ratings, one column per genre.

    The chart reads its data from the ``vega_datasets`` movies URL. Points are
    spread horizontally with a computed Gaussian ``jitter`` field.
    """
    import altair as alt
    from vega_datasets import data

    movies_url = data.movies.url

    genre_header = alt.Header(
        labelAngle=-90,
        titleOrient='top',
        labelOrient='bottom',
        labelAlign='right',
        labelPadding=3,
    )
    jitter_axis = alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )

    points = alt.Chart(movies_url, width=80).mark_circle(size=8)
    points = points.encode(
        x=jitter_axis,
        y=alt.Y('IMDB_Rating:Q'),
        color=alt.Color('Major_Genre:N', legend=None),
        column=alt.Column('Major_Genre:N', header=genre_header),
    )
    # Generate Gaussian jitter with a Box-Muller transform
    points = points.transform_calculate(
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())')

    return points.configure_facet(spacing=0).configure_view(stroke=None)
Example #6
0
def bv_violinPlot(data, engine, xlabel, ylabel1, ylabel2):
    """Draw a two-variable violin plot with the requested rendering engine.

    The ``plotY`` and ``plotX1`` columns of ``data`` are renamed to
    ``ylabel1`` and ``ylabel2`` on a copy, and only those two columns are
    plotted.

    Parameters
    ----------
    data
        DataFrame containing the ``plotY`` and ``plotX1`` columns. It is not
        modified.
    engine
        ``'Static'`` for a seaborn/Matplotlib figure wrapped in a Panel
        Matplotlib pane, ``'Interactive'`` for an Altair chart.
    xlabel
        Accepted for interface compatibility; not used by this function.
    ylabel1, ylabel2
        Display names given to the ``plotY`` and ``plotX1`` columns.

    Returns
    -------
    ``pn.pane.Matplotlib`` for ``'Static'``, an Altair chart for
    ``'Interactive'``, and ``None`` for any other ``engine`` value.
    """
    data = data.copy()
    data.rename(columns={'plotY': ylabel1, 'plotX1': ylabel2}, inplace=True)
    data = data[[ylabel1, ylabel2]].copy()

    if engine == 'Static':
        plt.rcParams['figure.figsize'] = (9, 6)
        ax = sns.violinplot(x='variable', y='value', data=data.melt())
        # Pass the on/off flag positionally: Matplotlib renamed the keyword
        # from ``b`` to ``visible``, and the positional form works on both
        # old and new versions.
        ax.grid(True, which='major', color='k', linewidth=0.25)
        ax.grid(True, which='minor', color='k', linewidth=0.125)
        # Close the pyplot-managed figure; the pane keeps its own reference.
        plt.close()
        return pn.pane.Matplotlib(ax.figure, tight=True)

    if engine == 'Interactive':
        p = alt.Chart(data.dropna().melt())
        p = p.transform_density('value',
                                as_=['value', 'density'],
                                groupby=['variable'])
        # Centre-stacking the density produces the symmetric violin outline.
        p = p.mark_area(orient='horizontal').encode(
            y=alt.Y('value:Q', axis=alt.Axis(format='~s')),
            color='variable:N',
            x=alt.X('density:Q', stack='center',
                    impute=None, title=None,
                    axis=alt.Axis(labels=False, values=[0], grid=False,
                                  ticks=True)),
            column=alt.Column('variable:N',
                              header=alt.Header(titleOrient='bottom',
                                                labelOrient='bottom',
                                                labelPadding=0)))
        p = p.properties(width=200, height=280)
        p = p.configure_facet(spacing=0)
        p = p.configure_view(stroke=None)
        return p

    # Unknown engine: keep the original behaviour of returning nothing.
    return None
Example #7
0
def punchcode():
    """Return the punch-card chart of workshop contribution as Vega-Lite JSON.

    Works on a copy of the module-level ``df``. Every workshop category other
    than ``'Corporate'`` is relabelled ``'Public'``; the contribution is
    ``workshop_hours * class_size``; rows whose ``name`` is ``'Capstone'``
    are excluded from the chart.
    """
    muted = '#bbc6cbe6'

    def _to_segment(category):
        return 'Corporate' if category == 'Corporate' else 'Public'

    dat = df.copy()
    dat['mnth_yr'] = dat['workshop_start'].dt.to_period('M').astype(str)
    dat['workshop_category'] = dat['workshop_category'].apply(_to_segment)
    dat['contrib'] = dat['workshop_hours'] * dat['class_size']

    without_capstone = dat[dat['name'] != 'Capstone']

    category_column = alt.Column(
        'workshop_category:O',
        title=None,
        sort="descending",
        header=alt.Header(
            titleColor=muted,
            labelColor=muted,
            labelAngle=30,
            titleFontSize=40,
            titleAngle=30,
        ),
    )

    chart = alt.Chart(without_capstone).mark_circle(color=muted).encode(
        x=alt.X('mnth_yr:T', axis=alt.Axis(title='')),
        y='name:O',
        size=alt.Size('sum(contrib):Q', legend=None),
        column=category_column,
    )
    chart = chart.properties(width=300, height=320)
    chart = chart.configure_axis(
        labelColor=muted,
        titleColor=muted,
        grid=False,
    )
    return chart.to_json()
Example #8
0
def homeAdv():
    """Return the home-vs-away average points bar chart as Vega-Lite JSON.

    Data is read from ``App/Data/homeadvantage.csv``. The chart has one row
    per league (ordered by descending maximum ``points``) and one bar per
    ``team_flag``.
    """
    adv = pd.read_csv("App/Data/homeadvantage.csv")

    palette = alt.Scale(
        domain=['Home Team', 'Away Team'],
        range=["#5bc0de", "#d9534f"],
    )

    league_rows = alt.Row(
        'league:N',
        title='',
        sort=alt.EncodingSortField("points", op='max', order='descending'),
        header=alt.Header(labelAngle=0, labelAlign='left'),
    )
    # NOTE(review): the y channel is sorted by '-y' (itself); '-x' may have
    # been intended - confirm before changing.
    flag_axis = alt.Y(
        'team_flag:N',
        sort='-y',
        title='',
        axis=alt.Axis(labels=False),
    )

    bars = alt.Chart(adv, height=500, width=1000).mark_bar().encode(
        x=alt.X('points:Q', title='Average points'),
        y=flag_axis,
        color=alt.Color('team_flag:N', scale=palette, title=''),
        row=league_rows,
        tooltip=[alt.Tooltip('points:Q', format='.2f')],
    )
    # This height replaces the 500 passed to the Chart constructor.
    bars = bars.properties(height=25)
    bars = bars.configure_view(stroke='transparent')
    bars = bars.configure_axis(grid=False)
    chart = bars.interactive()

    return chart.to_json()
Example #9
0
def cbo_bar_chart(cbo_data, var, title, bar_width=30, width=600, height=250):
    """
    Build a bar chart comparing the current and new CBO projections.

    Parameters
    ----------
    cbo_data: data containing both current and new CBO projections
        concatenated together; the columns "index" and "Projections" are used
    var: Y-axis variable
    title: title of the chart
    bar_width: width of the bars in the plot
    width: total width of the chart, shared equally among the facets
    height: height of the chart

    Returns
    -------
    The configured Altair chart, with one column facet per "index" value.
    """
    # Each facet receives an equal share of the requested overall width.
    facet_count = cbo_data["index"].nunique()
    facet_width = width / facet_count

    projection_axis = alt.Axis(
        title=None,
        labels=False,
        ticks=False,
        labelFontSize=15,
    )
    index_column = alt.Column(
        "index",
        header=alt.Header(title=None, labelOrient="bottom"),
    )

    bars = alt.Chart(cbo_data, title=title).mark_bar(width=bar_width)
    bars = bars.encode(
        x=alt.X("Projections", axis=projection_axis),
        y=alt.Y(var, axis=alt.Axis(labelFontSize=10, titleFontSize=15)),
        color=alt.Color("Projections"),
        column=index_column,
    )
    bars = bars.properties(height=height, width=facet_width)
    bars = bars.configure_view(stroke="transparent")
    bars = bars.configure_facet(spacing=0)
    return bars.configure_title(fontSize=20)
Example #10
0
    def player_roll_chart(self):
        """Build the "Roll Count by Player" grouped bar chart.

        Uses ``self.player_count`` (rounded to 2 decimals) as data,
        ``self.player_names`` / ``self.player_colors`` for the colour scale,
        and ``self.screen_width`` to size each facet.

        Returns the configured Altair chart.
        """
        rounded_counts = self.player_count.round(2)

        player_axis = alt.X(
            "Player:O",
            axis=alt.Axis(title=None, labels=False, ticks=False),
        )
        player_color = alt.Color(
            'Player:N',
            scale=alt.Scale(domain=self.player_names,
                            range=self.player_colors),
            legend=alt.Legend(),
        )
        roll_column = alt.Column(
            "Roll:N",
            header=alt.Header(title=None, labelOrient="bottom",
                              labelFontSize=22),
        )

        bars = alt.Chart(rounded_counts).mark_bar(strokeWidth=0.5,
                                                  stroke="black")
        bars = bars.encode(
            x=player_axis,
            y='Count:Q',
            color=player_color,
            column=roll_column,
            tooltip=list(self.player_count.columns),
        )

        bars = bars.configure_view(strokeWidth=0)
        bars = bars.configure_title(
            fontSize=32, limit=800, dx=45, dy=-50,
            font="Arial", align="center", anchor="middle",
        )
        bars = bars.configure_legend(
            strokeColor="black", padding=10, orient="bottom",
            cornerRadius=10, direction="horizontal", labelFontSize=10,
        )
        bars = bars.properties(
            title="Roll Count by Player",
            width=self.screen_width / 45,
        )
        roll_chart = bars.configure_axis(
            grid=False, labelFontSize=14, titleFontSize=16,
        )
        return roll_chart
Example #11
0
def plot_shap_values(X: pd.DataFrame, model: dict) -> None:
    """Render a jittered SHAP strip plot for the "EB mean force" model.

    SHAP values are computed with ``shap.TreeExplainer`` for
    ``model["EB mean force"]``. The six features with the largest mean
    absolute SHAP value are kept, one chart row per feature. Points are
    coloured by the feature's z-score, clipped to ``[-0.5, 0.5]``.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix given to the explainer; its columns name the features.
    model : dict
        Mapping that contains the fitted model under the key
        ``"EB mean force"``.

    Returns
    -------
    None
        The chart is displayed through ``st.altair_chart``.

    Notes
    -----
    ``width`` is not defined inside this function; it has to be available
    from the enclosing module scope.
    """
    target = "EB mean force"

    explainer = shap.TreeExplainer(model[target], data=X)
    shap_values = pd.DataFrame(explainer.shap_values(X), columns=X.columns)

    # Rank features by mean |SHAP| and keep the top six; the same order is
    # reused to sort the chart rows.
    y_order = shap_values.abs().mean().nlargest(6).index.to_list()
    shap_values = shap_values[y_order].melt()

    # Melting the z-scores with the same column order yields rows that line
    # up one-to-one with the melted SHAP values.
    top_features = X[y_order]
    z_scores = (top_features - top_features.mean()) / top_features.std()
    shap_values["Z-score"] = z_scores.melt()["value"].clip(-0.5, 0.5)

    stripplot = alt.Chart(shap_values, height=20, width=width).mark_circle(
        size=100, clip=True).encode(
            alt.Y(
                'jitter:Q',
                title=None,
                axis=alt.Axis(values=[0],
                              ticks=False,
                              grid=False,
                              labels=False),
            ),
            alt.X('value',
                  title="Shap value",
                  scale=alt.Scale(domain=[-.4, .4])),
            alt.Color("Z-score",
                      scale=alt.Scale(scheme="redblue", domain=[-0.5, 0.5])),
            alt.Row(
                'variable',
                title=None,
                sort=y_order,
                header=alt.Header(
                    labelAngle=0,
                    labelAlign='left',
                ),
            ),
        ).transform_calculate(
            # Gaussian jitter (Box-Muller transform) spreads the points
            # vertically within each row.
            jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
        ).configure_facet(spacing=0).configure_view(stroke=None)

    st.altair_chart(stripplot)
Example #12
0
def participant_count_plot_live(data):
    """Build the "Live Study Participation" bar chart.

    Rows are de-duplicated on (normalised start date, treatment, ROWID) and
    counted per date and treatment. Each date is tagged with a source:
    ``'XLab'`` when it is after 2021-04-05, ``'Amazon'`` otherwise. Totals
    are then summed per treatment and source.

    Parameters
    ----------
    data
        DataFrame with the columns ``Start Date`` (datetime), ``Treatment``
        and ``ROWID``.

    Returns
    -------
    An Altair chart faceted into rows by source, each showing one horizontal
    bar per treatment with the total printed on the bar.
    """
    df2 = data[['Start Date', 'Treatment', 'ROWID']].copy()
    df2['Start Date'] = df2['Start Date'].dt.normalize()
    df2 = df2.drop_duplicates().groupby(by=['Start Date', 'Treatment']).agg({
        'ROWID': 'count'
    }).reset_index()
    df2.columns = ['date', 'branch', 'total']
    df2['source'] = 'Amazon'
    df2.loc[(df2.date > '2021-04-05'), 'source'] = 'XLab'
    df2 = df2.groupby(by=['branch', 'source']).agg({
        'total': 'sum'
    }).reset_index().rename(columns={'branch': 'treatment'})

    base = alt.Chart().mark_bar().encode(
        x=alt.X('total:Q',
                axis=alt.Axis(title='Participants Assigned',
                              labelPadding=10,
                              labelFontSize=20,
                              titleFontSize=25)),
        # The y channel is built with alt.Y (the original used alt.X here).
        y=alt.Y('treatment:O',
                axis=alt.Axis(title='',
                              labelAngle=0,
                              labelPadding=10,
                              labelFontSize=20,
                              titleFontSize=25),
                sort=['Control', 'Typographical', 'Phonological']),
        color=alt.Color(
            'treatment:O',
            legend=None,
            scale=alt.Scale(range=[
                berkeley_palette['pacific'], berkeley_palette['berkeley_blue'],
                berkeley_palette['founders_rock']
            ]))).properties(width=650, height=150)

    # White totals drawn just inside the right end of each bar.
    txt = base.mark_text(dx=-15, size=15).encode(text='total:Q',
                                                 color=alt.value('white'))

    source_rows = alt.Row(
        'source:N',
        sort=alt.SortArray(['XLab', 'Amazon']),
        header=alt.Header(labelColor=berkeley_palette['pacific'],
                          labelFontSize=25,
                          labelFont='Lato',
                          title=''))

    layered = alt.layer(base, txt).properties(width=600,
                                              height=150,
                                              title={'text': ''})
    p = layered.facet(row=source_rows,
                      data=df2,
                      title='Live Study Participation')

    # Each configure_* call replaces the whole config entry of its kind, so
    # every title option must go into one configure_title call. Two separate
    # calls would keep only the options from the last one.
    p = (p.configure(padding={'top': 20, 'left': 20, 'right': 20,
                              'bottom': 20})
         .configure_facet(spacing=10)
         .configure_view(stroke=None)
         .configure_axis(grid=False)
         .configure_title(anchor='middle', dy=-20))

    return p
Example #13
0
def income_expenses_over_time(df_orig):
    """Plot income and expenses per time interval in the Streamlit app.

    The sidebar offers the aggregation interval (Month/Quarter/Year), an
    option to invert the sign of the "Income" rows, and the plotting backend
    (pyplot, altair or bokeh). Data is aggregated with
    ``time_interval_aggregation``, summed per ``Account_L0`` and drawn as
    grouped bars with the selected backend.

    Parameters
    ----------
    df_orig
        Input passed unchanged to ``time_interval_aggregation``.

    Returns
    -------
    None
    """
    # The Streamlit calls below are kept in their original order.
    time_interval = st.sidebar.radio(
        "Time interval:", ("Month", "Quarter", "Year"), index=1)
    dfn, n_levels = time_interval_aggregation(df_orig, time_interval)
    invert_income = st.sidebar.checkbox('Invert sign of "Income"', value=True)
    if invert_income:
        dfn.loc["Income", :] = -dfn.loc["Income", :].values
    st.subheader('Income and Expenses over Time')
    plot_type = st.sidebar.selectbox(
        'Plot type', ["pyplot", "altair", "bokeh"], key="plot_type")

    df_L0 = dfn.groupby(["Account_L0"]).sum().transpose().reset_index()
    df_L0.columns.name = "Account"

    def _draw_pyplot():
        fig = plt.figure(figsize=(14, 5))
        ax = plt.axes()
        df_L0.plot.bar(ax=ax, x=time_interval, y=["Income", "Expenses"],
                       xlabel=time_interval, ylabel=df_L0["level_0"][0],
                       rot=90)
        ax.locator_params(axis="x", tight=True, nbins=40)
        st.pyplot(fig)

    def _draw_altair():
        # First entry of the top column level; used as value column name
        # and as axis/tooltip title.
        value_label = dfn.columns.levels[0][0]
        value_field = '{}:Q'.format(value_label)
        n_intervals = df_L0.shape[0]
        custom_spacing = 2

        long_form = (
            df_L0.drop(columns="level_0")
            .set_index(time_interval)
            .stack()
            .reset_index()
            .rename(columns={0: value_label})
        )
        interval_column = alt.Column(
            time_interval,
            spacing=custom_spacing,
            header=alt.Header(title="Income and Expenses",
                              labelOrient='bottom',
                              labelAlign='right',
                              labelAngle=-90),
        )
        tooltips = [
            alt.Tooltip('Account:O', title='Account'),
            alt.Tooltip(value_field, title=value_label),
            alt.Tooltip('{}:N'.format(time_interval), title=time_interval),
        ]
        chart = alt.Chart(long_form).mark_bar().encode(
            column=interval_column,
            x=alt.X('Account:O',
                    axis=alt.Axis(title=None, labels=False, ticks=False)),
            y=alt.Y(value_field, title=value_label,
                    axis=alt.Axis(grid=False)),
            color=alt.Color('Account',
                            scale=alt.Scale(range=['#EA98D2', '#659CCA'])),
            tooltip=tooltips,
        ).properties(
            # Facet widths are chosen so the facets plus spacing total 700.
            width=(700 - n_intervals * custom_spacing) / n_intervals)
        st.altair_chart(chart, use_container_width=False)

    def _draw_bokeh():
        accounts = ["Income", "Expenses"]
        x = [(ti, acnt) for ti in df_L0[time_interval] for acnt in accounts]
        # Interleave the values (income, expenses, income, ...) so they
        # match the order of the factors in ``x``.
        counts = tuple(
            amount
            for pair in zip(df_L0['Income'], df_L0['Expenses'])
            for amount in pair
        )
        source = ColumnDataSource(data=dict(x=x, counts=counts))
        p = figure(x_range=FactorRange(*x), plot_height=450, plot_width=900,
                   title="Income and Expenses", toolbar_location="above",
                   tooltips=[("Period, Account", "@x"),
                             ("Value", "@counts")])
        p.vbar(x='x', top='counts', width=0.9, source=source)
        p.y_range.start = 0
        p.x_range.range_padding = 0.5
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        st.bokeh_chart(p)

    renderers = {
        "pyplot": _draw_pyplot,
        "altair": _draw_altair,
        "bokeh": _draw_bokeh,
    }
    draw = renderers.get(plot_type)
    if draw is not None:
        draw()
Example #14
0
def source_vs_hour_chart(
    base: alt.Chart, sensor_unit: str, max_absolute_error: float, faceted: bool = False
) -> Union[alt.Chart, alt.FacetChart]:
    """Heat map of the mean absolute error per source and hour of day.

    Parameters
    ----------
    base
        Chart providing the data; the fields ``mae``, ``reference_value``,
        ``event_start`` and ``source`` are referenced.
    sensor_unit
        Unit shown in the tooltip title of the error value.
    max_absolute_error
        Upper end of the colour domain, which runs from this value to 0.
    faceted
        When true, one facet row per source is produced; otherwise the
        source is encoded on the y axis.

    Returns
    -------
    The chart (or facet chart) titled "Model performance given a time of day".
    """
    chart_title = alt.TitleParams(
        "Model performance given a time of day", anchor="middle"
    )

    hour_axis = alt.X(
        "event_start:O",
        timeUnit="hours",
        axis=alt.Axis(domain=False, ticks=False, labelAngle=0),
        scale=alt.Scale(domain=list(range(24))),
        title="Hour of day",  # "UTC hour of day"
    )
    # Cells inside the time brush are coloured by error; the rest use the
    # idle colour.
    error_color = alt.condition(
        selectors.time_selection_brush,
        alt.Color(
            "accuracy:Q",
            scale=alt.Scale(
                domain=(max_absolute_error, 0), scheme="redyellowgreen"
            ),
            title="Error",
        ),
        alt.value(selectors.idle_color),
    )
    tooltips = [
        alt.Tooltip("event_start:T", timeUnit="hours", title="Hour of day"),
        alt.Tooltip(
            "accuracy:Q",
            title="Mean absolute error (%s)" % sensor_unit,
            format=".2f",
        ),
    ]

    heatmap = base.mark_rect()
    heatmap = heatmap.transform_joinaggregate(
        on_the_fly_mae="mean(mae)",
        on_the_fly_reference="mean(reference_value)",
        groupby=["event_start", "source"],
    )
    heatmap = heatmap.transform_calculate(accuracy=alt.datum.on_the_fly_mae)
    heatmap = heatmap.encode(x=hour_axis, color=error_color, tooltip=tooltips)

    if faceted:
        source_rows = alt.Row(
            "source:O", title=None, header=alt.Header(labelAngle=0)
        )
        return heatmap.facet(row=source_rows).properties(title=chart_title)

    source_axis = alt.Y(
        "source:O",
        axis=alt.Axis(domain=False, ticks=False, labelAngle=0, labelPadding=5),
        title=None,
    )
    return heatmap.encode(y=source_axis).properties(title=chart_title)
Example #15
0
def plot(data):
    """
    Save a grouped bar chart comparing 5 urban and 5 rural U.S. counties.

    ``data`` is a DataFrame with population, educational attainment and
    internet access information per county. It is restricted to the ten
    selected counties with ``clean`` and converted with
    ``calculate_percentage``. The chart shows the relationship between
    attaining a Bachelor's Degree and lacking internet access, one column
    per county, and is written to ``q4_chart.html``.
    """
    counties = [
        'New York County', 'Los Angeles County', 'Cook County',
        'Harris County', 'Maricopa County', 'Chaves County',
        'Aroostook County', 'Clallam County', 'McCracken County',
        'St. Landry Parish'
    ]

    prepared = calculate_percentage(clean(data, counties))

    # The subtitle is a hand-spaced Urban/Rural caption row.
    area_labels = (
        '.         Urban         Urban         Urban      ' +
        '   Urban         Urban         Rural          Rural' +
        '          Rural          Rural          Rural'
    )
    chart_title = {
        'text': [
            'Internet Access and Education Attainment in Urban vs. ' +
            'Rural Counties (2016)'
        ],
        'subtitle': ['', area_labels],
        'subtitlePadding': 10,
    }

    statistic_axis = alt.X(
        'Statistic',
        type='nominal',
        sort=counties,
        title=None,
        axis=alt.Axis(labels=False),
    )
    statistic_color = alt.Color(
        'Statistic:N',
        scale=alt.Scale(range=['#96ceb4', '#ffcc5c']),
        title=None,
    )
    county_column = alt.Column(
        'County:N',
        sort=counties,
        header=alt.Header(
            titleOrient='bottom',
            labelOrient='bottom',
            labelAngle=-90,
            labelPadding=90,
            labelBaseline='middle',
        ),
    )

    q4_chart = alt.Chart(prepared).mark_bar().encode(
        x=statistic_axis,
        y='Percentage:Q',
        color=statistic_color,
        column=county_column,
    )
    q4_chart = q4_chart.properties(title=chart_title)
    q4_chart = q4_chart.configure_title(
        fontSize=18,
        orient='top',
        offset=12,
        anchor='start',
    )
    q4_chart = q4_chart.configure_axisX(labelPadding=100)

    q4_chart.save('q4_chart.html')
def faceted_bar_chart(
        df: pd.DataFrame,
        xcol: str,
        xtitle: str,
        ycol: str,
        ytitle: str,
        colorcol: str,
        textcol: str,
        title: str,
        columncol: str,
        legend_title="Hardware") -> alt.FacetChart:
    """
    Build a raw faceted bar chart. The input df is not processed here, so it
    has to come already processed.

    Parameters
    ----------
    df: pd.DataFrame
        dataframe from which the bar chart will be created.
    xcol: str
        dataframe column name that will be used for the x axis of the plot.
    xtitle: str
        title of the x-axis.
    ycol: str
        dataframe column name that will be used for the y axis of the plot.
        Its values are also drawn as text labels with one decimal place.
    ytitle: str
        title of the y-axis.
    colorcol: str
        dataframe column name which holds the separation between colors.
    textcol: str
        accepted for interface compatibility; not used by this function
        (the text labels are taken from ``ycol``).
    title: str
        Chart title, applied as the title of the facet column.
    columncol: str
        dataframe column name which holds the separation between all the
        faceted charts, x axis above plot.
    legend_title: str
        title of the color legend.

    Returns
    -------
    alt.FacetChart
        Interactive faceted bar chart created from the input dataframe.
    """
    bars = alt.Chart().mark_bar().encode(
        x=alt.X(xcol + ':N', title=xtitle),
        y=alt.Y(ycol + ':Q', title=ytitle),
        color=alt.Color(colorcol + ':N', title=legend_title),
    )
    text = bars.mark_text(
        angle=270,
        align='left',
        baseline='middle',
        dx=10  # Nudges text to right so it doesn't appear on top of the bar
    ).encode(text=alt.Text(ycol + ':Q', format='.1f'))
    # The data is attached at the layer level so the facet operator can
    # split it across the columns.
    return alt.layer(bars, text, data=df).facet(column=alt.Column(
        columncol + ':N',
        header=alt.Header(labelAngle=-85, labelAlign='right'),
        title=title)).interactive()
Example #17
0
def grouped_bar_chart(data_frame, x_column, y_column, grouping_column,
                      col_header_type, col_header_format):
    """Build a grouped bar chart with one column facet per group.

    Parameters
    ----------
    data_frame
        Data for the chart.
    x_column
        Field used for both the x position (axis hidden) and the bar colour.
    y_column
        Field used for the bar height; its axis shows a grid.
    grouping_column
        Field that defines the column facets.
    col_header_type
        Optional ``formatType`` for the facet header labels. When ``None``
        neither header format setting is applied.
    col_header_format
        ``format`` for the facet header labels; used only together with
        ``col_header_type``.

    Returns
    -------
    The Altair chart.
    """
    facet_header = alt.Header(labelOrient="bottom")
    use_custom_format = col_header_type is not None
    if use_custom_format:
        facet_header.formatType = col_header_type
        facet_header.format = col_header_format

    bars = alt.Chart(data_frame).mark_bar()
    return bars.encode(
        x=alt.X(x_column, axis=None),
        y=alt.Y(y_column, axis=alt.Axis(grid=True)),
        color=x_column,
        column=alt.Column(grouping_column, header=facet_header),
    )
def generate_ridgeline_plot(data, x_lab_country_name):
    """Generate a ridgeline plot of the response ratio, one row per month.

    Parameters
    ----------
    data
        input data set from preprocessed csv; the fields ``date`` and
        ``response_ratio`` are used.
    x_lab_country_name
        name of the country, inserted into the x axis title

    Returns
    -------
    altair object
        the faceted ridgeline plot
    """
    step = 40
    overlap = 1

    # Transform pipeline: month of date -> per-month mean -> bin -> count
    # per bin -> fill in empty bins with 0.
    binned = alt.Chart(data, height=step).transform_timeunit(
        Month='month(date)')
    binned = binned.transform_joinaggregate(
        mean_response_ratio='mean(response_ratio)', groupby=['Month'])
    binned = binned.transform_bin(['bin_max', 'bin_min'], 'response_ratio')
    binned = binned.transform_aggregate(
        value='count()',
        groupby=['Month', 'mean_response_ratio', 'bin_min', 'bin_max'])
    binned = binned.transform_impute(
        impute='value',
        groupby=['Month', 'mean_response_ratio'],
        key='bin_min',
        value=0)

    ridges = binned.mark_area(
        interpolate='monotone',
        fillOpacity=0.8,
        stroke='lightgray',
        strokeWidth=0.5,
    ).encode(
        alt.X('bin_min:Q',
              bin='binned',
              title=f'Mean Response Ratio in {x_lab_country_name}'),
        # A negative upper range value lets an area spill over the row above.
        alt.Y('value:Q',
              scale=alt.Scale(range=[step, -step * overlap]),
              axis=None),
        alt.Fill('mean_response_ratio:Q'),
    )

    month_rows = alt.Row(
        'Month:T',
        title=None,
        header=alt.Header(labelAngle=0, labelAlign='right', format='%B'),
    )
    ridgeline_plt = ridges.facet(row=month_rows).properties(
        title='', bounds='flush')

    return ridgeline_plt
Example #19
0
def plot_error_dist(predictions: pd.DataFrame) -> None:
    """Show faceted KDE plots of the MAE and MAPE per target in Streamlit.

    ``predictions`` is melted to long form on the ``MAE`` and ``MAPE``
    columns, keeping ``target`` as identifier. A summary is emitted through
    ``tables.describe_table``; then the MAE panel (left) and the MAPE panel
    (right) are concatenated horizontally and passed to ``st.altair_chart``.

    Parameters
    ----------
    predictions : pd.DataFrame
        Table holding the columns ``target``, ``MAE`` and ``MAPE``.
    """
    long_errors = predictions.melt(id_vars="target",
                                   value_vars=["MAE", "MAPE"])
    tables.describe_table(long_errors, groupby=["target", "variable"])

    shared_row_options = {
        "shorthand": "target",
        "title": None,
        "sort": forces_order,
    }
    metric_column = alt.Column("variable", title=None)

    def _facet_kde(subset, header):
        # Both panels use the same row/column layout; only the data subset
        # and the row header styling differ.
        faceted = plot_kde().facet(
            data=subset,
            row=alt.Row(header=header, **shared_row_options),
            column=metric_column,
        )
        return faceted.resolve_scale(y="independent").properties(
            bounds="flush")

    mae = _facet_kde(
        long_errors.query("variable == 'MAE'"),
        alt.Header(labelAngle=0, labelAlign="left"),
    )
    # The right-hand panel sets its row label font size to 0.
    mape = _facet_kde(
        long_errors.query("variable == 'MAPE'"),
        alt.Header(labelFontSize=0),
    )

    combined = alt.hconcat(mae, mape).configure_facet(spacing=5)
    st.altair_chart(combined)
def strip_plot(df, ordered_cats, name):
    """Make a jittered strip plot comparing topics across categories.

    Parameters
    ----------
    df
        Data for the chart; the fields ``ratio``, ``index``, ``levels`` and
        ``cat_sel`` are used.
    ordered_cats
        Order of the category columns.
    name
        Chart title.

    Returns
    -------
    The configured Altair chart, one narrow column per ``cat_sel`` value.
    """
    jitter_axis = alt.X(
        "jitter:Q",
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    paper_size = alt.Size("levels", title=["Number", "of papers"])
    category_color = alt.Color(
        "cat_sel:N",
        legend=None,
        scale=alt.Scale(scheme="tableau10"),
    )
    category_column = alt.Column(
        "cat_sel:N",
        title="arXiv category",
        sort=ordered_cats,
        header=alt.Header(
            labelFontSize=12,
            labelAngle=270,
            titleOrient="top",
            labelOrient="bottom",
            labelAlign="center",
            labelPadding=25,
        ),
    )

    points = alt.Chart(df).mark_circle(
        size=14, stroke="grey", strokeWidth=0.5)
    points = points.encode(
        x=jitter_axis,
        y=alt.Y("ratio:Q", title="Specialisation"),
        tooltip=["index"],
        size=paper_size,
        color=category_color,
        column=category_column,
    )
    # Generate Gaussian jitter with a Box-Muller transform
    points = points.transform_calculate(
        jitter="sqrt(-2*log(random()))*cos(2*PI*random())")

    points = points.configure_facet(spacing=0)
    points = points.configure_view(stroke=None)
    points = points.configure_axis(labelFontSize=12, titleFontSize=12)
    stripplot = points.properties(title=name, width=10, height=200)

    return stripplot
Example #21
0
def plot_targets(targets: pd.DataFrame) -> None:
    """Describe the target columns and draw one KDE panel per target.

    The frame is melted to long form, passed to ``tables.describe_table``
    and then rendered in Streamlit as a row-faceted version of the chart
    returned by ``plot_kde()``.
    """
    long_targets = targets.melt()
    tables.describe_table(long_targets, description="targets")

    kde_base = plot_kde()

    # One facet row per melted "variable", labelled horizontally on the left
    # and without a facet title.
    row_header = alt.Header(labelAngle=0, labelAlign="left")
    variable_rows = alt.Row("variable", title=None, header=row_header)

    faceted = kde_base.facet(data=long_targets, row=variable_rows)
    faceted = faceted.configure_facet(spacing=5)
    # Every row gets its own y scale.
    faceted = faceted.resolve_scale(y="independent")
    faceted = faceted.properties(bounds="flush")

    st.altair_chart(faceted)
Example #22
0
def plot_supervisor(states, gender, remote):
    """Return the supervisor bar chart for the selected respondents as HTML.

    Rows of the module-level ``m_data`` frame are kept only when their
    ``state``, ``Gender`` and ``remote_work`` values are contained in
    ``states``, ``gender`` and ``remote`` respectively.  The remaining rows
    are counted per ``work_interfere`` value and faceted into one column per
    ``supervisor`` value.

    :return: the chart serialised with ``to_html()``
    """
    selected = m_data
    for field, allowed in (('state', states),
                           ('Gender', gender),
                           ('remote_work', remote)):
        selected = selected[selected[field].isin(allowed)]

    # Facet title and labels are both placed underneath the bars.
    bottom_header = alt.Header(titleOrient='bottom', labelOrient='bottom')
    supervisor_facet = alt.Column('supervisor',
                                  title='Have You Talked With Supervisor?',
                                  header=bottom_header)

    bars = alt.Chart(selected).mark_bar()
    bars = bars.encode(
        x=alt.X('work_interfere', axis=None),
        y=alt.Y('count()', title='Count of Participants'),
        tooltip='count()',
        color='work_interfere',
        column=supervisor_facet,
    )
    return bars.properties(width=75).to_html()
Example #23
0
def scatter_subplot(df,
                    color='Category',
                    color_sort_order=['Clothes', 'Accessory'],
                    y_col='Price',
                    color_scale='tableau10',
                    price_scale=alt.Scale(),
                    size=14,
                    opacity=0.7,
                    tooltip=None):
    """Build a jittered strip plot of ``y_col`` with one facet column per year.

    Every facet layers three marks drawn from the same data: a box plot of
    ``y_col`` (outliers hidden), a red rule at the median of ``y_col`` and the
    individual points spread horizontally with random jitter.

    :param df:               DataFrame to plot; the facet uses its ``Year``
                             field
    :param color:            field used (as nominal) to colour the points
    :param color_sort_order: sort order for the colour encoding; a falsy value
                             leaves the ordering to Altair
    :param y_col:            quantitative field shown on the y axis
    :param color_scale:      an ``alt.Scale`` used as-is, otherwise the name
                             of a colour scheme
    :param price_scale:      ``alt.Scale`` applied to the y axis
    :param size:             size of the point marks
    :param opacity:          opacity of the point marks
    :param tooltip:          tooltip fields; when ``None`` it defaults to
                             ``Product``, ``Price``, the first column whose
                             name starts with ``Price-`` and ``y_col``, with
                             duplicates removed
    :raises IndexError:      if ``tooltip`` is ``None`` and ``df`` has no
                             column whose name starts with ``Price-``
    :return:                 the faceted Altair chart
    """
    if tooltip is None:
        # The adjusted-price column is only needed for the default tooltip,
        # so it is looked up lazily: callers that pass their own tooltip are
        # not required to provide a 'Price-' column.
        adjusted_price_cols = [
            c for c in df.columns
            if isinstance(c, str) and c.startswith('Price-')
        ]
        if not adjusted_price_cols:
            raise IndexError(
                "df has no column starting with 'Price-'; "
                "pass tooltip explicitly")
        # dict.fromkeys de-duplicates while preserving order (y_col may
        # repeat one of the fixed entries).
        tooltip = list(
            dict.fromkeys(
                ['Product', 'Price', adjusted_price_cols[0], y_col]))

    if isinstance(color_scale, alt.Scale):
        color_kwargs = {"scale": color_scale}
    else:
        color_kwargs = {"scale": alt.Scale(scheme=color_scale)}
    if color_sort_order:
        color_kwargs['sort'] = color_sort_order

    chart = alt.Chart(df, width=50)

    points = chart.mark_circle(size=size, opacity=opacity).encode(
        x=alt.X(
            'jitter:Q',
            title=None,
            axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
            scale=alt.Scale(),
        ),
        y=alt.Y(f"{y_col}:Q", axis=alt.Axis(), scale=price_scale),
        color=alt.Color(f'{color}:N', **color_kwargs),
        tooltip=tooltip,
    ).transform_calculate(
        # Gaussian jitter in the style of a Box-Muller transform.
        # NOTE(review): the textbook transform uses sqrt(-2*log(u)); the -1
        # used here only rescales the horizontal spread, so it is kept as-is.
        jitter='sqrt(-1*log(random()))*cos(2*PI*random())')

    median_rule = chart.mark_rule(color='red', opacity=0.5, size=2).encode(
        y=alt.Y(f"median({y_col}):Q"))

    box = chart.mark_boxplot(
        outliers=False, opacity=0.5, color='#A0A0A0', size=25).encode(
            y=alt.Y(f"{y_col}:Q", axis=alt.Axis(), scale=price_scale))

    # Layer order (bottom to top): box plot, median rule, points.
    layered = box + (median_rule + points)

    return layered.facet(column=alt.Column(
        'Year:O',
        header=alt.Header(
            labelAngle=-90,
            titleOrient='top',
            labelOrient='bottom',
            labelAlign='right',
            labelPadding=3,
        ),
    ))
Example #24
0
def accum_global():
    """Return the cumulative area chart built from ``g.accum`` as JSON.

    The chart plots ``cumsum`` against ``workshop_start``, coloured by
    ``variable`` and faceted into one column per ``workshop_category``
    (sorted descending).
    """
    category_header = alt.Header(titleColor='red',
                                 labelColor='red',
                                 titleAnchor="end")
    category_facet = alt.Column('workshop_category',
                                title=None,
                                sort="descending",
                                header=category_header)

    # Two-colour range for the "variable" series; the legend is hidden.
    series_color = alt.Color(
        "variable",
        scale=alt.Scale(range=['#7dbbd2cc', '#bbc6cbe6']),
        legend=None)

    areas = alt.Chart(g.accum).mark_area().encode(
        column=category_facet,
        x=alt.X("workshop_start", title="Date"),
        y=alt.Y("cumsum:Q", title="Cumulative"),
        color=series_color,
        tooltip=['variable', 'cumsum:Q'],
    )
    areas = areas.properties(width=350)
    areas = areas.configure_axis(labelColor='#bbc6cbe6',
                                 titleColor='#bbc6cbe6')
    return areas.to_json()
Example #25
0
def bubble_chart(df, y, facet, tooltip):
    """Create a bubble chart with one facet row per value of ``facet``.

    :param df:      Pandas DataFrame to display
    :param y:       column of DataFrame to use for bubble size (it is also
                    encoded on the y axis, with labels and grid hidden)
    :param facet:   column of DataFrame to create facet with
    :param tooltip: list of DataFrame columns to include in tooltip
    :return:        altair bubble chart
    """
    # A 400px height budget is divided by the number of rows in df.  An empty
    # frame used to raise ZeroDivisionError here; it now falls back to the
    # full budget so an empty chart can still be built.
    row_count = len(df)
    panel_height = 400 / row_count if row_count else 400

    bubbles = alt.Chart(df).mark_circle().encode(
        x=alt.X('days', axis=alt.Axis(grid=True)),
        y=alt.Y(y, axis=alt.Axis(grid=False, labels=False), title=None),
        color=alt.value('#17becf'),
        row=alt.Row(facet, title=None, header=alt.Header(labelAngle=-45)),
        tooltip=tooltip,
        size=alt.Size(y, scale=alt.Scale(range=[100, 500])),
    )
    return (bubbles.properties(width=450, height=panel_height)
            .configure_facet(spacing=5)
            .configure_view(stroke=None))
def generate_ridgeline_plot(data, attribute):
    """Build a ridgeline plot: one binned-count area per species, overlapping.

    ``attribute`` is binned and counted per ``species``; each species is drawn
    as its own facet row and filled according to its mean ``attribute`` value.
    The chart title is looked up in the module-level ``metadata_description``.
    """
    step = 40
    overlap = 1
    field = str(attribute)

    # Stage 1: per-species mean, then bin the attribute and count per bin.
    # Bins with no rows are imputed as 0 so every ridge spans the same bins.
    binned = alt.Chart(data).transform_joinaggregate(
        mean_attribute=f"mean({field})",
        groupby=['species'],
    )
    binned = binned.transform_bin(['bin_max', 'bin_min'], field)
    binned = binned.transform_aggregate(
        value='count()',
        groupby=['species', 'mean_attribute', 'bin_min', 'bin_max'],
    )
    binned = binned.transform_impute(
        impute='value',
        groupby=['species', 'mean_attribute'],
        key='bin_min',
        value=0,
    )

    # Stage 2: area marks and encodings.
    ridges = binned.mark_area(
        interpolate='monotone',
        fillOpacity=0.4,
        stroke='lightgray',
        strokeWidth=0.3,
    )
    x_bins = alt.X('bin_min:Q', bin='binned', title=field)
    # The y range extends above the row (negative upper bound) so that
    # neighbouring ridges overlap.
    y_counts = alt.Y('value:Q',
                     scale=alt.Scale(range=[step, -step * overlap]),
                     axis=None)
    mean_fill = alt.Fill('mean_attribute:Q',
                         legend=None,
                         scale=alt.Scale(domain=[30, 5],
                                         scheme='redyellowblue'))
    species_rows = alt.Row('species:O',
                           title='Species',
                           header=alt.Header(labelAngle=0,
                                             labelAlign='right'))
    ridges = ridges.encode(x_bins, y_counts, mean_fill, species_rows)

    # Stage 3: sizing, title and chart-level configuration.
    chart_title = 'Comparison: ' + str(metadata_description[attribute])
    graph = ridges.properties(
        bounds='flush',
        title=chart_title,
        height=100,
        width=700,
    )
    graph = graph.configure_facet(spacing=0)
    graph = graph.configure_view(stroke=None)
    graph = graph.configure_title(anchor='end')

    return graph
Example #27
0
def visualize_emb(vis_dict):
    """Render embedding columns as stacked area rows in Streamlit.

    :param vis_dict: mapping with two entries: ``'dict'``, the data handed to
                     ``pd.DataFrame``, and ``'c_names'``, the column names
                     folded into ``embedding`` / ``lv`` pairs (one facet row
                     per folded column)
    :return:         None; the chart is sent to ``st.altair_chart``
    """
    # Local names avoid shadowing the builtin ``dict``.
    emb_data = vis_dict['dict']
    c_names = vis_dict['c_names']
    emb_vis_data = pd.DataFrame(emb_data)
    step = 20

    folded = alt.Chart(emb_vis_data).transform_fold(
        c_names,
        as_=['embedding', 'lv'],
    )
    areas = folded.mark_area(
        interpolate='monotone',
        fillOpacity=0.8,
        stroke='lightgray',
        strokeWidth=0.2,
    )
    emb_chart = areas.encode(
        alt.X('x:Q', title=None,
              scale=alt.Scale(domain=[0, 512], range=[0, 1500])),
        alt.Y(
            'lv:Q',
            title="",
            # NOTE(review): `rangeStep` only exists on Scale in older
            # Vega-Lite/Altair releases (dropped in Vega-Lite 4) — confirm
            # against the pinned Altair version.
            scale=alt.Scale(rangeStep=40),
            axis=None,
        ),
        alt.Fill(
            'embedding:N',
            legend=None,
            scale=alt.Scale(scheme='redyellowblue'),
        ),
        row=alt.Row(
            'embedding:N',
            title=None,
            header=alt.Header(labelAngle=360),
        ),
    ).properties(
        bounds='flush', title='Вектор статьи', height=step, width=1200
    ).configure_facet(
        spacing=0
    ).configure_view(
        stroke=None
    ).configure_title(
        anchor='middle'
    )
    st.altair_chart(emb_chart, width=-1)
def draw_availability90_categories(df, upper_category, upper_type,
                                   upper_rename):
    """Show a brush-linked pair of charts in Streamlit.

    The upper chart draws centre-stacked density areas of ``availability_90``,
    one facet column per ``upper_category`` value.  The lower chart is a
    price / review-score scatterplot of the rows with ``price`` below 500; an
    interval brush drawn on it filters the data behind the upper chart.

    :param df:             DataFrame with ``availability_90``, ``price`` and
                           ``review_scores_rating`` fields
    :param upper_category: field used to group, colour and facet the densities
    :param upper_type:     Altair type of ``upper_category``
    :param upper_rename:   display title for ``upper_category``
    """
    brush = alt.selection_interval(encodings=["x", "y"])

    # --- upper chart: density of availability per category -----------------
    density = alt.Chart(df).transform_filter(brush)
    density = density.transform_density(
        'availability_90',
        as_=['availability_90', 'density'],
        groupby=[upper_category],
    )
    density_x = alt.X(
        'density:Q',
        stack='center',
        impute=None,
        title=None,
        axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
    )
    category_color = alt.Color(upper_category,
                               type=upper_type,
                               legend=alt.Legend(title=upper_rename))
    category_facet = alt.Column(upper_category,
                                type=upper_type,
                                header=alt.Header(titleOrient='bottom',
                                                  labelOrient='bottom',
                                                  labelPadding=0,
                                                  title=upper_rename))
    field_availability_chart = density.mark_area(orient='horizontal').encode(
        y=alt.Y('availability_90:Q', title='Availability in 90 Days'),
        color=category_color,
        x=density_x,
        column=category_facet,
        tooltip=[alt.Tooltip('availability_90:Q')],
    )
    field_availability_chart = field_availability_chart.properties(
        width=200).interactive()

    # --- lower chart: scatterplot that owns the brush -----------------------
    affordable = df[df['price'] < 500]
    score_axis = alt.Y("review_scores_rating",
                       scale=alt.Scale(zero=False),
                       title='review score')
    scatter_tooltips = [
        alt.Tooltip('price', format='$.2f'),
        alt.Tooltip('review_scores_rating', title='Review Score'),
    ]
    scatterplot = alt.Chart(affordable).mark_circle(size=100).encode(
        alt.X("price"),
        score_axis,
        tooltip=scatter_tooltips,
    ).add_selection(brush)

    st.write(
        "**Brush through the lower scatterplot to filter the upper chart by review score and price.**"
    )
    st.write(field_availability_chart & scatterplot)
Example #29
0
def plot_state_bar(states, gender, remote):
    """Return the per-state treatment bar chart as HTML.

    Rows of the module-level ``m_data`` frame are kept only when their
    ``state``, ``Gender`` and ``remote_work`` values are contained in
    ``states``, ``gender`` and ``remote`` respectively.  Bars count
    participants per state, faceted by ``treatment``; a legend-bound
    selection on ``state`` drives bar opacity (0.9 when the condition holds,
    0.1 otherwise).

    :return: the chart serialised with ``to_html()``
    """
    legend_click = alt.selection_multi(fields=['state'], bind='legend')

    selected = m_data
    for field, allowed in (('state', states),
                           ('Gender', gender),
                           ('remote_work', remote)):
        selected = selected[selected[field].isin(allowed)]

    treatment_facet = alt.Column(
        'treatment',
        title='Sought Treatment or Not',
        header=alt.Header(titleOrient='bottom', labelOrient='bottom'))
    highlight = alt.condition(legend_click, alt.value(0.9), alt.value(0.1))

    bars = alt.Chart(selected).mark_bar().encode(
        x=alt.X('state', axis=None),
        y=alt.Y('count()', title='Count of Participants'),
        tooltip='count()',
        color='state',
        column=treatment_facet,
        opacity=highlight,
    )
    bars = bars.add_selection(legend_click).properties(width=100)
    return bars.to_html()
Example #30
0
def plot_interfere(states, gender, remote):
    """Return the per-state work-interference bar chart as HTML.

    Rows of the module-level ``m_data`` frame are kept only when their
    ``state``, ``Gender`` and ``remote_work`` values are contained in
    ``states``, ``gender`` and ``remote`` respectively.  Bars count
    participants per state, faceted by ``work_interfere``; a legend-bound
    selection on ``state`` drives bar opacity (0.9 when the condition holds,
    0.2 otherwise).

    :return: the chart serialised with ``to_html()``
    """
    legend_click = alt.selection_multi(fields=['state'], bind='legend')

    selected = m_data
    for field, allowed in (('state', states),
                           ('Gender', gender),
                           ('remote_work', remote)):
        selected = selected[selected[field].isin(allowed)]

    interfere_facet = alt.Column(
        'work_interfere',
        title='Does Your Mental Illness Interfere With Work?',
        header=alt.Header(titleOrient='bottom', labelOrient='bottom'))
    highlight = alt.condition(legend_click, alt.value(0.9), alt.value(0.2))

    bars = alt.Chart(selected).mark_bar().encode(
        x=alt.X('state', axis=None),
        y=alt.Y('count()', title='Count of Participants'),
        tooltip='count()',
        color='state',
        column=interfere_facet,
        opacity=highlight,
    )
    bars = bars.add_selection(legend_click).properties(width=75)
    return bars.to_html()