def _padded_kde_extent(lower, upper):
    """Return ``[lower, upper]`` with each bound pushed outward by 20 %.

    The padding is proportional to the bound's own magnitude.  The factor is
    picked by sign because multiplying a negative minimum by 0.8 (or a
    negative maximum by 1.2) would move the bound *into* the data and
    truncate the estimated density.
    """
    padded_lower = lower * 0.8 if lower >= 0 else lower * 1.2
    padded_upper = upper * 1.2 if upper >= 0 else upper * 0.8
    return [padded_lower, padded_upper]


def create_grouped_kde(df, col, target=None, by_class=True, size=175, ylabel=None, xlabel=None):
    """
    Create an altair chart showing the kernel density estimate of a variable
    with the option to plot a separate distribution by each class.

    Parameters
    ----------
    df : pandas DataFrame
    col : string
        The column of df to be plotted
    target : string
        The column of df that holds target labels. Required when
        ``by_class`` is true.
    by_class : boolean, default True
        Whether to plot data by class
    size : integer
        Size (width & height) of the returned plot
    ylabel : string
        Y-axis title; defaults to "density"
    xlabel : string
        X-axis title; defaults to ``col``

    Returns
    -------
    Altair chart

    Raises
    ------
    ValueError
        If ``by_class`` is true but no ``target`` column is given.
    """
    # Fail early with a readable message instead of the KeyError that
    # ``df[None]`` would raise further down.
    if by_class and target is None:
        raise ValueError(
            "create_grouped_kde: `target` must name the label column when "
            "`by_class` is True (pass by_class=False for a single density)."
        )

    ylabel = "density" if ylabel is None else ylabel
    xlabel = col if xlabel is None else xlabel

    extent = _padded_kde_extent(df[col].min(), df[col].max())
    x_channel = alt.X(f"{col}:Q", title=xlabel)
    y_channel = alt.Y("density:Q", title=ylabel)

    if by_class:
        # Plain Python list so the scale domain serialises cleanly.
        labels = df[target].unique().tolist()
        colors = ["DarkMagenta", "MediumOrchid", "RebeccaPurple"]
        chart = alt.Chart(df, title=col).transform_density(
            density=col,
            counts=True,
            groupby=[target],
            steps=len(df),
            extent=extent,
            as_=[col, "density"],
        ).mark_area(
            opacity=0.7,
            line=alt.OverlayMarkDef(stroke="black", strokeWidth=3),
        ).encode(
            x=x_channel,
            y=y_channel,
            color=alt.Color(
                f"{target}:N",
                scale=alt.Scale(domain=labels, range=colors[:len(labels)]),
            ),
        )
    else:
        chart = alt.Chart(df, title=col).transform_density(
            density=col,
            counts=True,
            steps=len(df),
            extent=extent,
            as_=[col, "density"],
        ).mark_area(
            opacity=0.8,
            line=alt.OverlayMarkDef(stroke="black", strokeWidth=3, fill="DarkMagenta"),
        ).encode(
            x=x_channel,
            y=y_channel,
        )

    return chart.properties(width=size, height=size)
def make_chart(self, df):
    """Build a faceted line chart with one row per ``variable``.

    ``df`` must provide the fields referenced below: ``bulletin_date``,
    ``variable`` and ``value``.  Each facet row shows a line with point
    markers and a text label (one decimal place) drawn 10 px below each
    point.  Rows and colours follow the fixed order Confirmados,
    Probables, Muertes.

    Returns the faceted Altair chart.
    """
    sort_order = ['Confirmados', 'Probables', 'Muertes']

    lines = alt.Chart(df).mark_line(
        strokeWidth=3,
        point=alt.OverlayMarkDef(size=50),
    ).encode(
        # Title restored from mojibake: the accented "í" had been decoded
        # with the wrong charset.
        x=alt.X('yearmonthdate(bulletin_date):O',
                title="Fecha boletín",
                axis=alt.Axis(format='%d/%m', titlePadding=10)),
        y=alt.Y('value:Q', title=None),
        color=alt.Color('variable', sort=sort_order, legend=None),
        tooltip=[
            'variable',
            'bulletin_date',
            alt.Tooltip(field='value', type='quantitative', format=".1f"),
        ],
    )

    # Value labels reuse the line chart's encodings and add a text channel.
    text = lines.mark_text(
        align='center',
        baseline='line-top',
        size=15,
        dy=10,
    ).encode(text=alt.Text('value:Q', format='.1f'))

    return (lines + text).properties(width=550, height=37).facet(
        row=alt.Row('variable', title=None, sort=sort_order))
def concat_ngdp_vis(year, geo):
    """Build the nominal-GDP dashboard for one geography and year as HTML.

    The dashboard stacks three rows of two panels each:

    * headline figures: total GDP and its growth rate for ``geo`` in
      ``year``, drawn as large text marks;
    * GDP level: evolution for ``geo`` from 2000 to ``year`` next to a bar
      chart of every geography in ``year``;
    * GDP growth rate: the same two views built from the growth data.

    Level panels read the module-level ``eco`` data; growth panels read
    ``eco_gr``.

    Parameters
    ----------
    year : value matched against the ``Year`` field; also the upper bound
        of the evolution panels' year range
    geo : value matched against the ``Geography`` field

    Returns
    -------
    The result of ``to_html()`` on the configured compound chart.
    """
    panel_size = {'width': 300, 'height': 200}

    # Small factories so every panel gets its own fresh Altair objects.
    def geo_filter():
        return alt.FieldEqualPredicate(field='Geography', equal=geo)

    def year_filter():
        return alt.FieldEqualPredicate(field='Year', equal=year)

    def years_up_to_selected():
        return alt.FieldRangePredicate('Year', [2000, year])

    def tooltips(*extra):
        return [
            alt.Tooltip('Geography', title='Province/territory'),
            alt.Tooltip('Year'),
            *extra,
        ]

    def year_axis():
        return alt.X('Year', title='Year',
                     axis=alt.Axis(tickCount=5, titleFontSize=20, grid=False,
                                   ticks=False, format='Y'))

    def geography_axis():
        return alt.Y('Geography:O', sort='-x',
                     axis=alt.Axis(labelFontSize=18), title=None)

    def highlight_selected_geography():
        return alt.condition(
            (alt.datum.Geography == geo) | (alt.datum.Geography == 'Canada'),
            alt.value('darkblue'),
            alt.value('lightblue'))

    def headline(data, title, unit_title, y_axis, number_format):
        # Single large number: aggregate, narrow to (geo, year), draw as text.
        figure = alt.Chart(data, title=title).transform_aggregate(
            groupby=['Geography', 'Year'],
            GDP='sum(Nominal GDP)',
        ).transform_filter(geo_filter()).transform_filter(year_filter())
        figure = figure.mark_text(dx=-175, color='darkblue', size=40).encode(
            x=alt.X('sum(GDP):Q', title=unit_title,
                    axis=alt.Axis(titleFontSize=20, grid=False, ticks=False,
                                  labels=False)),
            y=alt.Y('Geography:O', sort='-x', title=None, axis=y_axis),
            tooltip=tooltips(),
        )
        return figure.encode(text=alt.Text('sum(GDP):Q', format=number_format))

    # --- Row 1: headline figures -------------------------------------------
    total_gdp = headline(
        eco, "Total GDP", 'CA$ (MM)',
        alt.Axis(labelFontSize=20, ticks=False, grid=False), '$,')
    growth_rate = headline(
        eco_gr, "Growth rate", 'Yearly %',
        alt.Axis(grid=False, ticks=False, labels=False), '.2%')

    # --- Row 2: GDP level --------------------------------------------------
    gdp_evolution = alt.Chart(eco, title="GDP evolution").mark_area(
        point=alt.OverlayMarkDef(filled=False, fill='darkblue'),
    ).encode(
        x=year_axis(),
        y=alt.Y('sum(Nominal GDP):Q',
                axis=alt.Axis(tickCount=3, titleFontSize=20, grid=False,
                              ticks=False, format='$,f'),
                title=None),
        tooltip=tooltips(
            alt.Tooltip('sum(Nominal GDP):Q', format='$,', title='GDP')),
    ).transform_filter(geo_filter()).transform_filter(years_up_to_selected())

    gdp_by_geography = alt.Chart(
        eco, title="Contribution by province/territory",
    ).mark_bar().encode(
        x=alt.X('sum(Nominal GDP):Q', title='CA$ (MM)',
                axis=alt.Axis(tickCount=3, titleFontSize=20, grid=False,
                              format='$,f')),
        y=geography_axis(),
        color=highlight_selected_geography(),
        tooltip=tooltips(
            alt.Tooltip('sum(Nominal GDP):Q', format='$,', title='GDP')),
    ).transform_filter(year_filter())

    # --- Row 3: GDP growth rate --------------------------------------------
    growth_evolution = alt.Chart(eco_gr, title="GDP growth rates").mark_bar(
        point=alt.OverlayMarkDef(filled=False, fill='darkblue'),
        size=16,
    ).encode(
        x=year_axis(),
        y=alt.Y('sum(Nominal GDP):Q',
                axis=alt.Axis(tickCount=3, grid=False, ticks=False,
                              format='%'),
                title=None),
        # Only the selected year's bar is drawn in the dark colour.
        color=alt.condition((alt.datum.Year == year),
                            alt.value('darkblue'),
                            alt.value('lightblue')),
        tooltip=tooltips(
            alt.Tooltip('sum(Nominal GDP):Q', format='.2%',
                        title='Growth rate')),
    ).transform_filter(geo_filter()).transform_filter(years_up_to_selected())

    growth_by_geography = alt.Chart(
        eco_gr, title="By province/territory",
    ).mark_bar().encode(
        x=alt.X('sum(Nominal GDP):Q', title=None,
                axis=alt.Axis(tickCount=3, titleFontSize=20, grid=False,
                              format='%')),
        y=geography_axis(),
        color=highlight_selected_geography(),
        tooltip=tooltips(
            alt.Tooltip('sum(Nominal GDP):Q', format='.2%',
                        title='Growth rate')),
    ).transform_filter(year_filter())

    # --- Layout ------------------------------------------------------------
    summary_row = (total_gdp.properties(**panel_size)
                   | growth_rate.properties(**panel_size))
    level_row = (gdp_evolution.properties(**panel_size)
                 | gdp_by_geography.properties(**panel_size))
    growth_row = (growth_evolution.properties(**panel_size)
                  | growth_by_geography.properties(**panel_size))

    dashboard = (summary_row & level_row) & growth_row
    dashboard = dashboard.configure_view(strokeOpacity=0)
    dashboard = dashboard.configure_axis(domain=False)
    dashboard = dashboard.configure_title(fontSize=30)
    return dashboard.to_html()
def concat_employment_vis(year, geo):
    """Build the unemployment dashboard for one geography and year as HTML.

    Layout: the top row holds the unemployment-rate evolution for ``geo``
    (2000 to ``year``) beside a bar chart of every geography in ``year``;
    the bottom row holds the rate by industry for ``geo`` in ``year``.

    The two top panels read the module-level ``eco`` data; the industry
    panel reads ``labour``.

    Parameters
    ----------
    year : value matched against the ``Year`` field; also the upper bound
        of the evolution panel's year range
    geo : value matched against the ``Geography`` field

    Returns
    -------
    The result of ``to_html()`` on the configured compound chart.
    """
    panel_width, panel_height = 300, 200
    average_rate = 'average(Unemployment rate):Q'

    def same_geography():
        return alt.FieldEqualPredicate(field='Geography', equal=geo)

    def same_year():
        return alt.FieldEqualPredicate(field='Year', equal=year)

    def percent_axis():
        return alt.Axis(tickCount=3, titleFontSize=20, grid=False,
                        ticks=False, format='%')

    def base_tooltips():
        return [
            alt.Tooltip('Geography', title='Province/territory'),
            alt.Tooltip('Year'),
        ]

    def rate_tooltip(field):
        return alt.Tooltip(field, format='.2%', title='Unemployment rate')

    def ranked_labels(field):
        return alt.Y(field, sort='-x', axis=alt.Axis(labelFontSize=18),
                     title=None)

    # Evolution of the rate for the chosen geography.
    evolution = alt.Chart(eco, title="Unemployment rate evolution")
    evolution = evolution.mark_area(
        point=alt.OverlayMarkDef(filled=False, fill='darkblue'))
    evolution = evolution.encode(
        x=alt.X('Year', title='Year',
                axis=alt.Axis(tickCount=5, titleFontSize=20, grid=False,
                              ticks=False, format='Y')),
        y=alt.Y(average_rate, axis=percent_axis(), title=None),
        tooltip=base_tooltips() + [rate_tooltip(average_rate)],
    )
    evolution = evolution.transform_filter(same_geography())
    evolution = evolution.transform_filter(
        alt.FieldRangePredicate('Year', [2000, year]))

    # Every geography in the chosen year; the chosen one and 'Canada' are
    # drawn in the dark colour.
    by_geography = alt.Chart(
        eco, title="Unemployment rate by province/territory").mark_bar()
    by_geography = by_geography.encode(
        x=alt.X(average_rate, axis=percent_axis(), title=None),
        y=ranked_labels('Geography:O'),
        color=alt.condition(
            (alt.datum.Geography == geo) | (alt.datum.Geography == 'Canada'),
            alt.value('darkblue'),
            alt.value('lightblue')),
        tooltip=base_tooltips() + [rate_tooltip(average_rate)],
    )
    by_geography = by_geography.transform_filter(same_year())

    # Industry breakdown for the chosen geography and year.
    by_industry = alt.Chart(
        labour, title="Unemployment rate by industry").mark_bar()
    by_industry = by_industry.encode(
        x=alt.X('Unemployment rate:Q',
                axis=alt.Axis(tickCount=3, grid=False, ticks=False,
                              format='%'),
                title=None),
        y=ranked_labels('Industry:O'),
        color=alt.value('lightblue'),
        tooltip=base_tooltips() + [
            alt.Tooltip('Industry'),
            rate_tooltip('Unemployment rate:Q'),
        ],
    )
    by_industry = by_industry.transform_filter(same_year())
    by_industry = by_industry.transform_filter(same_geography())

    by_industry = by_industry.properties(width=panel_width, height=panel_height)
    by_geography = by_geography.properties(width=panel_width, height=panel_height)
    evolution = evolution.properties(width=panel_width, height=panel_height)

    employment_plot = (evolution | by_geography) & by_industry
    employment_plot = employment_plot.configure_view(strokeOpacity=0)
    employment_plot = employment_plot.configure_axis(domain=False)
    employment_plot = employment_plot.configure_title(fontSize=30)
    return employment_plot.to_html()
def _earnings_evolution_chart(column, title, year, geo):
    """Area chart of ``average(column)`` from ``eco`` for ``geo``, Year in [2002, year]."""
    measure = f'average({column}):Q'
    return alt.Chart(eco, title=title).mark_area(
        point=alt.OverlayMarkDef(filled=False, fill='darkblue'),
    ).encode(
        # The x axis encodes Year but is titled 'CA$'.
        # NOTE(review): presumably a unit caption for the y values - confirm.
        x=alt.X('Year', title='CA$',
                axis=alt.Axis(tickCount=5, titleFontSize=20, grid=False,
                              ticks=False, format='Y')),
        y=alt.Y(measure,
                axis=alt.Axis(tickCount=3, titleFontSize=20, grid=False,
                              ticks=False, format='$,f'),
                title=None),
        tooltip=[
            alt.Tooltip('Geography', title='Province/territory'),
            alt.Tooltip('Year'),
            alt.Tooltip(measure, format='$,', title='Earnings'),
        ],
    ).transform_filter(
        alt.FieldEqualPredicate(field='Geography', equal=geo),
    ).transform_filter(
        alt.FieldRangePredicate('Year', [2002, year]),
    )


def _earnings_growth_chart(column, year, geo):
    """Bar chart of ``average(column)`` from ``eco_gr`` for ``geo``, Year in [2002, year]."""
    measure = f'average({column}):Q'
    return alt.Chart(eco_gr, title="Growth rates").mark_bar(
        point=alt.OverlayMarkDef(filled=False, fill='darkblue'),
        size=16,
    ).encode(
        x=alt.X('Year', title='Year',
                axis=alt.Axis(tickCount=5, titleFontSize=20, grid=False,
                              ticks=False, format='Y')),
        y=alt.Y(measure,
                axis=alt.Axis(tickCount=3, grid=False, ticks=False,
                              format='%'),
                title=None),
        # Only the selected year's bar is drawn in the dark colour.
        color=alt.condition((alt.datum.Year == year),
                            alt.value('darkblue'),
                            alt.value('lightblue')),
        tooltip=[
            alt.Tooltip('Geography', title='Province/territory'),
            alt.Tooltip('Year'),
            alt.Tooltip(measure, format='.2%', title='Growth rate'),
        ],
    ).transform_filter(
        alt.FieldEqualPredicate(field='Geography', equal=geo),
    ).transform_filter(
        alt.FieldRangePredicate('Year', [2002, year]),
    )


def concat_earnings_vis(year, geo):
    """Build the earnings dashboard for one geography and year as HTML.

    Three rows are stacked in this order: all industries, the
    service-producing sector, the goods-producing sector.  Each row pairs an
    earnings evolution area chart (from the module-level ``eco`` data) with
    a growth-rate bar chart (from ``eco_gr``); both cover Year in
    [2002, ``year``] for ``geo``.

    Parameters
    ----------
    year : upper bound of the year range; its bar is highlighted in the
        growth-rate charts
    geo : value matched against the ``Geography`` field

    Returns
    -------
    The result of ``to_html()`` on the configured compound chart.
    """
    w = 300
    h = 200

    # (data column, title of the evolution panel), in display order.
    sectors = [
        ('All industries', "All industries"),
        ('Service-producing sector', "Service-producing"),
        ('Goods-producing sector', "Goods-producing sector"),
    ]

    rows = [
        _earnings_evolution_chart(column, title, year, geo).properties(width=w, height=h)
        | _earnings_growth_chart(column, year, geo).properties(width=w, height=h)
        for column, title in sectors
    ]
    all_ear, serv_ear, goods_ear = rows

    earnings_plot = ((all_ear & serv_ear) & goods_ear).configure_view(
        strokeOpacity=0).configure_axis(domain=False).configure_title(
            fontSize=30)
    return earnings_plot.to_html()
""" Line Chart with Points ---------------------- This chart shows a simple line chart with points marking each value. Use ``point=True`` for points with default appearance or customize it with ``OverlayMarkDef()``. """ # category: line charts import altair as alt import numpy as np import pandas as pd x = np.arange(100) source = pd.DataFrame({'x': x, 'f(x)': np.sin(x / 5)}) alt.Chart(source).mark_line(point=alt.OverlayMarkDef(color="red")).encode( x='x', y='f(x)')
def concat_cpi_vis(year, geo):
    """Build the consumer-price-index dashboard for one geography and year as HTML.

    Three rows of two panels each:

    * all-items CPI evolution for ``geo`` | its growth rates;
    * gasoline CPI evolution for ``geo`` | its growth rates;
    * all-items CPI of every geography in ``year`` | the matching growth
      rates.

    Evolution panels cover Year in [2000, ``year``].  Level panels read the
    module-level ``eco`` data; growth panels read ``eco_gr``.

    Parameters
    ----------
    year : value matched against the ``Year`` field; also the upper bound
        of the evolution panels' year range
    geo : value matched against the ``Geography`` field

    Returns
    -------
    The result of ``to_html()`` on the configured compound chart.
    """
    w = 300
    h = 200

    def for_geo():
        return alt.FieldEqualPredicate(field='Geography', equal=geo)

    def for_year():
        return alt.FieldEqualPredicate(field='Year', equal=year)

    def since_2000():
        return alt.FieldRangePredicate('Year', [2000, year])

    def where_and_when():
        return [
            alt.Tooltip('Geography', title='Province/territory'),
            alt.Tooltip('Year'),
        ]

    def year_channel(title):
        return alt.X('Year', title=title,
                     axis=alt.Axis(tickCount=5, titleFontSize=20, grid=False,
                                   ticks=False, format='Y'))

    def geography_channel():
        return alt.Y('Geography:O', sort='-x',
                     axis=alt.Axis(labelFontSize=18), title=None)

    def highlight_geo():
        return alt.condition(
            (alt.datum.Geography == geo) | (alt.datum.Geography == 'Canada'),
            alt.value('darkblue'),
            alt.value('lightblue'))

    def highlight_year():
        return alt.condition((alt.datum.Year == year),
                             alt.value('darkblue'),
                             alt.value('lightblue'))

    def index_evolution(title, measure, value_tooltip):
        # The x axis encodes Year but is titled '2002=100'.
        # NOTE(review): presumably the index base shown as a caption - confirm.
        return alt.Chart(eco, title=title).mark_area(
            point=alt.OverlayMarkDef(filled=False, fill='darkblue'),
        ).encode(
            x=year_channel('2002=100'),
            y=alt.Y(measure,
                    axis=alt.Axis(tickCount=3, titleFontSize=20, grid=False,
                                  ticks=False, format=',.0f'),
                    title=None),
            tooltip=where_and_when() + [value_tooltip],
        ).transform_filter(for_geo()).transform_filter(since_2000())

    def growth_evolution(measure):
        return alt.Chart(eco_gr, title="Growth rates").mark_bar(
            point=alt.OverlayMarkDef(filled=False, fill='darkblue'),
            size=16,
        ).encode(
            x=year_channel('Year'),
            y=alt.Y(measure,
                    axis=alt.Axis(tickCount=3, grid=False, ticks=False,
                                  format='%'),
                    title=None),
            color=highlight_year(),
            tooltip=where_and_when() + [
                alt.Tooltip(measure, format='.2%', title='Growth rate'),
            ],
        ).transform_filter(for_geo()).transform_filter(since_2000())

    all_cpi_evo = index_evolution(
        "CPI all-items evolution", 'average(All-items):Q',
        alt.Tooltip('average(All-items):Q', format='.0f', title='CPI'))
    all_cpi_gr = growth_evolution('sum(All-items):Q')

    gasoline_cpi_evo = index_evolution(
        "CPI gasoline evolution", 'average(Gasoline):Q',
        alt.Tooltip('average(Gasoline):Q', title='CPI'))
    gasoline_cpi_gr = growth_evolution('sum(Gasoline):Q')

    cpi_rank = alt.Chart(
        eco, title="CPI all-items by province/territory",
    ).mark_bar().encode(
        x=alt.X('average(All-items):Q', title='2002=100',
                axis=alt.Axis(tickCount=3, titleFontSize=20, grid=False,
                              format=',.0f')),
        y=geography_channel(),
        color=highlight_geo(),
        tooltip=where_and_when() + [
            alt.Tooltip('average(All-items):Q', title='CPI'),
        ],
    ).transform_filter(for_year())

    cpi_gr_rank = alt.Chart(
        eco_gr, title="Growth rate by province/territory",
    ).mark_bar().encode(
        x=alt.X('average(All-items):Q', title=None,
                axis=alt.Axis(tickCount=3, titleFontSize=20, grid=False,
                              format='%')),
        y=geography_channel(),
        color=highlight_geo(),
        tooltip=where_and_when() + [
            # This panel plots growth rates, so the value is labelled as
            # one, matching the other growth-rate tooltips.
            alt.Tooltip('average(All-items):Q', format='.2%',
                        title='Growth rate'),
        ],
    ).transform_filter(for_year())

    all_cpi_evo = all_cpi_evo.properties(width=w, height=h)
    all_cpi_gr = all_cpi_gr.properties(width=w, height=h)
    gasoline_cpi_evo = gasoline_cpi_evo.properties(width=w, height=h)
    gasoline_cpi_gr = gasoline_cpi_gr.properties(width=w, height=h)
    cpi_rank = cpi_rank.properties(width=w, height=h)
    cpi_gr_rank = cpi_gr_rank.properties(width=w, height=h)

    all_cpi = (all_cpi_evo | all_cpi_gr)
    gasoline_cpi = (gasoline_cpi_evo | gasoline_cpi_gr)
    rank_cpi = (cpi_rank | cpi_gr_rank)

    cpi_plot = ((all_cpi & gasoline_cpi) & rank_cpi).configure_view(
        strokeOpacity=0).configure_axis(domain=False).configure_title(
            fontSize=30)
    return cpi_plot.to_html()