def chart(disks, tower):
    """Draw one tower as two mirrored horizontal bar charts.

    Disk sizes are written into a ten-slot array from the end backwards
    (``disks[0]`` goes to the last slot), so unused slots stay at zero.
    The same data is plotted twice -- once with the x axis sorted
    descending and once as-is -- and the two halves are concatenated.

    Args:
        disks: Sequence of disk sizes; at most ten fit into the array.
        tower: Label used to build the chart title.

    Returns:
        The concatenated Altair chart titled ``"<tower> Tower"``.
    """
    slots = np.zeros(10)
    # Negative indexing fills the array from its end: disks[0] -> slots[-1].
    for offset, size in enumerate(disks, start=1):
        slots[-offset] = size

    frame = pd.DataFrame({'y': np.arange(10), 'x': slots})

    def half(**x_options):
        # One half of the tower; only the x sort order differs between halves.
        encoded = alt.Chart(frame).encode(
            y=alt.Y('y:O', axis=None),
            x=alt.X('x:Q', axis=None, scale=alt.Scale(domain=(0, 10)),
                    **x_options),
        )
        return encoded.mark_bar().properties(width=100, height=150)

    mirrored = half(sort=alt.SortOrder('descending'))
    plain = half()
    combined = alt.concat(mirrored, plain, spacing=5)
    return combined.properties(title=tower + " Tower")
def xai_indiv():
    """Render the per-instance explanation page.

    Loads the model, the test sample and the SHAP table, shows a prediction
    histogram per target class, then lets the user pick a prediction bin, a
    class and one matching instance. For that instance it shows the feature
    values, the actual label, the prediction and a SHAP waterfall chart.

    Matching instances are tracked by row *position*, because every later
    lookup (``.iloc``, the label array, the score array) is positional.
    """
    clf = load_model("output/lgb_model.pkl")
    sample = load_data("output/test.gz.parquet")
    x_sample = sample[FEATURES]
    y_sample = sample[TARGET].values

    scores = predict(clf, x_sample)
    shap_df = load_data("output/shap_df.gz.parquet")

    # TODO
    score_df = sample[[TARGET]].copy()
    score_df["Prediction"] = scores
    charts = [
        plot_hist(score_df[score_df[TARGET] == lb]).properties(
            title=f"Class = {lb}")
        for lb in TARGET_CLASSES
    ]
    st.altair_chart(alt.concat(*charts, columns=2), use_container_width=True)

    # customized
    bin_options = [f"{i / 10:.1f} - {(i + 1) / 10:.1f}" for i in range(10)]
    scores_bin = (scores * 10).astype(int)
    select_bin = st.selectbox("Select prediction bin", bin_options)
    bin_idx = bin_options.index(select_bin)
    select_class = st.selectbox("Select class", TARGET_CLASSES, 1)
    class_idx = TARGET_CLASSES.index(select_class)

    # Collect positions rather than index labels: the lookups below are all
    # positional, so labels are only correct for a default 0..n-1 index.
    matches = (y_sample == class_idx) & (scores_bin == bin_idx)
    select_samples = [pos for pos, hit in enumerate(matches) if hit]

    if not select_samples:
        st.write("**No instances found.**")
        return

    # Select instance
    if len(select_samples) > 1:
        # The upper bound is the last valid position, not the count.
        _row_idx = st.slider("Select instance", 0, len(select_samples) - 1, 0)
    else:
        # A single match needs no slider (its range would be 0..0).
        _row_idx = 0
    row_idx = select_samples[_row_idx]
    instance = x_sample.iloc[row_idx:row_idx + 1]

    st.write("**Feature values**")
    st.dataframe(instance.T)
    st.write(f"**Actual label: `{y_sample[row_idx]}`**")
    st.write(f"**Prediction: `{scores[row_idx]:.4f}`**")

    # Compute SHAP values
    st.subheader("Feature SHAP contribution to prediction")
    shap_values = shap_df[FEATURES].iloc[row_idx].values
    base_value = shap_df["base_value"].iloc[row_idx]
    source = make_source_waterfall(instance, base_value, shap_values,
                                   max_display=15)
    st.altair_chart(waterfall_chart(source).properties(height=500),
                    use_container_width=True)
def draw(self):
    """Build an Altair heatmap of ``self.data``.

    The ``x`` and ``y`` fields are drawn as ordinal axes with no axis
    decoration, ``z`` drives the color and the tooltip, and the chart title
    is ``repr(self)``. The heatmap is returned wrapped in ``alt.concat``.
    """
    hidden_x = alt.X("x:O", axis=None)
    hidden_y = alt.Y('y:O', axis=None)
    heatmap = alt.Chart(self.data).mark_rect().encode(
        x=hidden_x,
        y=hidden_y,
        color='z:Q',
        tooltip=[alt.Tooltip('z:Q', title='z')],
    )
    heatmap = heatmap.properties(title=repr(self), width=250, height=250)
    return alt.concat(heatmap)
def con_plt(DayofWeek="Monday", TimeofDay="Morning", branch_index="A"):
    """Build the four summary plots for one selection and lay them in a row.

    Each plot comes from ``update_graph`` with the same day/time/branch
    selection and a different aggregate expression and title.
    """
    # (aggregate expression, plot title), in left-to-right display order.
    panel_specs = (
        ('sum(Total)', "Total Sales"),
        ('count(Invoice ID)', "Customer Traffic"),
        ("mean(Total)", "Average Transaction Size"),
        ("mean(Rating)", "Average Satisfaction"),
    )
    panels = [
        update_graph(DayofWeek, TimeofDay, branch_index, aggregate, heading)
        for aggregate, heading in panel_specs
    ]
    return alt.concat(*panels, columns=4)
def con_plt(day_of_week='Monday', time_of_day='Morning', branch_index='A'):
    """Concatenate the four bar plots into one configured row."""
    # (aggregate expression, plot title, axis label), in display order.
    panel_specs = [
        ('sum(Total)', 'Total Sales', 'Sales in MMK'),
        ('count(Invoice ID)', 'Customer Traffic', 'Transactions'),
        ('mean(Total)', 'Average Transaction Size', 'Sales in MMK'),
        ('mean(Rating)', 'Average Satisfaction', 'Rating'),
    ]
    panels = [
        make_bar_plot(day_of_week, time_of_day, branch_index, *spec)
        for spec in panel_specs
    ]

    row = alt.concat(*panels, columns=4)
    row = row.configure_axis(labelFontSize=13, titleFontSize=13)
    row = row.configure_title(fontSize=14)
    return row.configure_axisX(labelAngle=45)
def concatenate():
    """Show two copies of the duration/views scatter, colored differently.

    Both copies share the same base scatter of the module-level ``df``; one
    is colored by ``published_month`` and the other by ``published_day``,
    with independent color scales.
    """
    points = alt.Chart(df).mark_point()
    points = points.encode(x="duration", y="views:Q")
    points = points.properties(width=250, height=250)

    by_month = points.encode(color="published_month")
    by_day = points.encode(color="published_day")

    side_by_side = alt.concat(by_month, by_day)
    st.altair_chart(side_by_side.resolve_scale(color="independent"))
def scoring_confusion_matrix_v2(
    data: pd.DataFrame,
    xvar: str,
    target_var: str,
    threshold: float = 0.5,
    bin_width: float = 0.1,
    width: int = 200,
    height: int = 200,
) -> alt.ConcatChart:
    """Build a 2-column grid with one scoring quadrant per confusion category.

    Args:
        data: Input frame; it is passed through
            ``compute_confusion_categories`` before plotting.
        xvar: Column forwarded to the categorisation and to each quadrant.
        target_var: Column forwarded to the categorisation.
        threshold: Decision threshold; also shown in the chart subtitle.
        bin_width: Bin width forwarded to each quadrant.
        width: Width forwarded to each quadrant.
        height: Height forwarded to each quadrant.

    Returns:
        The concatenated chart with shared x/y scales, a title and the
        embed options used for export.
    """
    data = compute_confusion_categories(data, xvar, target_var, threshold,
                                        add_counts=False)

    quadrants: List[alt.Chart] = []
    for idx, confusion_category in enumerate(CONFUSION_CATEGORIES):
        data_to_plot = data.query(
            f"{CONFUSION_CATEGORIES_COL_NAME} == '{confusion_category}'")
        # Every remaining row belongs to this category, so the row count is
        # the category count. Unlike ``value_counts().item()`` this does not
        # raise when the category is empty or the column is categorical.
        count = len(data_to_plot)
        title = f"{confusion_category} ({count})"
        quadrants.append(
            scoring_quadrant(
                data_to_plot,
                xvar,
                bin_width,
                width,
                height,
                title=title,
                xtitle=AXIS_TITLES[idx][0],
                ytitle=AXIS_TITLES[idx][1],
            ))

    confusion_matrix = (alt.concat(*quadrants, columns=2).resolve_scale(
        x="shared", y="shared").properties(
            title={
                "text": "Scoring confusion matrix",
                "subtitle": f"Threshold: {threshold}",
            },
            usermeta={
                "embedOptions": {
                    "scaleFactor": 5,
                    "downloadFileName": "scoring_confusion_matrix_v2",
                }
            },
        ))
    return confusion_matrix
def alg_fai(fmeasures, aif_metric, threshold):
    """Render the fairness section: ratio chart, table, metrics and matrices.

    Args:
        fmeasures: Frame holding at least the ``Metric``, ``Unprivileged``,
            ``Privileged``, ``Ratio`` and ``Fair?`` columns.
        aif_metric: Object providing ``binary_confusion_matrix`` and accepted
            by ``get_perf_measure_by_group``.
        threshold: Allowed deviation of the ratio from 1.
    """
    lower, upper = 1 - threshold, 1 + threshold
    st.write(
        f"Fairness is when **ratio is between {lower:.2f} and {upper:.2f}**."
    )
    st.altair_chart(plot_fmeasures_bar(fmeasures, threshold),
                    use_container_width=True)

    summary_cols = ["Metric", "Unprivileged", "Privileged", "Ratio", "Fair?"]
    styled = fmeasures[summary_cols].style.applymap(color_red,
                                                    subset=["Fair?"])
    st.dataframe(styled)

    st.write("**Performance Metrics**")
    metric_names = (
        'TPR', 'TNR', 'FPR', 'FNR', 'PPV', 'NPV', 'FDR', 'FOR', 'ACC',
        'selection_rate', 'precision', 'recall', 'sensitivity',
        'specificity', 'power', 'error_rate',
    )

    def perf_bar(metric_name):
        # One horizontal bar chart per metric, one bar per group.
        per_group = get_perf_measure_by_group(aif_metric, metric_name)
        return alt.Chart(per_group).mark_bar().encode(
            x=f"{metric_name}:Q",
            y="Group:O",
            tooltip=["Group", metric_name],
        )

    all_perfs = [perf_bar(metric_name) for metric_name in metric_names]
    st.altair_chart(alt.concat(*all_perfs, columns=1),
                    use_container_width=False)

    st.write("**Confusion Matrices**")

    def matrix_chart(privileged, label):
        matrix = aif_metric.binary_confusion_matrix(privileged=privileged)
        return get_confusion_matrix_chart(matrix, label)

    st.altair_chart(alt.concat(matrix_chart(None, "All"), columns=2),
                    use_container_width=False)
    privileged_chart = matrix_chart(True, "Privileged")
    unprivileged_chart = matrix_chart(False, "Unprivileged")
    st.altair_chart(privileged_chart | unprivileged_chart,
                    use_container_width=False)
def importance_heatmap_altair(fanova):
    """Plot FANOVA hyper-parameter importance and its std as two heatmaps.

    Both heatmaps are built from ``fanova.importance_long`` on a ``row`` by
    ``col`` grid; one is colored by ``importance`` and the other by ``std``,
    each with its own color scale. Calling this also enables the Altair
    ``dark`` theme.

    Parameters
    ----------
    fanova: FANOVA
        instance of FANOVA class

    Examples
    --------

    >>> from olympus.dashboard.analysis.hpfanova import FANOVA
    >>> import pandas as pd
    >>> data = [
    ...     dict(objective=0.12 / 0.08, uid=0, epoch=32, hp1=0.12, hp2=0.08),
    ...     dict(objective=0.14 / 0.09, uid=0, epoch=32, hp1=0.14, hp2=0.09),
    ...     dict(objective=0.15 / 0.10, uid=0, epoch=32, hp1=0.15, hp2=0.10),
    ...     dict(objective=0.16 / 0.11, uid=0, epoch=32, hp1=0.16, hp2=0.11),
    ...     dict(objective=0.17 / 0.12, uid=0, epoch=32, hp1=0.17, hp2=0.12)
    ... ]
    >>> space = {
    ...     'hp1': 'uniform(0, 1)',
    ...     'hp2': 'uniform(0, 1)'
    ... }
    >>> data = pd.DataFrame(data)
    >>> fanova = FANOVA(
    ...    data,
    ...    hp_names=list(space.keys()),
    ...    objective='objective',
    ...    hp_space=space)
    >>> chart = importance_heatmap_altair(fanova)

    .. image:: ../../../docs/_static/plots/importance.png

    """
    import altair as alt
    alt.themes.enable('dark')

    records = alt.Data(values=fanova.importance_long)
    grid = alt.Chart(records).mark_rect().encode(x='row:O', y='col:O')
    grid = grid.properties(width=200, height=200)

    importance_map = grid.encode(color='importance:Q')
    std_map = grid.encode(color='std:Q')
    combined = alt.concat(importance_map, std_map)
    return combined.resolve_scale(color='independent')
def plot_points_average_and_trend(configs, title, footer):
    """Stack one ``points_average_and_trend`` panel per config.

    The panels share their x scale. ``footer`` is attached to the stack as a
    small bottom-right title, and ``title`` is attached to an outer concat
    wrapper as the centered heading.

    Args:
        configs: Iterable of keyword-argument dicts, one per panel.
        title: Heading text.
        footer: Footer text.
    """
    panels = [points_average_and_trend(**config) for config in configs]

    footer_params = altair.TitleParams(
        footer,
        baseline='bottom',
        orient='bottom',
        anchor='end',
        fontWeight='normal',
        fontSize=10,
        dy=10,
    )
    stacked = altair.vconcat(*panels).resolve_scale(x='shared')
    stacked = stacked.properties(title=footer_params)

    # A chart carries one title, so the heading lives on an outer wrapper.
    heading_params = altair.TitleParams(title, anchor='middle')
    return altair.concat(stacked).properties(title=heading_params)
def suicides_gender(source):
    """Build a female/male pyramid of average suicides per capita as HTML.

    The left panel shows the ``female`` rows with a descending x axis, the
    right panel the ``male`` rows, and a text column of country names sits
    between them. Both bar panels share one multi-selection that drives bar
    opacity.

    Args:
        source: Data passed to every ``alt.Chart``.

    Returns:
        The chart serialized with ``to_html()``.
    """
    records = source
    click = alt.selection_multi()
    alt.data_transformers.disable_max_rows()
    color_scale = alt.Scale(domain=['male', 'female'],
                            range=['#5389b8', '#c87e35'])

    def bars(sex, heading, tooltip_fields, **x_options):
        # One side of the pyramid; the sides differ only in the filter,
        # title, tooltip fields and x sort order.
        filtered = alt.Chart(records).transform_filter((datum.sex == sex))
        encoded = filtered.encode(
            y=alt.Y('country', axis=None),
            x=alt.X('Average_suicides_per_capita',
                    title='Average_suicides_per_capita',
                    axis=alt.Axis(orient='top'),
                    scale=alt.Scale(domain=(0, 70)),
                    **x_options),
            color=alt.Color('sex:N', scale=color_scale, legend=None),
            opacity=alt.condition(click, alt.value(1.0), alt.value(0.5)),
            tooltip=tooltip_fields,
        )
        return encoded.mark_bar().properties(title=heading).add_selection(
            click)

    left = bars(
        'female',
        'Female',
        [
            alt.Tooltip('country:N'),
            alt.Tooltip('Average_suicides_per_capita:Q'),
        ],
        sort=alt.SortOrder('descending'),
    )

    middle = alt.Chart(records).encode(
        y=alt.Y('country', axis=None),
        text=alt.Text('country'),
    ).mark_text(color='white').properties(width=150)

    right = bars(
        'male',
        'Male',
        [
            alt.Tooltip('country:N'),
            alt.Tooltip('Average_suicides_per_capita:Q'),
            alt.Tooltip('sex'),
        ],
    )

    chartf = alt.concat(left, middle, right, spacing=4)
    chartf = chartf.configure_axis(grid=False)
    return chartf.to_html()
def display_the_conplot():
    """Save the first-half/second-half ESB pyramid and render its template.

    A year slider (2006-2013) filters the module-level ``tidy_df``. Rows are
    labelled ``1st Half`` or ``2nd Half`` from the ``Half`` column, and each
    label gets its own bar panel on either side of a column of council
    names. The chart is written to ``templates/conplot.html`` and that
    template is rendered.
    """
    year_slider = alt.binding_range(min=2006, max=2013, step=1)
    select_year = alt.selection_single(name='Select',
                                       fields=['Year'],
                                       bind=year_slider,
                                       init={'Year': 2006})

    half_label = alt.expr.if_(alt.datum.Half == 1.0, '1st Half', '2nd Half')
    base = alt.Chart(tidy_df).add_selection(select_year)
    base = base.transform_filter(select_year)
    base = base.transform_calculate(types=half_label)
    base = base.properties(width=250)

    color_scale = alt.Scale(domain=['1st Half', '2nd Half'],
                            range=['green', 'orange'])

    def half_bars(label, heading, **x_options):
        # One side of the pyramid; only the filter, title and x sort differ.
        subset = base.transform_filter(alt.datum.types == label)
        encoded = subset.encode(
            y=alt.Y('County Councils:O', axis=None),
            x=alt.X('sum(esb):Q', title='ESB Count', **x_options),
            color=alt.Color('types:N', scale=color_scale, legend=None),
            tooltip='sum(esb):Q',
        )
        return encoded.mark_bar().properties(title=heading)

    left = half_bars('1st Half', 'First Half of Year',
                     sort=alt.SortOrder('descending'))

    middle = base.encode(
        y=alt.Y('County Councils:O', axis=None),
        text=alt.Text('County Councils:O'),
    ).mark_text(color='steelblue', size=15).properties(width=105)

    right = half_bars('2nd Half', 'Second Half of Year')

    conplot = alt.concat(left, middle, right, spacing=5)
    conplot.save("templates/conplot.html")
    return render_template("conplot.html")
def make_owd_chart():
    """Build a small-multiples world map, one panel per vaccine.

    The approvals CSV lists several vaccines per location in one
    comma-separated cell; these are split into one row per vaccine and
    joined to the country shapes on ``iso_code``. Each panel layers the
    countries using one vaccine over a grey base map that omits ``ATA``.
    """
    approvals = pd.read_csv('./owd_datasets/vaccine_approval_owd.csv')

    def split_names(cell):
        # "A, B" -> ["A", "B"]; only leading whitespace is stripped.
        return [name.lstrip() for name in cell.split(',')]

    approvals['vaccines'] = approvals.vaccines.apply(split_names)
    approvals = approvals.explode('vaccines').reset_index(drop=True)

    countries_map = get_shapefiles()
    # Set the ISO codes for France and Norway explicitly.
    countries_map.loc[countries_map.NAME == 'France', 'iso_code'] = "FRA"
    countries_map.loc[countries_map.NAME == 'Norway', 'iso_code'] = "NOR"

    # The UK nations are listed separately; map all of them onto GBR.
    uk_nations = ['England', 'Wales', 'Scotland', 'Northern Ireland']
    approvals.loc[approvals.location.isin(uk_nations), 'iso_code'] = "GBR"

    plot_data = countries_map.merge(approvals, how='inner', on='iso_code')

    backdrop = alt.Chart(
        countries_map[countries_map.iso_code != 'ATA']
    ).mark_geoshape(fill='#eee', stroke="#fff", strokeWidth=0.5)

    panels = []
    for vaccine in plot_data.vaccines.unique():
        users = plot_data[plot_data['vaccines'] == vaccine]
        highlight = alt.Chart(
            users, title=vaccine, height=200, width=350
        ).mark_geoshape(stroke="#fff", strokeWidth=0.5).encode(
            color=alt.value('#2e7265')
        ).project('equalEarth')
        panels.append(backdrop + highlight)

    chart = alt.concat(
        *panels,
        columns=3,
        title="Where each vaccine is being used",
        spacing=0,
    ).configure_view(strokeWidth=0)
    return chart
def con_plt(day_of_week='Monday', time_of_day='Morning', branch_index='A'):
    """
    Concatenate all bar plots into a single configured row

    Parameters
    ----------
    day_of_week: str
        the day of week ranging from Monday to Sunday
    time_of_day: str
        the time of day (Morning, Afternoon or Evening)
    branch_index: str
        the character used to represent the supermarket branch

    Returns
    -------
    Altair chart object
        the four bar plots laid out in four columns
    """
    selection = (day_of_week, time_of_day, branch_index)

    bar_plots = []
    # Each entry: aggregate expression, plot title, axis label.
    for aggregate, heading, axis_label in (
        ('sum(Total)', 'Total Sales', 'Sales in MMK'),
        ('count(Invoice ID)', 'Customer Traffic', 'Transactions'),
        ('mean(Total)', 'Average Transaction Size', 'Sales in MMK'),
        ('mean(Rating)', 'Average Customer Satisfaction', 'Rating'),
    ):
        bar_plots.append(
            make_bar_plot(*selection, aggregate, heading, axis_label))

    combined = alt.concat(*bar_plots, columns=4)
    combined = combined.configure_axis(labelFontSize=13, titleFontSize=13)
    combined = combined.configure_title(fontSize=14)
    return combined.configure_axisX(labelAngle=45)
def plot_repeated_experiments(data, n_card_pairs, show='Experiments, Histogram, Std', plot_height=225):
    """Display the winning probability per experiment for a chosen card count.

    A range slider picks how many card pairs of each experiment are used.
    Rows with ``card_pair`` up to that value are averaged into ``p_win`` per
    ``stack`` and ``experiment``. Up to three panels are drawn, depending on
    which keywords appear in ``show`` (case-insensitive).

    Args:
        data: Data with ``card_pair``, ``win``, ``stack`` and ``experiment``
            fields.
        n_card_pairs: Upper end of the slider.
        show: Text containing any of ``experiments``, ``histogram``, ``std``.
        plot_height: Height shared by all panels.

    Returns:
        None. The combined chart is displayed with the ``svg`` renderer.
    """
    plots = []
    wanted = show.lower()

    # define input selection
    input_n_cards = alt.binding(
        input='range',
        min=1,
        max=n_card_pairs,
        step=1,
        name='Card pairs per Experiment: '
    )
    selection = alt.selection_single(
        bind=input_n_cards,
        # Start at 25 pairs, but never beyond the slider's own maximum.
        init={'card_pair': min(25, n_card_pairs)}
    )

    # filter data
    base = alt.Chart(data).add_selection(
        selection
    ).transform_filter(
        alt.datum.card_pair <= alt.expr.toNumber(selection.card_pair)
    ).transform_aggregate(
        p_win='mean(win):Q',
        groupby=["stack", "experiment"]
    )

    # Slightly wider than [0, 1]; shared by all panels so they line up.
    scale = alt.Scale(domain=[-.1, 1.1])

    if 'experiments' in wanted:
        # plot individual experiments
        dots = base.mark_point().encode(
            x=alt.X('experiment:Q', title='Experiment'),
            y=alt.Y(
                'p_win:Q',
                scale=scale,
                axis=alt.Axis(
                    values=np.arange(0, 1.1, .2),
                    grid=True
                ),
                title='Winning probability'
            ),
            color='stack:N'
        ).properties(
            width=400,
            height=plot_height
        )
        plots.append(dots)

    if 'histogram' in wanted:
        # plot histogram over experiment results
        hist = base.mark_bar(
            fillOpacity=.33,
            strokeOpacity=.66,
            strokeWidth=2,
            cornerRadius=2,
            binSpacing=1
        ).encode(
            y=alt.Y(
                'p_win:Q',
                bin=alt.Bin(extent=[0, 1], step=.1),
                scale=scale,
                axis=alt.Axis(
                    values=np.arange(0, 1.1, .2),
                    title=None,
                    labels=False,
                    grid=True
                ),
            ),
            x=alt.X(
                'count(experiment):Q',
                stack=None,  # no stacked bar chart
                title='Number of experiments'
            ),
            color=alt.Color('stack:N', legend=alt.Legend(title='Stack')),
            stroke=alt.Color('stack:N', legend=None),
            order='stack:Q',
        ).properties(
            width=150,
            height=plot_height
        )
        plots.append(hist)

    if 'std' in wanted:
        # define custom label for standard deviation bars
        std_text = alt.Chart(
            pd.DataFrame({'x': [4], 'y': [.5], 'text': ['Mean ± Standard Deviation']})
        ).mark_text(
            angle=90,
            baseline='middle'
        ).encode(
            x='x',
            y='y',
            text='text'
        )

        # plot standard deviation bars
        errorbars = alt.layer(
            base.mark_errorbar(extent='stdev', rule=alt.MarkConfig(size=2)).encode(
                y=alt.Y('p_win:Q', axis=None, scale=scale),
                x=alt.X('stack:Q', axis=None),
                color='stack:N',
            ),
            # Zero-size points: this layer only contributes the mean and
            # fixes the x domain.
            base.mark_point(size=0).encode(
                y=alt.Y('p_win:Q', aggregate='mean', scale=scale),
                x=alt.X('stack:Q', scale=alt.Scale(domain=[1, 4])),
            ),
            std_text,
            view=alt.ViewConfig(strokeWidth=0),
        ).properties(
            width=25,
            height=plot_height
        )
        plots.append(errorbars)

    # combine & show
    alt.concat(*plots).configure_axis(
        grid=False
    ).configure_view(
        strokeWidth=0
    ).display(renderer='svg')
width=550, height=140, title={ "text": ['Human Population vs Meat Production 1961-2018'], "fontSize": 20, "font": 'Courier', "anchor": 'middle', "color": 'gray' } ) alt.concat(chart, title=alt.TitleParams( ['Scale: 1 person = 1 billion and 1 animal = 10 billion kg', '#30DayChartChallenge - abstract - 2021/04/10', 'Dataset: https://ourworldindata.org/meat-production', 'twitter.com/vivekparasharr | github.com/vivekparasharr | vivekparasharr.medium.com'], baseline='bottom', orient='bottom', anchor='end', fontWeight='normal', fontSize=12, color='gray' ) ) ''' Data World 1961 2018 Sheep 6.03 million t 15.77 million t Beef 28.76 million t 71.61 million t Pig 24.75 million t 120.88 million t People 3,091,843,507 7,631,091,040
def pyramid_chart(data, left="left", right="right", hleft=fmt, hright=fmt, where=None):
    """
    A Population pyramid chart.

    The numeric ``"left"`` and ``"right"`` columns of ``data`` are drawn as
    mirrored bar charts around a column of index labels. Humanized values are
    shown in the tooltips only; they are kept in dedicated helper columns so
    they never replace the numbers being plotted, whatever the titles are.

    Args:
        data: Input dataframe with ["left", "right"] columns.
        left: Title of the left chart (also used as its tooltip label).
        right: Title of the right chart (also used as its tooltip label).
        hleft: Humanized left column or function.
        hright: Humanized right column or function.
        where: Object with an ``altair_chart`` method to render into. When
            omitted, nothing is rendered.

    Returns:
        The concatenated Altair chart.
    """
    cols = ["left", "right"]
    titles = [left, right]
    directions = ["descending", "ascending"]
    # Fixed helper names: using the titles as column names would overwrite
    # the numeric columns whenever a title equals "left" or "right".
    h_cols = ["_left_label", "_right_label"]

    # Transform datasets
    data = data.copy()
    data["index"] = [str(x) for x in data.index]
    data["color_left"] = "A"
    data["color_right"] = "B"
    for col, h_col, humanize in zip(cols, h_cols, (hleft, hright)):
        if callable(humanize):
            data[h_col] = data[col].apply(humanize)
        else:
            data[h_col] = humanize
    data = data.loc[::-1]

    # Chart
    base = alt.Chart(data)
    height = 250
    width = 300

    def piece(i):
        return (base.mark_bar().encode(
            x=alt.X(cols[i], title=None, sort=alt.SortOrder(directions[i])),
            y=alt.Y("index", axis=None, title=None,
                    sort=alt.SortOrder("descending")),
            # List form keeps the "label: value" tooltip layout.
            tooltip=[alt.Tooltip(h_cols[i], title=titles[i])],
            color=alt.Color(f"color_{cols[i]}:N", legend=None),
        ).properties(title=titles[i], width=width,
                     height=height).interactive())

    labels = base.encode(
        y=alt.Y("index", axis=None, sort=alt.SortOrder("descending")),
        text=alt.Text("index"),
    ).mark_text().properties(width=50, height=height)

    chart = alt.concat(piece(0), labels, piece(1), spacing=5)
    if where is not None:
        where.altair_chart(chart, use_container_width=False)
    return chart
def page_charts(today_date=None):
    """Render the Shiller charts and the stock chart pairs.

    Args:
        today_date: Last date of the price window. Defaults to yesterday,
            computed on every call; a default written in the signature would
            be evaluated once at import time and go stale in a long-running
            app.
    """
    if today_date is None:
        today_date = date.today() - timedelta(days=1)

    def line_chart(frame, title, width=260):
        # All panels share the same height; only title and width vary.
        return altair.generate_chart("line", frame).properties(
            title=title,
            height=200,
            width=width,
        )

    def show_pair(first, second):
        st.altair_chart(alt.concat(first, second, columns=2),
                        use_container_width=True)

    st.subheader("Shiller charts")
    df0 = load_ie_data()
    c1 = line_chart(df0[["Real_Price", "10xReal_Earnings"]], "Index Plot")
    c2 = line_chart(df0[["CAPE", "10xLong_IR"]], "PE (CAPE) Plot")
    show_pair(c1, c2)

    st.subheader("Stock charts")
    start_date = get_start_date(today_date, options=("3Y", "2Y", "1Y"))
    # NOTE(review): only two years of prices are loaded although "3Y" is an
    # offered option -- confirm against get_start_date.
    dates = pd.date_range(today_date - timedelta(days=365 * 2), today_date)

    def rebased_prices(symbols, colnames, *load_args):
        # Load, rename, cut to the chosen start date, then rebase.
        frame = load_data(dates, symbols, *load_args)
        frame.columns = colnames
        return rebase(frame[frame.index >= start_date])

    # MSCI
    rebased_df1 = rebased_prices(
        ["URTH", "EEM", "SPY", "ES3.SI"],
        ["MSCI World", "MSCI EM", "S&P500", "ES3"],
        "SPY",
    )
    chart1 = line_chart(rebased_df1, "MSCI")

    # VIX (plotted as loaded, not rebased)
    symbols = ["^VIX"]
    df2 = load_data(dates, symbols)[symbols]
    df2.columns = ["VIX"]
    chart2 = line_chart(df2[df2.index >= start_date], "VIX")
    show_pair(chart1, chart2)

    # etfs
    rebased_df3a = rebased_prices(["IWDA", "EIMI"], ["World", "EM"])
    chart3a = line_chart(rebased_df3a, "ETF")

    rebased_df3b = rebased_prices(
        ["O87.SI", "ES3.SI", "CLR.SI"],
        ["GLD", "ES3", "Lion-Phillip"],
    )
    chart3b = line_chart(rebased_df3b, "ETF SGX")
    show_pair(chart3a, chart3b)

    # industrial
    rebased_df4 = rebased_prices(
        [
            "ES3.SI", "O5RU.SI", "A17U.SI", "J91U.SI", "BUOU.SI", "ME8U.SI",
            "M44U.SI"
        ],
        ["ES3", "AA", "Ascendas", "ESR", "FLCT", "MIT", "MLT"],
    )
    chart4a = line_chart(
        rebased_df4[["ES3", "Ascendas", "FLCT", "MIT", "MLT"]],
        "Industrial 1",
    )
    chart4b = line_chart(rebased_df4[["ES3", "AA", "ESR"]], "Industrial 2")
    show_pair(chart4a, chart4b)

    # retail
    rebased_df5 = rebased_prices(
        ["ES3.SI", "C38U.SI", "J69U.SI", "N2IU.SI"],
        ["ES3", "CICT", "FCT", "MCT"],
    )
    chart5 = line_chart(rebased_df5, "Retail & Commercial", width=250)

    # banks
    rebased_df6 = rebased_prices(
        ["ES3.SI", "D05.SI", "O39.SI", "U11.SI"],
        ["ES3", "DBS", "OCBC", "UOB"],
    )
    chart6 = line_chart(rebased_df6, "Banks", width=250)
    show_pair(chart5, chart6)
y='petalWidth:Q', color=alt.condition(hover, 'species:N', alt.value('lightgray'))).properties( width=180, height=180, ) points = base.mark_point().add_selection(hover) text = base.mark_text(dy=-5).encode(text='species:N', opacity=alt.condition( hover, alt.value(1), alt.value(0))) alt.layer(points, text).facet('species:N', ) ######### # Scale and Guide Resolution import altair as alt from vega_datasets import data source = data.cars() base = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q').properties(width=200, height=200) alt.concat(base.encode(color='Origin:N'), base.encode(color='Cylinders:O')) alt.concat(base.encode(color='Origin:N'), base.encode(color='Cylinders:O')).resolve_scale(color='independent')
# Layer the sphere, graticule and source shapes on a naturalEarth1 projection.
# (sphere, graticule and source are defined earlier in this example.)
map_layers = [
    alt.Chart(sphere).mark_geoshape(fill='lightblue'),
    alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5),
    alt.Chart(source).mark_geoshape(fill='ForestGreen', stroke='black'),
]
alt.layer(*map_layers).project(
    'naturalEarth1'
).properties(width=600, height=400).configure_view(stroke=None)

####

import altair as alt
from vega_datasets import data

# The same country shapes drawn once per projection, in a two-column grid.
source = alt.topo_feature(data.world_110m.url, 'countries')

base = alt.Chart(source).mark_geoshape(
    fill='#666666',
    stroke='white'
).properties(
    width=300,
    height=180
)

projections = ['equirectangular', 'mercator', 'orthographic', 'gnomonic']
charts = []
for proj in projections:
    charts.append(base.project(proj).properties(title=proj))

alt.concat(*charts, columns=2)
text = altair.Text('Age_Band_5yr') ) plt = altair.concat( altair.layer( trend, text ).properties( height=450, width=800, title=altair.TitleParams( [ 'From DoH daily data', 'https://twitter.com/ni_covid19_data on %s' %datetime.datetime.now().date().strftime('%A %-d %B %Y'), ], baseline='bottom', orient='bottom', anchor='end', fontWeight='normal', fontSize=10, dy=10 ), ) ).properties( title=altair.TitleParams( 'NI COVID-19 cases by age band', anchor='middle', ) ) plt.save('ni-age-band-cases-%s.png'%(datetime.datetime.now().date().strftime('%Y-%m-%d'))) plt
def plot_timelines_with_latest(df, x, y, color, y_title, y_format, latest, latest_y, title, subtitle, y_scale='linear'):
    """Plot summed timelines with a text label at the latest point of each.

    Args:
        df: Data for the trend lines.
        x: Encoding for the x channel of both layers.
        y: Field summed for the trend lines.
        color: Field used for line color, label color and label text.
        y_title: Y axis title.
        y_format: Y axis number format.
        latest: Data for the text labels.
        latest_y: Field summed to position the labels.
        title: Heading, centered above the chart.
        subtitle: Footer text, placed bottom-right.
        y_scale: Scale type for y; ``'log'`` also appends ``' (log scale)'``
            to the axis title and the heading.

    Returns:
        The layered chart wrapped in a concat chart that carries the heading.
    """
    if y_scale == 'log':
        suffix = ' (log scale)'
        y_title += suffix
        title += suffix

    trend_y = altair.Y(
        field=y,
        type='quantitative',
        aggregate='sum',
        axis=altair.Axis(title=y_title, format=y_format),
        scale=altair.Scale(type=y_scale),
    )
    trend = altair.Chart(df).mark_line().encode(
        x=x,
        y=trend_y,
        color=altair.Color(color, legend=None),
    )

    label_y = altair.Y(
        field=latest_y,
        type='quantitative',
        aggregate='sum',
        scale=altair.Scale(type=y_scale),
    )
    text = altair.Chart(latest).mark_text(
        align='left',
        baseline='middle',
        dx=5,
    ).encode(
        x=x,
        y=label_y,
        color=altair.Color(color, legend=None),
        text=altair.Text(color),
    )

    footer = altair.TitleParams(
        subtitle,
        baseline='bottom',
        orient='bottom',
        anchor='end',
        fontWeight='normal',
        fontSize=10,
        dy=10,
    )
    layered = altair.layer(trend, text).properties(
        height=450,
        width=800,
        title=footer,
    )

    # The layered chart's title slot holds the footer, so the heading goes
    # on an outer wrapper.
    heading = altair.TitleParams(title, anchor='middle')
    return altair.concat(layered).properties(title=heading)
sort='y') ).properties( title='By Channel' ) # Sort according to another field sortfield = base.encode( alt.X(field='site', type='nominal', sort=alt.EncodingSortField(field='yield', op='mean')) ).properties( title='By Yield' ) alt.concat( ascending, descending, explicit, sortchannel, sortfield, columns=3 ) # #### ############### import altair as alt from vega_datasets import data barley = data.barley() base = alt.Chart(barley).mark_point().encode( y='yield:Q', ).properties(width=200)
def get_altair_chart(df, x_col, y_cols='ALL', cat_col=None, sel_cols=None, sliders=None,
                     ns_opacity=1.0, chart_title='', scheme='lightmulti', mark_type='line',
                     sort_values=False, y_index=-1, stack=None):
    """Build an interactive Altair chart from a dataframe.

    Args:
        df: Source dataframe.
        x_col: Column for the x axis.
        y_cols: ``'ALL'`` to derive the y columns from the column order (see
            below), a single column name, or a sequence of names. With more
            than one column a drop-down chooses which one is plotted.
        cat_col: Optional column used for color; adds a legend-bound
            selection and a side bar chart of the ``total`` column.
        sel_cols: Optional columns, each given a drop-down filter.
        sliders: Optional dict with keys ``'min'`` and/or ``'max'``. Each
            value is a column name or ``[column, initial_value]``. Other keys
            are reported and ignored; columns missing from ``df`` are skipped.
        ns_opacity: Opacity of marks outside the legend selection.
        chart_title: Accepted but not used by this function.
        scheme: Color scheme name.
        mark_type: ``'bar'``, ``'area'`` or anything else for a line.
        sort_values: Treat x as nominal and sort it by y.
        y_index: Index into ``y_cols`` of the initially selected column.
        stack: ``'normalize'``, ``'sum'`` or ``None``.

    Returns:
        The assembled chart (a concat chart when ``cat_col`` is given).
    """
    if mark_type == 'bar':
        chart = alt.Chart(df).mark_bar()
    elif mark_type == 'area':
        chart = alt.Chart(df).mark_area()
    else:
        chart = alt.Chart(df).mark_line(point=True, strokeWidth=2)

    x_col_ed = alt.X(f'{x_col}:N', sort='y') if sort_values else x_col

    chart = chart.encode(
        x=x_col_ed,
        tooltip=list(df.columns),
    ).properties(width=600, height=400)

    if sliders:
        # Slider key -> (comparison used in the filter, default initial value).
        bound_specs = {'min': ('>=', min), 'max': ('<=', max)}
        for key, value in sliders.items():
            if key not in bound_specs:
                print(
                    f"Atenção: a chave '{key}' não é válida para a variável sliders. Usar apenas 'min' ou 'max'"
                )
                continue
            comparisson, pick_bound = bound_specs[key]

            if isinstance(value, list):
                slider_col = value[0]
                has_init = len(value) > 1
            else:
                slider_col = value
                has_init = False

            if slider_col not in df.columns:
                continue

            # Default to the column's own min/max; computed only once the
            # column is known to exist.
            init_value = value[1] if has_init else pick_bound(df[slider_col])

            slider = alt.binding_range(min=min(df[slider_col]),
                                       max=max(df[slider_col]),
                                       step=1)
            slider_selector = alt.selection_single(
                bind=slider,
                name=key,
                fields=[slider_col],
                init={slider_col: init_value})
            chart = chart.add_selection(slider_selector).transform_filter(
                f'datum.{slider_col} {comparisson} {key}.{slider_col}[0]')

    if isinstance(y_cols, str):
        if y_cols == 'ALL':
            # The y columns are whatever follows x, the optional category
            # column and the optional selector columns.
            index = 1
            if cat_col:
                index += 1
            if sel_cols:
                index += len(sel_cols)
            y_cols = df.columns[index:].to_list()
        else:
            # One column name, not a sequence of characters.
            y_cols = [y_cols]
    else:
        y_cols = list(y_cols)

    if len(y_cols) > 1:
        columns = y_cols
        y_col_name = 'Y_col'
        select_box = alt.binding_select(options=columns, name=y_col_name)
        sel = alt.selection_single(fields=[y_col_name],
                                   bind=select_box,
                                   init={y_col_name: y_cols[y_index]})
        # Fold into (Y_col, Valor) pairs; the selection filters on Y_col and
        # the encodings below read Valor.
        chart = chart.transform_fold(
            columns,
            as_=[y_col_name, 'Valor'],
        ).transform_filter(sel)
        y_field = 'Valor'
    else:
        sel = None
        y_field = y_cols[0]

    if stack == 'normalize':
        chart = chart.encode(y=alt.Y(f"{y_field}:Q", stack="normalize"))
    elif stack == 'sum':
        chart = chart.encode(y=f'sum({y_field}):Q')
    else:
        chart = chart.encode(y=f'{y_field}:Q')

    if sel is not None:
        chart = chart.add_selection(sel)

    # TODO: add a range filter
    # lower = chart.properties(
    #     height=60
    # ).add_selection(brush)
    # chart = chart & lower

    if cat_col:
        base_cat = cat_col
        chart = chart.encode(
            color=alt.Color(base_cat, scale=alt.Scale(scheme=scheme)),
        )
        sel_base = alt.selection_multi(empty='all',
                                       fields=[base_cat],
                                       bind='legend')
        chart = chart.add_selection(sel_base).encode(opacity=alt.condition(
            sel_base, alt.value(1.0), alt.value(ns_opacity)))

        bar = alt.Chart(df).mark_bar().encode(
            y=alt.Y(f'{base_cat}:O', title=None),
            x='total',
            color=alt.condition(
                sel_base,
                alt.Color(f'{base_cat}:N', scale=alt.Scale(scheme=scheme)),
                alt.ColorValue("lightgrey"),
                legend=None)).add_selection(sel_base).properties(width=100,
                                                                 height=400)

        chart = alt.concat(chart, bar)
        # chart = chart & lower  TODO: add a range filter

    select_cols = sel_cols
    if select_cols:
        # Drop-down options per column: sorted unique non-null values.
        options_lists = [
            df[cat].dropna().astype(str).sort_values().unique().tolist()
            for cat in select_cols
        ]

        selection = alt.selection_single(
            name='Selecione',
            fields=select_cols,
            init={
                cat: options_lists[i][0]
                for i, cat in enumerate(select_cols)
            },
            bind={
                cat: alt.binding_select(options=options_lists[i])
                for i, cat in enumerate(select_cols)
            })

        chart = chart.add_selection(selection).transform_filter(selection)

    return chart
scale=alt.Scale(domain=(6, 10)), axis=alt.Axis(title='IMDb Rating') #(format='%', title='percentage') ), alt.Y('season:O', axis=alt.Axis(title='Season') ) ).properties( #width=550, height=140, title={ "text": ['The Office IMDb Ratings Distribution by Season'], "fontSize": 18, "font": 'Courier', "anchor": 'middle', "color": 'gray' } ) alt.concat(chart, title=alt.TitleParams( ['', '#30DayChartChallenge - strips - 2021/04/12', 'Dataset: TidyTuesday Dataset 2020-03-17', 'twitter.com/vivekparasharr | github.com/vivekparasharr | vivekparasharr.medium.com'], baseline='bottom', orient='bottom', anchor='end', fontWeight='normal', fontSize=12, color='gray' ) )
# Bar chart of first-dose coverage per nation. The headline sits on the bars
# and the data-source credits on an outer wrapper.
_headline = altair.TitleParams(
    text='NI has vaccinated 5% less of its population than England',
    subtitle=['NI has vaccinated 63%, England 68.2% for first doses'],
    align='left',
    anchor='start',
    fontSize=18,
    subtitleFontSize=14
)

_bars = altair.Chart(df).mark_bar(
    thickness=2,
    width=25,
    opacity=1
).encode(
    x=altair.X('Nation:O', axis=altair.Axis(labelAngle=0)),
    y=altair.Y(
        'First doses as % of total population:Q',
        aggregate='sum',
        axis=altair.Axis(format='%', title='Population received first dose')
    ),
    color=altair.Color('Nation', legend=None)
).properties(
    width=300,
    title=_headline
)

_credits = altair.TitleParams(
    ['Population data for 2020 from ONS',
     'Vaccination data from HSCNI and NHS England',
     'https://twitter.com/ni_covid19_data on 26th July 2021'],
    baseline='bottom',
    orient='bottom',
    anchor='end',
    fontWeight='normal',
    fontSize=10,
    dy=10
)

plt = altair.concat(_bars).properties(title=_credits)
def fai(debias=False):
    """Render the fairness-assessment page in Streamlit.

    Loads the test set, obtains predictions through ``prepare_pred``, and
    then displays, in order: the model-performance text, the fairness
    measures (bar chart plus styled table), confusion matrices for all /
    privileged / unprivileged rows, per-group performance-metric charts,
    and the LaTeX definitions of the fairness ratios.

    Args:
        debias: Forwarded unchanged to ``prepare_pred``.
    """
    protected_attribute = st.selectbox("Select protected column.",
                                       list(CONFIG_FAI.keys()))

    # Load data
    valid = load_data("output/test.gz.parquet").fillna(0)
    x_valid = valid[FEATURES]
    y_valid = valid[TARGET].values
    valid_fai = valid[list(CONFIG_FAI.keys())]

    # Get predictions
    y_pred, text_model_perf = prepare_pred(x_valid, y_valid, debias=debias)

    st.header("Model Performance")
    st.text(text_model_perf)

    st.header("Algorithmic Fairness Metrics")
    fthresh = st.slider("Set fairness deviation threshold", 0., 1., 0.2, 0.05)
    st.write("Absolute fairness is 1. The model is considered fair "
             f"if **ratio is between {1-fthresh:.2f} and {1+fthresh:.2f}**.")

    # Compute fairness measures
    privi_info = CONFIG_FAI[protected_attribute]
    aif_metric = get_aif_metric(
        valid_fai,
        y_valid,
        y_pred,
        protected_attribute,
        privi_info["privileged_attribute_values"],
        privi_info["unprivileged_attribute_values"],
    )
    fmeasures = compute_fairness_measures(aif_metric)
    # Take an explicit copy of the filtered rows: adding the "Fair?" column
    # to a boolean-mask slice is chained assignment and triggers pandas'
    # SettingWithCopyWarning.
    fmeasures = fmeasures[fmeasures["Metric"].isin(METRICS_TO_USE)].copy()
    fmeasures["Fair?"] = fmeasures["Ratio"].apply(
        lambda x: "Yes" if np.abs(x - 1) < fthresh else "No")

    st.altair_chart(plot_fmeasures_bar(fmeasures, fthresh),
                    use_container_width=True)

    st.dataframe(
        fmeasures[["Metric", "Unprivileged", "Privileged", "Ratio", "Fair?"]]
        .style.applymap(color_red, subset=["Fair?"])
        .format({
            "Unprivileged": "{:.3f}",
            "Privileged": "{:.3f}",
            "Ratio": "{:.3f}"
        }))

    st.subheader("Confusion Matrices")
    # privileged=None / True / False selects all rows, the privileged group
    # and the unprivileged group respectively (per the chart titles below).
    cm1 = aif_metric.binary_confusion_matrix(privileged=None)
    c1 = get_confusion_matrix_chart(cm1, "All")
    st.altair_chart(alt.concat(c1, columns=2), use_container_width=False)
    cm2 = aif_metric.binary_confusion_matrix(privileged=True)
    c2 = get_confusion_matrix_chart(cm2, "Privileged")
    cm3 = aif_metric.binary_confusion_matrix(privileged=False)
    c3 = get_confusion_matrix_chart(cm3, "Unprivileged")
    st.altair_chart(c2 | c3, use_container_width=False)

    st.header("Annex")
    st.subheader("Performance Metrics")
    metric_names = [
        'TPR', 'TNR', 'FPR', 'FNR', 'PPV', 'NPV', 'FDR', 'FOR', 'ACC',
        'selection_rate', 'precision', 'recall', 'sensitivity',
        'specificity', 'power', 'error_rate'
    ]
    # One horizontal bar chart per metric, stacked in a single column.
    all_perfs = [
        alt.Chart(get_perf_measure_by_group(aif_metric, metric_name))
        .mark_bar()
        .encode(
            x=f"{metric_name}:Q",
            y="Group:O",
            tooltip=["Group", metric_name],
        )
        for metric_name in metric_names
    ]
    all_charts = alt.concat(*all_perfs, columns=1)
    st.altair_chart(all_charts, use_container_width=False)

    st.subheader("Notes")
    st.write("**Equal opportunity**:")
    st.latex(
        r"\frac{\text{FNR}(D=\text{unprivileged})}{\text{FNR}(D=\text{privileged})}"
    )
    st.write("**Predictive parity**:")
    st.latex(
        r"\frac{\text{PPV}(D=\text{unprivileged})}{\text{PPV}(D=\text{privileged})}"
    )
    st.write("**Statistical parity**:")
    st.latex(
        r"\frac{\text{Selection Rate}(D=\text{unprivileged})}{\text{Selection Rate}(D=\text{privileged})}"
    )
# -------------------- Choropleth map 2010 ----------------------------#
# Add Base Layer: every shape drawn in light grey with a thin black outline,
# carrying the chart title and the 300x300 size.
base_2010 = alt.Chart(
    choro_2010_data, title='Total CO2 emissions in 2010'
).mark_geoshape(
    stroke='black', strokeWidth=0.4, fill='lightgray'
).encode().properties(width=300, height=300)

# Add Choropleth Layer: fill colour driven by `properties.Value` on a fixed
# domain, with value and country code in the tooltip.
choro_2010 = alt.Chart(choro_2010_data).mark_geoshape(stroke='black').encode(
    color=alt.Color(
        'properties.Value',
        type='quantitative',
        scale=alt.Scale(scheme='goldorange', domain=[2 * 10**4, 10**8]),
        legend=alt.Legend(labelColor='grey', labelLimit=30, labelFontSize=7),
        title="Thousands of tonnes",
    ),
    tooltip=alt.Tooltip(['properties.Value:Q', 'properties.COU:N']),
)

# Add Labels Layer: country codes placed at each feature's centroid.
# Uses the 2010 data like the other two layers (this previously read
# choro_1990_data, so labels came from the other year's feature set).
# NOTE(review): this layer declares 400x400 while the base layer declares
# 300x300; some Altair versions reject inconsistent layer sizes - confirm
# which size is intended.
labels_2010 = alt.Chart(choro_2010_data).mark_text(baseline='top').properties(
    width=400, height=400
).encode(
    longitude='properties.centroid_lon:Q',
    latitude='properties.centroid_lat:Q',
    text='properties.COU:N',
    size=alt.value(10),
)

chart_2010 = base_2010 + choro_2010 + labels_2010

# Show both years side by side; each map keeps its own colour scale/legend.
final_chart = alt.concat(chart_1990, chart_2010).resolve_scale(color='independent')
st.write(final_chart)
def write():
    """Render the two-athlete comparison page.

    Two athletes are picked from sidebar select boxes. Their split times
    are drawn as a mirrored bar chart (first athlete on the left, activity
    names in the middle, second athlete on the right), followed by one
    split-time table per athlete laid out side by side in a grid.
    """
    data = funcs.get_data()
    data = funcs.convertTimes(data)
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)
    data = funcs.removeNotFinished(data)

    ##
    ## Chart
    ##
    athletes = data['Name'].unique()

    option1 = st.sidebar.selectbox('Buscar atleta pelo nome:', sorted(athletes))
    atleta1 = data.loc[data['Name'] == option1].drop(['Country'], axis=1)

    option2 = st.sidebar.selectbox('Segundo atleta:', sorted(athletes))
    atleta2 = data.loc[data['Name'] == option2].drop(['Country'], axis=1)

    # One row per (activity, athlete). 'val' tags which side of the chart
    # the row belongs to; 'Order' fixes the vertical order of activities.
    legs = ('Swim', 'T1', 'Bike', 'T2', 'Run')
    sides = (('1', atleta1), ('2', atleta2))
    rows = []
    for position, leg in enumerate(legs, start=1):
        for side, athlete in sides:
            rows.append({
                'val': side,
                'Order': position,
                'name': funcs.getValueUniq(athlete, 'Name'),
                'Tempo': funcs.getValueUniq(athlete, leg),
                'TempoN': funcs.getValueUniq(athlete, f'{leg}N'),
                'Atividade': leg,
            })
    df = pd.DataFrame(rows)

    def _activity_axis():
        # Shared, axis-less y channel sorted by the explicit 'Order' column.
        return alt.Y(
            'Atividade:N',
            axis=None,
            sort=alt.EncodingSortField(field="Order", order='ascending'),
        )

    base = alt.Chart(df).properties(width=400)

    # Both athletes are mapped to the same colour.
    color_scale = alt.Scale(domain=[option1, option2],
                            range=['#1f77b4', '#1f77b4'])

    # Left panel: first athlete, x axis reversed so bars grow leftwards.
    left = (
        base.transform_filter(alt.datum.val == '1')
        .encode(
            y=_activity_axis(),
            x=alt.X('sum(TempoN):Q',
                    title='Tempo',
                    sort=alt.SortOrder('descending')),
            color=alt.Color('name:N', scale=color_scale, legend=None),
            tooltip=['Tempo:N'],
            order=alt.Order('Order', sort='ascending'),
        )
        .mark_bar()
        .properties(title=option1)
    )

    # Middle panel: activity names as text labels.
    middle = (
        base.encode(
            y=_activity_axis(),
            text=alt.Text('Atividade:N'),
            order=alt.Order('Order', sort='ascending'),
        )
        .mark_text()
        .properties(width=40)
    )

    # Right panel: second athlete, bars grow rightwards.
    right = (
        base.transform_filter(alt.datum.val == '2')
        .encode(
            y=_activity_axis(),
            x=alt.X('sum(TempoN):Q', title='Tempo'),
            color=alt.Color('name:N', scale=color_scale, legend=None),
            tooltip=['Tempo:N'],
        )
        .mark_bar()
        .properties(title=option2)
    )

    st.altair_chart(alt.concat(left, middle, right, spacing=5))

    ##
    ## Table
    ##
    # (label shown in the table, column read from the athlete's row)
    table_layout = (
        ('Swim', 'Swim'),
        ('T1', 'T1'),
        ('Bike', 'Bike'),
        ('T2', 'T2'),
        ('Run', 'Run'),
        ('Total', 'Overall'),
    )

    def _split_table(athlete):
        # Two-column frame: activity label and the athlete's time for it.
        return pd.DataFrame([
            {'Atividade': label, 'Tempo': funcs.getValueUniq(athlete, column)}
            for label, column in table_layout
        ])

    df1 = _split_table(atleta1)
    df2 = _split_table(atleta2)

    with Grid("1 1", color="#000000", background_color="#FFFFFF") as grid:
        grid.cell("a", 1, 2, 1, 2).dataframe(df1.set_index('Atividade'))
        grid.cell("b", 2, 3, 1, 2).dataframe(df2.set_index('Atividade'))
sort=alt.SortOrder('descending')), color=alt.Color('gender:N', scale=color_scale, legend=None)).mark_bar().properties(title='Female') middle = base.encode( y=alt.Y('age:O', axis=None), text=alt.Text('age:Q'), ).mark_text().properties(width=20) right = base.transform_filter(alt.datum.gender == 'Male').encode( y=alt.Y('age:O', axis=None), x=alt.X('sum(people):Q', title='population'), color=alt.Color('gender:N', scale=color_scale, legend=None)).mark_bar().properties(title='Male') alt.concat(left, middle, right, spacing=5) ###### import altair as alt from vega_datasets import data source = data.population.url alt.Chart(source).mark_area().encode( x='age:O', y=alt.Y('sum(people):Q', title='Population', axis=alt.Axis(format='~s')), facet=alt.Facet('year:O', columns=5), ).properties(title='US Age Distribution By Year', width=90, height=80) ########### ################