def display_scatter_matrix(assets, start_date, end_date, available_metrics, id_info_map):
    """Render a log-scaled scatter matrix of the supported metrics in Streamlit.

    Only the metrics from the built-in candidate list that also appear in
    ``available_metrics`` are fetched and plotted.

    Parameters
    ----------
    assets
        Passed through to ``get_aggregated_metrics``.
    start_date, end_date
        Passed through to ``filter_metrics_by_date``.
    available_metrics
        Container of metric ids that may be requested.
    id_info_map
        Mapping from metric id to a sequence whose first element is used as
        the display column name.

    Returns
    -------
    None
        The chart is written with ``st.write``.
    """
    candidate_metrics = ["PriceUSD", "TxCnt", "VtyDayRet30d"]
    usable_metrics = [m for m in candidate_metrics if m in available_metrics]

    # Request only the metrics known to be available; asking for the full
    # candidate list would fetch columns that are never plotted.
    df = get_aggregated_metrics(assets, usable_metrics)
    df = filter_metrics_by_date(df, start_date, end_date)

    # Duplicate each metric column under its human-readable name so axis
    # titles and tooltips show that name instead of the raw id.
    usable_metrics_names = []
    for metric in usable_metrics:
        metric_name = id_info_map[metric][0]
        df[metric_name] = df[metric]
        usable_metrics_names.append(metric_name)

    # NOTE(review): colour uses a "Name" field, so the aggregated frame is
    # presumably expected to carry a "Name" column - confirm against helper.
    chart = alt.Chart(df).mark_circle().encode(
        x=alt.X(alt.repeat("column"), type="quantitative",
                scale=alt.Scale(type="log")),
        y=alt.Y(alt.repeat("row"), type="quantitative",
                scale=alt.Scale(type="log")),
        color="Name:N",
        tooltip=usable_metrics_names,
    ).properties(width=200, height=200).repeat(
        row=usable_metrics_names, column=usable_metrics_names)
    st.write(chart)
def make_chart(self, df):
    """Build one choropleth per new-case variable, stacked as rows.

    The shapes returned by ``self.geography()`` are joined to ``df`` by
    matching ``properties.NAME`` against the ``Municipio`` column, and each
    row of the repeated chart colours the shapes by one variable.
    """
    variables = [
        'Casos nuevos (último boletín)',
        'Casos nuevos (últimos 7 boletines)',
    ]
    municipalities = self.geography()

    color_scale = alt.Scale(
        type='symlog',
        scheme='redgrey',
        domainMid=0,
        # WORKAROUND: Set the domain manually to forcibly
        # include zero or else we run into
        # https://github.com/vega/vega-lite/issues/6544
        domain=alt.DomainUnionWith(unionWith=[0]),
    )
    color_legend = alt.Legend(orient='left', titleLimit=400,
                              titleOrient='left')
    color_channel = alt.Color(
        alt.repeat('row'),
        type='quantitative',
        sort="descending",
        scale=color_scale,
        legend=color_legend,
    )
    tooltip_channels = [
        alt.Tooltip(field='properties.NAME', type='nominal'),
        alt.Tooltip(alt.repeat('row'), type='quantitative'),
    ]

    # Shapes without a matching row in df fall back to the lookup default.
    joined = alt.Chart(municipalities).transform_lookup(
        lookup='properties.NAME',
        from_=alt.LookupData(df, 'Municipio', variables),
        default='0',
    )
    panel = joined.mark_geoshape().encode(
        color=color_channel,
        tooltip=tooltip_channels,
    ).properties(width=575, height=200)

    # Each row gets its own colour scale.
    stacked = panel.repeat(row=variables).resolve_scale(color='independent')
    return stacked.configure_view(strokeWidth=0).configure_concat(spacing=80)
def get_chart_data(data):
    """Draw an interactive 3x3 scatter matrix of ``data[CHART]`` in Streamlit.

    Points are coloured by the ``Promo`` field. Returns whatever
    ``st.altair_chart`` returns.
    """
    row_fields = ['DayOfWeek', 'Customers', 'Sales']
    column_fields = ['Sales', 'Customers', 'DayOfWeek']

    cell = alt.Chart(data[CHART]).mark_circle().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        color='Promo:N',
    ).properties(width=150, height=150)

    altchart = cell.repeat(row=row_fields, column=column_fields).interactive()
    return st.altair_chart(altchart, width=-1)
def show_genre_region_bar(df):
    """Show per-region genre sales bars above a regional sales scatter matrix.

    A Streamlit slider restricts ``df`` to a year range, and one multi-select
    on the colour encoding is attached to both charts so that picking genres
    in either one highlights them in the other.
    """
    intro_lines = (
        '## Popular Genres',
        "**What about genres? Let's compare!**",
        "💡 *You can select specific year range with slider*",
        "💡 *Try clicking on one or more genres to see how they perform in different regions*",
        "💡 *You can also click on the individual points in the scatterplot to see other games in its genre*",
        "⌛ *Since it takes time to load the transformed data, the response may take a few seconds. Please be patient.*",
    )
    for line in intro_lines:
        st.write(line)

    brush = alt.selection_multi(encodings=['color'])

    min_year, max_year = get_year_range(df)
    selected_year = st.slider("View Popular Genre in Year Range", min_year,
                              max_year, (min_year, max_year), 1)
    first_year, last_year = selected_year[0], selected_year[1]
    new_df = df[(df["Year"] >= first_year) & (df["Year"] <= last_year)]

    # Selected genres keep their colour; everything else is greyed out.
    color_scheme = alt.condition(brush,
                                 "Genre:N",
                                 alt.value('lightgrey'),
                                 scale=alt.Scale(scheme='tableau20'))

    # Top: one bar chart per sales column, genres sorted by summed sales.
    sales_sum = alt.Y(alt.repeat('row'), aggregate='sum', type='quantitative')
    hist = alt.Chart(new_df).mark_bar().encode(y=sales_sum)
    bars = hist.encode(x=alt.X("Genre:N", sort='-y'), color=color_scheme)
    bar_rows = [
        'Global_Sales', 'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales'
    ]
    top_chart = alt.layer(bars).properties(width=500, height=200).repeat(
        row=bar_rows, data=new_df).add_selection(brush)

    # Bottom: pairwise scatter of the regional sales columns.
    regions = ["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales"]
    points = alt.Chart(new_df).mark_point().encode(
        x=alt.X(alt.repeat("column"), type='quantitative',
                scale=alt.Scale(type='sqrt')),
        y=alt.Y(alt.repeat("row"), type='quantitative',
                scale=alt.Scale(type='sqrt')),
        color=color_scheme,
        tooltip=['Name'],
    ).properties(width=150, height=150)
    bottom_chart = points.repeat(
        row=["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales"],
        column=regions).add_selection(brush)

    st.write(alt.vconcat(top_chart, bottom_chart))
def eda(df, target):
    """
    Generates a dictionary of summary statistics for the given data frame

    Parameters
    --------
    df : pandas.DataFrame
        input dataframe to be analyzed
    target : string
        target column name

    Returns
    --------
    dict
        summary of the feature columns (every column except ``target``):

        - ``nb_missing_values``: list of ``(column, missing count)`` pairs
        - ``nb_cat_features`` / ``cat_features_name``: non-numeric features
        - ``nb_num_features`` / ``num_features_name``: numeric features
        - ``nb_class``: number of distinct values in the target column
        - ``class_ratio``: relative class frequencies rounded to 4 decimals,
          in ``value_counts`` order
        - ``pairplot``: Altair scatter matrix of the numeric features,
          coloured by the target

    Raises
    --------
    TypeError
        if ``df`` is not a pandas DataFrame

    Examples
    --------
    >>> from propropy import eda
    >>> url1 = "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    >>> url2 = "wine-quality/winequality-red.csv"
    >>> url = url1+url2
    >>> df = pd.read_csv(url, sep=";")
    >>> target = "quality"
    >>> res = eda(df, target)
    """
    # Check the dataframe input
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Input data must be an instance of DataFrame")

    res = {}

    # Keyword form: the positional ``axis`` argument of ``DataFrame.drop``
    # is no longer accepted by pandas 2.x.
    df_fea = df.drop(columns=target)

    num_fea = df_fea.select_dtypes("number").columns.to_list()
    # Keep the original column order so the result is deterministic.
    numeric_names = set(num_fea)
    cat_fea = [col for col in df_fea.columns if col not in numeric_names]

    null_counts = df_fea.isnull().sum()
    res["nb_missing_values"] = list(zip(null_counts.index, null_counts.values))
    res["nb_cat_features"] = len(cat_fea)
    res["cat_features_name"] = cat_fea
    res["nb_num_features"] = len(num_fea)
    res["num_features_name"] = num_fea
    res["nb_class"] = len(set(df[target]))
    class_count = df[target].value_counts(normalize=True).values
    res["class_ratio"] = list(class_count.round(4))

    # Create a pair plot with Altair, one cell per pair of numeric features
    color_lab = target + ":N"
    chart = (alt.Chart(df).mark_circle().encode(
        alt.X(alt.repeat("column"), type="quantitative"),
        alt.Y(alt.repeat("row"), type="quantitative"),
        color=color_lab,
    ).properties(width=100, height=100).repeat(row=num_fea, column=num_fea))
    res["pairplot"] = chart

    return res
def pairplot(data, vars=None):
    """Return an interactive scatter matrix of ``vars``, coloured by ``Origin``.

    When ``vars`` is omitted every column of ``data`` is used for both the
    rows and the columns of the matrix.
    """
    fields = list(data.columns) if vars is None else vars

    x_channel = alt.X(alt.repeat("column"), type="quantitative")
    y_channel = alt.Y(alt.repeat("row"), type="quantitative")
    cell = alt.Chart(data).mark_circle().encode(
        x_channel, y_channel, color="Origin:N")
    cell = cell.properties(width=300, height=300)

    return cell.repeat(row=fields, column=fields).interactive()
def plot_scatter_matrix(df, color_field, x_y_prefix, tooltip_fields, size):
    """Return a scatter matrix over the columns whose names match a pattern.

    Columns are selected with ``df.columns.str.match(x_y_prefix)``; they are
    laid out top-to-bottom as rows and in reverse order as columns. Each cell
    is ``size`` x ``size`` pixels.
    """
    matching = df.columns[df.columns.str.match(x_y_prefix)]
    row_fields = list(matching)
    column_fields = list(matching[::-1])

    cell = alt.Chart(df).mark_circle().encode(
        alt.X(alt.repeat("column"), type="quantitative"),
        alt.Y(alt.repeat("row"), type="quantitative"),
        color=f"{color_field}",
        tooltip=tooltip_fields,
    )
    cell = cell.properties(width=size, height=size)
    return cell.repeat(row=row_fields, column=column_fields)
def graphMultiSimStats(simList):
    """Display scatter plots of simulation statistics against first payoffs.

    The table comes from ``multiSimStats(simList)``. Every column except the
    last three becomes a row of the grid; the two grid columns are
    ``FirstHPayoff`` and ``FirstLPayoff``.
    """
    wideScores = multiSimStats(simList)
    stat_columns = list(wideScores.columns[:-3].values)
    payoff_columns = ['FirstHPayoff', 'FirstLPayoff']

    cell = alt.Chart(wideScores).mark_circle().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
    ).properties(width=250, height=250)

    display(cell.repeat(row=stat_columns, column=payoff_columns))
def vega_example3():
    """Return the JSON spec of a brushable 2x2 scatter grid of ``cars``.

    Points inside the interval brush are coloured by ``Origin``; the rest are
    drawn gray.
    """
    brush = alt.selection_interval()
    highlight = alt.condition(brush, "Origin:N", alt.value("gray"))

    cell = alt.Chart(cars).mark_point().encode(
        alt.X(alt.repeat("column"), type="quantitative"),
        alt.Y(alt.repeat("row"), type="quantitative"),
        color=highlight,
    )
    # The selection is attached to the single cell before it is repeated.
    cell = cell.add_selection(brush).properties(width=250, height=250)

    chart = cell.repeat(
        row=["Horsepower", "Miles_per_Gallon"],
        column=["Acceleration", "Displacement"],
    )
    return jsonify(chart.to_dict())
def example_scatterplot_matrix():
    """Return an interactive scatter matrix of the iris measurements.

    The data is loaded from ``vega_datasets`` and points are coloured by
    ``species``.
    """
    import altair as alt
    from vega_datasets import data

    iris = data.iris()

    cell = alt.Chart(iris).mark_circle().encode(
        x=alt.X(alt.repeat('column'), type='quantitative'),
        y=alt.Y(alt.repeat('row'), type='quantitative'),
        color='species:N',
    ).properties(width=150, height=150)

    matrix = cell.repeat(
        row=['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'],
        column=['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'],
    )
    return matrix.interactive()
def scatter_matrix(df, x, z, output_directory=None, custom_scheme='dark2'):
    '''Plot an interactive scatter matrix of the variables in ``x``.

    Requires altair (and altair saver when saving).

    Parameters
    ----------
    df
        Data passed to ``alt.Chart``.
    x : sequence of str
        Variables to plot; used as given for the rows and reversed for the
        columns.
    z : str or None
        Name of a factor used to colour the points (encoded as nominal).
        Pass ``None`` to draw the points without a colour encoding.
    output_directory : str, optional
        If given, the chart is saved as ``'Scatterplot Matrix.html'``. The
        file name is appended by plain string concatenation, so the value
        must already end with a path separator.
    custom_scheme : str
        Colour scheme name passed to ``alt.Scale`` for the colour encoding.

    Returns
    -------
    The repeated, interactive Altair chart.
    '''
    x_inverse = x[::-1]

    # The colour channel is optional: only add it when a factor was supplied,
    # otherwise ``z + ':N'`` would fail on None.
    optional_channels = {}
    if z is not None:
        optional_channels['color'] = alt.Color(
            z + ':N', scale=alt.Scale(scheme=custom_scheme))

    chart = alt.Chart(df).mark_circle().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        **optional_channels
    ).properties(width=150, height=150).repeat(
        row=x, column=x_inverse).interactive()

    if output_directory is not None:
        chart.save(output_directory + 'Scatterplot Matrix.html')
    return chart
def pairplot(data, hue=None, vars=None):
    """Return a scatter matrix of ``vars``, optionally coloured by ``hue``.

    Parameters
    ----------
    data
        Data passed to ``alt.Chart``; must expose ``.columns`` when ``vars``
        is omitted.
    hue : str, optional
        Field used to colour the points (encoded as nominal). When omitted
        no colour encoding is added.
    vars : list, optional
        Fields used for both the rows and the columns of the matrix.
        Defaults to every column of ``data``.

    Returns
    -------
    The repeated Altair chart.
    """
    if vars is None:
        vars = list(data.columns)

    # Without this guard the default hue=None was formatted into the
    # literal field name "None:N", colouring by a field that does not exist.
    optional_channels = {}
    if hue is not None:
        optional_channels["color"] = '{hue}:N'.format(hue=hue)

    chart = alt.Chart(data).mark_circle().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        **optional_channels
    ).properties(
        width=250,
        height=250
    ).repeat(
        row=vars,
        column=vars
    )
    return chart
def generate_align_vs_features(data_frame, output_folder, file_name):
    """
    Generates a grid of charts relating ``align`` to other features of the
    dataset and saves the result as a PNG file.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data to plot
    output_folder : str
        output folder path; the image is written to its ``figures``
        sub-folder
    file_name : str
        file name (without extension) for the generated chart image

    Returns:
    -----------
    None
    """
    features = ['id', 'eye', 'hair', 'sex', 'gsm', 'publisher']

    # One panel per feature: the feature on y, the record count on x and as
    # the point size, coloured by alignment.
    panel = alt.Chart(data_frame).mark_circle().encode(
        alt.Y(alt.repeat(), type='ordinal'),
        alt.X('count()', title = "Character Count"),
        size=alt.Size('count()', legend=alt.Legend(title="Characters")),
        color=alt.Color("align", legend=alt.Legend(title="Alignment")),
    ).properties(height=300, width=200)
    align_vs_features = panel.repeat(repeat=features, columns=3)

    image_path = output_folder + "/figures/" + file_name + '.png'
    save(align_vs_features, image_path, method='selenium', webdriver=driver)

    if verbose:
        print("Alignment vs features chart created, saved to " + image_path)
def make_cat_plot(dat, cat_list):
    """
    Plot the count of each categorical feature, one bar chart per feature.

    Parameters
    ----------
    dat : DataFrame
        input dataset
    cat_list : list
        list of strings containing the feature names

    Returns
    -------
    altair chart
        the bar charts stacked as rows, coloured by the ``y`` field

    Examples
    --------
    make_cat_plot(bank_train, ['job', 'default'])
    """
    bars = alt.Chart(dat).mark_bar(opacity=0.8)
    panel = bars.encode(
        alt.X("count()"),
        alt.Y(alt.repeat("row"), type='nominal'),
        color=alt.Color('y', legend=None),
    ).properties(width=200, height=150)
    return panel.repeat(row=cat_list)
def create_map_series(df):
    """Write a Vega spec with one grid map per error metric.

    ``df`` is averaged per ``context`` value and joined to the grid cells by
    matching ``properties.id``. The spec is written next to ``INPUT`` with a
    file name derived from ``SHIPTYPE``.
    """
    # Note that a LayeredChart cannot contain faceted charts as its elements.
    # Therefore, we cannot use a basemap in this configuration, since it would
    # produce an invalid specification.
    # More info: https://github.com/altair-viz/altair/issues/785
    df2 = df.groupby('context').mean()
    # Expose the group key as a regular column so the lookup can join on it.
    df2['context'] = df2.index
    print(df2)

    grid = alt.topo_feature(
        'https://raw.githubusercontent.com/anitagraser/sandbox/master/grid40.topojson',
        'grid40')
    variable_list = [
        'distance_error', 'along_track_error', 'cross_track_error'
    ]

    color_channel = alt.Color(alt.repeat('column'),
                              type='quantitative',
                              scale=alt.Scale(domain=(0, 2000)))
    cells = alt.Chart(grid, title=SHIPTYPE).mark_geoshape(
        stroke='white', strokeWidth=2).encode(color_channel)
    joined = cells.transform_lookup(
        lookup='properties.id',
        from_=alt.LookupData(df2, 'context', variable_list))
    panel = joined.project(type='mercator').properties(width=300, height=200)
    map_chart = panel.repeat(column=variable_list).resolve_scale(
        color='shared')

    output_path = INPUT.replace('.csv', f'_{SHIPTYPE}_map_series.vega')
    with open(output_path, 'w') as map_output:
        map_output.write(map_chart.to_json(indent=2))
def j_summary(samples, ctype='hist', properties={'width': 800}):
    """Print a summary of ``samples`` and display per-column distributions.

    Parameters
    ----------
    samples : dict or DataFrame-like
        Either a dict of column name -> values, or an object exposing
        ``.columns`` and ``.T.values``.
    ctype : {'hist', 'density'}
        ``'hist'`` shows one binned histogram per column; ``'density'``
        shows one density line per column.
    properties : dict
        Currently unused; kept so existing callers keep working.

    Raises
    ------
    ValueError
        If ``ctype`` is not ``'hist'`` or ``'density'``.

    Returns
    -------
    None
        The correlation table and the chart are shown with ``display``.
    """
    # Fail early: previously an unknown ctype printed the summary and then
    # crashed with an unbound-variable error when displaying the chart.
    if ctype not in ('hist', 'density'):
        raise ValueError(
            f"ctype must be 'hist' or 'density', got {ctype!r}")

    # NOTE(review): 0.89 / False are forwarded positionally to print_summary;
    # presumably the interval probability and a grouping flag - confirm.
    if isinstance(samples, dict):
        print_summary(samples, 0.89, False)
        df = pd.DataFrame(samples).clean_names()
    else:
        print_summary(dict(zip(samples.columns, samples.T.values)), 0.89,
                      False)
        df = samples

    display(df.corr())

    # Frames with 5000 or more rows are down-sampled to 4000 before plotting.
    df = df if len(df) < 5000 else df.sample(n=4000)

    base = alt.Chart(df).properties(height=30)

    if ctype == 'density':
        panels = [
            base.mark_line().transform_density(
                row,
                as_=[row, 'density'],
            ).encode(alt.X(f'{row}:Q'), alt.Y('density:Q'))
            for row in df.columns
        ]
        return_chart = alt.vconcat(*panels)
    else:
        return_chart = base.mark_bar().encode(
            alt.X(bin=alt.Bin(maxbins=20),
                  field=alt.repeat("row"),
                  type='quantitative'),
            y=alt.Y(title=None, aggregate='count', type='quantitative'),
        ).repeat(row=list(df.columns))

    display(return_chart)
def draw_numeric_plot(train_df):
    """Draw overlaid binned area charts for the numeric columns.

    Args:
        train_df (pd.DataFrame): train data split as a pandas dataframe

    Returns:
        alt.RepeatChart: one panel per numeric column (the first numeric
        column is skipped), filled by ``good_wine``, four panels per row
    """
    numeric_part = train_df.select_dtypes(include=np.number)
    num_cols = list(numeric_part.iloc[:, 1:].columns)

    x_binned = alt.X(alt.repeat("repeat"),
                     type='quantitative',
                     scale=alt.Scale(zero=False),
                     bin=alt.Bin(maxbins=100))
    # stack=None keeps the areas overlaid instead of stacked.
    y_count = alt.Y('count()', stack=None)

    panel = alt.Chart(train_df).mark_area(opacity=0.5, interpolate='monotone')
    panel = panel.encode(x_binned, y_count, fill='good_wine')
    panel = panel.properties(height=200, width=200)

    num_plot = panel.repeat(repeat=num_cols, columns=4)
    return num_plot.configure_axis(labels=False)
def step4_related_factors():
    """Render step 4: a world map beside a scatter matrix of chosen factors.

    The user picks indicators with a multiselect; a year slider selection
    filters the scatter matrix, and both selections are handed to
    ``world_map_for_factors`` for the map.
    """
    st.header("Step4: Factors that may affect CO2 emissions")
    for tip in (
            "Tips:",
            "1. Add indicators you want to compare with CO2 emissions!",
            "2. Put your mouse on a country and compare across indicators!",
            "3. Remember to play with the year slide bar :)",
    ):
        st.write(tip)

    year_slider = alt.binding_range(min=1991, max=2011, step=1)
    select_year = alt.selection_single(name="Year",
                                       fields=['Year'],
                                       bind=year_slider,
                                       init={'Year': 2011})
    highlight = alt.selection_single(on='mouseover',
                                     fields=['Country Name'],
                                     empty='all')

    factor_options = [
        "CO2 emissions per GDP",
        "CO2 emissions (kt)",
        "CO2 emissions per capita",
        "Urban population (% of total)",
        "Renewable energy consumption (% of total final energy consumption)",
        "Forest area (% of land area)",
        "Marine protected areas (% of territorial waters)",
        "Population growth (annual %)",
        "Renewable electricity output % of total",
        "Terrestrial protected areas % of total",
        "Total greenhouse gas emissions (kt of CO2 equivalent)",
    ]
    preselected = [
        "CO2 emissions (kt)",
        "CO2 emissions per GDP",
        "Renewable electricity output % of total",
    ]
    dataset2 = st.multiselect("Choose factors to compare!", factor_options,
                              preselected)

    world_map = world_map_for_factors(highlight, dataset2, select_year)

    cell = alt.Chart(df).mark_point().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        color='Country Name:N',
    ).properties(
        width=160,
        height=160,
    )
    # Pairwise matrix of the chosen factors, limited to the selected year.
    factor_matrix = cell.repeat(
        row=dataset2,
        column=dataset2,
    ).transform_filter(select_year).interactive()

    st.write(alt.hconcat(world_map, factor_matrix))
def scatter_matrix():
    """Show an interactive duration/views/comments scatter matrix of ``df``.

    Points are coloured by ``published_day``; the chart is rendered with
    ``st.altair_chart``.
    """
    row_fields = ["duration", "views", "comments"]
    column_fields = ["comments", "views", "duration"]

    cell = alt.Chart(df).mark_circle().encode(
        alt.X(alt.repeat("column"), type="quantitative"),
        alt.Y(alt.repeat("row"), type="quantitative"),
        color="published_day",
    )
    cell = cell.properties(width=150, height=150)

    chart = cell.repeat(row=row_fields, column=column_fields).interactive()
    st.altair_chart(chart)
def scatter_matrix(df,
                   color: Union[str, None] = None,
                   alpha: float = 1.0,
                   tooltip: Union[List[str], tooltipList, None] = None,
                   **kwargs) -> alt.Chart:
    """ plots a scatter matrix

    At the moment supports neither histogram nor kde on the diagonal;
    uses plain feature-vs-feature scatterplots instead.
    Interactive and with a customizable tooltip

    Parameters
    ----------
    df : DataFrame
        DataFrame to be used for scatterplot. Only numeric columns will be included.
    color : string [optional]
        Can be a column name or specific color value (hex, webcolors).
    alpha : float
        Opacity of the markers, within [0,1]
    tooltip: list [optional]
        List of specific column names or alt.Tooltip objects. If none (default),
        will show all columns.
    **kwargs
        ``colormap`` (optional): colour scheme name, applied only when
        ``color`` names a column.

    Returns
    -------
    The repeated, interactive Altair chart (one cell per pair of numeric
    columns).
    """
    dfc = _preprocess_data(df)
    tooltip = _process_tooltip(tooltip) or dfc.columns.tolist()
    cols = dfc._get_numeric_data().columns.tolist()

    chart = (alt.Chart(dfc).mark_circle().encode(
        x=alt.X(alt.repeat("column"), type="quantitative"),
        # The y channel must be built with alt.Y; it was previously alt.X.
        y=alt.Y(alt.repeat("row"), type="quantitative"),
        opacity=alt.value(alpha),
        tooltip=tooltip,
    ).properties(width=150, height=150))

    if color:
        color = str(color)

        if color in dfc:
            # A column name: encode it as a field, optionally with a scheme.
            color = alt.Color(color)
            if "colormap" in kwargs:
                color.scale = alt.Scale(scheme=kwargs.get("colormap"))
        else:
            # Anything else is treated as a literal colour value.
            color = alt.value(color)
        chart = chart.encode(color=color)

    return chart.repeat(row=cols, column=cols).interactive()
def omniplot(folder="./Cifar10/ResNet101/exp1/students"):
    """Return a log-log scatter matrix of the metrics loaded from ``folder``.

    Every metric is plotted against every other; point shape follows
    ``student`` and colour follows ``distillation``.
    """
    data = load_data(folder)
    metrics = [
        'test_acc', 'test_loss', 'test_eval', 'train_acc', 'train_loss',
        'train_eval', 'temp'
    ]

    x_channel = alt.X(alt.repeat("column"),
                      type='quantitative',
                      scale=alt.Scale(zero=False, base=10, type='log'))
    y_channel = alt.Y(alt.repeat("row"),
                      type='quantitative',
                      scale=alt.Scale(zero=False, base=10, type='log'))

    cell = alt.Chart(data).mark_point().encode(
        x_channel, y_channel, shape='student', color='distillation')
    cell = cell.properties(width=150, height=150)
    return cell.repeat(row=metrics, column=metrics)
def create_text(lines, nearest):
    """
    Component of Altair plot creation. Build the text-label layer on top of
    ``lines``: where the ``nearest`` selection applies the repeated row value
    is shown, otherwise a single space.
    """
    row_value = alt.Y(alt.repeat("row"), type='quantitative')
    label = alt.condition(nearest, row_value, alt.value(' '))

    # Offset the label slightly up and to the right of the point.
    labels = lines.mark_text(align='left', dx=5, dy=-5)
    return labels.encode(text=label)
def hist_frame(self, column=None, layout=(-1, 2), **kwargs):
    """Return one binned histogram per numeric column, wrapped in a grid.

    ``column`` may be a single name or a list of names restricting the data;
    the number of grid columns comes from ``_get_layout``. Extra keyword
    arguments are forwarded to ``self._get_mark_def``.
    """
    # A lone column name is wrapped so that usecols always receives a list
    # (or None when no restriction was requested).
    if isinstance(column, str):
        column = [column]

    numeric = self._preprocess_data(with_index=False,
                                    usecols=column)._get_numeric_data()
    _unused_rows, ncols = _get_layout(numeric.shape[1], layout)

    panel = alt.Chart(numeric, mark=self._get_mark_def("bar", kwargs)).encode(
        x=alt.X(alt.repeat("repeat"), type="quantitative", bin=True),
        y=alt.Y("count()", title="Frequency"),
    )
    return panel.repeat(repeat=list(numeric.columns), columns=ncols)
def multiscatter(data,
                 columns=None,
                 group_by=None,
                 color=None,
                 tooltip=None,
                 height=300,
                 width=400):
    """Generate many scatter plots.

    Based on several columns, pairwise.

    Parameters
    ----------
    data
        Data passed to ``alt.Chart``; must expose ``.columns`` when
        ``columns`` is omitted.
    columns : list, optional
        Fields plotted against each other. Defaults to every column of
        ``data``.
    group_by
        Must be ``None``; long format is not supported yet.
    color, tooltip
        Optional encodings, forwarded through ``choose_kwargs``.
    height, width : number
        Overall size; each cell gets this value divided by the number of
        columns.

    Returns
    -------
    The repeated Altair chart.
    """
    # Must stay the first statement: locals() is captured while it only
    # holds the function arguments.
    kwargs = choose_kwargs(from_=locals(), which=["color", "tooltip"])

    assert group_by is None, "Long format not supported yet"

    # The default columns=None used to crash on len(None); fall back to all
    # columns instead.
    if columns is None:
        columns = list(data.columns)
    n_columns = len(columns)

    return (alt.Chart(data,
                      height=height / n_columns,
                      width=width / n_columns).mark_point(
                          size=1 / n_columns).encode(
                              alt.X(alt.repeat("column"),
                                    type="quantitative"),
                              alt.Y(alt.repeat("row"), type="quantitative"),
                              **kwargs).repeat(row=columns, column=columns))
def app():
    """Streamlit demo page: markdown, a pandas table, Altair charts, user input.

    Shows the head of the cars dataset, a brushable row of scatter plots of
    ``Miles_per_Gallon`` against three other fields, and a random scatter plot
    whose point count comes from a text box.
    """
    st.title("Let's demo Streamlit basics")
    st.sidebar.markdown("**Some tools we'll use:**")
    st.sidebar.markdown("\n"
                        "* Markdown\n"
                        "* Pandas\n"
                        "* Altair")

    st.markdown("## Markdown and Pandas\n"
                "I am writing Markdown but I can also show a pandas "
                "DataFrame.\n")

    df_cars = data.cars()
    st.write(df_cars.head())

    st.markdown("## Altair\n"
                "And I can easily make interactive altair plots.\n")

    # Points inside the brush keep their colour and opacity; the rest fade.
    brush = alt.selection_interval(encodings=['x', 'y'])
    repeat_chart = alt.Chart(df_cars).mark_point().encode(
        alt.X(alt.repeat('column'), type='quantitative'),
        alt.Y('Miles_per_Gallon:Q'),
        color=alt.condition(brush, 'Origin:N', alt.value('lightgray')),
        opacity=alt.condition(brush, alt.value(0.7), alt.value(0.1)),
    ).properties(width=150, height=150).add_selection(brush).repeat(
        column=['Weight_in_lbs', 'Acceleration', 'Horsepower'])
    st.write(repeat_chart)

    st.markdown("## User Input\n"
                "I can create a text input field to get input from the "
                "user.\n")

    raw_count = st.text_input("How many points do you want plotted:", '100')
    # Validate the free-text input: an empty or non-numeric value used to
    # crash the page in int(), and a negative one in np.random.random().
    try:
        n = int(raw_count)
    except ValueError:
        st.error("Please enter a whole number of points.")
        return
    if n < 0:
        st.error("The number of points cannot be negative.")
        return

    x = np.random.random(n) * 10
    y = np.random.random(n) * 10
    s = np.random.random(n)

    df = pd.DataFrame({'x': x, 'y': y, 'size': s})
    chart = alt.Chart(df, width=400, height=400).mark_point().encode(
        x='x', y='y', size=alt.Size('size', legend=None),
        tooltip=['size']).interactive()
    st.write(chart)
def correlation_scatter_charts(d: pd.DataFrame, title: str = '') -> alt.Chart:
    """Produces a grid of scatter plots of runtimes of stages versus covariates.

    Runtime columns (total runtime first) run across the grid and the count
    columns run down it; hovering a point shows its region.

    Args:
      d: A pandas dataframe of runtime by regions.
      title: A title for the plot.

    Returns:
      An altair chart
    """
    runtime_fields = ['total runtime'] + RUNTIME_COLUMNS
    # Keep only the columns the chart needs.
    subset = d[['region'] + runtime_fields + COUNT_COLUMNS]

    # Runtime tick labels get an "s" suffix.
    x_channel = alt.X(alt.repeat('column'),
                      type='quantitative',
                      axis=alt.Axis(labelExpr="datum.value + 's'"))
    y_channel = alt.Y(alt.repeat('row'), type='quantitative')

    cell = alt.Chart(subset).mark_circle(opacity=0.1).encode(
        x=x_channel, y=y_channel, tooltip='region')
    cell = cell.properties(width=100, height=100)

    grid = cell.repeat(column=runtime_fields, row=COUNT_COLUMNS)
    return grid.properties(title=title)
def create_lines(source, datetime_col="Time", logger_col="Location"):
    """
    Component of Altair plot creation. Build the line layer: time on x, the
    repeated row field on y (axis not forced to include zero), one colour per
    value of ``logger_col``.
    """
    time_axis = alt.X(datetime_col, type='temporal')
    value_axis = alt.Y(alt.repeat("row"),
                       type='quantitative',
                       scale=alt.Scale(zero=False))

    return alt.Chart(source).mark_line().encode(
        time_axis,
        value_axis,
        color=f'{logger_col}:N',
    )
def app():
    """Streamlit demo page: markdown, a pandas table, Altair charts, user input.

    Shows the head of the cars dataset, a brushable row of scatter plots of
    ``Miles_per_Gallon`` against three other fields, and a random scatter plot
    whose point count comes from a text box.
    """
    st.title("Streamlit demo for 202101")

    st.markdown("\n"
                "## Some section\n"
                "This is great!\n"
                "* bullet 1\n"
                "* bullet 2\n")

    st.sidebar.markdown('''This is the sidebar''')

    df_cars = data.cars()
    st.write(df_cars.head())

    # Points inside the brush keep their colour and opacity; the rest fade.
    brush = alt.selection_interval(encodings=['x', 'y'])
    repeat_chart = alt.Chart(df_cars).mark_point().encode(
        alt.X(alt.repeat('column'), type='quantitative'),
        alt.Y('Miles_per_Gallon:Q'),
        color=alt.condition(brush, 'Origin:N', alt.value('lightgray')),
        opacity=alt.condition(brush, alt.value(0.7), alt.value(0.1)),
    ).properties(width=150, height=150).add_selection(brush).repeat(
        column=['Weight_in_lbs', 'Acceleration', 'Horsepower'])
    st.write(repeat_chart)

    st.markdown("\n"
                "## User Input Demo\n")

    raw_count = st.text_input('How many point do you want', '100')
    # Validate the free-text input: an empty or non-numeric value used to
    # crash the page in int(), and a negative one in np.random.random().
    try:
        n = int(raw_count)
    except ValueError:
        st.error("Please enter a whole number of points.")
        return
    if n < 0:
        st.error("The number of points cannot be negative.")
        return

    x = np.random.random(n) * 10
    y = np.random.random(n) * 10
    s = np.random.random(n)

    df = pd.DataFrame({'x': x, 'y': y, 'size': s})
    chart = alt.Chart(df, width=400, height=400).mark_point().encode(
        x='x', y='y', size=alt.Size('size', legend=None),
        tooltip=['size']).interactive()
    st.write(chart)
def facetted_histogram(df):
    """Return one histogram per column of ``df`` with cross-filter brushing.

    Each panel layers a light-grey histogram of all rows (which carries the
    x-interval brush) under a histogram of only the brushed rows.
    """
    brush = alt.selection(type='interval', encodings=['x'])

    binned_x = alt.X(alt.repeat('column'),
                     type='quantitative',
                     bin=alt.Bin(maxbins=20))
    count_y = alt.Y('count()', axis=alt.Axis(title=''))

    # The data is attached at the layer level, so the base chart has none.
    base = alt.Chart().mark_bar().encode(x=binned_x, y=count_y)
    base = base.properties(width=200, height=150)

    background = base.encode(color=alt.value('#ddd')).add_selection(brush)
    highlight = base.transform_filter(brush)

    layered = alt.layer(background, highlight, data=df)
    return layered.repeat(column=list(df.columns))
def numericHistograms(data):
    """
    Generate a histogram for every numeric column in a Pandas DataFrame

    One binned bar chart is produced per numeric column, stacked as rows, to
    show how the values of that column are distributed.

    Parameters
    ----------
    data : pd.DataFrame
        A Pandas DataFrame that contains numeric values

    Returns
    -------
    The repeated Altair chart.
    """
    columns = data.select_dtypes(np.number).columns.tolist()

    binned_x = alt.X(alt.repeat('row'), type='quantitative', bin=True)
    count_y = alt.Y("count()", type='quantitative')

    panel = alt.Chart(data, height=100).mark_bar().encode(x=binned_x,
                                                          y=count_y)
    return panel.repeat(row=columns)
"""
Repeated Choropleth Map
=======================
Three choropleths representing disjoint data from the same table.
"""
# category: maps
import altair as alt
from vega_datasets import data

states = alt.topo_feature(data.us_10m.url, 'states')
source = data.population_engineers_hurricanes.url
variable_list = ['population', 'engineers', 'hurricanes']

# One map per variable, stacked as rows. The variables are joined onto the
# state shapes by id, and each map gets its own colour scale.
(
    alt.Chart(states)
    .mark_geoshape()
    .encode(color=alt.Color(alt.repeat('row'), type='quantitative'))
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(source, 'id', variable_list),
    )
    .properties(width=500, height=300)
    .project(type='albersUsa')
    .repeat(row=variable_list)
    .resolve_scale(color='independent')
)
""" # category: interactive charts import altair as alt from vega_datasets import data source = alt.UrlData( data.flights_2k.url, format={'parse': {'date': 'date'}} ) brush = alt.selection(type='interval', encodings=['x']) # Define the base chart, with the common parts of the # background and highlights base = alt.Chart().mark_bar().encode( x=alt.X(alt.repeat('column'), type='quantitative', bin=alt.Bin(maxbins=20)), y='count()' ).properties( width=180, height=130 ) # blue background with selection background = base.properties(selection=brush) # yellow highlights on the transformed data highlight = base.encode( color=alt.value('goldenrod') ).transform_filter(brush) # layer the two charts & repeat