def plot_experiment_bars_with_errors(data, n_card_pairs, n_card_pairs_init, n_repeats):
    """Display a slider-filtered bar chart of mean ``win`` per ``stack``.

    Two range sliders are bound to the chart. The first keeps rows whose
    ``card_pair`` is at most the chosen value; the second keeps rows of one
    ``experiment``. Bars and confidence-interval error bars are layered and
    rendered through ``display(renderer='svg')``. Nothing is returned.

    Args:
        data: Source handed to ``alt.Chart``. The chart reads the fields
            ``card_pair``, ``experiment``, ``stack`` and ``win``.
        n_card_pairs: Maximum of the card-pair slider.
        n_card_pairs_init: Initial value of the card-pair slider.
        n_repeats: Maximum of the experiment slider.
    """
    # Slider-backed single selections.
    card_pair_slider = alt.binding(
        input='range',
        min=1,
        max=n_card_pairs,
        step=1,
        name='Card pairs per experiment: ',
    )
    card_pair_selection = alt.selection_single(
        bind=card_pair_slider,
        init={'card_pair': n_card_pairs_init},
    )
    experiment_slider = alt.binding(
        input='range',
        min=1,
        max=n_repeats,
        step=1,
        name='Select one experiment: ',
    )
    experiment_selection = alt.selection_single(
        bind=experiment_slider,
        init={'experiment': 1},
    )

    # Automatic typing is unreliable when a datum is compared with a
    # selection value, so both sides are converted to numbers explicitly.
    within_card_pairs = (
        alt.expr.toNumber(alt.datum.card_pair)
        <= alt.expr.toNumber(card_pair_selection.card_pair)
    )
    is_selected_experiment = (
        alt.expr.toNumber(alt.datum.experiment)
        == alt.expr.toNumber(experiment_selection.experiment)
    )
    filtered = (
        alt.Chart(data)
        .add_selection(card_pair_selection, experiment_selection)
        .transform_filter(within_card_pairs & is_selected_experiment)
    )

    # Mean win rate per stack.
    bar_layer = filtered.mark_bar().encode(
        x='stack:N',
        y='mean(win):Q',
        color=alt.Color('stack:N', legend=None),
    )

    # Confidence-interval error bars over the same data.
    error_layer = filtered.mark_errorbar(
        extent='ci',
        rule=alt.MarkConfig(size=2),
    ).encode(
        alt.Y('win:Q', title='Winning probability'),
        x=alt.X('stack:N', title='Card Stack'),
    )

    combined = alt.layer(bar_layer, error_layer).properties(width=250, height=250)
    combined = combined.configure_axis(grid=False).configure_view(strokeWidth=0)
    combined.display(renderer='svg')
def layered_stage_and_task_charts(task_layers, noun="Time"):
    """Stack a per-stage bar chart above a per-task bar chart.

    Each frame in ``task_layers`` contributes one layer to both charts. The
    stage chart uses the frame summed by ``Stage ID`` and ``Metric Name``.
    The task chart uses the frame as given. Selecting stages in the upper
    chart filters the lower one.

    Args:
        task_layers: Iterable of DataFrames with (at least) the columns
            ``Stage ID``, ``Metric Name``, ``Metric Value`` and ``Task ID``.
        noun: Title of the y axis in both charts.

    Returns:
        The vertical concatenation of the stage chart and the task chart.
    """
    stage_picker = alt.selection_multi(
        name="selector_SelectorName", fields=['Stage ID'], empty='none')

    def _metric_bars(frame, x_shorthand):
        # One stacked-bar layer; only the x field differs between the charts.
        return alt.Chart(frame.reset_index()).mark_bar().encode(
            x=x_shorthand,
            y=alt.Y('sum(Metric Value):Q', title=noun),
            color='Metric Name:N',
            tooltip=['Metric Name', 'Metric Value', 'Task ID'],
        )

    stage_frames = [
        frame.groupby(['Stage ID', 'Metric Name']).sum() for frame in task_layers
    ]

    stage_layers = [_metric_bars(frame, 'Stage ID:N') for frame in stage_frames]
    stages = alt.layer(*stage_layers).add_selection(stage_picker).interactive()

    task_layers_filtered = [
        _metric_bars(frame, 'Task ID:N').transform_filter(stage_picker)
        for frame in task_layers
    ]
    tasks = alt.layer(*task_layers_filtered).interactive()

    return alt.vconcat(stages, tasks)
def plot_lowess(X, y, z, config=True):
    """Layer a scatter of the observations under a fitted curve.

    Args:
        X: x values of the observations.
        y: y values of the observations.
        z: Two-dimensional array; column 0 holds the curve's x values and
            column 1 its y values.
        config: When truthy, enlarge axis and legend fonts on the result.

    Returns:
        The layered Altair chart (scatter first, line on top).
    """
    observed = pd.DataFrame({'X': X, 'y': y})
    fitted = pd.DataFrame({'X': z[:, 0], 'y': z[:, 1]})

    points = alt.Chart(observed).mark_circle(
        size=100, color='red', opacity=1
    ).encode(x='X', y='y')
    curve = alt.Chart(fitted).mark_line().encode(x='X', y='y')

    layered = alt.layer(points, curve)
    if not config:
        return layered

    return layered.configure_axis(
        labelFontSize=20,
        titleFontSize=20,
    ).configure_legend(
        titleFontSize=20,
        labelFontSize=20,
    )
def plot_hourly_weather(hourly):
    """Build a layered hourly weather chart.

    The layers are: temperature and wind-speed lines (each with its own y
    scale), a stepped area for ``pop``, shaded rectangles spanning ``dt`` to
    ``dt_end``, and faint vertical ``description`` labels.

    Args:
        hourly: Chart data with the fields ``dt``, ``temp``, ``wind_speed``,
            ``pop``, ``color``, ``dt_end`` and ``description``.

    Returns:
        The layered Altair chart.
    """
    temp_color = "#000000"
    wind_color = "#00d2e6"

    # Every layer shares the same untitled temporal x axis.
    base = alt.Chart(hourly).encode(x=alt.X('dt:T', axis=alt.Axis(title=None)))

    temp_y = alt.Y(
        'temp:Q',
        scale=alt.Scale(zero=False),
        axis=alt.Axis(title="Temperature (°F)", titleColor=temp_color),
    )
    temp = base.mark_line(stroke=temp_color).encode(y=temp_y)

    wind_y = alt.Y(
        'wind_speed:Q',
        axis=alt.Axis(title="Wind Speed (MPH)", titleColor=wind_color),
    )
    wind = base.mark_line(stroke=wind_color).encode(y=wind_y)

    # The 'color' field is used verbatim (scale=None) for the fill.
    precip = base.encode(
        y=alt.Y('pop', axis=alt.Axis(title=None, labels=False)),
        color=alt.Color('color', scale=None),
    ).mark_area(line=False, interpolate='step-after', opacity=0.5)

    sun = base.encode(
        x2='dt_end:T',
        color=alt.Color('color', scale=None),
    ).mark_rect(opacity=0.3)

    sun_text = base.encode(text="description").mark_text(
        opacity=0.15, angle=270, xOffset=8, y=250, align="left")

    lines = alt.layer(temp, wind).resolve_scale(y='independent')
    return alt.layer(lines, precip, sun, sun_text)
def make_combined_chart(data, scale="log", show_uncertainty=True):
    """Combine the exposed/infected line chart with optional error bands.

    Args:
        data: Data forwarded to the line and band chart builders.
        scale: Scale name forwarded to the chart builders.
        show_uncertainty: When truthy, an error band for "exposed" and one
            for "infected" are drawn underneath the lines.

    Returns:
        A vertically concatenated, interactive chart with enlarged title,
        axis and legend fonts.
    """
    lines = make_exposed_infected_line_chart(data, scale=scale)

    bands = []
    if show_uncertainty:
        # Same order as the layers: exposed band first, then infected.
        for series in ("exposed", "infected"):
            bands.append(
                make_exposed_infected_error_area_chart(
                    data,
                    plot_params[series]["name"],
                    plot_params[series]["color"],
                    scale=scale,
                )
            )

    output = alt.layer(*bands, lines)

    combined = alt.vconcat(output.interactive(), padding={"top": 20})
    combined = combined.configure_title(fontSize=16)
    combined = combined.configure_axis(labelFontSize=14, titleFontSize=14)
    return combined.configure_legend(labelFontSize=14, titleFontSize=14)
def plot_model(X, y, model, predict_proba = False):
    """Plot a model's predictions over a 2-D mesh spanning the first two features.

    Args:
        X: DataFrame of features. Only its first two columns are used for
            the mesh and the axes.
        y: DataFrame of targets. Its first column colours the scatter points
            (only used when ``predict_proba`` is false).
        model: Fitted estimator exposing ``predict`` and, when
            ``predict_proba`` is true, ``predict_proba``.
        predict_proba: When true, colour the mesh by the second column of
            ``model.predict_proba``; otherwise colour it by ``model.predict``
            and overlay the samples.

    Returns:
        A layered Altair chart with enlarged axis and legend fonts.
    """
    # Join data for plotting.
    sample = X.join(y)

    # Reduce once. X.join(y) is a left join, so the feature columns have the
    # same extremes in X as in the joined frame.
    lower, upper = X.min(), X.max()
    step = (upper - lower) / 50

    # Use .iloc: integer keys on a label-indexed Series rely on pandas'
    # deprecated positional fallback.
    step_1, step_2 = step.iloc[0], step.iloc[1]
    x1, x2 = np.meshgrid(
        np.arange(lower.iloc[0] - step_1, upper.iloc[0] + step_1, step_1),
        np.arange(lower.iloc[1] - step_2, upper.iloc[1] + step_2, step_2),
    )

    # Store the mesh in a dataframe, one row per grid point.
    mesh_df = pd.DataFrame(np.c_[x1.ravel(), x2.ravel()], columns=['x1', 'x2'])
    mesh_features = mesh_df[['x1', 'x2']]

    if predict_proba:
        mesh_df['predictions'] = model.predict_proba(mesh_features)[:, 1]
        base_plot = alt.Chart(mesh_df).mark_rect(opacity=0.5).encode(
            x=alt.X('x1', bin=alt.Bin(step=step_1),
                    axis=alt.Axis(title=X.columns[0])),
            y=alt.Y('x2', bin=alt.Bin(step=step_2),
                    axis=alt.Axis(title=X.columns[1])),
            color=alt.Color('predictions', title='P(red)',
                            scale=alt.Scale(scheme='blueorange')),
        ).properties(width=400, height=400)
        layered = alt.layer(base_plot)
    else:
        mesh_df['predictions'] = model.predict(mesh_features)
        scat_plot = alt.Chart(sample).mark_circle(
            stroke='black', opacity=1, strokeWidth=1.5, size=100
        ).encode(
            x=alt.X(X.columns[0],
                    axis=alt.Axis(labels=True, ticks=True, title=X.columns[0])),
            y=alt.Y(X.columns[1],
                    axis=alt.Axis(labels=True, ticks=True, title=X.columns[1])),
            color=alt.Color(y.columns[0]),
        )
        base_plot = alt.Chart(mesh_df).mark_rect(opacity=0.5).encode(
            x=alt.X('x1', bin=alt.Bin(step=step_1)),
            y=alt.Y('x2', bin=alt.Bin(step=step_2)),
            color=alt.Color('predictions', title='Legend'),
        ).properties(width=400, height=400)
        layered = alt.layer(base_plot, scat_plot)

    # Both variants share the same font configuration.
    return layered.configure_axis(
        labelFontSize=20,
        titleFontSize=20,
    ).configure_legend(
        titleFontSize=20,
        labelFontSize=20,
    )
def _production_facet_chart(planning, cust_orders, row_label, title, bar_width):
    """Build one bar chart faceted by date from rows whose index contains ``row_label``."""
    df = (
        planning.filter(like=row_label, axis=0)
        .copy()
        .rename(columns={"Solution": "Qty"})
        .reset_index()
    )
    # Index entries look like "<name>[<date>,<order>]"; split out both parts.
    df[["Date", "Order"]] = df["index"].str.split(",", expand=True)
    df["Date"] = df["Date"].str.split("[").str[1]
    df["Order"] = df["Order"].str.split("]").str[0]
    df = df[["Date", "Qty", "Order"]]

    # Attach the product family of every order.
    models_list = cust_orders[['Order', 'Product_Family']]
    df = pd.merge(df, models_list, on='Order', how='inner')
    df = df[["Date", "Qty", "Product_Family"]]

    bars = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            y="Qty:Q",
            color="Product_Family:N",
            tooltip=["Product_Family", "Qty"],
        )
        .interactive()
        .properties(width=bar_width, height=60)
    )
    return alt.layer(bars, data=df).facet(column="Date:N").properties(title=title)


def plot_inventory(
    planning: pd.DataFrame,
    timeline: List[str],
    cust_orders,
) -> None:
    """Save the inventory and shortage charts to ``Inventory_Shortage.html``.

    Inventory is built from the ``planning`` rows whose index contains
    "early prod", shortage from those containing "late prod". Each chart is
    faceted by date and coloured by product family.

    Args:
        planning: Frame with a ``Solution`` column and a string index of the
            form ``<name>[<date>,<order>]``.
        timeline: Dates of the planning horizon. Only its length is used, to
            size the width of each facet.
        cust_orders: Frame with ``Order`` and ``Product_Family`` columns.
    """
    bar_width = 550 / len(timeline) - 22

    chart_inventory = _production_facet_chart(
        planning, cust_orders, "early prod", "Inventory", bar_width)
    chart_shortage = _production_facet_chart(
        planning, cust_orders, "late prod", "Shortage", bar_width)

    chart = alt.vconcat(chart_inventory, chart_shortage)
    chart.save("Inventory_Shortage.html")
def single_line(data: pd.DataFrame,
                game_id: str,
                preds: list = None,
                width: int = 800,
                height: int = 400):
    """Render the offer history of one game in Streamlit.

    The chart shows the ``Offer`` and ``Board Average`` lines for the rows of
    ``data`` whose ``Game ID`` equals ``game_id``. When predictions are
    supplied, one line per ``Model`` is added along with a hover rule that
    highlights the nearest round.

    Args:
        data: Frame with ``Game ID``, ``Round``, ``Offer`` and
            ``Board Average`` columns.
        game_id: Game to display.
        preds: Optional list of records with ``Round``, ``Prediction`` and
            ``Model`` keys. ``None`` (the default) means no predictions.
        width: Chart width in pixels.
        height: Chart height in pixels.
    """
    # None instead of a shared mutable [] default.
    if preds is None:
        preds = []

    line = data[data['Game ID'] == game_id]
    # At most ten ticks, fewer when the game has fewer rows.
    ticks = min(10, line.shape[0])

    def _round_axis():
        # A fresh channel object per encoding.
        return alt.X('Round:Q', axis=alt.Axis(tickCount=ticks, grid=False))

    offers = alt.Chart(line).mark_line().encode(
        x=_round_axis(),
        y='Offer:Q',
        color=alt.value('red'),
        opacity=alt.value(1),
    )
    expected = alt.Chart(line).mark_line().encode(
        x=_round_axis(),
        y='Board Average:Q',
        color=alt.value('gray'),
        opacity=alt.value(.75),
    )

    if len(preds) > 0:
        pred_df = pd.DataFrame(preds)
        predictions = alt.Chart(pred_df).mark_line().encode(
            x=_round_axis(),
            y='Prediction:Q',
            color='Model',
            opacity=alt.value(0.4),
        )

        # Selection that follows the round nearest to the mouse.
        nearest = alt.selection(type='single', nearest=True, on='mouseover',
                                fields=['Round'], empty='none')
        # Invisible points that capture the mouse position.
        selectors = alt.Chart(data).mark_point().encode(
            x='Round:Q',
            opacity=alt.value(0),
        ).add_selection(nearest)
        points = predictions.mark_point().encode(
            opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
        text = predictions.mark_text(align='left', dx=5, dy=-5).encode(
            text=alt.condition(nearest, 'Model', alt.value(' ')))
        rules = alt.Chart(data).mark_rule(color='gray').encode(
            x='Round:Q',
        ).transform_filter(nearest)

        layers = alt.layer(
            offers, expected, predictions, selectors, points, text, rules
        ).properties(width=width, height=height)
    else:
        layers = alt.layer(offers, expected).properties(
            width=width, height=height)

    st.altair_chart(layers)
def layer(*layers, **kwargs):
    """Layer charts: a drop-in replacement for ``altair.layer``.

    Every layer is copied first so the callers' charts are not modified.
    When all layers carry a dataset equal to the first layer's, that dataset
    is lifted up to the top-level layered chart and removed from the
    individual layers.

    Args:
        *layers: Charts to layer.
        **kwargs: Forwarded to ``altair.layer``.

    Returns:
        The layered chart.
    """
    copies = [chart.copy() for chart in layers]
    if not copies:
        # Nothing to inspect for shared data; defer to altair.
        return alt.layer(**kwargs)

    data = copies[0].data
    # Only objects exposing ``equals`` (e.g. DataFrames) can be compared.
    # Anything else is layered unchanged.
    shares_data = hasattr(data, "equals") and all(
        data.equals(chart.data) for chart in copies
    )
    if not shares_data:
        return alt.layer(*copies, **kwargs)

    layered = alt.layer(*copies, **kwargs, data=data)
    for sub_chart in layered.layer:
        del sub_chart._kwds["data"]
    return layered
def test_single_condition_nodata():
    """Fit a hierarchical model on ``dfl`` and display its posterior plots."""
    window = BayesRegression(
        df=dfl,
        y='Log power',
        treatment='stim',
        group='mouse',
    )
    window.fit(
        model=models.model_hierarchical,
        do_make_change='divide',
        dist_y='normal',
    )

    # Posterior layers built directly from the fitted data.
    posterior_layers = plot_posterior(
        df=window.data_and_posterior,
        y_title='Log power',
    )
    alt.layer(*posterior_layers).display()

    # The model's own plot, with independent axes.
    window.plot(independent_axes=True).display()
def rows(df):
    """Build the cumulative and daily layered charts.

    Args:
        df: Frame with ``date``, ``total``, ``vaccinated``, ``new`` and
            ``new_vaccinated`` columns. The date of the last row is printed
            as a progress message.

    Returns:
        Tuple ``(main, new)``. ``main`` layers the ``total`` and
        ``vaccinated`` areas; ``new`` layers the ``new`` and
        ``new_vaccinated`` bars.
    """
    today = pd.to_datetime(df.iloc[-1].date).strftime('%b %d')
    print(f'Making rows for {today}...')

    size = {'width': 700, 'height': 250}

    # Cumulative totals as an area.
    total_y_axis = alt.Axis(
        title='TOTAL doses/vaccinations',
        titleFontSize=15,
        titleColor='#66334B',
        orient='left',
        grid=False,
    )
    total = alt.Chart(df).mark_area(
        line={'color': '#336634'}, color='#9DC49E'
    ).encode(
        alt.X('date:T', axis=alt.Axis(title=None, labelOpacity=1)),
        alt.Y('total:Q', axis=total_y_axis),
    ).properties(**size)

    # Cumulative vaccinations as a second area, with hidden x labels.
    vaccd = alt.Chart(df).mark_area(
        line={'color': '#66334B'}, color='#783C58'
    ).encode(
        alt.X('date:T', axis=alt.Axis(labelOpacity=0, title=None, grid=False)),
        alt.Y('vaccinated:Q', axis=alt.Axis(orient='left')),
    ).properties(**size)

    main = alt.layer(total, vaccd).properties(**size)

    # Daily new doses as bars.
    new_doses = alt.Chart(df).mark_bar(color='#6F956F').encode(
        alt.X('date:O', axis=alt.Axis(title=None, grid=False, labelOpacity=0)),
        alt.Y('new:Q',
              axis=alt.Axis(titleColor='#66334B', orient='left', grid=False)),
    ).properties(**size)

    # Daily new vaccinations as bars; this layer carries the y-axis title.
    new_vaccs_y_axis = alt.Axis(
        title='NEW doses/vaccinations',
        titleColor='#66334B',
        titleFontSize=15,
        orient='left',
        grid=False,
    )
    new_vaccs = alt.Chart(df).mark_bar(color='#66334B').encode(
        alt.X('date:O',
              axis=alt.Axis(title=None, grid=False, labelOpacity=0,
                            labelFontSize=1)),
        alt.Y('new_vaccinated:Q', axis=new_vaccs_y_axis),
    ).properties(**size)

    new = alt.layer(new_doses, new_vaccs).properties(**size)

    return main, new
def _jitterbox_plot(data, height, width, mark, box_mark, whisker_mark,
                    encoding, jitter_width, sort, box_overlay, **kwargs):
    """Generate a jitter-box plot with Altair.

    The jitter plot and the box plot are built from the same arguments and
    layered. ``box_overlay`` decides which is drawn on top: the box when
    truthy, the jitter points otherwise.
    """
    jitter = _jitter_plot(data, height, width, mark, encoding, jitter_width,
                          sort, **kwargs)
    box = _box_plot_q(data, height, width, mark, box_mark, whisker_mark,
                      encoding, sort, jitter_width, **kwargs)

    # Later layers are drawn on top of earlier ones.
    ordered = (jitter, box) if box_overlay else (box, jitter)
    return alt.layer(*ordered)
def plot_bar_chart(source):
    """Save a faceted bar chart with confidence-interval error bars as HTML.

    Args:
        source: DataFrame whose first column holds the values (averaged for
            the bars), whose second column is the category on the x axis,
            and whose third column defines the facet columns.
    """
    columns = source.columns
    value_col = columns[0]
    category_col = columns[1]
    facet_col = columns[2]

    # Mean of the value column per category.
    bars = alt.Chart().mark_bar().encode(
        x=category_col + ':O',
        y=alt.Y('mean(' + value_col + '):Q'),
        color=category_col + ':N',
    )
    # Confidence interval computed from the raw values.
    error_bars = alt.Chart().mark_errorbar(extent='ci').encode(
        x=category_col + ':O',
        y=value_col + ':Q',
    )

    facet_column = alt.Column(
        facet_col + ':N',
        sort=["Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
    )
    layered = alt.layer(bars, error_bars, data=source)
    layered.facet(column=facet_column).save("圖表\\各隊一週內票房變化.html")
def plot_covariate_effects(self):
    """Plot covariate effects.

    ``self.covariate_effects`` is converted to percent change
    (``(value - 1) * 100``) and joined with the melted
    ``self.covariate_statistics``. One chart is built per parameter, faceted
    into rows by covariate, and the charts are stacked vertically with a
    shared x scale.

    Returns:
        The vertically concatenated Altair chart.
    """
    effects = (self.covariate_effects - 1) * 100

    stats_long = pd.melt(
        self.covariate_statistics.reset_index(),
        var_name='condition',
        id_vars=['covariate'],
        value_vars=['p5', 'p95', 'other'],
    )
    stats_long = stats_long.replace({'p5': '5th', 'p95': '95th'})
    stats_long = stats_long.set_index(['covariate', 'condition'])

    effects = effects.join(stats_long, how='inner')
    # The join reorders the index levels, pandas bug #34133
    effects = effects.reorder_levels(['parameter', 'covariate', 'condition'])

    def _parameter_plot(parameter):
        # Rows of a single parameter, with the index turned into columns.
        df = effects.xs(parameter, level=0).reset_index()

        error_bars = alt.Chart(df).mark_errorbar(ticks=True).encode(
            x=alt.X('p5:Q', title='Effect size in percent',
                    scale=alt.Scale(zero=False)),
            x2=alt.X2('p95:Q'),
            y=alt.Y('condition:N', title=None),
        )
        # Dashed reference line at zero effect.
        rule = alt.Chart(df).mark_rule(
            strokeDash=[10, 4], color='gray'
        ).encode(x=alt.X('xzero:Q')).transform_calculate(xzero="0")
        points = alt.Chart(df).mark_point(filled=True, color='black').encode(
            x=alt.X('mean:Q'),
            y=alt.Y('condition:N'),
        )
        text = alt.Chart(df).mark_text(dy=-15, color="red").encode(
            x=alt.X("mean:Q"),
            y=alt.Y("condition:N"),
            text=alt.Text("value:Q"),
        )

        layered = alt.layer(error_bars, rule, points, text,
                            data=df, width=800, height=100)
        faceted = layered.facet(
            columns=1.0,
            row=alt.Facet('covariate:N', title=None),
            title=f'{parameter}',
        )
        return faceted.resolve_scale(y='independent')

    param_names = list(effects.index.get_level_values('parameter').unique())
    plots = [_parameter_plot(parameter) for parameter in param_names]

    return alt.vconcat(*plots).resolve_scale(x='shared')
def plot_hourly_altair(temperature, precipitation, time_zero, name):
    """Make a plot of hourly temperature and precipitation data.

    Precipitation is drawn as bars and temperature as a line on top, each
    with its own y scale. ``name`` becomes the chart title.

    Returns:
        The layered Altair chart.
    """
    # Temperature series, trimmed to start at the returned offset.
    _, t_times, _, t_values, t_start = get_times_and_values(
        temperature, 'temperature', time_zero)
    temperature_frame = pd.DataFrame({
        'hours': t_times[t_start:],
        'temperature': t_values[t_start:],
    })

    # Precipitation series, trimmed the same way with its own offset.
    _, p_times, _, p_values, p_start = get_times_and_values(
        precipitation, 'precipitation', time_zero)
    rain_frame = pd.DataFrame({
        'hours': p_times[p_start:],
        'rain': p_values[p_start:],
    })

    temperature_line = alt.Chart(temperature_frame).mark_line(size=3).encode(
        x=alt.X('hours:T'),
        y=alt.Y('temperature', title='Temperature (°C)'),
        color=alt.value("#FFAA00"),
    )
    rain_bars = alt.Chart(rain_frame).mark_bar(size=15).encode(
        x=alt.X('hours:T', title='Time'),
        y=alt.Y('rain', title='Precipitation (mm)'),
    )

    chart = alt.layer(rain_bars, temperature_line).resolve_scale(y='independent')
    chart.title = name
    return chart
def plot_altair(hist, dist, dist_name, bin_size):
    """Overlay a histogram of relative frequencies with a fitted distribution.

    Args:
        hist: Relative frequencies per bin (column ``rf``).
        dist: Fitted probabilities per bin (column ``p``). The shorter of
            ``hist`` and ``dist`` is padded with zeros.
        dist_name: Name of the distribution, used in the chart title.
        bin_size: Width of one bin. The bin positions are the row indices
            multiplied by this value.

    Returns:
        The configured layered Altair chart (bars plus rules).
    """
    # from_dict(orient='index') + transpose tolerates inputs of different
    # lengths; missing entries become NaN and are then zero-filled.
    data = pd.DataFrame.from_dict(
        {'rf': hist, 'p': dist}, orient='index'
    ).transpose().fillna(0).reset_index()
    data['index'] = data['index'] * bin_size

    x_axis = alt.Axis(values=list(range(-5, 60, 5)))
    base = alt.Chart(
        data, title=f'{dist_name} Estimation of EKA Goals'
    ).encode(
        alt.X('index:Q', title='Goals Scored',
              bin=alt.Bin(step=bin_size), axis=x_axis)
    )

    bar = base.mark_bar(opacity=.7).encode(alt.Y('rf:Q'))
    # The rule layer replaces the binned x with the raw index.
    rule = base.mark_rule(size=2).encode(
        alt.X('index:Q'),
        alt.Y('p:Q', title='Relative Frequency', axis=alt.Axis(tickCount=5)),
    )

    return alt.layer(bar, rule).properties(
        width=600, height=500
    ).configure_axis(titleFontSize=16).configure_title(fontSize=20)
def ruled_altair_chart(source):
    """Build a multi-series line chart with a hover rule and value labels.

    Args:
        source: Chart data with ``date``, ``value`` and ``variable`` fields.

    Returns:
        A layered chart: lines, invisible selector points, highlighted
        points, value labels and a vertical rule at the hovered date.
    """
    date_axis = alt.Axis(tickSize=0, labelAngle=-90, tickCount=5, title='Date')
    line = alt.Chart(source).encode(
        x=alt.X('yearmonthdate(date):T', axis=date_axis),
        y=alt.Y('value', title='Count'),
        color='variable',
    )

    # Selection that picks the point nearest to the mouse by its date.
    nearest = alt.selection(type='single', nearest=True, on='mouseover',
                            fields=['date'], empty='none')

    # Invisible points spanning the chart; they report the cursor's x-value.
    selectors = alt.Chart(source).mark_point().encode(
        x='date',
        opacity=alt.value(0),
    ).add_selection(nearest)

    # Points on the lines, visible only for the selected date.
    highlight = alt.condition(nearest, alt.value(1), alt.value(0))
    points = line.mark_point().encode(opacity=highlight)

    # Value labels beside the points, blank unless selected.
    label = alt.condition(nearest, 'value:Q', alt.value(' '))
    text = line.mark_text(align='left', dx=5, dy=-5).encode(text=label)

    # Vertical rule at the selected date.
    rules = alt.Chart(source).mark_rule(color='gray').encode(
        x='date',
    ).transform_filter(nearest)

    return alt.layer(line.mark_line(), selectors, points, text, rules)
def movie_embedding_norm(models):
    """Visualizes the norm and number of ratings of the movie embeddings.

    One scatter chart per model is built (embedding norm against number of
    ratings). The charts share a brush selection and are shown side by side
    through ``altair_viewer.show``.

    Args:
        models: A model object, or a list of them. Each must expose
            ``embeddings["movie_id"]`` as a 2-D array.
            Presumably these are MFModel instances; confirm with the caller.

    Returns:
        Whatever ``altair_viewer.show`` returns.
    """
    model_list = models if isinstance(models, list) else [models]

    df = pd.DataFrame({
        'title': movies['title'],
        'genre': movies['genre'],
        'num_ratings': movies_ratings['rating count'],
    })

    # A single brush shared by every chart links the highlighting.
    brush = alt.selection_interval()
    charts = []
    for index, model in enumerate(model_list):
        norm_column = 'norm' + str(index)
        df[norm_column] = np.linalg.norm(model.embeddings["movie_id"], axis=1)

        hover = alt.selection(
            type='single', encodings=['x', 'y'], on='mouseover',
            nearest=True, empty='none')

        brushed_color = alt.condition(
            brush, alt.value('#4c78a8'), alt.value('lightgray'))
        circles = alt.Chart().mark_circle().encode(
            x='num_ratings',
            y=norm_column,
            color=brushed_color,
        ).properties(selection=hover).add_selection(brush)

        # Title of the hovered movie only.
        labels = alt.Chart().mark_text(align='center', dx=5, dy=-5).encode(
            x='num_ratings',
            y=norm_column,
            text=alt.condition(hover, 'title', alt.value('')),
        )
        charts.append(alt.layer(circles, labels))

    return altair_viewer.show(alt.hconcat(*charts, data=df))
def _jitter_plot(data, height, width, mark, encoding, jitter_width, sort,
                 **kwargs):
    """Generate a jitter plot with Altair.

    Returns a layered chart holding the jittered points and a text layer
    built from the companion text dataframe.
    """
    (point_encoding, text_encoding, cat, val, nominal_axis_values,
     horizontal, zero) = _parse_encoding_jitter(encoding, data, sort)

    # Validate the requested sort before resolving it.
    _check_catplot_sort(data, cat, sort)
    resolved_sort = _jitter_sort(data, cat, sort, horizontal)

    point_mark, text_mark = _parse_mark_jitter(mark, horizontal)

    point_df, text_df = _jitter_dataframe(
        data, val, cat, jitter_width, nominal_axis_values, resolved_sort, zero)

    points = alt.Chart(
        data=point_df,
        width=width,
        height=height,
        mark=point_mark,
        encoding=point_encoding,
        **kwargs,
    )
    labels = alt.Chart(
        data=text_df,
        width=width,
        height=height,
        mark=text_mark,
        encoding=text_encoding,
    )

    return alt.layer(points, labels)
def plot_r0(r0_samples, date, place, min_days):
    """Plot the mean of the r0 samples per day with a standard-deviation band.

    Args:
        r0_samples: 2-D array of samples. The last ``min_days`` rows are
            kept and each column is mapped to one day.
        date: Last day of the date range assigned to the columns.
        place: Place name used in the chart title.
        min_days: Number of trailing rows of ``r0_samples`` to keep.

    Returns:
        A vertically concatenated, interactive chart with enlarged fonts.
    """
    recent = r0_samples[-min_days:]
    days = pd.date_range(end=date, periods=recent.shape[1])

    # Wide (one column per day) -> long (one row per sample and day).
    wide = pd.DataFrame(recent, columns=days)
    long = wide.stack(level=0).reset_index()
    long = long.rename(columns={"level_1": "Dias", 0: "r0"})
    data = long[["Dias", "r0"]]

    line = alt.Chart(
        data,
        width=600,
        height=150,
        title=f"Número básico de reprodução para {place}",
    ).mark_line().encode(x="Dias", y="mean(r0)")

    band = alt.Chart(data).mark_errorband(extent="stdev").encode(
        x=alt.X("Dias", title="Data"),
        y=alt.Y("r0", title="Valor"),
    )

    output = alt.layer(band, line)

    combined = alt.vconcat(output.interactive(), padding={"top": 20})
    combined = combined.configure_title(fontSize=16)
    combined = combined.configure_axis(labelFontSize=14, titleFontSize=14)
    return combined.configure_legend(labelFontSize=14, titleFontSize=14)
def facet(self, width=150, height=160, independent_axes=False, **kwargs):
    """Facet the previously plotted chart by row and/or column.

    Args:
        width: Width of each facet.
        height: Height of each facet.
        independent_axes: When truthy, always facet through
            ``visualization.facet`` instead of Altair's own ``facet``.
        **kwargs: Facet arguments. Must contain ``row`` or ``column``.

    Returns:
        The faceted chart.

    Raises:
        AssertionError: If neither ``row`` nor ``column`` is given.
        RuntimeError: If no chart has been plotted yet.
        ValueError: If Altair's ``facet`` fails for a reason other than its
            "Facet charts require" restriction.
    """
    # Raised explicitly (same exception type as the former assert) so the
    # check is not stripped when Python runs with -O.
    if 'row' not in kwargs and 'column' not in kwargs:
        raise AssertionError('Give facet either row, or column')

    chart = getattr(self, 'chart', None)
    if chart is None:
        # TODO let's not force users to plot. have a sensible default
        raise RuntimeError('Plot first, then you can use facet')

    if not isinstance(chart.data, pd.DataFrame):
        # No dataframe on the chart itself: facet the stored layers.
        return visualization.facet(alt.layer(*self.charts_for_facet),
                                   width=width,
                                   height=height,
                                   **kwargs)

    if independent_axes or isinstance(chart, alt.LayerChart):
        return visualization.facet(chart,
                                   width=width,
                                   height=height,
                                   **kwargs)

    try:
        return chart.properties(width=width, height=height).facet(**kwargs)
    except ValueError as err:
        # Only Altair's "Facet charts require ..." restriction is handled by
        # the fallback; any other ValueError propagates unchanged.
        if 'Facet charts require' not in str(err):
            raise
        return visualization.facet(chart,
                                   width=width,
                                   height=height,
                                   **kwargs)
def selectionchart(self):
    """Chart with a time-period selector underneath.

    An x-interval brush is added to a thin lower chart. The upper chart's
    x domain follows that brush.

    Returns:
        The upper chart concatenated vertically with the selector chart.
    """
    brush = alt.selection(type='interval', encodings=['x'])

    # Main chart: its x domain is driven by the brush.
    upper = self.chart.encode(alt.X('date:T', scale=alt.Scale(domain=brush)))

    # Overview strip: same series without grid lines or y labels.
    strip_x = alt.X(self.target,
                    type='temporal',
                    title=' ',
                    axis=alt.Axis(grid=False))
    strip_y = alt.Y('y',
                    type=self.type_,
                    scale=alt.Scale(zero=False),
                    title=' ',
                    axis=alt.Axis(grid=False, labels=False))
    strip_opacity = alt.condition(self.leg, alt.value(1), alt.value(0.2))
    inline = self.draw.encode(strip_x,
                              strip_y,
                              alt.Color('показатель:N', ),
                              opacity=strip_opacity)

    lower = alt.layer(inline, self.rules).properties(
        width=self.width, height=20).add_selection(brush)

    return upper & lower
def generate_chart(df):
    """Build a line or bar chart with a hover rule and "(x, y)" labels.

    ``Config.is_continuous`` selects between a line chart over a
    quantitative x and overlapping bars over an ordinal x.

    Args:
        df: Chart data with ``x``, ``y`` and ``label`` fields.

    Returns:
        A layered chart: base marks, invisible selectors, rule, points and
        text labels.
    """
    continuous = Config.is_continuous
    x_shorthand = 'x:Q' if continuous else 'x:O'

    if continuous:
        base = alt.Chart(df).mark_line().encode(x="x", y="y", color="label")
    else:
        base = alt.Chart(df).mark_bar(opacity=0.2).encode(
            x="x:O",
            y=alt.Y('y', stack=None),
            color="label",
        )

    # Selection following the x position nearest to the mouse.
    nearest = alt.selection(
        type="single",
        nearest=True,
        on="mouseover",
        encodings=["x"],
        empty="none",
    )

    # Invisible points that capture the mouse position.
    selectors = alt.Chart(df).mark_point().encode(
        x=x_shorthand,
        opacity=alt.value(0),
    ).add_selection(nearest)

    rules = alt.Chart(df).mark_rule(color='gray').encode(
        x=x_shorthand,
    ).transform_filter(nearest)

    points = base.mark_point().encode(
        opacity=alt.condition(nearest, alt.value(1), alt.value(0)))

    # Label text "(x, y)" with both values rounded to two decimals.
    rounded_x = expr.toString(expr.round(datum.x * 100) / 100)
    rounded_y = expr.toString(expr.round(datum.y * 100) / 100)
    label_expr = expr.join(["(", rounded_x, ', ', rounded_y, ")"], '')
    text = base.mark_text(align='left', dx=5, dy=-5).encode(
        text=alt.condition(nearest, 'xy:N', alt.value(' '))
    ).transform_calculate(xy=label_expr)

    return alt.layer(base, selectors, rules, points, text)
def plot_anomalies(forecasted, chart_title=''):
    """Plot the forecast interval with regular and anomalous observations.

    Args:
        forecasted: DataFrame with ``ds``, ``fact``, ``yhat_lower``,
            ``yhat_upper``, ``anomaly`` and ``importance`` columns. Rows
            with ``anomaly == 0`` are drawn as small black circles. All
            other rows are red circles sized by ``importance``.
        chart_title: Prefix of the chart title.

    Returns:
        The layered, configured Altair chart.
    """
    def _tooltip():
        # A fresh list per layer.
        return ['ds', 'fact', 'yhat_lower', 'yhat_upper']

    regular_rows = forecasted[forecasted.anomaly == 0]
    anomalous_rows = forecasted[forecasted.anomaly != 0]

    # Band between the lower and upper forecast bounds.
    interval = alt.Chart(forecasted).mark_area(
        interpolate="basis", color='#7FC97F'
    ).encode(
        x=alt.X('ds:T', title='date'),
        y='yhat_upper',
        y2='yhat_lower',
        tooltip=_tooltip(),
    ).interactive().properties(title=chart_title + ' Anomaly Detection')

    fact = alt.Chart(regular_rows).mark_circle(
        size=15, opacity=0.7, color='Black'
    ).encode(
        x='ds:T',
        y=alt.Y('fact', title='sales'),
        tooltip=_tooltip(),
    ).interactive()

    anomalies = alt.Chart(anomalous_rows).mark_circle(
        size=30, color='Red'
    ).encode(
        x='ds:T',
        y=alt.Y('fact', title='sales'),
        tooltip=_tooltip(),
        size=alt.Size('importance', legend=None),
    ).interactive()

    layered = alt.layer(interval, fact, anomalies)
    return layered.properties(width=870, height=450).configure_title(fontSize=20)
def _format_count(value):
    """Format a count with thousands separators and no decimals ("N/A" when missing)."""
    # NaN is the only value that differs from itself.
    if value is None or value != value:
        return "N/A"
    return "{:,.0f}".format(value)


def data_bar_jail():
    """Return the jail / pre-trial population bar chart for the current county as JSON.

    The state and county are read from the session. Total jail population
    and pre-trial population are drawn as two layered bar charts over
    ``year``.

    Returns:
        The chart specification serialized with ``to_json()``.
    """
    county_data = read_county_from_db(
        session.get('current_state'), session.get('current_county'))

    # Human-readable labels for the tooltips. Formatting with ",.0f" drops
    # the decimals for floats and ints alike; slicing off the last two
    # characters is only correct for floats rendered with exactly one
    # decimal digit.
    county_data['total_jail_pop_label'] = (
        county_data['total_jail_pop'].apply(_format_count))
    county_data['total_jail_pretrial_label'] = (
        county_data['total_jail_pretrial'].apply(_format_count))

    # Total jail population.
    jail = Chart(data=county_data, height=HEIGHT, width=WIDTH).mark_bar(
        color='#444760'
    ).encode(
        X('year:O', axis=Axis(title='Year')),
        Y('total_jail_pop', axis=Axis(title='Total Jail Population')),
        tooltip=[
            alt.Tooltip('year', title='Year'),
            alt.Tooltip('total_jail_pop_label', title='Total jail population'),
        ],
    ).properties(
        title='Jail population in {}'.format(session.get('current_county'))
    ).interactive()

    # Pre-trial population, overlaid on top.
    pre_trial = Chart(data=county_data, height=HEIGHT, width=WIDTH).mark_bar(
        color="#d66241",
        interpolate='step-after',
        line=True,
    ).encode(
        X('year:O', axis=Axis(title='Year')),
        Y('total_jail_pretrial', axis=Axis(title='Number of inmates')),
        tooltip=[
            alt.Tooltip('year', title='Year'),
            alt.Tooltip('total_jail_pretrial_label',
                        title='Pre-trial jail population'),
        ],
    ).properties(
        title='Pre-trial jail population in {}'.format(
            session.get('current_county'))
    ).interactive()

    chart = alt.layer(jail + pre_trial)
    return chart.to_json()
def frame_selector_ui(summary):
    """Draw the frame-selection widgets in the Streamlit sidebar.

    Args:
        summary: DataFrame with one column per object type. The third column
            is preselected in the select box.

    Returns:
        Tuple ``(selected_frame_index, selected_frame)``, or
        ``(None, None)`` when no frame matches the chosen range.
    """
    sidebar = st.sidebar
    sidebar.markdown("# Frame")

    # Which type of object to search for.
    object_type = sidebar.selectbox(
        "Search for which objects?", summary.columns, 2)

    # How many of the selected object should be present (a range).
    min_elts, max_elts = sidebar.slider(
        "How many %ss (select a range)?" % object_type, 0, 25, [10, 20])

    selected_frames = get_selected_frames(
        summary, object_type, min_elts, max_elts)
    if len(selected_frames) == 0:
        return None, None

    # Pick one frame out of the matching frames.
    last_index = len(selected_frames) - 1
    selected_frame_index = sidebar.slider(
        "Choose a frame (index)", 0, last_index, 0)

    # Area chart of the object counts over the matching frames, with a red
    # rule marking the chosen frame.
    counts = summary.loc[selected_frames, object_type]
    objects_per_frame = counts.reset_index(drop=True).reset_index()
    area = alt.Chart(objects_per_frame, height=120).mark_area().encode(
        alt.X("index:Q", scale=alt.Scale(nice=False)),
        alt.Y("%s:Q" % object_type),
    )
    marker_df = pd.DataFrame({"selected_frame": [selected_frame_index]})
    marker = alt.Chart(marker_df).mark_rule(color="red").encode(
        x="selected_frame")
    sidebar.altair_chart(alt.layer(area, marker))

    return selected_frame_index, selected_frames[selected_frame_index]
def plot_predict(forecasted, chart_title='Test', forecast_start='2020-11-19'):
    """Plot the forecast interval, the observed values and the predictions.

    Args:
        forecasted: DataFrame with ``ds``, ``fact``, ``yhat``,
            ``yhat_lower`` and ``yhat_upper`` columns.
        chart_title: Prefix of the chart title.
        forecast_start: Rows with ``ds >= forecast_start`` are drawn as
            predictions (blue circles at ``yhat``). Defaults to the
            previously hard-coded cutoff date.

    Returns:
        The layered, configured Altair chart.
    """
    tooltip_fields = ['ds', 'fact', 'yhat_lower', 'yhat_upper', 'yhat']

    # Band between the lower and upper forecast bounds.
    interval = alt.Chart(forecasted).mark_area(
        interpolate="basis", color='#7FC97F'
    ).encode(
        x=alt.X('ds', title='date'),
        y='yhat_upper',
        y2='yhat_lower',
        tooltip=list(tooltip_fields),
    ).interactive().properties(title=chart_title + ' Snowflake Spend Forecast')

    # Observed values.
    fact = alt.Chart(forecasted).mark_circle(
        size=15, opacity=0.7, color='Black'
    ).encode(
        x='ds:T',
        y=alt.Y('fact', title='spend'),
        tooltip=list(tooltip_fields),
    )

    # Predicted values from the cutoff date onwards.
    forecast_rows = forecasted[forecasted['ds'] >= forecast_start]
    predictions = alt.Chart(forecast_rows).mark_circle(
        size=15, opacity=0.7, color='Blue'
    ).encode(
        x='ds:T',
        y=alt.Y('yhat', title='spend'),
        tooltip=list(tooltip_fields),
    )

    return alt.layer(interval, fact, predictions)\
        .properties(width=870, height=450)\
        .configure_title(fontSize=20)
def add_ruler_as_selector_on_single_line_chart(chart: Any, df: pd.DataFrame,
                                               x_field: str,
                                               y_field: str) -> Any:
    """Layer a hover ruler and highlighted points on top of ``chart``.

    Args:
        chart: Existing chart to decorate. Its encodings are reused for the
            highlighted points.
        df: Data for the invisible selector points and the rule.
        x_field: Encoding shorthand of the x field.
        y_field: Field shown in the selector points' tooltip.

    Returns:
        A layered chart: ``chart``, selectors, points and rule.
    """
    # Selection of the nearest point, keyed on the date column.
    nearest = alt.selection(
        type="single",
        nearest=True,
        on="mouseover",
        fields=[typedef.Columns.DATE],
        empty="none",
    )

    # Invisible points spanning the chart; they report the cursor's x-value.
    selector_points = alt.Chart(df).mark_point().encode(
        x=x_field,
        opacity=alt.value(0),
        tooltip=alt.Tooltip(y_field),
    )
    selectors = selector_points.add_selection(nearest)

    # Points on the line, visible only where selected.
    visible_when_selected = alt.condition(nearest, alt.value(1), alt.value(0))
    points = chart.mark_point().encode(opacity=visible_when_selected)

    # Vertical rule at the selected position.
    rule_base = alt.Chart(df).mark_rule(color="gray").encode(x=x_field)
    rules = rule_base.transform_filter(nearest)

    # Four layers: the original chart plus the three helper layers.
    return alt.layer(chart, selectors, points, rules)
def comparing_chartn(*series) -> alt.LayerChart:
    """Compare any number of series as lines labelled with their maximum.

    The series are named ``Scenario1``, ``Scenario2``, ... in the order
    given, folded into long form and drawn as one line per scenario. A text
    mark shows each scenario's maximum value at the index where it occurs.

    Args:
        *series: The series to compare. Each becomes one DataFrame column.

    Returns:
        The interactive layered chart (lines plus maximum labels).
    """
    series_dict = {
        f"Scenario{number}": serie
        for number, serie in enumerate(series, start=1)
    }
    names = list(series_dict)
    dat = pd.DataFrame(series_dict)

    # NOTE(review): the text channel below references a 'daily' field that
    # this function never creates, and it sits on the line layer; confirm
    # whether it can be dropped.
    base = alt.Chart(dat.reset_index()).transform_fold(fold=names).encode(
        x=alt.X("index", title="Días desde hoy"),
        y=alt.Y("value:Q", title="Total de pacientes"),
        tooltip=["key:N", "value:Q"],
        color="key:N",
        text=alt.Text('max(daily):Q'),
    )

    # Label layer: x is the index at which 'value' peaks, y is that peak.
    text = alt.Chart(dat.reset_index()).transform_fold(fold=names).encode(
        x=alt.X("index", aggregate={'argmax': 'value'}),
        y=alt.Y('max(value):Q'),
        color="key:N",
        text=alt.Text('max(value):Q'),
    )

    return alt.layer(
        base.mark_line(),
        text.mark_text(dy=-10, fontSize=16),
    ).interactive()
def plot_both(county="all_counties", time_interval="daily", **kwargs):
    """
    Create an Altair chart layering reported and cumulative COVID-19 cases
    in Norwegian counties.

    Arguments
    ---------
    county : str, optional, default ``all_counties``
        The county with the reported cases to retrieve.

        **Allowed counties:** ``all_counties``, ``agder``, ``innlandet``,
        ``more-og-romsdal``, ``nordland``, ``oslo``, ``rogaland``,
        ``troms-og-finnmark``, ``trondelag``, ``vestfold-og-telemark``,
        ``vestland``, ``viken``.
    time_interval : str, optional, default ``daily``
        Time interval representation of the reported cases. Can be either
        ``daily`` or ``weekly``.
    **kwargs
        Arbitrary keyword arguments, forwarded unchanged to
        ``plot_reported_cases()`` and ``plot_cumulative_cases()``.

    Returns
    -------
    both : altair.LayerChart
        Reported cases layered under cumulative cases, each with an
        independent y scale.
    """
    shared_args = dict(county=county, time_interval=time_interval, **kwargs)

    reported = plot_reported_cases(**shared_args)
    cumulative = plot_cumulative_cases(**shared_args)

    return alt.layer(reported, cumulative).resolve_scale(y='independent')
# Transparent selectors across the chart. This is what tells us # the x-value of the cursor selectors = alt.Chart().mark_point().encode( x='x:Q', opacity=alt.value(0), ).add_selection( nearest ) # Draw points on the line, and highlight based on selection points = line.mark_point().encode( opacity=alt.condition(nearest, alt.value(1), alt.value(0)) ) # Draw text labels near the points, and highlight based on selection text = line.mark_text(align='left', dx=5, dy=-5).encode( text=alt.condition(nearest, 'y:Q', alt.value(' ')) ) # Draw a rule at the location of the selection rules = alt.Chart().mark_rule(color='gray').encode( x='x:Q', ).transform_filter( nearest ) # Put the five layers into a chart and bind the data alt.layer(line, selectors, points, rules, text, data=source, width=600, height=300)
def plot_heatmap(
    self,
    rank="auto",
    normalize="auto",
    top_n="auto",
    threshold="auto",
    title=None,
    xlabel=None,
    ylabel=None,
    tooltip=None,
    return_chart=False,
    linkage="average",
    haxis=None,
    metric="euclidean",
    legend="auto",
    label=None,
):
    """Plot heatmap of taxa abundance/count data for several samples.

    Parameters
    ----------
    rank : {'auto', 'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'}, optional
        Analysis will be restricted to abundances of taxa at the specified level.
    normalize : 'auto' or `bool`, optional
        Convert read counts to relative abundances such that each sample sums to 1.0. Setting
        'auto' will choose automatically based on the data.
    return_chart : `bool`, optional
        When True, return an `altair.Chart` object instead of displaying the resulting plot in
        the current notebook.
    haxis : `string`, optional
        The metadata field used to group samples together. Each group of samples is clustered
        independently. The field must be boolean, categorical or object-typed.
    metric : {'euclidean', 'braycurtis', 'manhattan', 'jaccard', 'unifrac', 'unweighted_unifrac'}, optional
        Function to use when calculating the distance between two samples. Only 'euclidean'
        is accepted when the results are already normalized.
    linkage : {'average', 'single', 'complete', 'weighted', 'centroid', 'median'}
        The type of linkage to use when clustering axes.
    top_n : `int`, optional
        Display the top N most abundant taxa in the entire cohort of samples. When both
        `top_n` and `threshold` are 'auto', the top 10 taxa are shown.
    threshold : `float`
        Display only taxa that are more abundant than this threshold in one or more samples.
    title : `string`, optional
        Text label at the top of the plot.
    xlabel : `string`, optional
        Text label along the horizontal axis.
    ylabel : `string`, optional
        Text label along the vertical axis.
    tooltip : `string` or `list`, optional
        A string or list containing strings representing metadata fields. When a cell in the
        plot is hovered over, the value of the metadata associated with that sample will be
        displayed. A list passed here is not modified.
    legend : `string`, optional
        Title for color scale. Defaults to the field used to generate the plot, e.g.
        readcount_w_children or abundance.
    label : `string` or `callable`, optional
        A metadata field (or function) used to label each analysis. If passing a function, a
        dict containing the metadata for each analysis is passed as the first and only
        positional argument. The callable function must return a string.

    Returns
    -------
    The chart when `return_chart` is True (the group-label strip stacked above the heatmap
    when `haxis` is given); otherwise the chart is displayed and nothing is returned.

    Raises
    ------
    OneCodexException
        If `rank` is None, if neither `threshold` nor `top_n` is given, if fewer than two
        classification results are available, if the results are normalized and `metric` is
        not 'euclidean', or if the `haxis` field is numerical.

    Examples
    --------
    Plot a heatmap of the relative abundances of the top 10 most abundant families.

    >>> plot_heatmap(rank='family', top_n=10)
    """
    if rank is None:
        raise OneCodexException("Please specify a rank or 'auto' to choose automatically")

    if not (threshold or top_n):
        raise OneCodexException("Please specify at least one of: threshold, top_n")

    if len(self._results) < 2:
        raise OneCodexException(
            "`plot_heatmap` requires 2 or more valid classification results."
        )

    # Resolve the 'auto' placeholders: whichever of the two filters was not
    # given explicitly is switched off; with neither given, show the top 10.
    if top_n == "auto" and threshold == "auto":
        top_n = 10
        threshold = None
    elif top_n == "auto" and threshold != "auto":
        top_n = None
    elif top_n != "auto" and threshold == "auto":
        threshold = None

    if legend == "auto":
        legend = self._field

    df = self.to_df(
        rank=rank, normalize=normalize, top_n=top_n, threshold=threshold, table_format="long"
    )

    # Normalize `tooltip` to a list owned by this call. The list is extended
    # below, so a caller-supplied list is copied rather than mutated.
    if not tooltip:
        tooltip = []
    elif isinstance(tooltip, list):
        tooltip = list(tooltip)
    else:
        tooltip = [tooltip]

    if haxis:
        tooltip.append(haxis)

    tooltip.insert(0, "Label")

    magic_metadata, magic_fields = self._metadata_fetch(tooltip, label=label)

    # add columns for prettier display
    df["Label"] = magic_metadata["Label"][df["classification_id"]].tolist()
    df["tax_name"] = ["{} ({})".format(self.taxonomy["name"][t], t) for t in df["tax_id"]]

    # and for metadata
    for f in tooltip:
        df[magic_fields[f]] = magic_metadata[magic_fields[f]][df["classification_id"]].tolist()

    # if we've already been normalized, we must cluster samples by euclidean distance. beta
    # diversity measures won't work with normalized distances.
    if self._guess_normalized():
        if metric != "euclidean":
            raise OneCodexException(
                "Results are normalized. Please re-run with metric=euclidean"
            )

        df_sample_cluster = self.to_df(
            rank=rank, normalize=normalize, top_n=top_n, threshold=threshold
        )
        df_taxa_cluster = df_sample_cluster
    else:
        df_sample_cluster = self.to_df(
            rank=rank, normalize=False, top_n=top_n, threshold=threshold
        )

        df_taxa_cluster = self.to_df(
            rank=rank, normalize=normalize, top_n=top_n, threshold=threshold
        )

    if haxis is None:
        # cluster only once
        sample_cluster = df_sample_cluster.ocx._cluster_by_sample(
            rank=rank, metric=metric, linkage=linkage
        )
        taxa_cluster = df_taxa_cluster.ocx._cluster_by_taxa(linkage=linkage)

        labels_in_order = magic_metadata["Label"][sample_cluster["ids_in_order"]].tolist()
    else:
        if not (
            pd.api.types.is_bool_dtype(df[magic_fields[haxis]])
            or pd.api.types.is_categorical_dtype(df[magic_fields[haxis]])  # noqa
            or pd.api.types.is_object_dtype(df[magic_fields[haxis]])  # noqa
        ):  # noqa
            raise OneCodexException("Metadata field on horizontal axis can not be numerical")

        # taxa clustered only once
        taxa_cluster = df_taxa_cluster.ocx._cluster_by_taxa(linkage=linkage)

        # cluster samples for every group of metadata
        groups = magic_metadata[magic_fields[haxis]].unique()
        cluster_by_group = {}

        labels_in_order = []

        # plot_data: two end points per group for the bracket line drawn above
        # the heatmap; label_data: one centred text label per group.
        plot_data = {"x": [], "y": [], "o": [], "b": []}
        label_data = {"x": [], "y": [], "label": []}

        for idx, group in enumerate(groups):
            # if value of metadata field is 'null', we have to use pd.isnull, can't use 'is None'
            if pd.isnull(group):
                c_ids_in_group = magic_metadata.index[
                    pd.isnull(magic_metadata[magic_fields[haxis]])
                ]
            else:
                c_ids_in_group = magic_metadata.index[
                    magic_metadata[magic_fields[haxis]] == group
                ]

            if len(c_ids_in_group) == 0:
                continue

            sample_slice = df_sample_cluster.loc[c_ids_in_group]

            if len(c_ids_in_group) < 3:
                # clustering not possible in this case
                cluster_by_group[group] = {"ids_in_order": c_ids_in_group}
            else:
                cluster_by_group[group] = sample_slice.ocx._cluster_by_sample(
                    rank=rank, metric=metric, linkage=linkage
                )

            # The bracket starts a quarter cell after the previous group's last
            # sample and ends a quarter cell before this group's last edge.
            plot_data["x"].append(len(labels_in_order) + 0.25)

            labels_in_order.extend(
                magic_metadata["Label"][cluster_by_group[group]["ids_in_order"]].tolist()
            )

            plot_data["x"].append(len(labels_in_order) - 0.25)
            plot_data["y"].extend([0, 0])
            plot_data["o"].extend([0, 1])
            plot_data["b"].extend([idx, idx])

            label_data["x"].append(sum(plot_data["x"][-2:]) / 2)
            label_data["y"].append(1)
            label_data["label"].append(str(group))

        label_bars = (
            alt.Chart(
                pd.DataFrame(plot_data), width=15 * len(df_sample_cluster.index), height=10
            )
            .mark_line(point=False, opacity=0.5)
            .encode(
                x=alt.X(
                    "x",
                    axis=None,
                    scale=alt.Scale(
                        domain=[0, len(df_sample_cluster.index)], zero=True, nice=False
                    ),
                ),
                y=alt.Y("y", axis=None),
                order="o",
                # One colour entry per group, all black: colouring by group
                # keeps each bracket a separate line segment.
                color=alt.Color(
                    "b:N",
                    scale=alt.Scale(domain=list(range(idx + 1)), range=["black"] * (idx + 1)),
                    legend=None,
                ),
            )
        )

        label_text = (
            alt.Chart(
                pd.DataFrame(label_data), width=15 * len(df_sample_cluster.index), height=10
            )
            .mark_text(align="center", baseline="middle")
            .encode(
                x=alt.X(
                    "x",
                    axis=None,
                    scale=alt.Scale(
                        domain=[0, len(df_sample_cluster.index)], zero=True, nice=False
                    ),
                ),
                y=alt.Y(
                    "y", axis=alt.Axis(title=haxis, ticks=False, domain=False, labels=False)
                ),
                text="label",
            )
        )

        top_label = alt.layer(label_text, label_bars)

    # should ultimately be Label, tax_name, readcount_w_children, then custom fields
    tooltip_for_altair = [magic_fields[f] for f in tooltip]
    tooltip_for_altair.insert(1, "tax_name")
    tooltip_for_altair.insert(2, "{}:Q".format(self._field))

    # Calculated field backing the `href` channel.
    url_expr = "https://app.onecodex.com/classification/" + alt.datum.classification_id

    alt_kwargs = dict(
        x=alt.X("Label:N", axis=alt.Axis(title=xlabel), sort=labels_in_order),
        y=alt.Y(
            "tax_name:N", axis=alt.Axis(title=ylabel), sort=taxa_cluster["labels_in_order"]
        ),
        color=alt.Color("{}:Q".format(self._field), legend=alt.Legend(title=legend)),
        tooltip=tooltip_for_altair,
        href="url:N",
    )

    chart = (
        alt.Chart(
            df,
            width=15 * len(df["classification_id"].unique()),
            height=15 * len(df["tax_id"].unique()),
        )
        .transform_calculate(url=url_expr)
        .mark_rect()
        .encode(**alt_kwargs)
    )

    if title:
        chart = chart.properties(title=title)

    if haxis:
        if return_chart:
            return top_label & chart
        else:
            (top_label & chart).display()
    else:
        if return_chart:
            return chart
        else:
            chart.interactive().display()
""" Ranged Dot Plot ----------------- This example shows a ranged dot plot that uses 'layer' to convey changing life expectancy for the five most populous countries (between 1955 and 2000). """ # category: other charts import altair as alt from vega_datasets import data source = data.countries.url chart = alt.layer( data=source ).transform_filter( filter={"field": 'country', "oneOf": ["China", "India", "United States", "Indonesia", "Brazil"]} ).transform_filter( filter={'field': 'year', "oneOf": [1955, 2000]} ) chart += alt.Chart().mark_line(color='#db646f').encode( x='life_expect:Q', y='country:N', detail='country:N' ) # Add points for life expectancy in 1955 & 2000 chart += alt.Chart().mark_point( size=100, opacity=1, filled=True
# Interval selection restricted to the x encoding.
brush = alt.selection(type='interval', encodings=['x'])

# Shared base chart: a binned histogram of whichever field the enclosing
# repeat supplies for the current column.
base = (
    alt.Chart()
    .mark_bar()
    .encode(
        x=alt.X(
            alt.repeat('column'),
            type='quantitative',
            bin=alt.Bin(maxbins=20),
        ),
        y='count()',
    )
    .properties(width=180, height=130)
)

# Background bars in the default colour; this layer carries the brush.
background = base.properties(selection=brush)

# Goldenrod bars computed only from the rows inside the brush.
highlight = (
    base
    .encode(color=alt.value('goldenrod'))
    .transform_filter(brush)
)

# Layer the two charts, derive the `time` field, and repeat across columns.
alt.layer(
    background,
    highlight,
    data=source,
).transform_calculate(
    "time", "hours(datum.date)"
).repeat(
    column=["distance", "delay", "time"]
)