def scatter(x, y, s=None, color=None, style=None, size_by=None, sizes=None,
            x_type="quantitative", color_type="nominal",
            size_type="quantitative", data=None, palette=None, saturation=1,
            size=None, aspect=1):
    """Build a filled-point scatter chart.

    When ``data`` is None, ``x`` and ``y`` are handed to ``build_dataframe``
    and replaced by the field names it reports. ``color``, ``style`` and
    ``size_by`` are each treated as a column name when they match a column
    of ``data`` and as a constant value otherwise; ``color`` may also be a
    ready-made ``alt.Color``. ``sizes`` becomes the range of the size scale
    when ``size_by`` is a column.

    ``s`` is accepted but never read.

    Returns the chart with its colour range configured from ``palette``.
    """
    if data is None:
        data, fields = build_dataframe({"x": x, "y": y})
        x, y = fields["x"], fields["y"]

    def _is_column(name):
        # Looked up lazily so ``data.columns`` is only touched when needed.
        return name in list(data.columns)

    channels = {
        "x": alt.X(field=x, type=x_type, axis={"title": x}),
        "y": alt.Y(field=y, type="quantitative", axis={"title": y}),
    }

    if color:
        if isinstance(color, alt.Color):
            channels["color"] = color
        elif _is_column(color):
            channels["color"] = alt.Color(field=color, type=color_type)
        else:
            channels["color"] = alt.Color(value=vega_color(color))

    if style:
        channels["shape"] = (
            alt.Shape(field=style, type="nominal")
            if _is_column(style)
            else alt.Shape(value=style)
        )

    if size_by:
        if _is_column(size_by):
            size_spec = {"field": size_by, "type": size_type}
            if sizes:
                size_spec["scale"] = {"range": sizes}
        else:
            size_spec = {"value": size_by}
        channels["size"] = size_spec

    chart = alt.Chart(data).mark_point(filled=True).encode(**channels)
    # NOTE(review): the return value is ignored, so this presumably sizes
    # the chart in place - confirm against size_chart.
    size_chart(chart, size, aspect)

    pal = vega_palette(palette, None, saturation, vega_type=color_type)
    range_name = "category" if color_type == "nominal" else "ramp"
    return chart.configure_range(**{range_name: pal})
def plot_scatter_alt(
    self,
    color='run:Q',
    color_scheme='purplebluegreen',
    shape='method:N',
    width=400,
    height=400,
):
    """Plot the trials' total flops against max size as an interactive chart.

    The data comes from ``self.to_df()``. Point size follows
    ``random_strength`` on a log scale, while ``shape`` and ``color`` are
    Altair shorthand strings for the respective channels. Every column of
    the frame is exposed in the tooltip.
    """
    import altair as alt

    trials = self.to_df()

    x_axis = alt.X('size:Q', title='log2[SIZE]', scale=alt.Scale(zero=False))
    y_axis = alt.Y('flops:Q', title='log10[FLOPS]',
                   scale=alt.Scale(zero=False))
    point_size = alt.Size(
        'random_strength:Q',
        scale=alt.Scale(range=[50, 150], type='log'),
        legend=None,
    )
    point_color = alt.Color(color, scale=alt.Scale(scheme=color_scheme))

    points = alt.Chart(trials).mark_point().encode(
        x=x_axis,
        y=y_axis,
        size=point_size,
        shape=alt.Shape(shape),
        color=point_color,
        tooltip=list(trials.columns),
    )
    sized = points.properties(width=width, height=height)
    return sized.configure_axis(gridColor='rgb(248, 248, 248)').interactive()
def _make_volume_comparison_plot(df):
    """Render weekly up/down traffic, split by ``is_health``, to a PNG.

    The frame is summed per Monday-anchored week and ``is_health`` value,
    reshaped to one row per direction, scaled by 1000**3, and saved to
    ``renders/health_domain_overall_magnitude.png``. Nothing is returned.
    """
    # Consolidate by week instead of by day.
    weekly_keys = [pd.Grouper(key="day", freq="W-MON"), "is_health"]
    weekly = (
        df[["day", "is_health", "bytes_up", "bytes_down"]]
        .groupby(weekly_keys)
        .sum()
        .reset_index()
    )

    long_form = weekly.melt(
        id_vars=["is_health", "day"],
        value_vars=["bytes_up", "bytes_down"],
        var_name="direction",
        value_name="bytes",
    )
    long_form["bytes"] = long_form["bytes"] / 1000**3

    traffic_axis = alt.Y(
        "bytes:Q",
        title="Weekly Traffic(GB)",
        axis=alt.Axis(labels=True),
        scale=alt.Scale(type="symlog"),
    )
    chart = alt.Chart(long_form).mark_point(opacity=1.0).encode(
        x=alt.X("day:T"),
        y=traffic_axis,
        color=alt.Color("direction", title="Type"),
        shape=alt.Shape("is_health:N"),
    )
    chart.properties(width=500).save(
        "renders/health_domain_overall_magnitude.png",
        scale_factor=2,
    )
def graph_transit_filtre(df_transit, date_debut, date_fin, o_d):
    """Chart travel times for one origin-destination, transit vs. local.

    Args:
        df_transit: DataFrame of origin-destination trips; the columns
            ``date_cam_1``, ``tps_parcours``, ``filtre_tps``, ``type`` and
            ``o_d`` are read.
        date_debut: start of the window, e.g. ``"2019-01-28 00:00:00"``.
        date_fin: end of the window, same format as ``date_debut``.
        o_d: origin-destination label to keep, compared to the ``o_d``
            column.

    Returns:
        An interactive Altair chart where colour tells transit from local
        trips and shape tells the source of the reference travel time.
    """
    debut = pd.to_datetime(date_debut)
    fin = pd.to_datetime(date_fin)
    # NOTE(review): the 'French' locale name may not exist on every
    # platform - confirm for the deployment host.
    titre = (debut.day_name(locale='French') + ' '
             + debut.strftime('%Y-%m-%d') + ' : ' + o_d)

    dans_fenetre = (
        (df_transit['date_cam_1'] > debut)
        & (df_transit['date_cam_1'] < fin)
        & (df_transit['o_d'] == o_d)
    )
    copie_df = df_transit.loc[
        dans_fenetre, ['date_cam_1', 'tps_parcours', 'filtre_tps', 'type']
    ].copy()

    # Anchor the durations on a fixed date so they can be drawn on a
    # temporal axis.
    copie_df['tps_parcours'] = (
        pd.to_datetime('2018-01-01') + copie_df['tps_parcours'])

    # Column-wise map works on an empty selection too, so the former
    # row-wise apply and its blanket ``except ValueError: pass`` (presumably
    # there for the empty case) are no longer needed.
    copie_df['filtre_tps'] = copie_df['filtre_tps'].map(
        lambda est_transit: 'Transit' if est_transit else 'Local')
    copie_df['type'] = copie_df['type'].map(
        lambda source: 'Reglementaire' if source == '85eme_percentile'
        else source)

    graph_filtre_tps = alt.Chart(copie_df, title=titre).mark_point().encode(
        x=alt.X('date_cam_1:T',
                axis=alt.Axis(title='Horaire', format='%Hh%M')),
        y=alt.Y('tps_parcours:T',
                axis=alt.Axis(title='Temps de parcours', format='%H:%M')),
        tooltip='hoursminutes(tps_parcours)',
        color=alt.Color('filtre_tps:N',
                        legend=alt.Legend(title="Type de trajet")),
        shape=alt.Shape('type:N',
                        legend=alt.Legend(title="Source temps de reference")),
    ).interactive().properties(width=800, height=400)
    return graph_filtre_tps
def visualize_clusters(df, n_clusters, range_):
    """Draw the clustered points in Streamlit.

    ``Component 2`` is on x and ``Component 1`` on y. Shape follows
    ``playlist`` and colour follows ``Cluster``, whose domain is
    ``0 .. n_clusters - 1`` mapped onto the colours in ``range_``.
    """
    playlist_shapes = alt.Scale(
        range=["circle", "diamond", "square", "triangle-down", "triangle-up"])
    cluster_colors = alt.Scale(domain=list(range(n_clusters)), range=range_)

    points = alt.Chart(df.reset_index()).mark_point(filled=True, size=60)
    graph = points.encode(
        x=alt.X('Component 2'),
        y=alt.Y('Component 1'),
        shape=alt.Shape('playlist', scale=playlist_shapes),
        color=alt.Color('Cluster', scale=cluster_colors),
        tooltip=['name', 'artist'],
    ).interactive()

    st.altair_chart(graph, use_container_width=True)
def plot(data, phase, field, center=False, scale=None, shape=(400, 400)):
    """Scatter ``<phase>_<field>`` against temperature for each student.

    Args:
        data: DataFrame with ``temp``, ``distillation``, ``student`` and the
            ``<phase>_<field>`` column, plus the columns listed in the
            tooltip.
        phase: prefix of the plotted column (the reference table below has
            ``train`` and ``test`` entries).
        field: one of ``acc``, ``eval`` or ``loss``; also selects the y
            title.
        center: for ``field == 'acc'``, subtract each student's reference
            accuracy instead of drawing the reference rules.
        scale: y scale type; defaults to ``log`` for ``loss``/``eval`` and
            ``linear`` otherwise.
        shape: ``(width, height)`` of the chart.

    Returns:
        The interactive point chart, layered over reference rules when
        ``field == 'acc'`` and ``center`` is false.
    """
    detalle = [
        'test_acc', 'test_teacher/acc', 'test_loss', 'test_eval',
        'train_acc', 'train_teacher/acc', 'train_loss', 'train_eval',
        'distillation', 'temp'
    ]
    field_dict = {
        'acc': "Accuracy [%]",
        'eval': "Perdida Cross Entropy",
        'loss': "Perdida de Destilación"
    }
    # Reference accuracies used to centre the data or to draw rules.
    accs = {
        'Model': ['MobileNet', 'ResNet18', 'ResNet101'],
        'ce_train': [95.73, 98.15, 98.52],
        'ce_test': [87.8, 90.58, 90.68]
    }

    if scale is None:
        scale = 'log' if field in ['loss', 'eval'] else 'linear'

    if field == 'acc' and center:
        reference = dict(zip(accs['Model'], accs['ce_%s' % phase]))
        column = '%s_acc' % phase
        # Work on a copy: the subtraction used to be applied to the
        # caller's frame, so calling twice centred the data twice.
        data = data.copy()
        data[column] = data[column] - [reference[s] for s in data['student']]

    bar = alt.Chart(data).mark_point().encode(
        alt.X('temp:O',
              scale=alt.Scale(zero=False, base=10, type='log'),
              title="Temperatura"),
        alt.Y('%s_%s' % (phase, field),
              scale=alt.Scale(zero=False, type=scale),
              title=field_dict[field]),
        shape=alt.Shape('distillation',
                        legend=alt.Legend(title="Destilación")),
        color=alt.Color('student', legend=alt.Legend(title="Modelo")),
        size=alt.value(50),
        tooltip=detalle).interactive()

    if field == 'acc' and not center:
        df = pd.DataFrame(accs)
        aggregates = alt.Chart(df).mark_rule(opacity=0.5).encode(
            y='ce_%s:Q' % phase, color='Model:N', size=alt.value(2))
        return (aggregates + bar).properties(width=shape[0], height=shape[1])

    return bar.properties(width=shape[0], height=shape[1])
def senti_circleplot(senti_df, student_id):
    """Return an interactive point chart of sentiment per student.

    ``student_id`` is the x field and ``cts.SENTI`` the y field; colour and
    shape both follow ``cts.ASSIGNMENT`` (only the colour legend is shown).
    """
    hover = [
        alt.Tooltip(cts.SENTI, title="polarity"),
        alt.Tooltip(student_id, title="author"),
    ]
    marks = alt.Chart(senti_df).mark_point(size=100, fillOpacity=0.7)
    encoded = marks.encode(
        alt.X(student_id),
        alt.Y(cts.SENTI),
        alt.Color(cts.ASSIGNMENT, legend=alt.Legend(orient="left")),
        alt.Shape(cts.ASSIGNMENT, legend=None),
        tooltip=hover,
    )
    return encoded.interactive()
def area_chart_f(df_covid19_region, region_title, x_option, y_vars, stack,
                 solve_y_scale, order_var, *y_line_var):
    """Build a faceted area chart, one panel per ``region_title`` value.

    ``y_vars`` are folded into ``key``/``value`` pairs and drawn as areas.
    ``order_var`` may be a dict, in which case its string form is used as a
    calculate expression for the stacking order; anything else orders by
    ``key``. When extra positional arguments are given, the first names a
    quantitative column drawn as a black line on top of the areas.
    ``solve_y_scale`` switches the facets between independent and shared y
    scales.
    """
    x_var, x_axis = set_x_axis(x_option)

    base = alt.Chart(df_covid19_region).encode(x=x_axis).properties(
        height=150, width=180)

    key_color = alt.Color(
        'key:N',
        scale=alt.Scale(range=['#1f77b4', '#e41a1c', '#71f594']),
        legend=alt.Legend(title=None))
    area = base.mark_area(opacity=0.7, line=True).transform_fold(
        y_vars).encode(
            y=alt.Y('value:Q', stack=stack, axis=alt.Axis(title='count')),
            color=key_color,
            tooltip=[x_var, 'value:Q'],
        )

    # Either a custom order supplied as a dict, or plain ordering by key.
    if isinstance(order_var, dict):
        area = area.transform_calculate(order=str(order_var)).encode(
            order="order:Q")
    else:
        area = area.encode(order="key:N")

    # Optional overlay line.
    if y_line_var:
        overlay = base.mark_line().encode(
            alt.Y(y_line_var[0] + ":Q", axis=alt.Axis(title='count')),
            color=alt.value('black'),
            size=alt.value(2),
            shape=alt.Shape('Active_cases_label',
                            legend=alt.Legend(title=None)))
        area = alt.layer(area, overlay)

    y_scale_rs = "independent" if solve_y_scale else "shared"
    return area.facet(facet=region_title + ':N',
                      columns=3).resolve_scale(y=y_scale_rs)
def ScatterPlot(dataframe, x, y, color_by, marker_by, size_by, marker_color,
                marker_shape, marker_size, marker_thickness, xlabel=None,
                ylabel=None):
    """Return a point chart of ``y`` against ``x``.

    Each of ``color_by``, ``marker_by`` and ``size_by`` names a field to
    encode; when it is None the matching constant (``marker_color``,
    ``marker_shape``, ``marker_size``) is used instead. ``xlabel`` and
    ``ylabel`` default to ``x`` and ``y``. ``marker_thickness`` sets the
    stroke width.
    """
    if color_by is None:
        color = alt.value(marker_color)
    else:
        color = alt.Color(color_by, type='ordinal',
                          scale=alt.Scale(range='category'))

    if marker_by is None:
        marker = alt.value(marker_shape)
    else:
        marker = alt.Shape(marker_by, type='ordinal')

    if size_by is None:
        size = alt.value(marker_size)
    else:
        size = alt.Size(size_by, type='quantitative')

    if xlabel is None:
        xlabel = x
    if ylabel is None:
        ylabel = y

    chart = alt.Chart(dataframe).mark_point().encode(
        x=alt.X(x, title=xlabel),
        y=alt.Y(y, title=ylabel),
        shape=marker,
        color=color,
        size=size,
        strokeWidth=alt.value(marker_thickness),
    )
    return chart
def grafico_con_columnas(
        source,
        y='test_acc',
        title="Exactitud en validacion desacoplado segun destilacion",
        shape='student',
        column='feat_dist',
        color='layer',
        fill=None):
    """Return a column-faceted point chart of ``y`` per ``color`` category.

    ``source`` is first reduced to the columns named by ``y``, ``shape``,
    ``column``, ``color`` and ``fill``. Axis, legend and header titles are
    looked up in ``global_titles``. ``fill`` is optional and adds a fill
    channel with the ``pastel1`` scheme.
    """
    # Keep only the columns the chart actually encodes. This used to be
    # derived from ``locals()``, which silently changes if a parameter or
    # local is added.
    used_fields = [y, shape, column, color, fill]
    source = source.drop(
        columns=[name for name in source.columns if name not in used_fields])

    encodings = {
        "shape": alt.Shape("%s:O" % shape,
                           legend=alt.Legend(title=global_titles[shape])),
        "y": alt.Y(y, title=global_titles[y]),
        "column": alt.Column('%s:O' % column, title=global_titles[column]),
        "x": alt.X('%s:N' % color, title=global_titles[color]),
        "color": alt.Color('%s:N' % color,
                           legend=alt.Legend(title=global_titles[color])),
        "opacity": alt.value(0.5)
    }
    if fill is not None:
        encodings["fill"] = alt.Fill(
            '%s:O' % fill,
            legend=alt.Legend(title=global_titles[fill]),
            scale=alt.Scale(scheme='pastel1'))

    d1 = alt.Chart(source, title=title).mark_point(size=100).encode(
        **encodings).configure_axis(
            titleFontSize=12,
            labelFontSize=12).configure_title(fontSize=15).interactive()
    return d1.properties(width=70, height=600)
def plotter(v_x, v_y, fptp_winner, approval_winner, borda_winner,
            score_winner):
    """Return one facet per voting system showing voters and candidates.

    Voters are drawn as circles at ``(v_x, v_y)`` coloured by the winner
    under each system; the candidates from ``CAND_DICT`` are drawn on top of
    every facet as filled, black-stroked points.
    """
    SYSTEM = ["fptp_winner", "approval_winner", "borda_winner", "score_winner"]
    COLOR_NAMES = ["Draw", "Red", "Blue", "Green"]
    COLORS = ["#6b6b6b", "red", "#12a9e5", "#12e551"]
    COLOR_SCALE = alt.Scale(domain=COLOR_NAMES, range=COLORS)

    v_df = pd.DataFrame({
        "v_x": v_x,
        "v_y": v_y,
        "fptp_winner": fptp_winner,
        "approval_winner": approval_winner,
        "borda_winner": borda_winner,
        "score_winner": score_winner
    })
    df = pd.melt(v_df, id_vars=["v_x", "v_y"], var_name="SYSTEM",
                 value_name="winner")

    # Repeat every candidate once per system. The previous
    # ``SYSTEM * 3`` assignment only lined up for exactly three candidates
    # and raised a length mismatch for any other count.
    candidates = pd.DataFrame(CAND_DICT)
    c_df = pd.concat(
        [candidates.assign(SYSTEM=system) for system in SYSTEM],
        sort=False, ignore_index=True)
    df = pd.concat([df, c_df], sort=False, ignore_index=True)

    candidate_plot = alt.Chart().mark_point(filled=True).encode(
        alt.X('C_X', title="x"),
        alt.Y('C_Y', title="y"),
        alt.Color('name', legend=None, scale=COLOR_SCALE),
        alt.Shape('name', legend=alt.Legend(title="Candidates")),
        alt.StrokeValue("black"),
        opacity=alt.value(1),
        size=alt.value(150))
    voter_plot = alt.Chart().mark_circle().encode(
        x='v_x',
        y='v_y',
        color=alt.Color('winner:N', legend=None,
                        scale=COLOR_SCALE)).properties(width=200)

    chart = alt.layer(
        voter_plot, candidate_plot,
        data=df).facet(column='SYSTEM:N').configure_header(labelFontSize=20)
    return chart
def createCholorpeth(df):
    """Return a US state choropleth coloured by ``normPositive``.

    Each row of ``df`` is joined on ``id`` to the ``states`` feature of the
    ``us_10m`` topology, and the joined geometry is drawn with the
    ``albersUsa`` projection.
    """
    states = alt.topo_feature(data.us_10m.url, 'states')

    case_color = alt.Color(
        'normPositive:Q',
        scale=alt.Scale(scheme='reds'),
        legend=alt.Legend(
            title=["Confirmed Cases", "per 100,000 People", ""]))
    hover = [
        alt.Tooltip('state:N', title='State'),
        alt.Tooltip('positive:N', title='Confirmed Cases'),
        alt.Tooltip('death:N', title='Confirmed Deaths')
    ]

    shapes = alt.Chart(df).mark_geoshape().encode(
        shape=alt.Shape(field='geo', type='geojson'),
        color=case_color,
        tooltip=hover)
    joined = shapes.transform_lookup(
        lookup='id',
        from_=alt.LookupData(data=states, key='id'),
        as_='geo')
    return joined.properties(width=800, height=600).project(type='albersUsa')
def shape_plot(df, single_select):
    """Return a circle chart of total vs. per-capita CO2 emissions.

    Points matching ``single_select`` are coloured by country with the
    ``tableau10`` scheme; the rest are light grey.
    """
    country_color = alt.condition(
        single_select,
        'Country Name:N',
        alt.value('lightgrey'),
        scale=alt.Scale(scheme="tableau10"))

    circles = alt.Chart(df).mark_circle(opacity=0.35).encode(
        alt.X('CO2 emissions (kt):Q'),
        alt.Y('CO2 emissions per capita:Q'),
        color=country_color,
        shape=alt.Shape('Country Name:N', legend=None),
        size=alt.value(250),
    )
    circles = circles.properties(
        width=300,
        height=250,
        title='CO2 total emissions and emissions per capita')

    # The label layer is built but not added to the returned chart.
    _labels = circles.mark_text(
        align='center', baseline='middle', dy=-25).encode(
            text='Country Name', size=alt.value(15))

    return circles
def graph_delivery_dates(df):
    """Save a line-plus-points chart of ``working_day`` per month.

    One line per ``name`` is drawn over ``yearmonth(calendar_date)``, with
    filled points on top that carry the legend. The ``opaque`` theme is
    enabled and the result is written to ``chart.png``; nothing is returned.
    """
    alt.themes.enable("opaque")

    shapes = alt.Shape("name:N")
    colors = alt.Color("name:N", legend=alt.Legend(title="Something"))

    line = alt.Chart(df).mark_line().encode(
        x="yearmonth(calendar_date):N",
        y="working_day:Q",
        color=alt.Color("name", legend=None),
    )
    # Use the explicitly nominal shape encoding prepared above; it was
    # previously built and then ignored in favour of the bare "name"
    # shorthand.
    points = line.mark_point(filled=True, size=80, opacity=1).encode(
        color=colors, shape=shapes)

    alt.layer(line, points).resolve_scale(
        color="independent", shape="independent").save("chart.png")
def plot_multi_relaxations(
    df: pandas.DataFrame,
    prop: List[str],
    title: str = "Relaxation Values",
    replace: Dict[str, str] = None,
) -> alt.Chart:
    """Helper to plot relaxation quantities using Altair.

    Args:
        df: DataFrame containing the quantities to plot. The columns
            ``variable``, ``inv_temp_norm``, ``pressure``, ``mean`` and
            ``sem`` are read.
        prop: The value(s) of ``variable`` to plot; a single string is
            accepted as well as a list.
        title: Title of the y axis.
        replace: Optional mapping passed to ``DataFrame.replace`` to rename
            values before plotting. When omitted no replacement is done.

    Returns:
        An error-bar layer plus a point layer sharing the same encodings.
    """
    if isinstance(prop, str):
        prop = [prop]

    # ``prop`` is referenced by name inside the query string.
    selected = df.query("variable in @prop")
    # Calling ``replace(None)`` is not a no-op in pandas, so only apply
    # the mapping when one was actually supplied.
    if replace is not None:
        selected = selected.replace(replace)

    relax_chart_base = alt.Chart(selected).encode(
        x=alt.X("inv_temp_norm:Q", title="Tm/T", axis=alt.Axis(format="g")),
        color=alt.Color("pressure:N", title="Pressure"),
        shape=alt.Shape("variable", title="Relaxation"),
        y=alt.Y(
            "mean:Q",
            title=title,
            scale=alt.Scale(type="log"),
            axis=alt.Axis(format="e"),
        ),
        yError=alt.YError("sem:Q"),
    )
    return relax_chart_base.mark_errorbar() + relax_chart_base.mark_point()
def make_graph(self):
    """Rebuild the chart from the current selections and show it.

    Does nothing beyond the size request when the x label, y label or mark
    has not been chosen yet. Optional ``y2``, ``shape`` and ``color``
    channels are added when set. The rendered HTML is written to the
    ``srcdoc`` attribute of the element ``self.graph_id``.
    """
    debug('new graph')
    get_element_size('graph_container', self.set_size)

    required = (self.xlabel, self.ylabel, self.mark)
    if any(choice is None for choice in required):
        return

    mark_option = self.mark_options[self.mark]
    mark = mark_option[0]
    x_column = self.columns[self.xlabel]
    y_column = self.columns[self.ylabel]

    channels = {
        'x': alt.X(x_column, type=self.guess_datatype(x_column, mark)),
        'y': alt.Y(y_column, type=self.guess_datatype(y_column, mark)),
    }
    if self.y2label is not None:
        channels['y2'] = alt.Y2(self.columns[self.y2label])
    if self.shape is not None:
        shape_column = self.columns[self.shape]
        channels['shape'] = alt.Shape(
            shape_column, type=self.guess_datatype(shape_column, mark))
    if self.colors is not None:
        color_column = self.columns[self.colors]
        channels['color'] = alt.Color(
            color_column, type=self.guess_datatype(color_column, mark))

    # The second entry of the mark option is called with a bare chart.
    mark_method = mark_option[1]
    marked = mark_method(alt.Chart(self.data))
    chart = marked.encode(**channels).interactive().properties(
        width=self.width, height=self.height)

    set_attribute(self.graph_id, 'srcdoc',
                  html.altair_plot(chart, with_iframe=False))
def make_chart(data, mark_type='point', mark_props=None, x_title='x',
               x_props=None, y_title='y', y_props=None, color='x:N',
               shape='x:N', text_col=None, text_props=None,
               other_axes_and_channels=None, interactive=True,
               scale_factor=1.0, svg=False, save_file='chart.html',
               on_save=None):
    """Build a chart from keyword descriptions and optionally save it.

    Args:
        data: data source handed to ``alt.Chart``.
        mark_type: suffix of the ``mark_*`` method to use.
        mark_props: keyword arguments for that mark method.
        x_title, y_title: field shorthands for the axes; a falsy value
            leaves the channel as None.
        x_props, y_props: keyword arguments for ``alt.X`` / ``alt.Y``. A
            ``scale`` entry is interpreted as a domain and wrapped in
            ``alt.Scale(domain=...)``.
        color, shape: either a shorthand string or a dict of keyword
            arguments for ``alt.Color`` / ``alt.Shape``.
        text_col: when set, a text layer showing this column is added.
        text_props: keyword arguments for ``mark_text``.
        other_axes_and_channels: extra keyword arguments for ``encode``.
        interactive: make the chart interactive.
        scale_factor, svg, save_file: control saving; nothing is saved when
            ``save_file`` is falsy.
        on_save: optional callback invoked with ``save_file`` after saving.

    Returns:
        None.
    """
    # Copy every dict argument: the caller's ``x_props``/``y_props`` used to
    # be rewritten in place, so reusing them wrapped the scale twice.
    mark_props = dict(mark_props or {})
    x_props = dict(x_props or {})
    y_props = dict(y_props or {})
    text_props = dict(text_props or {})
    other_axes_and_channels = dict(other_axes_and_channels or {})

    for axis_props in (x_props, y_props):
        if 'scale' in axis_props:
            axis_props['scale'] = alt.Scale(domain=axis_props['scale'])

    chart = getattr(alt.Chart(data), f"mark_{mark_type}")(**mark_props)
    chart = chart.encode(
        x=alt.X(x_title, **x_props) if x_title else None,
        y=alt.Y(y_title, **y_props) if y_title else None,
        color=color if isinstance(color, str) else alt.Color(**color),
        shape=shape if isinstance(shape, str) else alt.Shape(**shape),
        **other_axes_and_channels)

    if text_col:
        text_chart = chart.mark_text(**text_props).encode(text=text_col)
        chart = (chart + text_chart)

    # ``properties`` returns a new chart; the result was previously
    # discarded, so the height was never applied.
    chart = chart.properties(height=900)

    if interactive:
        chart = chart.interactive()

    if save_file:
        save_options = {'scale_factor': scale_factor}
        if svg:
            save_options['embed_options'] = {'renderer': 'svg'}
        chart.save(save_file, **save_options)
        if on_save:
            on_save(save_file)
def people_plot(df, select):
    """Return a pictogram grid of person icons coloured by income group.

    ``df`` is passed through ``Par_income_pre``. Each record is placed on a
    grid computed from ``datum.index`` (``row`` and ``col`` expressions
    below), drawn with the same person-shaped SVG path for every group, and
    filtered by the ``select`` selection, which is also attached to the
    chart.
    """
    # people
    income_records = Par_income_pre(df)

    person_img = 'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z'
    domains = [
        "par_q1", "par_q2", "par_q3", "par_q4", "par_q5", "par_top10pc",
        "par_top1pc", 'par_toppt1pc'
    ]
    # Every group shares the same icon; only the colour tells them apart.
    shape_scale = alt.Scale(domain=domains,
                            range=[person_img] * len(domains))
    color_scale = alt.Scale(domain=domains,
                            range=[
                                '#4c78a8', '#f58518', '#e45756', '#72b7b2',
                                '#54a24b', '#54a200', '#00a235', '#45ff00'
                            ])

    gridded = alt.Chart(income_records).transform_calculate(
        row="ceil(datum.index/10)").transform_calculate(
            col="datum.index-datum.row*10")
    icons = gridded.mark_point(filled=True, opacity=1, size=50).encode(
        alt.X('col:O', axis=None),
        alt.Y("row:O", axis=None),
        alt.Shape("data:N", legend=None, scale=shape_scale),
        alt.Color("data:N", legend=None, scale=color_scale))
    people_chart = icons.add_selection(select).transform_filter(select)
    return people_chart
def render_curve(df, ns=None, epsilons=None, save_path=None):
    """Render, and optionally save, a plot of the loss-data curve.

    Optionally takes arguments `ns` and `epsilons` to draw lines on the plot
    illustrating where metrics were calculated.

    Arguments:
    - df: (pd.DataFrame) the dataframe containing a loss-data curve as
      returned by LossDataEstimator.compute_curve or
      LossDataEstimator.to_dataframe.
    - ns: (list<num>) the list of training set sizes to use for computing
      metrics. Defaults to no sizes.
    - epsilons: (list<num>) the settings of epsilon used for computing SDL
      and eSC. Defaults to no epsilons.
    - save_path: (str) optional: a path (ending in .pdf or .png) to save the
      chart. Saving requires the
      [`altair-saver`](https://github.com/altair-viz/altair_saver/) package
      and its dependencies; it is only imported when a path is given.

    Returns: an Altair chart. Note that this chart displays well in
    notebooks, so calling `render_curve(df)` without a save path will work
    well with Jupyter.
    """
    import altair as alt
    from . import altair_theme  # noqa: F401

    alt.data_transformers.disable_max_rows()

    ns = [] if ns is None else ns
    epsilons = [] if epsilons is None else epsilons
    if len(ns) > 0:
        ns = _closest_valid_ns(df, ns)

    title = 'Loss-data curve'
    color_title = 'Representation'
    x_axis = alt.X('samples', scale=alt.Scale(type='log'),
                   title='Dataset size')
    y_axis = alt.Y('mean(val_loss)', scale=alt.Scale(type='log'),
                   title='Validation loss')

    line = alt.Chart(df, title=title).mark_line().encode(
        x=x_axis,
        y=y_axis,
        color=alt.Color('name:N', title=color_title, legend=None),
    )
    point = alt.Chart(df, title=title).mark_point(size=80, opacity=1).encode(
        x=x_axis,
        y=y_axis,
        color=alt.Color('name:N', title=color_title),
        shape=alt.Shape('name:N', title=color_title),
        tooltip=['samples', 'name'],
    )

    # One frame carries both the vertical (x) and horizontal (y) rules.
    rules_df = pd.concat(
        [pd.DataFrame({'x': ns}), pd.DataFrame({'y': epsilons})],
        sort=False)
    rule_x = alt.Chart(rules_df).mark_rule(strokeDash=[4, 4]).encode(x='x')
    rule_y = alt.Chart(rules_df).mark_rule(strokeDash=[4, 4]).encode(y='y')

    chart = alt.layer(rule_x, rule_y, line, point).resolve_scale(
        color='independent', shape='independent')

    if save_path is not None:
        # Imported lazily so that rendering without saving does not require
        # the optional altair-saver dependency.
        import altair_saver
        altair_saver.save(chart, save_path)
    return chart
def make_plot(infile):
    """Plot daily throughput per type with a 7-day rolling mean.

    Reads the parquet file ``infile``, prints the standard deviation and
    mean of the per-day GB totals, fills missing day/type combinations with
    zero, and saves points (daily values) plus lines (rolling mean) to
    ``renders/bytes_per_week.png``. Nothing is returned.
    """
    # Work around vega-lite legend merging bug
    label_order = {
        "Down": 1,
        "Up": 2,
        "Local": 3,
    }

    def _sorted_by_label(frame):
        # Mergesort is stable, so rows keep their order within each type.
        return frame.sort_values(
            ["throughput_type"],
            key=lambda col: col.map(lambda x: label_order[x]),
            kind="mergesort",
        )

    def _time_axis():
        return altair.X(
            "day_bin:T",
            title="Time",
            axis=altair.Axis(
                labelSeparation=5,
                labelOverlap="parity",
            ),
        )

    def _volume_axis():
        return altair.Y("GB:Q", title="GB Total Per Day")

    throughput = infra.pd.read_parquet(infile)

    # Fix plotting scale
    throughput["GB"] = throughput["bytes"] / (1000**3)

    daily_totals = throughput.groupby("day_bin").sum()
    print("Std dev", daily_totals["GB"].std())
    print("Mean", daily_totals["GB"].mean())

    # Generate a dense dataframe with all days and directions
    all_days = pd.DataFrame({
        "day_bin":
        pd.date_range(infra.constants.MIN_DATE,
                      infra.constants.MAX_DATE,
                      freq="1D")
    })
    all_types = pd.DataFrame({"throughput_type": ["Up", "Down", "Local"]},
                             dtype=object)
    dense_index = infra.pd.cartesian_product(all_days, all_types)

    join_keys = ["day_bin", "throughput_type"]
    throughput = dense_index.merge(
        throughput,
        how="left",
        left_on=join_keys,
        right_on=join_keys,
    ).fillna(value={"bytes": 0, "GB": 0})

    by_day = throughput.set_index("day_bin").sort_index()
    throughput_windowed = by_day.groupby(["throughput_type"]).rolling(
        window="7D").mean().reset_index()

    throughput = _sorted_by_label(throughput)
    throughput_windowed = _sorted_by_label(throughput_windowed)

    type_legend = altair.Legend(
        title="",
        orient="top-left",
        fillColor="white",
        labelLimit=500,
        padding=5,
        strokeColor="black",
        columns=3,
    )
    points = altair.Chart(throughput).mark_point(opacity=0.5).encode(
        x=_time_axis(),
        y=_volume_axis(),
        color=altair.Color(
            "throughput_type",
            sort=None,
        ),
        shape=altair.Shape(
            "throughput_type",
            sort=None,
            legend=type_legend,
        ))

    lines = altair.Chart(throughput_windowed).mark_line().encode(
        x=_time_axis(),
        y=_volume_axis(),
        color=altair.Color(
            "throughput_type",
            sort=None,
            legend=None,
        ),
    )

    combined = (points + lines).resolve_scale(color='independent',
                                              shape='independent')
    combined.properties(width=500).save("renders/bytes_per_week.png",
                                        scale_factor=2)
'#5218FA', '#FFD300', '#BD33A4', '#F8F8FF', '#B31B1B', '#E4717A', '#ED872D', '#ACE1AF', '#7BB661' ]), legend=alt.Legend(title="Bloom Color")) wcolor = alt.Color('bloom_color:N', scale=alt.Scale(domain=[ 'Blue', 'Yellow', 'Purple', 'White', 'Red', 'Pink', 'Orange', 'Green', 'None' ], range=[ '#D6B08C', '#CA8546', '#C9AA8D', '#BB6513', '#D7A36E', '#F2D4B7', '#A0856A', '#A97D51', '#D6B08C' ])) shape = alt.Shape("species:N") def format_plotdata(data, stats, Season): """ creates a dataframe appropriate for plotting the desired density plan and section charts """ seeddf = data.subdata.copy() #this for loop generates the plotting data choices = pd.DataFrame( (seeddf.index.values, seeddf["common_name"], seeddf["plants_per_yard"], seeddf["ht"], seeddf["bloom_color"], seeddf["spring"], seeddf["summer"], seeddf["autumn"]), index=None).T.rename( columns={
'col': 2 }, { 'country': 'United States', 'animal': 'sheep', 'col': 1 }]) domains = ['person', 'cattle', 'pigs', 'sheep'] shape_scale = alt.Scale( domain=domains, range=[ 'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z', 'M4 -2c0 0 0.9 -0.7 1.1 -0.8c0.1 -0.1 -0.1 0.5 -0.3 0.7c-0.2 0.2 1.1 1.1 1.1 1.2c0 0.2 -0.2 0.8 -0.4 0.7c-0.1 0 -0.8 -0.3 -1.3 -0.2c-0.5 0.1 -1.3 1.6 -1.5 2c-0.3 0.4 -0.6 0.4 -0.6 0.4c0 0.1 0.3 1.7 0.4 1.8c0.1 0.1 -0.4 0.1 -0.5 0c0 0 -0.6 -1.9 -0.6 -1.9c-0.1 0 -0.3 -0.1 -0.3 -0.1c0 0.1 -0.5 1.4 -0.4 1.6c0.1 0.2 0.1 0.3 0.1 0.3c0 0 -0.4 0 -0.4 0c0 0 -0.2 -0.1 -0.1 -0.3c0 -0.2 0.3 -1.7 0.3 -1.7c0 0 -2.8 -0.9 -2.9 -0.8c-0.2 0.1 -0.4 0.6 -0.4 1c0 0.4 0.5 1.9 0.5 1.9l-0.5 0l-0.6 -2l0 -0.6c0 0 -1 0.8 -1 1c0 0.2 -0.2 1.3 -0.2 1.3c0 0 0.3 0.3 0.2 0.3c0 0 -0.5 0 -0.5 0c0 0 -0.2 -0.2 -0.1 -0.4c0 -0.1 0.2 -1.6 0.2 -1.6c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 0 -2.7 -0.2 -2.7c-0.1 0 -0.4 2 -0.4 2c0 0 0 0.2 -0.2 0.5c-0.1 0.4 -0.2 1.1 -0.2 1.1c0 0 -0.2 -0.1 -0.2 -0.2c0 -0.1 -0.1 -0.7 0 -0.7c0.1 -0.1 0.3 -0.8 0.4 -1.4c0 -0.6 0.2 -1.3 0.4 -1.5c0.1 -0.2 0.6 -0.4 0.6 -0.4z', 'M1.2 -2c0 0 0.7 0 1.2 0.5c0.5 0.5 0.4 0.6 0.5 0.6c0.1 0 0.7 0 0.8 0.1c0.1 0 0.2 0.2 0.2 0.2c0 0 -0.6 0.2 -0.6 0.3c0 0.1 0.4 0.9 0.6 0.9c0.1 0 0.6 0 0.6 0.1c0 0.1 0 0.7 -0.1 0.7c-0.1 0 -1.2 0.4 -1.5 0.5c-0.3 0.1 -1.1 0.5 -1.1 0.7c-0.1 0.2 0.4 1.2 0.4 1.2l-0.4 0c0 0 -0.4 -0.8 -0.4 -0.9c0 -0.1 -0.1 -0.3 -0.1 -0.3l-0.2 0l-0.5 1.3l-0.4 0c0 0 -0.1 -0.4 0 -0.6c0.1 -0.1 0.3 -0.6 0.3 -0.7c0 0 -0.8 0 -1.5 -0.1c-0.7 -0.1 -1.2 -0.3 -1.2 -0.2c0 0.1 -0.4 0.6 -0.5 0.6c0 0 0.3 0.9 0.3 0.9l-0.4 0c0 0 -0.4 -0.5 -0.4 -0.6c0 -0.1 -0.2 -0.6 -0.2 -0.5c0 0 -0.4 0.4 -0.6 0.4c-0.2 0.1 -0.4 0.1 -0.4 0.1c0 0 -0.1 0.6 -0.1 0.6l-0.5 0l0 -1c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 
-0.7 -1.2 -0.6 -1.4c0.1 -0.1 0.1 -1.1 0.1 -1.1c0 0 -0.2 0.1 -0.2 0.1c0 0 0 0.9 0 1c0 0.1 -0.2 0.3 -0.3 0.3c-0.1 0 0 -0.5 0 -0.9c0 -0.4 0 -0.4 0.2 -0.6c0.2 -0.2 0.6 -0.3 0.8 -0.8c0.3 -0.5 1 -0.6 1 -0.6z', 'M-4.1 -0.5c0.2 0 0.2 0.2 0.5 0.2c0.3 0 0.3 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.4 -0.2c0.1 0 0.2 0.2 0.4 0.1c0.2 0 0.2 -0.2 0.4 -0.3c0.1 0 0.1 -0.1 0.4 0c0.3 0 0.3 -0.4 0.6 -0.4c0.3 0 0.6 -0.3 0.7 -0.2c0.1 0.1 1.4 1 1.3 1.4c-0.1 0.4 -0.3 0.3 -0.4 0.3c-0.1 0 -0.5 -0.4 -0.7 -0.2c-0.3 0.2 -0.1 0.4 -0.2 0.6c-0.1 0.1 -0.2 0.2 -0.3 0.4c0 0.2 0.1 0.3 0 0.5c-0.1 0.2 -0.3 0.2 -0.3 0.5c0 0.3 -0.2 0.3 -0.3 0.6c-0.1 0.2 0 0.3 -0.1 0.5c-0.1 0.2 -0.1 0.2 -0.2 0.3c-0.1 0.1 0.3 1.1 0.3 1.1l-0.3 0c0 0 -0.3 -0.9 -0.3 -1c0 -0.1 -0.1 -0.2 -0.3 -0.2c-0.2 0 -0.3 0.1 -0.4 0.4c0 0.3 -0.2 0.8 -0.2 0.8l-0.3 0l0.3 -1c0 0 0.1 -0.6 -0.2 -0.5c-0.3 0.1 -0.2 -0.1 -0.4 -0.1c-0.2 -0.1 -0.3 0.1 -0.4 0c-0.2 -0.1 -0.3 0.1 -0.5 0c-0.2 -0.1 -0.1 0 -0.3 0.3c-0.2 0.3 -0.4 0.3 -0.4 0.3l0.2 1.1l-0.3 0l-0.2 -1.1c0 0 -0.4 -0.6 -0.5 -0.4c-0.1 0.3 -0.1 0.4 -0.3 0.4c-0.1 -0.1 -0.2 1.1 -0.2 1.1l-0.3 0l0.2 -1.1c0 0 -0.3 -0.1 -0.3 -0.5c0 -0.3 0.1 -0.5 0.1 -0.7c0.1 -0.2 -0.1 -1 -0.2 -1.1c-0.1 -0.2 -0.2 -0.8 -0.2 -0.8c0 0 -0.1 -0.5 0.4 -0.8z' ]) color_scale = alt.Scale(domain=domains, range=[ 'rgb(162,160,152)', 'rgb(194,81,64)', 'rgb(93,93,93)', 'rgb(91,131,149)' ]) alt.Chart(df).mark_point(filled=True).encode( alt.X('col:O', axis=None), alt.Y('animal:O', axis=None), alt.Row('country:N', header=alt.Header(title='')), alt.Shape('animal:N', legend=None, scale=shape_scale), alt.Color('animal:N', legend=None, scale=color_scale), alt.OpacityValue(1), alt.SizeValue(200)).properties(width=800, height=200)
def make_plot(infile):
    """Render three charts of active users per hour of the day.

    Reads the parquet file ``infile`` and writes:
    - a box plot over the rows outside 2019-07-30 .. 2019-08-31,
    - an over-plot of all rows coloured by an ``outage`` label,
    - mean / 90th / 99th percentile / max lines for the non-outage rows.
    The aggregated frame is printed. Nothing is returned.
    """
    hour_axis_title = "Hour of the Day"
    user_axis_title = "Active User Count"

    grouped_flows = infra.pd.read_parquet(infile).reset_index()

    outside_outage = ((grouped_flows["day_bin"] < "2019-07-30") |
                      (grouped_flows["day_bin"] > "2019-08-31"))
    # Taken before the label column is added below.
    working_times = grouped_flows.loc[outside_outage]

    grouped_flows["outage"] = "Outage"
    grouped_flows.loc[outside_outage, "outage"] = "Normal"

    boxplot = alt.Chart(working_times).mark_boxplot().encode(
        x=alt.X('hour:O', title=hour_axis_title),
        y=alt.Y('user:Q', title=user_axis_title),
    )
    boxplot.save(
        "renders/users_per_time_of_day_boxplot_exclude_outage.png",
        scale_factor=2,
    )

    overplot = alt.Chart(grouped_flows).mark_point(opacity=0.1).encode(
        x=alt.X('hour:O', title=hour_axis_title),
        y=alt.Y('user:Q', title=user_axis_title),
        color=alt.Color(
            "outage",
            title="Condition",
        ))
    overplot.save(
        "renders/users_per_time_of_day_overplot.png",
        scale_factor=2,
    )

    aggregate = working_times.groupby(["hour"]).agg({
        "user": [
            "mean", lambda x: x.quantile(0.90), lambda x: x.quantile(0.99),
            "max"
        ]
    })

    # Flatten column names
    aggregate = aggregate.reset_index()
    aggregate.columns = [
        ' '.join(col).strip() for col in aggregate.columns.values
    ]
    readable_names = {
        "user mean": "Mean",
        "user <lambda_0>": "90th Percentile",
        "user <lambda_1>": "99th Percentile",
        "user max": "Max",
    }
    aggregate = aggregate.rename(columns=readable_names)
    aggregate = aggregate.melt(
        id_vars=["hour"],
        value_vars=["Max", "99th Percentile", "90th Percentile", "Mean"],
        var_name="type",
        value_name="user")
    print(aggregate)

    # Create a hybrid chart to fix legend issue with line chart and shape
    lines = alt.Chart(aggregate).mark_line().encode(
        x=alt.X('hour:O', title=hour_axis_title),
        y=alt.Y('user:Q', title=user_axis_title),
        color=alt.Color(
            "type",
            legend=None,
            sort=None,
        ),
    )

    legend_box = alt.Legend(
        orient="top-left",
        fillColor="white",
        labelLimit=500,
        padding=5,
        strokeColor="black",
    )
    points = lines.mark_point(size=100).encode(
        x=alt.X('hour:O', title=hour_axis_title),
        y=alt.Y('user:Q', title=user_axis_title),
        color=alt.Color(
            "type",
            sort=None,
            legend=legend_box,
        ),
        shape=alt.Shape(
            "type",
            title="",
            sort=None,
        ),
    )

    layered = alt.layer(points, lines).resolve_scale(color='independent',
                                                     shape='independent')
    layered.save(
        "renders/users_per_time_of_day_lines.png",
        scale_factor=2,
    )
def test_convert_shape_fail_temporal(column):
    """Run ``convert`` on a point chart whose shape channel is ``column``."""
    shape_channel = alt.Shape(column)
    chart = alt.Chart(df).mark_point().encode(shape_channel)
    convert(chart)
def test_quantitative_shape():
    """Run ``convert`` on a point chart with ``shape`` on the shape channel."""
    shape_channel = alt.Shape('shape')
    chart = alt.Chart(df_quant).mark_point().encode(shape_channel)
    convert(chart)
y=alt.Y( my[plot]["y"][0], type=datatypes[my[plot]["y"][0]], axis=alt.Axis(title=my[plot]["y"][1], ), scale=alt.Scale(type=my[plot]["y"][2]), ), tooltip=[f"{my[plot]['x'][0]}", f"{my[plot]['y'][0]}", "product_name"], color=my_color, ).properties(width=1213, height=750).interactive().add_selection(my_selection)) if "shape" in my[plot]: shape = my[plot]["shape"][0] chart[plot] = chart[plot].encode(shape=alt.Shape( shape, type=datatypes[shape], legend=alt.Legend(title=my[plot]["shape"][1]), )) if "column" in my[plot]: chart[plot] = chart[plot].encode(column=alt.Column( my[plot]["column"][0], type=datatypes[my[plot]["column"][0]], header=alt.Header(title=my[plot]["column"][1]), )) if "row" in my[plot]: chart[plot] = chart[plot].encode(row=alt.Row( my[plot]["row"][0], type=datatypes[my[plot]["row"][0]], header=alt.Header(title=my[plot]["row"][1]), ))
def __draw_bubbles(
    plot_table,
    x_metric,
    y_metric,
    ref_group,
    scales,
    interactive_selection_group,
):
    """Draws the bubbles for the given pair of metrics.

    Returns the bubble-centre layer plus the bubble-area layer. Both share
    position, tooltip and colour; centres add a shape per attribute value
    and areas add a size driven by ``group_size``. ``ref_group`` is not
    read here.
    """
    # FILTER DF
    kept_fields = [
        "group_size",
        "attribute_value",
        "total_entities",
        x_metric,
        y_metric,
    ]
    metric_plot_table = plot_table[kept_fields].copy(deep=True)
    metric_plot_table["tooltip_group_size"] = plot_table.apply(
        lambda row: get_tooltip_text_group_size(row["group_size"],
                                                row["total_entities"]),
        axis=1,
    )

    # COLOR ENCODING
    bubble_color_encoding = alt.condition(
        interactive_selection_group,
        alt.Color("attribute_value:N", scale=scales["color"], legend=None),
        alt.value(Bubble.color_faded),
    )

    # TOOLTIP ENCODING
    bubble_tooltip_encoding = [
        alt.Tooltip(field="attribute_value", type="nominal", title="Group"),
        alt.Tooltip(field="tooltip_group_size",
                    type="nominal",
                    title="Group Size"),
    ]
    for metric in (x_metric, y_metric):
        bubble_tooltip_encoding.append(
            alt.Tooltip(field=metric,
                        type="quantitative",
                        format=".2f",
                        title=metric.upper()))

    # BUBBLE CENTERS
    center_marks = alt.Chart(metric_plot_table).mark_point(
        filled=True, size=Bubble.center_size)
    bubbles_centers = center_marks.encode(
        x=alt.X(f"{x_metric}:Q", scale=scales["x"], axis=no_axis()),
        y=alt.Y(f"{y_metric}:Q", scale=scales["y"], axis=no_axis()),
        tooltip=bubble_tooltip_encoding,
        color=bubble_color_encoding,
        shape=alt.Shape("attribute_value:N",
                        scale=scales["shape"],
                        legend=None),
    )

    # BUBBLE AREAS
    area_marks = alt.Chart(metric_plot_table).mark_circle(
        opacity=Bubble.opacity)
    bubbles_areas = area_marks.encode(
        size=alt.Size("group_size:Q",
                      legend=None,
                      scale=scales["bubble_size"]),
        x=alt.X(f"{x_metric}:Q", scale=scales["x"], axis=no_axis()),
        y=alt.Y(f"{y_metric}:Q", scale=scales["y"], axis=no_axis()),
        tooltip=bubble_tooltip_encoding,
        color=bubble_color_encoding,
    )

    return bubbles_centers + bubbles_areas
def __draw_bubbles(
    plot_table,
    metrics,
    ref_group,
    scales,
    selection,
):
    """Draws the bubbles for all metrics.

    For every metric, one layer of bubble centers (filled points) and one
    layer of bubble areas (circles sized by ``group_size``) is added. Marks
    are positioned at the metric value on x and at a constant, upper-cased
    metric label on y.

    Args:
        plot_table: DataFrame providing ``group_size``, ``total_entities``,
            ``attribute_value`` and, per metric, ``<metric>`` and
            ``<metric>_disparity_scaled``. It is not modified.
        metrics: Iterable of metric column names (strings).
        ref_group: Reference group forwarded to the disparity tooltip helper.
        scales: Mapping providing the ``"x"``, ``"y"``, ``"color"``,
            ``"shape"`` and ``"bubble_size"`` scales.
        selection: Condition for the color encoding; when it does not hold,
            ``Bubble.color_faded`` is used instead.

    Returns:
        The bubble-area layers combined with the bubble-center layers
        (``bubble_areas + bubble_centers``).
    """
    # Work on a private copy: the tooltip helper columns added below are an
    # implementation detail and must not leak into the caller's DataFrame.
    plot_table = plot_table.copy()

    # X AXIS GRIDLINES
    axis_values = [0.25, 0.5, 0.75]
    x_axis = alt.Axis(
        values=axis_values, ticks=False, domain=False, labels=False, title=None
    )

    # COLOR
    bubble_color_encoding = alt.condition(
        selection,
        alt.Color("attribute_value:N", scale=scales["color"], legend=None),
        alt.value(Bubble.color_faded),
    )

    # CHART INITIALIZATION
    # Empty charts act as the starting point onto which each metric's layer
    # is accumulated with ``+=``.
    bubble_centers = alt.Chart().mark_point()
    bubble_areas = alt.Chart().mark_circle()

    plot_table["tooltip_group_size"] = plot_table.apply(
        lambda row: get_tooltip_text_group_size(
            row["group_size"], row["total_entities"]
        ),
        axis=1,
    )

    # LAYERING THE METRICS
    for metric in metrics:
        # TOOLTIP
        # The lambda is consumed immediately by ``apply``, so it always sees
        # the current iteration's ``metric``.
        plot_table[f"tooltip_disparity_explanation_{metric}"] = plot_table.apply(
            lambda row: get_tooltip_text_disparity_explanation(
                row[f"{metric}_disparity_scaled"],
                row["attribute_value"],
                metric,
                ref_group,
            ),
            axis=1,
        )

        bubble_tooltip_encoding = [
            alt.Tooltip(field="attribute_value", type="nominal", title="Group"),
            alt.Tooltip(field="tooltip_group_size", type="nominal", title="Group Size"),
            alt.Tooltip(
                field=f"tooltip_disparity_explanation_{metric}",
                type="nominal",
                title="Disparity",
            ),
            alt.Tooltip(
                field=f"{metric}",
                type="quantitative",
                format=".2f",
                title=f"{metric}".upper(),
            ),
        ]

        # BUBBLE CENTERS
        # Each layer carries its own multi-selection on ``attribute_value``.
        trigger_centers = alt.selection_multi(empty="all", fields=["attribute_value"])

        bubble_centers += (
            alt.Chart(plot_table)
            # Constant per-layer label used as the nominal y position.
            .transform_calculate(metric_variable=f"'{metric.upper()}'")
            .mark_point(filled=True, size=Bubble.center_size)
            .encode(
                x=alt.X(f"{metric}:Q", scale=scales["x"], axis=x_axis),
                y=alt.Y("metric_variable:N", scale=scales["y"], axis=no_axis()),
                tooltip=bubble_tooltip_encoding,
                color=bubble_color_encoding,
                shape=alt.Shape(
                    "attribute_value:N", scale=scales["shape"], legend=None
                ),
            )
            .add_selection(trigger_centers)
        )

        # BUBBLE AREAS
        trigger_areas = alt.selection_multi(empty="all", fields=["attribute_value"])

        bubble_areas += (
            alt.Chart(plot_table)
            .mark_circle(opacity=Bubble.opacity)
            .transform_calculate(metric_variable=f"'{metric.upper()}'")
            .encode(
                x=alt.X(f"{metric}:Q", scale=scales["x"], axis=x_axis),
                y=alt.Y("metric_variable:N", scale=scales["y"], axis=no_axis()),
                tooltip=bubble_tooltip_encoding,
                color=bubble_color_encoding,
                size=alt.Size("group_size:Q", legend=None, scale=scales["bubble_size"]),
            )
            .add_selection(trigger_areas)
        )

    return bubble_areas + bubble_centers
# Five-cluster k-means with a fixed seed; the same estimator is refit on the
# scaled features in the last cell.
model = KMeans(n_clusters=5, random_state=2019)

# %%
# Shift the predicted cluster ids by one so they start at 1.
labels = model.fit_predict(data_kmeans) + 1

# %%
print(labels)

# %%
# `.values` hands pandas an array, so the labels are assigned by position
# rather than being aligned on the index of `data_kmeans`.
data_kmeans["label"] = ("Cluster " + pd.Series(labels).astype(str)).values
data_kmeans[["name", "result", "start"]] = data.loc[mask, ["name", "result", "start"]]

# %%
# Scatter of the two infantry columns: shape encodes the cluster, color the result.
c = (
    alt.Chart(data_kmeans)
    .mark_point()
    .encode(
        shape=alt.Shape("label:N", legend=alt.Legend(title="Cluster")),
        x="allies_infantry",
        y="axis_infantry",
        color="result",
        tooltip=list(data_kmeans.columns),
    )
    .interactive()
)
c

# %%
# Drop the label and descriptive columns again before scaling the features.
data_to_scale = data_kmeans.drop(columns=["label", "name", "start", "result"])
data_scaled = scale(data_to_scale)

# %%
labels_scaled = model.fit_predict(data_scaled) + 1
st.subheader("Shape") '''The `shape` encoding channel sets the geometric shape used by `point` marks. Unlike the other channels we have seen so far, the shape channel can not be used by other mark types. The shape encoding channel should only be used with nominal data, as perceptual rank-order and magnitude comparisons are not supported. Let's encode the `cluster` field using shape as well as color. Using multiple channels for the same underlying data field is known as a redundant encoding. The resulting chart combines both color and shape information into a single symbol legend: ''' with st.echo(): chart = alt.Chart(data2000).mark_point(filled=True).encode( alt.X('fertility:Q'), alt.Y('life_expect:Q'), alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])), alt.Color('cluster:N'), alt.OpacityValue(0.5), alt.Shape('cluster:N') ) st.write(chart) st.subheader("Tooltips and Ordering") ''' By this point, you might feel a bit frustrated: we've built up a chart, but we still don't know what countries the visualized points correspond to! Let's add interactive tooltips to enable exploration. The `tooltip` encoding channel determines tooltip text to show when a user moves the mouse cursor over a mark. Let's add a tooltip encoding for the `country` field, then investigate which countries are being represented. chart = alt.Chart(data2000).mark_point(filled=True).encode( alt.X('fertility:Q'), alt.Y('life_expect:Q'), alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])), alt.Color('cluster:N'),