def initialize_chart(self):
    """Render the vis as a binned 2D heatmap and record equivalent Altair code.

    Builds an ``alt.Chart`` rect mark from pre-binned x/y bin edges in
    ``self.data`` and appends a reproducing Altair code snippet to
    ``self.code``. Returns the configured chart.
    """
    # return NotImplemented
    x_attr = self.vis.get_attr_by_channel("x")[0]
    y_attr = self.vis.get_attr_by_channel("y")[0]

    # Abbreviate overly long attribute names for use as axis titles.
    x_title = str(x_attr.attribute)
    y_title = str(y_attr.attribute)
    if len(x_title) > 25:
        x_title = x_attr.attribute[:15] + "..." + x_attr.attribute[-10:]
    if len(y_title) > 25:
        y_title = y_attr.attribute[:15] + "..." + y_attr.attribute[-10:]

    # Strip '.' from attribute names (mutates the vis attributes in place).
    if isinstance(x_attr.attribute, str):
        x_attr.attribute = x_attr.attribute.replace(".", "")
    if isinstance(y_attr.attribute, str):
        y_attr.attribute = y_attr.attribute.replace(".", "")

    chart = (
        alt.Chart(self.data)
        .mark_rect()
        .encode(
            x=alt.X(
                "xBinStart",
                type="quantitative",
                axis=alt.Axis(title=x_title),
                bin=alt.BinParams(binned=True),
            ),
            x2=alt.X2("xBinEnd"),
            y=alt.Y(
                "yBinStart",
                type="quantitative",
                axis=alt.Axis(title=y_title),
                bin=alt.BinParams(binned=True),
            ),
            y2=alt.Y2("yBinEnd"),
            opacity=alt.Opacity(
                "count",
                type="quantitative",
                scale=alt.Scale(type="log"),
                legend=None,
            ),
        )
    )
    chart = chart.configure_scale(minOpacity=0.1, maxOpacity=1)
    # Setting tooltip as non-null
    chart = chart.configure_mark(tooltip=alt.TooltipContent("encoding"))
    chart = chart.interactive()  # Enable Zooming and Panning

    ####################################
    # Constructing Altair Code String ##
    ####################################

    self.code += "import altair as alt\n"
    # self.code += f"visData = pd.DataFrame({str(self.data.to_dict(orient='records'))})\n"
    self.code += f"visData = pd.DataFrame({str(self.data.to_dict())})\n"
    self.code += f"""
chart = alt.Chart(visData).mark_rect().encode(
    x=alt.X('xBinStart', type='quantitative', axis=alt.Axis(title='{x_title}'), bin = alt.BinParams(binned=True)),
    x2=alt.X2('xBinEnd'),
    y=alt.Y('yBinStart', type='quantitative', axis=alt.Axis(title='{y_title}'), bin = alt.BinParams(binned=True)),
    y2=alt.Y2('yBinEnd'),
    opacity = alt.Opacity('count',type='quantitative',scale=alt.Scale(type="log"),legend=None)
)
chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding')) # Setting tooltip as non-null
"""
    return chart
def make_plot(infile):
    """Render per-protocol weekly traffic trend charts to the renders/ directory.

    Parameters
    ----------
    infile : str
        Path to a parquet file of flows with ``protocol``, ``dest_port``,
        ``start_bin``, ``bytes_up`` and ``bytes_down`` columns.

    Returns
    -------
    The result of the final ``Chart.save`` call (saved as a side effect).
    """
    grouped_flows = infra.pd.read_parquet(infile)
    grouped_flows = grouped_flows.reset_index()
    grouped_flows["bytes_total"] = grouped_flows["bytes_up"] + grouped_flows["bytes_down"]

    # Map down to a smaller number of protocol names, including "other".
    grouped_flows["name"] = grouped_flows.apply(
        lambda row: _assign_protocol_plain_name(row.protocol, row.dest_port),
        axis="columns"
    )

    # Debug output: which UDP destination ports dominate the "Other UDP" bucket.
    test = grouped_flows.loc[
        (grouped_flows["protocol"] == 17) & (grouped_flows["name"] == "Other UDP")
    ].groupby("dest_port").sum()
    print(test.sort_values("bytes_total"))

    # Consolidate by week instead of by day
    grouped_flows = grouped_flows[
        ["start_bin", "bytes_total", "bytes_up", "bytes_down", "name"]
    ].groupby([pd.Grouper(key="start_bin", freq="W-MON"), "name"]).sum()
    grouped_flows = grouped_flows.reset_index()
    print(grouped_flows)

    # Generate an outage annotation overlay
    outage_info = pd.DataFrame([{"start": infra.constants.OUTAGE_START,
                                 "end": infra.constants.OUTAGE_END}])
    outage_annotation = alt.Chart(outage_info).mark_rect(
        opacity=0.7,
        strokeWidth=2,
    ).encode(
        x=alt.X("start"),
        x2=alt.X2("end"),
        color=alt.value("#FFFFFF")
    )

    # Figure out legend sorting order by total amount.
    proto_totals = grouped_flows.groupby("name").sum().reset_index()
    legend_sort_order = proto_totals.sort_values(
        "bytes_total", ascending=True).set_index("bytes_total").reset_index()
    sort_list = legend_sort_order["name"].tolist()
    sort_list.reverse()

    # Now get the up and down sorts (reuse proto_totals; the original
    # recomputed the identical groupby a second time).
    sort_down_order = proto_totals.sort_values(
        "bytes_down", ascending=True).set_index("bytes_down").reset_index()
    sort_down_order["order"] = sort_down_order.index
    sort_down_order["direction"] = "Downlink"
    sort_up_order = proto_totals.sort_values(
        "bytes_up", ascending=True).set_index("bytes_up").reset_index()
    sort_up_order["order"] = sort_up_order.index
    sort_up_order["direction"] = "Uplink"
    # BUGFIX: DataFrame.append was deprecated in pandas 1.4 and removed in
    # pandas 2.0; pd.concat is the supported equivalent.
    orders = pd.concat([sort_down_order, sort_up_order])

    grouped_flows["Downlink"] = grouped_flows["bytes_down"] / (1000**3)
    grouped_flows["Uplink"] = grouped_flows["bytes_up"] / (1000**3)

    # Melt the dataset for faceting
    links = grouped_flows.melt(
        id_vars=["name", "start_bin"],
        value_vars=["Downlink", "Uplink"],
        var_name="direction",
        value_name="GB"
    ).set_index("name")

    # Merge the sort orders back into the larger dataset
    faceted_flows = links.merge(orders, on=["name", "direction"])

    area = alt.Chart().mark_area().encode(
        x=alt.X("start_bin:T",
                title="Time",
                axis=alt.Axis(labels=True)),
        y=alt.Y("sum(GB):Q",
                title="Share of Traffic Per Week",
                stack="normalize"),
        color=alt.Color(
            "name",
            title="Protocol (By Total)",
            scale=alt.Scale(scheme="tableau10"),
            sort=sort_list,
        ),
        order=alt.Order("order"),
    )

    (area + outage_annotation).properties(
        width=500,
    ).facet(
        column=alt.Column(
            'direction:N',
            title="",
        ),
        data=faceted_flows,
    ).save(
        "renders/bytes_per_protocol_trends_normalized_facet.png",
        scale_factor=2,
    )

    # NOTE(review): grouped_flows has no "GB" column (only "Downlink" and
    # "Uplink"), so "sum(GB)" below is presumably empty — confirm intent.
    plot = alt.Chart(grouped_flows).mark_area().encode(
        x=alt.X("start_bin:T",
                title="Time",
                axis=alt.Axis(labels=True)),
        y=alt.Y("sum(GB):Q",
                title="Total Traffic Per Week(GB)"),
        color="name",
        detail="name",
    ).properties(
        title="Local Service Use",
        width=500,
    ).save("renders/bytes_per_protocol_trends.png",
           scale_factor=2)

    return plot
def make_org_plot(infile):
    """Generate plots to explore the traffic distribution across organizations.

    Parameters
    ----------
    infile : str
        Path to a parquet file of flows with ``start_bin``, ``bytes_up``,
        ``bytes_down`` and ``org`` columns.

    Side effects: saves two PNG charts under renders/ and prints diagnostics.
    """
    grouped_flows = infra.pd.read_parquet(infile)
    grouped_flows = grouped_flows.reset_index()
    grouped_flows["bytes_total"] = grouped_flows["bytes_up"] + grouped_flows["bytes_down"]

    # Consolidate by week instead of by day
    grouped_flows = grouped_flows[[
        "start_bin", "bytes_total", "bytes_up", "bytes_down", "org"
    ]].groupby([pd.Grouper(key="start_bin", freq="W-MON"), "org"]).sum()
    grouped_flows = grouped_flows.reset_index()

    # Generate an outage annotation overlay
    outage_info = pd.DataFrame([{
        "start": infra.constants.OUTAGE_START,
        "end": infra.constants.OUTAGE_END
    }])
    outage_annotation = alt.Chart(outage_info).mark_rect(
        opacity=0.7,
        strokeWidth=2,
    ).encode(x=alt.X("start"), x2=alt.X2("end"), color=alt.value("#FFFFFF"))

    # Group into other orgs: keep the top N, merge the tail into "Other N=…".
    number_of_main_orgs = 9
    sorted_flows = grouped_flows.groupby("org").sum().sort_values(
        "bytes_total", ascending=False)
    orgs_to_other = sorted_flows.index[number_of_main_orgs:]
    number_othered = len(orgs_to_other)

    # Create a separate frame with only the main flows and the aggregated other.
    grouped_with_other = grouped_flows.copy()
    grouped_with_other["org"] = grouped_with_other["org"].replace(
        orgs_to_other, "Other N={}".format(number_othered))

    # Group together to find orders for the legend and both areas below.
    org_groups = grouped_with_other.groupby("org").sum().reset_index()

    # Figure out legend sorting order by total amount.
    legend_order = org_groups.sort_values(
        "bytes_total", ascending=False).set_index("bytes_total").reset_index()
    legend_sort_list = legend_order["org"].tolist()

    # Figure out area layer order by amounts for upload and download.
    sort_order_down = org_groups.sort_values(
        "bytes_down", ascending=True).set_index("bytes_down").reset_index()
    sort_order_down["order"] = sort_order_down.index
    sort_order_down["direction"] = "Downlink"
    sort_order_up = org_groups.sort_values(
        "bytes_up", ascending=True).set_index("bytes_up").reset_index()
    sort_order_up["order"] = sort_order_up.index
    sort_order_up["direction"] = "Uplink"
    # BUGFIX: DataFrame.append was deprecated in pandas 1.4 and removed in
    # pandas 2.0; pd.concat is the supported equivalent.
    area_sort_orders = pd.concat([sort_order_up, sort_order_down])

    # Melt the main dataframe
    grouped_with_other["Downlink"] = grouped_with_other["bytes_down"] / (1000**3)
    grouped_with_other["Uplink"] = grouped_with_other["bytes_up"] / (1000**3)
    grouped_with_other = grouped_with_other.melt(
        id_vars=["org", "start_bin"],
        value_vars=["Downlink", "Uplink"],
        var_name="direction",
        value_name="GB")

    # Merge the sort order back into the larger dataset
    grouped_with_other = grouped_with_other.merge(
        area_sort_orders, on=["org", "direction"])
    print(grouped_with_other)

    area = alt.Chart().mark_area().encode(
        x=alt.X(
            "start_bin:T",
            title="Time",
            axis=alt.Axis(labels=True),
        ),
        y=alt.Y(
            "sum(GB):Q",
            title="Share of Traffic Per Week",
            stack="normalize",
        ),
        color=alt.Color(
            "org",
            title="Organization (By Total)",
            scale=alt.Scale(scheme="paired"),
            sort=legend_sort_list,
        ),
        order=alt.Order("order"),
    )

    (area + outage_annotation).properties(width=500).facet(
        column=alt.Column(
            "direction:N",
            title="",
        ),
        data=grouped_with_other,
    ).save("renders/bytes_per_category_org_facet_main.png", scale_factor=2)

    # Create a separate frame for just the other flows
    main_flows = sorted_flows.index[:number_of_main_orgs]
    others = grouped_flows.copy().reset_index().set_index("org")
    others = others.drop(main_flows).reset_index()

    # Figure out sorting order by total amount.
    sort_check = others.groupby("org").sum().reset_index()
    sort_order = sort_check.sort_values(
        "bytes_total", ascending=True).set_index("bytes_total").reset_index()
    sort_list = sort_order["org"].tolist()
    sort_list.reverse()
    sort_order["order"] = sort_order.index

    # Merge the sort order back into the larger dataset
    others = others.merge(sort_order[["org", "order"]], on="org")
    print(len(others["org"].unique()))
    print(others["org"].unique())
    print(others)
    others["GB"] = others["bytes_total"] / (1000**3)

    area = alt.Chart(others).mark_area().encode(
        x=alt.X(
            "start_bin:T",
            title="Time",
            axis=alt.Axis(labels=True),
        ),
        y=alt.Y(
            "sum(GB):Q",
            title="Total Traffic Per Week(GB)",
            stack="normalize",
        ),
        color=alt.Color(
            "org",
            title="Organization",
            scale=alt.Scale(scheme="category20c"),
            sort=sort_list,
        ),
        # The order actually makes this chart harder to understand, since the
        # color needs to wrap around.
        order=alt.Order("order"),
    )

    (area + outage_annotation).configure_legend(
        symbolLimit=100,
        columns=2,
    ).properties(
        width=1000,
        height=500,
    ).save("renders/bytes_per_category_org_weekly_stream_others.png",
           scale_factor=2)
def make_category_plot(infile):
    """Render normalized per-category weekly traffic, faceted by direction.

    Parameters
    ----------
    infile : str
        Path to a parquet file of flows with ``start_bin``, ``category``,
        ``bytes_up`` and ``bytes_down`` columns.

    Side effect: saves renders/bytes_per_category_cat_facet.png.
    """
    grouped_flows = infra.pd.read_parquet(infile)
    grouped_flows = grouped_flows.reset_index()
    grouped_flows["bytes_total"] = grouped_flows["bytes_up"] + grouped_flows["bytes_down"]

    # Consolidate by week instead of by day
    grouped_flows = grouped_flows[[
        "start_bin", "bytes_total", "category", "bytes_up", "bytes_down"
    ]].groupby([pd.Grouper(key="start_bin", freq="W-MON"), "category"]).sum()
    grouped_flows = grouped_flows.reset_index()

    # Generate an outage annotation overlay
    outage_info = pd.DataFrame([{
        "start": infra.constants.OUTAGE_START,
        "end": infra.constants.OUTAGE_END
    }])
    outage_annotation = alt.Chart(outage_info).mark_rect(
        opacity=0.7,
        strokeWidth=2,
    ).encode(x=alt.X("start"), x2=alt.X2("end"), color=alt.value("#FFFFFF"))

    # Figure out legend sorting order by total amount.
    cat_totals = grouped_flows.groupby("category").sum().reset_index()
    legend_sort_order = cat_totals.sort_values(
        "bytes_total", ascending=True).set_index("bytes_total").reset_index()
    sort_list = legend_sort_order["category"].tolist()
    sort_list.reverse()

    # Per-direction stacking orders (reuse cat_totals; the original recomputed
    # the identical groupby a second time).
    sort_down_order = cat_totals.sort_values(
        "bytes_down", ascending=True).set_index("bytes_down").reset_index()
    sort_down_order["order"] = sort_down_order.index
    sort_down_order["direction"] = "Downlink"
    sort_up_order = cat_totals.sort_values(
        "bytes_up", ascending=True).set_index("bytes_up").reset_index()
    sort_up_order["order"] = sort_up_order.index
    sort_up_order["direction"] = "Uplink"
    # BUGFIX: DataFrame.append was deprecated in pandas 1.4 and removed in
    # pandas 2.0; pd.concat is the supported equivalent.
    orders = pd.concat([sort_down_order, sort_up_order])

    grouped_flows["Downlink"] = grouped_flows["bytes_down"] / (1000**3)
    grouped_flows["Uplink"] = grouped_flows["bytes_up"] / (1000**3)

    # Melt the dataset for faceting
    links = grouped_flows.melt(id_vars=["category", "start_bin"],
                               value_vars=["Downlink", "Uplink"],
                               var_name="direction",
                               value_name="GB").set_index("category")

    # Merge the sort orders back into the larger dataset
    faceted_flows = links.merge(orders, on=["category", "direction"])

    area = alt.Chart().mark_area().encode(
        x=alt.X(
            "start_bin:T",
            title="Time",
            axis=alt.Axis(labels=True),
        ),
        y=alt.Y("sum(GB):Q",
                title="Share of Traffic Per Week",
                stack="normalize"),
        color=alt.Color(
            "category",
            title="Category (By Total)",
            scale=alt.Scale(scheme="tableau20"),
            sort=sort_list,
        ),
        order=alt.Order("order"),
    )

    (area + outage_annotation).properties(width=500).facet(
        column=alt.Column(
            'direction:N',
            title="",
        ),
        data=faceted_flows,
    ).save(
        "renders/bytes_per_category_cat_facet.png",
        scale_factor=2,
    )
def altair_box(data=None, encode_x=None, encode_y=None, encode_color=alt.Color(),
               height=None, width=None):
    """Generate a box plot with Altair.

    Parameters
    ----------
    data : Pandas DataFrame
        A tidy data frame.
    encode_x : str or altair.X instance
        Specification of x-values.
    encode_y : str or altair.Y instance
        Specification of y-values.
    encode_color : str or Color instance or None or Undefined (default)
        Specification of coloring of box plot. If Undefined (Default), all
        boxes are colored with Altair defaults. If None, the boxes are colored
        according to the categorical variable.
    height : float or None (default)
        Height of the chart, in pixels. If None, inferred.
    width : float or None (default)
        Width of the chart, in pixels. If None, inferred.

    Returns
    -------
    output : Chart
        Altair Chart instance.

    Raises
    ------
    RuntimeError
        If a data type cannot be determined, if neither axis is
        nominal/ordinal, or if both axes are nominal.
    """
    # Make Altair instances
    if isinstance(encode_x, alt.X):
        x = encode_x
    else:
        x = alt.X(encode_x)
    if isinstance(encode_y, alt.Y):
        y = encode_y
    else:
        y = alt.Y(encode_y)

    # Get column names (strip a trailing ':T'-style type code, if present)
    if len(x.shorthand) > 1 and x.shorthand[-2] == ':':
        x_name = x.shorthand[:-2]
    else:
        x_name = x.shorthand
    if len(y.shorthand) > 1 and y.shorthand[-2] == ':':
        y_name = y.shorthand[:-2]
    else:
        y_name = y.shorthand

    # Get axis titles
    if isinstance(x.title, alt.utils.schemapi.UndefinedType):
        x_title = x_name
    else:
        x_title = x.title
    if isinstance(y.title, alt.utils.schemapi.UndefinedType):
        y_title = y_name
    else:
        y_title = y.title

    # Determine types ('Q', 'N', 'O', 'T') for each axis.
    var_types = [None, None]
    for i, (var, var_name) in enumerate(zip((x, y), ('x', 'y'))):
        if not isinstance(var.type, alt.utils.schemapi.UndefinedType):
            var_types[i] = var.type[0].upper()
        elif len(var.shorthand) > 1 and var.shorthand[-2] == ':':
            var_types[i] = var.shorthand[-1]
        else:
            # BUGFIX: the original interpolated the Altair object itself into
            # the message (f'encode_{var}'); name the offending argument.
            raise RuntimeError(
                f'Data type of `encode_{var_name}` must be specified.')

    # Make sure data types are given and ok
    if var_types[0] not in 'NO' and var_types[1] not in 'NO':
        raise RuntimeError('Either `x` or `y` must be nominal or ordinal.')
    # BUGFIX: was `var_types == ['N, N']` — a one-element list holding the
    # malformed string 'N, N', which could never match, so two nominal axes
    # slipped through unchecked.
    if var_types == ['N', 'N']:
        raise RuntimeError('Cannot have both `x` and `y` be nominal.')

    # Decide if it's a horizontal plot or not
    if var_types[0] in 'NO':
        horizontal = False
        cats = x_name
        val = y_name
        if encode_color is None:
            encode_color = alt.Color(f'{cats}:N', title=x.title)
    else:
        horizontal = True
        cats = y_name
        val = x_name
        if encode_color is None:
            encode_color = alt.Color(f'{cats}:N', title=y.title)

    # Set up groupby object
    grouped = data.groupby(cats)
    n_boxes = len(grouped)

    # Set default heights and widths, also of bars
    if width is None:
        width = 400 if horizontal else 200
    if height is None:
        height = 200 if horizontal else 300
    if horizontal:
        size = height * 0.9 / n_boxes
    else:
        size = width * 0.9 / n_boxes

    # Data frame for boxes and whiskers
    df_box = (grouped[val].apply(_box_and_whisker)
              .reset_index()
              .rename(columns={'level_1': 'box_val'})
              .pivot(index=cats, columns='box_val'))
    df_box.columns = df_box.columns.get_level_values(1)
    df_box = df_box.reset_index()

    # Data frame for outliers
    df_outlier = grouped[val].apply(_outliers).reset_index(level=0)

    if horizontal:
        chart_box = alt.Chart(
            data=df_box, width=width, height=height
        ).mark_bar(
            size=size
        ).encode(
            y=alt.Y(f'{cats}:N', title=y_title),
            x=alt.X('bottom:Q', title=x_title),
            x2=alt.X2('top:Q', title=x_title),
            color=encode_color)
        chart_median = alt.Chart(
            data=df_box, width=width, height=height
        ).mark_tick(
            size=size, color='white'
        ).encode(
            y=alt.Y(f'{cats}:N', title=y_title),
            x=alt.X('middle:Q', title=x_title))
        chart_whisker = alt.Chart(
            data=df_box, width=width, height=height
        ).mark_rule(
        ).encode(
            y=alt.Y(f'{cats}:N', title=y_title),
            x=alt.X('bottom_whisker:Q', title=x_title),
            x2=alt.X2('top_whisker:Q', title=x_title))
        chart_outliers = alt.Chart(
            data=df_outlier, width=width, height=height
        ).mark_point(
        ).encode(
            y=alt.Y(f'{cats}:N', title=y_title),
            x=alt.X(f'{val}:Q', title=x_title),
            color=encode_color)
    else:
        chart_box = alt.Chart(
            data=df_box, width=width, height=height
        ).mark_bar(
            size=size
        ).encode(
            x=alt.X(f'{cats}:N', title=x_title),
            y=alt.Y('bottom:Q', title=y_title),
            y2=alt.Y2('top:Q', title=y_title),
            color=encode_color)
        chart_median = alt.Chart(
            data=df_box, width=width, height=height
        ).mark_tick(
            size=size, color='white'
        ).encode(
            x=alt.X(f'{cats}:N', title=x_title),
            y=alt.Y('middle:Q', title=y_title))
        chart_whisker = alt.Chart(
            data=df_box, width=width, height=height
        ).mark_rule(
        ).encode(
            x=alt.X(f'{cats}:N', title=x_title),
            y=alt.Y('bottom_whisker:Q', title=y_title),
            y2=alt.Y2('top_whisker:Q', title=y_title))
        chart_outliers = alt.Chart(
            data=df_outlier, width=width, height=height
        ).mark_point(
        ).encode(
            x=alt.X(f'{cats}:N', title=x_title),
            y=alt.Y(f'{val}:Q', title=y_title),
            color=encode_color)

    return chart_whisker + chart_box + chart_median + chart_outliers
def value_vs_time_chart(
    base: alt.Chart,
    active_fixed_viewpoint_selector: bool,
    sensor_name: str,
    sensor_unit: str,
    belief_horizon_unit: str,
    intuitive_forecast_horizon: bool,
    interpolate: bool,
    ci: float,
    event_value_range: Tuple[float, float],
) -> alt.LayerChart:
    """Layer the reference line, the belief line and a confidence band.

    Returns a LayerChart titled "Model results" combining all three layers,
    built on top of the shared ``base`` chart.
    """
    def stepwise(chart):
        # Smooth monotone line when interpolating; otherwise a rule mark
        # spanning each event from event_start to event_end.
        if interpolate is True:
            return chart.mark_line(interpolate="monotone")
        return chart.mark_rule().encode(x2=alt.X2("event_end:T"))

    # Line for the reference (true) values.
    reference_line = stepwise(base).encode(
        y=alt.Y(
            "reference_value",
            scale=alt.Scale(domain=(event_value_range[0], event_value_range[-1])),
        ),
        color=alt.ColorValue("black"),
        tooltip=[
            alt.Tooltip(
                "event_start:T",
                timeUnit="yearmonthdatehoursminutes",
                title="Event start",
            ),
            alt.Tooltip(
                "event_end:T", timeUnit="yearmonthdatehoursminutes", title="Event end"
            ),
            alt.Tooltip("reference_value:Q", title="Real value", format=".2f"),
        ],
    )

    # Line for the expected (belief) values.
    belief_line = stepwise(base).encode(
        y=alt.Y("expected_value", title=f"{sensor_name} ({sensor_unit})")
    )
    if active_fixed_viewpoint_selector is True:
        # Keep only the most recent belief at or before the selected time.
        belief_line = (
            belief_line.transform_filter(
                "datum.belief_time <= nearest_x_select.belief_time"
            )
            .transform_joinaggregate(
                most_recent_belief_time="max(belief_time)",
                groupby=["event_start", "source"],
            )
            .transform_filter("datum.belief_time == datum.most_recent_belief_time")
        )

    # Confidence band derived from the belief line.
    if interpolate is True:
        band = belief_line.mark_area(interpolate="monotone", opacity=0.3)
    else:
        band = belief_line.mark_bar(opacity=0.3)
    band = band.encode(
        y="lower_value",
        y2="upper_value",
        tooltip=[
            alt.Tooltip(
                "event_start:T",
                timeUnit="yearmonthdatehoursminutes",
                title="Event start",
            ),
            alt.Tooltip(
                "event_end:T", timeUnit="yearmonthdatehoursminutes", title="Event end"
            ),
            alt.Tooltip("expected_value:Q", title="Expected value", format=".2f"),
            alt.Tooltip(
                "belief_time:T",
                timeUnit="yearmonthdatehoursminutes",
                title="Belief time",
            ),
            alt.Tooltip(
                "belief_horizon:Q",
                title="%s (%s)"
                % (
                    "Forecast horizon"
                    if intuitive_forecast_horizon
                    else "Belief horizon",
                    belief_horizon_unit,
                ),
            ),
            alt.Tooltip("source", title="Source"),
            alt.Tooltip(
                "upper_value:Q",
                format=".2f",
                title="Upper value of {0:.0f}% confidence interval".format(100 * ci),
            ),
            alt.Tooltip(
                "lower_value:Q",
                format=".2f",
                title="Lower value of {0:.0f}% confidence interval".format(100 * ci),
            ),
        ],
    )

    return (reference_line + belief_line + band).properties(title="Model results")
def test_quantitative_x2_y2():
    # Build a point chart with quantitative x/y plus x2/y2 span channels,
    # then run it through the converter.
    encoded = alt.Chart(df_quant).mark_point().encode(
        alt.X('a'), alt.Y('b'), alt.X2('c'), alt.Y2('alpha')
    )
    _convert(ChartMetadata(encoded))
def test_convert_x2_y2_fail_temporal(column):
    # x2/y2 encodings on a temporal column are expected to fail in _convert.
    metadata = ChartMetadata(
        alt.Chart(df).mark_point().encode(alt.X2(column), alt.Y2(column))
    )
    _convert(metadata)
def get_gender_overview_graph():
    """Return all gender-related graphs.

    Builds a stacked horizontal bar chart of asylum applications per year by
    gender, plus two line charts of acceptance and shelter quotas, and
    concatenates them horizontally.
    """
    men = get_year_gender_df('Männer')
    women = get_year_gender_df('Frauen')

    # Stack the men's bars on top of the women's bars.
    men['Start'] = women['End']
    men['Display'] = men['End'] - women['End']
    women['Start'] = 0
    women['Display'] = women['End']
    combined = pd.concat([men, women], axis=0)

    print()
    print('Totale Anzahl Asylanträge über die letzten 25 Jahre:')
    print(men['End'].sum())
    print()

    men_status = get_year_gender_status_df('Männer')
    women_status = get_year_gender_status_df('Frauen')
    combined_status = pd.concat([men_status, women_status], axis=0)

    color_scale = alt.Scale(domain=["Frauen", "Männer"],
                            range=["#e5f5b8", "#58bdc0"])

    bars = alt.Chart(combined).mark_bar().encode(
        x=alt.X('Start:Q', title='Anzahl Asylgesuche'),
        x2=alt.X2('End:Q', title=''),
        y=alt.Y('Jahr:N'),
        color=alt.Color(
            'Gender:N',
            legend=alt.Legend(title='Gender'),
            scale=color_scale,
        ))

    # Value labels just right of each bar.
    text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
        x='End:Q', y='Jahr:N', text=alt.Text('Display:Q'))

    line_chart_accepted = alt.Chart(combined_status).mark_line().encode(
        alt.X('Jahr:O'),
        alt.Y('Anerkennungsquote:Q', axis=alt.Axis(format='%')),
        color='Gender:N')
    line_chart_shelter = alt.Chart(combined_status).mark_line().encode(
        alt.X('Jahr:O'),
        alt.Y('Schutzquote:Q', axis=alt.Axis(format='%')),
        color='Gender:N')

    line_charts = alt.vconcat(
        line_chart_accepted.properties(
            height=220, title='Annerkennungsquote nach Geschlecht'),
        line_chart_shelter.properties(
            height=220, title='Schutzquote nach Geschlecht'))

    return alt.hconcat(
        (bars + text).properties(
            height=540,
            title='Anzahl Asylgesuche über die Jahre, aufgeschlüsselt nach Geschlecht'),
        line_charts)
# Top-level setup for a "wheat and wages"-style layered chart: wheat-price
# bars, a wages area with two outline lines, and monarch-reign bars on top.
# NOTE(review): the final statement below is truncated mid-expression —
# `fill=alt.Fill("commonwealth:N",` has no closing arguments or parentheses,
# so the remainder of `top_bars` (and any later layers) is missing from this
# file and must be recovered before this module can run.
base_wheat = alt.Chart( data.wheat.url).transform_calculate(year_end="+datum.year + 5") base_monarchs = alt.Chart(data.monarchs.url).transform_calculate( offset="((!datum.commonwealth && datum.index % 2) ? -1: 1) * 2 + 95", off2="((!datum.commonwealth && datum.index % 2) ? -1: 1) + 95", y="95", x="+datum.start + (+datum.end - +datum.start)/2") bars = base_wheat.mark_bar(**{ "fill": "#aaa", "stroke": "#999" }).encode(x=alt.X("year:Q", axis=alt.Axis(format='d', tickCount=5)), y=alt.Y("wheat:Q", axis=alt.Axis(zindex=1)), x2=alt.X2("year_end")) area = base_wheat.mark_area(**{ "color": "#a4cedb", "opacity": 0.7 }).encode(x=alt.X("year:Q"), y=alt.Y("wages:Q")) area_line_1 = area.mark_line(**{"color": "#000", "opacity": 0.7}) area_line_2 = area.mark_line(**{"yOffset": -2, "color": "#EE8182"}) top_bars = base_monarchs.mark_bar(stroke="#000").encode( x=alt.X("start:Q"), x2=alt.X2("end"), y=alt.Y("y:Q"), y2=alt.Y2("offset"), fill=alt.Fill("commonwealth:N",
def plot_individual_effects(self):
    """Plot individual effects.

    Builds one layered chart per parameter (observed points, p5–p95 error
    bars, and a zero reference rule), truncating to the 10 smallest and 10
    largest individuals when more than 20 are present, and stacks the charts
    vertically with a shared x scale.
    """
    baselines = self.covariate_baselines
    effects = self.individual_effects.join(baselines)
    param_names = list(effects.index.get_level_values('parameter').unique())
    # Convert multiplicative effects to percent deviation from baseline.
    effects = (effects - 1) * 100
    effects = effects.sort_values(by=['observed'])

    charts = []
    for parameter in param_names:
        frame = effects.xs(parameter, level=1)
        ordered_ids = [str(int(i)) for i in frame.index]
        if len(frame) > 20:
            # Placeholder row marks where the middle of the list is elided.
            ordered_ids[10] = '...'
        frame = frame.reset_index()
        frame['ID'] = frame['ID'].astype(int).astype(str)

        error_bars = alt.Chart(frame).mark_errorbar(ticks=True).encode(
            x=alt.X('p5:Q',
                    title='Effect size in percent',
                    scale=alt.Scale(zero=False)),
            x2=alt.X2('p95:Q'),
            y=alt.Y('ID:N', title='ID', sort=ordered_ids),
            tooltip=['ID', 'p5', 'observed', 'p95'] + list(baselines.columns),
        )
        rule = alt.Chart(frame).mark_rule(
            strokeDash=[10, 2], color='gray'
        ).encode(x=alt.X('xzero:Q')).transform_calculate(xzero="0")
        points = alt.Chart(frame).mark_point(
            size=40, filled=True, color='black'
        ).encode(
            x=alt.X('observed:Q'),
            y=alt.Y('ID:N', sort=ordered_ids),
        )

        chart = alt.layer(
            points,
            error_bars,
            rule,
            data=frame,
            width=700,
            title=f'Individuals for parameter {parameter}',
        )
        if len(frame) > 20:
            # Keep the 10 lowest and 10 highest observed individuals and turn
            # the 11th-from-top row into the '...' placeholder.
            chart = (
                chart.transform_window(
                    sort=[alt.SortField('observed', order='ascending')],
                    rank='row_number(observed)',
                ).transform_window(
                    sort=[alt.SortField('observed', order='descending')],
                    nrank='row_number(observed)',
                ).transform_filter(
                    'datum.rank <= 10 | datum.nrank <= 11'
                ).transform_calculate(
                    ID="datum.nrank == 11 ? '...' : datum.ID",
                    p5="datum.nrank == 11 ? '...' : datum.p5",
                    p95="datum.nrank == 11 ? '...' : datum.p95",
                    observed="datum.nrank == 11 ? '...' : datum.observed",
                )
            )
        charts.append(chart)

    return alt.vconcat(*charts).resolve_scale(x='shared')
def plot_covariate_effects(self):
    """Plot covariate effects.

    Builds one faceted chart per parameter showing, per covariate condition,
    the mean effect (point + label), its p5–p95 interval (error bar), and a
    zero reference rule; stacks the charts vertically with a shared x scale.
    """
    # Convert multiplicative effects to percent deviation from baseline.
    effects = (self.covariate_effects - 1) * 100

    stats = pd.melt(
        self.covariate_statistics.reset_index(),
        var_name='condition',
        id_vars=['covariate'],
        value_vars=['p5', 'p95', 'other'],
    )
    stats = stats.replace({'p5': '5th', 'p95': '95th'}).set_index(
        ['covariate', 'condition'])

    effects = effects.join(stats, how='inner')
    # The left join reorders the index, pandas bug #34133
    effects = effects.reorder_levels(['parameter', 'covariate', 'condition'])

    param_names = list(effects.index.get_level_values('parameter').unique())
    charts = []
    for parameter in param_names:
        frame = effects.xs(parameter, level=0).reset_index()

        error_bars = alt.Chart(frame).mark_errorbar(ticks=True).encode(
            x=alt.X('p5:Q',
                    title='Effect size in percent',
                    scale=alt.Scale(zero=False)),
            x2=alt.X2('p95:Q'),
            y=alt.Y('condition:N', title=None),
        )
        rule = alt.Chart(frame).mark_rule(
            strokeDash=[10, 4], color='gray'
        ).encode(x=alt.X('xzero:Q')).transform_calculate(xzero="0")
        points = alt.Chart(frame).mark_point(
            filled=True, color='black'
        ).encode(
            x=alt.X('mean:Q'),
            y=alt.Y('condition:N'),
        )
        labels = alt.Chart(frame).mark_text(dy=-15, color="red").encode(
            x=alt.X("mean:Q"),
            y=alt.Y("condition:N"),
            text=alt.Text("value:Q"))

        chart = alt.layer(
            error_bars, rule, points, labels,
            data=frame, width=700, height=100,
        ).facet(
            columns=1.0,
            row=alt.Facet('covariate:N', title=None),
            title=f'{parameter}',
        ).resolve_scale(y='independent')
        charts.append(chart)

    return alt.vconcat(*charts).resolve_scale(x='shared')
def wsb_chart(
    data: pd.DataFrame,
    xvar: str = "start",
    x2var: str = "end",
    xvar_middle: str = "middle",
    yvar: str = "mantissa",
    vvar: str = "original",
    evar: str = "multiplier",
    xcat: str = "category",
    w: int = 400,
    h: int = 400,
    color_scheme: str = "orangered",
    title: str = "Width-Scale Bar Chart",
) -> alt.LayerChart:
    """Build a width-scale bar chart: rect marks spanning start→end per
    category, colored by magnitude multiplier, with category labels drawn
    just below the x axis. Clicking the legend highlights one multiplier.
    """
    bar_count = len(data[xcat].unique())
    pad = (w / bar_count) * 0.1

    # Shift each bar's horizontal span by an index-dependent padding so
    # adjacent bars do not touch. NOTE: operates on the literal "start"/"end"/
    # "middle" columns, not on xvar/x2var/xvar_middle.
    padded = data.copy()
    padded["start"] = padded["start"] + pad / 4 + pad / 2 * padded.index
    padded["end"] = padded["end"] + pad / 4 + pad / 2 * padded.index
    padded["middle"] = ((padded["end"] - padded["start"]) / 2) + padded["start"]

    selection = alt.selection_single(fields=["multiplier"], bind="legend")
    base = alt.Chart(padded, width=w, height=h)

    bar = base.mark_rect().encode(
        x=alt.X(
            f"{xvar}:Q",
            axis=alt.Axis(
                titleY=(-0.5 + 22),
                labels=False,
                title=xcat.capitalize(),
                grid=False,
                values=padded[xvar_middle].to_list(),
            ),
        ),
        x2=alt.X2(f"{x2var}:Q"),
        y=alt.Y(
            f"{yvar}:Q",
            axis=alt.Axis(
                title=yvar.capitalize(),
                titleAngle=0,
                titleAlign="left",
                titleY=-5,
                titleX=0,
                labelExpr="datum.value + ' ×'",
            ),
            scale=alt.Scale(domain=[0, 10]),
        ),
        color=alt.Color(
            f"{evar}:O",
            title="Magnitude Multiplier",
            legend=alt.Legend(labelExpr="'× ' + format(datum.value, ',')"),
            scale=alt.Scale(scheme=color_scheme),
        ),
        tooltip=[
            alt.Tooltip(f"{xcat}:N", title=xcat.capitalize()),
            alt.Tooltip(f"{vvar}:N", title="Value"),
            alt.Tooltip(f"{yvar}:Q", title=yvar.capitalize()),
            alt.Tooltip(f"{evar}:O", format=",", title="Magnitude Multiplier"),
        ],
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
    ).add_selection(selection)

    # Category labels positioned below the plot area (axis label metrics:
    # labelFontSize=10, tickSize=5, labelPadding=2, translate=0.5).
    text = base.mark_text(align="center", baseline="middle", fontSize=10).encode(
        x=alt.X(f"{xvar_middle}:Q"),
        y=alt.value(h + (10 / 2) + 5 + 2 + 0.5),
        text=alt.Text(f"{xcat}:N"),
    )

    return alt.layer(bar, text, title=alt.TitleParams(title, anchor="start"))
def _design_chart(title: str, query_list: list, queries: list, stat: str,
                  stat_list: dict, parts: SimpleNamespace) -> str:
    """Design an Altair chart for the given stat and return it as JSON.

    For ``stat == 'base'`` a min/max bar with a mean tick per row is built;
    otherwise a bar chart is built whose axes depend on which query aspects
    are ranges, scalars, or absent in ``parts``.
    """
    if stat == 'base':
        # Label rows by date if date components are present, else by query.
        y_title = 'date'
        y_val = [
            '.'.join(
                filter(None, [
                    x.get('year', ''), x.get('month', ''),
                    x.get('day', ''), x.get('hour', '')
                ])) for x in queries
        ]
        if y_val[0] == '':
            y_title = 'query'
            y_val = query_list

        df = pd.DataFrame({**{y_title: y_val}, **stat_list})
        minmax = alt.Chart(data=df, width=250).mark_bar(
            tooltip={"content": "encoding"}
        ).encode(
            x=alt.X('min'),
            x2=alt.X2('max'),
            y=alt.Y(y_title, sort=None),
            color=alt.Color('mean',
                            scale=alt.Scale(scheme='redblue'),
                            sort="descending"))
        mean = alt.Chart(data=df, width=250).mark_tick(
            color='white', thickness=3, tooltip={"content": "encoding"}
        ).encode(
            x='mean',
            y=alt.Y(y_title, sort=None))
        graph = (minmax + mean).configure_tick(
            bandSize=10  # controls the width of the tick
        ).configure_scale(
            rangeStep=10  # controls the width of the bar
        )
        if title is None:
            return graph.to_json()
        return graph.properties(title=title).to_json()

    # Classify each aspect of the query as a range, a single value, or absent.
    ranged_aspects = []
    scalar_aspects = []
    missing_aspects = []
    aspects = {'year', 'month', 'day', 'hour', 'geohash', 'feature'}
    for part in aspects:
        if hasattr(parts, part):
            if isinstance(getattr(parts, part), list):
                ranged_aspects.append(part)
            else:
                scalar_aspects.append(part)
        else:
            missing_aspects.append(part)

    # Defaults: one bar per query, colored/valued by the stat.
    x_title = 'query'
    x_val = query_list
    color_title = stat
    y_title = stat
    y_val = stat_list
    if len(ranged_aspects) == 0 and len(scalar_aspects) >= 3:
        x_title = 'date'
        x_val = [
            '.'.join(
                filter(None, [
                    x.get('year', ''), x.get('month', ''),
                    x.get('day', ''), x.get('hour', '')
                ])) for x in queries
        ]
    elif len(ranged_aspects) == 1:
        x_title = ranged_aspects[0]
        x_val = [x[ranged_aspects[0]] for x in queries]
    elif len(ranged_aspects) == 2:
        x_title = ranged_aspects[0]
        x_val = [x[ranged_aspects[0]] for x in queries]
        y_title = ranged_aspects[1]
        y_val = [y[ranged_aspects[1]] for y in queries]

    df = pd.DataFrame({**{x_title: x_val}, **{y_title: y_val}, **stat_list})
    graph = alt.Chart(data=df, height=300, width=400).mark_bar(
        tooltip={"content": "encoding"}
    ).encode(
        alt.X(x_title, sort=None),
        alt.Y(y_title, sort=None),
        alt.Color(color_title,
                  scale=alt.Scale(scheme='spectral'),
                  sort="descending"))
    if title is None:
        return graph.to_json()
    return graph.properties(title=title).to_json()
def visualize_timelime(self):
    """Render a timeline bar chart of tracked object appearances.

    Each tracked label gets one bar per appearance span, with frame indices
    converted to seconds (assuming 30 fps), saved as a PNG.
    """
    print("[INFO] generating dashboard.")

    # Collect (start, end, label) rows from the tracker's data points,
    # converting frame numbers to seconds at 30 fps.
    rows = []
    for _, point in self.tracker.vis_data_points.items():
        for span in point.appereances:
            rows.append((span[0] / 30, span[1] / 30, str(point.label)))

    df = pd.DataFrame(rows, columns=["from", "to", "label"])
    chart = alt.Chart(df).mark_bar().encode(
        alt.X("from", title="Timeline(seconds)"),
        alt.X2("to", title=""),
        y="label",
        color=alt.Color("label", scale=alt.Scale(scheme='dark2')),
    )
    chart.save(constants.OUT_PATH + "chart_timeline.png")