def mean_price_of_natural_gas_over_time_by_state(df):
    """Render a hover-highlighted line chart of natural-gas prices per state.

    Rows of ``df`` whose ``Product`` equals ``'NATURAL GAS'`` are drawn as one
    line per ``State`` (``Year_Month`` on x, ``mean(Mean_Price)`` on y).  The
    line nearest to the mouse pointer is drawn thicker.  The layered chart is
    handed to ``st.altair_chart``; nothing is returned.

    Args:
        df: DataFrame providing the columns ``Product``, ``Year_Month``,
            ``Mean_Price`` and ``State``.
    """
    natural_gas = df[df.Product == 'NATURAL GAS'].reset_index(drop=True)

    # The lines are coloured per State, so the hover selection has to be
    # keyed on State as well.  Keying it on Macroregion (as before) thickened
    # every state of the hovered region at once.
    highlight = alt.selection(
        type='single', on='mouseover', fields=['State'], nearest=True)

    base = alt.Chart(
        natural_gas,
        title='Mean Price of Natural Gas Over Time by State',
    ).mark_line().encode(
        x='Year_Month',
        y='mean(Mean_Price)',
        color=alt.Color('State', legend=alt.Legend(title="State by color")),
    ).properties(width=1000, height=400)

    # Fully transparent copy of the lines: it only exists to carry the
    # selection so that "nearest" hovering has something to snap to.
    points = base.mark_line().encode(
        opacity=alt.value(0),
    ).add_selection(highlight).properties(width=600)

    # Visible lines: thin by default, thick for the highlighted state.
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3)))

    st.altair_chart(points + lines)
def plot_scatter(country=None, variety=None, price_range=(4, 1500),
                 year_range=(1900, 2017), points_range=(80, 100)):
    """Return the HTML of a price-vs-score scatter plot of ``wine_df``.

    Args:
        country: Optional collection of countries to keep (``isin`` filter).
            A falsy value disables the filter.
        variety: Optional collection of varieties to keep (``isin`` filter).
            A falsy value disables the filter.
        price_range: ``[low, high]`` inclusive bounds on ``price``.
        year_range: ``[low, high]`` inclusive bounds on ``year``.
        points_range: ``[low, high]`` inclusive bounds on ``points``.

    Returns:
        The chart serialised with ``Chart.to_html()``.
    """
    # All range filters are evaluated against the same frame.  points_range
    # used to be accepted but silently ignored; it is applied here alongside
    # the price and year bounds.
    in_range = (
        (wine_df['price'] >= price_range[0])
        & (wine_df['price'] <= price_range[1])
        & (wine_df['year'] >= year_range[0])
        & (wine_df['year'] <= year_range[1])
        & (wine_df['points'] >= points_range[0])
        & (wine_df['points'] <= points_range[1])
    )
    wine = wine_df[in_range]

    if country:
        wine = wine[wine['country'].isin(country)]
    if variety:
        wine = wine[wine['variety'].isin(variety)]

    chart_1 = alt.Chart(wine, title='Wine Rating by Price').mark_point().encode(
        x=alt.X('price', title="Price", scale=alt.Scale(zero=False)),
        y=alt.Y('points', title="Score", scale=alt.Scale(zero=False)),
        color=alt.Color(
            'country',
            scale=alt.Scale(scheme='darkred'),
            legend=alt.Legend(symbolLimit=18, title="Country"),
        ),
        tooltip=['title', 'points', 'price', 'variety'],
    ).interactive()

    chart = chart_1.properties(width=380, height=280)
    return chart.to_html()
def setup_county_vaccinated_population_chart(df, county_name):
    """Build the "vaccinated population" chart for a single county.

    Keeps the rows of ``df`` for ``county_name`` whose ``variable`` is one of
    the two cumulative vaccination measures, adds a human-readable
    ``legend_value`` column, and encodes ``proportion`` on y, coloured by
    that label, on top of ``base_vaccination_chart``.

    Args:
        df: DataFrame with the columns ``county``, ``variable`` and
            ``proportion``.
        county_name: County to plot; also used in the chart title.

    Returns:
        The configured Altair chart.
    """
    legend_dict = {
        "cumulative_at_least_one_dose": "At least 1 dose",
        "cumulative_fully_vaccinated": "Fully vaccinated",
    }

    is_county = df.county == county_name
    is_tracked_variable = df.variable.isin(legend_dict.keys())
    # Labels are computed on the full frame; ``assign`` aligns them on index.
    legend_labels = df.variable.map(legend_dict)
    df = df[is_county & is_tracked_variable].assign(legend_value=legend_labels)

    y_encoding = alt.Y(
        "proportion:Q",
        title="% County's Population",
        axis=alt.Axis(format="%"),
    )
    color_encoding = alt.Color(
        "legend_value:N",
        legend=alt.Legend(title=""),
        scale=alt.Scale(
            domain=["At least 1 dose", "Fully vaccinated"],
            range=[navy, blue],
        ),
    )

    base = base_vaccination_chart(df)
    chart = base.encode(y=y_encoding, color=color_encoding).properties(
        title=f"{county_name} County: Vaccinated Population",
        width=chart_width * scaling_factor,
        height=chart_height,
    )
    return chart
def andrews_curve(
    data: pd.DataFrame,
    xvar: str = "t",
    yvar: str = "curve_value",
    targetvar: str = "target",
    samplevar: str = "sample",
    w: int = 450,
    h: int = 300,
) -> alt.LayerChart:
    """Draw one line per sample, with legend-driven highlighting by target.

    Two layers are returned: a light-gray background with one line per
    ``samplevar`` value, and a foreground copy coloured by ``targetvar`` in
    which lines outside the legend selection are transparent.

    Args:
        data: Long-format frame containing the four named columns.
        xvar: Quantitative column for the x axis.
        yvar: Quantitative column for the y axis.
        targetvar: Nominal column used for colour and the legend selection.
        samplevar: Nominal column identifying individual lines.
        w: Chart width in pixels.
        h: Chart height in pixels.

    Returns:
        The background layer plus the selectable, coloured layer.
    """
    legend_selection = alt.selection_single(fields=[targetvar], bind="legend")

    line_base = alt.Chart(data).properties(width=w, height=h).mark_line()

    x_channel = alt.X(
        f"{xvar}:Q", axis=alt.Axis(title=None), scale=alt.Scale(nice=False))
    y_channel = alt.Y(f"{yvar}:Q", axis=alt.Axis(title=None))
    background_chart = line_base.encode(
        x=x_channel,
        y=y_channel,
        detail=alt.Detail(f"{samplevar}:N"),
        color=alt.value(COLORS["light_gray"]),
    )

    # Same geometry as the background, but coloured by target while selected.
    highlight_color = alt.condition(
        legend_selection,
        f"{targetvar}:N",
        alt.value("transparent"),
        legend=alt.Legend(title=f"{targetvar.title()} (click to highlight)"),
    )
    foreground_chart = background_chart.encode(
        color=highlight_color,
    ).add_selection(legend_selection)

    return background_chart + foreground_chart
def get_new_values(model, classifier, collect, file_name):
    """Read distances from ``model``, classify them and return a scatter chart.

    The readings from ``model.get_distance()`` are passed through
    ``normalize`` and ``classify``; the resulting class label is shown in the
    legend title.  When ``collect`` is ``"YES"`` the normalised readings are
    appended as one CSV row to ``file_name``.

    Args:
        model: Object exposing ``get_distance()``.
        classifier: Passed through to ``classify``.
        collect: The string ``"YES"`` enables CSV logging; anything else
            disables it.
        file_name: Path of the CSV file to append to.

    Returns:
        An Altair scatter chart of distance against degree index.
    """
    classes = {0: "Left", 1: "Forward", 2: "Right", 3: "STOP!"}

    distances = normalize(model.get_distance())
    cls_val = classes[classify(distances, classifier)]

    if collect == "YES":
        # The context manager closes the file; no explicit close() is needed.
        with open(file_name, 'a', newline='') as out_file:
            csv.writer(out_file).writerow(distances)

    # Cap values at 1.0 for plotting (the uncapped values were logged above).
    distances = [(val if val <= 1.0 else 1.0) for val in distances]

    # assumes len(distances) == DEGREES, otherwise the array is ragged
    # -- TODO confirm
    df = pd.DataFrame(
        np.array([list(range(DEGREES)), distances]).T,
        columns=['degrees', 'distance'],
    )

    scatter = alt.Chart(df).mark_circle(size=50).encode(
        x="degrees",
        y="distance",
        color=alt.Color(
            'distance', legend=alt.Legend(title="Class: " + cls_val)),
    ).properties(width=650, height=400)
    return scatter
def get_chart(self, df):
    """Build a per-supplier line chart with a hover rule and tooltip.

    The result layers three charts: the lines (``totali`` over
    ``data_somministrazione``, coloured by ``fornitore``), a circle on each
    line at the hovered date, and a vertical rule whose tooltip lists the
    pivoted per-supplier values for that date.

    Args:
        df: Long-format frame with the columns ``data_somministrazione``,
            ``totali`` and ``fornitore``.

    Returns:
        The layered Altair chart.
    """
    supplier_color = alt.Color(
        'fornitore',
        title="Fornitore",
        legend=alt.Legend(orient="top"),
        sort=["Totali"],
    )
    lines = alt.Chart(df).mark_line().encode(
        x=alt.X('data_somministrazione:T', title="Data di somministrazione"),
        y=alt.Y('totali', title="Numero di vaccini somministrati"),
        color=supplier_color,
    )

    # Selects the date nearest to the pointer; empty until the first hover.
    hover = alt.selection_single(
        fields=["data_somministrazione"],
        nearest=True,
        on="mouseover",
        empty="none",
        clear="mouseout",
    )

    points = lines.transform_filter(hover).mark_circle()

    tt = [
        "data_somministrazione:T",
        "Totali:Q",
        "Pfizer/BioNTech:Q",
        "Moderna:Q",
        "Vaxzevria (AstraZeneca):Q",
        "Janssen:Q",
    ]
    # Pivot to one row per date so a single rule can show every supplier.
    pivoted = alt.Chart(df).transform_pivot(
        "fornitore", "totali", groupby=["data_somministrazione"])
    rule = pivoted.mark_rule().encode(
        x='data_somministrazione:T',
        opacity=alt.condition(hover, alt.value(0.3), alt.value(0)),
        tooltip=tt,
    )
    tooltips = rule.add_selection(hover).properties(height=450).interactive()

    return lines + points + tooltips
def make_heatmap_plot(y_category, damage):
    """Return the HTML of a year-by-category heatmap of bird-strike counts.

    Args:
        y_category: Column of the module-level ``df`` to put on the y axis;
            ``'state'`` yields a 600 px tall chart, anything else 1000 px.
        damage: Collection of ``damage_level`` values to include.  A single
            string is treated as a one-element collection.

    Returns:
        The chart serialised with ``Chart.to_html()``, or ``None`` when no
        damage level is selected.
    """
    if not damage:
        return None
    if isinstance(damage, str):
        damage = [damage]

    # Filter with isin() rather than assembling a query string from the
    # selected values, which breaks on values containing quotes.
    selected = df[df['damage_level'].isin(damage)]

    # The separating space after "by" was missing ("Bird Strikes byState").
    main_title = 'Bird Strikes by ' + y_category.title()
    plot_height = 600 if y_category == 'state' else 1000

    heatmap_plot = alt.Chart(selected, title=main_title).mark_rect().encode(
        alt.X('year:N', axis=alt.Axis(title="Year", labelAngle=0)),
        alt.Y(y_category + ':O', axis=alt.Axis(title=y_category.title())),
        alt.Color(
            'count(damage_level)',
            scale=alt.Scale(scheme="lighttealblue"),
            legend=alt.Legend(title="Bird Strikes"),
        ),
        alt.Tooltip(['year', 'state', 'count(damage_level)']),
    ).properties(width=600, height=plot_height)

    return heatmap_plot.to_html()
def plot_year_wise_trend(year_range, continent, color_axis):
    """Return the HTML of a mean-life-expectancy-per-year line chart.

    Args:
        year_range: ``[first_year, last_year]`` inclusive bounds on ``year``.
        continent: Collection of continents to keep.  ``None`` or an empty
            value selects every continent present in ``dataset_df``.
        color_axis: Column of ``dataset_df`` to group by and colour the
            lines with.

    Returns:
        The chart serialised with ``Chart.to_html()``.
    """
    chosen_starting_year = year_range[0]
    chosen_ending_year = year_range[1]

    temp_df = dataset_df[
        (dataset_df["year"] <= chosen_ending_year)
        & (dataset_df["year"] >= chosen_starting_year)
    ]

    if continent is None or len(continent) == 0:
        continent = dataset_df.continent.unique().tolist()
    temp_df = temp_df[temp_df["continent"].isin(continent)]

    # Select the column *before* aggregating: averaging every column first
    # raises on non-numeric columns in pandas >= 2.0 and wastes work.
    yearly_mean = (
        temp_df.groupby([color_axis, "year"])["life_expectancy"]
        .mean()
        .reset_index()
    )

    year_wise_trend_chart = (
        alt.Chart(yearly_mean)
        .mark_line()
        .encode(
            alt.X("year:N", axis=alt.Axis(labelAngle=360), title="Year"),
            y=alt.Y(
                "mean(life_expectancy)",
                scale=alt.Scale(zero=False),
                title="Mean Life Expectancy",
            ),
            color=alt.Color(
                color_axis, title=None, legend=alt.Legend(orient='bottom')),
        )
        .configure_axis(labelFontSize=10, titleFontSize=14)
        .configure_legend(labelFontSize=12)
        .properties(width=400)
    )
    return year_wise_trend_chart.to_html()
def get_wpm_plot(df):
    """Build a histogram of words-per-minute, coloured by participant group.

    Works on a copy of ``df``.  Rows whose ``Start Date`` compares less than
    ``"2021-04-05"`` are labelled ``'Amazon'``, all others ``'XLab'``.

    Args:
        df: Frame with the columns ``wpm``, ``Start Date`` and the five
            Likert columns ``Interest``, ``Effective``, ``Intelligence``,
            ``Writing`` and ``Meet``.

    Returns:
        The configured Altair bar chart.
    """
    df2 = df.copy()
    # NOTE(review): likert_var is not referenced by any encoding below; it is
    # kept so the data attached to the chart is unchanged.
    df2['likert_var'] = np.var(
        df2[['Interest', 'Effective', 'Intelligence', 'Writing', 'Meet']],
        axis=1)
    df2['group'] = 'XLab'
    df2.loc[(df2['Start Date'] < "2021-04-05"), 'group'] = 'Amazon'

    group_color = alt.Color(
        'group:N',
        scale=alt.Scale(range=[berkeley_palette['berkeley_blue'],
                               berkeley_palette['california_gold']]),
        legend=alt.Legend(title="Participant Group", padding=10,
                          symbolType="square", symbolStrokeWidth=1,
                          orient="right", offset=-170),
    )

    p = (
        alt.Chart(df2)
        .mark_bar(opacity=0.8, stroke=berkeley_palette['black'],
                  strokeWidth=0.5)
        .encode(
            x=alt.X('wpm:Q', bin=alt.Bin(maxbins=100),
                    title="Words per Minute (bin=100)"),
            y=alt.Y('count()', title='Frequency'),
            color=group_color,
        )
        .properties(height=300, width=650,
                    title={'text': 'Distribution of Response Time',
                           'subtitle': 'Evaluated in Words per Minute'})
        .configure(padding={'top': 20, 'left': 20, 'right': 20, 'bottom': 20})
        .configure_facet(spacing=10)
        .configure_view(stroke=None)
        # One call only: each configure_title() call replaces the previous
        # title config, so two separate calls lost anchor='middle'.
        .configure_title(anchor='middle', dy=-5)
        .configure_axis(grid=False)
    )
    return p
def create_plot(topo_data, data, column_name, data_type, tooltip_columns=None,
                stroke='darkgrey', strokeWidth=0.9, legend_title=None,
                scheme='reds', missing_color='white'):
    """Build a two-layer choropleth: a plain background plus coloured shapes.

    The coloured layer looks up ``column_name`` (and any tooltip columns) in
    ``data`` by matching ``properties.CODE_INS`` of each shape against the
    ``niscode`` column.  The background layer draws every shape in
    ``missing_color`` underneath it.

    Args:
        topo_data: Geographic data accepted by ``alt.Chart``.
        data: Table with a ``niscode`` column plus the looked-up columns.
        column_name: Column of ``data`` that drives the colour.
        data_type: Altair type code appended to ``column_name``.
        tooltip_columns: Optional columns of ``data`` to show as nominal
            tooltips.  ``None`` means no tooltip.
        stroke: Outline colour of the background shapes.
        strokeWidth: Outline width of the background shapes.
        legend_title: Legend title; defaults to ``column_name``.
        scheme: Colour scheme name for the colour scale.
        missing_color: Fill colour of the background layer.

    Returns:
        The layered Altair chart (background + coloured layer).
    """
    # The default None used to crash the tooltip comprehension with a
    # TypeError; normalise it once and use it for both lookup and tooltip.
    tooltip_columns = [] if tooltip_columns is None else list(tooltip_columns)
    lookup_columns = [column_name, *tooltip_columns]

    if legend_title is None:
        legend_title = column_name

    encodings = {
        'color': alt.Color(
            f'{column_name}:{data_type}',
            legend=alt.Legend(title=legend_title),
            scale=alt.Scale(scheme=scheme),
        ),
    }
    if tooltip_columns:
        encodings['tooltip'] = [f'{column}:N' for column in tooltip_columns]

    base = alt.Chart(topo_data) \
        .mark_geoshape() \
        .encode(**encodings) \
        .transform_lookup(
            lookup='properties.CODE_INS',
            from_=alt.LookupData(data, 'niscode', lookup_columns),
        ).properties(
            width=600,
            height=450,
        )

    background = alt.Chart(topo_data) \
        .mark_geoshape(stroke=stroke, strokeWidth=strokeWidth) \
        .encode(
            color=alt.value(missing_color),
            opacity=alt.value(0.9),
        )

    return background + base
def plot_chart2(size):
    """Return the HTML of a brushable training-hours density plot.

    Rows of the module-level ``hr2`` with ``company_size == size`` are turned
    into a density estimate of ``training_hours``.  A short overview chart
    carries an x-interval brush that sets the x domain of the detail chart
    stacked above it.

    Args:
        size: Value of ``company_size`` to plot.

    Returns:
        The vertically concatenated chart serialised with ``to_html()``.
    """
    brush = alt.selection_interval(encodings=['x'])

    subset = hr2[hr2.company_size == size]
    density = alt.Chart(subset).transform_density(
        'training_hours',
        groupby=['company_size'],
        as_=['training_hours', 'density'],
    )

    area = density.mark_area(opacity=0.5).encode(
        x=alt.X('training_hours', title="Training Hours",
                axis=alt.Axis(format='~s', labelFontSize=16,
                              titleFontSize=20)),
        y=alt.Y('density:Q', title="Hours Density",
                axis=alt.Axis(labelFontSize=16, titleFontSize=20)),
        color=alt.Color('company_size:N',
                        legend=alt.Legend(title='Company Size',
                                          labelFontSize=16,
                                          titleFontSize=16)),
    ).properties(width=450, height=400)

    # Overview strip that owns the brush.
    full = area.properties(height=80).add_selection(brush)

    # Detail view whose x domain follows the brush.
    detail = area.encode(
        x=alt.X('training_hours', title="Training Hours",
                axis=alt.Axis(labelFontSize=16, titleFontSize=20),
                scale=alt.Scale(domain=brush)))

    stacked = detail & full
    return stacked.to_html()
def make_chart(self, df):
    """Layer raw points and a 7-row moving average on a symlog y scale.

    Args:
        df: Frame with the columns ``datum_date``, ``variable`` and
            ``value``.  Rows containing nulls are dropped before plotting;
            the y-domain maximum is taken from ``df`` before that drop.

    Returns:
        The layered chart, coloured by ``variable``.
    """
    max_value = df['value'].max()

    def symlog_y(field):
        # Both layers share the same symlog scale and fixed domain.
        return alt.Y(field, title=None,
                     scale=alt.Scale(type='symlog', domain=[0, max_value]))

    date_x = alt.X('yearmonthdate(datum_date):T', title=None,
                   axis=alt.Axis(format='%d/%m'))
    base = alt.Chart(df.dropna()).encode(x=date_x)

    scatter = base.mark_point(opacity=0.5, clip=True).encode(
        y=symlog_y('value:Q'),
        tooltip=['datum_date', 'variable', 'value'],
    )

    # Window of the current row and the six preceding rows per variable.
    windowed = base.transform_window(
        frame=[-6, 0],
        mean_value='mean(value)',
        groupby=['variable'],
    )
    average = windowed.mark_line(strokeWidth=3).encode(
        y=symlog_y('mean_value:Q'))

    variable_color = alt.Color(
        'variable',
        title=None,
        legend=alt.Legend(orient="top", labelLimit=250),
        sort=['Confirmados', 'Probables', 'Muertes'],
    )
    layered = average + scatter
    return layered.encode(color=variable_color).properties(
        width=600, height=400)
def make_chart(self, df):
    """Build one choropleth row per new-case variable, keyed by municipality.

    The shapes returned by ``self.geography()`` are joined to ``df`` by
    matching ``properties.NAME`` against the ``Municipio`` column, and the
    map is repeated once per entry in ``variables`` with independent colour
    scales.

    Args:
        df: Frame with a ``Municipio`` column plus the two columns named in
            ``variables``.

    Returns:
        The repeated, configured Altair chart.
    """
    # NOTE(review): the first name previously read "boletÃn" (mis-decoded
    # UTF-8 for "boletín"); confirm it matches the column name in ``df``.
    variables = [
        'Casos nuevos (último boletín)',
        'Casos nuevos (últimos 7 boletines)',
    ]
    municipalities = self.geography()

    color = alt.Color(
        alt.repeat('row'),
        type='quantitative',
        sort="descending",
        scale=alt.Scale(
            type='symlog',
            scheme='redgrey',
            domainMid=0,
            # WORKAROUND: Set the domain manually to forcibly
            # include zero or else we run into
            # https://github.com/vega/vega-lite/issues/6544
            domain=alt.DomainUnionWith(unionWith=[0]),
        ),
        legend=alt.Legend(orient='left', titleLimit=400, titleOrient='left'),
    )
    tooltip = [
        alt.Tooltip(field='properties.NAME', type='nominal'),
        alt.Tooltip(alt.repeat('row'), type='quantitative'),
    ]

    joined = alt.Chart(municipalities).transform_lookup(
        lookup='properties.NAME',
        from_=alt.LookupData(df, 'Municipio', variables),
        default='0',
    )
    single_map = joined.mark_geoshape().encode(
        color=color,
        tooltip=tooltip,
    ).properties(width=575, height=200)

    return single_map.repeat(row=variables).resolve_scale(
        color='independent',
    ).configure_view(
        strokeWidth=0,
    ).configure_concat(spacing=80)
def mean_price_of_diesel_s10_over_time_by_region(df):
    """Render a hover-highlighted line chart of Diesel S10 prices per region.

    Rows of ``df`` whose ``Product`` equals ``'DIESEL S10'`` are drawn as one
    line per ``Macroregion`` (``Year_Month`` on x, ``mean(Mean_Price)`` on
    y).  The line nearest to the pointer is drawn thicker.  The layered
    chart is handed to ``st.altair_chart``; nothing is returned.

    Args:
        df: DataFrame providing the columns ``Product``, ``Year_Month``,
            ``Mean_Price`` and ``Macroregion``.
    """
    diesel_rows = df[df.Product == 'DIESEL S10'].reset_index(drop=True)

    highlight = alt.selection(
        type='single', on='mouseover', fields=['Macroregion'], nearest=True)

    region_color = alt.Color(
        'Macroregion', legend=alt.Legend(title="Macroregion by color"))
    base = alt.Chart(
        diesel_rows,
        title='Mean Price of Diesel S10 Over Time by Region',
    ).mark_line().encode(
        x='Year_Month',
        y='mean(Mean_Price)',
        color=region_color,
    ).properties(width=1000, height=400)

    # Invisible layer that carries the selection for "nearest" hovering.
    invisible = base.mark_line().encode(opacity=alt.value(0))
    points = invisible.add_selection(highlight).properties(width=600)

    # Visible lines: thin by default, thick when highlighted.
    line_size = alt.condition(~highlight, alt.value(1), alt.value(3))
    lines = base.mark_line().encode(size=line_size)

    st.altair_chart(points + lines)
def make_since_chart(dff2, highlight_countries, baseline_countries):
    """Plot confirmed cases against days since 100 cases, per country.

    Layers, in drawing order: a dashed reference line growing 30 % per day
    from 100 cases, one line with points per country (log y scale), and a
    bold country label at each country's rows on the latest date.  Countries
    can be toggled from the legend; the highlight and baseline countries
    start selected.

    Args:
        dff2: Frame with the columns ``Date``, ``Country``,
            ``Days since 100 cases`` and ``Confirmed Cases``.  Every column
            is included in the tooltip.
        highlight_countries: List of countries named first in the title.
        baseline_countries: List of countries named second in the title.

    Returns:
        The layered Altair chart.
    """
    max_date = dff2['Date'].max()
    color_domain = list(dff2['Country'].unique())
    color_range = [get_country_colors(name) for name in color_domain]

    initially_selected = [
        {'Country': name} for name in highlight_countries + baseline_countries
    ]
    selection = alt.selection_multi(
        fields=['Country'], bind='legend', init=initially_selected)

    country_color = alt.Color(
        'Country:N',
        scale=alt.Scale(domain=color_domain, range=color_range),
        legend=alt.Legend(columns=2),
    )
    base = alt.Chart(dff2, width=550).encode(
        x='Days since 100 cases:Q',
        y=alt.Y('Confirmed Cases:Q', scale=alt.Scale(type='log')),
        color=country_color,
        tooltip=list(dff2),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.05)),
    )

    # Fixed horizon for the reference curve (the data's own maximum day was
    # considered in an earlier revision but is not used).
    max_day = 35
    ref = pd.DataFrame(
        [[x, 100*1.3**x] for x in range(max_day+1)],
        columns=['Days since 100 cases', 'Confirmed Cases'],
    )
    base_ref = alt.Chart(ref).encode(
        x='Days since 100 cases:Q', y='Confirmed Cases:Q')

    # NOTE: a text label for the reference line was sketched in an earlier
    # revision and is currently disabled.
    reference_line = base_ref.mark_line(
        color='black', opacity=.5, strokeDash=[3, 3])
    country_lines = base.mark_line(point=True).add_selection(selection)

    # Label each country at its rows on the latest date (epoch millis).
    latest_millis = int(max_date.timestamp() * 1000)
    country_labels = base.transform_filter(
        alt.datum['Date'] >= latest_millis
    ).mark_text(dy=-8, align='right', fontWeight='bold').encode(
        text='Country:N')

    layered = reference_line + country_lines + country_labels
    return layered.properties(
        title=f"Compare {', '.join(highlight_countries)} trajectory with {', '.join(baseline_countries)}"
    )
def get_likert_variance(df):
    """Build density curves of per-row Likert variance, one per group.

    Works on a copy of ``df``.  ``likert_var`` is the row-wise ``np.var`` of
    the five Likert columns.  Rows whose ``Start Date`` compares less than
    ``"2021-04-05"`` are labelled ``'Amazon'``, all others ``'XLab'``.

    Args:
        df: Frame with the columns ``Start Date``, ``Interest``,
            ``Effective``, ``Intelligence``, ``Writing`` and ``Meet``.

    Returns:
        The configured Altair area chart.
    """
    df2 = df.copy()
    df2['likert_var'] = np.var(
        df2[['Interest', 'Effective', 'Intelligence', 'Writing', 'Meet']],
        axis=1)
    df2['group'] = 'XLab'
    df2.loc[(df2['Start Date'] < "2021-04-05"), 'group'] = 'Amazon'

    group_color = alt.Color(
        'group:N',
        # Domain follows order of first appearance in the data.
        scale=alt.Scale(domain=df2.group.unique(),
                        range=[berkeley_palette['berkeley_blue'],
                               berkeley_palette['california_gold']]),
        legend=alt.Legend(title="Participant Group", padding=10,
                          symbolType="square", symbolStrokeWidth=1,
                          orient="right", offset=-170),
    )

    at = (
        alt.Chart(df2)
        .transform_density('likert_var', as_=['likert_var', 'Density'],
                           groupby=['group'])
        .mark_area(opacity=0.5, stroke=berkeley_palette['black'],
                   strokeWidth=2)
        .encode(
            x=alt.X('likert_var:Q',
                    axis=alt.Axis(values=list(np.arange(0.0, 9.5, 0.5)),
                                  tickCount=19),
                    title="Variance"),
            y=alt.Y('Density:Q'),
            color=group_color,
        )
        .properties(height=250, width=650,
                    title={'text': 'Distribution of Variance',
                           'subtitle': 'for Likert Scale Answers'})
        .configure(padding={'top': 20, 'left': 20, 'right': 20, 'bottom': 20})
        .configure_facet(spacing=10)
        .configure_view(stroke=None)
        # One call only: each configure_title() call replaces the previous
        # title config, so two separate calls lost anchor='middle'.
        .configure_title(anchor='middle', dy=-5)
        .configure_axis(grid=False)
    )
    return at
def plot_corr_matrix(dm_wide):
    """Return the Vega-Lite dict of an upper-triangle correlation heatmap.

    Args:
        dm_wide: Wide DataFrame whose columns are correlated pairwise with
            ``DataFrame.corr()``.

    Returns:
        The chart specification produced by ``Chart.to_dict()``.
    """
    dm_corr = dm_wide.corr()

    # Keep only the upper triangle (diagonal included); the rest becomes NaN.
    # The builtin ``bool`` replaces ``np.bool``, which was removed in
    # NumPy 1.24.
    upper_triangle = np.triu(np.ones(dm_corr.shape)).astype(bool)
    dm_corr = dm_corr.where(upper_triangle).reset_index()

    dm_corr_long = dm_corr.melt(
        var_name='index_2',
        value_vars=dm_wide.columns,
        id_vars='index',
        value_name='r',
    )

    corr_mat = alt.Chart(dm_corr_long).mark_rect().encode(
        alt.X('index', sort=None, axis=alt.Axis(title=None)),
        alt.Y('index_2', sort=None, axis=alt.Axis(title=None)),
        tooltip=['r'],
        fill=alt.Color(
            'r:Q',
            sort='descending',
            legend=alt.Legend(title='r'),
            scale=alt.Scale(scheme='redyellowblue', domain=[-1, 1]),
        ),
    ).properties(
        width=375,
        height=350,
    ).configure_scale(
        bandPaddingInner=0.015,
    ).configure_view(
        strokeWidth=0,
    ).interactive()

    return corr_mat.to_dict()
def update_world_chart(year, stat_type, include_usa, gdp_pct):
    """Build the world choropleth and wrap it in an ``html.Iframe``.

    Args:
        year: Year to show; interpolated into a ``Year == ...`` query.
        stat_type: Value of the ``Direction`` column to show.
        include_usa: When falsy, the USA's statistic is set to 0, which the
            colour condition renders as light gray.
        gdp_pct: Truthy selects ``percent_GDP``, otherwise ``USD_Value``.

    Returns:
        An ``html.Iframe`` whose ``srcDoc`` is the chart's HTML.
    """
    if gdp_pct:
        map_stat, map_legend = 'percent_GDP', '% GDP'
    else:
        map_stat, map_legend = 'USD_Value', 'USD Value'

    # Work on a copy so the module-level table is not modified.
    arms_df_tmp = arms_gdp.copy()
    if not include_usa:
        arms_df_tmp.loc[arms_df_tmp['Country'] == 'USA', map_stat] = 0

    print(year, stat_type, include_usa, gdp_pct)

    year_rows = arms_df_tmp.query("Year == " + str(year))
    selected_rows = year_rows.query("Direction == '%s'" % (stat_type))

    # Zero-valued countries are drawn light gray instead of on the scale.
    fill = alt.condition(
        alt.FieldEqualPredicate(field=map_stat, equal=0),
        alt.value('lightgray'),
        map_stat + ':Q',
        scale=alt.Scale(scheme='goldorange'),
        legend=alt.Legend(title=map_legend),
    )

    chart = alt.Chart(world_map_skl).mark_geoshape(stroke='white').encode(
        color=fill,
        tooltip=['Country:N', map_stat + ':Q'],
    ).transform_lookup(
        lookup='id',
        from_=alt.LookupData(selected_rows, 'id', [map_stat, 'Country']),
    ).project('equirectangular').properties(
        width=720,
        height=300,
        background='white',
    ).configure_axis(
        grid=False,
    )

    return html.Iframe(
        sandbox='allow-scripts',
        id='plot',
        width='900',
        height='350',
        style={'border-width': '0'},
        srcDoc=chart.to_html(),
    )
def create_plot(topo_data, data, column_name, data_type, tooltip_columns=None,
                stroke='lightgrey', strokeWidth=0.5, legend_title=None,
                scheme='reds'):
    """Build a choropleth coloured by a column looked up from ``data``.

    Each shape's ``properties.CODE_INS`` is matched against the ``niscode``
    column of ``data`` to fetch ``column_name`` and any tooltip columns.

    Args:
        topo_data: Geographic data accepted by ``alt.Chart``.
        data: Table with a ``niscode`` column plus the looked-up columns.
        column_name: Column of ``data`` that drives the colour.
        data_type: Altair type code appended to ``column_name``.
        tooltip_columns: Optional columns of ``data`` to show as nominal
            tooltips.  ``None`` means no tooltip.
        stroke: Outline colour of the shapes.
        strokeWidth: Outline width of the shapes.
        legend_title: Legend title; defaults to ``column_name``.
        scheme: Colour scheme name for the colour scale.

    Returns:
        The Altair geoshape chart.
    """
    # The default None used to crash the tooltip comprehension with a
    # TypeError; normalise it once and use it for both lookup and tooltip.
    tooltip_columns = [] if tooltip_columns is None else list(tooltip_columns)
    lookup_columns = [column_name, *tooltip_columns]

    if legend_title is None:
        legend_title = column_name

    encodings = {
        'color': alt.Color(
            f'{column_name}:{data_type}',
            legend=alt.Legend(title=legend_title),
            scale=alt.Scale(scheme=scheme),
        ),
    }
    if tooltip_columns:
        encodings['tooltip'] = [f'{column}:N' for column in tooltip_columns]

    return alt.Chart(topo_data)\
        .mark_geoshape(stroke=stroke, strokeWidth=strokeWidth)\
        .encode(**encodings)\
        .transform_lookup(
            lookup='properties.CODE_INS',
            from_=alt.LookupData(data, 'niscode', lookup_columns),
        )
def player_roll_chart(self):
    """Build a bar chart of roll counts per player, one column per roll.

    Reads ``self.player_count`` (rounded to two decimals for plotting),
    ``self.player_names`` and ``self.player_colors`` for the colour scale,
    and ``self.screen_width`` for the width of each column.

    Returns:
        The configured Altair chart.
    """
    plt_df = self.player_count.round(2)

    player_axis = alt.X(
        "Player:O", axis=alt.Axis(title=None, labels=False, ticks=False))
    player_color = alt.Color(
        'Player:N',
        scale=alt.Scale(domain=self.player_names, range=self.player_colors),
        legend=alt.Legend(),
    )
    roll_column = alt.Column(
        "Roll:N",
        header=alt.Header(title=None, labelOrient="bottom", labelFontSize=22),
    )

    bars = alt.Chart(plt_df).mark_bar(strokeWidth=0.5, stroke="black").encode(
        x=player_axis,
        y='Count:Q',
        color=player_color,
        column=roll_column,
        tooltip=list(self.player_count.columns),
    )

    sized = bars.properties(
        title="Roll Count by Player",
        width=self.screen_width / 45,
    )

    roll_chart = sized.configure_view(
        strokeWidth=0,
    ).configure_title(
        fontSize=32,
        limit=800,
        dx=45,
        dy=-50,
        font="Arial",
        align="center",
        anchor="middle",
    ).configure_legend(
        strokeColor="black",
        padding=10,
        orient="bottom",
        cornerRadius=10,
        direction="horizontal",
        labelFontSize=10,
    ).configure_axis(
        grid=False,
        labelFontSize=14,
        titleFontSize=16,
    )
    return roll_chart
def yieldSensitivityChart(source, sda_range):
    """Display a line chart of yield against SDA, one line per class.

    The styled chart is shown with ``.display()``; the function itself
    returns ``None``.

    Args:
        source: Long-format data with the columns ``x``, ``value`` and
            ``variable``.
        sda_range: Iterable of SDA values; each is divided by 100 to give
            the x-axis tick positions.
    """
    sda_ticks = [n/100 for n in sda_range]

    x_channel = alt.X(
        'x',
        axis=alt.Axis(values=sda_ticks, format='%', title='SDA',
                      labelSeparation=10, labelFlush=False),
    )
    y_channel = alt.Y(
        'value', axis=alt.Axis(tickCount=10, format='%', title='Yield'))
    class_color = alt.Color(
        'variable',
        scale=alt.Scale(scheme='blues'),
        legend=alt.Legend(labelFont='Open Sans', labelFontSize=14,
                          titleFont='Open Sans', titleFontSize=14,
                          titleFontWeight='normal', title="Class:"),
    )

    chart = alt.Chart(source).mark_line(point=True, size=5).encode(
        x=x_channel,
        y=y_channel,
        color=class_color,
    ).properties(title="Sensitivity of CMO Yield", width=600, height=350)

    styled = chart.configure_title(
        fontSize=32,
        font='Lato Light',
        align='center',
        color='black',
        fontWeight=100,
    ).configure_axis(
        titleFont='Lato Light',
        titleFontSize=25,
        titleFontWeight=200,
        labelFont='Open Sans',
        labelFontSize=14,
        labelPadding=10,
        gridColor='#e2e2e2',
    ).configure_point(
        size=60,
    )
    styled.display()
def create_line_plot(data):
    """Create a line plot for the COVID-19 CAN & USA dataset.

    The month of each ``date`` is translated to a label through
    ``month_num_name_map()`` and used as the x axis; the y axis is the mean
    of ``response_ratio`` and lines are coloured by ``iso_code``.

    Parameters
    ----------
    data
        Input data set from the preprocessed csv.  It is not modified; the
        ``month`` column is added to a copy.

    Returns
    -------
    altair object
        The plot as an altair object.

    Raises
    ------
    KeyError
        If a month number is missing from ``month_num_name_map()``.
    """
    months_lookup = month_num_name_map()

    # Translate month numbers with a direct lookup instead of a row-wise
    # DataFrame.apply, and attach them with assign() so the caller's frame
    # is left untouched.
    month_numbers = pd.DatetimeIndex(data['date']).month
    month_names = [months_lookup[number] for number in month_numbers]
    plot_data = data.assign(month=month_names)

    line_plt = (
        alt.Chart(plot_data, title="COVID-19 Response Ratio - Canada vs USA")
        .mark_line()
        .encode(
            alt.X("month", sort=list(months_lookup.values()),
                  title="Month(2020)"),
            alt.Y("mean(response_ratio)", title="Mean of Response Ratio"),
            color=alt.Color("iso_code", legend=alt.Legend(title="Country")),
        )
    ).properties(height=350, width=650)
    return line_plt
def plot_map_total_count(selection):
    """Layer a squirrel-count choropleth over an outlined base map.

    Regions inside ``selection`` are coloured by
    ``properties.Unique_Squirrel_ID`` on a green scale at 0.8 opacity; the
    others are grey at 0.1 opacity.

    Args:
        selection: Altair selection attached to the choropleth layer.

    Returns:
        The layered chart (base outline + choropleth).
    """
    # Outline-only layer sized by the module-level ``w`` and ``h``.
    outline = alt.Chart(squirrel_json).mark_geoshape(
        stroke='black', strokeWidth=1)
    base_map = outline.encode().properties(width=w, height=h)

    count_color = alt.condition(
        selection,
        'properties.Unique_Squirrel_ID:Q',
        alt.value('grey'),
        title='Squirrel\nCount',
        scale=alt.Scale(scheme='greens'),
        legend=alt.Legend(labelFontSize=16, titleFontSize=14, tickCount=5),
    )
    region_opacity = alt.condition(
        selection, alt.value(0.8), alt.value(0.1))
    region_tooltip = [
        alt.Tooltip('properties.sitename:N', title="Park Region"),
        alt.Tooltip('properties.Unique_Squirrel_ID:Q',
                    title="Squirrel Count"),
    ]

    shapes = alt.Chart(
        squirrel_json,
        title="Central Park Squirrel Distribution: 2018 Census",
    ).mark_geoshape()
    choropleth = shapes.add_selection(selection).encode(
        color=count_color,
        opacity=region_opacity,
        tooltip=region_tooltip,
    )

    return base_map + choropleth
def make_etc_coverage_heatmap(etc_coverage, mag_order=None, module_order=None):
    """Build one coverage heatmap per ETC complex and concatenate them.

    Args:
        etc_coverage: Frame with the columns ``complex``, ``genome``,
            ``module_name``, ``percent_coverage`` and the tooltip columns
            ``path_length``, ``path_length_coverage``, ``genes`` and
            ``missing_genes``.
        mag_order: Optional sort order for the genome (y) axis.
        module_order: Optional sort order for the module (x) axis.

    Returns:
        A horizontally concatenated chart titled 'ETC Complexes'.
    """
    # Every sub-chart gets the same height, based on all genomes present.
    num_mags_in_frame = len(set(etc_coverage['genome']))

    cell_tooltip = [
        alt.Tooltip('genome', title='Genome'),
        alt.Tooltip('module_name', title='Module Name'),
        alt.Tooltip('path_length', title='Module Subunits'),
        alt.Tooltip('path_length_coverage', title='Subunits present'),
        alt.Tooltip('genes', title='Genes present'),
        alt.Tooltip('missing_genes', title='Genes missing'),
    ]

    charts = []
    for etc_complex, frame in etc_coverage.groupby('complex'):
        # y labels and ticks are switched off for every sub-chart.
        positioned = alt.Chart(frame, title=etc_complex).encode(
            x=alt.X('module_name', title=None,
                    axis=alt.Axis(labelLimit=0, labelAngle=90),
                    sort=module_order),
            y=alt.Y('genome',
                    axis=alt.Axis(title=None, labels=False, ticks=False),
                    sort=mag_order),
            tooltip=cell_tooltip,
        )
        coverage_color = alt.Color(
            'percent_coverage',
            legend=alt.Legend(title='% Complete'),
            scale=alt.Scale(domain=(0, 1)),
        )
        num_modules = len(set(frame['module_name']))
        heatmap = positioned.mark_rect().encode(
            color=coverage_color,
        ).properties(
            width=HEATMAP_CELL_WIDTH * num_modules,
            height=HEATMAP_CELL_HEIGHT * num_mags_in_frame,
        )
        charts.append(heatmap)

    concat_title = alt.TitleParams('ETC Complexes', anchor='middle')
    return alt.hconcat(*charts, spacing=5, title=concat_title)
def facet_freq_barplot(freq_df, options, subplot_column, color_column=None,
                       plots_per_row=3):
    """Build a grid of word-frequency bar charts, one per option.

    Args:
        freq_df: Frame with the columns ``freq``, ``word``,
            ``subplot_column`` and, if given, ``color_column``.
        options: Values of ``subplot_column``; each gets its own subplot,
            titled with the value.
        subplot_column: Column used to split the data into subplots.
        color_column: Column used for colour.  When omitted, bars are
            coloured by ``subplot_column`` and the legend is hidden.
        plots_per_row: Passed to ``facet_wrap``.

    Returns:
        Whatever ``facet_wrap`` returns for the list of subplots.
    """
    if color_column is None:
        # Colour would only repeat the subplot titles, so no legend.
        color_column, legend = subplot_column, None
    else:
        legend = alt.Legend(title=f"{color_column} by color")

    bars = alt.Chart(freq_df).mark_bar().encode(
        alt.X("freq", title=None),
        alt.Y("word", title=None, sort="-x"),
        tooltip=[
            alt.Tooltip("freq", title="frequency"),
            alt.Tooltip("word", title="word"),
        ],
        opacity=alt.value(0.7),
        color=alt.Color(color_column, legend=legend),
    )
    base = bars.properties(width=190).interactive()

    subplts = [
        base.transform_filter(datum[subplot_column] == item).properties(
            title=item)
        for item in options
    ]
    return facet_wrap(subplts, plots_per_row)
def TrainTestVis(train, test):
    """Build a USER x ITEM grid coloured by train/test membership.

    ``train`` and ``test`` are concatenated and a ``SPLIT`` label is derived
    from ``split_index`` (0 -> "train", 1 -> "test").

    Args:
        train: Frame with the columns ``USER``, ``ITEM`` and
            ``split_index``.
        test: Frame with the same columns as ``train``.

    Returns:
        The configured Altair chart.

    Raises:
        KeyError: If ``split_index`` holds a value other than 0 or 1.
    """
    df = pd.concat([train, test])
    maptt = {0: "train", 1: "test"}
    df["SPLIT"] = df.split_index.apply(lambda x: maptt[x])
    # A bare ``df.head()`` used to sit here; its result was discarded, so it
    # has been removed.

    vis = alt.Chart(df).mark_rect().encode(
        alt.X(field="ITEM", type="nominal",
              axis=alt.Axis(orient="top", labelAngle=0)),
        alt.Y(field="USER", type="nominal",
              axis=alt.Axis(orient="left")),
        alt.Color(field="SPLIT", type="ordinal",
                  scale=alt.Scale(type="ordinal", scheme="darkred",
                                  nice=True),
                  legend=alt.Legend(titleOrient='top', orient="bottom",
                                    direction="horizontal", tickCount=5)),
        alt.Opacity(value=1),
    ).properties(
        width=180,
        height=300,
    ).configure_axis(
        grid=False,
    )
    return vis
def airline_chart(source: alt.Chart, subset: List[str], name: str,
                  loess=True) -> alt.Chart:
    """Plot daily flight rates for a subset of airlines, with hover highlight.

    Args:
        source: Chart whose data has the fields ``airline``, ``day``,
            ``rate`` and ``count``.
        subset: Airlines to keep.
        name: Legend title.
        loess: When true, the lines show a LOESS fit of ``rate`` over
            ``day`` per airline (bandwidth 0.2) instead of the raw values.

    Returns:
        The line layer with the point layer on top.
    """
    only_subset = alt.FieldOneOfPredicate(field="airline", oneOf=subset)
    chart = source.transform_filter(only_subset)

    # The airline nearest to the pointer becomes the highlighted one.
    highlight = alt.selection(
        type="single", nearest=True, on="mouseover", fields=["airline"])

    point_layer = chart.mark_point().encode(
        x="day",
        y=alt.Y("rate", title="# of flights (normalized)"),
        color=alt.Color("airline", legend=alt.Legend(title=name)),
        tooltip=["day", "airline", "count"],
        opacity=alt.value(0.3),
    )
    points = point_layer.add_selection(highlight)

    line_size = alt.condition(~highlight, alt.value(1), alt.value(3))
    lines = chart.mark_line().encode(
        x="day",
        y="rate",
        color="airline",
        size=line_size,
    )
    if loess:
        lines = lines.transform_loess(
            "day", "rate", groupby=["airline"], bandwidth=0.2)

    return lines + points
def make_type_I_error_chart(results):
    """Build stacked error bars per test with a rule at alpha = 0.05.

    Args:
        results: Anything ``pd.DataFrame`` accepts that yields the columns
            ``test``, ``direction`` and ``error``.

    Returns:
        A layered chart (bars + vertical rule), 600 x 300.
    """
    df = pd.DataFrame(results)

    label_axis = alt.Axis(titleFontSize=18, labelFontSize=15)
    value_axis = alt.Axis(titleFontSize=18, labelFontSize=15)

    bars = alt.Chart(df).mark_bar(size=30).encode(
        y=alt.Y('test:N', title='Type of test', axis=label_axis),
        x=alt.X('sum(error):Q', title='Probability of Type I error',
                axis=value_axis, stack='zero'),
        color=alt.Color(
            'direction:N',
            legend=alt.Legend(title=None, labelFontSize=18, labelLimit=1000),
        ),
        order=alt.Order('direction:N'),
        tooltip=alt.Tooltip(['test', 'direction', 'error']),
    )

    # NOTE: a text layer printing each error value ('.3f') on the bars was
    # sketched in an earlier revision and is currently disabled.

    # Vertical reference line at the 0.05 level.
    alpha_df = pd.DataFrame({'alpha': [.05]})
    rule = alt.Chart(alpha_df).mark_rule(color='black').encode(x='alpha')

    return alt.layer(bars, rule).properties(height=300, width=600)
def visualize_freq(output_path):
    """Serve a bubble chart of word frequencies per volume date.

    The CSV at ``output_path`` is read; year and month range are parsed out
    of ``htrc_vol`` (split on ``'_'``, fields 2 and 3), a ``date`` column of
    the form ``YYYY-MM`` is built from the year and the first month, rows
    with ``frequency`` <= 3 are dropped, and the chart is shown with
    ``chart.serve()``.  Nothing is returned.

    Args:
        output_path: Path of a CSV file with the columns ``htrc_vol``,
            ``word`` and ``frequency``.
    """
    data = pd.read_csv(output_path)

    # Split once and reuse the pieces.
    vol_parts = data.htrc_vol.str.split('_')
    data['year'] = vol_parts.str[2]
    data['months'] = vol_parts.str[3]

    month_parts = data.months.str.split('-')
    data['first_month'] = month_parts.str[0]
    # Assign the filled column back instead of fillna(inplace=True) on a
    # column selection, which does not reliably update the frame under
    # pandas Copy-on-Write.  Single-month volumes default to 'dec'.
    data['second_month'] = month_parts.str[1].fillna('dec')

    data.first_month = data.first_month.str.capitalize()
    data.second_month = data.second_month.str.capitalize()

    # Abbreviated month name -> month number; unparseable names become NaN.
    data['first_month_index'] = pd.to_datetime(
        data['first_month'], format='%b', errors='coerce').dt.month
    data['second_month_index'] = pd.to_datetime(
        data['second_month'], format='%b', errors='coerce').dt.month

    data = data.sort_values(by=['year', 'first_month_index'])

    data['date'] = pd.to_datetime(
        data['year'].apply(str) + '-' + data['first_month_index'].apply(str),
        format='%Y-%m')
    data.date = data.date.dt.strftime('%Y-%m')

    data = data[data['frequency'] > 3]

    chart = alt.Chart(data).mark_circle(
        opacity=0.8, stroke='black', strokeWidth=1,
    ).encode(
        alt.X('date:O'),
        alt.Y('word', axis=alt.Axis(labelAngle=0)),
        alt.Size('frequency', scale=alt.Scale(range=[0, 2000]),
                 legend=alt.Legend(title='counts')),
        alt.Color('word', scale=alt.Scale(scheme='category20'), legend=None),
    ).properties(width=1400, height=10000)

    chart.serve()
def mean_price_of_hydrous_ethanol_over_time_by_state(df):
    """Render a hover-highlighted line chart of hydrous-ethanol prices per state.

    Rows of ``df`` whose ``Product`` equals ``'HYDROUS ETHANOL'`` are drawn
    as one line per ``State`` (``Year_Month`` on x, ``mean(Mean_Price)`` on
    y).  The line nearest to the pointer is drawn thicker.  The layered
    chart is handed to ``st.altair_chart``; nothing is returned.

    Args:
        df: DataFrame providing the columns ``Product``, ``Year_Month``,
            ``Mean_Price`` and ``State``.
    """
    ethanol_rows = df[df.Product == 'HYDROUS ETHANOL'].reset_index(drop=True)

    highlight = alt.selection(
        type='single', on='mouseover', fields=['State'], nearest=True)

    state_color = alt.Color('State', legend=alt.Legend(title="State by color"))
    base = alt.Chart(
        ethanol_rows,
        title='Mean Price of Hydrous Ethanol Over Time by State',
    ).mark_line().encode(
        x='Year_Month',
        y='mean(Mean_Price)',
        color=state_color,
    ).properties(width=1000, height=400)

    # Invisible layer that carries the selection for "nearest" hovering.
    invisible = base.mark_line().encode(opacity=alt.value(0))
    points = invisible.add_selection(highlight).properties(width=600)

    # Visible lines: thin by default, thick when highlighted.
    line_size = alt.condition(~highlight, alt.value(1), alt.value(3))
    lines = base.mark_line().encode(size=line_size)

    st.altair_chart(points + lines)