def make_plot_top(data):
    """Make the hourly crime bar chart for San Francisco crime data.

    Parameters
    ----------
    data : pandas.DataFrame
        Data frame containing the crime data (expects 'Category' and
        'hour' columns).

    Returns
    -------
    alt.Chart
        Altair bar chart filtered by an hour slider (initialised to 19:00).
    """
    # Hour slider (0-23) bound to an 'hour' selection.
    slider = alt.binding_range(min=0, max=23, step=1)
    select_hour = alt.selection_single(name='select', fields=['hour'],
                                       bind=slider, init={'hour': 19})

    chart = alt.Chart(data).mark_bar(size=30).encode(
        x=alt.X('Category', type='nominal', title='Category'),
        y=alt.Y('count()', title="Count", scale=alt.Scale(domain=(0, 3300))),
        tooltip='count()').properties(
            title="Hourly crime occurrences for selected crimes",
            width=500,
            height=250).add_selection(select_hour).transform_filter(
                select_hour)
    # BUG FIX: configure_title() returns a *new* chart; the original code
    # discarded the result and returned the unconfigured chart, so the
    # title font size was never applied.
    return chart.configure_title(fontSize=12)
def create_visualisation(df_dic: Dict[int, pd.DataFrame], ts: np.ndarray,
                         colours: List[str]) -> alt.Chart:
    """Build an interactive layered chart of curves animated over time.

    Parameters
    ----------
    df_dic : Dict[int, pd.DataFrame]
        Mapping from a 1-based series index to a frame with columns
        'x', 'y' and 't'.
    ts : np.ndarray
        Time points; assumed evenly spaced (slider step is ts[1] - ts[0]).
    colours : List[str]
        One colour per series; series k uses colours[k - 1].

    Returns
    -------
    alt.Chart
        Layered line+point chart with a time slider that shows one
        time point at a time.
    """
    time_field = "t"
    select_time = alt.selection_single(
        name="select",
        fields=[time_field],
        init={time_field: min(ts)},
        bind=alt.binding_range(min=min(ts), max=max(ts), step=ts[1] - ts[0]),
    )
    # Invisible anchor layer that carries the slider selection.
    chart = (alt.Chart(pd.DataFrame({
        "x": [0],
        "y": [0]
    })).mark_point(opacity=0).encode(x="x", y="y").add_selection(select_time))
    for k, df in df_dic.items():
        for t in ts:
            df_t = df[df["t"] == t]
            # BUG FIX: sort_index() returns a new frame (it is not an
            # in-place operation); the original discarded the sorted result.
            df_t = df_t.sort_index()
            base = alt.Chart(df_t).transform_filter(select_time)
            # Each time slice contributes a line layer and a point layer,
            # both gated on the slider selection.
            chart += base.mark_line(color=colours[k - 1]).encode(
                alt.X("x:Q"), alt.Y("y:Q"), alt.OpacityValue(
                    1)).transform_filter(select_time) + base.mark_point(
                        color=colours[k - 1]).encode(
                            alt.X("x:Q"), alt.Y("y:Q"),
                            alt.OpacityValue(1)).transform_filter(select_time)
    return chart.interactive()
def draw_v4():
    """Render yearly project-cost bar charts by grade level and by resource
    category, both driven by a shared post-year slider."""
    grade_df = pd.read_csv("data/grade_by_year.csv")
    resource_df = pd.read_csv("data/resource_by_year.csv")

    year_slider = alt.binding_range(min=2013, max=2018, step=1)
    year_sel = alt.selection_single(name="Year", fields=['Post year'],
                                    bind=year_slider,
                                    init={'Post year': 2018})

    def _cost_bar(source, field):
        # One bar chart of project cost per category, filtered by the slider.
        return alt.Chart(source).mark_bar().encode(
            x=field + ":N",
            y="Project Cost:Q",
            color=alt.Color(field + ":N"),
            tooltip=[field + ":N", "Project Cost:Q"],
        ).add_selection(year_sel).transform_filter(year_sel).properties(
            width=500, height=500)

    st.write(_cost_bar(grade_df, "Project Grade Level Category"))
    st.write(_cost_bar(resource_df, "Project Resource Category"))
    return
def plot_tsne(source, x_col, y_col, category, perplexities, img_name):
    """Interactive tSNE scatter with a perplexity slider and a category
    radio filter that greys out the unselected categories."""
    category_values = list(source[category].unique())

    # Base scatter of the embedding coordinates with an image-name tooltip.
    scatter = alt.Chart(source).mark_point(filled=True).encode(
        x=x_col + ':Q',
        y=y_col + ':Q',
        tooltip=img_name + ':N')

    # Slider over the perplexity column.
    perplexity_slider = alt.binding_range(min=10, max=40, step=10)
    perplexity_sel = alt.selection_single(bind=perplexity_slider,
                                          fields=[perplexities],
                                          name="Change")

    # Radio buttons selecting a single category to colour.
    category_radio = alt.binding_radio(options=category_values)
    category_sel = alt.selection_single(fields=[category],
                                        bind=category_radio,
                                        name="Filter")
    colour_by_category = alt.condition(
        category_sel,
        alt.Color(category + ':N', legend=None),
        alt.value('lightgray'))

    plot = scatter.add_selection(category_sel, perplexity_sel).encode(
        color=colour_by_category).transform_filter(
            perplexity_sel).properties(title="tSNE Scatter Plot")
    return plot.properties(width=800, height=300)
def histogram(values, title=""):
    """Percentage histogram of *values* with a red rule at the mean.

    A slider (exposed as the 'num' selection) limits how many of the
    leading samples are included in both the histogram and the mean rule.
    """
    sample_idx = list(range(1, len(values) + 1))
    source = pd.DataFrame({values.name: values, 'num': sample_idx})

    num_slider = alt.binding_range(min=1, max=len(values), step=1,
                                   name='Number of samples: ')
    num_sel = alt.selection_single(bind=num_slider, fields=['num'],
                                   name="num", init={'num': len(values)})

    # Keep only the first `num` samples; the slider signal is num_num.
    filtered = alt.Chart(source).transform_filter('datum.num <= num_num')

    bars = filtered.transform_joinaggregate(
        total='count(*)'
    ).transform_calculate(
        pct='1 / datum.total'
    ).mark_bar().encode(
        alt.X(f'{values.name}:Q', bin=True),
        alt.Y('sum(pct):Q', axis=alt.Axis(format='%'))
    ).add_selection(num_sel).properties(title=title)

    mean_rule = filtered.mark_rule(color='red').encode(
        x=f'mean({values.name}):Q',
        size=alt.value(5))

    return bars + mean_rule
def make_plot_top(df_new=df_t4):
    """Bar chart of per-hour occurrences for the top 4 crime categories.

    Parameters
    ----------
    df_new : pandas.DataFrame, optional
        Crime data with 'Category' and 'hour' columns. Defaults to the
        module-level ``df_t4`` frame (evaluated once, at import time).

    Returns
    -------
    alt.Chart
        Bar chart filtered by an hour slider (0-23, initialised to 0).
    """
    # Hour slider bound to the 'hour' field.
    slider = alt.binding_range(min=0, max=23, step=1)
    select_hour = alt.selection_single(name='select', fields=['hour'],
                                       bind=slider, init={'hour': 0})
    # (Removed a dead, commented-out `typeDict` block that was never used.)
    chart = alt.Chart(df_new).mark_bar(size=30).encode(
        x=alt.X('Category', type='nominal', title='Category'),
        y=alt.Y('count()', title="Count", scale=alt.Scale(domain=(0, 3300))),
        tooltip='count()'
    ).properties(
        title="Per hour crime occurrences for the top 4 crimes",
        width=500,
        height=315
    ).add_selection(
        select_hour
    ).transform_filter(
        select_hour
    )
    return chart
def Joint_Prob_plot(df):
    """Compose an Altair dashboard of joint/marginal probabilities by cohort.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'cohort' column; each row is expanded by
        ``gen_plot_df`` into long-form records with 'p', 'k' and 'MR'
        columns (presumably probability coordinates and a rate — TODO
        confirm against gen_plot_df).

    Returns
    -------
    alt.VConcatChart
        Marginal-over-p bars, joint heatmap beside marginal-over-k bars,
        and a people plot, all filtered by one cohort-year slider.
    """
    year_min = df["cohort"].min()
    year_max = df["cohort"].max()
    # Cohort-year slider shared by every subchart below.
    slider = alt.binding_range(min=year_min, max=year_max, step=1)
    select = alt.selection_single(name="year",
                                  fields=['cohort'],
                                  bind=slider,
                                  init={'cohort': year_min})
    # Expand each row into its plotting records and stack them into one frame.
    df_new = df.apply(lambda row: gen_plot_df(row), axis=1)
    df_new = pd.concat(list(df_new))
    base = alt.Chart(df_new)
    # chart on Joint Prob: MR heatmap over (p, k), square-root colour scale.
    plot_scale = alt.Scale(type="pow", exponent=0.5, scheme="greens",
                           nice=True)
    color = alt.Color('MR:Q', scale=plot_scale)
    joint_chart = base.mark_rect().encode(
        x="p:O",
        y=alt.Y('k:O', sort=alt.EncodingSortField('k', order='descending')),
        color=color).add_selection(select).transform_filter(select).properties(
            height=200, width=200)
    # Categorical palette shared by the two marginal bar charts.
    color_scale = alt.Scale(
        domain=['1', '2', '3', '4', '5'],
        range=['#4c78a8', '#f58518', '#e45756', '#72b7b2', '#54a24b'])
    # Marginal distribution over p, stacked by k.
    p_mag_1 = base.mark_bar().encode(
        x=alt.X("p:O"),
        y=alt.Y('sum(MR):Q', scale=alt.Scale(domain=(0, 1))),
        color=alt.Color('k:O', scale=color_scale, legend=None),
        order=alt.Order(aggregate='sum',
                        type="quantitative",
                        sort='descending')).add_selection(
                            select).transform_filter(select).properties(
                                height=150, width=200)
    # Marginal distribution over k, stacked by p.
    k_mag_1 = base.mark_bar().encode(
        y=alt.Y("k:O", sort=alt.EncodingSortField('k', order='descending')),
        x=alt.X('sum(MR):Q', scale=alt.Scale(domain=(0, 1))),
        color=alt.Color('p:O',
                        scale=color_scale,
                        sort=alt.EncodingSortField('p', order='ascending'),
                        legend=None),
        order=alt.Order(aggregate='sum',
                        type="quantitative",
                        sort='descending')).add_selection(
                            select).transform_filter(select).properties(
                                height=200, width=150)
    # Companion chart built elsewhere; shares the same year selection.
    people_c = people_plot(df, select).properties(width=300, height=300)
    return (p_mag_1 & (joint_chart | k_mag_1) &
            people_c).resolve_scale(color='independent')
def world_map(highlight, highlight2):
    """Choropleth of total CO2 emissions layered with per-capita bubbles.

    Parameters
    ----------
    highlight, highlight2 : alt.Selection
        Selections (e.g. hover/click) used to grey out non-selected
        countries in both layers.

    Returns
    -------
    alt.LayerChart
        World map + per-capita circles, filtered by a year slider
        (1991-2011, initialised to 2011). Reads the module-level `df`
        and `countries` topo data.
    """
    slider = alt.binding_range(min=1991, max=2011, step=1)
    select_year = alt.selection_single(name="Year", fields=['Year'],
                                       bind=slider, init={'Year': 2011})
    # Renamed from `map` so the builtin is not shadowed.
    emissions_map = alt.Chart(df).mark_geoshape(stroke='#aaa',
                                                strokeWidth=0.25).encode(
        color=alt.condition(highlight2 | highlight,
                            'CO2 emissions (kt):Q',
                            alt.value('lightgrey'),
                            scale=alt.Scale(scheme='redyellowblue',
                                            reverse=True)),
        tooltip=[
            "Country Name", "CO2 emissions (kt)", "CO2 emissions per capita"
        ]
    ).transform_lookup(
        # Country name -> numeric country id.
        lookup='Country Name',
        from_=alt.LookupData(
            "https://raw.githubusercontent.com/KoGor/Map-Icons-Generator/master/data/world-110m-country-names.tsv",
            'name', ['id', "name"])).transform_lookup(
                # Country id -> geometry for the geoshape mark.
                lookup='id',
                from_=alt.LookupData(
                    countries, 'id',
                    fields=["id", "type", "properties", "geometry"])
            ).project(type="equirectangular").properties(
                width=1100,
                height=650,
                title='worldwide CO2 total emissions and emissions per capita'
            ).add_selection(highlight, highlight2)

    # Per-capita emissions as sized circles at each country's centroid.
    percapita = alt.Chart(df).mark_circle(opacity=0.4, ).encode(
        size=alt.Size('CO2 emissions per capita:Q',
                      scale=alt.Scale(range=[10, 3000])),
        color=alt.condition(highlight2 | highlight, alt.value('red'),
                            alt.value('lightgrey')),
        longitude='Longitude (average):Q',
        latitude='Latitude (average):Q',
        tooltip=[
            "Country Name", "CO2 emissions (kt)", "CO2 emissions per capita"
        ]
    ).transform_lookup(
        lookup='Country Name',
        from_=alt.LookupData(
            "https://raw.githubusercontent.com/KoGor/Map-Icons-Generator/master/data/world-110m-country-names.tsv",
            'name', ['id', "name"])).transform_lookup(
                lookup='id',
                from_=alt.LookupData(
                    countries, 'id',
                    fields=["id", "type", "properties", "geometry"
                            ])).project(type="equirectangular").properties(
                                width=900,
                                height=400,
                            )

    return alt.layer(emissions_map, percapita) \
        .add_selection(select_year) \
        .transform_filter(select_year)
def write():
    """Render the Streamlit page for the Nagel-Schreckenberg (NaSch)
    traffic simulation: sidebar parameter widgets, the simulation run,
    and an animated car chart plus a spatiotemporal summary figure.
    """
    st.title("Nagel-Schereckenberg (NaSch) Model Simulation Page")
    st.write(NASCH_explanation_string)
    # Set parameters
    st.sidebar.subheader("Set Parameters")
    bool_periodic_boundary = st.sidebar.checkbox(
        "Periodic Boundary Condition?", True)
    bool_initrandom = st.sidebar.checkbox("Random initialization?", True)
    const_nSteps = st.sidebar.slider("Number of steps to simulate", 10, 2000,
                                     100, 10)
    const_L = st.sidebar.slider("Number of slots", 10, 50, 20)
    const_vmax = st.sidebar.slider("Maximum speed", 1, 10, 5)
    const_density = st.sidebar.slider("Initial Density", 0.0, 0.95, 0.5, 0.05)
    const_stepProb = st.sidebar.slider("Step Probability", 0.05, 1.0, 0.5,
                                       0.05)
    const_entryProb = st.sidebar.slider("Entry Probability (alpha)", 0.05,
                                        0.95, 0.5, 0.05)
    if st.button("Run simulation?"):
        # Run the cellular-automaton simulation with the chosen parameters.
        df, flux = simulate_nasch(
            n_steps=const_nSteps,
            n_slots=const_L,
            init_random=bool_initrandom,
            init_density=const_density,
            const_vmax=const_vmax,
            const_stepprob=const_stepProb,
            periodic_boundary=bool_periodic_boundary,
            entry_prob=const_entryProb,
        )
        df_plot = df.copy()
        # Every row is rendered as one car glyph on a single ordinal lane.
        df_plot["img"] = ["car"] * len(df_plot)
        df_plot["car"] = [1] * len(df_plot)
        # Time slider over the simulated steps.
        slider = alt.binding_range(min=0, max=const_nSteps - 1, step=1)
        select_time = alt.selection_single(name="time", fields=["time"],
                                           bind=slider, init={"time": 1})
        # Text marks showing a car emoji at each occupied position,
        # filtered to the slider's time point.
        c = (alt.Chart(df_plot).properties(height=100).mark_text(
            size=30, baseline="middle").encode(
                alt.X(
                    "position",
                    type="quantitative",
                    scale=alt.Scale(domain=[0, const_L]),
                ),
                alt.Y(field="car", type="ordinal"),
                alt.Text("emoji:N"),
            ).transform_calculate(emoji="{'car':'🚗'}[datum.img]").
             add_selection(select_time).transform_filter(select_time))
        st.write(
            "Now, try and play with the slider representing the timepoint, to see the cars move in time!"
        )
        st.altair_chart(c, use_container_width=True)
        # Static spatiotemporal summary of the whole run.
        fig, axes = get_spatiotemporal_plot(df, flux)
        st.write("Here is a summary figure as well!")
        st.pyplot(fig)
def traceplot(output_dir: str, chains: BEASTPosteriorDirFmt,
              params: list = None):
    """Write an interactive MCMC trace-plot dashboard to *output_dir*.

    Produces ``index.html`` (Altair dashboard) and ``data.json`` (the
    combined chain samples it reads).

    Parameters
    ----------
    output_dir : str
        Destination directory for index.html and data.json.
    chains : BEASTPosteriorDirFmt
        One or more chains; all must share identical control files.
    params : list, optional
        Parameter names to plot in addition to 'likelihood'.
        (Annotation corrected from ``str``: the value is iterated and
        concatenated as a list of column names.)

    Raises
    ------
    ValueError
        If the chains' control files differ (different inputs/priors).
    """
    CONTROL_FMT = chains[0].control.format
    # All chains must target one posterior: compare control-file checksums.
    md5sums = {c.control.view(CONTROL_FMT).md5sum() for c in chains}
    if len(md5sums) > 1:
        raise ValueError("Chains do not share a posterior distribution as they"
                         " were generated with different inputs/parameters/"
                         "priors, so they cannot be visualized together.")
    if params is None:
        params = []
    # 'likelihood' is always plotted, last; user params are shown reversed.
    params = list(reversed(params)) + ['likelihood']
    dfs = []
    for idx, chain in enumerate(chains, 1):
        df = chain.log.view(pd.DataFrame)[['state'] + params]
        df['CHAIN'] = 'Chain %d' % idx
        dfs.append(df)
    data = pd.concat(dfs)
    # Charts reference the data by URL so the saved HTML stays small.
    url = 'data.json'
    # Burn-in slider: step = generation stride inferred from the last two
    # logged states (assumes a constant logging interval — TODO confirm).
    gen_end = data['state'].iloc[-1]
    gen_step = gen_end - data['state'].iloc[-2]
    slider = alt.binding_range(min=0, max=gen_end, step=gen_step,
                               name='Burn-in: ')
    selector = alt.selection_single(name="BurnIn", fields=['burnin'],
                                    bind=slider, init={'burnin': 0})
    traceplots = []
    for param in params:
        # Trace line per chain, dropping generations before the burn-in.
        line = alt.Chart(url).mark_line(
            interpolate='step-after', opacity=0.8
        ).encode(
            x=alt.X('state:Q', title='Generation'),
            y=alt.Y(param, type='quantitative', scale=alt.Scale(zero=False)),
            color='CHAIN:N'
        ).add_selection(
            selector
        ).transform_filter(
            alt.datum.state >= selector.burnin
        ).properties(width=800).interactive(bind_y=False)
        # Marginal histogram of the post-burn-in samples, sharing the y axis.
        hist = alt.Chart(url).mark_bar().encode(
            x=alt.X('count()', title='Frequency'),
            y=alt.Y(param, type='quantitative', bin=alt.Bin(), title=None),
            color='CHAIN:N'
        ).transform_filter(
            alt.datum.state >= selector.burnin
        ).properties(width=200)
        traceplot = alt.hconcat(line, hist).resolve_scale(y='shared')
        traceplots.append(traceplot)
    dash = alt.vconcat(*traceplots)
    dash.save(os.path.join(output_dir, 'index.html'))
    data.to_json(os.path.join(output_dir, url), orient='records')
def get_binding_range(self):
    """Build the date-slider selection for this view.

    For 7-day cases (either case name contains '7d') the slider is
    windowed to the trailing week; otherwise it spans the full range.

    Returns
    -------
    alt.Selection
        Single selection on 'date_transformed' bound to a range slider,
        initialised to the latest date.
    """
    range_max = self.get_range_max()
    # Deduplicated: only the slider minimum differs between the branches.
    if '7d' in self.case_a or '7d' in self.case_b:
        range_min = range_max - 7
    else:
        range_min = 0
    input_dropdown = alt.binding_range(min=range_min,
                                       max=range_max,
                                       step=1,
                                       name='Date:')
    selection = alt.selection_single(fields=['date_transformed'],
                                     bind=input_dropdown,
                                     init={'date_transformed': range_max})
    return selection
def get_line_chart(multi=False):
    """Line chart of weekly flu cases per country.

    Parameters
    ----------
    multi : bool, optional
        If True, return the line chart beside a multi-select country
        legend. Otherwise use a single-country dropdown plus two week
        sliders bounding the displayed week range.

    Returns
    -------
    alt.Chart
        The composed chart (hconcat when multi, filtered line otherwise).
    """
    flunet_df = get_df()
    countries = flunet_df.columns.tolist()[1:]
    # Wide -> long: one row per (week, country) with the case count.
    transformed_df = pd.melt(
        flunet_df,
        id_vars=["week"],
        value_vars=countries,
        var_name="country",
        value_name="flu_cases",
    )
    transformed_df["index"] = transformed_df["week"] % 53
    slider1 = alt.binding_range(min=1, max=52, step=1)
    slider2 = alt.binding_range(min=1, max=52, step=1)
    select_week1 = alt.selection_single(name="week1", fields=["week"],
                                        bind=slider1)
    select_week2 = alt.selection_single(name="week2", fields=["week"],
                                        bind=slider2)
    if multi:
        # BUG FIX: removed `multi_select`, an identical but never-used
        # duplicate of `selection` that was created here.
        selection = alt.selection_multi(fields=["country"])
        line, color = plot(selection, transformed_df)
        # Clickable legend of country swatches driving the multi-selection.
        make_selector = (alt.Chart(pd.DataFrame({
            "country": countries
        })).mark_rect().encode(y="country",
                               color=color).add_selection(selection))
        return line | make_selector
    selector = alt.selection(
        type="single",
        fields=["country"],
        bind=alt.binding_select(options=countries),
        name="Select",
    )
    line, _ = plot(selector, transformed_df)
    # Keep only weeks strictly between the two slider values.
    return (line.add_selection(select_week2).add_selection(select_week1).
            transform_filter("datum.week > week1_week").transform_filter(
                "datum.week < week2_week"))
def plot_altair_3(xcol, ycol):
    """Scatter of two wine features, filtered to one quality level chosen
    via a slider; returns the chart as HTML."""
    quality_slider = alt.binding_range(min=3.0, max=9.0, step=1.0)
    quality_sel = alt.selection_single(name='Select',
                                       fields=['quality'],
                                       init={'quality': 3.0},
                                       bind=quality_slider)

    chart_title = ycol + " VS " + xcol
    points = alt.Chart(wine, title=chart_title).mark_circle(opacity=0.5)
    chart = points.encode(
        alt.X(xcol),
        alt.Y(ycol),
        alt.Size('chlorides in g/dm3'),
        alt.OpacityValue(0.5),
    ).add_selection(quality_sel).transform_filter(quality_sel)
    return chart.to_html()
def getjson2():
    """Return (as JSON) two linked Altair charts of bike demand vs weather:
    a temperature/weather scatter and a wind/duration scatter filtered by
    the weather selection and an hour slider.
    """
    data1_df = pd.read_csv('./data/vis2_hr_weather.csv')
    alt.data_transformers.enable('default', max_rows=None)
    # Multi-selection on weather description links the two charts.
    interval = alt.selection_multi(fields=['weather_desc'])
    # interval1 = alt.selection_multi(fields = ['hr:N'])
    scatter = alt.Chart(data1_df).mark_circle(size=45).encode(
        y=alt.Y('count:Q', title='Count'),
        x=alt.X('temp_bin:N', title='Temperature Bucket (Farenheit)'),
        color=alt.condition(
            interval,
            alt.Color('weather_desc:N', scale=alt.Scale(scheme='category20')),
            alt.value('lightgray')),
        tooltip=[
            alt.Tooltip(shorthand='hr:N', title=("Average Count")),
            alt.Tooltip(shorthand='weather_desc:N', title=("Weather"))
        ]).properties(selection=interval,
                      width=400,
                      title='Weather and Time popularity Info Plot')
    # Hour-of-day slider applied to the second chart only.
    hour_slider = alt.binding_range(min=1, max=24, step=1)
    slider_selection = alt.selection_single(bind=hour_slider, fields=['hr'],
                                            name="Hour")
    # percent_slider = alt.binding_range(min=10, max=100, step=5)
    # slider_selection = alt.selection_single(bind=percent_slider, fields=['percentile'], name = "percentile")
    scatter2 = alt.Chart(data1_df).mark_point().encode(
        x=alt.X('wind_bin:N', title='Wind Speed Bucket (Km/h)'),
        size=alt.Size('count:Q', title='Average Count'),
        y=alt.Y('duration_mean:Q', title='Average Duration in Minutes'),
        shape='is_member:N',
        tooltip=[
            alt.Tooltip(shorthand='duration_mean:Q',
                        title=("Average Duration")),
            alt.Tooltip(shorthand='temp_bin:N',
                        title=("Temperature Bucket (Farenheit)")),
            alt.Tooltip(shorthand='weather_desc', title=("Weather")),
            alt.Tooltip(shorthand='count:Q', title=("Average Count"))
        ]).properties(selection=interval,
                      title='Counts Based on Temperature Hour and Wind Speed',
                      width=200,
                      height=500).transform_filter(interval).add_selection(
                          slider_selection).transform_filter(
                              slider_selection).interactive()
    # Serialise the side-by-side charts to a Vega-Lite dict for the client.
    data = (scatter | scatter2).to_dict()
    return jsonify(data)
def step4_related_factors():
    """Streamlit section comparing CO2 emissions with user-chosen indicators:
    a factor world map beside a repeated scatter-plot matrix, both driven
    by a shared year slider and a hover highlight.
    """
    # next_block()
    st.header("Step4: Factors that may affect CO2 emissions")
    st.write("Tips:")
    st.write("1. Add indicators you want to compare with CO2 emissions!")
    st.write("2. Put your mouse on a country and compare across indicators!")
    st.write("3. Remember to play with the year slide bar :)")
    # Year slider shared by the map and the scatter matrix.
    slider = alt.binding_range(min=1991, max=2011, step=1)
    select_year = alt.selection_single(name="Year", fields=['Year'],
                                       bind=slider, init={'Year': 2011})
    # Hover highlight on country name (empty = all countries highlighted).
    highlight = alt.selection_single(
        on='mouseover', fields=['Country Name'], empty='all')
    # init={"Country Name": "United States"})
    dataset2 = st.multiselect("Choose factors to compare!", [
        "CO2 emissions per GDP", "CO2 emissions (kt)",
        "CO2 emissions per capita", "Urban population (% of total)",
        "Renewable energy consumption (% of total final energy consumption)",
        "Forest area (% of land area)",
        "Marine protected areas (% of territorial waters)",
        "Population growth (annual %)",
        "Renewable electricity output % of total",
        "Terrestrial protected areas % of total",
        "Total greenhouse gas emissions (kt of CO2 equivalent)"
    ], [
        "CO2 emissions (kt)", "CO2 emissions per GDP",
        "Renewable electricity output % of total"
    ])
    # Map + pairwise scatter matrix repeated over the selected factors.
    st.write(
        alt.hconcat(
            world_map_for_factors(highlight, dataset2, select_year),
            alt.Chart(df).mark_point().encode(
                alt.X(alt.repeat("column"), type='quantitative'),
                alt.Y(alt.repeat("row"), type='quantitative'),
                color='Country Name:N',
            ).properties(
                width=160,
                height=160,
            ).repeat(
                row=dataset2,
                column=dataset2,
            ).transform_filter(select_year).interactive()))
def display_the_conplot():
    """Render a back-to-back (pyramid) bar chart of ESB counts per county
    council, first half of the year vs second half, filtered by a year
    slider; saves the chart to templates/conplot.html and serves it.
    """
    # Year slider (2006-2013) applied to the shared base chart.
    slider = alt.binding_range(min=2006, max=2013, step=1)
    select_year = alt.selection_single(name='Select', fields=['Year'],
                                       bind=slider, init={'Year': 2006})
    # Base: year-filtered data with a derived half-of-year label.
    base = alt.Chart(tidy_df).add_selection(select_year).transform_filter(
        select_year).transform_calculate(types=alt.expr.if_(
            alt.datum.Half == 1.0, '1st Half', '2nd Half')).properties(
                width=250, )
    color_scale = alt.Scale(domain=['1st Half', '2nd Half'],
                            range=['green', 'orange'])
    # Left wing: first-half counts, x reversed so the bars grow leftwards.
    left = base.transform_filter(alt.datum.types == '1st Half').encode(
        y=alt.Y('County Councils:O', axis=None),
        x=alt.X('sum(esb):Q',
                title='ESB Count',
                sort=alt.SortOrder('descending')),
        color=alt.Color('types:N', scale=color_scale, legend=None),
        tooltip=('sum(esb):Q')).mark_bar().properties(
            title='First Half of Year')
    # Centre column: the county council labels shared by both wings.
    middle = base.encode(
        y=alt.Y('County Councils:O', axis=None),
        text=alt.Text('County Councils:O'),
    ).mark_text(color='steelblue', size=15).properties(width=105)
    # Right wing: second-half counts.
    right = base.transform_filter(alt.datum.types == '2nd Half').encode(
        y=alt.Y('County Councils:O', axis=None),
        x=alt.X('sum(esb):Q', title='ESB Count'),
        color=alt.Color('types:N', scale=color_scale, legend=None),
        tooltip=('sum(esb):Q')).mark_bar().properties(
            title='Second Half of Year')
    conplot = alt.concat(left, middle, right, spacing=5)
    conplot.save("templates/conplot.html")
    return render_template("conplot.html")
def draw_v3():
    """Render per-state mean and total donation bar charts, both filtered
    by a shared year slider."""
    donations = pd.read_csv("data/state_donate_avg_sum.csv")

    year_slider = alt.binding_range(min=2013, max=2018, step=1)
    year_sel = alt.selection_single(name="Year", fields=['Year'],
                                    bind=year_slider, init={'Year': 2013})

    def _state_bar(measure):
        # Bar chart of one aggregate column per donor state, year-filtered.
        return alt.Chart(donations).mark_bar().encode(
            x=alt.X('Donor State:N'),
            y=alt.Y(measure + ':Q'),
            tooltip=['Donor State:N', measure + ':Q'],
        ).add_selection(year_sel).transform_filter(year_sel)

    st.write(_state_bar('Mean'))
    st.write(_state_bar('Sum'))
    return
def getjson1():
    """Return (as JSON) two linked Altair charts of hourly bike-share
    activity: start-station counts per hour, and a vacancy/duration
    scatter filtered by the hour selection and a percentile slider.
    """
    # import altair as alt
    data1_df = pd.read_csv('./data/vis1_hr.csv')
    alt.data_transformers.enable('default', max_rows=None)
    # Clicking a point in the first chart filters the second by that hour.
    interval = alt.selection_single(fields=["start_date_hr"])
    scatter = alt.Chart(data1_df).mark_point(color='red', size=30).encode(
        x=alt.X('start_date_hr:Q', title='Hour'),
        y=alt.Y('start_station_count:Q', title='Average Start Station Count'),
        color=alt.condition(interval, alt.value('steelblue'),
                            alt.value('lightgray')),
        tooltip='name').properties(selection=interval,
                                   width=800,
                                   title='Start Station Vs Count Plot')
    # Percentile slider (10-100 in steps of 10) applied to the second chart.
    percent_slider = alt.binding_range(min=10, max=100, step=10)
    slider_selection = alt.selection_single(bind=percent_slider,
                                            fields=['percentile'],
                                            name="percentile")
    scatter1 = alt.Chart(data1_df).mark_circle().encode(
        x='vacancy:Q',
        y=alt.Y('duration_mean:Q', title='Average Duration in Minutes'),
        size=alt.Size('start_station_count:Q'),
        color='is_member:N',
        tooltip=[
            alt.Tooltip(shorthand='name:N', title=("Start Station Name")),
            alt.Tooltip(shorthand='start_station_count:Q',
                        title=("Average No. of Bikes Taken"))
        ]).properties(selection=interval,
                      title='Hour and Percentile wise Start Station Analysis',
                      width=600).transform_filter(interval).add_selection(
                          slider_selection).transform_filter(
                              slider_selection).interactive()
    # scatter & scatter1
    # Serialise the vertically stacked charts to a Vega-Lite dict.
    data = (scatter & scatter1).to_dict()
    return jsonify(data)
bin_labels_9 = ['Under 15,000', '15,000 to 24,999', '25,000 to 34,999', '35,000 to 49,999', '50,000 to 74,999', '75,000 to 99,999', '100,000 to 149,999', '150,000 to 199,999', '200,000 and over'] newDATA['Income Range'] = for i in range(DATA): if DATA. # In[62]: alt.data_transformers.disable_max_rows() slider = alt.binding_range(min=1, max=31, step=1) select_date = alt.selection_single(name="January", fields=['Date'], bind=slider, init={'Date':1}) state_selector = alt.selection_multi(fields=['statename'], init=[{'statename':'Alabama'}]) table = alt.Chart(DATA).mark_bar().encode( x=alt.X('value:Q', title="% of Visits to Doctor about COVID", aggregate="mean", scale=alt.Scale(domain=[0, 25])), y=alt.Y('statename:N', title="State") ).add_selection( state_selector ).add_selection( select_date ).transform_filter( select_date) secondTable = alt.Chart(DATA).mark_bar().encode(
# Delete these row indexes from dataFrame, rows with '<5' value
js.drop(indexNames, inplace=True)
# 'JobSeeker Payment' arrives as comma-grouped strings (e.g. "1,234");
# strip the thousands separators and cast to int.
js['JobSeeker Payment'] = js['JobSeeker Payment'].str.split(',').str.join(
    '').astype(int)
js

import altair as alt

alt.data_transformers.disable_max_rows()

# Month slider (1-12) initialised to March.
select_month = alt.selection_single(name='Select',
                                    fields=['Month'],
                                    init={'Month': 3},
                                    bind=alt.binding_range(min=1, max=12,
                                                           step=1))

chart = alt.Chart(js).mark_bar().encode(
    x=alt.X('Number of JobSeeker Payment'),
    y=alt.Y('SA2 Name', sort='-x'),
    tooltip=['SA2 Name', 'JobSeeker Payment']).properties(
        title='SA2 with Top10 Number of JobSeeker Payment each Month'
    ).add_selection(select_month).transform_filter(
        select_month).transform_window(rank='rank(JobSeeker Payment)',
                                       sort=[
                                           alt.SortField('JobSeeker Payment',
                                                         order='descending')
                                       # BUG FIX: rank is 1-based, so
                                       # `rank < 10` kept only 9 rows even
                                       # though the title promises the top
                                       # 10; use <= 10.
                                       ]).transform_filter(
                                           alt.datum.rank <= 10)
chart
bind=DROPDOWN_OBJ_AGE, name='Age', init={'Group':'Total'} ) SELECT_OBJ_SEX = alt.selection_single( fields=['Group'], bind=DROPDOWN_OBJ_SEX, name='Demographics', init={'Group':'Total'} ) # slider for year and accompanying selection options SLIDER = alt.binding_range( min=START_YEAR, max=END_YEAR, step=2, name='Election Year' ) SELECT_OBJ_YR = alt.selection_single( name='SelectorName', fields=['Year'], bind=SLIDER, init={'Year':START_YEAR} ) def generate_map(df_in, map_title, map_type): ''' Generate US map for given voting or registration data.
+ " There are also some songs that have been played for more than their duration, likely because the user rewound the song." + " You can use the slider to explore the relationship between the number of seconds, the proportion of the song that was" + " listened to, and the distribution of the proportions in the data. The tooltip provides the exact count of songs in the" + " bar being hovered over. To improve the quality of the data analyzed, in allowing following charts we filter out songs" + " that were listened to for less than 20 seconds.") df = df.copy() df['percent_listened'] = df['msPlayed'] / df['duration_ms'] df['percent_listened'] = df['percent_listened'].clip(0, 1.1) * 100. # unit conversions ms_per_second = 1000 seconds_per_minute = 60 minutes_per_hour = 60 # make a slider that goes from 0 to 6 minutes played with a step size of 0.5 seconds seconds_slider = alt.binding_range(min=0, max=8*seconds_per_minute, step = 0.5, name="Cutoff (seconds):") # make the selection that is based off of the slider and can be used by the chart, initialized to 20 seconds seconds_selector = alt.selection_single(name="SelectorName", fields=["cutoff"], bind=seconds_slider, init={"cutoff": 20}) # make the chart whose x-axis in the ratio of milliseconds played to duration of the song (binned over steps of 0.1, meaning 10%) # and whose y-axis is the count of the number of rows that fall into the ratio bin # the selector (above) is used to color in the records whose milliseconds played are less than the specified selection # the width and height of the chart are specified to try to provide better visibility played_vs_duration = alt.Chart(df).mark_bar().encode( alt.X("percent_listened:Q", bin=alt.Bin(step=10), title="Percent of Song"), alt.Y("count():Q", title="Count of Songs"), alt.Color("played_less_than_cutoff_seconds:N", title="Songs Played for Less than Cutoff", scale=alt.Scale(domain=['true', 'false'], range=['#d8b365', '#5ab4ac']), legend=alt.Legend(orient="bottom")), tooltip 
= [alt.Tooltip("count():Q", title="Count of Songs")] ).transform_calculate(
def map_state_slider(state_txt, state_counties, confirmed, confirmed_min,
                     confirmed_max, deaths, deaths_min, deaths_max,
                     state_fips):
    """Layered Altair map of one state's COVID-19 data with a day slider.

    Parameters
    ----------
    state_txt : str
        State label used in the figure title (e.g. 'WA').
    state_counties : geo data
        County-boundary data for the white base layer.
    confirmed : pandas.DataFrame
        Long-form confirmed cases with 'fips', 'day_num', 'confirmed'.
    confirmed_min, confirmed_max : numeric
        Colour-scale domain for confirmed cases.
    deaths : pandas.DataFrame
        Long-form deaths with 'lat', 'long_', 'day_num', 'deaths'.
    deaths_min, deaths_max : numeric
        Size-scale domain for the death points.
    state_fips : int
        State FIPS code used to filter counties (county fips // 1000).

    Returns
    -------
    alt.LayerChart
        Base county map + confirmed choropleth + death points.
    """
    # Pivot confirmed data by day_num: one row per fips, one column per day.
    confirmed_pv = confirmed[['fips', 'day_num', 'confirmed']].copy()
    confirmed_pv['fips'] = confirmed_pv['fips'].astype(str)
    confirmed_pv['day_num'] = confirmed_pv['day_num'].astype(str)
    confirmed_pv['confirmed'] = confirmed_pv['confirmed'].astype('int64')
    confirmed_pv = confirmed_pv.pivot_table(index='fips',
                                            columns='day_num',
                                            values='confirmed',
                                            fill_value=0).reset_index()

    # Pivot deaths data by day_num, keyed by coordinates.
    deaths_pv = deaths[['lat', 'long_', 'day_num', 'deaths']].copy()
    deaths_pv['day_num'] = deaths_pv['day_num'].astype(str)
    deaths_pv['deaths'] = deaths_pv['deaths'].astype('int64')
    deaths_pv = deaths_pv.pivot_table(index=['lat', 'long_'],
                                      columns='day_num',
                                      values='deaths',
                                      fill_value=0).reset_index()

    # Day-number column names drive the slider and the fold transforms.
    # BUG FIX: this previously read from `confirmed_pv_wa`, an undefined
    # name left over from a WA-only version; use the pivot built above.
    column_names = confirmed_pv.columns.tolist()
    # Remove first element (`fips`)
    column_names.pop(0)
    # Convert the day columns to ints for the slider bounds.
    column_values = [int(name) for name in column_names]

    # Disable max_rows to see more data
    alt.data_transformers.disable_max_rows()

    # Topographic information (the unused `us_states` feature was removed).
    us_counties = alt.topo_feature(topo_usa, 'counties')

    # State county boundaries (white base layer).
    base_state = alt.Chart(state_counties).mark_geoshape(
        fill='white',
        stroke='lightgray',
    ).properties(
        width=800,
        height=600,
    ).project(type='mercator')

    # Slider over the available day range.
    min_day_num = column_values[0]
    max_day_num = column_values[-1]
    slider = alt.binding_range(min=min_day_num, max=max_day_num, step=1)
    slider_selection = alt.selection_single(fields=['day_num'], bind=slider,
                                            name="day_num",
                                            init={'day_num': min_day_num})

    # Confirmed cases by county (choropleth, symlog colour scale).
    base_state_counties = alt.Chart(us_counties).mark_geoshape(
        stroke='black',
        strokeWidth=0.05).transform_lookup(
            lookup='id',
            from_=alt.LookupData(confirmed_pv, 'fips', column_names)
        ).transform_fold(column_names, as_=[
            'day_num', 'confirmed'
        ]).transform_calculate(
            state_id="(datum.id / 1000)|0",
            day_num='parseInt(datum.day_num)',
            confirmed='isValid(datum.confirmed) ? datum.confirmed : -1'
        ).encode(color=alt.condition(
            'datum.confirmed > 0',
            alt.Color('confirmed:Q',
                      scale=alt.Scale(domain=(confirmed_min, confirmed_max),
                                      type='symlog')),
            alt.value('white')
        )).properties(
            # BUG FIX: the title hard-coded "WA" even though the state is a
            # parameter; use state_txt so other states are labelled right.
            title=f'COVID-19 {state_txt} State Confirmed Cases by County'
        ).transform_filter((alt.datum.state_id
                            ) == state_fips).transform_filter(
                                slider_selection)

    # Deaths as sized points at their coordinates (symlog size scale).
    points = alt.Chart(deaths_pv).mark_point(
        opacity=0.75, filled=True).transform_fold(
            column_names, as_=['day_num', 'deaths']).transform_calculate(
                day_num='parseInt(datum.day_num)',
                deaths='isValid(datum.deaths) ? datum.deaths : -1').encode(
                    longitude='long_:Q',
                    latitude='lat:Q',
                    size=alt.Size('deaths:Q',
                                  scale=alt.Scale(domain=(deaths_min,
                                                          deaths_max),
                                                  type='symlog'),
                                  title='deaths'),
                    color=alt.value('#BD595D'),
                    stroke=alt.value('brown'),
                ).add_selection(slider_selection).transform_filter(
                    slider_selection)

    # confirmed cases (base_state_counties) and deaths (points)
    return (base_state + base_state_counties + points)
def make_plot():
    """Register and enable a custom 'mds_special' Altair theme, then build
    the per-hour top-4-crimes bar chart from the module-level df_t4 frame.

    Returns
    -------
    alt.Chart
        Bar chart filtered by an hour slider (0-23, initialised to 0).
    """
    def mds_special():
        # Theme dictionary: fonts, axis colours and default titles used
        # by every chart while this theme is enabled.
        font = "Arial"
        axisColor = "#000000"
        gridColor = "#DEDDDD"
        return {
            "config": {
                "title": {
                    "fontSize": 24,
                    "font": font,
                    "anchor": "start",  # equivalent of left-aligned.
                    "fontColor": "#000000"
                },
                'view': {
                    "height": 300,
                    "width": 400
                },
                "axisX": {
                    "domain": True,
                    #"domainColor": axisColor,
                    "gridColor": gridColor,
                    "domainWidth": 1,
                    "grid": False,
                    "labelFont": font,
                    "labelFontSize": 12,
                    "labelAngle": 0,
                    "tickColor": axisColor,
                    "tickSize": 5,  # default, including it just to show you can change it
                    "titleFont": font,
                    "titleFontSize": 16,
                    "titlePadding": 10,  # guessing, not specified in styleguide
                    "title": "X Axis Title (units)",
                },
                "axisY": {
                    "domain": False,
                    "grid": True,
                    "gridColor": gridColor,
                    "gridWidth": 1,
                    "labelFont": font,
                    "labelFontSize": 14,
                    "labelAngle": 0,
                    #"ticks": False, # even if you don't have a "domain" you need to turn these off.
                    "titleFont": font,
                    "titleFontSize": 16,
                    "titlePadding": 10,  # guessing, not specified in styleguide
                    "title": "Y Axis Title (units)",
                    # titles are by default vertical left of axis so we need to hack this
                    #"titleAngle": 0, # horizontal
                    #"titleY": -10, # move it up
                    #"titleX": 18, # move it to the right so it aligns with the labels
                },
            }
        }

    # register the custom theme under a chosen name
    alt.themes.register('mds_special', mds_special)
    # enable the newly registered theme
    alt.themes.enable('mds_special')
    #alt.themes.enable('none') # to return to default

    # Hour slider bound to the 'hour' field.
    slider = alt.binding_range(min=0, max=23, step=1)
    select_hour = alt.selection_single(name='hour', fields=['hour'],
                                       bind=slider, init={'hour': 0})
    chart = alt.Chart(df_t4).mark_bar().encode(
        x=alt.X('Category:N',
                title="Crime category",
                axis=alt.Axis(labelAngle=0)),
        y=alt.Y('count()', title="Count", scale=alt.Scale(domain=(0, 3300))),
        tooltip='count()').properties(
            title="Per hour crime occurrences for the top 4 crimes",
            width=600,
            height=400).add_selection(select_hour).transform_filter(
                select_hour)
    return chart
def gen_map_interactive(log_prob_fn, points):
    '''Generates an interactive choropleth map of Seattle neighborhoods.

    Parameters
    ----------
    log_prob_fn : callable
        Maps a torch tensor of neighborhood-centroid coordinates to
        log-densities; densities colour the choropleth layer.
    points : pandas.DataFrame
        Station positions with 'lat', 'long' and 'iteration' columns;
        the iteration shown is chosen with a slider.

    Returns
    -------
    alt.LayerChart
        Base map + density choropleth + slider-filtered points.
    '''
    geojsonfile = 'https://raw.githubusercontent.com/seattleio/seattle-boundaries-data/master/data/neighborhoods.geojson'
    # First get the map
    geojson = download_json(geojsonfile)
    gdf = gpd.GeoDataFrame.from_features(geojson)
    # Shapely centroids are (x, y) = (longitude, latitude); name the
    # columns accordingly (the original code had the names swapped).
    gdf['centroid_long'] = [pt.centroid.x for pt in gdf['geometry']]
    gdf['centroid_lat'] = [pt.centroid.y for pt in gdf['geometry']]

    # Only keep some neighborhoods (adhoc zoom in); duplicates removed,
    # membership tested with isin so order is irrelevant.
    nhood_names_to_keep = [
        'Belltown', 'Central Business District', 'Minor', 'Broadway',
        'Stevens', 'South Lake Union', 'Laurelhurst', 'Eastlake',
        'First Hill', 'International District', 'Pioneer Square',
        'Westlake', 'Lower Queen Anne', 'University District',
        'Pike-Market', 'Wallingford', 'Fremont', 'West Woodland', 'Adams',
        'Lawton Park', 'Interbay', 'Briarcliff', 'Southeast Magnolia',
        'North Queen Anne', 'West Queen Anne', 'East Queen Anne',
        'Portage Bay', 'Montlake', 'Madison Park',
        'Harrison - Denny-Blaine', 'Ravenna', 'Bryant', 'Windermere',
        'Roosevelt', 'Green Lake', 'Phinney Ridge', 'Sunset Hill',
        'Loyal Heights', 'Whittier Heights', 'Mann', 'Madrona',
        'Yesler Terrace', 'Atlantic', 'Leschi', 'Industrial District',
        'North Beacon Hill', 'Mount Baker', 'Harbor Island']

    # Get a reduced gdf with just the above neighborhoods; filter first,
    # then copy, so we work on an explicit copy of the subset.
    gdf = gdf.loc[gdf['name'].isin(nhood_names_to_keep)].copy()

    # Get densities from KDE for centroids, normalised to [0, 1].
    # Row order is (latitude, longitude), identical to the original
    # np.stack([y-values, x-values]).T fed to log_prob_fn.
    centroids = np.stack([gdf['centroid_lat'], gdf['centroid_long']]).T
    centroids = torch.from_numpy(centroids)
    density = torch.exp(log_prob_fn(centroids))
    density = density / torch.max(density)
    gdf['density'] = density

    # NOTE(review): x_svgd_df is a module-level frame — confirm it is in
    # scope wherever this function is called.
    slider = alt.binding_range(min=1,
                               max=x_svgd_df['iteration'].max() - 1,
                               step=1,
                               name='Move stations with slider')
    # BUG FIX: the selection must declare fields=['iteration'] or
    # transform_filter(select_iter) has no field to match; also start at
    # the slider minimum (1) instead of the out-of-range 0.
    select_iter = alt.selection_single(name='iteration',
                                       fields=['iteration'],
                                       bind=slider,
                                       init={'iteration': 1})

    # Add Base Layer: neighborhood outlines
    base = alt.Chart(gdf).mark_geoshape(
        stroke='black',
        strokeWidth=1
    ).encode(
    ).properties(
        width=400,
        height=400
    )
    # Add Choropleth Layer coloured by normalised density
    chloro = alt.Chart(gdf).mark_geoshape(
        stroke='black'
    ).encode(
        alt.Color('density',
                  type='quantitative',
                  scale=alt.Scale(scheme='cividis', domain=(0, 1))
                  )
    )
    # Add points on top of the chart, filtered to the selected iteration
    scatter = alt.Chart(points).mark_circle(color='purple',
                                            opacity=0.7,
                                            stroke='gray').encode(
        latitude='lat:Q',
        longitude='long:Q',
    ).add_selection(
        select_iter
    ).transform_filter(
        select_iter
    )
    return base + chloro + scatter
""" US Population Over Time ======================= This chart visualizes the age distribution of the US population over time. It uses a slider widget that is bound to the year to visualize the age distribution over time. """ # category: case studies import altair as alt from vega_datasets import data source = data.population.url pink_blue = alt.Scale(domain=('Male', 'Female'), range=["steelblue", "salmon"]) slider = alt.binding_range(min=1900, max=2000, step=10) select_year = alt.selection_single(name="year", fields=['year'], bind=slider) alt.Chart(source).mark_bar().encode( x=alt.X('sex:N', axis=alt.Axis(title=None)), y=alt.Y('people:Q', scale=alt.Scale(domain=(0, 12000000))), color=alt.Color('sex:N', scale=pink_blue), column='age:O').properties( width=20).add_selection(select_year).transform_calculate( "sex", alt.expr.if_(alt.datum.sex == 1, "Male", "Female")).transform_filter(select_year)
Rounded_IMDB_Rating="floor(datum.IMDB_Rating)", Hundred_Million_Production= "datum.Production_Budget > 100000000.0 ? 100 : 10", Release_Year="year(datum.Release_Date)").transform_filter( alt.datum.IMDB_Rating > 0).transform_filter( alt.FieldOneOfPredicate( field='MPAA_Rating', oneOf=ratings)).encode(x=alt.X( 'Worldwide_Gross:Q', scale=alt.Scale(domain=(100000, 10**9), clamp=True)), y='IMDB_Rating:Q', tooltip="Title:N") # A slider filter year_slider = alt.binding_range(min=1969, max=2018, step=1) slider_selection = alt.selection_single(bind=year_slider, fields=['Release_Year'], name="Release Year_") filter_year = base.add_selection(slider_selection).transform_filter( slider_selection).properties(title="Slider Filtering") # A dropdown filter genre_dropdown = alt.binding_select(options=genres) genre_select = alt.selection_single(fields=['Major_Genre'], bind=genre_dropdown, name="Genre") filter_genres = base.add_selection(genre_select).transform_filter( genre_select).properties(title="Dropdown Filtering")
US Population Over Time ======================= This chart visualizes the age distribution of the US population over time. It uses a slider widget that is bound to the year to visualize the age distribution over time. """ # category: case studies import altair as alt from vega_datasets import data source = data.population.url pink_blue = alt.Scale(domain=('Male', 'Female'), range=["steelblue", "salmon"]) slider = alt.binding_range(min=1900, max=2000, step=10) select_year = alt.selection_single(name="year", fields=['year'], bind=slider) alt.Chart(source).mark_bar().encode( x=alt.X('sex:N', axis=alt.Axis(title=None)), y=alt.Y('people:Q', scale=alt.Scale(domain=(0, 12000000))), color=alt.Color('sex:N', scale=pink_blue), column='age:O' ).properties( width=20 ).add_selection( select_year ).transform_calculate( "sex", alt.expr.if_(alt.datum.sex == 1, "Male", "Female") ).transform_filter( select_year
alt.Y('life_expect:Q', scale=alt.Scale(domain=[0,90])), alt.Size('pop:Q', scale=alt.Scale(domain=[0, 1200000000], range=[0,1000])), alt.Color('cluster:N', legend=None), alt.OpacityValue(0.5), alt.Tooltip('country:N'), alt.Order('pop:Q', sort='descending') ).add_selection(select_year).transform_filter(select_year) st.altair_chart(chart) Drag the slider back and forth to see how the data values change over time! ''' select_year = alt.selection_single( name='select', fields=['year'], init={'year': 1955}, bind=alt.binding_range(min=1955, max=2005, step=5) ) chart = alt.Chart(data).mark_point(filled=True).encode( alt.X('fertility:Q', scale=alt.Scale(domain=[0,9])), alt.Y('life_expect:Q', scale=alt.Scale(domain=[0,90])), alt.Size('pop:Q', scale=alt.Scale(domain=[0, 1200000000], range=[0,1000])), alt.Color('cluster:N', legend=None), alt.OpacityValue(0.5), alt.Tooltip('country:N'), alt.Order('pop:Q', sort='descending') ).add_selection(select_year).transform_filter(select_year) st.altair_chart(chart) st.header("Graphical Marks")
# Topographic information us_states = alt.topo_feature(topo_usa, 'states') us_counties = alt.topo_feature(topo_usa, 'counties') # state borders base_states = alt.Chart(us_states).mark_geoshape().encode( stroke=alt.value('lightgray'), fill=alt.value('white')).properties( width=1200, height=960, ).project(type='albersUsa', ) # Slider choices min_day_num = column_values[0] max_day_num = column_values[len(column_values) - 1] slider = alt.binding_range(min=min_day_num, max=max_day_num, step=1) slider_selection = alt.selection_single(fields=['day_num'], bind=slider, name="day_num", init={'day_num': max_day_num}) # Confirmed cases by county base_counties = alt.Chart(us_counties).mark_geoshape( stroke='black', strokeWidth=0.05).project(type='albersUsa').transform_lookup( lookup='id', from_=alt.LookupData(pdf_usa_conf, 'fips', column_names) ).transform_fold( column_names, as_=['day_num', 'confirmed_per100K']).transform_calculate( day_num='parseInt(datum.day_num)', confirmed_per100K=
from altair import datum

# Set up rendering back-ends: Panel serves the Vega charts, HoloViews uses
# Bokeh, and Altair's default row cap is lifted for the full dataset.
pn.extension('vega')
#alt.renderers.enable('altair_viewer')
alt.data_transformers.disable_max_rows()
hv.extension("bokeh")

# Medal counts per country per year, with category breakdown.
medal_count_year_withCate = pd.read_csv("medal_count_year_withCate.csv")
# Keep only the top-10 countries per year (by the precomputed "Order" rank).
medal_count_year_top10 = medal_count_year_withCate.loc[medal_count_year_withCate["Order"]<=10]
year_unique = medal_count_year_top10["Year"].unique().tolist()
name_unique = medal_count_year_top10["name"].unique().tolist()

# Radial heatmap of medal totals: angle = Year, radius = country name.
heatmap = hv.HeatMap(medal_count_year_top10, ["Year", "name"],["Medal"])
heatmap.opts(opts.HeatMap(radial=True,colorbar=True, start_angle=np.pi/2, width=500, height=500, yticks=None,xticks=year_unique,tools=['hover'],toolbar='above'))

# Olympic-year slider (every 4 years from 1896) bound to the 'Year' field.
slider_year = alt.binding_range(min=1896, max=2016, step=4, name='Year:')
selector_year = alt.selection_single(fields=['Year'],bind=slider_year, init={'Year': 2016})
# NOTE(review): this selection also uses fields=['Year'] — its name
# suggests it should select on the country field; confirm downstream usage.
select_country = alt.selection(type="single", fields=['Year'])

# Stacked bar chart of total medals per country, coloured by medal
# category (gold/silver/bronze) and stacked in a fixed category order.
olympic_bar = alt.Chart(medal_count_year_withCate).mark_bar(opacity=0.8).encode(
    x=alt.X(field="name", type='nominal', title="country",sort = '-y', axis=alt.Axis(labelFontSize=10,titleFontSize=15,labelAngle=-45)),
    y=alt.Y(field="MedalByCate",type="quantitative",aggregate='sum',stack='zero',title="Total Medals", axis=alt.Axis(labelFontSize=10,titleFontSize=15)),
    color=alt.Color(field = 'MedalCate', type = 'nominal', scale = alt.Scale(domain = ["Gold","Silver","Bronze"],range=['gold', 'silver','sienna']), legend=alt.Legend(title="Medal Category",labelFontSize = 15,symbolSize = 30,titleFontSize=10)),
    order=alt.Order('MedalCateOrder',sort='ascending')
)
# text