def plot_diff_correlations(data):
    """
    Plot the correlation between the daily price difference and several Twitter variables.

    Builds a scatter chart of ``daily_diff_usd`` against one Twitter variable
    at a time (chosen through a dropdown) and saves it to
    ``resources/diff_correlations.html``. The input frame is left unmodified.

    :param data: Combined data DataFrame. Its index supplies the ``date``
        values; it must contain the columns ``daily_diff_usd``,
        ``number_of_tweets``, ``likes``, ``retweets`` and ``polarity``.
    """
    variables = ['number_of_tweets', 'likes', 'retweets', 'polarity']

    # Derive a new frame instead of writing the date column into the
    # caller's DataFrame.
    source = data.assign(date=data.index)

    # Unpivot: one row per (date, Twitter variable) pair.
    source = source[['date', 'daily_diff_usd'] + variables]
    source = source.melt(['date', 'daily_diff_usd'],
                         var_name='dependent_variable',
                         value_name='measure')
    # 'date' deliberately stays a regular column: Altair only serializes
    # DataFrame columns, never the index, and the tooltip below needs it.

    input_dropdown = alt.binding_select(options=variables)
    selection = alt.selection_single(fields=['dependent_variable'],
                                     bind=input_dropdown,
                                     name='Choose')

    chart = alt.Chart(source).mark_point().encode(
        x=alt.X('measure:Q', title='Wert der ausgewählten abh. Variable'),
        y=alt.Y('daily_diff_usd:Q', title='Kursdifferenz (USD)'),
        color='dependent_variable:N',
        tooltip='date:T'
    ).add_selection(
        selection
    ).transform_filter(
        selection
    ).properties(title="Korrelation Kursdifferenz/abhängige Variable")

    # Workaround for interactive charts in presentation mode
    chart.save("resources/diff_correlations.html")
def altair_sir_plot(df_alt, default_country):
    """
    Build an interactive Altair chart of the Infected/Removed curves per country.

    The chart layers the two curves, a shaded band per curve (read from the
    ``<curve>.max`` / ``<curve>.min`` columns), a text title taken from the
    ``title`` column, and an orange vertical rule at day 0. A dropdown bound
    to the ``country`` column filters every layer.

    Side effect: disables Altair's max-rows limit globally.

    :param df_alt: DataFrame with at least the columns ``country``, ``title``,
        ``day``, ``Infected``, ``Removed`` and the matching ``.max``/``.min`` columns.
    :param default_country: Country selected in the dropdown initially.
    :return: The configured layered chart.
    """
    alt.data_transformers.disable_max_rows()

    select_country = alt.selection_single(
        name='Select',
        fields=['country'],
        init={'country': default_country},
        bind=alt.binding_select(options=sorted(df_alt['country'].unique()))
    )

    title = (alt.Chart(df_alt[['country', 'title']].drop_duplicates())
             .mark_text(dy=-180, dx=0, size=16)
             .encode(text='title:N')
             .transform_filter(select_country))

    base = alt.Chart(df_alt).encode(x='day:Q')

    line_cols = ['Infected', 'Removed']  # 'Susceptible'
    colors = ['red', 'green']
    lines = (base.mark_line()
             .transform_fold(line_cols)
             .encode(x=alt.X('day:Q', title=f'days relative to today ({CovidData.cur_date})'),
                     y=alt.Y('value:Q',
                             axis=alt.Axis(format='%', title='Percentage of Population')),
                     color=alt.Color('key:N',
                                     scale=alt.Scale(domain=line_cols, range=colors))))

    import functools
    # The column names contain a literal dot, which must reach Vega-Lite as
    # "\." so it is not read as nested-field access. The backslash is written
    # as "\\" because "\." is an invalid Python escape sequence.
    bands = functools.reduce(
        alt.Chart.__add__,
        [base.mark_area(opacity=0.1, color=color)
         .encode(y=f'{col}\\.max:Q', y2=f'{col}\\.min:Q')
         for col, color in zip(line_cols, colors)])

    today_line = (alt.Chart(pd.DataFrame({'x': [0]}))
                  .mark_rule(color='orange')
                  .encode(x='x', size=alt.value(1)))

    return ((lines + bands + title + today_line)
            .add_selection(select_country)
            .transform_filter(select_country)
            .configure_title(fontSize=20)
            .configure_axis(labelFontSize=15, titleFontSize=18, grid=True)
            .properties(width=550, height=340))
def visualize_mod_time(directory, bus_num, module_num):
    """
    Build a line chart of counts versus voltage, one line per retrieval date.

    The frame returned by ``build_module_average_df`` is unpivoted so that
    every column other than ``DateRetrieved`` becomes a ``voltage`` value with
    its ``counts``. A dropdown highlights one date (the others turn light
    gray), and the axes can be panned/zoomed.

    :param directory: Passed through to ``build_module_average_df``.
    :param bus_num: Passed through to ``build_module_average_df``.
    :param module_num: Passed through to ``build_module_average_df``.
    :return: The interactive Altair chart.
    """
    df = build_module_average_df(directory, bus_num, module_num)
    df = df.reset_index()
    data = df.melt('DateRetrieved', var_name='voltage', value_name='counts')
    dates = list(data['DateRetrieved'].unique())

    # Interval selection bound to the scales, i.e. pan & zoom.
    brush = alt.selection_interval(bind='scales')
    input_dropdown = alt.binding_select(options=dates)
    selection = alt.selection_single(fields=['DateRetrieved'],
                                     bind=input_dropdown,
                                     name=' ')
    color = alt.condition(selection,
                          alt.Color('DateRetrieved:N'),
                          alt.value('lightgray'))

    # After the melt only DateRetrieved / voltage / counts (plus the reset
    # index) exist, so the tooltip shows those rather than a 'Name' field
    # that is never present in the data.
    line = alt.Chart(data.reset_index()).mark_line().encode(
        x='voltage:Q',
        y='counts:Q',
        color=color,
        tooltip=['DateRetrieved:N', 'voltage:Q', 'counts:Q']
    ).add_selection(brush, selection)
    return line
def show_yearly_sale(df):
    """
    Render the "Yearly Sales Trend" section of the Streamlit page.

    Writes a heading and intro text, then two charts: a line chart of summed
    sales per year and region (click a line to highlight it) and a bar chart
    of yearly sales for the region chosen in a dropdown.

    :param df: Sales DataFrame with a ``Year`` column and the five regional
        sales columns listed in the function body.
    """
    st.write('## Yearly Sales Trend')
    st.write(
        "**The dataset contains yearly sales data for various games in different regions. Let's explore the sales trend in different regions through some interactions!** 👇🏻"
    )

    region_columns = [
        'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'
    ]

    # Dropdown-driven selection used by the bar chart further down.
    region_dropdown = alt.binding_select(options=region_columns,
                                         name='Select a Region:')
    region_choice = alt.selection_single(fields=['Region Sale'],
                                         bind=region_dropdown,
                                         init={'Region Sale': 'Global_Sales'})

    # --- line chart -------------------------------------------------------
    st.write("💡 *You can select specific region by clicking on the lines*")
    yearly_long = get_melt_year_df(df, region_columns)
    clicked_region = alt.selection_single(encodings=['color'])
    line_color = alt.condition(clicked_region,
                               "Region:N",
                               alt.value('lightgrey'),
                               scale=alt.Scale(scheme='tableau20'))
    trend_chart = alt.Chart(yearly_long).mark_line(opacity=0.75, thickness=10)
    trend_chart = trend_chart.encode(
        x='Year:N',
        y=alt.Y('value:Q', title='Sale (in millions)', aggregate='sum'),
        color=line_color,
    )
    st.write(trend_chart.add_selection(clicked_region))

    # --- bar chart --------------------------------------------------------
    st.write("💡 *You can select specific region with the dropdown menu below*")
    folded = alt.Chart(df).transform_fold(region_columns,
                                          as_=['Region Sale', 'Sale'])
    bars = folded.transform_filter(region_choice).mark_bar().encode(
        x=alt.X('Year:N'),
        y=alt.Y('Sale:Q', title='Sale (in millions)'))
    st.write(bars.add_selection(region_choice).properties(height=500))
def create_viz_region_confirmed_and_rate(charts_path: str,
                                         dfs: Dict[str, pd.DataFrame]) -> None:
    """
    Save a two-panel region chart (new cases on top, growth rate below).

    Both panels are line charts filtered by a shared location dropdown whose
    initial value is the alphabetically first location. The result is written
    to ``region-agg-chart.html`` inside ``charts_path``.

    :param charts_path: Directory in which the HTML file is created.
    :param dfs: Mapping that must contain the frames ``"region-agg-confirmed"``
        and ``"region-agg-growth-rate"``.
    """
    confirmed_df = dfs["region-agg-confirmed"]
    growth_df = dfs["region-agg-growth-rate"]
    output_file = os.path.join(charts_path, "region-agg-chart.html")

    choices = sorted(confirmed_df["location"].unique())
    single_selector = alt.selection_single(
        fields=[typedef.Columns.REGION],
        bind=alt.binding_select(options=choices),
        name="Location",
        init={typedef.Columns.REGION: choices[0]},
    )
    color = alt.condition(
        single_selector,
        alt.Color("%s:N" % typedef.Columns.REGION, legend=None),
        alt.value("lightgray"),
    )

    def _panel(frame, value_column, value_title):
        # One dropdown-filtered line chart of `value_column` over time.
        lines = alt.Chart(frame).mark_line(point=True).encode(
            alt.X(typedef.Columns.DATE, title="Date"),
            alt.Y(value_column, title=value_title),
            color=color,
            tooltip=value_column,
        )
        return lines.add_selection(single_selector).transform_filter(
            single_selector)

    confirmed_panel = _panel(confirmed_df, typedef.Columns.CONFIRMED,
                             "# New Cases")
    growth_panel = _panel(growth_df, typedef.Columns.GROWTH_RATE,
                          "Rate of New Cases")

    stacked = alt.vconcat(confirmed_panel, growth_panel)
    stacked.save(output_file, embed_options={"renderer": "svg"})
def get_line_chart(multi=False):
    """
    Build the flu-cases line chart in one of two interaction modes.

    The frame from ``get_df()`` is unpivoted to one row per (week, country).
    The actual line chart is produced by the sibling ``plot`` function.

    :param multi: If true, return the line chart next to a clickable country
        list (multi-selection). Otherwise return the line chart driven by a
        country dropdown and filtered to the weeks strictly between two sliders.
    :return: The composed Altair chart.
    """
    flunet_df = get_df()
    # NOTE(review): assumes the first column is "week" and all remaining
    # columns are countries - confirm against get_df().
    countries = flunet_df.columns.tolist()[1:]
    transformed_df = pd.melt(
        flunet_df,
        id_vars=["week"],
        value_vars=countries,
        var_name="country",
        value_name="flu_cases",
    )
    transformed_df["index"] = transformed_df["week"] % 53

    if multi:
        selection = alt.selection_multi(fields=["country"])
        line, color = plot(selection, transformed_df)
        make_selector = (alt.Chart(pd.DataFrame({"country": countries}))
                         .mark_rect()
                         .encode(y="country", color=color)
                         .add_selection(selection))
        return line | make_selector

    # The week sliders are only needed in single-country mode, so they are
    # created here rather than before the branch.
    select_week1 = alt.selection_single(
        name="week1",
        fields=["week"],
        bind=alt.binding_range(min=1, max=52, step=1))
    select_week2 = alt.selection_single(
        name="week2",
        fields=["week"],
        bind=alt.binding_range(min=1, max=52, step=1))

    selector = alt.selection(
        type="single",
        fields=["country"],
        bind=alt.binding_select(options=countries),
        name="Select",
    )
    line, _ = plot(selector, transformed_df)

    # week1_week / week2_week are the signals that the two sliders drive.
    # NOTE(review): the sliders have no initial value, so the filters may hide
    # everything until both are moved - confirm in the rendered chart.
    return (line.add_selection(select_week2)
            .add_selection(select_week1)
            .transform_filter("datum.week > week1_week")
            .transform_filter("datum.week < week2_week"))
def get_skill_count():
    """
    Build a bar chart of the 50 most frequent skills with a category dropdown.

    Reads the module-level ``skills_count`` mapping (category -> iterable of
    ``(skill, count)`` pairs), keeps the 50 rows with the largest count and
    returns a horizontal bar chart filtered by the selected category.

    :return: The Altair bar chart.
    """
    rows = [[entry[0], entry[1], group]
            for group, entries in skills_count.items()
            for entry in entries]

    frame = pd.DataFrame(rows, columns=['Skill', 'Count', 'Category'])
    ranked = frame.sort_values('Count', ascending=False)
    source = ranked.nlargest(50, columns='Count')

    # Dropdown entries: every category that survived the top-50 cut, preceded
    # by 'all'.
    # NOTE(review): 'all' is filtered like any other value, so unless a
    # category is literally named 'all' picking it presumably shows an empty
    # chart - confirm the intended behavior.
    present_categories = np.array(source['Category'].unique())
    dropdown_values = np.insert(present_categories, 0, 'all')

    category_picker = alt.selection_single(
        fields=['Category'],
        bind=alt.binding_select(options=dropdown_values),
        name='Skill ')

    bars = alt.Chart(source).mark_bar().encode(
        x=('Count:Q'),
        y=alt.Y('Skill', sort='-x'))
    return bars.add_selection(category_picker).transform_filter(
        category_picker)
def countWordPlot(source, x_val, y_val, category):
    """
    Bubble chart of how often each ``x_val`` value occurs, filterable by ``y_val``.

    :param source: DataFrame holding the three named columns.
    :param x_val: Column plotted on the x axis; its count drives y and size.
    :param y_val: Column whose distinct values populate the dropdown filter.
    :param category: Column used for the (nominal) color encoding.
    :return: The interactive, dropdown-filtered Altair chart.
    """
    dropdown_values = list(np.unique(source[y_val]))
    category_values = list(np.unique(source[category]))
    occurrence = f'count({x_val})'

    bubbles = alt.Chart(source).mark_circle().encode(
        x=x_val,
        y=occurrence,
        size=occurrence,
        color=alt.Color(f'{category}:N',
                        scale=alt.Scale(domain=category_values)),
        tooltip=[x_val],
    ).interactive()

    # Dropdown filter on the y_val column.
    picker = alt.selection_single(
        fields=[y_val],
        bind=alt.binding_select(options=dropdown_values),
        name="Test")

    filtered = bubbles.add_selection(picker).transform_filter(picker)
    return filtered.properties(
        title="Word Distribution of Predicted Captions",
        width=800,
        height=300)
def close_for_how_long(yelp_join):
    """
    Render the "planned reopening date" section of the Streamlit page.

    Shows explanatory text, then a small brushable area chart stacked on top
    of a bar chart of planned reopening dates. The brush sets the x range of
    the bar chart and a category dropdown highlights bars by color.

    :param yelp_join: DataFrame with the columns ``Temporary Closed Until``
        (the string ``'FALSE'`` marks businesses without a closure notice) and
        ``categories``.
    """
    st.markdown("From now on, we retrive the original information in **Temporary Closed Until**, **Covid Banner**, and **highlights**.")
    # NOTE(review): the count below is taken from the module-level
    # yelp_covid_df rather than from yelp_join - confirm that is intended.
    closed_count = sum(yelp_covid_df['Temporary Closed Until'] != 'FALSE')
    st.write("First, let's see when did these temporarly closed businesses plan to reopen in June 10. There are {} businesses uploading closure notification.".format(closed_count))
    st.write("You may select certain category you are interested in from the bottom box, and brush certain time sub-interval from the upper figure.")

    closed_rows = yelp_join[yelp_join['Temporary Closed Until'] != 'FALSE']

    # Drop the last five characters of every timestamp string.
    close_time = [stamp[:-5] for stamp in closed_rows['Temporary Closed Until']]
    category = list(closed_rows['categories'].fillna('').apply(find_category))

    df = pd.DataFrame()
    df['Close Until'] = close_time
    df['Category'] = category

    brush = alt.selection_interval()
    category_dropdown = alt.binding_select(options=cate_list,
                                           name="Category of ")
    picked = alt.selection_single(encodings=["color"], bind=category_dropdown)

    # Upper panel: overview that carries the brush.
    overview_rows = df[df['Close Until'] < '2021-01-01']
    overview = alt.Chart(overview_rows).mark_area().encode(
        alt.X("Close Until:T"),
        alt.Y("count()")
    ).properties(height=50, width=500).add_selection(brush)

    # Lower panel: bars restricted to the brushed interval.
    detail_rows = df[df['Close Until'] < '2021-01-01T00:00:00']
    detail = alt.Chart(detail_rows).mark_bar(size=20).encode(
        alt.X("Close Until:T", scale=alt.Scale(domain=brush)),
        alt.Y("count()", title='Business number'),
        alt.Tooltip(["Close Until:T", "Category:N", "count()"]),
        color=alt.condition(picked, "Category:N", alt.value("lightgray")),
    ).add_selection(picked).properties(height=300, width=500)

    st.write(overview & detail)
    st.write("It is interesting that the planned reopen time is quite concentrated, most during June and July, and on the start or end of a certain month.")
def make_plot_bot(data=df_t4):
    """
    Build a bar chart and a map of reports per police district, filtered by crime category.

    The bar chart (left) counts reports per ``PdDistrict``; the map (right)
    plots every report at its ``X``/``Y`` position. One dropdown bound to the
    ``Category`` column filters both charts.

    :param data: DataFrame with the columns ``X``, ``Y``, ``PdDistrict`` and
        ``Category``. Defaults to the module-level ``df_t4``.
    :return: The concatenated, dropdown-filtered Altair chart.
    """
    chart_1 = alt.Chart(data).mark_circle(size=3, opacity=0.8).encode(
        longitude='X:Q',
        latitude='Y:Q',
        color=alt.Color('PdDistrict:N', legend=alt.Legend(title="District")),
        tooltip='PdDistrict'
    ).project(
        type='albersUsa'
    ).properties(
        width=450,
        height=350
    )

    chart_2 = alt.Chart(data).mark_bar().encode(
        x=alt.X('PdDistrict:N', axis=None, title="District"),
        y=alt.Y('count()', title="Count of reports"),
        color=alt.Color('PdDistrict:N', legend=alt.Legend(title="District")),
        tooltip=['PdDistrict', 'count()']
    ).properties(
        width=450,
        height=350
    )

    # A dropdown filter
    crimes_dropdown = alt.binding_select(options=list(data['Category'].unique()))
    # The selection name keeps its literal backslash; it is spelled "\\"
    # because "\ " is an invalid Python escape sequence.
    crimes_select = alt.selection_single(fields=['Category'],
                                         bind=crimes_dropdown,
                                         name="Pick\\ Crime")

    combine_chart = (chart_2 | chart_1)

    filter_crimes = combine_chart.add_selection(
        crimes_select
    ).transform_filter(
        crimes_select
    )

    return filter_crimes
def display_img(self, args):
    """
    Redraw the output widget with one line chart per selected CSV file.

    Clears ``self.output`` and, for every path in ``args["new"]``, reads the
    CSV, unpivots all columns and displays a line chart in which a dropdown
    picks the column ("variable") to show. The x axis can be panned/zoomed.

    :param args: Change notification; ``args["new"]`` is an iterable of CSV paths.
    """
    self.output.clear_output()
    with self.output:
        for csv_path in args["new"]:
            df = pd.read_csv(csv_path, sep=",")
            # Replace dots literally. Without regex=False the result depends
            # on the pandas version's default for `regex`, under which "."
            # can be read as the match-anything pattern.
            df.columns = df.columns.str.replace(".", "_", regex=False)

            dropdown = alt.binding_select(options=list(df.columns))
            selection = alt.selection_single(
                fields=["variable"],
                bind=dropdown,
                name="Selection of",
            )
            color = alt.condition(selection,
                                  alt.Color("variable:N"),
                                  alt.value("lightgray"))
            scales = alt.selection_interval(encodings=["x"], bind="scales")

            # melt() yields the columns "variable"/"value"; reset_index()
            # exposes the row number of the melted frame as "index" for x.
            chart = (alt.Chart(df.melt().reset_index())
                     .mark_line()
                     .encode(x="index", y="value", color=color)
                     .add_selection(selection)
                     .transform_filter(selection)
                     .properties(width=400, height=300)
                     .add_selection(scales))

            # Lift the row limit only while this chart is being displayed.
            with alt.data_transformers.enable("default", max_rows=None):
                display(chart)
def catCountWordPlot(source, x_val, y_val, category):
    """
    Bubble chart of the summed ``y_val`` per ``x_val`` value, filterable by category.

    :param source: DataFrame holding the three named columns.
    :param x_val: Column plotted on the x axis.
    :param y_val: Column whose sum drives the y position and the bubble size.
    :param category: Column used for color and for the dropdown filter.
    :return: The interactive, dropdown-filtered Altair chart.
    """
    category_values = list(np.unique(source[category]))
    total = f'sum({y_val})'

    x_axis = alt.X(x_val,
                   stack=None,
                   scale=alt.Scale(),
                   axis=alt.Axis(labelOverlap=True))
    bubbles = alt.Chart(source).mark_circle().encode(
        x=x_axis,
        y=total,
        size=total,
        color=alt.Color(f'{category}:N',
                        scale=alt.Scale(domain=category_values)),
        tooltip=[x_val],
    ).interactive()

    # Dropdown filter on the category column.
    picker = alt.selection_single(
        fields=[category],
        bind=alt.binding_select(options=category_values),
        name="Category")

    filtered = bubbles.add_selection(picker).transform_filter(picker)
    return filtered.properties(
        title="Word Distribution of Predicted Captions",
        width=800,
        height=300)
st.write(data_barvis_PA)

# pandasql looks the table name up among the Python variables in scope, so
# the frame must keep the name data_barvis_PA.
county_data = pandasql.sqldf("select distinct geo_value from data_barvis_PA")
county_details = dict()
print(county_data.shape[0])
l = county_data["geo_value"].tolist()
print(str(covidcast.fips_to_name(county_data.iloc[1])))

# Map a display name to each geo_value. The name is the string form of the
# fips_to_name() result with its first and last two characters stripped;
# the lookup is performed once per row.
for i, geo_value in enumerate(l):
    county_label = str(covidcast.fips_to_name(county_data.iloc[i]))[2:-2]
    county_details[county_label] = geo_value

input_drop = alt.binding_select(
    options=(list(county_details.values())),
    name="Select County to Highlight data for Bar visits")
picked = alt.selection_single(encodings=["color"], bind=input_drop)

# Lines for counties other than the picked one are grayed out and faded.
scatter = alt.Chart(data_barvis_PA).mark_line().encode(
    x=alt.X("monthdate(time_value):O"),
    y=alt.Y("value:Q",
            axis=alt.Axis(title='Average number of daily bar visits')),
    tooltip=['geo_value', 'monthdate(time_value)', 'value'],
    color=alt.condition(picked, 'geo_value', alt.value('lightgray')),
    opacity=alt.condition(picked, alt.value(1), alt.value(0.05))
).add_selection(picked).properties(width=800, height=400).interactive()

# As above, the query refers to the variable named `data`.
data = fetch(4)
data3_6hr = pandasql.sqldf("select * from data where geo_value like '42%'")
def _best_cluster_count(points, candidates=range(2, 10)):
    """Return the cluster count from *candidates* with the highest silhouette score on *points*."""
    scores = {}
    for k in candidates:
        model = KMeans(n_clusters=k)
        model.fit(points)
        assignments = model.predict(points)
        score = silhouette_score(points, assignments, metric='euclidean')
        scores[k] = score.round(5)
    # max() returns the first (smallest) k among ties, matching a stable
    # descending sort by score.
    return max(scores, key=scores.get)


def _keyword_dropdown(options):
    """Create the 'search' single selection bound to a keyword dropdown over *options*."""
    return alt.selection_single(fields=['keyword'],
                                on='doubleclick',
                                clear=False,
                                bind=alt.binding_select(options=options),
                                name='search',
                                init={'keyword': 'balanc'})


def _cluster_scatter_json(points, labels, centers, companies, *, height,
                          width, **extra_encodings):
    """Serialize a scatter of 2-D points colored by cluster, overlaid with the cluster centers."""
    members = pd.DataFrame(
        data={
            'pca1': points[:, 0],
            'pca2': points[:, 1],
            'cluster': labels,
            'company': companies
        })
    center_frame = pd.DataFrame({"x": centers[:, 0], "y": centers[:, 1]})
    center_marks = alt.Chart(center_frame).mark_point(
        color='black', size=100).encode(x='x', y='y')
    member_marks = alt.Chart(members).mark_circle(size=60).encode(
        x='pca1:Q',
        y='pca2:Q',
        color='cluster:N',
        tooltip=['company:N'],
        **extra_encodings).properties(height=height,
                                      width=width).interactive()
    return (member_marks + center_marks).to_json()


def _cluster_means_json(frame, keyvalues, options):
    """Serialize a bar chart of per-cluster mean scores, filtered to one keyword by a dropdown."""
    means = frame.groupby('cluster').mean().reset_index()
    long_means = means.melt(id_vars='cluster', value_vars=keyvalues)
    long_means = long_means.rename(columns={
        "value": "score",
        "variable": "keyword"
    })
    brush = alt.selection(type='interval')
    bars = alt.Chart(long_means).mark_bar().encode(
        alt.X('cluster:N'),
        alt.Y('score:Q', title='Score % '),
        color=alt.Color('score:Q', scale=alt.Scale(scheme='darkred')),
        opacity=alt.value(.7),
        tooltip=['score:Q'
                 ]).properties(height=200,
                               width=200).add_selection(brush).interactive()
    keyword_select = _keyword_dropdown(options)
    return bars.add_selection(keyword_select).transform_filter(
        keyword_select).to_json()


def _company_profile_json(scores, company, *, height, width):
    """Serialize a bar chart of *company*'s value in every column of *scores* (indexed by company)."""
    transposed = scores.T.reset_index(drop=False)
    long_scores = transposed.melt(id_vars='index', value_vars=company)
    long_scores = long_scores.rename(columns={
        "value": "score",
        "variable": "keyword"
    })
    chart = alt.Chart(long_scores).mark_bar().encode(
        alt.X('index:N', title=" "),
        alt.Y('score:Q', title='Score % '),
        color=alt.Color('score:Q', scale=alt.Scale(scheme='darkgreen')),
        opacity=alt.value(.7),
        tooltip=['score:Q']).properties(height=height,
                                        width=width).interactive()
    return chart.to_json()


def _cluster_members(frame, label):
    """Split the index of *frame* into the companies inside cluster *label* and all others."""
    inside = list(frame.loc[frame['cluster'] == label].index.values)
    inside_lookup = set(inside)
    outside = [name for name in frame.index if name not in inside_lookup]
    return inside, outside


def largealtair_plot():
    """
    Flask view: cluster the companies twice and render all charts of the plot page.

    Reads the query parameters ``company``, ``keyword1``-``keyword3`` and
    ``features``, plus the files ``clusters.csv`` and ``li_samplesize.csv``.
    Keyword counts are normalized to a percentage of each company's sample
    size, reduced to two PCA components and clustered with k-means, using
    the cluster count (2-9) with the best silhouette score. The same
    pipeline is then repeated on the user-selected feature columns only.

    :return: The rendered ``plot.html`` template; ``error.html`` when the
        company is not in the data; ``error2.html`` when fewer than two
        features were selected.
    :raises RuntimeError: If the ``company`` or ``features`` parameter is
        missing or empty.
    """
    keyvalues = [
        'balanc', 'discount', 'divers', 'food', 'free coff', 'free lunch',
        'gym', 'health', 'ice cream', 'incent', 'pay', 'perk', 'rais',
        'sign bonus', 'stock option', 'stress', 'student loan pay', 'surf',
        'vacat', 'women', 'work environ', 'work home'
    ]
    if not request.args.get("company"):
        raise RuntimeError("Missing company name, go back and select company")
    comp = request.args.get('company')  # company name from the index page
    c = pd.read_csv('clusters.csv')

    ##############################################################
    # Normalize the data before running k-means
    ##############################################################
    sample_size = pd.read_csv('li_samplesize.csv', header=None)
    sample_size = sample_size.rename(columns={0: "company", 1: "sample_size"})
    t = pd.merge(c, sample_size, how="left", on='company')
    t = t.drop(t.columns[0], axis=1)  # drop the first (unnamed) column
    t = t.set_index('company')  # leave only numbers
    t = t.div(t.sample_size, axis=0)
    t = round(t * 100, 3)
    t = t.drop(labels='sample_size', axis=1)
    # Fill gaps AND convert in one chain. Converting `t` on its own line
    # would throw the fillna() result away and hand NaNs to PCA.
    d = t.fillna(0).astype(float)

    #################################################
    # Dimensionality reduction and k-means (all keywords)
    #################################################
    X_pca = PCA(n_components=2).fit_transform(d)
    km = KMeans(n_clusters=_best_cluster_count(X_pca),
                random_state=11).fit(X_pca)
    # NOTE(review): shape='comp:N' refers to a field that the scatter data
    # does not contain; kept as-is - confirm whether it can be dropped.
    json3 = _cluster_scatter_json(X_pca,
                                  km.predict(X_pca),
                                  km.cluster_centers_,
                                  list(d.index),
                                  height=350,
                                  width=600,
                                  shape='comp:N')

    # Per-cluster keyword averages.
    d['cluster'] = km.labels_
    json4 = _cluster_means_json(d, keyvalues, keyvalues)

    ##################################
    # List of companies inside the company's cluster
    ##################################
    try:
        number = d.loc[[comp], ['cluster']].values[0][0]
    except KeyError:
        # The requested company is not in the data.
        return render_template('error.html')
    ri, not_ri = _cluster_members(d, number)

    #########################
    # Compare all keyvalues of all companies within the cluster
    #########################
    n = d.reset_index()
    filter_c = n[n.company.isin(ri)]
    c_num1 = len(n.groupby('cluster'))
    num = len(ri)
    # Chart height grows with the number of companies. The branches are
    # mutually exclusive so a small cluster keeps the 200px height.
    if num < 30:
        height = 200
    elif num < 200:
        height = 2200
    else:
        height = 3200
    c_long = filter_c.melt(id_vars='company', value_vars=keyvalues)
    c_long = c_long.rename(columns={"value": "score", "variable": "keyword"})
    comparison_bars = alt.Chart(c_long).mark_bar().encode(
        alt.Y('company:N', title=' '),
        alt.X('score:Q', title='Score %'),
        color=alt.Color('score:Q', scale=alt.Scale(scheme='cividis')),
        opacity=alt.value(.7),
        tooltip=['score:Q']).properties(height=height)
    column_select = _keyword_dropdown(list(c.columns.values[2:]))
    comparison_json = comparison_bars.add_selection(
        column_select).transform_filter(column_select).interactive().to_json()

    #######################################################
    # Company profile (first graph on the plot page)
    #######################################################
    json2 = _company_profile_json(d.drop(labels='cluster', axis=1),
                                  comp,
                                  height=300,
                                  width=550)

    #########################################
    # Three user-chosen keywords to explore
    #########################################
    brush = alt.selection(type='interval')
    keyword1 = request.args.get("keyword1")
    keyword2 = request.args.get("keyword2")
    keyword3 = request.args.get("keyword3")
    points = alt.Chart(n).mark_point().encode(
        x=alt.X(keyword1),
        y=alt.Y(keyword2),
        color=alt.Color('company:N', legend=None),
        size=keyword3,
        tooltip=[keyword1, keyword2, keyword3, 'pay', 'company'
                 ]).properties(height=300,
                               width=500).add_selection(brush).interactive()
    json5 = points.to_json()

    #*************************************************************************
    # RECLUSTER GROUP: repeat the pipeline on the selected features only
    #*************************************************************************
    features = request.args.getlist("features")
    if not features:
        raise RuntimeError(
            "Must check at least two boxes, go back and select two boxes. Also make triplesure your company field is not blank."
        )
    if len(features) < 2:
        return render_template('error2.html')

    # 1. New frame holding only the selected feature columns.
    make_new = pd.DataFrame(n['company'])
    for feature in features:
        make_new[feature] = n[feature]
    d3 = make_new.set_index('company')

    # 2. Company profile restricted to the selected features.
    json7 = _company_profile_json(d3, comp, height=200, width=300)

    # 3. PCA + k-means on the reduced feature set.
    X_pca = PCA(n_components=2).fit_transform(d3)
    km = KMeans(n_clusters=_best_cluster_count(X_pca),
                random_state=11).fit(X_pca)
    json6 = _cluster_scatter_json(X_pca,
                                  km.predict(X_pca),
                                  km.cluster_centers_,
                                  list(d.index),
                                  height=250,
                                  width=400)

    # 4. Relabel the companies with the new clusters. `n` was built before
    # this point and therefore still carries the first clustering.
    d['cluster'] = km.labels_
    number2 = d.loc[[comp], ['cluster']].values[0][0]
    ri2, not_ri2 = _cluster_members(d, number2)
    c_num2 = len(d.groupby('cluster'))

    # 5. Per-cluster keyword averages under the new labels.
    json8 = _cluster_means_json(d, keyvalues, features)

    return render_template('plot.html',
                           json=comparison_json,
                           json2=json2,
                           json3=json3,
                           json4=json4,
                           json5=json5,
                           json6=json6,
                           lila=ri,
                           lila2=ri2,
                           not_ri2=not_ri2,
                           not_ri=not_ri,
                           company=comp,
                           json7=json7,
                           json8=json8,
                           number=number,
                           number2=number2,
                           c_num2=c_num2,
                           c_num1=c_num1)
# altair code # chart = alt.Chart(aln) brush = alt.selection(type='interval', encodings=["x"]) color = alt.Color('cutid:Q', sort="descending", scale=alt.Scale(scheme='spectral'), legend=None) mycolor = alt.condition(brush, color, alt.value('lightgray')) single_nearest = alt.selection_single(on='mouseover', nearest=True, empty='none') scales = alt.selection_interval(bind='scales') t_name = alt.binding_select(options=contigs) q_name = alt.binding_select(options=contigs) select_t = alt.selection_single(fields=['t_name'], bind=t_name, name="Target", init={"t_name": contigs[0]}) select_q = alt.selection_single(fields=['q_name'], bind=q_name, name="Query", init={"q_name": contigs[0]}) segs = chart.mark_line().encode(x=alt.X('x1:Q', title='Target position (bp)'), x2="x2:Q", y=alt.Y("y1:Q", title='Query position (bp)'),
scale=alt.Scale(domain=(100000, 10**9), clamp=True)), y='IMDB_Rating:Q', tooltip="Title:N") # A slider filter year_slider = alt.binding_range(min=1969, max=2018, step=1) slider_selection = alt.selection_single(bind=year_slider, fields=['Release_Year'], name="Release Year_") filter_year = base.add_selection(slider_selection).transform_filter( slider_selection).properties(title="Slider Filtering") # A dropdown filter genre_dropdown = alt.binding_select(options=genres) genre_select = alt.selection_single(fields=['Major_Genre'], bind=genre_dropdown, name="Genre") filter_genres = base.add_selection(genre_select).transform_filter( genre_select).properties(title="Dropdown Filtering") #color changing marks rating_radio = alt.binding_radio(options=ratings) rating_select = alt.selection_single(fields=['MPAA_Rating'], bind=rating_radio, name="Rating") rating_color_condition = alt.condition(rating_select, alt.Color('MPAA_Rating:N', legend=None),
base = alt.Chart(source).mark_point(size=60).encode( x='Confirmed', y='SearchTrend', color='Country', tooltip=[ 'Country', 'Day', 'Confirmed', 'Death', 'Recovered', 'SearchTrend' ]).transform_calculate( # Generate Gaussian jitter with a Box-Muller transform jitter='sqrt(-2*log(random()))*cos(2*PI*random())') chart = base.properties(width=960, height=720) country_code = country_table['Code'].to_list() country_dropdown = alt.binding_select(options=country_code) country_select = alt.selection_single(fields=['Country'], bind=country_dropdown, name="Country") filtered = base.add_selection(country_select).transform_filter( country_select).properties(title="Search Trend vs Confirmed Cases", width=960, height=720) chart.save('temp/viz/test5.html') filtered.save('temp/viz/test6.html') # line chart base2 = alt.Chart(source).mark_line().encode(x='Day', y='SearchTrend',
#-------------------------------------------------- #--Time versus Value charts (Line) #-------------------------------------------------- st.title("Compare the individual metrics over time") st.markdown("**1) Select a PA County from the dropdown (below charts)**") st.markdown( "**2) Highlight any of the first two graph (emotional charts) to compare with last two (behavior)**" ) st.markdown( "Note: The counties shown are the only counties in which there was data for all four datasets." ) # st.markdown("Note: Not all counties have data for each metric for every time period (Try large counties such as Alleghany, Philadelphia, York, etc.)") #---Select County Dropdown-- county_dropdown = alt.binding_select(options=list(countyList)) selectedCounty = alt.selection_single(fields=['name'], bind=county_dropdown, name='PA County:') #---brush select area to focus on brush = alt.selection(type='interval', encodings=['x']) #---commWorry chart commWorryChart = alt.Chart(commWorrydf).mark_area(color='red').encode( alt.X("monthdate(time_value):T", axis=alt.Axis(title='Date')), alt.Y("value:Q", axis=alt.Axis(title='Percentage of people')), tooltip=[ alt.Tooltip('geo_value', title='FIPS'), alt.Tooltip('monthdate(time_value)', title='date'), alt.Tooltip('value', title='Value')
# df_5 for HDI trends
country_5 = ['Norway', 'United States', 'Netherlands', 'Germany', 'Canada']
data = pd.read_csv('HDI.csv')
# Keep the five countries and drop the first column of the file.
data_5 = data[data['Country'].isin(country_5)].iloc[:, 1:]
# var_name is a scalar column name; passing it as a list is rejected by
# recent pandas releases.
df_5 = data_5.melt('Country', var_name='year')

################# Altair Charts ################

# altair_vis 1 (history)
country_options = list(df_point['Country'].unique())

# Dropdown widget (a radio alternative would be
# alt.binding_radio(options=country_options, name='Select Country: ')).
widget = alt.binding_select(options=country_options, name='Select Country: ')

# NOTE(review): the selection name 'Counry' looks like a typo for 'Country',
# but other code may refer to it by this name, so it is left unchanged.
# NOTE(review): country_options[5] requires at least six countries in
# df_point - confirm.
selectionEmoji = alt.selection_single(fields=['Country'],
                                      init={'Country': country_options[5]},
                                      bind=widget,
                                      name='Counry')

# Color by country for the selected country, white for everything else.
colorCondition = alt.condition(selectionEmoji, 'Country', alt.value('white'))

# Legend-bound hover selection; empty='none' means nothing is selected
# until the pointer is over a mark.
selection1 = alt.selection_single(empty='none',
                                  on="mouseover",
                                  fields=['Country'],
                                  bind='legend')
# Value 1 when selected, 0.00001 otherwise.
condition1 = alt.condition(selection1, alt.value(1), alt.value(0.00001))

# selection_zoom=alt.selection_interval(bind='scales',encodings=['y']) #,encodings=['x']
def make_chart(df, world_topo):
    """Build a two-panel mobility dashboard: time series (left) and map (right).

    :param df: long-format DataFrame. The columns read here are ``country``,
        ``country_code``, ``category``, ``date`` and ``value``; ``value`` is
        divided by 100 before plotting and rendered with percent formats.
    :param world_topo: geographic data source handed to ``alt.Chart`` for both
        map layers; its features are joined to ``df`` through their ``id``.
    :return: the horizontally concatenated Altair chart.
    """
    key_cols = ["country", "country_code", "category"]
    categories = list(df.category.unique())

    # Wide lookup table for the map: last value per country/category (scaled
    # by 1/100), with one column per category.
    last_values = df.set_index(key_cols)["value"].div(100).groupby(key_cols).last()
    map_data = last_values.unstack().reset_index()

    # Category is chosen from a drop-down; countries by clicking on the map.
    selection_category = alt.selection_single(
        name="Mobility",
        fields=["category"],
        init={"category": "workplaces"},
        bind=alt.binding_select(options=categories),
    )
    selection_country = alt.selection_multi(
        name="Country of",
        fields=["country"],
        empty="all",
    )

    # Grey base layer; the feature with id 10 is left out.
    background = alt.Chart(world_topo).mark_geoshape(
        fill="lightgray",
        stroke="white",
        strokeWidth=0.5,
    )
    background = background.transform_filter("datum.id != 10")

    # Colored layer: diverging scale centred on 0 for selected countries,
    # light grey for the others.
    value_color = alt.Color(
        "value:Q",
        scale=alt.Scale(scheme="blueorange", domainMid=0),
        legend=alt.Legend(format=".0%"),
    )
    foreground = alt.Chart(world_topo).mark_geoshape(stroke="white", strokeWidth=0.5)
    foreground = foreground.encode(
        color=alt.condition(selection_country, value_color, alt.value("lightgray")),
        tooltip=[alt.Tooltip("value:Q", format=".0%"), "country:N"],
    )
    # Join the wide table onto the shapes, then fold the category columns back
    # into (category, value) pairs so the drop-down can filter them.
    foreground = foreground.transform_lookup(
        lookup="id",
        from_=alt.LookupData(
            data=map_data,
            key="country_code",
            fields=["country"] + categories,
        ),
    )
    foreground = foreground.transform_fold(fold=categories, as_=["category", "value"])
    foreground = foreground.add_selection(selection_category)
    foreground = foreground.transform_filter(selection_category)
    foreground = foreground.add_selection(selection_country)

    map_chart = (background + foreground).properties(width=700, height=500)

    # Time series: mean value per category/date over the selected countries.
    ts_data = df.assign(value=df["value"].div(100))
    ts_chart = alt.Chart(ts_data).mark_line(point=True).encode(
        x="date:T",
        y=alt.Y("value:Q", axis=alt.Axis(format="%")),
        tooltip=[
            alt.Tooltip("date:T", format="%a, %b %e"),
            alt.Tooltip("value:Q", format=".1%"),
        ],
    )
    ts_chart = ts_chart.add_selection(selection_category)
    ts_chart = ts_chart.add_selection(selection_country)
    ts_chart = ts_chart.transform_filter(selection_category)
    ts_chart = ts_chart.transform_filter(selection_country)
    ts_chart = ts_chart.transform_aggregate(
        value="mean(value)",
        groupby=["category", "date"],
    )

    return (ts_chart | map_chart).properties(
        title="Mobility change by geography, across different categories of places (Variation to baseline)"
    )
def chart_altair(df, system_ini='peninsular'):
    '''
    Build a layered Altair chart of electric generation over time.

    Four area layers are drawn (total generation, renewable, solar
    photovoltaic and wind). The chart can be filtered by electric system
    (drop-down) and by year (radio buttons). A vertical rule and text labels
    follow the mouse and show the values at the nearest date.

    :param df: pandas DataFrame; the columns read here are ``system``,
               ``year``, ``fecha``, ``Renov_norenov``, ``Tecnologia`` and
               ``Generacion_Mwh`` (expected to hold the average of the last
               7 days of generation, per the original description)
    :param system_ini: electric system selected initially, 'peninsular' by default
    :return: altair layered chart
    '''
    # X axis tick values: the first day of every month from 2016-01 to
    # 2021-12, each year followed by its 31st of December.
    x_labels = []
    for year in range(2016, 2022):
        x_labels.extend(f'{year}-{month:02d}-01' for month in range(1, 13))
        x_labels.append(f'{year}-12-31')

    # Color scale: entries of `domain` map position by position onto the
    # hexadecimal colors in `range_`.
    domain = ['Generación total', 'Renovable', 'Solar fotovoltaica', 'Eólica']
    range_ = ['#85C1E9', '#239B56', '#D35400', '#F7DC6F']

    # Drop-down selecting the electric system to show.
    selection_sys = alt.selection_single(
        name='REE',
        fields=['system'],
        bind=alt.binding_select(options=list(df['system'].unique())),
        init={'system': system_ini})

    # Radio buttons selecting the year to show; the largest year is preselected.
    selection_year = alt.selection_single(
        name='Choose',
        fields=['year'],
        bind=alt.binding_radio(options=list(df['year'].unique())),
        init={'year': max(df['year'])})

    # Selection that snaps to the `fecha` value nearest to the mouse pointer.
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=['fecha'],
                            empty='none')

    def _filtered(chart):
        # Restrict a layer to the chosen system and year.
        return chart.transform_filter(selection_sys).transform_filter(
            selection_year)

    def _value_label(area, dx, dy):
        # Text layer derived from an area layer; shows Generacion_Mwh only at
        # the hovered date and a blank otherwise.
        return area.mark_text(
            align='left', dx=dx, dy=dy, color='#212F3C').encode(
                text=alt.condition(nearest, 'Generacion_Mwh', alt.value(' ')))

    # Invisible points over the whole data set that carry the hover selection.
    selectors = _filtered(
        alt.Chart(df).mark_point().encode(
            alt.X('fecha'), opacity=alt.value(0)).add_selection(nearest))

    # Main layer: total generation by date. It owns the system/year widgets
    # and the axis tick values.
    total_rows = df[df['Renov_norenov'] == 'Generación total']
    bar = alt.Chart(total_rows).mark_area(color='#85C1E9').encode(
        alt.X('fecha', axis=alt.Axis(values=x_labels, labelAngle=0)),
        alt.Y('Generacion_Mwh:Q')).add_selection(selection_sys, selection_year)
    bar = _filtered(bar).properties(width=1400, height=450)

    # Renewable generation; this layer carries the color encoding.
    renov_rows = df[df['Tecnologia'] == 'Renovable']
    bar_renov = _filtered(
        alt.Chart(renov_rows).mark_area().encode(
            alt.X('fecha'),
            alt.Y('Generacion_Mwh:Q'),
            color=alt.Color('Tecnologia',
                            scale=alt.Scale(domain=domain, range=range_))))
    text_renov = _value_label(bar_renov, dx=3, dy=-20)

    # Vertical rule drawn at the hovered date.
    rules = alt.Chart(df).mark_rule(color='gray').encode(
        x='fecha', ).transform_filter(nearest)

    # Solar photovoltaic generation.
    solar_rows = df[df['Tecnologia'] == 'Solar fotovoltaica']
    bar_solar = _filtered(
        alt.Chart(solar_rows).mark_area(opacity=.8, color='#D35400').encode(
            alt.X('fecha'), alt.Y('Generacion_Mwh:Q')))
    text_solar = _value_label(bar_solar, dx=5, dy=-5)

    # Wind generation.
    wind_rows = df[df['Tecnologia'] == 'Eólica']
    bar_eolica = _filtered(
        alt.Chart(wind_rows).mark_area(color='#F7DC6F').encode(
            alt.X('fecha'), alt.Y('Generacion_Mwh:Q')))
    text_eolica = _value_label(bar_eolica, dx=5, dy=-5)

    # Stack every element into one layered chart and apply global styling.
    layered = alt.layer(bar, bar_renov, bar_eolica, bar_solar, selectors,
                        rules, text_renov, text_eolica, text_solar)
    layered = layered.configure_axis(labelFontSize=13, titleFontSize=14)
    layered = layered.configure_text(fill='#212F3C', fontSize=13)
    layered = layered.configure_legend(labelFontSize=14)
    return layered.interactive()
def other_viz(result):
    """Build two fire-type charts from the fire records in ``result``.

    * ``fire_type``: bar chart of each fire type's share, filterable by year
      and state through two drop-downs. The share is computed against the
      count of ALL records, not against the selected year/state.
    * ``fire_total``: bubble chart of the count per year and fire type.

    :param result: DataFrame with at least the columns ``YEAR_``, ``STATE``
        and ``FIRETYPE`` (codes 0-5 are turned into readable labels).
    :return: None. The concatenated chart is only evaluated as the last
        expression.
    """
    # Code -> label table, applied one entry at a time in this order.
    firetype_labels = (
        (0, 'Action Fires/Supressed Fires'),
        (1, 'Natural Out'),
        (2, 'Support Action/Assist Fire'),
        (3, 'Fire Management/Perscribed'),
        (4, 'False Alarm'),
        (5, 'Severe'),
    )

    def _label_firetypes(frame):
        # Replace the numeric FIRETYPE codes of `frame` with their labels.
        for code, label in firetype_labels:
            frame['FIRETYPE'] = frame['FIRETYPE'].replace(code, label)

    years = list(result['YEAR_'].sort_values().unique())
    states = list(result['STATE'].sort_values().unique())

    # Count per year/state/type plus each group's share of all records.
    by_year_state = result.groupby(['YEAR_', 'STATE', 'FIRETYPE'])
    firetype_df = by_year_state.size().reset_index(name="firetype count")
    total_fires = firetype_df['firetype count'].sum()
    firetype_df['Average'] = firetype_df['firetype count'] / total_fires
    _label_firetypes(firetype_df)

    # Count per year/type over all states.
    by_year = result.groupby(['YEAR_', 'FIRETYPE'])
    firetype_df2 = by_year.size().reset_index(name="Firetype Count")
    _label_firetypes(firetype_df2)

    # Drop-down widgets for year and state.
    year_select = alt.selection_single(
        fields=['YEAR_'], bind=alt.binding_select(options=years))
    state_select = alt.selection_single(
        fields=['STATE'], bind=alt.binding_select(options=states))

    fire_type = alt.Chart(firetype_df).mark_bar(color='firebrick').encode(
        x=alt.X('Average:Q', axis=alt.Axis(format='.0%')),
        y='FIRETYPE:N',
    )
    fire_type = fire_type.add_selection(year_select, state_select)
    fire_type = fire_type.transform_filter(year_select)
    fire_type = fire_type.transform_filter(state_select)
    fire_type = fire_type.properties(width=300, height=200, title='')

    year_axis = alt.Axis(
        labelAngle=360,
        values=[1980, 1985, 1990, 1995, 2000, 2005, 2010, 2016])
    fire_total = alt.Chart(firetype_df2).mark_circle(
        opacity=0.8, stroke='black', strokeWidth=1)
    fire_total = fire_total.encode(
        alt.X('YEAR_:O', axis=year_axis, title='Year'),
        alt.Y('FIRETYPE:N', title='Types of Fire'),
        alt.Size('Firetype Count:Q',
                 scale=alt.Scale(range=[0, 1000]),
                 legend=None),
        alt.Color('FIRETYPE:N', legend=None),
        alt.Tooltip(['Firetype Count:Q', 'YEAR_']),
    )
    fire_total = fire_total.properties(
        width=400, height=200, title='National Count of Fires by Type')

    # NOTE(review): the combined chart is neither returned nor assigned. It is
    # kept as a bare expression in case an implicit-display mechanism renders
    # it - confirm with the caller before turning this into a `return`.
    fire_type | fire_total
scale=alt.Scale(scheme='reds'))) compare_map = alt.layer(basemap, compare_dotmap).properties( width=698, height=900).configure_view(stroke=None) st.altair_chart(compare_map) elif menu == 'Monitor probability of failure': # Make monitor map with Altair date slider hazard_stacked = hazard.melt( id_vars=['reference', 'longitude', 'latitude'], value_vars=timestamps, var_name='timestamp', value_name='probability of failure') timestamp_dropdown = alt.binding_select(options=timestamps) timestamp_select = alt.selection_single(fields=['timestamp'], bind=timestamp_dropdown, name="Select") monitor_dotmap = alt.Chart( hazard_stacked, title='Probability of failure by timestamp').mark_circle( stroke='#aaa', strokeWidth=0.5).add_selection( timestamp_select).transform_filter(timestamp_select).encode( latitude='latitude:Q', longitude='longitude:Q', # Added percentage formatting to tooltip tooltip=[ alt.Tooltip('reference:N'), alt.Tooltip('probability of failure:Q', format=".2%"),
'instrumentalness':'Predicts whether a track contains no vocals. “Ooh” and “aah” sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly “vocal”. The closer the instrumentalness value is to 1.0, the greater likelihood the track contains no vocal content. Values above 0.5 are intended to represent instrumental tracks, but confidence is higher as the value approaches 1.0.', 'speechiness':'Speechiness detects the presence of spoken words in a track. The more exclusively speech-like the recording (e.g. talk show, audio book, poetry), the closer to 1.0 the attribute value. Values above 0.66 describe tracks that are probably made entirely of spoken words. Values between 0.33 and 0.66 describe tracks that may contain both music and speech, either in sections or layered, including such cases as rap music. Values below 0.33 most likely represent music and other non-speech-like tracks. ', 'acousticness':'A confidence measure from 0.0 to 1.0 of whether the track is acoustic. 1.0 represents high confidence the track is acoustic.', } st.subheader("Use the music metric dropdown (above the charts) to select the metric that will be presented in the charts." + " Use the broad genre dropdown (below the charts) to view only the data of that genre." + " Click and drag to select a subset of points in the scatter plot and view their music metric distribution in the histogram." 
+ " Use tooltip to see the artist name and track (song) name for a particular data point.") hoursMinutesOrdered = [] for hour in range(0, 24): for minute in range(0, 60): hoursMinutesOrdered.append(alt.DateTime(hours = hour, minutes = minute)) input_dropdown = alt.binding_select(options=broad_genres, name = "Broad Genre: ") selection = alt.selection_single(fields=['broad_genres'], bind=input_dropdown) color = alt.condition(selection, alt.Color('broad_genres:N'), alt.value('#00000000')) scatter_brush = alt.selection(type='interval') metric_dropdown = st.selectbox('Music Metric:', music_metrics) st.write(spotify_features_explanations[metric_dropdown]) base_danceability_vs_hour = alt.Chart(df).mark_point().encode( x=alt.X('hoursminutes(endTime_loc):O', title="Hour of the Day", scale = alt.Scale(domain=hoursMinutesOrdered)), y=alt.Y(metric_dropdown, type="quantitative", scale=alt.Scale(zero=False, domain=[0.0, 1.0])) ).properties( width=700, height=500 )
'Asian & Pacific Islander', 'Hispanic' ] # useful constants for mapping GEOJSON_STATES_URL = ('https://raw.githubusercontent.com/' 'vega/vega/master/docs/data/us-10m.json') MAP_PROJECTION = 'albersUsa' COLOR_SCHEME = 'yellowgreenblue' # range for year slider START_YEAR = 2000 END_YEAR = 2018 # fixed dropdown objects and selection options DROPDOWN_OBJ_AGE = alt.binding_select(options=CATEGORIES_AGE) DROPDOWN_OBJ_SEX = alt.binding_select(options=CATEGORIES_SEX) # accompanying selection options SELECT_OBJ_AGE = alt.selection_single( fields=['Group'], bind=DROPDOWN_OBJ_AGE, name='Age', init={'Group':'Total'} ) SELECT_OBJ_SEX = alt.selection_single( fields=['Group'], bind=DROPDOWN_OBJ_SEX, name='Demographics', init={'Group':'Total'}
facet=alt.Facet('bf_id_s:N', columns=len(unq_bf), spacing=-.5), color='np_id_s:N', size='a_33:O', ).properties(width=180).interactive() p_radio = alt.binding_radio(options=unq_p.tolist()) p_select = alt.selection_single(fields=['p'], bind=p_radio, name="Aspect ratio, p") p_color_condition = alt.condition( p_select, alt.Color('p:N', legend=None), alt.value('lightgray')) np_dropdown = alt.binding_select(options=unq_np.tolist()) np_select = alt.selection_single(fields=['np_id_s'], bind=np_dropdown, name="Nanoparticle") radio_p = base.add_selection(p_select).encode( color=p_color_condition, ).add_selection( np_select).transform_filter(np_select).properties( title="Select Aspect Ratio (p) and Nanoparticle") st.write(radio_p) if newplot_toggle: st.subheader('Extra Plots') "Pick variables to plot" plot_variable_y = st.selectbox('Y Variable', data.columns, 35) if 1 == 1:
# In[13]:


#hide_input
# Shared encoding for every country line: days since the 100th case on x,
# confirmed cases on a log-scaled y, one fixed color per country.
base = alt.Chart(dff2, width=600).encode(
    alt.X('Days since 100 cases:Q'),
    alt.Y('Confirmed Cases:Q', scale=alt.Scale(type='log')),
    alt.Color(
        'Country:N',
        scale=alt.Scale(domain=color_domain, range=color_range),
        legend=None,
    ),
    tooltip=['Country', 'Date', 'Confirmed Cases', 'Days since 100 cases'],
)

# Drop-down listing every non-baseline country, with 'US' preselected.
country_selection = alt.selection_single(
    name='Select',
    fields=['Country'],
    init={'Country': 'US'},
    bind=alt.binding_select(
        options=sorted(set(countries) - set(baseline_countries))),
)

# Keeps only rows dated at or after `max_date` (compared in epoch milliseconds).
date_filter = alt.datum['Date'] >= int(max_date.timestamp() * 1000)

# base2: baseline countries; base3: the selected country; base4: the selected
# country restricted by `date_filter`.
base2 = base.transform_filter(
    alt.FieldOneOfPredicate(field='Country', oneOf=baseline_countries))
base3 = base.transform_filter(country_selection)
base4 = base3.transform_filter(date_filter)

# Reference curve: 100 cases growing by a factor of 1.33 per day.
max_day = dff2['Days since 100 cases'].max()
ref = pd.DataFrame(
    [[day, 100 * 1.33**day] for day in range(max_day + 1)],
    columns=['Days since 100 cases', 'Confirmed Cases'],
)
base_ref = alt.Chart(ref).encode(
    x='Days since 100 cases:Q',
    y='Confirmed Cases:Q',
)
# Only the last point of the reference curve.
base_ref_f = base_ref.transform_filter(
    alt.datum['Days since 100 cases'] >= max_day)
def plot_on_PA(bar_dataPA):
    """Draw a county map with one circle per county, sized by ``value``.

    The rows of ``bar_dataPA`` whose ``geo_value`` starts with ``'42'`` are
    kept, each county name is geocoded with Nominatim, and the resulting
    points are layered over a county outline map and written to the Streamlit
    page. A drop-down selects which ``time_value`` is shown.

    :param bar_dataPA: DataFrame with at least the columns ``geo_value``,
        ``time_value`` (string) and ``value``.
    :return: None; the layered chart is emitted with ``st.write``.

    NOTE: the SQL strings below name the local variables ``bar_dataPA``,
    ``gb`` and ``kal`` as tables - presumably pandasql resolves them from the
    calling scope, so do not rename these locals without updating the queries.
    """
    from vega_datasets import data
    counties = alt.topo_feature(data.us_10m.url, 'counties')
    # Keep only the rows whose geo_value begins with '42'.
    bar_dataPA = pandasql.sqldf(
        "select * from bar_dataPA where geo_value like '42%'")
    county_data = pandasql.sqldf("select distinct geo_value from bar_dataPA")
    # Maps a county name to its geo_value code.
    county_details = dict()
    l = county_data["geo_value"].tolist()
    for i in range(county_data.shape[0]):
        # The key is the string form of fips_to_name()'s result with the first
        # two and last two characters cut off - presumably to strip the
        # brackets and quotes of a one-element list; TODO confirm.
        county_details.update({
            str(covidcast.fips_to_name(county_data.iloc[i]))[2:len(
                str(covidcast.fips_to_name(county_data.iloc[i]))) - 2]:
            l[i]
        })
    # Same query as above, re-run on the already filtered frame.
    data6hrs = pandasql.sqldf(
        "select * from bar_dataPA where geo_value like '42%'")
    # County outlines restricted to state_id 42, where state_id is the
    # integer part of the feature id divided by 1000.
    # NOTE(review): `lookup='data6hrs'` names the DataFrame variable, not a
    # field of the county features - this looks like it should be the feature
    # id field; the looked-up `value` is not used by any encoding. Confirm.
    map_pennsylvania = (alt.Chart(data=counties, ).mark_geoshape(
        stroke='black', strokeWidth=1,
        fill='lightyellow').transform_calculate(
            state_id="(datum.id / 1000)|0").transform_filter(
                (alt.datum.state_id) == 42).transform_lookup(
                    lookup='data6hrs',
                    from_=alt.LookupData(data6hrs, 'geo_value', ['value']),
                ).properties(width=500, height=400))
    geolocator = Nominatim(user_agent="streamlit_app.py")
    # Filled in place by sw() below, then rebound to its return value.
    lat = []
    lon = []
    coun = []

    # Geocodes every county name as "<name>, PA" and appends latitude,
    # longitude and geo_value code to the enclosing lists.
    # NOTE(review): `lsd` is built but never read or returned, and a failed
    # geocode (None result) would raise AttributeError on `.latitude`.
    @st.cache
    def sw():
        lsd = {}
        for i in county_details.keys():
            sss = i + ", PA"
            y = geolocator.geocode(sss)
            r = [y.latitude, y.longitude]
            lat.append(y.latitude)
            #st.write(y.latitude)
            lon.append(y.longitude)
            coun.append(county_details[i])
            lsd.update({county_details[i]: r})
        return lat, lon, coun

    [lat, lon, coun] = sw()
    det = {'County': coun, 'Latitude': lat, 'Longitude': lon}
    gb = pd.DataFrame(det)
    # Keep only the first 10 characters of time_value (presumably the
    # YYYY-MM-DD date part).
    bar_dataPA['time_value'] = bar_dataPA['time_value'].str.slice(0, 10)
    # Attach coordinates to every data row by matching geo_value to County.
    kal = pandasql.sqldf(
        "select bar_dataPA.geo_value,bar_dataPA.time_value,bar_dataPA.value,gb.Latitude,gb.Longitude from bar_dataPA,gb where gb.County=bar_dataPA.geo_value"
    )
    dg = pandasql.sqldf("select distinct time_value from kal")
    # Drop-down over the distinct dates, bound to a selection on the color
    # encoding (which encodes time_value).
    input_drop = alt.binding_select(options=dg['time_value'].tolist(),
                                    name="Select Date")
    picked = alt.selection_single(encodings=["color"], bind=input_drop)
    # One circle per county/date row; only rows matching the picked date
    # remain after the filter.
    points = alt.Chart(kal).mark_circle().encode(
        longitude='Longitude:Q',
        latitude='Latitude:Q',
        size='value:Q',
        color=alt.condition(picked,
                            'time_value',
                            alt.value('lightgray'),
                            legend=None),
        opacity=alt.condition(picked, alt.value(0.5), alt.value(0)),
        tooltip=['geo_value', 'value'
                 ]).add_selection(picked).transform_filter(picked).properties(
                     width=500, height=400)
    # Layer the points on top of the outline map and render it.
    st.write(map_pennsylvania + points)
'Unsafe water source', 'Unsafe sanitation', 'Household air pollution from solid fuels', 'Air pollution', 'Outdoor air pollution' ], var_name='Risk Factor') # Country Selection countries = deaths['country'].unique() # get unique country names countries.sort() # sort alphabetically selectCountry = alt.selection_single( name='Select', # name the selection 'Select' fields=['country'], # limit selection to the country field init={'country': countries[0]}, # use first country entry as initial value bind=alt.binding_select( options=countries) # bind to a menu of unique country values ) # Year selection brush = alt.selection_interval(encodings=['x']) years = alt.Chart(deaths).mark_line().add_selection(brush).transform_filter( selectCountry).encode( alt.X('year:O', title='Year'), alt.Y('sum(value)', title='Smoking Deaths (all ages)')).properties(height=100) # Area chart - Smoking deaths by ages base = alt.Chart(deaths).mark_area().add_selection( selectCountry).transform_filter(selectCountry).transform_filter( brush).encode(alt.X('year:O', title='Year'), y=alt.Y('value:Q',
import json

import altair as alt
import geopandas as gpd
import pandas as pd

# Read the shapefile and extract the GeoJSON features, which Altair can
# consume directly as inline data. `json` must be imported for `json.loads`.
dt = gpd.read_file('harvest.shp')
json_f = dt.to_json()
json_features = json.loads(json_f)
data_geo = alt.Data(values=json_features['features'])

# Plot the graphic.
scen_list = ['1', '65', '129', '193', '257', '321', '385', '449']

# The selection targets a top-level `scenario` field. In GeoJSON features the
# attribute only exists as `properties.scenario`, and naming the nested path
# here does not work, so the chart derives a flat `scenario` field first.
selectScen = alt.selection_single(
    name='Select',
    fields=['scenario'],
    bind=alt.binding_select(options=scen_list))

alt.Chart(data_geo).mark_geoshape(
    fill='lightgray',
    stroke='white',
).encode(
    color=alt.Color('properties.harvest:N', title='Period'),
    tooltip=['properties.AREAAC:Q', 'properties.StandAge:Q'],
).properties(
    width=500,
    projection={'type': 'mercator'},
).transform_calculate(
    # Flatten the nested attribute and cast it to a string so it compares
    # equal to the string options of the drop-down.
    scenario='toString(datum.properties.scenario)'
).add_selection(selectScen).transform_filter(selectScen)