def s_avg_graph():
    """Render an interactive bar chart of mean solar LCOE per year for a chosen country.

    Brushing a year range on the bars re-computes the firebrick mean rule.
    """
    df_solar_world = load_data_world_s()

    # Interval brush restricted to the x (Year) axis.
    year_brush = alt.selection(type='interval', encodings=['x'])

    bar_layer = (
        alt.Chart()
        .mark_bar()
        .encode(
            alt.X('Year', scale=alt.Scale(zero=False)),
            y='mean(Solar_LCOE (2019 USD/kWh))',
            opacity=alt.condition(year_brush, alt.OpacityValue(1),
                                  alt.OpacityValue(0.7)),
        )
        .add_selection(year_brush)
        .properties(
            title='Utility-scale solar PV weighted average cost of electricity',
            width=700,
            height=350,
        )
    )

    mean_rule = (
        alt.Chart()
        .mark_rule(color='firebrick')
        .encode(
            y='mean(Solar_LCOE (2019 USD/kWh))',
            size=alt.SizeValue(3),
            tooltip=["mean(Solar_LCOE (2019 USD/kWh))"],
        )
        .transform_filter(year_brush)
    )

    country_options = [
        "Australia", "China", "France", "Germany", "India", "Italy",
        "Japan", "Netherlands", "Republic of Korea", "Spain", "Turkey",
        "Ukraine", "United Kingdom", "United States", "Vietnam",
    ]
    country = st.selectbox("Select country", country_options)

    # Data attached at the layer level so bars and rule share the subset.
    layered = alt.layer(
        bar_layer, mean_rule,
        data=df_solar_world[df_solar_world["Country"] == country])
    st.altair_chart(layered)
def diverge_plot(data, question):
    """Diverging stacked-bar chart of 7-point Likert ranks for one question.

    Bars are pre-computed percentage spans (pct_start..pct_end) per prompt;
    clicking a bar highlights all bars with the same rank. A dashed vertical
    rule marks x = 0.

    NOTE(review): `data` is not referenced in this body — the bar layer is
    built from an empty alt.Chart(), so data presumably gets attached when
    the returned layer is rendered. Confirm against callers.
    """
    # Seven-point rank scale mapped onto the Berkeley brand palette.
    color_scale = alt.Scale(
        domain=["1", "2", "3", "4", "5", "6", "7"],
        range=[
            berkeley_palette["rose_garden"], berkeley_palette["medalist"],
            berkeley_palette["california_gold"], berkeley_palette["bay_fog"],
            berkeley_palette["lawrence"], berkeley_palette["founders_rock"],
            berkeley_palette["berkeley_blue"]
        ])
    # Multi-select on rank: clicked rank(s) stay opaque, the rest dim to 0.5.
    select = alt.selection_multi(fields=['rank'])
    p = alt.Chart()\
        .transform_filter(alt.datum.question == question)\
        .mark_bar().encode(
            x=alt.X('pct_start:Q'),
            x2=alt.X2('pct_end:Q'),
            y=alt.Y('prompt:N',
                    axis=alt.Axis(title=None, ticks=False, domain=False,
                                  offset=5, minExtent=60)),
            color=alt.Color('rank:O', legend=None, scale=color_scale),
            tooltip=[alt.Tooltip('treatment:N', title='Assignment'),
                     alt.Tooltip('question:N', title='Question'),
                     alt.Tooltip('rank:O', title='Rank (1-7)'),
                     alt.Tooltip('pct_of_total:Q', title='% of Total',
                                 format='.2f')],
            opacity=alt.condition(select, alt.OpacityValue(1.0),
                                  alt.OpacityValue(0.5))
        ).properties(height=150, width=650,
                     title={'text': ''}).add_selection(select)
    # Dashed zero-reference rule at x = 0.
    l = alt.Chart(pd.DataFrame({'X': [0]})).mark_rule(
        size=3, color=berkeley_palette["pacific"], strokeDash=[10, 5])\
        .encode(x=alt.X('X', type='quantitative', title=None))
    return alt.layer(p, l)
def create_visualisation(df_dic: Dict[int, pd.DataFrame], ts: np.ndarray, colours: List[str]) -> alt.Chart:
    """Build a time-slider chart layering line+point traces for each frame in df_dic.

    Args:
        df_dic: mapping from a 1-based series key to a DataFrame with
            columns "t", "x", "y".
        ts: array of time values; ts[1] - ts[0] sets the slider step,
            so it is assumed evenly spaced — TODO confirm.
        colours: colour per series; series k uses colours[k - 1].

    Returns:
        an interactive layered Altair chart driven by a range slider.
    """
    time_field = "t"
    # Range-slider selection over the time field.
    select_time = alt.selection_single(
        name="select",
        fields=[time_field],
        init={time_field: min(ts)},
        bind=alt.binding_range(min=min(ts), max=max(ts), step=ts[1] - ts[0]),
    )
    # Invisible single-point anchor chart that owns the slider selection.
    chart = (alt.Chart(pd.DataFrame({
        "x": [0],
        "y": [0]
    })).mark_point(opacity=0).encode(x="x", y="y").add_selection(select_time))
    for k, df in df_dic.items():
        for t in ts:
            # BUG FIX: sort_index() returns a new frame; the original
            # discarded its result, leaving df_t unsorted.
            df_t = df[df["t"] == t].sort_index()
            base = alt.Chart(df_t).transform_filter(select_time)
            line_layer = base.mark_line(color=colours[k - 1]).encode(
                alt.X("x:Q"), alt.Y("y:Q"),
                alt.OpacityValue(1)).transform_filter(select_time)
            point_layer = base.mark_point(color=colours[k - 1]).encode(
                alt.X("x:Q"), alt.Y("y:Q"),
                alt.OpacityValue(1)).transform_filter(select_time)
            chart += line_layer + point_layer
    return chart.interactive()
def w_inst_graph():
    """Interactive bar chart of mean wind total installed costs per year for a chosen country.

    Brushing a year range on the bars re-computes the firebrick mean rule.
    """
    df_wind_world = load_data_world_w()
    # Interval brush restricted to the x (Year) axis.
    brush = alt.selection(type='interval', encodings=['x'])
    bars = alt.Chart().mark_bar().encode(
        alt.X('Year', scale=alt.Scale(zero=False)),
        y='mean(Total installed costs (2019 USD/kW))',
        opacity=alt.condition(brush, alt.OpacityValue(1),
                              alt.OpacityValue(0.7)),
    ).add_selection(brush).properties(
        # BUG FIX: the title was copy-pasted from the solar LCOE chart;
        # this plot shows wind total installed costs.
        title='Wind power weighted average total installed costs',
        width=700,
        height=350)
    line = alt.Chart().mark_rule(color='firebrick').encode(
        y='mean(Total installed costs (2019 USD/kW))',
        size=alt.SizeValue(3),
        tooltip=["mean(Total installed costs (2019 USD/kW))"
                 ]).transform_filter(brush)
    country = st.selectbox("Select country", [
        "Brazil", "Canada", "China", "Denmark", "France", "Germany", "India",
        "Italy", "Japan", "Mexico", "Spain", "Sweden", "Turkey",
        "United Kingdom", "United States"
    ])
    # Data attached at the layer level so bars and rule share the subset.
    xyz = alt.layer(bars, line,
                    data=df_wind_world[df_wind_world["Country"] == country])
    st.altair_chart(xyz)
def plot_resistivity_track(self, brush, deep_res_str='RDEP', med_res_str='RMED', shallow_res_str='RSHA') -> alt.Chart:
    """Return a log-scaled resistivity track (deep/medium/shallow curves) vs depth.

    Args:
        brush: an Altair interval selection; brush.ref() supplies the shared
            depth-axis domain so this track pans/zooms with its siblings.
        deep_res_str, med_res_str, shallow_res_str: column names of the
            deep/medium/shallow resistivity logs in self.df.

    Returns:
        a 100x600 line chart, depth increasing downwards, y-axis hidden.
    """
    df = self._handle_log_names(
        self.df, log_names=[deep_res_str, med_res_str, shallow_res_str])
    # Long format: one row per (DEPT, variable, value) so each log becomes
    # its own coloured line.
    df = self._melt_df(df)
    chart = alt.Chart(df).mark_line().encode(
        x=alt.X('value',
                axis=alt.Axis(title='Resistivity'),
                scale={'type': 'log'}),  # resistivity is conventionally log-scaled
        y=alt.Y('DEPT',
                sort='descending',
                scale={
                    'domain': brush.ref(),
                    'zero': True
                },
                axis=None),  # axis hidden: depth labels live on a sibling track
        tooltip=['DEPT', 'value'],
        order='DEPT',
        color='variable',
        opacity=alt.OpacityValue(0.8)).properties(width=100, height=600)
    return chart
def plot_GR_SP(self, brush, GR_str='GR', SP_str='SP') -> alt.Chart:
    """Return a gamma-ray / spontaneous-potential track vs depth.

    Args:
        brush: an Altair interval selection; brush.ref() supplies the shared
            depth-axis domain so this track pans/zooms with its siblings.
        GR_str, SP_str: column names of the GR and SP logs in self.df.

    Returns:
        a 100x600 line chart, depth increasing downwards.
    """
    df = self._handle_log_names(self.df, log_names=[GR_str, SP_str])
    # Long format: one row per (DEPT, variable, value) per curve.
    df = self._melt_df(df)
    # Fixed colour per log mnemonic so colours stay consistent across tracks.
    color_scale = alt.Scale(domain=[
        'SP', 'GR', 'RDEP', 'RMED', 'RSHA', 'RHOB', 'NPHI', 'DT', 'DTC'
    ],
                            range=[
                                '#B71C1C', '#4A148C', '#1A237E', '#01579B',
                                '#004D40', '#33691E', '#F57F17', '#E65100',
                                '#3E2723'
                            ])
    chart = alt.Chart(df).mark_line().encode(
        x=alt.X('value', axis=alt.Axis(title='SP GR')),
        y=alt.Y('DEPT',
                sort='descending',
                scale={
                    'domain': brush.ref(),
                    'zero': True
                }),
        color=alt.Color('variable:N', legend=None, scale=color_scale),
        tooltip=['DEPT', 'value'],
        order='DEPT',
        opacity=alt.OpacityValue(0.8)).properties(width=100, height=600)
    return chart
def create_meeting_type_summary(data: pd.DataFrame, brush: alt.selection) -> alt.Chart:
    """
    Creates a bar chart of the number of each different type of meeting
    broken down by year. Also allows the user to filter down all of the
    other plots to specific meeting types.

    Args:
        data: frame with at least "Type" and "Year" columns.
        brush: shared selection; brushed bars stay opaque, the rest dim.

    Returns:
        the bar chart with `brush` attached.
    """
    # FIX: `.properties(selection=...)` is deprecated and removed in
    # Altair 4+; attach the selection with `.add_selection()` instead
    # (matches the idiom used elsewhere in this file).
    return alt.Chart(data).mark_bar().encode(
        x="count()",
        y="Type:N",
        color="Year",
        opacity=alt.condition(brush, alt.OpacityValue(1),
                              alt.OpacityValue(0.4))).add_selection(brush)
def plot_porosity(self, brush, density_str='RHOB', neutron_str='NPHI', sonic_str='DTC', lithology_dens=2.65) -> alt.Chart:
    """Return a porosity track: neutron (NPHI), density (DPHI) and sonic (PHIS) vs depth.

    Args:
        brush: Altair interval selection; brush.ref() supplies the shared
            depth-axis domain.
        density_str, neutron_str, sonic_str: input log column names.
        lithology_dens: matrix density used in the density-porosity estimate;
            2.65 is the common quartz/sandstone value — confirm lithology.

    Returns:
        a 100x600 line chart, depth increasing downwards, y-axis hidden.
    """
    df = self._handle_log_names(
        self.df, log_names=[density_str, neutron_str, sonic_str])
    # Density porosity. NOTE(review): the textbook form is
    # (rho_matrix - RHOB) / (rho_matrix - rho_fluid); confirm this variant
    # is intentional before changing it.
    df['DPHI'] = (df[density_str] - lithology_dens) / (1 - lithology_dens)
    # Sonic porosity from matrix/water transit times held on self
    # (appears to be the Wyllie time-average form — confirm).
    df['PHIS'] = (df[sonic_str] - self.DTCMA) / (self.DTCW - self.DTCMA)
    df = self._handle_log_names(df, log_names=['NPHI', 'DPHI', 'PHIS'])
    # Long format so each porosity estimate is coloured by 'variable'.
    df = self._melt_df(df)
    chart = alt.Chart(df).mark_line().encode(
        x=alt.X('value', axis=alt.Axis(title='Porosity')),
        y=alt.Y('DEPT',
                sort='descending',
                axis=None,  # depth labels live on a sibling track
                scale={
                    'domain': brush.ref(),
                    'zero': True
                }),
        tooltip=['DEPT', 'value'],
        order='DEPT',
        color='variable',
        opacity=alt.OpacityValue(0.8)).properties(width=100, height=600)
    return chart
def returned_users_chart(self):
    """Bar chart of returned-user counts per day with a brush-driven mean rule."""
    source = self.returned_users_data()

    # Brushing a date range on the bars re-computes the firebrick mean line.
    interval = alt.selection(type='interval', encodings=['x'])

    bar_layer = alt.Chart().mark_bar().encode(
        x='monthdate(dates):O',
        y=alt.Y('Count:Q', scale=alt.Scale(domain=[0, 2])),
        opacity=alt.condition(interval, alt.OpacityValue(1),
                              alt.OpacityValue(0.7)),
    ).add_selection(interval)

    mean_rule = alt.Chart().mark_rule(color='firebrick').encode(
        y='mean(Count):Q',
        size=alt.SizeValue(3),
    ).transform_filter(interval)

    # Data attached at the layer level so both charts share it.
    return alt.layer(bar_layer, mean_rule,
                     data=source).properties(height=300, width=500)
def plot_word_freq_with_altair(docx, num=10):
    """Render a brushable bar chart of the most common tokens in a text.

    Args:
        docx: raw text; tokenized by whitespace split.
        num: number of top tokens to plot (default 10).
    """
    word_freq_list = Counter(docx.split())
    most_common_tokens = dict(word_freq_list.most_common(num))
    word_freq_df = pd.DataFrame({
        "Tokens": most_common_tokens.keys(),
        "Counts": most_common_tokens.values()
    })
    brush = alt.selection(type="interval", encodings=["x"])
    # BUG FIX: the original called the non-existent `.make_bar()`, which
    # raises AttributeError; the Altair API method is `.mark_bar()`.
    c = alt.Chart(word_freq_df).mark_bar().encode(
        x="Tokens",
        y="Counts",
        opacity=alt.condition(brush, alt.OpacityValue(1),
                              alt.OpacityValue(0.7)),
    ).add_selection(brush)
    st.altair_chart(c, use_container_width=True)
def draw_corr_scatter(df, selected, useful_cols):
    """Second visualization: brushable scatter of two chosen fields per major,
    linked to two supporting bar charts of the brushed majors.

    Args:
        df: majors frame with "Major", "Major_category" and numeric fields.
        selected: Major_category values chosen in the sidebar.
        useful_cols: human-readable field names offered in the select boxes.
    """
    # BUG FIX: this line was commented out in the original, leaving
    # `filtered_df` undefined (NameError on first use).
    filtered_df = df.loc[df["Major_category"].isin(selected)]
    # select box #
    option_field_x = rev_explanations[st.selectbox(
        'Choose a field for the x-axis!', useful_cols)]
    option_field_y = rev_explanations[st.selectbox(
        'Choose a field for the y-axis!', useful_cols)]
    brush = alt.selection(type='interval')
    corr = alt.Chart(filtered_df).mark_point().encode(
        x=alt.X(
            option_field_x + ":Q",
            scale=alt.Scale(zero=False),
            axis=alt.Axis(labelOverlap=True),
        ),
        y=alt.Y(
            option_field_y + ":Q",
            scale=alt.Scale(zero=False),
            axis=alt.Axis(labelOverlap=True),
        ),
        opacity=alt.condition(brush, alt.OpacityValue(1),
                              alt.OpacityValue(0.1)),
        # BUG FIX: the color channel was built with alt.Y; use alt.Color.
        color=alt.Color("Major_category"),
        tooltip=["Major", option_field_y, option_field_x],
    ).add_selection(brush).properties(width=850, height=400)
    support_chart = alt.Chart(filtered_df).mark_bar().encode(
        x=alt.X("Major",
                scale=alt.Scale(zero=False),
                sort='-y',
                axis=alt.Axis(labelOverlap=True)),
        color=alt.condition(brush, alt.Color('Major_category:N', legend=None),
                            alt.value('lightgray')),
        tooltip=["Major", "Major_category"],
    ).properties(width=400, height=300)
    # Scatter stacked above the two field-specific bar charts.
    visual_2 = corr & \
        (support_chart.encode(y=alt.Y(option_field_x, scale=alt.Scale(zero=False))) |
         support_chart.encode(y=alt.Y(option_field_y, scale=alt.Scale(zero=False))))
    st.write(visual_2)
def draw_major_statistics(df, selected, useful_cols):
    """First visualization: per-major bar chart of a chosen field, with a
    brushable firebrick rule showing the mean of the selected majors.

    Args:
        df: majors frame with "Major", "Major_category" and numeric fields.
        selected: Major_category values chosen in the sidebar.
        useful_cols: human-readable field names offered in the select box.
    """
    st.markdown("""
## Let's explore various statistics of all majors!

#### Instructions for use:

- From the sidebar on the left, you can narrow down the major categories as you wish, by selecting / deselecting them to interact with filtered data
- You can choose a field to view the statistics of all majors
- The red line on the graph indicates the average value of all majors
- You can also select a range of specific majors to measure the average value of the statistics
    """)
    filtered_df = df.loc[df["Major_category"].isin(selected)]
    # select box #
    option_field = rev_explanations[st.selectbox('Choose a field!',
                                                 useful_cols)]
    # Interval brush along the y (Major) axis drives the mean rule.
    avg_brush = alt.selection(type='interval', encodings=['y'])
    chart = alt.Chart(filtered_df).mark_bar().encode(
        x=alt.X(
            option_field,
            scale=alt.Scale(zero=False),
        ),
        y=alt.Y("Major",
                scale=alt.Scale(zero=False),
                sort='-x',
                axis=alt.Axis(labelOverlap=True)),
        # BUG FIX: the color channel was constructed with alt.Y in the
        # original; the correct channel class is alt.Color.
        color=alt.Color("Major_category"),
        opacity=alt.condition(avg_brush, alt.OpacityValue(1),
                              alt.OpacityValue(0.4)),
        tooltip=["Major", "Major_category",
                 option_field]).add_selection(avg_brush).properties(
                     width=1000, height=750)
    avg_line = alt.Chart(filtered_df).mark_rule(color='firebrick').encode(
        x='mean(' + option_field + '):Q',
        size=alt.SizeValue(3)).transform_filter(avg_brush).properties(
            width=1000, height=750)
    visual_1 = alt.layer(chart, avg_line)
    st.write(visual_1)
def display_main_chart(metrics_df, metric_id, metric_name, asset_names, container, scale="linear"):
    """Write a brushable time-series line chart, plus per-asset average rules,
    into a Streamlit container.

    Args:
        metrics_df: frame with "time", "Name" and the `metric_id` column.
        metric_id: column plotted on the y-axis.
        metric_name: human-readable title for `metric_id` (axis + tooltip).
        asset_names: not referenced in this body — NOTE(review): confirm
            whether callers still need it.
        container: Streamlit container receiving the chart via .write().
        scale: Vega-Lite y-axis scale type ('linear', 'log', ...).
    """
    # The selection brush is oriented on the x-axis. Note: the chart is kept
    # static (no .interactive()) so the brush works as expected.
    brush = alt.selection(type='interval', encodings=['x'])
    base = alt.Chart(metrics_df).properties(width=800)
    chart = base.mark_line().encode(x=alt.X("time",
                                            type="temporal",
                                            title="Time"),
                                    y=alt.Y(metric_id,
                                            type="quantitative",
                                            title=metric_name,
                                            scale=alt.Scale(type=scale),
                                            stack=True),
                                    opacity=alt.condition(
                                        brush, alt.OpacityValue(1),
                                        alt.OpacityValue(0.7)),
                                    tooltip=[
                                        alt.Tooltip("time",
                                                    type="temporal",
                                                    title="Time"),
                                        alt.Tooltip(metric_id,
                                                    type="quantitative",
                                                    title=metric_name)
                                    ],
                                    color="Name").add_selection(brush)
    # Average of the brushed interval, one rule per asset name.
    line = base.mark_rule().encode(y=alt.Y(f"average({metric_id}):Q"),
                                   size=alt.SizeValue(3),
                                   color="Name").transform_filter(brush)
    container.write(chart + line)
def plot_altair_3(xcol, ycol):
    """Scatter of two `wine` columns filtered by an interactive quality slider.

    Returns the chart rendered as an HTML string.
    """
    # Range slider bound to the wine-quality field (3..9, step 1).
    quality_slider = alt.binding_range(min=3.0, max=9.0, step=1.0)
    select_quality = alt.selection_single(name='Select',
                                          fields=['quality'],
                                          init={'quality': 3.0},
                                          bind=quality_slider)

    scatter = alt.Chart(wine,
                        title=ycol + " VS " + xcol).mark_circle(opacity=0.5)
    scatter = scatter.encode(
        alt.X(xcol),
        alt.Y(ycol),
        alt.Size('chlorides in g/dm3'),
        alt.OpacityValue(0.5),
    )
    scatter = scatter.add_selection(select_quality)
    scatter = scatter.transform_filter(select_quality)
    return scatter.to_html()
def trend(job_to_choose='Janitor'):
    """
    Plots a line plot for a selected job.

    Arguments:
    job_to_choose; str
    the job to be displayed in the plot.

    Returns:
    an altair line plot
    """
    subset = df.query('job == @job_to_choose')
    sex_scale = alt.Scale(domain=['women', 'men'],
                          range=['pink', 'steelblue'])

    lines = alt.Chart(subset).mark_line().encode(
        alt.X("year:O", title="Year"),
        alt.Y("count:Q", title="Count"),
        alt.Color("sex:N", scale=sex_scale),
        alt.Tooltip(['year', 'count', 'sex']),
        alt.OpacityValue(0.7),
    ).properties(width=600,
                 height=300,
                 title=job_to_choose + " Number Change Over the Years ")

    markers = alt.Chart(subset).mark_point().encode(
        alt.X("year:O", title="Year"),
        alt.Y("count:Q", title="Count"),
        alt.Color('sex'),
    )
    return lines + markers
""" Interactive Average =================== The plot below uses an interval selection, which causes the chart to include an interactive brush (shown in grey). The brush selection parameterizes the red guideline, which visualizes the average value within the selected interval. """ # category: interactive charts import altair as alt from vega_datasets import data weather = data.seattle_weather.url brush = alt.selection(type='interval', encodings=['x']) bars = alt.Chart().mark_bar().encode( x='month(date):O', y='mean(precipitation):Q', opacity=alt.condition(brush, alt.OpacityValue(1), alt.OpacityValue(0.7))).add_selection(brush) line = alt.Chart().mark_rule(color='firebrick').encode( y='mean(precipitation):Q', size=alt.SizeValue(3)).transform_filter(brush) alt.layer(bars, line, data=weather)
# In[ ]: #####Unemployment Rate of States##### # In[10]: brush = alt.selection(type='interval', encodings=['x']) bars = alt.Chart().mark_bar().encode( x='Years:Q', y='Unemployement Rate:Q', opacity=alt.condition(brush, alt.OpacityValue(1), alt.OpacityValue(0.7)), ).properties( width=400, height=300).add_selection( brush ) line = alt.Chart().mark_rule(color='firebrick').encode( y='mean(Unemployement Rate):Q', size=alt.SizeValue(3) ).transform_filter( brush ) df1 = df_hist1_sorted.loc[0,:][Unemployment_rate_columns].values df1 = pd.DataFrame(df1 , date_range)
def jitter_summary_chart(
    norm_error,
    author=None,
    title=None,
    sort_list=None,
    opacity=False,
    color_scheme="category10",
):
    """Faceted strip (jitter) plot of normalized error, one column per author.

    Args:
        norm_error: object exposing .to_dataframe() (e.g. an xarray DataArray)
            with 'author', 'sample_id' and 'normalized_error' fields.
        author: a name (str) or list of names to EXCLUDE from the plot.
        title: header title for the author facets.
        sort_list: explicit ordering of the author facets.
        opacity: if True, clicking a point dims all points that do not share
            its sample_id; if False, all points stay fully opaque.
        color_scheme: not referenced in this body — colours come from
            _get_colors(). NOTE(review): confirm whether this parameter
            should feed the colour scale.

    Returns:
        a fully configured Altair chart.
    """
    source = norm_error.to_dataframe().reset_index()
    # Drop the excluded author(s), if any.
    if type(author) == list:
        source = source.query(f"author not in @author")
    elif type(author) == str:
        source = source.query(f"author != '{author}'")
    # Clicking a point selects every point with the same sample_id.
    selector = alt.selection_single(empty="none", fields=["sample_id"])
    if opacity:
        opacity = alt.condition(selector, alt.OpacityValue(1.0),
                                alt.OpacityValue(0.5))
    else:
        opacity = alt.OpacityValue(1.0)
    # Selected points are drawn larger.
    size = alt.condition(selector, alt.SizeValue(150), alt.SizeValue(60))
    # modify that for changes in coloring. Make sure that domain is correct
    domain, range_ = _get_colors()
    stripplot = (
        alt.Chart(source, width=100, height=600).mark_circle().encode(
            x=alt.X(
                "jitter:Q",
                title=None,
                axis=alt.Axis(values=[0], ticks=True, grid=False,
                              labels=False),
                scale=alt.Scale(),
            ),
            y=alt.Y("normalized_error:Q", title="Normalized error"),
            color=alt.Color("author:N",
                            legend=None,
                            scale=alt.Scale(domain=domain, range=range_)),
            column=alt.Column(
                "author:N",
                header=alt.Header(
                    labelAngle=-90,
                    titleOrient="top",
                    labelOrient="bottom",
                    labelAlign="right",
                    labelPadding=3,
                    labelFontSize=20,
                    title=title,
                ),
                sort=sort_list,
            ),
            size=size,
            opacity=opacity,
            tooltip=["author", "sample_id", "normalized_error"],
        ).transform_calculate(
            # Generate Gaussian jitter with a Box-Muller transform
            jitter="sqrt(-2*log(random()))*cos(2*PI*random())").
        configure_facet(spacing=5).configure_view(stroke=None).add_selection(
            selector).transform_filter("datum.normalized_error !== null"))
    return stripplot.configure_axis(
        labelFontSize=20, titleFontSize=20).configure_header(titleFontSize=24)
'col': 2 }, { 'country': 'United States', 'animal': 'sheep', 'col': 1 }]) domains = ['person', 'cattle', 'pigs', 'sheep'] shape_scale = alt.Scale( domain=domains, range=[ 'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z', 'M4 -2c0 0 0.9 -0.7 1.1 -0.8c0.1 -0.1 -0.1 0.5 -0.3 0.7c-0.2 0.2 1.1 1.1 1.1 1.2c0 0.2 -0.2 0.8 -0.4 0.7c-0.1 0 -0.8 -0.3 -1.3 -0.2c-0.5 0.1 -1.3 1.6 -1.5 2c-0.3 0.4 -0.6 0.4 -0.6 0.4c0 0.1 0.3 1.7 0.4 1.8c0.1 0.1 -0.4 0.1 -0.5 0c0 0 -0.6 -1.9 -0.6 -1.9c-0.1 0 -0.3 -0.1 -0.3 -0.1c0 0.1 -0.5 1.4 -0.4 1.6c0.1 0.2 0.1 0.3 0.1 0.3c0 0 -0.4 0 -0.4 0c0 0 -0.2 -0.1 -0.1 -0.3c0 -0.2 0.3 -1.7 0.3 -1.7c0 0 -2.8 -0.9 -2.9 -0.8c-0.2 0.1 -0.4 0.6 -0.4 1c0 0.4 0.5 1.9 0.5 1.9l-0.5 0l-0.6 -2l0 -0.6c0 0 -1 0.8 -1 1c0 0.2 -0.2 1.3 -0.2 1.3c0 0 0.3 0.3 0.2 0.3c0 0 -0.5 0 -0.5 0c0 0 -0.2 -0.2 -0.1 -0.4c0 -0.1 0.2 -1.6 0.2 -1.6c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 0 -2.7 -0.2 -2.7c-0.1 0 -0.4 2 -0.4 2c0 0 0 0.2 -0.2 0.5c-0.1 0.4 -0.2 1.1 -0.2 1.1c0 0 -0.2 -0.1 -0.2 -0.2c0 -0.1 -0.1 -0.7 0 -0.7c0.1 -0.1 0.3 -0.8 0.4 -1.4c0 -0.6 0.2 -1.3 0.4 -1.5c0.1 -0.2 0.6 -0.4 0.6 -0.4z', 'M1.2 -2c0 0 0.7 0 1.2 0.5c0.5 0.5 0.4 0.6 0.5 0.6c0.1 0 0.7 0 0.8 0.1c0.1 0 0.2 0.2 0.2 0.2c0 0 -0.6 0.2 -0.6 0.3c0 0.1 0.4 0.9 0.6 0.9c0.1 0 0.6 0 0.6 0.1c0 0.1 0 0.7 -0.1 0.7c-0.1 0 -1.2 0.4 -1.5 0.5c-0.3 0.1 -1.1 0.5 -1.1 0.7c-0.1 0.2 0.4 1.2 0.4 1.2l-0.4 0c0 0 -0.4 -0.8 -0.4 -0.9c0 -0.1 -0.1 -0.3 -0.1 -0.3l-0.2 0l-0.5 1.3l-0.4 0c0 0 -0.1 -0.4 0 -0.6c0.1 -0.1 0.3 -0.6 0.3 -0.7c0 0 -0.8 0 -1.5 -0.1c-0.7 -0.1 -1.2 -0.3 -1.2 -0.2c0 0.1 -0.4 0.6 -0.5 0.6c0 0 0.3 0.9 0.3 0.9l-0.4 0c0 0 -0.4 -0.5 -0.4 -0.6c0 -0.1 -0.2 -0.6 -0.2 -0.5c0 0 -0.4 0.4 -0.6 0.4c-0.2 0.1 -0.4 0.1 -0.4 0.1c0 0 -0.1 0.6 -0.1 0.6l-0.5 0l0 -1c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 
-0.7 -1.2 -0.6 -1.4c0.1 -0.1 0.1 -1.1 0.1 -1.1c0 0 -0.2 0.1 -0.2 0.1c0 0 0 0.9 0 1c0 0.1 -0.2 0.3 -0.3 0.3c-0.1 0 0 -0.5 0 -0.9c0 -0.4 0 -0.4 0.2 -0.6c0.2 -0.2 0.6 -0.3 0.8 -0.8c0.3 -0.5 1 -0.6 1 -0.6z', 'M-4.1 -0.5c0.2 0 0.2 0.2 0.5 0.2c0.3 0 0.3 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.4 -0.2c0.1 0 0.2 0.2 0.4 0.1c0.2 0 0.2 -0.2 0.4 -0.3c0.1 0 0.1 -0.1 0.4 0c0.3 0 0.3 -0.4 0.6 -0.4c0.3 0 0.6 -0.3 0.7 -0.2c0.1 0.1 1.4 1 1.3 1.4c-0.1 0.4 -0.3 0.3 -0.4 0.3c-0.1 0 -0.5 -0.4 -0.7 -0.2c-0.3 0.2 -0.1 0.4 -0.2 0.6c-0.1 0.1 -0.2 0.2 -0.3 0.4c0 0.2 0.1 0.3 0 0.5c-0.1 0.2 -0.3 0.2 -0.3 0.5c0 0.3 -0.2 0.3 -0.3 0.6c-0.1 0.2 0 0.3 -0.1 0.5c-0.1 0.2 -0.1 0.2 -0.2 0.3c-0.1 0.1 0.3 1.1 0.3 1.1l-0.3 0c0 0 -0.3 -0.9 -0.3 -1c0 -0.1 -0.1 -0.2 -0.3 -0.2c-0.2 0 -0.3 0.1 -0.4 0.4c0 0.3 -0.2 0.8 -0.2 0.8l-0.3 0l0.3 -1c0 0 0.1 -0.6 -0.2 -0.5c-0.3 0.1 -0.2 -0.1 -0.4 -0.1c-0.2 -0.1 -0.3 0.1 -0.4 0c-0.2 -0.1 -0.3 0.1 -0.5 0c-0.2 -0.1 -0.1 0 -0.3 0.3c-0.2 0.3 -0.4 0.3 -0.4 0.3l0.2 1.1l-0.3 0l-0.2 -1.1c0 0 -0.4 -0.6 -0.5 -0.4c-0.1 0.3 -0.1 0.4 -0.3 0.4c-0.1 -0.1 -0.2 1.1 -0.2 1.1l-0.3 0l0.2 -1.1c0 0 -0.3 -0.1 -0.3 -0.5c0 -0.3 0.1 -0.5 0.1 -0.7c0.1 -0.2 -0.1 -1 -0.2 -1.1c-0.1 -0.2 -0.2 -0.8 -0.2 -0.8c0 0 -0.1 -0.5 0.4 -0.8z' ]) color_scale = alt.Scale(domain=domains, range=[ 'rgb(162,160,152)', 'rgb(194,81,64)', 'rgb(93,93,93)', 'rgb(91,131,149)' ]) alt.Chart(df).mark_point(filled=True).encode( alt.X('col:O', axis=None), alt.Y('animal:O', axis=None), alt.Row('country:N', header=alt.Header(title='')), alt.Shape('animal:N', legend=None, scale=shape_scale), alt.Color('animal:N', legend=None, scale=color_scale), alt.OpacityValue(1), alt.SizeValue(200)).properties(width=800, height=200)
def load_data():
    """Load the Berlin district data, indexed by district name (BEZIRK)."""
    df = pd.read_csv('berlinplan.csv', encoding='utf-8',
                     error_bad_lines=False)
    return df.set_index("BEZIRK")


berlinplan = load_data()

# BUG FIX: the original referenced the undefined names `df` and `berlinplan`
# (only `data` was bound); the loaded frame is now bound once and reused.
# The multiselect label also said "countries" for Berlin districts.
districts = st.multiselect(
    "Choose districts", list(berlinplan.index), ["Mitte", "Spandau"]
)
data = berlinplan.loc[districts]

st.write("Berlin Districts", data.sort_index())

# reset_index() exposes BEZIRK as a column so Altair can encode it.
chart = alt.Chart(data.reset_index()).mark_point(filled=True).encode(
    alt.X('activity:Q', scale=alt.Scale(zero=False)),
    alt.Y('habitants:Q', scale=alt.Scale(zero=False)),
    alt.Size('surface:Q'),
    alt.Color('BEZIRK:N'),
    alt.OpacityValue(0.5),
    tooltip=[alt.Tooltip('BEZIRK:N'),
             alt.Tooltip('activity:Q'),
             alt.Tooltip('habitants:Q'),
             alt.Tooltip('surface:Q')]
)
st.altair_chart(chart, use_container_width=True)
def home():
    """Flask view: build three linked Altair charts from the Volumes file and
    pass them (as Vega-Lite JSON) to index.html."""
    ### BEHROOZ
    # read volume file and assign to vol data frame
    volume_file = ('static/Volumes')
    vol = pd.read_csv(volume_file, delim_whitespace=True)

    # GRVQHC: GRV vs GRV quantile scatter coloured by HC column, with a
    # brush-linked bar chart underneath.
    brush = alt.selection(type='interval')
    points = alt.Chart(vol).mark_point().encode(
        x='Oil_GRV_10_6:Q',
        y='Oil_GRV_quantile:Q',
        tooltip=['Num', 'Oil_GRV_quantile'],
        color=alt.condition(brush, 'Hc_column__m_:Q',
                            alt.value('darkgray'))).add_selection(brush)
    bars = alt.Chart().mark_bar().encode(
        alt.Color('Hc_column__m_:Q', scale=alt.Scale(scheme='viridis')),
        y='Hc_column__m_:Q',
        x='Oil_GRV_10_6:Q').transform_filter(brush)
    GRVQHC = alt.vconcat(points, bars, data=vol)
    GRVQHC = GRVQHC.to_json()

    # GRVOWC: GRV vs OWC depth, same brush-linked layout.
    brush = alt.selection(type='interval')
    points = alt.Chart(vol).mark_point().encode(
        alt.X('Oil_GRV_10_6:Q'),
        alt.Y('OWC_depth:Q', scale=alt.Scale(zero=False)),
        tooltip=['Num', 'Oil_GRV_quantile'],
        color=alt.condition(brush, 'Oil_GRV_quantile:Q',
                            alt.value('darkgray'))).add_selection(brush)
    bars = alt.Chart().mark_bar().encode(
        alt.Color('Oil_GRV_quantile:Q', scale=alt.Scale(scheme='viridis')),
        y='Hc_column__m_:Q',
        x='Oil_GRV_10_6:Q').transform_filter(brush)
    GRVOWC = alt.vconcat(points, bars, data=vol)
    GRVOWC = GRVOWC.to_json()

    # GRVDNS: histogram of GRV density with min/mean/max HC-column rules
    # recomputed over the brushed bins.
    brush = alt.selection(type='interval', encodings=['x'])
    bars = alt.Chart(vol).mark_bar().encode(
        alt.X("GRV_density__m_:Q", bin=alt.Bin(maxbins=20)),
        alt.Y('count()', axis=alt.Axis(title='Min,Mean,Max HC')),
        opacity=alt.condition(
            brush, alt.OpacityValue(1),
            alt.OpacityValue(0.7))).add_selection(brush).properties(
                width=600, height=400)
    line1 = alt.Chart(vol).mark_rule(color='firebrick').encode(
        y='max(Hc_column__m_):Q',
        size=alt.SizeValue(2)).transform_filter(brush)
    line2 = alt.Chart(vol).mark_rule(color='yellow').encode(
        y='min(Hc_column__m_):Q',
        size=alt.SizeValue(2)).transform_filter(brush)
    line3 = alt.Chart(vol).mark_rule(color='green').encode(
        y='mean(Hc_column__m_):Q',
        size=alt.SizeValue(2)).transform_filter(brush)
    GRVDNS = alt.layer(bars, line1, line2, line3, data=vol)
    GRVDNS = GRVDNS.to_json()

    return render_template('index.html',
                           GRVQHC=GRVQHC,
                           GRVOWC=GRVOWC,
                           GRVDNS=GRVDNS)
def channel_curve_compare(experiment_df,
                          width=800,
                          heights=(50, 400),
                          line_size=5,
                          legend_mark_size=100):
    """Creates an interactive curve comparison chart for a list of experiments.

    It lets you tick or untick experiments that you want to compare by
    clicking on the legend (shift+click for multi), you can select the x
    range which you want to investigate by selecting it on the top chart
    and you get shown the actual values on mousehover.

    The chart is built on top of Altair, which in turn is built on top of
    Vega-Lite and Vega; the returned object can be serialised with
    .to_json() and embedded in an HTML page.

    Args:
        experiment_df('pandas.DataFrame'): Dataframe containing
            ['id','x','CHANNEL_NAME']. It can be obtained from a list of
            experiments via `neptunelib.api.concat_experiments_on_channel`.
            If its length exceeds 5000 rows Altair raises MaxRowsError;
            disable the limit with
            alt.data_transformers.enable('default', max_rows=None).
        width(int): width of the chart. Default is 800.
        heights(tuple): heights of the (top, bottom) subcharts.
            Default is (50, 400).
        line_size(int): size of the lines. Default is 5.
        legend_mark_size(int): size of the marks in the legend.
            Default is 100.

    Returns:
        `altair.Chart`: chart object, auto-rendered in a notebook.

    Examples:
        Fetch experiments, build the channel dataframe with
        `neptunelib.api.utils.concat_experiments_on_channel`, then::

            from neptunelib.viz.experiments import channel_curve_compare
            channel_curve_compare(compare_df)
    """
    assert len(experiment_df.columns) == 3, \
        'Experiment dataframe should have 3 columns \
        ["id","x", "CHANNEL_NAME"]. \
        It has {} namely {}'.format(len(experiment_df.columns),
                                    experiment_df.columns)

    top_height, bottom_height = heights
    prep_cols, channel_name = _preprocess_columns(experiment_df.columns)
    experiment_df.columns = prep_cols

    # Three cooperating selections: hover-nearest for tooltips, an x-interval
    # on the top chart for zooming, and a legend multi-select for toggling
    # experiments on/off.
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=['x'],
                            empty='none')
    interval = alt.selection(type='interval', encodings=['x'])
    legend_selection = alt.selection_multi(fields=['id'])

    legend = alt.Chart().mark_point(filled=True,
                                    size=legend_mark_size).encode(
        y=alt.Y('id:N'),
        color=alt.condition(
            legend_selection, alt.Color('id:N', legend=None),
            alt.value('lightgray'))).add_selection(legend_selection)
    # Invisible points that carry the hover-nearest selection.
    selectors = alt.Chart().mark_point().encode(
        x='x:Q',
        opacity=alt.value(0),
    ).add_selection(nearest)
    # Small overview chart: drag here to choose the x range shown below.
    top_view = alt.Chart(
        width=width, height=top_height).mark_line(size=line_size).encode(
            x=alt.X('x:Q', title=None),
            y=alt.Y('y:Q', scale=alt.Scale(zero=False), title=None),
            color=alt.Color('id:N', legend=None),
            opacity=alt.condition(
                legend_selection, alt.OpacityValue(1),
                alt.OpacityValue(0.0))).add_selection(interval)
    line = alt.Chart().mark_line(size=line_size).encode(
        x=alt.X('x:Q', title='iteration'),
        y=alt.Y('y:Q', scale=alt.Scale(zero=False), title=channel_name),
        color=alt.Color('id:N', legend=None),
        opacity=alt.condition(legend_selection, alt.OpacityValue(1),
                              alt.OpacityValue(0.0)))
    # Hover marker and value label at the nearest x.
    points = line.mark_point().encode(
        color=alt.condition(legend_selection, alt.Color('id:N', legend=None),
                            alt.value('white')),
        opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
    text = line.mark_text(align='left', dx=5, dy=-5).encode(
        text=alt.condition(nearest, 'y:Q', alt.value(' ')),
        opacity=alt.condition(legend_selection, alt.OpacityValue(1),
                              alt.OpacityValue(0.0)))
    rules = alt.Chart().mark_rule(color='gray').encode(
        x='x:Q', ).transform_filter(nearest)
    # Detail chart restricted to the interval brushed on the overview.
    bottom_view = alt.layer(line,
                            selectors,
                            points,
                            rules,
                            text,
                            width=width,
                            height=bottom_height).transform_filter(interval)
    combined = alt.hconcat(alt.vconcat(top_view, bottom_view),
                           legend,
                           data=experiment_df)
    return combined
""" Interactive Average =================== The plot below uses an interval selection, which causes the chart to include an interactive brush (shown in grey). The brush selection parameterizes the red guideline, which visualizes the average value within the selected interval. """ # category: interactive charts import altair as alt from vega_datasets import data weather = data.seattle_weather.url brush = alt.selection(type='interval', encodings=['x']) bars = alt.Chart().mark_bar().encode( alt.X('date:O', timeUnit='month'), y='mean(precipitation):Q', opacity=alt.condition(brush, alt.OpacityValue(1), alt.OpacityValue(0.7))).properties(selection=brush) line = alt.Chart().mark_rule(color='firebrick').encode( y='mean(precipitation):Q', size=alt.SizeValue(3)).transform_filter(brush.ref()) alt.layer(bars, line, data=weather)
# NOTE(review): this chunk continues an earlier script — `brush`, `points`
# and `vol` are defined above this excerpt.
bars = alt.Chart().mark_bar().encode(
    alt.Color('Oil_GRV_quantile:Q', scale=alt.Scale(scheme='viridis')),
    y='Hc_column__m_:Q',
    x='Oil_GRV_10_6:Q').transform_filter(brush)
GRVOWC = alt.vconcat(points, bars, data=vol)
# NOTE(review): the to_json() result is discarded here (rendering side
# effect only in a notebook) — confirm this is intentional.
GRVOWC.to_json()

# For the histogram of GRV density with Min,Mean,Max HC column indicators
brush = alt.selection(type='interval', encodings=['x'])
bars = alt.Chart(vol).mark_bar().encode(
    alt.X("GRV_density__m_:Q", bin=alt.Bin(maxbins=20)),
    alt.Y('count()', axis=alt.Axis(title='Min,Mean,Max HC')),
    opacity=alt.condition(
        brush, alt.OpacityValue(1),
        alt.OpacityValue(0.7))).add_selection(brush).properties(width=600,
                                                                height=400)
# Rules recomputed over only the brushed histogram bins.
line1 = alt.Chart(vol).mark_rule(color='firebrick').encode(
    y='max(Hc_column__m_):Q',
    size=alt.SizeValue(2)).transform_filter(brush)
line2 = alt.Chart(vol).mark_rule(color='yellow').encode(
    y='min(Hc_column__m_):Q',
    size=alt.SizeValue(2)).transform_filter(brush)
line3 = alt.Chart(vol).mark_rule(color='green').encode(
    y='mean(Hc_column__m_):Q',
    size=alt.SizeValue(2)).transform_filter(brush)
GRVDNS = alt.layer(bars, line1, line2, line3, data=vol)
GRVDNS.to_json()
def uv_fourierSmoothPlot(data, engine, xlabel, ylabel):
    """Plot a univariate series overlaid with progressively richer low-pass
    Fourier reconstructions (darker red = more frequency components kept).

    Args:
        data: frame whose 'plotX1' column holds the series; its index is the
            x-axis (treated as temporal by the interactive engine).
        engine: 'Static' (matplotlib via panel) or 'Interactive' (Altair).
        xlabel, ylabel: axis labels; ylabel also becomes the data column name.

    Returns:
        a pn.pane.Matplotlib (Static) or an Altair layered chart (Interactive);
        implicitly None for any other `engine` value.
    """
    # Data Prep
    data = data.dropna().copy()
    data.rename(columns={'plotX1': ylabel}, inplace=True)
    fcomp_factor = TSMAD_CONFIGS['plotting.uv.fcomp_factor']
    y = data[ylabel].values
    n = len(y)
    x = data[ylabel].index
    # Up to 50 distinct component counts, geometrically spaced from 1 to a
    # fraction of the series length (minimum 3).
    comps = np.unique(
        np.geomspace(1, max([int(fcomp_factor * data.shape[0]), 3]),
                     50).astype(int))
    colors = np.linspace(start=1, stop=255, num=comps.max() * 2)
    for ecomp in comps:
        # Keep only the first `ecomp` FFT coefficients, zero the rest, and
        # store the real part of the inverse transform as a smoothed column.
        Y = np.fft.fft(y)
        np.put(Y, range(ecomp + 1, n), 0.0)
        ifft = np.fft.ifft(Y)
        data['tseulerF_{0}'.format(ecomp)] = ifft.real
    if engine == 'Static':
        fig, axes = plt.subplots(figsize=(9, 6))
        for eidx, efcol in enumerate(
                [k for k in data.columns if 'tseulerF_' in k]):
            # Opacity grows with component count, floored at 0.4.
            _opacity = (eidx + 1) / len(comps) if (
                eidx + 1) / len(comps) > 0.4 else 0.4
            _color = plt.cm.Reds(int(colors[int(efcol.split('_')[-1])]))
            axes.plot(x, data[efcol], color=_color, alpha=_opacity)
        _ = axes.plot(x, y, label="Original dataset", linestyle='--')
        _ = axes.grid(linestyle='dashed')
        _ = axes.legend()
        plt.xlabel(xlabel, fontsize=15)
        plt.ylabel(ylabel, fontsize=15)
        plt.grid(b=True, which='major', color='k', linewidth=0.25)
        plt.grid(b=True, which='minor', color='k', linewidth=0.125)
        # Close the pyplot handle; the figure lives on inside the panel pane.
        plt.close()
        return pn.pane.Matplotlib(fig, tight=True)
    elif engine == 'Interactive':
        # Base Plot (the second encode() below overrides the first's x/tooltip).
        base = alt.Chart(data.reset_index()).mark_line()
        base = base.encode(x=alt.X('{0}:T'.format(data.index.name),
                                   title=''),
                           tooltip=ylabel)
        base = base.properties(width=612, height=360)
        base = base.encode(x=alt.X('{0}:T'.format(data.index.name),
                                   title=''),
                           y=alt.Y('{0}:Q'.format(ylabel),
                                   scale=alt.Scale(zero=False),
                                   axis=alt.Axis(format='~s')),
                           tooltip=[ylabel])
        # One layer per smoothed column, re-encoded from the shared base.
        _flayers = [base]
        for cidx, col in enumerate(
                [k for k in data.columns if 'tseulerF_' in k]):
            _t = int(col.split('_')[-1])
            _color = get_rgbtohex(*plt.cm.Reds(int(colors[_t]))[:-1])
            _opacity = (cidx + 1) / len(comps) if (
                cidx + 1) / len(comps) > 0.4 else 0.4
            _tf = base.encode(y=alt.Y('{0}:Q'.format(col),
                                      title='',
                                      scale=alt.Scale(zero=False)),
                              color=alt.ColorValue(_color),
                              opacity=alt.OpacityValue(_opacity))
            _flayers.append(_tf)
        p = alt.layer(*_flayers).interactive()
        return p
def main():
    """Author Attribution and Verifying App.

    Streamlit entry point: renders the banner, a sidebar menu, and — on
    the Home page — a two-column text-analysis dashboard (tagging, token
    frequencies, word cloud, Mendelhall curve) for user-supplied text.
    """
    stc.html(HTML_BANNER)
    menu = ["Home", "About"]
    choice = st.sidebar.selectbox("Menu", menu)
    if choice == 'Home':
        st.subheader("Text Analysis")
        raw_text = st.text_area('Enter Text Here')
        if len(raw_text) > 2:
            # st.beta_columns / st.beta_expander were removed in
            # Streamlit 1.0; use the stable st.columns / st.expander API.
            col1, col2 = st.columns(2)
            process_text = nfx.remove_stopwords(raw_text)
            with col1:
                with st.expander('Original Text'):
                    st.write(raw_text)
                with st.expander("Preview Tagged Text"):
                    tagged_docx = generate_tags(raw_text)
                    processed_tag_docx = mytag_visualizer(tagged_docx)
                    stc.html(processed_tag_docx, scrolling=True)
                with st.expander("Plot Word Freq"):
                    st.info("Plot For Most Common Tokens")
                    most_common_tokens = get_most_common_tokens(
                        process_text, 20)
                    tk_df = pd.DataFrame(
                        {'tokens': most_common_tokens.keys(),
                         'counts': most_common_tokens.values()})
                    # Interval brush along x dims the unselected bars.
                    brush = alt.selection(type='interval', encodings=['x'])
                    c = alt.Chart(tk_df).mark_bar().encode(
                        x='tokens',
                        y='counts',
                        opacity=alt.condition(brush, alt.OpacityValue(1),
                                              alt.OpacityValue(0.7)),
                    ).add_selection(brush)
                    st.altair_chart(c, use_container_width=True)
            with col2:
                with st.expander('Processed Text'):
                    st.write(process_text)
                with st.expander("Plot Wordcloud"):
                    st.info("word Cloud")
                    plot_wordcloud(process_text)
                with st.expander("Plot Mendelhall Curve"):
                    plot_mendelhall_curve_2(raw_text)
        elif raw_text:
            # Previously only len == 1 warned, so 2-character input failed
            # silently; warn for any non-empty text that is too short.
            st.warning("Insufficient Text, Minimum must be more than 1")
    elif choice == "About":
        st.subheader("Text Analysis NLP App")
# Build a single datetime column from Year/Month (day fixed to 1) and round
# prices to the nearest £1,000 for cleaner display values.
df2 = df2.assign(date=pd.to_datetime(df2[['Year','Month']].assign(day=1)))
df2['Average House Price (£)'] = df2['Average House Price (£)'].round(decimals=-3)
#df2['Average House Price (£)'] = df2['Average House Price (£)'].apply(lambda x: "£{:.0f}k".format((x/1000)))

## Regularisation step to decrease gap between elements: maps sales counts to
## (reg_coef + count) / reg_coef so point sizes stay in a narrow band.
reg_coef = 1e4
df2['# of Sales Regularised'] = (reg_coef+df2['# of Sales'])/reg_coef

## Interactive plot: hovering a point selects its Year; points of other years
## are greyed out via the color condition below.
selection = alt.selection_single(
    name='select',fields=['Year'],on='mouseover'
)
chart = alt.Chart(df2).mark_point(filled=True).encode(
    alt.X('month(date):N'),
    alt.Y('Average House Price (£)',scale=alt.Scale(zero=False)),
    alt.Size('# of Sales Regularised:Q'),
    alt.OpacityValue(0.8),
    # Drawing order follows sales count ascending.
    alt.Order('# of Sales:Q', sort='ascending'),
    tooltip = [alt.Tooltip('# of Sales:Q'),
               alt.Tooltip('Average House Price (£):Q'),
               alt.Tooltip('Year:Q')
               ],
    color=alt.condition(selection,'Year:N',alt.value('grey'))
).add_selection(selection).properties(
    width=500,
    height=300
)

# NOTE(review): a bare `chart` expression only renders inside a notebook
# cell; it is a no-op when this file runs as a script.
chart

chart.save('chart_test.html')
chart.save('chart_test.json')

################################################# END Plot 1
selector = alt.selection_single(on='mouseover', nearest=True, empty='all', fields=['base_seg_id']) # In[225]: #df_comp = df_comp[df_comp['intersect'] == True] # In[235]: base = alt.Chart(df_comp).mark_point(filled=True).encode( alt.X('Coef_value'), alt.Y('cr'), size=alt.Size('impressions', scale=alt.Scale(domain=[100000, 10000000])), color=alt.Color('marg_imp', scale=alt.Scale(scheme='purples', domain=[start, end])), tooltip=[ alt.Tooltip('base_seg_id'), alt.Tooltip('Coef_value'), alt.Tooltip('marg_imp') ], opacity=alt.OpacityValue(0.7)).properties(width=320, height=280).facet( facet='ad_name:N', columns=2).configure_axis( #grid=False ) base # In[ ]:
def project_progress(progress_df,
                     width=800,
                     heights=(50, 400),
                     line_size=5,
                     text_size=15,
                     opacity=0.3):
    """Creates an interactive project progress exploration chart.

    It lets you choose the resources you want to see ('experiment_count_day'
    or 'running_time_day'), you can see the metric/id/tags for every
    experiment on mouseover, you can select the x range which you want to
    investigate by selecting it on the top chart and you get shown the
    actual values on mouseover.

    The chart is built on top of the Altair which in turn is built on top of
    Vega-Lite and Vega. That means you can use the objects produced by this
    script (converting it first to json by .to_json() method) in your html
    webpage without any problem.

    Args:
        progress_df('pandas.DataFrame'): Dataframe containing ['id',
            'metric', 'metric_best', 'running_time', 'running_time_day',
            'experiment_count_day', 'owner', 'tags', 'timestamp',
            'timestamp_day']. It can be obtained from a list of experiments
            by using the `neptunecontrib.api.extract_project_progress_info`
            function. If the len of the dataframe exceeds 5000 it will cause
            the MaxRowsError. Read the Note to learn why and how to disable
            it.
        width(int): width of the chart. Default is 800.
        heights(tuple): heights of the subcharts. The first value controls
            the top chart, the second controls the bottom chart. Default is
            (50,400).
        line_size(int): size of the lines. Default is 5.
        text_size(int): size of the text containing metric/id/tags in the
            middle.
        opacity(float): opacity of the resource bars in the background.
            Default is 0.3.

    Returns:
        `altair.Chart`: Altair chart object which will be automatically
        rendered in the notebook. You can also run the `.to_json()` method
        on it to convert it to the Vega-Lite json format.

    Examples:
        Instantiate a session::

            from neptunelib.api.session import Session
            session = Session()

        Fetch a project and the experiment view of that project::

            project = session.get_projects('neptune-ai')['neptune-ai/Salt-Detection']
            leaderboard = project.get_leaderboard()

        Create a progress info dataframe::

            from neptunecontrib.api.utils import extract_project_progress_info
            progress_df = extract_project_progress_info(leaderboard,
                                                        metric_colname='channel_IOUT',
                                                        time_colname='finished')

        Plot interactive chart in notebook::

            from neptunecontrib.viz.projects import project_progress
            project_progress(progress_df)

    Note:
        Because Vega-Lite visualizations keep all the chart data in the HTML
        the visualizations can consume huge amounts of memory if not handled
        properly. That is why, by default the hard limit of 5000 rows is set
        to the len of dataframe. That being said, you can disable it by
        adding the following line in the notebook or code::

            import altair as alt
            alt.data_transformers.enable('default', max_rows=None)
    """
    top_height, bottom_height = heights
    progress_df = _prep_progress_df(progress_df)

    # Hover selection that snaps to the nearest timestamp; drives the
    # highlighted points, value labels and vertical rules below.
    nearest = alt.selection(type='single', nearest=True, on='mouseover',
                            fields=['timestamp'], empty='none')
    # Interval brush on the top overview restricts the bottom charts' x range.
    brush = alt.selection(type='interval', encodings=['x'])
    # Dropdown bound to which resource is shown in the background area chart.
    exp_box = alt.binding_select(
        options=['running_time_day', 'experiment_count_day'])
    exp_selection = alt.selection_single(name='select', fields=['resource'],
                                         bind=exp_box)

    # Compact overview strip used only to brush-select the x range.
    top_view = alt.Chart(height=top_height, width=width).mark_line(
        interpolate='step-after', size=line_size).encode(
            x='timestamp:T',
            y=alt.Y('metric:Q', scale=alt.Scale(zero=False), axis=None),
            color=alt.Color(
                'actual_or_best:N',
                legend=alt.Legend(title='Metric actual or current best')),
        ).add_selection(brush)

    # Invisible points that carry the `nearest` hover selection.
    selectors = alt.Chart().mark_point().encode(
        x=alt.X('timestamp:T'),
        opacity=alt.value(0),
    ).add_selection(nearest).transform_filter(brush)

    # Metric line restricted to the brushed interval.
    line = alt.Chart().mark_line(
        interpolate='step-after', size=line_size).encode(
            x=alt.X('timestamp:T'),
            y=alt.Y('metric:Q', scale=alt.Scale(zero=False)),
            color=alt.Color(
                'actual_or_best:N',
                legend=alt.Legend(title='Metric actual or current best')),
        ).transform_filter(brush)

    # Highlight the hovered point and print its metric value next to it.
    points = line.mark_point().encode(
        opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
    text = line.mark_text(align='left', dx=5, dy=-5,
                          size=text_size).encode(text=alt.condition(
                              nearest, 'metric:Q', alt.value(' ')),
                                                 color='actual_or_best:N')
    # Vertical rule marking the hovered timestamp.
    rules = alt.Chart().mark_rule(color='gray').encode(
        x=alt.X('timestamp:T'),
    ).transform_filter(nearest)

    metrics = alt.layer(line, points, text, rules, selectors).properties(
        height=bottom_height,
        width=width,
    )

    # Invisible area that carries the resource-dropdown selection.
    exp_selector = alt.Chart().mark_area().encode(
        x=alt.X('timestamp:T'),
        opacity=alt.value(0),
    ).add_selection(exp_selection).transform_filter(
        exp_selection).transform_filter(brush)

    # Background area showing the chosen resource over the brushed range.
    exp_line = alt.Chart().mark_area(interpolate='step-after').encode(
        x=alt.X('timestamp:T'),
        y=alt.Y('time_or_count:Q', scale=alt.Scale(zero=False)),
        color=alt.ColorValue('red'),
        opacity=alt.OpacityValue(opacity)).transform_filter(
            brush).transform_filter(exp_selection)

    # Hover feedback for the resource layer: point, bold value label, rule.
    exp_points = exp_line.mark_point(filled=True).encode(
        color=alt.ColorValue('black'),
        opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
    exp_text = exp_line.mark_text(
        align='left', dx=5, dy=-5, fontWeight='bold',
        size=text_size).encode(text=alt.condition(nearest, 'time_or_count:Q',
                                                  alt.value(' ')),
                               color=alt.ColorValue('black'))
    exp_rules = alt.Chart().mark_rule(color='gray').encode(
        x=alt.X('timestamp:T'),
    ).transform_filter(nearest)

    exps = alt.layer(exp_line, exp_points, exp_rules, exp_text,
                     exp_selector).properties(
                         height=bottom_height,
                         width=width,
                     )

    # Overlay resources behind the metric lines; y scales stay independent.
    main_view = alt.layer(exps, metrics).properties(
        height=bottom_height,
        width=width,
    ).resolve_scale(y='independent')

    # One-pixel text strip printing metric/id/tags for the hovered timestamp.
    tags = alt.Chart(height=1, width=1).mark_text(
        align='left', size=text_size, fontWeight='bold').encode(
            x=alt.X('timestamp:T', axis=None),
            text=alt.condition(nearest, 'text:N', alt.value(' ')),
        )

    combined = alt.vconcat(top_view, tags, main_view, data=progress_df)
    return combined