vbar_top=vbar_top_temp, color=specie_colors) for w in [ slider_k_AB, slider_k_BC, slider_order_AB, slider_order_BC, slider_time ]: w.on_change('value', update_data) # Set up layouts and add to document inputs_reaction = column(text, slider_k_AB, slider_k_BC, slider_order_AB, slider_order_BC) inputs_time = slider_time tab1 = Panel(child=row(inputs_reaction, plot_conc, column(plot_vbar, inputs_time, height=475)), title="Desktop") tab2 = Panel(child=column(inputs_reaction, plot_conc, column(plot_vbar, inputs_time, height=475)), title="Mobile") tabs = Tabs(tabs=[tab1, tab2]) curdoc().add_root(tabs) # setup row_layout_flag. If row_layout_flag is 1, layout is row format. If not equal to 1 it will display in column fromat #row_layout_flag = 1 #if (row_layout_flag == 1): # curdoc().add_root(row(inputs_reaction, plot_conc, column(plot_vbar, inputs_time, height=475))) #else: # curdoc().add_root(gridplot([[inputs_reaction, plot_conc], [None, column(plot_vbar, inputs_time, height=475)]]))
def team_tab(passing_model):
    """Build the 'Team Passing Model' tab.

    The tab shows one circle per row of ``passing_model`` (x = ``xP_Rating``,
    y = ``xP_Mean``, size proportional to ``Passes``), coloured by tournament
    round, with a checkbox group that filters the rounds being displayed and
    two spans marking the mean of each axis for the current selection.

    Args:
        passing_model: DataFrame providing at least the columns ``Round``,
            ``Passes``, ``xP_Mean``, ``xP_Rating`` and ``Team`` (these are
            the columns this function reads).

    Returns:
        A Bokeh ``Panel`` titled 'Team Passing Model'.
    """
    result = [
        "Group Stage", "Round of 16", "Quarter-finals", "Semi-finals", "Final"
    ]
    result_color = factor_cmap('Round', palette=Spectral6, factors=result)
    checkbox = CheckboxGroup(labels=result, active=[0, 1, 2, 3, 4])
    rounds = [checkbox.labels[i] for i in checkbox.active]

    def make_dataset(rounds):
        """Return a ColumnDataSource restricted to the given rounds.

        Adds a scaled marker size column and two constant columns holding
        the mean of ``xP_Mean`` and ``xP_Rating`` for the selection.
        """
        source = ColumnDataSource(
            data=passing_model.loc[passing_model["Round"].isin(rounds), :])
        n_rows = len(source.data["Passes"])
        source.data["Pass_Size"] = source.data["Passes"] / 50
        source.data["xP_Mean_mean"] = np.repeat(
            source.data["xP_Mean"].mean(), n_rows)
        source.data["xP_Rating_mean"] = np.repeat(
            source.data["xP_Rating"].mean(), n_rows)
        return source

    def make_plot(source):
        """Create the scatter plot.

        The two mean spans are returned alongside the figure so that the
        checkbox callback can move them when the selection changes.
        """
        plot = figure(
            tools=["tap", "pan", "wheel_zoom", 'box_select', 'reset', 'help'],
            title="Expected Passes v. Pass Difficulty")
        plot.y_range.flipped = True

        # Horizontal / vertical guide lines at the selection means.
        xp_ms = source.data["xP_Mean_mean"][0]
        xp_mean_span = Span(location=xp_ms, dimension='width',
                            line_color="black", line_dash='solid',
                            line_width=3, line_alpha=.2)
        plot.add_layout(xp_mean_span)
        xp_rs = source.data["xP_Rating_mean"][0]
        xp_rating_span = Span(location=xp_rs, dimension='height',
                              line_color="black", line_dash='solid',
                              line_width=3, line_alpha=.2)
        plot.add_layout(xp_rating_span)

        plot.circle(
            "xP_Rating", "xP_Mean", size="Pass_Size", color=result_color,
            legend="Round", source=source,
            # visual properties for selected glyphs
            selection_color=Spectral6[5],
            # visual properties for non-selected glyphs
            nonselection_fill_alpha=0.1,
            nonselection_fill_color=Spectral6[0],
            nonselection_line_color=Spectral6[5],
            nonselection_line_alpha=1.0)
        plot.legend.location = (10, 50)
        plot.legend.border_line_width = 3
        plot.legend.border_line_color = "black"
        plot.legend.border_line_alpha = 0.5

        labels = LabelSet(x='xP_Rating', y='xP_Mean', text='Team',
                          level='glyph', text_font_size='10pt',
                          x_offset=-2, y_offset=2, source=source,
                          render_mode='canvas')
        plot.add_layout(labels)

        # Hover tool with vline mode
        hover = HoverTool(
            tooltips=[
                ('Team', '@Team'),
                ('Result', '@Round'),
                ('Expected Pass Rating', '@xP_Rating'),
                ('Total Passes', '@Passes'),
            ],
            mode='vline')
        plot.add_tools(hover)

        # One caption per corner (screen coordinates).  The top-right caption
        # previously repeated the bottom-right text; the four corners are the
        # combinations of easier/harder and poorly/well executed.
        corner_captions = (
            (10, 10, 'Easier Passes, Poorly Executed'),
            (10, 510, 'Harder Passes, Poorly Executed'),
            (625, 10, 'Easier Passes, Well Executed'),
            (625, 510, 'Harder Passes, Well Executed'),
        )
        for x_pos, y_pos, caption in corner_captions:
            plot.add_layout(
                Label(x=x_pos, y=y_pos, x_units='screen', y_units='screen',
                      text=caption, render_mode='css',
                      border_line_color='black', border_line_alpha=1.0,
                      background_fill_color='white',
                      background_fill_alpha=1.0))
        return plot, xp_mean_span, xp_rating_span

    def callback(attr, old, new):
        """Refresh the data source and mean spans after a checkbox change."""
        new_rounds = [checkbox.labels[i] for i in checkbox.active]
        # Replace all columns in one assignment so they never disagree in
        # length while the update is in flight.
        source.data = dict(make_dataset(new_rounds).data)
        # With nothing selected the mean columns are empty; leave the spans
        # where they are instead of failing on [0].
        if len(source.data["xP_Mean_mean"]) > 0:
            xp_mean_span.location = source.data["xP_Mean_mean"][0]
            xp_rating_span.location = source.data["xP_Rating_mean"][0]

    source = make_dataset(rounds)
    plot, xp_mean_span, xp_rating_span = make_plot(source)
    inputs = widgetbox(checkbox)
    checkbox.on_change('active', callback)

    # Controls stacked above the plot.
    layout = column(inputs, plot)
    tab = Panel(child=layout, title='Team Passing Model')
    return tab
# Daily sensible heat flux (H) for the two sites, then the tab strip.
_p7_figure_opts = dict(
    plot_width=900,
    plot_height=450,
    x_axis_type='datetime',
    x_axis_label='Date (local)',
    y_axis_label='H (W/m^2)',
)
p7 = figure(**_p7_figure_opts)
p7.title.text = 'Daily Sensible Heat'
for _site_frame, _site_legend, _site_color in (
        (usa_resample, 'Shrubland H', 'green'),
        (ca_resample, 'Boreal H', 'blue'),
):
    p7.line(_site_frame.index, _site_frame.H, legend=_site_legend,
            color=_site_color, line_width=2)

# One panel per figure; the names are kept because the display order below
# differs from the numbering.
tab1 = Panel(title="Sensitivity Monthly Resample", child=p1)
tab11 = Panel(title="Monthly Averaged Resistances", child=p11)
tab2 = Panel(title='Latent Heat Daily Resample and Ratios', child=p2)
tab3 = Panel(title='Latent Heat All Daily Resample', child=p3)
tab4 = Panel(title='Resistances Monthly all 4 yrs Averaged', child=p4)
tab5 = Panel(title='Resistances Daily Averages all 4 yrs avergaed', child=p5)
tab6 = Panel(title='All data', child=p6)
tab7 = Panel(title='Sensible Heat', child=p7)

# Sensible heat (tab7) is deliberately shown right after the latent heat tab.
tabs = Tabs(tabs=[tab1, tab11, tab2, tab7, tab3, tab4, tab5, tab6])
show(tabs)
def accounts_tsa_tab(panel_title):
    """Build the account time-series-analysis tab.

    Loads account data for a date range, fits Prophet forecasts for the
    daily mean ``amount`` and the daily number of distinct addresses, and
    lays the two forecast plots out next to the filter widgets.

    Args:
        panel_title: Title shown on the returned panel.

    Returns:
        A Bokeh ``Panel``; if anything fails while building it, the result
        of ``tab_error_flag(panel_title)`` is returned instead.
    """

    class Thistab(Mytab):
        """Per-tab state plus the data loading, filtering and plot methods."""

        def __init__(self, table, cols, dedup_cols):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            # for normalizing for classification periods of different lengths
            self.day_diff = 1
            self.df_grouped = ''
            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')
            self.forecast_days = 30
            self.trigger = -1
            # current filter values; 'all' disables the respective filter
            self.status = 'all'
            self.update_type = 'all'
            self.account_type = 'all'
            self.address = 'all'
            self.interest_var = 'amount'
            self.pl = {}  # for rf pipeline
            self.div_style = """ style='width:300px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            # list of tier specific addresses for prediction
            self.address_list = []
            self.address_select = Select(title='Filter by address',
                                         value='all', options=[])
            self.load_data_flag = False
            self.groupby_dict = {}
            self.addresses = []
            self.max_loaded_date = None
            self.min_loaded_date = None

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'forecast': self.section_header_div(
                    text='Forecasts:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a Div containing ``text`` wrapped in a header tag."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text,
                        html_header)
            return Div(text=text, width=width, height=15)

        # ####################################################
        # UTILITY DIVS
        def results_div(self, text, width=600, height=300):
            """Return a plain Div of the given size."""
            return Div(text=text, width=width, height=height)

        def title_div(self, text, width=700):
            """Return a Div containing ``text`` as an h2 title."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def reset_checkboxes(self):
            """Reset the address selection to 'all'."""
            try:
                self.address_selected = ""
                self.address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        # I/O
        def load_df(self, start_date, end_date):
            """Load data for the date range unless it is already covered.

            String dates are parsed with ``self.DATEFORMAT``. After a fresh
            load, NaNs are zero-filled and ``self.addresses`` is rebuilt.
            The current filters are then applied to ``self.df``. Errors are
            logged, not raised.
            """
            try:
                logger.warning("data load begun")
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date,
                                                   self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)

                already_loaded = False
                if self.df is not None:
                    self.max_loaded_date = \
                        self.df.block_timestamp.max().compute()
                    self.min_loaded_date = \
                        self.df.block_timestamp.min().compute()
                    already_loaded = (start_date >= self.min_loaded_date
                                      and end_date <= self.max_loaded_date)

                if already_loaded:
                    logger.warning("data already loaded - %s",
                                   self.df.tail(10))
                else:
                    self.df_load(start_date, end_date, cols=self.cols)
                    self.df = self.df.fillna(0)
                    # Distinct values of the address column.  (Iterating the
                    # frame itself only yields its column labels.)
                    unique_addresses = self.df['address'].unique().compute()
                    self.addresses = ['all'] + unique_addresses.tolist()
                    logger.warning("data loaded - %s", self.df.tail(10))

                self.df = self.filter(self.df)
            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        # MUNGE DATA
        def make_delta(self):
            """Add a ``<col>_diff`` percent-change column per target column.

            NOTE(review): reads ``self.targets``, which is not set in this
            class - presumably provided by ``Mytab``; confirm before use.
            """
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        self.df = self.df.fillna(self.df.mean())
                        self.df = dd.dataframe.from_pandas(df,
                                                           npartitions=15)
            except Exception:
                logger.error('make delta', exc_info=True)

        ##################################################
        # EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Return a box plot of ``variable`` grouped by ``status``.

            The y-limits run from 0.9 x min to 1.1 x max of the variable.
            """
            try:
                minv = 0
                maxv = 0
                df = self.df
                if df is not None:
                    if len(df) > 0:
                        minv, maxv = dd.compute(df[variable].min(),
                                                df[variable].max())
                else:
                    # no data loaded: fall back to the frame supplied by SD
                    df = SD('filter', [variable, 'status'], []).get_df()
                return df.hvplot.box(variable, by='status',
                                     ylim=(.9 * minv, 1.1 * maxv))
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        # MODELS
        def filter(self, df):
            """Return ``df`` restricted by the current filter attributes.

            A filter is skipped while its attribute equals 'all'.  On error
            the exception is logged and ``None`` is returned.
            """
            try:
                df = df.assign(freq=df.address)
                if self.status != 'all':
                    df = df[df.status == self.status]
                if self.account_type != 'all':
                    df = df[df.account_type == self.account_type]
                if self.update_type != 'all':
                    df = df[df.update_type == self.update_type]
                if self.address != 'all':
                    df = df[df.address == self.address]
                return df
            except Exception:
                logger.error("filter:", exc_info=True)

        def tsa_amount(self, launch):
            """Forecast the daily mean amount and return the plot layout.

            ``launch`` is the stream value that triggers a redraw; it is not
            used in the computation.
            """
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'amount': 'mean'})
                df = df.reset_index()
                df = df.compute()
                df = df.fillna(0)
                # Prophet expects the columns 'ds' (date) and 'y' (value)
                rename = {'block_timestamp': 'ds', 'amount': 'y'}
                df = df.rename(columns=rename)
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)
                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                logger.warning(
                    'forecast:%s',
                    forecast[['ds', 'yhat', 'yhat_lower',
                              'yhat_upper']].tail())
                logger.warning('forecast columns:%s', list(forecast.columns))

                # prediction as a line, its bounds overlaid as scatter
                for idx, col in enumerate(['yhat', 'yhat_lower',
                                           'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(
                            x='ds', y=col, width=600, height=250,
                            value_label='$', legend=False).relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(
                            x='ds', y=col, width=600, height=250,
                            value_label='$', legend=False).relabel(col)

                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(
                            x='ds', y=col, width=550, height=250,
                            value_label='$', legend=False).relabel(col)
                    else:
                        q *= forecast.hvplot.line(
                            x='ds', y=col, width=550, height=250,
                            value_label='$', legend=False).relabel(col)
                return p + q
            except Exception:
                logger.error("tsa_amount:", exc_info=True)

        def tsa_freq(self, launch):
            """Forecast the daily count of distinct addresses.

            ``launch`` is the stream value that triggers a redraw; it is not
            used in the computation.
            """
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'address': 'nunique'})
                df = df.reset_index()
                df = df.compute()
                df = df.fillna(0)
                # Prophet expects the columns 'ds' (date) and 'y' (value)
                rename = {'block_timestamp': 'ds', 'address': 'y'}
                df = df.rename(columns=rename)
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)
                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                logger.warning(
                    'forecast:%s',
                    forecast[['ds', 'yhat', 'yhat_lower',
                              'yhat_upper']].tail())
                logger.warning('forecast columns:%s', list(forecast.columns))

                for idx, col in enumerate(['yhat', 'yhat_lower',
                                           'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(
                            x='ds', y=col, width=600, height=250,
                            value_label='#').relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(
                            x='ds', y=col, width=600, height=250,
                            value_label='#').relabel(col)

                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(
                            x='ds', y=col, width=550, height=250,
                            value_label='#').relabel(col)
                    else:
                        q *= forecast.hvplot.line(
                            x='ds', y=col, width=550, height=250,
                            value_label='#').relabel(col)
                return p + q
            except Exception:
                logger.error("tsa_freq:", exc_info=True)

    ####################################################
    # CALLBACKS
    def update(attrname, old, new):
        """Copy widget values onto the tab and trigger a redraw."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.address = thistab.address_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_load(attrname, old, new):
        """Reload the data for the dates currently in the date pickers."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'account_ext_warehouse'
        cols = [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type', 'amount'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = last_date - timedelta(days=60)

        # STREAMS Setup
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_select_variable = streams.Stream.define(
            'Select_variable', variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])
        # search by address checkboxes
        thistab.checkboxes = CheckboxButtonGroup(labels=thistab.addresses,
                                                 active=[0])

        # ----------------------------------- LOAD DATA
        # load model-making data
        thistab.load_df(datepicker_start.value, datepicker_end.value)

        # forecast plots, redrawn whenever stream_launch fires
        hv_tsa_amount = hv.DynamicMap(thistab.tsa_amount,
                                      streams=[stream_launch])
        tsa_amount = renderer.get_plot(hv_tsa_amount)
        hv_tsa_freq = hv.DynamicMap(thistab.tsa_freq,
                                    streams=[stream_launch])
        tsa_freq = renderer.get_plot(hv_tsa_freq)

        # add callbacks
        datepicker_start.on_change('value', update_load)
        datepicker_end.on_change('value', update_load)
        thistab.address_select.on_change('value', update)
        select_forecast_days.on_change('value', update)
        update_type_select.on_change('value', update)
        account_type_select.on_change('value', update)
        status_select.on_change('value', update)

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             thistab.address_select, select_forecast_days,
                             update_type_select, account_type_select,
                             status_select, thistab.checkboxes)
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [Spacer(width=20, height=30)],
                         [tsa_amount.state, controls],
                         [tsa_freq.state],
                         [thistab.notification_div['bottom']]])
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
def panel_metrics_scatter(data):
    """Build the 'Return Analysis' panel.

    The panel holds a scatter plot of two metrics coloured by percent
    return, a marginal histogram for each axis, and widgets selecting the
    return period, the two metrics and the number of histogram bins.

    Args:
        data: DataFrame providing ``close``, ``SMA_Deviation_Sigma``,
            ``MACD_signal`` and any other column offered by the metric
            selectors.

    Returns:
        A Bokeh ``Panel`` titled 'Return Analysis'.
    """

    def _histogram_columns(x_values, y_values, bins, density):
        """Return the histogram source columns for both marginal plots.

        Each bar needs both of its bin edges: the lower edges are
        ``edges[:-1]`` and the upper edges are ``edges[1:]``.
        """
        x_hist, x_edges = np.histogram(x_values.dropna(), bins=bins,
                                       density=density)
        y_hist, y_edges = np.histogram(y_values.dropna(), bins=bins,
                                       density=density)
        return {
            'x_hist': x_hist,
            'x_left': x_edges[:-1],
            'x_right': x_edges[1:],
            'y_hist': y_hist,
            'y_bottom': y_edges[:-1],
            'y_top': y_edges[1:],
        }

    ### Create a figure ###
    p = figure(plot_width=settings_figure['plot_width'],
               plot_height=settings_figure['plot_height'],
               title=settings_figure['title'],
               toolbar_location="below",
               toolbar_sticky=False)
    p.background_fill_color = 'aliceblue'
    p.background_fill_alpha = 0.4
    p_hist_top = figure(toolbar_location=None,
                        plot_width=settings_figure['plot_width'],
                        plot_height=150)
    p_hist_right = figure(toolbar_location=None,
                          plot_width=150,
                          plot_height=settings_figure['plot_height'])

    source_main = ColumnDataSource({
        'x': data['SMA_Deviation_Sigma'],
        'y': data['MACD_signal'],
        'return': calculate_percent_return(data['close'], 1)
    })
    # NOTE: the initial histograms show raw counts while update() switches to
    # densities; this matches the previous behaviour.
    source_hist = ColumnDataSource(
        _histogram_columns(data['SMA_Deviation_Sigma'], data['MACD_signal'],
                           bins=100, density=False))

    # The palette size is taken from the number of columns in source_main.
    mapper = LinearColorMapper(
        palette=all_palettes['RdBu'][len(source_main.data)],
        low=-2.5,
        high=2.5)
    p.circle(source=source_main, x='x', y='y',
             color={'field': 'return', 'transform': mapper},
             fill_alpha=0.2, line_alpha=0.4, size=3, hover_fill_alpha=1.0)

    # Bars span from the lower to the upper bin edge.  Using the same column
    # for both sides collapses every bar to zero width/height.
    p_hist_top.quad(source=source_hist, bottom=0, top='x_hist',
                    left='x_left', right='x_right')
    p_hist_right.quad(source=source_hist, bottom='y_bottom', top='y_top',
                      left=0, right='y_hist')
    p_column = column(p_hist_top, row(p, p_hist_right))

    def update():
        """Recompute the scatter and histogram data from the widgets."""
        period = selector_period.value
        data_return = calculate_percent_return(data['close'], period)
        bins = selector_bins.value
        xdata = data[selector_xmetric.value]
        ydata = data[selector_ymetric.value]
        source_main.data = {'x': xdata, 'y': ydata, 'return': data_return}
        source_hist.data = _histogram_columns(xdata, ydata, bins=bins,
                                              density=True)

    selector_period = create_widget(widget_settings['return_period'])
    selector_xmetric = create_widget(widget_settings['xmetric'])
    selector_ymetric = create_widget(widget_settings['ymetric'])
    selector_bins = create_widget(widget_settings['bins'])
    selector_period.on_change('value', lambda attr, old, new: update())
    selector_xmetric.on_change('value', lambda attr, old, new: update())
    selector_ymetric.on_change('value', lambda attr, old, new: update())
    selector_bins.on_change('value', lambda attr, old, new: update())

    ### Setting up the layout ###
    # Widgets
    controlers = WidgetBox(selector_period, selector_xmetric,
                           selector_ymetric, selector_bins, width=350)
    # Layout
    layout = row(controlers, p_column)
    panel = Panel(child=layout, title='Return Analysis')
    return panel
title = 'Delay Width (min)') binwidth_select.on_change('value', update) # RangeSlider control to select start and end of plotted delays range_select = RangeSlider(start = -60, end = 180, value = (-60, 120), step = 5, title = 'Delay Range (min)') range_select.on_change('value', update) # Find the initially selected carrieres initial_carriers = [carrier_selection.labels[i] for i in carrier_selection.active] src = make_dataset(initial_carriers, range_start = range_select.value[0], range_end = range_select.value[1], bin_width = binwidth_select.value) p = make_plot(src) # Put controls in a single element controls = WidgetBox(carrier_selection, binwidth_select, range_select) # Create a row layout layout = row(controls, p) # Make a tab with the layout tab = Panel(child=layout, title = 'Delay Histogram') tabs = Tabs(tabs=[tab]) # Add it to the current document (displays plot) curdoc().add_root(tabs)
def sentence_tab(src_csv):
    """Build the 'Sentence Lengths' tab.

    Plots, per year, the four sentence-length columns of the rows whose
    ``STATEID`` equals 99.

    Args:
        src_csv: DataFrame providing the columns ``STATEID``, ``YEAR``,
            ``JURGT1M``, ``JURLT1M``, ``JURGT1F`` and ``JURLT1F``.

    Returns:
        A Bokeh ``Panel`` titled 'Sentence Lengths'.
    """

    def make_dataset():
        """Return the STATEID == 99 rows as a ColumnDataSource."""
        new_src = src_csv.loc[
            src_csv['STATEID'] == 99,
            ['YEAR', 'JURGT1M', 'JURLT1M', 'JURGT1F', 'JURLT1F']]
        return ColumnDataSource(new_src)

    def make_plot(sentenceL):
        """Draw one circle series per sentence-length column."""
        fig = figure(plot_width=800, plot_height=600,
                     title="Received Sentence Lengths",
                     x_axis_label='Year',
                     y_axis_label='Number of Prisoners')

        # (data column, colour, legend label) for each series
        series = (
            ('JURGT1M', 'navy', 'Male, More than 1 Year'),
            ('JURLT1M', 'red', 'Male, Less than 1 Year'),
            ('JURGT1F', 'green', 'Female, More than 1 Year'),
            ('JURLT1F', 'purple', 'Female, Less than 1 Year'),
        )
        legend_items = []
        for column_name, colour, label in series:
            # The arrays are passed directly, so each glyph gets its own
            # data source with columns named 'x' and 'y' - the names the
            # hover tooltips below refer to.
            glyph = fig.circle(sentenceL.data['YEAR'],
                               sentenceL.data[column_name],
                               size=7, color=colour, alpha=0.5,
                               hover_fill_alpha=1.0,
                               hover_fill_color=colour)
            legend_items.append((label, [glyph]))

        # adding tooltips
        fig.add_tools(
            HoverTool(tooltips=[('Year', '@x'), ('# of Prisoners', '@y')]))

        # Legend placed outside the plot area; clicking an entry hides it.
        fig.add_layout(Legend(items=legend_items), 'right')
        fig.legend.click_policy = 'hide'
        return fig

    src = make_dataset()
    p = make_plot(src)
    # create a layout
    layout = row(p)
    # add a tab
    tab = Panel(child=layout, title='Sentence Lengths')
    return tab
inv_height_list.append(df.iloc[index]['inv_height']) inv_date_list.append(df.iloc[index]['true_local']) lapse_rate_list.append(df.iloc[index]['lapse_rate']) df_list.append(df) #print(index) print('two') print(j) # print(df) color = color_gen() p1.line(df.tmpc, df.height_m, legend = str(df.index[0]), color=colors[j], muted_color=colors[j], line_width=2, muted_alpha=0.2) # legend = 'Day = {}'.format(day) str(day.index[0]) p1.legend.click_policy="hide" tab1 = Panel(child=p1, title="RadioSonde") tabs = Tabs(tabs=[ tab1]) show(tabs) #%% inv_df = pd.DataFrame( {'datetime': inv_date_list, 'inv_height': inv_height_list, 'lapse_rate': lapse_rate_list}) inv_df2 = inv_df.drop_duplicates(subset=['datetime', 'inv_height'], keep='first') inv_df2.index = inv_df2.datetime inv_df2['Augusta_PM2_5'] = Augusta['PM2_5'] inv_df2['Adams_PM2_5'] = Adams['PM2_5'] inv_df2['Adams_temp'] = Adams['temp'] inv_df2.to_csv('/Users/matthew/Desktop/data/radiosondes/inv_height_m.csv', index=False) #%%
text= '<h2 style="color:darkslategray;font-family: "Lucida Console", Courier, monospace;">Car Sharing Utilization Tracking Tool</h2>', width=500, height=40) """ dre_p1=Panel(child=row(drs_start,width=150), title="Date Start Filter") dre_p12=Panel(child=row(drs_start_hour,width=130), title="Hour Start Filter") dre_p1=Tabs(tabs=[dre_p1]) dre_p12=Tabs(tabs=[dre_p12]) dre_p2=Panel(child=row(drs_end,width=150), title="Date End Filter") dre_p22=Panel(child=row(drs_end_hour,width=130), title="Hour End Filter") dre_p2=Tabs(tabs=[dre_p2]) dre_p22=Tabs(tabs=[dre_p22]) """ fdre_p1 = Panel(child=row(fdrs_start, fdrs_start_hour, width=280), title="From") fdre_p1 = Tabs(tabs=[fdre_p1]) fdre_p2 = Panel(child=row(fdrs_end, fdrs_end_hour, width=280), title="To") fdre_p2 = Tabs(tabs=[fdre_p2]) fine_df = Panel(child=row(column(row(fdre_p1, width=350)), column(row(fdre_p2, width=350))), title="Fine Date Range Filter") #fine_df=Tabs(tabs=[fine_df]) dre_p1 = Panel(child=row(drs_start, drs_start_hour, width=280), title="From") dre_p1 = Tabs(tabs=[dre_p1]) dre_p2 = Panel(child=row(drs_end, drs_end_hour, width=280), title="To") dre_p2 = Tabs(tabs=[dre_p2]) cum_df = Panel(child=row(column(row(dre_p1, width=350)), column(row(dre_p2, width=350))), title="Cumulative Date Range Filter")
# One daily-percent-change line per index, each from its own data source.
for _index_source, _index_color, _index_label in (
        (src_nikkei, 'green', 'Nikkei'),
        (src_hangseng, 'red', 'Hang Seng'),
        (src_nasdaq, 'blue', 'Nasdaq'),
):
    grph_day_perc_change.line(x='Date', y='Day_Perc_Change',
                              source=_index_source, color=_index_color,
                              legend_label=_index_label)

# Hover shows the index name and the percent change of the hovered point.
hover = HoverTool()
hover.tooltips = [
    ('Stock', '@Name'),
    ('Day Perc Change', '@Day_Perc_Change'),
]
grph_day_perc_change.add_tools(hover)
# Clicking a legend entry toggles the visibility of its line.
grph_day_perc_change.legend.click_policy = "hide"

# One tab per single-plot chart.
tab1 = Panel(title="Adj Close", child=grph_adj_close)
tab2 = Panel(title="Volume", child=grph_Volume)
tab3 = Panel(title="Day Perc Change", child=grph_day_perc_change)
tab = Tabs(tabs=[tab1, tab2, tab3])

# Display the tabbed layout, then write it to the output file.
show(tab)
save(tab)
divs2 = row([ rolling_payout_div, blank_divs[1], div1, blank_divs[4], div2, div5, blank_divs[5], div6, ]) divs0 = row([btc_collected_sold_div, column([divs1, divs2])]) charts = row([ column([payout_select, payout_chart]), column([miner_stats_chart, power_chart]) ]) tab1 = Panel(child=column([divs0, charts]), title='BTC Report') # PAGE 2 page_2_objects = {} for num, rg in enumerate(sorted(list(rig_colours.keys()))): if num == 0: page_2_objects[rg] = { 'source': ColumnDataSource(make_data(df, rg).drop_duplicates()), "chart_1": figure(plot_width=700, plot_height=200, x_axis_type='datetime', tools=[ BoxSelectTool(),
def product_reviews_over_time_with_slider_tab(dataset, metadata): heading_div = Div( text= """<br><h3 style="box-sizing: border-box; margin-top: 0px; margin-bottom: 0.5rem; font-family: "Nunito Sans", -apple-system, system-ui, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; font-weight: 600; color: rgb(26, 26, 26); font-size: 2rem; text-transform: uppercase; letter-spacing: 3px;">Pan Focus</h3><pre>Use the slider to focus on specific segment of data. This visualization is on static data and is Work In Progress. Please refer the report for more details.</pre><hr>""", width=1000, height=120, style={'text-align': 'center'}) combined_data = dataset.set_index('asin').join( metadata.set_index('Product ID')).reset_index() combined_data.columns = [ 'asin', 'reviewerID', 'overall', 'unixReviewTime', 'Description', 'price', 'Category' ] combined_data['asin'] = combined_data['asin'].astype(str) def get_product_data(product_id): product_data = combined_data[combined_data['asin'] == product_id] product_data['dtReviewTime'] = pd.to_datetime( product_data['unixReviewTime'], unit='s') product_data['reviewYear'] = product_data['dtReviewTime'].dt.strftime( '%Y') product_data['reviewMonth'] = product_data['dtReviewTime'].dt.strftime( '%Y-%m') product_data['dtReviewTime'] = product_data[ 'dtReviewTime'].dt.strftime('%Y-%m-%d') product_data = product_data.sort_values('dtReviewTime') return product_data # Default selected_product is the most_purchased_product selected_product = combined_data.asin.value_counts().head(1).index[0] filtered_data = get_product_data(selected_product) top_k = 8 if len(combined_data) > 8 else len(combined_data) top_k_products = combined_data.asin.value_counts().head( top_k).keys().tolist() bottom_k_products = combined_data.asin.value_counts().sort_values( ascending=True).head(top_k).keys().tolist() product_details = dict() product_details['asin'] = filtered_data.head(1).asin.values[0] 
product_details['description'] = filtered_data.head( 1).Description.values[0] product_details['category'] = filtered_data.head(1).Category.values[0] review_avg = filtered_data.groupby('Category')['overall'].agg( ['mean', 'count']).reset_index() product_details['total_reviews'] = str(review_avg['count'].values[0]) product_details['review_avg'] = str(review_avg['mean'].values[0]) price_avg = filtered_data.groupby('Category')['price'].agg( ['mean', 'count']).reset_index() product_details['price_avg'] = str(price_avg['mean'].values[0]) year_wise_reviews = filtered_data.groupby('reviewYear')['overall'].agg( ['mean', 'count']).reset_index() year_wise_reviews.columns = ['time', 'average', 'total'] year_wise_reviews['dt_time'] = pd.to_datetime(year_wise_reviews['time'], format='%Y') month_wise_reviews = filtered_data.groupby('reviewMonth')['overall'].agg( ['mean', 'count']).reset_index() month_wise_reviews.columns = ['time', 'average', 'total'] month_wise_reviews['dt_time'] = pd.to_datetime(month_wise_reviews['time'], format='%Y-%m') date_wise_reviews = filtered_data.groupby('dtReviewTime')['overall'].agg( ['mean', 'count']).reset_index() date_wise_reviews.columns = ['time', 'average', 'total'] date_wise_reviews['dt_time'] = pd.to_datetime(date_wise_reviews['time'], format='%Y-%m-%d') # Default plot is Year Wise Reviews plot_data = month_wise_reviews source = ColumnDataSource( data=dict(time_stamp=list(map(str, plot_data['time'])), dt_time=plot_data.dt_time.tolist(), total=plot_data.total.tolist(), average=plot_data.average.tolist(), color=getKColors(len(plot_data)))) # Adding hover tool hover = HoverTool(tooltips=[('Time', '@time_stamp'), ('Avg Review', '@average'), ('Total Reviews', '@total')], mode='vline') # Total Reviews Figure p1 = figure(x_range=(plot_data.dt_time.tolist()[0], plot_data.dt_time.tolist()[-1]), plot_width=1200, plot_height=250, tools="xpan", toolbar_location=None, x_axis_type="datetime") r1_l = p1.line(source=source, x='dt_time', y='total', 
line_width=2) r1_c = p1.circle(source=source, x='dt_time', y='total', size=15, color="red", alpha=0.5) select = figure( title= "Drag the middle and edges of the selection box to change the range below", plot_height=150, plot_width=1200, y_range=p1.y_range, x_axis_type="datetime", y_axis_type=None, tools="", toolbar_location=None, background_fill_color="#efefef") range_tool = RangeTool(x_range=p1.x_range) range_tool.overlay.fill_color = "navy" range_tool.overlay.fill_alpha = 0.2 s1 = select.line(source=source, x='dt_time', y='total') select.ygrid.grid_line_color = None select.add_tools(range_tool) select.toolbar.active_multi = range_tool p1.add_tools(hover) # Formatting axes p1.xaxis.axis_label = "Time" p1.xaxis.major_label_orientation = math.pi / 2 p1.xaxis.major_label_text_font_size = "10pt" p1.xaxis.axis_label_text_font_size = "15pt" p1.yaxis.axis_label = "Total Reviews" p1.yaxis.formatter = NumeralTickFormatter(format="0") p1.yaxis.major_label_text_font_size = "10pt" p1.yaxis.axis_label_text_font_size = "15pt" ds1_l = r1_l.data_source ds1_c = r1_c.data_source ds_sel = s1.data_source # Average Rating Figure p2 = figure(x_range=p1.x_range, plot_width=1200, plot_height=250, tools="xpan", toolbar_location=None, x_axis_type="datetime") r2_l = p2.line(source=source, x='dt_time', y='average', line_width=2) r2_c = p2.circle(source=source, x='dt_time', y='average', size=15, color="red", alpha=0.5) p2.add_tools(hover) # Formatting axes p2.xaxis.axis_label = "Time" p2.xaxis.major_label_orientation = math.pi / 2 p2.xaxis.major_label_text_font_size = "10pt" p2.xaxis.axis_label_text_font_size = "15pt" p2.yaxis.axis_label = "Average Rating" p2.yaxis.formatter = NumeralTickFormatter(format="0") p2.yaxis.major_label_text_font_size = "10pt" p2.yaxis.axis_label_text_font_size = "15pt" ds2_l = r2_l.data_source ds2_c = r2_c.data_source radio_button_group = RadioButtonGroup( labels=["Yearly", "Monthly", "Daily"], active=0, width=1200) def get_updated_plot_data_dict(new_plot_data): 
new_data = dict() if new_plot_data.empty: new_data['x_range'] = [] new_data['time_stamp'] = [] new_data['average'] = [] new_data['total'] = [] new_data['color'] = [] else: new_colors = getKColors(len(new_plot_data)) new_data['x_range'] = new_plot_data.dt_time.tolist() new_data['time_stamp'] = new_plot_data.time.tolist() new_data['average'] = new_plot_data.average.tolist() new_data['total'] = new_plot_data.total.tolist() new_data['color'] = new_colors return new_data def update_plot(attr, old, new): global year_wise_reviews, month_wise_reviews, date_wise_reviews new_plot_data = plot_data if radio_button_group.active == 0: try: year_wise_reviews except NameError: year_wise_reviews = None year_wise_reviews = filtered_data.groupby( 'reviewYear')['overall'].agg(['mean', 'count']).reset_index() year_wise_reviews.columns = ['time', 'average', 'total'] year_wise_reviews['dt_time'] = pd.to_datetime( year_wise_reviews['time'], format='%Y') new_plot_data = year_wise_reviews if radio_button_group.active == 1: try: month_wise_reviews except NameError: month_wise_reviews = None month_wise_reviews = filtered_data.groupby( 'reviewMonth')['overall'].agg(['mean', 'count']).reset_index() month_wise_reviews.columns = ['time', 'average', 'total'] month_wise_reviews['dt_time'] = pd.to_datetime( month_wise_reviews['time'], format='%Y-%m') new_plot_data = month_wise_reviews if radio_button_group.active == 2: try: date_wise_reviews except NameError: date_wise_reviews = None date_wise_reviews = filtered_data.groupby( 'dtReviewTime')['overall'].agg(['mean', 'count']).reset_index() date_wise_reviews.columns = ['time', 'average', 'total'] date_wise_reviews['dt_time'] = pd.to_datetime( date_wise_reviews['time'], format='%Y-%m-%d') new_plot_data = date_wise_reviews new_data = get_updated_plot_data_dict(new_plot_data) ds1_l.data = new_data ds1_c.data = new_data ds2_l.data = new_data ds2_c.data = new_data ds_sel.data = new_data source.data.update(new_data) print(source) p1.x_range.start = 
new_data['x_range'][0] p1.x_range.end = new_data['x_range'][-1] p2.x_range.start = new_data['x_range'][0] p2.x_range.end = new_data['x_range'][-1] range_tool.x_range.start = new_data['x_range'][0] range_tool.x_range.end = new_data['x_range'][-1] print(p1.x_range.start) print(p1.x_range.end) radio_button_group.on_change('active', update_plot) def generate_div_text(product_attributes): return """<table width="1200px" style='font-family: arial, sans-serif; border-collapse: collapse; width: 100%;'> <tr> <th style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center;'>Attribute</th> <th style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center;'>Value</th> </tr> <tr> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center; background-color: #dddddd'> Product ID </td> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center; background-color: #dddddd'> """ + product_attributes[ 'asin'] + """ </td> </tr> <tr> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center;'> Description </td> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center'> """ + product_attributes[ 'description'] + """ </td> </tr> <tr> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center; background-color: #dddddd'> Category </td> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center; background-color: #dddddd'> """ + product_attributes[ 'category'] + """ </td> </tr> <tr> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center;'> Total Reviews </td> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center'> """ + str( product_attributes['total_reviews']) + """ </td> </tr> <tr> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center; background-color: #dddddd'> Average Rating </td> <td style='border: 1px solid 
#dddddd; text-align: left; padding: 8px; align: center; background-color: #dddddd'> """ + str( product_attributes['review_avg']) + """ </td> </tr> <tr> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center;'> Average Price </td> <td style='border: 1px solid #dddddd; text-align: left; padding: 8px; align: center'> """ + str( product_attributes['price_avg']) + """ </td> </tr> </table>""" product_details_div = Div(text=generate_div_text(product_details), width=1200, height=300) def update_selection(): global year_wise_reviews, month_wise_reviews, date_wise_reviews, product_details searched_data = get_product_data(search_input.value) new_data = dict() if searched_data.empty: year_wise_reviews = searched_data month_wise_reviews = searched_data date_wise_reviews = searched_data new_data['x_range'] = [] new_data['time_stamp'] = [] new_data['average'] = [] new_data['total'] = [] new_data['color'] = [] p1.x_range = Range1d(start=0, end=0) # p1.x_range.factors = new_data['x_range'] # p2.x_range.factors = new_data['x_range'] product_details_div.text = """<img alt="Sorry! No product found." 
src="/myapp/static/images/no_results_found.png">""" else: product_details = dict() product_details['asin'] = searched_data.head(1).asin.values[0] product_details['description'] = searched_data.head( 1).Description.values[0] product_details['category'] = searched_data.head( 1).Category.values[0] updated_review_avg = searched_data.groupby( 'Category')['overall'].agg(['mean', 'count']).reset_index() product_details['total_reviews'] = str( updated_review_avg['count'].values[0]) product_details['review_avg'] = str( updated_review_avg['mean'].values[0]) updated_price_avg = searched_data.groupby('Category')['price'].agg( ['mean', 'count']).reset_index() product_details['price_avg'] = str( updated_price_avg['mean'].values[0]) product_details_div.text = generate_div_text(product_details) year_wise_reviews = searched_data.groupby( 'reviewYear')['overall'].agg(['mean', 'count']).reset_index() year_wise_reviews.columns = ['time', 'average', 'total'] month_wise_reviews = searched_data.groupby( 'reviewMonth')['overall'].agg(['mean', 'count']).reset_index() month_wise_reviews.columns = ['time', 'average', 'total'] date_wise_reviews = searched_data.groupby( 'dtReviewTime')['overall'].agg(['mean', 'count']).reset_index() date_wise_reviews.columns = ['time', 'average', 'total'] if radio_button_group.active == 0: new_plot_data = year_wise_reviews if radio_button_group.active == 1: new_plot_data = month_wise_reviews if radio_button_group.active == 2: new_plot_data = date_wise_reviews new_data = get_updated_plot_data_dict(new_plot_data) p1.x_range = Range1d(start=new_data['x_range'][1], end=new_data['x_range'][4]) # p1.x_range.factors = new_data['x_range'] # p2.x_range.factors = new_data['x_range'] ds1_l.data = new_data ds1_c.data = new_data ds2_l.data = new_data ds2_c.data = new_data search_input = TextInput(value=selected_product, title="Product ID:") search_button = Button(label="Search", button_type="success") search_button.on_click(update_selection) top_k_pid_list = "" temp_count 
= 1 for i in range(len(top_k_products)): # top_k_pid_list += top_k_products[i] + ", " if temp_count % 4 == 0: top_k_pid_list += top_k_products[i] + """<br>""" temp_count = 0 else: top_k_pid_list += top_k_products[i] + ", " temp_count = temp_count + 1 bottom_k_pid_list = "" temp_count = 1 for i in range(len(bottom_k_products)): # bottom_k_pid_list += bottom_k_products[i] + ", " if temp_count % 4 == 0: bottom_k_pid_list += bottom_k_products[i] + """<br>""" temp_count = 0 else: bottom_k_pid_list += bottom_k_products[i] + ", " temp_count = temp_count + 1 pre_text_data = """<font size="4"><b>Here are a few sample product ids from your dataset:</b></font> <br><br>""" + \ """<font color="blue" size="3"><b>Top """ + str(top_k) + """ products:</b></font><br>""" + \ top_k_pid_list + """<br>""" + \ """<font color="red" size="3"><b>Bottom """ + str(top_k) + """ products:</b></font><br>""" + \ bottom_k_pid_list sample_product_ids = Div(text=pre_text_data, width=600, height=100) # layout = column(search_input, search_button, product_details_div, radio_button_group, p1, p2) layout = column(heading_div, select, p1, p2) tab = Panel(child=layout, title='Product Reviews Timeline - WITH SLIDER (WIP)') return tab
def route_tab(flights):
    """Build the 'Continent Perf' tab.

    The tab shows one dot per row of ``flights`` for the selected position
    group and continent: x is the 'Overall Rating', y is the index of the
    row's country (tick labels are rewritten to country names).

    Parameters
    ----------
    flights : DataFrame
        Only the columns 'Continent', 'Position Group', 'Country' and
        'Overall Rating' are read here.

    Returns
    -------
    Panel
        Panel containing the two ``Select`` widgets and the scatter plot.
    """
    import json  # function-scope import: only used to serialise tick labels

    def make_title(origin, destination):
        # ``origin`` is matched against 'Position Group' and ``destination``
        # against 'Continent' (see make_dataset), so the title must name them
        # in that order. One helper keeps make_plot and update in agreement.
        return 'Position group %s in continent %s' % (origin, destination)

    def make_tick_formatter(label_dict):
        # json.dumps always yields a valid JavaScript object literal; a Python
        # repr does not (e.g. ``nan`` or numpy scalar reprs). Integer keys are
        # emitted as strings, which ``labels[tick]`` still resolves in JS.
        labels = json.dumps(
            {index: str(country) for index, country in label_dict.items()})
        return FuncTickFormatter(code="""
            var labels = %s;
            return labels[tick];
            """ % labels)

    def make_dataset(origin, destination):
        """Return ``(ColumnDataSource, {y_index: country})`` for one
        position group / continent combination."""
        subset = flights[(flights['Continent'] == destination)
                         & (flights['Position Group'] == origin)]

        # De-duplicate while keeping first-appearance order. A plain set()
        # has no stable order, which made the y-axis rows move between runs.
        countries = list(dict.fromkeys(subset['Country']))

        xs = []
        ys = []
        label_dict = {}
        for i, country in enumerate(countries):
            ratings = list(
                subset.loc[subset['Country'] == country, 'Overall Rating'])
            xs.extend(ratings)
            # Repeat the country's index once per rating so x and y line up.
            ys.extend([i] * len(ratings))
            label_dict[i] = country

        return ColumnDataSource(data={'x': xs, 'y': ys}), label_dict

    def make_plot(src, origin, destination, label_dict):
        p = figure(plot_width=800,
                   plot_height=600,
                   x_axis_label='Overall Rating',
                   y_axis_label='',
                   title=make_title(origin, destination))
        p.circle('x', 'y', source=src, alpha=0.4, color='navy', size=15)
        # One tick per country; the formatter turns the index into its name.
        p.yaxis[0].ticker.desired_num_ticks = len(label_dict)
        p.yaxis.formatter = make_tick_formatter(label_dict)
        return p

    def style(p):
        # Title
        p.title.align = 'center'
        p.title.text_font_size = '20pt'
        p.title.text_font = 'serif'

        # Axis titles
        p.xaxis.axis_label_text_font_size = '14pt'
        p.xaxis.axis_label_text_font_style = 'bold'
        p.yaxis.axis_label_text_font_size = '14pt'
        p.yaxis.axis_label_text_font_style = 'bold'

        # Tick labels
        p.xaxis.major_label_text_font_size = '12pt'
        p.yaxis.major_label_text_font_size = '12pt'
        return p

    def update(attr, old, new):
        """Select-widget callback: rebuild the data for the current choice."""
        origin = origin_select.value
        destination = dest_select.value

        new_src, label_dict = make_dataset(origin, destination)

        # With no matching rows the previous ticks/formatter are left alone.
        if label_dict:
            p.yaxis[0].ticker.desired_num_ticks = len(label_dict)
            p.yaxis.formatter = make_tick_formatter(label_dict)
        p.title.text = make_title(origin, destination)

        src.data.update(new_src.data)

    # Stable (first-appearance) option order instead of set() order.
    origins = list(dict.fromkeys(flights['Position Group']))
    dests = list(dict.fromkeys(flights['Continent']))

    origin_select = Select(title='Position Group',
                           value='Attacked',
                           options=origins)
    origin_select.on_change('value', update)

    dest_select = Select(title='Continent', value='SA', options=dests)
    dest_select.on_change('value', update)

    initial_origin = origin_select.value
    initial_dest = dest_select.value

    src, label_dict = make_dataset(initial_origin, initial_dest)
    p = make_plot(src, initial_origin, initial_dest, label_dict)
    p = style(p)

    controls = WidgetBox(origin_select, dest_select)
    layout = row(controls, p)
    tab = Panel(child=layout, title='Continent Perf')
    return tab
from bokeh.models.widgets import Slider
def daily_stats_tab(convoStats, convoSelection):
    """Build the 'Daily statistics' tab.

    Parameters
    ----------
    convoStats : iterable of analyser.ConvoStats
        Iterated several times, so it must be re-iterable. Each item is read
        for ``title``, ``participants``, ``messages`` and
        ``dailyCountsBySender``; each message is read for ``sender``,
        ``content`` and ``datetime``.
    convoSelection : widget
        Conversation picker. Its ``value`` is read and a ``'value'`` change
        callback is attached here.

    Returns
    -------
    Panel
        Panel holding the controls, the time-series plot, three pie charts
        and the message list.

    Notes
    -----
    ``style`` is not defined in this function; it is presumably a module
    level helper -- confirm against the rest of the file.
    """
    from html import escape  # function-scope import: used only for the Div

    # The original loop stopped after index 500, i.e. it showed 501 messages.
    maxDisplayedMessages = 501

    def find_convo(convoTitle):
        # next() without a default: StopIteration propagates when no
        # conversation carries that title (same as the original behaviour).
        convo: analyser.ConvoStats = next(
            x for x in convoStats if x.title == convoTitle)
        return convo

    def messages_in_range(convo, startDate, endDate):
        # The range is only applied when both bounds are given.
        if startDate is None or endDate is None:
            return convo.messages
        return [
            m for m in convo.messages
            if startDate <= m.datetime.date() <= endDate
        ]

    def percentage(part, whole):
        # An empty date range makes every total zero; report 0% instead of
        # raising ZeroDivisionError.
        return part / whole * 100 if whole else 0.0

    # Daily by-party and total message counts
    def make_timeseries_datasets(convoTitle, startDate=None, endDate=None):
        convo = find_convo(convoTitle)
        # Sort once and use the same order for line index, legend label and
        # colour. Previously the labels were sorted but the indices were not,
        # so lines could carry another participant's name. This order also
        # matches the colour order of the pie charts.
        participants = sorted(convo.participants)
        participantToId = {x: i for i, x in enumerate(participants)}
        totalsId = len(participants)
        participantToId['Total'] = totalsId

        xs = [[] for _ in range(totalsId + 1)]
        ys = [[] for _ in range(totalsId + 1)]
        color = Category10_7 if len(participants) < 7 else Turbo256
        colors = [color[i] for i in range(totalsId + 1)]
        labels = participants + ['Total']

        filterByDate = startDate is not None and endDate is not None
        if filterByDate:
            # Compare Timestamp with Timestamp: ordering a Timestamp against
            # a plain datetime.date is not supported by recent pandas.
            rangeStart = pd.Timestamp(startDate)
            rangeEnd = pd.Timestamp(endDate)

        for day, countsBySender in convo.dailyCountsBySender.items():
            convertedDate = pd.to_datetime(day)
            if filterByDate and (convertedDate < rangeStart
                                 or convertedDate > rangeEnd):
                continue
            for sender, count in countsBySender.items():
                participantId = participantToId[sender]
                xs[participantId].append(convertedDate)
                ys[participantId].append(count)
            xs[totalsId].append(convertedDate)
            ys[totalsId].append(sum(countsBySender.values()))

        # I need an invisible scatterplot for nice tooltips, because
        # multiline tooltips don't work well
        totalX = list(chain.from_iterable(xs))
        totalY = list(chain.from_iterable(ys))
        totalLabels = list(
            chain.from_iterable([label] * len(xs[participantToId[label]])
                                for label in labels))

        return (ColumnDataSource(data={
            'x': xs,
            'y': ys,
            'color': colors,
            'label': labels
        }),
                ColumnDataSource(data={
                    'x': totalX,
                    'y': totalY,
                    'label': totalLabels
                }))

    def make_piechart_dataset(convoTitle, startDate=None, endDate=None):
        convo = find_convo(convoTitle)
        columns = [
            'sender', 'messageCount', 'messageCountAngle', 'f_messageCount',
            'wordCount', 'wordCountAngle', 'f_wordCount', 'initiationCount',
            'initiationCountAngle', 'f_initiationCount', 'color'
        ]
        color = Category10_7 if len(convo.participants) <= 7 else Turbo256

        allMessages = messages_in_range(convo, startDate, endDate)
        totalMessageCount = len(allMessages)
        totalWordCount = sum(len(x.content.split()) for x in allMessages)
        participantCount = len(convo.participants)

        initiationsBySender = defaultdict(int)
        curConvoParticipants = set()
        lastMessage = ''
        for i, message in enumerate(allMessages):
            if i == 0:
                # first message, so conversation initiated
                initiationsBySender[message.sender] += 1
            else:
                timeDiff = message.datetime - allMessages[i - 1].datetime
                # It is assumed that if 4h passed since last message, a new
                # conversation has been initiated
                hoursPassed = timeDiff.total_seconds() // (60 * 60)
                # Extra conditions: if the last convo only had one
                # participant or the last message was a question, don't
                # count a new initiation
                # TODO: Perhaps I should apply the same checks when
                # calculating conversation stats, though for durations
                # between messages etc just the time check is probably better
                if (hoursPassed >= 4 and '?' not in lastMessage
                        and len(curConvoParticipants) > 1):
                    initiationsBySender[message.sender] += 1
                    curConvoParticipants = set()
            lastMessage = message.content
            curConvoParticipants |= {message.sender}
        totalInitiationCount = sum(initiationsBySender.values())

        rows = []
        for i, participant in enumerate(sorted(convo.participants)):
            messages = [m for m in allMessages if m.sender == participant]
            messageCount = len(messages)
            wordCount = sum(len(x.content.split()) for x in messages)
            initiationCount = initiationsBySender[participant]

            if totalInitiationCount:
                initiationAngle = (initiationCount / totalInitiationCount *
                                   2 * pi)
            else:
                # No initiations at all (empty range): equal slices, which is
                # what the +1 smoothing below yields for the other two pies.
                initiationAngle = 2 * pi / participantCount

            rows.append({
                'sender': participant,
                'messageCount': messageCount,
                # The +1 / +participantCount smoothing keeps the denominator
                # non-zero when the interval holds no messages.
                'messageCountAngle': (messageCount + 1) /
                (totalMessageCount + participantCount) * 2 * pi,
                'f_messageCount':
                f'{messageCount} messages ({percentage(messageCount, totalMessageCount):.2f}%)',
                'wordCount': wordCount,
                'wordCountAngle': (wordCount + 1) /
                (totalWordCount + participantCount) * 2 * pi,
                'f_wordCount':
                f'{wordCount} words ({percentage(wordCount, totalWordCount):.2f}%)',
                'initiationCount': initiationCount,
                'initiationCountAngle': initiationAngle,
                'f_initiationCount':
                f'{initiationCount} initations ({percentage(initiationCount, totalInitiationCount):.2f}%)',
                'color': color[i],
            })

        # Build the frame in one go: DataFrame.append was removed in
        # pandas 2.0 and was quadratic anyway.
        return ColumnDataSource(pd.DataFrame(rows, columns=columns))

    def make_messages_display(convoTitle, startDate=None, endDate=None):
        convo = find_convo(convoTitle)
        allMessages = messages_in_range(convo, startDate, endDate)

        # TODO: A single long word will make the div ignore width settings
        # and overflow the window
        # Sender and content come from chat data and are rendered as HTML by
        # the Div, so they are escaped to avoid markup injection.
        lines = [
            f'<b>{escape(str(message.sender))}</b> <i>({message.datetime.strftime("%Y/%m/%d %H:%M")})</i>: {escape(str(message.content))} </br>'
            for message in allMessages[:maxDisplayedMessages]
        ]
        rez = ('<p style="overflow-wrap:break-word;width:95%;">' +
               ''.join(lines) + '</p>')
        return Div(text=rez, sizing_mode='stretch_width')

    # Statistics for the conversations in the selected date range like
    # average message length
    def make_stats_text(convoTitle, startDate=None, endDate=None):
        convo = find_convo(convoTitle)
        allMessages = messages_in_range(convo, startDate, endDate)
        # An empty range used to raise IndexError on allMessages[0].
        if not allMessages:
            return ''

        totalMessageLensWords = defaultdict(int)
        messageCountsByParticipant = defaultdict(int)
        convoDurationSum = 0
        convoLenWordsSum = 0
        pauseBetweenConvosDurationSum = 0
        pauseBetweenMessagesInConvoSum = 0
        lastConvoStart = allMessages[0].datetime
        convoWordCount = len(allMessages[0].content.split())
        convoCount = 0
        convoMessageCount = 1
        convoPauseBetweenMessagesSum = 0
        for i, message in enumerate(allMessages):
            wordCount = len(message.content.split())
            totalMessageLensWords[message.sender] += wordCount
            messageCountsByParticipant[message.sender] += 1
            totalMessageLensWords['total'] += wordCount
            if i == 0:
                continue

            timeDiff = message.datetime - allMessages[i - 1].datetime
            hoursPassed = timeDiff.total_seconds() // (60 * 60)
            if hoursPassed >= 4:
                # A new conversation has begun: close the previous one.
                convoDuration = (allMessages[i - 1].datetime -
                                 lastConvoStart).total_seconds()
                convoDurationSum += convoDuration
                convoCount += 1
                convoLenWordsSum += convoWordCount
                pauseBetweenConvosDurationSum += timeDiff.total_seconds()
                pauseBetweenMessagesInConvoSum += (
                    convoPauseBetweenMessagesSum / convoMessageCount)
                convoMessageCount = 1
                convoPauseBetweenMessagesSum = 0
                convoWordCount = wordCount
                lastConvoStart = message.datetime
            else:
                convoWordCount += wordCount
                convoPauseBetweenMessagesSum += timeDiff.total_seconds()
                convoMessageCount += 1

        # convoCount only grows when a >= 4 h gap closes a conversation, so a
        # range without such a gap has nothing to average.
        if convoCount == 0:
            return ''

        # divmod by 3600 leaves the remainder in seconds, hence the // 60.
        hours, remainder = divmod(convoDurationSum / convoCount, 60 * 60)
        rez = '<p style="width:95%;">'
        rez += f'Average conversation duration: {hours:.0f} h {remainder // 60:.0f} min</br>'
        if convoCount > 2:
            hours, remainder = divmod(
                pauseBetweenConvosDurationSum / (convoCount - 1), 60 * 60)
            rez += f'Average pause between conversations duration: {hours:.0f} h {remainder // 60:.0f} min</br>'
        rez += f'Average conversation length: {len(allMessages) / convoCount:.1f} messages, {convoLenWordsSum // convoCount} words</br>'
        # Average the per-conversation means accumulated in the loop. The
        # previous code divided the still-open conversation's running pause
        # sum instead, leaving the accumulated total unused.
        minutes, seconds = divmod(pauseBetweenMessagesInConvoSum / convoCount,
                                  60)
        rez += f'Average time between messages in a conversation: {minutes:.1f} min {seconds:.1f} s</br>'
        rez += f'Average message length: {totalMessageLensWords["total"] // len(allMessages)} words</br>'
        for participant in convo.participants:
            if messageCountsByParticipant[participant] == 0:
                continue
            rez += f'Average length of messages from {participant}: {totalMessageLensWords[participant] // messageCountsByParticipant[participant]} words</br>'
        rez += '</p>'
        return rez

    def make_timeseries_plot(src, tooltipSrc):
        p = figure(plot_width=600,
                   plot_height=600,
                   title='Daily message counts by date',
                   x_axis_type='datetime',
                   x_axis_label='Date',
                   y_axis_label='Message count')
        p.multi_line(xs='x',
                     ys='y',
                     source=src,
                     color='color',
                     line_width=3,
                     legend_field='label',
                     line_alpha=0.4)
        # Invisible points that exist only to carry the hover tooltips.
        tooltipScatter = p.scatter('x', 'y', source=tooltipSrc, alpha=0)
        # vline means that tooltip will be shown when mouse is in a vertical
        # line above glyph
        hover = HoverTool(tooltips=[('Message count', '@y'),
                                    ('Details', '@x{%F}, @label')],
                          formatters={'@x': 'datetime'},
                          mode='vline')
        hover.renderers = [tooltipScatter]
        p.add_tools(hover)
        return p

    def _make_piechart(src, startAngle, endAngle, title, bottomTitle,
                       tooltips):
        p = figure(plot_height=200,
                   plot_width=280,
                   toolbar_location=None,
                   title=title)
        p.wedge(x=0,
                y=1,
                radius=0.5,
                start_angle=startAngle,
                end_angle=endAngle,
                line_color='white',
                fill_color='color',
                source=src)
        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None
        hover = HoverTool(tooltips=tooltips)
        p.add_tools(hover)
        p.add_layout(Title(text=bottomTitle, align="center"), "below")
        return p

    def make_piechart_plots(src):
        totalMessages = sum(src.data["messageCount"])
        p1 = _make_piechart(src,
                            cumsum('messageCountAngle', include_zero=True),
                            cumsum('messageCountAngle'),
                            'Messages sent by participant',
                            f'Total messages: {totalMessages}',
                            [('Participant', '@sender'),
                             ('Message count', '@f_messageCount')])

        totalWords = sum(src.data["wordCount"])
        p2 = _make_piechart(src, cumsum('wordCountAngle', include_zero=True),
                            cumsum('wordCountAngle'),
                            'Word counts by participant',
                            f'Total words: {totalWords}',
                            [('Participant', '@sender'),
                             ('Word count', '@f_wordCount')])

        totalInitiations = sum(src.data["initiationCount"])
        p3 = _make_piechart(
            src, cumsum('initiationCountAngle', include_zero=True),
            cumsum('initiationCountAngle'),
            'Conversations initiated by participant',
            f'Total conversations: {totalInitiations}',
            [('Participant', '@sender'),
             ('Conversations initiated', '@f_initiationCount')])
        return column(p1, p2, p3)

    def _update_pie_bottom_labels():
        # Update the bottom titles of the piecharts. Pies are addressed by
        # position in the column built by make_piechart_plots.
        # This is a bit hack-ish, but don't know a better way to do it
        captions = [('messageCount', 'Total messages'),
                    ('wordCount', 'Total words'),
                    ('initiationCount', 'Total conversations')]
        for pie, (columnName, caption) in zip(piePlots.children, captions):
            pie.below[1].text = f'{caption}: {sum(pieSrc.data[columnName])}'

    def refresh_views(convoTitle, startDate=None, endDate=None):
        # Shared by both callbacks: rebuild every dataset and push it into
        # the sources/widgets that are already part of the document.
        # TODO: There is some black magic going on here, find if there is a
        # proper way to do this
        newSrc, newTooltipSrc = make_timeseries_datasets(
            convoTitle, startDate, endDate)
        src.data.update(newSrc.data)
        tooltipSrc.data.update(newTooltipSrc.data)

        newPieSrc = make_piechart_dataset(convoTitle, startDate, endDate)
        pieSrc.data.update(newPieSrc.data)
        _update_pie_bottom_labels()

        messageColumn.children = [
            make_messages_display(convoTitle, startDate, endDate)
        ]
        statsDisplay.text = make_stats_text(convoTitle, startDate, endDate)

    def on_conversation_changed(attr, oldValue, newValue):
        convo = find_convo(newValue)
        # When switching to a new convo, update the date range slider to
        # match convo data ranges
        convoDates = list(convo.dailyCountsBySender.keys())
        firstDay = pd.to_datetime(convoDates[0]).date()
        lastDay = pd.to_datetime(convoDates[-1]).date()
        dateSlider.start = firstDay
        dateSlider.end = lastDay
        dateSlider.value = (firstDay, lastDay)

        # No date bounds: show the whole newly selected conversation.
        refresh_views(newValue)

    def on_date_range_changed(attr, old, new):
        startDate, endDate = dateSlider.value_as_date
        refresh_views(convoSelection.value, startDate, endDate)

    # A dropdown list to select a conversation
    conversationTitles = sorted([x.title for x in convoStats])
    convoSelection.on_change('value', on_conversation_changed)

    # A slider to select a date range for the analysis
    initialConvo = find_convo(conversationTitles[0])
    initialDates = list(initialConvo.dailyCountsBySender.keys())
    start = pd.to_datetime(initialDates[0]).date()
    end = pd.to_datetime(initialDates[-1]).date()
    dateSlider = DateRangeSlider(title='Date interval',
                                 start=start,
                                 end=date.today(),
                                 value=(start, end),
                                 step=24 * 60 * 60 * 1000)
    # value_throttled: recompute when the handle is released, not on every
    # intermediate drag position.
    dateSlider.on_change('value_throttled', on_date_range_changed)

    src, tooltipSrc = make_timeseries_datasets(conversationTitles[0], start,
                                               end)
    p = make_timeseries_plot(src, tooltipSrc)
    p = style(p)

    pieSrc = make_piechart_dataset(conversationTitles[0], start, end)
    piePlots = make_piechart_plots(pieSrc)

    messageContents = [
        make_messages_display(conversationTitles[0], start, end)
    ]
    messageColumn = column(children=messageContents,
                           height=670,
                           css_classes=['scrollable'],
                           sizing_mode='stretch_width')

    statsDisplay = Div(text=make_stats_text(conversationTitles[0], start, end))
    statsColumn = column(children=[statsDisplay],
                         height=540,
                         css_classes=['scrollable'])

    # create layout
    leftColumn = column(convoSelection, dateSlider, statsColumn)
    layout = row(leftColumn, p, piePlots, messageColumn)
    tab = Panel(child=layout, title='Daily statistics')
    return tab
# Set up Bokeh Layout with placeholders title = PreText() summary_table = DataTable() objective_table = DataTable() cost_plot = figure() energy_plot = figure() cap_plot = figure() selectors = column(scenario_select, period_select, stage_select, zone_select, capacity_select) top_row = row(selectors, summary_table, objective_table) middle_row = row(cap_plot, energy_plot) bottom_row = row(cost_plot) layout = column(title, top_row, middle_row, bottom_row) # Set up tabs tab1 = Panel(child=layout, title='General') storage_dummy = PreText(text='storage summary here, incl. duration', width=600) policy_dummy = PreText(text='policy summary here, including duals', width=600) inputs_dummy = PreText( text= 'inputs summary here, e.g. loads (profile charts, min, max, avg), costs', width=600) tab2 = Panel(child=storage_dummy, title='Storage') tab3 = Panel(child=policy_dummy, title='Policy Targets') tab4 = Panel(child=inputs_dummy, title='Inputs') tabs = Tabs(tabs=[tab1, tab2, tab3, tab4]) # Put all tabs in one application # Update Plots based on selected values update_plots(attr="", old="", new="") # Set up callback behavior (update plots if user changes selection)
def create_tab(data, name):
    """Build one dashboard tab (controls, time series, two donuts, top-10
    dot plot) for ``data``.

    Parameters
    ----------
    data : DataFrame
        Source data. ``SocialNetwork`` is read directly here; the remaining
        columns are consumed by helper functions defined elsewhere.
    name : str
        Title of the returned panel.

    Returns
    -------
    Panel
    """

    def donut_colors(n):
        """Return ``n`` colours taken from ``Category20c``.

        ``Category20c`` only has entries for sizes 3 to 20, so indexing it
        with the raw row count fails when a filter leaves fewer than three
        (or more than twenty) categories.
        """
        if n <= 0:
            return []
        if n < 3:
            return list(Category20c[3][:n])
        if n <= 20:
            return list(Category20c[n])
        largest = list(Category20c[20])
        # More rows than colours: repeat the largest palette.
        return (largest * (n // len(largest) + 1))[:n]

    def make_dataset(metric_fun, metric_sentiment, month_start, month_end,
                     editorial, category, social_network, product):
        """Build the datasets for every chart shown in the dashboard.

        Parameters
        ----------
        metric_fun : callable
            Function computing the metric selected in the widget; called
            with the filtered data.
        metric_sentiment : str
            Sentiment associated with the chosen metric (Positivity:
            positive, Gradient: negative, Crisis: negative, Post health:
            positive).
        month_start, month_end
            Bounds of the selected period; converted with ``pd.Timestamp``.
        editorial, category, social_network, product
            Values selected in the filter widgets.

        Returns
        -------
        dict
            ``'ts'``, ``'donut'``, ``'avg_donut'`` and ``'top'`` map to
            ``ColumnDataSource`` objects; ``'avg_top'`` is the mean score
            rounded to two decimals.
        """
        month_start = pd.Timestamp(month_start)
        month_end = pd.Timestamp(month_end)

        # Filter the data according to the widget selections
        filters = {
            'Editoria': editorial,
            'Categoria': category,
            'SocialNetwork': social_network,
            'Produto': product
        }
        filtered_data = filter_data(data, filters)

        # Time series of the selected metric, restricted to the period
        ts_data = metric_fun(filtered_data)
        ts_data = ts_data[(ts_data.time >= month_start)
                          & (ts_data.time <= month_end)]

        # Donut for the selected period
        in_period = ((filtered_data.Month >= month_start)
                     & (filtered_data.Month <= month_end))
        donut_data = percent(filtered_data[in_period])
        donut_data['angle'] = donut_data['value'] / sum(
            donut_data['value']) * 2 * pi
        donut_data['color'] = donut_colors(donut_data.shape[0])

        # Donut built from the 'MAVG' column at the last selected month
        avg_donut_data = percent_avg(filtered_data)
        avg_donut_data = avg_donut_data[avg_donut_data.Month == month_end][[
            'Sentimento', 'MAVG'
        ]].copy()  # copy: columns are added to this slice below
        avg_donut_data.columns = ['label', 'value']
        avg_donut_data['angle'] = avg_donut_data['value'] / sum(
            avg_donut_data['value']) * 2 * pi
        avg_donut_data['color'] = donut_colors(avg_donut_data.shape[0])

        # Ten highest scores, displayed in ascending order
        top_data = filter_sentiment(filtered_data, metric_sentiment)
        top_data = top_data[(top_data.Month >= month_start)
                            & (top_data.Month <= month_end)]
        top_data = brand_health_txengages(top_data)
        avg_top_data = round(top_data.score.mean(), 2)
        top_data = top_data.sort_values('score', ascending=False).iloc[:10]
        top_data = top_data.sort_values('score')
        # One label per row actually present. The fixed '1'..'10' list raised
        # ValueError whenever the filters left fewer than ten rows.
        top_data['recorte'] = [
            str(rank) for rank in range(1, len(top_data) + 1)
        ]

        # Convert the dataframes into column data sources
        datasets = {
            'ts': ColumnDataSource(ts_data),
            'donut': ColumnDataSource(donut_data),
            'avg_donut': ColumnDataSource(avg_donut_data),
            'top': ColumnDataSource(top_data),
            'avg_top': avg_top_data
        }
        return datasets

    def update(attr, old, new):
        """Widget change callback: recompute every dataset from the current
        widget values and push the results into the existing sources.

        ``attr``, ``old`` and ``new`` are supplied by ``on_change`` and are
        not used; all values are read from the widgets themselves.
        """
        month_start = month_select.value_as_date[0]
        month_end = month_select.value_as_date[1]
        editorial = editorial_select.value
        category = category_select.value
        product = product_select.value
        social_network = [
            social_network_select.labels[i]
            for i in social_network_select.active
        ]
        metric_attr = get_metric_attr(metric_select.value)

        new_src = make_dataset(metric_fun=metric_attr['fun'],
                               metric_sentiment=metric_attr['sentiment'],
                               month_start=month_start,
                               month_end=month_end,
                               editorial=editorial,
                               category=category,
                               social_network=social_network,
                               product=product)

        src['ts'].data.update(new_src['ts'].data)
        src['top'].data.update(new_src['top'].data)
        src['avg_top'] = new_src['avg_top']
        src['donut'].data.update(new_src['donut'].data)
        src['avg_donut'].data.update(new_src['avg_donut'].data)

        # The average-score title used to be built once and never refreshed,
        # so it kept showing the initial value. Swap in a fresh title widget.
        avg_top_box.children = [
            create_div_title(f'Escore Médio: {new_src["avg_top"]}')
        ]

    networks = data.SocialNetwork.unique().tolist()
    editorials = get_multselect_options(data, 'Editoria')
    categories = get_multselect_options(data, 'Categoria')
    products = get_multselect_options(data, 'Produto')

    month_select = DateRangeSlider(start=date(2019, 1, 1),
                                   end=date(2019, 8, 1),
                                   value=(date(2019, 1, 1), date(2019, 8, 1)),
                                   step=1,
                                   format="%b %Y")
    metric_select = Select(value="gradiente",
                           options=[("velocity", "Parâmetro de Crise"),
                                    ("positivity", "Grau de Positividade"),
                                    ("gradiente", "Grau de Negatividade"),
                                    ("brand_health", "Saúde da Marca"),
                                    ("post_health", "Saúde do Post")])
    product_select = MultiSelect(value=['Todos'], options=products)
    category_select = MultiSelect(value=['Todos'], options=categories)
    editorial_select = MultiSelect(value=['Todos'], options=editorials)
    social_network_select = CheckboxGroup(labels=networks,
                                          active=list(range(len(networks))))

    metric_select.on_change('value', update)
    month_select.on_change('value', update)
    editorial_select.on_change('value', update)
    category_select.on_change('value', update)
    product_select.on_change('value', update)
    social_network_select.on_change('active', update)

    initial_metric_attr = get_metric_attr(metric_select.value)
    metric_sentiment = initial_metric_attr['sentiment']
    initial_networks = [
        social_network_select.labels[i] for i in social_network_select.active
    ]

    src = make_dataset(metric_fun=initial_metric_attr['fun'],
                       metric_sentiment=metric_sentiment,
                       month_start=month_select.value_as_date[0],
                       month_end=month_select.value_as_date[1],
                       editorial=editorial_select.value,
                       category=category_select.value,
                       social_network=initial_networks,
                       product=product_select.value)

    p_ts = make_plot_ts(src['ts'], 'Evolução', metric_sentiment)
    p_top = make_dotplot(src['top'])
    # Wrapped in its own column so update() can replace the title widget.
    avg_top_box = column(create_div_title(f'Escore Médio: {src["avg_top"]}'))
    p_donut = make_plot_donut(src['donut'], 'Percentual')
    p_avg_donut = make_plot_donut(src['avg_donut'], 'Norma Percentual')

    metric_title = create_div_title('MÉTRICA')
    month_title = create_div_title('PERÍODO')
    network_title = create_div_title('REDE SOCIAL')
    category_title = create_div_title('CATEGORIA')
    editorial_title = create_div_title('EDITORIA')
    product_title = create_div_title('PRODUTO')

    controls = WidgetBox(
        column(metric_title,
               metric_select,
               Div(height=5),
               month_title,
               month_select,
               Div(height=5),
               editorial_title,
               editorial_select,
               Div(height=5),
               category_title,
               category_select,
               Div(height=5),
               product_title,
               product_select,
               Div(height=5),
               network_title,
               social_network_select,
               width=250))

    plots = column(p_ts, Div(height=20), row(p_donut, p_avg_donut))
    layout = row(controls, Div(width=50), plots)
    layout = column(Div(text="", height=5), layout, Div(width=20),
                    avg_top_box, p_top)
    tab = Panel(child=layout, title=name)
    return tab
def histogram_tab(meta):
    """Build the 'Histogram' tab of arrival-delay proportions.

    Args:
        meta: Table with a ``Type`` column; its distinct values become the
            checkbox labels.

    Returns:
        A ``Panel`` titled 'Histogram' holding the widgets next to the plot.
    """
    # NOTE(review): checkbox labels come from ``meta['Type']`` but rows are
    # selected from the module-level ``flights`` frame by its ``name``
    # column -- confirm the two are meant to agree.

    columns = [
        'proportion', 'left', 'right', 'f_proportion', 'f_interval', 'name',
        'color'
    ]

    # Work on a sorted copy: sorting Category20_16 in place would reorder the
    # palette for every other user of it, and fails if it is a tuple.
    type_colors = sorted(Category20_16)

    def make_dataset(carrier_list, range_start=-60, range_end=120,
                     bin_width=5):
        """Return a ColumnDataSource with one histogram per selected name.

        Args:
            carrier_list: Values of ``flights['name']`` to include.
            range_start: Lower edge of the histogram range.
            range_end: Upper edge of the histogram range.
            bin_width: Width of each bin.
        """
        range_extent = range_end - range_start
        frames = []

        for i, carrier_name in enumerate(carrier_list):
            # Subset to the carrier
            subset = flights[flights['name'] == carrier_name]

            # Histogram of the arrival delays over the requested range
            arr_hist, edges = np.histogram(subset['arr_delay'],
                                           bins=int(range_extent / bin_width),
                                           range=[range_start, range_end])

            # Divide the counts by the total to get a proportion
            arr_df = pd.DataFrame({
                'proportion': arr_hist / np.sum(arr_hist),
                'left': edges[:-1],
                'right': edges[1:]
            })

            # Pre-formatted strings for the hover tool
            arr_df['f_proportion'] = [
                '%0.5f' % proportion for proportion in arr_df['proportion']
            ]
            arr_df['f_interval'] = [
                '%d to %d minutes' % (left, right)
                for left, right in zip(arr_df['left'], arr_df['right'])
            ]

            # Label and colour; wrap around when there are more names than
            # palette entries instead of raising IndexError.
            arr_df['name'] = carrier_name
            arr_df['color'] = type_colors[i % len(type_colors)]

            frames.append(arr_df)

        # A single concat replaces the removed, quadratic DataFrame.append.
        if frames:
            by_carrier = pd.concat(frames)
        else:
            by_carrier = pd.DataFrame(columns=columns)

        by_carrier = by_carrier.sort_values(['name', 'left'])
        return ColumnDataSource(by_carrier)

    def style(p):
        """Apply the shared title/axis/tick fonts to ``p`` and return it."""
        # Title
        p.title.align = 'center'
        p.title.text_font_size = '20pt'
        p.title.text_font = 'serif'

        # Axis titles
        p.xaxis.axis_label_text_font_size = '14pt'
        p.xaxis.axis_label_text_font_style = 'bold'
        p.yaxis.axis_label_text_font_size = '14pt'
        p.yaxis.axis_label_text_font_style = 'bold'

        # Tick labels
        p.xaxis.major_label_text_font_size = '12pt'
        p.yaxis.major_label_text_font_size = '12pt'
        return p

    def make_plot(src):
        """Create the quad-glyph histogram figure backed by ``src``."""
        # Blank plot with correct labels
        p = figure(plot_width=700,
                   plot_height=700,
                   title='Histogram of Arrival Delays by Airline',
                   x_axis_label='Delay (min)',
                   y_axis_label='Proportion')

        # Quad glyphs to create a histogram
        p.quad(source=src,
               bottom=0,
               top='proportion',
               left='left',
               right='right',
               color='color',
               fill_alpha=0.7,
               hover_fill_color='color',
               legend='name',
               hover_fill_alpha=1.0,
               line_color='black')

        # Hover tool with vline mode
        hover = HoverTool(tooltips=[('Carrier', '@name'),
                                    ('Delay', '@f_interval'),
                                    ('Proportion', '@f_proportion')],
                          mode='vline')
        p.add_tools(hover)

        return style(p)

    def update(attr, old, new):
        """Widget callback: rebuild the dataset and push it into ``src``."""
        types_to_plot = [
            type_selection.labels[i] for i in type_selection.active
        ]
        new_src = make_dataset(types_to_plot,
                               range_start=range_select.value[0],
                               range_end=range_select.value[1],
                               bin_width=binwidth_select.value)
        src.data.update(new_src.data)

    # Available types, alphabetically
    available_type = sorted(set(meta['Type']))

    # Pre-select the first two entries (fewer if fewer exist).
    type_selection = CheckboxGroup(
        labels=available_type,
        active=list(range(min(2, len(available_type)))))
    type_selection.on_change('active', update)

    binwidth_select = Slider(start=1,
                             end=30,
                             step=1,
                             value=5,
                             title='Bin Width')
    binwidth_select.on_change('value', update)

    range_select = RangeSlider(start=-60,
                               end=180,
                               value=(-60, 120),
                               step=5,
                               title='Range')
    range_select.on_change('value', update)

    # Initial selection and data source
    initial_carriers = [
        type_selection.labels[i] for i in type_selection.active
    ]
    src = make_dataset(initial_carriers,
                       range_start=range_select.value[0],
                       range_end=range_select.value[1],
                       bin_width=binwidth_select.value)
    p = make_plot(src)

    # Put controls in a single element
    controls = WidgetBox(type_selection, binwidth_select, range_select)

    # Create a row layout
    layout = row(controls, p)

    # Make a tab with the layout
    tab = Panel(child=layout, title='Histogram')
    return tab
def country_tab(list_sp_objs):
    """Build the 'Country referencing' tab.

    The tab shows, for the country typed into the text box, a multi-line
    chart of its ten most frequent words per year and a world map coloured
    by either the countries it mentions or the countries that mention it.

    Args:
        list_sp_objs: Iterable of speech objects; the code reads their
            ``country``, ``year`` and ``word_frequency`` attributes.

    Returns:
        A ``Panel`` titled 'Country referencing'.
    """

    def country_code_for(name):
        """Return the key of ``country_dic`` whose value equals ``name``.

        Raises:
            ValueError: if ``name`` is not a value of ``country_dic``.
        """
        codes = list(country_dic.keys())
        names = list(country_dic.values())
        return codes[names.index(name)]

    def make_data_set(speeches, country, type_display):
        """Return ``(word_source, map_source)`` for ``country``.

        ``type_display`` selects the map data: ``'mentions'`` uses the
        mentions totals, anything else the mentioned-by totals.
        """
        mentions_by_key = search_mentions(country)
        mentioned_by_by_key = search_is_mentioned_by(country)

        tot_mentions = Counter()
        for entries in mentions_by_key.values():
            tot_mentions += Counter(dict(entries))

        # Iterate the mentioned-by dict itself; the previous code walked the
        # keys of the mentions dict here, which raises KeyError or drops
        # entries whenever the two dicts have different keys.
        tot_mentioned_by = Counter()
        for entries in mentioned_by_by_key.values():
            tot_mentioned_by += Counter(dict(entries))

        sp_country = [s for s in speeches if s.country == country]

        overall_counter = Counter()
        for sp in sp_country:
            overall_counter += sp.word_frequency

        most_common_words = [w for w, _ in overall_counter.most_common(10)]

        # Per word: year -> summed count over this country's speeches.
        word_counter = {}
        for w in most_common_words:
            per_year = Counter()
            for sp in sp_country:
                per_year[sp.year] += sp.word_frequency.get(w) or 0
            word_counter[w] = per_year

        years = range(1970, 2016, 1)
        # Years without any speech become NaN.
        selected_data = {
            w: [cnter[yr] if yr in cnter else float('nan') for yr in years]
            for w, cnter in word_counter.items()
        }

        multi_counts = list(selected_data.values())
        multi_years = [list(years)] * len(multi_counts)
        data = {
            'counts': multi_counts,
            'years': multi_years,
            'colors': word_colors[:len(multi_counts)],
            'labels': most_common_words
        }

        if type_display == 'mentions':
            prepared_map_data = make_map_data(tot_mentions)
        else:
            prepared_map_data = make_map_data(tot_mentioned_by)

        return ColumnDataSource(data), ColumnDataSource(prepared_map_data)

    def update(attr, old, new):
        """Widget callback: recompute both sources for the current inputs."""
        try:
            country_code = country_code_for(country_input.value)
        except ValueError:
            # Free-text input that is not a known country name: keep the
            # current plots rather than failing inside the callback.
            print("Check the country again!")
            return
        (word_frequency_to_plot,
         map_data) = make_data_set(list_sp_objs, country_code, dropdown.value)
        src.data.update(word_frequency_to_plot.data)
        map_src.data.update(map_data.data)

    def search_mentions(input_country, output_countries=list_country_codes):
        """Return the 'mentions' data stored for ``input_country``."""
        return yearwise_data(input_country, 'mentions')

    def search_is_mentioned_by(input_country,
                               output_countries=list_country_codes):
        """Return the 'is_mentioned_by' data stored for ``input_country``."""
        return yearwise_data(input_country, 'is_mentioned_by')

    def yearwise_data(inp_country, m):
        """Load ``data/<m>.pickle`` and return its entry for ``inp_country``.

        Args:
            inp_country: Key to look up in the unpickled mapping.
            m: ``'mentions'`` or ``'is_mentioned_by'``; selects the file.

        Returns:
            The stored entry, or an empty dict (after printing a hint) when
            ``inp_country`` is not a key. Callers iterate ``.values()`` and
            pass each value to ``dict(...)`` -- presumably a per-year list
            of (country, count) pairs; verify against the data files.
        """
        input_file = Path.cwd().joinpath(f'data/{m}.pickle')
        # NOTE: pickle executes arbitrary code on load; only safe while
        # these data files are produced by this project.
        with input_file.open('rb') as pkl_file:
            by_country = pickle.load(pkl_file)
        try:
            return by_country[inp_country]
        except KeyError:
            print("Check the country again!")
            # Empty mapping instead of (None, None): callers call .values().
            return {}

    def make_plot(src):
        """Create the multi-line 'Most used words' figure."""
        p = figure(plot_height=600, title='Most used words')
        p.multi_line('years',
                     'counts',
                     color='colors',
                     legend='labels',
                     source=src)
        p.xaxis.axis_label = 'Year'
        p.yaxis.axis_label = 'Word count'
        return p

    def make_map_data(country_counter1):
        """Return patch data for the map; ``rate`` is NaN where no count."""
        shape_keys = list(country_shapes.keys())
        # NOTE(review): x is filled from 'lats' and y from 'lons' while the
        # map's x range is [-180, 180] -- confirm how ``country_shapes``
        # stores its coordinates.
        country_xs = [country_shapes[k]['lats'] for k in shape_keys]
        country_ys = [country_shapes[k]['lons'] for k in shape_keys]
        country_names = [country_shapes[k]['name'] for k in shape_keys]

        country_rates = [float('NaN')] * len(country_names)
        country_inds = {
            country_shapes[k]['ID']: i
            for i, k in enumerate(shape_keys)
        }
        for country, count in dict(country_counter1).items():
            position = country_inds.get(country)
            # Counted countries that have no shape are skipped.
            if position is not None:
                country_rates[position] = count

        return dict(
            x=country_xs,
            y=country_ys,
            name=country_names,
            rate=country_rates,
        )

    def make_map(src_map, country, type_display):
        """Create the world-map figure with log-scaled fill colours."""
        color_mapper = LogColorMapper(palette=palette)
        TOOLS = "pan,wheel_zoom,reset,hover,save"
        p = figure(plot_width=1150,
                   plot_height=800,
                   title='World Map',
                   tools=TOOLS,
                   x_axis_location=None,
                   y_axis_location=None,
                   tooltips=[("Name", "@name"), ("Mentions", "@rate")])
        p.grid.grid_line_color = None
        p.hover.point_policy = "follow_mouse"
        p.x_range = Range1d(start=-180, end=180)
        p.y_range = Range1d(start=-90, end=90)
        p.patches('x',
                  'y',
                  source=src_map,
                  fill_color={
                      'field': 'rate',
                      'transform': color_mapper
                  },
                  fill_alpha=0.7,
                  line_color="white",
                  line_width=0.5)
        return p

    country_input = TextInput(value="India", title="Label:")
    country_code = country_code_for(country_input.value)
    country_input.on_change('value', update)

    # For the dropdown
    menu = [("Mentions", "mentions"), ("Is mentioned by", "is_mentioned_by")]
    dropdown = Dropdown(label="Type of display",
                        button_type="primary",
                        value='mentions',
                        menu=menu)
    dropdown.on_change('value', update)

    # Sorted copy; sorting Category20_16 in place would alter the shared
    # palette for every other user of it.
    word_colors = sorted(Category20_16)

    src, map_src = make_data_set(list_sp_objs, country_code, dropdown.value)
    p = make_plot(src)
    world_map = make_map(map_src, country_input, dropdown.value)

    # Put controls in a single element
    controls = widgetbox(country_input)

    # Create a row layout
    layout = row(
        column(controls, dropdown, p),
        world_map,
    )

    # Make a tab with the layout
    tab = Panel(child=layout, title='Country referencing')
    return tab
def monitoring_app(
    doc,
    database_name,
    session_data,
    updating_options,
    start_immediately,
):
    """Populate ``doc`` with the convergence-monitoring tab.

    Args:
        doc (bokeh.Document): Document the tab is added to.
        database_name (str): Short and unique name of the database.
        session_data (dict): Infos to be passed between and within apps.
            ``database_path`` is read from it and ``last_retrieved`` is
            written to it here; it is also handed to the button row.
        updating_options (dict): Specification how to update the plotting
            data; forwarded to the start-point calculation and button row.
        start_immediately (bool): When truthy, the model named
            ``activation_button`` is switched to active right away.

    """
    # Style the document with the "index.html" template that lives next to
    # this module (FileSystemLoader needs a str, not a Path).
    loader = FileSystemLoader(str(Path(__file__).resolve().parent))
    doc.template = Environment(loader=loader).get_template("index.html")

    # Load the database and remember where retrieval starts.
    database = load_database(path=session_data["database_path"])
    session_data["last_retrieved"] = _calculate_start_point(
        database, updating_options)

    # Start parameters and the group -> column lookups derived from them.
    start_params = read_start_params(path_or_database=database)
    start_params["id"] = _create_id_column(start_params)
    ids_by_group = _map_group_to_other_column(start_params, "id")
    names_by_group = _map_group_to_other_column(start_params, "name")

    criterion_history, params_history = _create_cds_for_monitoring_app(
        ids_by_group)

    # Build the widgets and plots.
    button_row = _create_button_row(
        doc=doc,
        database=database,
        session_data=session_data,
        start_params=start_params,
        updating_options=updating_options,
    )
    monitoring_plots = _create_initial_convergence_plots(
        criterion_history=criterion_history,
        params_history=params_history,
        group_to_param_ids=ids_by_group,
        group_to_param_names=names_by_group,
    )

    # Stack the button row above the plots and publish as a single tab.
    children = [button_row]
    children.extend(monitoring_plots)
    grid = Column(children=children, sizing_mode="stretch_width")
    doc.add_root(Tabs(tabs=[Panel(child=grid, title="Convergence Tab")]))

    if not start_immediately:
        return
    doc.get_model_by_name("activation_button").active = True
line_color='white', fill_color=factor_cmap('fruits', palette=Spectral6, factors=fruits)) p.title.text = "Model Accuracy %f" % accuracy_score(y_test_original, predictions) labels = LabelSet(x='fruits', y='counts', text='counts', level='glyph', x_offset=-15, y_offset=0, source=source, render_mode='canvas') p.yaxis.axis_label = "Counts" p.add_layout(labels) tab1 = Panel(child=p, title="Accuracy Scores") p1 = figure(plot_height=350, title="PR Curve") p1.x_range = Range1d(0, 1) p1.y_range = Range1d(0, 1) p1.yaxis.axis_label = "Precision" p1.xaxis.axis_label = "Recall" #p1.line([0],[0],name ="line2") y_score = clf.predict_proba(x_test_original)[:, 1] precision, recall, _ = precision_recall_curve(y_test_original, y_score) p1.line(precision, recall, line_width=2, line_alpha=0.6, name="line2") average_precision = average_precision_score(y_test_original, predictions) p1.title.text = "Average Precision Score %f" % average_precision tab2 = Panel(child=p1, title="PR Curve")
covid19_geosource = GeoJSONDataSource(geojson=merged_json) plot_title = None #'COVID19 outbreak in India' app_title = 'COVID19 India' India_totalCases = covid19_data['total_cases'].sum() India_totalDeaths = covid19_data['deaths'].sum() print(India_totalCases) basic_covid19_plot = covid19_plot(covid19_geosource, input_df=covid19_data, input_field='total_cases', color_field='total_cases', enable_IndiaStats=True, integer_plot=True, plot_title=plot_title) basicPlot_tab = Panel(child=basic_covid19_plot, title="⌂") if advanced_mode: preds_df.columns=['id','state', \ 'preds_cases_7', 'preds_cases_3', 'preds_cases', \ 'preds_cases_7_std', 'preds_cases_3_std', 'preds_cases_std', \ 'MAPE', 'MAPE_3', 'MAPE_7'] print(preds_df.head(10)) print(covid19_data_copy.head(10)) preds_covid19_df = pd.merge(covid19_data_copy, preds_df, on='state', how='left') preds_covid19_df = preds_covid19_df.fillna(0) print(preds_covid19_df.head(10))
def scatter_tab(df):
    """Build the 'Gender indicators in countries' scatter tab.

    Args:
        df: Frame with ``Country``, ``Year``, ``Region`` and
            ``GNI per capita(current US$)`` columns plus the indicator
            columns offered in the two axis drop-downs. ``df`` is not
            modified.

    Returns:
        A ``Panel`` holding the year slider, axis selectors and the plot.
    """
    available_indicators = list(
        df.columns[2:-1])  # exclude Country, Year, Region columns
    min_year = df['Year'].min()
    max_year = df['Year'].max()

    def list_regions(data):
        """Return the distinct regions of ``data`` in first-seen order."""
        return list(data['Region'].dropna().unique())

    def create_dataset(data, x, y, year):
        """Return one ColumnDataSource per region for the chosen year.

        The list always follows ``list_regions(data)``, so position ``i``
        refers to the same region for every year; a region without rows in
        ``year`` gets an empty source. This keeps sources, glyphs and legend
        entries aligned when the year changes.
        """
        regions = list_regions(data)
        # colour per region, fixed across years
        colormap = {region: Dark2_8[i] for i, region in enumerate(regions)}

        # subset data according to chosen year
        subset = data[data['Year'] == year]

        source_list = []
        for region in regions:
            region_data = subset[subset['Region'] == region]
            fill = colormap[region]
            # Colours are built per subset instead of being written back
            # into ``data`` (the old chained ``.loc`` assignment was
            # unreliable and added columns to the caller's frame).
            line_colors = [
                # black outline for Singapore for visibility on plot
                'black' if country == 'Singapore' else fill
                for country in region_data['Country']
            ]
            source_list.append(
                ColumnDataSource({
                    'Country': region_data['Country'],
                    'Region': region_data['Region'],
                    'x': region_data[x],
                    'y': region_data[y],
                    'colors': [fill] * len(region_data),
                    'line_colors': line_colors,
                    'GNI': region_data['GNI per capita(current US$)']
                }))
        return source_list

    def create_plot(sources, regions, x, y, year):
        """Create the figure with one circle renderer per region source."""
        p = figure(plot_width=800,
                   plot_height=500,
                   title='Gender indicators in {}'.format(year),
                   x_axis_label=x,
                   y_axis_label=y,
                   sizing_mode='scale_both',
                   toolbar_location="right")

        # set aesthetics
        p.title.text_font_size = '12pt'
        p.xaxis.axis_label_text_font_style = 'normal'
        p.yaxis.axis_label_text_font_style = 'normal'

        # one renderer per region so the legend can toggle each separately
        renderers = [
            p.circle('x',
                     'y',
                     size=4,
                     fill_color='colors',
                     line_color='line_colors',
                     fill_alpha=0.8,
                     source=source) for source in sources
        ]

        # create manual legend (necessary to place legend outside plot)
        legenditems = [(str(region), [renderer])
                       for region, renderer in zip(regions, renderers)]
        legend = Legend(items=legenditems, location=(10, 0))
        p.add_layout(legend, "right")
        p.legend.label_text_font_size = '8pt'

        # click on region in the legend to hide/show its data points
        p.legend.click_policy = "hide"

        # add hovertool to show details for each data point
        hover = HoverTool(tooltips=[("Country", "@Country"),
                                    ("Region", "@Region"),
                                    ("x:", "$x{0.1}"),
                                    ("y:", "$y{0.1}"),
                                    ("GNI per capita(US$)", "@GNI")])
        p.add_tools(hover)
        return p

    def update(attr, old, new):
        """Widget callback: refresh every region source and the labels."""
        year = year_select.value
        x = x_select.value
        y = y_select.value

        # Copy into a plain dict so a source never shares another source's
        # data container.
        for source, new_src in zip(sources, create_dataset(df, x, y, year)):
            source.data = dict(new_src.data)

        # update labels and titles
        p.title.text = 'Gender indicators in {}'.format(year)
        p.xaxis.axis_label = x
        p.yaxis.axis_label = y

    # add a slider widget to select year
    year_select = Slider(start=min_year,
                         end=max_year,
                         step=1,
                         value=max_year,
                         title="Select Year")
    year_select.on_change('value', update)

    # add dropdown widgets to select indicators for y and x axes
    y_select = Select(title="Select indicator for y-axis",
                      value=available_indicators[8],
                      options=list(available_indicators))
    y_select.on_change('value', update)

    x_select = Select(title="Select indicator for x-axis",
                      value=available_indicators[1],
                      options=list(available_indicators))
    x_select.on_change('value', update)

    # set initial values to current widget values
    x = x_select.value
    y = y_select.value
    year = year_select.value

    # add a paragraph of explanatory text for user
    text = Div(
        text=
        """<font size="-1"><i>Click on legend to toggle data points on/off. <br />Data points for Singapore, where available, are outlined in black.</i></font>"""
    )

    # get initial dataset and plot
    sources = create_dataset(df, x, y, year)
    p = create_plot(sources, list_regions(df), x, y, year)

    # layout widgets and plot
    controls = WidgetBox(year_select,
                         y_select,
                         x_select,
                         text,
                         width=400,
                         sizing_mode='scale_both')
    layout = row(controls, p)
    tab = Panel(child=layout, title='Gender indicators in countries')
    return tab
intAlcohol_freqeuncySelector, intDailyScreenTimeSlider, intActivityLevelSlider, intHours_of_sleepSlider, intCarboSlider, intFatSlider, intProtnSlider ] for w in selectorsAndSliders: w.on_change('value', update_data) for w in currBloodInputFields: w.on_change('value', update_blood) for w in checkboxes: w.on_click(update_data2) tab1 = Panel(child=column(row(estPlot_blood, estPlot_bloodPressure), row(Div(), Div())), title='Estimation') tab2 = Panel(child=column(currBloodInputLayout, row(systolText, diastolText)), title="My blood test") tab3 = Panel(child=column( row(predictPlot_blood, predictPlot_bloodPressure), row(Div(text=' ', width=150), intMaleableInputsLayout)), title='My blood predictions') tabs = Tabs(tabs=[tab1, tab2, tab3]) layout = column( row(column(fixedInputs_andBP_layout, Div(height=130), currMaleableInputsLayout), tabs, width=200,
def player_tab(passing_model):
    """Build the 'Player Passing Model' tab.

    Args:
        passing_model: Frame with ``Position``, ``Position_Detail``,
            ``Passes``, ``xP_Mean``, ``xP_Rating``, ``Player`` and ``Team``
            columns.

    Returns:
        A ``Panel`` with a position selector, a pass-count range slider and
        a scatter plot of ``xP_Rating`` against ``xP_Mean``.
    """
    positions = list(passing_model.Position.unique())
    position_details = list(passing_model.Position_Detail.unique())
    position_color = factor_cmap('Position_Detail',
                                 palette=Viridis10,
                                 factors=position_details)

    select = Select(title="Position:", value="Midfield", options=positions)

    max_passes = int(passing_model["Passes"].max())
    pass_slider = RangeSlider(start=0,
                              end=max_passes,
                              value=(70, max_passes),
                              step=5,
                              title="Number of Passes")

    def first_or_none(values):
        """Return ``values[0]``, or None when the filter left no rows."""
        return values[0] if len(values) else None

    def make_dataset(select_value, pass_slider_min, pass_slider_max):
        """Return a source of players at one position within a pass range.

        Adds ``Pass_Size`` (passes / 10) and two constant columns holding
        the mean of ``xP_Mean`` and of ``xP_Rating`` over the selection.
        """
        source = ColumnDataSource(data=passing_model.loc[
            (passing_model["Position"] == select_value) &
            (passing_model["Passes"] >= pass_slider_min) &
            (passing_model["Passes"] <= pass_slider_max), :])
        n_rows = len(source.data["Passes"])
        source.data["Pass_Size"] = source.data["Passes"] / 10
        source.data["xP_Mean_mean"] = np.repeat(
            source.data["xP_Mean"].mean(), n_rows)
        source.data["xP_Rating_mean"] = np.repeat(
            source.data["xP_Rating"].mean(), n_rows)
        return source

    def corner_label(x, y, text):
        """Return a boxed annotation placed at screen position (x, y)."""
        return Label(x=x,
                     y=y,
                     x_units='screen',
                     y_units='screen',
                     text=text,
                     render_mode='css',
                     border_line_color='black',
                     border_line_alpha=1.0,
                     background_fill_color='white',
                     background_fill_alpha=1.0)

    def make_plot(source):
        """Return ``(plot, xp_mean_span, xp_rating_span)``.

        The two spans are returned so the callback can move them when the
        selection changes.
        """
        # NOTE(review): a dict of plot_height/plot_width/x_range/y_range
        # used to be built here but was never passed to figure(); it has
        # been dropped without changing the rendered plot.
        plot = figure(
            tools=["tap", "pan", "wheel_zoom", 'box_select', 'reset', 'help'],
            title="Expected Passes v. Pass Difficulty")
        plot.y_range.flipped = True

        # Mean lines for the current selection
        xp_mean_span = Span(location=first_or_none(
            source.data["xP_Mean_mean"]),
                            dimension='width',
                            line_color="black",
                            line_dash='solid',
                            line_width=3,
                            line_alpha=.2)
        plot.add_layout(xp_mean_span)

        xp_rating_span = Span(location=first_or_none(
            source.data["xP_Rating_mean"]),
                              dimension='height',
                              line_color="black",
                              line_dash='solid',
                              line_width=3,
                              line_alpha=.2)
        plot.add_layout(xp_rating_span)

        plot.circle(
            "xP_Rating",
            "xP_Mean",
            size="Pass_Size",
            color=position_color,
            legend="Position_Detail",
            source=source,
            # set visual properties for selected glyphs
            selection_color=Spectral6[5],
            # set visual properties for non-selected glyphs
            nonselection_fill_alpha=0.1,
            nonselection_fill_color=Spectral6[0],
            nonselection_line_color=Spectral6[5],
            nonselection_line_alpha=1.0)
        plot.legend.location = (10, 50)
        plot.legend.border_line_width = 3
        plot.legend.border_line_color = "black"
        plot.legend.border_line_alpha = 0.5

        labels = LabelSet(x='xP_Rating',
                          y='xP_Mean',
                          text='Player',
                          level='glyph',
                          text_font_size='10pt',
                          x_offset=-2,
                          y_offset=2,
                          source=source,
                          render_mode='canvas')
        plot.add_layout(labels)

        # Hover tool with vline mode
        hover = HoverTool(tooltips=[('Team', '@Team'), ('Player', '@Player'),
                                    ('Position', '@Position_Detail'),
                                    ('Expected Pass Rating', '@xP_Rating'),
                                    ('Total Passes', '@Passes')],
                          mode='vline')
        plot.add_tools(hover)

        # Quadrant labels in the four corners. The y axis is flipped, so
        # the upper labels (y=510) describe the harder passes; the
        # upper-right one previously repeated the 'Easier' text.
        corner_texts = [
            (10, 10, 'Easier Passes, Poorly Executed'),
            (10, 510, 'Harder Passes, Poorly Executed'),
            (625, 10, 'Easier Passes, Well Executed'),
            (625, 510, 'Harder Passes, Well Executed'),
        ]
        for x, y, text in corner_texts:
            plot.add_layout(corner_label(x, y, text))

        return plot, xp_mean_span, xp_rating_span

    def callback(attr, old, new):
        """Widget callback: refilter the data and move the mean lines."""
        range_start, range_end = pass_slider.value[0], pass_slider.value[1]
        position_val = select.value

        # Update Title
        plot.title.text = position_val

        # Update Dataset
        new_df = make_dataset(position_val, range_start, range_end)
        source.data.update(new_df.data)

        # Update Averages (None when the selection is empty, instead of
        # raising IndexError inside the callback)
        xp_mean_span.location = first_or_none(source.data["xP_Mean_mean"])
        xp_rating_span.location = first_or_none(
            source.data["xP_Rating_mean"])

    source = make_dataset(select.value, pass_slider.value[0],
                          pass_slider.value[1])
    plot, xp_mean_span, xp_rating_span = make_plot(source)

    inputs = widgetbox(select, pass_slider)
    select.on_change('value', callback)
    pass_slider.on_change('value', callback)

    # Create a column layout
    layout = column(inputs, plot)

    # Make a tab with the layout
    tab = Panel(child=layout, title='Player Passing Model')
    return tab
callback1 = CustomJS(args=dict(source=precip_source), code=""" var data = source.data; var f = cb_obj.value; source.data.cat = data[f]; source.change.emit(); """) select1 = Select(title='Month', value=time_index[-1], options=time_index) select1.js_on_change('value', callback1) #slider = Slider(start=0, end=len(time_index)-1, value=0, step=1) #slider.js_on_change('value', callback) layout1 = column(p1, select1) tab1 = Panel(child=layout1, title='Precip') ## Figure 2 - flow p2 = figure(title='Surface Water Flow Index', tools=TOOLS, logo=None, active_scroll='wheel_zoom', plot_height=h, plot_width=w) p2.patches('x', 'y', source=flow_source, fill_color={ 'field': 'cat', 'transform': color_map },
def correlations_tab(ts):
    """Build the 'Autocorrelation and Correlation' tab.

    Args:
        ts: Frame with a ``time`` column plus one column per time series.

    Returns:
        A ``Panel`` with two rows: autocorrelation controls and plot, then
        cross-correlation controls and plot.
    """
    # Columns selectable as time series (everything except 'time').
    ts_available = ts.columns.tolist()
    ts_available.remove('time')

    # The lag sliders go up to the length of the time series.
    max_lag = ts.shape[0]

    def style_axes(plot):
        """Apply the shared title and axis-label fonts to ``plot``."""
        plot.title.text_font_size = '14pt'
        plot.xaxis.axis_label_text_font_size = '12pt'
        plot.xaxis.axis_label_text_font_style = 'bold'
        plot.yaxis.axis_label_text_font_size = '12pt'
        plot.yaxis.axis_label_text_font_style = 'bold'
        return plot

    # ---------------------------------------------------------------
    # Autocorrelation
    # ---------------------------------------------------------------
    def make_dataset_autocorr(ts, col, lag):
        """Return a source built from ``auto_corr(ts[col], lag)``."""
        return ColumnDataSource(auto_corr(ts[col], lag))

    def make_autocorrplot(source):
        """Create the autocorrelation plot (points joined by a thin line)."""
        ttp = [("Lag", "$x"), ("Autocorrelation", "$y")]
        plot = figure(plot_height=400,
                      plot_width=600,
                      tooltips=ttp,
                      title="Autocorrelation",
                      x_axis_label="Lag",
                      y_axis_label="Autocorrelation",
                      tools="hover, pan, zoom_in, zoom_out, reset, save")
        plot.circle('lags', 'auto_corrs', source=source)
        plot.line('lags', 'auto_corrs', source=source, line_width=0.5)
        return style_axes(plot)

    # Each plot gets its own, distinctly named title callback; the two were
    # previously both called ``update_title``.
    def update_title_autocorr(attrname, old, new):
        plot_autocorr.title.text = text_autocorr.value

    def update_data_autocorr(attrname, old, new):
        new_source = make_dataset_autocorr(ts,
                                           ts_select_autocorr.value,
                                           lag=lag_select_autocorr.value)
        source_autocorr.data.update(new_source.data)

    # Widgets
    text_autocorr = TextInput(title="Title", value='Autocorrelation')
    text_autocorr.on_change('value', update_title_autocorr)

    ts_select_autocorr = Select(value=ts_available[0],
                                title='Time Series',
                                options=ts_available)
    ts_select_autocorr.on_change('value', update_data_autocorr)

    lag_select_autocorr = Slider(start=10,
                                 end=max_lag,
                                 step=1,
                                 value=10,
                                 title='Lag')
    lag_select_autocorr.on_change('value', update_data_autocorr)

    # Initial state and plotting.
    source_autocorr = make_dataset_autocorr(ts, ts_available[0], 10)
    plot_autocorr = make_autocorrplot(source_autocorr)

    # Put controls in a single element.
    controls_autocorr = WidgetBox(text_autocorr, ts_select_autocorr,
                                  lag_select_autocorr)

    # ---------------------------------------------------------------
    # Correlation between two series
    # ---------------------------------------------------------------
    def make_dataset_corr(ts, col1, col2, lag):
        """Return a source built from ``corr(ts[col1], ts[col2], lag)``."""
        return ColumnDataSource(corr(ts[col1], ts[col2], lag))

    def make_corrplot(source):
        """Create the correlation plot (points joined by a thin line)."""
        # Tooltip now names the plotted quantity; it used to say
        # "Autocorrelation" on this plot as well.
        ttp = [("Lag", "$x"), ("Correlation", "$y")]
        plot = figure(plot_height=400,
                      plot_width=600,
                      tooltips=ttp,
                      title="Correlation",
                      x_axis_label="Lag",
                      y_axis_label="Correlation",
                      tools="hover, pan, zoom_in, zoom_out, reset, save")
        plot.circle('lags', 'corrs', source=source)
        plot.line('lags', 'corrs', source=source, line_width=0.5)
        return style_axes(plot)

    def update_title_corr(attrname, old, new):
        plot_corr.title.text = text_corr.value

    def update_data_corr(attrname, old, new):
        new_source = make_dataset_corr(ts,
                                       ts_select_corr.value,
                                       ts_select2_corr.value,
                                       lag=lag_select_corr.value)
        source_corr.data.update(new_source.data)

    # Widgets
    text_corr = TextInput(title="Title", value='Correlation')
    text_corr.on_change('value', update_title_corr)

    ts_select_corr = Select(value=ts_available[0],
                            title='Time Series 1',
                            options=ts_available)
    ts_select_corr.on_change('value', update_data_corr)

    ts_select2_corr = Select(value=ts_available[0],
                             title='Time Series 2 (shifted)',
                             options=ts_available)
    ts_select2_corr.on_change('value', update_data_corr)

    lag_select_corr = Slider(start=10,
                             end=max_lag,
                             step=1,
                             value=10,
                             title='Lag')
    lag_select_corr.on_change('value', update_data_corr)

    # Initial state and plotting.
    source_corr = make_dataset_corr(ts, ts_available[0], ts_available[0], 10)
    plot_corr = make_corrplot(source_corr)

    # Put controls in a single element.
    controls_corr = WidgetBox(text_corr, ts_select_corr, ts_select2_corr,
                              lag_select_corr)

    # Two rows: controls on the left, plot on the right.
    grid = gridplot([[controls_autocorr, plot_autocorr],
                     [controls_corr, plot_corr]],
                    plot_width=500,
                    plot_height=500)

    # Make a tab with the layout.
    tab = Panel(child=grid, title='Autocorrelation and Correlation')
    return tab
S_fill=main_source_list[-3], S_save=save_source_list[-1], mainy=fig_list[1].y_range, main_laby=fig_list[1].yaxis[0], fill_lab=fig_list[2].xaxis[0], dt=data_table, ), code=input_code + key_source_code.replace('cb_obj', 'S_main')) # layout the final grid notebox = widgetbox(select_text, data_table, notes, width=650) dropbox_0 = widgetbox( dum, input_0, width=210 ) # I use the dummy div widget to have the input button ~aligned with the center of the figure dropbox_1 = widgetbox(dum2, input_1, width=210) grid = gridplot([[fig_list[0], dropbox_0], [fig_list[1], dropbox_1], [fig_list[2], notebox]], toolbar_location='left') tabs.append(Panel(child=grid, title=panel_key)) if len(tabs) == 1: final = tabs[0].child else: final = Tabs(tabs=tabs) print('\nWritting', save_name, '...') outfile = open(os.path.join(save_path, save_name), 'w') outfile.write(file_html(final, CDN, tab_name)) outfile.close()
def dt_multiline_viz(
    data: Dict[str, Tuple[np.ndarray, np.ndarray, List[str]]],
    x: str,
    y: str,
    timeunit: str,
    yscale: str,
    plot_width: int,
    plot_height: int,
    grp_cnt_stats: Dict[str, int],
    max_lbl_len: int = 15,
    z: Optional[str] = None,
    agg: Optional[str] = None,
) -> Panel:
    """
    Render multi-line chart

    Parameters
    ----------
    data
        Maps each group name to a tuple whose element 0 is plotted on the
        y axis, element 1 on the (datetime) x axis, and element 2 is shown
        in the hover tooltip.
    x, y
        Column names used in the title, axis labels and tooltip.
    timeunit
        Tooltip label for the hover text taken from element 2.
    yscale
        Passed to ``figure`` as ``y_axis_type``; the y axis is additionally
        formatted when it equals "linear".
    plot_width, plot_height
        Figure size.
    grp_cnt_stats
        Forwarded to ``_make_title``.
    max_lbl_len
        Legend labels longer than this are cut to ``max_lbl_len - 1``
        characters followed by "...".
    z, agg
        When ``z`` is given the title reads "<agg> of ... over <x>";
        ``agg`` defaults to "Frequency" for the tooltip and axis label.

    Returns
    -------
    Panel
        A panel titled "line chart" containing the figure.
    """
    # pylint: disable=too-many-arguments,too-many-locals
    grps = list(data.keys())
    palette = PALETTE * (len(grps) // len(PALETTE) + 1)
    if z is None:
        title = _make_title(grp_cnt_stats, x, y)
    else:
        title = f"{agg} of {_make_title(grp_cnt_stats, z, y)} over {x}"
    agg = "Frequency" if agg is None else agg

    fig = figure(
        tools=[],
        title=title,
        toolbar_location=None,
        plot_width=plot_width,
        plot_height=plot_height,
        y_axis_type=yscale,
        x_axis_type="datetime",
    )

    # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
    ymin, ymax = np.inf, -np.inf
    # A list (not a dict keyed by the shortened label) so that two groups
    # whose truncated names coincide both keep their legend entry.
    legend_items = []
    for grp, colour in zip(grps, palette):
        grp_name = ((grp[:(max_lbl_len - 1)] + "...")
                    if len(grp) > max_lbl_len else grp)
        source = ColumnDataSource({
            "x": data[grp][1],
            "y": data[grp][0],
            "lbl": data[grp][2]
        })
        line = fig.line(x="x",
                        y="y",
                        source=source,
                        color=colour,
                        line_width=1.3)
        legend_items.append((grp_name, [line]))
        # One hover tool per line so the tooltip shows the full group name.
        fig.add_tools(
            HoverTool(
                renderers=[line],
                tooltips=[
                    (f"{y}", f"{grp}"),
                    (agg, "@y"),
                    (timeunit, "@lbl"),
                ],
                mode="mouse",
            ))
        ymin = min(ymin, min(data[grp][0]))
        ymax = max(ymax, max(data[grp][0]))

    legend = Legend(items=legend_items)
    tweak_figure(fig, "line", True)
    fig.add_layout(legend, "right")
    fig.legend.click_policy = "hide"
    fig.yaxis.axis_label = f"{agg} of {y}" if z else "Frequency"
    fig.xaxis.axis_label = x
    if yscale == "linear":
        _format_axis(fig, ymin, ymax, "y")
    return Panel(child=fig, title="line chart")
def histogram_tab(data):
    """Build the 'Histogram' tab: per-journal topic counts with a topic picker.

    ``data`` is grouped by its 'Journal' column and summed; the selected topic
    columns are then shown as a bar chart with nested (topic, journal)
    categories. A checkbox group next to the plot chooses which topics are
    displayed.

    Args:
        data: DataFrame with a 'Journal' column and one column per entry of
            the topic list defined below. The code labels the grouped rows
            with three fixed journal codes, so it presumably expects exactly
            three journals -- verify against the caller.

    Returns:
        A Panel titled 'Histogram' containing the controls and the plot.
    """
    # Journal codes, in the order the grouped rows are labelled.
    journals = ["AA", "ASR", "JMAS"]

    def topic_counts(topic_list):
        """Return (factors, counts) for the given topic columns.

        ``factors`` is a list of (topic, journal) tuples and ``counts`` the
        matching summed values, both taken from the same grouped result so
        they are guaranteed to line up.
        """
        sum_table = data.groupby('Journal').sum()
        table = sum_table[topic_list]
        new_table = pd.melt(table, var_name='topics', value_name='count')
        # melt stacks the topic columns one after another, so the journal
        # labels repeat once per selected topic.
        new_table['Journal'] = journals * len(topic_list)
        totals = new_table.groupby(['topics', 'Journal'])['count'].sum()
        return list(totals.index), totals.values

    def make_dataset(topic_list):
        """Create the data source and an empty figure for the given topics."""
        x, counts = topic_counts(topic_list)
        source = ColumnDataSource(data=dict(x=x, counts=counts))
        p = figure(x_range=FactorRange(*x),
                   plot_height=250,
                   title="Topics Break Down by Year")
        return source, p

    def style(p):
        """Apply title, axis-label and tick-label fonts; return the figure."""
        # Title
        p.title.align = 'center'
        p.title.text_font_size = '20pt'
        p.title.text_font = 'serif'
        # Axis titles
        p.xaxis.axis_label_text_font_size = '14pt'
        p.xaxis.axis_label_text_font_style = 'bold'
        p.yaxis.axis_label_text_font_size = '14pt'
        p.yaxis.axis_label_text_font_style = 'bold'
        # Tick labels
        p.xaxis.major_label_text_font_size = '12pt'
        p.yaxis.major_label_text_font_size = '12pt'
        return p

    def make_plot(src, p):
        """Draw the bars and hover tool onto ``p`` and style it."""
        palette = [cc.rainbow[i * 15] for i in range(17)]
        # start=1, end=2 makes the colour mapper look only at the second
        # level of each (topic, journal) factor, i.e. colour by journal.
        p.vbar(x='x',
               top='counts',
               width=0.9,
               source=src,
               fill_color=factor_cmap('x',
                                      palette=palette,
                                      factors=journals,
                                      start=1,
                                      end=2))
        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        hover = HoverTool()
        hover.tooltips = [("count", "@counts")]
        hover.mode = 'vline'
        p.add_tools(hover)
        # Styling
        p = style(p)
        return p

    def update(attr, old, new):
        """Checkbox callback: show only the currently ticked topics."""
        topics_to_plot = [
            topic_selection.labels[i] for i in topic_selection.active
        ]
        factors, counts = topic_counts(topics_to_plot)
        # Keep the categorical axis in step with the data; otherwise topics
        # that were unticked stay on the axis as empty slots.
        plot.x_range.factors = factors
        # Replace both columns in one assignment so their lengths always match.
        src.data = dict(x=factors, counts=counts)

    # Topics that can be toggled; all are shown initially.
    available_topics = [
        "topics_domestic_politics", "topics_international_relations",
        "topics_society", "topics_econ"
    ]
    topic_selection = CheckboxGroup(labels=available_topics,
                                    active=[0, 1, 2, 3])
    topic_selection.on_change('active', update)

    # Initial selection and data source
    initial_topics = [
        topic_selection.labels[i] for i in topic_selection.active
    ]
    src, plot = make_dataset(initial_topics)
    plot = make_plot(src, plot)

    # Put controls in a single element
    controls = WidgetBox(topic_selection)
    # Create a row layout
    layout = row(controls, plot)
    # Make a tab with the layout
    tab = Panel(child=layout, title='Histogram')
    return tab