def init_climate_input_wx(self, planet_climate):
    """Create the climate-model input widgets.

    Returns a 3-tuple: the 'Simulate Climate' button, a list of three
    WidgetBox groups for layout, and a (float_input, general_input)
    pair of widget dicts keyed by model parameter name.
    """
    # Numeric model parameters — one text box each, keyed below by the
    # matching planet_climate attribute name.
    emiss_wx = TextInput(title='Planetary IR energy out (W/m^2)',
                         value='{:.2f}'.format(planet_climate.A))
    atm_forcing_wx = TextInput(title='Atmosphere IR adjustment (W/m^2)',
                               value='{:.1f}'.format(planet_climate.B))
    solar_wx = TextInput(title='Incoming solar (W/m^2) [Divided by 4]',
                         value='{:.2f}'.format(planet_climate.Q))
    transport_wx = TextInput(title='Energy transport towards poles (1/C)',
                             value='{:.1f}'.format(planet_climate.D))
    s2_wx = TextInput(title='S2 (what is this for?)',
                      value='{:.3f}'.format(planet_climate.S2))
    heat_cap_wx = TextInput(title='Planetary heat capacity (C/yr)',
                            value='{:.1f}'.format(planet_climate.C))

    # Non-float model controls.
    nlats_wx = Slider(start=40, end=180, step=1, value=70,
                      title='Number of latitudes in model')
    init_temp_wx = Select(title='Initial planet temperature', value='normal',
                          options=['normal', 'warm', 'cold'])

    # Action buttons wired to instance callbacks.
    calc_climate = Button(label='Simulate Climate', button_type='success')
    calc_climate.on_click(self.update_planet_climate)
    refresh_energy_in = Button(label='Refresh Solar Input')
    refresh_energy_in.on_click(self._update_energy_in)

    float_input = {
        'A': emiss_wx,
        'B': atm_forcing_wx,
        'Q': solar_wx,
        'D': transport_wx,
        'S2': s2_wx,
        'C': heat_cap_wx,
    }
    general_input = {'nlats': nlats_wx, 'init_condition': init_temp_wx}

    # Three control columns; the first is pinned to a third of the plot width.
    group1 = WidgetBox(children=[emiss_wx, atm_forcing_wx, solar_wx,
                                 refresh_energy_in],
                       width=int(self._plot_width / 3))
    group2 = WidgetBox(transport_wx, s2_wx, heat_cap_wx)
    group3 = WidgetBox(nlats_wx, init_temp_wx)

    return (calc_climate, [group1, group2, group3],
            (float_input, general_input))
def _generate_device_plot(self, device_events):
    """Build an hbar timeline figure for one device plus its hidden sync button.

    Returns (plot, WidgetBox(button)); the button is a disabled JS hook used
    to synchronise x-ranges across device plots.
    """
    source = self._convert_events_to_datasource(device_events['events'])

    # Guarantee at least two rows so the plot has usable vertical extent.
    n_rows = device_events['n_rows']
    n_rows = {0: 1, 1: 2}.get(n_rows, n_rows)

    plot = figure(title="{}".format(device_events['name']),
                  plot_height=20 * n_rows + 60,
                  plot_width=1200,
                  tools=self._tools,
                  sizing_mode='stretch_both',
                  active_scroll='xwheel_zoom')
    plot.hbar(left='start', right='end', y='height', color='color',
              height=0.85, source=source, hover_fill_alpha=0.5,
              line_join='round', line_cap='round', hover_line_color='red')
    plot.x_range = Range1d(0, self._iteration_time, bounds="auto")
    plot.y_range = Range1d(0, n_rows)

    # Hide the y axis; keep faint alternating bands as row separators.
    plot.yaxis.visible = False
    plot.ygrid.ticker = SingleIntervalTicker(interval=1)
    plot.ygrid.grid_line_color = None
    plot.ygrid.band_fill_alpha = 0.1
    plot.ygrid.band_fill_color = "gray"

    # Hidden, disabled button used purely as a client-side callback target.
    button = Button(label=" Sync", width=20, button_type='primary',
                    disabled=True)
    button.css_classes = ['xl-hidden']
    button.js_on_click(CustomJS(args={'me': plot},
                                code=self._js_update_ranges))
    plot.x_range.js_on_change('start',
                              CustomJS(args={'button': button},
                                       code=self._js_on_change_callback))

    return plot, WidgetBox(button)
def make_wb(sizing_mode):
    """Assemble the filter/axis control widgets into one 400px-wide WidgetBox."""
    widgets = make_widgets(sizing_mode)
    # Fixed display order of the controls.
    picked = [widgets[key]
              for key in ('oscars', 'genre', 'director', 'x_axis', 'y_axis')]
    return WidgetBox(children=picked, sizing_mode=sizing_mode, width=400)
def slider():
    """Sine-wave demo: four JS-linked sliders controlling amplitude,
    frequency, phase and offset of a plotted sine curve.

    Returns [widget box, plot] for layout by the caller.
    """
    xs = np.linspace(0, 10, 100)
    source = ColumnDataSource(data=dict(x=xs, y=np.sin(xs)))

    plot = figure(y_range=(-10, 10), tools='', toolbar_location=None,
                  title="Sliders example")
    plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

    # Client-side callback: recompute y = B + A*sin(k*x + phi) on slider moves.
    callback = CustomJS(args=dict(source=source), code="""
        var data = source.data;
        var A = amp.value;
        var k = freq.value;
        var phi = phase.value;
        var B = offset.value;
        x = data['x']
        y = data['y']
        for (i = 0; i < x.length; i++) {
            y[i] = B + A*Math.sin(k*x[i]+phi);
        }
        source.change.emit();
    """)

    # (JS arg name, Slider kwargs); only Amplitude updates on mouseup.
    slider_specs = [
        ('amp', dict(start=0.1, end=10, value=1, step=.1, title="Amplitude",
                     callback=callback, callback_policy='mouseup')),
        ('freq', dict(start=0.1, end=10, value=1, step=.1, title="Frequency",
                      callback=callback)),
        ('phase', dict(start=0, end=6.4, value=0, step=.1, title="Phase",
                       callback=callback)),
        ('offset', dict(start=-5, end=5, value=0, step=.1, title="Offset",
                        callback=callback)),
    ]
    sliders = []
    for js_name, kwargs in slider_specs:
        widget = Slider(**kwargs)
        callback.args[js_name] = widget  # expose the widget to the JS code
        sliders.append(widget)

    return [WidgetBox(*sliders), plot]
def classification_tab():
    """Bokeh tab that predicts which site a post message belongs to,
    using pretrained Bayes/SVM classifiers for two site pairs."""
    pairs = [["stackoverflow.com", "academia.stackexchange.com"],
             ["stackoverflow.com", "softwareengineering.stackexchange.com"]]

    # pretrained classification models, indexed models[learning_type][site_pair]
    nbsoac = load("app/models/10k_so_ac_bayes_model.joblib")
    nbsose = load("app/models/10k_so_se_bayes_model.joblib")
    svmsoac = load("app/models/10k_so_ac_SVM_model.joblib")
    svmsose = load("app/models/10k_so_se_SVM_model.joblib")
    models = [[nbsoac, nbsose], [svmsoac, svmsose]]

    learning_type = RadioButtonGroup(
        labels=["Bayes", "Support Vector Machine"], active=0)
    site_pair = RadioButtonGroup(
        labels=["Stack Overflow/Academia",
                "Stack Overflow/Software Engineering"], active=0)
    tai = TextAreaInput(value="", rows=6, title="Enter a post message:")
    predict = Button(label="Predict", button_type="success")
    p = Paragraph(text="""Your Site Prediction will be displayed here""",
                  width=300, height=50)

    def make_prediction():
        # Pick the model matching the active algorithm and site pair,
        # classify the entered text, and display the site name.
        sp = site_pair.active
        model = models[learning_type.active][sp]
        prediction = model.predict([tai.value])[0]
        p.text = "Message belongs to site: " + pairs[sp][prediction - 1]

    predict.on_click(make_prediction)

    # Put controls in a single element, laid out as one row.
    controls = WidgetBox(learning_type, site_pair, tai, predict, p)
    layout = row(controls)
    return Panel(child=layout, title='Message Site Classification')
def bkapp(doc):
    """Minimal Bokeh app: classify a submitted blog post's author gender
    with the module-level `transformer`/`model` pipeline."""
    textin = TextInput(title="Submit Blog Post:")
    button = Button(label="Submit", button_type="success")
    p = Paragraph(text="Blog entry here")

    def update_data(event):
        # Clean and vectorize the submitted text, then run the classifier.
        raw = str(textin.value)
        features = transformer.transform([' '.join(clean(raw))])
        result = model.predict(features)
        pred_text = 'Male' if int(result) == 1 else 'Female'
        # Rendered as the dict's repr, matching the original display format.
        p.text = "{}".format({'prediction': pred_text})

    button.on_click(update_data)
    doc.add_root(WidgetBox(children=[textin, button, p]))
def plot(self, x=None, y=None, z=None, name=''):
    """
    :param x: str, dataframe key
    :param y: str, dataframe key
    :param z: str, dataframe key
    :param name: str, name of plot
    :return: Interactive HTML plot
    """
    self._set_output_file(name)
    self._turn_values_negative(y)

    # Mirror the chosen columns under generic 'x'/'y' keys for the glyphs.
    self.df['x'] = self.df[x]
    self.df['y'] = self.df[y]
    source = ColumnDataSource(self.df)

    circle_plot = self.circle_plot(x=x, y=y, source=source)
    # line_plot is constructed but not placed in the layout (kept as-is).
    line_plot = self.line_plot(x=x, y=y, source=source)
    xrange_slider = self._get_xaxis_slider(circle_plot)
    xaxis_selecter, yaxis_selecter = self._get_source_selecters(
        x=x, y=y, source=source)
    data_table = self._get_data_table(source=source)

    controls = WidgetBox(yaxis_selecter, xaxis_selecter,
                         xrange_slider, data_table)
    plot_col = column(circle_plot, sizing_mode='scale_width')
    control_col = column(controls, sizing_mode='scale_width')
    return row([plot_col, control_col], sizing_mode='scale_width')
def daily_most_common_ticks():
    """
    A Table view of the most common ticks per day.

    :return: Panel
    """
    ticks = kisa.get_daily_popular_ticks(start_date, end_date)

    # Unzip (date, (species, count)) records into three parallel columns.
    dates, species, count = zip(*((day, info[0], info[1])
                                  for (day, info) in ticks))
    tick_source = ColumnDataSource({'date': dates,
                                    'species': species,
                                    'count': count})

    columns = [
        TableColumn(field="date", title="Päivämäärä"),
        TableColumn(field="species", title="Päivän yleisin laji"),
        TableColumn(field="count", title="Pinnojen lukumäärä"),
    ]
    table = DataTable(source=tick_source, columns=columns,
                      width=PLOT_WIDTH, height=PLOT_HEIGHT)
    return Panel(child=WidgetBox(table), title="Päivien yleisimmät")
def forecasting_bcc_rentals_visitor_tab(panel_title):
    """Build the BCC rentals/visitor forecasting dashboard tab.

    Defines a tab-local `Thistab` (data loading, filtering and Prophet TSA
    plotting), wires widget callbacks to holoviews streams, and returns a
    bokeh Panel. On any setup failure an error-flag tab is returned instead.
    """

    class Thistab(Mytab):
        # Tab-specific state and plotting; extends the shared Mytab base.

        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None        # raw loaded dataframe
            self.df1 = None       # grouped/resampled dataframe
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.trigger = 0
            # Per-column aggregations applied when grouping/resampling.
            self.groupby_dict = {
                'category': 'nunique',
                'item': 'nunique',
                'area': 'nunique',
                'visit_duration': 'mean',
                'age': 'mean',
                'gender_coded': 'mean',
                'status_coded': 'mean',
                'rental_employee_gender_coded': 'mean',
                'rental_employee_age': 'mean',
                'rental_tab': 'sum'
            }
            self.feature_list = ['age', 'rental_employee_age', 'rental_tab']
            self.tsa_variable = 'rental_tab'  # variable fed to Prophet
            self.forecast_days = 40
            self.lag_variable = 'visit_duration'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]
            # Correlation strength thresholds.
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = 'rental_tab'
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            self.pym = PythonMongo('aion')
            # Options for the select widgets; 'all' disables that filter.
            self.menus = {
                'item': ['all'],
                'category': ['all'],
                'status': ['all', 'guest', 'member'],
                'gender': ['all', 'male', 'female'],
                'variables': list(self.groupby_dict.keys()),
                'history_periods': ['1', '2', '3', '4', '5',
                                    '6', '7', '8', '9', '10'],
                'area': ['all', 'bar', 'rentals'],
                'tsa': ['rental_tab', 'visit_duration']
            }
            self.select = {}
            self.select['area'] = Select(title='Select BCC area',
                                         value='all',
                                         options=self.menus['area'])
            self.select['item'] = Select(title='Select item',
                                         value='all',
                                         options=self.menus['item'])
            self.select['status'] = Select(title='Select visitor status',
                                           value='all',
                                           options=self.menus['status'])
            self.select['gender'] = Select(title="Select visitor gender",
                                           value='all',
                                           options=self.menus['gender'])
            self.select['category'] = Select(title="Select category",
                                             value='all',
                                             options=self.menus['category'])
            # NOTE(review): title/options here reuse the 'category' menu —
            # looks like a copy-paste; confirm a gender menu was intended.
            self.select['rental_employee_gender'] = Select(
                title="Select category",
                value='all',
                options=self.menus['category'])
            self.select_values = {}
            for item in self.select.keys():
                self.select_values[item] = 'all'
            self.multiline_vars = {'x': 'gender', 'y': 'rental_tab'}
            self.timestamp_col = 'visit_start'

            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag': self.section_header_div(text=lag_section_head_txt,
                                               width=600, html_header='h2',
                                               margin_top=5,
                                               margin_bottom=-155),
                'distribution': self.section_header_div(
                    text='Pre-transform distribution:',
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'relationships': self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'correlations': self.section_header_div(
                    text='Correlations:',
                    width=600, html_header='h3',
                    margin_top=5, margin_bottom=-155),
                'forecast': self.section_header_div(
                    text='Forecasts:{}'.format(self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
            }
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            # Wrap `text` in a styled header element of the requested level.
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            # Push a status banner into both (top & bottom) notification divs.
            txt = """<div style="text-align:center;background:black;width:100%;"> <h4 style="color:#fff;"> {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        # ////////////// DIVS /////////////////////////////////

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            # Static legend explaining how to read the correlation output.
            div_style = """ style='width:350px; margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li> Positive: as variable 1 increases, so does variable 2. </li>
                <li> Negative: as variable 1 increases, variable 2 decreases. </li>
                <li> Strength: decisions can be made on the basis of strong and moderate relationships. </li>
                <li> No relationship/not significant: no statistical support for decision making. </li>
                <li> The scatter graphs (below) are useful for visual confirmation. </li>
                <li> The histogram (right) shows the distribution of the variable. </li>
            </ul>
            </div>
            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////

        def load_df(self, req_startdate, req_enddate, table, cols,
                    timestamp_col):
            """Return data for [req_startdate, req_enddate]; serve from the
            cached self.df when it already covers the window, else hit Mongo."""
            try:
                # get min and max of loaded df
                if self.df is not None:
                    loaded_min = self.df[timestamp_col].min()
                    loaded_max = self.df[timestamp_col].max()
                    if loaded_min <= req_startdate and loaded_max >= req_enddate:
                        df = self.df[(self.df[timestamp_col] >= req_startdate)
                                     & (self.df[timestamp_col] <= req_enddate)]
                        return df
                return self.pym.load_df(req_startdate, req_enddate,
                                        table=table, cols=cols,
                                        timestamp_col=timestamp_col)
            except Exception:
                logger.error('load_df', exc_info=True)

        def filter_df(self, df1):
            # Keep configured columns; apply active select-widget filters
            # (only for columns aggregated with 'count').
            try:
                df1 = df1[self.cols]
                for key, value in self.groupby_dict.items():
                    if value == 'count':
                        if self.select_values[key] != 'all':
                            df1 = df1[df1[key] == self.select_values[key]]
                return df1
            except Exception:
                logger.error('filter', exc_info=True)

        def prep_data(self, df):
            """Filter, numerically encode gender/status columns, index by
            timestamp, then group by 'name' and resample with the configured
            aggregations (results stored on self.df / self.df1)."""
            try:
                df = self.filter_df(df)
                # set up code columns
                codes = {
                    'gender': {
                        'male': 1,
                        'female': 2,
                        'other': 3
                    },
                    'status': {
                        'guest': 1,
                        'member': 2
                    }
                }
                for col in df.columns:
                    coded_col = col + '_coded'
                    # any column containing 'gender' gets a coded twin;
                    # only the exact 'status' column is coded.
                    if 'gender' in col:
                        df[coded_col] = df[col].map(codes['gender'])
                    if 'status' == col:
                        df[coded_col] = df[col].map(codes['status'])
                self.df = df.set_index(self.timestamp_col)
                # groupby and resample
                self.df1 = self.df.groupby('name').resample(
                    self.resample_period).agg(self.groupby_dict)
                self.df1 = self.df1.reset_index()
                self.df1 = self.df1.fillna(0)
                logger.warning('LINE 288 df:%s', self.df1.head(10))
            except Exception:
                logger.error('prep data', exc_info=True)

        def tsa(self, launch):
            """Prophet forecast of the daily-mean tsa_variable; returns a
            holoviews layout: forecast with bounds (+) trend/weekly parts."""
            try:
                df = self.df.resample('D').agg({self.tsa_variable: 'mean'})
                df = df.reset_index()
                label = self.tsa_variable + '_diff'
                df[label] = df[self.tsa_variable].diff()
                df = df.fillna(0)
                # Prophet expects columns named 'ds' (date) and 'y' (value).
                rename = {self.timestamp_col: 'ds', self.tsa_variable: 'y'}
                df = df.rename(columns=rename)
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)
                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())
                print(list(forecast.columns))
                # Overlay forecast line with lower/upper scatter bounds.
                for idx, col in enumerate(['yhat', 'yhat_lower',
                                           'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds', y=col, width=600,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(x='ds', y=col, width=600,
                                                     height=250,
                                                     value_label='$',
                                                     legend=False).relabel(col)
                # Overlay the decomposed trend and weekly components.
                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds', y=col, width=550,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds', y=col, width=550,
                                                  height=250, value_label='$',
                                                  legend=False).relabel(col)
                return p + q
            except Exception:
                logger.error("TSA:", exc_info=True)

    # --- widget callbacks (closures over the shared `thistab` and streams) ---

    def update_variable(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.variable = new
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        # Re-read every select widget value, then re-prep the data.
        thistab.notification_updater("Calculations in progress! Please wait.")
        for item in thistab.select_values.keys():
            thistab.select_values[item] = thistab.select[item].value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        # Date range changed: reload from Mongo and re-prep.
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        thistab.df['gender_code'] = thistab.df['gender'].apply(
            lambda x: 1 if x == 'male' else 2)
        # NOTE(review): prep_data returns None (it sets self.df1 itself);
        # this assignment appears redundant — confirm.
        thistab.df1 = thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.df1 = thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars['x'] = multiline_x_select.value
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_forecast(attrname, old, new):
        # Forecast horizon or variable changed: relaunch only the TSA plot.
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.tsa_variable = forecast_variable_select.value
        thistab.trigger += 1
        stream_launch_tsa.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'bcc_composite'
        cols = cols_to_load['guest'] + cols_to_load['rental']
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=1)
        first_date = last_date - timedelta(days=1000)

        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        thistab.prep_data(thistab.df)

        # MANAGE STREAM — each stream triggers a subset of the plots.
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_tsa = streams.Stream.define('Launch_tsa', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)
        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)
        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        forecast_variable_select = Select(title='Select forecast variable',
                                          value=thistab.menus['tsa'][0],
                                          options=thistab.menus['tsa'])
        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])
        multiline_y_select = Select(
            title='Select comparative DV(y)',
            value=thistab.multiline_vars['y'],
            options=['price', 'amount', 'visit_duration'])
        multiline_x_select = Select(title='Select comparative IV(x)',
                                    value=thistab.multiline_vars['x'],
                                    options=[
                                        'category', 'gender',
                                        'rental_employee_gender', 'status',
                                        'item'
                                    ])
        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        # tables
        hv_tsa = hv.DynamicMap(thistab.tsa, streams=[stream_launch_tsa])
        tsa = renderer.get_plot(hv_tsa)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        resample_select.on_change('value', update_resample)
        thistab.select['area'].on_change('value', update_IVs)
        thistab.select['gender'].on_change('value', update_IVs)
        thistab.select['rental_employee_gender'].on_change('value', update_IVs)
        thistab.select['item'].on_change('value', update_IVs)
        thistab.select['category'].on_change('value', update_IVs)
        thistab.select['status'].on_change('value', update_IVs)
        select_forecast_days.on_change('value', update_forecast)
        forecast_variable_select.on_change('value', update_forecast)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_tsa = WidgetBox(datepicker_start, datepicker_end,
                                 variable_select, thistab.select['status'],
                                 resample_select, thistab.select['gender'],
                                 thistab.select['category'],
                                 thistab.select['area'],
                                 forecast_variable_select,
                                 select_forecast_days)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [tsa.state, controls_tsa],
                         [Spacer(width=20, height=30)],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
def pm_risk_assessment_tab(panel_title): risk_matrix_src = ColumnDataSource(data=dict(Severity=[], Unlikely=[], Seldom=[], Occaisional=[], Likely=[], Definite=[])) corr_src = ColumnDataSource(data=dict( variable_1=[], variable_2=[], relationship=[], r=[], p_value=[])) class Thistab(Mytab): def __init__(self, table, cols, dedup_cols=[]): Mytab.__init__(self, table, cols, dedup_cols) self.table = table self.cols = cols self.DATEFORMAT = "%Y-%m-%d %H:%M:%S" self.df = None self.df1 = None self.df_predict = None self.day_diff = 1 # for normalizing for classification periods of different lengths self.df_grouped = '' self.cl = PythonClickhouse('aion') self.trigger = 0 self.groupby_dict = {} self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ self.header_style = """ style='color:blue;text-align:center;' """ self.variable = 'delay_end' self.relationships_to_check = ['weak', 'moderate', 'strong'] self.status = 'all' self.gender = 'all' self.type = 'all' self.ratings = { 'severity': { 'Insignificant': 1, 'Minor': 2, 'Moderate': 3, 'Critical': 4, 'Catastrophic': 5 }, 'likelihood': { 'Unlikely': 1, 'Seldom': 2, 'Occaisional': 3, 'Likely': 4, 'Definite': 5 } } self.variables = { 'severity': list(self.ratings['severity'].keys()), 'likelihood': list(self.ratings['likelihood'].keys()), } self.pym = PythonMongo('aion') self.menus = { 'status': ['all', 'open', 'closed'], 'gender': ['all', 'male', 'female'], } self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'} self.timestamp_col = 'analysis_date' self.risks = [] self.risk = '' self.matrices = [] self.matrix = '' self.risk_select = Select(title='Select risk', value=self.risk, options=self.risks) self.risk_threshold = {'acceptable': 8, 'doubtful': 15} # ------- DIVS setup begin self.page_width = 1200 txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 
style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format( self.page_width, 50, 'Welcome') self.notification_div = { 'top': Div(text=txt, width=self.page_width, height=20), 'bottom': Div(text=txt, width=self.page_width, height=10), } lag_section_head_txt = 'Lag relationships between {} and...'.format( self.variable) self.section_divider = '-----------------------------------' self.section_headers = { 'lag': self.section_header_div(text=lag_section_head_txt, width=1000, html_header='h2', margin_top=50, margin_bottom=5), 'distribution': self.section_header_div(text='Pre-transform distribution', width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'matrix': self.section_header_div(text='Risk Matrix:{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'risk_solution': self.section_header_div( text='Risk Matrix vs Solution :{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), } # ----- UPDATED DIVS END # ---------------------- DIVS ---------------------------- def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150): text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \ .format(margin_top, margin_bottom, html_header, text, html_header) return Div(text=text, width=width, height=15) def load_df(self): try: risk_matrx = json_normalize( list(self.pym.db['risk_matrix'].find())) logger.warning('LINE 169:RISK MATIRX:%s', risk_matrx.head()) if len(risk_matrx) > 0: risk_matrx = drop_cols(risk_matrx, ['desc']) logger.warning('LINE 159:RISK MATIRX:%s', risk_matrx.head()) risk = json_normalize(list(self.pym.db['risk'].find())) risk = risk.rename(columns={'matrix': 'matrix_id'}) analysis = json_normalize( list(self.pym.db['risk_analysis'].find())) analysis = drop_cols(analysis, ['_id']) analysis = analysis.rename(columns={'risk': 'risk_id'}) # merges risk = risk.merge(analysis, 
how='inner', left_on='_id', right_on='risk_id') risk = drop_cols(risk, [ '_id', 'likelihood_comment', 'severity_comment', 'desc', 'risk_id' ]) logger.warning('LINE 167:RISK:%s', risk.head()) logger.warning('LINE 169:RISK MATIRX:%s', risk_matrx.head()) risk = risk_matrx.merge(risk, how='inner', left_on='_id', right_on='matrix_id') df = drop_cols(risk, ['_id', 'matrix_id', 'analyst']) df = df.rename(columns={'name': 'matrix'}) dfs = {} for component in ['severity', 'likelihood']: table = 'risk_' + component dfs[component] = json_normalize( list(self.pym.db[table].find())) dfs[component] = drop_cols(dfs[component], ['desc', 'level']) df = df.merge(dfs[component], how='left', left_on=component, right_on='_id') df = drop_cols(df, ['_id', 'project', component]) df = df.rename(columns={'value': component}) df[component] = df[component].fillna(0) df['composite'] = df.severity * df.likelihood # set selection variables logger.warning('LINE 154 df:%s', df) self.df = df self.matrices = list(df['matrix'].unique()) self.matrix = self.matrices[0] self.set_risks(df, matrix=self.matrix) except Exception: logger.error('load df', exc_info=True) def set_risks(self, df, matrix): try: df = df[df.matrix == matrix] self.risks = list(df['risk'].unique()) self.risk = self.risks[0] self.risk_select.options = self.risks self.df1 = df except Exception: logger.error('prep data', exc_info=True) # ////////////// DIVS ////////////////// def title_div(self, text, width=700): text = '<h2 style="color:#4221cc;">{}</h2>'.format(text) return Div(text=text, width=width, height=15) # ////////////// GRAPHS //////////////////// def action_table(self, launch): try: def label_action(x): if x < self.risk_threshold['acceptable']: return 'Proceed (risk is acceptable)' elif x < self.risk_threshold['doubtful']: return 'Proceed, if no other options are available' else: return 'Do no proceed (Risk unacceptable)' df = self.df df = df.groupby(['matrix', 'risk']).agg({ 'likelihood': 'mean', 'severity': 'mean' }) df = 
df.reset_index() df['composite'] = df.likelihood * df.severity df['action'] = df['composite'].map(label_action) self.risk_matrix() return df.hvplot.table( columns=[ 'matrix', 'risk', 'severity', 'likelihood', 'action' ], width=1000, ) except Exception: logger.error('action table', exc_info=True) def risk_matrix(self): try: # filter df = self.df1 df = df.groupby(['matrix', 'risk']).agg({ 'likelihood': 'mean', 'severity': 'mean' }) df = df.reset_index() df = df[df['risk'] == self.risk] severity_value = int(df['severity'].mean()) #severity = [key for (key, value) in self.ratings['severity'].items() if value == severity_value][0] likelihood_value = int(df['likelihood'].mean()) logger.warning('severity=%s,likelihood=%s', severity_value, likelihood_value) # make the matrix dct = { 'Severity': list(self.ratings['severity'].keys()), } cols = list(self.ratings['likelihood'].keys()) for idx_row, val_col in enumerate( list(self.ratings['likelihood'].keys())): row = idx_row + 1 dct[val_col] = [] for idx_row, val_row in enumerate(dct['Severity']): col = idx_row + 1 val = row * col if row == severity_value and col == likelihood_value: logger.warning('CONDITIONS MET') txt = 'BINGO ' + str(val) else: txt = val dct[val_col].append(txt) logger.warning('LINE 288 %s - length=%s', val_col, len(dct[val_col])) risk_matrix_src.stream(dct, rollover=(len(dct['Severity']))) columns = [ TableColumn(field="Severity", title='severity'), TableColumn( field="Unlikely", title='unlikely', formatter=dashboard_config['formatters']['Unlikely']), TableColumn( field="Seldom", title='seldom', formatter=dashboard_config['formatters']['Seldom']), TableColumn(field="Occaisional", title='occaisional', formatter=dashboard_config['formatters'] ['Occaisional']), TableColumn( field="Likely", title='likely', formatter=dashboard_config['formatters']['Likely']), TableColumn( field="Definite", title='definite', formatter=dashboard_config['formatters']['Definite']), ] risk_matrix_table = 
DataTable(source=risk_matrix_src, columns=columns, width=800, height=500) self.corr() return risk_matrix_table except Exception: logger.error('risk matrix', exc_info=True) def correlate_solution_risk(self, launch): try: # load solution df = json_normalize( list(self.pym.db['project_composite1'].find( {}, { 'severity': 1, 'likelihood': 1, 'solution': 1, 'project_owner_gender': 1, 'project': 1 }))) df['solution'] = df.solution.apply(lambda x: x[0] * 10) df = df.groupby(['project']).agg({ 'severity': 'mean', 'likelihood': 'mean', 'solution': 'mean' }) df = df.reset_index() df['composite'] = df.severity * df.likelihood logger.warning('df:%s', df.head(20)) # load project for idx, col in enumerate( ['severity', 'likelihood', 'composite']): if idx == 0: p = df.hvplot.scatter(x='solution', y=col) else: p *= df.hvplot.scatter(x='solution', y=col) return p # load risk except Exception: logger.error('correlate solution risk', exc_info=True) def risk_information_div(self, width=400, height=300): txt = """ <div {}> <h4 {}>How to interpret Risk assessment matrix:</h4> <ul style='margin-top:-10px;'> <li> Red: Unacceptable risk. Do NOT proceed. </li> <li> Yellow: Risky. Proceed only after ensuring better options aren't reasonable available </li> <li> Green: Acceptable risk. Proceed. 
</li> </ul> </div> """.format(self.div_style, self.header_style) div = Div(text=txt, width=width, height=height) return div # calculate the correlation produced by the lags vector def corr(self): try: corr_dict_data = { 'variable_1': [], 'variable_2': [], 'relationship': [], 'r': [], 'p_value': [] } # load solution df = json_normalize( list(self.pym.db['project_composite1'].find( {}, { 'severity': 1, 'likelihood': 1, 'solution': 1, 'project_owner_gender': 1, 'project': 1 }))) df['solution'] = df.solution.apply(lambda x: x[0] * 10) df = df.groupby(['project']).agg({ 'severity': 'mean', 'likelihood': 'mean', 'solution': 'mean' }) df = df.reset_index() df['composite'] = df.severity * df.likelihood logger.warning('df:%s', df.head(20)) a = df['solution'].tolist() for col in ['composite', 'severity', 'likelihood']: # find lag logger.warning('column:%s', col) b = df[col].tolist() slope, intercept, rvalue, pvalue, txt = self.corr_label( a, b) corr_dict_data['variable_1'].append('solution') corr_dict_data['variable_2'].append(col) corr_dict_data['relationship'].append(txt) corr_dict_data['r'].append(round(rvalue, 3)) corr_dict_data['p_value'].append(round(pvalue, 3)) corr_src.stream(corr_dict_data, rollover=3) columns = [ TableColumn(field="variable_1", title="variable 1"), TableColumn(field="variable_2", title="variable 2"), TableColumn(field="relationship", title="relationship"), TableColumn(field="r", title="r"), TableColumn(field="p_value", title="p_value"), ] data_table = DataTable(source=corr_src, columns=columns, width=900, height=400) return data_table except Exception: logger.error(' corr', exc_info=True) def update_matrix(attrname, old, new): thistab.notification_updater("Calculations in progress! 
Please wait.") thistab.matrix = matrix_select.value thistab.set_risks(thistab.df, matrix=thistab.matrix) thistab.trigger += 1 stream_launch_action_table.event(launch=thistab.trigger) stream_launch_matrix.event(launch=thistab.trigger) thistab.notification_updater("Ready!") def update_risk(attrname, old, new): thistab.notification_updater("Calculations in progress! Please wait.") thistab.risk = thistab.risk_select.value thistab.trigger += 1 stream_launch_matrix.event(launch=thistab.trigger) thistab.risk_matrix() thistab.notification_updater("Ready!") try: # SETUP table = 'project_composite' thistab = Thistab(table, [], []) thistab.load_df() thistab.corr() # MANAGE STREAM stream_launch_action_table = streams.Stream.define('Launch', launch=-1)() stream_launch_matrix = streams.Stream.define('Launch', launch=-1)() stream_launch_risk_solution = streams.Stream.define('Launch', launch=-1)() # MAKE TABLES # --------------------- PLOTS---------------------------------- columns = [ TableColumn(field="Severity", title="severity"), TableColumn(field="Unlikely", title='unlikely', formatter=dashboard_config['formatters']['Unlikely']), TableColumn(field="Seldom", title='seldom', formatter=dashboard_config['formatters']['Seldom']), TableColumn( field="Occaisional", title='occaisional', formatter=dashboard_config['formatters']['Occaisional']), TableColumn(field="Likely", title='likely', formatter=dashboard_config['formatters']['Likely']), TableColumn(field="Definite", title='definite', formatter=dashboard_config['formatters']['Definite']), ] risk_matrix = DataTable(source=risk_matrix_src, columns=columns, width=800, height=500) columns = [ TableColumn(field="variable_1", title="variable 1"), TableColumn(field="variable_2", title="variable 2"), TableColumn(field="relationship", title="relationship"), TableColumn(field="r", title="r"), TableColumn(field="p_value", title="p_value"), ] corr_table = DataTable(source=corr_src, columns=columns, width=500, height=280) width = 800 
hv_action_table = hv.DynamicMap(thistab.action_table, streams=[stream_launch_action_table]) action_table = renderer.get_plot(hv_action_table) hv_risk_solution = hv.DynamicMap(thistab.correlate_solution_risk, streams=[stream_launch_risk_solution]) risk_solution = renderer.get_plot(hv_risk_solution) # CREATE WIDGETS matrix_select = Select(title='Select matrix', value=thistab.matrix, options=thistab.matrices) # handle callbacks matrix_select.on_change('value', update_matrix) thistab.risk_select.on_change('value', update_risk) # create the dashboards controls = WidgetBox(matrix_select, thistab.risk_select) grid = gridplot([[thistab.notification_div['top']], [Spacer(width=20, height=70)], [thistab.title_div('Determine action', 400)], [Spacer(width=20, height=30)], [action_table.state], [thistab.section_headers['matrix']], [Spacer(width=20, height=30)], [risk_matrix, controls], [thistab.section_headers['risk_solution']], [Spacer(width=20, height=30)], [corr_table], [thistab.notification_div['bottom']]]) # Make a tab with the layout tab = Panel(child=grid, title=panel_title) return tab except Exception: logger.error('EDA projects:', exc_info=True) return tab_error_flag(panel_title)
def crypto_clusters_eda_tab(cryptos, panel_title):
    """Build the bokeh Panel for exploratory analysis of cryptocurrency clusters.

    Loads a previously computed cluster assignment from redis, prepares a
    per-cluster resampled time series, and renders one multi-line plot per
    tracked feature.

    :param cryptos: list of coin identifiers stored on Thistab.items
    :param panel_title: title passed through to the enclosing Mytab/Panel
    :return: a bokeh Panel, or the error-flag tab if rendering fails
    """
    global groupby_dict
    global features
    global cluster_dct
    #global source
    redis = PythonRedis()
    # Cluster membership was computed elsewhere and cached under this key.
    cluster_dct = redis.simple_load('clusters:cryptocurrencies')
    if cluster_dct is not None:
        # Aggregate every clustered feature with 'sum' when resampling.
        groupby_dict = {}
        for var in cluster_dct['features']:
            groupby_dict[var] = 'sum'
        features = cluster_dct['features']
        # One ColumnDataSource per feature; graph_ts() overwrites .data in place.
        source = {}
        for feature in features:
            source[feature] = ColumnDataSource(
                data=dict(xs=[], ys=[], labels=[], colors=[]))

    class Thistab(Mytab):
        """Tab-local state holder; data loading is inherited from Mytab."""

        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols,
                           panel_title=panel_title)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None       # raw loaded frame (dask)
            self.df1 = None      # prepped, resampled frame (pandas)
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = [
                'watch', 'fork', 'issue', 'release', 'push', 'tw_mentions',
                'tw_positive', 'tw_compound', 'tw_neutral', 'tw_negative',
                'tw_emojis_positive', 'tw_emojis_compound',
                'tw_emojis_negative', 'tw_emojis_count', 'tw_reply_hashtags'
            ]
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']
            self.trigger = 0
            txt = """<div style="text-align:center;background:black;width:100%;">
                <h1 style="color:#fff;">
                {}</h1></div>""".format('Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=1400, height=20),
                'bottom': Div(text=txt, width=1400, height=10),
            }
            self.cluster_dct = cluster_dct
            self.groupby_dict = groupby_dict
            self.features = features
            self.crypto = 'all'
            self.div_style = """ style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;'
            """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.significant_effect_dict = {}
            self.df1 = None
            self.section_headers = {
                'ts': self.section_header_div(
                    'Comparison of clusters across variables:---------------------',
                    width=600)
            }
            self.timestamp_col = None
            self.colors = None

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=1400):
            """Return a styled section-header Div."""
            text = '<{} style="color:#4221cc;">{}</{}>'.format(
                html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=300):
            """Return the (currently empty) 'how to interpret' help Div."""
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
            </ul>
            </div>
            """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ////////////////////////// UPDATERS ///////////////////////
        def section_head_updater(self, section, txt):
            """Replace a section header's text.

            NOTE(review): this indexes the *method* section_header_div rather
            than the self.section_headers dict — likely a bug; any resulting
            TypeError is swallowed and logged by the except below.
            """
            try:
                self.section_header_div[section].text = txt
            except Exception:
                logger.error('', exc_info=True)

        def notification_updater(self, text):
            """Push a status message into both notification Divs."""
            txt = """<div style="text-align:center;background:black;width:100%;">
                <h4 style="color:#fff;">
                {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        # /////////////////////////// LOAD CLUSTERS //////////////////////
        def prep_data(self, df, timestamp_col):
            """Map coins to their cluster label and resample per cluster.

            Stores the result on self.df1 (indexed by timestamp_col).
            Assumes df is a dask frame (it is .compute()d here) — TODO confirm.
            """
            def label_cluster(x):
                # Return the cluster key that contains coin x, else x itself.
                for key, values in self.cluster_dct.items():
                    if key not in ['timestamp', 'variables']:
                        if x in values:
                            return key
                return x

            try:
                cols = self.features + ['crypto', 'timestamp']
                df = df[cols]
                # groupby and resample
                df['crypto'] = df['crypto'].map(lambda x: label_cluster(x))
                df = df.rename(columns={'crypto': 'cluster'})
                df = df.compute()
                df[timestamp_col] = pd.to_datetime(df[timestamp_col],
                                                   errors='coerce')
                df.set_index(timestamp_col, inplace=True)
                df = df.groupby('cluster').resample(self.resample_period).agg(
                    self.groupby_dict)
                df.reset_index(inplace=True)
                df.set_index(timestamp_col, inplace=True)
                self.timestamp_col = timestamp_col
                self.df1 = df
            except Exception:
                logger.error('prep data', exc_info=True)

        def graph_ts(self):
            """Refresh every feature's ColumnDataSource from self.df1.

            Writes one multi-line dataset (one line per cluster) into the
            module-level `source` dict captured by closure.
            """
            try:
                #global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    clusters = df['cluster'].unique()
                    self.colors = [''] * len(clusters)
                    for idx, feature in enumerate(clusters):
                        self.colors[idx] = dashboard_config['colors'][idx]
                    if self.features is not None:
                        for idx, feature in enumerate(self.features):
                            df1 = df[['cluster', feature]]
                            # pivot into columns for cluster
                            df1 = df1.pivot(columns='cluster')
                            data = dict(
                                x=[df1.index.values] * len(clusters),
                                y=[df1[name].values for name in df1],
                                labels=clusters,
                                colors=self.colors)
                            source[feature].data = data
            except Exception:
                logger.error('graph ts', exc_info=True)

        def graph_chartify(self, timestamp_col):
            """Experimental chartify rendering (unused; call site commented out).

            NOTE(review): `ch` is rebuilt on every loop pass, so only the
            chart for the last feature is returned.
            """
            try:
                # global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    df = df.reset_index()
                    for feature in self.features:
                        ch = chartify.Chart(blank_labels=True,
                                            x_axis_type='datetime')
                        ch.set_title("CHARTIFY")
                        ch.plot.line(
                            # Data must be sorted by x column
                            data_frame=df.sort_values(timestamp_col),
                            x_column=timestamp_col,
                            y_column=feature,
                            color_column='cluster')
                    return ch
            except Exception:
                logger.error('graph chartify', exc_info=True)

    def update():
        """Button handler: reload the selected window and redraw."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value, datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Select handler: re-resample the already-loaded data and redraw."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = resample_select.value
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("ready")

    try:
        table = 'external_daily'
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = dashboard_config['dates']['current_year_start']

        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp',
                        cols=[])
        thistab.prep_data(thistab.df, timestamp_col='timestamp')

        # MANAGE STREAMS ---------------------------------------------------------

        # CREATE WIDGETS ----------------------------------------------------------------
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        load_dates_button = Button(
            label="Select dates/periods, then click me!",
            width=20, height=8, button_type="success")
        resample_select = Select(title='Select summary period',
                                 value=thistab.resample_period,
                                 options=thistab.menus['resample_periods'])

        # -------------------------------- PLOTS ---------------------------
        thistab.graph_ts()
        p = {}
        for feature in features:
            p[feature] = figure(x_axis_type="datetime",
                                plot_width=1400,
                                plot_height=400,
                                title=feature)
            p[feature].multi_line(
                xs='x',
                ys='y',
                legend='labels',
                line_color='colors',
                line_width=5,
                hover_line_color='colors',
                hover_line_alpha=1.0,
                source=source[feature],
            )
            p[feature].add_tools(
                HoverTool(show_arrow=False,
                          line_policy='next',
                          tooltips=[
                              ('freq', '$y'),
                          ]))
        # ch = thistab.graph_chartify(timestamp_col='timestamp')

        # -------------------------------- CALLBACKS ------------------------
        load_dates_button.on_click(update)  # lags array
        resample_select.on_change('value', update_resample)

        # -----------------------------------LAYOUT ----------------------------
        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_left = WidgetBox(datepicker_start, load_dates_button)
        controls_right = WidgetBox(datepicker_end)

        grid_data = [
            #[ch.figure],
            [thistab.notification_div['top']],
            [controls_left, controls_right],
            [thistab.section_headers['ts'], resample_select],
        ]
        for feature in features:
            grid_data.append([p[feature]])
            logger.warning('p:%s', p[feature])
        grid_data.append([thistab.notification_div['bottom']])

        grid = gridplot(grid_data)

        # Make a tab with the layout
        tab = Panel(child=grid, title=thistab.panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(thistab.panel_title)
def tab2():
    """Build the 'Percentage of confirmed cases by race' bokeh tab.

    Reads the LA Times CDPH race/ethnicity CSV, keeps the all-ages rows,
    and shows a grouped vbar chart comparing each race's share of confirmed
    cases with its share of the general population. A DatePicker swaps in
    the rows for the chosen report date.

    :return: a bokeh Panel containing the controls and the chart
    """
    data = pd.read_csv('cdph-race-ethnicity.csv')
    data['date_time'] = pd.to_datetime(data['date'])
    data = data[(data['age'] == 'all')]

    # x-axis factors: one (race, category) pair per bar.
    percentages = ['confirmed cases', 'general population']
    regions = ['asian', 'black', "cdph-other", 'latino', 'other', 'white']
    x = [(race, percent) for race in regions for percent in percentages]

    def create_dataset(df):
        """Interleave case/population percentages into one value per factor."""
        counts = sum(
            zip(df['confirmed_cases_percent'], df['population_percent']),
            ())  # like an hstack
        return ColumnDataSource(data=dict(x=x, counts=counts))

    def create_plot(source):
        """Create the grouped bar chart reading from *source*."""
        p = figure(
            x_range=FactorRange(*x),
            # typo fixes: "persent" -> "percent", "Persentage" -> "Percentage"
            title='Comparison of the percent of cases by race to the general population',
            y_axis_label='Percentage')
        palette = ["#CAB2D6", "#e84d60"]
        p.vbar(x='x',
               top='counts',
               width=0.9,
               source=source,
               line_color="white",
               fill_color=factor_cmap('x',
                                      palette=palette,
                                      factors=percentages,
                                      start=1,
                                      end=2))
        # axis/grid styling (duplicated assignments from the original removed)
        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        p.legend.location = "top_left"
        p.legend.orientation = "horizontal"
        p.add_tools(
            HoverTool(tooltips=[('Race, category', "@x"),
                                ('Percentage', "@counts")], ))
        source_note = Label(
            x=20,
            y=-150,
            x_units='screen',
            y_units='screen',
            text="Source of data: coming from a continual Times survey of California's 58 county health\n "
            " agencies and three city agencies, published on https://www.latimes.com/projects/california-coronavirus-cases-tracking-outbreak/"
            " , access from Github repository https://github.com/datadesk/california-coronavirus-data/blob/master/cdph-race-ethnicity.csv"
            " Date of last update: 2020-11-04",
            render_mode='css',
            border_line_color='black',
            border_line_alpha=1.0,
            background_fill_color='white',
            background_fill_alpha=1.0,
        )
        p.add_layout(source_note)
        return p

    def callback(attr, old, new):
        """DatePicker handler: stream in the rows for the newly picked date."""
        new_src = create_dataset(
            data[(data['date_time'] == date_picker.value)])
        src.data.update(new_src.data)

    # Initial Plot: a start date known to have data.
    src = create_dataset(data[(data['date_time'] == '2020-10-01')])
    p = create_plot(src)

    date_picker = DatePicker(
        title='Click to choose a date (blank means no data)',
        min_date="2020-05-14",
        max_date=date.today())
    date_picker.on_change('value', callback)

    controls = WidgetBox(date_picker)
    layout = row(controls, p)
    return Panel(child=layout, title='Percentage of confirmed cases by race')
'indices': {} } } source2.selected = { '0d': { 'flag': False, 'indices': [] }, '1d': { 'indices': [] }, '2d': { 'indices': {} } } reset.on_click(on_reset_click) widgetBox = WidgetBox(children=[reset], width=150) row = Row(children=[widgetBox, plot1, plot2]) document = curdoc() document.add_root(row) if __name__ == "__main__": print("\npress ctrl-C to exit") session = push_session(document) session.show() session.loop_until_closed()
Column(w1['genre'], w1['director'], w1['x_axis'], w1['y_axis'], sizing_mode=sizing_mode) Column(w2['genre'], w2['director'], w2['x_axis'], w2['y_axis'], sizing_mode=sizing_mode) layout = Column(Row(intro, sizing_mode=sizing_mode), Row(WidgetBox(w1['reviews'], w1['genre'], w1['oscars'], w1['director'], w1['x_axis'], w1['y_axis'], sizing_mode=sizing_mode), WidgetBox(w2['y_axis'], w2['reviews'], w2['genre'], w2['oscars'], w2['director'], w2['x_axis'], sizing_mode=sizing_mode), sizing_mode=sizing_mode), Row(WidgetBox(Button(label="Left column", icon=check, sizing_mode=sizing_mode), sizing_mode=sizing_mode),
def accounts_tsa_tab(panel_title):
    """Build the accounts time-series-analysis (TSA) bokeh tab.

    Loads account events for a date window, then uses Prophet to forecast
    (a) the daily mean transfer amount and (b) the daily count of unique
    active addresses, rendering both forecasts via holoviews DynamicMaps.

    :param panel_title: title for the returned Panel
    :return: a bokeh Panel, or the error-flag tab if rendering fails
    """
    class Thistab(Mytab):
        """Tab-local state: data window, active filters and forecast length."""

        def __init__(self, table, cols, dedup_cols):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            # for normalizing for classification periods of different lengths
            self.day_diff = 1
            self.df_grouped = ''
            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')
            self.forecast_days = 30
            # NOTE: was assigned twice in the original ('address', then
            # 'amount'); only the final value was ever observable.
            self.interest_var = 'amount'
            self.trigger = -1
            # active filter values ('all' disables each filter)
            self.status = 'all'
            self.update_type = 'all'
            self.account_type = 'all'
            self.pl = {}  # for rf pipeline
            self.div_style = """ style='width:300px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;'
            """
            self.header_style = """ style='color:blue;text-align:center;' """
            # list of tier specific addresses for prediction
            self.address_list = []
            self.address_select = Select(title='Filter by address',
                                         value='all',
                                         options=[])
            self.address = 'all'
            self.load_data_flag = False
            self.groupby_dict = {}
            self.addresses = []
            self.max_loaded_date = None
            self.min_loaded_date = None

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                position:relative;background:black;margin-bottom:200px">
                <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'forecast':
                self.section_header_div(text='Forecasts:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled, margin-adjusted section-header Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        # ####################################################
        #              UTILITY DIVS
        def results_div(self, text, width=600, height=300):
            """Wrap arbitrary text in a Div."""
            return Div(text=text, width=width, height=height)

        def title_div(self, text, width=700):
            """Return an h2 title Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def reset_checkboxes(self):
            """Reset the address filter back to 'all'."""
            try:
                self.address_selected = ""
                self.address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self, start_date, end_date):
            """Load data for [start_date, end_date] unless the currently
            loaded frame already covers it, then apply the active filters.
            """
            try:
                logger.warning("data load begun")
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)

                if self.df is not None:
                    self.max_loaded_date = self.df.block_timestamp.max(
                    ).compute()
                    self.min_loaded_date = self.df.block_timestamp.min(
                    ).compute()
                    if start_date >= self.min_loaded_date and \
                            end_date <= self.max_loaded_date:
                        # requested window already in memory
                        logger.warning("data already loaded - %s",
                                       self.df.tail(10))
                    else:
                        self._reload(start_date, end_date)
                else:
                    self._reload(start_date, end_date)
                self.df = self.filter(self.df)
            except Exception:
                logger.error('load_df', exc_info=True)

        def _reload(self, start_date, end_date):
            """Fetch the raw frame for the window and rebuild the address list.

            (Extracted from the two duplicated branches of load_df.)
            """
            self.df_load(start_date, end_date, cols=self.cols)
            self.df = self.df.fillna(0)
            df = self.df[['address']].compute()
            # FIX: iterating the DataFrame itself only yields column names;
            # collect the actual distinct addresses instead.
            self.addresses = ['all'] + list(set(df['address'].tolist()))
            # self.make_delta()
            # self.df = self.df.set_index('block_timestamp')
            logger.warning("data loaded - %s", self.df.tail(10))

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add pct-change columns for each target.

            Currently unused — call sites are commented out.
            NOTE(review): self.targets is never set in __init__, so this
            would raise (and be logged) if it were ever called.
            """
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        self.df = self.df.fillna(self.df.mean())
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))
            except Exception:
                logger.error('make delta', exc_info=True)

        ##################################################
        #              EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Box plot of *variable* split by account status."""
            try:
                # logger.warning("difficulty:%s", self.df.tail(30))
                # get max value of variable and multiply it by 1.1
                minv = 0
                maxv = 0
                df = self.df
                if df is not None:
                    if len(df) > 0:
                        minv, maxv = dd.compute(df[variable].min(),
                                                df[variable].max())
                else:
                    # no data loaded: fall back to a filtered sample frame
                    df = SD('filter', [variable, 'status'], []).get_df()
                return df.hvplot.box(variable,
                                     by='status',
                                     ylim=(.9 * minv, 1.1 * maxv))
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS
        def filter(self, df):
            """Apply the active status/account-type/transfer-type/address
            filters to *df* and add a freq helper column."""
            try:
                df = df.assign(freq=df.address)
                if self.status != 'all':
                    df = df[df.status == self.status]
                if self.account_type != 'all':
                    # FIX: was df.acccount_type (typo) — column is account_type
                    df = df[df.account_type == self.account_type]
                if self.update_type != 'all':
                    df = df[df.update_type == self.update_type]
                if self.address != 'all':
                    df = df[df.address == self.address]
                return df
            except Exception:
                logger.error("filter:", exc_info=True)

        def tsa_amount(self, launch):
            """Prophet forecast of the daily mean transfer amount.

            :param launch: stream trigger value (unused except to retrigger)
            :return: holoviews layout of forecast band + trend/weekly components
            """
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'amount': 'mean'})
                df = df.reset_index()
                df = df.compute()
                df = df.fillna(0)
                # (a transient 'amount_diff' column was computed here in the
                # original and immediately discarded; removed)
                # Prophet requires the columns to be named ds / y.
                df = df.rename(columns={'block_timestamp': 'ds',
                                        'amount': 'y'})
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)
                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())
                print(list(forecast.columns))
                # overlay forecast line with its confidence bounds
                for idx, col in enumerate(['yhat', 'yhat_lower',
                                           'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds', y=col, width=600,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(
                            x='ds', y=col, width=600, height=250,
                            value_label='$', legend=False).relabel(col)
                # trend and weekly seasonality components
                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds', y=col, width=550,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds', y=col, width=550,
                                                  height=250, value_label='$',
                                                  legend=False).relabel(col)
                return p + q
            except Exception:
                # FIX: was mislabelled "box plot:" (copy-paste)
                logger.error("tsa amount:", exc_info=True)

        def tsa_freq(self, launch):
            """Prophet forecast of the daily count of unique addresses.

            :param launch: stream trigger value (unused except to retrigger)
            :return: holoviews layout of forecast band + trend/weekly components
            """
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'address': 'nunique'})
                df = df.reset_index()
                df = df.compute()
                df = df.fillna(0)
                # (a transient 'freq_diff' column was computed here in the
                # original and immediately discarded; removed)
                df = df.rename(columns={'block_timestamp': 'ds',
                                        'address': 'y'})
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)
                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())
                print(list(forecast.columns))
                for idx, col in enumerate(['yhat', 'yhat_lower',
                                           'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(
                            x='ds', y=col, width=600, height=250,
                            value_label='#').relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(
                            x='ds', y=col, width=600, height=250,
                            value_label='#').relabel(col)
                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(
                            x='ds', y=col, width=550, height=250,
                            value_label='#').relabel(col)
                    else:
                        q *= forecast.hvplot.line(
                            x='ds', y=col, width=550, height=250,
                            value_label='#').relabel(col)
                return p + q
            except Exception:
                # FIX: was mislabelled "box plot:" (copy-paste)
                logger.error("tsa freq:", exc_info=True)

    ####################################################
    #                 GRAPHS
    def update(attrname, old, new):
        """Widget handler: push new filter values and retrigger the forecasts."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.address = thistab.address_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_load(attrname, old, new):
        """Date-picker handler: reload the data window."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'account_ext_warehouse'
        # cols = list(table_dict[table].keys())
        cols = [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type', 'amount'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = last_date - timedelta(days=60)

        # STREAMS Setup
        # date comes out stream in milliseconds
        # (an unused 'Select_variable' stream was also defined here; removed)
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])

        # ----------------------------------- LOAD DATA
        # FIX: load *before* building the address checkboxes, otherwise
        # thistab.addresses is still empty when the labels are read.
        thistab.load_df(datepicker_start.value, datepicker_end.value)

        # search by address checkboxes
        thistab.checkboxes = CheckboxButtonGroup(labels=thistab.addresses,
                                                 active=[0])

        # tables
        hv_tsa_amount = hv.DynamicMap(thistab.tsa_amount,
                                      streams=[stream_launch])
        tsa_amount = renderer.get_plot(hv_tsa_amount)

        hv_tsa_freq = hv.DynamicMap(thistab.tsa_freq,
                                    streams=[stream_launch])
        tsa_freq = renderer.get_plot(hv_tsa_freq)

        # add callbacks
        datepicker_start.on_change('value', update_load)
        datepicker_end.on_change('value', update_load)
        thistab.address_select.on_change('value', update)
        select_forecast_days.on_change('value', update)
        update_type_select.on_change('value', update)
        account_type_select.on_change('value', update)
        status_select.on_change('value', update)

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             thistab.address_select, select_forecast_days,
                             update_type_select, account_type_select,
                             status_select, thistab.checkboxes)

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [Spacer(width=20, height=30)],
                         [tsa_amount.state, controls],
                         [tsa_freq.state],
                         [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
def cryptocurrency_eda_tab(cryptos, panel_title):
    """Build the cryptocurrency exploratory-data-analysis dashboard tab.

    Wires together a `Thistab` state object, HoloViews DynamicMaps driven by
    launch streams, and Bokeh widgets whose callbacks mutate the shared
    `thistab` and re-trigger the streams.

    :param cryptos: list of cryptocurrency names offered in the crypto selector.
    :param panel_title: title for the returned Bokeh Panel (also used by the
        error-flag tab on failure).
    :return: a bokeh Panel, or `tab_error_flag(panel_title)` if setup fails.
    """
    # Shared source for the lag-correlation table; streamed to from
    # Thistab.lags_corr (closure over this name).
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[], variable_2=[], relationship=[], lag=[], r=[], p_value=[]))

    class Thistab(Mytab):
        """Per-tab state + plotting methods for the crypto EDA dashboard."""

        # NOTE(review): mutable default `dedup_cols=[]` — shared across calls;
        # harmless here only if Mytab never mutates it. TODO confirm.
        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None          # raw loaded frame (dask, per df_load usage below)
            self.df1 = None         # prepped/resampled frame set by prep_data
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']
            self.trigger = 0        # monotonically bumped to fire stream events
            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.variable = 'fork'      # currently inspected variable
            self.crypto = 'all'
            self.lag_variable = 'push'
            self.lag_days = "1,2,3"     # comma-separated lags for lags_plot
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]
            # correlation-strength thresholds (used by corr_label elsewhere)
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ style='width:350px; margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            # track variable for AI for significant effects
            self.adoption_variables = {
                'user': [],
                'developer': ['watch', 'fork']
            }
            self.significant_effect_dict = {}
            self.reset_adoption_dict(self.variable)
            self.relationships_to_check = ['weak', 'moderate', 'strong']

            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            #self.lag_section_head_txt = 'Lag relationships between {} and...'.format(self.variable)
            self.lag_section_head_txt = 'Lag relationships:'
            self.section_divider = '-----------------------------------'
            # NOTE(review): 'relationships' calls .format on a string with no
            # placeholder (no-op), and 'correlations'/'non_linear' share the
            # exact same header text — possibly unintended copy/paste.
            self.section_headers = {
                'lag': self.section_header_div(text=self.lag_section_head_txt, width=600, html_header='h3', margin_top=5, margin_bottom=-155),
                'distribution': self.section_header_div(text='Pre transform distribution:{}'.format(self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                'relationships': self.section_header_div(text='Relationships between variables:'.format(self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                'correlations': self.section_header_div(text='non linear relationships between variables:', width=600, html_header='h3', margin_top=5, margin_bottom=-155),
                'non_linear': self.section_header_div(text='non linear relationships between variables:', width=600, html_header='h3', margin_top=5, margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div wrapping `text`."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Push a status message into both (top/bottom) notification divs."""
            txt = """<div style="text-align:center;background:black;width:{}px;"> <h4 style="color:#fff;"> {}</h4></div>""".format(self.page_width, text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def reset_adoption_dict(self, variable):
            """Clear the tracked significant-effect columns for `variable`."""
            self.significant_effect_dict[variable] = []

        def section_header_updater(self, text, section, html_header='h3', margin_top=150, margin_bottom=-150):
            """Replace the text of an existing section header in place."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            self.section_headers[section].text = text

        # ////////////// DIVS /////////////////////////////////
        def title_div(self, text, width=700):
            """Return a simple h2 title Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Static help Div explaining how to read the correlation tables."""
            txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> Positive: as variable 1 increases, so does variable 2. </li> <li> Negative: as variable 1 increases, variable 2 decreases. </li> <li> Strength: decisions can be made on the basis of strong and moderate relationships. </li> <li> No relationship/not significant: no statistical support for decision making. </li> <li> The scatter graphs (below) are useful for visual confirmation. </li> <li> The histogram (right) shows the distribution of the variable. </li> </ul> </div> """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////
        def prep_data(self, df1):
            """Filter/resample `df1` into `self.df1`, applying the global lag.

            Filters to `self.crypto` (unless 'all'), resamples per crypto by
            `self.resample_period`, then shifts every non-target feature by
            `self.lag` days and drops resulting NaNs.
            """
            try:
                self.cols = list(df1.columns)
                df1['timestamp'] = df1['timestamp'].astype('M8[us]')
                df = df1.set_index('timestamp')
                #logger.warning('LINE 195 df:%s',df.head())
                # handle lag for all variables
                if self.crypto != 'all':
                    df = df[df.crypto == self.crypto]
                df = df.compute()
                #logger.warning('LINE 199: length before:%s',len(df))
                df = df.groupby('crypto').resample(self.resample_period).agg(self.groupby_dict)
                #logger.warning('LINE 201: length after:%s',len(df))
                df = df.reset_index()
                vars = self.feature_list.copy()
                if int(self.lag) > 0:
                    # shift every feature except the inspected variable
                    for var in vars:
                        if self.variable != var:
                            df[var] = df[var].shift(int(self.lag))
                df = df.dropna()
                self.df1 = df
                #logger.warning('line 184- prep data: df:%s',self.df.head(10))
            except Exception:
                logger.error('prep data', exc_info=True)

        def set_groupby_dict(self):
            """Placeholder — no grouping logic implemented yet."""
            try:
                pass
            except Exception:
                logger.error('set groupby dict', exc_info=True)

        # ///////////////// PLOTS /////////////////////
        def lags_plot(self, launch):
            """Scatter the inspected variable against lagged copies of
            `self.lag_variable` (one column per lag in `self.lag_days`);
            also refreshes the lag-correlation table via lags_corr."""
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                df = df.compute()
                cols = [self.lag_variable]
                lags = self.lag_days.split(',')
                for day in lags:
                    try:
                        label = self.lag_variable + '_' + day
                        df[label] = df[self.lag_variable].shift(int(day))
                        cols.append(label)
                    except:
                        logger.warning('%s is not an integer', day)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                #logger.warning('in lags plot: df:%s',df.head(10))
                return df.hvplot(x=self.variable, y=cols, kind='scatter', alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Correlate `self.variable` with each lagged column in `df` and
            stream the results into the shared `lags_corr_src` source."""
            try:
                corr_dict_data = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'lag': [],
                    'r': [],
                    'p_value': []
                }
                a = df[self.variable].tolist()
                for col in df.columns:
                    if col not in ['timestamp', self.variable]:
                        # find lag
                        var = col.split('_')
                        try:
                            tmp = int(var[-1])
                            lag = tmp
                        except Exception:
                            # unlagged base column: no numeric suffix
                            lag = 'None'
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(a, b)
                        corr_dict_data['variable_1'].append(self.variable)
                        corr_dict_data['variable_2'].append(col)
                        corr_dict_data['relationship'].append(txt)
                        corr_dict_data['lag'].append(lag)
                        corr_dict_data['r'].append(round(rvalue, 4))
                        corr_dict_data['p_value'].append(round(pvalue, 4))
                # rollover == row count, so old rows are fully replaced
                lags_corr_src.stream(corr_dict_data, rollover=(len(corr_dict_data['lag'])))
                columns = [
                    TableColumn(field="variable_1", title="variable 1"),
                    TableColumn(field="variable_2", title="variable 2"),
                    TableColumn(field="relationship", title="relationship"),
                    TableColumn(field="lag", title="lag(days)"),
                    TableColumn(field="r", title="r"),
                    TableColumn(field="p_value", title="p_value"),
                ]
                data_table = DataTable(source=lags_corr_src, columns=columns, width=900, height=400)
                return data_table
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """hvplot table of linear correlations between `self.variable`
            and every other feature; also checkpoints significant developer
            adoption features to Redis."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'r': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop('timestamp', axis=1)
                #df = df.compute()
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    if col != self.variable:
                        #logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(a, b)
                        # add to dict
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['r'].append(round(rvalue, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))
                        # update significant effect variables
                        if self.variable in self.adoption_variables['developer']:
                            if any(relationship in txt for relationship in self.relationships_to_check):
                                if self.variable not in self.significant_effect_dict.keys():
                                    self.significant_effect_dict[self.variable] = []
                                self.significant_effect_dict[self.variable].append(col)
                if self.variable in self.adoption_variables['developer']:
                    tmp = self.significant_effect_dict[self.variable].copy()
                    tmp = list(set(tmp))  # dedupe accumulated columns
                    tmp_dct = {
                        'features': tmp,
                        'timestamp': datetime.now().strftime(self.DATEFORMAT)
                    }
                    # write to redis
                    save_params = 'adoption_features:developer' + '-' + self.variable
                    self.redis.save(tmp_dct, save_params, "", "", type='checkpoint')
                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'r': corr_dict['r'],
                    'p-value': corr_dict['p-value']
                })
                #logger.warning('df:%s',df.head(23))
                return df.hvplot.table(columns=['Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'], width=550, height=400, title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """hvplot table of Mann-Whitney-U (non-parametric) relationships
            between `self.variable` and every other feature."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'stat': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop('timestamp', axis=1)
                #df = df.compute()
                #logger.warning('line df:%s',df.head(10))
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    if col != self.variable:
                        #logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['stat'].append(round(stat, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))
                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'stat': corr_dict['stat'],
                    'p-value': corr_dict['p-value']
                })
                #logger.warning('df:%s',df.head(23))
                # NOTE(review): title is missing a space ("parametricrelationship")
                return df.hvplot.table(columns=['Variable 1', 'Variable 2', 'Relationship', 'stat', 'p-value'], width=550, height=400, title='Non parametricrelationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            """Grid of per-feature histograms (4 columns)."""
            try:
                return self.df.hvplot.hist(y=self.feature_list, subplots=True, shared_axes=False, bins=25, alpha=0.3, width=300).cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter-matrix: `self.variable` vs. every other feature."""
            try:
                logger.warning('line 306 self.feature list:%s', self.feature_list)
                df = self.df1
                #df = df[self.feature_list]
                # get difference for money columns
                #thistab.prep_data(thistab.df)
                if 'timestamp' in df.columns:
                    df = df.drop('timestamp', axis=1)
                #df = df.repartition(npartitions=1)
                #df = df.compute()
                df = df.fillna(0)
                #logger.warning('line 302. df: %s',df.head(10))
                cols_temp = self.feature_list.copy()
                if self.variable in cols_temp:
                    cols_temp.remove(self.variable)
                #variable_select.options = cols_lst
                p = df.hvplot.scatter(x=self.variable, y=cols_temp, width=330, subplots=True, shared_axes=False, xaxis=False).cols(4)
                return p
            except Exception:
                logger.error('matrix plot', exc_info=True)

        '''
        def regression(self,df):
            try:
            except Exception:
                logger.error('matrix plot', exc_info=True)
        '''

    # ---------------- widget callbacks (close over `thistab` + streams) ----
    def update_variable(attr, old, new):
        """Select a new inspected variable and relaunch matrix/corr plots."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        # NOTE(review): prep_data runs BEFORE thistab.variable is updated, so
        # the lag-shift exclusion still uses the OLD variable — confirm intent.
        thistab.prep_data(thistab.df)
        thistab.variable = new
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.lag_section_head_txt = 'Lag relationships between {} and...'.format(thistab.variable)
        #thistab.section_header_updater('lag',thistab.lag_section_head_txt)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """Select a new lag variable and relaunch the lag plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_crypto(attr, old, new):
        """Change cryptocurrency (and pick up current lag) then relaunch."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.crypto = crypto_select.value
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):  # update lag & cryptocurrency
        """Change the global lag and relaunch matrix/corr plots."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Reload data for the selected date range and relaunch plots."""
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value, datepicker_end.value, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Change the resample period and relaunch plots."""
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Apply the comma-separated lags typed into `lags_input`."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'external_daily'
        cols = list(groupby_dict.keys()) + ['timestamp', 'crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00", "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=200)

        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        # date comes out stream in milliseconds
        #stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix', launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start", min_date=first_date_range, max_date=last_date_range, value=first_date)
        datepicker_end = DatePicker(title="End", min_date=first_date_range, max_date=last_date_range, value=last_date)
        variable_select = Select(title='Select variable', value='fork', options=thistab.feature_list)
        lag_variable_select = Select(title='Select lag variable', value=thistab.lag_variable, options=thistab.feature_list)
        lag_select = Select(title='Select lag', value=str(thistab.lag), options=thistab.lag_menu)
        crypto_select = Select(title='Select cryptocurrency', value='all', options=['all'] + thistab.items)
        resample_select = Select(title='Select resample period', value='D', options=['D', 'W', 'M', 'Q'])
        lags_input = TextInput(value=thistab.lag_days, title="Enter lags (integer(s), separated by comma)", height=55, width=300)
        lags_input_button = Button(label="Select lags, then click me!", width=10, button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src, columns=columns, width=500, height=280)
        width = 800
        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot, streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table, streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(thistab.non_parametric_relationship_table, streams=[stream_launch_corr])
        #hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot, streams=[stream_launch_lags_var])
        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)

        # setup divs
        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        crypto_select.on_change('value', update_crypto)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end, variable_select, lag_select, crypto_select, resample_select)
        controls_lag = WidgetBox(lag_variable_select, lags_input, lags_input_button)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [matrix_plot.state, controls],
                         [thistab.section_headers['relationships']],
                         [Spacer(width=20, height=30)],
                         [thistab.section_headers['correlations']],
                         [Spacer(width=20, height=30)],
                         [corr_table.state, thistab.corr_information_div()],
                         [thistab.section_headers['non_linear']],
                         [Spacer(width=20, height=30)],
                         [nonpara_table.state],
                         [thistab.section_headers['lag']],
                         [Spacer(width=20, height=30)],
                         [lags_plot.state, controls_lag],
                         [lags_corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)
def slider():
    """Interactive sine-wave demo layout.

    Draws sin(x) over [0, 10] and wires four sliders (amplitude, frequency,
    phase, offset) to a client-side CustomJS callback that recomputes the
    curve in the browser; returns the assembled responsive Row layout.
    """
    # Base curve: 500 samples of sin(x) on [0, 10].
    xs = np.linspace(0, 10, 500)
    data_source = ColumnDataSource(data=dict(x=xs, y=np.sin(xs)))

    fig = figure(y_range=(-10, 10), tools='', toolbar_location=None, responsive='box')
    fig.line('x', 'y', source=data_source, line_width=3, line_alpha=0.6)

    # Client-side recompute: y = B + A*sin(k*x + phi). The slider widgets are
    # injected into `args` below, after each one is constructed.
    js_callback = CustomJS(args=dict(source=data_source), code=""" var data = source.get('data'); var A = amp.get('value') var k = freq.get('value') var phi = phase.get('value') var B = offset.get('value') x = data['x'] y = data['y'] for (i = 0; i < x.length; i++) { y[i] = B + A*Math.sin(k*x[i]+phi); } source.trigger('change'); """)

    # (arg name, Slider kwargs) — amplitude fires on mouseup only; the rest
    # use the default policy, matching the original behavior.
    slider_specs = [
        ("amp", dict(start=0.1, end=10, value=1, step=.1, title="Amplitude",
                     callback=js_callback, callback_policy='mouseup')),
        ("freq", dict(start=0.1, end=10, value=1, step=.1, title="Frequency",
                      callback=js_callback)),
        ("phase", dict(start=0, end=6.4, value=0, step=.1, title="Phase",
                       callback=js_callback)),
        ("offset", dict(start=-5, end=5, value=0, step=.1, title="Offset",
                        callback=js_callback)),
    ]

    widgets = []
    for arg_name, kwargs in slider_specs:
        widget = Slider(**kwargs)
        js_callback.args[arg_name] = widget  # expose widget to the JS code
        widgets.append(widget)

    return Row(WidgetBox(*widgets, responsive='box'),
               Column(fig, responsive='box'),
               responsive='box')
def account_predictive_tab(page_width=1200): class Thistab(Mytab): def __init__(self, table, cols, dedup_cols): Mytab.__init__(self, table, cols, dedup_cols) self.table = table self.cols = cols self.DATEFORMAT = "%Y-%m-%d %H:%M:%S" self.df = None self.df1 = {} # to contain churned and retained splits self.df_predict = None self.day_diff = 1 # for normalizing for classification periods of different lengths self.df_grouped = '' self.rf = {} # random forest self.cl = PythonClickhouse('aion') self.feature_list = hyp_variables self.targets = { 'classification': { 'churned': { 'cols': ['churned', 'active'], 'target_col': 'status' } }, 'regression': { 'aion_fork': { 'cols': [1, 0], 'target_col': 'aion_fork' } } } self.interest_var = 'address' self.trigger = -1 self.status = 'all' self.clf = None self.pl = {} # for rf pipeline self.div_style = """ style='width:300px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ self.header_style = """ style='color:blue;text-align:center;' """ # list of tier specific addresses for prediction self.address_list = [] self.prediction_address_selected = "" self.load_data_flag = False self.day_diff = 1 self.groupby_dict = {} for col in self.feature_list: self.groupby_dict[col] = 'mean' self.div_style = """ style='width:300px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ self.metrics_div = Div(text='', width=400, height=300) self.accuracy_df = None self.inspected_variable = 'amount' # ------- DIVS setup begin self.page_width = page_width txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format( self.page_width, 50, 'Welcome') self.notification_div = { 'top': Div(text=txt, width=self.page_width, height=20), 'bottom': Div(text=txt, width=self.page_width, height=10), } self.section_divider = '-----------------------------------' 
self.section_headers = { 'churn': self.section_header_div( text= 'Churned accounts: prediction model accuracy, variable ranking:{}' .format('----'), width=int(self.page_width * .5), html_header='h2', margin_top=5, margin_bottom=-155), 'variable behavior': self.section_header_div(text='Variable behavior:{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'predictions': self.section_header_div( text='Select date range to make predictions:{}'.format( self.section_divider), width=int(self.page_width * .5), html_header='h2', margin_top=5, margin_bottom=-155), } # ---------------------- DIVS ---------------------------- def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150): text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \ .format(margin_top, margin_bottom, html_header, text, html_header) return Div(text=text, width=width, height=15) # #################################################### # UTILITY DIVS def results_div(self, text, width=600, height=300): div = Div(text=text, width=width, height=height) return div def title_div(self, text, width=700): text = '<h2 style="color:#4221cc;">{}</h2>'.format(text) return Div(text=text, width=width, height=15) def reset_checkboxes(self): try: self.prediction_address_selected = "" self.prediction_address_select.value = "all" except Exception: logger.error('reset checkboxes', exc_info=True) ################################################### # I/O def load_df(self, start_date="2018-04-25 00:00:00", end_date="2018-12-10 00:00:00"): try: if isinstance(start_date, str): start_date = datetime.strptime(start_date, self.DATEFORMAT) if isinstance(end_date, str): end_date = datetime.strptime(end_date, self.DATEFORMAT) self.df_load(start_date, end_date) self.df = self.df.fillna(0) #self.make_delta() #self.df = self.df.set_index('block_timestamp') #logger.warning("data loaded - 
%s",self.df.tail(10)) except Exception: logger.error('load_df', exc_info=True) ################################################### # MUNGE DATA def make_delta(self): try: if self.df is not None: if len(self.df) > 0: df = self.df.compute() for col in self.targets: col_new = col + '_diff' df[col_new] = df[col].pct_change() df[col_new] = df[col_new].fillna(0) logger.warning('diff col added : %s', col_new) self.df = self.df.fillna(self.df.mean()) self.df = dd.dataframe.from_pandas(df, npartitions=15) # logger.warning('POST DELTA:%s',self.df1.tail(20)) except Exception: logger.error('make delta', exc_info=True) def split_df(self, df, target): cols = self.target['classification'][target] target_col = self.target['classification'][target] for val in cols: self.df1[val] = df[target_col] == val logger.warning( "Finished split into churned and retained dataframes") ################################################## # EXPLICATORY GRAPHS # PLOTS def box_plot(self, variable): try: # logger.warning("difficulty:%s", self.df.tail(30)) # get max value of variable and multiply it by 1.1 minv = 0 maxv = 0 df = self.df if df is not None: if len(df) > 0: minv, maxv = dd.compute(df[variable].min(), df[variable].max()) else: df = SD('filter', [variable, 'status'], []).get_df() return df.hvplot.box(variable, by='status', ylim=(.9 * minv, 1.1 * maxv)) except Exception: logger.error("box plot:", exc_info=True) ################################################### # MODELS def rf_clf(self): try: logger.warning("RANDOM FOREST LAUNCHED") error_lst = [] df_temp = self.df df_temp = self.normalize(df_temp, timestamp_col='block_timestamp') # if all addresses used filter for only positive transactions for target in self.targets['classification']: # filter out joined df = df_temp.copy() if target == 'churned': df = df[df['status'] != 'joined'] #logger.warning("line 205: df columns in %s:",df.columns.tolist()) df = df.groupby(['address', 'status']).agg(self.groupby_dict) df = df.reset_index() 
#logger.warning("line 222: df columns in %s:",df.tail(10)) df = df.compute() ''' # only retain wanted values col_values = list(self.df[self.targets['classification'][target]['target_col']].unique()) for val in col_values: if val in self.targets['classification'][target]['cols']: pass else: df[self.targets['classification'][target]['target_col']] = \ df[df[self.targets['classification'][target]['cols']] != val] ''' X = df[self.feature_list] y = df[self.targets['classification'][target] ['target_col']] #logger.warning('y=:%s',y.head(100)) X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3) self.feature_list = X_train.columns.tolist() self.pl[target] = Pipeline([ ('imp', SimpleImputer(missing_values=0, strategy='median')), ('rf', RandomForestClassifier(n_estimators=100, random_state=42, max_depth=4, class_weight='balanced')) ]) self.pl[target].fit(X_train, y_train) y_pred = self.pl[target].predict(X_test) error_lst.append( round(100 * metrics.accuracy_score(y_test, y_pred), 2)) self.accuracy_df = pd.DataFrame({ 'Outcome': list(self.targets['classification'].keys()), 'Accuracy': error_lst, }) #logger.warning('accuracy_df:%s',self.accuracy_df.head()) #self.make_tree(target=target) print('confusion matrix:\n') print(confusion_matrix(y_test, y_pred)) print('classification report:\n') print(classification_report(y_test, y_pred)) #logger.warning("clf model built:%s",self.pl) except Exception: logger.error("RF:", exc_info=True) def accuracy_table(self): try: columns = self.accuracy_df.columns.tolist() return self.accuracy_df.hvplot.table( columns=['Outcome', 'Accuracy'], width=250, title='Prediction accuracy') except Exception: logger.error("RF:", exc_info=True) def prediction_information_div(self, width=350, height=450): txt = """ <div {}> <h4 {}>Info </h4> <ul style='margin-top:-10px;'> <li> The table shows the predicted change.</br> </li> <li> For desirable outcomes: </br> ... a positive number is good! </br> ... the bigger the number the better. 
</br> ... a negative number is bad! </br> ... the bigger the negative number the worse it is. </li> <> For non-desirable outcomes: </br>... the inverse is true </li> <li> Use the datepicker(s) to select dates for the period desired </li> </ul> </div> """.format(self.div_style, self.header_style) div = Div(text=txt, width=width, height=height) return div def metrics_div_update(self, data): div_style = """ style='width:350px;margin-right:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ txt = """<div {}> <h4 {}>Prediction Info </h4> <ul style='margin-top:-10px;'> <li> {}% likely to churn </li> </ul> </div>""".format(div_style, self.header_style, data) self.metrics_div.text = txt def stats_information_div(self, width=400, height=300): div_style = """ style='width:350px;margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ txt = """ <div {}> <h4 {}>Metadata Info </h4> <ul> <li > <h4 style='margin-bottom:-2px;'>Table left:</h4> - shows the outcome,</br> and the accuracy in %</br> <strong><i>100% is perfection!</i></strong> </li> <li> <h4 style='margin-bottom:-2px;'>Table right:</h4> - shows the desired outcome, the variables(things Aion controls) </br> and their importance to the particular outcome </br> ...which variable(s) have a greater impact on an outcome. 
</br>- lower = better </br>- generally only the best ranked 3 matter </br>- business advice: manipulate the top ranked variables to attain desirable outcomes </li> </ul> </div>""".format(div_style, self.header_style) div = Div(text=txt, width=width, height=height) return div def load_prediction_df(self, start_date, end_date): if isinstance(start_date, date): start_date = datetime.combine(start_date, datetime.min.time()) if isinstance(end_date, date): end_date = datetime.combine(end_date, datetime.min.time()) cols = self.feature_list + ['address', 'block_timestamp'] self.df_predict = self.cl.load_data(table=self.table, cols=cols, start_date=start_date, end_date=end_date) logger.warning('319:in load prediction: %s', self.df_predict.head(5)) def update_prediction_addresses_select(self): self.prediction_address_select.options = ['all'] if len(self.df_predict) > 0: lst = ['all'] + list( self.df_predict['address'].unique().compute()) self.prediction_address_select.options = lst # the period for which the user wants a prediction def make_account_predictions(self, launch=-1): try: logger.warning("MAKE PREDICTIONS LAUNCHED") target = list(self.targets['classification'].keys())[0] # make df = self.df_predict #logger.warning("line 363%s",df.head(10)) # make list of address for prediction select # filter if prediction for certain addresses #logger.warning('address selected:%s',self.prediction_address_select.value) if self.prediction_address_select.value is not None: if len(self.prediction_address_select.value) > 0: if self.prediction_address_select.value not in [ 'all', '' ]: df = df[df.address == self.prediction_address_select.value] #logger.warning('line 409 predict-df post filter:%s', df.head(20)) # make table for display self.predict_df = pd.DataFrame({ 'address': [], 'likely action': [] }) for target in list(self.targets['classification'].keys()): if len(df) > 0: df = self.normalize(df, timestamp_col='block_timestamp') df = self.group_data(df, self.groupby_dict, 
timestamp_col='block_timestamp') interest_labels = list(df['address'].unique()) # run model df = df.fillna(0) X = df[self.feature_list] #logger.warning("df before prediction:%s",X.tail(10)) y_pred = self.pl[target].predict(X) logger.warning('y_pred:%s', y_pred) if target == 'churned': y_pred_verbose = [ 'remain' if x in ["active", 1] else "churn" for x in y_pred ] #---- make table for display self.predict_df = pd.DataFrame({ 'address': interest_labels, 'likely action': y_pred_verbose }) #------ label pools self.predict_df['address'] = self.predict_df[ 'address'].map(self.poolname_verbose_trun) #logger.warning('self.predict_df:%s',self.predict_df) churn_df = self.predict_df[ self.predict_df['likely action'] == 'churn'] perc_to_churn = round( 100 * len(churn_df) / len(self.predict_df), 1) txt = target[:-2] text = """<div {}> <h3>Percentage likely to {}:</h3> <strong 'style=color:black;'>{}%</strong></div>""".format( self.header_style, txt, perc_to_churn) self.metrics_div_update(data=perc_to_churn) else: text = """<div {}> <br/> <h3>Sorry, address not found</h3> </div>""".format(self.header_style) self.metrics_div.text = text logger.warning("end of %s predictions", target) return self.predict_df.hvplot.table( columns=['address', 'likely action'], width=500, title='Account predictions') except Exception: logger.error("prediction:", exc_info=True) def make_tree(self, target='churned'): try: if not self.pl: self.rf_clf() # Limit depth of tree to 3 levels # Extract the small tree tree_small = self.pl[target].named_steps['rf'].estimators_[5] # Save the tree as a png image export_graphviz(tree_small, out_file='small_tree.dot', feature_names=self.feature_list, rounded=True, precision=1) (graph, ) = pydot.graph_from_dot_file('small_tree.dot') # filepath = self.make_filepath('../../../static/images/small_tree.gif') # .write_png(filepath) filepath = self.make_filepath( '/home/andre/Downloads/small_tree.png') graph.write_png(filepath) logger.warning("TREE SAVED") except 
Exception: logger.error("make tree:", exc_info=True) def make_feature_importances(self): try: if not self.pl: self.rf_clf() results_dct = { 'outcome': [], 'feature': [], 'importance': [], 'rank_within_outcome': [] } for target in self.targets['classification'].keys(): logger.warning('make feature importances for :%s', target) # Get numerical feature importances importances = list( self.pl[target].named_steps['rf'].feature_importances_) # List of tuples with variable and importance feature_importances = [(feature, round(importance, 4)) for feature, importance in zip( self.feature_list, importances)] sorted_importances = sorted(feature_importances, key=itemgetter(1)) # logger.warning('importances :%s',importances) # logger.warning("feature_importances:%s",feature_importances) target_lst = [target] * len(importances) count = 1 rank_lst = [] for i in importances: rank_lst.append(count) count += 1 results_dct['outcome'] += target_lst results_dct['feature'] += [ i[0] for i in sorted_importances ] results_dct['importance'] += [ i[1] for i in sorted_importances ] results_dct['rank_within_outcome'] += sorted(rank_lst, reverse=True) df = pd.DataFrame.from_dict(results_dct) logger.warning('MAKE FEATURE IMPORTANCES FINISHED') return df.hvplot.table( columns=[ 'outcome', 'feature', 'importance', 'rank_within_outcome' ], width=600, title="Variables ranked by importance (for each output)") except Exception: logger.error("Feature importances:", exc_info=True) #################################################### # GRAPHS def update(attrname, old, new): thistab.notification_updater( "Calculations underway. 
Please be patient") thistab.load_prediction_df(datepicker_start.value, datepicker_end.value) thistab.update_prediction_addresses_select() thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) stream_select_variable.event(variable=thistab.inspected_variable) thistab.notification_updater("ready") def update_address_predictions(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("ready") def update_select_variable(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") thistab.inspected_variable = select_variable.value stream_select_variable.event(variable=thistab.inspected_variable) thistab.notification_updater("ready") try: # SETUP table = 'account_ext_warehouse' #cols = list(table_dict[table].keys()) cols = hyp_variables + [ 'address', 'block_timestamp', 'account_type', 'status', 'update_type' ] thistab = Thistab(table, cols, []) # setup dates first_date_range = datetime.strptime("2018-04-25 00:00:00", "%Y-%m-%d %H:%M:%S") last_date_range = datetime.now().date() last_date = dashboard_config['dates']['last_date'] last_date = last_date - timedelta(days=50) first_date = last_date - timedelta(days=5) # STREAMS Setup # date comes out stream in milliseconds stream_launch = streams.Stream.define('Launch', launch=-1)() stream_select_variable = streams.Stream.define('Select_variable', variable='amount')() # setup widgets datepicker_start = DatePicker(title="Start", min_date=first_date_range, max_date=last_date_range, value=first_date) datepicker_end = DatePicker(title="End", min_date=first_date_range, max_date=last_date_range, value=last_date) select_variable = Select(title='Filter by variable', value=thistab.inspected_variable, options=thistab.feature_list) # search by address checkboxes thistab.prediction_address_select = Select(title='Filter by address', value='all', options=[]) 
reset_prediction_address_button = Button(label="reset address(es)", button_type="success") # ----------------------------------- LOAD DATA # load model-making data end = datepicker_start.value start = end - timedelta(days=60) thistab.load_df(start, end) thistab.rf_clf() # load data for period to be predicted thistab.load_prediction_df(datepicker_start.value, datepicker_end.value) thistab.update_prediction_addresses_select() # tables hv_account_prediction_table = hv.DynamicMap( thistab.make_account_predictions, streams=[stream_launch]) account_prediction_table = renderer.get_plot( hv_account_prediction_table) hv_features_table = hv.DynamicMap(thistab.make_feature_importances) features_table = renderer.get_plot(hv_features_table) hv_accuracy_table = hv.DynamicMap(thistab.accuracy_table) accuracy_table = renderer.get_plot(hv_accuracy_table) hv_variable_plot = hv.DynamicMap(thistab.box_plot, streams=[stream_select_variable])\ .opts(plot=dict(width=800, height=500)) variable_plot = renderer.get_plot(hv_variable_plot) # add callbacks datepicker_start.on_change('value', update) datepicker_end.on_change('value', update) thistab.prediction_address_select.on_change( 'value', update_address_predictions) reset_prediction_address_button.on_click(thistab.reset_checkboxes) select_variable.on_change('value', update_select_variable) # put the controls in a single element controls = WidgetBox(select_variable, datepicker_start, datepicker_end, thistab.prediction_address_select, reset_prediction_address_button) controls_prediction = WidgetBox(datepicker_start, datepicker_end, thistab.prediction_address_select, reset_prediction_address_button) grid = gridplot( [[thistab.notification_div['top']], [Spacer(width=20, height=70)], [thistab.section_headers['churn']], [Spacer(width=20, height=70)], [accuracy_table.state, thistab.stats_information_div()], [features_table.state], [thistab.section_headers['variable behavior']], [Spacer(width=20, height=30)], [variable_plot.state, controls], 
[thistab.section_headers['predictions']], [Spacer(width=20, height=30)], [ account_prediction_table.state, thistab.metrics_div, controls_prediction ], [thistab.notification_div['bottom']]]) tab = Panel(child=grid, title='predictions: accounts by value') return tab except Exception: logger.error('rendering err:', exc_info=True) text = 'predictions: accounts by value' return tab_error_flag(text)
def init_climate_wx(self):
    """Build the climate-control widgets for albedo and greenhouse effect.

    Creates sliders for the four albedo components (cloud/land fraction and
    albedo), a preset-greenhouse dropdown, a log-scale greenhouse slider, and
    a refresh button; wires all callbacks; and returns the widgets grouped
    into two boxes.

    Returns:
        list: ``[albedo_wx, tau_wx]`` — a WidgetBox of the albedo sliders and
        a WidgetBox of the greenhouse dropdown/slider/refresh button.
    """
    cloud_frac_slider = Slider(start=0, end=1, step=0.05,
                               value=self.f_cloud, title='Cloud Fraction')
    cloud_albedo_slider = Slider(start=0, end=1, step=0.05,
                                 value=self.A_cloud, title='Cloud Albedo')
    land_frac_slider = Slider(start=0, end=1, step=0.05,
                              value=self.f_land, title='Land Fraction')
    land_albedo_slider = Slider(start=0, end=1, step=0.05,
                                value=self.A_land, title='Land Albedo')
    # Preset greenhouse optical depths for familiar worlds; menu values are
    # strings and are parsed back to floats in the dropdown handler below.
    tau_star_opts = [('Mars', '0.125'), ('Earth (100 ppm CO2)', '0.66'),
                     ('Earth (200 ppm CO2)', '0.75'),
                     ('Earth (400 ppm CO2)', '0.84'),
                     ('Earth (800 ppm CO2)', '0.93'),
                     ('Earth (1600 ppm CO2)', '1.02'),
                     ('Earth (3200 ppm CO2)', '1.12'), ('Titan', '3'),
                     ('Venus', '125')]
    greenhouse_dropdown = Dropdown(label='Preset Greenhouse Effect',
                                   button_type='primary',
                                   menu=tau_star_opts)
    # Slider operates in log10 space (title says "10^x"); its handler stores
    # 10**value back on self.tau_star.
    # NOTE(review): the initial value is self.tau_star rather than
    # np.log10(self.tau_star). If self.tau_star holds the linear optical
    # depth, the slider starts at the wrong position — confirm against the
    # attribute's initialization before changing.
    tau_star_slider = Slider(start=-1, end=np.log10(150), step=0.1,
                             value=self.tau_star,
                             title='Atmosphere Greenhouse Effect (10^x)')
    refresh_s0_button = Button(label='Refresh Solar In & Calculate '
                               'Hab. Zone')

    def _make_albedo_handler(attr_name):
        # The four albedo/fraction sliders previously had four copy-pasted
        # handlers differing only in the attribute they set. This factory
        # builds the shared pattern: store the new value, recompute the
        # planetary albedo, redraw the albedo line.
        def _handler(attr, old, new):
            setattr(self, attr_name, new)
            self.alpha = self.calc_albedo()
            self._update_albedo_line()

        return _handler

    def _tau_slider_handler(attr, old, new):
        # Slider value is log10(tau*); the model stores the linear value.
        self.tau_star = 10**new
        self._update_greenhouse_line()

    def _refresh_s0_handler():
        # Disable the button while the surface-temperature plot refreshes so
        # the user cannot queue overlapping recalculations.
        refresh_s0_button.disabled = True
        self._update_Ts_plot()
        refresh_s0_button.disabled = False

    def _tau_dropdown_handler(attr, old, new):
        # Menu values are strings; convert to log10 space and invoke the
        # slider handler directly so the model updates even if assigning
        # .value does not retrigger the slider callback.
        slide_value = np.log10(float(new))
        tau_star_slider.value = slide_value
        _tau_slider_handler(None, None, slide_value)

    cloud_albedo_slider.on_change('value', _make_albedo_handler('A_cloud'))
    cloud_frac_slider.on_change('value', _make_albedo_handler('f_cloud'))
    land_albedo_slider.on_change('value', _make_albedo_handler('A_land'))
    land_frac_slider.on_change('value', _make_albedo_handler('f_land'))
    tau_star_slider.on_change('value', _tau_slider_handler)
    refresh_s0_button.on_click(_refresh_s0_handler)
    greenhouse_dropdown.on_change('value', _tau_dropdown_handler)

    albedo_wx = WidgetBox(land_albedo_slider, land_frac_slider,
                          cloud_albedo_slider, cloud_frac_slider)
    tau_wx = WidgetBox(greenhouse_dropdown, tau_star_slider,
                       refresh_s0_button)
    return [albedo_wx, tau_wx]
def buildPlot():
    """Assemble the historical-volatility dashboard and return its root layout.

    Reads the module-level ``fruit_df`` and ``ddOpts`` dataframes, builds the
    Bokeh figure plus all control widgets, defines and wires the widget
    callbacks (which close over the figure/widgets), and returns the fully
    composed layout ready to be added to a document.
    """
    ##################### Setup
    # Palette used to cycle colors as the user adds lines to the plot.
    colors = SEABORN_PALETTES['bright']
    # Collect the distinct FruitQuality values; drop float entries so the
    # FQ slider works over integer qualities only.
    quals = fruit_df.reset_index()
    quals = quals['FruitQuality'].unique().tolist()
    for idx, i in enumerate(list(quals)):
        if type(i) == type(0.5):
            # NOTE(review): popping by index from `quals` while enumerating a
            # copy shifts later indices; adjacent float entries could be
            # skipped — confirm the data never has consecutive floats.
            quals.pop(idx)
    unique_FQs = quals
    # Epoch timestamps bounding the data; used for the initial x range.
    minDate = ts_to_epoch(fruit_df['Date'].min())
    maxDate = ts_to_epoch(fruit_df['Date'].max())
    ########### Create and format the plot
    plot = figure(
        x_axis_type="datetime",
        plot_width=600,
        plot_height=400,
        tools=[PanTool(), WheelZoomTool(), SaveTool(), BoxZoomTool()],
        x_range=DataRange1d(
            start=minDate, end=maxDate
        ),  # sets the initial date range to the limits of the data
        y_range=DataRange1d(start=0, end=1),
        name='the_plot',
        toolbar_location='above')
    # Styling: white grid/axis lines on a light background, no Bokeh logo.
    plot.title.text = "Historical Volatility"
    plot.xaxis.axis_label = "Trade Date"
    plot.yaxis.axis_label = "Vol"
    plot.background_fill_color = '#EAEBF0'
    plot.xgrid.grid_line_color = 'white'
    plot.ygrid.grid_line_color = 'white'
    plot.xaxis.axis_line_color = 'white'
    plot.xaxis.major_tick_line_color = 'white'
    plot.xaxis.minor_tick_line_color = 'white'
    plot.yaxis.axis_line_color = 'white'
    plot.yaxis.major_tick_line_color = 'white'
    plot.yaxis.minor_tick_line_color = 'white'
    plot.toolbar.logo = None
    # GlyphRenderers for every line added so far, and their legend strings.
    lines = []
    legends = []
    ############## Create the widgets
    # Console-style window for debug messages. TODO: add on/off functionality.
    debug = PreText(text="", width=1200, height=500)
    # Echoes the latest debug message somewhere more visible for the user.
    user_message = Paragraph(text='')
    # Asset_Class, Product and From dropdowns; each is initialized to its
    # first option, and the downstream options are filtered by the upstream
    # selections.
    asCls = Select(title="Asset Class",
                   options=ddOpts['Asset_Class'].unique().tolist())
    asCls.value = asCls.options[0]
    prod = Select(title="Products",
                  options=ddOpts[ddOpts['Asset_Class'] == asCls.value]
                  ['Product'].unique().tolist())
    prod.value = prod.options[0]
    whereFrom = Select(title="From",
                       options=ddOpts[(ddOpts['Asset_Class'] == asCls.value)
                                      & (ddOpts['Product'] == prod.value)]
                       ['From'].unique().tolist())
    whereFrom.value = whereFrom.options[0]
    FQslider = Slider(title='Fruit Quality',
                      start=min(unique_FQs),
                      end=max(unique_FQs),
                      step=1)
    # How many days back to look for data (text box kept in sync with the
    # preset buttons below it).
    days_back = TextInput(title='Days ago', value='365')
    days_back_buttons = RadioButtonGroup(
        labels=['10', '30', '90', '180', '365', '730'], active=4)
    # The days-to-expiration target for the linear fit.
    fixed_date_buttons = RadioButtonGroup(
        labels=['30', '60', '90', '120', '180', '365'], active=2)
    fixed_date = TextInput(title='Days to Exp', value='90')
    # Window length for the rolling mean applied to each line.
    rolling_days_buttons = RadioButtonGroup(labels=['1', '2', '5', '10'],
                                            active=0)
    rolling_days = TextInput(title='Rolling Mean Days', value='1')
    # Checkbox group toggling the visibility of each plotted line.
    line_onOff = CheckboxGroup(width=400, name='line_onOff')
    # Color swatches acting as a legend for line_onOff.
    legendDiv = Div(width=50)
    # Button that adds a line for the current widget selections.
    addLine = Button(label="Add Line")
    # HTML table of summary statistics for each line.
    descriptions = Div(text='', width=500)
    # Button that refits the plot ranges to the visible lines.
    rszButton = Button(label='resize')

    ########## Define functions associated with the widgets
    def updateDebug(inString):
        # Append a timestamped message to the debug console and mirror it in
        # the user-visible paragraph.
        inString = str(inString)
        user_message.text = inString
        oldText = debug.text
        newText = ("*- " + str(datetime.now()) + " : " + inString)
        debug.text = oldText + '\n' + newText

    def asClsChange(attrname, old, new):
        # Re-filter the Product options for the newly selected asset class.
        prod.options = ddOpts[ddOpts['Asset_Class'] ==
                              asCls.value]['Product'].unique().tolist()
        prod.value = prod.options[0]

    def prodChange(attrname, old, new):
        # Re-filter the From options for the newly selected product.
        whereFrom.options = ddOpts[(ddOpts['Asset_Class'] == asCls.value) & (
            ddOpts['Product'] == prod.value)]['From'].unique().tolist()
        whereFrom.value = whereFrom.options[0]

    def days_back_buttonChange(attrname, old, new):
        # Preset button -> text box.
        days_back.value = days_back_buttons.labels[days_back_buttons.active]

    def days_backChange(attrname, old, new):
        # Validate that the typed value is an integer; reset to 0 otherwise.
        try:
            days_back.value = str(int(days_back.value))
        except ValueError:
            days_back.value = '0'
            updateDebug('please type an integer')

    def fixed_date_buttonChange(attrname, old, new):
        # Preset button -> text box.
        fixed_date.value = fixed_date_buttons.labels[fixed_date_buttons.active]

    def fixed_dateChange(attrname, old, new):
        # Validate that the typed value is an integer; reset to 0 otherwise.
        try:
            fixed_date.value = str(int(fixed_date.value))
        except ValueError:
            fixed_date.value = '0'
            updateDebug('please type an integer')

    def rolling_days_buttonsChange(attrname, old, new):
        # Preset button -> text box.
        rolling_days.value = rolling_days_buttons.labels[
            rolling_days_buttons.active]

    def rolling_daysChange(attrname, old, new):
        # Validate that the typed value is an integer; reset to 0 otherwise.
        try:
            rolling_days.value = str(int(rolling_days.value))
        except ValueError:
            rolling_days.value = '0'
            updateDebug('please type an integer')

    def resize():
        # Fit the plot ranges to the currently visible lines; with nothing
        # visible, fall back to the full date range and a 0-100 y range.
        if len(line_onOff.active) == 0 or len(line_onOff.labels) == 0:
            plot.x_range.start = ts_to_epoch(fruit_df['Date'].min())
            plot.x_range.end = ts_to_epoch(fruit_df['Date'].max())
            plot.y_range.start = 0
            plot.y_range.end = 100
        else:
            xmin, xmax, ymin, ymax = calc_range(lines)
            plot.x_range.start = xmin
            plot.x_range.end = xmax
            plot.y_range.start = ymin
            plot.y_range.end = ymax

    def line_onOffChange(attrname, old, new):
        # Show/hide each line according to its checkbox, rebuild the color
        # swatch legend, then refit the ranges.
        for i in range(len(line_onOff.labels)):
            if i in line_onOff.active:
                lines[i].glyph.visible = True
            else:
                lines[i].glyph.visible = False
        legendDiv.text = '<div>'
        for line in lines:
            legendDiv.text += '<br><div style="background-color: %s; float:up; padding: 4px 4px 4px 4px"></div><br>' % line.glyph.line_color
        legendDiv.text += '</div>'
        resize()

    def grphUpdt():
        # Add one line to the graph for the current widget selections:
        # query the data subset, linear-fit it to the target days-to-exp,
        # apply the rolling mean, draw the line, and update the stats table,
        # checkboxes, and legend.
        # Record the start time so the total calculation time can be logged.
        updateDebug("Starting")
        updateDebug("total dataframe size: " + str(fruit_df.shape))
        stTime = datetime.now()
        # The days-to-expiration value to linear fit to.
        fit_to = int(fixed_date.value)
        # Empty frame that will eventually contain the graph's data.
        graphData = pd.DataFrame({
            'Date': [],
            'PriceVolatility': [],
            'Days_to_Exp': []
        })
        # Grab the appropriate subset of the whole dataframe based on the
        # user's widget selections.
        updateDebug("querying the data..")
        try:
            workingDf = fruit_df.loc[asCls.value, prod.value, whereFrom.value]
        except KeyError:
            updateDebug(
                'no data with that combination of Asset Class, Product, From')
            return
        try:
            workingDf = workingDf[[
                'Date', 'PriceVolatility', 'Days_to_Exp'
            ]][(workingDf['Date'] >
                (date.today() - timedelta(days=int(days_back.value))))]
        except KeyError:
            updateDebug(
                'no data with that combination of Asset Class, Product, From, and days back'
            )
            return
        updateDebug("done breaking down df")
        # Bail out if the selections produced an empty dataframe.
        if (workingDf.empty):
            updateDebug(
                'no data with that combination of Asset Class, Product, From, and days back'
            )
            return
        # Narrow down to only the user-specified FQ.
        try:
            graphData = workingDf.loc[int(FQslider.value)].copy()
        except KeyError:
            updateDebug('no data with that FQ')
        # Another empty-result hook.
        if (graphData.empty):
            updateDebug(
                'no data with that combination of Asset Class, Product, Contract Category, FQ, and days back'
            )
            return
        updateDebug('grabed correct FQs')
        # Linear fit on the current subset (project helper `mu.linearFit`).
        updateDebug('calculating linear fit...')
        graphData = mu.linearFit(fit_to=fit_to,
                                 group_on_column='Date',
                                 df=graphData,
                                 fit_column='Days_to_Exp',
                                 on_columns=['PriceVolatility'])
        updateDebug('finished with linear fit')
        # A few more debug messages.
        updateDebug(
            "working df qry: Asset_Class = %s and Product = %s and From = %s and Date > %s "
            % (asCls.value, prod.value, whereFrom.value,
               str(date.today() - timedelta(days=int(days_back.value)))))
        updateDebug("graph data shape: " + str(workingDf.shape))
        # Only draw when there are enough rows to compute the rolling mean.
        if graphData.shape[0] > int(rolling_days.value):
            # Legend string describing all selections that produced the line.
            this_legend = '%s - %s FQ: %s Days to Exp: %s From: %s Rolling Days: %s' % (
                prod.value, whereFrom.value, int(
                    FQslider.value), fixed_date.value,
                str(date.today() - timedelta(days=int(days_back.value))),
                rolling_days.value)
            # Add the line and keep its GlyphRenderer in `lines`. The color
            # cycles through the palette (colors repeat once exhausted), and
            # the y values are the rolling mean over the chosen window.
            updateDebug('adding line to plot')
            lines.append(
                plot.line(graphData.index.values[int(rolling_days.value) - 1:],
                          graphData['PriceVolatility'].rolling(
                              window=int(rolling_days.value)).mean()
                          [int(rolling_days.value) - 1:],
                          line_width=3,
                          color=colors[len(lines) % len(colors)]))
            legends.append(this_legend)
            updateDebug("updated graph")
            global descDf
            # Create, or append to, the dataframe of per-line statistics
            # (from pandas DataFrame.describe).
            if descDf is None:
                graphData[this_legend] = graphData['PriceVolatility']
                descDf = graphData[[
                    this_legend
                ]].rolling(window=int(rolling_days.value)).mean(
                )[int(rolling_days.value) -
                  1:].describe(percentiles=[]).transpose().copy()
            else:
                graphData[this_legend] = graphData['PriceVolatility']
                descDf = pd.concat([
                    descDf, graphData[[
                        this_legend
                    ]].rolling(window=int(rolling_days.value)).mean()
                    [int(rolling_days.value) -
                     1:].describe(percentiles=[]).transpose().copy()
                ])
            descDf = descDf.round(1)
            descriptions.text = descDf.to_html().replace('\\n', '')
            graphData.drop(this_legend, 1, inplace=True)
            # Register the new line with the visibility checkboxes and
            # rebuild the color swatch legend.
            line_onOff.labels.append(this_legend)
            line_onOff.active.append(len(line_onOff.labels) - 1)
            legendDiv.text = '<div>'
            for line in lines:
                legendDiv.text += '<br><div style="background-color: %s; float:up; padding: 4px 4px 4px 4px"></div><br>' % line.glyph.line_color
            legendDiv.text += '</div>'
            ## Leaving this in case we get around to figuring out the hover
            ## tool; formats the dates for it. Commented out until we, or
            ## bokeh, fix the hover tool for multiple lines:
            #formDates= pd.to_datetime(graphData['Date'] ,format="%m-%d-%Y")
            #lines[-1].data_source.data['formDates'] = formDates.apply(lambda x: x.strftime('%m-%d-%Y'))
            ## Log how long drawing took and how many points were plotted.
            updateDebug("updated y vals, with rolling mean calculated")
            updateDebug(
                str(datetime.now() - stTime) + " FOR " +
                str(len(lines[-1].data_source.data['x'])) + " points")
        else:
            updateDebug("There's no data to display")
        del graphData
        del workingDf

    ####### Link widgets to their associated functions
    asCls.on_change('value', asClsChange)
    prod.on_change('value', prodChange)
    days_back_buttons.on_change('active', days_back_buttonChange)
    days_back.on_change('value', days_backChange)
    fixed_date_buttons.on_change('active', fixed_date_buttonChange)
    fixed_date.on_change('value', fixed_dateChange)
    rolling_days_buttons.on_change('active', rolling_days_buttonsChange)
    rolling_days.on_change('value', rolling_daysChange)
    line_onOff.on_change('active', line_onOffChange)
    addLine.on_click(grphUpdt)
    rszButton.on_click(resize)
    # Formatting: group each text box with its preset buttons, then compose
    # the input column, plot column, legend, and debug console into the root.
    fixed_date_box = WidgetBox(fixed_date, fixed_date_buttons)
    days_back_box = WidgetBox(days_back, days_back_buttons)
    rolling_days_box = WidgetBox(rolling_days, rolling_days_buttons)
    widgets = [
        asCls, prod, whereFrom, FQslider, days_back_box, fixed_date_box,
        rolling_days_box, addLine, rszButton, user_message
    ]
    plot_w_description = VBox(plot, descriptions, width=700)
    pwd_w_leg = HBox(plot_w_description,
                     VBox(legendDiv),
                     VBox(line_onOff),
                     width=plot_w_description.width + line_onOff.width + 100,
                     name='div_to_save')
    input_box = VBox(*widgets, width=400, height=1200)
    total_box = HBox(VBox(input_box),
                     VBox(pwd_w_leg),
                     width=input_box.width + pwd_w_leg.width + 100,
                     height=1200)
    tot_w_debug = VBox(total_box, VBox(HBox(debug)))

    resize()
    return tot_w_debug
'rank': list(rank.values()), 'year': [db.column_names] * len(rank), 'color': Category20[14] }) p = Figure(plot_width=1000, plot_height=300, x_axis_label='Year', y_axis_label='Rank', toolbar_location='above', title='재배면적 상위 10개종목 순위변화') p.multi_line('year', 'rank', alpha=1, color='color', source=ranksource) p.add_tools(HoverTool(tooltips=[('Crop', '@crop')])) # 작물 select 하면 맞춰서 그래프 움직이는 부분 select = Select(title="Crop", value="감자", options=crops) def update_crop(attr, old, new): source.data = get_data(str(select.value)) select.on_change('value', update_crop) seconddiv = Div(text="""<h1>읍면동별 작물 시계열 차트</h1>""", width=1000) rankwidget = WidgetBox(titlediv, data_table, para, cums_table) chartwidget = WidgetBox(seconddiv, select) layout = column(rankwidget, p, chartwidget, p1, p2, p3, p4) curdoc().add_root(layout)
animate_start = animate_start + 2000 p_trade_feed.x_range.start = animate_start p_trade_feed.x_range.end = animate_start + 600000 callback_id = None def animate(): global callback_id, start, end if button.label == '► Play': button.label = '❚❚ Pause' callback_id = curdoc().add_periodic_callback(animate_update, 1) else: button.label = '► Play' start = datetime(2019, 3, 3, 14, 0, 0, tzinfo=timezone.utc).timestamp() * 1000 end = datetime(2019, 3, 3, 15, 0, 0, tzinfo=timezone.utc).timestamp() * 1000 curdoc().remove_periodic_callback(callback_id) button = Button(label='► Play', width=60) button.on_click(animate) left = Column(p_trade_feed, p_candlestick, p_trade_feed_hist, WidgetBox(button)) right = Column(p_order_book, p_trade_feed_distribution, p_buy_sell_pie) curdoc().add_root(Row(left, right))
def twitter_loader_tab(panel_title):
    """Build the 'Twitter loader' dashboard tab.

    Creates a TwitterLoader helper (search + plotting state), wires
    HoloViews streams and Bokeh widgets, and returns a Panel laid out as a
    gridplot.  On any setup failure, returns the standard error tab.
    """
    class TwitterLoader():
        def __init__(self, search_term='beiber'):
            # TWITTER SETUP
            self.api = None  # lazily created tweepy.API in get_credentials()
            self.topic = search_term
            self.options = {'messages': [str(x) for x in range(10, 1000, 50)]}
            self.limits = {
                'messages': int(self.options['messages'][0]),
            }
            self.hidden_path = dashboard_config['hidden_path']
            # NOTE(review): "%Y-%d-%m" puts day before month — looks
            # intentional throughout this tab, but confirm.
            self.DATEFORMAT = "%Y-%d-%m %H:%M:%S"
            self.df = None
            min_date = datetime.today() - timedelta(days=7)
            print(min_date)
            self.selects = {
                'window':
                Select(title='Select rolling mean window',
                       value='1',
                       options=[str(x) for x in range(1, 20, 2)]),
                'date_since':
                DatePicker(title="Tweets since:",
                           min_date=min_date,
                           max_date=datetime.today(),
                           value=min_date)
            }
            self.selects_values = {
                'window': int(self.selects['window'].value),
                'date_since': self.selects['date_since'].value
            }
            # resample menu: '30Min', '60Min', ... '330Min'
            self.resample_period = {'menu': []}
            for val in range(30, 350, 30):
                self.resample_period['menu'].append(str(val) + 'Min')
            self.resample_period['value'] = self.resample_period['menu'][0]

            # DIV VISUAL SETUP
            self.trigger = -1  # incremented to force DynamicMap refreshes
            self.html_header = 'h2'
            self.margin_top = 150
            self.margin_bottom = -150
            self.div_style = """ style='width:350px; margin-left:25px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.page_width = 1250
            txt = """<hr/>
            <div style="text-align:center;width:{}px;height:{}px;
            position:relative;background:black;margin-bottom:200px">
            <h1 style="color:#fff;margin-bottom:300px">{}</h1>
            </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'twitter':
                self.section_header_div(text='Twitter search results:',
                                        width=600,
                                        html_header='h2',
                                        margin_top=155,
                                        margin_bottom=-155),
            }
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            # FIX: template used "-{}px" which double-negated the already
            # negative margin_bottom values (e.g. '--155px', invalid CSS).
            text = """<div style="margin-top:{}px;margin-bottom:{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Push a status banner into both notification divs."""
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                   position:relative;background:black;">
                   <h1 style="color:#fff;margin-bottom:300px">{}</h1>
            </div>""".format(self.page_width, 50, text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        # //////////////  DIVS SETUP END /////////////////////////////////

        # /////////////////////////// UTILS BEGIN ///////////////////////////
        def twitter_datetime_to_epoch(self, ts):
            """Parse a twitter timestamp string; return (naive datetime, epoch).

            The returned datetime is round-tripped through DATEFORMAT, which
            drops the timezone.
            """
            ts = datetime.strptime(ts, '%a %b %d %H:%M:%S %z %Y')
            # FIX: was ts.created_at() — datetime has no such method;
            # .timestamp() yields the POSIX epoch seconds intended here.
            ts_epoch = ts.timestamp()
            ts = datetime.strftime(ts, self.DATEFORMAT)
            ts = datetime.strptime(ts, self.DATEFORMAT)
            return ts, ts_epoch

        def write_to_file(self):
            try:
                # FIX: filename previously referenced self.limits['time'],
                # a key that is never set (only 'messages' exists), so this
                # always raised KeyError and logged instead of writing.
                filename = """{}_searches_for_last_{}messages.csv""".format(
                    self.topic, self.limits['messages'])
                self.df.to_csv(filename, sep='\t', index=False)
            except Exception:
                logger.error('Error writing to file', exc_info=True)

        # /////////////////////////// UTILS END /////////////////////
        def reset_data(self):
            self.df = None

        def get_credentials(self, filename='twitter_credentials.json'):
            """Load OAuth credentials from the hidden path and build the API."""
            try:
                filename = self.hidden_path + filename
                filepath = join(dirname(__file__), filename)
                print(filepath)
                if self.api is None:
                    with open(filepath, 'r') as f:
                        credentials_dict = json.load(f)
                        auth = tw.OAuthHandler(
                            credentials_dict['consumer_key'],
                            credentials_dict['consumer_secret'])
                        auth.set_access_token(
                            credentials_dict['access_token_key'],
                            credentials_dict['access_token_secret'],
                        )
                        self.api = tw.API(auth, wait_on_rate_limit=True)
                        logger.info('CREDENTIALS LOADED')
                        try:
                            self.api.verify_credentials()
                            print("Authentication OK")
                        except Exception:
                            print("Error during authentication")
            except Exception:
                print('credentials not loaded')

        def load_data_about_topic(self):
            """Page backwards through the search API until the message limit
            (or the end of results) is reached; store results in self.df."""
            try:
                if self.api is None:
                    self.get_credentials()
                # NOTE(review): date_since is computed but never passed to
                # the search calls — confirm whether filtering was intended.
                date_since = datetime.combine(
                    self.selects_values['date_since'], datetime.min.time())
                logger.warning('LINE 186:%s,messages=%s', self.topic,
                               self.limits['messages'])
                # initialize a list to hold all the tweepy Tweets
                alltweets = []
                # make initial request for most recent tweets
                new_tweets = self.api.search(q=self.topic,
                                             count=self.limits['messages'])
                # save most recent tweets
                alltweets.extend(new_tweets)
                # save the id of the oldest tweet less one
                oldest = alltweets[-1].id - 1
                # keep grabbing tweets until there are no tweets left to grab
                stop = False
                while not stop:
                    print(f"getting tweets before {oldest}")
                    # all subsequent requests use the max_id param to
                    # prevent duplicates
                    new_tweets = self.api.search(q=self.topic,
                                                 count=100,
                                                 max_id=oldest,
                                                 tweet_mode='extended')
                    # save most recent tweets
                    alltweets.extend(new_tweets)
                    if len(alltweets) > self.limits['messages'] or len(
                            new_tweets) <= 0:
                        stop = True
                    # update the id of the oldest tweet less one
                    oldest = alltweets[-1].id - 1
                    print(f"...{len(alltweets)} tweets downloaded so far")
                # transform the tweepy tweets into a 2D array
                results = []
                for tweet in alltweets:
                    try:
                        results.append([tweet.created_at, tweet.text])
                    except Exception:
                        print("skipped this one")
                self.df = pd.DataFrame(data=results,
                                       columns=['created_at', 'text'])
                logger.warning('LINE 211 self.df:%s', self.df.head(20))
            except Exception:
                logger.error('error in loading data', exc_info=True)

        def run(self):
            try:
                self.load_data_about_topic()
                # self.write_to_file()
            except Exception:
                logger.error('run', exc_info=True)

        # ############################ PLOTS ######################
        def sentiment_analysis(self, launch=1):
            """Score each tweet (pos/neg/neu), resample by the selected
            period and return an hvplot line chart of the means."""
            try:
                df = self.df[['text', 'created_at']]
                cols = ['pos', 'neg', 'neu']
                for col in cols:
                    if col not in df.columns:  # create only once
                        df[col] = 0
                df['pos'], df['neg'], df['neu'] = zip(
                    *df['text'].map(sentiment_analyzer_scores))
                df = df.fillna(0)
                logger.warning('resample period:%s',
                               self.resample_period['value'])
                df = df.set_index('created_at').resample(self.resample_period['value']) \
                    .agg({'pos': 'mean', 'neg': 'mean', 'neu': 'mean'})
                df = df.reset_index()
                df = df.fillna(0)
                logger.warning('LINE 307, df:%s', df.head(30))
                p = df.hvplot.line(x='created_at', y=cols,
                                   width=1200, height=600)
                return p
            except Exception:
                logger.error('run', exc_info=True)

        def visual(self, launch=1):
            """Raw search results as an hvplot table."""
            try:
                p = self.df.hvplot.table(columns=['created_at', 'text'],
                                         width=1200,
                                         height=2000)
                return p
            except Exception:
                logger.error('output data', exc_info=True)

        def jitter(self, launch=1):
            """Plot the time gap between consecutive tweets."""
            try:
                df = self.df.copy()
                df['jitter'] = df['created_at'].diff(periods=-1)
                df['jitter'] = df['jitter'] * -1
                df = df.dropna()
                p = df.hvplot.line(x='created_at', y='jitter',
                                   width=1200, height=600)
                return p
            except Exception:
                logger.error('output data', exc_info=True)

    def update_tweet_search():
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.reset_data()
        thistab.limits['messages'] = int(inputs['messages_limit'].value)
        thistab.topic = inputs['search_term'].value
        thistab.run()
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        # FIX: was launch_this= — the stream is defined with a 'launch'
        # parameter, so the old kwarg raised on every button click.
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample_period(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.resample_period['value'] = new
        thistab.trigger += 1
        # stream_launch_rolling_mean.event(launch=thistab.trigger)
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        thistab = TwitterLoader()
        thistab.run()

        # MANAGE STREAM
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_rolling_mean = streams.Stream.define('Launch',
                                                           launch=-1)()
        stream_launch_sentiment = streams.Stream.define('Launch', launch=-1)()

        # DYNAMIC GRAPHS/OUTPUT
        hv_visual = hv.DynamicMap(thistab.visual, streams=[stream_launch])
        visual = renderer.get_plot(hv_visual)

        hv_jitter = hv.DynamicMap(thistab.jitter, streams=[stream_launch])
        jitter = renderer.get_plot(hv_jitter)

        hv_sentiment_analysis = hv.DynamicMap(
            thistab.sentiment_analysis, streams=[stream_launch_sentiment])
        sentiment_analysis = renderer.get_plot(hv_sentiment_analysis)

        # CREATE WIDGETS
        inputs = {
            'search_term':
            TextInput(title='Enter search term. For list, use commas',
                      value=thistab.topic),
            'messages_limit':
            Select(title='Select messages limit (5000 = unbounded)',
                   value=str(thistab.limits['messages']),
                   options=thistab.options['messages']),
            'resample':
            Select(title='Select resample period',
                   value=thistab.resample_period['value'],
                   options=thistab.resample_period['menu'])
        }
        tweet_search_button = Button(
            label='Enter filters/inputs, then press me',
            button_type="success")

        # WIDGET CALLBACK
        tweet_search_button.on_click(update_tweet_search)
        inputs['resample'].on_change('value', update_resample_period)

        # COMPOSE LAYOUT
        # group controls (filters/input elements)
        controls_tweet_search = WidgetBox(
            inputs['search_term'],
            inputs['messages_limit'],
            tweet_search_button,
        )
        controls_rolling_mean = WidgetBox(thistab.selects['window'], )
        controls_resample_period = WidgetBox(inputs['resample'])

        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.title_div('Sentiment analysis of tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [sentiment_analysis.state, controls_resample_period],
            [thistab.title_div('Time between tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [jitter.state],
            [
                thistab.title_div(
                    'Twitter search results (use filters on right, then click button):',
                    1000)
            ],
            [Spacer(width=20, height=30)],
            [visual.state, controls_tweet_search],
            [thistab.notification_div['bottom']],
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('Twitter loader:', exc_info=True)
        return tab_error_flag(panel_title)
def aion_analytics(doc):
    """Bokeh document entry point for the analytics portal.

    Builds a 'Tab Selection' management tab whose checkboxes lazily launch
    the other dashboard tabs via coroutines.
    """
    class SelectionTab:
        def __init__(self):
            self.selected_tabs = []
            self.tablist = []
            # used to monitor if a tab has already been launched
            self.selected_tracker = []
            self.div_style = """ style='width:300px; margin-left:-200%;
                       border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.page_width = 1200

        def notification_updater(self, text):
            txt = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            return txt

        def get_selections(self, checkboxes):
            """Return the labels of the currently checked boxes."""
            self.selected_tabs = [
                checkboxes.labels[i] for i in checkboxes.active
            ]
            return self.selected_tabs

    selection_tab = SelectionTab()

    # SETUP BOKEH OBJECTS
    try:
        tablist = []
        TABS = Tabs(tabs=tablist)

        @gen.coroutine
        def load_callstack(tablist):
            """Instantiate each selected (not-yet-launched) tab and add it."""
            lst = selection_tab.get_selections(selection_checkboxes)
            # logger.warning('selections:%s',lst)

            panel_title = 'EDA: crypto clusters'
            if panel_title in lst:
                if panel_title not in selection_tab.selected_tracker:
                    eda_cc = yield crypto_clusters_eda_tab(
                        cryptocurrencies, panel_title=panel_title)
                    selection_tab.selected_tracker.append(panel_title)
                    if eda_cc not in tablist:
                        tablist.append(eda_cc)

            panel_title = 'clustering: cryptocurrencies'
            if panel_title in lst:
                if panel_title not in selection_tab.selected_tracker:
                    cct = yield cryptocurrency_clustering_tab(panel_title)
                    selection_tab.selected_tracker.append(panel_title)
                    if cct not in tablist:
                        tablist.append(cct)

            if 'predictions: accounts by value' in lst:
                if 'predictions: accounts by value' not in selection_tab.selected_tracker:
                    ap = yield account_predictive_tab(
                        page_width=selection_tab.page_width)
                    selection_tab.selected_tracker.append(
                        'predictions: accounts by value')
                    if ap not in tablist:
                        tablist.append(ap)

            panel_title = 'Forecasting: Rentals'
            if panel_title in lst:
                if panel_title not in selection_tab.selected_tracker:
                    tsa = yield forecasting_bcc_rentals_visitor_tab(
                        panel_title)
                    # FIX: was .append(panel_title=panel_title) —
                    # list.append takes no keyword arguments (TypeError).
                    selection_tab.selected_tracker.append(panel_title)
                    if tsa not in tablist:
                        tablist.append(tsa)

            panel_title = 'KPI: Rentals'
            if panel_title in lst:
                if panel_title not in selection_tab.selected_tracker:
                    rentals = yield kpi_bcc_rentals_visitor_tab(
                        panel_title=panel_title)
                    selection_tab.selected_tracker.append(panel_title)
                    if rentals not in tablist:
                        tablist.append(rentals)

            panel_title = 'EDA: Rentals'
            if panel_title in lst:
                if panel_title not in selection_tab.selected_tracker:
                    rentals = yield eda_bcc_rentals_visitor_tab(
                        panel_title=panel_title)
                    selection_tab.selected_tracker.append(panel_title)
                    if rentals not in tablist:
                        tablist.append(rentals)

            # make list unique
            # NOTE(review): this rebinds the local name only; the caller's
            # list already received the in-place appends above — confirm
            # the dedup was meant to be visible to the caller.
            tablist = list(set(tablist))
            TABS.update(tabs=tablist)

        @gen.coroutine
        def select_tabs():
            notification_div.text = """
                  <div style="text-align:center;background:black;width:{}px;margin-bottom:100px;">
                        <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                  </div>""".format(selection_tab.page_width,
                                   'Tabs are loading')
            yield load_callstack(tablist)
            notification_div.text = """
                 <div style="text-align:center;background:black;width:{}px;margin-bottom:100px;">
                       <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                 </div>""".format(selection_tab.page_width,
                                  'Welcome to BCC Data Science Portal')

        @gen.coroutine
        def update_selected_tabs():
            notification_div.text = """
                 <div style="text-align:center;background:black;width:{}px;margin-bottom:100px;">
                       <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                 </div>""".format(selection_tab.page_width, 'Refresh underway')
            doc.clear()
            tablist = []
            selection_checkboxes.active = []
            mgmt = Panel(child=grid, title='Tab Selection')
            tablist.append(mgmt)
            TABS.update(tabs=tablist)
            doc.add_root(TABS)
            yield load_callstack(tablist)
            notification_div.text = """
                 <div style="text-align:center;background:black;width:{}px;margin-bottom:100px;">
                       <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                 </div>""".format(selection_tab.page_width,
                                  'Welcome to Aion Data Science Portal')

        # -----------------------
        txt = """
            <div {}>
            <h3 style='color:blue;text-align:center'>Info:</h3>
            <ul style='margin-top:-10px;height:200px;'>
            <li> Select the tab(s) you want activated </li>
            <li> Then click the 'launch activity' button. </li>
            </ul>
            </div>
        """.format(selection_tab.div_style)

        information_div = Div(text=txt, width=400, height=250)
        footer_div = Div(text="""<hr/><div style="width:{}px;height:{}px;
               position:relative;background:black;"></div>""".
                         format(selection_tab.page_width, 50),
                         width=selection_tab.page_width,
                         height=100)
        txt = """
           <div style="text-align:center;background:black;width:{}px;margin-bottom:100px;">
                 <h1 style="color:#fff;margin-bottom:300px">{}</h1>
           </div>""".format(selection_tab.page_width,
                            'Welcome to Aion Data Science Portal')
        notification_div = Div(text=txt,
                               width=selection_tab.page_width,
                               height=40)

        # choose startup tabs
        selection_checkboxes = CheckboxGroup(
            labels=labels, active=[DEFAULT_CHECKBOX_SELECTION])
        run_button = Button(label='Launch tabs', button_type="success")
        run_button.on_click(select_tabs)

        # setup layout
        controls = WidgetBox(selection_checkboxes, run_button)

        # create the dashboards
        grid = gridplot(
            [[notification_div],
             [Spacer(width=50, height=2, sizing_mode='scale_width')],
             [controls, information_div], [footer_div]])

        # setup launch tabs
        mgmt = Panel(child=grid, title='Tab Selection')
        tablist.append(mgmt)
        TABS.update(tabs=tablist)
        doc.add_root(TABS)
    except Exception:
        logger.error("TABS:", exc_info=True)
def tab2():
    """Build the 'confirmed cases % vs population % by race' tab.

    Loads the CDPH race/ethnicity CSV, shows a grouped vbar for the date
    chosen in a DatePicker, and returns the assembled Panel.
    """
    data = pd.read_csv('cdph-race-ethnicity.csv')
    data['date_time'] = pd.to_datetime(data['date'])
    data = data[(data['age'] == 'all')]

    percentages = ['confirmed cases', 'general population']
    regions = ['asian', 'black', "cdph-other", 'latino', 'other', 'white']
    # nested x-axis factors: (race, percentage-category)
    x = [(race, percent) for race in regions for percent in percentages]

    def create_dataset(df):
        # interleave the two percentage columns so they line up with x
        counts = sum(
            zip(df['confirmed_cases_percent'], df['population_percent']), ())
        source = ColumnDataSource(data=dict(x=x, counts=counts))
        return source

    def create_plot(source):
        p = figure(x_range=FactorRange(*x),
                   y_axis_label='Percentage',
                   plot_width=1030)
        p.title.text = "Confirmed_case% VS Population% by races"
        p.title.align = "center"
        p.title.text_font_size = "20px"
        p.vbar(x='x',
               top='counts',
               width=0.9,
               source=source,
               line_color="white",
               fill_color=factor_cmap('x',
                                      factors=percentages,
                                      palette=["#c9d9d3", "#718dbf"],
                                      start=1,
                                      end=2))
        # FIX: removed duplicate assignments (range_padding was set twice,
        # xgrid.grid_line_color three times) — values are unchanged.
        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        # NOTE(review): no glyph declares legend_label, so these legend
        # settings likely have no effect — confirm before removing.
        p.legend.location = "top_left"
        p.legend.orientation = "horizontal"
        p.add_tools(
            HoverTool(tooltips=[('Race, category', "@x"),
                                ('Percentage', "@counts")], ))
        p.add_layout(
            Title(
                text="Data "
                "published by latimes.com/coronavirustracker; download data "
                "from "
                "https://github.com/datadesk/california-coronavirus-data/cdph-race"
                "-ethnicity.csv in GitHub",
                text_font_style="italic"), 'below')
        p.add_layout(
            Title(
                text="Data Source: California Department of Public Health "
                "https://www.cdph.ca.gov/Programs/CID/DCDC/Pages/COVID-19/Race-Ethnicity.aspx",
                text_font_style="italic"), 'below')
        p.add_layout(
            Title(text="Date of last update: 2020-10-14",
                  text_font_style="italic"), 'below')
        return p

    def callback(attr, old, new):
        # re-filter the data for the newly picked date
        new_src = create_dataset(
            data[(data['date_time'] == date_picker.value)])
        src.data.update(new_src.data)

    src = create_dataset(data[(data['date_time'] == '2020-10-01')])
    p = create_plot(src)
    date_picker = DatePicker(title='Choose a date',
                             min_date="2020-05-14",
                             max_date='2020-10-14')
    date_picker.on_change('value', callback)
    controls = WidgetBox(date_picker)
    layout = row(p, controls)
    tab = Panel(child=layout, title='Percentage of confirmed cases by race')
    return tab
def eda_projects_tab(panel_title): lags_corr_src = ColumnDataSource(data=dict(variable_1=[], variable_2=[], relationship=[], lag=[], r=[], p_value=[])) class Thistab(Mytab): def __init__(self, table, cols, dedup_cols=[]): Mytab.__init__(self, table, cols, dedup_cols) self.table = table self.cols = cols self.DATEFORMAT = "%Y-%m-%d %H:%M:%S" self.df = None self.df1 = None self.df_predict = None self.day_diff = 1 # for normalizing for classification periods of different lengths self.df_grouped = '' self.cl = PythonClickhouse('aion') self.trigger = 0 self.groupby_dict = { 'project_duration': 'sum', 'project_start_delay': 'mean', 'project_end_delay': 'mean', 'project_owner_age': 'mean', 'project_owner_gender': 'mean', 'milestone_duration': 'sum', 'milestone_start_delay': 'mean', 'milestone_end_delay': 'mean', 'milestone_owner_age': 'mean', 'milestone_owner_gender': 'mean', 'task_duration': 'sum', 'task_start_delay': 'sum', 'task_end_delay': 'mean', 'task_owner_age': 'mean', 'task_owner_gender': 'mean' } self.feature_list = list(self.groupby_dict.keys()) self.lag_variable = 'task_duration' self.lag_days = "1,2,3" self.lag = 0 self.lag_menu = [str(x) for x in range(0, 100)] self.strong_thresh = .65 self.mod_thresh = 0.4 self.weak_thresh = 0.25 self.corr_df = None self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ self.header_style = """ style='color:blue;text-align:center;' """ self.variables = sorted(list(self.groupby_dict.keys())) self.variable = self.variables[0] self.relationships_to_check = ['weak', 'moderate', 'strong'] self.status = 'all' self.pm_gender = 'all' self.m_gender = 'all' self.t_gender = 'all' self.type = 'all' self.pym = PythonMongo('aion') self.menus = { 'status': ['all', 'open', 'closed'], 'type': [ 'all', 'research', 'reconciliation', 'audit', 'innovation', 'construction', 'manufacturing', 'conference' ], 'gender': ['all', 'male', 'female'], 'variables': 
list(self.groupby_dict.keys()), 'history_periods': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], } self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'} self.timestamp_col = 'project_startdate_actual' # ------- DIVS setup begin self.page_width = 1250 txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome') self.notification_div = { 'top': Div(text=txt, width=self.page_width, height=20), 'bottom': Div(text=txt, width=self.page_width, height=10), } lag_section_head_txt = 'Lag relationships between {} and...'.format( self.variable) self.section_divider = '-----------------------------------' self.section_headers = { 'lag': self.section_header_div(text=lag_section_head_txt, width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'distribution': self.section_header_div(text='Pre-transform distribution:', width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'relationships': self.section_header_div( text='Relationships between variables:{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'correlations': self.section_header_div(text='Correlations:', width=600, html_header='h3', margin_top=5, margin_bottom=-155), } # ----- UPDATED DIVS END # ---------------------- DIVS ---------------------------- def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150): text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \ .format(margin_top, margin_bottom, html_header, text, html_header) return Div(text=text, width=width, height=15) def notification_updater(self, text): txt = """<div style="text-align:center;background:black;width:100%;"> <h4 style="color:#fff;"> {}</h4></div>""".format(text) for key in self.notification_div.keys(): 
self.notification_div[key].text = txt def reset_adoption_dict(self, variable): self.significant_effect_dict[variable] = [] # ////////////// DIVS ///////////////////////////////// def title_div(self, text, width=700): text = '<h2 style="color:#4221cc;">{}</h2>'.format(text) return Div(text=text, width=width, height=15) def corr_information_div(self, width=400, height=300): div_style = """ style='width:350px; margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> Positive: as variable 1 increases, so does variable 2. </li> <li> Negative: as variable 1 increases, variable 2 decreases. </li> <li> Strength: decisions can be made on the basis of strong and moderate relationships. </li> <li> No relationship/not significant: no statistical support for decision making. </li> <li> The scatter graphs (below) are useful for visual confirmation. </li> <li> The histogram (right) shows the distribution of the variable. 
</li> </ul> </div> """.format(div_style, self.header_style) div = Div(text=txt, width=width, height=height) return div # ///////////////////////////////////////////////////////////// def filter_df(self, df1): if self.status != 'all': df1 = df1[df1.status == self.status] if self.pm_gender != 'all': df1 = df1[df1.project_owner_gender == self.pm_gender] if self.m_gender != 'all': df1 = df1[df1.milestone_owner_gender == self.m_gender] if self.t_gender != 'all': df1 = df1[df1.task_owner_gender == self.t_gender] if self.type != 'all': df1 = df1[df1.type == self.type] return df1 def prep_data(self, df1): try: ''' df1[self.timestamp_col] = df1[self.timestamp_col].apply(lambda x: datetime(x.year, x.month, x.day, x.hour,0,0)) ''' df1 = df1.set_index(self.timestamp_col) logger.warning('LINE 195 df:%s', df1.head()) # handle lag for all variables df = df1.copy() df = self.filter_df(df) logger.warning('LINE 199: length before:%s', len(df)) slice = df[['project']] df = df[list(self.groupby_dict.keys())] logger.warning('LINE 218: columns:%s', df.head()) df = df.astype(float) df = pd.concat([df, slice], axis=1) df = df.groupby('project').resample(self.resample_period).agg( self.groupby_dict) logger.warning('LINE 201: length after:%s', len(df)) df = df.reset_index() vars = self.feature_list.copy() if int(self.lag) > 0: for var in vars: if self.variable != var: df[var] = df[var].shift(int(self.lag)) df = df.dropna() self.df1 = df logger.warning('line 184- prep data: df:%s', self.df.head(10)) except Exception: logger.error('prep data', exc_info=True) def lags_plot(self, launch): try: df = self.df.copy() df = df[[self.lag_variable, self.variable]] cols = [self.lag_variable] lags = self.lag_days.split(',') for day in lags: try: label = self.lag_variable + '_' + day df[label] = df[self.lag_variable].shift(int(day)) cols.append(label) except: logger.warning('%s is not an integer', day) df = df.dropna() self.lags_corr(df) # plot the comparison logger.warning('in lags plot: df:%s', 
df.head(10)) return df.hvplot(x=self.variable, y=cols, kind='scatter', alpha=0.4) except Exception: logger.error('lags plot', exc_info=True) # calculate the correlation produced by the lags vector def lags_corr(self, df): try: corr_dict_data = { 'variable_1': [], 'variable_2': [], 'relationship': [], 'lag': [], 'r': [], 'p_value': [] } a = df[self.variable].tolist() for col in df.columns: if col not in [self.timestamp_col, self.variable]: # find lag var = col.split('_') try: tmp = int(var[-1]) lag = tmp except Exception: lag = 'None' b = df[col].tolist() slope, intercept, rvalue, pvalue, txt = self.corr_label( a, b) corr_dict_data['variable_1'].append(self.variable) corr_dict_data['variable_2'].append(col) corr_dict_data['relationship'].append(txt) corr_dict_data['lag'].append(lag) corr_dict_data['r'].append(round(rvalue, 4)) corr_dict_data['p_value'].append(round(pvalue, 4)) lags_corr_src.stream(corr_dict_data, rollover=(len(corr_dict_data['lag']))) columns = [ TableColumn(field="variable_1", title="variable 1"), TableColumn(field="variable_2", title="variable 2"), TableColumn(field="relationship", title="relationship"), TableColumn(field="lag", title="lag(days)"), TableColumn(field="r", title="r"), TableColumn(field="p_value", title="p_value"), ] data_table = DataTable(source=lags_corr_src, columns=columns, width=500, height=280) return data_table except Exception: logger.error('lags corr', exc_info=True) def correlation_table(self, launch): try: corr_dict = { 'Variable 1': [], 'Variable 2': [], 'Relationship': [], 'r': [], 'p-value': [] } # prep df df = self.df1 # get difference for money columns df = df.drop(self.timestamp_col, axis=1) # df = df.compute() a = df[self.variable].tolist() for col in self.feature_list: logger.warning('col :%s', col) if col != self.variable: logger.warning('%s:%s', col, self.variable) b = df[col].tolist() slope, intercept, rvalue, pvalue, txt = self.corr_label( a, b) # add to dict corr_dict['Variable 1'].append(self.variable) 
corr_dict['Variable 2'].append(col) corr_dict['Relationship'].append(txt) corr_dict['r'].append(round(rvalue, 4)) corr_dict['p-value'].append(round(pvalue, 4)) df = pd.DataFrame({ 'Variable 1': corr_dict['Variable 1'], 'Variable 2': corr_dict['Variable 2'], 'Relationship': corr_dict['Relationship'], 'r': corr_dict['r'], 'p-value': corr_dict['p-value'] }) # logger.warning('df:%s',df.head(23)) return df.hvplot.table(columns=[ 'Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value' ], width=550, height=200, title='Correlation between variables') except Exception: logger.error('correlation table', exc_info=True) def non_parametric_relationship_table(self, launch): try: corr_dict = { 'Variable 1': [], 'Variable 2': [], 'Relationship': [], 'stat': [], 'p-value': [] } # prep df df = self.df1 # get difference for money columns df = df.drop(self.timestamp_col, axis=1) # df = df.compute() # logger.warning('line df:%s',df.head(10)) a = df[self.variable].tolist() for col in self.feature_list: logger.warning('col :%s', col) if col != self.variable: logger.warning('%s:%s', col, self.variable) b = df[col].tolist() stat, pvalue, txt = self.mann_whitneyu_label(a, b) corr_dict['Variable 1'].append(self.variable) corr_dict['Variable 2'].append(col) corr_dict['Relationship'].append(txt) corr_dict['stat'].append(round(stat, 4)) corr_dict['p-value'].append(round(pvalue, 4)) df = pd.DataFrame({ 'Variable 1': corr_dict['Variable 1'], 'Variable 2': corr_dict['Variable 2'], 'Relationship': corr_dict['Relationship'], 'stat': corr_dict['stat'], 'p-value': corr_dict['p-value'] }) # logger.warning('df:%s',df.head(23)) return df.hvplot.table( columns=[ 'Variable 1', 'Variable 2', 'Relationship', 'stat', 'p-value' ], width=550, height=200, title='Non parametric relationship between variables') except Exception: logger.error('non parametric table', exc_info=True) def hist(self, launch): try: return self.df.hvplot.hist(y=self.feature_list, subplots=True, shared_axes=False, bins=25, alpha=0.3, 
width=300).cols(4)
        # (tail of the histogram plot method; its try/header lies above this chunk)
        except Exception:
            logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter every feature against the currently selected variable.

            `launch` is only a HoloViews stream-trigger argument; its value
            is unused.  Returns an hvplot layout (4 subplots per row), or a
            dummy scatter when the prepared frame is unavailable.
            """
            try:
                logger.warning('line 306 self.feature list:%s', self.feature_list)
                df = self.df1
                if df is not None:
                    # thistab.prep_data(thistab.df)
                    # timestamps are not meaningful in a scatter matrix
                    if self.timestamp_col in df.columns:
                        df = df.drop(self.timestamp_col, axis=1)
                    df = df.fillna(0)
                    # logger.warning('line 302. df: %s',df.head(10))
                    # plot every feature except the selected variable itself
                    cols_temp = self.feature_list.copy()
                    if self.variable in cols_temp:
                        cols_temp.remove(self.variable)
                    # variable_select.options = cols_lst
                    p = df.hvplot.scatter(x=self.variable,
                                          y=cols_temp,
                                          width=330,
                                          subplots=True,
                                          shared_axes=False,
                                          xaxis=False).cols(4)
                else:
                    # NOTE(review): df is None on this branch, so df.hvplot
                    # raises AttributeError and is swallowed by the except
                    # below — confirm whether a placeholder plot was intended.
                    p = df.hvplot.scatter(x=[0, 0, 0], y=[0, 0, 0], width=330)
                return p
            except Exception:
                logger.error('matrix plot', exc_info=True)

        def multiline(self, launch=1):
            """Overlay one resampled mean line of y-var per level of x-var.

            Groups by the categorical x variable, resamples each group over
            time, and multiplies (`*=`) the per-level curves into a single
            HoloViews overlay.  `launch` is only a stream trigger.
            """
            try:
                yvar = self.multiline_vars['y']
                xvar = self.multiline_vars['x']
                df = self.df.copy()
                df = df[[xvar, yvar, self.timestamp_col]]
                df = df.set_index(self.timestamp_col)
                df = df.groupby(xvar).resample(self.resample_period).agg(
                    {yvar: 'mean'})
                df = df.reset_index()
                lines = df[xvar].unique()
                # split data frames
                dfs = {}
                for idx, line in enumerate(lines):
                    dfs[line] = df[df[xvar] == line]
                    dfs[line] = dfs[line].fillna(0)
                    logger.warning('LINE 428:%s - %s:', line, dfs[line].head())
                    if idx == 0:
                        # first curve establishes the overlay
                        # NOTE(review): width=1200 here vs width=2 below —
                        # looks like an inconsistency; confirm intended sizes.
                        p = dfs[line].hvplot.line(x=self.timestamp_col,
                                                  y=yvar,
                                                  width=1200,
                                                  height=500).relabel(line)
                    else:
                        p *= dfs[line].hvplot.line(x=self.timestamp_col,
                                                   y=yvar,
                                                   width=2,
                                                   height=500).relabel(line)
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    # ------------------- WIDGET CALLBACKS (close over `thistab`) ----------
    def update_variable(attr, old, new):
        """Map the human-readable selection to its code column and refire streams."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        # NOTE(review): prep_data runs before thistab.variable is reassigned
        # below — confirm the ordering is intentional.
        thistab.prep_data(thistab.df)
        if 'milestone owner gender' == new:
            thistab.variable = 'm_gender_code'
        if 'project owner gender' == new:
            thistab.variable = 'pm_gender_code'
        if 'task owner gender' == new:
            thistab.variable = 't_gender_code'
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """Switch the variable shown in the lag plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        """Re-read all filter widgets (genders, status, type) and recompute."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.pm_gender = pm_gender_select.value
        thistab.m_gender = m_gender_select.value
        thistab.t_gender = t_gender_select.value
        thistab.status = status_select.value
        thistab.type = type_select.value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):  # update lag & cryptocurrency
        """Change the single-lag setting used by the correlation views."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Reload the date-bounded frame and re-encode gender columns (male=1, else 2)."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        thistab.df['project_owner_gender'] = thistab.df[
            'project_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.df['milestone_owner_gender'] = thistab.df[
            'milestone_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.df['task_owner_gender'] = thistab.df[
            'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Change the resample period (D/W/M/Q) and refresh all plots."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Button handler: take the comma-separated lag list from the text input."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        """Change the x/y variables of the comparative multiline plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars['x'] = multiline_x_select.value
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'project_composite1'
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=30)

        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        if len(thistab.df) > 0:
            # keep the raw label around, then encode genders (male=1, else 2)
            thistab.df['manager_gender'] = thistab.df['project_owner_gender']
            thistab.df['project_owner_gender'] = thistab.df[
                'project_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            thistab.df['milestone_owner_gender'] = thistab.df[
                'milestone_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            thistab.df['task_owner_gender'] = thistab.df[
                'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            logger.warning('LINE 527:columns %s', list(thistab.df.columns))

        thistab.prep_data(thistab.df)

        # MANAGE STREAM — one trigger stream per dynamic view
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)
        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)
        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)
        type_select = Select(title='Select project type',
                             value=thistab.type,
                             options=thistab.menus['type'])
        status_select = Select(title='Select project status',
                               value=thistab.status,
                               options=thistab.menus['status'])
        pm_gender_select = Select(title="Select project owner's gender",
                                  value=thistab.pm_gender,
                                  options=thistab.menus['gender'])
        m_gender_select = Select(title="Select milestone owner's gender",
                                 value=thistab.m_gender,
                                 options=thistab.menus['gender'])
        t_gender_select = Select(title="Select task owner's gender",
                                 value=thistab.t_gender,
                                 options=thistab.menus['gender'])
        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])
        multiline_y_select = Select(title='Select comparative DV(y)',
                                    value=thistab.multiline_vars['y'],
                                    options=[
                                        'remuneration', 'delay_start',
                                        'delay_end', 'project_duration'
                                    ])
        multiline_x_select = Select(
            title='Select comparative IV(x)',
            value=thistab.multiline_vars['x'],
            options=['manager_gender', 'type', 'status'])
        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src,
                                    columns=columns,
                                    width=500,
                                    height=200)

        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        # hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])
        hv_multiline = hv.DynamicMap(thistab.multiline,
                                     streams=[stream_launch])

        # render the holoviews objects to bokeh models
        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)
        multiline = renderer.get_plot(hv_multiline)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        pm_gender_select.on_change('value', update_IVs)
        m_gender_select.on_change('value', update_IVs)
        t_gender_select.on_change('value', update_IVs)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array
        status_select.on_change('value', update_IVs)
        type_select.on_change('value', update_IVs)
        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_lag = WidgetBox(lags_input, lags_input_button,
                                 lag_variable_select)
        controls_multiline = WidgetBox(multiline_x_select, multiline_y_select)
        controls_page = WidgetBox(datepicker_start, datepicker_end,
                                  variable_select, type_select, status_select,
                                  resample_select, pm_gender_select,
                                  m_gender_select, t_gender_select)
        # NOTE(review): controls_gender duplicates widgets already placed in
        # controls_page and is never added to the grid — confirm it is needed.
        controls_gender = WidgetBox(pm_gender_select, m_gender_select,
                                    t_gender_select)

        # create the dashboards
        grid = gridplot(
            [[thistab.notification_div['top']],
             [Spacer(width=20, height=70)],
             [thistab.section_headers['relationships']],
             [Spacer(width=20, height=30)],
             [matrix_plot.state, controls_page],
             [thistab.section_headers['correlations']],
             [Spacer(width=20, height=30)],
             [corr_table.state, thistab.corr_information_div()],
             [thistab.title_div('Compare levels in a variable', 400)],
             [Spacer(width=20, height=30)],
             [multiline.state, controls_multiline],
             [thistab.section_headers['lag']],
             [Spacer(width=20, height=30)],
             [lags_plot.state, controls_lag],
             [lags_corr_table],
             [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
p.ygrid.grid_line_color = None

# JS callback shared by both text inputs: recompute x[0] = round(CLTV / cost)
# and push the change back into the plot's data source.
# NOTE(review): source.trigger('change') is the pre-1.0 bokeh API —
# confirm the bokeh version pinned for this script.
callback = CustomJS(args=dict(source=source), code="""
    var data = source.data;
    var f = CLTV.value;
    var g = cost.value;
    x = data['x']
    y = data['y']
    x[0] = Math.round(f/g)
    source.trigger('change');
""")

# Widgets are created after the callback, then injected into callback.args —
# the standard pattern when the callback and its widgets reference each other.
text_input1 = TextInput(value="", title="CLTV", callback=callback)
callback.args["CLTV"] = text_input1
text_input2 = TextInput(value="", title="Cost", callback=callback)
callback.args["cost"] = text_input2

text = 'Text for option A will go here.'
div = Div(text=text, width=200, height=100)
# div.js_on_change(text, callback)
# callback.args["cost"] = text_input2

# Lay out the inputs, the plot (defined above this chunk) and the text div.
widgets = WidgetBox(text_input1, text_input2)
grid = gridplot([widgets, p, div], ncols=2, plot_width=250, plot_height=250)

output_file("bar.html")
show(grid)
def eda_country_indexes_tab(panel_title):
    """Build the 'country indexes' exploratory-data-analysis tab.

    Loads the raw country-indicator collection from Mongo, reshapes it into
    one row per country, and composes the bokeh layout with a country
    selector.  Returns a bokeh ``Panel``; on any failure the standard error
    tab is returned instead.
    """
    class Thistab(Mytab):
        """Tab-local state holder: data frames, style snippets and menus."""

        def __init__(self, table, cols, dedup_cols=None):
            # FIX: mutable default argument ([]) replaced with None sentinel.
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None          # raw frame as loaded from mongo
            self.df1 = None         # melted frame (one row per country)
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.trigger = 0
            self.groupby_dict = {}
            self.div_style = """ style='width:350px; margin-left:25px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.countries = []     # filled in as columns are parsed
            self.country = 'Barbados'
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'gender': ['all', 'male', 'female'],
            }
            self.multiline_vars = {'x': '', 'y': ''}
            self.timestamp_col = 'timestamp'

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                   position:relative;background:black;margin-bottom:200px">
                   <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                   </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'info': self.section_header_div(text='Country indexes')
            }
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled Div used as a section header."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def load_df(self):
            """Load the entire collection into ``self.df`` (NaNs -> 0)."""
            try:
                df = json_normalize(
                    list(self.pym.db[self.table].find({}, {'_id': False})))
                df = df.fillna(0)
                logger.warning('LINE 96: country indicator:%s', df.head())
                self.countries = []
                self.df = df
            except Exception:
                logger.error('load', exc_info=True)

        def get_row_column_labels(self, txt):
            """Split a 'Country.some-indicator' column name.

            Returns ``(country, indicator)`` and registers the country in
            ``self.countries`` (kept sorted).
            """
            x = txt.split('.')
            if x[0] not in self.countries:
                self.countries.append(x[0])
                # FIX: sorted() returned a new list that was thrown away;
                # sort in place so the registry is actually ordered.
                self.countries.sort()
            x[-1] = x[-1].replace('-', '_')
            return x[0], x[-1]

        def melt_df(self):
            """Pivot the wide country.indicator frame into ``self.df1``.

            NOTE(review): labeling only runs for the first column
            (``counter == 0``) — this looks like work-in-progress upstream;
            the logic is preserved as-is apart from formatting.
            """
            try:
                # logger.warning('%s',df.head(20))
                temp_dct = {'country': []}
                # loop through items
                counter = 0
                values_present = []
                for col in self.df.columns:
                    if col != 'timestamp':
                        # label for each coin, only run once
                        if counter == 0:
                            row, column = self.get_row_column_labels(col)
                            temp_dct['country'].append(row)
                        if column not in temp_dct.keys():
                            temp_dct[column] = []
                        try:
                            tmp = self.df[[col]]
                            val = tmp.values[0]
                        except Exception:
                            val = [0]
                        temp_dct[column].append(val[0])
                        # logger.warning('LINE 140 tmp dict:%s',temp_dct)
                        # find items that are not present
                        # not_present = list
                        counter += 1
                '''
                # logger.warning('item-length=%s-%s',key,len(temp_dct[key]))
                # convert to dataframe
                for item in temp_dct.keys():
                    # logger.warning('%s length = %s',item,len(temp_dct[item]))
                    if len(temp_dct[item]) == 0:
                        temp_dct[item] = [0] * len(temp_dct)
                '''
                self.df1 = pd.DataFrame.from_dict(temp_dct)
                # logger.warning('df after melt:%s',self.df1.head())
            except Exception:
                logger.error('melt coins', exc_info=True)

        def plot_country_rows(self, launch):
            """DynamicMap callback; lazily builds the melted frame.

            TODO(review): currently returns nothing — no hv element is
            produced yet, so the DynamicMap renders empty.
            """
            try:
                if self.df1 is None:
                    self.melt_df()
            except Exception:
                logger.error('plot', exc_info=True)

    def update_country(attrname, old, new):
        """Select-widget callback: switch country and re-fire the stream."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        # FIX: the widget is the local `country_select`, not an attribute of
        # thistab; the old code raised AttributeError on every change.
        thistab.country = country_select.value
        thistab.trigger += 1
        stream_launch_action_table.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'country_indexes'
        thistab = Thistab(table, [], [])
        thistab.load_df()

        # MANAGE STREAM
        stream_launch_action_table = streams.Stream.define('Launch',
                                                           launch=-1)()

        # MAKE TABLES
        # --------------------- PLOTS---------------------------------
        hv_action_table = hv.DynamicMap(thistab.plot_country_rows,
                                        streams=[stream_launch_action_table])
        action_table = renderer.get_plot(hv_action_table)

        # CREATE WIDGETS
        # FIX: value was `thistab.load_df()`, which returns None (and
        # reloaded the collection a second time); use the current country.
        # The title was also a copy-paste ('Select matrix').
        country_select = Select(title='Select country',
                                value=thistab.country,
                                options=thistab.countries)

        # handle callbacks
        country_select.on_change('value', update_country)

        # create the dashboard
        # FIX: the selector was created and wired but never laid out; it now
        # sits in `controls` next to the table.  The unused purpose-built
        # section header replaces the undefined-here `title_div` call.
        controls = WidgetBox(country_select)
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['info']],
                         [Spacer(width=20, height=30)],
                         [action_table.state, controls],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
if (mode == 'None') { data['x'] = []; data['y'] = []; } else { if (mode == 'Linear') { interp = linear; } else if (mode == 'Step (before)') { interp = step; step.mode = 'before'; } else if (mode == 'Step (center)') { interp = step; step.mode = 'center'; } else if (mode == 'Step (after)') { interp = step; step.mode = 'after'; } for (i=0; i < %d; i++) { data['x'][i] = i * dx data['y'][i] = interp.compute(data['x'][i]) } } source.trigger('change') """ % (N, N)) mode = Select(title='Interpolation Mode', value='None', options=[ 'None', 'Linear', 'Step (before)', 'Step (center)', 'Step (after)' ], callback=callback) output_file("transform_interpolator.html", title="Example Transforms") show(Column(WidgetBox(mode, width=300), p))
port=server_port, dir='compute'), url_predict=generate_url(MYIP, port=server_port, dir='predict'))) def redraw(): static_source.data = default_data['results'] field_data = req.post(url=generate_url(MYIP, port=server_port, dir='fields'), timeout=20).json() sliders = WidgetBox(children=list( Slider(**dict( zip(list(f.keys()) + ['callback'], list(f.values()) + [callback]))) for f in field_data['results']), width=30) #scatter = Scatter3d(x='x', y='y', z='z', color='color', data_source=static_source) plot = figure(title='PCA Plot', plot_height=300, plot_width=400, responsive=True, tools="pan,reset,save,wheel_zoom") plot.scatter(x='x', y='y', color='color', source=static_source) def_cont = req.post(url=generate_url(MYIP, port=server_port, dir=['predict', 'default']), timeout=20).json()