def forecasting_bcc_rentals_visitor_tab(panel_title):
    """Build the BCC rentals/visitor forecasting dashboard tab.

    Wires date pickers and categorical filter widgets to a Prophet
    time-series forecast rendered through holoviews/Bokeh, and returns a
    Bokeh ``Panel``.  On any setup failure the error-flag tab is returned
    instead.

    :param panel_title: title shown on the Bokeh panel tab.
    :return: bokeh ``Panel`` (or the error-flag tab on failure).
    """

    class Thistab(Mytab):
        """Per-tab state: data loading, filtering, prep and TSA plotting."""

        def __init__(self, table, cols, dedup_cols=None):
            # FIX: avoid the shared mutable-default-argument pitfall
            # (was ``dedup_cols=[]``).
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None          # raw loaded dataframe
            self.df1 = None         # grouped/resampled dataframe
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.trigger = 0
            # FIX: prep_data() resamples with self.resample_period but no
            # value was assigned before the first call; default matches the
            # resample_select widget's initial value.  (Harmless if the
            # Mytab base also sets this attribute.)
            self.resample_period = 'D'

            # aggregation spec for groupby/resample in prep_data()
            self.groupby_dict = {
                'category': 'nunique',
                'item': 'nunique',
                'area': 'nunique',
                'visit_duration': 'mean',
                'age': 'mean',
                'gender_coded': 'mean',
                'status_coded': 'mean',
                'rental_employee_gender_coded': 'mean',
                'rental_employee_age': 'mean',
                'rental_tab': 'sum'
            }
            self.feature_list = ['age', 'rental_employee_age', 'rental_tab']
            self.tsa_variable = 'rental_tab'
            self.forecast_days = 40
            self.lag_variable = 'visit_duration'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            # correlation-strength thresholds
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ style='width:350px; margin-left:25px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = 'rental_tab'
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            self.pym = PythonMongo('aion')
            self.menus = {
                'item': ['all'],
                'category': ['all'],
                'status': ['all', 'guest', 'member'],
                'gender': ['all', 'male', 'female'],
                'variables': list(self.groupby_dict.keys()),
                'history_periods': ['1', '2', '3', '4', '5',
                                    '6', '7', '8', '9', '10'],
                'area': ['all', 'bar', 'rentals'],
                'tsa': ['rental_tab', 'visit_duration']
            }
            self.select = {}
            self.select['area'] = Select(title='Select BCC area', value='all',
                                         options=self.menus['area'])
            self.select['item'] = Select(title='Select item', value='all',
                                         options=self.menus['item'])
            self.select['status'] = Select(title='Select visitor status',
                                           value='all',
                                           options=self.menus['status'])
            self.select['gender'] = Select(title="Select visitor gender",
                                           value='all',
                                           options=self.menus['gender'])
            self.select['category'] = Select(title="Select category",
                                             value='all',
                                             options=self.menus['category'])
            # FIX: this widget was a copy-paste of the category select
            # (title "Select category" and category options); it filters the
            # rental employee's gender.
            self.select['rental_employee_gender'] = Select(
                title="Select rental employee gender", value='all',
                options=self.menus['gender'])

            # currently-applied value of each select widget
            self.select_values = {}
            for item in self.select.keys():
                self.select_values[item] = 'all'

            self.multiline_vars = {'x': 'gender', 'y': 'rental_tab'}
            self.timestamp_col = 'visit_start'

            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                  position:relative;background:black;margin-bottom:200px">
                  <h1 style="color:#fff;margin-bottom:300px">{}</h1></div>""" \
                .format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag': self.section_header_div(text=lag_section_head_txt,
                                               width=600, html_header='h2',
                                               margin_top=5,
                                               margin_bottom=-155),
                'distribution': self.section_header_div(
                    text='Pre-transform distribution:',
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'relationships': self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'correlations': self.section_header_div(
                    text='Correlations:',
                    width=600, html_header='h3',
                    margin_top=5, margin_bottom=-155),
                'forecast': self.section_header_div(
                    text='Forecasts:{}'.format(self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
            }
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div.

            NOTE(review): the template negates margin_bottom ("-{}px") while
            callers already pass negative values, producing e.g. "--155px";
            kept as-is to preserve current rendering — confirm intent.
            """
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text,
                        html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Show `text` in both (top and bottom) notification divs."""
            txt = """<div style="text-align:center;background:black;width:100%;">
                  <h4 style="color:#fff;">
                  {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        # ////////////// DIVS /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a simple styled title Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Static help text explaining how to read the correlations."""
            div_style = """ style='width:350px; margin-left:-600px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li> Positive: as variable 1 increases, so does variable 2. </li>
                <li> Negative: as variable 1 increases, variable 2 decreases. </li>
                <li> Strength: decisions can be made on the basis of strong and moderate relationships. </li>
                <li> No relationship/not significant: no statistical support for decision making. </li>
                <li> The scatter graphs (below) are useful for visual confirmation. </li>
                <li> The histogram (right) shows the distribution of the variable. </li>
            </ul>
            </div>
            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////
        def load_df(self, req_startdate, req_enddate, table, cols,
                    timestamp_col):
            """Return rows covering [req_startdate, req_enddate].

            Slices the cached dataframe when it already covers the requested
            range; otherwise reloads from mongo.
            """
            try:
                # get min and max of loaded df
                if self.df is not None:
                    loaded_min = self.df[timestamp_col].min()
                    loaded_max = self.df[timestamp_col].max()
                    if loaded_min <= req_startdate and loaded_max >= req_enddate:
                        df = self.df[(self.df[timestamp_col] >= req_startdate) &
                                     (self.df[timestamp_col] <= req_enddate)]
                        return df
                return self.pym.load_df(req_startdate, req_enddate,
                                        table=table, cols=cols,
                                        timestamp_col=timestamp_col)
            except Exception:
                logger.error('load_df', exc_info=True)

        def filter_df(self, df1):
            """Restrict to self.cols and apply the active widget filters.

            Only columns whose aggregation is 'count' are filtered, and only
            when the corresponding select widget is not 'all'.
            """
            try:
                df1 = df1[self.cols]
                for key, value in self.groupby_dict.items():
                    if value == 'count':
                        if self.select_values[key] != 'all':
                            df1 = df1[df1[key] == self.select_values[key]]
                return df1
            except Exception:
                logger.error('filter', exc_info=True)

        def prep_data(self, df):
            """Numerically code categoricals, index by timestamp, then
            groupby/resample into self.df1.

            Mutates self.df and self.df1 in place; returns None.
            """
            try:
                df = self.filter_df(df)
                # set up code columns
                codes = {
                    'gender': {'male': 1, 'female': 2, 'other': 3},
                    'status': {'guest': 1, 'member': 2}
                }
                for col in df.columns:
                    coded_col = col + '_coded'
                    # any gender-like column ('gender',
                    # 'rental_employee_gender', ...) gets the gender codes
                    if 'gender' in col:
                        df[coded_col] = df[col].map(codes['gender'])
                    if 'status' == col:
                        df[coded_col] = df[col].map(codes['status'])

                self.df = df.set_index(self.timestamp_col)
                # groupby and resample
                self.df1 = self.df.groupby('name').resample(
                    self.resample_period).agg(self.groupby_dict)
                self.df1 = self.df1.reset_index()
                self.df1 = self.df1.fillna(0)
                logger.warning('LINE 288 df:%s', self.df1.head(10))
            except Exception:
                logger.error('prep data', exc_info=True)

        def tsa(self, launch):
            """Fit Prophet on the daily mean of self.tsa_variable.

            Returns a holoviews layout: forecast bands (yhat/lower/upper)
            plus the trend and weekly components.
            """
            try:
                df = self.df.resample('D').agg({self.tsa_variable: 'mean'})
                df = df.reset_index()
                label = self.tsa_variable + '_diff'
                df[label] = df[self.tsa_variable].diff()
                df = df.fillna(0)

                # Prophet expects columns 'ds' (timestamp) and 'y' (value)
                rename = {self.timestamp_col: 'ds', self.tsa_variable: 'y'}
                df = df.rename(columns=rename)
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)

                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
                print(list(forecast.columns))
                for idx, col in enumerate(['yhat', 'yhat_lower', 'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds', y=col, width=600,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(x='ds', y=col, width=600,
                                                     height=250,
                                                     value_label='$',
                                                     legend=False).relabel(col)

                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds', y=col, width=550,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds', y=col, width=550,
                                                  height=250, value_label='$',
                                                  legend=False).relabel(col)

                return p + q
            except Exception:
                logger.error("TSA:", exc_info=True)

    # ----------------- CALLBACKS (close over thistab / streams below) ----
    def update_variable(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.variable = new
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        for item in thistab.select_values.keys():
            thistab.select_values[item] = thistab.select[item].value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        # FIX: removed the ad-hoc 'gender_code' column (lambda mapping
        # everything non-male to 2, mislabelling 'other'); prep_data()
        # already builds the 'gender_coded' column used by groupby_dict.
        # FIX: prep_data() returns None and assigns self.df1 internally, so
        # it must not be assigned back to thistab.df1 (that overwrote the
        # freshly prepared frame with None).
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.resample_period = new
        # FIX: same as in update() — prep_data() returns None; do not
        # clobber thistab.df1 with its return value.
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars['x'] = multiline_x_select.value
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_forecast(attrname, old, new):
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.tsa_variable = forecast_variable_select.value
        thistab.trigger += 1
        stream_launch_tsa.event(launch=thistab.trigger)
        # FIX: consistent notification message (was lowercase "ready").
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'bcc_composite'
        cols = cols_to_load['guest'] + cols_to_load['rental']
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=1)
        first_date = last_date - timedelta(days=1000)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_tsa = streams.Stream.define('Launch_tsa', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)
        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)
        lag_select = Select(title='Select lag', value=str(thistab.lag),
                            options=thistab.lag_menu)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        forecast_variable_select = Select(title='Select forecast variable',
                                          value=thistab.menus['tsa'][0],
                                          options=thistab.menus['tsa'])
        resample_select = Select(title='Select resample period',
                                 value='D', options=['D', 'W', 'M', 'Q'])
        multiline_y_select = Select(
            title='Select comparative DV(y)',
            value=thistab.multiline_vars['y'],
            options=['price', 'amount', 'visit_duration'])
        multiline_x_select = Select(
            title='Select comparative IV(x)',
            value=thistab.multiline_vars['x'],
            options=['category', 'gender', 'rental_employee_gender',
                     'status', 'item'])
        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55, width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10, button_type="success")

        # --------------------- PLOTS----------------------------------
        # tables
        hv_tsa = hv.DynamicMap(thistab.tsa, streams=[stream_launch_tsa])
        tsa = renderer.get_plot(hv_tsa)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        resample_select.on_change('value', update_resample)
        thistab.select['area'].on_change('value', update_IVs)
        thistab.select['gender'].on_change('value', update_IVs)
        thistab.select['rental_employee_gender'].on_change('value', update_IVs)
        thistab.select['item'].on_change('value', update_IVs)
        thistab.select['category'].on_change('value', update_IVs)
        thistab.select['status'].on_change('value', update_IVs)
        select_forecast_days.on_change('value', update_forecast)
        forecast_variable_select.on_change('value', update_forecast)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_tsa = WidgetBox(datepicker_start, datepicker_end,
                                 variable_select,
                                 thistab.select['status'],
                                 resample_select,
                                 thistab.select['gender'],
                                 thistab.select['category'],
                                 thistab.select['area'],
                                 forecast_variable_select,
                                 select_forecast_days)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [tsa.state, controls_tsa],
                         [Spacer(width=20, height=30)],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
def pm_risk_assessment_tab(panel_title):
    """Build the project-management risk assessment dashboard tab.

    Streams a severity x likelihood risk matrix, an action table and a
    solution/risk correlation table into a Bokeh ``Panel``.  Falls back to
    the error-flag tab on any setup failure.

    :param panel_title: title shown on the Bokeh panel tab.
    :return: bokeh ``Panel`` (or the error-flag tab on failure).
    """
    # shared sources: the Thistab methods stream into these, the DataTables
    # constructed below render them
    risk_matrix_src = ColumnDataSource(data=dict(Severity=[],
                                                 Unlikely=[],
                                                 Seldom=[],
                                                 Occaisional=[],
                                                 Likely=[],
                                                 Definite=[]))
    corr_src = ColumnDataSource(data=dict(variable_1=[],
                                          variable_2=[],
                                          relationship=[],
                                          r=[],
                                          p_value=[]))

    class Thistab(Mytab):
        """Per-tab state: risk data loading, matrix building, correlations."""

        def __init__(self, table, cols, dedup_cols=None):
            # FIX: avoid the shared mutable-default-argument pitfall
            # (was ``dedup_cols=[]``).
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.trigger = 0
            self.groupby_dict = {}
            self.div_style = """ style='width:350px; margin-left:25px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.variable = 'delay_end'
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            self.status = 'all'
            self.gender = 'all'
            self.type = 'all'
            # NOTE: 'Occaisional' is misspelled but matches the stored data
            # keys and dashboard_config formatters — do not "fix" in isolation.
            self.ratings = {
                'severity': {
                    'Insignificant': 1,
                    'Minor': 2,
                    'Moderate': 3,
                    'Critical': 4,
                    'Catastrophic': 5
                },
                'likelihood': {
                    'Unlikely': 1,
                    'Seldom': 2,
                    'Occaisional': 3,
                    'Likely': 4,
                    'Definite': 5
                }
            }
            self.variables = {
                'severity': list(self.ratings['severity'].keys()),
                'likelihood': list(self.ratings['likelihood'].keys()),
            }
            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'gender': ['all', 'male', 'female'],
            }
            self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'}
            self.timestamp_col = 'analysis_date'

            self.risks = []
            self.risk = ''
            self.matrices = []
            self.matrix = ''
            self.risk_select = Select(title='Select risk', value=self.risk,
                                      options=self.risks)
            # composite score boundaries for the action recommendation
            self.risk_threshold = {'acceptable': 8, 'doubtful': 15}

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                  position:relative;background:black;margin-bottom:200px">
                  <h1 style="color:#fff;margin-bottom:300px">{}</h1></div>""" \
                .format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag': self.section_header_div(text=lag_section_head_txt,
                                               width=1000, html_header='h2',
                                               margin_top=50,
                                               margin_bottom=5),
                'distribution': self.section_header_div(
                    text='Pre-transform distribution',
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'matrix': self.section_header_div(
                    text='Risk Matrix:{}'.format(self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'risk_solution': self.section_header_div(
                    text='Risk Matrix vs Solution :{}'.format(
                        self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
            }
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text,
                        html_header)
            return Div(text=text, width=width, height=15)

        def load_df(self):
            """Load and join the risk_matrix, risk and risk_analysis mongo
            collections into self.df, then initialise matrix/risk selection.

            Mutates self.df, self.matrices, self.matrix; returns None.
            """
            try:
                risk_matrx = json_normalize(
                    list(self.pym.db['risk_matrix'].find()))
                logger.warning('LINE 169:RISK MATIRX:%s', risk_matrx.head())
                if len(risk_matrx) > 0:
                    risk_matrx = drop_cols(risk_matrx, ['desc'])
                    logger.warning('LINE 159:RISK MATIRX:%s',
                                   risk_matrx.head())
                    risk = json_normalize(list(self.pym.db['risk'].find()))
                    risk = risk.rename(columns={'matrix': 'matrix_id'})
                    analysis = json_normalize(
                        list(self.pym.db['risk_analysis'].find()))
                    analysis = drop_cols(analysis, ['_id'])
                    analysis = analysis.rename(columns={'risk': 'risk_id'})
                    # merges
                    risk = risk.merge(analysis, how='inner', left_on='_id',
                                      right_on='risk_id')
                    risk = drop_cols(risk, [
                        '_id', 'likelihood_comment', 'severity_comment',
                        'desc', 'risk_id'
                    ])
                    logger.warning('LINE 167:RISK:%s', risk.head())
                    logger.warning('LINE 169:RISK MATIRX:%s',
                                   risk_matrx.head())
                    risk = risk_matrx.merge(risk, how='inner', left_on='_id',
                                            right_on='matrix_id')
                    df = drop_cols(risk, ['_id', 'matrix_id', 'analyst'])
                    df = df.rename(columns={'name': 'matrix'})
                    # attach numeric severity/likelihood values from the
                    # per-component rating collections
                    dfs = {}
                    for component in ['severity', 'likelihood']:
                        table = 'risk_' + component
                        dfs[component] = json_normalize(
                            list(self.pym.db[table].find()))
                        dfs[component] = drop_cols(dfs[component],
                                                   ['desc', 'level'])
                        df = df.merge(dfs[component], how='left',
                                      left_on=component, right_on='_id')
                        df = drop_cols(df, ['_id', 'project', component])
                        df = df.rename(columns={'value': component})
                        df[component] = df[component].fillna(0)
                    df['composite'] = df.severity * df.likelihood

                    # set selection variables
                    logger.warning('LINE 154 df:%s', df)
                    self.df = df
                    self.matrices = list(df['matrix'].unique())
                    self.matrix = self.matrices[0]
                    self.set_risks(df, matrix=self.matrix)
            except Exception:
                logger.error('load df', exc_info=True)

        def set_risks(self, df, matrix):
            """Filter df to `matrix`, refresh the risk list/widget, and cache
            the filtered frame in self.df1."""
            try:
                df = df[df.matrix == matrix]
                self.risks = list(df['risk'].unique())
                self.risk = self.risks[0]
                self.risk_select.options = self.risks
                self.df1 = df
            except Exception:
                logger.error('prep data', exc_info=True)

        # ////////////// DIVS //////////////////

        def title_div(self, text, width=700):
            """Return a simple styled title Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        # ////////////// GRAPHS ////////////////////
        def _solution_df(self):
            """Load project_composite1 and aggregate per project.

            Shared by corr() and correlate_solution_risk(), which previously
            duplicated this block verbatim.
            """
            df = json_normalize(
                list(self.pym.db['project_composite1'].find(
                    {}, {
                        'severity': 1,
                        'likelihood': 1,
                        'solution': 1,
                        'project_owner_gender': 1,
                        'project': 1
                    })))
            # solution is stored as a single-element list; rescale to 0-10
            df['solution'] = df.solution.apply(lambda x: x[0] * 10)
            df = df.groupby(['project']).agg({
                'severity': 'mean',
                'likelihood': 'mean',
                'solution': 'mean'
            })
            df = df.reset_index()
            df['composite'] = df.severity * df.likelihood
            logger.warning('df:%s', df.head(20))
            return df

        def action_table(self, launch):
            """Recommend an action per (matrix, risk) from the mean composite
            score; also refreshes the streamed risk matrix as a side effect."""
            try:
                def label_action(x):
                    if x < self.risk_threshold['acceptable']:
                        return 'Proceed (risk is acceptable)'
                    elif x < self.risk_threshold['doubtful']:
                        return 'Proceed, if no other options are available'
                    else:
                        # FIX: typo "Do no proceed"
                        return 'Do not proceed (Risk unacceptable)'

                df = self.df
                df = df.groupby(['matrix', 'risk']).agg({
                    'likelihood': 'mean',
                    'severity': 'mean'
                })
                df = df.reset_index()
                df['composite'] = df.likelihood * df.severity
                df['action'] = df['composite'].map(label_action)
                self.risk_matrix()
                return df.hvplot.table(
                    columns=['matrix', 'risk', 'severity', 'likelihood',
                             'action'],
                    width=1000,
                )
            except Exception:
                logger.error('action table', exc_info=True)

        def risk_matrix(self):
            """Stream the 5x5 severity x likelihood matrix for the selected
            risk into risk_matrix_src and return a DataTable over it.

            The cell matching the risk's mean (severity, likelihood) rating
            is flagged with 'BINGO '."""
            try:
                # filter
                df = self.df1
                df = df.groupby(['matrix', 'risk']).agg({
                    'likelihood': 'mean',
                    'severity': 'mean'
                })
                df = df.reset_index()
                df = df[df['risk'] == self.risk]

                severity_value = int(df['severity'].mean())
                likelihood_value = int(df['likelihood'].mean())
                logger.warning('severity=%s,likelihood=%s', severity_value,
                               likelihood_value)

                # make the matrix: one 'Severity' label column plus one
                # column per likelihood level
                dct = {
                    'Severity': list(self.ratings['severity'].keys()),
                }
                for idx_like, val_col in enumerate(
                        list(self.ratings['likelihood'].keys())):
                    likelihood_level = idx_like + 1
                    dct[val_col] = []
                    for idx_sev, val_row in enumerate(dct['Severity']):
                        severity_level = idx_sev + 1
                        val = likelihood_level * severity_level
                        # FIX: the original shadowed the outer loop variable
                        # (both loops used idx_row) and compared the
                        # likelihood index against severity_value (and vice
                        # versa), transposing the highlighted cell.
                        if severity_level == severity_value and \
                                likelihood_level == likelihood_value:
                            logger.warning('CONDITIONS MET')
                            txt = 'BINGO ' + str(val)
                        else:
                            txt = val
                        dct[val_col].append(txt)
                    logger.warning('LINE 288 %s - length=%s', val_col,
                                   len(dct[val_col]))

                risk_matrix_src.stream(dct,
                                       rollover=(len(dct['Severity'])))
                columns = [
                    TableColumn(field="Severity", title='severity'),
                    TableColumn(
                        field="Unlikely", title='unlikely',
                        formatter=dashboard_config['formatters']['Unlikely']),
                    TableColumn(
                        field="Seldom", title='seldom',
                        formatter=dashboard_config['formatters']['Seldom']),
                    TableColumn(
                        field="Occaisional", title='occaisional',
                        formatter=dashboard_config['formatters']
                        ['Occaisional']),
                    TableColumn(
                        field="Likely", title='likely',
                        formatter=dashboard_config['formatters']['Likely']),
                    TableColumn(
                        field="Definite", title='definite',
                        formatter=dashboard_config['formatters']['Definite']),
                ]
                risk_matrix_table = DataTable(source=risk_matrix_src,
                                              columns=columns, width=800,
                                              height=500)
                self.corr()
                return risk_matrix_table
            except Exception:
                logger.error('risk matrix', exc_info=True)

        def correlate_solution_risk(self, launch):
            """Scatter severity/likelihood/composite against the solution
            score, overlaid into a single holoviews plot."""
            try:
                df = self._solution_df()
                # load project
                for idx, col in enumerate(
                        ['severity', 'likelihood', 'composite']):
                    if idx == 0:
                        p = df.hvplot.scatter(x='solution', y=col)
                    else:
                        p *= df.hvplot.scatter(x='solution', y=col)
                return p
                # load risk
            except Exception:
                logger.error('correlate solution risk', exc_info=True)

        def risk_information_div(self, width=400, height=300):
            """Static help text for reading the risk matrix colors."""
            txt = """
            <div {}>
            <h4 {}>How to interpret Risk assessment matrix:</h4>
            <ul style='margin-top:-10px;'>
                <li> Red: Unacceptable risk. Do NOT proceed. </li>
                <li> Yellow: Risky. Proceed only after ensuring better options aren't reasonably available </li>
                <li> Green: Acceptable risk. Proceed. </li>
            </ul>
            </div>
            """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # calculate the correlation produced by the lags vector
        def corr(self):
            """Correlate the solution score against composite, severity and
            likelihood; stream results into corr_src and return a DataTable."""
            try:
                corr_dict_data = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'r': [],
                    'p_value': []
                }
                df = self._solution_df()
                a = df['solution'].tolist()
                for col in ['composite', 'severity', 'likelihood']:
                    # find lag
                    logger.warning('column:%s', col)
                    b = df[col].tolist()
                    slope, intercept, rvalue, pvalue, txt = self.corr_label(
                        a, b)
                    corr_dict_data['variable_1'].append('solution')
                    corr_dict_data['variable_2'].append(col)
                    corr_dict_data['relationship'].append(txt)
                    corr_dict_data['r'].append(round(rvalue, 3))
                    corr_dict_data['p_value'].append(round(pvalue, 3))

                corr_src.stream(corr_dict_data, rollover=3)
                columns = [
                    TableColumn(field="variable_1", title="variable 1"),
                    TableColumn(field="variable_2", title="variable 2"),
                    TableColumn(field="relationship", title="relationship"),
                    TableColumn(field="r", title="r"),
                    TableColumn(field="p_value", title="p_value"),
                ]
                data_table = DataTable(source=corr_src, columns=columns,
                                       width=900, height=400)
                return data_table
            except Exception:
                logger.error(' corr', exc_info=True)

    # ----------------- CALLBACKS ------------------------------------
    def update_matrix(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.matrix = matrix_select.value
        thistab.set_risks(thistab.df, matrix=thistab.matrix)
        thistab.trigger += 1
        stream_launch_action_table.event(launch=thistab.trigger)
        stream_launch_matrix.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_risk(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.risk = thistab.risk_select.value
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        thistab.risk_matrix()
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'project_composite'
        thistab = Thistab(table, [], [])
        thistab.load_df()
        thistab.corr()

        # MANAGE STREAM
        stream_launch_action_table = streams.Stream.define('Launch',
                                                           launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch', launch=-1)()
        stream_launch_risk_solution = streams.Stream.define('Launch',
                                                            launch=-1)()

        # MAKE TABLES
        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="Severity", title="severity"),
            TableColumn(field="Unlikely", title='unlikely',
                        formatter=dashboard_config['formatters']['Unlikely']),
            TableColumn(field="Seldom", title='seldom',
                        formatter=dashboard_config['formatters']['Seldom']),
            TableColumn(field="Occaisional", title='occaisional',
                        formatter=dashboard_config['formatters']
                        ['Occaisional']),
            TableColumn(field="Likely", title='likely',
                        formatter=dashboard_config['formatters']['Likely']),
            TableColumn(field="Definite", title='definite',
                        formatter=dashboard_config['formatters']['Definite']),
        ]
        risk_matrix = DataTable(source=risk_matrix_src, columns=columns,
                                width=800, height=500)

        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        corr_table = DataTable(source=corr_src, columns=columns, width=500,
                               height=280)

        hv_action_table = hv.DynamicMap(thistab.action_table,
                                        streams=[stream_launch_action_table])
        action_table = renderer.get_plot(hv_action_table)

        hv_risk_solution = hv.DynamicMap(thistab.correlate_solution_risk,
                                         streams=[stream_launch_risk_solution])
        risk_solution = renderer.get_plot(hv_risk_solution)

        # CREATE WIDGETS
        matrix_select = Select(title='Select matrix', value=thistab.matrix,
                               options=thistab.matrices)

        # handle callbacks
        matrix_select.on_change('value', update_matrix)
        thistab.risk_select.on_change('value', update_risk)

        # create the dashboards
        controls = WidgetBox(matrix_select, thistab.risk_select)
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.title_div('Determine action', 400)],
                         [Spacer(width=20, height=30)],
                         [action_table.state],
                         [thistab.section_headers['matrix']],
                         [Spacer(width=20, height=30)],
                         [risk_matrix, controls],
                         [thistab.section_headers['risk_solution']],
                         [Spacer(width=20, height=30)],
                         [corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
def KPI_social_media_tab(panel_title, DAYS_TO_LOAD=90):
    """Build the social-media KPI dashboard tab (bokeh ``Panel``).

    Shows period-to-date KPI cards and period-over-period bar charts for
    twitter engagement/sentiment variables loaded from the
    ``external_daily`` table.  On any setup failure an error-flag tab is
    returned instead.

    :param panel_title: title shown on the returned bokeh Panel tab.
    :param DAYS_TO_LOAD: unused in this body; kept for caller compatibility.
    :return: bokeh ``Panel``.
    """

    class Thistab(KPI):
        """Tab-local state and graphing logic; extends the project KPI base."""

        def __init__(self, table, cols=[]):
            KPI.__init__(self, table, name='social_media', cols=cols)
            self.table = table
            self.df = None
            self.pym = PythonMongo('aion')
            self.checkboxgroup = {}
            # NOTE(review): self.page_width is read here but only assigned
            # further below -- presumably the KPI base sets a default; verify.
            self.KPI_card_div = self.initialize_cards(self.page_width, height=350)
            self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0)
            self.timestamp_col = 'timestamp'
            self.items = None  # cryptos discovered on first load_df call
            self.social_media = 'twitter'
            self.crypto = 'aion'
            # aggregation used per variable when grouping (sum = counts, mean = %)
            self.groupby_dict = {
                'twu_tweets': 'sum',
                'twu_mentions': 'sum',
                'twu_positive': 'mean',
                'twu_compound': 'mean',
                'twu_neutral': 'mean',
                'twu_negative': 'mean',
                'twu_emojis_positive': 'mean',
                'twu_emojis_compound': 'mean',
                'twu_emojis_neutral': 'mean',
                'twu_emojis_negative': 'mean',
                'twu_emojis': 'sum',
                'twu_favorites': 'sum',
                'twu_retweets': 'sum',
                'twu_hashtags': 'sum',
                'twu_replies': 'sum',
                'twr_tweets': 'sum',
                'twr_mentions': 'sum',
                'twr_positive': 'mean',
                'twr_compound': 'mean',
                'twr_neutral': 'mean',
                'twr_negative': 'mean',
                'twr_emojis_positive': 'mean',
                'twr_emojis_compound': 'mean',
                'twr_emojis_neutral': 'mean',
                'twr_emojis_negative': 'mean',
                'twr_emojis': 'sum',
                'twr_favorites': 'sum',
                'twr_retweets': 'sum',
                'twr_hashtags': 'sum',
                'twr_replies': 'sum'
            }
            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = self.variables[0]
            self.vars_dict = None
            self.idvars = None
            # column labels expected in the external_hourly collection
            self.external_hourly_labels = [
                'fork', 'release', 'push', 'watch', 'issue',
                'twu_tweets', 'twu_mentions', 'twu_positive', 'twu_compound',
                'twu_neutral', 'twu_negative', 'twu_emojis_positive',
                'twu_emojis_compound', 'twu_emojis_negative', 'twu_emojis',
                'twu_retweets', 'twu_hashtags', 'twu_replies', 'twu_favorites',
                'twr_tweets', 'twr_mentions', 'twr_positive', 'twr_compound',
                'twr_neutral', 'twr_negative', 'twr_emojis_positive',
                'twr_emojis_compound', 'twr_emojis_negative', 'twr_emojis',
                'twr_retweets', 'twr_hashtags', 'twr_replies', 'twr_favorites',
            ]
            self.datepicker_pop_start = DatePicker(
                title="Period start", min_date=self.initial_date,
                max_date=dashboard_config['dates']['last_date'],
                value=dashboard_config['dates']['last_date'])

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                position:relative;background:black;margin-bottom:200px">
                <h1 style="color:#fff;margin-bottom:300px">{}</h1>
            </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards': self.section_header_div(
                    text='Period to date:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                'pop': self.section_header_div(
                    text='Period over period:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5, margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------

        def load_df(self, start_date, end_date, cols,
                    timestamp_col='timestamp_of_first_event',
                    supplemental_where=None):
            """Load [start_date, end_date] rows from clickhouse.

            Dates given as ``date`` are promoted to midnight ``datetime``;
            the end date is made inclusive by adding one day.  On first call
            the distinct ``crypto`` values are cached in ``self.items``.
            Returns a (dask) dataframe restricted to ``cols`` when given,
            or None on failure (errors are logged, not raised).
            """
            try:
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date, datetime.min.time())
                end_date += timedelta(days=1)
                temp_cols = cols.copy()
                if self.table != 'external_daily':
                    if 'amount' not in temp_cols:
                        temp_cols.append('amount')
                df = self.ch.load_data(self.table, temp_cols, start_date,
                                       end_date, timestamp_col, supplemental_where)
                # filter out the double entry
                # df = df[df['value'] >= 0]
                if self.items is None:
                    df_temp = df['crypto']
                    if df_temp is not None:
                        df_temp = df_temp.compute()
                        self.items = sorted(list(set(list(df_temp))))
                        logger.warning('LINE 148, items:%s', self.items)
                if len(cols) > 0:
                    return df[cols]
                else:
                    return df
                # df[timestamp_col] = df[timestamp_col].map(lambda x: clean_dates_from_db(x))
            except Exception:
                logger.error('load df', exc_info=True)

        def melt_df(self, df):
            """Melt one wide row of dotted 'coin.metric' columns into a
            long dataframe with one column per metric label.

            NOTE(review): only the first row of each column is read
            (``tmp.values[0]``) -- presumably df holds a single period; verify.
            """
            try:
                # logger.warning('%s',df.head(20))
                temp_dct = {
                    'timestamp': [],
                    'crypto': [],
                    'twu_tweets': [],
                    'twu_mentions': [],
                    'twu_positive': [],
                    'twu_compound': [],
                    'twu_neutral': [],
                    'twu_negative': [],
                    'twu_emojis_positive': [],
                    'twu_emojis_compound': [],
                    'twu_emojis_neutral': [],
                    'twu_emojis_negative': [],
                    'twu_emojis': [],
                    'twu_favorites': [],
                    'twu_retweets': [],
                    'twu_hashtags': [],
                    'twu_replies': [],
                    'twr_tweets': [],
                    'twr_mentions': [],
                    'twr_positive': [],
                    'twr_compound': [],
                    'twr_neutral': [],
                    'twr_negative': [],
                    'twr_emojis_positive': [],
                    'twr_emojis_compound': [],
                    'twr_emojis_neutral': [],
                    'twr_emojis_negative': [],
                    'twr_emojis': [],
                    'twr_favorites': [],
                    'twr_retweets': [],
                    'twr_hashtags': [],
                    'twr_replies': [],
                }
                # loop through items
                counter = 0
                values_present = []
                for col in df.columns:
                    if col not in ['timestamp', 'month', 'year', 'day', 'hour']:
                        # split the dotted column name: '<coin>.<...>.<metric>'
                        item_tmp = col.split('.')
                        # logger.warning('LINE 228:%s', col)
                        key_len = len(item_tmp[0])
                        col_label = item_tmp[-1][key_len + 1:]
                        if col_label in temp_dct.keys():
                            # label for each coin, only run once
                            if counter == 0:
                                temp_dct['crypto'].append(get_coin_name(col, key_len))
                            # get value from dataframe; default to zero when absent
                            try:
                                tmp = df[[col]]
                                val = tmp.values[0]
                            except Exception:  # was a bare except: narrowed
                                val = [0]
                            logger.warning('LINE 228:%s', col_label)
                            temp_dct[col_label].append(val)
                        else:
                            pass
                        '''
                        if col == 'timestamp':
                            tmp = df[[col]]
                            val = tmp.values[0]
                            temp_dct['timestamp'].append(val)
                        '''
                        # values_present.appe;nd(col)
                logger.warning('LINE 234:%s', temp_dct)
                df = pd.DataFrame.from_dict(temp_dct)
                logger.warning('df after melt:%s', df)
                return df
            except Exception:
                logger.error('melt coins', exc_info=True)

        '''
        def load_df(self, start_date,end_date,cols, table='external_hourly',timestamp_col='timestamp'):
            try:
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date, datetime.min.time())
                end_date = start_date + timedelta(days=1)
                df = self.pym.load_df(start_date, end_date,cols=cols, table=table, timestamp_col='timestamp')
                logger.warning('df:%s',df.head())
                self.items = get_items()
                groupby_dict, self.vars_dict, self.idvars = set_vars(self.items)
                if df is not None:
                    if len(df) > 0:
                        if '_id' in df.columns:
                            df = df.drop(['_id'], axis=1)
                        logger.warning('length df:%s',len(df))
                        df = self.melt_df(df)
                        return df
                return df
            except Exception:
                logger.error('load external data', exc_info=True)
        '''

        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;">
                <{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=170):
            """Return the static 'how to interpret sentiment score' help Div."""
            div_style = """ style='width:350px;margin-right:-800px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """
            <div {}>
            <h4 {}>How to interpret sentiment score</h4>
            <ul style='margin-top:-10px;'>
                <li>
                Sentiment scores: positive, negative, neutral.
                </li>
                <li>
                The sentiment scores are percentages.
                </li>
                <li>
                The sentiment scores are averaged over the period.
                </li>
                <li>
                (e.g.) Interpretation: over the quarter to date twitter comments were 18% positive
                </li>
            </ul>
            </div>
            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def initialize_cards(self, width, height=250):
            """Return an empty row of KPI cards (one per period), randomly styled."""
            try:
                txt = ''
                for period in ['year', 'quarter', 'month', 'week']:
                    design = random.choice(list(KPI_card_css.keys()))
                    txt += self.card(title='', data='', card_design=design)
                text = """<div style="margin-top:100px;display:flex; flex-direction:row;">
                {}
                </div>""".format(txt)
                div = Div(text=text, width=width, height=height)
                return div
            except Exception:
                logger.error('initialize cards', exc_info=True)

        # ------------------------- CARDS END -----------------------------------

        def period_to_date(self, df, timestamp=None, timestamp_filter_col=None,
                           cols=[], period='week'):
            """Filter df to rows between the start of ``period`` and ``timestamp``
            (defaults to now, truncated to the hour); optionally select ``cols``.
            """
            try:
                if timestamp is None:
                    timestamp = datetime.now()
                    timestamp = datetime(timestamp.year, timestamp.month,
                                         timestamp.day, timestamp.hour, 0, 0)
                start = self.first_date_in_period(timestamp, period)
                # filter
                df[timestamp_filter_col] = pd.to_datetime(
                    df[timestamp_filter_col], format=self.DATEFORMAT_PTD)
                logger.warning('df:%s', df['timestamp'])
                df = df[(df[timestamp_filter_col] >= start) &
                        (df[timestamp_filter_col] <= timestamp)]
                if len(cols) > 0:
                    df = df[cols]
                return df
            except Exception:
                logger.error('period to date', exc_info=True)

        def period_over_period(self, df, start_date, end_date, period,
                               history_periods=2,
                               timestamp_col='timestamp_of_first_event'):
            """Stack ``history_periods`` previous windows of ``period`` length
            under the current window, labelling each with a 'N period(s) prev'
            tag and a comparable day index ('dayset').
            """
            try:
                # filter cols if necessary
                string = '0 {}(s) prev(current)'.format(period)
                # filter out the dates greater than today
                df_current = df.copy()
                df_current['period'] = string
                # label the days being compared with the same label
                df_current = self.label_dates_pop(df_current, period, timestamp_col)
                # logger.warning('LINE 244:%s', df_current.head(15))
                # zero out time information
                start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0)
                end = datetime(end_date.year, end_date.month, end_date.day, 0, 0, 0)
                cols = list(df.columns)
                counter = 1
                if isinstance(history_periods, str):
                    history_periods = int(history_periods)
                # make dataframes for request no. of periods
                start, end = self.shift_period_range(period, start, end)
                while counter < history_periods and start >= self.initial_date:
                    # load data
                    if period == 'quarter':
                        logger.warning('start:end %s:%s', start, end)
                    # FIX: was unbound when self.crypto == 'all' (UnboundLocalError);
                    # default to None as graph_period_over_period does.
                    supplemental_where = None
                    if self.crypto != 'all':
                        supplemental_where = "AND crypto = '{}'".format(self.crypto)
                    df_temp = self.load_df(start, end, cols, timestamp_col,
                                           supplemental_where=supplemental_where)
                    df_temp = df_temp.compute()
                    df_temp[timestamp_col] = pd.to_datetime(df_temp[timestamp_col])
                    if df_temp is not None:
                        if len(df_temp) > 1:
                            string = '{} {}(s) prev'.format(counter, period)
                            # label period
                            df_temp = df_temp.assign(period=string)
                            # relabel days to get matching day of week,doy, dom, for different periods
                            df_temp = self.label_dates_pop(df_temp, period, timestamp_col)
                            # logger.warning('df temp loaded for %s previous: %s',counter,len(df_temp))
                            df_current = pd.concat([df_current, df_temp])
                            del df_temp
                            gc.collect()
                    # shift the loading window
                    counter += 1
                    start, end = self.shift_period_range(period, start, end)
                return df_current
            except Exception:
                logger.error('period over period', exc_info=True)

        # label dates for period over period (pop)
        def label_dates_pop(self, df, period, timestamp_col):
            """Add a 'dayset' column aligning rows across periods
            (day-of-week / day-of-month / day-of-year / day-in-quarter)."""
            df[timestamp_col] = pd.to_datetime(df[timestamp_col])

            def label_qtr_pop(y):
                # days elapsed since the start of y's quarter
                try:
                    curr_quarter = int((y.month - 1) / 3 + 1)
                    start = datetime(y.year, 3 * curr_quarter - 2, 1)
                    return abs((start - y).days)
                except Exception:
                    logger.error('df label quarter', exc_info=True)

            try:
                if period == 'week':
                    df['dayset'] = df[timestamp_col].dt.dayofweek
                elif period == 'month':
                    df['dayset'] = df[timestamp_col].dt.day
                elif period == 'year':
                    # FIX: Series has no .timetuple(); use the vectorized accessor
                    df['dayset'] = df[timestamp_col].dt.dayofyear
                elif period == 'quarter':
                    df['dayset'] = df[timestamp_col].apply(lambda x: label_qtr_pop(x))
                return df
            except Exception:
                logger.error('label data ', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------

        def graph_periods_to_date(self, df1, timestamp_filter_col, variable):
            """Compute the week/month/quarter/year-to-date aggregate of
            ``variable`` and push the results onto the KPI cards."""
            try:
                if self.crypto != 'all':
                    df1 = df1[df1.crypto == self.crypto]
                df1 = df1.compute()
                dct = {}
                for idx, period in enumerate(['week', 'month', 'quarter', 'year']):
                    df = self.period_to_date(
                        df1, timestamp=dashboard_config['dates']['last_date'],
                        timestamp_filter_col=timestamp_filter_col, period=period)
                    # get unique instances
                    df = df[[variable]]
                    df = df.drop_duplicates(keep='first')
                    # logger.warning('post duplicates dropped:%s', df.head(10))
                    if self.groupby_dict[variable] == 'sum':
                        data = int(df[variable].sum())
                    elif self.groupby_dict[variable] == 'mean':
                        data = "{}%".format(round(df[variable].mean(), 3))
                    del df
                    gc.collect()
                    dct[period] = data
                self.update_cards(dct)
            except Exception:
                logger.error('graph periods to date', exc_info=True)

        def graph_period_over_period(self, period):
            """Return a hvplot bar chart comparing ``self.variable`` for the
            selected window against the same window in previous periods."""
            try:
                periods = [period]
                start_date = self.pop_start_date
                end_date = self.pop_end_date
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date, datetime.min.time())
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                today = datetime.combine(datetime.today().date(), datetime.min.time())
                '''
                - if the start day is today (there is no data for today),
                  adjust start date
                '''
                if start_date == today:
                    logger.warning('START DATE of WEEK IS TODAY.!NO DATA DATA')
                    start_date = start_date - timedelta(days=7)
                    self.datepicker_pop_start.value = start_date
                cols = [self.variable, self.timestamp_col]
                supplemental_where = None
                if self.crypto != 'all':
                    supplemental_where = "AND crypto = '{}'".format(self.crypto)
                df = self.load_df(start_date=start_date, end_date=end_date,
                                  cols=cols, timestamp_col='timestamp',
                                  supplemental_where=supplemental_where)
                # drop comparisons that are shorter than the selected window
                if abs(start_date - end_date).days > 7:
                    if 'week' in periods:
                        periods.remove('week')
                if abs(start_date - end_date).days > 31:
                    if 'month' in periods:
                        periods.remove('month')
                if abs(start_date - end_date).days > 90:
                    if 'quarter' in periods:
                        periods.remove('quarter')
                df = df.compute()
                for idx, period in enumerate(periods):
                    df_period = self.period_over_period(
                        df, start_date=start_date, end_date=end_date,
                        period=period, history_periods=self.pop_history_periods,
                        timestamp_col='timestamp')
                    logger.warning('LINE 368: dayset:%s', df_period.head(30))
                    groupby_cols = ['dayset', 'period']
                    if len(df_period) > 0:
                        # logger.warning('line 150 df_period columns:%s',df.columns)
                        df_period = df_period.groupby(groupby_cols).agg({self.variable: 'sum'})
                        df_period = df_period.reset_index()
                    else:
                        df_period = df_period.rename(index=str, columns={'day': 'dayset'})
                    prestack_cols = list(df_period.columns)
                    df_period = self.split_period_into_columns(
                        df_period, col_to_split='period', value_to_copy=self.variable)
                    # short term fix: filter out the unnecessary first day added by a
                    # corrupt quarter functionality
                    if period == 'quarter':
                        min_day = df_period['dayset'].min()
                        logger.warning('LINE 252: MINIUMUM DAY:%s', min_day)
                        df_period = df_period[df_period['dayset'] > min_day]
                    poststack_cols = list(df_period.columns)
                    title = "{} over {}".format(period, period)
                    plotcols = list(np.setdiff1d(poststack_cols, prestack_cols))
                    # include current period if not extant
                    df_period, plotcols = self.pop_include_zeros(
                        df_period, plotcols=plotcols, period=period)
                    # logger.warning('line 155 cols to plot:%s',plotcols)
                    if self.groupby_dict[self.variable] == 'sum':
                        xlabel = 'frequency'
                    elif self.groupby_dict[self.variable] == 'mean':
                        xlabel = '%'
                    if idx == 0:
                        p = df_period.hvplot.bar('dayset', plotcols, rot=45,
                                                 title=title, stacked=False,
                                                 width=1200, height=400,
                                                 value_label=xlabel)
                    else:
                        p += df_period.hvplot.bar('dayset', plotcols, rot=45,
                                                  title=title, stacked=False,
                                                  width=1200, height=400,
                                                  value_label=xlabel)
                return p
            except Exception:
                logger.error('period over period to date', exc_info=True)

    def update(attrname, old, new):
        """Widget callback: refresh cards and relaunch the pop streams."""
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.crypto = crypto_select.value
        thistab.variable = variable_select.value
        thistab.social_media = social_media_select.value
        thistab.graph_periods_to_date(thistab.df, 'timestamp', thistab.variable)
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_period_over_period():
        """Button callback: re-run period-over-period with the chosen dates."""
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        thistab.pop_start_date = thistab.datepicker_pop_start.value  # trigger period over period
        thistab.pop_end_date = datepicker_pop_end.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    try:
        cols = []
        thistab = Thistab(table='external_daily', cols=cols)

        # ------------------------------------- SETUP ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = datetime(last_date.year, 1, 1, 0, 0, 0)
        # NOTE(review): loadcols is built then immediately reset to [] (load all
        # columns) -- looks like a debugging leftover; kept as-is to preserve behavior.
        loadcols = ['timestamp', 'crypto'] + thistab.variables
        loadcols = []
        thistab.df = thistab.load_df(first_date, last_date, loadcols,
                                     timestamp_col='timestamp')
        thistab.graph_periods_to_date(thistab.df,
                                      timestamp_filter_col='timestamp',
                                      variable=thistab.variable)
        thistab.section_header_updater('cards', label='')
        thistab.section_header_updater('pop', label='')

        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        thistab.pop_end_date = last_date
        thistab.pop_start_date = thistab.first_date_in_period(thistab.pop_end_date, 'week')
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        datepicker_pop_end = DatePicker(title="Period end",
                                        min_date=first_date_range,
                                        max_date=last_date_range,
                                        value=thistab.pop_end_date)
        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(5),
                                   options=thistab.menus['history_periods'])
        pop_button = Button(label="Select dates/periods, then click me!",
                            width=15, button_type="success")
        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)
        social_media_select = Select(title='Select social media',
                                     value=thistab.social_media,
                                     options=thistab.menus['social_media'])
        crypto_select = Select(title='Select item/crypto of interest',
                               value=thistab.crypto,
                               options=thistab.items)

        # --------------------------------- GRAPHS ---------------------------
        hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)
        hv_pop_month = hv.DynamicMap(thistab.pop_month, streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)
        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)

        # -------------------------------- CALLBACKS ------------------------
        variable_select.on_change('value', update)
        pop_button.on_click(update_period_over_period)  # lags array
        social_media_select.on_change('value', update)
        crypto_select.on_change('value', update)

        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element
        controls_pop = WidgetBox(thistab.datepicker_pop_start,
                                 datepicker_pop_end, pop_number_select, pop_button)
        controls_top = WidgetBox(social_media_select, crypto_select, variable_select)
        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.information_div()],
            [thistab.section_headers['cards']],
            [Spacer(width=20, height=2)],
            [thistab.KPI_card_div, controls_top],
            [thistab.section_headers['pop']],
            [Spacer(width=20, height=25)],
            [pop_week.state, controls_pop],
            [pop_month.state],
            [pop_quarter.state],
            [thistab.notification_div['bottom']]
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
def eda_country_indexes_tab(panel_title):
    """Build the country-indexes EDA dashboard tab (bokeh ``Panel``).

    Loads the mongo ``country_indexes`` collection, melts the dotted
    'Country.indicator' columns into rows, and lays out the plot with a
    country selector.  On failure an error-flag tab is returned.

    :param panel_title: title shown on the returned bokeh Panel tab.
    :return: bokeh ``Panel``.
    """

    class Thistab(Mytab):
        """Tab-local state and data wrangling; extends the Mytab base."""

        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.trigger = 0
            self.groupby_dict = {}
            self.div_style = """ style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.countries = []  # populated while melting column labels
            self.country = 'Barbados'
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'gender': ['all', 'male', 'female'],
            }
            self.multiline_vars = {'x': '', 'y': ''}
            self.timestamp_col = 'timestamp'

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                position:relative;background:black;margin-bottom:200px">
                <h1 style="color:#fff;margin-bottom:300px">{}</h1>
            </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'info': self.section_header_div(text='Country indexes')
            }
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def load_df(self):
            """Load the whole collection into ``self.df`` (NaNs -> 0).
            Errors are logged, not raised; returns None either way."""
            try:
                df = json_normalize(
                    list(self.pym.db[self.table].find({}, {'_id': False})))
                df = df.fillna(0)
                logger.warning('LINE 96: country indicator:%s', df.head())
                self.countries = []
                self.df = df
            except Exception:
                logger.error('load', exc_info=True)

        def get_row_column_labels(self, txt):
            """Split a dotted 'Country.some-indicator' column name into
            (country, indicator) and register the country."""
            x = txt.split('.')
            if x[0] not in self.countries:
                self.countries.append(x[0])
                # FIX: sorted() returned a discarded copy; sort in place.
                self.countries.sort()
            x[-1] = x[-1].replace('-', '_')
            return x[0], x[-1]

        def melt_df(self):
            """Melt wide 'Country.indicator' columns of ``self.df`` into the
            long dataframe ``self.df1`` with a 'country' column."""
            try:
                # logger.warning('%s',df.head(20))
                temp_dct = {'country': []}
                # loop through items
                counter = 0
                values_present = []
                for col in self.df.columns:
                    if col != 'timestamp':
                        # label for each coin, only run once
                        if counter == 0:
                            row, column = self.get_row_column_labels(col)
                            temp_dct['country'].append(row)
                            if column not in temp_dct.keys():
                                temp_dct[column] = []
                            try:
                                tmp = self.df[[col]]
                                val = tmp.values[0]
                            except Exception:
                                val = [0]
                            temp_dct[column].append(val[0])
                            # logger.warning('LINE 140 tmp dict:%s',temp_dct)
                # find items that are not present
                # not_present = list
                counter += 1
                '''
                # logger.warning('item-length=%s-%s',key,len(temp_dct[key]))
                # convert to dataframe
                for item in temp_dct.keys():
                    # logger.warning('%s length = %s',item,len(temp_dct[item]))
                    if len(temp_dct[item]) == 0:
                        temp_dct[item] = [0] * len(temp_dct)
                '''
                self.df1 = pd.DataFrame.from_dict(temp_dct)
                # logger.warning('df after melt:%s',self.df1.head())
            except Exception:
                logger.error('melt coins', exc_info=True)

        def plot_country_rows(self, launch):
            """DynamicMap callback: lazily melt the data on first launch.
            NOTE(review): no plot object is returned yet -- placeholder."""
            try:
                if self.df1 is None:
                    self.melt_df()
            except Exception:
                logger.error('plot', exc_info=True)

    def update_country(attrname, old, new):
        """Widget callback: store the chosen country and relaunch the stream."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        # FIX: was thistab.country_select.value -- the Select widget is a local
        # of the enclosing function, not an attribute of thistab.
        thistab.country = country_select.value
        thistab.trigger += 1
        stream_launch_action_table.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'country_indexes'
        thistab = Thistab(table, [], [])
        thistab.load_df()

        # MANAGE STREAM
        stream_launch_action_table = streams.Stream.define('Launch', launch=-1)()

        # MAKE TABLES
        # --------------------- PLOTS---------------------------------
        hv_action_table = hv.DynamicMap(thistab.plot_country_rows,
                                        streams=[stream_launch_action_table])
        action_table = renderer.get_plot(hv_action_table)

        # CREATE WIDGETS
        # FIX: value was thistab.load_df() (which returns None); a Select needs
        # the current country string.
        country_select = Select(title='Select matrix',
                                value=thistab.country,
                                options=thistab.countries)
        # handle callbacks
        country_select.on_change('value', update_country)

        # create the dashboards
        controls = WidgetBox()
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.title_div('info', 400)],
                         [Spacer(width=20, height=30)],
                         [action_table.state],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
def crypto_clusters_eda_tab(cryptos, panel_title):
    """Build the crypto-clusters EDA tab (bokeh ``Panel``).

    Loads the cluster assignment from redis, aggregates each cluster's
    feature time series, and renders one multi-line figure per feature.
    On failure an error-flag tab is returned.

    :param cryptos: list of crypto identifiers shown in this tab.
    :param panel_title: title shown on the returned bokeh Panel tab.
    :return: bokeh ``Panel``.
    """
    global groupby_dict
    global features
    global cluster_dct
    # global source
    redis = PythonRedis()
    cluster_dct = redis.simple_load('clusters:cryptocurrencies')
    # NOTE(review): if redis returns None, `features`/`source` below stay
    # undefined and the try-block will fail into tab_error_flag -- verify intent.
    if cluster_dct is not None:
        groupby_dict = {}
        for var in cluster_dct['features']:
            groupby_dict[var] = 'sum'
        features = cluster_dct['features']
        source = {}
        for feature in features:
            source[feature] = ColumnDataSource(
                data=dict(xs=[], ys=[], labels=[], colors=[]))

    class Thistab(Mytab):
        """Tab-local state and graphing; extends the Mytab base."""

        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols, panel_title=panel_title)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = [
                'watch', 'fork', 'issue', 'release', 'push',
                'tw_mentions', 'tw_positive', 'tw_compound', 'tw_neutral',
                'tw_negative', 'tw_emojis_positive', 'tw_emojis_compound',
                'tw_emojis_negative', 'tw_emojis_count', 'tw_reply_hashtags'
            ]
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']
            self.trigger = 0
            txt = """<div style="text-align:center;background:black;width:100%;">
                <h1 style="color:#fff;"> {}</h1></div>""".format('Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=1400, height=20),
                'bottom': Div(text=txt, width=1400, height=10),
            }
            self.cluster_dct = cluster_dct
            self.groupby_dict = groupby_dict
            self.features = features
            self.crypto = 'all'
            self.div_style = """ style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.significant_effect_dict = {}
            self.df1 = None
            self.section_headers = {
                'ts': self.section_header_div(
                    'Comparison of clusters across variables:---------------------',
                    width=600)
            }
            self.timestamp_col = None
            self.colors = None

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=1400):
            """Return a styled section-header Div."""
            text = '<{} style="color:#4221cc;">{}</{}>'.format(
                html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=300):
            """Return the (currently empty) interpretation help Div."""
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
            </ul>
            </div>
            """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ////////////////////////// UPDATERS ///////////////////////
        def section_head_updater(self, section, txt):
            """Replace the text of the given section header."""
            try:
                # FIX: was self.section_header_div[section] -- that indexes the
                # *method*; the headers live in the self.section_headers dict.
                self.section_headers[section].text = txt
            except Exception:
                logger.error('section_head_updater', exc_info=True)

        def notification_updater(self, text):
            """Show ``text`` in both the top and bottom notification bars."""
            txt = """<div style="text-align:center;background:black;width:100%;">
                <h4 style="color:#fff;"> {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        # /////////////////////////// LOAD CLUSTERS //////////////////////
        def prep_data(self, df, timestamp_col):
            """Map cryptos to their cluster label, then resample/aggregate
            each cluster's features; result stored in ``self.df1``."""

            def label_cluster(x):
                # return the cluster whose member list contains x, else x itself
                for key, values in self.cluster_dct.items():
                    if key not in ['timestamp', 'variables']:
                        if x in values:
                            return key
                return x

            try:
                cols = self.features + ['crypto', 'timestamp']
                df = df[cols]
                # groupby and resample
                df['crypto'] = df['crypto'].map(lambda x: label_cluster(x))
                df = df.rename(columns={'crypto': 'cluster'})
                df = df.compute()
                df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce')
                df.set_index(timestamp_col, inplace=True)
                df = df.groupby('cluster').resample(self.resample_period).agg(
                    self.groupby_dict)
                df.reset_index(inplace=True)
                df.set_index(timestamp_col, inplace=True)
                self.timestamp_col = timestamp_col
                self.df1 = df
            except Exception:
                logger.error('prep data', exc_info=True)

        def graph_ts(self):
            """Push one multi-line dataset per feature (a line per cluster)
            into the module-level ColumnDataSources."""
            try:
                # global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    clusters = df['cluster'].unique()
                    self.colors = [''] * len(clusters)
                    for idx, feature in enumerate(clusters):
                        self.colors[idx] = dashboard_config['colors'][idx]
                    if self.features is not None:
                        for idx, feature in enumerate(self.features):
                            df1 = df[['cluster', feature]]
                            # pivot into columns for cluster
                            df1 = df1.pivot(columns='cluster')
                            data = dict(
                                x=[df1.index.values] * len(clusters),
                                y=[df1[name].values for name in df1],
                                labels=clusters,
                                colors=self.colors)
                            source[feature].data = data
            except Exception:
                logger.error('graph ts', exc_info=True)

        def graph_chartify(self, timestamp_col):
            """Alternative chartify rendering (currently unused by the layout).
            NOTE(review): only the last feature's chart is returned."""
            try:
                # global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    df = df.reset_index()
                    for feature in self.features:
                        ch = chartify.Chart(blank_labels=True, x_axis_type='datetime')
                        ch.set_title("CHARTIFY")
                        ch.plot.line(
                            # Data must be sorted by x column
                            data_frame=df.sort_values(timestamp_col),
                            x_column=timestamp_col,
                            y_column=feature,
                            color_column='cluster')
                    return ch
            except Exception:
                logger.error('graph chartify', exc_info=True)

    def update():
        """Button callback: reload for the chosen dates and redraw."""
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value, datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Select callback: re-aggregate with the new resample period."""
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.resample_period = resample_select.value
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("ready")

    try:
        table = 'external_daily'
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00", "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = dashboard_config['dates']['current_year_start']

        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp', cols=[])
        thistab.prep_data(thistab.df, timestamp_col='timestamp')

        # MANAGE STREAMS ---------------------------------------------------------

        # CREATE WIDGETS ----------------------------------------------------------------
        datepicker_start = DatePicker(title="Start", min_date=first_date_range,
                                      max_date=last_date_range, value=first_date)
        datepicker_end = DatePicker(title="End", min_date=first_date_range,
                                    max_date=last_date_range, value=last_date)
        load_dates_button = Button(
            label="Select dates/periods, then click me!",
            width=20, height=8, button_type="success")
        resample_select = Select(title='Select summary period',
                                 value=thistab.resample_period,
                                 options=thistab.menus['resample_periods'])

        # -------------------------------- PLOTS ---------------------------
        thistab.graph_ts()
        p = {}
        for feature in features:
            p[feature] = figure(x_axis_type="datetime", plot_width=1400,
                                plot_height=400, title=feature)
            p[feature].multi_line(
                xs='x',
                ys='y',
                legend='labels',
                line_color='colors',
                line_width=5,
                hover_line_color='colors',
                hover_line_alpha=1.0,
                source=source[feature],
            )
            p[feature].add_tools(
                HoverTool(show_arrow=False,
                          line_policy='next',
                          tooltips=[
                              ('freq', '$y'),
                          ]))
        # ch = thistab.graph_chartify(timestamp_col='timestamp')

        # -------------------------------- CALLBACKS ------------------------
        load_dates_button.on_click(update)  # lags array
        resample_select.on_change('value', update_resample)

        # -----------------------------------LAYOUT ----------------------------
        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_left = WidgetBox(datepicker_start, load_dates_button)
        controls_right = WidgetBox(datepicker_end)
        grid_data = [
            # [ch.figure],
            [thistab.notification_div['top']],
            [controls_left, controls_right],
            [thistab.section_headers['ts'], resample_select],
        ]
        for feature in features:
            grid_data.append([p[feature]])
            logger.warning('p:%s', p[feature])
        grid_data.append([thistab.notification_div['bottom']])
        grid = gridplot(grid_data)

        # Make a tab with the layout
        tab = Panel(child=grid, title=thistab.panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(thistab.panel_title)
def twitter_loader_tab(panel_title):
    """Build the 'Twitter loader' dashboard tab.

    Creates a TwitterLoader that searches tweets for a topic, then wires
    HoloViews DynamicMaps (table, jitter, sentiment) and Bokeh widgets into
    a gridplot wrapped in a Panel. Returns the Panel, or the error-flag tab
    if construction fails.
    """

    class TwitterLoader():
        def __init__(self, search_term='beiber'):
            # ----- TWITTER SETUP
            self.api = None  # tweepy API handle; lazily created in get_credentials()
            self.topic = search_term
            self.options = {'messages': [str(x) for x in range(10, 1000, 50)]}
            self.limits = {
                'messages': int(self.options['messages'][0]),
            }
            self.hidden_path = dashboard_config['hidden_path']
            # NOTE(review): day/month order differs from the other tabs'
            # "%Y-%m-%d" format; it is only used for a self-consistent
            # round-trip in twitter_datetime_to_epoch, so left as-is.
            self.DATEFORMAT = "%Y-%d-%m %H:%M:%S"
            self.df = None  # most recent search results (created_at, text)

            min_date = datetime.today() - timedelta(days=7)
            print(min_date)
            self.selects = {
                'window': Select(title='Select rolling mean window',
                                 value='1',
                                 options=[str(x) for x in range(1, 20, 2)]),
                'date_since': DatePicker(title="Tweets since:",
                                         min_date=min_date,
                                         max_date=datetime.today(),
                                         value=min_date)
            }
            self.selects_values = {
                'window': int(self.selects['window'].value),
                'date_since': self.selects['date_since'].value
            }

            # resample menu: 30Min, 60Min, ... 330Min
            self.resample_period = {'menu': []}
            for val in range(30, 350, 30):
                self.resample_period['menu'].append(str(val) + 'Min')
            self.resample_period['value'] = self.resample_period['menu'][0]

            # ----- DIV VISUAL SETUP
            self.trigger = -1  # monotonically increasing stream-event counter
            self.html_header = 'h2'
            self.margin_top = 150
            self.margin_bottom = -150
            self.div_style = """ style='width:350px; margin-left:25px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.page_width = 1250
            txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px;
                position:relative;background:black;margin-bottom:200px">
                <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'twitter': self.section_header_div(text='Twitter search results:',
                                                   width=600, html_header='h2',
                                                   margin_top=155, margin_bottom=-155),
            }
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Push a status banner into the top and bottom notification divs."""
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                position:relative;background:black;">
                <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                </div>""".format(self.page_width, 50, text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def title_div(self, text, width=700):
            """Return a simple h2 title Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        # ////////////////////////// DIVS SETUP END /////////////////////////////////

        # /////////////////////////// UTILS BEGIN ///////////////////////////
        def twitter_datetime_to_epoch(self, ts):
            """Parse a Twitter timestamp string; return (datetime, epoch seconds).

            The strftime/strptime round-trip truncates the timestamp to
            self.DATEFORMAT resolution (and drops the timezone).
            """
            ts = datetime.strptime(ts, '%a %b %d %H:%M:%S %z %Y')
            # FIX: original called ts.created_at(), which does not exist on
            # datetime; timestamp() is the epoch-seconds accessor intended here.
            ts_epoch = ts.timestamp()
            ts = datetime.strftime(ts, self.DATEFORMAT)
            ts = datetime.strptime(ts, self.DATEFORMAT)
            return ts, ts_epoch

        def write_to_file(self):
            """Dump the current result frame to a tab-separated CSV."""
            try:
                # FIX: self.limits has no 'time' key (only 'messages'); the
                # original lookup always raised KeyError and the write never
                # happened. Use .get() so the filename renders and the dump runs.
                filename = """{}_searches_for_last_{}sec_or_last_{}messages.csv""".format(
                    self.topic, self.limits.get('time', 'NA'), self.limits['messages'])
                self.df.to_csv(filename, sep='\t', index=False)
            except Exception:
                logger.error('Error writing to file', exc_info=True)

        # /////////////////////////// UTILS END /////////////////////

        def reset_data(self):
            """Drop any previously loaded tweets."""
            self.df = None

        def get_credentials(self, filename='twitter_credentials.json'):
            """Load OAuth credentials from the hidden path and build the tweepy API."""
            try:
                filename = self.hidden_path + filename
                filepath = join(dirname(__file__), filename)
                print(filepath)
                if self.api is None:
                    with open(filepath, 'r') as f:
                        credentials_dict = json.load(f)
                    auth = tw.OAuthHandler(credentials_dict['consumer_key'],
                                           credentials_dict['consumer_secret'])
                    auth.set_access_token(
                        credentials_dict['access_token_key'],
                        credentials_dict['access_token_secret'],
                    )
                    self.api = tw.API(auth, wait_on_rate_limit=True)
                    logger.info('CREDENTIALS LOADED')
                    try:
                        self.api.verify_credentials()
                        print("Authentication OK")
                    except Exception:
                        print("Error during authentication")
            except Exception:
                print('credentials not loaded')

        def load_data_about_topic(self):
            """Page through the Twitter search API for self.topic into self.df."""
            try:
                if self.api is None:
                    self.get_credentials()
                # NOTE(review): date_since is computed but never passed to the
                # search calls — the since-date filter appears intended but is
                # not applied. Left unchanged to preserve behavior.
                date_since = datetime.combine(
                    self.selects_values['date_since'], datetime.min.time())
                logger.warning('LINE 186:%s,messages=%s', self.topic,
                               self.limits['messages'])

                # initialize a list to hold all the tweepy Tweets
                alltweets = []
                # make initial request for most recent tweets
                # (200 is the maximum allowed count)
                new_tweets = self.api.search(q=self.topic,
                                             count=self.limits['messages'])
                alltweets.extend(new_tweets)
                # FIX: guard against an empty first page; the original
                # alltweets[-1] raised IndexError when the search had no hits.
                if len(alltweets) == 0:
                    self.df = pd.DataFrame(columns=['created_at', 'text'])
                    return
                # save the id of the oldest tweet less one
                oldest = alltweets[-1].id - 1
                # keep grabbing tweets until there are no tweets left to grab
                stop = False
                while not stop:
                    print(f"getting tweets before {oldest}")
                    # subsequent requests use max_id to prevent duplicates
                    new_tweets = self.api.search(q=self.topic, count=100,
                                                 max_id=oldest,
                                                 tweet_mode='extended')
                    alltweets.extend(new_tweets)
                    if len(alltweets) > self.limits['messages'] or len(new_tweets) <= 0:
                        stop = True
                    # update the id of the oldest tweet less one
                    oldest = alltweets[-1].id - 1
                    print(f"...{len(alltweets)} tweets downloaded so far")

                # transform the tweepy tweets into rows for the dataframe
                results = []
                for tweet in alltweets:
                    try:
                        results.append([tweet.created_at, tweet.text])
                    except Exception:
                        # extended-mode tweets may lack .text; skip them as before
                        print("skipped this one")
                self.df = pd.DataFrame(data=results,
                                       columns=['created_at', 'text'])
                logger.warning('LINE 211 self.df:%s', self.df.head(20))
            except Exception:
                logger.error('error in loading data', exc_info=True)

        def run(self):
            """Load (or reload) tweets for the current topic."""
            try:
                self.load_data_about_topic()
                # self.write_to_file()
            except Exception:
                logger.error('run', exc_info=True)

        # #################################### PLOTS ######################################
        def sentiment_analysis(self, launch=1):
            """Resampled mean pos/neg/neu sentiment line plot."""
            try:
                # FIX: copy the slice so column assignment below does not
                # mutate self.df through a view (pandas SettingWithCopy).
                df = self.df[['text', 'created_at']].copy()
                cols = ['pos', 'neg', 'neu']
                for col in cols:
                    if col not in df.columns:  # create only once
                        df[col] = 0
                df['pos'], df['neg'], df['neu'] = zip(
                    *df['text'].map(sentiment_analyzer_scores))
                df = df.fillna(0)
                logger.warning('resample period:%s', self.resample_period['value'])
                df = df.set_index('created_at').resample(self.resample_period['value']) \
                    .agg({'pos': 'mean', 'neg': 'mean', 'neu': 'mean'})
                df = df.reset_index()
                df = df.fillna(0)
                logger.warning('LINE 307, df:%s', df.head(30))
                p = df.hvplot.line(x='created_at', y=cols, width=1200, height=600)
                return p
            except Exception:
                logger.error('run', exc_info=True)

        def visual(self, launch=1):
            """Raw results table (created_at, text)."""
            try:
                p = self.df.hvplot.table(columns=['created_at', 'text'],
                                         width=1200, height=2000)
                return p
            except Exception:
                logger.error('output data', exc_info=True)

        def jitter(self, launch=1):
            """Line plot of the time gap between consecutive tweets."""
            try:
                df = self.df.copy()
                # diff(-1) gives negative deltas; flip sign to get elapsed time
                df['jitter'] = df['created_at'].diff(periods=-1)
                df['jitter'] = df['jitter'] * -1
                df = df.dropna()
                p = df.hvplot.line(x='created_at', y='jitter',
                                   width=1200, height=600)
                return p
            except Exception:
                logger.error('output data', exc_info=True)

    # ------------------------- WIDGET CALLBACKS -------------------------
    def update_tweet_search():
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.reset_data()
        thistab.limits['messages'] = int(inputs['messages_limit'].value)
        thistab.topic = inputs['search_term'].value
        thistab.run()
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        # FIX: the stream was defined with parameter 'launch'; the original
        # keyword 'launch_this' is not a stream parameter and made the
        # event call fail, so the sentiment plot never refreshed here.
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample_period(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.resample_period['value'] = new
        thistab.trigger += 1
        # stream_launch_rolling_mean.event(launch=thistab.trigger)
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        thistab = TwitterLoader()
        thistab.run()

        # MANAGE STREAM
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_rolling_mean = streams.Stream.define('Launch', launch=-1)()
        stream_launch_sentiment = streams.Stream.define('Launch', launch=-1)()

        # DYNAMIC GRAPHS/OUTPUT
        hv_visual = hv.DynamicMap(thistab.visual, streams=[stream_launch])
        visual = renderer.get_plot(hv_visual)
        hv_jitter = hv.DynamicMap(thistab.jitter, streams=[stream_launch])
        jitter = renderer.get_plot(hv_jitter)
        hv_sentiment_analysis = hv.DynamicMap(
            thistab.sentiment_analysis, streams=[stream_launch_sentiment])
        sentiment_analysis = renderer.get_plot(hv_sentiment_analysis)

        # CREATE WIDGETS
        inputs = {
            'search_term': TextInput(title='Enter search term. For list, use commas',
                                     value=thistab.topic),
            'messages_limit': Select(title='Select messages limit (5000 = unbounded)',
                                     value=str(thistab.limits['messages']),
                                     options=thistab.options['messages']),
            'resample': Select(title='Select resample period',
                               value=thistab.resample_period['value'],
                               options=thistab.resample_period['menu'])
        }
        tweet_search_button = Button(label='Enter filters/inputs, then press me',
                                     button_type="success")

        # WIDGET CALLBACK
        tweet_search_button.on_click(update_tweet_search)
        inputs['resample'].on_change('value', update_resample_period)

        # COMPOSE LAYOUT
        # group controls (filters/input elements)
        controls_tweet_search = WidgetBox(
            inputs['search_term'],
            inputs['messages_limit'],
            tweet_search_button,
        )
        controls_rolling_mean = WidgetBox(thistab.selects['window'], )
        controls_resample_period = WidgetBox(inputs['resample'])

        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.title_div('Sentiment analysis of tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [sentiment_analysis.state, controls_resample_period],
            [thistab.title_div('Time between tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [jitter.state],
            [
                thistab.title_div(
                    'Twitter search results (use filters on right, then click button):',
                    1000)
            ],
            [Spacer(width=20, height=30)],
            [visual.state, controls_tweet_search],
            [thistab.notification_div['bottom']],
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('Twitter loader:', exc_info=True)
        return tab_error_flag(panel_title)
def EDA_business_events_tab(panel_title, DAYS_TO_LOAD=90): timeline_source = ColumnDataSource(data=dict( Item=[], Start=[], End=[], Color=[], start=[], end=[], ID=[], ID1=[] )) class Thistab(KPI): def __init__(self, table, cols=[]): KPI.__init__(self, table, name='business', cols=cols) self.table = table self.df = None self.df1 = None self.df_pop = None self.checkboxgroup = {} self.period_to_date_cards = { } self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0) self.timestamp_col = 'start_actual' self.pym = PythonMongo('aion') self.groupby_dict = { 'event': 'count', 'type':'count', 'rate':'sum', 'event_duration': 'sum', 'start_delay': 'mean', 'end_delay': ' mean', 'event_location':'count', 'patron':'count', 'patron_likes':'nunique', 'patron_gender':'count', 'patron_age':'mean', 'patron_friend':'nunique', 'patron_friend_gender':'count', 'patron_friend_age':'mean', 'patron_discovery':'nunique', 'manager':'count', 'manager_gender':'count', 'manager_age':'mean', 'manager_education':'count', 'manager_parish':'count', 'staff':'count', 'staff_gender':'count', 'staff_age':'mean', 'staff_education':'count', 'staff_parish':'count', 'remuneration':'sum', } self.menus = { 'company': [], 'type': [], 'patron':[], 'manager':[], 'gender': ['all', 'male', 'female','other'], 'variables': list(self.groupby_dict.keys()), 'history_periods': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], } #self.variables = sorted(list(self.groupby_dict.keys())) self.variable = 'rate' # ######### SETUP FILTERS ######################### self.selects = { 'event': Select(title='Select event', value="all",options=['all']), 'company' : Select(title='Select company', value="all",options=['all']), 'patron_likes' : Select(title='Select patron likes/hobbies', value='all', options=['all']), 'patron' : Select(title='Select patron', value='all', options=['all']), 'manager_education' : Select(title="Select manager's education", value='all', options=['all']), 'staff_education' : 
Select(title="Select staff's education", value='all', options=['all']), 'manager_gender' : Select(title="Select manager's gender", value='all', options=self.menus['gender']), 'staff_gender' : Select(title="Select staff's gender", value='all', options=self.menus['gender']), 'patron_gender' : Select(title="Select patron's gender", value='all', options=self.menus['gender']), 'manager_parish' : Select(title="Select manager's parish", value='all', options=['all']), 'staff_parish' : Select(title="Select staff's parish", value='all', options=['all']), 'patron_parish' : Select(title="Select patron's parish", value='all', options=['all']), 'type': Select(title="Select event type", value='all', options=['all']), } self.vars = { 'event': 'all', 'company': 'all', 'patron_likes': 'all', 'patron': 'all', 'manager_education': 'all', 'staff_education': 'all', 'manager_gender': 'all', 'staff_gender': 'all', 'patron_gender': 'all', 'manager_parish':'all', 'patron_parish':'all', 'type':'all' } self.multiline_vars = { 'xs' : ['patron_likes','manager_education','staff_education', 'manager_gender','staff_gender','patron_gender','manager_parish', 'patron_parish','type'], 'ys': ['rate','remuneration','attendance'] } self.multiline_variable = { 'x':'manager_gender', 'y':'rate' } self.resample_period = { 'multiline' : 'D' } self.chord_data = { 'rename': { 'patron': 'source', 'company': 'target', 'rate': 'value' }, 'percentile_threshold': .75, } self.feature_list = self.multiline_vars['xs'] + ['rate','remuneration','start_delay','end_delay', 'staff_age','manager_age','patron_age'] self.percentile_threshold = 10 self.tsa_variable = 'event' self.forecast_days = 30 self.initial_date = datetime.strptime('2015-01-01 00:00:00',self.DATEFORMAT) self.datepicker_pop_start = DatePicker( title="Period start", min_date=self.initial_date, max_date=dashboard_config['dates']['last_date'], value=dashboard_config['dates']['last_date']) # ------- DIVS setup begin self.page_width = 1200 txt = """<hr/><div 
style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome') self.notification_div = { 'top': Div(text=txt, width=self.page_width, height=20), 'bottom': Div(text=txt, width=self.page_width, height=10), } self.section_divider = '-----------------------------------' self.section_headers = { 'cards': self.section_header_div(text='Period to date:{}'.format(self.section_divider), width=1000, html_header='h2', margin_top=50, margin_bottom=5), 'pop': self.section_header_div(text='Period over period:{}'.format(self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'chord': self.section_header_div(text='Patron networks:{}'.format(self.section_divider), width=600, html_header='h3', margin_top=5, margin_bottom=-155), 'tsa': self.section_header_div(text='Forecasts (TSA):{}'.format(self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'multiline': self.section_header_div(text='Comparative graphs:{}'.format(self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'patron info': self.section_header_div(text='Patron info:{}'.format(self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'relationships': self.section_header_div(text='Statistically Significant Relationships:---', width=600, html_header='h2', margin_top=5, margin_bottom=-155), } self.KPI_card_div = self.initialize_cards(self.page_width, height=40) start = datetime(2014, 1, 1, 0, 0, 0) end = datetime(2019, 5, 15, 0, 0, 0) self.tools = [BoxZoomTool(), ResetTool(), PanTool(), SaveTool(), WheelZoomTool()] self.timeline_vars = { 'company': '', 'event': '', 'types': ['all'], 'type': 'all', 'DF': None, 'G': figure( title=None, x_axis_type='datetime', width=1200, height=900, y_range=[], x_range=Range1d(start, end), toolbar_location=None), 'toolbar_box': 
ToolbarBox() } # ----- UPDATED DIVS END # ---------------------- DIVS ---------------------------- def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150): text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \ .format(margin_top, margin_bottom, html_header, text, html_header) return Div(text=text, width=width, height=15) def information_div(self, width=400, height=300): txt = """ <div {}> <h4 {}>How to interpret sentiment score</h4> <ul style='margin-top:-10px;'> <li> </li> <li> </li> <li> </li> <li> </li> </ul> </div> """.format(self.div_style, self.header_style) div = Div(text=txt, width=width, height=height) return div def initialize_cards(self, width, height=250): try: txt = '' for period in ['year', 'quarter', 'month', 'week']: design = random.choice(list(KPI_card_css.keys())) txt += self.card(title='', data='', card_design=design) text = """<div style="margin-top:100px;display:flex; flex-direction:row;"> {} </div>""".format(txt) div = Div(text=text, width=width, height=height) return div except Exception: logger.error('initialize cards', exc_info=True) def df_load(self, req_startdate, req_enddate, table, cols, timestamp_col): try: # get min and max of loaded df if self.df is not None: loaded_min = self.df[timestamp_col].min() loaded_max = self.df[timestamp_col].max() if loaded_min <= req_startdate and loaded_max >= req_enddate: df = self.df[(self.df[timestamp_col] >= req_startdate) & (self.df[timestamp_col] <= req_enddate)] else: df = self.pym.load_df(req_startdate, req_enddate, table=table, cols=cols, timestamp_col=timestamp_col) else: df = self.pym.load_df(req_startdate, req_enddate, table=table, cols=cols, timestamp_col=timestamp_col) logger.warning('LINE 316: df:%s',df.head()) if df is not None and len(df) > 0: self.filter_df(df) return df except Exception: logger.error('df_load', exc_info=True) def load_menus(self,df1): try: logger.warning('LINE 
315:column%s',list(df1.columns)) for col in self.vars.keys(): self.selects[col].options = ['all'] + list(df1[col].unique()) except Exception: logger.error('load menus',exc_info=True) def filter_df(self, df1): try: for key in self.vars.keys(): logger.warning('LINE 343-self.df1-%s:%s', key,self.vars[key]) if self.vars[key] != 'all': logger.warning('LINE 345:key for filtering :%s',key) df1 = df1[df1[key] == self.vars[key]] return df1 logger.warning('LINE 342-self.df1:%s',self.df1.head()) except Exception: logger.error('period to date', exc_info=True) # ------------------------- CARDS END ----------------------------------- def period_to_date(self, df, timestamp=None, timestamp_filter_col='start_actual', cols=[], period='week'): try: if timestamp is None: timestamp = datetime.now() timestamp = datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour, 0, 0) start = self.first_date_in_period(timestamp, period) # filter df[timestamp_filter_col] = pd.to_datetime(df[timestamp_filter_col], format=self.DATEFORMAT_PTD) df = df[(df[timestamp_filter_col] >= start) & (df[timestamp_filter_col] <= timestamp)] if len(cols) > 0: df = df[cols] return df except Exception: logger.error('period to date', exc_info=True) def period_over_period(self, df, start_date, end_date, period, history_periods=2, timestamp_col='start_actual'): try: # filter cols if necessary string = '0 {}(s) prev(current)'.format(period) # filter out the dates greater than today df_current = df df_current['period'] = string # label the days being compared with the same label if len(df_current) > 0: df_current = self.label_dates_pop(df_current, period, timestamp_col) # zero out time information start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0) end = datetime(end_date.year, end_date.month, end_date.day, 0, 0, 0) cols = list(df.columns) counter = 1 if isinstance(history_periods, str): history_periods = int(history_periods) # make dataframes for request no. 
of periods start, end = self.shift_period_range(period, start, end) while counter < history_periods and start >= self.initial_date: # load data if period == 'quarter': logger.warning('start:end %s:%s', start, end) df_temp = self.df_load(start, end,self.table,cols, timestamp_col) if df_temp is not None: if len(df_temp) > 1: string = '{} {}(s) prev'.format(counter, period) # label period df_temp['period'] = string # relabel days to get matching day of week,doy, dom, for different periods df_temp = self.label_dates_pop(df_temp, period, timestamp_col) # logger.warning('df temp loaded for %s previous: %s',counter,len(df_temp)) df_current = pd.concat([df_current, df_temp]) df_current = df_current.reset_index() del df_temp gc.collect() # shift the loading window counter += 1 start, end = self.shift_period_range(period, start, end) if period == 'week': logger.warning('LINE 327 df_current:%s', df_current.head(10)) return df_current except Exception: logger.error('period over period', exc_info=True) def label_dates_pop(self, df, period, timestamp_col): if df is not None: if len(df) > 0: df[timestamp_col] = pd.to_datetime(df[timestamp_col]) def label_qtr_pop(y): try: curr_quarter = int((y.month - 1) / 3 + 1) start = datetime(y.year, 3 * curr_quarter - 2, 1) return abs((start - y).days) except Exception: logger.error('df label quarter', exc_info=True) try: if period == 'week': df['dayset'] = df[timestamp_col].dt.dayofweek elif period == 'month': df['dayset'] = df[timestamp_col].dt.day elif period == 'year': df['dayset'] = df[timestamp_col].timetuple().tm_yday elif period == 'quarter': df['dayset'] = df[timestamp_col].apply(lambda x: label_qtr_pop(x)) return df except Exception: logger.error('label data ', exc_info=True) # -------------------- GRAPHS ------------------------------------------- def graph_periods_to_date(self, df1, timestamp_filter_col, variable): try: #df1 = df1.compute() dct = {} for idx, period in enumerate(['week', 'month', 'quarter', 'year']): df = 
self.period_to_date(df1, timestamp=dashboard_config['dates']['last_date'], timestamp_filter_col=timestamp_filter_col, period=period) # get unique instances df = df[[variable]] df = df.drop_duplicates(keep='first') # logger.warning('post duplicates dropped:%s', df.head(10)) if self.groupby_dict[variable] == 'sum': data = int(df[variable].sum()) elif self.groupby_dict[variable] == 'mean': data = "{}%".format(round(df[variable].mean(), 3)) else: data = int(len(list(df[variable].unique()))) del df gc.collect() dct[period] = data self.update_cards(dct) except Exception: logger.error('graph periods to date', exc_info=True) def graph_period_over_period(self, period): try: periods = [period] start_date = self.pop_start_date end_date = self.pop_end_date if isinstance(start_date, date): start_date = datetime.combine(start_date, datetime.min.time()) if isinstance(end_date, date): end_date = datetime.combine(end_date, datetime.min.time()) today = datetime.combine(datetime.today().date(), datetime.min.time()) if start_date == today: logger.warning('START DATE of WEEK IS TODAY.!NO DATA DATA') start_date = start_date - timedelta(days=7) self.datepicker_pop_start.value = start_date cols = [self.variable, self.timestamp_col] df = self.df_load(req_startdate=start_date, req_enddate=end_date, table=self.table, cols=cols, timestamp_col=self.timestamp_col) if abs(start_date - end_date).days > 7: if 'week' in periods: periods.remove('week') if abs(start_date - end_date).days > 31: if 'month' in periods: periods.remove('month') if abs(start_date - end_date).days > 90: if 'quarter' in periods: periods.remove('quarter') for idx, period in enumerate(periods): df_period = self.period_over_period(df, start_date=start_date, end_date=end_date, period=period, history_periods=self.pop_history_periods, timestamp_col='start_actual') logger.warning('LINE 368: dayset:%s', df_period.head(30)) groupby_cols = ['dayset', 'period'] if len(df_period) > 0: # logger.warning('line 150 df_period 
columns:%s',df.columns) df_period = df_period.groupby(groupby_cols).agg({self.variable: 'sum'}) df_period = df_period.reset_index() else: df_period = df_period.rename(index=str, columns={'day': 'dayset'}) prestack_cols = list(df_period.columns) df_period = self.split_period_into_columns(df_period, col_to_split='period', value_to_copy=self.variable) # short term fix: filter out the unnecessary first day added by a corrupt quarter functionality if period == 'quarter': if 'dayset' in df_period.columns: min_day = df_period['dayset'].min() logger.warning('LINE 252: MINIUMUM DAY:%s', min_day) df_period = df_period[df_period['dayset'] > min_day] poststack_cols = list(df_period.columns) title = "{} over {}".format(period, period) plotcols = list(np.setdiff1d(poststack_cols, prestack_cols)) # include current period if not extant df_period, plotcols = self.pop_include_zeros(df_period, plotcols=plotcols, period=period) # logger.warning('line 155 cols to plot:%s',plotcols if self.groupby_dict[self.variable] == 'mean': xlabel = '%' else: xlabel = 'frequency' if 'dayset' not in df_period.columns: leng = len(df_period) if leng > 0: df_period['dayset'] = 0 else: df_period['dayset'] = '' if idx == 0: p = df_period.hvplot.bar('dayset', plotcols, rot=45, title=title, stacked=False, width=1200, height=400, value_label=xlabel) else: p += df_period.hvplot.bar('dayset', plotcols, rot=45, title=title, stacked=False, width=1200, height=400, value_label=xlabel) return p except Exception: logger.error('period over period to date', exc_info=True) def patron_info_table(self,launch): try: tmp_df = None tmp_df1 = None if self.vars['patron'] != 'all': tmp_df = self.df1['patron_friend','patron_friend_gender','patron_friend_parish'] tmp_df.drop_duplicates(keep='first', inplace=True) # likes tmp_df1 = self.df1['patron', 'patron_likes', 'patron_gender', 'patron_discovery', 'patron_parish'] tmp_df1.drop_duplicates(keep='first', inplace=True) if tmp_df is None: tmp_df = pd.DataFrame() if tmp_df1 is 
None: tmp_df1 = pd.DataFrame() p = tmp_df.hvplot.table(width=400) q = tmp_df1.hvplot.table(width=600) return q + p except Exception: logger.error('patron friends table', exc_info=True) def chord_diagram(self, launch): try: def normalize_value(x, total): x = int((x / total) * 1000) if x <= 0: return 1 return x df = self.df1.copy() # chord setup var1 = 'staff' var2 = 'patron' # -------------- nodes data = {} data['nodes'] = [] source_list = df[var1].tolist() names = list(set(source_list)) var1_dict = dict(zip(df[var2], df[var1])) type_dict = {} types = list(set(df['type'].tolist())) name_dict = {} for idx, name in enumerate(names): name_dict[name] = idx for idx, name in enumerate(names): type_tmp = var1_dict[name] index = name_dict[name] data['nodes'].append({'OwnerID': index, 'index': idx, 'Type': type_tmp}) nodes = hv.Dataset(pd.DataFrame(data['nodes']), 'index') # --------- make the links data['links'] = [] for idx, row in df.iterrows(): src = name_dict[row[var1]] tgt = name_dict[row[var2]] val = row['rate'] data['links'].append({'source': src, 'target': tgt, 'value': val}) links = pd.DataFrame(data['links']) # get the individual links links = links.groupby(['source', 'target'])['value'].sum() links = links.reset_index() total = links['value'].sum() links['value'] = links['value'].apply(lambda x: normalize_value(x, total)) # filter for top percentile quantile_val = links['value'].quantile(self.chord_data['percentile_threshold']) links = links[links['value'] >= quantile_val] # logger.warning('after quantile filter:%s',len(links)) chord_ = hv.Chord((links, nodes), ['source', 'target'], ['value']) chord_.opts(opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), labels='Type', node_color=dim('index').str(), width=1000, height=1000)) return chord_ except Exception: logger.error('chord diagram', exc_info=True) def forecasts(self, launch): try: logger.warning('LINE 660: self.df1 :%s', self.df1.head()) df = self.df.copy() df = 
df.set_index(self.timestamp_col)
            # --- tail of Thistab.forecasts (def line is above this chunk) ---
            # logger.warning('LINE 648: df:%s', df.head())
            # Choose the series to forecast: monetary variables are summed per
            # day; 'attendance' is proxied by a daily count of 'patrons'.
            tsa_variable = self.tsa_variable
            if self.tsa_variable in ['remuneration','rate']:
                df = df.resample('D').agg({tsa_variable: 'sum'})
            else:
                # calculate attendance
                if self.tsa_variable == 'attendance':
                    tsa_variable = 'patrons'
                df = df.resample('D').agg({tsa_variable: 'count'})
            label = 'freq_diff'  # NOTE(review): unused — its only use below is commented out
            #df[label] = df[tsa_variable].diff()
            df = df.fillna(0)
            df = df.reset_index()
            logger.warning('LINE 672: df:%s', df.head())
            # Prophet requires the time column named 'ds' and the value column 'y'.
            rename = {self.timestamp_col: 'ds', tsa_variable: 'y'}
            df = df.rename(columns=rename)
            #logger.warning('df:%s', df.head())
            df = df[['ds', 'y']]
            #logger.warning('df:%s', df.tail())
            # Fit and forecast self.forecast_days into the future.
            m = Prophet()
            m.fit(df)
            future = m.make_future_dataframe(periods=self.forecast_days)
            forecast = m.predict(future)
            print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
            print('LINE 689 forecast columns:',list(forecast.columns))
            # Axis label: '$' for monetary variables, '#' for counts.
            if tsa_variable in ['rate','remuneration']:
                value_label = '$'
            else:
                value_label = '#'
            # Left panel: point forecast (line) overlaid with its bounds (scatter).
            for idx, col in enumerate(['yhat', 'yhat_lower', 'yhat_upper']):
                if idx == 0:
                    p = forecast.hvplot.line(x='ds', y=col, width=600, height=250,
                                             value_label=value_label).relabel(col)
                else:
                    p *= forecast.hvplot.scatter(x='ds' , y=col, width=600, height=250,
                                                 value_label=value_label).relabel(col)
            # Right panel: Prophet's decomposed trend and (if present) weekly seasonality.
            for idx, col in enumerate(['trend', 'weekly']):
                if idx == 0:
                    q = forecast.hvplot.line(x='ds', y=col, width=550, height=250,
                                             value_label=value_label).relabel(col)
                else:
                    if 'weekly' in forecast.columns:
                        q *= forecast.hvplot.line(x='ds', y=col, width=550, height=250,
                                                  value_label=value_label).relabel(col)
            return p + q
        except Exception:
            # NOTE(review): error message says "box plot" but this is the forecast method
            logger.error("box plot:", exc_info=True)

        def kruskal_label(self,df,var,treatment):
            """Run a Kruskal-Wallis test of `var` across the levels of `treatment`.

            Returns (stat, pvalue, txt) where txt is 'Yes' if the effect is
            significant at p <= 0.05, 'No' otherwise, and all three are 'na'
            when the test cannot be run (e.g. fewer than two groups).
            """
            try:
                # get unique levels
                try:
                    stat,pvalue = kruskal(*[group[var].values for name, group in df.groupby(treatment)])
                    logger.warning('stat:%s,pvalue:%s', stat, pvalue)
                    if pvalue > 0.05:
                        txt = 'No'
                    else:
                        txt = 'Yes'
                    return stat, pvalue, txt
                except Exception:
                    # Kruskal test raised (typically too few groups) — degrade gracefully.
                    stat = 'na'
                    pvalue = 'na'
                    txt = 'na'
                    logger.warning('Line 737: not enough groups')
                    return stat, pvalue, txt
            except Exception:
                logger.error('kruskal label', exc_info=True)

        def non_para_table(self, launch):
            """Build an hvplot table of Kruskal-Wallis results for each
            (outcome variable, treatment) pair; `launch` is only a stream trigger.
            """
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'stat': [],
                    'p-value': []
                }
                # prep df
                df = self.df1.copy()
                df = df.drop(self.timestamp_col, axis=1)
                logger.warning('LINE 758; df:%s',list(df.columns))
                # NOTE(review): self.vars is not set in the visible __init__
                # (which defines self.select_values) — confirm where it is assigned.
                for var in ['rate','remuneration','patron']:
                    for treatment in self.vars.keys():
                        logger.warning('col :%s', treatment)
                        df_tmp = df[[var,treatment]]
                        if treatment != var:
                            if var == 'patron':
                                # attendance: count unique patrons per treatment level
                                df_tmp = df_tmp.groupby([treatment]).agg({'patron': 'nunique'})
                                df_tmp = df_tmp.reset_index()
                            stat, pvalue, txt = self.kruskal_label(df_tmp,var,treatment)
                            # add to dict
                            corr_dict['Variable 1'].append(var)
                            corr_dict['Variable 2'].append(treatment)
                            corr_dict['Relationship'].append(txt)
                            # stat/pvalue may be the string 'na' — only round floats
                            if isinstance(pvalue, float):
                                corr_dict['stat'].append(round(stat, 3))
                            else:
                                corr_dict['stat'].append(stat)
                            if isinstance(pvalue,float):
                                corr_dict['p-value'].append(round(pvalue, 3))
                            else:
                                corr_dict['p-value'].append(pvalue)
                            logger.warning('LINE 756:%s-%s completed',var,treatment)
                df = pd.DataFrame(
                    {
                        'Variable 1': corr_dict['Variable 1'],
                        'Variable 2': corr_dict['Variable 2'],
                        'Relationship': corr_dict['Relationship'],
                        'stat': corr_dict['stat'],
                        'p-value': corr_dict['p-value']
                    })
                # logger.warning('df:%s',df.head(23))
                return df.hvplot.table(columns=['Variable 1', 'Variable 2', 'Relationship', 'stat', 'p-value'],
                                       width=550, height=600,
                                       title='Effect of variable levels on outcomes')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def multiline(self, launch=1):
            """One line per level of the selected categorical variable (x),
            resampled over time for the selected numerical variable (y).
            """
            try:
                # NOTE(review): self.multiline_variable is referenced here but the
                # visible __init__ defines self.multiline_vars — confirm which
                # attribute is actually populated at runtime.
                yvar = self.multiline_variable['y']
                if self.multiline_variable['y'] == 'attendance':
                    yvar = 'patron'
                xvar = self.multiline_variable['x']
                df = self.df1.copy()
                # NOTE(review): `thistab` is the closure instance, not `self` —
                # works only because this method is called on that same instance.
                for key in thistab.vars.keys():
                    if thistab.vars[key] != 'all':
                        if key != xvar:
                            df = df[df[key] == self.vars[key]]
                df = df[[xvar, yvar, self.timestamp_col]]
                df = df.set_index(self.timestamp_col)
                if yvar == 'patron':
                    # attendance: count unique patrons per period
                    df = df.groupby(xvar).resample(self.resample_period['multiline']).agg({yvar: 'nunique'})
                    df = df.reset_index()
                    logger.warning('LINE 817: df:%s', df.head())
                else:
                    df = df.groupby(xvar).resample(self.resample_period['multiline']).agg({yvar: 'sum'})
                    df = df.reset_index()
                    logger.warning('LINE 820: df:%s',df.head())
                lines = df[xvar].unique()
                # split data frames
                dfs = {}
                for idx, line in enumerate(lines):
                    dfs[line] = df[df[xvar] == line]
                    dfs[line] = dfs[line].fillna(0)
                    #logger.warning('LINE 788:%s - %s:', line, dfs[line].head())
                    # Overlay one hvplot line per level.
                    if idx == 0:
                        p = dfs[line].hvplot.line(x=self.timestamp_col, y=yvar, width=1200, height=500).relabel(line)
                    else:
                        p *= dfs[line].hvplot.line(x=self.timestamp_col, y=yvar, width=1200, height=500).relabel(line)
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    # Bokeh callback: re-filter data and redraw period-to-date graphs.
    def update():
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.filter_df(thistab.df)
        thistab.load_menus(thistab.df1)
        thistab.graph_periods_to_date(thistab.df, thistab.timestamp_col, thistab.variable)
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Bokeh callback: refresh the period-over-period comparison settings.
    def update_period_over_period():
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        thistab.pop_start_date = thistab.datepicker_pop_start.value  # trigger period over period
        thistab.pop_end_date = datepicker_pop_end.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_tsa_variable_launch.event(launch=thistab.trigger)
        # NOTE(review): the trigger is bumped and the tsa stream fired a second
        # time here — confirm whether the double event is intentional.
        thistab.trigger += 1
        stream_tsa_variable_launch.event(launch=thistab.trigger)
        thistab.resample_period['multiline'] = select_resample_period['multiline'].value
        thistab.multiline_variable['x'] = multiline_x_select.value
        thistab.multiline_variable['y'] = multiline_y_select.value
        thistab.notification_updater("ready")

    # Bokeh callback: apply filter selections and recompute the forecast.
    def update_forecasts():
        thistab.notification_updater("Calculations underway. Please be patient")
        for key in thistab.vars.keys():
            # NOTE(review): assigns the Select widget object itself, not
            # thistab.selects[key].value — looks like a bug; confirm.
            thistab.vars[key] = thistab.selects[key]
        thistab.filter_df(thistab.df)
        thistab.tsa_variable = tsa_variable_select.value
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.trigger += 1
        stream_tsa_variable_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Bokeh callback: redraw the multiline comparison with new x/y/resample choices.
    def update_multiline_variables():
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.resample_period['multiline'] = select_resample_period['multiline'].value
        thistab.multiline_variable['x'] = multiline_x_select.value
        thistab.multiline_variable['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_multiline_variable_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    try:
        cols = []
        thistab = Thistab(table='business_composite', cols=cols)
        # ------------------------------------- SETUP ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = datetime(last_date.year, 1, 1, 0, 0, 0)
        loadcols = []
        thistab.df = thistab.df_load(first_date, last_date,thistab.table,loadcols,
                                     timestamp_col=thistab.timestamp_col)
        thistab.df1 = thistab.filter_df(thistab.df)
        thistab.load_menus(thistab.df)
        thistab.graph_periods_to_date(thistab.df,
                                      timestamp_filter_col=thistab.timestamp_col,
                                      variable=thistab.variable)
        thistab.section_header_updater('cards', label='')
        thistab.section_header_updater('pop', label='')
        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        thistab.pop_end_date = last_date
        thistab.pop_start_date = thistab.first_date_in_period(thistab.pop_end_date, 'week')
        # HoloViews streams used to push manual redraw events into the DynamicMaps.
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_tsa_variable_launch = streams.Stream.define('Launch', launch=-1)()
        stream_multiline_variable_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        datepicker_pop_end = DatePicker(title="Period end", min_date=first_date_range,
                                        max_date=last_date_range, value=thistab.pop_end_date)
        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(5),
                                   options=thistab.menus['history_periods'])
        pop_button = Button(label="Select dates/periods, then click me!", width=15, button_type="success")
        filter_button = Button(label="Select filters, then click me!", width=15, button_type="success")
        multiline_button = Button(label="Select multiline variables, then click me!", width=15, button_type="success")
        tsa_variable_select = Select(title='Select forecast variable',
                                     value='rate',options=['rate','remuneration','attendance'])
        tsa_button = Button(label="Select forecast variables, then click me!", width=15, button_type="success")
        select_forecast_days = Select(title='Select # of days which you want forecasted',
                                      value=str(thistab.forecast_days),
                                      options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        # NOTE(review): the visible __init__ defines multiline_vars = {'x':..,'y':..};
        # the 'ys'/'xs' keys used here are not defined there — confirm.
        multiline_y_select = Select(title='Select numerical variable for comparison',
                                    value=thistab.multiline_variable['y'],
                                    options=thistab.multiline_vars['ys'])
        multiline_x_select = Select(title='Select categorical variable for comparison',
                                    value=thistab.multiline_variable['x'],
                                    options=thistab.multiline_vars['xs'])
        select_resample_period = {
            'multiline' : Select(title='Select resample period',
                                 value=thistab.resample_period['multiline'],
                                 options=['D','W','M','Q'])
        }
        # --------------------------------- GRAPHS ---------------------------
        # Each DynamicMap re-renders its Thistab plotting method when its stream fires.
        hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)
        hv_pop_month = hv.DynamicMap(thistab.pop_month, streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)
        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)
        hv_tsa = hv.DynamicMap(thistab.forecasts, streams=[stream_tsa_variable_launch])
        tsa = renderer.get_plot(hv_tsa)
        hv_chord = hv.DynamicMap(thistab.chord_diagram, streams=[stream_launch])
        chord = renderer.get_plot(hv_chord)
        hv_patron_info = hv.DynamicMap(thistab.patron_info_table, streams=[stream_launch])
        patron_info = renderer.get_plot(hv_patron_info)
        hv_non_para_table = hv.DynamicMap(thistab.non_para_table,streams=[stream_launch_corr])
        non_para_table = renderer.get_plot(hv_non_para_table)
        hv_multiline = hv.DynamicMap(thistab.multiline, streams=[stream_multiline_variable_launch])
        multiline = renderer.get_plot(hv_multiline)
        # -------------------------------- CALLBACKS ------------------------
        filter_button.on_click(update)
        pop_button.on_click(update_period_over_period)  # lags array
        tsa_button.on_click(update_forecasts)
        multiline_button.on_click(update_multiline_variables)
        # controls
        controls_multiline = WidgetBox(
            multiline_x_select,
            multiline_y_select,
            select_resample_period['multiline'],
            multiline_button
        )
        controls_tsa = WidgetBox(
            tsa_variable_select,
            select_forecast_days,
            tsa_button
        )
        controls_pop = WidgetBox(
            pop_number_select,
            pop_button,
        )
        # NOTE(review): the visible __init__ defines self.select (keys: area, item,
        # status, gender, category, rental_employee_gender); `selects` and these
        # keys are not visible there — confirm against the full class.
        controls_filters = WidgetBox(
            thistab.selects['event'],
            thistab.selects['company'],
            thistab.selects['patron'],
            thistab.selects['patron_likes'],
            thistab.selects['manager_education'],
            thistab.selects['staff_education'],
            thistab.selects['manager_gender'],
            thistab.selects['staff_gender'],
            thistab.selects['patron_gender'],
            thistab.selects['manager_parish'],
            thistab.selects['staff_parish'],
            thistab.selects['patron_parish'],
            thistab.selects['type'],
        )
        # create the dashboards
        grid_data = [
            [thistab.notification_div['top']],
            [Spacer(width=20, height=40)],
            [thistab.section_headers['cards']],
            [Spacer(width=20, height=2)],
            [thistab.KPI_card_div,controls_filters],
            [thistab.section_headers['pop']],
            [Spacer(width=20, height=25)],
            [pop_week.state, controls_pop],
            [pop_month.state],
            [pop_quarter.state],
            [thistab.section_headers['patron info']],
            [Spacer(width=20, height=25)],
            [patron_info.state],
            [chord.state],
            [thistab.section_headers['tsa']],
            [Spacer(width=20, height=25)],
            [tsa.state, controls_tsa],
            [thistab.section_headers['relationships']],
            [Spacer(width=20, height=25)],
            [non_para_table.state],
            [thistab.section_headers['multiline']],
            [Spacer(width=20, height=25)],
            [multiline.state, controls_multiline],
            [thistab.notification_div['bottom']]
        ]
        grid = gridplot(grid_data)
        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
def KPI_developer_adoption_tab(page_width,DAYS_TO_LOAD=90):
    """Build the 'KPI: developer adoption' Bokeh Panel.

    Wires a KPI-derived Thistab to period-to-date cards, period-over-period
    bar charts, and a significant-ratio time series, all driven by HoloViews
    streams. Returns the Panel, or tab_error_flag(...) on failure.
    NOTE(review): DAYS_TO_LOAD is currently unused in this function body.
    """
    class Thistab(KPI):
        # NOTE(review): mutable default `cols=[]` is shared across calls —
        # harmless only as long as it is never mutated.
        def __init__(self, table,cols=[]):
            KPI.__init__(self, table,name='developer',cols=cols)
            self.table = table
            self.df = None
            self.checkboxgroup = {}
            # Placeholder KPI cards, replaced by graph_periods_to_date().
            self.period_to_date_cards = {
                'year': self.card('',''),
                'quarter': self.card('', ''),
                'month': self.card('', ''),
                'week': self.card('', '')
            }
            self.ptd_startdate = datetime(datetime.today().year,1,1,0,0,0)
            self.timestamp_col = 'block_timestamp'
            # Default dependent variable: first developer-adoption DV in the menu.
            self.variable = self.menus['developer_adoption_DVs'][0]
            self.datepicker_pop_start = DatePicker(
                title="Period start", min_date=self.initial_date,
                max_date=dashboard_config['dates']['last_date'],
                value=dashboard_config['dates']['last_date'])

            # ------- DIVS setup begin
            self.page_width = page_width
            self.KPI_card_div = self.initialize_cards(width=self.page_width,height=1000)
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards': self.section_header_div(
                    text='Period to date:{}'.format(self.section_divider),
                    width=int(self.page_width*.5), html_header='h2',
                    margin_top=5,margin_bottom=-155),
                'pop': self.section_header_div(
                    text='Period over period:{}'.format(self.section_divider),
                    width=int(self.page_width*.5), html_header='h2',
                    margin_top=5, margin_bottom=-155),
                # NOTE(review): this .format() call has no {} placeholder in the
                # string, so the section divider is silently dropped.
                'sig_ratio': self.section_header_div(
                    text='Time series of ratio of DV to significant IVs'.format(self.section_divider),
                    width=int(self.page_width*.5), html_header='h2',
                    margin_top=5, margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150):
            """Wrap `text` in a styled HTML header inside a Bokeh Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=300):
            """Static help Div (the bullet list is currently empty)."""
            div_style = """ style='width:350px;margin-right:-800px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> </li> <li> </li> <li> </li> <li> </li> <li> </li> <li> </li> </ul> </div> """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # -------------------- CARDS -----------------------------------------
        def initialize_cards(self, width, height=250):
            """Return a Div holding one empty, randomly-styled card per period."""
            try:
                txt = ''
                for idx,period in enumerate(['year', 'quarter', 'month', 'week']):
                    design = random.choice(list(KPI_card_css.keys()))
                    txt += self.card(title='', data='', card_design=design)
                text = """<div style="margin-top:100px;display:flex;flex-direction:column;"> {} </div>""".format(txt)
                div = Div(text=text, width=width, height=height)
                return div
            except Exception:
                logger.error('initialize cards', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------
        def graph_periods_to_date(self,df1,timestamp_filter_col,variable):
            """Recompute the period-to-date KPI cards (week/month/quarter/year)
            for `variable` and push them via update_significant_DV_cards().
            """
            try:
                dct = {}
                for idx,period in enumerate(['week','month','quarter','year']):
                    all_txt = """<div style="width:{}px;display:flex;flex-direction:row;">"""\
                        .format(int(self.page_width*.6))
                    # go to next row
                    df = self.period_to_date(
                        df1, timestamp=dashboard_config['dates']['last_date'],
                        timestamp_filter_col=timestamp_filter_col, period=period)
                    # get unique instances
                    df = df.compute()  # materialize the dask frame
                    df = df.drop_duplicates(keep='first')
                    count = len(df)
                    gc.collect()
                    denom = df[variable].sum()
                    if denom != 0:
                        # Cost per unit of the DV, e.g. "$x per fork".
                        payroll_to_date = self.payroll_to_date(period)
                        cost_per_var = round(payroll_to_date/denom,2)
                        tmp_var = self.variable.split('_')
                        title = "{} to date".format(period)
                        title += "</br>${} per {}".format(cost_per_var,tmp_var[-1])
                    else:
                        title = "{} to date".format(period)
                    design = random.choice(list(KPI_card_css.keys()))
                    all_txt += self.card(title=title,data=count,card_design=design)
                    # add the statistically significant point estimates
                    all_txt += self.calc_sig_effect_card_data(df,interest_var=self.variable, period=period)
                    all_txt += """</div>"""
                    print(all_txt)
                    dct[period] = all_txt
                    del df
                self.update_significant_DV_cards(dct)
            except Exception:
                logger.error('graph periods to date',exc_info=True)

        def graph_period_over_period(self,period):
            """Bar chart comparing `variable` across self.pop_history_periods
            repetitions of `period` within [pop_start_date, pop_end_date].
            """
            try:
                periods = [period]
                start_date = self.pop_start_date
                end_date = self.pop_end_date
                # Normalize dates to datetimes for arithmetic below.
                if isinstance(start_date,date):
                    start_date = datetime.combine(start_date,datetime.min.time())
                if isinstance(end_date,date):
                    end_date = datetime.combine(end_date,datetime.min.time())
                today = datetime.combine(datetime.today().date(),datetime.min.time())
                '''
                - if the start day is today (there is no data for today),
                  adjust start date
                '''
                if start_date == today:
                    logger.warning('START DATE of WEEK IS TODAY.!NO DATA DATA')
                    start_date = start_date - timedelta(days=7)
                    self.datepicker_pop_start.value = start_date
                cols = [self.variable,self.timestamp_col, 'day']
                df = self.load_df(start_date=start_date,end_date=end_date,cols=cols,
                                  timestamp_col='block_timestamp')
                # Drop periods shorter than the selected date span.
                if abs(start_date - end_date).days > 7:
                    if 'week' in periods:
                        periods.remove('week')
                if abs(start_date - end_date).days > 31:
                    if 'month' in periods:
                        periods.remove('month')
                if abs(start_date - end_date).days > 90:
                    if 'quarter' in periods:
                        periods.remove('quarter')
                for idx,period in enumerate(periods):
                    df_period = self.period_over_period(
                        df, start_date = start_date, end_date=end_date,
                        period=period, history_periods=self.pop_history_periods,
                        timestamp_col='block_timestamp')
                    groupby_cols = ['dayset', 'period']
                    if len(df_period) > 0:
                        df_period = df_period.groupby(groupby_cols).agg({self.variable: 'sum'})
                        df_period = df_period.reset_index()
                        df_period = df_period.compute()
                    else:
                        df_period = df_period.compute()
                    df_period = df_period.rename(index=str, columns={'day': 'dayset'})
                    prestack_cols = list(df_period.columns)
                    logger.warning('Line 179:%s', df_period.head(10))
                    df_period = self.split_period_into_columns(
                        df_period, col_to_split='period', value_to_copy=self.variable)
                    # short term fix: filter out the unnecessary first day added
                    # by a corrupt quarter functionality
                    if period == 'quarter':
                        min_day = df_period['dayset'].min()
                        logger.warning('LINE 252: MINIUMUM DAY:%s', min_day)
                        df_period = df_period[df_period['dayset'] > min_day]
                    logger.warning('line 180 df_period columns:%s', df_period.head(50))
                    poststack_cols = list(df_period.columns)
                    title = "{} over {}".format(period, period)
                    # The newly created per-period columns are the ones to plot.
                    plotcols = list(np.setdiff1d(poststack_cols, prestack_cols))
                    df_period, plotcols = self.pop_include_zeros(
                        df_period=df_period, plotcols=plotcols, period=period)
                    if idx == 0:
                        p = df_period.hvplot.bar('dayset',plotcols,rot=45,title=title,
                                                 stacked=False,width=int(self.page_width*.8),
                                                 height=400,value_label='#')
                    else:
                        p += df_period.hvplot.bar('dayset',plotcols,rot=45,title=title,
                                                  stacked=False,width=int(self.page_width*.8),
                                                  height=400,value_label='#')
                return p
            except Exception:
                logger.error('period over period to date', exc_info=True)

        # -------------------------------- PLOT TRENDS FOR SIGNIFICANT RATIOS --------------------------
        def graph_significant_ratios_ts(self,launch=-1):
            """Line chart of DV-to-significant-IV ratios over time;
            `launch` only receives the stream trigger.
            """
            try:
                df = self.make_significant_ratios_df(self.df,
                                                     resample_period=self.resample_period,
                                                     interest_var=self.variable,
                                                     timestamp_col='block_timestamp')
                # clean: the raw DV column itself is not a ratio
                if self.variable in df.columns:
                    df = df.drop(self.variable,axis=1)
                #df = df.compute()
                # plot
                return df.hvplot.line(width=int(self.page_width*.8),height=400)
            except Exception:
                logger.error('graph significant ratios',exc_info=True)

    # Bokeh on_change callback: switch the dependent variable and redraw.
    def update_variable(attrname, old, new):
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.variable = variable_select.value
        thistab.graph_periods_to_date(thistab.df,'block_timestamp',thistab.variable)
        thistab.section_header_updater('cards',label='')
        thistab.section_header_updater('pop',label='')
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_sig_ratio.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Bokeh button callback: apply the chosen dates/periods and redraw.
    def update_period_over_period():
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        thistab.pop_start_date = thistab.datepicker_pop_start.value  # trigger period over period
        thistab.pop_end_date = datepicker_pop_end.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Bokeh on_change callback: change the resample frequency of the ratio chart.
    def update_resample(attrname, old, new):
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.resample_period = resample_select.value
        thistab.trigger += 1
        stream_launch_sig_ratio.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Bokeh on_change callback: change how many history periods are compared.
    def update_history_periods(attrname, old, new):
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    try:
        cols = ['aion_fork','aion_watch','aion_release','aion_issue','aion_push','block_timestamp']
        thistab = Thistab(table='account_ext_warehouse', cols=cols)
        # ------------------------------------- SETUP ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = datetime(last_date.year,1,1,0,0,0)
        thistab.df = thistab.load_df(first_date, last_date,cols,'block_timestamp')
        thistab.graph_periods_to_date(thistab.df,
                                      timestamp_filter_col='block_timestamp',
                                      variable=thistab.variable)
        thistab.section_header_updater('cards',label='')
        thistab.section_header_updater('pop',label='')
        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        thistab.pop_end_date = last_date
        thistab.pop_start_date = thistab.first_date_in_period(thistab.pop_end_date, 'week')
        # HoloViews streams for pushing manual redraw events.
        stream_launch = streams.Stream.define('Launch',launch=-1)()
        stream_launch_sig_ratio = streams.Stream.define('Launch_sigratio',launch=-1)()
        datepicker_pop_end = DatePicker(title="Period end", min_date=first_date_range,
                                        max_date=last_date_range,
                                        value=thistab.pop_end_date)
        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(thistab.pop_history_periods),
                                   options=thistab.menus['history_periods'])
        pop_button = Button(label="Select dates/periods, then click me!",width=15,button_type="success")
        variable_select = Select(title='Select variable', value=thistab.variable,
                                 options=thistab.menus['developer_adoption_DVs'])
        resample_select = Select(title='Select resample period',
                                 value=thistab.resample_period,
                                 options=thistab.menus['resample_period'])
        # --------------------------------- GRAPHS ---------------------------
        hv_sig_ratios = hv.DynamicMap(thistab.graph_significant_ratios_ts,
                                      streams=[stream_launch_sig_ratio])
        sig_ratios= renderer.get_plot(hv_sig_ratios)
        hv_pop_week = hv.DynamicMap(thistab.pop_week,streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)
        hv_pop_month = hv.DynamicMap(thistab.pop_month,streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)
        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)
        # -------------------------------- CALLBACKS ------------------------
        variable_select.on_change('value', update_variable)
        pop_button.on_click(update_period_over_period)  # lags array
        resample_select.on_change('value', update_resample)
        pop_number_select.on_change('value',update_history_periods)
        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element
        controls_ptd = WidgetBox(variable_select, resample_select)
        controls_pop = WidgetBox(thistab.datepicker_pop_start,
                                 datepicker_pop_end,
                                 pop_number_select,pop_button)
        grid_data = [
            [thistab.notification_div['top']],
            [Spacer(width=20, height=40)],
            [thistab.section_headers['sig_ratio']],
            [Spacer(width=20, height=25)],
            [sig_ratios.state, controls_ptd],
            [thistab.section_headers['cards']],
            [Spacer(width=20, height=2)],
            [thistab.KPI_card_div],
            [thistab.section_headers['pop']],
            [Spacer(width=20, height=25)],
            [pop_week.state,controls_pop],
            [pop_month.state],
            [pop_quarter.state],
            [thistab.notification_div['bottom']]
        ]
        grid = gridplot(grid_data)
        # Make a tab with the layout
        tab = Panel(child=grid, title='KPI: developer adoption')
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag('KPI: developer adoption')
def account_predictive_tab(page_width=1200):
    """Build the account-churn prediction tab (random-forest classifier over
    account features). NOTE: this definition continues beyond the visible
    chunk; only the portion shown here is documented.
    """
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')
            self.feature_list = hyp_variables
            # Supervised-learning targets: outcome column and the label values kept.
            self.targets = {
                'classification': {
                    'churned': {
                        'cols': ['churned', 'active'],
                        'target_col': 'status'
                    }
                },
                'regression': {
                    'aion_fork': {
                        'cols': [1, 0],
                        'target_col': 'aion_fork'
                    }
                }
            }
            self.interest_var = 'address'
            self.trigger = -1
            self.status = 'all'
            self.clf = None
            self.pl = {}  # for rf pipeline
            self.div_style = """ style='width:300px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            # list of tier specific addresses for prediction
            self.address_list = []
            self.prediction_address_selected = ""
            self.load_data_flag = False
            self.day_diff = 1  # NOTE(review): duplicate assignment (also set above)
            # All features are aggregated by mean when grouping per address.
            self.groupby_dict = {}
            for col in self.feature_list:
                self.groupby_dict[col] = 'mean'
            # NOTE(review): div_style is re-assigned with the identical value here.
            self.div_style = """ style='width:300px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.metrics_div = Div(text='', width=400, height=300)
            self.accuracy_df = None
            self.inspected_variable = 'amount'

            # ------- DIVS setup begin
            self.page_width = page_width
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'churn': self.section_header_div(
                    text=
                    'Churned accounts: prediction model accuracy, variable ranking:{}'
                    .format('----'),
                    width=int(self.page_width * .5), html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'variable behavior': self.section_header_div(
                    text='Variable behavior:{}'.format(self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'predictions': self.section_header_div(
                    text='Select date range to make predictions:{}'.format(self.section_divider),
                    width=int(self.page_width * .5), html_header='h2',
                    margin_top=5, margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150):
            """Wrap `text` in a styled HTML header inside a Bokeh Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        # ####################################################
        #              UTILITY DIVS
        def results_div(self, text, width=600, height=300):
            """Plain Div wrapper used for displaying result text."""
            div = Div(text=text, width=width, height=height)
            return div

        def title_div(self, text, width=700):
            """Div containing `text` as a styled h2 title."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def reset_checkboxes(self):
            """Reset the prediction-address selection back to 'all'."""
            try:
                self.prediction_address_selected = ""
                self.prediction_address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self, start_date="2018-04-25 00:00:00", end_date="2018-12-10 00:00:00"):
            """Load self.df for [start_date, end_date]; string dates are parsed
            with self.DATEFORMAT. NaNs are filled with 0. Returns nothing.
            """
            try:
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)
                self.df_load(start_date, end_date)
                self.df = self.df.fillna(0)
                #self.make_delta()
                #self.df = self.df.set_index('block_timestamp')
                #logger.warning("data loaded - %s",self.df.tail(10))
            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add a '<target>_diff' percent-change column per target, then
            rebuild self.df as a dask frame.
            """
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        self.df = self.df.fillna(self.df.mean())
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))
            except Exception:
                logger.error('make delta', exc_info=True)

        def split_df(self, df, target):
            """Split `df` into per-label entries of self.df1.

            NOTE(review): this looks broken — `self.target` does not exist
            (the attribute is `self.targets`), both locals get the same dict
            value instead of ['cols'] / ['target_col'], and the assignment
            stores a boolean mask rather than a filtered frame
            (df[df[target_col] == val]). Confirm before relying on it.
            """
            cols = self.target['classification'][target]
            target_col = self.target['classification'][target]
            for val in cols:
                self.df1[val] = df[target_col] == val
            logger.warning(
                "Finished split into churned and retained dataframes")

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Box plot of `variable` split by account status.

            NOTE(review): when self.df is None the SD fallback frame is used
            but minv/maxv remain 0, so ylim collapses to (0, 0) — confirm.
            """
            try:
                # logger.warning("difficulty:%s", self.df.tail(30))
                # get max value of variable and multiply it by 1.1
                minv = 0
                maxv = 0
                df = self.df
                if df is not None:
                    if len(df) > 0:
                        minv, maxv = dd.compute(df[variable].min(),
                                                df[variable].max())
                else:
                    df = SD('filter', [variable, 'status'], []).get_df()
                return df.hvplot.box(variable, by='status',
                                     ylim=(.9 * minv, 1.1 * maxv))
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS
        def rf_clf(self):
            """Train one RandomForest pipeline per classification target and
            record per-target accuracy in self.accuracy_df.
            """
            try:
                logger.warning("RANDOM FOREST LAUNCHED")
                error_lst = []
                df_temp = self.df
                df_temp = self.normalize(df_temp, timestamp_col='block_timestamp')
                # if all addresses used filter for only positive transactions
                for target in self.targets['classification']:
                    # filter out joined
                    df = df_temp.copy()
                    if target == 'churned':
                        df = df[df['status'] != 'joined']
                    #logger.warning("line 205: df columns in %s:",df.columns.tolist())
                    # One row per (address, status) with mean feature values.
                    df = df.groupby(['address', 'status']).agg(self.groupby_dict)
                    df = df.reset_index()
                    #logger.warning("line 222: df columns in %s:",df.tail(10))
                    df = df.compute()
                    '''
                    # only retain wanted values
                    col_values = list(self.df[self.targets['classification'][target]['target_col']].unique())
                    for val in col_values:
                        if val in self.targets['classification'][target]['cols']:
                            pass
                        else:
                            df[self.targets['classification'][target]['target_col']] = \
                                df[df[self.targets['classification'][target]['cols']] != val]
                    '''
                    X = df[self.feature_list]
                    y = df[self.targets['classification'][target]['target_col']]
                    #logger.warning('y=:%s',y.head(100))
                    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
                    self.feature_list = X_train.columns.tolist()
                    # Impute zeros (treated as missing) with medians, then fit RF.
                    self.pl[target] = Pipeline([
                        ('imp', SimpleImputer(missing_values=0,
                                              strategy='median')),
                        ('rf', RandomForestClassifier(n_estimators=100,
                                                      random_state=42,
                                                      max_depth=4,
                                                      class_weight='balanced'))
                    ])
                    self.pl[target].fit(X_train, y_train)
                    y_pred = self.pl[target].predict(X_test)
                    error_lst.append(
                        round(100 * metrics.accuracy_score(y_test, y_pred), 2))
                self.accuracy_df = pd.DataFrame({
                    'Outcome': list(self.targets['classification'].keys()),
                    'Accuracy': error_lst,
                })
                #logger.warning('accuracy_df:%s',self.accuracy_df.head())
                #self.make_tree(target=target)
                # NOTE(review): these prints use y_test/y_pred from the last
                # loop iteration only, so the reports cover the final target.
                print('confusion matrix:\n')
                print(confusion_matrix(y_test, y_pred))
                print('classification report:\n')
                print(classification_report(y_test, y_pred))
                #logger.warning("clf model built:%s",self.pl)
            except Exception:
                logger.error("RF:", exc_info=True)

        def accuracy_table(self):
            """hvplot table of per-outcome model accuracy from self.accuracy_df."""
            try:
                columns = self.accuracy_df.columns.tolist()  # NOTE(review): unused local
                return self.accuracy_df.hvplot.table(
                    columns=['Outcome', 'Accuracy'], width=250,
                    title='Prediction accuracy')
            except Exception:
                logger.error("RF:", exc_info=True)

        def prediction_information_div(self, width=350, height=450):
            """Static help Div explaining how to read the prediction table.
            NOTE(review): the HTML contains a stray '<>' where a '<li>' was
            probably intended (cannot be changed in a docs-only edit).
            """
            txt = """ <div {}> <h4 {}>Info </h4> <ul style='margin-top:-10px;'> <li> The table shows the predicted change.</br> </li> <li> For desirable outcomes: </br> ... a positive number is good! </br> ... the bigger the number the better. </br> ... a negative number is bad! </br> ... the bigger the negative number the worse it is. </li> <> For non-desirable outcomes: </br>... the inverse is true </li> <li> Use the datepicker(s) to select dates for the period desired </li> </ul> </div> """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def metrics_div_update(self, data):
            """Overwrite self.metrics_div with the churn likelihood in `data` (%)."""
            div_style = """ style='width:350px;margin-right:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """<div {}> <h4 {}>Prediction Info </h4> <ul style='margin-top:-10px;'> <li> {}% likely to churn </li> </ul> </div>""".format(div_style, self.header_style, data)
            self.metrics_div.text = txt

        def stats_information_div(self, width=400, height=300):
            """Static help Div describing the metadata tables (continues past
            this chunk; the string literal below is completed further on).
            """
            div_style = """ style='width:350px;margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """ <div {}> <h4 {}>Metadata Info </h4> <ul> <li > <h4 style='margin-bottom:-2px;'>Table left:</h4> - shows the outcome,</br> and the accuracy in %</br> <strong><i>100% is perfection!</i></strong> </li> <li> <h4 style='margin-bottom:-2px;'>Table right:</h4> - shows the desired outcome, the variables(things Aion controls) </br> and their importance to the particular outcome </br> ...which variable(s) have a greater impact on an outcome. 
</br>- lower = better </br>- generally only the best ranked 3 matter </br>- business advice: manipulate the top ranked variables to attain desirable outcomes </li> </ul> </div>""".format(div_style, self.header_style) div = Div(text=txt, width=width, height=height) return div def load_prediction_df(self, start_date, end_date): if isinstance(start_date, date): start_date = datetime.combine(start_date, datetime.min.time()) if isinstance(end_date, date): end_date = datetime.combine(end_date, datetime.min.time()) cols = self.feature_list + ['address', 'block_timestamp'] self.df_predict = self.cl.load_data(table=self.table, cols=cols, start_date=start_date, end_date=end_date) logger.warning('319:in load prediction: %s', self.df_predict.head(5)) def update_prediction_addresses_select(self): self.prediction_address_select.options = ['all'] if len(self.df_predict) > 0: lst = ['all'] + list( self.df_predict['address'].unique().compute()) self.prediction_address_select.options = lst # the period for which the user wants a prediction def make_account_predictions(self, launch=-1): try: logger.warning("MAKE PREDICTIONS LAUNCHED") target = list(self.targets['classification'].keys())[0] # make df = self.df_predict #logger.warning("line 363%s",df.head(10)) # make list of address for prediction select # filter if prediction for certain addresses #logger.warning('address selected:%s',self.prediction_address_select.value) if self.prediction_address_select.value is not None: if len(self.prediction_address_select.value) > 0: if self.prediction_address_select.value not in [ 'all', '' ]: df = df[df.address == self.prediction_address_select.value] #logger.warning('line 409 predict-df post filter:%s', df.head(20)) # make table for display self.predict_df = pd.DataFrame({ 'address': [], 'likely action': [] }) for target in list(self.targets['classification'].keys()): if len(df) > 0: df = self.normalize(df, timestamp_col='block_timestamp') df = self.group_data(df, self.groupby_dict, 
timestamp_col='block_timestamp') interest_labels = list(df['address'].unique()) # run model df = df.fillna(0) X = df[self.feature_list] #logger.warning("df before prediction:%s",X.tail(10)) y_pred = self.pl[target].predict(X) logger.warning('y_pred:%s', y_pred) if target == 'churned': y_pred_verbose = [ 'remain' if x in ["active", 1] else "churn" for x in y_pred ] #---- make table for display self.predict_df = pd.DataFrame({ 'address': interest_labels, 'likely action': y_pred_verbose }) #------ label pools self.predict_df['address'] = self.predict_df[ 'address'].map(self.poolname_verbose_trun) #logger.warning('self.predict_df:%s',self.predict_df) churn_df = self.predict_df[ self.predict_df['likely action'] == 'churn'] perc_to_churn = round( 100 * len(churn_df) / len(self.predict_df), 1) txt = target[:-2] text = """<div {}> <h3>Percentage likely to {}:</h3> <strong 'style=color:black;'>{}%</strong></div>""".format( self.header_style, txt, perc_to_churn) self.metrics_div_update(data=perc_to_churn) else: text = """<div {}> <br/> <h3>Sorry, address not found</h3> </div>""".format(self.header_style) self.metrics_div.text = text logger.warning("end of %s predictions", target) return self.predict_df.hvplot.table( columns=['address', 'likely action'], width=500, title='Account predictions') except Exception: logger.error("prediction:", exc_info=True) def make_tree(self, target='churned'): try: if not self.pl: self.rf_clf() # Limit depth of tree to 3 levels # Extract the small tree tree_small = self.pl[target].named_steps['rf'].estimators_[5] # Save the tree as a png image export_graphviz(tree_small, out_file='small_tree.dot', feature_names=self.feature_list, rounded=True, precision=1) (graph, ) = pydot.graph_from_dot_file('small_tree.dot') # filepath = self.make_filepath('../../../static/images/small_tree.gif') # .write_png(filepath) filepath = self.make_filepath( '/home/andre/Downloads/small_tree.png') graph.write_png(filepath) logger.warning("TREE SAVED") except 
Exception: logger.error("make tree:", exc_info=True) def make_feature_importances(self): try: if not self.pl: self.rf_clf() results_dct = { 'outcome': [], 'feature': [], 'importance': [], 'rank_within_outcome': [] } for target in self.targets['classification'].keys(): logger.warning('make feature importances for :%s', target) # Get numerical feature importances importances = list( self.pl[target].named_steps['rf'].feature_importances_) # List of tuples with variable and importance feature_importances = [(feature, round(importance, 4)) for feature, importance in zip( self.feature_list, importances)] sorted_importances = sorted(feature_importances, key=itemgetter(1)) # logger.warning('importances :%s',importances) # logger.warning("feature_importances:%s",feature_importances) target_lst = [target] * len(importances) count = 1 rank_lst = [] for i in importances: rank_lst.append(count) count += 1 results_dct['outcome'] += target_lst results_dct['feature'] += [ i[0] for i in sorted_importances ] results_dct['importance'] += [ i[1] for i in sorted_importances ] results_dct['rank_within_outcome'] += sorted(rank_lst, reverse=True) df = pd.DataFrame.from_dict(results_dct) logger.warning('MAKE FEATURE IMPORTANCES FINISHED') return df.hvplot.table( columns=[ 'outcome', 'feature', 'importance', 'rank_within_outcome' ], width=600, title="Variables ranked by importance (for each output)") except Exception: logger.error("Feature importances:", exc_info=True) #################################################### # GRAPHS def update(attrname, old, new): thistab.notification_updater( "Calculations underway. 
Please be patient") thistab.load_prediction_df(datepicker_start.value, datepicker_end.value) thistab.update_prediction_addresses_select() thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) stream_select_variable.event(variable=thistab.inspected_variable) thistab.notification_updater("ready") def update_address_predictions(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("ready") def update_select_variable(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") thistab.inspected_variable = select_variable.value stream_select_variable.event(variable=thistab.inspected_variable) thistab.notification_updater("ready") try: # SETUP table = 'account_ext_warehouse' #cols = list(table_dict[table].keys()) cols = hyp_variables + [ 'address', 'block_timestamp', 'account_type', 'status', 'update_type' ] thistab = Thistab(table, cols, []) # setup dates first_date_range = datetime.strptime("2018-04-25 00:00:00", "%Y-%m-%d %H:%M:%S") last_date_range = datetime.now().date() last_date = dashboard_config['dates']['last_date'] last_date = last_date - timedelta(days=50) first_date = last_date - timedelta(days=5) # STREAMS Setup # date comes out stream in milliseconds stream_launch = streams.Stream.define('Launch', launch=-1)() stream_select_variable = streams.Stream.define('Select_variable', variable='amount')() # setup widgets datepicker_start = DatePicker(title="Start", min_date=first_date_range, max_date=last_date_range, value=first_date) datepicker_end = DatePicker(title="End", min_date=first_date_range, max_date=last_date_range, value=last_date) select_variable = Select(title='Filter by variable', value=thistab.inspected_variable, options=thistab.feature_list) # search by address checkboxes thistab.prediction_address_select = Select(title='Filter by address', value='all', options=[]) 
reset_prediction_address_button = Button(label="reset address(es)", button_type="success") # ----------------------------------- LOAD DATA # load model-making data end = datepicker_start.value start = end - timedelta(days=60) thistab.load_df(start, end) thistab.rf_clf() # load data for period to be predicted thistab.load_prediction_df(datepicker_start.value, datepicker_end.value) thistab.update_prediction_addresses_select() # tables hv_account_prediction_table = hv.DynamicMap( thistab.make_account_predictions, streams=[stream_launch]) account_prediction_table = renderer.get_plot( hv_account_prediction_table) hv_features_table = hv.DynamicMap(thistab.make_feature_importances) features_table = renderer.get_plot(hv_features_table) hv_accuracy_table = hv.DynamicMap(thistab.accuracy_table) accuracy_table = renderer.get_plot(hv_accuracy_table) hv_variable_plot = hv.DynamicMap(thistab.box_plot, streams=[stream_select_variable])\ .opts(plot=dict(width=800, height=500)) variable_plot = renderer.get_plot(hv_variable_plot) # add callbacks datepicker_start.on_change('value', update) datepicker_end.on_change('value', update) thistab.prediction_address_select.on_change( 'value', update_address_predictions) reset_prediction_address_button.on_click(thistab.reset_checkboxes) select_variable.on_change('value', update_select_variable) # put the controls in a single element controls = WidgetBox(select_variable, datepicker_start, datepicker_end, thistab.prediction_address_select, reset_prediction_address_button) controls_prediction = WidgetBox(datepicker_start, datepicker_end, thistab.prediction_address_select, reset_prediction_address_button) grid = gridplot( [[thistab.notification_div['top']], [Spacer(width=20, height=70)], [thistab.section_headers['churn']], [Spacer(width=20, height=70)], [accuracy_table.state, thistab.stats_information_div()], [features_table.state], [thistab.section_headers['variable behavior']], [Spacer(width=20, height=30)], [variable_plot.state, controls], 
[thistab.section_headers['predictions']], [Spacer(width=20, height=30)], [ account_prediction_table.state, thistab.metrics_div, controls_prediction ], [thistab.notification_div['bottom']]]) tab = Panel(child=grid, title='predictions: accounts by value') return tab except Exception: logger.error('rendering err:', exc_info=True) text = 'predictions: accounts by value' return tab_error_flag(text)
def account_activity_tab(DAYS_TO_LOAD=90, panel_title=None):
    """Build the 'account activity' dashboard tab.

    Loads warehouse data for the last DAYS_TO_LOAD days, wires up Bokeh
    widgets to HoloViews streams, and returns a Panel (or an error tab on
    failure).

    FIX(review): Thistab.load_df previously required `cols` and
    `timestamp_col` positionally, but the `update` date-picker callback called
    it with only (start, end) — every date change raised TypeError.  Both
    parameters now have backward-compatible defaults (`cols=None` falls back
    to self.cols), and `timestamp_col` is passed through to df_load instead
    of being silently ignored.
    """
    class Thistab(Mytab):
        def __init__(self, table, cols=[], dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols, panel_title=panel_title)
            self.table = table
            self.cols = cols
            # filter/aggregation state driven by the widgets below
            self.period = menus['period'][0]
            self.update_type = menus['update_type'][0]
            self.status = menus['status'][0]
            self.account_type = menus['account_type'][0]
            self.trigger = 0  # monotonically increasing stream nonce
            self.df_warehouse = None

            # correlation thresholds (abs r) for labeling relationships
            self.variable = 'aion_fork'
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ style='width:350px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.feature_list = hyp_variables.copy()
            self.groupby_dict = groupby_dict
            self.pym = PythonMongo('aion')

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                          position:relative;background:black;margin-bottom:200px">
                          <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'account activity': self.section_header_div(
                    text='Account activity:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
                'relationships': self.section_header_div(
                    text='Relationships:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Wrap `text` in a styled HTML header and return it as a Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def clean_data(self, df):
            """Replace NaN and +/-inf with 0."""
            df = df.fillna(0)
            df[df == -inf] = 0
            df[df == inf] = 0
            return df

        def load_df(self, start_date, end_date, cols=None,
                    timestamp_col='block_timestamp'):
            """Load self.df for the date range via Mytab.df_load.

            `cols` defaults to the column list this tab was constructed with,
            so callbacks may call load_df(start, end) without repeating it.
            """
            try:
                if cols is None:
                    cols = self.cols
                self.df_load(start_date, end_date, cols=cols,
                             timestamp_col=timestamp_col)
                #logger.warning('df loaded:%s',self.df.head())
            except Exception:
                logger.warning('load df', exc_info=True)

        def prep_data(self):
            """Index self.df by block_timestamp into self.df1 (for resampling)."""
            try:
                #self.df = dd.dataframe.from_pandas(self.df,npartitions=10)
                # make timestamp into index
                logger.warning('%s', self.df['block_timestamp'].head())
                self.df1 = self.df.set_index('block_timestamp')
            except Exception:
                logger.warning('load df', exc_info=True)

        def plot_account_activity(self, launch=-1):
            """Transaction counts per period plus their percentage change,
            filtered by the current update_type/account_type selections."""
            try:
                df = self.df1
                if self.update_type != 'all':
                    df = df[df['update_type'] == self.update_type]
                if self.account_type != 'all':
                    df = df[df['account_type'] == self.account_type]
                logger.warning('df columns:%s', df.columns)
                df = df[df.amount >= 0]
                #logger.warning('line 100 df:%s',df.head(30))
                df = df.resample(self.period).agg({'address': 'count'})
                df = df.reset_index()
                df = df.compute()
                df = df.rename(index=str, columns={'address': 'period_activity'})
                df['activity_delta(%)'] = df['period_activity'].pct_change(fill_method='ffill')
                df['activity_delta(%)'] = df['activity_delta(%)'].multiply(100)
                df = df.fillna(0)
                logger.warning('df in balance after resample:%s', df.tail(10))
                return df.hvplot.line(x='block_timestamp', y=['period_activity'],
                                      title='# of transactions') + \
                    df.hvplot.line(x='block_timestamp', y=['activity_delta(%)'],
                                   title='% change in # of transactions')
            except Exception:
                logger.warning('plot account activity', exc_info=True)

        def plot_account_status(self, launch=-1):
            """Counts of accounts in the selected status per period, plus the
            percentage change between periods."""
            try:
                state = self.status
                #logger.warning('df1 head:%s',self.df1.columns)
                df = self.df1
                if self.account_type != 'all':
                    df = self.df1[self.df1['account_type'] == self.account_type]
                df = df[df['status'] == state]
                df = df.resample(self.period).agg({'status': 'count'})
                df = df.reset_index()
                df = df.compute()
                df['perc_change'] = df['status'].pct_change(fill_method='ffill')
                df.perc_change = df.perc_change.multiply(100)
                df = df.fillna(0)
                # df = self.clean_data(df)
                value_label = '# ' + state
                gc.collect()
                title1 = 'accounts {} by period'.format(state)
                title2 = 'percentage {} change by period'.format(state)
                return df.hvplot.line(x='block_timestamp', y=['status'],
                                      value_label=value_label, title=title1) + \
                    df.hvplot.line(x='block_timestamp', y=['perc_change'],
                                   value_label='%', title=title2)
            except Exception:
                logger.error('plot account status', exc_info=True)

        def title_div(self, text, width=700):
            """Return `text` wrapped in a colored <h2> as a Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Static HTML help text explaining the correlation table."""
            div_style = """ style='width:350px; margin-left:-500px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def hist(self, launch):
            """Histogram of the currently selected variable from corr_df."""
            try:
                return self.corr_df.hvplot.hist(
                    y=self.variable, bins=50, alpha=0.3, width=350, xaxis=False)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def correlation_table(self, launch):
            """Linear regression of self.variable against every other column
            of corr_df; labels each pair weak/moderate/strong (p < 0.05)."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'r': [],
                    'p-value': []
                }
                df = self.corr_df
                logger.warning(' df:%s', df.head(10))
                a = df[self.variable].tolist()
                for col in df.columns.tolist():
                    logger.warning('col :%s', col)
                    if col != self.variable:
                        logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, std_err = linregress(a, b)
                        logger.warning('slope:%s,intercept:%s,rvalue:%s,pvalue:%s,std_err:%s',
                                       slope, intercept, rvalue, pvalue, std_err)
                        if pvalue < 0.05:
                            if abs(rvalue) <= self.weak_thresh:
                                txt = 'none'
                            else:
                                strength = 'weak'
                                if rvalue > 0:
                                    direction = 'positive'
                                if rvalue < 0:
                                    direction = 'negative'
                                if abs(rvalue) > self.mod_thresh:
                                    strength = 'moderate'
                                if abs(rvalue) > self.strong_thresh:
                                    strength = 'strong'
                                txt = "{} {}".format(strength, direction)
                        else:
                            txt = 'Not significant'

                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['r'].append(round(rvalue, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))

                df = pd.DataFrame(
                    {
                        'Variable 1': corr_dict['Variable 1'],
                        'Variable 2': corr_dict['Variable 2'],
                        'Relationship': corr_dict['Relationship'],
                        'r': corr_dict['r'],
                        'p-value': corr_dict['p-value']
                    })
                logger.warning('df:%s', df.head(23))
                return df.hvplot.table(
                    columns=['Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'],
                    width=700, height=400, title='Correlation between variables')
            except Exception:
                logger.warning('correlation table', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter-matrix of self.variable against every other column of
            the resampled data; also refreshes corr_df and the variable menu."""
            try:
                #logger.warning('line 306 self.feature list:%s',self.feature_list)
                if self.update_type != 'all':
                    df = self.df1[self.df1['update_type'] == self.update_type]
                else:
                    df = self.df1
                #df = df[self.feature_list]
                df = df.resample(self.period).mean()
                df = df.reset_index()
                df = df.drop('block_timestamp', axis=1)
                df = df.fillna(0)
                df = df.compute()
                # NOTE(review): the two lines below overwrite the bitcoin and
                # ethereum market caps with AION's market cap — this looks
                # like a copy/paste bug (compare the *_close columns, which
                # are left untouched).  Left as-is pending confirmation that
                # bitcoin/ethereum market-cap source columns exist.
                df['bitcoin_market_cap'] = df['aion_market_cap']
                df['ethereum_market_cap'] = df['aion_market_cap']
                df = df.fillna(0)
                #logger.warning('line 302. df: %s',df.head(10))

                self.corr_df = df.copy()
                cols_lst = df.columns.tolist()
                cols_temp = cols_lst.copy()
                if self.variable in cols_temp:
                    cols_temp.remove(self.variable)
                # side effect: keep the variable dropdown in sync with the
                # columns actually present after resampling
                variable_select.options = cols_lst
                logger.warning('line 305 cols temp:%s', cols_temp)
                logger.warning('line 306 self.variable:%s', self.variable)
                logger.warning('line 307 df columns:%s', df.columns)

                p = df.hvplot.scatter(x=self.variable, y=cols_temp, width=400,
                                      subplots=True, shared_axes=False,
                                      xaxis=False).cols(3)
                return p
            except Exception:
                logger.error('matrix plot', exc_info=True)

    # ------------------- widget callbacks -------------------
    def update(attrname, old, new):
        """Date range changed: reload, re-prep, and re-fire all streams."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        # cols/timestamp_col now default sensibly inside load_df (see fix
        # note in the function docstring).
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        thistab.prep_data()
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.variable = variable_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_matrix.event(launch=thistab.trigger)
        thistab.notification_updater("Ready.")

    def update_resample(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.period = new
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.variable = variable_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_account_type(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.account_type = new
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_update_type(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.update_type = new
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_variable(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.variable = new
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_status(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.status = new
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        cols = list(set(hyp_variables +
                        ['address', 'update_type', 'account_type', 'balance',
                         'status', 'block_timestamp', 'timestamp_of_first_event']))
        thistab = Thistab(table='account_ext_warehouse', cols=cols)

        # STATIC DATES
        # format dates
        first_date_range = "2018-04-25 00:00:00"
        # DATEFORMAT is assumed to come from the Mytab base class — confirm.
        first_date_range = datetime.strptime(first_date_range, thistab.DATEFORMAT)
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = datetime_to_date(last_date - timedelta(days=DAYS_TO_LOAD))
        '''
        thistab.df = thistab.pym.load_df(start_date=first_date, end_date=last_date,
                                         cols=cols,table='account_ext_warehouse',timestamp_col='block_timestamp')
        '''
        thistab.load_df(start_date=first_date, end_date=last_date, cols=cols,
                        timestamp_col='block_timestamp')
        thistab.prep_data()

        # MANAGE STREAM
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix', launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start", min_date=first_date_range,
                                      max_date=last_date_range, value=first_date)
        datepicker_end = DatePicker(title="End", min_date=first_date_range,
                                    max_date=last_date_range, value=last_date)
        period_select = Select(title='Select aggregation period',
                               value=thistab.period,
                               options=menus['period'])
        variable_select = Select(title='Select variable',
                                 value='aion_fork',
                                 options=sorted(hyp_variables))
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])

        # --------------------- PLOTS----------------------------------
        width = 800
        hv_account_status = hv.DynamicMap(
            thistab.plot_account_status,
            streams=[stream_launch]).opts(plot=dict(width=width, height=400))
        hv_account_activity = hv.DynamicMap(
            thistab.plot_account_activity,
            streams=[stream_launch]).opts(plot=dict(width=width, height=400))
        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_corr])

        account_status = renderer.get_plot(hv_account_status)
        account_activity = renderer.get_plot(hv_account_activity)
        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        hist_plot = renderer.get_plot(hv_hist_plot)

        # handle callbacks
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        period_select.on_change('value', update_resample)
        update_type_select.on_change('value', update_update_type)
        account_type_select.on_change('value', update_account_type)
        variable_select.on_change('value', update_variable)
        status_select.on_change('value', update_status)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(
            datepicker_start,
            datepicker_end,
            period_select,
            update_type_select,
            account_type_select,
            status_select,
            variable_select)

        # create the dashboards
        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=50)],
            [thistab.section_headers['relationships']],
            [Spacer(width=20, height=30)],
            [matrix_plot.state, controls],
            [corr_table.state, thistab.corr_information_div()],
            [hist_plot.state],
            [thistab.section_headers['account activity']],
            [Spacer(width=20, height=30)],
            [account_status.state],
            [account_activity.state],
            [thistab.notification_div['bottom']]
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=thistab.panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(thistab.panel_title)
def eda_projects_tab(panel_title):
    """Build the 'EDA projects' Bokeh dashboard tab.

    Constructs a tab-local ``Thistab`` (subclass of the project's ``Mytab``)
    that loads project data from Mongo, prepares/resamples it, and renders
    scatter-matrix, correlation, non-parametric, lag and multiline plots via
    HoloViews DynamicMaps driven by launch streams.  Widget callbacks mutate
    ``thistab`` state and fire the streams to re-render.

    :param panel_title: title shown on the returned bokeh ``Panel``.
    :returns: a ``Panel`` with the composed grid layout, or the project's
        ``tab_error_flag`` panel if construction fails.
    """
    # Shared source for the lag-correlation DataTable; streamed to from
    # Thistab.lags_corr (closure variable, not an attribute).
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[], variable_2=[],
                                               relationship=[], lag=[],
                                               r=[], p_value=[]))

    class Thistab(Mytab):
        """State + plotting methods for the EDA projects tab."""

        # NOTE(review): mutable default argument ``dedup_cols=[]`` is shared
        # across calls — harmless here only if Mytab never mutates it; confirm.
        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None          # raw loaded dataframe
            self.df1 = None         # prepped/resampled dataframe (see prep_data)
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.trigger = 0        # monotonically increasing stream-launch counter
            # column -> aggregation used when resampling per project
            self.groupby_dict = {
                'project_duration': 'sum',
                'project_start_delay': 'mean',
                'project_end_delay': 'mean',
                'project_owner_age': 'mean',
                'project_owner_gender': 'mean',
                'milestone_duration': 'sum',
                'milestone_start_delay': 'mean',
                'milestone_end_delay': 'mean',
                'milestone_owner_age': 'mean',
                'milestone_owner_gender': 'mean',
                'task_duration': 'sum',
                'task_start_delay': 'sum',
                'task_end_delay': 'mean',
                'task_owner_age': 'mean',
                'task_owner_gender': 'mean'
            }
            self.feature_list = list(self.groupby_dict.keys())
            self.lag_variable = 'task_duration'
            self.lag_days = "1,2,3"     # comma-separated lags for lags_plot
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]
            # correlation-strength thresholds (used by corr_label in Mytab)
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = self.variables[0]   # currently selected DV
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            # filter state mirrored by the select widgets below
            self.status = 'all'
            self.pm_gender = 'all'
            self.m_gender = 'all'
            self.t_gender = 'all'
            self.type = 'all'
            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'type': [
                    'all', 'research', 'reconciliation', 'audit', 'innovation',
                    'construction', 'manufacturing', 'conference'
                ],
                'gender': ['all', 'male', 'female'],
                'variables': list(self.groupby_dict.keys()),
                'history_periods': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
            }
            self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'}
            self.timestamp_col = 'project_startdate_actual'
            # NOTE(review): self.resample_period is read in prep_data/multiline
            # but never initialized here — presumably set by Mytab or by the
            # update_resample callback; confirm against the base class.
            # NOTE(review): self.significant_effect_dict is used by
            # reset_adoption_dict but never initialized — would raise KeyError/
            # AttributeError if that path runs; confirm.
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag': self.section_header_div(text=lag_section_head_txt,
                                               width=600, html_header='h2',
                                               margin_top=5, margin_bottom=-155),
                'distribution': self.section_header_div(
                    text='Pre-transform distribution:',
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'relationships': self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'correlations': self.section_header_div(
                    text='Correlations:',
                    width=600, html_header='h3',
                    margin_top=5, margin_bottom=-155),
            }
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div wrapping *text* in *html_header*."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Show *text* in both the top and bottom notification banners."""
            txt = """<div style="text-align:center;background:black;width:100%;"> <h4 style="color:#fff;"> {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def reset_adoption_dict(self, variable):
            # NOTE(review): significant_effect_dict is never created in
            # __init__ — this raises AttributeError if called; looks copied
            # from another tab. Confirm before relying on it.
            self.significant_effect_dict[variable] = []

        # ////////////// DIVS /////////////////////////////////
        def title_div(self, text, width=700):
            """Return an h2-styled title Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Return the static 'how to interpret relationships' help panel."""
            div_style = """ style='width:350px; margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> Positive: as variable 1 increases, so does variable 2. </li> <li> Negative: as variable 1 increases, variable 2 decreases. </li> <li> Strength: decisions can be made on the basis of strong and moderate relationships. </li> <li> No relationship/not significant: no statistical support for decision making. </li> <li> The scatter graphs (below) are useful for visual confirmation. </li> <li> The histogram (right) shows the distribution of the variable. </li> </ul> </div> """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////
        def filter_df(self, df1):
            """Apply the current status/gender/type select filters to *df1*."""
            if self.status != 'all':
                df1 = df1[df1.status == self.status]
            if self.pm_gender != 'all':
                df1 = df1[df1.project_owner_gender == self.pm_gender]
            if self.m_gender != 'all':
                df1 = df1[df1.milestone_owner_gender == self.m_gender]
            if self.t_gender != 'all':
                df1 = df1[df1.task_owner_gender == self.t_gender]
            if self.type != 'all':
                df1 = df1[df1.type == self.type]
            return df1

        def prep_data(self, df1):
            """Filter, resample per project, apply lag shifts; store result in self.df1.

            Exceptions are logged and swallowed (dashboard must keep running).
            """
            try:
                '''
                df1[self.timestamp_col] = df1[self.timestamp_col].apply(lambda x: datetime(x.year, x.month, x.day, x.hour,0,0))
                '''
                df1 = df1.set_index(self.timestamp_col)
                logger.warning('LINE 195 df:%s', df1.head())
                # handle lag for all variables
                df = df1.copy()
                df = self.filter_df(df)
                logger.warning('LINE 199: length before:%s', len(df))
                # NOTE(review): 'slice' shadows the builtin; keeps the
                # 'project' key column aside while features are cast to float.
                slice = df[['project']]
                df = df[list(self.groupby_dict.keys())]
                logger.warning('LINE 218: columns:%s', df.head())
                df = df.astype(float)
                df = pd.concat([df, slice], axis=1)
                df = df.groupby('project').resample(self.resample_period).agg(
                    self.groupby_dict)
                logger.warning('LINE 201: length after:%s', len(df))
                df = df.reset_index()
                # shift every feature except the selected DV by self.lag
                vars = self.feature_list.copy()
                if int(self.lag) > 0:
                    for var in vars:
                        if self.variable != var:
                            df[var] = df[var].shift(int(self.lag))
                df = df.dropna()
                self.df1 = df
                # NOTE(review): logs self.df (raw frame), not the freshly
                # prepped df — probably meant self.df1; logging only.
                logger.warning('line 184- prep data: df:%s', self.df.head(10))
            except Exception:
                logger.error('prep data', exc_info=True)

        def lags_plot(self, launch):
            """Scatter the DV against lagged copies of self.lag_variable.

            Lags come from the comma-separated self.lag_days string; bad
            entries are logged and skipped. Also refreshes the lag-correlation
            table via lags_corr.
            """
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                cols = [self.lag_variable]
                lags = self.lag_days.split(',')
                for day in lags:
                    try:
                        label = self.lag_variable + '_' + day
                        df[label] = df[self.lag_variable].shift(int(day))
                        cols.append(label)
                    except:
                        logger.warning('%s is not an integer', day)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                logger.warning('in lags plot: df:%s', df.head(10))
                return df.hvplot(x=self.variable, y=cols, kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Stream DV-vs-lagged-column correlations into lags_corr_src.

            The lag number is parsed from the column-name suffix; columns
            without an integer suffix get lag 'None'. Returns a DataTable
            bound to the shared source (the layout uses its own table, so the
            streaming side effect is what matters here).
            """
            try:
                corr_dict_data = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'lag': [],
                    'r': [],
                    'p_value': []
                }
                a = df[self.variable].tolist()
                for col in df.columns:
                    if col not in [self.timestamp_col, self.variable]:
                        # find lag
                        var = col.split('_')
                        try:
                            tmp = int(var[-1])
                            lag = tmp
                        except Exception:
                            lag = 'None'
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        corr_dict_data['variable_1'].append(self.variable)
                        corr_dict_data['variable_2'].append(col)
                        corr_dict_data['relationship'].append(txt)
                        corr_dict_data['lag'].append(lag)
                        corr_dict_data['r'].append(round(rvalue, 4))
                        corr_dict_data['p_value'].append(round(pvalue, 4))
                # rollover = batch size, so only the latest batch is shown
                lags_corr_src.stream(corr_dict_data,
                                     rollover=(len(corr_dict_data['lag'])))
                columns = [
                    TableColumn(field="variable_1", title="variable 1"),
                    TableColumn(field="variable_2", title="variable 2"),
                    TableColumn(field="relationship", title="relationship"),
                    TableColumn(field="lag", title="lag(days)"),
                    TableColumn(field="r", title="r"),
                    TableColumn(field="p_value", title="p_value"),
                ]
                data_table = DataTable(source=lags_corr_src, columns=columns,
                                       width=500, height=280)
                return data_table
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """Pearson-style correlation (via self.corr_label) of the DV vs every feature; returns an hvplot table."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'r': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop(self.timestamp_col, axis=1)
                # df = df.compute()
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    logger.warning('col :%s', col)
                    if col != self.variable:
                        logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        # add to dict
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['r'].append(round(rvalue, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))
                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'r': corr_dict['r'],
                    'p-value': corr_dict['p-value']
                })
                # logger.warning('df:%s',df.head(23))
                return df.hvplot.table(columns=[
                    'Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'
                ], width=550, height=200, title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """Mann-Whitney U (via self.mann_whitneyu_label) of the DV vs every feature; returns an hvplot table."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'stat': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop(self.timestamp_col, axis=1)
                # df = df.compute()
                # logger.warning('line df:%s',df.head(10))
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    logger.warning('col :%s', col)
                    if col != self.variable:
                        logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['stat'].append(round(stat, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))
                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'stat': corr_dict['stat'],
                    'p-value': corr_dict['p-value']
                })
                # logger.warning('df:%s',df.head(23))
                return df.hvplot.table(
                    columns=[
                        'Variable 1', 'Variable 2', 'Relationship', 'stat',
                        'p-value'
                    ],
                    width=550, height=200,
                    title='Non parametric relationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            """Histogram grid of all features (currently not wired into the layout)."""
            try:
                return self.df.hvplot.hist(y=self.feature_list, subplots=True,
                                           shared_axes=False, bins=25,
                                           alpha=0.3, width=300).cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter matrix of every feature (y) against the selected DV (x)."""
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)
                df = self.df1
                if df is not None:
                    # thistab.prep_data(thistab.df)
                    if self.timestamp_col in df.columns:
                        df = df.drop(self.timestamp_col, axis=1)
                    df = df.fillna(0)
                    # logger.warning('line 302. df: %s',df.head(10))
                    cols_temp = self.feature_list.copy()
                    if self.variable in cols_temp:
                        cols_temp.remove(self.variable)
                    # variable_select.options = cols_lst
                    p = df.hvplot.scatter(x=self.variable, y=cols_temp,
                                          width=330, subplots=True,
                                          shared_axes=False,
                                          xaxis=False).cols(4)
                else:
                    # NOTE(review): df is None on this branch, so df.hvplot
                    # raises AttributeError — caught below, returning None.
                    # The intended dummy plot never renders; confirm intent.
                    p = df.hvplot.scatter(x=[0, 0, 0], y=[0, 0, 0], width=330)
                return p
            except Exception:
                logger.error('matrix plot', exc_info=True)

        def multiline(self, launch=1):
            """Overlay one mean-resampled line per level of the x-grouping variable."""
            try:
                yvar = self.multiline_vars['y']
                xvar = self.multiline_vars['x']
                df = self.df.copy()
                df = df[[xvar, yvar, self.timestamp_col]]
                df = df.set_index(self.timestamp_col)
                df = df.groupby(xvar).resample(self.resample_period).agg(
                    {yvar: 'mean'})
                df = df.reset_index()
                lines = df[xvar].unique()
                # split data frames
                dfs = {}
                for idx, line in enumerate(lines):
                    dfs[line] = df[df[xvar] == line]
                    dfs[line] = dfs[line].fillna(0)
                    logger.warning('LINE 428:%s - %s:', line, dfs[line].head())
                    if idx == 0:
                        p = dfs[line].hvplot.line(x=self.timestamp_col, y=yvar,
                                                  width=1200,
                                                  height=500).relabel(line)
                    else:
                        # NOTE(review): width=2 here vs width=1200 above —
                        # possibly a typo for line_width; confirm.
                        p *= dfs[line].hvplot.line(x=self.timestamp_col,
                                                   y=yvar, width=2,
                                                   height=500).relabel(line)
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    # ----------------- WIDGET CALLBACKS (closures over thistab) -----------
    def update_variable(attr, old, new):
        """Variable-select callback.

        NOTE(review): thistab.variable is only assigned for the three
        'owner gender' labels (which are not in the select's options) —
        an ordinary selection never updates thistab.variable.  Also
        thistab.adoption_variables and thistab.section_head_updater are not
        defined on this Thistab (they appear to belong to another tab), so
        this callback likely raises AttributeError.  Confirm and fix.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data(thistab.df)
        if 'milestone owner gender' == new:
            thistab.variable = 'm_gender_code'
        if 'project owner gender' == new:
            thistab.variable = 'pm_gender_code'
        if 'task owner gender' == new:
            thistab.variable = 't_gender_code'
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """Lag-variable select callback: re-prep data and refresh the lag plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        """Filter-select callback: copy widget values into thistab and refresh."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.pm_gender = pm_gender_select.value
        thistab.m_gender = m_gender_select.value
        thistab.t_gender = t_gender_select.value
        thistab.status = status_select.value
        thistab.type = type_select.value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):  # update lag & cryptocurrency
        """Lag-select callback: apply the new global lag and refresh."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Date-picker callback: reload from Mongo, re-encode genders, refresh."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        # encode genders numerically: male -> 1, everything else -> 2
        thistab.df['project_owner_gender'] = thistab.df[
            'project_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.df['milestone_owner_gender'] = thistab.df[
            'milestone_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.df['task_owner_gender'] = thistab.df[
            'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Resample-period select callback."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Lags-button click callback: take the text-input lag list and refresh."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        """Multiline x/y select callback."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars['x'] = multiline_x_select.value
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'project_composite1'
        thistab = Thistab(table, [], [])
        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=30)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        if len(thistab.df) > 0:
            # keep a readable copy of the gender for the multiline grouping,
            # then numerically encode the originals (male -> 1, else -> 2)
            thistab.df['manager_gender'] = thistab.df['project_owner_gender']
            thistab.df['project_owner_gender'] = thistab.df[
                'project_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            thistab.df['milestone_owner_gender'] = thistab.df[
                'milestone_owner_gender'].apply(
                    lambda x: 1 if x == 'male' else 2)
            thistab.df['task_owner_gender'] = thistab.df[
                'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            logger.warning('LINE 527:columns %s', list(thistab.df.columns))
        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)
        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)
        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)
        type_select = Select(title='Select project type',
                             value=thistab.type,
                             options=thistab.menus['type'])
        status_select = Select(title='Select project status',
                               value=thistab.status,
                               options=thistab.menus['status'])
        pm_gender_select = Select(title="Select project owner's gender",
                                  value=thistab.pm_gender,
                                  options=thistab.menus['gender'])
        m_gender_select = Select(title="Select milestone owner's gender",
                                 value=thistab.m_gender,
                                 options=thistab.menus['gender'])
        t_gender_select = Select(title="Select task owner's gender",
                                 value=thistab.t_gender,
                                 options=thistab.menus['gender'])
        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])
        multiline_y_select = Select(title='Select comparative DV(y)',
                                    value=thistab.multiline_vars['y'],
                                    options=[
                                        'remuneration', 'delay_start',
                                        'delay_end', 'project_duration'
                                    ])
        multiline_x_select = Select(
            title='Select comparative IV(x)',
            value=thistab.multiline_vars['x'],
            options=['manager_gender', 'type', 'status'])
        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55, width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10, button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src, columns=columns,
                                    width=500, height=200)
        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        # hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])
        hv_multiline = hv.DynamicMap(thistab.multiline,
                                     streams=[stream_launch])
        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        # NOTE(review): nonpara_table is rendered but never placed in the grid.
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)
        multiline = renderer.get_plot(hv_multiline)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        pm_gender_select.on_change('value', update_IVs)
        m_gender_select.on_change('value', update_IVs)
        t_gender_select.on_change('value', update_IVs)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array
        status_select.on_change('value', update_IVs)
        type_select.on_change('value', update_IVs)
        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_lag = WidgetBox(lags_input, lags_input_button,
                                 lag_variable_select)
        controls_multiline = WidgetBox(multiline_x_select, multiline_y_select)
        controls_page = WidgetBox(datepicker_start, datepicker_end,
                                  variable_select, type_select, status_select,
                                  resample_select, pm_gender_select,
                                  m_gender_select, t_gender_select)
        # NOTE(review): controls_gender is built but not used in the grid.
        controls_gender = WidgetBox(pm_gender_select, m_gender_select,
                                    t_gender_select)

        # create the dashboards
        grid = gridplot(
            [[thistab.notification_div['top']],
             [Spacer(width=20, height=70)],
             [thistab.section_headers['relationships']],
             [Spacer(width=20, height=30)],
             [matrix_plot.state, controls_page],
             [thistab.section_headers['correlations']],
             [Spacer(width=20, height=30)],
             [corr_table.state, thistab.corr_information_div()],
             [thistab.title_div('Compare levels in a variable', 400)],
             [Spacer(width=20, height=30)],
             [multiline.state, controls_multiline],
             [thistab.section_headers['lag']],
             [Spacer(width=20, height=30)],
             [lags_plot.state, controls_lag],
             [lags_corr_table],
             [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
def kpi_inventory_tab(panel_title, credentials): class Thistab(KPI): def __init__(self, table, credentials, cols=[]): KPI.__init__(self, table, name='inventory', cols=cols, credentials=credentials) self.table = table self.df = None # setup selects self.menus = {'gender': ['all', 'Male', 'Female']} self.select = {} self.select_values = {} for key in self.menus.keys(): title = 'Select {}'.format(key) self.select[key] = Select(title=title, value='all', options=self.menus[key]) self.select_values[key] = 'all' self.timestamp_col = 'timestamp_delivered' self.variable = 'delivery_amount' self.groupby_dict = {'delivery_amount': 'sum'} self.multiline_resample_period = 'D' self.pop = { 'history_periods': 3, 'aggregate': 'mean', 'start': datetime(2015, 1, 5, 0, 0, 0), 'end': self.pop_start_date + timedelta(days=8) } self.cols = cols self.load_data_start_date = datetime(2014, 1, 1, 0, 0, 0) self.load_data_end_date = datetime.now() self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0) # cards self.KPI_card_div = self.initialize_cards(self.page_width, height=350) # ------- DIVS setup begin self.page_width = 1200 txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome') self.notification_div = { 'top': Div(text=txt, width=self.page_width, height=20), 'bottom': Div(text=txt, width=self.page_width, height=10), } self.section_divider = '-----------------------------------' self.section_headers = { 'cards': self.section_header_div(text='Period to date({})):{}'.format( self.variable, self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'pop': self.section_header_div(text='Period over period:{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'dow': self.section_header_div( text='Compare days of the week:'.format( 
self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), } # ---------------------- DIVS ---------------------------- def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150): text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \ .format(margin_top, margin_bottom, html_header, text, html_header) return Div(text=text, width=width, height=15) def section_header_div_updater(self, which_header, update_text): text = """<div style="margin-top:150 px;margin-bottom:--150px;"> <h2 style="color:#4221cc;">{}</h2></div>""" \ .format(update_text) self.section_headers[which_header].text = text # ---------------------- DIVS ---------------------------- def reset_checkboxes(self, value='all', checkboxgroup=''): try: self.checkboxgroup[checkboxgroup].value = value except Exception: logger.error('reset checkboxes', exc_info=True) def information_div(self, width=400, height=300): div_style = """ style='width:350px;margin-right:-800px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> </li> <li> </li> <li> </li> <li> </li> <li> </li> <li> </li> </ul> </div> """.format(div_style, self.header_style) div = Div(text=txt, width=width, height=height) return div # ------------------- LOAD AND SETUP DATA ----------------------- def filter_df(self, df): try: for item in self.select_values.keys(): if self.select_values[item] != 'all': df = df[df[item] == self.select_values[item]] return df except Exception: logger.error('filters', exc_info=True) def set_select_menus(self, df): try: for item in self.select.keys(): if item in df.columns and len(df) > 0: lst = list(set(df[item].values)) lst.append('all') sorted(lst) self.select[item].options = lst except Exception: logger.error('set filters menus', exc_info=True) # -------------------- CARDS 
----------------------------------------- def initialize_cards(self, width, height=250): try: txt = '' for period in ['year', 'quarter', 'month', 'week']: design = random.choice(list(KPI_card_css.keys())) txt += self.card(title='', data='', card_design=design) text = """<div style="margin-top:100px;display:flex; flex-direction:row;"> {} </div>""".format(txt) div = Div(text=text, width=width, height=height) return div except Exception: logger.error('initialize cards', exc_info=True) # -------------------- GRAPHS ------------------------------------------- def graph_periods_to_date(self, df1, timestamp_filter_col, variable): try: dct = {} for idx, period in enumerate( ['week', 'month', 'quarter', 'year']): if df1 is not None: df = self.period_to_date( df1, timestamp=dashboard_config['dates']['last_date'], timestamp_filter_col=timestamp_filter_col, period=period) # get unique instances # df = df[[variable]] df = df.drop_duplicates(keep='first') # logger.warning('post duplicates dropped:%s', df.head(10)) data = 0 if self.groupby_dict[variable] == 'sum': data = int(df[variable].sum()) elif self.groupby_dict[variable] == 'mean': data = "{}%".format(round(df[variable].mean(), 3)) else: data = int(df[variable].count()) del df gc.collect() dct[period] = data else: dct[period] = 0 self.update_cards(dct) except Exception: logger.error('graph periods to date', exc_info=True) def period_over_period(self, df, start_date, end_date, period, history_periods, timestamp_col): def label_qtr_pop(y): try: curr_quarter = int((y.month - 1) / 3 + 1) start = datetime(y.year, 3 * curr_quarter - 2, 1) if isinstance(y, date): start = start.date() return abs((start - y).days) except Exception: logger.error('df label quarter', exc_info=True) try: # make columns for each history period if len(df) == 0: dfi = pd.date_range(self.pop['start'], self.pop['end'], freq='D') dfi.rename(self.timestamp_col) df = pd.DataFrame(random, index=dfi) df[self.variable] = 0 print('LINE 239:%s', df.head()) df = 
df.rename(columns={ self.variable: '0_periods_prev', self.timestamp_col: 'date' }) df.set_index('date', inplace=True) for count in range(1, history_periods + 1): label = f"{count}_periods_prev" print('LINE 252,label', label) print('LINE 253', df.head()) try: if period == 'month': df[label] = df.shift(periods=30) elif period == 'year': df[label] = df.shift(periods=365) elif period == 'week': df[label] = df.shift(periods=7) elif period == 'quarter': df[label] = df.shift(periods=90) df = df.fillna(0) except Exception: df[label] = 0 print('LINE 265, COUNT:', count) return df except Exception: logger.error('graph period over period', exc_info=True) def graph_period_over_period(self, period): try: periods = [period] start_date = self.pop['start'] end_date = self.pop['end'] if isinstance(start_date, date): start_date = datetime.combine(start_date, datetime.min.time()) if isinstance(end_date, date): end_date = datetime.combine(end_date, datetime.min.time()) cols = [self.variable, self.timestamp_col] df = self.load_df(start_date, end_date, cols=cols, timestamp_col=self.timestamp_col) for idx, period in enumerate(periods): df_period = self.period_over_period( df, start_date=start_date, end_date=end_date, period=period, history_periods=self.pop['history_periods'], timestamp_col=self.timestamp_col) title = "{} over {}".format(period, period) plotcols = list(df.columns) plotcols = plotcols.remove(self.variable) if idx == 0: p = df_period.hvplot.bar('date', plotcols, rot=45, title=title, stacked=False, width=1200, height=500) else: p += df_period.hvplot.bar('date', plotcols, rot=45, title=title, stacked=False, width=1200, height=500) return p except Exception: logger.error('period over period to date', exc_info=True) def pop_week(self, launch=-1): try: return self.graph_period_over_period('week') except Exception: logger.error('pop week', exc_info=True) def pop_month(self, launch=-1): try: return self.graph_period_over_period('month') except Exception: logger.error('pop month', 
exc_info=True) def pop_quarter(self, launch=-1): try: return self.graph_period_over_period('quarter') except Exception: logger.error('pop quarter', exc_info=True) def pop_year(self, launch=-1): try: return self.graph_period_over_period('year') except Exception: logger.error('pop year', exc_info=True) def multiline_dow(self, launch=1): try: df = self.df.copy() dct = {'Y': 'year', 'M': 'month', 'W': 'week', 'Q': 'Qtr'} resample_period = dct[self.multiline_resample_period] yvar = self.multiline_vars['y'] xvar = 'day_of_week' df[resample_period] = df[self.timestamp_col].dt.to_period( self.multiline_resample_period) df[resample_period] = df[resample_period].astype('str') df[xvar] = df[self.timestamp_col].dt.day_name() df = df.groupby([xvar, resample_period]).agg({yvar: 'mean'}) df = df.reset_index() # logger.warning('LINE 402 df:%s',df.head(20)) p = df.hvplot.line(resample_period, yvar, by='day_of_week', width=1200, height=500) p.opts(xrotation=45) return p except Exception: logger.error('multiline plot', exc_info=True) def update(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") for item in thistab.select_values.keys(): thistab.select_values[item] = thistab.select[item].value thistab.graph_periods_to_date(thistab.df, thistab.timestamp_col, thistab.variable) thistab.section_header_updater('cards') thistab.section_header_updater('pop') thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("ready") def update_variable(attrname, old, new): thistab.notification_updater( "Calculations underway. 
Please be patient") thistab.variable = variable_select.value thistab.graph_periods_to_date(thistab.df, thistab.timestamp_col, thistab.variable) thistab.section_header_div_updater('cards', thistab.variable) # thistab.section_header_updater('cards',label='') # thistab.section_header_updater('pop',label='') thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("ready") def update_period_over_period(): thistab.notification_updater( "Calculations underway. Please be patient") thistab.pop['history_periods'] = history_periods_select.value thistab.pop_start_date = datepicker_pop_start.value # trigger period over period thistab.pop_end_date = datepicker_pop_end.value # trigger period thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("ready") def update_history_periods(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") thistab.pop['history_periods'] = pop_number_select.value thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("ready") try: table = 'inventory_warehouse' cols = ['delivery_amount', 'timestamp_delivered', 'gender'] thistab = Thistab(table, credentials=credentials, cols=cols) # ------------------------------------- SETUP ---------------------------- # format dates first_date_range = thistab.initial_date last_date_range = datetime.now().date() last_date = dashboard_config['dates']['last_date'] first_date = datetime(2014, 1, 1, 0, 0, 0) cols = [thistab.variable, thistab.timestamp_col] thistab.df = thistab.load_df(first_date, last_date, cols, thistab.timestamp_col) thistab.set_select_menus(thistab.df) thistab.graph_periods_to_date( thistab.df, timestamp_filter_col=thistab.timestamp_col, variable=thistab.variable) # MANAGE STREAM # date comes out stream in milliseconds # --------------------------------CREATE WIDGETS --------------------------------- daynum = datetime.now().day if daynum > 3: 
thistab.pop_end_date = datetime.now().date() - timedelta( days=daynum) thistab.pop_start_date = thistab.pop_end_date - timedelta(days=7) else: thistab.pop_start_date = thistab.first_date_in_period( thistab.pop_end_date, 'week') logger.warning('LINE 500: POP Start: END %s:%s', thistab.pop_start_date, thistab.pop_end_date) stream_launch = streams.Stream.define('Launch', launch=-1)() stream_launch_multiline = streams.Stream.define('Launch', launch=-1)() history_periods_select = Select( title='Select # of comparative periods', value=str(thistab.pop['history_periods']), options=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']) datepicker_pop_start = DatePicker(title="Period start", min_date=first_date_range, max_date=last_date_range, value=thistab.load_data_start_date) datepicker_pop_end = DatePicker(title="Period end", min_date=first_date_range, max_date=last_date_range, value=thistab.load_data_end_date) pop_number_select = Select( title='Select # of comparative periods', value=str(5), options=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']) pop_button = Button(label="Select dates/periods, then click me!", width=15, button_type="success") variable_select = Select(title='Select variable', value=thistab.variable, options=[thistab.variable] + list(thistab.select_values.keys())) # --------------------------------- GRAPHS --------------------------- hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch]) pop_week = renderer.get_plot(hv_pop_week) hv_pop_month = hv.DynamicMap(thistab.pop_month, streams=[stream_launch]) pop_month = renderer.get_plot(hv_pop_month) hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch]) pop_quarter = renderer.get_plot(hv_pop_quarter) # -------------------------------- CALLBACKS ------------------------ # datepicker_start.on_change('value', update) # datepicker_end.on_change('value', update) for key in thistab.select_values.keys(): thistab.select[key].on_change('value', update) 
variable_select.on_change('value', update_variable) pop_button.on_click(update_period_over_period) # -----------------------------------LAYOUT ---------------------------- # put the controls in a single element controls = WidgetBox(variable_select, thistab.select['gender']) controls_pop = WidgetBox(datepicker_pop_start, datepicker_pop_end, history_periods_select, pop_button) # create the dashboards grid = gridplot([[thistab.notification_div['top']], [Spacer(width=20, height=70)], [thistab.section_headers['cards']], [Spacer(width=20, height=2)], [thistab.KPI_card_div, controls], [thistab.section_headers['pop']], [Spacer(width=20, height=25)], [pop_week.state, controls_pop], [pop_month.state], [pop_quarter.state], [thistab.notification_div['bottom']]]) # Make a tab with the layout tab = Panel(child=grid, title=panel_title) return tab except Exception: logger.error('rendering err:', exc_info=True) return tab_error_flag(panel_title)
def kpi_bcc_rentals_visitor_tab(panel_title):
    """
    Build the BCC rentals/visitor KPI dashboard tab.

    Loads visit/rental records from Mongo, renders period-to-date KPI cards,
    period-over-period bar charts, and a day-of-week comparison line chart,
    and wires Bokeh widgets (filters, date pickers, variable selects) to the
    Holoviews streams that refresh those plots.

    :param panel_title: title shown on the rendered Panel tab.
    :return: a bokeh ``Panel``; on any rendering error, ``tab_error_flag(panel_title)``.
    """
    class Thistab(KPI):
        def __init__(self, table, cols=[]):
            KPI.__init__(self, table, name='rentals', cols=cols)
            self.table = table
            self.df = None  # cache of the most recently loaded dataframe
            self.pym = PythonMongo('aion')
            self.checkboxgroup = {
                'category': [],
                'item': [],
                'area': [],
                'visit_duration': []
            }

            self.multiline_vars = {
                'y': 'visit_duration'
            }
            # Aggregation applied per variable when rolling up for KPI cards.
            self.groupby_dict = {
                'item': 'count',
                'area': 'count',
                'category': 'count',
                'status': 'count',
                'gender': 'count',
                'visit_duration': 'sum'
            }

            # Setup selects: one 'all' default per filterable column.
            self.select_values = {}
            self.select_menus = {}
            for item in ['area', 'item', 'category', 'status', 'gender']:
                self.select_values[item] = 'all'
                self.select_menus[item] = ['all']

            self.select = {}
            for item in ['area', 'item', 'category', 'status', 'gender']:
                self.select[item] = Select(title='Select ' + item,
                                           value='all',
                                           options=self.select_menus[item])

            self.timestamp_col = 'visit_start'
            self.variable = 'item'
            self.multiline_resample_period = 'M'
            self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0)

            # cards
            # NOTE(review): initialize_cards reads self.page_width BEFORE it is
            # reassigned to 1200 below, so the cards use the width inherited
            # from the KPI base class — confirm this ordering is intended.
            self.KPI_card_div = self.initialize_cards(self.page_width, height=350)
            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                          position:relative;background:black;margin-bottom:200px">
                          <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards': self.section_header_div(
                    text='Period to date:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
                'pop': self.section_header_div(
                    text='Period over period:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
                # FIX: original format string had no '{}' placeholder, so the
                # section divider was silently dropped from this header.
                'dow': self.section_header_div(
                    text='Compare days of the week:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            # Wrap `text` in a styled header tag inside a div with margins.
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        # ---------------------- DIVS ----------------------------
        def reset_checkboxes(self, value='all', checkboxgroup=''):
            # Reset one checkbox group back to its default selection.
            try:
                self.checkboxgroup[checkboxgroup].value = value
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        def information_div(self, width=400, height=300):
            # Static "how to interpret" panel shown beside the graphs.
            # NOTE(review): self.header_style is not set in this class —
            # presumably provided by the KPI base; confirm.
            div_style = """ 
                style='width:350px;margin-right:-800px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;'
            """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                    e7
                </li>
                <li>
                </li>
                <li>
                </li>
            </ul>
            </div>
            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ------------------- LOAD AND SETUP DATA -----------------------
        def filter_df(self, df):
            # Apply every non-'all' select filter as an equality filter.
            try:
                for item in self.select_values.keys():
                    if self.select_values[item] != 'all':
                        df = df[df[item] == self.select_values[item]]
                return df
            except Exception:
                logger.error('filters', exc_info=True)

        def set_select_menus(self, df):
            # Refresh each select's options from the distinct values present.
            try:
                for item in self.select.keys():
                    if item in df.columns and len(df) > 0:
                        logger.warning('LINE 151: item: %s', item)
                        lst = list(set(df[item].values))
                        lst.append('all')
                        # FIX: `sorted(lst)` returned a new list that was
                        # discarded; sort in place so the menu is ordered.
                        lst.sort()
                        logger.warning('LINE 157: LIST: %s', lst)
                        self.select[item].options = lst
            except Exception:
                logger.error('set filters menus', exc_info=True)

        def load_df_pym(self, req_startdate, req_enddate, cols, timestamp_col):
            # Serve from the cached frame when it covers the requested window;
            # otherwise reload from Mongo. Always refreshes the select menus.
            try:
                # get min and max of loaded df
                if self.df is not None:
                    loaded_min = self.df[timestamp_col].min()
                    loaded_max = self.df[timestamp_col].max()

                    if loaded_min <= req_startdate and loaded_max >= req_enddate:
                        df = self.df[(self.df[timestamp_col] >= req_startdate) &
                                     (self.df[timestamp_col] <= req_enddate)]
                        df = self.filter_df(df)
                    else:
                        df = self.pym.load_df(req_startdate, req_enddate,
                                              table=self.table, cols=cols,
                                              timestamp_col=timestamp_col)
                else:
                    df = self.pym.load_df(req_startdate, req_enddate,
                                          table=self.table, cols=cols,
                                          timestamp_col=timestamp_col)
                    df = self.filter_df(df)

                # logger.warning('LINE 185: item: %s', df.head())
                self.set_select_menus(df)
                return df
            except Exception:
                logger.error('load_df', exc_info=True)

        # -------------------- CARDS -----------------------------------------
        def initialize_cards(self, width, height=250):
            # Render one (initially empty) KPI card per period, side by side.
            try:
                txt = ''
                for period in ['year', 'quarter', 'month', 'week']:
                    design = random.choice(list(KPI_card_css.keys()))
                    txt += self.card(title='', data='', card_design=design)

                text = """<div style="margin-top:100px;display:flex;
                          flex-direction:row;">
                          {}
                          </div>""".format(txt)
                div = Div(text=text, width=width, height=height)
                return div
            except Exception:
                logger.error('initialize cards', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------
        def graph_periods_to_date(self, df1, timestamp_filter_col, variable):
            # Compute one aggregate per period (week/month/quarter/year) and
            # push the results onto the KPI cards.
            try:
                dct = {}
                for idx, period in enumerate(['week', 'month', 'quarter', 'year']):
                    df = self.period_to_date(
                        df1, timestamp=dashboard_config['dates']['last_date'],
                        timestamp_filter_col=timestamp_filter_col, period=period)
                    # get unique instances
                    df = df[[variable]]
                    df = df.drop_duplicates(keep='first')
                    # logger.warning('post duplicates dropped:%s', df.head(10))
                    data = 0
                    if self.groupby_dict[variable] == 'sum':
                        data = int(df[variable].sum())
                    elif self.groupby_dict[variable] == 'mean':
                        data = "{}%".format(round(df[variable].mean(), 3))
                    else:
                        data = int(df[variable].count())
                    del df
                    gc.collect()
                    dct[period] = data
                self.update_cards(dct)
            except Exception:
                logger.error('graph periods to date', exc_info=True)

        def period_over_period(self, df, start_date, end_date, period,
                               history_periods=2,
                               timestamp_col='timestamp_of_first_event'):
            # Stack the current window with up to `history_periods` earlier
            # windows of the same length, labelled in a 'period' column so the
            # plot can compare them day-for-day.
            try:
                # filter cols if necessary
                string = '0 {}(s) prev(current)'.format(period)

                # filter out the dates greater than today
                df_current = df.copy()
                df_current['period'] = string
                # label the days being compared with the same label
                df_current = self.label_dates_pop(df_current, period, timestamp_col)
                # logger.warning('LINE 244:%s', df_current.head(15))

                # zero out time information
                start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0)
                end = datetime(end_date.year, end_date.month, end_date.day, 0, 0, 0)

                cols = list(df.columns)
                counter = 1
                if isinstance(history_periods, str):
                    history_periods = int(history_periods)
                # make dataframes for request no. of periods
                start, end = self.shift_period_range(period, start, end)
                while counter < history_periods and start >= self.initial_date:
                    # load data
                    if period == 'quarter':
                        logger.warning('start:end %s:%s', start, end)
                    df_temp = self.load_df_pym(start, end, cols, timestamp_col)
                    # FIX: the None check must precede any use of df_temp
                    # (the original converted timestamps before checking).
                    if df_temp is not None:
                        df_temp[timestamp_col] = pd.to_datetime(df_temp[timestamp_col])
                        if len(df_temp) > 1:
                            string = '{} {}(s) prev'.format(counter, period)
                            # FIX: label must go in the 'period' column
                            # (the original wrote df_temp[period], creating a
                            # column named e.g. 'week', so earlier periods were
                            # never picked up by split_period_into_columns).
                            df_temp['period'] = string
                            # relabel days to get matching day of week,doy, dom, for different periods
                            df_temp = self.label_dates_pop(df_temp, period, timestamp_col)
                            # logger.warning('df temp loaded for %s previous: %s',counter,len(df_temp))
                            df_current = pd.concat([df_current, df_temp])
                            del df_temp
                            gc.collect()
                    # shift the loading window
                    counter += 1
                    start, end = self.shift_period_range(period, start, end)
                return df_current
            except Exception:
                logger.error('period over period', exc_info=True)

        def graph_period_over_period(self, period):
            # Build a grouped bar chart comparing the chosen variable across
            # the current and previous periods.
            try:
                periods = [period]
                start_date = self.pop_start_date
                end_date = self.pop_end_date
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date, datetime.min.time())
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                # cols = [self.variable, self.timestamp_col, 'day']
                cols = [self.variable, self.timestamp_col]
                df = self.load_df_pym(start_date, end_date, cols=cols,
                                      timestamp_col=self.timestamp_col)

                for idx, period in enumerate(periods):
                    df_period = self.period_over_period(
                        df, start_date=start_date, end_date=end_date,
                        period=period,
                        history_periods=self.pop_history_periods,
                        timestamp_col=self.timestamp_col)

                    logger.warning('LINE 274 start:end=%s:%s,%s,%s len(Df),df.head',
                                   start_date, end_date, len(df), df.head())
                    groupby_cols = ['dayset', 'period']
                    if len(df_period) > 0:
                        df_period = df_period.groupby(groupby_cols).agg({self.variable: 'count'})
                        df_period = df_period.reset_index()
                    else:
                        df_period = df_period.rename(index=str, columns={'day': 'dayset'})

                    prestack_cols = list(df_period.columns)
                    logger.warning('Line 179:%s', df_period.head(10))
                    # Pivot the 'period' labels into one column per period.
                    df_period = self.split_period_into_columns(
                        df_period, col_to_split='period',
                        value_to_copy=self.variable)
                    logger.warning('line 180 df_period columns:%s', df_period.head(50))
                    poststack_cols = list(df_period.columns)

                    title = "{} over {}".format(period, period)
                    plotcols = list(np.setdiff1d(poststack_cols, prestack_cols))
                    df_period, plotcols = self.pop_include_zeros(
                        df_period=df_period, plotcols=plotcols, period=period)

                    # Guarantee the x-axis column exists even for empty frames.
                    if 'dayset' not in df_period.columns:
                        leng = len(df_period)
                        if leng > 0:
                            df_period['dayset'] = 0
                        else:
                            df_period['dayset'] = ''

                    if idx == 0:
                        p = df_period.hvplot.bar('dayset', plotcols, rot=45,
                                                 title=title, stacked=False)
                    else:
                        p += df_period.hvplot.bar('dayset', plotcols, rot=45,
                                                  title=title, stacked=False)
                return p
            except Exception:
                logger.error('period over period to date', exc_info=True)

        def pop_week(self, launch=-1):
            try:
                return self.graph_period_over_period('week')
            except Exception:
                logger.error('pop week', exc_info=True)

        def pop_month(self, launch=-1):
            try:
                return self.graph_period_over_period('month')
            except Exception:
                logger.error('pop month', exc_info=True)

        def pop_quarter(self, launch=-1):
            try:
                return self.graph_period_over_period('quarter')
            except Exception:
                logger.error('pop quarter', exc_info=True)

        def pop_year(self, launch=-1):
            try:
                return self.graph_period_over_period('year')
            except Exception:
                logger.error('pop year', exc_info=True)

        def multiline_dow(self, launch=1):
            # Mean of the y-variable per day-of-week, one line per resample bin.
            try:
                df = self.df.copy()
                dct = {
                    'Y': 'year',
                    'M': 'month',
                    'W': 'week',
                    'Q': 'Qtr'
                }
                resample_period = dct[self.multiline_resample_period]
                yvar = self.multiline_vars['y']
                xvar = 'day_of_week'
                df[resample_period] = df[self.timestamp_col].dt.to_period(self.multiline_resample_period)
                # Stringify so hvplot does not choke on Period objects
                # (matches the forecasting tab's multiline_dow).
                df[resample_period] = df[resample_period].astype('str')
                df[xvar] = df[self.timestamp_col].dt.day_name()
                df = df.groupby([xvar, resample_period]).agg({yvar: 'mean'})
                df = df.reset_index()
                # FIX: after the groupby/reset_index the frame only has
                # [day_of_week, resample_period, yvar]; plotting against
                # self.timestamp_col referenced a missing column. Plot like the
                # sibling forecasting tab instead.
                p = df.hvplot.line(resample_period, yvar, by=xvar,
                                   width=1200, height=500)
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    def update(attrname, old, new):
        # Re-filter, redraw the cards and retrigger the plot streams.
        thistab.notification_updater("Calculations underway. Please be patient")
        # FIX: include 'status' — its select widget exists in the layout but
        # was never read, so changing it had no effect.
        for item in ['area', 'category', 'gender', 'item', 'status']:
            thistab.select_values[item] = thistab.select[item].value
        # FIX: graph_periods_to_date requires the `variable` argument; the
        # original two-argument call raised TypeError on every change.
        thistab.graph_periods_to_date(thistab.df, thistab.timestamp_col,
                                      thistab.variable)
        thistab.section_header_updater('cards')
        thistab.section_header_updater('pop')
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_variable(attrname, old, new):
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.variable = variable_select.value
        # FIX: this tab's data is timestamped by 'visit_start'
        # (thistab.timestamp_col); 'block_timestamp' was a copy-paste from a
        # blockchain tab and is not a column of this dataset.
        thistab.graph_periods_to_date(thistab.df, thistab.timestamp_col,
                                      thistab.variable)
        thistab.section_header_updater('cards', label='')
        thistab.section_header_updater('pop', label='')
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_period_over_period(attrname, old, new):
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods = history_periods_select.value
        thistab.pop_start_date = datepicker_period_start.value  # trigger period over period
        thistab.pop_end_date = datepicker_period_end.value  # trigger period
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_history_periods(attrname, old, new):
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_multiline(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.multiline_resample_period = multiline_resample_period_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        table = 'bcc_composite'
        cols = cols_to_load['guest'] + cols_to_load['rental']
        thistab = Thistab(table, cols)

        # ------------------------------------- SETUP ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = datetime(2014, 1, 1, 0, 0, 0)

        thistab.df = thistab.load_df_pym(first_date, last_date, cols,
                                         thistab.timestamp_col)
        thistab.graph_periods_to_date(thistab.df,
                                      timestamp_filter_col=thistab.timestamp_col,
                                      variable=thistab.variable)
        thistab.section_header_updater('cards')
        thistab.section_header_updater('pop')

        '''
        df_temp = thistab.df[(thistab.df['visit_start'].dt.year == 2019) &
                             (thistab.df['visit_start'].dt.month == 8)]
        logger.warning('LINE 416: df_temp:%s,%s',len(df_temp),df_temp.head(30))
        '''
        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        # Default period-over-period window: last full week (shift back when
        # the month has just started so the window does not straddle it).
        thistab.pop_end_date = datetime.now().date()
        daynum = thistab.pop_end_date.day
        if daynum < 3:
            thistab.pop_end_date = datetime.now().date() - timedelta(days=daynum)
            thistab.pop_start_date = thistab.pop_end_date - timedelta(days=7)
        else:
            thistab.pop_start_date = thistab.first_date_in_period(thistab.pop_end_date, 'week')

        stream_launch = streams.Stream.define('Launch', launch=-1)()

        datepicker_period_start = DatePicker(title="Period start",
                                             min_date=first_date_range,
                                             max_date=last_date_range,
                                             value=thistab.pop_start_date)
        datepicker_period_end = DatePicker(title="Period end",
                                           min_date=first_date_range,
                                           max_date=last_date_range,
                                           value=thistab.pop_end_date)
        history_periods_select = Select(title='Select # of comparative periods',
                                        value='2',
                                        options=thistab.menus['history_periods'])
        datepicker_pop_start = DatePicker(title="Period start",
                                          min_date=first_date_range,
                                          max_date=last_date_range,
                                          value=thistab.pop_start_date)
        datepicker_pop_end = DatePicker(title="Period end",
                                        min_date=first_date_range,
                                        max_date=last_date_range,
                                        value=thistab.pop_end_date)
        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(5),
                                   options=thistab.menus['history_periods'])
        # NOTE(review): pop_button appears in the layout but no on_click
        # handler is attached here (the pop widgets use on_change instead) —
        # confirm whether the button should trigger update_period_over_period.
        pop_button = Button(label="Select dates/periods, then click me!",
                            width=15, button_type="success")

        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.menus['bcc']['rental'])
        multiline_y_select = Select(title='Select comparative DV(y)',
                                    value=thistab.multiline_vars['y'],
                                    options=['price', 'amount', 'visit_duration'])
        # FIX: title was a copy-paste of the DV(y) select's title.
        multiline_resample_period_select = Select(title='Select resample period',
                                                  value=thistab.multiline_resample_period,
                                                  options=['W', 'M', 'Q', 'Y'])

        # --------------------------------- GRAPHS ---------------------------
        hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)
        hv_pop_month = hv.DynamicMap(thistab.pop_month, streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)
        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)
        hv_multiline_dow = hv.DynamicMap(thistab.multiline_dow, streams=[stream_launch])
        multiline_dow = renderer.get_plot(hv_multiline_dow)

        # -------------------------------- CALLBACKS ------------------------
        # datepicker_start.on_change('value', update)
        # datepicker_end.on_change('value', update)
        # FIX: wire 'status' too — it was in the layout but had no callback.
        for item in ['area', 'category', 'gender', 'item', 'status']:
            thistab.select[item].on_change('value', update)
        history_periods_select.on_change('value', update_period_over_period)
        datepicker_period_start.on_change('value', update_period_over_period)
        datepicker_period_end.on_change('value', update_period_over_period)
        pop_number_select.on_change('value', update_history_periods)
        variable_select.on_change('value', update_variable)
        multiline_y_select.on_change('value', update_multiline)
        multiline_resample_period_select.on_change('value', update_multiline)

        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             thistab.select['area'],
                             thistab.select['category'],
                             thistab.select['item'],
                             thistab.select['gender'],
                             thistab.select['status'],
                             )
        controls_pop = WidgetBox(datepicker_pop_start,
                                 datepicker_pop_end,
                                 history_periods_select,
                                 pop_button)
        controls_multiline = WidgetBox(multiline_y_select,
                                       multiline_resample_period_select)

        # create the dashboards
        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.section_headers['cards']],
            [Spacer(width=20, height=2)],
            [thistab.KPI_card_div, controls],
            [thistab.section_headers['pop']],
            [Spacer(width=20, height=25)],
            [pop_week.state, controls_pop],
            [pop_month.state],
            [pop_quarter.state],
            [thistab.section_headers['dow']],
            [Spacer(width=20, height=30)],
            [multiline_dow.state, controls_multiline],
            [thistab.notification_div['bottom']]
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
def cryptocurrency_clustering_tab(panel_title):
    """
    Build the cryptocurrency clustering dashboard tab.

    Groups per-crypto features, standardizes them, picks an optimal k via the
    gap statistic (Tibshirani, Walther & Hastie), labels each crypto with its
    cluster (highlighting the cluster containing 'aion'), persists the result
    to redis, and renders the cluster table behind date-picker controls.

    :param panel_title: title shown on the rendered Panel tab.
    :return: a bokeh ``Panel``; on any rendering error, ``tab_error_flag(panel_title)``.
    """
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0
            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.kmean_model = {}  # fitted KMeans per candidate k

            self.div_style = """ style='width:350px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """

            self.header_style = """ style='color:blue;text-align:center;' """
            self.k = '1'
            self.max_clusters_menu = [str(k) for k in range(1, 12)]

            self.launch_cluster_table = False  # launch cluster
            self.cryptos = None

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                          position:relative;background:black;margin-bottom:200px">
                          <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'Crypto families': self.section_header_div(
                    text='Crypto families:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            # Wrap `text` in a styled header tag inside a div with margins.
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        # ////////////// DIVS /////////////////////////////////
        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=150):
            # Static explanation of how to read the cluster table.
            div_style = """ 
                          style='width:350px;
                          border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                      """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                A cluster is statistical grouping of items based on a composite similarity of the variables under review.  
                </li>
                <li>
                I have highlighted the peers in our cluster (aion_cluster), and simply labeled the other clusters with numbers.
                </li>
            </ul>
            </div>
            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ////////////////// HELPER FUNCTIONS ////////////////////
        def set_groupby_dict(self):
            # Default any unlisted column to 'mean', except count-like social
            # columns which aggregate by 'sum'.
            try:
                lst = ['mention', 'hashtags', 'tweets', 'replies', 'favorites']
                for col in self.cols:
                    if col not in self.groupby_dict.keys():
                        if not string_contains_list(lst, col):
                            self.groupby_dict[col] = 'mean'
                        else:
                            self.groupby_dict[col] = 'sum'
            except Exception:
                logger.error('set groupby dict', exc_info=True)

        # /////////////////////////////////////////////////////////////
        def optimalK(self, data, nrefs=3, maxClusters=10):
            """
            Calculate the optimal KMeans k via the Gap Statistic
            (Tibshirani, Walther, Hastie).

            Params:
                data: ndarray of shape (n_samples, n_features)
                nrefs: number of random reference datasets to create
                maxClusters: test k = 1 .. maxClusters-1
            Returns:
                (optimal_k, resultsdf) where resultsdf has columns
                ['clusterCount', 'gap'].  Side effect: each fitted model is
                stored in self.kmean_model[k].
            """
            try:
                gaps = np.zeros((len(range(1, maxClusters)),))
                results = []
                # FIX: the loop previously ranged over
                # len(self.max_clusters_menu) instead of the maxClusters
                # parameter, which made the parameter dead and risked an
                # index error whenever maxClusters < len(self.max_clusters_menu).
                for gap_index, k in enumerate(range(1, maxClusters)):
                    logger.warning('starting for k=%s', k)
                    # Holder for reference dispersion results
                    refDisps = np.zeros(nrefs)
                    # For n references, generate a random sample and run
                    # kmeans, recording each resulting dispersion
                    for i in range(nrefs):
                        logger.warning('nref=%s', i)
                        # Create new random reference set
                        randomReference = np.random.random_sample(size=data.shape)
                        # Fit to it
                        km = KMeans(k)
                        km.fit(randomReference)
                        refDisps[i] = km.inertia_

                    # Fit cluster to original data and create dispersion
                    self.kmean_model[k] = KMeans(k, random_state=42)
                    self.kmean_model[k].fit(data)
                    # FIX: the original dispersion must come from the model
                    # fitted on `data`; the original read km.inertia_, i.e.
                    # the dispersion of the LAST random-reference fit, which
                    # corrupts the gap statistic.
                    origDisp = self.kmean_model[k].inertia_

                    # Calculate gap statistic
                    gap = np.log(np.mean(refDisps)) - np.log(origDisp)
                    # Assign this loop's gap statistic to gaps
                    gaps[gap_index] = gap
                    results.append({'clusterCount': k, 'gap': gap})

                # Build the frame once at the end (DataFrame.append was
                # deprecated and removed in pandas 2.0).
                resultsdf = pd.DataFrame(results, columns=['clusterCount', 'gap'])
                # Plus 1 because an argmax index of 0 means k=1 is optimal
                return (gaps.argmax() + 1, resultsdf)
            except Exception:
                logger.error('optimal', exc_info=True)

        def cluster_table(self, launch):
            # Group features per crypto, find optimal k, label every crypto
            # with its cluster (the one containing 'aion' is renamed
            # 'aion_cluster'), persist to redis and return an hvplot table.
            try:
                # prep
                df = self.df.groupby(['crypto']).agg(groupby_dict)
                df = df.compute()
                logger.warning('df after groupby:%s', df)

                self.cryptos = df.index.tolist()
                logger.warning('self.cryptos:%s', self.cryptos)
                print(self.cryptos)

                X = df[self.feature_list]
                scaler = StandardScaler()
                X = scaler.fit_transform(X)

                self.k, gapdf = self.optimalK(X, nrefs=3,
                                              maxClusters=len(self.max_clusters_menu))
                logger.warning('Optimal k is:%s ', self.k)
                # Labels of each point
                labels = self.kmean_model[self.k].labels_

                # Nice Pythonic way to get the indices of the points for each corresponding cluster
                mydict = {
                    'cluster_' + str(i): np.where(labels == i)[0].tolist()
                    for i in range(self.kmean_model[self.k].n_clusters)
                }
                # make a dictionary with the clusters and name of the cryptos
                mydict_verbose = mydict.copy()
                # Transform this dictionary into dct with matching crypto labels
                dct = {
                    'crypto': self.cryptos,
                    'cluster': [''] * len(self.cryptos)
                }
                # get index of aion to identify the aion cluster
                aion_idx = self.cryptos.index('aion')

                for key, values in mydict.items():
                    if aion_idx in values:
                        key = 'aion_cluster'
                    mydict_verbose[key] = []
                    for crypto_index in values:
                        try:
                            dct['cluster'][int(crypto_index)] = key
                            mydict_verbose[key].append(self.cryptos[int(crypto_index)])
                        except Exception:
                            # FIX: narrowed from a bare except.
                            logger.warning('cannot change to int:%s', crypto_index)
                # save to redis
                self.write_clusters(mydict_verbose)
                logger.warning('line 229: cluster labels:%s', mydict_verbose)

                df = pd.DataFrame.from_dict(dct)
                self.launch_cluster_table = False
                cols = ['crypto', 'cluster']
                return df.hvplot.table(columns=cols, width=500, height=1200,
                                       title='Cluster table')
            except Exception:
                logger.error('cluster table', exc_info=True)

        def write_clusters(self, my_dict):
            # Checkpoint the cluster membership (+timestamp and feature list)
            # to redis.
            try:
                # write to redis
                cluster_dct = my_dict.copy()
                cluster_dct['timestamp'] = datetime.now().strftime(self.DATEFORMAT)
                cluster_dct['features'] = self.feature_list
                save_params = 'clusters:cryptocurrencies'
                self.redis.save(cluster_dct, save_params, "", "",
                                type='checkpoint')
                logger.warning('%s saved to redis', save_params)
            except Exception:
                # FIX: narrowed from a bare except.
                logger.error('', exc_info=True)

    def update(attrname, old, new):
        # Reload the window selected by the date pickers and retrigger plots.
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value, datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.trigger += 1
        stream_launch_elbow_plot.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'external_daily'
        # cols = list(groupby_dict.keys()) + ['crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=340)

        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.cols = sorted(list(thistab.df.columns))

        # MANAGE STREAMS
        stream_launch_elbow_plot = streams.Stream.define('Launch_elbow_plot',
                                                         launch=-1)()
        stream_launch_cluster_table = streams.Stream.define('Launch_cluster_table',
                                                            launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)

        # PLOTS
        hv_cluster_table = hv.DynamicMap(thistab.cluster_table,
                                         streams=[stream_launch_cluster_table])
        cluster_table = renderer.get_plot(hv_cluster_table)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.information_div(), controls],
                         [thistab.section_headers['Crypto families']],
                         [Spacer(width=20, height=30)],
                         [cluster_table.state],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)
def blockminer_tab(page_width):
    """Build the 'miners: blocks' dashboard tab.

    Shows a bar plot of blocks mined per miner address over a selectable
    date range, plus a top-N miners table with a CSV download button.

    :param page_width: pixel width used for notification divs and the bar plot.
    :return: a bokeh ``Panel``, or the ``tab_error_flag`` panel on failure.
    """
    # source for top N table; view_topN() streams fresh rows into it.
    topN_src = ColumnDataSource(
        data=dict(percentage=[], address=[], block_count=[]))

    class This_tab(Mytab):
        # Tab-specific state layered on the shared Mytab data loader.

        def __init__(self, table, cols, dedup_cols):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.df2 = None
            self.key_tab = 'blockminer'
            self.n = 20  # default "top N" size for the miners table

            # ------- DIVS setup begin
            self.page_width = page_width
            # Banner HTML reused for both top and bottom notification divs.
            txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {}
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            # Wrap `text` in a styled HTML header of the requested level.
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def load_this_data(self, start_date, end_date):
            """Load mined-block rows for the range and return the bar plot."""
            # DatePicker values are dates; normalize both ends to midnight.
            end_date = datetime.combine(end_date, datetime.min.time())
            start_date = datetime.combine(start_date, datetime.min.time())
            logger.warning('load_data start date:%s', start_date)
            logger.warning('load_data end date:%s', end_date)

            # load only mined blocks and remove the double entry
            supplemental_where = "AND update_type = 'mined_block' AND amount >= 0"
            self.df_load(start_date, end_date,
                         supplemental_where=supplemental_where,
                         cols=['address', 'amount', 'block_time'])
            logger.warning('after load:%s', self.df.head(30))
            return self.prep_dataset(start_date, end_date)

        def prep_dataset(self, start_date, end_date):
            """Aggregate per-address block counts/percentages into self.df1
            and return an hvplot bar chart of block_count per address.

            NOTE(review): reads self.df1 — assumes df_load() populated it;
            confirm against Mytab.df_load.
            """
            try:
                logger.warning("prep dataset start date:%s", start_date)
                self.df1 = self.df1[['address', 'block_time']]
                # Shorten/normalize pool names for display (Mytab helper).
                self.df1['address'] = self.df1['address']\
                    .map(self.poolname_verbose_trun)
                # Count blocks per address.
                self.df1 = self.df1.groupby(['address']).agg({'block_time': 'count'})
                self.df1 = self.df1.reset_index()
                self.df1 = self.df1.rename(columns={'block_time': 'block_count'})
                # Share of all mined blocks, rounded to one decimal place.
                self.df1['percentage'] = 100*self.df1['block_count']\
                    /self.df1['block_count'].sum()
                self.df1['percentage'] = self.df1['percentage'].map(
                    lambda x: round(x, 1))
                self.df1 = self.df1.reset_index()
                logger.warning("topN column:%s", self.df1.columns.tolist())
                #logger.warning('END prep dataset DF1:%s', self.df1.head())
                return self.df1.hvplot.bar(
                    'address', 'block_count', rot=90, height=600,
                    width=self.page_width,
                    title='# of blocks mined by miner address',
                    hover_cols=['percentage'])
            except Exception:
                logger.error('prep dataset:', exc_info=True)

        def view_topN(self):
            """Stream the top-N miners (by percentage) into topN_src and
            return a DataTable bound to that source."""
            logger.warning("top n called:%s", self.n)
            # change n from string to int
            try:
                #table_n = df1.hvplot.table(columns=['address','percentage'],
                #title=title, width=400)
                logger.warning('top N:%s', self.n)
                df2 = self.df1.nlargest(self.n, 'percentage')
                # NOTE(review): .compute() implies df1 is a dask frame here —
                # confirm against df_load's return type.
                df2 = df2.compute()
                logger.warning('in view top n :%s', df2.head(10))
                new_data = dict(percentage=df2.percentage.tolist(),
                                address=df2.address.tolist(),
                                block_count=df2.block_count.tolist())
                # rollover=self.n keeps only the latest N rows in the source.
                topN_src.stream(new_data, rollover=self.n)
                columns = [
                    TableColumn(field="address", title="Address"),
                    TableColumn(field="percentage", title="percentage"),
                    TableColumn(field="block_count", title="# of blocks")
                ]
                table_n = DataTable(source=topN_src, columns=columns,
                                    width=300, height=600)
                gc.collect()
                return table_n
            except Exception:
                logger.error('view_topN:', exc_info=True)

        def set_n(self, n):
            # Accept ints as-is; coerce strings (Select widget values) to int.
            if isinstance(n, int):
                pass
            else:
                try:
                    self.n = int(n)
                except Exception:
                    logger.error('set_n', exc_info=True)

        # ####################################################
        #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            div = Div(text=text, width=width, height=height)
            return div

        def title_div(self, text, width=700):
            text = '<h2 style="color:green;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=20)

        def notification_updater_2(self, text):
            # NOTE(review): self.notification_div is a dict of Divs in this
            # class, so assigning .text on it would raise AttributeError —
            # this method appears dead/stale; confirm before use.
            self.notification_div.text = '<h3 style="color:red">{}</h3>'.format(
                text)

        def spacing_div(self, width=20, height=100):
            return Div(text='', width=width, height=height)

        def spacing_paragraph(self, width=20, height=100):
            return Paragraph(text='', width=width, height=height)

    def update(attrname, old, new):
        # Date-range callback: push new dates into the holoviews streams and
        # refresh the top-N table.
        this_tab.notification_updater("Calculations underway. Please be patient")
        stream_start_date.event(start_date=datepicker_start.value)
        stream_end_date.event(end_date=datepicker_end.value)
        this_tab.set_n(topN_select.value)
        this_tab.view_topN()
        this_tab.notification_updater("ready")

    # update based on selected top n
    def update_topN():
        this_tab.notification_updater("Calculations in progress! Please wait.")
        logger.warning('topN selected value:%s', topN_select.value)
        this_tab.set_n(topN_select.value)
        this_tab.view_topN()
        this_tab.notification_updater("ready")

    try:
        # create class and get date range
        cols = ['address', 'block_timestamp', 'block_time']
        this_tab = This_tab('account_ext_warehouse', cols, [])

        #STATIC DATES
        first_date_range = "2018-04-23 00:00:00"
        first_date_range = datetime.strptime(first_date_range,
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = last_date_range
        first_date = datetime_to_date(last_date - timedelta(days=60))

        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_start_date = streams.Stream.define('Start_date',
                                                  start_date=first_date)()
        stream_end_date = streams.Stream.define('End_date',
                                                end_date=last_date)()

        # create a text widget for top N
        # NOTE(review): `menu` is not defined in this function — assumed to be
        # a module-level options list; confirm.
        topN_select = Select(title='Top N', value=str(this_tab.n), options=menu)
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        # ALL MINERS.
        # --------------------- ALL MINERS ----------------------------------
        # NOTE(review): `datashade=True` is passed to hv.DynamicMap, not to a
        # datashade operation — verify it has any effect here.
        hv_bar_plot = hv.DynamicMap(
            this_tab.load_this_data,
            streams=[stream_start_date, stream_end_date],
            datashade=True)

        renderer = hv.renderer('bokeh')
        bar_plot = renderer.get_plot(hv_bar_plot)

        # --------------------- TOP N MINERS -----------------------------------
        # set up data source for the ton N miners table
        this_tab.view_topN()
        columns = [
            TableColumn(field="address", title="Address"),
            TableColumn(field="percentage", title="percentage"),
            TableColumn(field="block_count", title="# of blocks")
        ]
        topN_table = DataTable(source=topN_src, columns=columns,
                               width=400, height=600)

        # add callbacks
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        topN_select.on_change("value", lambda attr, old, new: update_topN())

        # NOTE(review): Button.callback = CustomJS is the pre-Bokeh-1.x API —
        # confirm the pinned bokeh version still supports it.
        download_button = Button(label='Save Table to CSV',
                                 button_type="success")
        download_button.callback = CustomJS(
            args=dict(source=topN_src),
            code=open(
                join(dirname(__file__),
                     "../../../static/js/topN_download.js")).read())

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             download_button, topN_select)

        # create the dashboards
        grid = gridplot([[this_tab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [topN_table, controls, ],
                         [bar_plot.state]])

        # Make a tab with the layout
        tab = Panel(child=grid, title='miners: blocks')
        return tab

    except Exception:
        logger.error("Blockminer", exc_info=True)
        return tab_error_flag('miners: blocks')
def cryptocurrency_eda_tab(cryptos, panel_title):
    """Build the cryptocurrency EDA dashboard tab.

    Provides a scatter-matrix of features against a selected variable,
    parametric and non-parametric correlation tables, and a lag-analysis
    plot/table, all driven by date/variable/crypto/resample widgets.

    :param cryptos: list of cryptocurrency names for the crypto Select widget.
    :param panel_title: title for the returned bokeh Panel.
    :return: a bokeh ``Panel``, or the ``tab_error_flag`` panel on failure.
    """
    # Shared source for the lag-correlation table; lags_corr() streams into it.
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[],
                                               variable_2=[],
                                               relationship=[],
                                               lag=[],
                                               r=[],
                                               p_value=[]))

    class Thistab(Mytab):
        # EDA state on top of the shared Mytab loader.

        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None          # raw loaded frame (dask)
            self.df1 = None         # prepped/resampled frame (pandas)
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']
            self.trigger = 0  # monotonically increasing stream-event counter

            # NOTE(review): groupby_dict comes from module scope — confirm.
            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.variable = 'fork'      # currently selected target variable
            self.crypto = 'all'         # currently selected coin filter

            self.lag_variable = 'push'
            self.lag_days = "1,2,3"     # comma-separated lags for lags_plot
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            # Correlation-strength thresholds (used by corr_label in Mytab).
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ style='width:350px; margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """

            self.header_style = """ style='color:blue;text-align:center;' """

            # track variable for AI for significant effects
            self.adoption_variables = {
                'user': [],
                'developer': ['watch', 'fork']
            }
            self.significant_effect_dict = {}
            self.reset_adoption_dict(self.variable)
            self.relationships_to_check = ['weak', 'moderate', 'strong']

            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            #self.lag_section_head_txt = 'Lag relationships between {} and...'.format(self.variable)
            self.lag_section_head_txt = 'Lag relationships:'
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag': self.section_header_div(text=self.lag_section_head_txt,
                                               width=600, html_header='h3',
                                               margin_top=5,
                                               margin_bottom=-155),
                'distribution': self.section_header_div(
                    text='Pre transform distribution:{}'.format(
                        self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
                # NOTE(review): .format() below has no {} placeholder, so the
                # section_divider argument is silently dropped — confirm intent.
                'relationships': self.section_header_div(
                    text='Relationships between variables:'.format(
                        self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
                # NOTE(review): 'correlations' and 'non_linear' share the same
                # header text — likely a copy/paste leftover; confirm.
                'correlations': self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600, html_header='h3', margin_top=5,
                    margin_bottom=-155),
                'non_linear': self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600, html_header='h3', margin_top=5,
                    margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            # Wrap `text` in a styled HTML header of the requested level.
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            # Update both banner divs (top and bottom) with a status message.
            txt = """<div style="text-align:center;background:black;width:{}px;"> <h4 style="color:#fff;"> {}</h4></div>""".format(self.page_width, text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def reset_adoption_dict(self, variable):
            # Clear the list of features found significant for `variable`.
            self.significant_effect_dict[variable] = []

        def section_header_updater(self, text, section, html_header='h3',
                                   margin_top=150, margin_bottom=-150):
            # Rewrite an existing section header div in place.
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            self.section_headers[section].text = text

        # ////////////// DIVS /////////////////////////////////

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            # Static help text explaining how to read the correlation tables.
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li> Positive: as variable 1 increases, so does variable 2. </li>
                <li> Negative: as variable 1 increases, variable 2 decreases. </li>
                <li> Strength: decisions can be made on the basis of strong and moderate relationships. </li>
                <li> No relationship/not significant: no statistical support for decision making. </li>
                <li> The scatter graphs (below) are useful for visual confirmation. </li>
                <li> The histogram (right) shows the distribution of the variable. </li>
            </ul>
            </div>
            """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////
        def prep_data(self, df1):
            """Filter by selected crypto, resample per groupby_dict, apply the
            selected lag shift, and store the result in self.df1 (pandas)."""
            try:
                self.cols = list(df1.columns)
                df1['timestamp'] = df1['timestamp'].astype('M8[us]')
                df = df1.set_index('timestamp')
                #logger.warning('LINE 195 df:%s',df.head())
                # handle lag for all variables
                if self.crypto != 'all':
                    df = df[df.crypto == self.crypto]
                df = df.compute()  # dask -> pandas before pandas-only resample
                #logger.warning('LINE 199: length before:%s',len(df))
                # NOTE(review): self.resample_period is set externally (by the
                # resample widget callback) before this runs — confirm default.
                df = df.groupby('crypto').resample(self.resample_period).agg(
                    self.groupby_dict)
                #logger.warning('LINE 201: length after:%s',len(df))
                df = df.reset_index()
                vars = self.feature_list.copy()
                if int(self.lag) > 0:
                    # Shift every feature except the target by `lag` periods.
                    for var in vars:
                        if self.variable != var:
                            df[var] = df[var].shift(int(self.lag))
                df = df.dropna()
                self.df1 = df
                #logger.warning('line 184- prep data: df:%s',self.df.head(10))
            except Exception:
                logger.error('prep data', exc_info=True)

        def set_groupby_dict(self):
            # Placeholder — currently a no-op.
            try:
                pass
            except Exception:
                logger.error('set groupby dict', exc_info=True)

        # ///////////////// PLOTS /////////////////////
        def lags_plot(self, launch):
            """Scatter the lag-shifted copies of lag_variable against the
            target variable; also refreshes the lag-correlation table."""
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                df = df.compute()
                cols = [self.lag_variable]
                lags = self.lag_days.split(',')
                for day in lags:
                    try:
                        # One shifted column per requested lag, e.g. push_3.
                        label = self.lag_variable + '_' + day
                        df[label] = df[self.lag_variable].shift(int(day))
                        cols.append(label)
                    except:
                        logger.warning('%s is not an integer', day)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                #logger.warning('in lags plot: df:%s',df.head(10))
                return df.hvplot(x=self.variable, y=cols, kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Correlate each lagged column against the target variable and
            stream the rows into lags_corr_src; returns a DataTable."""
            try:
                corr_dict_data = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'lag': [],
                    'r': [],
                    'p_value': []
                }
                a = df[self.variable].tolist()
                for col in df.columns:
                    if col not in ['timestamp', self.variable]:
                        # find lag: trailing '_<n>' suffix encodes the lag.
                        var = col.split('_')
                        try:
                            tmp = int(var[-1])
                            lag = tmp
                        except Exception:
                            lag = 'None'
                        b = df[col].tolist()
                        # corr_label (Mytab) returns regression stats + a
                        # human-readable strength label.
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        corr_dict_data['variable_1'].append(self.variable)
                        corr_dict_data['variable_2'].append(col)
                        corr_dict_data['relationship'].append(txt)
                        corr_dict_data['lag'].append(lag)
                        corr_dict_data['r'].append(round(rvalue, 4))
                        corr_dict_data['p_value'].append(round(pvalue, 4))

                lags_corr_src.stream(corr_dict_data,
                                     rollover=(len(corr_dict_data['lag'])))
                columns = [
                    TableColumn(field="variable_1", title="variable 1"),
                    TableColumn(field="variable_2", title="variable 2"),
                    TableColumn(field="relationship", title="relationship"),
                    TableColumn(field="lag", title="lag(days)"),
                    TableColumn(field="r", title="r"),
                    TableColumn(field="p_value", title="p_value"),
                ]
                data_table = DataTable(source=lags_corr_src, columns=columns,
                                       width=900, height=400)
                return data_table
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """Pearson-style correlation of every feature vs. the target;
            also records 'significant effect' features to redis for the
            developer adoption variables. Returns an hvplot table."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'r': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop('timestamp', axis=1)
                #df = df.compute()
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    if col != self.variable:
                        #logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        # add to dict
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['r'].append(round(rvalue, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))
                        # update significant effect variables
                        if self.variable in self.adoption_variables['developer']:
                            if any(relationship in txt for relationship in
                                   self.relationships_to_check):
                                if self.variable not in self.significant_effect_dict.keys():
                                    self.significant_effect_dict[
                                        self.variable] = []
                                self.significant_effect_dict[
                                    self.variable].append(col)

                if self.variable in self.adoption_variables['developer']:
                    # Deduplicate and checkpoint the significant features.
                    tmp = self.significant_effect_dict[self.variable].copy()
                    tmp = list(set(tmp))
                    tmp_dct = {
                        'features': tmp,
                        'timestamp': datetime.now().strftime(self.DATEFORMAT)
                    }
                    # write to redis
                    save_params = 'adoption_features:developer' + '-' + self.variable
                    self.redis.save(tmp_dct, save_params, "", "",
                                    type='checkpoint')

                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'r': corr_dict['r'],
                    'p-value': corr_dict['p-value']
                })
                #logger.warning('df:%s',df.head(23))
                return df.hvplot.table(columns=[
                    'Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'
                ], width=550, height=400,
                    title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """Mann-Whitney U test of each feature vs. the target variable;
            returns an hvplot table of stats and relationship labels."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'stat': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop('timestamp', axis=1)
                #df = df.compute()
                #logger.warning('line df:%s',df.head(10))
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    if col != self.variable:
                        #logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['stat'].append(round(stat, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))

                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'stat': corr_dict['stat'],
                    'p-value': corr_dict['p-value']
                })
                #logger.warning('df:%s',df.head(23))
                # NOTE(review): runtime title has a typo ('parametricrelationship');
                # left unchanged here since it is a runtime string.
                return df.hvplot.table(
                    columns=[
                        'Variable 1', 'Variable 2', 'Relationship', 'stat',
                        'p-value'
                    ],
                    width=550, height=400,
                    title='Non parametricrelationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            # Small-multiples histogram of every feature (4 per row).
            try:
                return self.df.hvplot.hist(y=self.feature_list, subplots=True,
                                           shared_axes=False, bins=25,
                                           alpha=0.3, width=300).cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            # Scatter-matrix: every other feature plotted against the target.
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)
                df = self.df1
                #df = df[self.feature_list]
                # get difference for money columns
                #thistab.prep_data(thistab.df)
                if 'timestamp' in df.columns:
                    df = df.drop('timestamp', axis=1)
                #df = df.repartition(npartitions=1)
                #df = df.compute()
                df = df.fillna(0)
                #logger.warning('line 302. df: %s',df.head(10))
                cols_temp = self.feature_list.copy()
                if self.variable in cols_temp:
                    cols_temp.remove(self.variable)
                #variable_select.options = cols_lst
                p = df.hvplot.scatter(x=self.variable, y=cols_temp, width=330,
                                      subplots=True, shared_axes=False,
                                      xaxis=False).cols(4)
                return p
            except Exception:
                logger.error('matrix plot', exc_info=True)

        '''
        def regression(self,df):
            try:
            except Exception:
                logger.error('matrix plot', exc_info=True)
        '''

    def update_variable(attr, old, new):
        # Target-variable callback.
        # NOTE(review): prep_data runs BEFORE thistab.variable is updated, so
        # the lag-exclusion in prep_data still uses the old variable — confirm
        # whether the ordering is intentional.
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data(thistab.df)
        thistab.variable = new
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.lag_section_head_txt = 'Lag relationships between {} and...'.format(
            thistab.variable)
        #thistab.section_header_updater('lag',thistab.lag_section_head_txt)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        # Lag-variable callback: re-prep and relaunch the lag plot only.
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_crypto(attr, old, new):
        # Crypto filter callback (also re-reads the lag widget).
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.crypto = crypto_select.value
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):  # update lag & cryptocurrency
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        # Date-range callback: reload from the warehouse and re-prep.
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value, datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        # Resample-period callback ('D'/'W'/'M'/'Q').
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        # Button callback: apply the comma-separated lag list from the
        # TextInput and relaunch the lags plot.
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'external_daily'
        cols = list(groupby_dict.keys()) + ['timestamp', 'crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=200)

        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        # date comes out stream in milliseconds
        #stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        variable_select = Select(title='Select variable', value='fork',
                                 options=thistab.feature_list)
        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)
        lag_select = Select(title='Select lag', value=str(thistab.lag),
                            options=thistab.lag_menu)
        crypto_select = Select(title='Select cryptocurrency', value='all',
                               options=['all'] + thistab.items)
        resample_select = Select(title='Select resample period', value='D',
                                 options=['D', 'W', 'M', 'Q'])
        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55, width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10, button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src, columns=columns,
                                    width=500, height=280)
        width = 800  # NOTE(review): appears unused below — confirm.

        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        #hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])

        # NOTE(review): `renderer` is not defined in this function — assumed
        # to be a module-level hv renderer; confirm.
        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        crypto_select.on_change('value', update_crypto)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             variable_select, lag_select, crypto_select,
                             resample_select)
        controls_lag = WidgetBox(lag_variable_select, lags_input,
                                 lags_input_button)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [matrix_plot.state, controls],
                         [thistab.section_headers['relationships']],
                         [Spacer(width=20, height=30)],
                         [thistab.section_headers['correlations']],
                         [Spacer(width=20, height=30)],
                         [corr_table.state, thistab.corr_information_div()],
                         [thistab.section_headers['non_linear']],
                         [Spacer(width=20, height=30)],
                         [nonpara_table.state],
                         [thistab.section_headers['lag']],
                         [Spacer(width=20, height=30)],
                         [lags_plot.state, controls_lag],
                         [lags_corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)
def KPI_user_adoption_tab(DAYS_TO_LOAD=90):
    """Build the 'KPI: user adoption' dashboard tab.

    Renders period-to-date KPI cards and period-over-period bar charts for
    new-account adoption, driven by Bokeh widgets and Holoviews DynamicMaps.

    :param DAYS_TO_LOAD: retained for interface compatibility (currently unused
        in this tab; the load window is derived from ``dashboard_config``).
    :return: a Bokeh ``Panel``, or the error-flag panel on failure.
    """
    class Thistab(KPI):
        def __init__(self, table, cols=None):
            # NOTE: avoid a mutable default argument; [] was previously shared
            # across calls.
            cols = [] if cols is None else cols
            KPI.__init__(self, table, name='social_media', cols=cols)
            self.table = table
            self.df = None
            self.checkboxgroup = {'account_type': [], 'update_type': []}

            # ------- DIVS setup begin
            # FIX: page_width must be set BEFORE initialize_cards() uses it;
            # previously the cards were sized from the parent class's value.
            self.page_width = 1200
            self.KPI_card_div = self.initialize_cards(self.page_width,
                                                      height=350)
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                          position:relative;background:black;margin-bottom:200px">
                          <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards': self.section_header_div(
                    text='Period to date:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
                'pop': self.section_header_div(
                    text='Period over period:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)
        # ---------------------- DIVS ----------------------------

        def reset_checkboxes(self, value='all', checkboxgroup=''):
            # NOTE(review): self.checkboxgroup maps to plain lists, which have
            # no .value attribute; the except clause absorbs the failure —
            # confirm whether these were meant to hold widget objects.
            try:
                self.checkboxgroup[checkboxgroup].value = value
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        def information_div(self, width=400, height=300):
            """Return a placeholder information Div (bullet list is empty)."""
            div_style = """ style='width:350px;margin-right:-800px;
                      border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
            </ul>
            </div>
            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # -------------------- CARDS -----------------------------------------
        def initialize_cards(self, width, height=250):
            """Create the horizontal flexbox of empty KPI cards (one per period)."""
            try:
                txt = ''
                for period in ['year', 'quarter', 'month', 'week']:
                    design = random.choice(list(KPI_card_css.keys()))
                    txt += self.card(title='', data='', card_design=design)
                text = """<div style="margin-top:100px;display:flex; flex-direction:row;">
                    {}
                </div>""".format(txt)
                div = Div(text=text, width=width, height=height)
                return div
            except Exception:
                logger.error('initialize cards', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------
        def graph_periods_to_date(self, df1, filter_col):
            """Count unique addresses per period-to-date and update the KPI cards.

            ``df1`` is assumed to be a dask DataFrame (it is filtered lazily and
            ``.compute()``d per period).
            """
            try:
                if self.account_type != 'all':
                    df1 = df1[df1.account_type == self.account_type]
                dct = {}
                for idx, period in enumerate(['week', 'month', 'quarter', 'year']):
                    df = self.period_to_date(
                        df1,
                        timestamp=dashboard_config['dates']['last_date'],
                        timestamp_filter_col=filter_col, period=period)
                    # get unique instances
                    df = df[['address']]
                    df = df.compute()
                    df = df.drop_duplicates(keep='first')
                    data = len(df)
                    del df
                    gc.collect()
                    dct[period] = data
                self.update_cards(dct)
            except Exception:
                logger.error('graph periods to date', exc_info=True)

        def graph_period_over_period(self, period):
            """Build overlaid bar charts comparing the current period against
            the previous ``self.pop_history_periods`` periods."""
            try:
                periods = [period]
                start_date = self.pop_start_date
                end_date = self.pop_end_date
                # DatePicker may hand back a date; normalize to datetime
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date, datetime.min.time())
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                cols = ['account_type', 'timestamp_of_first_event', 'day']
                df = self.load_df(start_date=start_date, end_date=end_date,
                                  cols=cols,
                                  timestamp_col='timestamp_of_first_event')
                # drop any period shorter than the selected window
                if abs(start_date - end_date).days > 7:
                    if 'week' in periods:
                        periods.remove('week')
                if abs(start_date - end_date).days > 31:
                    if 'month' in periods:
                        periods.remove('month')
                if abs(start_date - end_date).days > 90:
                    if 'quarter' in periods:
                        periods.remove('quarter')
                if self.account_type != 'all':
                    df = df[df.account_type == self.account_type]
                # col for when list is empty
                self.variable = 'account_type'
                for idx, period in enumerate(periods):
                    df_period = self.period_over_period(
                        df, start_date=start_date, end_date=end_date,
                        period=period,
                        history_periods=self.pop_history_periods,
                        timestamp_col='timestamp_of_first_event')
                    groupby_cols = ['dayset', 'period']
                    if len(df_period) > 0:
                        df_period = df_period.groupby(groupby_cols).agg(
                            {'account_type': 'count'})
                        df_period = df_period.reset_index()
                        df_period = df_period.compute()
                    else:
                        # empty frame never went through label_dates_pop, so
                        # rename 'day' to the expected 'dayset'
                        df_period = df_period.compute()
                        df_period = df_period.rename(index=str,
                                                     columns={'day': 'dayset'})
                    prestack_cols = list(df_period.columns)
                    logger.warning('Line 179:%s', df_period.head(10))
                    df_period = self.split_period_into_columns(
                        df_period, col_to_split='period',
                        value_to_copy='account_type')
                    logger.warning('line 180 df_period columns:%s',
                                   df_period.head(50))
                    poststack_cols = list(df_period.columns)
                    title = "{} over {}".format(period, period)
                    # columns created by the stacking step are the plot series
                    plotcols = list(np.setdiff1d(poststack_cols, prestack_cols))
                    df_period, plotcols = self.pop_include_zeros(
                        df_period=df_period, plotcols=plotcols, period=period)
                    if idx == 0:
                        p = df_period.hvplot.bar('dayset', plotcols, rot=45,
                                                 title=title, stacked=False)
                    else:
                        p += df_period.hvplot.bar('dayset', plotcols, rot=45,
                                                  title=title, stacked=False)
                return p
            except Exception:
                logger.error('period over period to date', exc_info=True)

    # ----------------------- widget callbacks ------------------------------
    def update_account(attrname, old, new):
        """Refilter cards/plots when the account-type select changes."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.account_type = new
        thistab.graph_periods_to_date(thistab.df, 'timestamp_of_first_event')
        thistab.section_header_updater('cards')
        thistab.section_header_updater('pop')
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_period_over_period(attrname, old, new):
        """Recompute the period-over-period plots from the pop widgets."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.pop_history_periods = history_periods_select.value
        thistab.pop_start_date = datepicker_period_start.value  # trigger period over period
        thistab.pop_end_date = datepicker_period_end.value  # trigger period
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_history_periods(attrname, old, new):
        """Change only the number of comparative periods."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    try:
        cols = [
            'address', 'account_type', 'update_type', 'balance',
            'timestamp_of_first_event'
        ]
        thistab = Thistab(table='account_ext_warehouse', cols=cols)
        # ------------------------------------- SETUP ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = datetime(last_date.year, 1, 1, 0, 0, 0)

        thistab.df = thistab.load_df(first_date, last_date, cols,
                                     'timestamp_of_first_event')
        thistab.graph_periods_to_date(thistab.df,
                                      filter_col='timestamp_of_first_event')
        thistab.section_header_updater('cards')
        thistab.section_header_updater('pop')

        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        thistab.pop_end_date = last_date
        thistab.pop_start_date = thistab.first_date_in_period(
            thistab.pop_end_date, 'week')
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        datepicker_period_start = DatePicker(title="Period start",
                                             min_date=first_date_range,
                                             max_date=last_date_range,
                                             value=thistab.pop_start_date)
        datepicker_period_end = DatePicker(title="Period end",
                                           min_date=first_date_range,
                                           max_date=last_date_range,
                                           value=thistab.pop_end_date)
        history_periods_select = Select(
            title='Select # of comparative periods', value='2',
            options=thistab.menus['history_periods'])
        account_type_select = Select(title='Select account type',
                                     value='all',
                                     options=thistab.menus['account_type'])
        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(thistab.pop_history_periods),
                                   options=thistab.menus['history_periods'])

        # --------------------------------- GRAPHS ---------------------------
        hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)
        hv_pop_month = hv.DynamicMap(thistab.pop_month,
                                     streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)
        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter,
                                       streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)

        # -------------------------------- CALLBACKS ------------------------
        #datepicker_start.on_change('value', update)
        #datepicker_end.on_change('value', update)
        account_type_select.on_change('value', update_account)
        history_periods_select.on_change('value', update_period_over_period)
        datepicker_period_start.on_change('value', update_period_over_period)
        datepicker_period_end.on_change('value', update_period_over_period)
        pop_number_select.on_change('value', update_history_periods)

        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             account_type_select)
        controls_pop = WidgetBox(datepicker_period_start,
                                 datepicker_period_end,
                                 history_periods_select)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['cards']],
                         [Spacer(width=20, height=2)],
                         [thistab.KPI_card_div, controls],
                         [thistab.section_headers['pop']],
                         [Spacer(width=20, height=25)],
                         [pop_week.state, controls_pop],
                         [pop_month.state],
                         [pop_quarter.state],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title='KPI: user adoption')
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag('KPI accounts')
def accounts_tsa_tab(panel_title):
    """Build the accounts time-series-analysis (TSA) tab.

    Loads account events from the warehouse, fits Prophet models for the
    mean daily amount and the daily unique-address count, and renders the
    forecasts alongside filter widgets.

    :param panel_title: title for the Bokeh Panel.
    :return: a Bokeh ``Panel``, or the error-flag panel on failure.
    """
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')
            self.forecast_days = 30
            self.trigger = -1
            # filter state ('all' means no filtering on that dimension)
            self.status = 'all'
            self.update_type = 'all'
            self.account_type = 'all'
            # last assignment wins; previously set to 'address' then 'amount'
            self.interest_var = 'amount'
            self.pl = {}  # for rf pipeline
            self.header_style = """ style='color:blue;text-align:center;' """
            # list of tier specific addresses for prediction
            self.address_list = []
            self.address_select = Select(title='Filter by address',
                                         value='all',
                                         options=[])
            self.address = 'all'
            self.load_data_flag = False
            self.groupby_dict = {}
            self.addresses = []
            self.div_style = """ style='width:300px; margin-left:25px;
                      border:1px solid #ddd;border-radius:3px;background:#efefef50;'
                      """
            self.max_loaded_date = None
            self.min_loaded_date = None

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                          position:relative;background:black;margin-bottom:200px">
                          <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'forecast': self.section_header_div(
                    text='Forecasts:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        # ####################################################
        # UTILITY DIVS
        def results_div(self, text, width=600, height=300):
            div = Div(text=text, width=width, height=height)
            return div

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def reset_checkboxes(self):
            try:
                self.address_selected = ""
                self.address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        # I/O
        def _reload(self, start_date, end_date):
            """Load the window from the warehouse and refresh the address list."""
            self.df_load(start_date, end_date, cols=self.cols)
            self.df = self.df.fillna(0)
            df = self.df[['address']]
            df = df.compute()
            # FIX: iterating the one-column DataFrame yielded the column
            # name 'address'; take the actual address values instead.
            self.addresses = ['all'] + list(set(df['address']))
            # self.make_delta()
            # self.df = self.df.set_index('block_timestamp')
            logger.warning("data loaded - %s", self.df.tail(10))

        def load_df(self, start_date, end_date):
            """Load data for [start_date, end_date], reusing the cached frame
            when the requested window is already covered, then apply filters."""
            try:
                logger.warning("data load begun")
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)

                if self.df is not None:
                    self.max_loaded_date = self.df.block_timestamp.max().compute()
                    self.min_loaded_date = self.df.block_timestamp.min().compute()
                    if start_date >= self.min_loaded_date and end_date <= self.max_loaded_date:
                        # requested window already in memory
                        logger.warning("data already loaded - %s",
                                       self.df.tail(10))
                    else:
                        self._reload(start_date, end_date)
                else:
                    self._reload(start_date, end_date)
                self.df = self.filter(self.df)
            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        # MUNGE DATA
        def make_delta(self):
            """Append pct-change columns ('<col>_diff') for each target column.

            NOTE(review): the ``self.df.fillna(self.df.mean())`` result is
            immediately overwritten by ``from_pandas(df, ...)`` — confirm
            which frame was intended to be filled.
            """
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        self.df = self.df.fillna(self.df.mean())
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))
            except Exception:
                logger.error('make delta', exc_info=True)

        ##################################################
        # EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Box plot of ``variable`` by status; falls back to sample data
            when no frame is loaded."""
            try:
                # get max value of variable and multiply it by 1.1
                minv = 0
                maxv = 0
                df = self.df
                if df is not None:
                    if len(df) > 0:
                        minv, maxv = dd.compute(df[variable].min(),
                                                df[variable].max())
                else:
                    df = SD('filter', [variable, 'status'], []).get_df()
                return df.hvplot.box(variable, by='status',
                                     ylim=(.9 * minv, 1.1 * maxv))
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        # MODELS
        def filter(self, df):
            """Apply the current status/account-type/update-type/address
            filters to ``df`` and add a 'freq' helper column."""
            try:
                df = df.assign(freq=df.address)
                if self.status != 'all':
                    df = df[df.status == self.status]
                if self.account_type != 'all':
                    # FIX: column was misspelled 'acccount_type', which raised
                    # and made this method silently return None
                    df = df[df.account_type == self.account_type]
                if self.update_type != 'all':
                    df = df[df.update_type == self.update_type]
                if self.address != 'all':
                    df = df[df.address == self.address]
                return df
            except Exception:
                logger.error("filter:", exc_info=True)

        def tsa_amount(self, launch):
            """Prophet forecast of the mean daily amount.

            Returns a layout of (forecast lines/points) + (trend & weekly
            components).
            """
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'amount': 'mean'})
                df = df.reset_index()
                df = df.compute()
                label = 'amount_diff'
                df[label] = df[self.interest_var].diff()
                df = df.fillna(0)

                # Prophet requires columns named 'ds' (date) and 'y' (value)
                rename = {'block_timestamp': 'ds', 'amount': 'y'}
                df = df.rename(columns=rename)
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)

                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
                print(list(forecast.columns))
                for idx, col in enumerate(['yhat', 'yhat_lower', 'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds', y=col, width=600,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(x='ds', y=col, width=600,
                                                     height=250,
                                                     value_label='$',
                                                     legend=False).relabel(col)

                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds', y=col, width=550,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds', y=col, width=550,
                                                  height=250, value_label='$',
                                                  legend=False).relabel(col)
                return p + q
            except Exception:
                # FIX: error was mislabeled "box plot:" (copy-paste)
                logger.error("tsa amount:", exc_info=True)

        def tsa_freq(self, launch):
            """Prophet forecast of daily unique-address count (frequency)."""
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'address': 'nunique'})
                df = df.reset_index()
                df = df.compute()
                label = 'freq_diff'
                df[label] = df['address'].diff()
                df = df.fillna(0)

                # Prophet requires columns named 'ds' (date) and 'y' (value)
                rename = {'block_timestamp': 'ds', 'address': 'y'}
                df = df.rename(columns=rename)
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)

                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
                print(list(forecast.columns))
                for idx, col in enumerate(['yhat', 'yhat_lower', 'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds', y=col, width=600,
                                                 height=250,
                                                 value_label='#').relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(
                            x='ds', y=col, width=600, height=250,
                            value_label='#').relabel(col)

                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds', y=col, width=550,
                                                 height=250,
                                                 value_label='#').relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds', y=col, width=550,
                                                  height=250,
                                                  value_label='#').relabel(col)
                return p + q
            except Exception:
                # FIX: error was mislabeled "box plot:" (copy-paste)
                logger.error("tsa freq:", exc_info=True)

        ####################################################
        # GRAPHS

    # ----------------------- widget callbacks ------------------------------
    def update(attrname, old, new):
        """Re-run the forecasts with the current widget selections."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.address = thistab.address_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_load(attrname, old, new):
        """Reload the data window when a date picker changes."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())
        cols = [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type', 'amount'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = last_date - timedelta(days=60)

        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])
        # search by address checkboxes
        thistab.checkboxes = CheckboxButtonGroup(labels=thistab.addresses,
                                                 active=[0])

        # ----------------------------------- LOAD DATA
        # load model-making data
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        # load data for period to be predicted

        # tables
        hv_tsa_amount = hv.DynamicMap(thistab.tsa_amount,
                                      streams=[stream_launch])
        tsa_amount = renderer.get_plot(hv_tsa_amount)
        hv_tsa_freq = hv.DynamicMap(thistab.tsa_freq, streams=[stream_launch])
        tsa_freq = renderer.get_plot(hv_tsa_freq)

        # add callbacks
        datepicker_start.on_change('value', update_load)
        datepicker_end.on_change('value', update_load)
        thistab.address_select.on_change('value', update)
        select_forecast_days.on_change('value', update)
        update_type_select.on_change('value', update)
        account_type_select.on_change('value', update)
        status_select.on_change('value', update)

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             thistab.address_select, select_forecast_days,
                             update_type_select, account_type_select,
                             status_select, thistab.checkboxes)

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [Spacer(width=20, height=30)],
                         [tsa_amount.state, controls],
                         [tsa_freq.state],
                         [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
def KPI_projects_tab(panel_title, DAYS_TO_LOAD=90): timeline_source = ColumnDataSource(data=dict( Item=[], Start=[], End=[], Color=[], start=[], end=[], ID=[], ID1=[])) class Thistab(KPI): def __init__(self, table, cols=[]): KPI.__init__(self, table, name='project', cols=cols) self.table = table self.df = None self.df_pop = None self.checkboxgroup = {} self.period_to_date_cards = {} self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0) self.timestamp_col = 'project_startdate_actual' self.pym = PythonMongo('aion') self.groupby_dict = { 'project': 'sum', 'project_duration': 'sum', 'project_start_delay': 'mean', 'project_end_delay': ' mean', 'milestone': 'sum', 'milestone_duration': 'sum', 'milestone_start_delay': 'mean', 'milestone_end_delay': ' mean', 'task': 'sum', 'task_duration': 'sum', 'task_start_delay': 'mean', 'task_end_delay': ' mean', } self.menus = { 'status': ['all', 'open', 'closed'], 'type': [ 'all', 'research', 'reconciliation', 'audit', 'innovation', 'construction', 'manufacturing', 'conference' ], 'gender': ['all', 'male', 'female'], 'variables': list(self.groupby_dict.keys()), 'history_periods': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], } self.status = 'all' self.pm_gender = 'all' self.m_gender = 'all' self.t_gender = 'all' self.type = 'all' self.variables = sorted(list(self.groupby_dict.keys())) self.variable = self.variables[0] self.groupby_var = 'project' self.chord_data = { 'rename': { 'project_owner': 'source', 'milestone_owner': 'target', 'remuneration': 'value' }, 'percentile_threshold': .75, } self.percentile_threshold = 10 # ------- DIVS setup begin self.page_width = 1200 txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome') self.notification_div = { 'top': Div(text=txt, width=self.page_width, height=20), 'bottom': Div(text=txt, 
width=self.page_width, height=10), } self.section_divider = '-----------------------------------' self.section_headers = { 'cards': self.section_header_div(text='Period to date:{}'.format( self.section_divider), width=1000, html_header='h2', margin_top=50, margin_bottom=5), 'pop': self.section_header_div(text='Period over period:{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'chord': self.section_header_div(text='Relationships:{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'timeline': self.section_header_div(text='Project timeline:{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), } self.KPI_card_div = self.initialize_cards(self.page_width, height=350) start = datetime(2014, 1, 1, 0, 0, 0) end = datetime(2019, 5, 15, 0, 0, 0) self.tools = [ BoxZoomTool(), ResetTool(), PanTool(), SaveTool(), WheelZoomTool() ] self.timeline_vars = { 'projects': '', 'project': '', 'types': ['all', 'milestone', 'task', 'project'], 'type': 'all', 'DF': None, 'G': figure(title=None, x_axis_type='datetime', width=1200, height=900, y_range=[], x_range=Range1d(start, end), toolbar_location=None), 'toolbar_box': ToolbarBox() } # ----- UPDATED DIVS END # ---------------------- DIVS ---------------------------- def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150): text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""\ .format(margin_top,margin_bottom,html_header, text, html_header) return Div(text=text, width=width, height=15) def information_div(self, width=400, height=300): txt = """ <div {}> <h4 {}>How to interpret sentiment score</h4> <ul style='margin-top:-10px;'> <li> </li> <li> </li> <li> </li> <li> </li> </ul> </div> """.format(self.div_style, self.header_style) div = Div(text=txt, width=width, height=height) return div def 
initialize_cards(self, width, height=250): try: txt = '' for period in ['year', 'quarter', 'month', 'week']: design = random.choice(list(KPI_card_css.keys())) txt += self.card(title='', data='', card_design=design) text = """<div style="margin-top:100px;display:flex; flex-direction:row;"> {} </div>""".format(txt) div = Div(text=text, width=width, height=height) return div except Exception: logger.error('initialize cards', exc_info=True) def load_df(self, req_startdate, req_enddate, table, cols, timestamp_col): try: # get min and max of loaded df if self.df is not None: loaded_min = self.df[timestamp_col].min() loaded_max = self.df[timestamp_col].max() if loaded_min <= req_startdate and loaded_max >= req_enddate: df = self.df[(self.df[timestamp_col] >= req_startdate) & (self.df[timestamp_col] <= req_enddate)] return df return self.pym.load_df(req_startdate, req_enddate, table=table, cols=cols, timestamp_col=timestamp_col) except Exception: logger.error('load_df', exc_info=True) def filter_df(self, df1): if self.status != 'all': df1 = df1[df1.status == self.status] if self.pm_gender != 'all': df1 = df1[df1.project_owner_gender == self.pm_gender] if self.m_gender != 'all': df1 = df1[df1.milestone_owner_gender == self.m_gender] if self.t_gender != 'all': df1 = df1[df1.task_owner_gender == self.t_gender] if self.type != 'all': df1 = df1[df1.type == self.type] return df1 def period_to_date(self, df, timestamp=None, timestamp_filter_col=None, cols=[], period='week'): try: if timestamp is None: timestamp = datetime.now() timestamp = datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour, 0, 0) start = self.first_date_in_period(timestamp, period) # filter df[timestamp_filter_col] = pd.to_datetime( df[timestamp_filter_col], format=self.DATEFORMAT_PTD) #logger.warning('df:%s', df[self.timestamp_col]) df = df[(df[timestamp_filter_col] >= start) & (df[timestamp_filter_col] <= timestamp)] if len(cols) > 0: df = df[cols] return df except Exception: 
logger.error('period to date', exc_info=True) def period_over_period(self, df, start_date, end_date, period, history_periods=2, timestamp_col='timestamp_of_first_event'): try: # filter cols if necessary string = '0 {}(s) prev(current)'.format(period) # filter out the dates greater than today df_current = df.copy() df_current = self.filter_df(df_current) logger.warning('df current:%s', df_current.head(10)) df_current['period'] = string # label the days being compared with the same label if len(df_current) > 0: df_current = self.label_dates_pop(df_current, period, timestamp_col) cols = [self.variable, 'period', 'dayset'] if 'project' in self.variable: if self.variable != 'project': df_current = df_current[[ self.variable, 'period', 'dayset', 'project' ]] elif 'milestone' in self.variable: if self.variable != 'milestone': df_current = df_current[[ self.variable, 'period', 'dayset', 'milestone', 'project' ]] elif 'task' in self.variable: if self.variable != 'task': df_current = df_current[[ self.variable, 'period', 'dayset', 'task', 'milestone', 'project' ]] # zero out time information start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0) end = datetime(end_date.year, end_date.month, end_date.day, 0, 0, 0) cols = list(df.columns) counter = 1 if isinstance(history_periods, str): history_periods = int(history_periods) # make dataframes for request no. 
of periods start, end = self.shift_period_range(period, start, end) while counter < history_periods and start >= self.initial_date: # load data df_temp = self.load_df(start, end, table=self.table, cols=[], timestamp_col=timestamp_col) df_temp = self.filter_df(df_temp) if df_temp is not None: if len(df_temp) > 1: df_temp[timestamp_col] = pd.to_datetime( df_temp[timestamp_col]) string = '{} {}(s) prev'.format(counter, period) # label period df_temp['period'] = string # relabel days to get matching day of week,doy, dom, for different periods df_temp = self.label_dates_pop( df_temp, period, timestamp_col) df_temp = df_temp[cols] # logger.warning('df temp loaded for %s previous: %s',counter,len(df_temp)) df_current = pd.concat([df_current, df_temp]) del df_temp gc.collect() # shift the loading window counter += 1 start, end = self.shift_period_range(period, start, end) return df_current except Exception: logger.error('period over period', exc_info=True) # label dates for period over period (pop) def pop_include_zeros(self, df_period, plotcols, period): try: # check for no data on original dates tmp_title = '0 {}(s) prev(current)'.format(period) if tmp_title not in plotcols: df_period[tmp_title] = [0] * len(df_period) plotcols.append(tmp_title) logger.warning('line 218 cols to plot:%s', plotcols) # do other periods tmp = plotcols[0] txt = tmp[1:] if isinstance(self.pop_history_periods, str): self.pop_history_periods = int(self.pop_history_periods) for i in range(1, self.pop_history_periods): tmp_txt = str(i) + txt if tmp_txt not in plotcols: df_period[tmp_txt] = [0] * len(df_period) plotcols.append(tmp_txt) clean_plotcols = [] for idx, col in enumerate(plotcols): if 'prev' in col or 'curr' in col: clean_plotcols.append(col) logger.warning( 'LINE 340 plotcols at end of pop include zeros:%s', clean_plotcols) return df_period, sorted(clean_plotcols) except Exception: logger.error('pop include zeros', exc_info=True) def label_dates_pop(self, df, period, timestamp_col): 
#df[timestamp_col] = pd.to_datetime(df[timestamp_col]) def label_qtr_pop(y): try: curr_quarter = int((y.month - 1) / 3 + 1) start = datetime(y.year, 3 * curr_quarter - 2, 1) return abs((start - y).days) except Exception: logger.error('df label quarter', exc_info=True) try: logger.warning('df columns:%s', list(df.columns)) if period == 'week': df['dayset'] = df[timestamp_col].dt.dayofweek elif period == 'month': df['dayset'] = df[timestamp_col].dt.day elif period == 'year': #logger.warning('LINE 218:%s', df.head(5)) df['dayset'] = df[timestamp_col].dt.dayofyear elif period == 'quarter': df['dayset'] = df[timestamp_col].apply( lambda x: label_qtr_pop(x)) return df except Exception: logger.error('label data ', exc_info=True) def get_groupby_pop_df(self, df, variable, groupby_cols): try: if df is not None: if len(df) > 0: if 'dayset' in df.columns: if variable in ['project']: df = df.groupby(groupby_cols).agg( {variable: 'count'}) df = df.reset_index() #logger.warning('LINE 286 df:%s',df) elif variable in ['milestone']: df = df.groupby(groupby_cols).agg( {variable: 'count'}) df = df.reset_index() #logger.warning('LINE 291 df:%s', df) elif variable in ['task']: df = df.groupby(groupby_cols).agg( {variable: 'count'}) df = df.reset_index() elif variable in ['remuneration']: df = df.groupby(groupby_cols).agg( {variable: 'sum'}) df = df.reset_index() else: #logger.warning('LINE 259:df:%s',df.head()) df = df.groupby(groupby_cols).agg( {variable: 'mean'}) df = df.reset_index() # clean up if self.groupby_var in df.columns and self.variable != self.groupby_var: df = df.drop([self.groupby_var], axis=1) return df except Exception: logger.error('get groupby card data', exc_info=True) def get_groupby_card_data(self, df, variable): try: if variable in ['project']: data = len(df[variable].unique()) data = "{} {}s".format(data, variable) elif variable in ['milestone']: df = df.groupby(['project']).agg({variable: 'nunique'}) data = df[variable].sum() data = "{} {}s".format(data, 
variable) elif variable in ['task']: df = df.groupby(['project', 'milestone']).agg({variable: 'count'}) data = df[variable].sum() data = "{} {}s".format(data, variable) elif variable in ['project_duration'] or 'delay' in variable: df = df.groupby([self.groupby_var]).agg({variable: 'mean'}) df = df.reset_index() data = "{} days".format(round(df[variable].sum(), 2)) elif variable in ['milestone_duration']: df = df.groupby([self.groupby_var, 'project']).agg({variable: 'mean'}) df = df.reset_index() data = "{} days".format(round(df[variable].sum(), 2)) elif variable in [ 'task_duration', 'task_start_delay', 'task_start_end' ]: df = df.groupby([self.groupby_var, 'project', 'milestone']).agg({variable: 'mean'}) df = df.reset_index() data = "{} hours".format(round(df[variable].sum(), 2)) elif variable in ['remuneration']: data = df[variable].sum() data = "${:,.2f}".format(data) return data except Exception: logger.error('get groupby card data', exc_info=True) # -------------------- GRAPHS ------------------------------------------- def graph_periods_to_date(self, df2, timestamp_filter_col, variable): df1 = df2.copy() #self.section_header_updater(section='cards',label=variable,margin_top=159,html_header='h2') try: df1 = self.filter_df(df1) dct = {} for idx, period in enumerate( ['week', 'month', 'quarter', 'year']): df = self.period_to_date( df1, timestamp=dashboard_config['dates']['last_date'], timestamp_filter_col=timestamp_filter_col, period=period) df = df.drop_duplicates(keep='first') # groupby to eliminate repetition data = self.get_groupby_card_data(df, variable) del df gc.collect() dct[period] = data #title = "{} to date".format(period) #p = self.card(title=title, data=data, card_design=random.choice(list(self.KPI_card_css.keys()))) #self.period_to_date_cards[period].text = p.text self.update_cards(dct) except Exception: logger.error('graph periods to date', exc_info=True) def graph_period_over_period(self, period): try: periods = [period] start_date = 
self.pop_start_date end_date = self.pop_end_date if isinstance(start_date, date): start_date = datetime.combine(start_date, datetime.min.time()) if isinstance(end_date, date): end_date = datetime.combine(end_date, datetime.min.time()) today = datetime.combine(datetime.today().date(), datetime.min.time()) df = self.df_pop.copy() df = self.filter_df(df) #logger.warning('LINE 363 -df:%s',df.head()) cols = [self.variable, self.timestamp_col] if self.variable != 'project': cols.append('project') if abs(start_date - end_date).days > 7: if 'week' in periods: periods.remove('week') if abs(start_date - end_date).days > 31: if 'month' in periods: periods.remove('month') if abs(start_date - end_date).days > 90: if 'quarter' in periods: periods.remove('quarter') for idx, period in enumerate(periods): df_period = self.period_over_period( df, start_date=start_date, end_date=end_date, period=period, history_periods=self.pop_history_periods, timestamp_col=self.timestamp_col) groupby_cols = ['dayset', 'period'] if len(df_period) > 0: logger.warning('LINE 473:%s', list(df_period.columns)) df_period = self.get_groupby_pop_df( df_period, variable=self.variable, groupby_cols=groupby_cols) df_period = df_period.reset_index() else: if not 'day' in df_period.columns: df_period['dayset'] = "" else: df_period = df_period.rename( index=str, columns={'day': 'dayset'}) logger.warning('LINE 478:%s', list(df_period.columns)) prestack_cols = list(df_period.columns) df_period = self.split_period_into_columns( df_period, col_to_split='period', value_to_copy=self.variable) # short term fix: filter out the unnecessary first day added by a corrupt quarter functionality if period == 'quarter': if 'dayset' in df_period.columns: min_day = df_period['dayset'].min() df_period = df_period[ df_period['dayset'] > min_day] poststack_cols = list(df_period.columns) title = "{} over {}".format(period, period) plotcols = list(np.setdiff1d(poststack_cols, prestack_cols)) # include current period if not extant 
df_period, plotcols = self.pop_include_zeros( df_period, plotcols=plotcols, period=period) if self.variable in [ 'task_start_delay', 'task_end_delay', 'task_duration' ]: ylabel = 'hours' elif self.variable in [ 'project_duration', 'milestone_duration', 'project_start_delay', 'project_end_delay', 'milestone_start_delay', 'milestone_end_delay' ]: ylabel = 'days' elif self.variable in ['project', 'task', 'milestone']: ylabel = '#' elif self.variable == 'remuneration': ylabel = '$' if 'dayset' not in df_period.columns: leng = len(df_period) if leng > 0: df_period['dayset'] = 0 logger.warning('LINE 549') else: logger.warning('LINE 551') df_period['dayset'] = '' logger.warning('LINE 552: df columns:%s', list(df_period.columns)) if idx == 0: p = df_period.hvplot.bar('dayset', plotcols, rot=45, title=title, stacked=False, width=1200, height=400, value_label=ylabel) else: p += df_period.hvplot.bar('dayset', plotcols, rot=45, title=title, stacked=False, width=1200, height=400, value_label=ylabel) return p except Exception: logger.error('period over period to date', exc_info=True) def chord_diagram(self, launch): try: def normalize_value(x, total): x = int((x / total) * 1000) if x <= 0: return 1 return x df = self.df.copy() # -------------- nodes data = {} data['nodes'] = [] source_list = df['milestone_owner'].tolist() names = list(set(source_list)) person_type_dict = dict(zip(df.milestone_owner, df.type)) type_dict = {} types = list(set(df['type'].tolist())) name_dict = {} for idx, name in enumerate(names): name_dict[name] = idx for idx, name in enumerate(names): type_tmp = person_type_dict[name] index = name_dict[name] data['nodes'].append({ 'OwnerID': index, 'index': idx, 'Type': type_tmp }) nodes = hv.Dataset(pd.DataFrame(data['nodes']), 'index') # --------- make the links data['links'] = [] for idx, row in df.iterrows(): src = name_dict[row['project_owner']] tgt = name_dict[row['milestone_owner']] val = row['remuneration'] data['links'].append({ 'source': src, 'target': 
tgt, 'value': val }) links = pd.DataFrame(data['links']) # get the individual links links = links.groupby(['source', 'target'])['value'].sum() links = links.reset_index() total = links['value'].sum() links['value'] = links['value'].apply( lambda x: normalize_value(x, total)) # filter for top percentile quantile_val = links['value'].quantile( self.chord_data['percentile_threshold']) links = links[links['value'] >= quantile_val] #logger.warning('after quantile filter:%s',len(links)) chord_ = hv.Chord((links, nodes), ['source', 'target'], ['value']) chord_.opts( opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), labels='Type', node_color=dim('index').str(), width=1000, height=1000)) return chord_ except Exception: logger.error('chord diagram', exc_info=True) def timeline(self, project, type='milestone'): try: DF = self.df.copy() if type != project: DF = DF[DF['project'] == project] if type == 'all': rename_dct = { 'milestone_enddate_proposed': 'milestone_enddate', 'milestone_startdate_proposed': 'milestone_startdate', 'task_enddate_proposed': 'task_enddate', 'task_startdate_proposed': 'task_startdate', } DF = DF.rename(index=str, columns=rename_dct) DF = DF.groupby(['milestone', 'task']).agg({ 'milestone_startdate': 'min', 'milestone_enddate': 'max', 'task_startdate': 'min', 'task_enddate': 'max', }) DF = DF.reset_index() # melt to get milestone and task into one column df = pd.melt(DF, value_vars=['milestone', 'task'], id_vars=[ 'milestone_startdate', 'milestone_enddate', 'task_startdate', 'task_enddate' ], value_name='Item', var_name='type') df = df.groupby(['Item', 'type']).agg({ 'milestone_startdate': 'min', 'milestone_enddate': 'max', 'task_startdate': 'min', 'task_enddate': 'max' }).reset_index() df = pd.melt( df, id_vars=[ 'Item', 'type', 'milestone_startdate', 'task_startdate' ], value_vars=['milestone_enddate', 'task_enddate'], value_name='End', var_name='enddate_type') # filter out where tasks label dates and vice versa df1 
= df[(df['type'] == 'task') & (df['enddate_type'] == 'task_enddate')] df = df[(df['type'] == 'milestone') & (df['enddate_type'] == 'milestone_enddate')] df = pd.concat([df1, df]) df = df.drop('enddate_type', axis=1) # do startdate df = pd.melt( df, id_vars=['Item', 'type', 'End'], value_vars=['milestone_startdate', 'task_startdate'], value_name='Start', var_name='startdate_type') # filter out where tasks label dates and vice versa df1 = df[(df['type'] == 'task') & (df['startdate_type'] == 'task_startdate')] df = df[(df['type'] == 'milestone') & (df['startdate_type'] == 'milestone_startdate')] df = pd.concat([df1, df]) df = df.drop('startdate_type', axis=1) # label colors df['Color'] = df['type'].apply( lambda x: 'black' if x == 'milestone' else 'green') # organize by milestone and tasks belonging to milestone df = df.sort_values(by=['Start']).reset_index() df = df.drop('index', axis=1) #logger.warning('LINE 605 - df:%s',df.head(50)) DF = df print( '##################################################################################' ) else: start_str = type + '_startdate_proposed' end_str = type + '_enddate_proposed' # group milestone rename_dct = { start_str: 'Start', end_str: 'End', type: 'Item' } DF = DF.rename(index=str, columns=rename_dct) DF = DF[['Item', 'Start', 'End']] DF = DF.groupby(['Item']).agg({ 'Start': 'min', 'End': 'max' }) DF = DF.reset_index() color_list = [] for item in DF.Item.tolist(): color_list.append( random.choice(dashboard_config['colors'])) DF['Color'] = np.array(color_list) DF['start'] = DF['Start'].dt.strftime('%Y-%m-%d') DF['end'] = DF['End'].dt.strftime('%Y-%m-%d') DF['ID'] = DF.index + 0.6 DF['ID1'] = DF.index + 1.4 logger.warning('LINE 648 %s', DF) self.timeline_vars['DF'] = DF # update source data = dict(Item=DF.Item.tolist(), Start=DF.Start.tolist(), End=DF.End.tolist(), Color=DF.Color.tolist(), start=DF.start.tolist(), end=DF.end.tolist(), ID=DF.ID.tolist(), ID1=DF.ID1.tolist()) # <-- This is the trick, make the x_rage empty 
first, before assigning new value self.timeline_vars['G'].y_range.factors = [] self.timeline_vars['G'].y_range.factors = DF.Item.tolist() #self.timeline_vars['G'].x_range.factors = [] #self.timeline_vars['G'].x_range.factors = sorted(DF.Start.tolist()) timeline_source.data = data except Exception: logger.error('timeline', exc_info=True) def timeline_plot(self, DF): try: hover = HoverTool(tooltips="Task: @Item<br>\ Start: @start<br>\ End: @end") self.timeline_vars['G'].quad(left='Start', right='End', bottom='ID', top='ID1', source=timeline_source, color="Color") self.tools = [hover] + self.tools self.timeline_vars['G'].tools = self.tools self.timeline_vars['toolbar_box'] = ToolbarBox() self.timeline_vars['toolbar_box'].toolbar = Toolbar( tools=self.tools) self.timeline_vars['toolbar_box'].toolbar_location = "above" self.timeline_vars['G'].x_range.start = DF.Start.min( ) - timedelta(days=10) self.timeline_vars['G'].x_range.start = DF.End.max( ) + timedelta(days=10) return self.timeline_vars['G'] except Exception: logger.error('timeline', exc_info=True) def update(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") thistab.pm_gender = pm_gender_select.value thistab.m_gender = m_gender_select.value thistab.t_gender = t_gender_select.value thistab.type = type_select.value thistab.variable = variable_select.value if 'project' in thistab.variable: thistab.groupby_var = 'project' elif 'milestone' in thistab.variable: thistab.groupby_var = 'milestone' elif 'task' in thistab.variable: thistab.groupby_var = 'task' thistab.status = status_select.value thistab.graph_periods_to_date(thistab.df, thistab.timestamp_col, variable=thistab.variable) thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("ready") def update_pop_dates(): thistab.notification_updater( "Calculations underway. 
Please be patient") thistab.pop_history_periods = pop_number_select.value thistab.pop_start_date = datepicker_pop_start.value # trigger period over period thistab.pop_end_date = datepicker_pop_end.value thistab.df_pop = thistab.pym.load_df(start_date=thistab.pop_start_date, end_date=thistab.pop_end_date, cols=[], table=thistab.table, timestamp_col='startdate_actual') thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("ready") def update_history_periods(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") thistab.pop_history_periods = pop_number_select.value thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("ready") def update_timeline(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") thistab.timeline_vars['project'] = timeline_project_select.value thistab.timeline_vars['type'] = timeline_type_select.value thistab.timeline(thistab.timeline_vars['project'], thistab.timeline_vars['type']) thistab.notification_updater("ready") try: cols = [] thistab = Thistab(table='project_composite', cols=cols) # ------------------------------------- SETUP ---------------------------- # format dates first_date_range = thistab.initial_date last_date_range = datetime.now().date() last_date = dashboard_config['dates']['last_date'] first_date = datetime(last_date.year, 4, 1, 0, 0, 0) thistab.df = thistab.pym.load_df(start_date=first_date, end_date=last_date, table=thistab.table, cols=[], timestamp_col=thistab.timestamp_col) thistab.graph_periods_to_date( thistab.df, timestamp_filter_col=thistab.timestamp_col, variable=thistab.variable) thistab.pop_end_date = last_date thistab.pop_start_date = last_date - timedelta(days=5) thistab.df_pop = thistab.pym.load_df( start_date=thistab.pop_start_date, end_date=thistab.pop_end_date, cols=[], table=thistab.table, timestamp_col=thistab.timestamp_col) 
thistab.timeline_vars['projects'] = sorted( list(set(thistab.df['project'].tolist()))) thistab.timeline_vars['project'] = thistab.timeline_vars['projects'][0] # MANAGE STREAM # date comes out stream in milliseconds # --------------------------------CREATE WIDGETS --------------------------------- stream_launch = streams.Stream.define('Launch', launch=-1)() datepicker_pop_start = DatePicker(title="Period start", min_date=first_date_range, max_date=last_date_range, value=thistab.pop_start_date) datepicker_pop_end = DatePicker(title="Period end", min_date=first_date_range, max_date=last_date_range, value=thistab.pop_end_date) pop_number_select = Select(title='Select # of comparative periods', value=str(thistab.pop_history_periods), options=thistab.menus['history_periods']) pop_dates_button = Button(label="Select dates, then click me!", width=15, button_type="success") type_select = Select(title='Select project type', value=thistab.type, options=thistab.menus['type']) status_select = Select(title='Select project status', value=thistab.status, options=thistab.menus['status']) pm_gender_select = Select(title="Select project owner's gender", value=thistab.pm_gender, options=thistab.menus['gender']) m_gender_select = Select(title="Select milestone owner's gender", value=thistab.m_gender, options=thistab.menus['gender']) t_gender_select = Select(title="Select task owner's gender", value=thistab.t_gender, options=thistab.menus['gender']) variable_select = Select(title='Select variable of interest', value=thistab.variable, options=thistab.menus['variables']) timeline_project_select = Select( title='Select project', value=thistab.timeline_vars['project'], options=thistab.timeline_vars['projects']) timeline_type_select = Select(title='Select granularity', value='all', options=thistab.timeline_vars['types']) # --------------------------------- GRAPHS --------------------------- hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch]) pop_week = 
renderer.get_plot(hv_pop_week) hv_pop_month = hv.DynamicMap(thistab.pop_month, streams=[stream_launch]) pop_month = renderer.get_plot(hv_pop_month) hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch]) pop_quarter = renderer.get_plot(hv_pop_quarter) hv_pop_year = hv.DynamicMap(thistab.pop_year, streams=[stream_launch]) pop_year = renderer.get_plot(hv_pop_year) hv_chord = hv.DynamicMap(thistab.chord_diagram, streams=[stream_launch]) chord = renderer.get_plot(hv_chord) thistab.timeline(thistab.timeline_vars['project'], thistab.timeline_vars['type']) timeline = thistab.timeline_plot(DF=thistab.timeline_vars['DF']) # -------------------------------- CALLBACKS ------------------------ type_select.on_change('value', update) pop_dates_button.on_click(update_pop_dates) # lags array status_select.on_change('value', update) pm_gender_select.on_change('value', update) m_gender_select.on_change('value', update) t_gender_select.on_change('value', update) variable_select.on_change('value', update) pop_number_select.on_change('value', update_history_periods) timeline_project_select.on_change('value', update_timeline) timeline_type_select.on_change('value', update_timeline) # -----------------------------------LAYOUT ---------------------------- # put the controls in a single element controls_top = WidgetBox( variable_select, type_select, status_select, pm_gender_select, m_gender_select, t_gender_select, ) controls_pop = WidgetBox(datepicker_pop_start, datepicker_pop_end, pop_number_select) controls_timeline = WidgetBox(thistab.timeline_vars['toolbar_box'], timeline_project_select, timeline_type_select) grid = gridplot([[thistab.notification_div['top']], [Spacer(width=20, height=70)], [thistab.section_headers['cards']], [thistab.KPI_card_div, controls_top], [thistab.section_headers['pop']], [Spacer(width=20, height=25)], [pop_week.state, controls_pop], [pop_month.state], [pop_quarter.state], [pop_year.state], [thistab.section_headers['chord']], 
[Spacer(width=20, height=25)], [chord.state], [thistab.section_headers['timeline']], [Spacer(width=20, height=25)], [timeline, controls_timeline], [thistab.notification_div['bottom']]]) # Make a tab with the layout tab = Panel(child=grid, title=panel_title) return tab except Exception: logger.error('rendering err:', exc_info=True) return tab_error_flag(panel_title)
def twitter_loader_tab(panel_title):
    """Build the 'Twitter loader' dashboard tab.

    Constructs a TwitterLoader (search + parse + munge pipeline), wires
    Holoviews DynamicMaps to Bokeh widgets/streams, and returns a
    bokeh Panel laid out as a gridplot. On any setup error, logs and
    returns the error-flag tab instead.

    :param panel_title: title string for the returned Panel tab.
    :return: bokeh.models.Panel
    """

    class TwitterLoader():
        """Loads recent tweets for a search term into a pandas DataFrame
        and exposes hvplot-based views (table, sentiment, jitter,
        rolling mean) plus the Div/notification scaffolding for the tab."""

        def __init__(self, search_term='beiber'):
            # TWITTER SETUP
            self.api = None  # lazily created twitter.Api (see get_credentials)
            self.topic = search_term
            # menu options for the two limit selectors; '5000' acts as the
            # "unbounded" sentinel (see parse_results stop condition)
            self.options = {
                'messages': [str(x) for x in range(100, 10, -10)] + ['5000'],
                'time': ['40000'] + [str(x) for x in range(30, 100000, 3000)],
            }
            self.limits = {
                'messages': int(self.options['messages'][0]),
                'time': int(self.options['time'][0])  # secs
            }
            self.hidden_path = dashboard_config['hidden_path']
            # epoch-second window; stop_loading is 'time' seconds before start
            self.timestamp = {
                'start_loading': datetime.now(timezone.utc).timestamp(),
                'stop_loading': datetime.now(timezone.utc).timestamp() - self.limits['time']
            }
            # NOTE(review): %d appears before %m (year-DAY-month); looks
            # unintentional — confirm before relying on string-sorted dates.
            self.DATEFORMAT = "%Y-%d-%m %H:%M:%S"
            self.df = None
            # column accumulators filled by parse_results, one list per field
            self.messages_dict = {
                'message_ID': [],
                'human_readable_creation_date': [],
                'creation_date': [],
                'text': [],
                'user_ID': [],
                'user_creation_date': [],
                'user_name': [],
                'user_screen_name': []
            }
            self.selects = {
                'window': Select(title='Select rolling mean window',
                                 value='1',
                                 options=[str(x) for x in range(1, 20, 2)]),
            }
            self.selects_values = {
                'window': int(self.selects['window'].value),
            }
            # resample periods as pandas offset strings: '300S', '500S', ...
            self.resample_period = {
                'menu': []
            }
            for val in range(300, 3000, 200):
                self.resample_period['menu'].append(str(val) + 'S')
            self.resample_period['value'] = self.resample_period['menu'][0]

            # DIV VISUAL SETUP
            self.trigger = -1  # bumped on each update to fire the streams
            self.html_header = 'h2'
            self.margin_top = 150
            self.margin_bottom = -150
            self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.page_width = 1250
            txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            # same banner text mirrored at top and bottom of the tab
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'twitter': self.section_header_div(text='Twitter search results:',
                                                   width=600, html_header='h2',
                                                   margin_top=155, margin_bottom=-155),
            }

        # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a styled section-header Div wrapping *text* in the
            given html header tag, with margin offsets applied inline."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Replace the banner text in both top and bottom notification Divs."""
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def title_div(self, text, width=700):
            """Return a simple colored <h2> title Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        # ////////////////////////// DIVS SETUP END /////////////////////////////////

        # /////////////////////////// UTILS BEGIN ///////////////////////////
        def twitter_datetime_to_epoch(self, ts):
            """Parse Twitter's created_at string (e.g. 'Mon Jan 01 12:00:00 +0000 2019').

            Returns (naive datetime round-tripped through DATEFORMAT — this
            drops the timezone info — and the epoch timestamp as float).
            """
            ts = datetime.strptime(ts, '%a %b %d %H:%M:%S %z %Y')
            ts_epoch = ts.timestamp()
            ts = datetime.strftime(ts, self.DATEFORMAT)
            ts = datetime.strptime(ts, self.DATEFORMAT)
            return ts, ts_epoch

        def write_to_file(self):
            """Dump self.df to a tab-separated CSV named from topic/limits.

            Currently unused (call site in run() is commented out).
            """
            try:
                filename = """{}_searches_for_last_{}sec_or_last_{}messages.csv""".format(self.topic, self.limits['time'], self.limits['messages'])
                self.df.to_csv(filename, sep='\t', index=False)
            except:
                # NOTE(review): bare except — swallows everything incl. KeyboardInterrupt
                logger.error('Error writing to file', exc_info=True)
        # /////////////////////////// UTILS END /////////////////////

        def reset_data(self):
            """Clear the accumulated message lists and drop the DataFrame
            (called before every new search)."""
            self.messages_dict = {
                'message_ID': [],
                'human_readable_creation_date': [],
                'creation_date': [],
                'text': [],
                'user_ID': [],
                'user_creation_date': [],
                'user_name': [],
                'user_screen_name': []
            }
            self.df = None

        def get_credentials(self, filename='twitter_credentials.json'):
            """Load Twitter API credentials from a JSON file under
            hidden_path and create the twitter.Api client (only once)."""
            try:
                filename = self.hidden_path + filename
                filepath = join(dirname(__file__), filename)
                print(filepath)
                if self.api is None:
                    with open(filepath, 'r') as f:
                        credentials_dict = json.load(f)
                        self.api = twitter.Api(
                            consumer_key=credentials_dict['consumer_key'],
                            consumer_secret=credentials_dict['consumer_secret'],
                            access_token_key=credentials_dict['access_token_key'],
                            access_token_secret=credentials_dict['access_token_secret'],
                        )
                        logger.info('CREDENTIALS LOADED')
            except:
                # NOTE(review): bare except; failure only prints, leaving
                # self.api None — later GetSearch calls will then fail.
                print('credentials not loaded')

        def construct_query(self):
            """Build a raw Twitter search query string 'q=...&count=...&result_type=recent'.

            Comma-separated topics are meant to be joined with '%20'.
            """
            try:
                qry = 'q='
                if ',' in self.topic:
                    topics = self.topic.split(',')
                    # NOTE(review): enumerate yields (index, item), so here
                    # `topic` is the int index and `count` the string —
                    # `count > 0` raises TypeError on py3 and the except path
                    # returns the fallback query. Variables look swapped; confirm.
                    for topic, count in enumerate(topics):
                        if count > 0:
                            qry += '%20' + topic
                        else:
                            qry += topic
                else:
                    qry += self.topic
                qry += '&count={}'.format(self.limits['messages'])
                qry += '&result_type=recent'
                logger.warning('QUERY CONSTRUCTED:%s', qry)
                print(qry)
                return qry
            except:
                logger.error('error constructing query', exc_info=True)
                # fallback: hard-coded default search
                return "q=beiber&count=100&result_type=recent"

        def load_data_about_topic(self):
            """Run the search against the Twitter API and reset the loading
            window timestamps; returns the raw result list (or None on error)."""
            try:
                if self.api is None:
                    self.get_credentials()
                qry = self.construct_query()
                results = self.api.GetSearch(raw_query=qry)
                self.timestamp['start_loading'] = datetime.now(timezone.utc).timestamp()
                self.timestamp['stop_loading'] = self.timestamp['start_loading'] - self.limits['time']
                logger.warning('# of results retreived:%s', len(results))
                return results
            except:
                # NOTE(review): swallows the error and implicitly returns None;
                # parse_results then iterates None inside its own try/except.
                logger.error('error in loading data', exc_info=True)

        # parse, truncate to requested records or seconds, make a dataframe from groupby
        def parse_results(self, results):
            """Append each tweet's fields to messages_dict, stopping at the
            end of *results* or at the messages limit (5000 = unlimited),
            then build self.df from the accumulated dict."""
            try:
                messages_count = 0
                stop = False
                logger.warning('start:end= %s: %s', self.timestamp['start_loading'], self.timestamp['stop_loading'])
                while not stop:
                    res = results[messages_count]
                    tweet_ts, ts_epoch = self.twitter_datetime_to_epoch(res.created_at)
                    self.messages_dict['message_ID'].append(res.id)
                    self.messages_dict['creation_date'].append(ts_epoch)
                    self.messages_dict['human_readable_creation_date'].append(tweet_ts)
                    self.messages_dict['text'].append(res.text)
                    user = res.user
                    ts, ts_epoch_user = self.twitter_datetime_to_epoch(user.created_at)
                    self.messages_dict['user_ID'].append(user.id)
                    self.messages_dict['user_creation_date'].append(ts_epoch_user)
                    self.messages_dict['user_name'].append(user.name)
                    self.messages_dict['user_screen_name'].append(user.screen_name)
                    messages_count += 1
                    # the 100000 represents unlimited messages in case we want to load more than 30 seconds worth
                    if messages_count >= len(results):
                        stop = True
                    # 5000 is the "unbounded" sentinel from the menu options
                    if self.limits['messages'] != 5000:
                        if messages_count >= self.limits['messages']:
                            stop = True
                # make a dataframe
                self.df = pd.DataFrame.from_dict(self.messages_dict)
                if self.df is not None:
                    logger.warning('df:, length=%s,%s', len(self.df), self.df.head())
            except:
                logger.error('error in parsing results', exc_info=True)

        def munge_data(self):
            """Sort the DataFrame by (creation_date, user_ID); if parsing
            produced no frame, build an (empty) one from messages_dict."""
            try:
                if self.df is not None:
                    # groupby user, then sort by message time
                    self.df = self.df.sort_values(by=['creation_date', 'user_ID'])
                else:
                    self.df = pd.DataFrame.from_dict(self.messages_dict)
            except:
                logger.error('munge data', exc_info=True)

        def run(self):
            """Full pipeline: load -> parse -> munge (file write disabled)."""
            try:
                results = self.load_data_about_topic()
                self.parse_results(results)
                self.munge_data()
                #self.write_to_file()
            except Exception:
                logger.error('run', exc_info=True)

        # #################################### PLOTS ######################################
        def sentiment_analysis(self, launch=1):
            """Score each tweet's text with sentiment_analyzer_scores
            (pos/neg/neu), resample means over the selected period, and
            return an hvplot line chart. *launch* only exists to let the
            Holoviews stream re-trigger the plot."""
            try:
                df = self.df[['text', 'human_readable_creation_date']]
                cols = ['pos', 'neg', 'neu']
                for col in cols:
                    if col not in df.columns:
                        # create only once
                        df[col] = 0
                df['pos'], df['neg'], df['neu'] = zip(*df['text'].map(sentiment_analyzer_scores))
                df = df.fillna(0)
                logger.warning('resample period:%s', self.resample_period['value'])
                df = df.set_index('human_readable_creation_date').resample(self.resample_period['value'])\
                    .agg({'pos': 'mean', 'neg': 'mean', 'neu': 'mean'})
                df = df.reset_index()
                df = df.fillna(0)
                logger.warning('LINE 307, df:%s', df.head(30))
                p = df.hvplot.line(x='human_readable_creation_date', y=cols,
                                   width=1200, height=600)
                return p
            except Exception:
                logger.error('run', exc_info=True)

        def visual(self, launch=1):
            """Table of tweets newer than the stop_loading cutoff."""
            try:
                df = self.df[self.df.creation_date >= self.timestamp['stop_loading']]
                p = df.hvplot.table(columns=['message_ID', 'creation_date',
                                             'human_readable_creation_date', 'text',
                                             'user_ID', 'user_creation_date',
                                             'user_name', 'user_screen_name'],
                                    width=1200, height=2000)
                return p
            except Exception:
                logger.error('output data', exc_info=True)

        def jitter(self, launch=1):
            """Line chart of the gap (seconds) between consecutive tweets;
            diff(periods=-1) * -1 gives time-to-next-tweet as a positive value."""
            try:
                df = self.df.set_index('human_readable_creation_date')
                df = df[['creation_date']]
                df['jitter'] = df['creation_date'].diff(periods=-1)
                df['jitter'] = df['jitter'] * -1
                df = df.dropna()
                df = df.reset_index()
                p = df.hvplot.line(x='creation_date', y='jitter', width=1200, height=600)
                return p
            except Exception:
                logger.error('output data', exc_info=True)

        def rolling_mean(self, launch=1):
            """Scatter of the rolling-mean tweet count per resample bucket,
            with window size taken from the 'window' Select."""
            try:
                df = self.df.set_index('human_readable_creation_date')
                df = df.resample(self.resample_period['value']).agg({'message_ID': 'count'})
                df = df['message_ID'].rolling(self.selects_values['window']).mean()
                df = df.reset_index()
                df = df.rename(columns={'message_ID': 'messages',
                                        'human_readable_creation_date': 'date'})
                p = df.hvplot.scatter(x='date', y='messages', width=1200, height=500)
                return p
            except Exception:
                logger.error('time series analysis', exc_info=True)

    def update_tweet_search():
        """Button callback: re-read filter widgets, rerun the pipeline,
        and fire the table + sentiment streams."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.reset_data()
        thistab.limits['messages'] = int(inputs['messages_limit'].value)
        thistab.limits['time'] = int(inputs['time_limit'].value)
        thistab.topic = inputs['search_term'].value
        thistab.run()
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        # NOTE(review): keyword 'launch_this' does not match the stream's
        # declared parameter 'launch' (see Stream.define below) — confirm
        # the sentiment plot actually refreshes on search.
        stream_launch_sentiment.event(launch_this=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_rolling_mean(attr, old, new):
        """'window' Select callback: update the window size and re-fire
        only the rolling-mean stream."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.selects_values['window'] = int(thistab.selects['window'].value)
        thistab.trigger += 1
        stream_launch_rolling_mean.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample_period(attr, old, new):
        """'resample' Select callback: store the new period and re-fire
        the sentiment stream (rolling-mean refresh is disabled)."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.resample_period['value'] = new
        thistab.trigger += 1
        #stream_launch_rolling_mean.event(launch=thistab.trigger)
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        thistab = TwitterLoader()
        thistab.run()

        # MANAGE STREAM
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_rolling_mean = streams.Stream.define('Launch', launch=-1)()
        stream_launch_sentiment = streams.Stream.define('Launch', launch=-1)()

        # DYNAMIC GRAPHS/OUTPUT
        hv_visual = hv.DynamicMap(thistab.visual, streams=[stream_launch])
        visual = renderer.get_plot(hv_visual)

        hv_jitter = hv.DynamicMap(thistab.jitter, streams=[stream_launch])
        jitter = renderer.get_plot(hv_jitter)

        hv_rolling_mean = hv.DynamicMap(thistab.rolling_mean,
                                        streams=[stream_launch_rolling_mean])
        rolling_mean = renderer.get_plot(hv_rolling_mean)

        hv_sentiment_analysis = hv.DynamicMap(thistab.sentiment_analysis,
                                              streams=[stream_launch_sentiment])
        sentiment_analysis = renderer.get_plot(hv_sentiment_analysis)

        # CREATE WIDGETS
        inputs = {
            'search_term': TextInput(title='Enter search term. For list, use commas', value=thistab.topic),
            'messages_limit': Select(title='Select messages limit (5000 = unbounded)',
                                     value=str(thistab.limits['messages']),
                                     options=thistab.options['messages']),
            'time_limit': Select(title='Select time limit (seconds)',
                                 value=str(thistab.limits['time']),
                                 options=thistab.options['time']),
            'resample': Select(title='Select resample period',
                               value=thistab.resample_period['value'],
                               options=thistab.resample_period['menu'])
        }
        tweet_search_button = Button(label='Enter filters/inputs, then press me',
                                     button_type="success")

        # WIDGET CALLBACK
        tweet_search_button.on_click(update_tweet_search)
        thistab.selects['window'].on_change('value', update_rolling_mean)
        inputs['resample'].on_change('value', update_resample_period)

        # COMPOSE LAYOUT
        # group controls (filters/input elements)
        controls_tweet_search = WidgetBox(
            inputs['search_term'],
            inputs['messages_limit'],
            inputs['time_limit'],
            tweet_search_button,
        )
        controls_rolling_mean = WidgetBox(
            thistab.selects['window'],
        )
        controls_resample_period = WidgetBox(
            inputs['resample']
        )

        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.title_div('Sentiment analysis of tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [sentiment_analysis.state, controls_resample_period],
            [thistab.title_div('Smooth graphs:', 1000)],
            [Spacer(width=20, height=30)],
            [rolling_mean.state, controls_rolling_mean],
            [thistab.title_div('Time between tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [jitter.state],
            [thistab.title_div('Twitter search results (use filters on right, then click button):', 1000)],
            [Spacer(width=20, height=30)],
            [visual.state, controls_tweet_search],
            [thistab.notification_div['bottom']],
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('Twitter loader:', exc_info=True)
        return tab_error_flag(panel_title)