def modify_doc(doc):
    """Assemble a minimal test document: a DatePicker whose value changes are
    recorded into a ColumnDataSource that also backs a small circle plot.

    The CustomAction/CustomJS pair exposes the source's data to the test
    harness via the RECORD helper.
    """
    data_source = ColumnDataSource(dict(x=[1, 2], y=[1, 1], val=["a", "b"]))

    figure = Plot(plot_height=400, plot_width=400,
                  x_range=Range1d(0, 1), y_range=Range1d(0, 1),
                  min_border=0)
    # Tool that lets the harness read back the source contents on demand.
    figure.add_tools(CustomAction(callback=CustomJS(
        args=dict(s=data_source), code=RECORD("data", "s.data"))))
    figure.add_glyph(data_source, Circle(x='x', y='y', size=20))

    date_picker = DatePicker(title='Select date',
                             value=datetime(2019, 9, 20),
                             min_date=datetime(2019, 9, 1),
                             max_date=datetime.utcnow(),
                             css_classes=["foo"])

    def record_change(attr, old, new):
        # Stash the (old, new) transition so a test can assert on it.
        data_source.data['val'] = [old, new]

    date_picker.on_change('value', record_change)
    doc.add_root(column(date_picker, figure))
from datetime import datetime

from bokeh.io import curdoc
from bokeh.models import DatePicker, HBox

# Date picker spanning November 2014 through "now", defaulting to January 1st
# of the current year.
beginning = DatePicker(title="Begin Date",
                       min_date=datetime(2014, 11, 1),
                       max_date=datetime.now(),
                       value=datetime(datetime.now().year, 1, 1))


def on_begin_date_change(attr, old, new):
    # Echo the newly selected date to stdout.
    print(new)


beginning.on_change('value', on_begin_date_change)
curdoc().add_root(HBox(children=[beginning]))
def accounts_tsa_tab(panel_title):
    """Build the accounts time-series-analysis dashboard tab.

    Loads account data, fits Prophet forecasts of mean daily amount and daily
    unique addresses, and wires the plots plus filter widgets into a Bokeh
    gridplot. Returns a bokeh Panel, or tab_error_flag(panel_title) on error.

    NOTE(review): relies on names defined elsewhere in this file/project:
    Mytab (base class; presumably supplies df_load/notification_updater),
    PythonClickhouse, SD, logger, dd (dask.dataframe), hv, streams, renderer,
    menus, dashboard_config, Prophet, tab_error_flag and the bokeh widgets.
    Indentation in this block was reconstructed from a whitespace-mangled
    source; statement placement inside branches should be verified upstream.
    """

    class Thistab(Mytab):
        # Per-tab state container plus plotting/forecasting helpers.

        def __init__(self, table, cols, dedup_cols):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None  # dask dataframe populated by load_df()
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')
            self.forecast_days = 30  # horizon handed to Prophet.make_future_dataframe
            self.interest_var = 'address'
            self.trigger = -1  # incremented to re-fire the hv DynamicMaps
            self.status = 'all'
            self.update_type = 'all'
            # NOTE(review): 'status' and 'interest_var' are assigned twice in
            # this constructor; the later values ('all' / 'amount') win.
            self.status = 'all'
            self.account_type = 'all'
            self.interest_var = 'amount'
            self.pl = {}  # for rf pipeline
            self.div_style = """ style='width:300px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            # list of tier specific addresses for prediction
            self.address_list = []
            self.address_select = Select(title='Filter by address',
                                         value='all',
                                         options=[])
            self.address = 'all'
            self.load_data_flag = False
            self.day_diff = 1  # NOTE(review): duplicate of the assignment above
            self.groupby_dict = {}
            self.addresses = []
            # NOTE(review): div_style is re-assigned with an identical value.
            self.div_style = """ style='width:300px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.max_loaded_date = None  # cached bounds of the loaded window
            self.min_loaded_date = None
            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'forecast': self.section_header_div(
                    text='Forecasts:{}'.format(self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Wrap *text* in a styled heading element and return it as a Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        # ####################################################
        #              UTILITY DIVS
        def results_div(self, text, width=600, height=300):
            """Return a plain Div holding *text*."""
            div = Div(text=text, width=width, height=height)
            return div

        def title_div(self, text, width=700):
            """Return *text* wrapped in a styled <h2> Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def reset_checkboxes(self):
            """Reset the address filter widget back to 'all'."""
            try:
                # NOTE(review): self.address_selected is not initialized in
                # __init__ (which sets self.address) — verify which attribute
                # the rest of the project reads.
                self.address_selected = ""
                self.address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self, start_date, end_date):
            """Load [start_date, end_date] into self.df (skipping the reload
            when the requested window is already covered) and apply the
            current widget filters.

            String dates are parsed with self.DATEFORMAT.
            """
            try:
                logger.warning("data load begun")
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)

                if self.df is not None:
                    # Bounds of what is already in memory (dask -> compute()).
                    self.max_loaded_date = self.df.block_timestamp.max().compute()
                    self.min_loaded_date = self.df.block_timestamp.min().compute()
                    if start_date >= self.min_loaded_date and end_date <= self.max_loaded_date:
                        # Requested window already covered; nothing to do.
                        logger.warning("data already loaded - %s", self.df.tail(10))
                        pass
                    else:
                        self.df_load(start_date, end_date, cols=self.cols)
                        self.df = self.df.fillna(0)
                        df = self.df[['address']]
                        df = df.compute()
                        # NOTE(review): iterating a DataFrame yields its column
                        # labels, so list(set(list(df))) is just ['address'] —
                        # probably meant the unique values of df['address'];
                        # verify against the original source.
                        self.addresses = ['all'] + list(set(list(df)))
                        #self.make_delta()
                        #self.df = self.df.set_index('block_timestamp')
                        logger.warning("data loaded - %s", self.df.tail(10))
                else:
                    self.df_load(start_date, end_date, cols=self.cols)
                    self.df = self.df.fillna(0)
                    df = self.df[['address']]
                    df = df.compute()
                    # NOTE(review): same column-labels-vs-values concern as above.
                    self.addresses = ['all'] + list(set(list(df)))
                    # self.make_delta()
                    # self.df = self.df.set_index('block_timestamp')
                    logger.warning("data loaded - %s", self.df.tail(10))
                # Apply the current status/account/update/address filters.
                # (Indentation reconstructed: this runs after either load path.)
                self.df = self.filter(self.df)
            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add a '<col>_diff' percent-change column for each target column,
            then rebuild self.df as a 15-partition dask dataframe.

            NOTE(review): self.targets is not set in this class's __init__ —
            presumably provided by Mytab or a subclass; confirm before calling.
            """
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        self.df = self.df.fillna(self.df.mean())
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))
            except Exception:
                logger.error('make delta', exc_info=True)

        ##################################################
        #  EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Return an hvplot box plot of *variable* grouped by status.

            NOTE(review): the SD(...) fallback only triggers when self.df is
            None — an empty (non-None) frame plots with ylim (0, 0); verify
            that is intended.
            """
            try:
                # logger.warning("difficulty:%s", self.df.tail(30))
                # get max value of variable and multiply it by 1.1
                minv = 0
                maxv = 0
                df = self.df
                if df is not None:
                    if len(df) > 0:
                        minv, maxv = dd.compute(df[variable].min(),
                                                df[variable].max())
                else:
                    df = SD('filter', [variable, 'status'], []).get_df()
                return df.hvplot.box(variable, by='status',
                                     ylim=(.9 * minv, 1.1 * maxv))
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS
        def filter(self, df):
            """Apply the currently-selected status/account-type/update-type/
            address filters to *df* and return the filtered frame (also adds a
            'freq' column aliasing 'address')."""
            try:
                df = df.assign(freq=df.address)
                if self.status != 'all':
                    df = df[df.status == self.status]
                if self.account_type != 'all':
                    # NOTE(review): 'acccount_type' (triple c) looks like a
                    # typo for the 'account_type' column loaded in cols below —
                    # this branch likely raises AttributeError (swallowed by
                    # the except). Verify the column name.
                    df = df[df.acccount_type == self.account_type]
                if self.update_type != 'all':
                    df = df[df.update_type == self.update_type]
                if self.address != 'all':
                    df = df[df.address == self.address]
                return df
            except Exception:
                # NOTE(review): on error this returns None implicitly.
                logger.error("filter:", exc_info=True)

        def tsa_amount(self, launch):
            """Forecast mean daily 'amount' with Prophet.

            Returns an hv layout: forecast band (yhat/yhat_lower/yhat_upper)
            plus trend/weekly components. *launch* is the DynamicMap trigger
            value and is otherwise unused.
            """
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'amount': 'mean'})
                df = df.reset_index()
                df = df.compute()
                label = 'amount_diff'
                # NOTE(review): this diff column is computed but dropped by the
                # df[['ds', 'y']] projection below.
                df[label] = df[self.interest_var].diff()
                df = df.fillna(0)
                # Prophet requires the columns to be named 'ds' and 'y'.
                rename = {'block_timestamp': 'ds', 'amount': 'y'}
                df = df.rename(columns=rename)
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)
                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
                print(list(forecast.columns))
                # Overlay the point forecast (line) with its bounds (scatter).
                for idx, col in enumerate(['yhat', 'yhat_lower', 'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds', y=col, width=600,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(x='ds', y=col, width=600,
                                                     height=250, value_label='$',
                                                     legend=False).relabel(col)
                # Overlay the decomposition components.
                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds', y=col, width=550,
                                                 height=250, value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds', y=col, width=550,
                                                  height=250, value_label='$',
                                                  legend=False).relabel(col)
                return p + q
            except Exception:
                # NOTE(review): log label says "box plot" but this is tsa_amount.
                logger.error("box plot:", exc_info=True)

        def tsa_freq(self, launch):
            """Forecast daily unique-address count with Prophet.

            Same structure as tsa_amount but aggregates 'address' by nunique.
            """
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'address': 'nunique'})
                df = df.reset_index()
                df = df.compute()
                label = 'freq_diff'
                # NOTE(review): dropped by the df[['ds', 'y']] projection below.
                df[label] = df['address'].diff()
                df = df.fillna(0)
                rename = {'block_timestamp': 'ds', 'address': 'y'}
                df = df.rename(columns=rename)
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)
                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
                print(list(forecast.columns))
                for idx, col in enumerate(['yhat', 'yhat_lower', 'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds', y=col, width=600,
                                                 height=250,
                                                 value_label='#').relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(
                            x='ds', y=col, width=600, height=250,
                            value_label='#').relabel(col)
                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds', y=col, width=550,
                                                 height=250,
                                                 value_label='#').relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds', y=col, width=550,
                                                  height=250,
                                                  value_label='#').relabel(col)
                return p + q
            except Exception:
                # NOTE(review): log label says "box plot" but this is tsa_freq.
                logger.error("box plot:", exc_info=True)

    ####################################################
    #    GRAPHS
    # Widget callbacks. These close over thistab and the widgets created in
    # the try-block below; they are only invoked after setup completes.
    def update(attrname, old, new):
        """Push every filter widget's value into thistab and re-fire the plots."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.address = thistab.address_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_load(attrname, old, new):
        """Reload data for the currently selected date range."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())
        cols = [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type', 'amount'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = last_date - timedelta(days=60)

        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        # NOTE(review): stream_select_variable is created but never used below.
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])
        # search by address checkboxes
        # NOTE(review): thistab.addresses is still [] here — load_df below is
        # what populates it — so these checkboxes start with no labels.
        thistab.checkboxes = CheckboxButtonGroup(labels=thistab.addresses,
                                                 active=[0])

        # ----------------------------------- LOAD DATA
        # load model-making data
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        # load data for period to be predicted

        # tables
        hv_tsa_amount = hv.DynamicMap(thistab.tsa_amount,
                                      streams=[stream_launch])
        tsa_amount = renderer.get_plot(hv_tsa_amount)
        hv_tsa_freq = hv.DynamicMap(thistab.tsa_freq,
                                    streams=[stream_launch])
        tsa_freq = renderer.get_plot(hv_tsa_freq)

        # add callbacks
        datepicker_start.on_change('value', update_load)
        datepicker_end.on_change('value', update_load)
        thistab.address_select.on_change('value', update)
        select_forecast_days.on_change('value', update)
        update_type_select.on_change('value', update)
        account_type_select.on_change('value', update)
        status_select.on_change('value', update)

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             thistab.address_select, select_forecast_days,
                             update_type_select, account_type_select,
                             status_select, thistab.checkboxes)

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [Spacer(width=20, height=30)],
                         [tsa_amount.state, controls],
                         [tsa_freq.state],
                         [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
def cryptocurrency_eda_tab(cryptos, panel_title):
    """Build the cryptocurrency exploratory-data-analysis dashboard tab.

    Shows scatter matrices, linear (corr_label) and non-parametric
    (mann_whitneyu_label) relationship tables, and lag-shifted comparisons
    between github/market variables. Returns a bokeh Panel, or
    tab_error_flag(panel_title) on error.

    NOTE(review): relies on names defined elsewhere in this file/project:
    Mytab (base class; presumably supplies df_load, redis, corr_label,
    mann_whitneyu_label, resample_period), PythonClickhouse, groupby_dict,
    logger, pd, hv, streams, renderer, dashboard_config, tab_error_flag and
    the bokeh widgets. Indentation in this block was reconstructed from a
    whitespace-mangled source.
    """
    # Shared source streamed to by Thistab.lags_corr and displayed in the
    # lags_corr_table at the bottom of the layout.
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[],
                                               variable_2=[],
                                               relationship=[],
                                               lag=[],
                                               r=[],
                                               p_value=[]))

    class Thistab(Mytab):
        # Per-tab state container plus plotting helpers.

        # NOTE(review): mutable default argument — dedup_cols=[] is shared
        # across calls; safe only if never mutated. Consider dedup_cols=None.
        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None  # raw loaded (dask) frame
            self.df1 = None  # prepped/resampled pandas frame (see prep_data)
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']
            self.trigger = 0  # incremented to re-fire the hv DynamicMaps
            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.variable = 'fork'  # current x/explained variable
            self.crypto = 'all'
            self.lag_variable = 'push'
            self.lag_days = "1,2,3"  # comma-separated lags from the TextInput
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]
            # Correlation-strength thresholds (presumably consumed by
            # corr_label in Mytab — confirm there).
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ style='width:350px; margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            # track variable for AI for significant effects
            self.adoption_variables = {
                'user': [],
                'developer': ['watch', 'fork']
            }
            self.significant_effect_dict = {}
            self.reset_adoption_dict(self.variable)
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            #self.lag_section_head_txt = 'Lag relationships between {} and...'.format(self.variable)
            self.lag_section_head_txt = 'Lag relationships:'
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag': self.section_header_div(
                    text=self.lag_section_head_txt,
                    width=600, html_header='h3',
                    margin_top=5, margin_bottom=-155),
                'distribution': self.section_header_div(
                    text='Pre transform distribution:{}'.format(
                        self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                'relationships': self.section_header_div(
                    # NOTE(review): this string has no '{}' placeholder, so
                    # .format(self.section_divider) is a no-op — the divider is
                    # never appended; likely a copy/paste slip.
                    text='Relationships between variables:'.format(
                        self.section_divider),
                    width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
                # NOTE(review): 'correlations' and 'non_linear' carry identical
                # text; the 'correlations' header probably meant to describe
                # linear correlations — verify.
                'correlations': self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600, html_header='h3',
                    margin_top=5, margin_bottom=-155),
                'non_linear': self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600, html_header='h3',
                    margin_top=5, margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Wrap *text* in a styled heading element and return it as a Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Show *text* in both the top and bottom notification bars."""
            txt = """<div style="text-align:center;background:black;width:{}px;"> <h4 style="color:#fff;"> {}</h4></div>""".format(self.page_width, text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def reset_adoption_dict(self, variable):
            """Clear the recorded significant-effect columns for *variable*."""
            self.significant_effect_dict[variable] = []

        def section_header_updater(self, text, section, html_header='h3',
                                   margin_top=150, margin_bottom=-150):
            """Rewrite the text of an existing section header Div in place."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            self.section_headers[section].text = text

        # ////////////// DIVS /////////////////////////////////
        def title_div(self, text, width=700):
            """Return *text* wrapped in a styled <h2> Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Return the static help Div explaining how to read the
            relationship tables."""
            txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> Positive: as variable 1 increases, so does variable 2. </li> <li> Negative: as variable 1 increases, variable 2 decreases. </li> <li> Strength: decisions can be made on the basis of strong and moderate relationships. </li> <li> No relationship/not significant: no statistical support for decision making. </li> <li> The scatter graphs (below) are useful for visual confirmation. </li> <li> The histogram (right) shows the distribution of the variable. </li> </ul> </div> """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////
        def prep_data(self, df1):
            """Resample the raw (dask) frame per crypto, apply the configured
            lag shift to every feature except self.variable, and store the
            result as the pandas frame self.df1.

            NOTE(review): self.resample_period is not set in this __init__ —
            presumably inherited from Mytab or set by update_resample below;
            confirm it has a value before first use.
            """
            try:
                self.cols = list(df1.columns)
                df1['timestamp'] = df1['timestamp'].astype('M8[us]')
                df = df1.set_index('timestamp')
                #logger.warning('LINE 195 df:%s',df.head())
                # handle lag for all variables
                if self.crypto != 'all':
                    df = df[df.crypto == self.crypto]
                df = df.compute()
                #logger.warning('LINE 199: length before:%s',len(df))
                df = df.groupby('crypto').resample(self.resample_period).agg(
                    self.groupby_dict)
                #logger.warning('LINE 201: length after:%s',len(df))
                df = df.reset_index()
                # NOTE(review): 'vars' shadows the builtin of the same name.
                vars = self.feature_list.copy()
                if int(self.lag) > 0:
                    for var in vars:
                        if self.variable != var:
                            df[var] = df[var].shift(int(self.lag))
                df = df.dropna()
                self.df1 = df
                #logger.warning('line 184- prep data: df:%s',self.df.head(10))
            except Exception:
                logger.error('prep data', exc_info=True)

        def set_groupby_dict(self):
            """Placeholder — currently a no-op."""
            try:
                pass
            except Exception:
                logger.error('set groupby dict', exc_info=True)

        # ///////////////// PLOTS /////////////////////
        def lags_plot(self, launch):
            """Scatter self.variable against self.lag_variable shifted by each
            lag in self.lag_days; also streams the lag correlations into
            lags_corr_src as a side effect (via lags_corr)."""
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                df = df.compute()
                cols = [self.lag_variable]
                lags = self.lag_days.split(',')
                for day in lags:
                    # NOTE(review): bare except — narrow to ValueError, since
                    # only int(day) is expected to fail here.
                    try:
                        label = self.lag_variable + '_' + day
                        df[label] = df[self.lag_variable].shift(int(day))
                        cols.append(label)
                    except:
                        logger.warning('%s is not an integer', day)
                df = df.dropna()
                # Side effect: streams into lags_corr_src; the DataTable that
                # lags_corr returns is discarded here.
                self.lags_corr(df)
                # plot the comparison
                #logger.warning('in lags plot: df:%s',df.head(10))
                return df.hvplot(x=self.variable, y=cols, kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Correlate self.variable against every lag column in *df*, stream
            the rows into lags_corr_src, and return a DataTable view of it.

            The lag number is recovered from the column-name suffix
            ('<var>_<lag>'); non-numeric suffixes get lag 'None'.
            """
            try:
                corr_dict_data = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'lag': [],
                    'r': [],
                    'p_value': []
                }
                a = df[self.variable].tolist()
                for col in df.columns:
                    if col not in ['timestamp', self.variable]:
                        # find lag
                        var = col.split('_')
                        try:
                            tmp = int(var[-1])
                            lag = tmp
                        except Exception:
                            lag = 'None'
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        corr_dict_data['variable_1'].append(self.variable)
                        corr_dict_data['variable_2'].append(col)
                        corr_dict_data['relationship'].append(txt)
                        corr_dict_data['lag'].append(lag)
                        corr_dict_data['r'].append(round(rvalue, 4))
                        corr_dict_data['p_value'].append(round(pvalue, 4))
                # rollover keeps only the freshly streamed rows in the source.
                lags_corr_src.stream(corr_dict_data,
                                     rollover=(len(corr_dict_data['lag'])))
                columns = [
                    TableColumn(field="variable_1", title="variable 1"),
                    TableColumn(field="variable_2", title="variable 2"),
                    TableColumn(field="relationship", title="relationship"),
                    TableColumn(field="lag", title="lag(days)"),
                    TableColumn(field="r", title="r"),
                    TableColumn(field="p_value", title="p_value"),
                ]
                data_table = DataTable(source=lags_corr_src, columns=columns,
                                       width=900, height=400)
                return data_table
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """Linear relationship table (corr_label) of self.variable vs every
            other feature; checkpoints significant developer-adoption features
            to redis as a side effect. Returns an hvplot table."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'r': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop('timestamp', axis=1)
                #df = df.compute()
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    if col != self.variable:
                        #logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        # add to dict
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['r'].append(round(rvalue, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))
                        # update significant effect variables
                        if self.variable in self.adoption_variables[
                                'developer']:
                            if any(relationship in txt for relationship in
                                   self.relationships_to_check):
                                if self.variable not in \
                                        self.significant_effect_dict.keys():
                                    self.significant_effect_dict[
                                        self.variable] = []
                                self.significant_effect_dict[
                                    self.variable].append(col)
                if self.variable in self.adoption_variables['developer']:
                    tmp = self.significant_effect_dict[self.variable].copy()
                    tmp = list(set(tmp))
                    tmp_dct = {
                        'features': tmp,
                        'timestamp': datetime.now().strftime(self.DATEFORMAT)
                    }
                    # write to redis
                    # NOTE(review): self.redis presumably comes from Mytab —
                    # confirm; also 'type' shadows the builtin as a kwarg name.
                    save_params = 'adoption_features:developer' + '-' + self.variable
                    self.redis.save(tmp_dct, save_params, "", "",
                                    type='checkpoint')
                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'r': corr_dict['r'],
                    'p-value': corr_dict['p-value']
                })
                #logger.warning('df:%s',df.head(23))
                return df.hvplot.table(columns=[
                    'Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'
                ], width=550, height=400, title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """Mann-Whitney-U relationship table of self.variable vs every
            other feature. Returns an hvplot table."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'stat': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop('timestamp', axis=1)
                #df = df.compute()
                #logger.warning('line df:%s',df.head(10))
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    if col != self.variable:
                        #logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['stat'].append(round(stat, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))
                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'stat': corr_dict['stat'],
                    'p-value': corr_dict['p-value']
                })
                #logger.warning('df:%s',df.head(23))
                # NOTE(review): the title below is missing a space
                # ('parametricrelationship') — it is a runtime string, so left
                # untouched here; fix upstream if the label matters.
                return df.hvplot.table(
                    columns=[
                        'Variable 1', 'Variable 2', 'Relationship', 'stat',
                        'p-value'
                    ], width=550, height=400,
                    title='Non parametricrelationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            """Histogram grid (one subplot per feature) over the raw frame."""
            try:
                return self.df.hvplot.hist(y=self.feature_list, subplots=True,
                                           shared_axes=False, bins=25,
                                           alpha=0.3, width=300).cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter-matrix of self.variable against every other feature in
            the prepped frame self.df1."""
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)
                df = self.df1
                #df = df[self.feature_list]
                # get difference for money columns
                #thistab.prep_data(thistab.df)
                if 'timestamp' in df.columns:
                    df = df.drop('timestamp', axis=1)
                #df = df.repartition(npartitions=1)
                #df = df.compute()
                df = df.fillna(0)
                #logger.warning('line 302. df: %s',df.head(10))
                cols_temp = self.feature_list.copy()
                if self.variable in cols_temp:
                    cols_temp.remove(self.variable)
                #variable_select.options = cols_lst
                p = df.hvplot.scatter(x=self.variable, y=cols_temp, width=330,
                                      subplots=True, shared_axes=False,
                                      xaxis=False).cols(4)
                return p
            except Exception:
                logger.error('matrix plot', exc_info=True)

        # Dead code kept from the original (string-quoted stub, never defined):
        '''
        def regression(self,df):
            try:
            except Exception:
                logger.error('matrix plot', exc_info=True)
        '''

    # Widget callbacks. These close over thistab, the widgets and the streams
    # created in the try-block below; they are only invoked after setup.
    def update_variable(attr, old, new):
        """Switch the explained variable and re-fire the matrix/corr plots."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        # NOTE(review): prep_data runs BEFORE thistab.variable is updated, so
        # the lag-exclusion inside prep_data still uses the old variable on
        # this pass — verify whether the two lines should be swapped.
        thistab.prep_data(thistab.df)
        thistab.variable = new
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.lag_section_head_txt = 'Lag relationships between {} and...'.format(
            thistab.variable)
        #thistab.section_header_updater('lag',thistab.lag_section_head_txt)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """Switch the lag variable and re-fire the lags plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_crypto(attr, old, new):
        """Switch the crypto filter (and lag), re-prep and re-fire plots."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.crypto = crypto_select.value
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):  # update lag & cryptocurrency
        """Switch the single-lag value, re-prep and re-fire plots."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Reload data for the selected date range and re-fire plots."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value, datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Switch the resample period, re-prep and re-fire plots."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Button callback: adopt the comma-separated lags from the TextInput
        and re-fire the lags plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'external_daily'
        # NOTE(review): 'cols' is computed but Thistab is constructed with []
        # for cols — verify whether 'cols' should be passed instead.
        cols = list(groupby_dict.keys()) + ['timestamp', 'crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=200)

        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        # date comes out stream in milliseconds
        #stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        variable_select = Select(title='Select variable', value='fork',
                                 options=thistab.feature_list)
        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)
        lag_select = Select(title='Select lag', value=str(thistab.lag),
                            options=thistab.lag_menu)
        crypto_select = Select(title='Select cryptocurrency', value='all',
                               options=['all'] + thistab.items)
        resample_select = Select(title='Select resample period', value='D',
                                 options=['D', 'W', 'M', 'Q'])
        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55, width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10, button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src, columns=columns,
                                    width=500, height=280)
        # NOTE(review): 'width' below is assigned but never used.
        width = 800

        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        #hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])

        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        crypto_select.on_change('value', update_crypto)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             variable_select, lag_select, crypto_select,
                             resample_select)
        controls_lag = WidgetBox(lag_variable_select, lags_input,
                                 lags_input_button)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [matrix_plot.state, controls],
                         [thistab.section_headers['relationships']],
                         [Spacer(width=20, height=30)],
                         [thistab.section_headers['correlations']],
                         [Spacer(width=20, height=30)],
                         [corr_table.state, thistab.corr_information_div()],
                         [thistab.section_headers['non_linear']],
                         [Spacer(width=20, height=30)],
                         [nonpara_table.state],
                         [thistab.section_headers['lag']],
                         [Spacer(width=20, height=30)],
                         [lags_plot.state, controls_lag],
                         [lags_corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)
def account_predictive_tab(page_width=1200):
    """Build the 'predictions: accounts by value' dashboard tab.

    Trains a random-forest churn classifier on historical account data,
    renders accuracy / feature-importance tables, a per-variable box plot,
    and a per-address churn-prediction table, and wires Bokeh widgets
    (date pickers, selects, buttons) to HoloViews streams for interactivity.

    :param page_width: pixel width used for the notification divs and
        section headers (default 1200).
    :return: a bokeh ``Panel`` containing the assembled grid, or the
        error-flag tab if construction fails.
    """

    class Thistab(Mytab):
        """Tab-local state container: data loading, model fitting, plotting."""

        def __init__(self, table, cols, dedup_cols):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None  # training dataframe (dask) loaded by load_df
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None  # dataframe for the prediction period
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')
            # features used for both training and prediction
            self.feature_list = hyp_variables
            # outcome definitions: classification target 'churned' is the
            # only one consumed below; 'regression' entry appears unused here
            self.targets = {
                'classification': {
                    'churned': {
                        'cols': ['churned', 'active'],
                        'target_col': 'status'
                    }
                },
                'regression': {
                    'aion_fork': {
                        'cols': [1, 0],
                        'target_col': 'aion_fork'
                    }
                }
            }
            self.interest_var = 'address'
            self.trigger = -1  # monotonically increased to force stream events
            self.status = 'all'
            self.clf = None
            self.pl = {}  # for rf pipeline (one sklearn Pipeline per target)
            self.div_style = """ style='width:300px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            # list of tier specific addresses for prediction
            self.address_list = []
            self.prediction_address_selected = ""
            self.load_data_flag = False
            self.day_diff = 1
            # every feature is aggregated by mean when grouping
            self.groupby_dict = {}
            for col in self.feature_list:
                self.groupby_dict[col] = 'mean'
            self.div_style = """ style='width:300px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.metrics_div = Div(text='', width=400, height=300)
            self.accuracy_df = None  # set by rf_clf; read by accuracy_table
            self.inspected_variable = 'amount'  # variable shown in box plot

            # ------- DIVS setup begin
            self.page_width = page_width
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'churn':
                self.section_header_div(
                    text=
                    'Churned accounts: prediction model accuracy, variable ranking:{}'
                    .format('----'),
                    width=int(self.page_width * .5),
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'variable behavior':
                self.section_header_div(text='Variable behavior:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'predictions':
                self.section_header_div(
                    text='Select date range to make predictions:{}'.format(
                        self.section_divider),
                    width=int(self.page_width * .5),
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a Div wrapping *text* in a styled HTML header element."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        # ####################################################
        #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            """Return a plain Div holding *text* (results display helper)."""
            div = Div(text=text, width=width, height=height)
            return div

        def title_div(self, text, width=700):
            """Return a Div with *text* rendered as an h2 title."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def reset_checkboxes(self):
            """Reset the address filter back to 'all' (button callback)."""
            try:
                self.prediction_address_selected = ""
                self.prediction_address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self,
                    start_date="2018-04-25 00:00:00",
                    end_date="2018-12-10 00:00:00"):
            """Load training data for [start_date, end_date] into self.df.

            String dates are parsed with self.DATEFORMAT; NaNs are zero-filled.
            """
            try:
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)
                # df_load is provided by Mytab; populates self.df
                self.df_load(start_date, end_date)
                self.df = self.df.fillna(0)
                #self.make_delta()
                #self.df = self.df.set_index('block_timestamp')
                #logger.warning("data loaded - %s",self.df.tail(10))
            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add a '<target>_diff' pct-change column per target to self.df.

            Computes on pandas (via .compute()) then converts back to dask.
            """
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        self.df = self.df.fillna(self.df.mean())
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))
            except Exception:
                logger.error('make delta', exc_info=True)

        def split_df(self, df, target):
            """Split *df* into per-class boolean masks stored in self.df1.

            NOTE(review): references ``self.target`` (singular), but only
            ``self.targets`` is assigned in __init__ — this would raise
            AttributeError if called; also ``cols`` and ``target_col`` are
            given the same value. Appears dead/broken — confirm before use.
            """
            cols = self.target['classification'][target]
            target_col = self.target['classification'][target]
            for val in cols:
                self.df1[val] = df[target_col] == val
            logger.warning(
                "Finished split into churned and retained dataframes")

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Return a box plot of *variable* grouped by account status.

            NOTE(review): when self.df is non-None but empty, minv/maxv stay
            0 and the ylim is (0, 0) — possibly unintended; confirm.
            """
            try:
                # logger.warning("difficulty:%s", self.df.tail(30))
                # get max value of variable and multiply it by 1.1
                minv = 0
                maxv = 0
                df = self.df
                if df is not None:
                    if len(df) > 0:
                        minv, maxv = dd.compute(df[variable].min(),
                                                df[variable].max())
                else:
                    # fall back to a stub/sample dataframe when no data loaded
                    df = SD('filter', [variable, 'status'], []).get_df()
                return df.hvplot.box(variable,
                                     by='status',
                                     ylim=(.9 * minv, 1.1 * maxv))
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS
        def rf_clf(self):
            """Fit one RandomForest pipeline per classification target.

            For each target: filters/aggregates self.df per address+status,
            splits train/test (70/30), fits an imputer+forest pipeline into
            self.pl[target], and records test accuracy in self.accuracy_df.
            """
            try:
                logger.warning("RANDOM FOREST LAUNCHED")
                error_lst = []
                df_temp = self.df
                df_temp = self.normalize(df_temp,
                                         timestamp_col='block_timestamp')
                # if all addresses used filter for only positive transactions
                for target in self.targets['classification']:
                    # filter out joined
                    df = df_temp.copy()
                    if target == 'churned':
                        df = df[df['status'] != 'joined']
                    #logger.warning("line 205: df columns in %s:",df.columns.tolist())
                    df = df.groupby(['address',
                                     'status']).agg(self.groupby_dict)
                    df = df.reset_index()
                    #logger.warning("line 222: df columns in %s:",df.tail(10))
                    df = df.compute()
                    '''
                    # only retain wanted values
                    col_values = list(self.df[self.targets['classification'][target]['target_col']].unique())
                    for val in col_values:
                        if val in self.targets['classification'][target]['cols']:
                            pass
                        else:
                            df[self.targets['classification'][target]['target_col']] = \
                                df[df[self.targets['classification'][target]['cols']] != val]
                    '''
                    X = df[self.feature_list]
                    y = df[self.targets['classification'][target]
                           ['target_col']]
                    #logger.warning('y=:%s',y.head(100))
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=0.3)
                    # keep the exact training-column order for later predicts
                    self.feature_list = X_train.columns.tolist()
                    # missing_values=0: zeros (from fillna(0)) are treated as
                    # missing and imputed with the median
                    self.pl[target] = Pipeline([
                        ('imp',
                         SimpleImputer(missing_values=0, strategy='median')),
                        ('rf',
                         RandomForestClassifier(n_estimators=100,
                                                random_state=42,
                                                max_depth=4,
                                                class_weight='balanced'))
                    ])
                    self.pl[target].fit(X_train, y_train)
                    y_pred = self.pl[target].predict(X_test)
                    error_lst.append(
                        round(100 * metrics.accuracy_score(y_test, y_pred),
                              2))
                self.accuracy_df = pd.DataFrame({
                    'Outcome':
                    list(self.targets['classification'].keys()),
                    'Accuracy':
                    error_lst,
                })
                #logger.warning('accuracy_df:%s',self.accuracy_df.head())
                #self.make_tree(target=target)
                # diagnostics for the last-fitted target only
                print('confusion matrix:\n')
                print(confusion_matrix(y_test, y_pred))
                print('classification report:\n')
                print(classification_report(y_test, y_pred))
                #logger.warning("clf model built:%s",self.pl)
            except Exception:
                logger.error("RF:", exc_info=True)

        def accuracy_table(self):
            """Return an hvplot table of per-outcome model accuracy."""
            try:
                columns = self.accuracy_df.columns.tolist()
                return self.accuracy_df.hvplot.table(
                    columns=['Outcome', 'Accuracy'],
                    width=250,
                    title='Prediction accuracy')
            except Exception:
                logger.error("RF:", exc_info=True)

        def prediction_information_div(self, width=350, height=450):
            """Return a Div explaining how to read the prediction table."""
            txt = """ <div {}> <h4 {}>Info </h4> <ul style='margin-top:-10px;'> <li> The table shows the predicted change.</br> </li> <li> For desirable outcomes: </br> ... a positive number is good! </br> ... the bigger the number the better. </br> ... a negative number is bad! </br> ... the bigger the negative number the worse it is. </li> <> For non-desirable outcomes: </br>... the inverse is true </li> <li> Use the datepicker(s) to select dates for the period desired </li> </ul> </div> """.format(
                self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def metrics_div_update(self, data):
            """Rewrite self.metrics_div with the churn-likelihood percentage."""
            div_style = """ style='width:350px;margin-right:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """<div {}> <h4 {}>Prediction Info </h4> <ul style='margin-top:-10px;'> <li> {}% likely to churn </li> </ul> </div>""".format(
                div_style, self.header_style, data)
            self.metrics_div.text = txt

        def stats_information_div(self, width=400, height=300):
            """Return a Div describing the accuracy and importance tables."""
            div_style = """ style='width:350px;margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """ <div {}> <h4 {}>Metadata Info </h4> <ul> <li > <h4 style='margin-bottom:-2px;'>Table left:</h4> - shows the outcome,</br> and the accuracy in %</br> <strong><i>100% is perfection!</i></strong> </li> <li> <h4 style='margin-bottom:-2px;'>Table right:</h4> - shows the desired outcome, the variables(things Aion controls) </br> and their importance to the particular outcome </br> ...which variable(s) have a greater impact on an outcome. </br>- lower = better </br>- generally only the best ranked 3 matter </br>- business advice: manipulate the top ranked variables to attain desirable outcomes </li> </ul> </div>""".format(
                div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def load_prediction_df(self, start_date, end_date):
            """Load the prediction-period data into self.df_predict.

            ``date`` instances are promoted to midnight datetimes before the
            ClickHouse query.
            """
            if isinstance(start_date, date):
                start_date = datetime.combine(start_date, datetime.min.time())
            if isinstance(end_date, date):
                end_date = datetime.combine(end_date, datetime.min.time())
            cols = self.feature_list + ['address', 'block_timestamp']
            self.df_predict = self.cl.load_data(table=self.table,
                                                cols=cols,
                                                start_date=start_date,
                                                end_date=end_date)
            logger.warning('319:in load prediction: %s',
                           self.df_predict.head(5))

        def update_prediction_addresses_select(self):
            """Refresh the address dropdown from the loaded prediction data."""
            self.prediction_address_select.options = ['all']
            if len(self.df_predict) > 0:
                lst = ['all'] + list(
                    self.df_predict['address'].unique().compute())
                self.prediction_address_select.options = lst

        # the period for which the user wants a prediction
        def make_account_predictions(self, launch=-1):
            """Predict per-address churn for the selected period/address.

            :param launch: dummy stream-trigger value (HoloViews DynamicMap).
            :return: hvplot table of addresses and their likely action.

            NOTE(review): ``y_pred_verbose`` is only assigned inside the
            ``if target == 'churned'`` branch but read unconditionally just
            below — would raise NameError for any other target; confirm.
            """
            try:
                logger.warning("MAKE PREDICTIONS LAUNCHED")
                target = list(self.targets['classification'].keys())[0]
                # make
                df = self.df_predict
                #logger.warning("line 363%s",df.head(10))
                # make list of address for prediction select
                # filter if prediction for certain addresses
                #logger.warning('address selected:%s',self.prediction_address_select.value)
                if self.prediction_address_select.value is not None:
                    if len(self.prediction_address_select.value) > 0:
                        if self.prediction_address_select.value not in [
                                'all', ''
                        ]:
                            df = df[df.address ==
                                    self.prediction_address_select.value]
                #logger.warning('line 409 predict-df post filter:%s', df.head(20))
                # make table for display
                self.predict_df = pd.DataFrame({
                    'address': [],
                    'likely action': []
                })
                for target in list(self.targets['classification'].keys()):
                    if len(df) > 0:
                        df = self.normalize(df,
                                            timestamp_col='block_timestamp')
                        df = self.group_data(df,
                                             self.groupby_dict,
                                             timestamp_col='block_timestamp')
                        interest_labels = list(df['address'].unique())
                        # run model
                        df = df.fillna(0)
                        X = df[self.feature_list]
                        #logger.warning("df before prediction:%s",X.tail(10))
                        y_pred = self.pl[target].predict(X)
                        logger.warning('y_pred:%s', y_pred)
                        if target == 'churned':
                            y_pred_verbose = [
                                'remain' if x in ["active", 1] else "churn"
                                for x in y_pred
                            ]
                        #---- make table for display
                        self.predict_df = pd.DataFrame({
                            'address': interest_labels,
                            'likely action': y_pred_verbose
                        })
                        #------ label pools
                        self.predict_df['address'] = self.predict_df[
                            'address'].map(self.poolname_verbose_trun)
                        #logger.warning('self.predict_df:%s',self.predict_df)
                        churn_df = self.predict_df[
                            self.predict_df['likely action'] == 'churn']
                        perc_to_churn = round(
                            100 * len(churn_df) / len(self.predict_df), 1)
                        # strip trailing 'ed' from e.g. 'churned' -> 'churn'
                        txt = target[:-2]
                        text = """<div {}> <h3>Percentage likely to {}:</h3> <strong 'style=color:black;'>{}%</strong></div>""".format(
                            self.header_style, txt, perc_to_churn)
                        self.metrics_div_update(data=perc_to_churn)
                    else:
                        text = """<div {}> <br/> <h3>Sorry, address not found</h3> </div>""".format(
                            self.header_style)
                        self.metrics_div.text = text
                    logger.warning("end of %s predictions", target)
                return self.predict_df.hvplot.table(
                    columns=['address', 'likely action'],
                    width=500,
                    title='Account predictions')
            except Exception:
                logger.error("prediction:", exc_info=True)

        def make_tree(self, target='churned'):
            """Export one small estimator of the fitted forest as a PNG.

            NOTE(review): output path is hard-coded to a developer's home
            directory — confirm before enabling in production.
            """
            try:
                if not self.pl:
                    self.rf_clf()
                # Limit depth of tree to 3 levels
                # Extract the small tree
                tree_small = self.pl[target].named_steps['rf'].estimators_[5]
                # Save the tree as a png image
                export_graphviz(tree_small,
                                out_file='small_tree.dot',
                                feature_names=self.feature_list,
                                rounded=True,
                                precision=1)
                (graph, ) = pydot.graph_from_dot_file('small_tree.dot')
                # filepath = self.make_filepath('../../../static/images/small_tree.gif')
                # .write_png(filepath)
                filepath = self.make_filepath(
                    '/home/andre/Downloads/small_tree.png')
                graph.write_png(filepath)
                logger.warning("TREE SAVED")
            except Exception:
                logger.error("make tree:", exc_info=True)

        def make_feature_importances(self):
            """Return an hvplot table ranking features per outcome.

            Importances come from the fitted forest; features are sorted
            ascending by importance and ranks assigned in reverse so the
            most important feature gets the highest rank number.
            """
            try:
                if not self.pl:
                    self.rf_clf()
                results_dct = {
                    'outcome': [],
                    'feature': [],
                    'importance': [],
                    'rank_within_outcome': []
                }
                for target in self.targets['classification'].keys():
                    logger.warning('make feature importances for :%s', target)
                    # Get numerical feature importances
                    importances = list(
                        self.pl[target].named_steps['rf'].feature_importances_)
                    # List of tuples with variable and importance
                    feature_importances = [(feature, round(importance, 4))
                                           for feature, importance in zip(
                                               self.feature_list, importances)]
                    sorted_importances = sorted(feature_importances,
                                                key=itemgetter(1))
                    # logger.warning('importances :%s',importances)
                    # logger.warning("feature_importances:%s",feature_importances)
                    target_lst = [target] * len(importances)
                    count = 1
                    rank_lst = []
                    for i in importances:
                        rank_lst.append(count)
                        count += 1
                    results_dct['outcome'] += target_lst
                    results_dct['feature'] += [
                        i[0] for i in sorted_importances
                    ]
                    results_dct['importance'] += [
                        i[1] for i in sorted_importances
                    ]
                    results_dct['rank_within_outcome'] += sorted(rank_lst,
                                                                 reverse=True)
                df = pd.DataFrame.from_dict(results_dct)
                logger.warning('MAKE FEATURE IMPORTANCES FINISHED')
                return df.hvplot.table(
                    columns=[
                        'outcome', 'feature', 'importance',
                        'rank_within_outcome'
                    ],
                    width=600,
                    title="Variables ranked by importance (for each output)")
            except Exception:
                logger.error("Feature importances:", exc_info=True)

    ####################################################
    #                 GRAPHS

    # Widget callbacks (closures over thistab and the widgets created below).
    def update(attrname, old, new):
        """Date-picker callback: reload prediction data and re-fire streams."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.load_prediction_df(datepicker_start.value,
                                   datepicker_end.value)
        thistab.update_prediction_addresses_select()
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_select_variable.event(variable=thistab.inspected_variable)
        thistab.notification_updater("ready")

    def update_address_predictions(attrname, old, new):
        """Address-select callback: re-fire the prediction stream."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_select_variable(attrname, old, new):
        """Variable-select callback: redraw the box plot for the new variable."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.inspected_variable = select_variable.value
        stream_select_variable.event(variable=thistab.inspected_variable)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())
        cols = hyp_variables + [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        last_date = last_date - timedelta(days=50)
        first_date = last_date - timedelta(days=5)

        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_variable = Select(title='Filter by variable',
                                 value=thistab.inspected_variable,
                                 options=thistab.feature_list)
        # search by address checkboxes
        thistab.prediction_address_select = Select(title='Filter by address',
                                                   value='all',
                                                   options=[])
        reset_prediction_address_button = Button(label="reset address(es)",
                                                 button_type="success")

        # ----------------------------------- LOAD DATA
        # load model-making data: train on the 60 days preceding the
        # prediction window's start
        end = datepicker_start.value
        start = end - timedelta(days=60)
        thistab.load_df(start, end)
        thistab.rf_clf()
        # load data for period to be predicted
        thistab.load_prediction_df(datepicker_start.value,
                                   datepicker_end.value)
        thistab.update_prediction_addresses_select()

        # tables
        hv_account_prediction_table = hv.DynamicMap(
            thistab.make_account_predictions, streams=[stream_launch])
        account_prediction_table = renderer.get_plot(
            hv_account_prediction_table)
        hv_features_table = hv.DynamicMap(thistab.make_feature_importances)
        features_table = renderer.get_plot(hv_features_table)
        hv_accuracy_table = hv.DynamicMap(thistab.accuracy_table)
        accuracy_table = renderer.get_plot(hv_accuracy_table)
        hv_variable_plot = hv.DynamicMap(thistab.box_plot, streams=[stream_select_variable])\
            .opts(plot=dict(width=800, height=500))
        variable_plot = renderer.get_plot(hv_variable_plot)

        # add callbacks
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        thistab.prediction_address_select.on_change(
            'value', update_address_predictions)
        reset_prediction_address_button.on_click(thistab.reset_checkboxes)
        select_variable.on_change('value', update_select_variable)

        # put the controls in a single element
        controls = WidgetBox(select_variable, datepicker_start,
                             datepicker_end,
                             thistab.prediction_address_select,
                             reset_prediction_address_button)
        controls_prediction = WidgetBox(datepicker_start, datepicker_end,
                                        thistab.prediction_address_select,
                                        reset_prediction_address_button)

        # compose the tab layout top-to-bottom
        grid = gridplot(
            [[thistab.notification_div['top']],
             [Spacer(width=20, height=70)],
             [thistab.section_headers['churn']],
             [Spacer(width=20, height=70)],
             [accuracy_table.state, thistab.stats_information_div()],
             [features_table.state],
             [thistab.section_headers['variable behavior']],
             [Spacer(width=20, height=30)],
             [variable_plot.state, controls],
             [thistab.section_headers['predictions']],
             [Spacer(width=20, height=30)],
             [
                 account_prediction_table.state, thistab.metrics_div,
                 controls_prediction
             ], [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title='predictions: accounts by value')
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        text = 'predictions: accounts by value'
        return tab_error_flag(text)
# ---- live-mode / history widget setup (callbacks such as live_toggle,
# live_hours, initial_date, final_date and get_history are defined earlier
# in this file).

# toggle between live readout and historical view; starts enabled (active=[0])
live_checkbox = CheckboxGroup(labels=['Live'], active=[0], width=150)
live_checkbox.on_change('active', live_toggle)
# free-text number of past hours to show in live mode (capped at 24h by the
# live_hours callback — presumably; confirm in its definition)
live_hours_input = TextInput(value=str(max_hours), title="Live past hours (max. 24h):", width=150)
live_hours_input.on_change('value', live_hours)
# live_slider = Slider(start=0.01, end=24, value=max_hours, step=0.01, title="Hours before")
# live_slider.on_change('value',live_hours)

# pick a date
# start/end date pickers for the history view, both defaulting to today
date_picker_i = DatePicker(value=date.today(), max_date=date.today(), title='Choose inital date:', width=150, disabled=False)
mydate_i = date_picker_i.value
date_picker_f = DatePicker(value=date.today(), max_date=date.today(), title='Choose final date:', width=150, disabled=False)
mydate_f = date_picker_f.value
date_picker_i.on_change('value', initial_date)
date_picker_f.on_change('value', final_date)

# button that triggers loading/plotting of the selected historical range
hist_button = Button(label="Show History", button_type="default", width=310)
hist_button.on_click(get_history)

# static text areas: header note plus three readout lines (top/mid/bottom
# sensors — TODO confirm what each line displays) and layout spacers
pre_head = PreText(text="N.B.: Readout every 10 seconds. Be patient!", width=500, height=50)
pre_head2 = PreText(text="", width=400, height=25)
pre_temp_top = PreText(text="", width=400, height=20)
pre_temp_mid = PreText(text="", width=400, height=20)
pre_temp_bot = PreText(text="", width=400, height=20)
h_space = PreText(text="", width=50, height=1)  # horizontal spacer
v_space = PreText(text="", width=1, height=50)  # vertical spacer
# curdoc().add_root(column(pre_head,row(div,column(pre_head2,pre_temp_top,pre_temp_mid,pre_temp_bot),column(plot['temperature'],plot['humidity'],plot['pressure']),)))
new_source_2_data = dict( races=races, cases_percent=[ new_result['asian'][0], new_result['black'][0], new_result['cdph-other'][0], new_result['white'][0], new_result['latino'][0], new_result['other'][0] ], deaths_percent=[ new_result['asian'][1], new_result['black'][1], new_result['cdph-other'][1], new_result['white'][1], new_result['latino'][1], new_result['other'][1] ], population_percent=[ new_result['asian'][2], new_result['black'][2], new_result['cdph-other'][2], new_result['white'][2], new_result['latino'][2], new_result['other'][2] ], date=[ selected_date, selected_date, selected_date, selected_date, selected_date, selected_date ]) source_2.data = new_source_2_data print(new_result, flag_2) date_picker.on_change("value", call_back) # layout curdoc().add_root(column(intro, date_picker, description, row(p1, p2))) #### labels and legends
def forecasting_bcc_rentals_visitor_tab(panel_title): class Thistab(Mytab): def __init__(self, table, cols, dedup_cols=[]): Mytab.__init__(self, table, cols, dedup_cols) self.table = table self.cols = cols self.DATEFORMAT = "%Y-%m-%d %H:%M:%S" self.df = None self.df1 = None self.df_predict = None self.day_diff = 1 # for normalizing for classification periods of different lengths self.df_grouped = '' self.cl = PythonClickhouse('aion') self.trigger = 0 self.groupby_dict = { 'category': 'nunique', 'item': 'nunique', 'area': 'nunique', 'visit_duration': 'mean', 'age': 'mean', 'gender_coded': 'mean', 'status_coded': 'mean', 'rental_employee_gender_coded': 'mean', 'rental_employee_age': 'mean', 'rental_tab': 'sum' } self.feature_list = ['age', 'rental_employee_age', 'rental_tab'] self.tsa_variable = 'rental_tab' self.forecast_days = 40 self.lag_variable = 'visit_duration' self.lag_days = "1,2,3" self.lag = 0 self.lag_menu = [str(x) for x in range(0, 100)] self.strong_thresh = .65 self.mod_thresh = 0.4 self.weak_thresh = 0.25 self.corr_df = None self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ self.header_style = """ style='color:blue;text-align:center;' """ self.variables = sorted(list(self.groupby_dict.keys())) self.variable = 'rental_tab' self.relationships_to_check = ['weak', 'moderate', 'strong'] self.pym = PythonMongo('aion') self.menus = { 'item': ['all'], 'category': ['all'], 'status': ['all', 'guest', 'member'], 'gender': ['all', 'male', 'female'], 'variables': list(self.groupby_dict.keys()), 'history_periods': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], 'area': ['all', 'bar', 'rentals'], 'tsa': ['rental_tab', 'visit_duration'] } self.select = {} self.select['area'] = Select(title='Select BCC area', value='all', options=self.menus['area']) self.select['item'] = Select(title='Select item', value='all', options=self.menus['item']) self.select['status'] = Select(title='Select 
visitor status', value='all', options=self.menus['status']) self.select['gender'] = Select(title="Select visitor gender", value='all', options=self.menus['gender']) self.select['category'] = Select(title="Select category", value='all', options=self.menus['category']) self.select['rental_employee_gender'] = Select( title="Select category", value='all', options=self.menus['category']) self.select_values = {} for item in self.select.keys(): self.select_values[item] = 'all' self.multiline_vars = {'x': 'gender', 'y': 'rental_tab'} self.timestamp_col = 'visit_start' # ------- DIVS setup begin self.page_width = 1250 txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome') self.notification_div = { 'top': Div(text=txt, width=self.page_width, height=20), 'bottom': Div(text=txt, width=self.page_width, height=10), } lag_section_head_txt = 'Lag relationships between {} and...'.format( self.variable) self.section_divider = '-----------------------------------' self.section_headers = { 'lag': self.section_header_div(text=lag_section_head_txt, width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'distribution': self.section_header_div(text='Pre-transform distribution:', width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'relationships': self.section_header_div( text='Relationships between variables:{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), 'correlations': self.section_header_div(text='Correlations:', width=600, html_header='h3', margin_top=5, margin_bottom=-155), 'forecast': self.section_header_div(text='Forecasts:{}'.format( self.section_divider), width=600, html_header='h2', margin_top=5, margin_bottom=-155), } # ----- UPDATED DIVS END # ---------------------- DIVS ---------------------------- def section_header_div(self, text, 
html_header='h2', width=600, margin_top=150, margin_bottom=-150): text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \ .format(margin_top, margin_bottom, html_header, text, html_header) return Div(text=text, width=width, height=15) def notification_updater(self, text): txt = """<div style="text-align:center;background:black;width:100%;"> <h4 style="color:#fff;"> {}</h4></div>""".format(text) for key in self.notification_div.keys(): self.notification_div[key].text = txt # ////////////// DIVS ///////////////////////////////// def title_div(self, text, width=700): text = '<h2 style="color:#4221cc;">{}</h2>'.format(text) return Div(text=text, width=width, height=15) def corr_information_div(self, width=400, height=300): div_style = """ style='width:350px; margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """ txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> Positive: as variable 1 increases, so does variable 2. </li> <li> Negative: as variable 1 increases, variable 2 decreases. </li> <li> Strength: decisions can be made on the basis of strong and moderate relationships. </li> <li> No relationship/not significant: no statistical support for decision making. </li> <li> The scatter graphs (below) are useful for visual confirmation. </li> <li> The histogram (right) shows the distribution of the variable. 
</li> </ul> </div> """.format(div_style, self.header_style) div = Div(text=txt, width=width, height=height) return div # ///////////////////////////////////////////////////////////// def load_df(self, req_startdate, req_enddate, table, cols, timestamp_col): try: # get min and max of loaded df if self.df is not None: loaded_min = self.df[timestamp_col].min() loaded_max = self.df[timestamp_col].max() if loaded_min <= req_startdate and loaded_max >= req_enddate: df = self.df[(self.df[timestamp_col] >= req_startdate) & (self.df[timestamp_col] <= req_enddate)] return df return self.pym.load_df(req_startdate, req_enddate, table=table, cols=cols, timestamp_col=timestamp_col) except Exception: logger.error('load_df', exc_info=True) def filter_df(self, df1): try: df1 = df1[self.cols] for key, value in self.groupby_dict.items(): if value == 'count': if self.select_values[key] != 'all': df1 = df1[df1[key] == self.select_values[key]] return df1 except Exception: logger.error('filter', exc_info=True) def prep_data(self, df): try: df = self.filter_df(df) # set up code columns codes = { 'gender': { 'male': 1, 'female': 2, 'other': 3 }, 'status': { 'guest': 1, 'member': 2 } } for col in df.columns: coded_col = col + '_coded' if 'gender' in col: df[coded_col] = df[col].map(codes['gender']) if 'status' == col: df[coded_col] = df[col].map(codes['status']) self.df = df.set_index(self.timestamp_col) # groupby and resample self.df1 = self.df.groupby('name').resample( self.resample_period).agg(self.groupby_dict) self.df1 = self.df1.reset_index() self.df1 = self.df1.fillna(0) logger.warning('LINE 288 df:%s', self.df1.head(10)) except Exception: logger.error('prep data', exc_info=True) def tsa(self, launch): try: df = self.df.resample('D').agg({self.tsa_variable: 'mean'}) df = df.reset_index() label = self.tsa_variable + '_diff' df[label] = df[self.tsa_variable].diff() df = df.fillna(0) rename = {self.timestamp_col: 'ds', self.tsa_variable: 'y'} df = df.rename(columns=rename) df = 
df[['ds', 'y']] logger.warning('df:%s', df.tail()) m = Prophet() m.fit(df) future = m.make_future_dataframe(periods=self.forecast_days) forecast = m.predict(future) print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()) print(list(forecast.columns)) for idx, col in enumerate(['yhat', 'yhat_lower', 'yhat_upper']): if idx == 0: p = forecast.hvplot.line(x='ds', y=col, width=600, height=250, value_label='$', legend=False).relabel(col) else: p *= forecast.hvplot.scatter(x='ds', y=col, width=600, height=250, value_label='$', legend=False).relabel(col) for idx, col in enumerate(['trend', 'weekly']): if idx == 0: q = forecast.hvplot.line(x='ds', y=col, width=550, height=250, value_label='$', legend=False).relabel(col) else: q *= forecast.hvplot.line(x='ds', y=col, width=550, height=250, value_label='$', legend=False).relabel(col) return p + q except Exception: logger.error("TSA:", exc_info=True) def update_variable(attr, old, new): thistab.notification_updater("Calculations in progress! Please wait.") thistab.variable = new thistab.section_head_updater('lag', thistab.variable) thistab.trigger += 1 stream_launch_matrix.event(launch=thistab.trigger) stream_launch_corr.event(launch=thistab.trigger) thistab.notification_updater("Ready!") def update_IVs(attrname, old, new): thistab.notification_updater("Calculations in progress! Please wait.") for item in thistab.select_values.keys(): thistab.select_values[item] = thistab.select[item].value thistab.prep_data(thistab.df) thistab.trigger += 1 stream_launch_matrix.event(launch=thistab.trigger) stream_launch_corr.event(launch=thistab.trigger) thistab.notification_updater("Ready!") def update(attrname, old, new): thistab.notification_updater( "Calculations underway. 
Please be patient") thistab.df = thistab.pym.load_df(start_date=datepicker_start.value, end_date=datepicker_end.value, cols=[], table=thistab.table, timestamp_col=thistab.timestamp_col) thistab.df['gender_code'] = thistab.df['gender'].apply( lambda x: 1 if x == 'male' else 2) thistab.df1 = thistab.prep_data(thistab.df) thistab.trigger += 1 stream_launch_matrix.event(launch=thistab.trigger) stream_launch_corr.event(launch=thistab.trigger) thistab.notification_updater("Ready!") def update_resample(attrname, old, new): thistab.notification_updater( "Calculations underway. Please be patient") thistab.resample_period = new thistab.df1 = thistab.prep_data(thistab.df) thistab.trigger += 1 stream_launch_matrix.event(launch=thistab.trigger) stream_launch_corr.event(launch=thistab.trigger) stream_launch.event(launch=thistab.trigger) thistab.notification_updater("Ready!") def update_lags_selected(): thistab.notification_updater("Calculations in progress! Please wait.") thistab.lag_days = lags_input.value logger.warning('line 381, new checkboxes: %s', thistab.lag_days) thistab.trigger += 1 stream_launch_lags_var.event(launch=thistab.trigger) stream_launch.event(launch=thistab.trigger) thistab.notification_updater("Ready!") def update_multiline(attrname, old, new): thistab.notification_updater("Calculations in progress! Please wait.") thistab.multiline_vars['x'] = multiline_x_select.value thistab.multiline_vars['y'] = multiline_y_select.value thistab.trigger += 1 stream_launch.event(launch=thistab.trigger) thistab.notification_updater("Ready!") def update_forecast(attrname, old, new): thistab.notification_updater( "Calculations underway. 
Please be patient") thistab.forecast_days = int(select_forecast_days.value) thistab.tsa_variable = forecast_variable_select.value thistab.trigger += 1 stream_launch_tsa.event(launch=thistab.trigger) thistab.notification_updater("ready") try: # SETUP table = 'bcc_composite' cols = cols_to_load['guest'] + cols_to_load['rental'] thistab = Thistab(table, cols, []) # setup dates first_date_range = datetime.strptime("2013-04-25 00:00:00", "%Y-%m-%d %H:%M:%S") last_date_range = datetime.now().date() last_date = dashboard_config['dates']['last_date'] - timedelta(days=1) first_date = last_date - timedelta(days=1000) # initial function call thistab.df = thistab.pym.load_df(start_date=first_date, end_date=last_date, cols=[], table=thistab.table, timestamp_col=thistab.timestamp_col) thistab.prep_data(thistab.df) # MANAGE STREAM stream_launch_hist = streams.Stream.define('Launch', launch=-1)() stream_launch_matrix = streams.Stream.define('Launch_matrix', launch=-1)() stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)() stream_launch_lags_var = streams.Stream.define('Launch_lag_var', launch=-1)() stream_launch = streams.Stream.define('Launch', launch=-1)() stream_launch_tsa = streams.Stream.define('Launch_tsa', launch=-1)() # CREATE WIDGETS datepicker_start = DatePicker(title="Start", min_date=first_date_range, max_date=last_date_range, value=first_date) datepicker_end = DatePicker(title="End", min_date=first_date_range, max_date=last_date_range, value=last_date) variable_select = Select(title='Select variable', value=thistab.variable, options=thistab.variables) lag_variable_select = Select(title='Select lag variable', value=thistab.lag_variable, options=thistab.feature_list) lag_select = Select(title='Select lag', value=str(thistab.lag), options=thistab.lag_menu) select_forecast_days = Select( title='Select # of days which you want forecasted', value=str(thistab.forecast_days), options=['10', '20', '30', '40', '50', '60', '70', '80', '90']) 
forecast_variable_select = Select(title='Select forecast variable', value=thistab.menus['tsa'][0], options=thistab.menus['tsa']) resample_select = Select(title='Select resample period', value='D', options=['D', 'W', 'M', 'Q']) multiline_y_select = Select( title='Select comparative DV(y)', value=thistab.multiline_vars['y'], options=['price', 'amount', 'visit_duration']) multiline_x_select = Select(title='Select comparative IV(x)', value=thistab.multiline_vars['x'], options=[ 'category', 'gender', 'rental_employee_gender', 'status', 'item' ]) lags_input = TextInput( value=thistab.lag_days, title="Enter lags (integer(s), separated by comma)", height=55, width=300) lags_input_button = Button(label="Select lags, then click me!", width=10, button_type="success") # --------------------- PLOTS---------------------------------- # tables hv_tsa = hv.DynamicMap(thistab.tsa, streams=[stream_launch_tsa]) tsa = renderer.get_plot(hv_tsa) # setup divs # handle callbacks variable_select.on_change('value', update_variable) resample_select.on_change('value', update_resample) thistab.select['area'].on_change('value', update_IVs) thistab.select['gender'].on_change('value', update_IVs) thistab.select['rental_employee_gender'].on_change('value', update_IVs) thistab.select['item'].on_change('value', update_IVs) thistab.select['category'].on_change('value', update_IVs) thistab.select['status'].on_change('value', update_IVs) select_forecast_days.on_change('value', update_forecast) forecast_variable_select.on_change('value', update_forecast) datepicker_start.on_change('value', update) datepicker_end.on_change('value', update) multiline_x_select.on_change('value', update_multiline) multiline_y_select.on_change('value', update_multiline) # COMPOSE LAYOUT # put the controls in a single element controls_tsa = WidgetBox(datepicker_start, datepicker_end, variable_select, thistab.select['status'], resample_select, thistab.select['gender'], thistab.select['category'], thistab.select['area'], 
forecast_variable_select, select_forecast_days) # create the dashboards grid = gridplot([[thistab.notification_div['top']], [Spacer(width=20, height=70)], [thistab.section_headers['forecast']], [tsa.state, controls_tsa], [Spacer(width=20, height=30)], [thistab.notification_div['bottom']]]) # Make a tab with the layout tab = Panel(child=grid, title=panel_title) return tab except Exception: logger.error('EDA projects:', exc_info=True) return tab_error_flag(panel_title)
def eda_projects_tab(panel_title):
    """Build the 'EDA projects' dashboard tab.

    Assembles a Bokeh Panel containing a scatter matrix, correlation /
    non-parametric tables, a lag-exploration plot and a comparative
    multiline plot, all driven by widget callbacks.  Returns the Panel,
    or the error-flag tab if construction fails.
    """
    # Shared source for the lag-correlation DataTable; streamed to by
    # Thistab.lags_corr and displayed in the layout below.
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[], variable_2=[], relationship=[], lag=[], r=[], p_value=[]))

    class Thistab(Mytab):
        """Tab-local state + plotting methods for the projects EDA view."""

        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None          # raw frame loaded from mongo
            self.df1 = None         # prepped/resampled frame (set by prep_data)
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.trigger = 0
            # column -> aggregation used when resampling in prep_data
            self.groupby_dict = {
                'project_duration': 'sum',
                'project_start_delay': 'mean',
                'project_end_delay': 'mean',
                'project_owner_age': 'mean',
                'project_owner_gender': 'mean',
                'milestone_duration': 'sum',
                'milestone_start_delay': 'mean',
                'milestone_end_delay': 'mean',
                'milestone_owner_age': 'mean',
                'milestone_owner_gender': 'mean',
                'task_duration': 'sum',
                'task_start_delay': 'sum',
                'task_end_delay': 'mean',
                'task_owner_age': 'mean',
                'task_owner_gender': 'mean'
            }
            self.feature_list = list(self.groupby_dict.keys())
            self.lag_variable = 'task_duration'
            self.lag_days = "1,2,3"   # comma-separated lags, parsed in lags_plot
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]
            # correlation-strength thresholds (presumably consumed by
            # corr_label in the Mytab base class — not visible here)
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = self.variables[0]
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            # active widget filters (mirrored by the Select widgets below)
            self.status = 'all'
            self.pm_gender = 'all'
            self.m_gender = 'all'
            self.t_gender = 'all'
            self.type = 'all'
            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'type': [
                    'all', 'research', 'reconciliation', 'audit', 'innovation',
                    'construction', 'manufacturing', 'conference'
                ],
                'gender': ['all', 'male', 'female'],
                'variables': list(self.groupby_dict.keys()),
                'history_periods': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
            }
            self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'}
            self.timestamp_col = 'project_startdate_actual'

            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/> <div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag': self.section_header_div(text=lag_section_head_txt,
                                               width=600, html_header='h2',
                                               margin_top=5, margin_bottom=-155),
                'distribution': self.section_header_div(
                    text='Pre-transform distribution:', width=600,
                    html_header='h2', margin_top=5, margin_bottom=-155),
                'relationships': self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600, html_header='h2', margin_top=5,
                    margin_bottom=-155),
                'correlations': self.section_header_div(
                    text='Correlations:', width=600, html_header='h3',
                    margin_top=5, margin_bottom=-155),
            }
            # ----- UPDATED DIVS END

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a Div wrapping *text* in an HTML header with margins."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Show *text* in both (top/bottom) notification banner divs."""
            txt = """<div style="text-align:center;background:black;width:100%;"> <h4 style="color:#fff;"> {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def reset_adoption_dict(self, variable):
            # NOTE(review): self.significant_effect_dict is never initialized
            # in this class — presumably set on the Mytab base; verify,
            # otherwise this raises AttributeError when called.
            self.significant_effect_dict[variable] = []

        # ////////////// DIVS /////////////////////////////////
        def title_div(self, text, width=700):
            """Return a simple styled <h2> title Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Static help text explaining how to read the correlation table."""
            div_style = """ style='width:350px; margin-left:-600px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> Positive: as variable 1 increases, so does variable 2. </li> <li> Negative: as variable 1 increases, variable 2 decreases. </li> <li> Strength: decisions can be made on the basis of strong and moderate relationships. </li> <li> No relationship/not significant: no statistical support for decision making. </li> <li> The scatter graphs (below) are useful for visual confirmation. </li> <li> The histogram (right) shows the distribution of the variable. </li> </ul> </div> """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////
        def filter_df(self, df1):
            """Apply the active widget filters (status/genders/type) to df1."""
            if self.status != 'all':
                df1 = df1[df1.status == self.status]
            if self.pm_gender != 'all':
                df1 = df1[df1.project_owner_gender == self.pm_gender]
            if self.m_gender != 'all':
                df1 = df1[df1.milestone_owner_gender == self.m_gender]
            if self.t_gender != 'all':
                df1 = df1[df1.task_owner_gender == self.t_gender]
            if self.type != 'all':
                df1 = df1[df1.type == self.type]
            return df1

        def prep_data(self, df1):
            """Filter, resample per project and lag-shift df1 into self.df1.

            Assumes df1 has a 'project' column plus every key of
            self.groupby_dict — TODO confirm against the mongo schema.
            """
            try:
                '''
                df1[self.timestamp_col] = df1[self.timestamp_col].apply(lambda x: datetime(x.year, x.month, x.day, x.hour,0,0))
                '''
                df1 = df1.set_index(self.timestamp_col)
                logger.warning('LINE 195 df:%s', df1.head())
                # handle lag for all variables
                df = df1.copy()
                df = self.filter_df(df)
                logger.warning('LINE 199: length before:%s', len(df))
                # NOTE(review): 'slice' shadows the builtin of the same name
                slice = df[['project']]
                df = df[list(self.groupby_dict.keys())]
                logger.warning('LINE 218: columns:%s', df.head())
                df = df.astype(float)
                df = pd.concat([df, slice], axis=1)
                df = df.groupby('project').resample(self.resample_period).agg(
                    self.groupby_dict)
                logger.warning('LINE 201: length after:%s', len(df))
                df = df.reset_index()
                vars = self.feature_list.copy()
                # shift every feature except the target variable by the lag
                if int(self.lag) > 0:
                    for var in vars:
                        if self.variable != var:
                            df[var] = df[var].shift(int(self.lag))
                df = df.dropna()
                self.df1 = df
                # NOTE(review): logs self.df (raw), not the frame just
                # prepared — probably meant self.df1
                logger.warning('line 184- prep data: df:%s', self.df.head(10))
            except Exception:
                logger.error('prep data', exc_info=True)

        def lags_plot(self, launch):
            """Scatter the target variable against lagged copies of
            self.lag_variable (one per comma-separated entry in lag_days)."""
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                cols = [self.lag_variable]
                lags = self.lag_days.split(',')
                for day in lags:
                    try:
                        label = self.lag_variable + '_' + day
                        df[label] = df[self.lag_variable].shift(int(day))
                        cols.append(label)
                    except:
                        logger.warning('%s is not an integer', day)
                df = df.dropna()
                # side effect: refreshes the lag-correlation DataTable source
                self.lags_corr(df)
                # plot the comparison
                logger.warning('in lags plot: df:%s', df.head(10))
                return df.hvplot(x=self.variable, y=cols, kind='scatter', alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Stream correlation rows (variable vs each lagged column) into
            lags_corr_src and return a DataTable over that source."""
            try:
                corr_dict_data = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'lag': [],
                    'r': [],
                    'p_value': []
                }
                a = df[self.variable].tolist()
                for col in df.columns:
                    if col not in [self.timestamp_col, self.variable]:
                        # find lag
                        var = col.split('_')
                        try:
                            tmp = int(var[-1])
                            lag = tmp
                        except Exception:
                            lag = 'None'
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        corr_dict_data['variable_1'].append(self.variable)
                        corr_dict_data['variable_2'].append(col)
                        corr_dict_data['relationship'].append(txt)
                        corr_dict_data['lag'].append(lag)
                        corr_dict_data['r'].append(round(rvalue, 4))
                        corr_dict_data['p_value'].append(round(pvalue, 4))
                # rollover == batch size, so only the latest batch is shown
                lags_corr_src.stream(corr_dict_data,
                                     rollover=(len(corr_dict_data['lag'])))
                columns = [
                    TableColumn(field="variable_1", title="variable 1"),
                    TableColumn(field="variable_2", title="variable 2"),
                    TableColumn(field="relationship", title="relationship"),
                    TableColumn(field="lag", title="lag(days)"),
                    TableColumn(field="r", title="r"),
                    TableColumn(field="p_value", title="p_value"),
                ]
                data_table = DataTable(source=lags_corr_src, columns=columns,
                                       width=500, height=280)
                return data_table
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """Return an hvplot table of r/p-value between the selected
            variable and every other feature in self.df1."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'r': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop(self.timestamp_col, axis=1)
                # df = df.compute()
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    logger.warning('col :%s', col)
                    if col != self.variable:
                        logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        # add to dict
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['r'].append(round(rvalue, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))
                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'r': corr_dict['r'],
                    'p-value': corr_dict['p-value']
                })
                # logger.warning('df:%s',df.head(23))
                return df.hvplot.table(columns=[
                    'Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'
                ], width=550, height=200, title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """Same layout as correlation_table but using the Mann-Whitney U
            statistic (via self.mann_whitneyu_label) instead of Pearson r."""
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'stat': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop(self.timestamp_col, axis=1)
                # df = df.compute()
                # logger.warning('line df:%s',df.head(10))
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    logger.warning('col :%s', col)
                    if col != self.variable:
                        logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['stat'].append(round(stat, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))
                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'stat': corr_dict['stat'],
                    'p-value': corr_dict['p-value']
                })
                # logger.warning('df:%s',df.head(23))
                return df.hvplot.table(
                    columns=[
                        'Variable 1', 'Variable 2', 'Relationship', 'stat',
                        'p-value'
                    ],
                    width=550, height=200,
                    title='Non parametric relationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            """Small-multiples histogram of every feature (4 per row)."""
            try:
                return self.df.hvplot.hist(y=self.feature_list, subplots=True,
                                           shared_axes=False, bins=25,
                                           alpha=0.3, width=300).cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter the selected variable against every other feature;
            falls back to a dummy scatter when no data is loaded yet."""
            try:
                logger.warning('line 306 self.feature list:%s', self.feature_list)
                df = self.df1
                if df is not None:
                    # thistab.prep_data(thistab.df)
                    if self.timestamp_col in df.columns:
                        df = df.drop(self.timestamp_col, axis=1)
                    df = df.fillna(0)
                    # logger.warning('line 302. df: %s',df.head(10))
                    cols_temp = self.feature_list.copy()
                    if self.variable in cols_temp:
                        cols_temp.remove(self.variable)
                    # variable_select.options = cols_lst
                    p = df.hvplot.scatter(x=self.variable, y=cols_temp,
                                          width=330, subplots=True,
                                          shared_axes=False, xaxis=False).cols(4)
                else:
                    # NOTE(review): df is None on this branch, so df.hvplot
                    # would raise — presumably unreachable in practice
                    p = df.hvplot.scatter(x=[0, 0, 0], y=[0, 0, 0], width=330)
                return p
            except Exception:
                logger.error('matrix plot', exc_info=True)

        def multiline(self, launch=1):
            """Overlay one resampled mean-line of y per distinct level of x
            (levels taken from self.multiline_vars)."""
            try:
                yvar = self.multiline_vars['y']
                xvar = self.multiline_vars['x']
                df = self.df.copy()
                df = df[[xvar, yvar, self.timestamp_col]]
                df = df.set_index(self.timestamp_col)
                df = df.groupby(xvar).resample(self.resample_period).agg(
                    {yvar: 'mean'})
                df = df.reset_index()
                lines = df[xvar].unique()
                # split data frames
                dfs = {}
                for idx, line in enumerate(lines):
                    dfs[line] = df[df[xvar] == line]
                    dfs[line] = dfs[line].fillna(0)
                    logger.warning('LINE 428:%s - %s:', line, dfs[line].head())
                    if idx == 0:
                        p = dfs[line].hvplot.line(x=self.timestamp_col, y=yvar,
                                                  width=1200,
                                                  height=500).relabel(line)
                    else:
                        # NOTE(review): width=2 here vs 1200 above — looks
                        # like a typo carried over; confirm intended widths
                        p *= dfs[line].hvplot.line(x=self.timestamp_col,
                                                   y=yvar, width=2,
                                                   height=500).relabel(line)
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    # ------------------------ widget callbacks ------------------------
    def update_variable(attr, old, new):
        """Variable Select handler: remap gender labels and refresh plots."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data(thistab.df)
        # NOTE(review): only the three gender labels are mapped; for any
        # other selection thistab.variable is never set to `new` — looks
        # like a missing `thistab.variable = new`; confirm intent.
        if 'milestone owner gender' == new:
            thistab.variable = 'm_gender_code'
        if 'project owner gender' == new:
            thistab.variable = 'pm_gender_code'
        if 'task owner gender' == new:
            thistab.variable = 't_gender_code'
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """Lag-variable Select handler: re-prep data, redraw lag plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        """Filter Select handler: pull all filter widget values at once."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.pm_gender = pm_gender_select.value
        thistab.m_gender = m_gender_select.value
        thistab.t_gender = t_gender_select.value
        thistab.status = status_select.value
        thistab.type = type_select.value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):  # update lag & cryptocurrency
        """Single-lag Select handler."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Date-picker handler: reload from mongo, re-encode genders, redraw."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        # encode genders numerically: male -> 1, everything else -> 2
        thistab.df['project_owner_gender'] = thistab.df[
            'project_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.df['milestone_owner_gender'] = thistab.df[
            'milestone_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.df['task_owner_gender'] = thistab.df[
            'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Resample-period Select handler."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Button handler: accept the comma-separated lag list as typed."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        """Multiline x/y Select handler."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars['x'] = multiline_x_select.value
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'project_composite1'
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=30)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        if len(thistab.df) > 0:
            # keep the raw label for the multiline x-axis before encoding
            thistab.df['manager_gender'] = thistab.df['project_owner_gender']
            thistab.df['project_owner_gender'] = thistab.df[
                'project_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            thistab.df['milestone_owner_gender'] = thistab.df[
                'milestone_owner_gender'].apply(
                lambda x: 1 if x == 'male' else 2)
            thistab.df['task_owner_gender'] = thistab.df[
                'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            logger.warning('LINE 527:columns %s', list(thistab.df.columns))

        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)
        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)
        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)
        type_select = Select(title='Select project type',
                             value=thistab.type,
                             options=thistab.menus['type'])
        status_select = Select(title='Select project status',
                               value=thistab.status,
                               options=thistab.menus['status'])
        pm_gender_select = Select(title="Select project owner's gender",
                                  value=thistab.pm_gender,
                                  options=thistab.menus['gender'])
        m_gender_select = Select(title="Select milestone owner's gender",
                                 value=thistab.m_gender,
                                 options=thistab.menus['gender'])
        t_gender_select = Select(title="Select task owner's gender",
                                 value=thistab.t_gender,
                                 options=thistab.menus['gender'])
        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])
        multiline_y_select = Select(title='Select comparative DV(y)',
                                    value=thistab.multiline_vars['y'],
                                    options=[
                                        'remuneration', 'delay_start',
                                        'delay_end', 'project_duration'
                                    ])
        multiline_x_select = Select(
            title='Select comparative IV(x)',
            value=thistab.multiline_vars['x'],
            options=['manager_gender', 'type', 'status'])
        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src,
                                    columns=columns,
                                    width=500,
                                    height=200)
        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        # hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])
        hv_multiline = hv.DynamicMap(thistab.multiline,
                                     streams=[stream_launch])

        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        # NOTE(review): nonpara_table is rendered but never placed in the
        # grid below — confirm whether it was meant to be shown
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)
        multiline = renderer.get_plot(hv_multiline)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        pm_gender_select.on_change('value', update_IVs)
        m_gender_select.on_change('value', update_IVs)
        t_gender_select.on_change('value', update_IVs)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array
        status_select.on_change('value', update_IVs)
        type_select.on_change('value', update_IVs)
        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_lag = WidgetBox(lags_input, lags_input_button,
                                 lag_variable_select)
        controls_multiline = WidgetBox(multiline_x_select, multiline_y_select)
        controls_page = WidgetBox(datepicker_start, datepicker_end,
                                  variable_select, type_select, status_select,
                                  resample_select, pm_gender_select,
                                  m_gender_select, t_gender_select)
        controls_gender = WidgetBox(pm_gender_select, m_gender_select,
                                    t_gender_select)

        # create the dashboards
        grid = gridplot(
            [[thistab.notification_div['top']],
             [Spacer(width=20, height=70)],
             [thistab.section_headers['relationships']],
             [Spacer(width=20, height=30)],
             [matrix_plot.state, controls_page],
             [thistab.section_headers['correlations']],
             [Spacer(width=20, height=30)],
             [corr_table.state, thistab.corr_information_div()],
             [thistab.title_div('Compare levels in a variable', 400)],
             [Spacer(width=20, height=30)],
             [multiline.state, controls_multiline],
             [thistab.section_headers['lag']],
             [Spacer(width=20, height=30)],
             [lags_plot.state, controls_lag],
             [lags_corr_table],
             [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
def tab2():
    """Build the 'confirmed cases % vs population % by race' tab.

    Reads the LA Times CDPH race/ethnicity CSV, keeps the all-ages rows,
    and renders a grouped vbar chart with a DatePicker that re-streams
    the selected date's percentages into the plot's data source.

    Returns:
        bokeh Panel titled 'Percentage of confirmed cases by race'.
    """
    data = pd.read_csv('cdph-race-ethnicity.csv')
    data['date_time'] = pd.to_datetime(data['date'])
    data = data[(data['age'] == 'all')]
    percentages = ['confirmed cases', 'general population']
    regions = ['asian', 'black', "cdph-other", 'latino', 'other', 'white']
    # nested categorical x-axis: one (race, percent-kind) pair per bar
    x = [(race, percent) for race in regions for percent in percentages]

    def create_dataset(df):
        """Interleave confirmed-case% and population% per race into a CDS."""
        counts = sum(
            zip(df['confirmed_cases_percent'], df['population_percent']), ())
        source = ColumnDataSource(data=dict(x=x, counts=counts))
        return source

    def create_plot(source):
        """Build the grouped vbar figure over *source*."""
        p = figure(x_range=FactorRange(*x),
                   y_axis_label='Percentage',
                   plot_width=1030)
        p.title.text = "Confirmed_case% VS Population% by races"
        p.title.align = "center"
        p.title.text_font_size = "20px"
        p.vbar(x='x', top='counts', width=0.9, source=source,
               line_color="white",
               # color by the inner factor (percent kind), hence start=1, end=2
               fill_color=factor_cmap('x', factors=percentages,
                                      palette=["#c9d9d3", "#718dbf"],
                                      start=1, end=2))
        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        # NOTE(review): vbar above creates no legend entries, so these two
        # settings have no visible effect; kept for parity with the
        # original until a legend_field/legend_label is added.
        p.legend.location = "top_left"
        p.legend.orientation = "horizontal"
        p.add_tools(
            HoverTool(tooltips=[('Race, category', "@x"),
                                ('Percentage', "@counts")], ))
        p.add_layout(
            Title(
                text="Data "
                "published by latimes.com/coronavirustracker; download data "
                "from "
                "https://github.com/datadesk/california-coronavirus-data/cdph-race"
                "-ethnicity.csv in GitHub",
                text_font_style="italic"), 'below')
        p.add_layout(
            Title(
                text="Data Source: California Department of Public Health "
                "https://www.cdph.ca.gov/Programs/CID/DCDC/Pages/COVID-19/Race-Ethnicity.aspx",
                text_font_style="italic"), 'below')
        p.add_layout(
            Title(text="Date of last update: 2020-10-14",
                  text_font_style="italic"), 'below')
        return p

    def callback(attr, old, new):
        """DatePicker handler: swap in the selected date's percentages."""
        new_src = create_dataset(
            data[(data['date_time'] == date_picker.value)])
        src.data.update(new_src.data)

    src = create_dataset(data[(data['date_time'] == '2020-10-01')])
    p = create_plot(src)
    # initial value matches the date used to seed `src` above; without it
    # the widget would default to "today", outside the allowed range
    date_picker = DatePicker(title='Choose a date',
                             value='2020-10-01',
                             min_date="2020-05-14",
                             max_date='2020-10-14')
    date_picker.on_change('value', callback)
    controls = WidgetBox(date_picker)
    layout = row(p, controls)
    tab = Panel(child=layout, title='Percentage of confirmed cases by race')
    return tab
else: button.label = 'Play' curdoc().remove_periodic_callback(animate_update) button = Button(label='Play', width=60) button.on_click(animate) def dateChange(attrname, old, new): global df2 if button.label == 'Pause': button.label = 'Play' curdoc().remove_periodic_callback(animate_update) df2 = updateRange() label.text = datetime.fromtimestamp(totimestamp(df2.index[0])).strftime(labelFormat) slider.end = totimestamp(df2.index[-1])-totimestamp(df2.index[0]) slider.value = 0 datePick.on_change('value',dateChange) plot.select_one(HoverTool).tooltips = [ ('Name', '@Name'), ('Level', '@Level'+'%'), ] layout = layout([ [plot,datePick], [slider, button], ]) curdoc().add_root(layout)
class EphemerisApp:
    """Bokeh application displaying SPICE ephemeris data.

    Holds widget state, a SpiceProvider data backend, and three tabs
    (plot / table / kernels). Widget callbacks are gated by ``self.active``
    so that bulk widget updates in ``update_model`` do not trigger a
    cascade of refetches.
    """

    # Conversion factors from interval-name to seconds; keys must match the
    # values offered by the "Time Step" Select widget.
    to_seconds = dict(Day=86400, Hour=3600, Minute=60, Second=1)

    def __init__(self):
        # app variables
        self.active = True            # when False, data-fetch callbacks are suppressed
        self.playAnimation = None     # handle of the periodic animation callback, or None
        self.start_epoch = None
        self.stop_epoch = None
        self.current_epoch = None

        # get initial configuration: every function defined on
        # StandardEphemerisModels is treated as a model factory
        self.available_models = inspect.getmembers(StandardEphemerisModels,
                                                   inspect.isfunction)
        self.ephemeris_model = self.available_models[0][1]()
        self.spice_provider = SpiceProvider()
        self.spice_provider.SPICE_IDS = self.ephemeris_model.objects
        # invert objects mapping: id -> name
        self.spice_provider.SPICE_NAMES = {
            v: k for k, v in self.ephemeris_model.objects.items()
        }

        # init data sources (shared with the SpiceProvider, which pushes updates)
        self.plot_source = self.spice_provider.state_source
        self.table_source = self.spice_provider.ephemeris_source
        self.cum_source = self.spice_provider.cum_source

        # gather options from ephemeris model and spice provider
        self.allowed_models = {
            model[1]().name: model[1] for model in self.available_models
        }
        allowed_objects = [
            self.spice_provider.fromId(name)
            for name in self.ephemeris_model.objects
        ]
        allowed_frames = self.ephemeris_model.FRAMES
        allowed_corrections = [name for name in SpiceProvider.CORRECTIONS]
        allowed_durations = [
            str(v) for v in self.ephemeris_model.DURATION_DAYS
        ]
        allowed_intervals = [name for name in SpiceProvider.INTERVALS]

        # set up widgets
        self.model = Select(title="Ephemeris Model",
                            options=list(self.allowed_models.keys()))
        self.center = Select(title="Center",
                             value=self.ephemeris_model.center,
                             options=allowed_objects)
        self.target = Select(title="Target",
                             value=self.ephemeris_model.target,
                             options=allowed_objects)
        self.frames = Select(title="Frame",
                             value=self.ephemeris_model.frame,
                             options=allowed_frames)
        self.planes = RadioButtonGroup(labels=['XY', 'YZ', 'XZ'], active=0)
        self.vector = Select(title='Vector Type',
                             value=self.ephemeris_model.vector_type,
                             options=allowed_corrections)
        self.epoch = DatePicker(title="Select Epoch",
                                value=datetime.strftime(
                                    self.ephemeris_model.epoch, "%Y-%m-%d"))
        self.offset = Slider(title="Days Since Epoch",
                             value=self.ephemeris_model.offset,
                             start=0,
                             end=self.ephemeris_model.duration,
                             step=1)
        self.duration = Select(title="Duration (Days)",
                               value=str(self.ephemeris_model.duration),
                               options=allowed_durations)
        self.interval = Select(title="Time Step",
                               value=str(self.ephemeris_model.step_size),
                               options=allowed_intervals)

        # create buttons
        self.play_button = Button(label="Play")
        self.exportRange = Div(text="Start and Stop Epoch: ")
        self.update_button = Button(label="Play")
        self.export_button = Button(label="Export")
        self.infoDiv = Div(
            text=
            "<hr>All ephemeris data shown on this website was obtained from publicly available "
            "SPICE files located at <a href='https://naif.jpl.nasa.gov/naif/data.html'>"
            "https://naif.jpl.nasa.gov/naif/data.html</a>, which is hosted by the "
            "Navigation and Ancillary Information Facility (NAIF) at the NASA Jet Propulsion "
            "Laboratory. The exception is the SPICE kernel for the Parker Solar Probe, which is "
            "available at <a href='https://sppgway.jhuapl.edu/ancil_products'>"
            "https://sppgway.jhuapl.edu/ancil_products</a>, hosted by the Johns Hopkins University "
            "Applied Physics Laboratory. SpiceyPy is being used to process the SPICE files.",
            sizing_mode='stretch_width')

        # create plot tab objects: one circle+line glyph pair per projection
        # plane (XY/XZ/YZ); only the active plane's glyphs are visible.
        self.plot = figure(match_aspect=True,
                           sizing_mode="stretch_both",
                           title="Astropynamics",
                           tools="hover, pan, reset, save",
                           tooltips=[("name", "@index")])
        self.plot.add_tools(BoxZoomTool(match_aspect=True))
        self.plot.circle('px', 'py', size='radii', source=self.plot_source,
                         line_width=3, line_alpha=0.5, name='XY')
        self.plot.circle('px', 'pz', size='radii', source=self.plot_source,
                         line_width=3, line_alpha=0.5,
                         name='XZ').visible = False
        self.plot.circle('py', 'pz', size='radii', source=self.plot_source,
                         line_width=3, line_alpha=0.5,
                         name='YZ').visible = False
        self.plot.line('px', 'py', source=self.cum_source, line_width=2,
                       line_alpha=0.5, color='red', name='XYOrbit')
        self.plot.line('px', 'pz', source=self.cum_source, line_width=2,
                       line_alpha=0.5, color='red',
                       name='XZOrbit').visible = False
        self.plot.line('py', 'pz', source=self.cum_source, line_width=2,
                       line_alpha=0.5, color='red',
                       name='YZOrbit').visible = False
        self.plotLayout = column(self.plot, self.offset,
                                 sizing_mode="stretch_width")
        self.plotTab = Panel(child=self.plotLayout, title="Display")

        # create data table tab objects
        fmt = NumberFormatter(format='0.000', text_align=TextAlign.right)
        columns = [
            TableColumn(field="index", title="Epoch",
                        formatter=DateFormatter(format="%m/%d/%Y %H:%M:%S")),
            TableColumn(field="px", title="PX", formatter=fmt, width=10),
            TableColumn(field="py", title="PY", formatter=fmt),
            TableColumn(field="pz", title="PZ", formatter=fmt),
            TableColumn(field="vx", title="VX", formatter=fmt),
            TableColumn(field="vy", title="VY", formatter=fmt),
            TableColumn(field="vz", title="VZ", formatter=fmt)
        ]
        self.ephemerisTable = DataTable(source=self.table_source,
                                        columns=columns,
                                        sizing_mode="stretch_both")
        self.ephemerisLayout = column(self.exportRange, self.ephemerisTable,
                                      sizing_mode="stretch_width")
        self.dataTab = Panel(child=self.ephemerisLayout, title="Table")
        self.kernels = Div()
        self.kernelTab = Panel(child=self.kernels, title="Kernels")
        self.tabs = Tabs(tabs=[self.plotTab, self.dataTab, self.kernelTab])

        # init data — note these run BEFORE on_change wiring below, so the
        # initial population does not re-trigger callbacks.
        self.model.value = "The Solar System"
        self.update_model(None, 0, self.model.value)
        self.update_epochs(None, 0, 0)
        self.update_states(None, 0, 0)
        self.model.on_change('value', self.update_model)
        self.frames.on_change('value', self.update_epochs)
        self.planes.on_change('active', self.update_plot_view)
        self.center.on_change('value', self.update_epochs)
        self.target.on_change('value', self.update_epochs)
        self.offset.on_change('value', self.update_offset)
        self.epoch.on_change('value', self.update_epochs)
        self.duration.on_change('value', self.update_epochs)
        self.interval.on_change('value', self.update_epochs)
        self.update_button.on_click(self.update_onclick)
        self.tabs.on_change('active', self.update_button_type)
        self.inputs = column(self.model, self.frames, self.planes,
                             self.center, self.target, self.epoch,
                             self.duration, self.interval,
                             self.update_button)

    def get_layout(self):
        """Return the root layout (controls beside tabs, info footer below)."""
        return column(row([self.inputs, self.tabs]), self.infoDiv,
                      sizing_mode='stretch_width')

    def update_kenerls_tab(self):
        """Render the loaded SPICE kernels as an HTML table in the Kernels tab.

        (Name keeps the original 'kenerls' spelling — it is referenced by
        the button/tab callbacks.)
        """
        kernels = self.spice_provider.fetch_kernels()
        kernel_text = "<h3>Loaded Spice Kernels:</h3>\n"
        kernel_text += '<table>\n'
        for k in kernels:
            # k appears to be a (path, type) pair — TODO confirm against SpiceProvider
            kernel_text += f"<tr><td><b>{k[1]}  </b></td><td>{k[0].split('/')[-1]}</td></tr>\n"
        self.kernels.text = kernel_text + "</table>"

    def update_model(self, attr, old, new):
        """Switch ephemeris model: load its kernel, repopulate every widget,
        then refresh epochs/plot. Callbacks are disabled during the bulk
        widget update via ``self.active``."""
        # disable callbacks
        self.active = False
        # update model and load new kernel
        self.ephemeris_model = self.allowed_models[new]()
        self.spice_provider.set_meta_kernel(self.ephemeris_model.kernel)
        self.spice_provider.setSpiceIds(self.ephemeris_model.objects)
        self.update_kenerls_tab()
        # set widget values
        allowed_objects = [
            self.spice_provider.fromId(name)
            for name in self.ephemeris_model.objects
        ]
        allowed_frames = self.ephemeris_model.FRAMES
        allowed_durations = [
            str(v) for v in self.ephemeris_model.DURATION_DAYS
        ]
        self.target.options = allowed_objects
        self.center.options = allowed_objects
        self.frames.options = allowed_frames
        self.duration.options = allowed_durations
        self.target.value = self.ephemeris_model.target
        self.center.value = self.ephemeris_model.center
        self.frames.value = self.ephemeris_model.frame
        self.planes.active = self.ephemeris_model.plane
        self.epoch.value = datetime.strftime(self.ephemeris_model.epoch,
                                             "%Y-%m-%d")
        self.duration.value = str(self.ephemeris_model.duration)
        self.offset.value = self.ephemeris_model.offset
        self.offset.end = self.ephemeris_model.duration
        self.interval.value = self.ephemeris_model.step_size
        # reinstate callbacks
        self.active = True
        # update start and stop epochs and plot
        self.update_epochs(None, 0, 0)

    def update_epochs(self, attr, old, new):
        """Recompute start/stop epochs from the epoch+duration widgets,
        rescale the offset slider to the chosen time step, and refresh."""
        self.start_epoch = datetime.strptime(self.epoch.value, "%Y-%m-%d")
        self.stop_epoch = self.start_epoch + \
            pd.Timedelta(seconds=(int(float(self.duration.value) *
                                      EphemerisApp.to_seconds['Day'])))
        self.offset.value = 0
        # slider now counts steps of the selected interval, not days
        self.offset.end = int(
            float(self.duration.value) * EphemerisApp.to_seconds['Day'] /
            EphemerisApp.to_seconds[self.interval.value])
        self.offset.title = f"{self.interval.value}s Since Epoch"
        self.exportRange.text = f"Showing epoch range:\t<b>{self.start_epoch} to {self.stop_epoch}</b>"
        self.update_offset(None, 0, 0)

    def update_offset(self, attr, old, new):
        """Slider moved: recompute the current epoch and refresh states.

        The cumulative (orbit-trail) source is only reset when no animation
        is running, so an animation can accumulate its trail.
        """
        scale_factor = EphemerisApp.to_seconds[self.interval.value]
        self.current_epoch = self.start_epoch + pd.Timedelta(
            seconds=(self.offset.value * scale_factor))
        if self.playAnimation is None:
            SpiceProvider.reset_source(self.spice_provider.cum_source)
        self.update_states(None, 0, 0)

    def update_ephemeris(self, attr, old, new):
        """Fetch the full ephemeris over [start, stop] for the table tab."""
        self.update_epochs(attr, old, new)
        self.spice_provider.set_center(self.center.value)
        self.spice_provider.frame = self.frames.value
        self.spice_provider.correction = SpiceProvider.CORRECTIONS[
            self.vector.value]
        if self.active:
            self.spice_provider.fetch_ephemeris_states(self.target.value,
                                                       self.start_epoch,
                                                       self.stop_epoch,
                                                       self.interval.value)

    def update_states(self, attr, old, new):
        """Fetch single-epoch states for all model objects (plot tab)."""
        self.spice_provider.set_center(self.center.value)
        self.spice_provider.frame = self.frames.value
        self.spice_provider.correction = SpiceProvider.CORRECTIONS[
            self.vector.value]
        if self.active:
            self.spice_provider.fetch_target_states(
                self.ephemeris_model.objects, self.current_epoch,
                self.target.value)

    def update_plot_view(self, attr, old, new):
        """Toggle glyph visibility when the projection plane changes;
        glyphs are found by their 'XY'/'XZ'/'YZ' (+'Orbit') names."""
        self.plot.select_one({"name": self.planes.labels[old]}).visible = False
        self.plot.select_one({"name": self.planes.labels[new]}).visible = True
        self.plot.select_one({
            "name": self.planes.labels[old] + "Orbit"
        }).visible = False
        self.plot.select_one({
            "name": self.planes.labels[new] + "Orbit"
        }).visible = True

    def animate_update(self):
        """One animation tick: advance the slider, wrapping past the end;
        clear the orbit trail on wrap."""
        self.offset.value = 0 if self.offset.value > self.offset.end else self.offset.value + 1
        if self.offset.value == 0:
            SpiceProvider.reset_source(self.spice_provider.cum_source)

    def animate(self, start=True):
        """Start (label 'Play' -> 'Pause', 50 ms periodic callback) or stop
        the animation. With start=False, only stops if one is running."""
        if self.update_button.label == 'Play' and start:
            self.update_button.label = 'Pause'
            self.playAnimation = curdoc().add_periodic_callback(
                self.animate_update, 50)
        elif self.playAnimation is not None:
            self.update_button.label = 'Play'
            curdoc().remove_periodic_callback(self.playAnimation)
            self.playAnimation = None

    def update_onclick(self):
        """Main button: behavior depends on active tab (0=plot animation,
        1=table refetch, 2=kernel list refresh)."""
        if self.tabs.active == 0:
            self.animate()
        elif self.tabs.active == 1:
            self.update_ephemeris(None, 0, 0)
        elif self.tabs.active == 2:
            self.update_kenerls_tab()

    def update_button_type(self, attr, old, new):
        """Tab switched: stop any animation and relabel the main button."""
        self.animate(False)
        if self.tabs.active == 0:
            self.update_button.label = "Play"
        else:
            self.update_button.label = "Update"
            # NOTE(review): in the flattened original it is ambiguous whether
            # this refetch is inside the else-branch or after the if/else;
            # placed inside else (refresh data when leaving the plot tab) —
            # confirm against upstream history.
            self.update_ephemeris(None, 0, 0)
def cryptocurrency_clustering_tab(panel_title):
    """Build the cryptocurrency-clustering dashboard tab.

    Defines a tab-local ``Thistab`` (KMeans clustering of crypto features,
    gap-statistic model selection, redis checkpointing), loads data for an
    initial date range, wires DatePicker widgets to a holoviews stream, and
    returns a Bokeh Panel. On any setup failure, returns an error-flag tab.
    """
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=[]):
            # NOTE(review): mutable default arg dedup_cols=[] kept as-is —
            # it is passed straight to Mytab and never mutated here.
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']
            self.trigger = 0
            # groupby_dict is a module-level mapping col -> agg fn (defined elsewhere)
            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.kmean_model = {}  # k -> fitted KMeans, filled by optimalK
            self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.k = '1'
            self.max_clusters_menu = [str(k) for k in range(1, 12)]
            self.launch_cluster_table = False  # launch cluster
            self.cryptos = None
            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;margin-bottom:200px"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'Crypto families':
                self.section_header_div(text='Crypto families:{}'.format(
                    self.section_divider), width=600, html_header='h2',
                    margin_top=5, margin_bottom=-155),
            }

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Wrap *text* in a styled HTML header and return it as a Div."""
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        # ////////////// DIVS /////////////////////////////////
        def title_div(self, text, width=700):
            """Return *text* as a styled h2 Div."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=150):
            """Return the static 'how to interpret relationships' info box."""
            div_style = """ style='width:350px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li> A cluster is statistical grouping of items based on a composite similarity of the variables under review. </li>
                <li> I have highlighted the peers in our cluster (aion_cluster), and simply labeled the other clusters with numbers. </li>
            </ul>
            </div>
            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ////////////////// HELPER FUNCTIONS ////////////////////
        def set_groupby_dict(self):
            """Fill in missing aggregation rules: 'sum' for social-count
            columns (mentions, tweets, ...), 'mean' otherwise."""
            try:
                lst = ['mention', 'hashtags', 'tweets', 'replies', 'favorites']
                for col in self.cols:
                    if col not in self.groupby_dict.keys():
                        if not string_contains_list(lst, col):
                            self.groupby_dict[col] = 'mean'
                        else:
                            self.groupby_dict[col] = 'sum'
            except Exception:
                logger.error('set groupby dict', exc_info=True)

        # /////////////////////////////////////////////////////////////
        def optimalK(self, data, nrefs=3, maxClusters=10):
            """Calculate KMeans optimal K using the Gap Statistic
            (Tibshirani, Walther, Hastie).

            Params:
                data: ndarray of shape (n_samples, n_features)
                nrefs: number of random reference datasets per k
                maxClusters: maximum number of clusters to test for
            Returns:
                (optimal_k, resultsdf) — optimal_k is gaps.argmax()+1;
                resultsdf holds (clusterCount, gap) per k.
                Returns None if an exception is swallowed below.
            """
            try:
                gaps = np.zeros((len(range(1, maxClusters)), ))
                resultsdf = pd.DataFrame({'clusterCount': [], 'gap': []})
                # NOTE(review): the loop iterates over max_clusters_menu's
                # length, not the maxClusters argument — verify intended.
                for gap_index, k in enumerate(
                        range(1, len(self.max_clusters_menu))):
                    logger.warning('starting for k=%s', k)
                    # Holder for reference dispersion results
                    refDisps = np.zeros(nrefs)
                    # For n references, generate random sample and perform
                    # kmeans, getting resulting dispersion of each loop
                    for i in range(nrefs):
                        logger.warning('nref=%s', i)
                        # Create new random reference set
                        randomReference = np.random.random_sample(
                            size=data.shape)
                        # Fit to it
                        km = KMeans(k)
                        km.fit(randomReference)
                        refDisp = km.inertia_
                        refDisps[i] = refDisp
                    # Fit cluster to original data and create dispersion
                    self.kmean_model[k] = KMeans(k, random_state=42)
                    self.kmean_model[k].fit(data)
                    # NOTE(review): origDisp reads km.inertia_ (the LAST
                    # reference fit), not self.kmean_model[k].inertia_ —
                    # looks like a bug upstream; preserved as-is here.
                    origDisp = km.inertia_
                    # Calculate gap statistic
                    gap = np.log(np.mean(refDisps)) - np.log(origDisp)
                    # Assign this loop's gap statistic to gaps
                    gaps[gap_index] = gap
                    resultsdf = resultsdf.append(
                        {
                            'clusterCount': k,
                            'gap': gap
                        }, ignore_index=True)
                return (
                    gaps.argmax() + 1, resultsdf
                )  # Plus 1 because index of 0 means 1 cluster is optimal, index 2 = 3 clusters are optimal
            except Exception:
                logger.error('optimal', exc_info=True)

        def cluster_table(self, launch):
            """Group features per crypto, pick k via optimalK, label each
            crypto with its cluster (renaming aion's cluster to
            'aion_cluster'), checkpoint to redis, and return an hvplot table."""
            try:
                # prep — df appears to be a dask frame (.compute below); confirm
                df = self.df.groupby(['crypto']).agg(groupby_dict)
                df = df.compute()
                logger.warning('df after groupby:%s', df)
                self.cryptos = df.index.tolist()
                logger.warning('self.cryptos:%s', self.cryptos)
                print(self.cryptos)
                X = df[self.feature_list]
                scaler = StandardScaler()
                X = scaler.fit_transform(X)
                self.k, gapdf = self.optimalK(X, nrefs=3,
                                              maxClusters=len(
                                                  self.max_clusters_menu))
                logger.warning('Optimal k is:%s ', self.k)
                # Labels of each point
                labels = self.kmean_model[self.k].labels_
                # Nice Pythonic way to get the indices of the points for each corresponding cluster
                mydict = {
                    'cluster_' + str(i): np.where(labels == i)[0].tolist()
                    for i in range(self.kmean_model[self.k].n_clusters)
                }
                mydict_verbose = mydict.copy(
                )  # make a dictionary with the clusters and name of the cryptos
                # Transform this dictionary into dct with matching crypto labels
                dct = {
                    'crypto': self.cryptos,
                    'cluster': [''] * len(self.cryptos)
                }
                # get index aion to identify the aion cluster
                aion_idx = self.cryptos.index('aion')
                for key, values in mydict.items():
                    if aion_idx in values:
                        key = 'aion_cluster'
                    mydict_verbose[key] = []
                    for crypto_index in values:
                        try:
                            dct['cluster'][int(crypto_index)] = key
                            mydict_verbose[key].append(
                                self.cryptos[int(crypto_index)])
                        except:
                            logger.warning('cannot change to int:%s',
                                           crypto_index)
                # save to redis
                self.write_clusters(mydict_verbose)
                logger.warning('line 229: cluster labels:%s', mydict_verbose)
                df = pd.DataFrame.from_dict(dct)
                self.launch_cluster_table = False
                cols = ['crypto', 'cluster']
                return df.hvplot.table(columns=cols, width=500, height=1200,
                                       title='Cluster table')
            except Exception:
                logger.error('cluster table', exc_info=True)

        def write_clusters(self, my_dict):
            """Checkpoint the cluster membership (plus timestamp and feature
            list) to redis under 'clusters:cryptocurrencies'."""
            try:
                # write to redis
                cluster_dct = my_dict.copy()
                cluster_dct['timestamp'] = datetime.now().strftime(
                    self.DATEFORMAT)
                cluster_dct['features'] = self.feature_list
                save_params = 'clusters:cryptocurrencies'
                self.redis.save(cluster_dct, save_params, "", "",
                                type='checkpoint')
                logger.warning('%s saved to redis', save_params)
            except:
                logger.error('', exc_info=True)

    def update(attrname, old, new):
        """DatePicker callback: reload data for the new range and bump the
        stream trigger so dependent DynamicMaps re-render."""
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value, datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.trigger += 1
        stream_launch_elbow_plot.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'external_daily'
        # cols = list(groupby_dict.keys()) + ['crypto']
        thistab = Thistab(table, [], [])
        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=340)
        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.cols = sorted(list(thistab.df.columns))
        # MANAGE STREAMS
        stream_launch_elbow_plot = streams.Stream.define('Launch_elbow_plot',
                                                         launch=-1)()
        stream_launch_cluster_table = streams.Stream.define(
            'Launch_cluster_table', launch=-1)()
        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        # PLOTS
        hv_cluster_table = hv.DynamicMap(thistab.cluster_table,
                                         streams=[stream_launch_cluster_table])
        cluster_table = renderer.get_plot(hv_cluster_table)
        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end)
        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.information_div(), controls],
                         [thistab.section_headers['Crypto families']],
                         [Spacer(width=20, height=30)],
                         [cluster_table.state],
                         [thistab.notification_div['bottom']]])
        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab
    except Exception:
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)
def load_file(index_file_name):
    """Load a JSON dataset index, build the dataset/variable browser tables,
    wire date-range and plot controls, and swap the document root from the
    loading layout to the main layout.

    Relies on module-level names defined elsewhere in the file: read_file,
    DotDict, log, status_bar, loadLayout, doc.
    """
    index = json.loads(read_file(index_file_name, "r"))

    def date_range_change(is_start, attr, old, new):
        """Filter datasets to those with any time value inside the picked
        [startDate 00:00:00, endDate 23:59:59] window."""
        print(is_start, new)
        start = startDate.value
        start = datetime(start.year, start.month, start.day, 0, 0, 0)
        end = endDate.value
        end = datetime(end.year, end.month, end.day, 23, 59, 59)
        print(type(start), end)
        filter_func = lambda ds: any(
            map(lambda dt: start <= dt and dt <= end, ds["_time_values"]))
        dsTable.filter_datasets(filter_func)

    def name_filter_changed(attr, old, new):
        """Filter datasets whose id matches the typed regex."""
        regex = re.compile(new)
        filter_func = lambda ds: regex.search(ds["id"])
        dsTable.filter_datasets(filter_func)

    class DatasetsTable:
        """Owns the variables table and the (filterable) datasets table."""

        def __init__(self, index):
            self.meta_data = DotDict(index["meta"])
            self.datasets = index["datasets"]
            self.meta_variables = self.meta_data.variables
            self._fill_variables_table()
            self._fill_datasets_table()

        def _fill_variables_table(self):
            """Build the DataTable listing variable names and dimensions."""
            self.vars_short_names = list(self.meta_variables.keys())
            vars_names = list(map(self.to_long_name, self.vars_short_names))
            vars_dims = list(
                map(
                    lambda v: ", ".join(
                        list(map(self.to_long_name, v["shape"]))),
                    self.meta_variables.values()))
            self.vars_data = dict(
                names=vars_names,
                dims=vars_dims,
            )
            self.vars_source = ColumnDataSource(self.vars_data)
            vars_columns = [
                TableColumn(field="names", title="Variable name"),
                TableColumn(field="dims", title="Variable dimensions"),
            ]
            self.vars_table = DataTable(source=self.vars_source,
                                        columns=vars_columns,
                                        width=600,
                                        height=350,
                                        selectable=True)
            # vars_source.selected.on_change('indices', table_changed)

        def to_long_name(self, name, show_units=False):
            """Resolve a short variable name to its long/standard name from
            metadata attributes, optionally appending '[units]'."""
            if name not in self.meta_variables:
                return name
            attr = self.meta_variables[name]["attributes"]
            if "long_name" in attr:
                name = attr["long_name"]["value"]
            elif "standard_name" in attr:
                name = attr["standard_name"]["value"]
            if show_units and "units" in attr:
                return "%s [%s]" % (name, attr["units"]["value"])
            return name

        def filter_datasets(self, filter_func):
            """Apply filter_func over all datasets and refresh the table."""
            self.filtered_datasets = list(filter(filter_func, self.datasets))
            self.datasets_source.data.update(
                self._populate_datasets_table_data(self.filtered_datasets))

        def _fill_datasets_table(self):
            """Parse each dataset's time strings into datetimes, track the
            global min/max date, and build the datasets DataTable."""
            # preprocess the time values
            max_date = min_date = None
            for ds in self.datasets:
                ds["_time_values"] = list(
                    map(lambda t: np.datetime64(t).astype(datetime),
                        ds["data"]["time"]))
                mx = max(ds["_time_values"])
                mn = min(ds["_time_values"])
                if max_date is None or mx > max_date:
                    max_date = mx
                if min_date is None or mn < min_date:
                    min_date = mn
            self.filtered_datasets = self.datasets
            self.datasets_min_date = min_date
            self.datasets_max_date = max_date
            self.datasets_source = ColumnDataSource(
                self._populate_datasets_table_data(self.filtered_datasets))
            datasets_columns = [
                TableColumn(field="names", title="Name", width=600),
                TableColumn(field="dates", title="Date", width=600)
            ]
            self.datasets_table = DataTable(source=self.datasets_source,
                                            columns=datasets_columns,
                                            width=600,
                                            height=350,
                                            selectable=True)
            # datasets_source.selected.on_change('indices', _single_selection(datasets_source, lambda index: update_available_vars(
            #     filtered_datasets[index]["meta"])))

        def _populate_datasets_table_data(self, datasets):
            """Return {'names': [...], 'dates': [...]} for the table source."""
            ds_names = []
            ds_dates = []
            for ds in datasets:
                ds_names.append(ds["id"])
                ds_dates.append(ds["data"]["time"][0])  # todo format date
            return {"names": ds_names, "dates": ds_dates}

        def get_plot_infos(self):
            """Return (dataset, var_name, shape) for the current selection,
            or None (implicitly) if nothing is selected."""
            vars_index = self.vars_source.selected.indices
            ds_index = self.datasets_source.selected.indices
            if not vars_index or not ds_index:
                log("Nothing selected!")
                return
            var_name = self.vars_short_names[vars_index[0]]
            var = self.meta_variables[var_name]
            ds = self.filtered_datasets[ds_index[0]]
            return (ds, var_name, var["shape"])

    def gen_plot():
        """Open the selected dataset over OPeNDAP, project the selected
        variable onto lon x lat, and append the result as a new tab."""
        infos = dsTable.get_plot_infos()
        if infos is None:
            return
        ds, var_name, shape = infos
        ds_uri = ds["id"]
        timestamp = ds["data"]["time"][0]
        file_name = ds_uri.split("/")[-1]
        kdims = shape
        vdims = [var_name]
        # locate the lon/lat coordinate variables via standard_name metadata
        lon_key, lat_key = None, None
        for key in dsTable.meta_variables:
            entry = dsTable.meta_variables[key]
            if "attributes" not in entry or "standard_name" not in entry[
                    "attributes"]:
                continue
            if entry["attributes"]["standard_name"]["value"] == "longitude":
                lon_key = key
            if entry["attributes"]["standard_name"]["value"] == "latitude":
                lat_key = key
        if lat_key not in kdims or lon_key not in kdims:
            log("'lat' and 'lon' are required dimensions!")
            return
        full_url = index["opendap_url"] + ds_uri
        log("Opening dataset: " + full_url)
        btn_plot_lonXlat.disabled = True  # re-enabled in finally
        try:
            print("Opening : " + full_url)
            dataset = xr.open_dataset(full_url)
            log("Dataset successfully opened. Loading data...")
            # collapse size-1 dims by selecting their single value
            kdimsSingularValue = list(
                filter(lambda dim: dataset[dim].size == 1, kdims))
            kdimsMultipleValues = list(
                filter(lambda dim: dataset[dim].size > 1, kdims))
            indexers = {
                key: dataset[key].values[0]
                for key in kdimsSingularValue
            }
            print(indexers)
            dataset = dataset.sel(indexers=indexers)
            print(kdimsMultipleValues, kdimsSingularValue)
            xr_dataset = gv.Dataset(
                dataset[var_name],
                group=dsTable.to_long_name(var_name, True) + " ",
                crs=ccrs.PlateCarree())
            image = xr_dataset.to(gv.Image, [lon_key, lat_key], dynamic=True)
            graph = image.options(colorbar=True,
                                  tools=['hover'],
                                  cmap="viridis",
                                  width=800,
                                  height=640,
                                  colorbar_position="right",
                                  toolbar="below") * gf.coastline()
            renderer = hv.renderer('bokeh')
            hover = HoverTool(
                tooltips=[
                    ("(x,y)", "(@lon{%0.1f}, @lat{%0.1f})"),
                    ('desc', '@' + var_name),
                ],
                formatters={
                    'y': 'printf',  # use 'datetime' formatter for 'date' field
                    'x': 'printf',  # use 'printf' formatter for 'adj close' field
                    # use default 'numeral' formatter for other fields
                })
            plot = renderer.get_plot(graph)
            if len(kdimsMultipleValues) > 2:
                # extra non-spatial dims -> render as widget with a slider;
                # callback_policy="mouseup" for slider in plots
                print(plot)
                plot = renderer.get_widget(plot, "server")
                bokeh_layout = plot.init_layout()
                print(bokeh_layout)
                latFull = dsTable.meta_variables[lat_key]["attributes"][
                    "standard_name"]["value"]
                lonFull = dsTable.meta_variables[lon_key]["attributes"][
                    "standard_name"]["value"]
                bk_plot = bokeh_layout.children[0]
                # bk_plot.add_tools(hover)
                bk_slider = bokeh_layout.children[1].children[1]
                print(bk_slider.callback_policy)
                bk_slider.callback_policy = "mouseup"
                bk_plot.xaxis.axis_label = lonFull
                bk_plot.yaxis.axis_label = latFull
                print(lonFull, latFull)
                # bk_plot.xaxis[0].formatter = NumeralTickFormatter(format="0.0")
                # bk_plot.yaxis[0].formatter = NumeralTickFormatter(format="$0")
            else:
                bokeh_layout = plot.state
            tab = Panel(title=timestamp, child=bokeh_layout)
            plotTabs.tabs.append(tab)
            log("Data successfully loaded!")
        except Exception as e:
            log("Failed to open or process dataset: %s" % full_url, e)
        finally:
            btn_plot_lonXlat.disabled = False

    dsTable = DatasetsTable(index)
    btn_plot_lonXlat = Button(
        label="Plot variable over 'lon'x'lat' (this may take some time)")
    btn_plot_lonXlat.on_click(gen_plot)
    # pickers are bounded by the dates found in the index
    startDate = DatePicker(title="Start date",
                           min_date=dsTable.datasets_min_date,
                           max_date=dsTable.datasets_max_date,
                           value=dsTable.datasets_min_date)
    endDate = DatePicker(title="End date",
                         min_date=dsTable.datasets_min_date,
                         max_date=dsTable.datasets_max_date,
                         value=dsTable.datasets_max_date)
    startDate.on_change("value", partial(date_range_change, True))
    endDate.on_change("value", partial(date_range_change, False))
    plotTabs = Tabs(
        tabs=[],
        width=1000,
        height=640,
    )
    plotLayout = column(plotTabs, name="plotLayout")
    mainLayout = column(Div(height=50, style={"height": 50}),
                        row(startDate, endDate), dsTable.datasets_table,
                        dsTable.vars_table, btn_plot_lonXlat, plotLayout,
                        status_bar,
                        name='mainLayout')
    # replace the loading screen with the assembled UI
    doc.remove_root(loadLayout)
    doc.add_root(mainLayout)
def modify_doc(doc):
    """Assemble the record-section Bokeh document: date/time pickers mutate
    the shared ``rs`` record-section state, Load fetches waveform data on a
    thread pool without holding the document lock, and the HoloViews
    DynamicMap ``dm`` is re-rendered via ``dm.event()``.

    Relies on module-level names defined elsewhere: rs, dm, renderer, pre,
    text, stations, get_data, update_plot.
    """
    def startdate_update(attrname, old, new):
        # mutate the start timestamp's date fields in place
        rs.start.year = new.year
        rs.start.month = new.month
        rs.start.day = new.day

    def enddate_update(attrname, old, new):
        rs.end.year = new.year
        rs.end.month = new.month
        rs.end.day = new.day

    def starttime_update(attrname, old, new):
        # expects "HH:MM:SS"; ValueError propagates on malformed input
        h, m, s = map(int, new.split(':'))
        rs.start.hour = h
        rs.start.minute = m
        rs.start.second = s

    def endtime_update(attrname, old, new):
        h, m, s = map(int, new.split(':'))
        rs.end.hour = h
        rs.end.minute = m
        rs.end.second = s

    def update_target(attrname, old, new):
        rs.target = new.lower()
        dm.event()

    @gen.coroutine
    @without_document_lock
    def load():
        """Download seismic+acoustic traces for the selected window on a
        thread pool; runs unlocked, so plot updates are scheduled via
        add_next_tick_callback."""
        global text
        if rs.start > rs.end:
            msg = "Start time is later than end time.\n"
            text.append(msg)
            return
        # rs.end - rs.start is compared against seconds — assumes the
        # subtraction yields a float of seconds (obspy UTCDateTime-style);
        # TODO confirm rs timestamp type
        if (rs.end - rs.start) > 3600.:
            msg = "Length of requested data exceeds 1 hour.\n"
            text.append(msg)
            return
        executor = ThreadPoolExecutor(max_workers=4)
        msg = "Loading data for {:s} between {:s} and {:s}\n".format(
            rs.target, str(rs.start), str(rs.end))
        text.append(msg)
        rs.reset()
        for _type in ['seismic', 'acoustic']:
            for slc in stations[rs.target][_type]:
                s, l, c = slc
                msg = "Downloading {:s}.{:s}.{:s}\n".format(s, l, c)
                text.append(msg)
                tr = yield executor.submit(get_data, s, l, c, rs.start, rs.end)
                if tr is not None:
                    rs.streams[_type] += tr
                else:
                    msg = 'No data for {}.{}.{}\n'.format(s, l, c)
                    text.append(msg)
                    time.sleep(0.1)
                    continue
                # The sleep has to be added to prevent some kind of racing condition
                # in the underlying bokeh implementation
                time.sleep(0.1)
        doc.add_next_tick_callback(update_plot)
        text.append("LOADING FINISHED.\n")

    def reset():
        """Validate the window, then force a replot of already-loaded data."""
        global text
        if rs.start > rs.end:
            msg = "Start time is later than end time.\n"
            text.append(msg)
            return
        if (rs.end - rs.start) > 3600.:
            msg = "Length of requested data exceeds 1 hour.\n"
            text.append(msg)
            return
        dm.event(replot=True)

    def update_traces(attr, old, new):
        # checkbox indices: 0=seismic, 1=acoustic, 2=station labels
        rs.plot_seismic = False
        rs.plot_acoustic = False
        rs.plot_labels = False
        for _c in new:
            if _c == 0:
                rs.plot_seismic = True
            if _c == 1:
                rs.plot_acoustic = True
            if _c == 2:
                rs.plot_labels = True
        dm.event()

    def update_red_vel(attr, old, new):
        # single checkbox: any selection enables the 330 m/s reduction velocity
        if len(new) < 1:
            rs.red_vel = 0
        else:
            rs.red_vel = 330.
        dm.event()

    sdateval = "{:d}-{:d}-{:d}".format(rs.start.year, rs.start.month,
                                       rs.start.day)
    date_start = DatePicker(title='Start date', value=sdateval)
    date_start.on_change('value', startdate_update)
    edateval = "{:d}-{:d}-{:d}".format(rs.end.year, rs.end.month, rs.end.day)
    date_end = DatePicker(title='End date', value=edateval)
    date_end.on_change('value', enddate_update)
    stimeval = "{:02d}:{:02d}:{:02d}".format(rs.start.hour, rs.start.minute,
                                             rs.start.second)
    starttime = TextInput(title='Start time', value=stimeval)
    starttime.on_change('value', starttime_update)
    etimeval = "{:02d}:{:02d}:{:02d}".format(rs.end.hour, rs.end.minute,
                                             rs.end.second)
    endtime = TextInput(title='End time', value=etimeval)
    endtime.on_change('value', endtime_update)
    loadb = Button(label='Load', button_type='success')
    loadb.on_click(load)
    resetb = Button(label='Reset', button_type='success')
    resetb.on_click(reset)
    select_target = Select(title='Volcano', value="Ruapehu",
                           options=["Te Maari", "Ruapehu", "Ngauruhoe",
                                    "Red Crater"])
    select_target.on_change('value', update_target)
    cg = CheckboxGroup(labels=["Seismic", "Acoustic", "Station name"],
                       active=[1, 1, 0])
    cg.on_change('active', update_traces)
    rvel = CheckboxGroup(labels=['Reduction velocity (330 m/s)'], active=[])
    rvel.on_change('active', update_red_vel)
    div = Div(text="""<a target="_blank" href="https://wiki.geonet.org.nz/display/volcano/Record+Section+Plot ">Documentation</a>""",
              width=80, height=30)
    # Create HoloViews plot and attach the document
    hvplot = renderer.get_plot(dm, doc)
    doc.add_root(layout([[hvplot.state, widgetbox(pre)],
                         [widgetbox(date_start, starttime, div),
                          widgetbox(date_end, endtime),
                          widgetbox(select_target, loadb, resetb, cg, rvel)]],
                        sizing_mode='fixed'))
    return doc
race_table = raceTable(data_race, t_source, t_last_update) tableData = dict( race=['Asian', 'Black', 'Cdph-other', 'Latino', 'White', 'Other'], confirmed_cases_percent=race_table.confirm, deaths_percent=race_table.death, population_percent=race_table.percent, last_update=race_table.last_update, source=race_table.source) source2 = ColumnDataSource1(tableData) columns = [ TableColumn(field="race", title="Race"), TableColumn(field='confirmed_cases_percent', title="Confirmed_cases_percent"), TableColumn(field="deaths_percent", title="Deaths_percent"), TableColumn(field="population_percent", title="Population_percent") ] # TableColumn(field="last_update",title="Last_update"),TableColumn(field="source",title="Source") table = DataTable(source=source2, columns=columns, width=width, height=500) date_picker = DatePicker(title='Select a date', value="2020-10-26", min_date="2020-05-01", max_date="2020-11-02") date_picker.on_change('value', update1) pre3 = TextAreaInput(value='Source: ' + t_source + '\nLast update: ' + t_last_update, rows=3) layout = column(pre, dropdown_state, textbox, p, pre2, date_picker, pre3, table) curdoc().add_root(layout)
def callback2(attr, old, new):
    # Date-change handler for the second picker: re-query the three data
    # sources for the newly selected date.
    date = pd.to_datetime(date_picker2.value)
    data_case.data = get_data(date, data_race, "confirmed_cases_total")
    data_death.data = get_data(date, data_race, "deaths_total")
    data_num.data = get_data(date, data_race, "population_percent")


# One-time cosmetic setup: hide axes and gridlines on plots 2-4.
# NOTE(review): reconstructed as top-level statements from a collapsed
# source line -- confirm they are not part of callback2's body.
plot2.axis.axis_label = None
plot2.axis.visible = False
plot2.grid.grid_line_color = None
plot3.axis.axis_label = None
plot3.axis.visible = False
plot3.grid.grid_line_color = None
plot4.axis.axis_label = None
plot4.axis.visible = False
plot4.grid.grid_line_color = None
# Wire both pickers to their respective handlers (callback defined
# elsewhere in the file).
date_picker.on_change("value", callback)
date_picker2.on_change("value", callback2)
title2 = Div(
    text="""Comparison of COVID-19 case/death to its population by race""",
    width=1000,
    height=120)
title2.default_size = 50
curdoc().add_root(
    layout([[title1], [plot1, date_picker], [plot2, plot3],
            [plot4, date_picker2]]))
def tab2():
    """Build the 'Percentage of confirmed cases by race' tab.

    Reads the LA Times CDPH race/ethnicity CSV, keeps only the all-ages
    rows, and shows a grouped vbar chart comparing each race's share of
    confirmed cases against its share of the general population. A
    DatePicker re-filters the chart to the selected report date.

    Returns:
        Panel: the assembled Bokeh tab.
    """
    data = pd.read_csv('cdph-race-ethnicity.csv')
    data['date_time'] = pd.to_datetime(data['date'])
    data = data[(data['age'] == 'all')]

    percentages = ['confirmed cases', 'general population']
    regions = ['asian', 'black', "cdph-other", 'latino', 'other', 'white']
    # Nested categorical factors: one (race, metric) pair per bar.
    x = [(race, percent) for race in regions for percent in percentages]

    def create_dataset(df):
        # Interleave case% and population% so values line up with `x`.
        counts = sum(
            zip(df['confirmed_cases_percent'], df['population_percent']),
            ())  # like an hstack
        return ColumnDataSource(data=dict(x=x, counts=counts))

    def create_plot(source):
        p = figure(
            x_range=FactorRange(*x),
            title=
            'Comparison of the percent of cases by race to the general population',
            y_axis_label='Percentage')
        palette = ["#CAB2D6", "#e84d60"]
        p.vbar(x='x',
               top='counts',
               width=0.9,
               source=source,
               line_color="white",
               fill_color=factor_cmap('x',
                                      palette=palette,
                                      factors=percentages,
                                      start=1,
                                      end=2))
        # Axis/grid cosmetics -- each property set exactly once (the
        # original repeated range_padding and xgrid assignments).
        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        p.legend.location = "top_left"
        p.legend.orientation = "horizontal"
        p.add_tools(
            HoverTool(tooltips=[('Race, category', "@x"),
                                ('Percentage', "@counts")], ))
        # Data-provenance footnote rendered inside the plot area.
        mytext = Label(
            x=20,
            y=-150,
            x_units='screen',
            y_units='screen',
            text=
            "Source of data: coming from a continual Times survey of California's 58 county health\n "
            " agencies and three city agencies, published on https://www.latimes.com/projects/california-coronavirus-cases-tracking-outbreak/"
            " , access from Github repository https://github.com/datadesk/california-coronavirus-data/blob/master/cdph-race-ethnicity.csv"
            " Date of last update: 2020-11-04",
            render_mode='css',
            border_line_color='black',
            border_line_alpha=1.0,
            background_fill_color='white',
            background_fill_alpha=1.0,
        )
        p.add_layout(mytext)
        return p

    def callback(attr, old, new):
        # Coerce the picker value (str or datetime.date depending on the
        # Bokeh version) before comparing against the datetime64 column.
        new_src = create_dataset(
            data[(data['date_time'] == pd.to_datetime(date_picker.value))])
        src.data.update(new_src.data)

    # Initial plot: an arbitrary in-range report date.
    src = create_dataset(data[(data['date_time'] == '2020-10-01')])
    p = create_plot(src)

    date_picker = DatePicker(
        title='Click to choose a date (blank means no data)',
        value="2020-10-01",  # start on the date the initial plot shows
        min_date="2020-05-14",
        max_date=date.today())
    date_picker.on_change('value', callback)

    controls = WidgetBox(date_picker)
    layout = row(controls, p)
    return Panel(child=layout, title='Percentage of confirmed cases by race')
class UIClass:
    """Bokeh-server demand-forecasting app.

    Walks the user through a 5-step wizard (pick data source -> values
    column -> product-ID column -> date column / generated dates ->
    forecast) by toggling widget visibility, then plots history plus a
    Prophet forecast on a shared datetime figure.

    NOTE(review): depends on module-level names not visible here:
    DF_NUM_PREVIEW_ROWS, DATATABLE_PREVIEW_HEIGHT/WIDTH/COL_WIDTH,
    Prophet, np, pd, base64, StringIO, radians, and the Bokeh imports.
    """

    def __init__(self):
        # Placeholder frame so the table/plot have something to render
        # before real data is loaded.
        self.input_df = pd.DataFrame({
            'x': ['2010-01-01'] * DF_NUM_PREVIEW_ROWS,
            'y': [0] * DF_NUM_PREVIEW_ROWS
        })
        self.forecasted_df = None
        self.datefmt = DateFormatter(format='%m-%d-%Y')
        self.inputs = None
        self.x_range = [0, 10]
        self.demand_plot = figure(
            x_range=self.x_range,
            x_axis_type="datetime",
            tools=["pan", 'wheel_zoom'])  #,wheel_zoom,box_zoom,reset,resize")
        self.plot_data_source = ColumnDataSource(
            data=self.input_df)  #dict(x=[0], y=[0])
        # line1 always holds the "historical" line renderer; it is removed
        # and re-added whenever the displayed product changes.
        self.line1 = self.demand_plot.line(x='x',
                                           y='y',
                                           source=self.plot_data_source,
                                           line_color='blue',
                                           name='line1')
        self.demand_plot.xaxis.formatter = DatetimeTickFormatter(
            days="%d %b %Y", hours="")
        self.demand_plot.axis.minor_tick_line_color = None
        self.demand_plot.xaxis[
            0].ticker.desired_num_ticks = 10  #num_minor_ticks = 0
        self.demand_plot.xaxis.major_label_orientation = radians(
            30)  # from math import radians
        # Set up widgets
        self.data_source_selector = Select(
            title='Step 1/5: Select Data',
            value='Not Selected',
            options=['Not Selected', 'Use Example Data', 'Upload Data'])
        self.file_input = FileInput(accept='.csv,.xlsx')
        self.data_table = DataTable(
            height=DATATABLE_PREVIEW_HEIGHT,
            width=DATATABLE_PREVIEW_WIDTH,
            fit_columns=False,
            index_position=None,
            margin=(0, 15, 0, 15),
            #aspect_ratio=0.5,
            #default_size=50
        )
        self.data_preview_paragraph = Paragraph(text='Data Preview:',
                                                margin=(0, 15, 0, 15))
        self.values_col_selector = Select(
            title='Step 2/5: Select column with demand values',
            value='Not Selected',
            options=['Not Selected'])
        self.product_id_col_selector = Select(
            title='Step 3/5: Select column with product ID',
            value='Not Selected',
            options=['Not Selected'])
        self.date_col_selector = Select(title="Step 4/5: Select date column",
                                        value='Not Selected',
                                        options=['Not Selected'])
        self.last_date_picker = DatePicker(
            title='Select the date of last observation',
            max_date=datetime.datetime.date(pd.to_datetime("today")),
            value=datetime.datetime.date(pd.to_datetime("today")))
        self.workdays_checkboxgroup = CheckboxGroup(
            labels=["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"],
            active=[],
            inline=True,
            margin=(0, 15, 0, 0))
        self.workdays_apply_button = Button(label='Select Business Days',
                                            button_type='primary')
        self.product_selector_plotting = Select(
            title='Select Product to Display',
            value='v1',
            options=['v1', 'v2'])
        self.prediction_button = Button(
            label='Forecast Demand for Selected Product ID',
            button_type='primary')
        self.default_info_msg = 'This window will contain additional information,\nas you interact with the app.'
        self.info_paragraph = PreText(
            text='Details:\n{}'.format(self.default_info_msg))
        # self.text = TextInput(title='title', value='my sine wave')
        # self.offset = Slider(title='offset', value=0.0, start=-5.0, end=5.0, step=0.1)
        # Registry used by the show/hide helpers below; keys are the names
        # passed to hide_all_widgets_except()/display_all_widgets_except().
        self.widgets = {
            'data_source_selector': self.data_source_selector,
            'file_input': self.file_input,
            'values_col_selector': self.values_col_selector,
            'product_id_col_selector': self.product_id_col_selector,
            'data_preview_paragraph': self.data_preview_paragraph,
            'data_table': self.data_table,
            'product_selector': self.product_selector_plotting,
            'demand_plot': self.demand_plot,
            'date_col_selector': self.date_col_selector,
            'last_date_picker': self.last_date_picker,
            'workdays_checkboxgroup': self.workdays_checkboxgroup,
            'workdays_apply_button': self.workdays_apply_button,
            'prediction_button': self.prediction_button,
            #'': self.,
        }
        # Column-role selections made by the user during the wizard.
        self.values_colname = None
        self.product_id_colname = None
        self.date_colname = None
        self.product_ids = []

    ########## WIDGETS VISIBILITY CONTROLS ##########
    def _change_widgets_visibility(self, names, names_show_or_hide='show'):
        # Set .visible on every registered widget: widgets in `names` get
        # `displaying`, all others get the opposite.
        displaying = True if names_show_or_hide == 'show' else False
        for widget_name in self.widgets:
            if widget_name in names:
                self.widgets[widget_name].visible = displaying
            else:
                self.widgets[widget_name].visible = not displaying

    def display_all_widgets_except(self, widgets=[]):
        # Show everything but the listed widgets.
        # NOTE(review): mutable default argument -- harmless here (never
        # mutated) but worth cleaning up.
        self._change_widgets_visibility(widgets, 'hide')

    def hide_all_widgets_except(self, widgets=[]):
        # Hide everything but the listed widgets.
        self._change_widgets_visibility(widgets, 'show')

    ########## LOGIC ##########
    def set_widget_to_default_value(self,
                                    widget_names,
                                    default_val='Not Selected'):
        # Reset the .value of each named selector back to its default.
        for widget_name in widget_names:
            self.widgets[widget_name].value = default_val

    def prepare_values_col_selection(self):
        # Offer every column of the loaded frame as a candidate values column.
        self.values_col_selector.options = ['Not Selected'
                                            ] + self.input_df.columns.tolist()

    def get_additional_cols_to_show(self):
        # The file-input widget must stay visible only in upload mode.
        return ['file_input'
                ] if self.data_source_selector.value == 'Upload Data' else []

    def update_details_msg(self, msg):
        # Replace the info panel text (PreText preserves newlines).
        self.info_paragraph.text = "Details:\n{}".format(msg)

    def preview_input_df(self):
        # Rebuild the preview table's columns and source from input_df.
        # https://stackoverflow.com/questions/40942168/how-to-create-a-bokeh-datatable-datetime-formatter
        columns = [
            TableColumn(field=Ci, title=Ci, width=DATATABLE_PREVIEW_COL_WIDTH)
            for Ci in self.input_df.columns
        ]
        self.data_table.update(columns=columns)
        self.data_table.update(
            source=ColumnDataSource(self.input_df.head(DF_NUM_PREVIEW_ROWS)))
        self.data_table.visible = True
        self.data_preview_paragraph.visible = True

    def upload_fit_data(self, attr, old, new):
        # FileInput handler: the widget delivers the file as base64 text,
        # so decode it before handing it to pandas.
        print('fit data upload succeeded')
        self.update_details_msg(msg='Step 1/5: Uploading data')
        base64_message = self.file_input.value
        base64_bytes = base64_message.encode('ascii')
        message_bytes = base64.b64decode(base64_bytes)
        message = message_bytes.decode('ascii')
        self.input_df = pd.read_csv(StringIO(message), sep=',')
        self.update_details_msg(
            msg='Step 1/5: Data has been successfully uploaded!')
        print('Input DF shape: {}'.format(self.input_df.shape))
        self.prepare_values_col_selection()
        self.hide_all_widgets_except(
            ['data_source_selector', 'file_input', 'values_col_selector'])
        self.preview_input_df()

    def replace_selector_options(self, selector, old_value, new_options):
        # Swap a selector's option list while keeping its current value
        # valid at every intermediate step (Bokeh rejects a value not in
        # options, hence the three-step dance).
        selector.options = [old_value] + new_options
        selector.value = new_options[0]
        selector.options = new_options

    def date_col_integrity(self, date_colname):
        """Validate that a column looks like parseable dates.

        Returns 'ok' on success, otherwise a short diagnostic string.
        Side effect: on success the column is converted in place to
        datetime64 (and to str first if it was not already strings).
        """
        if not isinstance(self.input_df[date_colname][0], str):
            self.input_df[date_colname] = self.input_df[date_colname].astype(
                str)
        if '-' in self.input_df[date_colname][0]:
            sep = '-'
        elif '/' in self.input_df[date_colname][0]:
            sep = '/'
        else:
            return 'no separator found'
        date_parts = self.input_df[date_colname].apply(lambda x: x.split(sep))
        if (date_parts.apply(lambda x: len(x)) == 3).all():
            try:
                self.input_df[date_colname] = pd.to_datetime(
                    self.input_df[date_colname])
                return 'ok'
            except:
                return 'error converting to datetime'
        else:
            return 'not all dates have exactly 3 components'

    def display_preview_plot(self):
        # Point the plot at the first product and reveal the plotting widgets.
        self.replace_selector_options(self.product_selector_plotting, 'v1',
                                      self.product_ids)
        self.product_selector_plotting.visible = True
        self.prediction_button.visible = True
        # Re-create line1 against the real (date, value) columns -- the
        # initial renderer was bound to the placeholder 'x'/'y' columns.
        self.demand_plot.renderers.remove(self.line1)
        self.plot_data_source = None
        self.plot_data_source = ColumnDataSource(data=self.input_df[
            self.input_df[self.product_id_colname] == self.product_ids[0]])
        self.line1 = self.demand_plot.line(x=self.date_colname,
                                           y=self.values_colname,
                                           source=self.plot_data_source,
                                           line_color='blue',
                                           name='line1')
        self.update_plot(None, None, self.product_ids[0])
        self.demand_plot.visible = True

    def generate_dates(self, end_date: datetime.datetime, work_days: list,
                       num_periods: int):
        # Build a business-day date range ending at end_date, using only
        # the selected weekdays.
        work_days = ' '.join(work_days)  # 'Sun Mon Tue Wed Fri'
        freq = pd.offsets.CustomBusinessDay(weekmask=work_days)
        return pd.date_range(end=end_date, periods=num_periods, freq=freq)

    def clean_df(self):
        """
        Modifies self.input_df:
        1) Removing duplicates based on [self.date_colname, self.product_id_colname]
        2) Sorting based on self.date_colname
        :return: void
        """
        self.input_df = self.input_df[~self.input_df.duplicated(
            subset=[self.date_colname, self.product_id_colname],
            keep='first')]
        self.input_df.sort_values(by=self.date_colname, inplace=True)
        print('===RESULTED INPUT_DF SHAPE AFTER CLEANING: ',
              self.input_df.shape)

    ########## WIDGETS ON_CHANGE METHODS ##########
    def select_data_source(self, attrname, old_val, new_val):
        # Step 1 handler: branch on the chosen data source and show only
        # the widgets relevant to the next wizard step.
        self.set_widget_to_default_value([
            'values_col_selector', 'product_id_col_selector',
            'date_col_selector'
        ])
        if new_val == 'Upload Data':
            self.update_details_msg(
                msg=
                'Step 1/5: Please upload data in one of the\nfollowing formats: .CSV or .XLSX'
            )
            self.hide_all_widgets_except(
                ['data_source_selector', 'file_input'])
        elif new_val == 'Use Example Data':
            self.update_details_msg(
                msg=
                'Step 1/5: Using a sample toy data. You can use it\nto test the functionality of this app.'
            )
            self.input_df = pd.read_csv('default_table.csv')
            self.prepare_values_col_selection()
            self.preview_input_df()
            self.hide_all_widgets_except([
                'data_source_selector', 'values_col_selector',
                'data_preview_paragraph', 'data_table'
            ])
        else:  # Not Selected
            self.update_details_msg(msg=self.default_info_msg)
            self.hide_all_widgets_except(['data_source_selector'])

    def select_values_colname(self, attrname, old_val, new_val):
        # Step 2 handler: validate the chosen column is numeric, then offer
        # the remaining columns as product-ID candidates.
        self.update_details_msg(
            msg=
            'Step 2/5: Please select a column that contains\nthe demand values. Note, that all the values in\nthis column should be numerical.'
        )
        self.set_widget_to_default_value(
            ['product_id_col_selector', 'date_col_selector'])
        self.hide_all_widgets_except([
            'data_source_selector', 'values_col_selector',
            'data_preview_paragraph', 'data_table'
        ] + self.get_additional_cols_to_show())
        if new_val == 'Not Selected':
            pass
        else:
            self.values_colname = new_val
            try:
                # astype(float) doubles as the numeric-validity check.
                self.input_df[self.values_colname] = self.input_df[
                    self.values_colname].astype(float)
                available_cols = set(self.input_df.columns)
                available_cols.remove(self.values_colname)
                if self.date_colname in available_cols:
                    available_cols.remove(self.date_colname)
                self.product_id_col_selector.options = [
                    'Not Selected'
                ] + list(available_cols)
                self.product_id_col_selector.visible = True
            except:
                self.update_details_msg(
                    msg=
                    'WARNING! Step 2/5: Not all the values\nin selected column are numerical!'
                )

    def select_product_id_colname(self, attrname, old_val, new_val):
        # Step 3 handler: record the product-ID column, collect its unique
        # values, and offer the remaining columns as date candidates.
        self.update_details_msg(
            msg=
            "Step 3/5: Please select a column that contains products' identifiers."
        )
        self.set_widget_to_default_value(['date_col_selector'])
        self.hide_all_widgets_except([
            'data_source_selector', 'values_col_selector',
            'data_preview_paragraph', 'data_table', 'product_id_col_selector'
        ] + self.get_additional_cols_to_show())
        if new_val == 'Not Selected':
            pass
        else:
            self.product_id_colname = new_val
            self.product_ids = self.input_df[
                self.product_id_colname].unique().astype(str).tolist()
            available_cols = set(self.input_df.columns)
            for colname in [self.values_colname, self.product_id_colname]:
                available_cols.remove(colname)
            if self.date_colname in available_cols:
                available_cols.remove(self.date_colname)
            self.date_col_selector.options = ['Not Selected'
                                              ] + list(available_cols)
            self.date_col_selector.visible = True
            self.last_date_picker.visible = True
            self.workdays_checkboxgroup.visible = True
            self.workdays_apply_button.visible = True

    def select_date_column(self, attrname, old_val, new_val):
        # Step 4 handler: either validate an existing date column, or (on
        # 'Not Selected') expose the date-generation widgets instead.
        self.update_details_msg(
            msg=
            "Step 4/5: If there is a date column, please select it's name.\n"
            "Note: Dates should be in one of the following formats:\n"
            "yyyy-mm-dd OR mm-dd-yyyy OR yyyy/mm/dd OR mm/dd/yyyy\n"
            "If there is no such column, use 'Not Selected' option.")
        self.hide_all_widgets_except([
            'data_source_selector', 'values_col_selector',
            'data_preview_paragraph', 'data_table', 'product_id_col_selector',
            'date_col_selector'
        ] + self.get_additional_cols_to_show())
        if new_val == 'Not Selected':
            self.last_date_picker.visible = True
            self.workdays_checkboxgroup.visible = True
            self.workdays_apply_button.visible = True
        else:
            self.date_colname = new_val
            date_col_integrity_status = self.date_col_integrity(
                self.date_colname)
            if date_col_integrity_status == 'ok':
                self.clean_df()
                self.display_preview_plot()
            else:
                print('date_col_integrity_status: ',
                      date_col_integrity_status)
                self.update_details_msg(
                    msg=
                    "ERROR: selected date column doesn't satisfy specified requirements:\n"
                    "Dates should be in one of the following formats:\n"
                    "yyyy-mm-dd OR mm-dd-yyyy OR yyyy/mm/dd OR mm/dd/yyyy\n"
                    "If there is no such column, use 'Not Selected' option.")

    def select_last_date(self, attrname, old_val, new_val):
        # DatePicker handler for the generated-dates path.
        self.update_details_msg(
            msg="Alright, dates will be automatically generated for you!\n"
            "Select days when your business works.")
        self.workdays_checkboxgroup.visible = True
        self.workdays_apply_button.visible = True

    def workdays_button_pressed(self, new):
        # Button handler: synthesize a business-day date column per product
        # ending at the picked date.
        if len(self.workdays_checkboxgroup.active) == 0:
            self.update_details_msg(
                msg="Please select at least one business day.")
        else:
            self.update_details_msg(msg="Generating dates.")
            # NOTE(review): checks for 'generated_dates' but creates
            # 'generated_date' (no trailing s) -- the collision guard can
            # never trigger for the column actually written. Confirm intent.
            if 'generated_dates' in self.input_df.columns:
                self.update_details_msg(
                    msg=
                    "Please rename the generated_dates column in you table."
                )
            else:
                self.date_colname = 'generated_date'
                self.input_df[self.date_colname] = ''
                for product_id in self.product_ids:
                    inds = self.input_df[
                        self.product_id_colname] == product_id
                    self.input_df.loc[
                        inds, self.date_colname] = self.generate_dates(
                            end_date=self.last_date_picker.value,
                            work_days=np.array(
                                self.workdays_checkboxgroup.labels)[
                                    self.workdays_checkboxgroup.active],
                            num_periods=inds.sum())
                self.input_df[self.date_colname] = pd.to_datetime(
                    self.input_df[self.date_colname])
                self.clean_df()
                self.display_preview_plot()
                #self.preview_input_df()  # https://stackoverflow.com/questions/40942168/how-to-create-a-bokeh-datatable-datetime-formatter

    def prediction_button_pressed(self, new):
        # Button handler: fit Prophet on the selected product's history and
        # draw history (blue) + 30-day forecast (red) on the shared plot.
        train_dataset = pd.DataFrame()
        print('Preparing forecast for product: ',
              self.product_selector_plotting.value)
        inds = self.input_df[
            self.
            product_id_colname] == self.product_selector_plotting.value
        train_dataset['ds'] = self.input_df.loc[inds, self.date_colname]
        train_dataset['y'] = self.input_df.loc[inds, self.values_colname]
        # train_dataset = train_dataset[train_dataset.duplicated(subset=['ds'],keep='first')]
        #train_dataset.sort_values(by=self.date_colname, inplace=True)
        print('Train Dataset shape: ', train_dataset.shape)
        # make_predictions is a generator that yields progress messages
        # before the final ['results', df] item.
        for q in self.make_predictions(train_dataset):
            if q[0] == 'msg':
                print('Message: ', q[1])
            else:
                self.forecasted_df = q[1]
                self.forecasted_df.columns = ['ds', 'y']
                print('Done; shape: ', self.forecasted_df.shape)
                #self.demand_plot.line(x='ds', y='yhat', source=ColumnDataSource(data=self.forecasted_df, name='line2'))
                #print(self.forecasted_df.tail(30))
                #combined_dataset = train_dataset.append(self.forecasted_df.tail(30), ignore_index=True)
                d = {
                    'ds':
                    train_dataset['ds'].append(
                        self.forecasted_df.tail(30)['ds']),
                    'y':
                    train_dataset['y'].append(
                        self.forecasted_df.tail(30)['y'])
                }
                combined_dataset = pd.DataFrame(d)
                # Clear any stale legend entries before re-drawing.
                try:
                    while len(self.demand_plot.legend[0].items) > 0:
                        self.demand_plot.legend[0].items.pop()
                except:
                    print(
                        'FAIL: popping legends in prediction_button_pressed()'
                    )
                self.demand_plot.renderers.remove(self.line1)
                try:
                    self.demand_plot.renderers.remove(self.line2)
                except:
                    pass
                self.plot_data_source = None
                self.plot_data_source = ColumnDataSource(
                    data=combined_dataset)
                self.line1 = self.demand_plot.line(x=train_dataset['ds'],
                                                   y=train_dataset['y'],
                                                   line_color='blue',
                                                   name='line1',
                                                   legend_label='Historical')
                # Forecast line starts from the last historical point so the
                # two segments join visually.
                self.line2 = self.demand_plot.line(
                    x=train_dataset['ds'].tail(1).append(
                        self.forecasted_df['ds'].tail(30)),
                    y=train_dataset['y'].tail(1).append(
                        self.forecasted_df['y'].tail(30)),
                    line_color='red',
                    name='line2',
                    legend_label='Forecast')
                #print('QQQ ', self.demand_plot.select(name="line2"))
                self.demand_plot.legend.location = "top_left"
                self.demand_plot.x_range.start = combined_dataset['ds'].min()
                self.demand_plot.x_range.end = combined_dataset['ds'].max()
                self.demand_plot.y_range.start = combined_dataset['y'].min()
                self.demand_plot.y_range.end = combined_dataset['y'].max()
                self.demand_plot.visible = True

    ########## OTHER ##########
    def dates_diff_count(self, df, product_name):
        # Report the distribution of day gaps between consecutive rows;
        # division converts ns (datetime64 diff) to days.
        days_diffs = (
            df[1:][self.date_colname].values -
            df[:-1][self.date_colname].values) / 1000000000 / 60 / 60 / 24
        unique_diffs, diffs_counts = np.unique(days_diffs,
                                               return_counts=True)
        msg = 'Product: {}:\n# Days Delta ; Count\n'.format(product_name)
        for value, count in zip(unique_diffs, diffs_counts):
            msg += '{:10} ; {}\n'.format(value, count)
        msg += 'If there is more than one unique value\nit can make forecast less accurate'
        self.update_details_msg(msg=msg)

    # https://facebook.github.io/prophet/docs/non-daily_data.html
    def make_predictions(self, df, days_ahead=30):
        # Generator: yields ['msg', ...] progress items, then a final
        # ['results', forecast] with columns ['ds', 'yhat'].
        yield ['msg', 'training model']
        prophet = Prophet(weekly_seasonality=False, daily_seasonality=False)
        prophet.fit(df)
        yield ['msg', 'making predictions']
        future = prophet.make_future_dataframe(periods=days_ahead)
        forecast = prophet.predict(future)
        yield ['results', forecast[['ds', 'yhat']]]

    def update_plot(self, attrname, old, new):
        # Product-selector handler: redraw the historical line for the
        # newly selected product and rescale both axes to its data.
        try:
            while len(self.demand_plot.legend[0].items) > 0:
                self.demand_plot.legend[0].items.pop()
        except:
            print('FAIL: popping legends in update_plot()')
        try:
            self.demand_plot.renderers.remove(self.line2)
        except:
            pass
        sub_df = self.input_df[self.input_df[self.product_id_colname] == new]
        self.dates_diff_count(sub_df, new)
        self.demand_plot.renderers.remove(self.line1)
        self.plot_data_source = None
        self.plot_data_source = ColumnDataSource(data=sub_df)
        self.line1 = self.demand_plot.line(x=self.date_colname,
                                           y=self.values_colname,
                                           source=self.plot_data_source,
                                           line_color='blue',
                                           legend_label='Historical',
                                           name='line1')
        self.demand_plot.legend.location = "top_left"
        self.demand_plot.x_range.start = sub_df[self.date_colname].min()
        self.demand_plot.x_range.end = sub_df[self.date_colname].max()
        self.demand_plot.y_range.start = sub_df[self.values_colname].min()
        self.demand_plot.y_range.end = sub_df[self.values_colname].max()

    ########## MAIN ##########
    def display(self):
        # Wire all widget callbacks, build the page layout, and register it
        # on the current Bokeh document.
        self.file_input.on_change('value', self.upload_fit_data)
        self.plot = figure(plot_height=400,
                           plot_width=400,
                           title='my sine wave',
                           tools='crosshair,pan,reset,save,wheel_zoom')
        # Set up layouts and add to document
        self.inputs = column(self.data_source_selector, self.file_input,
                             self.values_col_selector,
                             self.product_id_col_selector,
                             self.date_col_selector, self.last_date_picker,
                             self.workdays_checkboxgroup,
                             self.workdays_apply_button)
        #self.data_source_selector.visible = True
        self.hide_all_widgets_except(['data_source_selector'])
        self.data_source_selector.on_change('value', self.select_data_source)
        self.values_col_selector.on_change('value',
                                           self.select_values_colname)
        self.product_id_col_selector.on_change(
            'value', self.select_product_id_colname)
        self.product_selector_plotting.on_change('value', self.update_plot)
        self.date_col_selector.on_change('value', self.select_date_column)
        self.last_date_picker.on_change('value', self.select_last_date)
        self.workdays_apply_button.on_click(self.workdays_button_pressed)
        self.prediction_button.on_click(self.prediction_button_pressed)
        #self.col_left = self.inputs
        columns = [
            TableColumn(field=Ci, title=Ci, width=DATATABLE_PREVIEW_COL_WIDTH)
            for Ci in self.input_df.columns
        ]
        self.data_table.columns = columns
        self.data_table.source = ColumnDataSource(
            self.input_df.head(DF_NUM_PREVIEW_ROWS))
        self.col_middle = column(self.data_preview_paragraph,
                                 self.data_table)
        #self.col_info = column()
        #self.col_left.width = 300
        #self.col_right.max_width = 500
        #self.col_right.sizing_mode = 'scale_width'
        #self.row_data_input = row(self.col_left, self.col_right, self.info_paragraph)
        #self.row_data_input.sizing_mode = 'scale_width'
        #self.row_demand_plot = row(self.product_selector_plotting)#, self.demand_plot)
        #self.layout = column(self.row_data_input, self.row_demand_plot)
        self.layout = column(
            row(
                column(
                    self.data_source_selector,
                    self.file_input,
                    self.values_col_selector,
                    self.product_id_col_selector,
                ), column(self.data_preview_paragraph, self.data_table),
                self.info_paragraph),
            row(
                column(self.date_col_selector, self.last_date_picker,
                       self.workdays_checkboxgroup,
                       self.workdays_apply_button,
                       self.product_selector_plotting,
                       self.prediction_button), self.demand_plot))
        curdoc().add_root(self.layout)
        curdoc().title = 'Demand Forecasting'
from bokeh.models import DatePicker, HBox
from bokeh.io import curdoc
from datetime import datetime

# A minimal DatePicker demo: fixed lower bound, "now" as the upper bound,
# and January 1st of the current year as the initial value.
_now = datetime.now()
beginning = DatePicker(
    title="Begin Date",
    min_date=datetime(2014, 11, 1),
    max_date=_now,
    value=datetime(_now.year, 1, 1),
)


def cb(attr, old, new):
    # Echo each newly selected date to stdout.
    print(new)


beginning.on_change('value', cb)
curdoc().add_root(HBox(children=[beginning]))
row(clm_in_div, clm_input_dir_text_input), row(cases_div, cases_dir_text_input), row(output_div, output_dir_text_input)) ############################################################################### """ Simulation period """ ############################################################################### period_div = Div(text="<h2>Simulation period</h2>", sizing_mode="stretch_width") start_date_picker = DatePicker(title='Select simulation start date:', value="2000-01-01", min_date="1900-01-01", max_date="2020-12-30") start_date_picker.on_change('value', _input_changed) end_date_picker = DatePicker(title='Select simulation end date:', value="2001-01-01", min_date="1900-01-02", max_date="2020-12-31") end_date_picker.on_change('value', _input_changed) dates_section = column(period_div, row(start_date_picker, end_date_picker)) def _check_dates(): global start_date, end_date start_date = datetime.strptime(start_date_picker.value, '%Y-%m-%d') end_date = datetime.strptime(end_date_picker.value, '%Y-%m-%d')
"src='https://portal.rockgympro.com/portal/public/" "4f7e4c65977f6cd9be6d61308c7d7cc2/occupancy?&iframeid" "=occupancyCounter&fId=' height=250 width=100% " "scrolling='no' style='border:0px;'></iframe>") DF = load_data() GYMS = list(DF["gym"].unique()) D_MIN = DF["scrape_time"].min().date() D_MAX = DF["scrape_time"].max().date() WEEKS = math.ceil((D_MAX - D_MIN).days / 7) PLURAL = {1: ""}.get(WEEKS, "s") gyms = Select(value=GYM, options=GYMS) gyms.on_change("value", update_gym) date = DatePicker(value=str(D_MAX), min_date=str(D_MIN), max_date=str(D_MAX)) date.on_change("value", update_date) reload_str = f"Dataset last loaded <i>{datetime.now().strftime('%Y-%m-%d %H:%M')}</i>" last_refresh = Div(text=reload_str) reload_btn = Button(label="Reload Counter", button_type="primary") reload_btn.on_click(reload_counter) _iframe = Div(text=IFRAME) INFO_TEXT = f"""Dataset file: <pre>{DATASET}</pre>""" # <br>Remote file: <pre>{REMOTE_DATASET}</pre> # <br>Hashes: <pre>{HASH}<br>{REMOTE_HASH}</pre> # <br>If the hashes do not match the dataset has been updated on GitHub""" dataset_info = Div(text=INFO_TEXT) extras = row(column(_iframe), column(dataset_info), column(reload_btn))