def construct_from_redis(key_lst, item_type='list', df=None, table=None, df_cols=None, dedup_cols=None):
    """Rebuild items cached in redis from a list of redis keys.

    :param key_lst: list of redis keys to load.
    :param item_type: 'list' loads each key as a list item; any other value
        treats the keys as date-ranged warehouse keys and loads a dataframe.
    :param df: seed dataframe used when item_type != 'list'.
    :param table, df_cols, dedup_cols: accepted for interface compatibility;
        not used by the current implementation.
    :return: list of loaded items, a dataframe (tab.df1), or None on
        empty input / error (errors are logged, not raised).
    """
    try:
        redis = PythonRedis()
        if not key_lst:
            return None
        temp_item = [] if item_type == 'list' else df
        start_list = []
        end_list = []
        for key in key_lst:
            if item_type == 'list':
                item_loaded = redis.load([], '', '', key, item_type)
                temp_item.append(item_loaded)
            else:
                # make list of start and end dates from the key
                # (key format: ...:<start YYYY-mm-dd>:<end YYYY-mm-dd>)
                logger.warning('key for churned load:%s', key)
                lst = key.split(':')
                if lst[-1] != '':
                    req_start_date = datetime.strptime(
                        lst[-2] + ' 00:00:00', '%Y-%m-%d %H:%M:%S')
                    req_end_date = datetime.strptime(
                        lst[-1] + ' 00:00:00', '%Y-%m-%d %H:%M:%S')
                    start_list.append(req_start_date)
                    end_list.append(req_end_date)
        if item_type == 'list':
            # BUG FIX: the return previously sat inside the loop, so only the
            # first key was ever loaded; now all keys are collected first.
            return temp_item
        tab = Mytab('block_tx_warehouse', cols['block_tx_warehouse']['models'], [])
        if len(start_list) > 0:
            # for the warehouse, span the minimum start to the maximum end
            # and retrieve from the database in one load
            tab.key_tab = 'models'
            req_start_date = min(start_list)
            req_end_date = max(end_list)
            tab.df_load(req_start_date, req_end_date)
            logger.warning('TRACKER:%s', tab.df.tail(10))
        return tab.df1
    except Exception:
        logger.error("construct from redis/clickhouse", exc_info=True)
def __init__(self, table, cols, dedup_cols, panel_title=None):
    """Initialize tab state: streaming dataframe, db/redis clients, and
    per-tab defaults.

    :param table: clickhouse table name backing this tab.
    :param cols: columns to stream/load.
    :param dedup_cols: columns used for de-duplication.
    :param panel_title: optional title displayed on the bokeh panel.
    """
    self.panel_title = panel_title
    self.table = table
    self.cols = cols
    self.locals = dict()  # stuff local to each tab
    self.streaming_dataframe = SD(table, cols, dedup_cols)
    self.df = self.streaming_dataframe.df
    self.df1 = None
    self.dedup_cols = dedup_cols
    self.params = None
    # FIX: a redundant `self.load_params = dict()` earlier was always
    # clobbered by this assignment with no intervening use; removed.
    self.load_params = None
    self.poolname_dict = self.get_poolname_dict()
    self.key_tab = ''  # for key composition in redis
    self.construction_tables = {}
    self.tier1_miners_list = []
    self.tier2_miners_list = []
    self.pq = PythonParquet()
    self.ch = PythonClickhouse('aion')
    self.redis = PythonRedis()
    self.conn = self.redis.conn
    self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
    self.ToA_THRESH = {  # Tests of association (TOA)
        'STRONG': .65,
        'MODERATE': .4,
        'WEAK': .25
    }
    self.menus = {'resample_periods': ['D', 'W', 'M', 'Q']}
    self.resample_period = self.menus['resample_periods'][0]
    self.pvalue_thresh = 0.1
    self.page_width = 1200
from scripts.databases.pythonRedis import PythonRedis
from scripts.utils.mylogger import mylogger

# Module-level logger named after this file.
logger = mylogger(__file__)
# Shared redis client for module-level helpers.
redis = PythonRedis()
def crypto_clusters_eda_tab(cryptos, panel_title):
    """Build the bokeh EDA tab that compares cryptocurrency clusters over time.

    Loads precomputed cluster assignments from redis, constructs a Thistab
    controller, wires up date/resample widgets, and returns a bokeh Panel.
    NOTE(review): relies on module-level globals (groupby_dict, features,
    cluster_dct, source) shared with other code — confirm before refactoring.
    """
    global groupby_dict
    global features
    global cluster_dct
    #global source
    redis = PythonRedis()
    # cluster membership previously computed elsewhere and cached in redis
    cluster_dct = redis.simple_load('clusters:cryptocurrencies')
    if cluster_dct is not None:
        # aggregate every clustered feature by summation when resampling
        groupby_dict = {}
        for var in cluster_dct['features']:
            groupby_dict[var] = 'sum'
        features = cluster_dct['features']
        # one ColumnDataSource per feature, shared with the multi_line glyphs below
        source = {}
        for feature in features:
            source[feature] = ColumnDataSource(
                data=dict(xs=[], ys=[], labels=[], colors=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=[]):
            # NOTE(review): mutable default argument kept as-is for compatibility.
            Mytab.__init__(self, table, cols, dedup_cols, panel_title=panel_title)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None  # raw loaded frame
            self.df1 = None  # prepped/grouped frame
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = [
                'watch', 'fork', 'issue', 'release', 'push', 'tw_mentions',
                'tw_positive', 'tw_compound', 'tw_neutral', 'tw_negative',
                'tw_emojis_positive', 'tw_emojis_compound',
                'tw_emojis_negative', 'tw_emojis_count', 'tw_reply_hashtags'
            ]
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']
            self.trigger = 0
            txt = """<div style="text-align:center;background:black;width:100%;"> <h1 style="color:#fff;"> {}</h1></div>""".format(
                'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=1400, height=20),
                'bottom': Div(text=txt, width=1400, height=10),
            }
            self.cluster_dct = cluster_dct
            self.groupby_dict = groupby_dict
            self.features = features
            self.crypto = 'all'
            self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.significant_effect_dict = {}
            self.df1 = None
            self.section_headers = {
                'ts': self.section_header_div(
                    'Comparison of clusters across variables:---------------------',
                    width=600)
            }
            self.timestamp_col = None
            self.colors = None

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=1400):
            # Wrap `text` in a colored HTML header of the requested level.
            text = '<{} style="color:#4221cc;">{}</{}>'.format(
                html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=300):
            # Static help panel; the list items are placeholders awaiting copy.
            txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> </li> <li> </li> <li> </li> <li> </li> <li> </li> <li> </li> </ul> </div> """.format(
                self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ////////////////////////// UPDATERS ///////////////////////
        def section_head_updater(self, section, txt):
            # NOTE(review): this indexes the *method* section_header_div rather
            # than the dict self.section_headers; any call will raise and be
            # swallowed by the except below — likely a latent bug, confirm intent.
            try:
                self.section_header_div[section].text = txt
            except Exception:
                logger.error('', exc_info=True)

        def notification_updater(self, text):
            # Push a status banner into both top and bottom notification divs.
            txt = """<div style="text-align:center;background:black;width:100%;"> <h4 style="color:#fff;"> {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        # /////////////////////////// LOAD CLUSTERS //////////////////////
        def prep_data(self, df, timestamp_col):
            # Map each crypto to its cluster label, then resample per cluster.
            def label_cluster(x):
                # cluster_dct maps cluster-name -> member coins, plus
                # 'timestamp'/'variables' bookkeeping keys which are skipped
                for key, values in self.cluster_dct.items():
                    if key not in ['timestamp', 'variables']:
                        if x in values:
                            return key
                return x

            try:
                cols = self.features + ['crypto', 'timestamp']
                df = df[cols]
                # groupby and resample
                df['crypto'] = df['crypto'].map(lambda x: label_cluster(x))
                df = df.rename(columns={'crypto': 'cluster'})
                # .compute() — presumably df arrives as a dask frame; confirm
                df = df.compute()
                df[timestamp_col] = pd.to_datetime(df[timestamp_col],
                                                   errors='coerce')
                df.set_index(timestamp_col, inplace=True)
                df = df.groupby('cluster').resample(self.resample_period).agg(
                    self.groupby_dict)
                df.reset_index(inplace=True)
                df.set_index(timestamp_col, inplace=True)
                self.timestamp_col = timestamp_col
                self.df1 = df
            except Exception:
                logger.error('prep data', exc_info=True)

        def graph_ts(self):
            # Refresh every feature's ColumnDataSource with per-cluster lines.
            try:
                #global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    clusters = df['cluster'].unique()
                    # one color per cluster, from the dashboard config palette
                    self.colors = [''] * len(clusters)
                    for idx, feature in enumerate(clusters):
                        self.colors[idx] = dashboard_config['colors'][idx]
                    if self.features is not None:
                        for idx, feature in enumerate(self.features):
                            df1 = df[['cluster', feature]]
                            # pivot into columns for cluster
                            df1 = df1.pivot(columns='cluster')
                            data = dict(
                                x=[df1.index.values] * len(clusters),
                                y=[df1[name].values for name in df1],
                                labels=clusters,
                                colors=self.colors)
                            source[feature].data = data
            except Exception:
                logger.error('graph ts', exc_info=True)

        def graph_chartify(self, timestamp_col):
            # Alternative chartify rendering of one line chart per feature.
            # NOTE(review): only a single chart object is returned; the call
            # site below is commented out, so this is currently unused.
            try:
                # global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    df = df.reset_index()
                    for feature in self.features:
                        ch = chartify.Chart(blank_labels=True,
                                            x_axis_type='datetime')
                        ch.set_title("CHARTIFY")
                        ch.plot.line(
                            # Data must be sorted by x column
                            data_frame=df.sort_values(timestamp_col),
                            x_column=timestamp_col,
                            y_column=feature,
                            color_column='cluster')
                    return ch
            except Exception:
                logger.error('graph chartify', exc_info=True)

    def update():
        # Button callback: reload data for the chosen dates and redraw.
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value, datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        # Select callback: re-aggregate at the newly chosen resample period.
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = resample_select.value
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("ready")

    try:
        table = 'external_daily'
        thistab = Thistab(table, [], [])
        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = dashboard_config['dates']['current_year_start']
        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp',
                        cols=[])
        thistab.prep_data(thistab.df, timestamp_col='timestamp')

        # MANAGE STREAMS ---------------------------------------------------------

        # CREATE WIDGETS ----------------------------------------------------------------
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        load_dates_button = Button(
            label="Select dates/periods, then click me!",
            width=20, height=8, button_type="success")
        resample_select = Select(title='Select summary period',
                                 value=thistab.resample_period,
                                 options=thistab.menus['resample_periods'])

        # -------------------------------- PLOTS ---------------------------
        thistab.graph_ts()
        p = {}
        for feature in features:
            p[feature] = figure(x_axis_type="datetime", plot_width=1400,
                                plot_height=400, title=feature)
            p[feature].multi_line(
                xs='x',
                ys='y',
                legend='labels',
                line_color='colors',
                line_width=5,
                hover_line_color='colors',
                hover_line_alpha=1.0,
                source=source[feature],
            )
            p[feature].add_tools(
                HoverTool(show_arrow=False,
                          line_policy='next',
                          tooltips=[
                              ('freq', '$y'),
                          ]))
        # ch = thistab.graph_chartify(timestamp_col='timestamp')

        # -------------------------------- CALLBACKS ------------------------
        load_dates_button.on_click(update)  # lags array
        resample_select.on_change('value', update_resample)

        # -----------------------------------LAYOUT ----------------------------
        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_left = WidgetBox(datepicker_start, load_dates_button)
        controls_right = WidgetBox(datepicker_end)
        grid_data = [
            #[ch.figure],
            [thistab.notification_div['top']],
            [controls_left, controls_right],
            [thistab.section_headers['ts'], resample_select],
        ]
        for feature in features:
            grid_data.append([p[feature]])
            logger.warning('p:%s', p[feature])
        grid_data.append([thistab.notification_div['bottom']])
        grid = gridplot(grid_data)

        # Make a tab with the layout
        tab = Panel(child=grid, title=thistab.panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(thistab.panel_title)
def __init__(self, table, name, cols):
    """Set up shared KPI-tab state: db/redis clients, display styles,
    date formats, period-over-period defaults, and the payroll lookup."""
    # --- data sources and clients -------------------------------------
    self.df = None
    self.ch = PythonClickhouse('aion')
    self.redis = PythonRedis()
    self.table = table
    self.cols = cols

    # --- inline-style snippets reused by the bokeh Div widgets --------
    self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
    self.header_style = """ style='color:blue;text-align:center;' """
    self.welcome_txt = """<div style="text-align:center;background:black;width:100%;"> <h1 style="color:#fff;"> {}</h1></div>""".format('Welcome')

    # css bundle location (currently unused local, retained as-is)
    css_path = join(dirname(__file__),
                    "../../../static/css/KPI_interface.css")
    self.KPI_card_css = KPI_card_css

    # --- date handling ------------------------------------------------
    self.DATEFORMAT = '%Y-%m-%d %H:%M:%S'
    self.DATEFORMAT_PTD = '%Y-%m-%d'
    self.initial_date = datetime.strptime("2018-04-25 00:00:00",
                                          self.DATEFORMAT)

    # --- tab state defaults -------------------------------------------
    self.account_type = 'all'
    self.trigger = -1
    self.periods_to_plot = {1: ['week', 'month'], 2: ['quarter']}
    self.pop_history_periods = 3  # number of periods for period over period
    self.pop_start_date = None
    self.pop_end_date = None
    self.timestamp_col = ''
    self.checkboxgroup = {}
    self.sig_effect_dict = {}
    self.name = name
    self.redis_stat_sig_key = 'adoption_features:' + self.name
    self.card_grid_row = {'year': 0, 'quarter': 1, 'month': 2, 'week': 3}

    # --- payroll: weekly engineering cost scaled to longer periods ----
    weekly_pay = 1200
    num_engineers = 40
    weekly_total = weekly_pay * num_engineers
    self.payroll = {
        'week': weekly_total,
        'month': weekly_total * 4,
        'quarter': weekly_total * 4 * 3,
        'year': weekly_total * 4 * 3 * 4,
    }

    # default resample period comes from the class-level menus mapping
    self.resample_period = self.menus['resample_period'][0]
    # aggregation spec for the social-media columns
    self.groupby_dict = {
        'tw_mentions': 'sum',
        'tw_positive': 'mean',
        'tw_compound': 'mean',
        'tw_neutral': 'mean',
        'tw_negative': 'mean',
        'tw_emojis_positive': 'mean',
        'tw_emojis_compound': 'mean',
        'tw_emojis_negative': 'mean',
        'tw_emojis_count': 'sum',
        'tw_replies_from_followers': 'sum',
        'tw_replies_from_following': 'sum',
        'tw_reply_hashtags': 'sum'
    }
    self.pop_history_periods = 3  # repeats the assignment above (kept)
    self.variable = 'item'
    self.grouby_var = ''  # (sic) attribute name kept for callers
    self.page_width = 1200
class KPI:
    """Base class for KPI dashboard tabs: data loading windows, period-to-date
    and period-over-period framing, and KPI card rendering helpers.

    NOTE(review): several attributes used by methods here are expected to be
    provided by subclasses or later setup, not by __init__: self.pym,
    self.notification_div, self.KPI_card_div, self.section_headers — confirm
    against subclass code.
    """

    # Widget menu options shared by all KPI tabs (class-level constant).
    menus = {
        'account_type': ['all', 'contract', 'miner', 'native_user',
                         'token_user'],
        'update_type': [
            'all', 'contract_deployment', 'internal_transfer', 'mined_block',
            'token_transfer', 'transaction'
        ],
        'history_periods': ['1', '2', '3', '4', '5', '6', '7', '8', '9',
                            '10'],
        'developer_adoption_DVs': ['aion_fork', 'aion_watch'],
        'resample_period': ['W', 'M', 'Q'],
        'social_media': ['twitter', 'facebook'],
        'social_media_variables': [
            'tw_mentions', 'tw_positive', 'tw_compound', 'tw_neutral',
            'tw_negative', 'tw_emojis_positive', 'tw_emojis_compound',
            'tw_emojis_negative', 'tw_emojis_count',
            'tw_replies_from_followers', 'tw_replies_from_following',
            'tw_reply_hashtags'
        ],
        'cryptos': ['all'] + load_cryptos(),
        'bcc': {
            'rental': ['area', 'category', 'item', 'status', 'gender']
        }
    }

    def __init__(self, table, name, cols):
        """Initialize clients, display styles, date formats, and defaults.

        :param table: backing table name ('bcc...' tables route loads
            through load_df_pym instead of clickhouse).
        :param name: tab name, used to compose the redis stat-sig key.
        :param cols: columns this tab loads.
        """
        self.df = None
        self.ch = PythonClickhouse('aion')
        self.redis = PythonRedis()
        self.table = table
        self.cols = cols
        # inline-style snippets reused by bokeh Div widgets
        self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
        self.header_style = """ style='color:blue;text-align:center;' """
        self.welcome_txt = """<div style="text-align:center;background:black;width:100%;"> <h1 style="color:#fff;"> {}</h1></div>""".format('Welcome')
        # NOTE(review): css_path is computed but never used in this method.
        css_path = join(dirname(__file__),
                        "../../../static/css/KPI_interface.css")
        self.KPI_card_css = KPI_card_css
        self.DATEFORMAT = '%Y-%m-%d %H:%M:%S'
        self.DATEFORMAT_PTD = '%Y-%m-%d'
        self.initial_date = datetime.strptime("2018-04-25 00:00:00",
                                              self.DATEFORMAT)
        self.account_type = 'all'
        self.trigger = -1
        self.periods_to_plot = {1: ['week', 'month'], 2: ['quarter']}
        self.pop_history_periods = 3  # number of periods for period over period
        self.pop_start_date = None
        self.pop_end_date = None
        self.timestamp_col = ''
        self.checkboxgroup = {}
        self.sig_effect_dict = {}
        self.name = name
        self.redis_stat_sig_key = 'adoption_features:' + self.name
        self.card_grid_row = {'year': 0, 'quarter': 1, 'month': 2, 'week': 3}
        # weekly engineering payroll scaled to month/quarter/year
        weekly_pay = 1200
        num_engineers = 40
        self.payroll = {
            'week': weekly_pay * num_engineers,
            'month': weekly_pay * num_engineers * 4,
            'quarter': weekly_pay * num_engineers * 4 * 3,
            'year': weekly_pay * num_engineers * 4 * 3 * 4
        }
        self.resample_period = self.menus['resample_period'][0]
        # aggregation spec for the social-media columns
        self.groupby_dict = {
            'tw_mentions': 'sum',
            'tw_positive': 'mean',
            'tw_compound': 'mean',
            'tw_neutral': 'mean',
            'tw_negative': 'mean',
            'tw_emojis_positive': 'mean',
            'tw_emojis_compound': 'mean',
            'tw_emojis_negative': 'mean',
            'tw_emojis_count': 'sum',
            'tw_replies_from_followers': 'sum',
            'tw_replies_from_following': 'sum',
            'tw_reply_hashtags': 'sum'
        }
        # NOTE(review): duplicate of the assignment above (same value).
        self.pop_history_periods = 3  # number of periods for period over period
        self.variable = 'item'
        self.grouby_var = ''
        self.page_width = 1200

    # make block timestamp the index
    def load_df(self, start_date, end_date, cols,
                timestamp_col='timestamp_of_first_event',
                supplemental_where=None):
        """Load [start_date, end_date] (end inclusive via +1 day) from
        clickhouse; errors are logged and None is returned."""
        try:
            # normalize date -> datetime at midnight
            if isinstance(end_date, date):
                end_date = datetime.combine(end_date, datetime.min.time())
            if isinstance(start_date, date):
                start_date = datetime.combine(start_date, datetime.min.time())
            end_date += timedelta(days=1)
            temp_cols = cols.copy()
            # always pull 'amount' for non-external tables
            if self.table != 'external_daily':
                if 'amount' not in temp_cols:
                    temp_cols.append('amount')
            df = self.ch.load_data(self.table, temp_cols, start_date,
                                   end_date, timestamp_col,
                                   supplemental_where)
            # filter out the double entry
            #df = df[df['value'] >= 0]
            if len(cols) > 0:
                return df[cols]
            else:
                return df
            #df[timestamp_col] = df[timestamp_col].map(lambda x: clean_dates_from_db(x))
        except Exception:
            logger.error('load df', exc_info=True)

    def load_df_pym(self, req_startdate, req_enddate, table, cols,
                    timestamp_col):
        """Serve the requested window from the cached self.df when it covers
        the range; otherwise delegate to self.pym.

        NOTE(review): self.pym is not set in __init__ — presumably attached
        by a subclass; confirm. Also see the call in period_over_period,
        which passes only 4 arguments (no `table`) — that call would raise
        TypeError and be swallowed by period_over_period's except.
        """
        try:
            # get min and max of loaded df
            if self.df is not None:
                loaded_min = self.df[timestamp_col].min()
                loaded_max = self.df[timestamp_col].max()
                if loaded_min <= req_startdate and loaded_max >= req_enddate:
                    df = self.df[(self.df[timestamp_col] >= req_startdate)
                                 & (self.df[timestamp_col] <= req_enddate)]
                    return df
            return self.pym.load_df(req_startdate, req_enddate, table=table,
                                    cols=cols, timestamp_col=timestamp_col)
        except Exception:
            logger.error('load_df', exc_info=True)

    def update_cards(self, dct):
        """Render one period-to-date card per entry of dct into KPI_card_div."""
        try:
            txt = ''
            for period, data in dct.items():
                # random card style for visual variety
                design = random.choice(list(KPI_card_css.keys()))
                title = period + ' to date'
                txt += self.card(title=title, data=data, card_design=design)
            text = """<div style="margin-top:100px;display:flex; flex-direction:row;"> {} </div>""".format(txt)
            self.KPI_card_div.text = text
        except Exception:
            logger.error('update cards', exc_info=True)

    def reset_checkboxes(self, value='all', checkboxgroup=''):
        """Reset the named checkbox group widget back to `value`."""
        try:
            self.checkboxgroup[checkboxgroup].value = value
        except Exception:
            logger.error('reset checkboxes', exc_info=True)

    def first_date_in_quarter(self, timestamp):
        """Return midnight of the first day of `timestamp`'s quarter."""
        try:
            curr_quarter = int((timestamp.month - 1) / 3 + 1)
            return datetime(timestamp.year, 3 * curr_quarter - 2, 1)
        except Exception:
            logger.error('period to date', exc_info=True)

    def first_date_in_period(self, timestamp, period):
        """Return the first datetime of the week/month/quarter/year
        containing `timestamp`. Unknown periods leave `start` unbound
        (NameError, caught and logged — returns None)."""
        try:
            if period == 'week':
                start = timestamp - timedelta(days=timestamp.weekday())
            elif period == 'month':
                start = datetime(timestamp.year, timestamp.month, 1, 0, 0, 0)
            elif period == 'year':
                start = datetime(timestamp.year, 1, 1, 0, 0, 0)
            elif period == 'quarter':
                start = self.first_date_in_quarter(timestamp)
            return start
        except Exception:
            logger.error('period to date', exc_info=True)

    def period_to_date(self, df, timestamp=None, timestamp_filter_col=None,
                       cols=[], period='week'):
        """Filter df to rows between the period start and `timestamp`
        (defaults to now, truncated to the hour)."""
        try:
            if timestamp is None:
                timestamp = datetime.now()
                timestamp = datetime(timestamp.year, timestamp.month,
                                     timestamp.day, timestamp.hour, 0, 0)
            start = self.first_date_in_period(timestamp, period)
            # filter
            if timestamp_filter_col is None:
                timestamp_filter_col = self.timestamp_col
            #logger.warning('df:%s',df[timestamp_filter_col])
            df = df[(df[timestamp_filter_col] >= start)
                    & (df[timestamp_filter_col] <= timestamp)]
            if len(cols) > 0:
                df = df[cols]
            return df
        except Exception:
            logger.error('period to date', exc_info=True)

    # NOTE(review): defined without `self` at class level; an identical
    # nested helper inside label_dates_pop is what is actually used.
    def label_qtr_pop(y):
        try:
            curr_quarter = int((y.month - 1) / 3 + 1)
            start = datetime(y.year, 3 * curr_quarter - 2, 1)
            return abs((start - y).days)
        except Exception:
            logger.error('df label quarter', exc_info=True)

    def shift_period_range(self, period, start, end):
        """Shift [start, end] back by exactly one week/month/quarter/year."""
        try:
            if period == 'week':
                start = start - timedelta(days=7)
                end = end - timedelta(days=7)
            elif period == 'month':
                start = start - relativedelta(months=1)
                end = end - relativedelta(months=1)
            elif period == 'year':
                start = start - relativedelta(years=1)
                end = end - relativedelta(years=1)
            elif period == 'quarter':
                start = start - relativedelta(months=3)
                end = end - relativedelta(months=3)
            #logger.warning('%s start:end=%s:%s',period,start,end)
            return start, end
        except Exception:
            logger.error('shift period range', exc_info=True)

    # label dates for period over period (pop)
    def label_dates_pop(self, df, period, timestamp_col):
        """Add a 'dayset' column giving each row's day offset within its
        period, so rows from different periods can be aligned."""
        logger.warning('timestamp col:%s', df.head(10))

        def label_qtr_pop(y):
            # days elapsed since the start of y's quarter
            try:
                curr_quarter = int((y.month - 1) / 3 + 1)
                start = datetime(y.year, 3 * curr_quarter - 2, 1)
                return abs((start - y).days)
            except Exception:
                logger.error('df label quarter', exc_info=True)

        try:
            if len(df) > 0:
                if period == 'week':
                    df = df.assign(
                        dayset=lambda x: x[timestamp_col].dt.dayofweek)
                elif period == 'month':
                    df = df.assign(dayset=lambda x: x[timestamp_col].dt.day)
                elif period == 'year':
                    df = df.assign(
                        dayset=lambda x: x[timestamp_col].dt.dayofyear)
                elif period == 'quarter':
                    df['dayset'] = df[timestamp_col].map(label_qtr_pop)
            return df
        except Exception:
            logger.error('label data ', exc_info=True)

    def pop_include_zeros(self, df_period, plotcols, period):
        """Ensure a zero-filled column exists for every expected pop period
        so plots always have the full set of series."""
        try:
            # check for no data on original dates
            tmp_title = '0 {}(s) prev(current)'.format(period)
            if tmp_title not in plotcols:
                df_period[tmp_title] = [0] * len(df_period)
                plotcols.append(tmp_title)
                logger.warning('line 218 cols to plot:%s', plotcols)
            # do other periods
            tmp = plotcols[0]
            txt = tmp[1:]  # column title minus its leading period number
            if isinstance(self.pop_history_periods, str):
                self.pop_history_periods = int(self.pop_history_periods)
            for i in range(1, self.pop_history_periods):
                tmp_txt = str(i) + txt
                if tmp_txt not in plotcols:
                    df_period[tmp_txt] = [0] * len(df_period)
                    plotcols.append(tmp_txt)
            logger.warning('LINE 158 plotcols at end of pop include zeros:%s',
                           plotcols)
            return df_period, sorted(plotcols)
        except Exception:
            logger.error('pop include zeros', exc_info=True)

    def period_over_period(self, df, start_date, end_date, period,
                           history_periods=2,
                           timestamp_col='timestamp_of_first_event'):
        """Stack the current window with up to `history_periods` earlier
        windows of the same length, labeling each with a 'period' column."""
        try:
            # filter cols if necessary
            string = '0 {}(s) prev(current)'.format(period)
            # filter out the dates greater than today
            df_current = df.assign(period=string)
            # label the days being compared with the same label
            if len(df_current) > 0:
                df_current = self.label_dates_pop(df_current, period,
                                                  timestamp_col)
            # zero out time information
            start = datetime(start_date.year, start_date.month,
                             start_date.day, 0, 0, 0)
            end = datetime(end_date.year, end_date.month, end_date.day, 0, 0,
                           0)
            cols = list(df.columns)
            logger.warning(' Line 293 %s:df %s', period, df.head(10))
            logger.warning(' Line 293 %s:df cols %s', period, cols)
            counter = 1
            if isinstance(history_periods, str):
                history_periods = int(history_periods)
            # make dataframes for request no. of periods
            start, end = self.shift_period_range(period, start, end)
            while counter < history_periods and start >= self.initial_date:
                # load data
                if period == 'quarter':
                    logger.warning('start:end %s:%s', start, end)
                if 'bcc' in self.table:
                    # NOTE(review): load_df_pym takes (start, end, table,
                    # cols, timestamp_col) — this 4-arg call is missing
                    # `table`, so it would raise TypeError (swallowed below).
                    df_temp = self.load_df_pym(start, end, cols,
                                               timestamp_col)
                else:
                    df_temp = self.load_df(start, end, cols, timestamp_col)
                if df_temp is not None:
                    if len(df_temp) > 1:
                        string = '{} {}(s) prev'.format(counter, period)
                        # label period
                        df_temp = df_temp.assign(period=string)
                        # relabel days to get matching day of week,doy, dom, for different periods
                        df_temp = self.label_dates_pop(df_temp, period,
                                                       timestamp_col)
                        #logger.warning('df temp loaded for %s previous: %s',counter,len(df_temp))
                        df_current = concat_dfs(df_current, df_temp)
                        del df_temp
                        gc.collect()
                # shift the loading window
                counter += 1
                start, end = self.shift_period_range(period, start, end)
                if period == 'week':
                    logger.warning('LINE 327 df_current:%s',
                                   df_current.head(10))
            return df_current
        except Exception:
            logger.error('period over period', exc_info=True)

    def pop_week(self, launch=-1):
        try:
            return self.graph_period_over_period('week')
        except Exception:
            logger.error('pop week', exc_info=True)

    def pop_month(self, launch=-1):
        try:
            return self.graph_period_over_period('month')
        except Exception:
            logger.error('pop month', exc_info=True)

    def pop_quarter(self, launch=-1):
        try:
            return self.graph_period_over_period('quarter')
        except Exception:
            logger.error('pop quarter', exc_info=True)

    def pop_year(self, launch=-1):
        try:
            return self.graph_period_over_period('year')
        except Exception:
            logger.error('pop year', exc_info=True)

    """
    To enable comparision across period, dates must have label relative to period start.
    Place dates in columns to be able to plot multi-line/bar graphs
    """

    def split_period_into_columns(self, df, col_to_split, value_to_copy):
        """For each distinct value in col_to_split, create a column holding
        value_to_copy on matching rows and 0 elsewhere."""
        try:
            for item in df[col_to_split].unique():
                df[item] = df.apply(lambda x: x[value_to_copy]
                                    if x[col_to_split] == item else 0,
                                    axis=1)
            #logger.warning('split period into columns:%s', df.head(10))
            return df
        except Exception:
            logger.error('split period into column', exc_info=True)

    # ----------------------- UPDATERS ------------------------------------------
    def card(self, title, data, width=200, height=200,
             card_design='folders'):
        """Return the HTML for one KPI card styled per KPI_card_css."""
        try:
            txt = """ <div style="flex: 1 1 0px;border: 1px solid black;{};width:{}px; height:{}px;border-right=10px;"> <h3> {} </h3> </br> {} </div>""".format(
                self.KPI_card_css[card_design], width, height, title, data)
            return txt
        except Exception:
            logger.error('card', exc_info=True)

    def notification_updater(self, text):
        """Push a status banner into every registered notification div."""
        txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(
            self.page_width, 50, text)
        for key in self.notification_div.keys():
            self.notification_div[key].text = txt

    """
    update the section labels on the page
    """

    def section_header_updater(self, section, label='all'):
        # NOTE(review): `text` is only bound for section in
        # {'cards', 'pop'}; any other section raises NameError here.
        if label not in ['all', '', 'remuneration']:
            label = label + 's'
        if section == 'cards':
            text = "Period to date:"
            if label == 'remuneration':
                text = text + '$ spent'
            if label == 'project':
                text = text + '# of projects'
            if label == 'delay_start':
                text = text + 'Mean delay in start projects(hours)'
            if label == 'delay_end':
                text = text + 'Mean project overrun(hours)'
            if label == 'project_duration':
                text = text + 'Mean project duration (days)'
            if label == 'task_duration':
                text = text + 'Total project person hours)'
        elif section == 'pop':
            text = "Period over period:{}".format(label)
        txt = """<h2 style="color:#4221cc;">{}-----------------------------------------------------------------</h2>"""\
            .format(text)
        self.section_headers[section].text = txt

    # -------------------- CALCULATE KPI's DEVELOPED FROM VARIABLES WITH STATITICALLY SIGNIFICANT EFFECT
    def card_text(self, title, data, card_design='folders'):
        """Return the HTML for a minimal (title, data) card."""
        try:
            txt = """ <div {}> <h3>{}</h3></br>{} </div> """.format(
                self.KPI_card_css[card_design], title, data)
            return txt
        except Exception:
            logger.error('card text', exc_info=True)

    def match_sigvars_to_coin_vars(self, df, interest_var):
        """Return {column: 'sum'} for df columns whose names contain any
        statistically significant feature cached in redis for interest_var."""
        try:
            # load statistically significant variables
            key = self.redis_stat_sig_key + '-' + interest_var
            # adjust the variable of interest to match the key
            key_vec = key.split(
                '-')  # strip the crypto name off of he variable
            gen_variables = [
                'release', 'watch', 'push', 'issue', 'fork', 'open', 'high',
                'low', 'close', 'volume', 'market_cap'
            ]
            for var in gen_variables:
                if var in key_vec[-1]:
                    key = key_vec[-2] + '-' + var
                    break
            sig_variables = self.redis.simple_load(key)
            self.sig_effect_dict = {}
            significant_features = {}
            # make a list of columns with names that include the significant feature
            if sig_variables is not None:
                if 'features' in sig_variables.keys():
                    if len(sig_variables['features']) > 0:
                        for col in df.columns:
                            if any(var in col
                                   for var in sig_variables['features']):
                                significant_features[col] = 'sum'
            return significant_features
        except Exception:
            logger.error('match sig vars to coin vars', exc_info=True)

    def calc_sig_effect_card_data(self, df, interest_var, period):
        """Build card HTML for interest_var per significant feature, using
        sum(interest_var)/sum(feature) as the point estimate ('*' when the
        denominator is zero). Returns None when nothing is significant."""
        try:
            significant_features = self.match_sigvars_to_coin_vars(
                df, interest_var=interest_var)
            if len(significant_features) > 0:
                cols = [interest_var] + list(significant_features.keys())
                tmp_df = df[cols]
                numer = tmp_df[interest_var].sum()
                variable_of_interest_tmp = interest_var.split('_')
                # cosmetic: 'watch' -> 'watche' so the 's' suffix reads 'watches'
                if variable_of_interest_tmp[-1] in ['watch']:
                    variable_of_interest_tmp[-1] += 'e'
                txt = ''
                for var in significant_features.keys():
                    point_estimate = 0
                    var_tmp = var.split(
                        '_')  # slice out the 'fork' from 'aion_fork'
                    if numer != 0:
                        denom = tmp_df[var].sum()
                        point_estimate = '*'
                        if denom != 0:
                            point_estimate = round(numer / denom, 3)
                    # add metrics based on variables
                    # update the divs
                    self.sig_effect_dict[var] = {
                        'title':
                        "{}s per {}".format(variable_of_interest_tmp[-1],
                                            var_tmp[-1]),
                        'point_estimate':
                        point_estimate
                    }
                    txt += self.card(
                        title=self.sig_effect_dict[var]['title'],
                        data=self.sig_effect_dict[var]['point_estimate'],
                        card_design=random.choice(
                            list(self.KPI_card_css.keys())))
                return txt
        except Exception:
            logger.error('make sig effect columns', exc_info=True)

    def update_significant_DV_cards(self, dct):
        """Stack precomputed card HTML snippets vertically in KPI_card_div."""
        try:
            txt = ''
            for idx, period in enumerate(dct.keys()):
                txt += dct[period]
            text = """<div style="margin-top:100px;display:flex; flex-direction:column;"> {} </div>""".format(txt)
            self.KPI_card_div.text = text
        except Exception:
            logger.error('update cards', exc_info=True)

    def payroll_to_date(self, period):
        """Estimate payroll spend so far in the current week/month/quarter/
        year from the weekly payroll figure."""
        try:
            # make data cards
            # number of weeks in period
            if period == 'year':
                weekcount = datetime.now().isocalendar()[1]
                payroll_to_date = self.payroll['week'] * weekcount
            elif period == 'week':
                # fraction of the current week elapsed
                payroll_to_date = self.payroll['week'] * (
                    datetime.today().weekday() / 7)
            elif period == 'month':
                weekcount = floor(
                    datetime.today().day / 7) + 1  # no zero week allowed
                payroll_to_date = self.payroll['week'] * weekcount
            elif period == 'quarter':
                start = self.first_date_in_quarter(datetime.today())
                weekcount = floor(
                    (abs(datetime.today() - start).days + 1) / 7) + 1
                payroll_to_date = self.payroll['week'] * weekcount
            return round(payroll_to_date, 2)
        except Exception:
            logger.error('payroll to date', exc_info=True)

    """
    groupby the the data and make ratios between significant variables
    and interest variables
    """

    def make_significant_ratios_df(self, df, resample_period, interest_var,
                                   timestamp_col):
        """Resample df and replace each significant feature column with the
        ratio interest_var/feature (NaNs filled with 0)."""
        try:
            def ratio(df, col_old, col_new):
                # per-partition: add interest_var/col_old as col_new
                df = df.assign(result=df[interest_var] / df[col_old])
                df = df.rename(columns={'result': col_new})
                #logger.warning('col-%s df:%s',col_old,df.head(5))
                return df

            # filter
            sig_features_dict = self.match_sigvars_to_coin_vars(
                df, interest_var)
            sig_features_dict[
                interest_var] = 'sum'  # include interest var in aggregations
            sig_features_list = list(sig_features_dict.keys())
            # rename column for overwriting
            sig_vars_relabel = []
            for feature in sig_features_list:
                tmp = feature.split('_')
                sig_vars_relabel.append(tmp[-1])
            # groupby
            df = df.set_index(timestamp_col)
            df = df.resample(resample_period).agg(sig_features_dict)
            #logger.warning('LINE 413:%s',len(df))
            # create ratios
            for idx, col in enumerate(sig_features_list):
                if col != interest_var:  # skip variable of interest
                    df = df.map_partitions(ratio, col, sig_vars_relabel[idx])
            # drop columns
            df = df.drop(sig_features_list, axis=1)
            df = df.fillna(0)
            return df
        except Exception:
            logger.error('significant ratios', exc_info=True)
from dask.dataframe.utils import make_meta
from scripts.utils.mylogger import mylogger
from scripts.databases.pythonRedis import PythonRedis
import gc
import re
from datetime import datetime
import pandas as pd
from scripts.utils.myutils import datetime_to_date

# Module-level redis client and logger shared by the helpers below.
r = PythonRedis()
logger = mylogger(__file__)


def remove_char(row):
    # Remove '[' characters from the row's 'transaction_hashes' value
    # (residue of a stringified list).
    # NOTE(review): '\[' should be a raw string (r'\[') to avoid Python's
    # invalid-escape warning; behavior is the same either way.
    return re.sub('\[', '', row['transaction_hashes'])


def list_to_rows(df, column, sep=',', keep=False):
    """
    Split the values of a column and expand so the new DataFrame has one split
    value per row. Filters rows where the column is missing.

    Params
    ------
    df : pandas.DataFrame
        dataframe with the column to split and expand
    column : str
        the column to split and expand
    sep : str