def construct_from_redis(key_lst, item_type='list', df=None, table=None, df_cols=None, dedup_cols=None):
    """Rebuild items cached in redis from a list of redis keys.

    :param key_lst: list of redis keys to load.
    :param item_type: 'list' loads each key as a list item; any other value
        treats the keys as date-ranged warehouse keys and loads a dataframe.
    :param df: seed dataframe used when item_type != 'list'.
    :param table, df_cols, dedup_cols: accepted for interface compatibility;
        not used by the current implementation.
    :return: list of loaded items, a dataframe (tab.df1), or None on
        empty input / error (errors are logged, not raised).
    """
    try:
        redis = PythonRedis()
        if not key_lst:
            return None
        temp_item = [] if item_type == 'list' else df
        start_list = []
        end_list = []
        for key in key_lst:
            if item_type == 'list':
                item_loaded = redis.load([], '', '', key, item_type)
                temp_item.append(item_loaded)
            else:
                # make list of start and end dates from the key
                # (key format: ...:<start YYYY-mm-dd>:<end YYYY-mm-dd>)
                logger.warning('key for churned load:%s', key)
                lst = key.split(':')
                if lst[-1] != '':
                    req_start_date = datetime.strptime(
                        lst[-2] + ' 00:00:00', '%Y-%m-%d %H:%M:%S')
                    req_end_date = datetime.strptime(
                        lst[-1] + ' 00:00:00', '%Y-%m-%d %H:%M:%S')
                    start_list.append(req_start_date)
                    end_list.append(req_end_date)
        if item_type == 'list':
            # BUG FIX: the return previously sat inside the loop, so only the
            # first key was ever loaded; now all keys are collected first.
            return temp_item
        tab = Mytab('block_tx_warehouse', cols['block_tx_warehouse']['models'], [])
        if len(start_list) > 0:
            # for the warehouse, span the minimum start to the maximum end
            # and retrieve from the database in one load
            tab.key_tab = 'models'
            req_start_date = min(start_list)
            req_end_date = max(end_list)
            tab.df_load(req_start_date, req_end_date)
            logger.warning('TRACKER:%s', tab.df.tail(10))
        return tab.df1
    except Exception:
        logger.error("construct from redis/clickhouse", exc_info=True)
def __init__(self, table, cols, dedup_cols, panel_title=None):
    """Initialize tab state: streaming dataframe, db/redis clients, and
    per-tab defaults.

    :param table: clickhouse table name backing this tab.
    :param cols: columns to stream/load.
    :param dedup_cols: columns used for de-duplication.
    :param panel_title: optional title displayed on the bokeh panel.
    """
    self.panel_title = panel_title
    self.table = table
    self.cols = cols
    self.locals = dict()  # stuff local to each tab
    self.streaming_dataframe = SD(table, cols, dedup_cols)
    self.df = self.streaming_dataframe.df
    self.df1 = None
    self.dedup_cols = dedup_cols
    self.params = None
    # FIX: a redundant `self.load_params = dict()` earlier was always
    # clobbered by this assignment with no intervening use; removed.
    self.load_params = None
    self.poolname_dict = self.get_poolname_dict()
    self.key_tab = ''  # for key composition in redis
    self.construction_tables = {}
    self.tier1_miners_list = []
    self.tier2_miners_list = []
    self.pq = PythonParquet()
    self.ch = PythonClickhouse('aion')
    self.redis = PythonRedis()
    self.conn = self.redis.conn
    self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
    self.ToA_THRESH = {  # Tests of association (TOA)
        'STRONG': .65,
        'MODERATE': .4,
        'WEAK': .25
    }
    self.menus = {'resample_periods': ['D', 'W', 'M', 'Q']}
    self.resample_period = self.menus['resample_periods'][0]
    self.pvalue_thresh = 0.1
    self.page_width = 1200
from scripts.databases.pythonRedis import PythonRedis
from scripts.utils.mylogger import mylogger

# Module-level logger named after this file.
logger = mylogger(__file__)
# Shared redis client for module-level helpers.
redis = PythonRedis()
def crypto_clusters_eda_tab(cryptos, panel_title):
    """Build the bokeh EDA tab that compares cryptocurrency clusters over time.

    Loads precomputed cluster assignments from redis, constructs a Thistab
    controller, wires up date/resample widgets, and returns a bokeh Panel.
    NOTE(review): relies on module-level globals (groupby_dict, features,
    cluster_dct, source) shared with other code — confirm before refactoring.
    """
    global groupby_dict
    global features
    global cluster_dct
    #global source
    redis = PythonRedis()
    # cluster membership previously computed elsewhere and cached in redis
    cluster_dct = redis.simple_load('clusters:cryptocurrencies')
    if cluster_dct is not None:
        # aggregate every clustered feature by summation when resampling
        groupby_dict = {}
        for var in cluster_dct['features']:
            groupby_dict[var] = 'sum'
        features = cluster_dct['features']
        # one ColumnDataSource per feature, shared with the multi_line glyphs below
        source = {}
        for feature in features:
            source[feature] = ColumnDataSource(
                data=dict(xs=[], ys=[], labels=[], colors=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=[]):
            # NOTE(review): mutable default argument kept as-is for compatibility.
            Mytab.__init__(self, table, cols, dedup_cols, panel_title=panel_title)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None  # raw loaded frame
            self.df1 = None  # prepped/grouped frame
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = [
                'watch', 'fork', 'issue', 'release', 'push', 'tw_mentions',
                'tw_positive', 'tw_compound', 'tw_neutral', 'tw_negative',
                'tw_emojis_positive', 'tw_emojis_compound',
                'tw_emojis_negative', 'tw_emojis_count', 'tw_reply_hashtags'
            ]
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']
            self.trigger = 0
            txt = """<div style="text-align:center;background:black;width:100%;"> <h1 style="color:#fff;"> {}</h1></div>""".format(
                'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=1400, height=20),
                'bottom': Div(text=txt, width=1400, height=10),
            }
            self.cluster_dct = cluster_dct
            self.groupby_dict = groupby_dict
            self.features = features
            self.crypto = 'all'
            self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.significant_effect_dict = {}
            self.df1 = None
            self.section_headers = {
                'ts': self.section_header_div(
                    'Comparison of clusters across variables:---------------------',
                    width=600)
            }
            self.timestamp_col = None
            self.colors = None

        # ---------------------- DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=1400):
            # Wrap `text` in a colored HTML header of the requested level.
            text = '<{} style="color:#4221cc;">{}</{}>'.format(
                html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=300):
            # Static help panel; the list items are placeholders awaiting copy.
            txt = """ <div {}> <h4 {}>How to interpret relationships </h4> <ul style='margin-top:-10px;'> <li> </li> <li> </li> <li> </li> <li> </li> <li> </li> <li> </li> </ul> </div> """.format(
                self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ////////////////////////// UPDATERS ///////////////////////
        def section_head_updater(self, section, txt):
            # NOTE(review): this indexes the *method* section_header_div rather
            # than the dict self.section_headers; any call will raise and be
            # swallowed by the except below — likely a latent bug, confirm intent.
            try:
                self.section_header_div[section].text = txt
            except Exception:
                logger.error('', exc_info=True)

        def notification_updater(self, text):
            # Push a status banner into both top and bottom notification divs.
            txt = """<div style="text-align:center;background:black;width:100%;"> <h4 style="color:#fff;"> {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        # /////////////////////////// LOAD CLUSTERS //////////////////////
        def prep_data(self, df, timestamp_col):
            # Map each crypto to its cluster label, then resample per cluster.
            def label_cluster(x):
                # cluster_dct maps cluster-name -> member coins, plus
                # 'timestamp'/'variables' bookkeeping keys which are skipped
                for key, values in self.cluster_dct.items():
                    if key not in ['timestamp', 'variables']:
                        if x in values:
                            return key
                return x

            try:
                cols = self.features + ['crypto', 'timestamp']
                df = df[cols]
                # groupby and resample
                df['crypto'] = df['crypto'].map(lambda x: label_cluster(x))
                df = df.rename(columns={'crypto': 'cluster'})
                # .compute() — presumably df arrives as a dask frame; confirm
                df = df.compute()
                df[timestamp_col] = pd.to_datetime(df[timestamp_col],
                                                   errors='coerce')
                df.set_index(timestamp_col, inplace=True)
                df = df.groupby('cluster').resample(self.resample_period).agg(
                    self.groupby_dict)
                df.reset_index(inplace=True)
                df.set_index(timestamp_col, inplace=True)
                self.timestamp_col = timestamp_col
                self.df1 = df
            except Exception:
                logger.error('prep data', exc_info=True)

        def graph_ts(self):
            # Refresh every feature's ColumnDataSource with per-cluster lines.
            try:
                #global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    clusters = df['cluster'].unique()
                    # one color per cluster, from the dashboard config palette
                    self.colors = [''] * len(clusters)
                    for idx, feature in enumerate(clusters):
                        self.colors[idx] = dashboard_config['colors'][idx]
                    if self.features is not None:
                        for idx, feature in enumerate(self.features):
                            df1 = df[['cluster', feature]]
                            # pivot into columns for cluster
                            df1 = df1.pivot(columns='cluster')
                            data = dict(
                                x=[df1.index.values] * len(clusters),
                                y=[df1[name].values for name in df1],
                                labels=clusters,
                                colors=self.colors)
                            source[feature].data = data
            except Exception:
                logger.error('graph ts', exc_info=True)

        def graph_chartify(self, timestamp_col):
            # Alternative chartify rendering of one line chart per feature.
            # NOTE(review): only a single chart object is returned; the call
            # site below is commented out, so this is currently unused.
            try:
                # global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    df = df.reset_index()
                    for feature in self.features:
                        ch = chartify.Chart(blank_labels=True,
                                            x_axis_type='datetime')
                        ch.set_title("CHARTIFY")
                        ch.plot.line(
                            # Data must be sorted by x column
                            data_frame=df.sort_values(timestamp_col),
                            x_column=timestamp_col,
                            y_column=feature,
                            color_column='cluster')
                    return ch
            except Exception:
                logger.error('graph chartify', exc_info=True)

    def update():
        # Button callback: reload data for the chosen dates and redraw.
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value, datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        # Select callback: re-aggregate at the newly chosen resample period.
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = resample_select.value
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("ready")

    try:
        table = 'external_daily'
        thistab = Thistab(table, [], [])
        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = dashboard_config['dates']['current_year_start']
        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp',
                        cols=[])
        thistab.prep_data(thistab.df, timestamp_col='timestamp')

        # MANAGE STREAMS ---------------------------------------------------------

        # CREATE WIDGETS ----------------------------------------------------------------
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        load_dates_button = Button(
            label="Select dates/periods, then click me!",
            width=20, height=8, button_type="success")
        resample_select = Select(title='Select summary period',
                                 value=thistab.resample_period,
                                 options=thistab.menus['resample_periods'])

        # -------------------------------- PLOTS ---------------------------
        thistab.graph_ts()
        p = {}
        for feature in features:
            p[feature] = figure(x_axis_type="datetime", plot_width=1400,
                                plot_height=400, title=feature)
            p[feature].multi_line(
                xs='x',
                ys='y',
                legend='labels',
                line_color='colors',
                line_width=5,
                hover_line_color='colors',
                hover_line_alpha=1.0,
                source=source[feature],
            )
            p[feature].add_tools(
                HoverTool(show_arrow=False,
                          line_policy='next',
                          tooltips=[
                              ('freq', '$y'),
                          ]))
        # ch = thistab.graph_chartify(timestamp_col='timestamp')

        # -------------------------------- CALLBACKS ------------------------
        load_dates_button.on_click(update)  # lags array
        resample_select.on_change('value', update_resample)

        # -----------------------------------LAYOUT ----------------------------
        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_left = WidgetBox(datepicker_start, load_dates_button)
        controls_right = WidgetBox(datepicker_end)
        grid_data = [
            #[ch.figure],
            [thistab.notification_div['top']],
            [controls_left, controls_right],
            [thistab.section_headers['ts'], resample_select],
        ]
        for feature in features:
            grid_data.append([p[feature]])
            logger.warning('p:%s', p[feature])
        grid_data.append([thistab.notification_div['bottom']])
        grid = gridplot(grid_data)

        # Make a tab with the layout
        tab = Panel(child=grid, title=thistab.panel_title)
        return tab
    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(thistab.panel_title)
def __init__(self, table, name, cols):
    """Set up shared KPI-tab state: db/redis clients, display styles,
    date formats, period-over-period defaults, and the payroll lookup."""
    # --- data sources and clients -------------------------------------
    self.df = None
    self.ch = PythonClickhouse('aion')
    self.redis = PythonRedis()
    self.table = table
    self.cols = cols

    # --- inline-style snippets reused by the bokeh Div widgets --------
    self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
    self.header_style = """ style='color:blue;text-align:center;' """
    self.welcome_txt = """<div style="text-align:center;background:black;width:100%;"> <h1 style="color:#fff;"> {}</h1></div>""".format('Welcome')

    # css bundle location (currently unused local, retained as-is)
    css_path = join(dirname(__file__),
                    "../../../static/css/KPI_interface.css")
    self.KPI_card_css = KPI_card_css

    # --- date handling ------------------------------------------------
    self.DATEFORMAT = '%Y-%m-%d %H:%M:%S'
    self.DATEFORMAT_PTD = '%Y-%m-%d'
    self.initial_date = datetime.strptime("2018-04-25 00:00:00",
                                          self.DATEFORMAT)

    # --- tab state defaults -------------------------------------------
    self.account_type = 'all'
    self.trigger = -1
    self.periods_to_plot = {1: ['week', 'month'], 2: ['quarter']}
    self.pop_history_periods = 3  # number of periods for period over period
    self.pop_start_date = None
    self.pop_end_date = None
    self.timestamp_col = ''
    self.checkboxgroup = {}
    self.sig_effect_dict = {}
    self.name = name
    self.redis_stat_sig_key = 'adoption_features:' + self.name
    self.card_grid_row = {'year': 0, 'quarter': 1, 'month': 2, 'week': 3}

    # --- payroll: weekly engineering cost scaled to longer periods ----
    weekly_pay = 1200
    num_engineers = 40
    weekly_total = weekly_pay * num_engineers
    self.payroll = {
        'week': weekly_total,
        'month': weekly_total * 4,
        'quarter': weekly_total * 4 * 3,
        'year': weekly_total * 4 * 3 * 4,
    }

    # default resample period comes from the class-level menus mapping
    self.resample_period = self.menus['resample_period'][0]
    # aggregation spec for the social-media columns
    self.groupby_dict = {
        'tw_mentions': 'sum',
        'tw_positive': 'mean',
        'tw_compound': 'mean',
        'tw_neutral': 'mean',
        'tw_negative': 'mean',
        'tw_emojis_positive': 'mean',
        'tw_emojis_compound': 'mean',
        'tw_emojis_negative': 'mean',
        'tw_emojis_count': 'sum',
        'tw_replies_from_followers': 'sum',
        'tw_replies_from_following': 'sum',
        'tw_reply_hashtags': 'sum'
    }
    self.pop_history_periods = 3  # repeats the assignment above (kept)
    self.variable = 'item'
    self.grouby_var = ''  # (sic) attribute name kept for callers
    self.page_width = 1200
class KPI:
    """Base class for KPI dashboard tabs: data loading windows, period-to-date
    and period-over-period framing, and KPI card rendering helpers.

    NOTE(review): several attributes used by methods here are expected to be
    provided by subclasses or later setup, not by __init__: self.pym,
    self.notification_div, self.KPI_card_div, self.section_headers — confirm
    against subclass code.
    """

    # Widget menu options shared by all KPI tabs (class-level constant).
    menus = {
        'account_type': ['all', 'contract', 'miner', 'native_user',
                         'token_user'],
        'update_type': [
            'all', 'contract_deployment', 'internal_transfer', 'mined_block',
            'token_transfer', 'transaction'
        ],
        'history_periods': ['1', '2', '3', '4', '5', '6', '7', '8', '9',
                            '10'],
        'developer_adoption_DVs': ['aion_fork', 'aion_watch'],
        'resample_period': ['W', 'M', 'Q'],
        'social_media': ['twitter', 'facebook'],
        'social_media_variables': [
            'tw_mentions', 'tw_positive', 'tw_compound', 'tw_neutral',
            'tw_negative', 'tw_emojis_positive', 'tw_emojis_compound',
            'tw_emojis_negative', 'tw_emojis_count',
            'tw_replies_from_followers', 'tw_replies_from_following',
            'tw_reply_hashtags'
        ],
        'cryptos': ['all'] + load_cryptos(),
        'bcc': {
            'rental': ['area', 'category', 'item', 'status', 'gender']
        }
    }

    def __init__(self, table, name, cols):
        """Initialize clients, display styles, date formats, and defaults.

        :param table: backing table name ('bcc...' tables route loads
            through load_df_pym instead of clickhouse).
        :param name: tab name, used to compose the redis stat-sig key.
        :param cols: columns this tab loads.
        """
        self.df = None
        self.ch = PythonClickhouse('aion')
        self.redis = PythonRedis()
        self.table = table
        self.cols = cols
        # inline-style snippets reused by bokeh Div widgets
        self.div_style = """ style='width:350px; margin-left:25px; border:1px solid #ddd;border-radius:3px;background:#efefef50;' """
        self.header_style = """ style='color:blue;text-align:center;' """
        self.welcome_txt = """<div style="text-align:center;background:black;width:100%;"> <h1 style="color:#fff;"> {}</h1></div>""".format('Welcome')
        # NOTE(review): css_path is computed but never used in this method.
        css_path = join(dirname(__file__),
                        "../../../static/css/KPI_interface.css")
        self.KPI_card_css = KPI_card_css
        self.DATEFORMAT = '%Y-%m-%d %H:%M:%S'
        self.DATEFORMAT_PTD = '%Y-%m-%d'
        self.initial_date = datetime.strptime("2018-04-25 00:00:00",
                                              self.DATEFORMAT)
        self.account_type = 'all'
        self.trigger = -1
        self.periods_to_plot = {1: ['week', 'month'], 2: ['quarter']}
        self.pop_history_periods = 3  # number of periods for period over period
        self.pop_start_date = None
        self.pop_end_date = None
        self.timestamp_col = ''
        self.checkboxgroup = {}
        self.sig_effect_dict = {}
        self.name = name
        self.redis_stat_sig_key = 'adoption_features:' + self.name
        self.card_grid_row = {'year': 0, 'quarter': 1, 'month': 2, 'week': 3}
        # weekly engineering payroll scaled to month/quarter/year
        weekly_pay = 1200
        num_engineers = 40
        self.payroll = {
            'week': weekly_pay * num_engineers,
            'month': weekly_pay * num_engineers * 4,
            'quarter': weekly_pay * num_engineers * 4 * 3,
            'year': weekly_pay * num_engineers * 4 * 3 * 4
        }
        self.resample_period = self.menus['resample_period'][0]
        # aggregation spec for the social-media columns
        self.groupby_dict = {
            'tw_mentions': 'sum',
            'tw_positive': 'mean',
            'tw_compound': 'mean',
            'tw_neutral': 'mean',
            'tw_negative': 'mean',
            'tw_emojis_positive': 'mean',
            'tw_emojis_compound': 'mean',
            'tw_emojis_negative': 'mean',
            'tw_emojis_count': 'sum',
            'tw_replies_from_followers': 'sum',
            'tw_replies_from_following': 'sum',
            'tw_reply_hashtags': 'sum'
        }
        # NOTE(review): duplicate of the assignment above (same value).
        self.pop_history_periods = 3  # number of periods for period over period
        self.variable = 'item'
        self.grouby_var = ''
        self.page_width = 1200

    # make block timestamp the index
    def load_df(self, start_date, end_date, cols,
                timestamp_col='timestamp_of_first_event',
                supplemental_where=None):
        """Load [start_date, end_date] (end inclusive via +1 day) from
        clickhouse; errors are logged and None is returned."""
        try:
            # normalize date -> datetime at midnight
            if isinstance(end_date, date):
                end_date = datetime.combine(end_date, datetime.min.time())
            if isinstance(start_date, date):
                start_date = datetime.combine(start_date, datetime.min.time())
            end_date += timedelta(days=1)
            temp_cols = cols.copy()
            # always pull 'amount' for non-external tables
            if self.table != 'external_daily':
                if 'amount' not in temp_cols:
                    temp_cols.append('amount')
            df = self.ch.load_data(self.table, temp_cols, start_date,
                                   end_date, timestamp_col,
                                   supplemental_where)
            # filter out the double entry
            #df = df[df['value'] >= 0]
            if len(cols) > 0:
                return df[cols]
            else:
                return df
            #df[timestamp_col] = df[timestamp_col].map(lambda x: clean_dates_from_db(x))
        except Exception:
            logger.error('load df', exc_info=True)

    def load_df_pym(self, req_startdate, req_enddate, table, cols,
                    timestamp_col):
        """Serve the requested window from the cached self.df when it covers
        the range; otherwise delegate to self.pym.

        NOTE(review): self.pym is not set in __init__ — presumably attached
        by a subclass; confirm. Also see the call in period_over_period,
        which passes only 4 arguments (no `table`) — that call would raise
        TypeError and be swallowed by period_over_period's except.
        """
        try:
            # get min and max of loaded df
            if self.df is not None:
                loaded_min = self.df[timestamp_col].min()
                loaded_max = self.df[timestamp_col].max()
                if loaded_min <= req_startdate and loaded_max >= req_enddate:
                    df = self.df[(self.df[timestamp_col] >= req_startdate)
                                 & (self.df[timestamp_col] <= req_enddate)]
                    return df
            return self.pym.load_df(req_startdate, req_enddate, table=table,
                                    cols=cols, timestamp_col=timestamp_col)
        except Exception:
            logger.error('load_df', exc_info=True)

    def update_cards(self, dct):
        """Render one period-to-date card per entry of dct into KPI_card_div."""
        try:
            txt = ''
            for period, data in dct.items():
                # random card style for visual variety
                design = random.choice(list(KPI_card_css.keys()))
                title = period + ' to date'
                txt += self.card(title=title, data=data, card_design=design)
            text = """<div style="margin-top:100px;display:flex; flex-direction:row;"> {} </div>""".format(txt)
            self.KPI_card_div.text = text
        except Exception:
            logger.error('update cards', exc_info=True)

    def reset_checkboxes(self, value='all', checkboxgroup=''):
        """Reset the named checkbox group widget back to `value`."""
        try:
            self.checkboxgroup[checkboxgroup].value = value
        except Exception:
            logger.error('reset checkboxes', exc_info=True)

    def first_date_in_quarter(self, timestamp):
        """Return midnight of the first day of `timestamp`'s quarter."""
        try:
            curr_quarter = int((timestamp.month - 1) / 3 + 1)
            return datetime(timestamp.year, 3 * curr_quarter - 2, 1)
        except Exception:
            logger.error('period to date', exc_info=True)

    def first_date_in_period(self, timestamp, period):
        """Return the first datetime of the week/month/quarter/year
        containing `timestamp`. Unknown periods leave `start` unbound
        (NameError, caught and logged — returns None)."""
        try:
            if period == 'week':
                start = timestamp - timedelta(days=timestamp.weekday())
            elif period == 'month':
                start = datetime(timestamp.year, timestamp.month, 1, 0, 0, 0)
            elif period == 'year':
                start = datetime(timestamp.year, 1, 1, 0, 0, 0)
            elif period == 'quarter':
                start = self.first_date_in_quarter(timestamp)
            return start
        except Exception:
            logger.error('period to date', exc_info=True)

    def period_to_date(self, df, timestamp=None, timestamp_filter_col=None,
                       cols=[], period='week'):
        """Filter df to rows between the period start and `timestamp`
        (defaults to now, truncated to the hour)."""
        try:
            if timestamp is None:
                timestamp = datetime.now()
                timestamp = datetime(timestamp.year, timestamp.month,
                                     timestamp.day, timestamp.hour, 0, 0)
            start = self.first_date_in_period(timestamp, period)
            # filter
            if timestamp_filter_col is None:
                timestamp_filter_col = self.timestamp_col
            #logger.warning('df:%s',df[timestamp_filter_col])
            df = df[(df[timestamp_filter_col] >= start)
                    & (df[timestamp_filter_col] <= timestamp)]
            if len(cols) > 0:
                df = df[cols]
            return df
        except Exception:
            logger.error('period to date', exc_info=True)

    # NOTE(review): defined without `self` at class level; an identical
    # nested helper inside label_dates_pop is what is actually used.
    def label_qtr_pop(y):
        try:
            curr_quarter = int((y.month - 1) / 3 + 1)
            start = datetime(y.year, 3 * curr_quarter - 2, 1)
            return abs((start - y).days)
        except Exception:
            logger.error('df label quarter', exc_info=True)

    def shift_period_range(self, period, start, end):
        """Shift [start, end] back by exactly one week/month/quarter/year."""
        try:
            if period == 'week':
                start = start - timedelta(days=7)
                end = end - timedelta(days=7)
            elif period == 'month':
                start = start - relativedelta(months=1)
                end = end - relativedelta(months=1)
            elif period == 'year':
                start = start - relativedelta(years=1)
                end = end - relativedelta(years=1)
            elif period == 'quarter':
                start = start - relativedelta(months=3)
                end = end - relativedelta(months=3)
            #logger.warning('%s start:end=%s:%s',period,start,end)
            return start, end
        except Exception:
            logger.error('shift period range', exc_info=True)

    # label dates for period over period (pop)
    def label_dates_pop(self, df, period, timestamp_col):
        """Add a 'dayset' column giving each row's day offset within its
        period, so rows from different periods can be aligned."""
        logger.warning('timestamp col:%s', df.head(10))

        def label_qtr_pop(y):
            # days elapsed since the start of y's quarter
            try:
                curr_quarter = int((y.month - 1) / 3 + 1)
                start = datetime(y.year, 3 * curr_quarter - 2, 1)
                return abs((start - y).days)
            except Exception:
                logger.error('df label quarter', exc_info=True)

        try:
            if len(df) > 0:
                if period == 'week':
                    df = df.assign(
                        dayset=lambda x: x[timestamp_col].dt.dayofweek)
                elif period == 'month':
                    df = df.assign(dayset=lambda x: x[timestamp_col].dt.day)
                elif period == 'year':
                    df = df.assign(
                        dayset=lambda x: x[timestamp_col].dt.dayofyear)
                elif period == 'quarter':
                    df['dayset'] = df[timestamp_col].map(label_qtr_pop)
            return df
        except Exception:
            logger.error('label data ', exc_info=True)

    def pop_include_zeros(self, df_period, plotcols, period):
        """Ensure a zero-filled column exists for every expected pop period
        so plots always have the full set of series."""
        try:
            # check for no data on original dates
            tmp_title = '0 {}(s) prev(current)'.format(period)
            if tmp_title not in plotcols:
                df_period[tmp_title] = [0] * len(df_period)
                plotcols.append(tmp_title)
                logger.warning('line 218 cols to plot:%s', plotcols)
            # do other periods
            tmp = plotcols[0]
            txt = tmp[1:]  # column title minus its leading period number
            if isinstance(self.pop_history_periods, str):
                self.pop_history_periods = int(self.pop_history_periods)
            for i in range(1, self.pop_history_periods):
                tmp_txt = str(i) + txt
                if tmp_txt not in plotcols:
                    df_period[tmp_txt] = [0] * len(df_period)
                    plotcols.append(tmp_txt)
            logger.warning('LINE 158 plotcols at end of pop include zeros:%s',
                           plotcols)
            return df_period, sorted(plotcols)
        except Exception:
            logger.error('pop include zeros', exc_info=True)

    def period_over_period(self, df, start_date, end_date, period,
                           history_periods=2,
                           timestamp_col='timestamp_of_first_event'):
        """Stack the current window with up to `history_periods` earlier
        windows of the same length, labeling each with a 'period' column."""
        try:
            # filter cols if necessary
            string = '0 {}(s) prev(current)'.format(period)
            # filter out the dates greater than today
            df_current = df.assign(period=string)
            # label the days being compared with the same label
            if len(df_current) > 0:
                df_current = self.label_dates_pop(df_current, period,
                                                  timestamp_col)
            # zero out time information
            start = datetime(start_date.year, start_date.month,
                             start_date.day, 0, 0, 0)
            end = datetime(end_date.year, end_date.month, end_date.day, 0, 0,
                           0)
            cols = list(df.columns)
            logger.warning(' Line 293 %s:df %s', period, df.head(10))
            logger.warning(' Line 293 %s:df cols %s', period, cols)
            counter = 1
            if isinstance(history_periods, str):
                history_periods = int(history_periods)
            # make dataframes for request no. of periods
            start, end = self.shift_period_range(period, start, end)
            while counter < history_periods and start >= self.initial_date:
                # load data
                if period == 'quarter':
                    logger.warning('start:end %s:%s', start, end)
                if 'bcc' in self.table:
                    # NOTE(review): load_df_pym takes (start, end, table,
                    # cols, timestamp_col) — this 4-arg call is missing
                    # `table`, so it would raise TypeError (swallowed below).
                    df_temp = self.load_df_pym(start, end, cols,
                                               timestamp_col)
                else:
                    df_temp = self.load_df(start, end, cols, timestamp_col)
                if df_temp is not None:
                    if len(df_temp) > 1:
                        string = '{} {}(s) prev'.format(counter, period)
                        # label period
                        df_temp = df_temp.assign(period=string)
                        # relabel days to get matching day of week,doy, dom, for different periods
                        df_temp = self.label_dates_pop(df_temp, period,
                                                       timestamp_col)
                        #logger.warning('df temp loaded for %s previous: %s',counter,len(df_temp))
                        df_current = concat_dfs(df_current, df_temp)
                        del df_temp
                        gc.collect()
                # shift the loading window
                counter += 1
                start, end = self.shift_period_range(period, start, end)
                if period == 'week':
                    logger.warning('LINE 327 df_current:%s',
                                   df_current.head(10))
            return df_current
        except Exception:
            logger.error('period over period', exc_info=True)

    def pop_week(self, launch=-1):
        try:
            return self.graph_period_over_period('week')
        except Exception:
            logger.error('pop week', exc_info=True)

    def pop_month(self, launch=-1):
        try:
            return self.graph_period_over_period('month')
        except Exception:
            logger.error('pop month', exc_info=True)

    def pop_quarter(self, launch=-1):
        try:
            return self.graph_period_over_period('quarter')
        except Exception:
            logger.error('pop quarter', exc_info=True)

    def pop_year(self, launch=-1):
        try:
            return self.graph_period_over_period('year')
        except Exception:
            logger.error('pop year', exc_info=True)

    """
    To enable comparision across period, dates must have label relative to period start.
    Place dates in columns to be able to plot multi-line/bar graphs
    """

    def split_period_into_columns(self, df, col_to_split, value_to_copy):
        """For each distinct value in col_to_split, create a column holding
        value_to_copy on matching rows and 0 elsewhere."""
        try:
            for item in df[col_to_split].unique():
                df[item] = df.apply(lambda x: x[value_to_copy]
                                    if x[col_to_split] == item else 0,
                                    axis=1)
            #logger.warning('split period into columns:%s', df.head(10))
            return df
        except Exception:
            logger.error('split period into column', exc_info=True)

    # ----------------------- UPDATERS ------------------------------------------
    def card(self, title, data, width=200, height=200,
             card_design='folders'):
        """Return the HTML for one KPI card styled per KPI_card_css."""
        try:
            txt = """ <div style="flex: 1 1 0px;border: 1px solid black;{};width:{}px; height:{}px;border-right=10px;"> <h3> {} </h3> </br> {} </div>""".format(
                self.KPI_card_css[card_design], width, height, title, data)
            return txt
        except Exception:
            logger.error('card', exc_info=True)

    def notification_updater(self, text):
        """Push a status banner into every registered notification div."""
        txt = """<hr/><div style="text-align:center;width:{}px;height:{}px; position:relative;background:black;"> <h1 style="color:#fff;margin-bottom:300px">{}</h1> </div>""".format(
            self.page_width, 50, text)
        for key in self.notification_div.keys():
            self.notification_div[key].text = txt

    """
    update the section labels on the page
    """

    def section_header_updater(self, section, label='all'):
        # NOTE(review): `text` is only bound for section in
        # {'cards', 'pop'}; any other section raises NameError here.
        if label not in ['all', '', 'remuneration']:
            label = label + 's'
        if section == 'cards':
            text = "Period to date:"
            if label == 'remuneration':
                text = text + '$ spent'
            if label == 'project':
                text = text + '# of projects'
            if label == 'delay_start':
                text = text + 'Mean delay in start projects(hours)'
            if label == 'delay_end':
                text = text + 'Mean project overrun(hours)'
            if label == 'project_duration':
                text = text + 'Mean project duration (days)'
            if label == 'task_duration':
                text = text + 'Total project person hours)'
        elif section == 'pop':
            text = "Period over period:{}".format(label)
        txt = """<h2 style="color:#4221cc;">{}-----------------------------------------------------------------</h2>"""\
            .format(text)
        self.section_headers[section].text = txt

    # -------------------- CALCULATE KPI's DEVELOPED FROM VARIABLES WITH STATITICALLY SIGNIFICANT EFFECT
    def card_text(self, title, data, card_design='folders'):
        """Return the HTML for a minimal (title, data) card."""
        try:
            txt = """ <div {}> <h3>{}</h3></br>{} </div> """.format(
                self.KPI_card_css[card_design], title, data)
            return txt
        except Exception:
            logger.error('card text', exc_info=True)

    def match_sigvars_to_coin_vars(self, df, interest_var):
        """Return {column: 'sum'} for df columns whose names contain any
        statistically significant feature cached in redis for interest_var."""
        try:
            # load statistically significant variables
            key = self.redis_stat_sig_key + '-' + interest_var
            # adjust the variable of interest to match the key
            key_vec = key.split(
                '-')  # strip the crypto name off of he variable
            gen_variables = [
                'release', 'watch', 'push', 'issue', 'fork', 'open', 'high',
                'low', 'close', 'volume', 'market_cap'
            ]
            for var in gen_variables:
                if var in key_vec[-1]:
                    key = key_vec[-2] + '-' + var
                    break
            sig_variables = self.redis.simple_load(key)
            self.sig_effect_dict = {}
            significant_features = {}
            # make a list of columns with names that include the significant feature
            if sig_variables is not None:
                if 'features' in sig_variables.keys():
                    if len(sig_variables['features']) > 0:
                        for col in df.columns:
                            if any(var in col
                                   for var in sig_variables['features']):
                                significant_features[col] = 'sum'
            return significant_features
        except Exception:
            logger.error('match sig vars to coin vars', exc_info=True)

    def calc_sig_effect_card_data(self, df, interest_var, period):
        """Build card HTML for interest_var per significant feature, using
        sum(interest_var)/sum(feature) as the point estimate ('*' when the
        denominator is zero). Returns None when nothing is significant."""
        try:
            significant_features = self.match_sigvars_to_coin_vars(
                df, interest_var=interest_var)
            if len(significant_features) > 0:
                cols = [interest_var] + list(significant_features.keys())
                tmp_df = df[cols]
                numer = tmp_df[interest_var].sum()
                variable_of_interest_tmp = interest_var.split('_')
                # cosmetic: 'watch' -> 'watche' so the 's' suffix reads 'watches'
                if variable_of_interest_tmp[-1] in ['watch']:
                    variable_of_interest_tmp[-1] += 'e'
                txt = ''
                for var in significant_features.keys():
                    point_estimate = 0
                    var_tmp = var.split(
                        '_')  # slice out the 'fork' from 'aion_fork'
                    if numer != 0:
                        denom = tmp_df[var].sum()
                        point_estimate = '*'
                        if denom != 0:
                            point_estimate = round(numer / denom, 3)
                    # add metrics based on variables
                    # update the divs
                    self.sig_effect_dict[var] = {
                        'title':
                        "{}s per {}".format(variable_of_interest_tmp[-1],
                                            var_tmp[-1]),
                        'point_estimate':
                        point_estimate
                    }
                    txt += self.card(
                        title=self.sig_effect_dict[var]['title'],
                        data=self.sig_effect_dict[var]['point_estimate'],
                        card_design=random.choice(
                            list(self.KPI_card_css.keys())))
                return txt
        except Exception:
            logger.error('make sig effect columns', exc_info=True)

    def update_significant_DV_cards(self, dct):
        """Stack precomputed card HTML snippets vertically in KPI_card_div."""
        try:
            txt = ''
            for idx, period in enumerate(dct.keys()):
                txt += dct[period]
            text = """<div style="margin-top:100px;display:flex; flex-direction:column;"> {} </div>""".format(txt)
            self.KPI_card_div.text = text
        except Exception:
            logger.error('update cards', exc_info=True)

    def payroll_to_date(self, period):
        """Estimate payroll spend so far in the current week/month/quarter/
        year from the weekly payroll figure."""
        try:
            # make data cards
            # number of weeks in period
            if period == 'year':
                weekcount = datetime.now().isocalendar()[1]
                payroll_to_date = self.payroll['week'] * weekcount
            elif period == 'week':
                # fraction of the current week elapsed
                payroll_to_date = self.payroll['week'] * (
                    datetime.today().weekday() / 7)
            elif period == 'month':
                weekcount = floor(
                    datetime.today().day / 7) + 1  # no zero week allowed
                payroll_to_date = self.payroll['week'] * weekcount
            elif period == 'quarter':
                start = self.first_date_in_quarter(datetime.today())
                weekcount = floor(
                    (abs(datetime.today() - start).days + 1) / 7) + 1
                payroll_to_date = self.payroll['week'] * weekcount
            return round(payroll_to_date, 2)
        except Exception:
            logger.error('payroll to date', exc_info=True)

    """
    groupby the the data and make ratios between significant variables
    and interest variables
    """

    def make_significant_ratios_df(self, df, resample_period, interest_var,
                                   timestamp_col):
        """Resample df and replace each significant feature column with the
        ratio interest_var/feature (NaNs filled with 0)."""
        try:
            def ratio(df, col_old, col_new):
                # per-partition: add interest_var/col_old as col_new
                df = df.assign(result=df[interest_var] / df[col_old])
                df = df.rename(columns={'result': col_new})
                #logger.warning('col-%s df:%s',col_old,df.head(5))
                return df

            # filter
            sig_features_dict = self.match_sigvars_to_coin_vars(
                df, interest_var)
            sig_features_dict[
                interest_var] = 'sum'  # include interest var in aggregations
            sig_features_list = list(sig_features_dict.keys())
            # rename column for overwriting
            sig_vars_relabel = []
            for feature in sig_features_list:
                tmp = feature.split('_')
                sig_vars_relabel.append(tmp[-1])
            # groupby
            df = df.set_index(timestamp_col)
            df = df.resample(resample_period).agg(sig_features_dict)
            #logger.warning('LINE 413:%s',len(df))
            # create ratios
            for idx, col in enumerate(sig_features_list):
                if col != interest_var:  # skip variable of interest
                    df = df.map_partitions(ratio, col, sig_vars_relabel[idx])
            # drop columns
            df = df.drop(sig_features_list, axis=1)
            df = df.fillna(0)
            return df
        except Exception:
            logger.error('significant ratios', exc_info=True)
from dask.dataframe.utils import make_meta
from scripts.utils.mylogger import mylogger
from scripts.databases.pythonRedis import PythonRedis
import gc
import re
from datetime import datetime
import pandas as pd
from scripts.utils.myutils import datetime_to_date

# Module-level redis client and logger shared by the helpers below.
r = PythonRedis()
logger = mylogger(__file__)


def remove_char(row):
    # Remove '[' characters from the row's 'transaction_hashes' value
    # (residue of a stringified list).
    # NOTE(review): '\[' should be a raw string (r'\[') to avoid Python's
    # invalid-escape warning; behavior is the same either way.
    return re.sub('\[', '', row['transaction_hashes'])


def list_to_rows(df, column, sep=',', keep=False):
    """
    Split the values of a column and expand so the new DataFrame has one split
    value per row. Filters rows where the column is missing.

    Params
    ------
    df : pandas.DataFrame
        dataframe with the column to split and expand
    column : str
        the column to split and expand
    sep : str