Python tab_error_flag Beispiele

Programmiersprache: Python

Namespace / Paketname: scripts.utils.myutils

Methode / Funktion: tab_error_flag

Beispiele auf hotexamples.com: 20

Python tab_error_flag – 20 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion scripts.utils.myutils.tab_error_flag, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Beispiel #1

Datei anzeigen

Datei: rentals.py Projekt: andre-aion/analytics_demo

def forecasting_bcc_rentals_visitor_tab(panel_title):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """Initialise state, menus, widgets and header divs for the tab.

            Args:
                table: table name later handed to the data loader.
                cols: column names retained by ``filter_df``.
                dedup_cols: forwarded to ``Mytab.__init__``; ``None`` (the
                    default) is replaced by a fresh empty list.
            """
            # A literal ``[]`` default is one list object shared by all calls.
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            self.trigger = 0
            # column -> aggregation used by prep_data after groupby/resample
            self.groupby_dict = {
                'category': 'nunique',
                'item': 'nunique',
                'area': 'nunique',
                'visit_duration': 'mean',
                'age': 'mean',
                'gender_coded': 'mean',
                'status_coded': 'mean',
                'rental_employee_gender_coded': 'mean',
                'rental_employee_age': 'mean',
                'rental_tab': 'sum',
            }

            self.feature_list = ['age', 'rental_employee_age', 'rental_tab']
            self.tsa_variable = 'rental_tab'
            self.forecast_days = 40
            self.lag_variable = 'visit_duration'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            # correlation strength cut-offs
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.variables = sorted(self.groupby_dict)
            self.variable = 'rental_tab'

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            self.pym = PythonMongo('aion')
            self.menus = {
                'item': ['all'],
                'category': ['all'],
                'status': ['all', 'guest', 'member'],
                'gender': ['all', 'male', 'female'],
                'variables': list(self.groupby_dict),
                'history_periods': [str(n) for n in range(1, 11)],
                'area': ['all', 'bar', 'rentals'],
                'tsa': ['rental_tab', 'visit_duration'],
            }

            # One drop-down per filterable dimension; every one starts at 'all'.
            self.select = {
                'area': Select(title='Select BCC area',
                               value='all',
                               options=self.menus['area']),
                'item': Select(title='Select item',
                               value='all',
                               options=self.menus['item']),
                'status': Select(title='Select visitor status',
                                 value='all',
                                 options=self.menus['status']),
                'gender': Select(title="Select visitor gender",
                                 value='all',
                                 options=self.menus['gender']),
                'category': Select(title="Select category",
                                   value='all',
                                   options=self.menus['category']),
                # Previously a verbatim copy of the category widget (same title
                # and options); an employee-gender filter needs the gender menu.
                'rental_employee_gender': Select(
                    title="Select rental employee gender",
                    value='all',
                    options=self.menus['gender']),
            }
            self.select_values = {key: 'all' for key in self.select}

            self.multiline_vars = {'x': 'gender', 'y': 'rental_tab'}
            self.timestamp_col = 'visit_start'

            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                    <div style="text-align:center;width:{}px;height:{}px;
                           position:relative;background:black;margin-bottom:200px">
                           <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)

            self.section_divider = '-----------------------------------'
            make_header = self.section_header_div
            # every section header shares the same geometry
            geometry = dict(width=600, margin_top=5, margin_bottom=-155)
            self.section_headers = {
                'lag':
                make_header(text=lag_section_head_txt,
                            html_header='h2',
                            **geometry),
                'distribution':
                make_header(text='Pre-transform distribution:',
                            html_header='h2',
                            **geometry),
                'relationships':
                make_header(text='Relationships between variables:{}'.format(
                    self.section_divider),
                            html_header='h2',
                            **geometry),
                'correlations':
                make_header(text='Correlations:',
                            html_header='h3',
                            **geometry),
                'forecast':
                make_header(text='Forecasts:{}'.format(self.section_divider),
                            html_header='h2',
                            **geometry),
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a Div showing ``text`` inside a coloured heading element.

            Args:
                text: heading text.
                html_header: tag name used for the heading (e.g. ``'h2'``).
                width: width passed to the Div.
                margin_top: value interpolated into ``margin-top``.
                margin_bottom: value interpolated after the template's own
                    minus sign in ``margin-bottom``.
            """
            # NOTE(review): the template already carries a "-" before the
            # bottom margin, so a negative argument (including the default)
            # renders as "--150px" -- confirm whether that is intended.
            template = ('<div style="margin-top:{}px;margin-bottom:-{}px;">'
                        '<{} style="color:#4221cc;">{}</{}></div>')
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

        def notification_updater(self, text):
            txt = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a Div whose content is ``text`` wrapped in a coloured ``<h2>``."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Return a Div with a static guide to reading the relationship results.

            The box uses its own inline style (not ``self.div_style``); the
            heading is styled with ``self.header_style``.
            """
            box_style = """ 
                style='width:350px; margin-left:-600px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            guide_html = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """.format(box_style, self.header_style)
            return Div(text=guide_html, width=width, height=height)

        # /////////////////////////////////////////////////////////////

        def load_df(self, req_startdate, req_enddate, table, cols,
                    timestamp_col):
            """Return rows for the requested window, reusing ``self.df`` when it covers it.

            When ``self.df`` is set and its ``timestamp_col`` range spans
            ``[req_startdate, req_enddate]``, the matching rows are sliced from
            it; otherwise the request is delegated to ``self.pym.load_df``.
            Any exception is logged and the method then returns ``None``.
            """
            try:
                cached = self.df
                if cached is not None:
                    stamps = cached[timestamp_col]
                    earliest, latest = stamps.min(), stamps.max()
                    # the cached frame can serve the request only if it spans it
                    if earliest <= req_startdate and latest >= req_enddate:
                        in_window = ((stamps >= req_startdate)
                                     & (stamps <= req_enddate))
                        return cached[in_window]
                return self.pym.load_df(req_startdate,
                                        req_enddate,
                                        table=table,
                                        cols=cols,
                                        timestamp_col=timestamp_col)

            except Exception:
                logger.error('load_df', exc_info=True)

        def filter_df(self, df1):
            """Keep ``self.cols`` and apply the active selections for 'count' columns.

            For each ``groupby_dict`` entry aggregated with ``'count'`` whose
            ``select_values`` entry is not ``'all'``, only rows equal to the
            selected value are kept. Exceptions are logged and ``None`` is
            returned in that case.
            """
            # NOTE(review): none of the groupby_dict values set in __init__ is
            # 'count', so as written no row filter is applied -- confirm intent.
            try:
                subset = df1[self.cols]
                count_keys = (name for name, how in self.groupby_dict.items()
                              if how == 'count')
                for name in count_keys:
                    chosen = self.select_values[name]
                    if chosen == 'all':
                        continue
                    subset = subset[subset[name] == chosen]
                return subset

            except Exception:
                logger.error('filter', exc_info=True)

        def prep_data(self, df):
            """Filter, encode and resample ``df``, storing the results on the tab.

            Side effects: ``self.df`` becomes the filtered frame indexed by
            ``self.timestamp_col``; ``self.df1`` becomes the per-``name``
            resampled aggregate (``self.groupby_dict``) with NaN replaced by 0.

            Args:
                df: frame carrying ``self.timestamp_col`` as a column, or as
                    its index when it is a frame stored by an earlier call.

            Returns:
                ``self.df1``: the new aggregate, or the previous value when
                preparation failed (failures are logged, not raised).
            """
            try:
                # An earlier call leaves self.df indexed by the timestamp and
                # the widget callbacks pass that frame back in; put the column
                # back so the column selection and set_index below still work.
                if df is not None and df.index.name == self.timestamp_col:
                    df = df.reset_index()

                filtered = self.filter_df(df)
                if filtered is None:
                    # filter_df has already logged why it failed
                    return self.df1
                # filter_df returns a slice; copy before adding columns to
                # avoid pandas chained-assignment warnings.
                filtered = filtered.copy()

                # set up code columns
                codes = {
                    'gender': {
                        'male': 1,
                        'female': 2,
                        'other': 3
                    },
                    'status': {
                        'guest': 1,
                        'member': 2
                    }
                }
                for col in list(filtered.columns):
                    coded_col = col + '_coded'
                    if 'gender' in col:
                        filtered[coded_col] = filtered[col].map(codes['gender'])
                    if col == 'status':
                        filtered[coded_col] = filtered[col].map(codes['status'])

                self.df = filtered.set_index(self.timestamp_col)

                # resample_period is not assigned in this class's __init__;
                # fall back to daily, the resample widget's initial value.
                period = getattr(self, 'resample_period', 'D')
                # groupby and resample
                grouped = self.df.groupby('name').resample(period).agg(
                    self.groupby_dict)
                self.df1 = grouped.reset_index().fillna(0)

                logger.warning('LINE 288 df:%s', self.df1.head(10))

            except Exception:
                logger.error('prep data', exc_info=True)

            # Callers assign this result to df1, so never hand back a bare None
            # while a usable aggregate exists.
            return self.df1

        def tsa(self, launch):
            """Fit Prophet on the daily mean of ``self.tsa_variable`` and plot the forecast.

            Args:
                launch: value supplied by the stream that drives the
                    DynamicMap; it is not read inside the method.

            Returns:
                A layout of two overlays: the forecast (``yhat`` line plus
                lower/upper scatter) and the ``trend``/``weekly`` components.
                On any exception the error is logged and ``None`` is returned.
            """
            try:
                target = self.tsa_variable
                daily = self.df.resample('D').agg({target: 'mean'})
                daily = daily.reset_index()
                daily[target + '_diff'] = daily[target].diff()
                daily = daily.fillna(0)

                # Prophet expects the columns to be called 'ds' and 'y'
                daily = daily.rename(columns={
                    self.timestamp_col: 'ds',
                    target: 'y'
                })
                daily = daily[['ds', 'y']]
                logger.warning('df:%s', daily.tail())
                model = Prophet()
                model.fit(daily)

                horizon = model.make_future_dataframe(
                    periods=self.forecast_days)
                forecast = model.predict(horizon)
                print(forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())
                print(list(forecast.columns))

                # forecast overlay: point estimate as a line, bounds as scatter
                band_opts = dict(x='ds',
                                 width=600,
                                 height=250,
                                 value_label='$',
                                 legend=False)
                p = forecast.hvplot.line(y='yhat', **band_opts).relabel('yhat')
                for bound in ('yhat_lower', 'yhat_upper'):
                    p *= forecast.hvplot.scatter(y=bound,
                                                 **band_opts).relabel(bound)

                # component overlay: trend and weekly seasonality as lines
                part_opts = dict(x='ds',
                                 width=550,
                                 height=250,
                                 value_label='$',
                                 legend=False)
                q = forecast.hvplot.line(y='trend', **part_opts).relabel('trend')
                q *= forecast.hvplot.line(y='weekly',
                                          **part_opts).relabel('weekly')

                return p + q
            except Exception:
                logger.error("TSA:", exc_info=True)

    def update_variable(attr, old, new):
        """Widget callback: switch the analysed variable and re-fire the matrix/corr streams."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.variable = new
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        """Widget callback: copy every Select's value into select_values, then re-prepare data."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        chosen = thistab.select_values
        for key in chosen:
            chosen[key] = thistab.select[key].value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Date-picker callback: reload the selected window and rebuild derived data.

        Reloads ``thistab.df`` for the dates currently shown in the two date
        pickers, re-runs ``prep_data`` and fires the matrix and correlation
        streams so dependent plots refresh.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)

        thistab.df['gender_code'] = thistab.df['gender'].apply(
            lambda x: 1 if x == 'male' else 2)
        # prep_data stores the aggregate on thistab.df1 itself; rebinding df1
        # to its return value is what used to wipe the fresh result.
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Select callback: store the new resample period and rebuild the aggregate.

        ``new`` is the period chosen in the widget; after re-running
        ``prep_data`` the matrix, correlation and general launch streams are
        fired with the incremented trigger.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        # prep_data stores the aggregate on thistab.df1 itself; rebinding df1
        # to its return value is what used to wipe the fresh result.
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Read the lag text input into ``thistab.lag_days`` and fire the lag and launch streams."""
        # NOTE(review): no widget in the visible setup code registers this
        # handler -- confirm whether the lags button should call it.
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        for stream in (stream_launch_lags_var, stream_launch):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        """Select callback: store the chosen x/y comparison variables and fire the launch stream."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        axes = thistab.multiline_vars
        axes['x'] = multiline_x_select.value
        axes['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_forecast(attrname, old, new):
        """Select callback: store forecast horizon and variable, then fire the TSA stream."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        horizon_days = int(select_forecast_days.value)
        thistab.forecast_days = horizon_days
        thistab.tsa_variable = forecast_variable_select.value
        thistab.trigger += 1
        stream_launch_tsa.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Build the tab: create the Thistab instance, load and prepare the data,
    # create streams and widgets, wire the callbacks and return the Panel.
    # Any failure is logged and replaced by the tab_error_flag panel.
    try:
        # SETUP
        table = 'bcc_composite'
        # cols_to_load is defined outside this function
        cols = cols_to_load['guest'] + cols_to_load['rental']
        thistab = Thistab(table, cols, [])

        # setup dates
        # first/last_date_range bound the date pickers; first/last_date are
        # the initially loaded window (1000 days ending the day before the
        # configured last date).
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=1)
        first_date = last_date - timedelta(days=1000)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)

        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        # Each stream carries a `launch` counter; the callbacks above fire
        # them with thistab.trigger.
        # NOTE(review): stream_launch_hist is not referenced again in this
        # function and shares the name 'Launch' with stream_launch.
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_tsa = streams.Stream.define('Launch_tsa', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)

        # NOTE(review): lag_variable_select and lag_select get no callback and
        # are not placed in the layout below.
        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])

        forecast_variable_select = Select(title='Select forecast variable',
                                          value=thistab.menus['tsa'][0],
                                          options=thistab.menus['tsa'])

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        multiline_y_select = Select(
            title='Select comparative DV(y)',
            value=thistab.multiline_vars['y'],
            options=['price', 'amount', 'visit_duration'])

        multiline_x_select = Select(title='Select comparative IV(x)',
                                    value=thistab.multiline_vars['x'],
                                    options=[
                                        'category', 'gender',
                                        'rental_employee_gender', 'status',
                                        'item'
                                    ])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        # NOTE(review): no click handler is attached to this button in this
        # block.
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------

        # tables
        # thistab.tsa is re-evaluated whenever stream_launch_tsa fires
        hv_tsa = hv.DynamicMap(thistab.tsa, streams=[stream_launch_tsa])
        tsa = renderer.get_plot(hv_tsa)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        resample_select.on_change('value', update_resample)
        thistab.select['area'].on_change('value', update_IVs)
        thistab.select['gender'].on_change('value', update_IVs)
        thistab.select['rental_employee_gender'].on_change('value', update_IVs)
        thistab.select['item'].on_change('value', update_IVs)
        thistab.select['category'].on_change('value', update_IVs)
        thistab.select['status'].on_change('value', update_IVs)
        select_forecast_days.on_change('value', update_forecast)
        forecast_variable_select.on_change('value', update_forecast)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)

        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_tsa = WidgetBox(datepicker_start, datepicker_end,
                                 variable_select, thistab.select['status'],
                                 resample_select, thistab.select['gender'],
                                 thistab.select['category'],
                                 thistab.select['area'],
                                 forecast_variable_select,
                                 select_forecast_days)

        # create the dashboards
        # rows: top banner, spacer, forecast header, plot + controls, spacer,
        # bottom banner

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [tsa.state, controls_tsa],
                         [Spacer(width=20, height=30)],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        # Log with traceback and show the error placeholder tab instead
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #2

Datei anzeigen

Datei: risk_assessment.py Projekt: andre-aion/analytics_demo

def pm_risk_assessment_tab(panel_title):
    risk_matrix_src = ColumnDataSource(data=dict(Severity=[],
                                                 Unlikely=[],
                                                 Seldom=[],
                                                 Occaisional=[],
                                                 Likely=[],
                                                 Definite=[]))

    corr_src = ColumnDataSource(data=dict(
        variable_1=[], variable_2=[], relationship=[], r=[], p_value=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """Initialise state, rating tables, menus and header divs for the risk tab.

            Args:
                table: stored on the instance as ``self.table``.
                cols: stored on the instance as ``self.cols``.
                dedup_cols: forwarded to ``Mytab.__init__``; ``None`` (the
                    default) is replaced by a fresh empty list.
            """
            # A literal ``[]`` default is one list object shared by all calls.
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            self.trigger = 0

            self.groupby_dict = {}

            self.div_style = """ style='width:350px; margin-left:25px;
                                    border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                                    """

            self.header_style = """ style='color:blue;text-align:center;' """
            self.variable = 'delay_end'

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            self.status = 'all'
            self.gender = 'all'
            self.type = 'all'
            # label -> numeric score for each rating dimension
            self.ratings = {
                'severity': {
                    'Insignificant': 1,
                    'Minor': 2,
                    'Moderate': 3,
                    'Critical': 4,
                    'Catastrophic': 5
                },
                'likelihood': {
                    'Unlikely': 1,
                    'Seldom': 2,
                    'Occaisional': 3,
                    'Likely': 4,
                    'Definite': 5
                }
            }

            self.variables = {
                dimension: list(levels)
                for dimension, levels in self.ratings.items()
            }
            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'gender': ['all', 'male', 'female'],
            }
            self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'}
            self.timestamp_col = 'analysis_date'

            # Selection state; starts empty and is filled in by load_df
            self.risks = []
            self.risk = ''
            self.matrices = []
            self.matrix = ''
            self.risk_select = Select(title='Select risk',
                                      value=self.risk,
                                      options=self.risks)
            self.risk_threshold = {'acceptable': 8, 'doubtful': 15}

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                                position:relative;background:black;margin-bottom:200px">
                                                <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                          </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)
            self.section_divider = '-----------------------------------'
            make_header = self.section_header_div
            # geometry shared by every header except 'lag'
            geometry = dict(width=600, margin_top=5, margin_bottom=-155)
            self.section_headers = {
                'lag':
                make_header(text=lag_section_head_txt,
                            width=1000,
                            html_header='h2',
                            margin_top=50,
                            margin_bottom=5),
                'distribution':
                make_header(text='Pre-transform distribution',
                            html_header='h2',
                            **geometry),
                'matrix':
                make_header(text='Risk Matrix:{}'.format(self.section_divider),
                            html_header='h2',
                            **geometry),
                'risk_solution':
                make_header(text='Risk Matrix vs Solution :{}'.format(
                    self.section_divider),
                            html_header='h2',
                            **geometry),
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a Div showing ``text`` inside a coloured heading element.

            Args:
                text: heading text.
                html_header: tag name used for the heading (e.g. ``'h2'``).
                width: width passed to the Div.
                margin_top: value interpolated into ``margin-top``.
                margin_bottom: value interpolated after the template's own
                    minus sign in ``margin-bottom``.
            """
            # NOTE(review): the template already carries a "-" before the
            # bottom margin, so a negative argument (including the default)
            # renders as "--150px" -- confirm whether that is intended.
            template = ('<div style="margin-top:{}px;margin-bottom:-{}px;">'
                        '<{} style="color:#4221cc;">{}</{}></div>')
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

        def load_df(self):
            """Assemble the risk frame from the Mongo collections and store it.

            Reads ``risk_matrix``, ``risk``, ``risk_analysis``,
            ``risk_severity`` and ``risk_likelihood`` through ``self.pym.db``,
            merges them into one frame with a ``composite`` score
            (severity * likelihood), then sets ``self.df``, ``self.matrices``,
            ``self.matrix`` and calls ``set_risks`` for the first matrix.

            Nothing is loaded when ``risk_matrix`` is empty. Any exception is
            logged and swallowed; the method always returns ``None``.
            """
            try:
                risk_matrx = json_normalize(
                    list(self.pym.db['risk_matrix'].find()))
                logger.warning('LINE 169:RISK MATIRX:%s', risk_matrx.head())
                if len(risk_matrx) > 0:
                    risk_matrx = drop_cols(risk_matrx, ['desc'])
                    logger.warning('LINE 159:RISK MATIRX:%s',
                                   risk_matrx.head())

                    risk = json_normalize(list(self.pym.db['risk'].find()))
                    # rename so the foreign key cannot clash with the matrix
                    # label column created further down
                    risk = risk.rename(columns={'matrix': 'matrix_id'})
                    analysis = json_normalize(
                        list(self.pym.db['risk_analysis'].find()))
                    analysis = drop_cols(analysis, ['_id'])
                    analysis = analysis.rename(columns={'risk': 'risk_id'})

                    # merges
                    # risk <- analysis, joined on the risk's _id
                    risk = risk.merge(analysis,
                                      how='inner',
                                      left_on='_id',
                                      right_on='risk_id')
                    risk = drop_cols(risk, [
                        '_id', 'likelihood_comment', 'severity_comment',
                        'desc', 'risk_id'
                    ])
                    logger.warning('LINE 167:RISK:%s', risk.head())
                    logger.warning('LINE 169:RISK MATIRX:%s',
                                   risk_matrx.head())

                    # matrix <- risk, joined on the matrix's _id
                    risk = risk_matrx.merge(risk,
                                            how='inner',
                                            left_on='_id',
                                            right_on='matrix_id')

                    df = drop_cols(risk, ['_id', 'matrix_id', 'analyst'])
                    df = df.rename(columns={'name': 'matrix'})
                    dfs = {}
                    # Replace each rating reference by the 'value' column of
                    # its lookup collection; unmatched rows become 0.
                    for component in ['severity', 'likelihood']:
                        table = 'risk_' + component
                        dfs[component] = json_normalize(
                            list(self.pym.db[table].find()))

                        dfs[component] = drop_cols(dfs[component],
                                                   ['desc', 'level'])
                        df = df.merge(dfs[component],
                                      how='left',
                                      left_on=component,
                                      right_on='_id')
                        df = drop_cols(df, ['_id', 'project', component])
                        df = df.rename(columns={'value': component})
                        df[component] = df[component].fillna(0)
                    df['composite'] = df.severity * df.likelihood

                    # set selection variables
                    logger.warning('LINE 154 df:%s', df)
                    self.df = df
                    self.matrices = list(df['matrix'].unique())
                    # raises IndexError (caught below) when no matrix exists
                    self.matrix = self.matrices[0]
                    self.set_risks(df, matrix=self.matrix)

            except Exception:
                logger.error('load df', exc_info=True)

        def set_risks(self, df, matrix):
            """Restrict ``df`` to one matrix and refresh the risk selection state.

            Stores the unique ``risk`` values of the matching rows on
            ``self.risks`` (and as the options of ``self.risk_select``), makes
            the first one the current ``self.risk`` and keeps the filtered frame
            on ``self.df1``. Any exception is logged and swallowed.
            """
            try:
                subset = df[df['matrix'] == matrix]
                risk_names = list(subset['risk'].unique())
                self.risks = risk_names
                self.risk = risk_names[0]
                self.risk_select.options = risk_names
                self.df1 = subset
            except Exception:
                logger.error('prep data', exc_info=True)

        # //////////////  DIVS   //////////////////

        def title_div(self, text, width=700):
            """Return ``text`` as a coloured ``<h2>`` inside a bokeh ``Div``."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            title = Div(text=heading, width=width, height=15)
            return title

        # ////////////// GRAPHS ////////////////////

        def action_table(self, launch):
            """Return an hvplot table recommending an action for every risk.

            Averages ``likelihood`` and ``severity`` per (matrix, risk) over
            ``self.df``, multiplies them into a composite score and maps that
            score to a recommendation using ``self.risk_threshold``. Also calls
            ``self.risk_matrix()``; its return value is not used here.

            ``launch`` is the stream value supplied by the DynamicMap and is not
            used. Any exception is logged and ``None`` is returned.
            """

            def label_action(score):
                if score < self.risk_threshold['acceptable']:
                    return 'Proceed (risk is acceptable)'
                if score < self.risk_threshold['doubtful']:
                    return 'Proceed, if no other options are available'
                # wording fixed: the label used to read "Do no proceed"
                return 'Do not proceed (Risk unacceptable)'

            try:
                df = self.df.groupby(['matrix', 'risk']).agg({
                    'likelihood': 'mean',
                    'severity': 'mean'
                })
                df = df.reset_index()
                df['composite'] = df.likelihood * df.severity
                df['action'] = df['composite'].map(label_action)
                self.risk_matrix()
                return df.hvplot.table(
                    columns=[
                        'matrix', 'risk', 'severity', 'likelihood', 'action'
                    ],
                    width=1000,
                )
            except Exception:
                logger.error('action table', exc_info=True)

        def risk_matrix(self):
            """Rebuild the severity x likelihood matrix for the selected risk.

            Averages severity and likelihood of ``self.risk`` over ``self.df1``,
            fills one table column per likelihood label with the products
            ``likelihood_index * severity_index`` and marks the cell matching
            the selected risk with a ``'BINGO '`` prefix. The data is streamed
            into ``risk_matrix_src``, ``self.corr()`` is called, and a new
            ``DataTable`` bound to that source is returned. Any exception is
            logged and ``None`` is returned.
            """
            try:
                # mean scores of the currently selected risk
                df = self.df1.groupby(['matrix', 'risk']).agg({
                    'likelihood': 'mean',
                    'severity': 'mean'
                })
                df = df.reset_index()
                df = df[df['risk'] == self.risk]
                severity_value = int(df['severity'].mean())
                likelihood_value = int(df['likelihood'].mean())
                logger.warning('severity=%s,likelihood=%s', severity_value,
                               likelihood_value)

                # make the matrix: rows are severity labels, columns are
                # likelihood labels.
                # assumes the 1-based position of a label equals its rating
                # value (the arithmetic below depends on it) - TODO confirm
                severity_labels = list(self.ratings['severity'].keys())
                likelihood_labels = list(self.ratings['likelihood'].keys())
                dct = {'Severity': severity_labels}

                for likelihood_idx, likelihood_label in enumerate(
                        likelihood_labels, start=1):
                    cells = []
                    for severity_idx in range(1, len(severity_labels) + 1):
                        val = likelihood_idx * severity_idx
                        # Compare like with like. The previous check matched
                        # the likelihood index against the severity score (and
                        # vice versa), which highlighted the transposed cell.
                        if (severity_idx == severity_value
                                and likelihood_idx == likelihood_value):
                            logger.warning('CONDITIONS MET')
                            cells.append('BINGO ' + str(val))
                        else:
                            cells.append(val)
                    dct[likelihood_label] = cells

                # reports the last column built by the loop above
                logger.warning('LINE 288 %s - length=%s', likelihood_label,
                               len(dct[likelihood_label]))

                risk_matrix_src.stream(dct, rollover=(len(dct['Severity'])))

                # column titles are the lower-cased field names
                columns = [TableColumn(field="Severity", title='severity')]
                for field in ('Unlikely', 'Seldom', 'Occaisional', 'Likely',
                              'Definite'):
                    columns.append(
                        TableColumn(
                            field=field,
                            title=field.lower(),
                            formatter=dashboard_config['formatters'][field]))
                risk_matrix_table = DataTable(source=risk_matrix_src,
                                              columns=columns,
                                              width=800,
                                              height=500)
                self.corr()
                return risk_matrix_table
            except Exception:
                logger.error('risk matrix', exc_info=True)

        def correlate_solution_risk(self, launch):
            """Overlay scatter plots of solution score against the risk scores.

            Loads ``project_composite1`` through ``self.pym.db``, scales the
            first entry of each ``solution`` value by 10, averages per project
            and plots ``solution`` against ``severity``, ``likelihood`` and
            their product. ``launch`` is the stream value and is not used.
            Any exception is logged and ``None`` is returned.
            """
            try:
                # load solution
                projection = {
                    'severity': 1,
                    'likelihood': 1,
                    'solution': 1,
                    'project_owner_gender': 1,
                    'project': 1
                }
                records = list(
                    self.pym.db['project_composite1'].find({}, projection))
                df = json_normalize(records)
                df['solution'] = df['solution'].apply(
                    lambda scores: scores[0] * 10)

                per_project = df.groupby(['project']).agg({
                    'severity': 'mean',
                    'likelihood': 'mean',
                    'solution': 'mean'
                })
                per_project = per_project.reset_index()
                per_project['composite'] = (per_project['severity'] *
                                            per_project['likelihood'])
                logger.warning('df:%s', per_project.head(20))

                # one scatter per score, combined into a single overlay
                overlay = None
                for col in ('severity', 'likelihood', 'composite'):
                    scatter = per_project.hvplot.scatter(x='solution', y=col)
                    if overlay is None:
                        overlay = scatter
                    else:
                        overlay *= scatter
                return overlay
            except Exception:
                logger.error('correlate solution risk', exc_info=True)

        def risk_information_div(self, width=400, height=300):
            """Return a ``Div`` with the colour legend of the risk matrix.

            The two placeholders of the template are filled with
            ``self.div_style`` and ``self.header_style``.
            """
            legend_template = """
                   <div {}>
                   <h4 {}>How to interpret Risk assessment matrix:</h4>
                   <ul style='margin-top:-10px;'>
                       <li>
                       Red: Unacceptable risk. Do NOT proceed.
                       </li>
                       <li>
                       Yellow: Risky. Proceed only after ensuring better options aren't reasonable available
                       </li>
                       <li>
                       Green: Acceptable risk. Proceed.
                       </li>
                   </ul>
                   </div>
    
                   """
            markup = legend_template.format(self.div_style, self.header_style)
            return Div(text=markup, width=width, height=height)

        # calculate the correlation produced by the lags vector
        def corr(self):
            """Stream solution-vs-risk correlations and return a table for them.

            Loads ``project_composite1`` through ``self.pym.db``, averages the
            scores per project and passes the ``solution`` values together with
            each of ``composite``, ``severity`` and ``likelihood`` to
            ``self.corr_label``. The resulting rows are streamed into
            ``corr_src`` and a new ``DataTable`` on that source is returned.
            Any exception is logged and ``None`` is returned.
            """
            try:
                keys = ('variable_1', 'variable_2', 'relationship', 'r',
                        'p_value')
                table = {key: [] for key in keys}

                # load solution
                projection = {
                    'severity': 1,
                    'likelihood': 1,
                    'solution': 1,
                    'project_owner_gender': 1,
                    'project': 1
                }
                records = list(
                    self.pym.db['project_composite1'].find({}, projection))
                df = json_normalize(records)
                df['solution'] = df['solution'].apply(
                    lambda scores: scores[0] * 10)

                df = df.groupby(['project']).agg({
                    'severity': 'mean',
                    'likelihood': 'mean',
                    'solution': 'mean'
                })
                df = df.reset_index()
                df['composite'] = df['severity'] * df['likelihood']
                logger.warning('df:%s', df.head(20))

                solution_values = df['solution'].tolist()
                for col in ('composite', 'severity', 'likelihood'):
                    logger.warning('column:%s', col)
                    other_values = df[col].tolist()
                    # only r, p and the label of the result are used
                    _slope, _intercept, rvalue, pvalue, label = \
                        self.corr_label(solution_values, other_values)
                    row = ('solution', col, label, round(rvalue, 3),
                           round(pvalue, 3))
                    for key, value in zip(keys, row):
                        table[key].append(value)

                corr_src.stream(table, rollover=3)
                columns = [
                    TableColumn(field=field, title=title)
                    for field, title in (
                        ("variable_1", "variable 1"),
                        ("variable_2", "variable 2"),
                        ("relationship", "relationship"),
                        ("r", "r"),
                        ("p_value", "p_value"),
                    )
                ]
                return DataTable(source=corr_src,
                                 columns=columns,
                                 width=900,
                                 height=400)
            except Exception:
                logger.error(' corr', exc_info=True)

    def update_matrix(attrname, old, new):
        """Change callback for ``matrix_select``.

        Takes the new matrix from the widget, recomputes the risks available
        for it and bumps ``thistab.trigger`` so that both the action-table and
        the matrix streams receive a fresh ``launch`` value. ``attrname``,
        ``old`` and ``new`` are not used.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.matrix = matrix_select.value
        # refresh thistab.risks / thistab.df1 for the newly chosen matrix
        thistab.set_risks(thistab.df, matrix=thistab.matrix)
        thistab.trigger += 1
        stream_launch_action_table.event(launch=thistab.trigger)
        stream_launch_matrix.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_risk(attrname, old, new):
        """Change callback for ``thistab.risk_select``.

        Stores the selected risk, sends a new ``launch`` value to the matrix
        stream and then calls ``thistab.risk_matrix()`` directly. ``attrname``,
        ``old`` and ``new`` are not used.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.risk = thistab.risk_select.value
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        # the returned DataTable is discarded; the call is made for its
        # side effects (it streams into risk_matrix_src and calls corr())
        thistab.risk_matrix()
        thistab.notification_updater("Ready!")

    # Build the tab: load the data, create the tables and widgets, wire the
    # callbacks and lay everything out. Any failure is logged and replaced by
    # the panel returned from tab_error_flag.
    try:
        # SETUP
        table = 'project_composite'
        thistab = Thistab(table, [], [])
        thistab.load_df()
        # fills corr_src; the DataTable returned by corr() is not used here
        thistab.corr()

        # MANAGE STREAM
        # three separate stream instances, each with a single 'launch' value
        stream_launch_action_table = streams.Stream.define('Launch',
                                                           launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch', launch=-1)()
        stream_launch_risk_solution = streams.Stream.define('Launch',
                                                            launch=-1)()

        # MAKE TABLES
        # --------------------- PLOTS----------------------------------
        # one column for the severity label plus one per likelihood label
        columns = [
            TableColumn(field="Severity", title="severity"),
            TableColumn(field="Unlikely",
                        title='unlikely',
                        formatter=dashboard_config['formatters']['Unlikely']),
            TableColumn(field="Seldom",
                        title='seldom',
                        formatter=dashboard_config['formatters']['Seldom']),
            TableColumn(
                field="Occaisional",
                title='occaisional',
                formatter=dashboard_config['formatters']['Occaisional']),
            TableColumn(field="Likely",
                        title='likely',
                        formatter=dashboard_config['formatters']['Likely']),
            TableColumn(field="Definite",
                        title='definite',
                        formatter=dashboard_config['formatters']['Definite']),
        ]

        # table bound to the source that Thistab.risk_matrix() streams into
        risk_matrix = DataTable(source=risk_matrix_src,
                                columns=columns,
                                width=800,
                                height=500)

        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        # table bound to the source that Thistab.corr() streams into
        corr_table = DataTable(source=corr_src,
                               columns=columns,
                               width=500,
                               height=280)

        width = 800

        hv_action_table = hv.DynamicMap(thistab.action_table,
                                        streams=[stream_launch_action_table])
        action_table = renderer.get_plot(hv_action_table)

        # NOTE(review): risk_solution is rendered here but is not placed in
        # the grid below
        hv_risk_solution = hv.DynamicMap(thistab.correlate_solution_risk,
                                         streams=[stream_launch_risk_solution])
        risk_solution = renderer.get_plot(hv_risk_solution)

        # CREATE WIDGETS
        matrix_select = Select(title='Select matrix',
                               value=thistab.matrix,
                               options=thistab.matrices)

        # handle callbacks
        matrix_select.on_change('value', update_matrix)
        thistab.risk_select.on_change('value', update_risk)

        # create the dashboards
        controls = WidgetBox(matrix_select, thistab.risk_select)

        # one list per grid row, top to bottom
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.title_div('Determine action', 400)],
                         [Spacer(width=20, height=30)], [action_table.state],
                         [thistab.section_headers['matrix']],
                         [Spacer(width=20, height=30)],
                         [risk_matrix, controls],
                         [thistab.section_headers['risk_solution']],
                         [Spacer(width=20, height=30)], [corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #3

Datei anzeigen

Datei: social_media.py Projekt: andre-aion/analytics_demo

def KPI_social_media_tab(panel_title,DAYS_TO_LOAD=90):
    class Thistab(KPI):
        def __init__(self, table, cols=None):
            """Set up state, widgets and header divs for the social-media tab.

            ``table`` is stored on the instance and, together with ``cols``,
            passed on to ``KPI.__init__``. ``cols`` defaults to ``None``, which
            is treated as a new empty list, so that no list object is shared
            between instances through the default argument.
            """
            cols = [] if cols is None else cols
            KPI.__init__(self, table, name='social_media', cols=cols)
            self.table = table
            self.df = None
            self.pym = PythonMongo('aion')

            self.checkboxgroup = {}
            # NOTE(review): self.page_width is read here but only assigned
            # further down; this relies on KPI.__init__ having set it - confirm
            self.KPI_card_div = self.initialize_cards(self.page_width, height=350)
            # midnight on 1 January of the current year
            self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0)

            self.timestamp_col = 'timestamp'
            self.items = None
            self.social_media = 'twitter'
            self.crypto = 'aion'
            # aggregation applied to each metric column
            self.groupby_dict = {

                'twu_tweets': 'sum',
                'twu_mentions': 'sum',
                'twu_positive': 'mean',
                'twu_compound': 'mean',
                'twu_neutral': 'mean',
                'twu_negative': 'mean',
                'twu_emojis_positive': 'mean',
                'twu_emojis_compound': 'mean',
                'twu_emojis_neutral': 'mean',
                'twu_emojis_negative': 'mean',
                'twu_emojis': 'sum',
                'twu_favorites': 'sum',
                'twu_retweets': 'sum',
                'twu_hashtags': 'sum',
                'twu_replies': 'sum',
                'twr_tweets': 'sum',
                'twr_mentions': 'sum',
                'twr_positive': 'mean',
                'twr_compound': 'mean',
                'twr_neutral': 'mean',
                'twr_negative': 'mean',
                'twr_emojis_positive': 'mean',
                'twr_emojis_compound': 'mean',
                'twr_emojis_neutral': 'mean',
                'twr_emojis_negative': 'mean',
                'twr_emojis': 'sum',
                'twr_favorites': 'sum',
                'twr_retweets': 'sum',
                'twr_hashtags': 'sum',
                'twr_replies': 'sum'
            }
            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = self.variables[0]

            self.vars_dict = None
            self.idvars = None

            self.external_hourly_labels = ['fork', 'release', 'push', 'watch', 'issue', 'twu_tweets',
                                           'twu_mentions', 'twu_positive', 'twu_compound', 'twu_neutral',
                                           'twu_negative', 'twu_emojis_positive', 'twu_emojis_compound',
                                           'twu_emojis_negative', 'twu_emojis', 'twu_retweets',
                                           'twu_hashtags', 'twu_replies', 'twu_favorites',
                                           'twr_tweets',
                                           'twr_mentions', 'twr_positive', 'twr_compound', 'twr_neutral',
                                           'twr_negative', 'twr_emojis_positive', 'twr_emojis_compound',
                                           'twr_emojis_negative', 'twr_emojis', 'twr_retweets',
                                           'twr_hashtags', 'twr_replies', 'twr_favorites',
                                           ]

            # self.initial_date is not assigned in this method; presumably it
            # comes from KPI - verify
            self.datepicker_pop_start = DatePicker(
                title="Period start", min_date=self.initial_date,
                max_date=dashboard_config['dates']['last_date'], value=dashboard_config['dates']['last_date'])
            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                    position:relative;background:black;margin-bottom:200px">
                    <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards': self.section_header_div(text='Period to date:{}'.format(
                    self.section_divider),
                    width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                'pop': self.section_header_div(
                    text='Period over period:{}'.format(self.section_divider),
                    width=600, html_header='h2', margin_top=5, margin_bottom=-155),
            }

        # ----------------------  DIVS ----------------------------

        def load_df(self, start_date, end_date, cols, timestamp_col='timestamp_of_first_event',
                    supplemental_where=None):
            """Load ``self.table`` between two dates through ``self.ch``.

            Both dates are reset to midnight and one day is added to
            ``end_date`` before the query. For every table except
            ``'external_daily'`` the ``amount`` column is added to the requested
            columns. On the first call (``self.items`` is ``None``) the sorted
            unique values of the ``crypto`` column are stored on ``self.items``.

            Returns ``df[cols]`` when ``cols`` is non-empty, otherwise the whole
            frame. Any exception is logged and ``None`` is returned.
            """
            try:
                midnight = datetime.min.time()
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, midnight)
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date, midnight)
                end_date += timedelta(days=1)

                # work on a copy so the caller's list is not extended
                query_cols = cols.copy()
                if self.table != 'external_daily' and 'amount' not in query_cols:
                    query_cols.append('amount')

                df = self.ch.load_data(self.table, query_cols, start_date, end_date,
                                       timestamp_col, supplemental_where)

                if self.items is None:
                    crypto_col = df['crypto']
                    if crypto_col is not None:
                        self.items = sorted(set(crypto_col.compute()))
                    logger.warning('LINE 148, items:%s', self.items)

                return df[cols] if len(cols) > 0 else df
            except Exception:
                logger.error('load df', exc_info=True)
        


        def melt_df(self, df):
            """Collect the first-row value of every metric column of ``df``.

            Column names are split on ``'.'``; the metric label is the last part
            with the first part (plus one separator character) stripped from its
            front. Columns whose label is one of the known keys contribute the
            first row of that column to the list for that label. The lists are
            then turned into a dataframe, which is returned. Any exception is
            logged and ``None`` is returned.
            """
            try:
                metrics = ('tweets', 'mentions', 'positive', 'compound',
                           'neutral', 'negative', 'emojis_positive',
                           'emojis_compound', 'emojis_neutral',
                           'emojis_negative', 'emojis', 'favorites',
                           'retweets', 'hashtags', 'replies')
                temp_dct = {'timestamp': [], 'crypto': []}
                for prefix in ('twu', 'twr'):
                    for metric in metrics:
                        temp_dct[prefix + '_' + metric] = []

                skipped = ('timestamp', 'month', 'year', 'day', 'hour')

                # NOTE(review): counter is never incremented, so the
                # "only run once" guard below is always true and a coin name
                # is appended for every matching column - confirm the intent
                counter = 0
                for col in df.columns:
                    if col in skipped:
                        continue

                    item_tmp = col.split('.')
                    key_len = len(item_tmp[0])
                    col_label = item_tmp[-1][key_len + 1:]
                    if col_label not in temp_dct:
                        continue

                    # label for each coin, only run once
                    if counter == 0:
                        temp_dct['crypto'].append(get_coin_name(col, key_len))

                    # get value from dataframe; fall back to [0] when the
                    # first row cannot be read. Narrowed from a bare except so
                    # KeyboardInterrupt / SystemExit are no longer swallowed.
                    try:
                        val = df[[col]].values[0]
                    except Exception:
                        val = [0]
                    logger.warning('LINE 228:%s', col_label)
                    temp_dct[col_label].append(val)

                logger.warning('LINE 234:%s', temp_dct)
                df = pd.DataFrame.from_dict(temp_dct)
                logger.warning('df after melt:%s', df)
                return df
            except Exception:
                logger.error('melt coins', exc_info=True)

        '''
        def load_df(self, start_date,end_date,cols, table='external_hourly',timestamp_col='timestamp'):
            try:
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date, datetime.min.time())
                end_date = start_date + timedelta(days=1)
                df = self.pym.load_df(start_date, end_date,cols=cols, table=table, timestamp_col='timestamp')
                logger.warning('df:%s',df.head())
                self.items = get_items()
                groupby_dict, self.vars_dict, self.idvars = set_vars(self.items)

                if df is not None:
                    if len(df) > 0:
                        if '_id' in df.columns:
                            df = df.drop(['_id'], axis=1)

                        logger.warning('length df:%s',len(df))

                        df = self.melt_df(df)

                        return df

                return df

            except Exception:
                logger.error('load external data', exc_info=True)
        
        '''

        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Return a bokeh ``Div`` containing a coloured section heading.

            ``text`` is wrapped in an ``html_header`` element inside a ``div``
            whose inline margins come from ``margin_top`` / ``margin_bottom``.
            """
            # NOTE: the template itself prefixes the bottom margin with "-", so a
            # negative ``margin_bottom`` is emitted with a doubled minus sign.
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;">
                <{} style="color:#4221cc;">{}</{}></div>"""
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

        def information_div(self, width=400, height=170):
            """Return a ``Div`` explaining how to read the sentiment scores.

            The two placeholders of the template are filled with a fixed inline
            style for the outer box and with ``self.header_style``.
            """
            # inline style of the outer box; whitespace is spelled out so the
            # generated markup stays exactly as before
            box_style = (
                " \n"
                "               style='width:350px;margin-right:-800px;\n"
                "               border:1px solid #ddd;border-radius:3px;background:#efefef50;' \n"
                "           "
            )
            template = """
            <div {}>
                <h4 {}>How to interpret sentiment score</h4>
                <ul style='margin-top:-10px;'>
                    <li>
                    Sentiment scores: positive, negative, neutral.
                    </li>
                    <li>
                    The sentiment scores are percentages.
                    </li>
                    <li>
                    The sentiment scores are averaged over the period.
                    </li>
                    <li>
                    (e.g.) Interpretation: over the quarter to date twitter comments
                    were 18% positive
                    </li>
                    
                </ul>
            </div>

            """
            markup = template.format(box_style, self.header_style)
            return Div(text=markup, width=width, height=height)

        def initialize_cards(self,width,height=250):
            """Return a ``Div`` holding four empty KPI cards in a flex row.

            One card is rendered per period (year, quarter, month, week), each
            with a design picked at random from ``KPI_card_css``. Any exception
            is logged and ``None`` is returned.
            """
            try:
                cards = ''.join(
                    self.card(title='', data='',
                              card_design=random.choice(list(KPI_card_css.keys())))
                    for _period in ('year', 'quarter', 'month', 'week')
                )
                wrapper = ('<div style="margin-top:100px;display:flex; flex-direction:row;">\n'
                           '                {}\n'
                           '                </div>')
                return Div(text=wrapper.format(cards), width=width, height=height)
            except Exception:
                logger.error('initialize cards', exc_info=True)



        # ------------------------- CARDS END -----------------------------------
        def period_to_date(self, df, timestamp=None, timestamp_filter_col=None, cols=None, period='week'):
            """Return the rows of ``df`` from the start of ``period`` up to ``timestamp``.

            ``timestamp`` defaults to the current time truncated to the hour.
            The period start comes from ``self.first_date_in_period``. The
            ``timestamp_filter_col`` column of ``df`` is converted to datetimes
            in place (using ``self.DATEFORMAT_PTD``) before filtering, so the
            caller's frame is modified. When ``cols`` is non-empty only those
            columns are returned. Any exception is logged and ``None`` is
            returned.
            """
            try:
                if timestamp is None:
                    now = datetime.now()
                    timestamp = datetime(now.year, now.month, now.day, now.hour, 0, 0)

                start = self.first_date_in_period(timestamp, period)

                df[timestamp_filter_col] = pd.to_datetime(df[timestamp_filter_col], format=self.DATEFORMAT_PTD)
                # Log the column that is actually filtered on. The previous
                # hard-coded df['timestamp'] raised KeyError (and made the
                # whole method return None) for any other column name.
                logger.warning('df:%s', df[timestamp_filter_col])

                in_period = (df[timestamp_filter_col] >= start) & (df[timestamp_filter_col] <= timestamp)
                df = df[in_period]
                if cols is not None and len(cols) > 0:
                    df = df[cols]
                return df
            except Exception:
                logger.error('period to date', exc_info=True)

        def period_over_period(self, df, start_date, end_date, period,
                               history_periods=2, timestamp_col='timestamp_of_first_event'):
            """Append earlier periods to ``df`` for period-over-period comparison.

            A copy of ``df`` is labelled as the current period. The date window
            is then shifted back with ``self.shift_period_range`` and reloaded
            with ``self.load_df`` while ``counter < history_periods`` and the
            window start is not before ``self.initial_date``. Each loaded frame
            with more than one row is labelled ``'<n> <period>(s) prev'`` and
            concatenated. ``history_periods`` may be given as a string.

            Returns the combined frame. Any exception is logged and ``None`` is
            returned.
            """
            try:
                # the frame passed in is the current period
                df_current = df.copy()
                df_current['period'] = '0 {}(s) prev(current)'.format(period)
                # label the days being compared with the same label
                df_current = self.label_dates_pop(df_current, period, timestamp_col)

                # zero out time information
                start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0)
                end = datetime(end_date.year, end_date.month, end_date.day, 0, 0, 0)

                cols = list(df.columns)
                if isinstance(history_periods, str):
                    history_periods = int(history_periods)

                # Always bound: previously this was only assigned when
                # self.crypto != 'all', so the 'all' case hit an unbound local.
                supplemental_where = None
                if self.crypto != 'all':
                    supplemental_where = "AND crypto = '{}'".format(self.crypto)

                # make dataframes for request no. of periods
                counter = 1
                start, end = self.shift_period_range(period, start, end)
                while counter < history_periods and start >= self.initial_date:
                    if period == 'quarter':
                        logger.warning('start:end %s:%s', start, end)

                    df_temp = self.load_df(start, end, cols, timestamp_col,
                                           supplemental_where=supplemental_where)
                    # load_df returns None when it fails, so test for that
                    # before touching the frame (the check used to come after
                    # .compute(), i.e. too late)
                    if df_temp is not None:
                        df_temp = df_temp.compute()
                        df_temp[timestamp_col] = pd.to_datetime(df_temp[timestamp_col])
                        if len(df_temp) > 1:
                            # label period
                            df_temp = df_temp.assign(
                                period='{} {}(s) prev'.format(counter, period))
                            # relabel days to get matching day of week,doy, dom, for different periods
                            df_temp = self.label_dates_pop(df_temp, period, timestamp_col)

                            df_current = pd.concat([df_current, df_temp])
                            del df_temp
                            gc.collect()

                    # shift the loading window
                    counter += 1
                    start, end = self.shift_period_range(period, start, end)
                return df_current
            except Exception:
                logger.error('period over period', exc_info=True)

            # label dates for period over period (pop)

        def label_dates_pop(self, df, period, timestamp_col):
            """Add a ``dayset`` column locating each row inside its period.

            ``df[timestamp_col]`` is converted to datetimes in place. ``dayset``
            is then set to the day of week ('week'), day of month ('month'),
            day of year ('year') or the number of days since the first day of
            the calendar quarter ('quarter'). For any other ``period`` the
            frame is returned without a ``dayset`` column.

            :param df: frame to label (modified in place)
            :param period: one of 'week', 'month', 'year', 'quarter'
            :param timestamp_col: name of the timestamp column
            :return: the labelled frame, or None when an error was logged
            """
            df[timestamp_col] = pd.to_datetime(df[timestamp_col])

            def label_qtr_pop(y):
                # days elapsed since the first day of y's calendar quarter
                try:
                    curr_quarter = (y.month - 1) // 3 + 1
                    start = datetime(y.year, 3 * curr_quarter - 2, 1)
                    return abs((start - y).days)
                except Exception:
                    logger.error('df label quarter', exc_info=True)

            try:
                stamps = df[timestamp_col]
                if period == 'week':
                    df['dayset'] = stamps.dt.dayofweek
                elif period == 'month':
                    df['dayset'] = stamps.dt.day
                elif period == 'year':
                    # FIX: a Series has no timetuple(); the previous code raised
                    # AttributeError here, so 'year' always logged and returned None.
                    df['dayset'] = stamps.dt.dayofyear
                elif period == 'quarter':
                    df['dayset'] = stamps.apply(label_qtr_pop)

                return df
            except Exception:
                logger.error('label data ', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------
        def graph_periods_to_date(self, df1, timestamp_filter_col, variable):
            """Compute week/month/quarter/year-to-date figures and push them to the cards.

            For every period the frame returned by ``self.period_to_date`` is
            reduced to the de-duplicated ``variable`` column and summarised
            according to ``self.groupby_dict[variable]`` ('sum' -> int total,
            'mean' -> rounded value with a '%' suffix). The resulting
            ``{period: value}`` mapping is handed to ``self.update_cards``.
            Errors are logged, not raised.
            """
            try:
                selected = df1 if self.crypto == 'all' else df1[df1.crypto == self.crypto]
                selected = selected.compute()

                card_values = {}
                for period in ('week', 'month', 'quarter', 'year'):
                    to_date = self.period_to_date(
                        selected,
                        timestamp=dashboard_config['dates']['last_date'],
                        timestamp_filter_col=timestamp_filter_col,
                        period=period)

                    # keep only distinct values of the variable of interest
                    distinct = to_date[[variable]].drop_duplicates(keep='first')
                    del to_date

                    if self.groupby_dict[variable] == 'sum':
                        data = int(distinct[variable].sum())
                    elif self.groupby_dict[variable] == 'mean':
                        data = "{}%".format(round(distinct[variable].mean(), 3))

                    # release the intermediate frame before the next period is loaded
                    del distinct
                    gc.collect()
                    card_values[period] = data

                self.update_cards(card_values)

            except Exception:
                logger.error('graph periods to date', exc_info=True)

        def graph_period_over_period(self, period):
            """Build the grouped bar chart comparing ``period`` with earlier periods.

            Reads the range from ``self.pop_start_date`` / ``self.pop_end_date``,
            loads the data with ``self.load_df``, expands it with
            ``self.period_over_period`` and plots one bar group per ``dayset``.
            A period is dropped when the selected range is longer than it
            (7 / 31 / 90 days for week / month / quarter).

            :param period: period name, e.g. 'week', 'month' or 'quarter'
            :return: the hvplot object; None when an error was logged
            """
            try:
                candidate_periods = [period]
                midnight = datetime.min.time()

                range_start = self.pop_start_date
                range_end = self.pop_end_date
                if isinstance(range_start, date):
                    range_start = datetime.combine(range_start, midnight)
                if isinstance(range_end, date):
                    range_end = datetime.combine(range_end, midnight)
                today = datetime.combine(datetime.today().date(), midnight)

                # if the start day is today (there is no data for today),
                # move the start date back a week and reflect that in the picker
                if range_start == today:
                    logger.warning('START DATE of WEEK IS TODAY.!NO DATA DATA')
                    range_start -= timedelta(days=7)
                    self.datepicker_pop_start.value = range_start

                load_cols = [self.variable, self.timestamp_col]
                where_clause = None
                if self.crypto != 'all':
                    where_clause = "AND crypto = '{}'".format(self.crypto)
                df = self.load_df(start_date=range_start, end_date=range_end, cols=load_cols,
                                  timestamp_col='timestamp', supplemental_where=where_clause)

                # drop periods shorter than the selected range
                span_days = abs(range_start - range_end).days
                for name, limit in (('week', 7), ('month', 31), ('quarter', 90)):
                    if span_days > limit and name in candidate_periods:
                        candidate_periods.remove(name)

                df = df.compute()
                for idx, current in enumerate(candidate_periods):
                    df_period = self.period_over_period(
                        df, start_date=range_start, end_date=range_end,
                        period=current, history_periods=self.pop_history_periods,
                        timestamp_col='timestamp')

                    logger.warning('LINE 368: dayset:%s', df_period.head(30))
                    if len(df_period) > 0:
                        df_period = (df_period
                                     .groupby(['dayset', 'period'])
                                     .agg({self.variable: 'sum'})
                                     .reset_index())
                    else:
                        df_period = df_period.rename(index=str, columns={'day': 'dayset'})

                    cols_before_split = list(df_period.columns)
                    df_period = self.split_period_into_columns(
                        df_period, col_to_split='period', value_to_copy=self.variable)

                    # short term fix: filter out the unnecessary first day added by a corrupt quarter functionality
                    if current == 'quarter':
                        min_day = df_period['dayset'].min()
                        logger.warning('LINE 252: MINIUMUM DAY:%s', min_day)
                        df_period = df_period[df_period['dayset'] > min_day]

                    cols_after_split = list(df_period.columns)

                    title = "{} over {}".format(current, current)
                    # the columns created by the split are the series to plot
                    plotcols = list(np.setdiff1d(cols_after_split, cols_before_split))
                    # include current period if not extant
                    df_period, plotcols = self.pop_include_zeros(
                        df_period, plotcols=plotcols, period=current)

                    aggregation = self.groupby_dict[self.variable]
                    if aggregation == 'sum':
                        xlabel = 'frequency'
                    elif aggregation == 'mean':
                        xlabel = '%'

                    chart = df_period.hvplot.bar('dayset', plotcols, rot=45, title=title,
                                                 stacked=False, width=1200, height=400,
                                                 value_label=xlabel)
                    if idx == 0:
                        p = chart
                    else:
                        p += chart
                return p

            except Exception:
                logger.error('period over period to date', exc_info=True)
    def update(attrname, old, new):
        """Widget ``on_change`` callback: copy the selections to ``thistab`` and redraw.

        The three arguments are supplied by the widget and are not used; the
        current values are read from the select widgets directly.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.crypto, thistab.variable, thistab.social_media = (
            crypto_select.value,
            variable_select.value,
            social_media_select.value,
        )
        # refresh the cards, then bump the trigger so the stream fires a redraw
        thistab.graph_periods_to_date(thistab.df,
                                      timestamp_filter_col='timestamp',
                                      variable=thistab.variable)
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")


    def update_period_over_period():
        """Button callback: copy the period-over-period selections to ``thistab`` and redraw."""
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods, thistab.pop_start_date, thistab.pop_end_date = (
            pop_number_select.value,
            thistab.datepicker_pop_start.value,  # trigger period over period
            datepicker_pop_end.value,
        )
        # a new trigger value makes the stream re-run the plots bound to it
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Build the tab: create the Thistab instance, load data, create the widgets
    # and plots, wire the callbacks and assemble the layout. Any exception is
    # logged and an error tab is returned instead.
    try:
        cols = []
        thistab = Thistab(table='external_daily', cols=cols)
        # -------------------------------------  SETUP   ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        # start of the year that last_date falls in
        first_date = datetime(last_date.year,1,1,0,0,0)

        loadcols = ['timestamp','crypto'] + thistab.variables
        # NOTE(review): the list built on the previous line is immediately
        # overwritten here, so load_df receives an empty column list - confirm intent
        loadcols = []
        thistab.df = thistab.load_df(first_date, last_date,loadcols,timestamp_col='timestamp')

        thistab.graph_periods_to_date(thistab.df,timestamp_filter_col='timestamp',variable=thistab.variable)
        thistab.section_header_updater('cards',label='')
        thistab.section_header_updater('pop',label='')

        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        thistab.pop_end_date = last_date
        thistab.pop_start_date = thistab.first_date_in_period(thistab.pop_end_date, 'week')

        # stream whose 'launch' value is changed by the callbacks to re-run the plots
        stream_launch = streams.Stream.define('Launch',launch=-1)()

        datepicker_pop_end = DatePicker(title="Period end", min_date=first_date_range,
                                        max_date=last_date_range, value=thistab.pop_end_date)

        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(5),
                                   options=thistab.menus['history_periods'])
        pop_button = Button(label="Select dates/periods, then click me!",width=15,button_type="success")

        variable_select = Select(title='Select variable', value=thistab.variable,
                                 options=thistab.variables)

        social_media_select = Select(title='Select social media',value=thistab.social_media,
                                     options=thistab.menus['social_media'])

        crypto_select = Select(title='Select item/crypto of interest', value=thistab.crypto,
                               options=thistab.items)

        # ---------------------------------  GRAPHS ---------------------------
        # each plot is a DynamicMap bound to the launch stream and rendered once here

        hv_pop_week = hv.DynamicMap(thistab.pop_week,streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)

        hv_pop_month = hv.DynamicMap(thistab.pop_month,streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)

        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)


        # -------------------------------- CALLBACKS ------------------------

        variable_select.on_change('value', update)
        pop_button.on_click(update_period_over_period) # lags array
        social_media_select.on_change('value', update)
        crypto_select.on_change('value', update)


        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element

        controls_pop = WidgetBox(thistab.datepicker_pop_start,
                                 datepicker_pop_end,pop_number_select,pop_button)
        controls_top = WidgetBox(social_media_select,crypto_select,variable_select)

        # one inner list per layout row
        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.information_div()],
            [thistab.section_headers['cards']],
            [Spacer(width=20, height=2)],
            [thistab.KPI_card_div,controls_top],
            [thistab.section_headers['pop']],
            [Spacer(width=20, height=25)],
            [pop_week.state,controls_pop],
            [pop_month.state],
            [pop_quarter.state],
            [thistab.notification_div['bottom']]
        ])


        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        # log the failure and fall back to the error tab for this panel
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #4

Datei anzeigen

Datei: economic_indicators.py Projekt: andre-aion/analytics_demo

def eda_country_indexes_tab(panel_title):
    """Build the "country indexes" panel.

    Loads the ``country_indexes`` collection, wires a country selector to a
    holoviews stream and lays the elements out in a grid.

    :param panel_title: title given to the resulting ``Panel``
    :return: the assembled ``Panel``; if anything raises, the error is logged
        and ``tab_error_flag(panel_title)`` is returned instead
    """
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            # FIX: a mutable default list would be shared between instances
            dedup_cols = [] if dedup_cols is None else dedup_cols
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            self.trigger = 0

            self.groupby_dict = {}

            self.div_style = """ style='width:350px; margin-left:25px;
                                    border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                                    """

            self.header_style = """ style='color:blue;text-align:center;' """
            self.countries = []
            self.country = 'Barbados'

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'gender': ['all', 'male', 'female'],
            }
            self.multiline_vars = {'x': '', 'y': ''}
            self.timestamp_col = 'timestamp'

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                                position:relative;background:black;margin-bottom:200px">
                                                <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                          </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'info': self.section_header_div(text='Country indexes')
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a ``Div`` holding ``text`` wrapped in an ``html_header`` tag."""
            # NOTE(review): the template hard-codes a '-' before margin_bottom, so the
            # default of -150 renders as "--150px". Left unchanged because fixing it
            # would alter the rendered layout - confirm which sign is intended.
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def load_df(self):
            """Load ``self.table`` from Mongo into ``self.df`` (NaN -> 0) and reset ``self.countries``."""
            try:
                df = json_normalize(
                    list(self.pym.db[self.table].find({}, {'_id': False})))
                df = df.fillna(0)
                logger.warning('LINE 96:  country indicator:%s', df.head())
                self.countries = []
                self.df = df

            except Exception:
                logger.error('load', exc_info=True)

        def get_row_column_labels(self, txt):
            """Split a dotted column name into ``(first part, last part)``.

            The first part is recorded in ``self.countries`` (kept sorted); any
            '-' in the last part is replaced by '_'.
            """
            x = txt.split('.')
            if x[0] not in self.countries:
                self.countries.append(x[0])
                # FIX: sorted() returns a new list and its result was discarded,
                # so the list was never actually sorted.
                self.countries.sort()
            x[-1] = x[-1].replace('-', '_')
            return x[0], x[-1]

        def melt_df(self):
            """Reshape ``self.df`` into ``self.df1`` with a 'country' column.

            Every column except 'timestamp' contributes its first-row value
            (0 when that value cannot be read) under the label derived by
            ``get_row_column_labels``. Errors are logged, not raised.
            """
            try:
                temp_dct = {'country': []}

                for col in self.df.columns:
                    if col == 'timestamp':
                        continue
                    row, column = self.get_row_column_labels(col)
                    temp_dct['country'].append(row)
                    if column not in temp_dct:
                        temp_dct[column] = []
                    try:
                        val = self.df[[col]].values[0]
                    except Exception:
                        val = [0]
                    temp_dct[column].append(val[0])

                # NOTE(review): 'country' gets one entry per source column while each
                # indicator list only grows for its own columns, so the lists may
                # differ in length - confirm pandas accepts the resulting dict.
                self.df1 = pd.DataFrame.from_dict(temp_dct)
            except Exception:
                logger.error('melt coins', exc_info=True)

        def plot_country_rows(self, launch):
            """Stream callback; currently only makes sure ``self.df1`` has been built."""
            # NOTE(review): nothing is returned, so the DynamicMap bound to this
            # callback receives None - the plotting part looks unfinished.
            try:
                if self.df1 is None:
                    self.melt_df()

            except Exception:
                logger.error('plot', exc_info=True)

    def update_country(attrname, old, new):
        """Select ``on_change`` callback: store the chosen country and fire the stream."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        # FIX: the widget is the local ``country_select``; the visible code never
        # sets a ``country_select`` attribute on ``thistab``.
        thistab.country = country_select.value
        thistab.trigger += 1
        stream_launch_action_table.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'country_indexes'
        thistab = Thistab(table, [], [])
        thistab.load_df()

        # MANAGE STREAM
        stream_launch_action_table = streams.Stream.define('Launch',
                                                           launch=-1)()

        # MAKE TABLES
        # --------------------- PLOTS---------------------------------

        hv_action_table = hv.DynamicMap(thistab.plot_country_rows,
                                        streams=[stream_launch_action_table])
        action_table = renderer.get_plot(hv_action_table)

        # CREATE WIDGETS
        # FIX: load_df() returns None and re-queried the database; the initial
        # selection is the country stored on the tab.
        country_select = Select(title='Select matrix',
                                value=thistab.country,
                                options=thistab.countries)

        # handle callbacks
        country_select.on_change('value', update_country)

        # create the dashboards
        # NOTE(review): this box is empty and neither it nor country_select is
        # placed in the grid below - confirm whether the selector should be shown.
        controls = WidgetBox()

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.title_div('info', 400)],
                         [Spacer(width=20, height=30)], [action_table.state],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #5

Datei anzeigen

def crypto_clusters_eda_tab(cryptos, panel_title):
    """Build the panel comparing cryptocurrency clusters across features.

    Cluster membership is read from Redis ('clusters:cryptocurrencies'); one
    multi-line figure is created per feature listed under its 'features' key.

    :param cryptos: list of items stored on the tab as ``items``
    :param panel_title: title forwarded to ``Mytab`` and used for the error tab
    :return: the assembled ``Panel``; if anything raises, or no cluster data
        is available, ``tab_error_flag(panel_title)`` is returned instead
    """
    global groupby_dict
    global features
    global cluster_dct
    #global source

    redis = PythonRedis()
    cluster_dct = redis.simple_load('clusters:cryptocurrencies')
    if cluster_dct is None:
        # FIX: without cluster data ``source`` (and possibly ``features`` /
        # ``groupby_dict``) was never defined and the code failed later with a
        # NameError; report the problem up front instead.
        logger.error('crypto clusters: no cluster data loaded from redis')
        return tab_error_flag(panel_title)

    features = cluster_dct['features']
    groupby_dict = {var: 'sum' for var in features}
    # FIX: start with the column names the glyphs actually reference ('x'/'y');
    # the initial columns used to be 'xs'/'ys', which only worked once
    # graph_ts() had replaced the data.
    source = {
        feature: ColumnDataSource(data=dict(x=[], y=[], labels=[], colors=[]))
        for feature in features
    }

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            # FIX: a mutable default list would be shared between instances
            dedup_cols = [] if dedup_cols is None else dedup_cols
            Mytab.__init__(self,
                           table,
                           cols,
                           dedup_cols,
                           panel_title=panel_title)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = [
                'watch', 'fork', 'issue', 'release', 'push', 'tw_mentions',
                'tw_positive', 'tw_compound', 'tw_neutral', 'tw_negative',
                'tw_emojis_positive', 'tw_emojis_compound',
                'tw_emojis_negative', 'tw_emojis_count', 'tw_reply_hashtags'
            ]
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0
            txt = """<div style="text-align:center;background:black;width:100%;">
                                                                           <h1 style="color:#fff;">
                                                                           {}</h1></div>""".format(
                'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=1400, height=20),
                'bottom': Div(text=txt, width=1400, height=10),
            }
            self.cluster_dct = cluster_dct
            self.groupby_dict = groupby_dict
            self.features = features
            self.crypto = 'all'

            self.div_style = """ style='width:350px; margin-left:25px;
                                    border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                                    """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.significant_effect_dict = {}
            self.section_headers = {
                'ts':
                self.section_header_div(
                    'Comparison of clusters across variables:---------------------',
                    width=600)
            }
            self.timestamp_col = None
            self.colors = None

        # ----------------------  DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=1400):
            """Return a ``Div`` holding ``text`` wrapped in an ``html_header`` tag."""
            text = '<{} style="color:#4221cc;">{}</{}>'.format(
                html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=300):
            """Return the (currently empty) interpretation help box."""
            txt = """
               <div {}>
               <h4 {}>How to interpret relationships </h4>
               <ul style='margin-top:-10px;'>
                   <li>
                   </li>
                   <li>
                   </li>
                   <li>
                   </li>
                   <li>
                   </li>
                    <li>
                   </li>
                    <li>
                   </li>
               </ul>
               </div>

               """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ////////////////////////// UPDATERS ///////////////////////
        def section_head_updater(self, section, txt):
            """Replace the text of the section header stored under ``section``."""
            try:
                # FIX: ``section_header_div`` is a method and cannot be indexed;
                # the header Divs live in ``self.section_headers``.
                self.section_headers[section].text = txt
            except Exception:
                logger.error('', exc_info=True)

        def notification_updater(self, text):
            """Show ``text`` in every notification Div."""
            txt = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for div in self.notification_div.values():
                div.text = txt

        # /////////////////////////// LOAD CLUSTERS  //////////////////////
        def prep_data(self, df, timestamp_col):
            """Relabel cryptos with their cluster and aggregate per cluster and period.

            The result, indexed by ``timestamp_col``, is stored in ``self.df1``.
            Errors are logged, not raised.
            """
            def label_cluster(x):
                # return the name of the cluster whose member list contains x,
                # or x itself when no cluster claims it
                for key, values in self.cluster_dct.items():
                    # FIX: also skip 'features', a metadata entry (the feature
                    # list read above), so it can never be returned as a cluster
                    if key not in ['timestamp', 'variables', 'features']:
                        if x in values:
                            return key
                return x

            try:
                cols = self.features + ['crypto', 'timestamp']
                df = df[cols]
                # groupby and resample
                df['crypto'] = df['crypto'].map(lambda x: label_cluster(x))
                df = df.rename(columns={'crypto': 'cluster'})
                df = df.compute()
                df[timestamp_col] = pd.to_datetime(df[timestamp_col],
                                                   errors='coerce')
                df.set_index(timestamp_col, inplace=True)
                df = df.groupby('cluster').resample(self.resample_period).agg(
                    self.groupby_dict)
                df.reset_index(inplace=True)
                df.set_index(timestamp_col, inplace=True)
                self.timestamp_col = timestamp_col
                self.df1 = df

            except Exception:
                logger.error('prep data', exc_info=True)

        def graph_ts(self):
            """Push one line per cluster into each feature's ColumnDataSource."""
            try:
                #global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    clusters = df['cluster'].unique()
                    # one palette colour per cluster, in order of appearance
                    self.colors = [
                        dashboard_config['colors'][idx]
                        for idx in range(len(clusters))
                    ]
                    if self.features is not None:
                        for feature in self.features:
                            df1 = df[['cluster', feature]]
                            # pivot into columns for cluster
                            df1 = df1.pivot(columns='cluster')
                            data = dict(x=[df1.index.values] * len(clusters),
                                        y=[df1[name].values for name in df1],
                                        labels=clusters,
                                        colors=self.colors)
                            source[feature].data = data
            except Exception:
                logger.error('graph ts', exc_info=True)

        def graph_chartify(self, timestamp_col):
            """Return a chartify line chart for the first feature (unused prototype)."""
            try:
                # global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    df = df.reset_index()

                    # NOTE(review): the return inside the loop means only the
                    # first feature is ever charted - confirm intent.
                    for feature in self.features:
                        ch = chartify.Chart(blank_labels=True,
                                            x_axis_type='datetime')
                        ch.set_title("CHARTIFY")
                        ch.plot.line(
                            # Data must be sorted by x column
                            data_frame=df.sort_values(timestamp_col),
                            x_column=timestamp_col,
                            y_column=feature,
                            color_column='cluster')
                        return ch

            except Exception:
                logger.error('graph chartify', exc_info=True)

    def update():
        """Button callback: reload the selected date range and redraw."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value,
                        datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Select callback: re-aggregate with the chosen resample period and redraw."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = resample_select.value
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("ready")

    try:
        table = 'external_daily'
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = dashboard_config['dates']['current_year_start']
        # initial function call
        thistab.df_load(first_date,
                        last_date,
                        timestamp_col='timestamp',
                        cols=[])
        thistab.prep_data(thistab.df, timestamp_col='timestamp')

        # MANAGE STREAMS ---------------------------------------------------------

        # CREATE WIDGETS ----------------------------------------------------------------
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        load_dates_button = Button(
            label="Select dates/periods, then click me!",
            width=20,
            height=8,
            button_type="success")

        resample_select = Select(title='Select summary period',
                                 value=thistab.resample_period,
                                 options=thistab.menus['resample_periods'])

        # -------------------------------- PLOTS ---------------------------
        thistab.graph_ts()
        p = {}
        for feature in features:
            p[feature] = figure(x_axis_type="datetime",
                                plot_width=1400,
                                plot_height=400,
                                title=feature)

            p[feature].multi_line(
                xs='x',
                ys='y',
                legend='labels',
                line_color='colors',
                line_width=5,
                hover_line_color='colors',
                hover_line_alpha=1.0,
                source=source[feature],
            )
            p[feature].add_tools(
                HoverTool(show_arrow=False,
                          line_policy='next',
                          tooltips=[
                              ('freq', '$y'),
                          ]))

        # ch = thistab.graph_chartify(timestamp_col='timestamp')
        # -------------------------------- CALLBACKS ------------------------

        load_dates_button.on_click(update)  # lags array
        resample_select.on_change('value', update_resample)

        # -----------------------------------LAYOUT ----------------------------
        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_left = WidgetBox(datepicker_start, load_dates_button)

        controls_right = WidgetBox(datepicker_end)

        grid_data = [
            #[ch.figure],
            [thistab.notification_div['top']],
            [controls_left, controls_right],
            [thistab.section_headers['ts'], resample_select],
        ]
        for feature in features:
            grid_data.append([p[feature]])
            logger.warning('p:%s', p[feature])

        grid_data.append([thistab.notification_div['bottom']])

        grid = gridplot(grid_data)

        # Make a tab with the layout
        tab = Panel(child=grid, title=thistab.panel_title)
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        # FIX: ``thistab`` is unbound when its construction is what failed, so
        # use the function argument rather than ``thistab.panel_title``.
        return tab_error_flag(panel_title)

Beispiel #6

Datei anzeigen

def twitter_loader_tab(panel_title):
    class TwitterLoader():
        def __init__(self, search_term='beiber'):
            """Set up search limits, filter widgets and the banner/header Divs.

            :param search_term: initial search topic, stored in ``self.topic``.
            """
            # TWITTER SETUP
            self.api = None  # stays None until get_credentials() builds the client
            self.topic = search_term

            # selectable message limits: '10', '60', ... '960'
            self.options = {'messages': [str(x) for x in range(10, 1000, 50)]}
            # NOTE(review): only 'messages' is defined here, but write_to_file()
            # also reads limits['time'] -- confirm where that key should come from.
            self.limits = {
                'messages': int(self.options['messages'][0]),
            }
            self.hidden_path = dashboard_config['hidden_path']
            # NOTE(review): day precedes month in this format ("%Y-%d-%m"); looks
            # like a typo for "%Y-%m-%d" -- confirm before relying on it.
            self.DATEFORMAT = "%Y-%d-%m %H:%M:%S"
            self.df = None
            # the date picker is limited to the last seven days
            min_date = datetime.today() - timedelta(days=7)
            print(min_date)  # debug output
            self.selects = {
                'window':
                Select(title='Select rolling mean window',
                       value='1',
                       options=[str(x) for x in range(1, 20, 2)]),
                'date_since':
                DatePicker(title="Tweets since:",
                           min_date=min_date,
                           max_date=datetime.today(),
                           value=min_date)
            }
            # snapshot of the widget values taken at construction time
            self.selects_values = {
                'window': int(self.selects['window'].value),
                'date_since': self.selects['date_since'].value
            }
            # resample menu: '30Min', '60Min', ... '330Min'; first entry is the default
            self.resample_period = {'menu': []}
            for val in range(30, 350, 30):
                self.resample_period['menu'].append(str(val) + 'Min')
            self.resample_period['value'] = self.resample_period['menu'][0]
            # DIV VISUAL SETUP
            self.trigger = -1  # counter pushed into the streams to force a redraw
            self.html_header = 'h2'
            self.margin_top = 150
            self.margin_bottom = -150

            self.div_style = """ 
                           style='width:350px; margin-left:25px;
                           border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                       """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.page_width = 1250
            # banner markup shared by the top and bottom notification Divs
            txt = """<hr/>
                               <div style="text-align:center;width:{}px;height:{}px;
                                      position:relative;background:black;margin-bottom:200px">
                                      <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                               </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'twitter':
                self.section_header_div(text='Twitter search results:',
                                        width=600,
                                        html_header='h2',
                                        margin_top=155,
                                        margin_bottom=-155),
            }

            # ----- UPDATED DIVS END

        # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Build a Div containing a coloured section heading.

            :param text: heading text.
            :param html_header: heading tag name, e.g. ``'h2'``.
            :param width: width passed to the Div.
            :param margin_top: top margin in pixels.
            :param margin_bottom: bottom margin in pixels; the sign is ignored and
                the margin is always emitted as a negative (pull-up) value.
            :return: a Div with fixed height 15.
            """
            # The template used to hard-code a '-' in front of the value, so the
            # negative numbers callers pass (e.g. -155) produced the invalid CSS
            # 'margin-bottom:--155px'. Normalising the sign keeps positive inputs
            # rendering as before and makes negative inputs well-formed.
            bottom = -abs(margin_bottom)
            markup = (
                '<div style="margin-top:{top}px;margin-bottom:{bottom}px;">'
                '<{tag} style="color:#4221cc;">{text}</{tag}></div>'
            ).format(top=margin_top, bottom=bottom, tag=html_header, text=text)
            return Div(text=markup, width=width, height=15)

        def notification_updater(self, text):
            """Show *text* in every notification banner Div held in ``self.notification_div``."""
            banner = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                         position:relative;background:black;">
                                         <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                   </div>""".format(self.page_width, 50, text)
            # every banner gets the same markup
            for div in self.notification_div.values():
                div.text = banner

        def title_div(self, text, width=700):
            """Return a Div of the given *width* rendering *text* as a coloured ``<h2>``."""
            heading = f'<h2 style="color:#4221cc;">{text}</h2>'
            return Div(text=heading, width=width, height=15)

        # //////////////////////////  DIVS SETUP END   /////////////////////////////////

        # /////////////////////////// UTILS BEGIN ///////////////////////////

        def twitter_datetime_to_epoch(self, ts):
            """Parse a Twitter ``created_at`` string.

            :param ts: timestamp text in Twitter's format, e.g.
                ``'Wed Oct 10 20:19:24 +0000 2018'``.
            :return: ``(naive_datetime, epoch_seconds)``; the datetime keeps the
                wall-clock fields of *ts* without tz info, the epoch is a float.
            :raises ValueError: if *ts* does not match the expected format.
            """
            parsed = datetime.strptime(ts, '%a %b %d %H:%M:%S %z %Y')
            # datetime has no created_at() method (the old call always raised
            # AttributeError); timestamp() yields POSIX seconds for the aware value.
            ts_epoch = parsed.timestamp()
            # Round-trip through DATEFORMAT to obtain a naive datetime limited to
            # the fields that format carries.
            naive = datetime.strptime(parsed.strftime(self.DATEFORMAT),
                                      self.DATEFORMAT)
            return naive, ts_epoch

        def write_to_file(self):
            """Write ``self.df`` to a tab-separated file named after the search.

            The file is created in the current working directory. Failures are
            logged and swallowed (best effort).
            """
            try:
                # limits only ever gets a 'messages' entry in __init__; indexing
                # limits['time'] raised KeyError on every call, so no file was
                # ever written. Fall back to a placeholder when it is absent.
                filename = """{}_searches_for_last_{}sec_or_last_{}messages.csv""".format(
                    self.topic, self.limits.get('time', 'NA'),
                    self.limits['messages'])
                self.df.to_csv(filename, sep='\t', index=False)
            except Exception:
                logger.error('Error writing to file', exc_info=True)

        # /////////////////////////// UTILS END /////////////////////
        def reset_data(self):
            """Discard the currently loaded tweets by setting ``self.df`` to None."""
            self.df = None

        def get_credentials(self, filename='twitter_credentials.json'):
            """Create ``self.api`` from a JSON credentials file and verify it.

            The file is looked up at ``dirname(__file__)/<hidden_path><filename>``
            and must provide ``consumer_key``, ``consumer_secret``,
            ``access_token_key`` and ``access_token_secret``.

            Best effort: every failure is logged with its traceback and swallowed,
            so ``self.api`` may still be None afterwards.

            :param filename: credentials file name, appended to ``self.hidden_path``.
            """
            try:
                filepath = join(dirname(__file__), self.hidden_path + filename)
                logger.debug('credentials file: %s', filepath)
                if self.api is None:
                    with open(filepath, 'r') as f:
                        credentials_dict = json.load(f)
                    auth = tw.OAuthHandler(credentials_dict['consumer_key'],
                                           credentials_dict['consumer_secret'])
                    auth.set_access_token(
                        credentials_dict['access_token_key'],
                        credentials_dict['access_token_secret'],
                    )
                    self.api = tw.API(auth, wait_on_rate_limit=True)
                logger.info('CREDENTIALS LOADED')
            except Exception:
                # was a bare `except:` with a print -- the reason for the failure
                # (missing file, bad JSON, missing key) was lost
                logger.error('credentials not loaded', exc_info=True)
                return

            # verify separately so a load failure is not reported as an auth failure
            try:
                self.api.verify_credentials()
                logger.info('Authentication OK')
            except Exception:
                logger.error('Error during authentication', exc_info=True)

        def load_data_about_topic(self):
            """Download tweets matching ``self.topic`` into ``self.df``.

            One initial search is issued, then older pages are requested (100 per
            call, walking ``max_id`` backwards) until more than
            ``self.limits['messages']`` tweets are held or a page comes back empty.
            The result is stored as a DataFrame with columns ``created_at`` and
            ``text``. Failures are logged and swallowed; ``self.df`` is then left
            untouched.
            """
            try:
                if self.api is None:
                    self.get_credentials()
                limit = self.limits['messages']
                logger.warning('LINE 186:%s,messages=%s', self.topic, limit)

                # initial request for the most recent tweets
                alltweets = list(self.api.search(q=self.topic, count=limit))

                # Page backwards. An empty first page leaves nothing to page from
                # (indexing alltweets[-1] used to raise IndexError in that case).
                while alltweets:
                    # id of the oldest tweet less one: max_id prevents duplicates
                    oldest = alltweets[-1].id - 1
                    print(f"getting tweets before {oldest}")

                    new_tweets = self.api.search(q=self.topic,
                                                 count=100,
                                                 max_id=oldest,
                                                 tweet_mode='extended')
                    alltweets.extend(new_tweets)
                    print(f"...{len(alltweets)} tweets downloaded so far")
                    if len(alltweets) > limit or len(new_tweets) <= 0:
                        break

                # transform the tweets into rows for the DataFrame
                results = []
                for tweet in alltweets:
                    try:
                        # Tweets fetched with tweet_mode='extended' expose
                        # full_text instead of text; reading only .text skipped
                        # every tweet from the follow-up pages.
                        if hasattr(tweet, 'full_text'):
                            body = tweet.full_text
                        else:
                            body = tweet.text
                        results.append([tweet.created_at, body])
                    except AttributeError:
                        print("skipped this one")

                self.df = pd.DataFrame(data=results,
                                       columns=['created_at', 'text'])
                logger.warning('LINE 211 self.df:%s', self.df.head(20))
            except Exception:
                logger.error('error in loading data', exc_info=True)

        def run(self):
            """Reload tweets for the current topic; any exception is logged, not raised."""
            try:
                self.load_data_about_topic()
                # self.write_to_file()

            except Exception:
                logger.error('run', exc_info=True)

        # #################################### PLOTS ######################################
        def sentiment_analysis(self, launch=1):
            """Plot mean sentiment scores of the loaded tweets over time.

            Each tweet text is scored with ``sentiment_analyzer_scores`` (unpacked
            here as three values: pos, neg, neu), the scores are resampled on
            ``created_at`` with ``self.resample_period['value']`` and averaged.

            :param launch: stream value that triggers a redraw; not otherwise used.
            :return: the hvplot line plot, or None when anything fails (logged).
            """
            try:
                cols = ['pos', 'neg', 'neu']
                # Work on a copy: assigning columns onto a column-slice of self.df
                # triggers pandas' SettingWithCopy warning. (The old loop that
                # pre-filled the score columns with 0 was redundant -- they are
                # always overwritten on the next line.)
                df = self.df[['text', 'created_at']].copy()
                df['pos'], df['neg'], df['neu'] = zip(
                    *df['text'].map(sentiment_analyzer_scores))
                df = df.fillna(0)
                logger.warning('resample period:%s',
                               self.resample_period['value'])
                df = df.set_index('created_at').resample(self.resample_period['value']) \
                    .agg({'pos': 'mean',
                          'neg': 'mean',
                          'neu': 'mean'})
                df = df.reset_index()
                # empty resample buckets come back as NaN
                df = df.fillna(0)
                logger.warning('LINE 307, df:%s', df.head(30))

                return df.hvplot.line(x='created_at',
                                      y=cols,
                                      width=1200,
                                      height=600)
            except Exception:
                # label was 'run' (copy-paste), which pointed at the wrong method
                logger.error('sentiment analysis', exc_info=True)

        def visual(self, launch=1):
            """Render the loaded tweets as an hvplot table.

            :param launch: stream value that triggers a redraw; not otherwise used.
            :return: the table plot, or None when rendering fails (logged).
            """
            try:
                return self.df.hvplot.table(
                    columns=['created_at', 'text'],
                    width=1200,
                    height=2000,
                )
            except Exception:
                logger.error('output data', exc_info=True)

        def jitter(self, launch=1):
            """Plot the gap between each tweet and the following row.

            :param launch: stream value that triggers a redraw; not otherwise used.
            :return: the hvplot line plot, or None when anything fails (logged).
            """
            try:
                tweets = self.df.copy()
                # diff against the next row, sign flipped
                tweets['jitter'] = tweets['created_at'].diff(periods=-1) * -1
                gaps = tweets.dropna()
                return gaps.hvplot.line(
                    x='created_at',
                    y='jitter',
                    width=1200,
                    height=600,
                )
            except Exception:
                logger.error('output data', exc_info=True)

    def update_tweet_search():
        """Button callback: rerun the search with the current inputs and redraw."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.reset_data()
        thistab.limits['messages'] = int(inputs['messages_limit'].value)
        thistab.topic = inputs['search_term'].value
        thistab.run()
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        # The sentiment stream is defined with a single `launch` parameter (and
        # sentiment_analysis() accepts `launch`); the former `launch_this=` keyword
        # named a parameter the stream does not have.
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample_period(attr, old, new):
        """Select callback: store the new resample period and redraw the sentiment plot.

        Only *new* is used; *attr* and *old* are ignored.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.resample_period['value'] = new
        thistab.trigger += 1
        # stream_launch_rolling_mean.event(launch=thistab.trigger)
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    # Build the tab: load data, wire streams to plots, create widgets, lay out.
    # Any failure is logged and replaced by the error-flag tab.
    try:
        # SETUP
        thistab = TwitterLoader()
        thistab.run()  # initial load with the default search term

        # MANAGE STREAM
        # each stream carries one `launch` counter; sending an event re-invokes
        # the DynamicMap callback subscribed to it
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        # NOTE(review): defined but not referenced again in this function
        stream_launch_rolling_mean = streams.Stream.define('Launch',
                                                           launch=-1)()
        stream_launch_sentiment = streams.Stream.define('Launch', launch=-1)()

        # DYNAMIC GRAPHS/OUTPUT
        hv_visual = hv.DynamicMap(thistab.visual, streams=[stream_launch])
        visual = renderer.get_plot(hv_visual)

        hv_jitter = hv.DynamicMap(thistab.jitter, streams=[stream_launch])
        jitter = renderer.get_plot(hv_jitter)

        hv_sentiment_analysis = hv.DynamicMap(
            thistab.sentiment_analysis, streams=[stream_launch_sentiment])
        sentiment_analysis = renderer.get_plot(hv_sentiment_analysis)

        # CREATE WIDGETS
        inputs = {
            'search_term':
            TextInput(title='Enter search term. For list, use commas',
                      value=thistab.topic),
            'messages_limit':
            Select(title='Select messages limit (5000 = unbounded)',
                   value=str(thistab.limits['messages']),
                   options=thistab.options['messages']),
            'resample':
            Select(title='Select resample period',
                   value=thistab.resample_period['value'],
                   options=thistab.resample_period['menu'])
        }
        tweet_search_button = Button(
            label='Enter filters/inputs, then press me', button_type="success")

        # WIDGET CALLBACK
        tweet_search_button.on_click(update_tweet_search)
        inputs['resample'].on_change('value', update_resample_period)

        # COMPOSE LAYOUT
        # group controls (filters/input elements)
        controls_tweet_search = WidgetBox(
            inputs['search_term'],
            inputs['messages_limit'],
            tweet_search_button,
        )

        # NOTE(review): built but never placed in the grid below
        controls_rolling_mean = WidgetBox(thistab.selects['window'], )

        controls_resample_period = WidgetBox(inputs['resample'])

        # rows top to bottom: banner, sentiment plot, jitter plot, tweet table, banner
        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.title_div('Sentiment analysis of tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [sentiment_analysis.state, controls_resample_period],
            [thistab.title_div('Time between tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [jitter.state],
            [
                thistab.title_div(
                    'Twitter search results (use filters on right, then click button):',
                    1000)
            ],
            [Spacer(width=20, height=30)],
            [visual.state, controls_tweet_search],
            [thistab.notification_div['bottom']],
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('Twitter loader:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #7

Datei anzeigen

def EDA_business_events_tab(panel_title, DAYS_TO_LOAD=90):
    timeline_source = ColumnDataSource(data=dict(
        Item=[],
        Start=[],
        End=[],
        Color=[],
        start=[],
        end=[],
        ID=[],
        ID1=[]
    ))

    class Thistab(KPI):
        def __init__(self, table, cols=None):
            """Configure aggregation rules, filter widgets, Divs and timeline state.

            :param table: name of the source table, forwarded to ``KPI.__init__``.
            :param cols: columns forwarded to ``KPI.__init__``; defaults to an
                empty list.
            """
            # a fresh list per instance instead of a shared mutable default
            cols = [] if cols is None else cols
            KPI.__init__(self, table, name='business', cols=cols)
            self.table = table
            self.df = None
            self.df1 = None
            self.df_pop = None

            self.checkboxgroup = {}
            self.period_to_date_cards = {}
            # period-to-date window starts on Jan 1 of the current year
            self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0)

            self.timestamp_col = 'start_actual'
            self.pym = PythonMongo('aion')
            # aggregation applied to each variable
            self.groupby_dict = {
                'event': 'count',
                'type': 'count',
                'rate': 'sum',
                'event_duration': 'sum',
                'start_delay': 'mean',
                # was ' mean' (leading space), which is not an aggregation name
                'end_delay': 'mean',
                'event_location': 'count',

                'patron': 'count',
                'patron_likes': 'nunique',
                'patron_gender': 'count',
                'patron_age': 'mean',
                'patron_friend': 'nunique',
                'patron_friend_gender': 'count',
                'patron_friend_age': 'mean',
                'patron_discovery': 'nunique',

                'manager': 'count',
                'manager_gender': 'count',
                'manager_age': 'mean',
                'manager_education': 'count',
                'manager_parish': 'count',

                'staff': 'count',
                'staff_gender': 'count',
                'staff_age': 'mean',
                'staff_education': 'count',
                'staff_parish': 'count',
                'remuneration': 'sum',
            }

            self.menus = {
                'company': [],
                'type': [],
                'patron': [],
                'manager': [],
                'gender': ['all', 'male', 'female', 'other'],
                'variables': list(self.groupby_dict.keys()),
                'history_periods': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
            }

            self.variable = 'rate'

            # #########  SETUP FILTERS #########################
            self.selects = {
                'event': Select(title='Select event', value="all", options=['all']),
                'company': Select(title='Select company', value="all", options=['all']),
                'patron_likes': Select(title='Select patron likes/hobbies', value='all',
                                       options=['all']),
                'patron': Select(title='Select patron', value='all', options=['all']),
                'manager_education': Select(title="Select manager's education", value='all',
                                            options=['all']),
                'staff_education': Select(title="Select staff's education", value='all',
                                          options=['all']),
                'manager_gender': Select(title="Select manager's gender", value='all',
                                         options=self.menus['gender']),
                'staff_gender': Select(title="Select staff's gender", value='all',
                                       options=self.menus['gender']),
                'patron_gender': Select(title="Select patron's gender", value='all',
                                        options=self.menus['gender']),
                'manager_parish': Select(title="Select manager's parish", value='all',
                                         options=['all']),
                'staff_parish': Select(title="Select staff's parish", value='all',
                                       options=['all']),
                'patron_parish': Select(title="Select patron's parish", value='all',
                                        options=['all']),
                'type': Select(title="Select event type", value='all',
                               options=['all']),
            }

            # current filter value per column ('all' = no filtering)
            # NOTE(review): 'staff_parish' has a Select above but no entry here,
            # so it is never populated or applied -- confirm whether intended.
            self.vars = {
                'event': 'all',
                'company': 'all',
                'patron_likes': 'all',
                'patron': 'all',
                'manager_education': 'all',
                'staff_education': 'all',
                'manager_gender': 'all',
                'staff_gender': 'all',
                'patron_gender': 'all',
                'manager_parish': 'all',
                'patron_parish': 'all',
                'type': 'all',
            }
            self.multiline_vars = {
                'xs': ['patron_likes', 'manager_education', 'staff_education',
                       'manager_gender', 'staff_gender', 'patron_gender', 'manager_parish',
                       'patron_parish', 'type'],
                'ys': ['rate', 'remuneration', 'attendance'],
            }
            self.multiline_variable = {
                'x': 'manager_gender',
                'y': 'rate',
            }
            self.resample_period = {
                'multiline': 'D',
            }

            self.chord_data = {
                'rename': {
                    'patron': 'source',
                    'company': 'target',
                    'rate': 'value',
                },
                'percentile_threshold': .75,
            }

            self.feature_list = self.multiline_vars['xs'] + [
                'rate', 'remuneration', 'start_delay', 'end_delay',
                'staff_age', 'manager_age', 'patron_age']

            self.percentile_threshold = 10
            self.tsa_variable = 'event'
            self.forecast_days = 30
            # self.DATEFORMAT is not set in this method; presumably provided by KPI
            self.initial_date = datetime.strptime('2015-01-01 00:00:00', self.DATEFORMAT)
            self.datepicker_pop_start = DatePicker(
                title="Period start", min_date=self.initial_date,
                max_date=dashboard_config['dates']['last_date'],
                value=dashboard_config['dates']['last_date'])

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                         position:relative;background:black;margin-bottom:200px">
                                         <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                   </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards': self.section_header_div(text='Period to date:{}'.format(self.section_divider),
                                                 width=1000, html_header='h2', margin_top=50, margin_bottom=5),
                'pop': self.section_header_div(text='Period over period:{}'.format(self.section_divider),
                                               width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                'chord': self.section_header_div(text='Patron networks:{}'.format(self.section_divider),
                                                 width=600, html_header='h3', margin_top=5, margin_bottom=-155),
                'tsa': self.section_header_div(text='Forecasts (TSA):{}'.format(self.section_divider),
                                               width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                'multiline': self.section_header_div(text='Comparative graphs:{}'.format(self.section_divider),
                                                     width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                'patron info': self.section_header_div(text='Patron info:{}'.format(self.section_divider),
                                                       width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                'relationships': self.section_header_div(text='Statistically Significant Relationships:---',
                                                         width=600, html_header='h2', margin_top=5, margin_bottom=-155),
            }
            self.KPI_card_div = self.initialize_cards(self.page_width, height=40)
            # fixed initial x-range of the timeline figure
            start = datetime(2014, 1, 1, 0, 0, 0)
            end = datetime(2019, 5, 15, 0, 0, 0)
            self.tools = [BoxZoomTool(), ResetTool(), PanTool(), SaveTool(), WheelZoomTool()]
            self.timeline_vars = {
                'company': '',
                'event': '',
                'types': ['all'],
                'type': 'all',
                'DF': None,
                'G': figure(
                    title=None, x_axis_type='datetime', width=1200, height=900,
                    y_range=[], x_range=Range1d(start, end), toolbar_location=None),
                'toolbar_box': ToolbarBox()
            }


            # ----- UPDATED DIVS END

        # ----------------------  DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150):
            """Build a Div containing a coloured section heading.

            :param text: heading text.
            :param html_header: heading tag name, e.g. ``'h2'``.
            :param width: width passed to the Div.
            :param margin_top: top margin in pixels.
            :param margin_bottom: bottom margin in pixels; the sign is ignored and
                the margin is always emitted as a negative (pull-up) value.
            :return: a Div with fixed height 15.
            """
            # The template used to hard-code a '-' in front of the value: positive
            # inputs (5) rendered as '-5px', but negative inputs (-155) rendered as
            # the invalid CSS '--155px'. Normalising the sign keeps the former and
            # fixes the latter.
            bottom = -abs(margin_bottom)
            markup = (
                '<div style="margin-top:{top}px;margin-bottom:{bottom}px;">'
                '<{tag} style="color:#4221cc;">{text}</{tag}></div>'
            ).format(top=margin_top, bottom=bottom, tag=html_header, text=text)
            return Div(text=markup, width=width, height=15)

        def information_div(self, width=400, height=300):
            """Return a Div holding the (currently empty) interpretation bullet list.

            ``self.div_style`` and ``self.header_style`` are injected as the
            attributes of the wrapper ``<div>`` and the ``<h4>`` respectively.
            """
            template = """
            <div {}>
                <h4 {}>How to interpret sentiment score</h4>
                <ul style='margin-top:-10px;'>
                    <li>
                    </li>
                    <li>
                    </li>
                    <li>
                    </li>
                    <li>
                    </li>

                </ul>
            </div>

            """
            markup = template.format(self.div_style, self.header_style)
            return Div(text=markup, width=width, height=height)

        def initialize_cards(self, width, height=250):
            """Build the Div holding four blank KPI cards in a flex row.

            Each card gets a design drawn at random from ``KPI_card_css``.

            :return: the Div, or None when building fails (logged).
            """
            try:
                # one blank card per period: year, quarter, month, week
                cards = ''.join(
                    self.card(title='', data='',
                              card_design=random.choice(list(KPI_card_css)))
                    for _ in range(4)
                )

                text = """<div style="margin-top:100px;display:flex; flex-direction:row;">
                {}
                </div>""".format(cards)
                return Div(text=text, width=width, height=height)
            except Exception:
                logger.error('initialize cards', exc_info=True)

        def df_load(self, req_startdate, req_enddate, table, cols, timestamp_col):
            """Return rows between the requested dates with the active filters applied.

            The rows are sliced from ``self.df`` when it already spans the
            requested range; otherwise they are fetched with ``self.pym.load_df``.

            :param req_startdate: inclusive lower bound on *timestamp_col*.
            :param req_enddate: inclusive upper bound on *timestamp_col*.
            :param table: table name passed to the loader.
            :param cols: columns passed to the loader.
            :param timestamp_col: name of the timestamp column.
            :return: the (filtered) DataFrame, or None when nothing could be
                loaded or an error occurred (logged).
            """
            try:
                df = None
                if self.df is not None:
                    stamps = self.df[timestamp_col]
                    # reuse the cached frame only when it covers the whole range
                    if stamps.min() <= req_startdate and stamps.max() >= req_enddate:
                        df = self.df[(stamps >= req_startdate) &
                                     (stamps <= req_enddate)]
                if df is None:
                    df = self.pym.load_df(req_startdate, req_enddate, table=table,
                                          cols=cols, timestamp_col=timestamp_col)
                if df is None:
                    # check before logging: df.head() on None used to raise here
                    return None

                logger.warning('LINE 316: df:%s', df.head())
                if len(df) > 0:
                    # filter_df returns a new frame; its result used to be
                    # discarded, so the selected filters were never applied
                    filtered = self.filter_df(df)
                    if filtered is not None:
                        df = filtered
                return df

            except Exception:
                logger.error('df_load', exc_info=True)

        def load_menus(self,df1):
            """Refill each filter Select with 'all' plus the unique values found in *df1*.

            Only columns listed in ``self.vars`` are refreshed; errors are logged.
            """
            try:
                logger.warning('LINE 315:column%s',list(df1.columns))

                for name in self.vars:
                    choices = ['all']
                    choices.extend(df1[name].unique())
                    self.selects[name].options = choices

            except Exception:
                logger.error('load menus',exc_info=True)

        def filter_df(self, df1):
            """Return *df1* restricted to the values selected in ``self.vars``.

            Entries whose value is ``'all'`` are skipped. *df1* itself is not
            modified; a filtered frame is returned.

            :return: the filtered DataFrame, or None when filtering fails (logged).
            """
            try:
                for key, wanted in self.vars.items():
                    logger.warning('LINE 343-self.df1-%s:%s', key, wanted)
                    if wanted != 'all':
                        logger.warning('LINE 345:key for filtering :%s', key)
                        df1 = df1[df1[key] == wanted]
                # (an unreachable log statement used to follow this return)
                return df1

            except Exception:
                # label was 'period to date' (copy-paste from another method)
                logger.error('filter df', exc_info=True)

        # ------------------------- CARDS END -----------------------------------
        def period_to_date(self, df, timestamp=None, timestamp_filter_col='start_actual', cols=None, period='week'):
            """Return the rows of *df* from the start of the period up to *timestamp*.

            :param df: input frame; its *timestamp_filter_col* is converted to
                datetime in place using ``self.DATEFORMAT_PTD``.
            :param timestamp: inclusive upper bound; defaults to the current
                time truncated to the hour.
            :param timestamp_filter_col: name of the timestamp column.
            :param cols: optional list of columns to keep; None or empty keeps all.
            :param period: period name passed to ``self.first_date_in_period``.
            :return: the filtered DataFrame, or None on error (logged).
            """
            try:
                if timestamp is None:
                    timestamp = datetime.now().replace(minute=0, second=0, microsecond=0)

                start = self.first_date_in_period(timestamp, period)

                df[timestamp_filter_col] = pd.to_datetime(df[timestamp_filter_col], format=self.DATEFORMAT_PTD)
                stamps = df[timestamp_filter_col]
                df = df[(stamps >= start) & (stamps <= timestamp)]
                # `cols` defaults to None instead of a shared mutable list
                if cols is not None and len(cols) > 0:
                    df = df[cols]
                return df
            except Exception:
                logger.error('period to date', exc_info=True)

        def period_over_period(self, df, start_date, end_date, period,
                               history_periods=2, timestamp_col='start_actual'):
            """Stack the current period's rows with the same window from earlier periods.

            Every row gets a ``period`` label (``'0 <period>(s) prev(current)'``
            for the current data, ``'<n> <period>(s) prev'`` for history) and is
            passed through ``self.label_dates_pop``. The window is shifted back
            with ``self.shift_period_range`` while ``counter < history_periods``
            and the window start is not before ``self.initial_date``.

            :param df: rows of the current period (left unmodified).
            :param start_date: start of the current window.
            :param end_date: end of the current window.
            :param period: period name, e.g. 'week', 'month', 'quarter', 'year'.
            :param history_periods: loop bound as int or numeric string.
            :param timestamp_col: name of the timestamp column.
            :return: the combined DataFrame, or None on error (logged).
            """
            try:
                # Take the column list BEFORE helper columns are added, so the
                # historical loads are not asked for 'period'/'dayset'.
                cols = list(df.columns)

                # work on a copy so the caller's frame is not mutated
                df_current = df.copy()
                df_current['period'] = '0 {}(s) prev(current)'.format(period)
                # label the days being compared with the same label
                if len(df_current) > 0:
                    df_current = self.label_dates_pop(df_current, period, timestamp_col)

                # zero out time information
                start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0)
                end = datetime(end_date.year, end_date.month, end_date.day, 0, 0, 0)

                if isinstance(history_periods, str):
                    history_periods = int(history_periods)

                counter = 1
                start, end = self.shift_period_range(period, start, end)
                while counter < history_periods and start >= self.initial_date:
                    if period == 'quarter':
                        logger.warning('start:end %s:%s', start, end)

                    df_temp = self.df_load(start, end, self.table, cols, timestamp_col)
                    if df_temp is not None and len(df_temp) > 1:
                        df_temp['period'] = '{} {}(s) prev'.format(counter, period)
                        # relabel days to get matching day of week, doy, dom
                        df_temp = self.label_dates_pop(df_temp, period, timestamp_col)
                        # drop=True: a plain reset_index() adds an 'index' column,
                        # then 'level_0', then raises on the third concatenation
                        df_current = pd.concat([df_current, df_temp]).reset_index(drop=True)
                        del df_temp
                        gc.collect()

                    # shift the loading window
                    counter += 1
                    start, end = self.shift_period_range(period, start, end)
                    if period == 'week':
                        logger.warning('LINE 327 df_current:%s', df_current.head(10))

                return df_current
            except Exception:
                logger.error('period over period', exc_info=True)

        def label_dates_pop(self, df, period, timestamp_col):
            """Add a ``dayset`` column giving each row's position inside its period.

            * ``'week'``    -> day of week (Monday = 0)
            * ``'month'``   -> day of month
            * ``'year'``    -> day of year
            * ``'quarter'`` -> whole days since the first day of the quarter

            *timestamp_col* is converted to datetime in place. Any other *period*
            leaves *df* without a ``dayset`` column.

            :return: *df*, or None when *df* is None or labelling fails (logged).
            """
            if df is None:
                return None

            def label_qtr_pop(y):
                try:
                    first_month = 3 * ((y.month - 1) // 3) + 1
                    quarter_start = datetime(y.year, first_month, 1)
                    # y - start (not abs(start - y)): flooring a negative
                    # timedelta added a day to every non-midnight timestamp
                    return (y - quarter_start).days
                except Exception:
                    logger.error('df label quarter', exc_info=True)

            try:
                df[timestamp_col] = pd.to_datetime(df[timestamp_col])
                stamps = df[timestamp_col]
                if period == 'week':
                    df['dayset'] = stamps.dt.dayofweek
                elif period == 'month':
                    df['dayset'] = stamps.dt.day
                elif period == 'year':
                    # a Series has no timetuple(); the old call always raised
                    df['dayset'] = stamps.dt.dayofyear
                elif period == 'quarter':
                    df['dayset'] = stamps.apply(label_qtr_pop)

                return df
            except Exception:
                logger.error('label data ', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------
        def graph_periods_to_date(self, df1, timestamp_filter_col, variable):
            """Summarise ``variable`` for each period-to-date window and update the cards.

            For 'week', 'month', 'quarter' and 'year' the frame returned by
            ``self.period_to_date`` (anchored at the configured last date) is
            reduced to the distinct values of ``variable`` and summarised
            according to ``self.groupby_dict[variable]``: an integer total for
            'sum', a rounded mean with a '%' suffix for 'mean', otherwise the
            number of distinct values. The per-period results are handed to
            ``self.update_cards``. Any exception is logged and swallowed.
            """
            try:
                card_values = {}
                for span in ('week', 'month', 'quarter', 'year'):
                    to_date = self.period_to_date(
                        df1,
                        timestamp=dashboard_config['dates']['last_date'],
                        timestamp_filter_col=timestamp_filter_col,
                        period=span)

                    # keep a single row per distinct value of the variable
                    distinct = to_date[[variable]].drop_duplicates(keep='first')
                    how = self.groupby_dict[variable]
                    if how == 'sum':
                        summary = int(distinct[variable].sum())
                    elif how == 'mean':
                        summary = "{}%".format(round(distinct[variable].mean(), 3))
                    else:
                        summary = int(len(distinct[variable].unique()))

                    # free the per-period frames before loading the next window
                    del to_date, distinct
                    gc.collect()
                    card_values[span] = summary

                self.update_cards(card_values)

            except Exception:
                logger.error('graph periods to date', exc_info=True)

        def graph_period_over_period(self, period):
            """Build the grouped bar chart comparing the selected window with earlier periods.

            Loads ``self.variable`` between ``self.pop_start_date`` and
            ``self.pop_end_date``, asks ``self.period_over_period`` for the
            matching historical windows, sums the variable per
            (dayset, period), spreads the periods into columns and plots them
            with hvplot.

            :param period: 'week', 'month' or 'quarter'. The period is dropped
                (nothing is plotted) when the selected window is longer than
                7, 31 or 90 days respectively.
            :return: the hvplot bar chart, or ``None`` when anything raises
                (the error is logged). When the period was dropped, ``p`` is
                never assigned, so ``return p`` raises and this path is taken.
            """
            try:

                periods = [period]
                start_date = self.pop_start_date
                end_date = self.pop_end_date
                # normalise both bounds to midnight datetimes (datetime is also
                # a date, so existing datetimes lose their time component)
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date, datetime.min.time())
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                today = datetime.combine(datetime.today().date(), datetime.min.time())

                # a window starting today is moved back one week and the
                # date picker is updated to match
                if start_date == today:
                    logger.warning('START DATE of WEEK IS TODAY.!NO DATA DATA')
                    start_date = start_date - timedelta(days=7)
                    self.datepicker_pop_start.value = start_date

                cols = [self.variable, self.timestamp_col]

                df = self.df_load(req_startdate=start_date, req_enddate=end_date, table=self.table, cols=cols,
                                  timestamp_col=self.timestamp_col)

                # drop the requested period when the window is longer than it
                if abs(start_date - end_date).days > 7:
                    if 'week' in periods:
                        periods.remove('week')
                if abs(start_date - end_date).days > 31:
                    if 'month' in periods:
                        periods.remove('month')
                if abs(start_date - end_date).days > 90:
                    if 'quarter' in periods:
                        periods.remove('quarter')
                # NOTE: the loop variable rebinds the ``period`` parameter
                for idx, period in enumerate(periods):
                    # NOTE(review): timestamp_col is hard-coded here while the load
                    # above uses self.timestamp_col — confirm they agree
                    df_period = self.period_over_period(df, start_date=start_date, end_date=end_date,
                                                        period=period, history_periods=self.pop_history_periods,
                                                        timestamp_col='start_actual')

                    logger.warning('LINE 368: dayset:%s', df_period.head(30))
                    groupby_cols = ['dayset', 'period']
                    if len(df_period) > 0:
                        # total of the variable per day position and period label
                        df_period = df_period.groupby(groupby_cols).agg({self.variable: 'sum'})
                        df_period = df_period.reset_index()
                    else:
                        df_period = df_period.rename(index=str, columns={'day': 'dayset'})

                    # column names before the split, used below to find the new ones
                    prestack_cols = list(df_period.columns)

                    df_period = self.split_period_into_columns(df_period, col_to_split='period',
                                                               value_to_copy=self.variable)

                    # short term fix: filter out the unnecessary first day added by a corrupt quarter functionality
                    if period == 'quarter':
                        if 'dayset' in df_period.columns:
                            min_day = df_period['dayset'].min()
                            logger.warning('LINE 252: MINIUMUM DAY:%s', min_day)
                            df_period = df_period[df_period['dayset'] > min_day]

                    poststack_cols = list(df_period.columns)

                    title = "{} over {}".format(period, period)
                    # the columns added by the split are the series to plot
                    plotcols = list(np.setdiff1d(poststack_cols, prestack_cols))
                    # include current period if not extant
                    df_period, plotcols = self.pop_include_zeros(df_period, plotcols=plotcols, period=period)
                    # axis label follows the aggregation configured for the variable
                    if self.groupby_dict[self.variable] == 'mean':
                        xlabel = '%'
                    else:
                        xlabel = 'frequency'

                    # make sure the x column exists even if upstream dropped it
                    if 'dayset' not in df_period.columns:
                        leng = len(df_period)
                        if leng > 0:
                            df_period['dayset'] = 0
                        else:
                            df_period['dayset'] = ''


                    # first period creates the plot, later ones are appended to the layout
                    if idx == 0:
                        p = df_period.hvplot.bar('dayset', plotcols, rot=45, title=title,
                                                 stacked=False, width=1200, height=400, value_label=xlabel)
                    else:
                        p += df_period.hvplot.bar('dayset', plotcols, rot=45, title=title,
                                                  stacked=False, width=1200, height=400, value_label=xlabel)
                return p

            except Exception:
                logger.error('period over period to date', exc_info=True)

        def patron_info_table(self, launch):
            """Render two tables describing the selected patron and the patron's friends.

            When a specific patron is selected (``self.vars['patron']`` is not
            'all'), the distinct friend rows and the distinct patron-profile
            rows are taken from ``self.df1``; otherwise both tables are empty.

            :param launch: stream counter supplied by the DynamicMap; unused.
            :return: the patron table followed by the friends table as one
                layout, or ``None`` if anything raises (the error is logged).
            """
            try:
                friend_cols = ['patron_friend', 'patron_friend_gender', 'patron_friend_parish']
                patron_cols = ['patron', 'patron_likes', 'patron_gender',
                               'patron_discovery', 'patron_parish']

                if self.vars['patron'] != 'all':
                    # select with a list of columns: a bare tuple is treated as
                    # a single (missing) key and raises KeyError.
                    # drop_duplicates returns a new frame, so the slice of
                    # self.df1 is never modified in place.
                    tmp_df = self.df1[friend_cols].drop_duplicates(keep='first')
                    # likes
                    tmp_df1 = self.df1[patron_cols].drop_duplicates(keep='first')
                else:
                    tmp_df = pd.DataFrame()
                    tmp_df1 = pd.DataFrame()

                p = tmp_df.hvplot.table(width=400)

                q = tmp_df1.hvplot.table(width=600)

                return q + p

            except Exception:
                logger.error('patron friends table', exc_info=True)

        def chord_diagram(self, launch):
            """Build a HoloViews chord diagram linking staff to patrons, weighted by rate.

            Nodes are created from the distinct values of the 'staff' column of
            ``self.df1``; links are the per-(source, target) sums of 'rate',
            rescaled to parts per thousand of the total and filtered to those at
            or above the quantile given by
            ``self.chord_data['percentile_threshold']``.

            :param launch: stream counter supplied by the DynamicMap; unused.
            :return: the configured ``hv.Chord``, or ``None`` if anything raises
                (the error is logged).
            """
            try:
                def normalize_value(x, total):
                    # share of the total in parts per thousand, never below 1
                    x = int((x / total) * 1000)
                    if x <= 0:
                        return 1
                    return x

                df = self.df1.copy()
                # chord setup
                var1 = 'staff'
                var2 = 'patron'
                # --------------  nodes
                data = {}
                data['nodes'] = []
                source_list = df[var1].tolist()
                names = list(set(source_list))

                # maps patron -> staff (keys come from var2, values from var1)
                var1_dict = dict(zip(df[var2], df[var1]))
                # NOTE(review): type_dict and types are never used below
                type_dict = {}
                types = list(set(df['type'].tolist()))
                # staff name -> node index
                name_dict = {}
                for idx, name in enumerate(names):
                    name_dict[name] = idx

                for idx, name in enumerate(names):
                    # NOTE(review): ``name`` is a staff value but var1_dict is keyed by
                    # patron — this lookup raises KeyError unless the name is also a patron
                    type_tmp = var1_dict[name]
                    index = name_dict[name]
                    data['nodes'].append({'OwnerID': index, 'index': idx, 'Type': type_tmp})

                nodes = hv.Dataset(pd.DataFrame(data['nodes']), 'index')

                # --------- make the links

                data['links'] = []

                for idx, row in df.iterrows():
                    src = name_dict[row[var1]]
                    # NOTE(review): name_dict only holds staff names, so a patron that is
                    # not also a staff name raises KeyError here — confirm intent
                    tgt = name_dict[row[var2]]
                    val = row['rate']
                    data['links'].append({'source': src, 'target': tgt, 'value': val})

                links = pd.DataFrame(data['links'])
                # get the individual links
                links = links.groupby(['source', 'target'])['value'].sum()
                links = links.reset_index()
                total = links['value'].sum()
                links['value'] = links['value'].apply(lambda x: normalize_value(x, total))

                # filter for top percentile
                quantile_val = links['value'].quantile(self.chord_data['percentile_threshold'])
                links = links[links['value'] >= quantile_val]
                # logger.warning('after quantile filter:%s',len(links))

                chord_ = hv.Chord((links, nodes), ['source', 'target'], ['value'])
                chord_.opts(opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(),
                                       labels='Type', node_color=dim('index').str(), width=1000, height=1000))

                return chord_

            except Exception:
                logger.error('chord diagram', exc_info=True)

        def forecasts(self, launch):
            """Fit a Prophet model to the daily series of the forecast variable and plot it.

            ``self.df`` is resampled to daily frequency on ``self.timestamp_col``:
            'remuneration' and 'rate' are summed per day, any other variable is
            counted per day. The series is fitted with ``Prophet`` and projected
            ``self.forecast_days`` days ahead.

            :param launch: stream counter supplied by the DynamicMap; unused.
            :return: a layout of two overlays — the forecast ('yhat' as a line,
                its lower/upper bounds as scatter) and the components ('trend',
                plus 'weekly' when present) — or ``None`` if anything raises
                (the error is logged).
            """
            try:
                # NOTE(review): the frame logged here is self.df1 but the model is
                # fitted on self.df — confirm which one is intended
                logger.warning('LINE 660: self.df1 :%s', self.df1.head())
                df = self.df.copy()
                df = df.set_index(self.timestamp_col)

                tsa_variable = self.tsa_variable
                if self.tsa_variable in ['remuneration','rate']:
                    df = df.resample('D').agg({tsa_variable: 'sum'})
                else:
                    # calculate attendance
                    if self.tsa_variable == 'attendance':
                        # NOTE(review): other methods in this class use the column
                        # name 'patron' — confirm 'patrons' exists in self.df
                        tsa_variable = 'patrons'
                    df = df.resample('D').agg({tsa_variable: 'count'})

                df = df.fillna(0)
                df = df.reset_index()
                logger.warning('LINE 672: df:%s', df.head())

                # Prophet expects the columns to be named 'ds' and 'y'
                df = df.rename(columns={self.timestamp_col: 'ds', tsa_variable: 'y'})
                df = df[['ds', 'y']]
                m = Prophet()
                m.fit(df)

                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)

                # debug output goes through the logger like the rest of the file
                logger.warning('forecast tail:%s',
                               forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
                logger.warning('LINE 689 forecast columns:%s', list(forecast.columns))

                if tsa_variable in ['rate','remuneration']:
                    value_label = '$'
                else:
                    value_label = '#'

                # forecast overlay: prediction as a line, bounds as scatter
                p = forecast.hvplot.line(x='ds', y='yhat', width=600,
                                         height=250, value_label=value_label).relabel('yhat')
                for col in ['yhat_lower', 'yhat_upper']:
                    p *= forecast.hvplot.scatter(x='ds', y=col, width=600,
                                                 height=250, value_label=value_label).relabel(col)

                # component overlay: trend always, weekly only when present
                q = forecast.hvplot.line(x='ds', y='trend', width=550,
                                         height=250, value_label=value_label).relabel('trend')
                if 'weekly' in forecast.columns:
                    q *= forecast.hvplot.line(x='ds', y='weekly',
                                              width=550, height=250, value_label=value_label).relabel('weekly')

                return p + q
            except Exception:
                # was mislabelled "box plot:", which pointed at the wrong method
                logger.error("forecasts:", exc_info=True)



        def kruskal_label(self,df,var,treatment):
            """Run a Kruskal-Wallis test of ``var`` across the levels of ``treatment``.

            :param df: frame holding both columns.
            :param var: column whose values are compared.
            :param treatment: column whose distinct values define the groups.
            :return: ``(stat, pvalue, txt)`` where ``txt`` is 'No' when the
                p-value exceeds 0.05 and 'Yes' otherwise; ``('na', 'na', 'na')``
                when the test itself raises; ``None`` if the fallback raises
                too (the error is logged).
            """
            try:
                try:
                    # one sample of values per treatment level
                    samples = [grp[var].values for _, grp in df.groupby(treatment)]
                    stat, pvalue = kruskal(*samples)
                    logger.warning('stat:%s,pvalue:%s', stat, pvalue)
                    txt = 'No' if pvalue > 0.05 else 'Yes'
                    return stat, pvalue, txt
                except Exception:
                    # the test could not be computed for this pair
                    logger.warning('Line 737: not enough groups')
                    return 'na', 'na', 'na'
            except Exception:
                logger.error('kruskal label', exc_info=True)


        def non_para_table(self, launch):
            """Tabulate Kruskal-Wallis results for each outcome against each filter variable.

            The outcomes 'rate', 'remuneration' and 'patron' are each tested
            against every key of ``self.vars`` (skipping the outcome itself).
            For 'patron' the data is first reduced to the number of distinct
            patrons per treatment level. Float results are rounded to three
            decimals.

            :param launch: stream counter supplied by the DynamicMap; unused.
            :return: an hvplot table of the results, or ``None`` if anything
                raises (the error is logged).
            """
            try:
                headers = ['Variable 1', 'Variable 2', 'Relationship', 'stat', 'p-value']
                corr_dict = {header: [] for header in headers}

                # prep df
                data = self.df1.copy().drop(self.timestamp_col, axis=1)
                logger.warning('LINE 758; df:%s',list(data.columns))

                for var in ('rate', 'remuneration', 'patron'):
                    for treatment in self.vars.keys():
                        logger.warning('col :%s', treatment)
                        pair = data[[var, treatment]]

                        if treatment == var:
                            continue

                        if var == 'patron':
                            pair = pair.groupby([treatment]).agg({'patron': 'nunique'})
                            pair = pair.reset_index()

                        stat, pvalue, txt = self.kruskal_label(pair, var, treatment)

                        # round only when the test produced real numbers
                        is_numeric = isinstance(pvalue, float)
                        corr_dict['Variable 1'].append(var)
                        corr_dict['Variable 2'].append(treatment)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['stat'].append(round(stat, 3) if is_numeric else stat)
                        corr_dict['p-value'].append(round(pvalue, 3) if is_numeric else pvalue)
                        logger.warning('LINE 756:%s-%s completed',var,treatment)

                results = pd.DataFrame(corr_dict)
                return results.hvplot.table(columns=headers, width=550, height=600,
                                            title='Effect of variable levels on outcomes')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def multiline(self, launch=1):
            """Plot one resampled time-series line per level of the chosen categorical variable.

            ``self.df1`` is filtered by every entry of ``self.vars`` that is not
            'all' (except the x variable itself), grouped by the x variable and
            resampled with ``self.resample_period['multiline']``. 'attendance'
            is plotted as the number of distinct patrons; every other y
            variable is summed.

            :param launch: stream counter supplied by the DynamicMap; unused.
            :return: an overlay with one line per level, or ``None`` if
                anything raises — including when there are no levels to plot
                (the error is logged).
            """
            try:
                yvar = self.multiline_variable['y']
                if yvar == 'attendance':
                    yvar = 'patron'

                xvar = self.multiline_variable['x']

                df = self.df1.copy()
                # use the instance's own filters rather than the enclosing
                # scope's ``thistab`` so the method works on any instance
                for key in self.vars.keys():
                    if self.vars[key] != 'all' and key != xvar:
                        df = df[df[key] == self.vars[key]]

                df = df[[xvar, yvar, self.timestamp_col]]
                df = df.set_index(self.timestamp_col)

                # distinct patrons for attendance, totals for everything else
                how = 'nunique' if yvar == 'patron' else 'sum'
                df = df.groupby(xvar).resample(self.resample_period['multiline']).agg({yvar: how})
                df = df.reset_index()
                if yvar == 'patron':
                    logger.warning('LINE 817: df:%s', df.head())
                else:
                    logger.warning('LINE 820: df:%s',df.head())

                # one line per level, overlaid on the first
                for idx, line in enumerate(df[xvar].unique()):
                    level_df = df[df[xvar] == line].fillna(0)
                    curve = level_df.hvplot.line(x=self.timestamp_col, y=yvar,
                                                 width=1200, height=500).relabel(line)
                    if idx == 0:
                        p = curve
                    else:
                        p *= curve
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    def update():
        """Filter-button callback: re-filter the data, refresh menus and cards, and redraw.

        Fires ``stream_launch`` and ``stream_launch_corr`` with an incremented
        trigger so the DynamicMaps bound to them are recomputed.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        # NOTE(review): the return value is discarded here, whereas the initial setup
        # assigns it to thistab.df1 — confirm filter_df also updates df1 itself
        thistab.filter_df(thistab.df)
        thistab.load_menus(thistab.df1)
        thistab.graph_periods_to_date(thistab.df, thistab.timestamp_col, thistab.variable)
        # a new trigger value makes the stream event distinct from the last one
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_period_over_period():
        """Period-over-period button callback: copy the widget selections and redraw.

        Reads the number of history periods and the start/end dates from the
        widgets, then fires ``stream_launch`` once and
        ``stream_tsa_variable_launch`` twice with incremented triggers.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        thistab.pop_start_date = thistab.datepicker_pop_start.value  # trigger period over period
        thistab.pop_end_date = datepicker_pop_end.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_tsa_variable_launch.event(launch=thistab.trigger)
        # NOTE(review): the forecast stream is fired a second time here — confirm
        # this is intended and not a leftover
        thistab.trigger += 1
        stream_tsa_variable_launch.event(launch=thistab.trigger)
        # NOTE(review): the multiline settings are copied but the multiline stream
        # is not fired in this callback
        thistab.resample_period['multiline'] = select_resample_period['multiline'].value
        thistab.multiline_variable['x'] = multiline_x_select.value
        thistab.multiline_variable['y'] = multiline_y_select.value
        thistab.notification_updater("ready")

    def update_forecasts():
        """Forecast-button callback: apply the current filters and forecast settings, then redraw.

        Copies the value of every filter widget into ``thistab.vars``,
        re-filters the data, reads the forecast variable and horizon from their
        widgets and fires ``stream_tsa_variable_launch`` with an incremented
        trigger.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        for key in thistab.vars.keys():
            # store the widget's selected value, not the widget object: the
            # entries of vars are compared against 'all' and against column values
            thistab.vars[key] = thistab.selects[key].value
        thistab.filter_df(thistab.df)
        thistab.tsa_variable = tsa_variable_select.value
        thistab.forecast_days = int(select_forecast_days.value)
        # a new trigger value makes the stream event distinct from the last one
        thistab.trigger += 1
        stream_tsa_variable_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_multiline_variables():
        """Multiline-button callback: copy the multiline widget selections and redraw that plot.

        Fires ``stream_multiline_variable_launch`` with an incremented trigger.
        """
        thistab.notification_updater("Calculations underway. Please be patient")

        # resample period and x/y variables come straight from their Select widgets
        thistab.resample_period['multiline'] = select_resample_period['multiline'].value
        thistab.multiline_variable['x'] = multiline_x_select.value
        thistab.multiline_variable['y'] = multiline_y_select.value

        # a new trigger value makes the stream event distinct from the last one
        thistab.trigger += 1
        stream_multiline_variable_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")


    # Build the tab: load the data, create widgets and streams, render the
    # DynamicMaps, wire the callbacks and lay everything out in a grid. Any
    # failure is logged and replaced by the error tab from tab_error_flag.
    try:
        cols = []
        thistab = Thistab(table='business_composite', cols=cols)
        # -------------------------------------  SETUP   ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        # data is loaded from January 1st of the configured last date's year
        first_date = datetime(last_date.year, 1, 1, 0, 0, 0)

        loadcols = []
        thistab.df = thistab.df_load(first_date, last_date,thistab.table,loadcols, timestamp_col=thistab.timestamp_col)
        thistab.df1 = thistab.filter_df(thistab.df)
        thistab.load_menus(thistab.df)

        thistab.graph_periods_to_date(thistab.df, timestamp_filter_col=thistab.timestamp_col, variable=thistab.variable)
        thistab.section_header_updater('cards', label='')
        thistab.section_header_updater('pop', label='')

        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        # the period-over-period window defaults to the week containing last_date
        thistab.pop_end_date = last_date
        thistab.pop_start_date = thistab.first_date_in_period(thistab.pop_end_date, 'week')

        # one stream per group of plots; callbacks fire them with a new launch value
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_tsa_variable_launch = streams.Stream.define('Launch', launch=-1)()
        stream_multiline_variable_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()


        datepicker_pop_end = DatePicker(title="Period end", min_date=first_date_range,
                                max_date=last_date_range, value=thistab.pop_end_date)

        pop_number_select = Select(title='Select # of comparative periods',
                           value=str(5),
                           options=thistab.menus['history_periods'])
        pop_button = Button(label="Select dates/periods, then click me!", width=15, button_type="success")

        filter_button = Button(label="Select filters, then click me!", width=15, button_type="success")
        multiline_button = Button(label="Select multiline variables, then click me!", width=15, button_type="success")

        tsa_variable_select = Select(title='Select forecast variable',
                           value='rate',options=['rate','remuneration','attendance'])
        tsa_button = Button(label="Select forecast variables, then click me!", width=15, button_type="success")

        select_forecast_days = Select(title='Select # of days which you want forecasted',
                                      value=str(thistab.forecast_days),
                                      options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])

        multiline_y_select = Select(title='Select numerical variable for comparison',
                             value=thistab.multiline_variable['y'], options=thistab.multiline_vars['ys'])

        multiline_x_select = Select(title='Select categorical variable for comparison',
                            value=thistab.multiline_variable['x'], options=thistab.multiline_vars['xs'])

        select_resample_period = {
            'multiline' : Select(title='Select resample period',
                value=thistab.resample_period['multiline'], options=['D','W','M','Q'])
        }

        # ---------------------------------  GRAPHS ---------------------------
        # each DynamicMap re-runs its method whenever the bound stream fires

        hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)

        hv_pop_month = hv.DynamicMap(thistab.pop_month, streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)

        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)

        hv_tsa = hv.DynamicMap(thistab.forecasts, streams=[stream_tsa_variable_launch])
        tsa = renderer.get_plot(hv_tsa)

        hv_chord = hv.DynamicMap(thistab.chord_diagram, streams=[stream_launch])
        chord = renderer.get_plot(hv_chord)

        hv_patron_info = hv.DynamicMap(thistab.patron_info_table, streams=[stream_launch])
        patron_info = renderer.get_plot(hv_patron_info)

        hv_non_para_table = hv.DynamicMap(thistab.non_para_table,streams=[stream_launch_corr])
        non_para_table = renderer.get_plot(hv_non_para_table)

        hv_multiline = hv.DynamicMap(thistab.multiline, streams=[stream_multiline_variable_launch])
        multiline = renderer.get_plot(hv_multiline)

        # -------------------------------- CALLBACKS ------------------------

        filter_button.on_click(update)
        pop_button.on_click(update_period_over_period)  # lags array
        tsa_button.on_click(update_forecasts)
        multiline_button.on_click(update_multiline_variables)

        # controls
        controls_multiline = WidgetBox(
            multiline_x_select,
            multiline_y_select,
            select_resample_period['multiline'],
            multiline_button
        )

        controls_tsa = WidgetBox(
            tsa_variable_select,
            select_forecast_days,
            tsa_button

        )

        # NOTE(review): datepicker_pop_end and filter_button are created above but
        # are not placed in any control box or grid row in this block
        controls_pop = WidgetBox(
            pop_number_select,
            pop_button,
        )

        controls_filters = WidgetBox(
            thistab.selects['event'],
            thistab.selects['company'],
            thistab.selects['patron'],
            thistab.selects['patron_likes'],
            thistab.selects['manager_education'],
            thistab.selects['staff_education'],
            thistab.selects['manager_gender'],
            thistab.selects['staff_gender'],
            thistab.selects['patron_gender'],
            thistab.selects['manager_parish'],
            thistab.selects['staff_parish'],
            thistab.selects['patron_parish'],
            thistab.selects['type'],
        )

        # create the dashboards
        # one inner list per grid row, top to bottom
        grid_data = [
            [thistab.notification_div['top']],
            [Spacer(width=20, height=40)],
            [thistab.section_headers['cards']],
            [Spacer(width=20, height=2)],
            [thistab.KPI_card_div,controls_filters],
            [thistab.section_headers['pop']],
            [Spacer(width=20, height=25)],
            [pop_week.state, controls_pop],
            [pop_month.state],
            [pop_quarter.state],
            [thistab.section_headers['patron info']],
            [Spacer(width=20, height=25)],
            [patron_info.state],
            [chord.state],
            [thistab.section_headers['tsa']],
            [Spacer(width=20, height=25)],
            [tsa.state, controls_tsa],
            [thistab.section_headers['relationships']],
            [Spacer(width=20, height=25)],
            [non_para_table.state],
            [thistab.section_headers['multiline']],
            [Spacer(width=20, height=25)],
            [multiline.state, controls_multiline],
            [thistab.notification_div['bottom']]
        ]

        grid = gridplot(grid_data)
        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        # fall back to the generic error tab so the dashboard still renders
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #8

Datei anzeigen

Datei: developer_adoption.py Projekt: andre-aion/analytics_demo

def KPI_developer_adoption_tab(page_width,DAYS_TO_LOAD=90):
    class Thistab(KPI):
        def __init__(self, table, cols=None):
            """Set up state, widgets and static divs for the developer-adoption tab.

            Args:
                table: table name, forwarded to ``KPI.__init__``.
                cols: columns forwarded to ``KPI.__init__``; ``None`` (the
                    default) is replaced by a fresh empty list.
            """
            # The former ``cols=[]`` default was one list object shared by every
            # call; build a new list per instance instead.
            cols = [] if cols is None else cols
            KPI.__init__(self, table, name='developer', cols=cols)
            self.table = table
            self.df = None

            self.checkboxgroup = {}

            # Placeholder cards (empty title and data), one per period.
            self.period_to_date_cards = {
                'year': self.card('', ''),
                'quarter': self.card('', ''),
                'month': self.card('', ''),
                'week': self.card('', '')
            }
            # Period-to-date window starts on 1 January of the current year.
            self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0)

            self.timestamp_col = 'block_timestamp'
            # Default dependent variable: first entry of the DV menu.
            self.variable = self.menus['developer_adoption_DVs'][0]

            self.datepicker_pop_start = DatePicker(
                title="Period start", min_date=self.initial_date,
                max_date=dashboard_config['dates']['last_date'],
                value=dashboard_config['dates']['last_date'])

            # ------- DIVS setup begin
            self.page_width = page_width
            self.KPI_card_div = self.initialize_cards(width=self.page_width, height=1000)
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                          position:relative;background:black;margin-bottom:200px">
                          <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            header_width = int(self.page_width*.5)
            self.section_headers = {
                'cards': self.section_header_div(
                    text='Period to date:{}'.format(self.section_divider),
                    width=header_width, html_header='h2', margin_top=5, margin_bottom=-155),
                'pop': self.section_header_div(
                    text='Period over period:{}'.format(self.section_divider),
                    width=header_width, html_header='h2', margin_top=5, margin_bottom=-155),
                # This title has no '{}' placeholder, so the former
                # ``.format(self.section_divider)`` was a no-op and is dropped;
                # the rendered text is unchanged.
                'sig_ratio': self.section_header_div(
                    text='Time series of ratio of DV to significant IVs',
                    width=header_width, html_header='h2', margin_top=5, margin_bottom=-155),
            }

        # ----------------------  DIVS ----------------------------

        def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150):
            """Build the Div that renders one section heading.

            Args:
                text: heading text placed inside the header tag.
                html_header: tag name used for the heading (e.g. ``'h2'``).
                width: width passed to the Div.
                margin_top: top margin in px.
                margin_bottom: signed bottom margin in px (negative values pull
                    the following content upwards).

            Returns:
                A ``Div`` of height 15 containing the heading markup.
            """
            # margin_bottom already carries its sign (default and callers pass
            # negative numbers). The template used to prepend another '-',
            # producing 'margin-bottom:--155px', which is not valid CSS.
            markup = (
                '<div style="margin-top:{}px;margin-bottom:{}px;">'
                '<{} style="color:#4221cc;">{}</{}></div>'
            ).format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=markup, width=width, height=15)

        def information_div(self, width=400, height=300):
            """Return the 'How to interpret relationships' help box as a Div.

            The list items are empty placeholders; only the frame and the title
            are rendered.
            """
            box_attrs = """ 
                style='width:350px;margin-right:-800px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            template = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                 <li>
                </li>
                 <li>
                </li>
            </ul>
            </div>

            """
            markup = template.format(box_attrs, self.header_style)
            return Div(text=markup, width=width, height=height)

        # -------------------- CARDS -----------------------------------------

        def initialize_cards(self, width, height=250):
            """Create the Div holding four empty KPI cards stacked in a column.

            Each card gets a design picked at random from ``KPI_card_css``.
            Returns the Div, or ``None`` if anything fails (the error is logged).
            """
            try:
                blank_cards = []
                for _ in ('year', 'quarter', 'month', 'week'):
                    chosen = random.choice(list(KPI_card_css.keys()))
                    blank_cards.append(self.card(title='', data='', card_design=chosen))
                wrapper = """<div style="margin-top:100px;display:flex;flex-direction:column;">
                         {}
                         </div>"""
                return Div(text=wrapper.format(''.join(blank_cards)), width=width, height=height)
            except Exception:
                logger.error('initialize cards', exc_info=True)


        # -------------------- GRAPHS -------------------------------------------
        def graph_periods_to_date(self, df1, timestamp_filter_col, variable):
            """Rebuild the period-to-date KPI cards for ``variable``.

            For each of week, month, quarter and year the rows of ``df1`` in the
            period ending at ``dashboard_config['dates']['last_date']`` are
            de-duplicated and counted. The count goes on a card whose title also
            shows payroll cost per unit when the column sum is non-zero, followed
            by the cards returned by ``calc_sig_effect_card_data``. The per-period
            HTML is handed to ``update_significant_DV_cards``.

            Args:
                df1: source frame passed to ``period_to_date`` (its result is
                    ``.compute()``d, so presumably a dask dataframe).
                timestamp_filter_col: timestamp column used for period filtering.
                variable: column to sum and to report on.

            Errors are logged and swallowed; the method always returns ``None``.
            """
            try:
                # Loop invariants: the row wrapper and the unit label (last
                # '_'-separated token of the variable name).
                row_open = """<div style="width:{}px;display:flex;flex-direction:row;">"""\
                    .format(int(self.page_width*.6))
                unit = variable.split('_')[-1]

                cards_by_period = {}
                for period in ['week', 'month', 'quarter', 'year']:
                    df = self.period_to_date(df1,
                        timestamp=dashboard_config['dates']['last_date'],
                        timestamp_filter_col=timestamp_filter_col, period=period)
                    # get unique instances
                    df = df.compute()
                    df = df.drop_duplicates(keep='first')

                    count = len(df)
                    gc.collect()

                    title = "{} to date".format(period)
                    denom = df[variable].sum()
                    if denom != 0:
                        # Cost per unit is only defined for a non-zero total.
                        payroll = self.payroll_to_date(period)
                        cost_per_var = round(payroll/denom, 2)
                        title += "</br>${} per {}".format(cost_per_var, unit)

                    design = random.choice(list(KPI_card_css.keys()))
                    row = row_open
                    row += self.card(title=title, data=count, card_design=design)

                    # add the statistically significant point estimates; use the
                    # same variable that was summed above rather than self.variable
                    row += self.calc_sig_effect_card_data(df, interest_var=variable, period=period)
                    row += """</div>"""
                    # Was a bare print() to stdout; keep it available at debug level.
                    logger.debug('period-to-date cards for %s: %s', period, row)
                    cards_by_period[period] = row
                    del df
                self.update_significant_DV_cards(cards_by_period)

            except Exception:
                logger.error('graph periods to date', exc_info=True)


        def graph_period_over_period(self,period):
            """Build the period-over-period bar chart for ``period``.

            Loads ``self.variable`` between ``self.pop_start_date`` and
            ``self.pop_end_date``, lets ``period_over_period`` add the history
            periods, pivots the ``period`` column into one column per period and
            returns an hvplot bar chart over ``dayset``.

            Args:
                period: 'week', 'month' or 'quarter' (the values checked below).

            Returns:
                The hvplot object, or ``None`` when an exception was logged.
                NOTE(review): when the selected date span is longer than the
                period allows, ``periods`` becomes empty, ``p`` is never bound,
                and ``return p`` raises; that is caught below, so the caller
                gets ``None`` plus an error log entry.
            """
            try:
                periods = [period]
                start_date = self.pop_start_date
                end_date = self.pop_end_date
                # datetime is a subclass of date, so both branches also run for
                # datetime inputs and reset the time part to midnight.
                if isinstance(start_date,date):
                    start_date = datetime.combine(start_date,datetime.min.time())
                if isinstance(end_date,date):
                    end_date = datetime.combine(end_date,datetime.min.time())
                today = datetime.combine(datetime.today().date(),datetime.min.time())
                '''
                - if the start day is today (there is no data for today),
                  adjust start date
                '''
                if start_date == today:
                    logger.warning('START DATE of WEEK IS TODAY.!NO DATA DATA')
                    start_date = start_date - timedelta(days=7)
                    # keep the date picker widget in sync with the adjusted start
                    self.datepicker_pop_start.value = start_date

                cols = [self.variable,self.timestamp_col, 'day']
                df = self.load_df(start_date=start_date,end_date=end_date,cols=cols,timestamp_col='block_timestamp')
                # Drop the requested period when the selected span is longer than it.
                if abs(start_date - end_date).days > 7:
                    if 'week' in periods:
                        periods.remove('week')
                if abs(start_date - end_date).days > 31:
                    if 'month' in periods:
                        periods.remove('month')
                if abs(start_date - end_date).days > 90:
                    if 'quarter' in periods:
                        periods.remove('quarter')

                # NOTE: the loop variable rebinds the ``period`` parameter.
                for idx,period in enumerate(periods):
                    df_period = self.period_over_period(df, start_date = start_date, end_date=end_date,
                                                        period=period, history_periods=self.pop_history_periods,
                                                        timestamp_col='block_timestamp')

                    groupby_cols = ['dayset', 'period']
                    if len(df_period) > 0:
                        # total of the variable per (dayset, period)
                        df_period = df_period.groupby(groupby_cols).agg({self.variable: 'sum'})
                        df_period = df_period.reset_index()
                        df_period = df_period.compute()
                    else:
                        # empty result: materialize and rename 'day' so the
                        # 'dayset' column used below exists
                        df_period = df_period.compute()
                        df_period = df_period.rename(index=str, columns={'day': 'dayset'})
                    # Columns before the pivot; the difference afterwards gives
                    # the newly created per-period columns to plot.
                    prestack_cols = list(df_period.columns)
                    logger.warning('Line 179:%s', df_period.head(10))
                    df_period = self.split_period_into_columns(df_period, col_to_split='period',
                                                               value_to_copy=self.variable)

                    # short term fix: filter out the unnecessary first day added by a corrupt quarter functionality
                    if period == 'quarter':
                        min_day = df_period['dayset'].min()
                        logger.warning('LINE 252: MINIUMUM DAY:%s', min_day)
                        df_period = df_period[df_period['dayset'] > min_day]

                    logger.warning('line 180 df_period columns:%s', df_period.head(50))
                    poststack_cols = list(df_period.columns)
                    title = "{} over {}".format(period, period)

                    plotcols = list(np.setdiff1d(poststack_cols, prestack_cols))
                    df_period, plotcols = self.pop_include_zeros(df_period=df_period, plotcols=plotcols, period=period)

                    # first chart creates the layout, later ones are appended to it
                    if idx == 0:
                        p = df_period.hvplot.bar('dayset',plotcols,rot=45,title=title,
                                                 stacked=False,width=int(self.page_width*.8),height=400,value_label='#')
                    else:
                        p += df_period.hvplot.bar('dayset',plotcols,rot=45,title=title,
                                                  stacked=False,width=int(self.page_width*.8),height=400,value_label='#')
                return p

            except Exception:
                logger.error('period over period to date', exc_info=True)


        # --------------------------------  PLOT TRENDS FOR SIGNIFICANT RATIOS  --------------------------
        def graph_significant_ratios_ts(self, launch=-1):
            """Plot the significant-ratio time series as an hvplot line chart.

            ``launch`` is not read in the body. The column named by
            ``self.variable`` is dropped before plotting. Returns the plot, or
            ``None`` after logging if anything raises.
            """
            try:
                ratios = self.make_significant_ratios_df(
                    self.df,
                    resample_period=self.resample_period,
                    interest_var=self.variable,
                    timestamp_col='block_timestamp',
                )
                # the variable of interest itself is not plotted
                if self.variable in ratios.columns:
                    ratios = ratios.drop(self.variable, axis=1)

                plot_width = int(self.page_width*.8)
                return ratios.hvplot.line(width=plot_width, height=400)

            except Exception:
                logger.error('graph significant ratios', exc_info=True)

    def update_variable(attrname, old, new):
        """Callback registered on ``variable_select``: switch the plotted variable.

        Rebuilds the period-to-date cards, refreshes the section headers and
        fires both streams so the dynamic maps redraw.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        chosen = variable_select.value
        thistab.variable = chosen
        thistab.graph_periods_to_date(thistab.df, 'block_timestamp', chosen)
        for section in ('cards', 'pop'):
            thistab.section_header_updater(section, label='')
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_sig_ratio.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_period_over_period():
        """Button callback: copy the widget values into ``thistab`` and redraw.

        Reads the number of history periods and both date pickers, then fires
        ``stream_launch``.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        # both ends of the period-over-period window come from the date pickers
        thistab.pop_start_date = thistab.datepicker_pop_start.value
        thistab.pop_end_date = datepicker_pop_end.value
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_resample(attrname, old, new):
        """Callback registered on ``resample_select``: change the resample period.

        Only the significant-ratio stream is fired.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.resample_period = resample_select.value
        thistab.trigger = thistab.trigger + 1
        stream_launch_sig_ratio.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_history_periods(attrname, old, new):
        """Callback registered on ``pop_number_select``: change the number of
        comparison periods and fire ``stream_launch``.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Build the tab: load data, create widgets and plots, wire the callbacks and
    # lay everything out. Any failure is logged and replaced by the error tab
    # returned from tab_error_flag.
    try:
        cols = ['aion_fork','aion_watch','aion_release','aion_issue','aion_push','block_timestamp']
        thistab = Thistab(table='account_ext_warehouse', cols=cols)
        # -------------------------------------  SETUP   ----------------------------
        # format dates
        # *_range values bound the "Period end" date picker; first_date/last_date
        # bound the initial data load (1 January of last_date's year .. last_date).
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = datetime(last_date.year,1,1,0,0,0)

        thistab.df = thistab.load_df(first_date, last_date,cols,'block_timestamp')
        thistab.graph_periods_to_date(thistab.df,timestamp_filter_col='block_timestamp',variable=thistab.variable)
        thistab.section_header_updater('cards',label='')
        thistab.section_header_updater('pop',label='')

        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        # Default period-over-period window: the week containing last_date
        # (as computed by first_date_in_period).
        thistab.pop_end_date = last_date
        thistab.pop_start_date = thistab.first_date_in_period(thistab.pop_end_date, 'week')

        # Streams whose 'launch' value is bumped by the callbacks to force the
        # DynamicMaps below to redraw.
        stream_launch = streams.Stream.define('Launch',launch=-1)()
        stream_launch_sig_ratio = streams.Stream.define('Launch_sigratio',launch=-1)()

        datepicker_pop_end = DatePicker(title="Period end", min_date=first_date_range,
                                        max_date=last_date_range, value=thistab.pop_end_date)

        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(thistab.pop_history_periods),
                                   options=thistab.menus['history_periods'])
        pop_button = Button(label="Select dates/periods, then click me!",width=15,button_type="success")

        variable_select = Select(title='Select variable', value=thistab.variable,
                                 options=thistab.menus['developer_adoption_DVs'])

        resample_select = Select(title='Select resample period',
                                 value=thistab.resample_period,
                                 options=thistab.menus['resample_period'])


        # ---------------------------------  GRAPHS ---------------------------
        # Each DynamicMap is rendered once here; its .state is what goes into
        # the grid below.
        hv_sig_ratios = hv.DynamicMap(thistab.graph_significant_ratios_ts,
                                      streams=[stream_launch_sig_ratio])
        sig_ratios= renderer.get_plot(hv_sig_ratios)

        # pop_week / pop_month / pop_quarter are not defined on Thistab in this
        # function; presumably inherited from KPI - TODO confirm.
        hv_pop_week = hv.DynamicMap(thistab.pop_week,streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)

        hv_pop_month = hv.DynamicMap(thistab.pop_month,streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)

        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)


        # -------------------------------- CALLBACKS ------------------------

        variable_select.on_change('value', update_variable)
        # dates and number of periods are applied together on click
        pop_button.on_click(update_period_over_period)
        resample_select.on_change('value', update_resample)
        pop_number_select.on_change('value',update_history_periods)


        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element
        controls_ptd = WidgetBox(variable_select, resample_select)

        controls_pop = WidgetBox(thistab.datepicker_pop_start,
                                 datepicker_pop_end, pop_number_select,pop_button)

        # One inner list per grid row, top to bottom.
        grid_data = [
            [thistab.notification_div['top']],
            [Spacer(width=20, height=40)],
            [thistab.section_headers['sig_ratio']],
            [Spacer(width=20, height=25)],
            [sig_ratios.state, controls_ptd],
            [thistab.section_headers['cards']],
            [Spacer(width=20, height=2)],
            [thistab.KPI_card_div],
            [thistab.section_headers['pop']],
            [Spacer(width=20, height=25)],
            [pop_week.state,controls_pop],
            [pop_month.state],
            [pop_quarter.state],
            [thistab.notification_div['bottom']]
        ]

        grid = gridplot(grid_data)

        # Make a tab with the layout
        tab = Panel(child=grid, title='KPI: developer adoption')
        return tab

    except Exception:
        # Log with traceback and return the error tab under the same title.
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag('KPI: developer adoption')

Beispiel #9

Datei anzeigen

def account_predictive_tab(page_width=1200):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            """Initialise state, model containers and static divs for the tab.

            Args:
                table: table name, forwarded to ``Mytab.__init__`` and reused
                    when loading prediction data.
                cols: columns forwarded to ``Mytab.__init__``.
                dedup_cols: de-duplication columns forwarded to ``Mytab.__init__``.
            """
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')
            self.feature_list = hyp_variables

            # Per modelling mode: target name -> class labels ('cols') and the
            # dataframe column holding the label ('target_col').
            self.targets = {
                'classification': {
                    'churned': {
                        'cols': ['churned', 'active'],
                        'target_col': 'status'
                    }
                },
                'regression': {
                    'aion_fork': {
                        'cols': [1, 0],
                        'target_col': 'aion_fork'
                    }
                }
            }
            self.interest_var = 'address'
            self.trigger = -1
            self.status = 'all'

            self.clf = None
            self.pl = {}  # for rf pipeline
            # div_style and day_diff used to be assigned twice in this method;
            # only the assignment that took effect is kept.
            self.div_style = """ style='width:300px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """
            self.header_style = """ style='color:blue;text-align:center;' """

            # list of tier specific addresses for prediction
            self.address_list = []
            self.prediction_address_selected = ""
            self.load_data_flag = False
            # every feature is aggregated by its mean
            self.groupby_dict = {col: 'mean' for col in self.feature_list}

            self.metrics_div = Div(text='', width=400, height=300)
            self.accuracy_df = None
            self.inspected_variable = 'amount'

            # ------- DIVS setup begin
            self.page_width = page_width
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                                                       position:relative;background:black;margin-bottom:200px">
                                                                       <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                                                 </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'churn':
                self.section_header_div(
                    text=
                    'Churned accounts: prediction model accuracy, variable ranking:{}'
                    .format('----'),
                    width=int(self.page_width * .5),
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'variable behavior':
                self.section_header_div(
                    text='Variable behavior:{}'.format(self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'predictions':
                self.section_header_div(
                    text='Select date range to make predictions:{}'.format(
                        self.section_divider),
                    width=int(self.page_width * .5),
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Build the Div that renders one section heading.

            Args:
                text: heading text placed inside the header tag.
                html_header: tag name used for the heading (e.g. ``'h2'``).
                width: width passed to the Div.
                margin_top: top margin in px.
                margin_bottom: signed bottom margin in px.

            Returns:
                A ``Div`` of height 15 containing the heading markup.
            """
            # margin_bottom is already signed (default and callers pass negative
            # values); the template previously added a second '-', yielding the
            # invalid declaration 'margin-bottom:--155px'.
            markup = (
                '<div style="margin-top:{}px;margin-bottom:{}px;">'
                '<{} style="color:#4221cc;">{}</{}></div>'
            ).format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=markup, width=width, height=15)

            # ####################################################
            #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            """Wrap ``text`` in a Div of the given size."""
            return Div(text=text, width=width, height=height)

        def title_div(self, text, width=700):
            """Return a Div (height 15) showing ``text`` as a coloured h2 title."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def reset_checkboxes(self):
            """Clear the selected prediction address and reset the select to 'all'.

            Failures are logged, not raised.
            """
            try:
                self.prediction_address_selected = ""
                selector = self.prediction_address_select
                selector.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self,
                    start_date="2018-04-25 00:00:00",
                    end_date="2018-12-10 00:00:00"):
            """Load ``self.df`` for the date range and replace missing values by 0.

            String dates are parsed with ``self.DATEFORMAT``; other values are
            passed to ``df_load`` unchanged. Errors are logged, not raised.
            """
            try:
                begin, finish = (
                    datetime.strptime(value, self.DATEFORMAT)
                    if isinstance(value, str) else value
                    for value in (start_date, end_date)
                )
                self.df_load(begin, finish)
                self.df = self.df.fillna(0)

            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add a ``<col>_diff`` percent-change column per entry of ``self.targets``.

            Works on a computed copy of ``self.df`` and stores the result back
            into ``self.df`` as a 15-partition frame. Errors are logged, not
            raised. The only call visible in this class is commented out
            (in ``load_df``).
            """
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        # NOTE(review): iterating self.targets yields its top-level
                        # keys ('classification', 'regression' as set in __init__),
                        # which are then used as column names - confirm intent.
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            # first row has no predecessor -> NaN -> 0
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        # NOTE(review): this result is overwritten by the next
                        # line, so the mean-fill has no lasting effect.
                        self.df = self.df.fillna(self.df.mean())
                        # NOTE(review): depends on what `dd` is bound to at file
                        # level (not visible here) - verify `dd.dataframe` exists.
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))

            except Exception:
                logger.error('make delta', exc_info=True)

        def split_df(self, df, target):
            """Split ``df`` into one frame per class label of a classification target.

            Args:
                df: frame containing the target's label column.
                target: key into ``self.targets['classification']``
                    (e.g. ``'churned'``).

            The subsets are stored in ``self.df1`` keyed by label value.
            """
            # The attribute is ``self.targets`` (``self.target`` does not exist),
            # and the label list / label column live under 'cols' / 'target_col'.
            spec = self.targets['classification'][target]
            target_col = spec['target_col']
            for val in spec['cols']:
                # keep the matching rows, not just the boolean mask
                self.df1[val] = df[df[target_col] == val]
            logger.warning(
                "Finished split into churned and retained dataframes")

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Return an hvplot box plot of ``variable`` grouped by ``status``.

            The y-range is (0.9 * min, 1.1 * max) of the column when ``self.df``
            has rows, else (0, 0). When ``self.df`` is ``None`` the frame comes
            from ``SD('filter', ...)``. Returns ``None`` after logging on error.
            """
            try:
                frame = self.df
                lower, upper = 0, 0
                if frame is None:
                    frame = SD('filter', [variable, 'status'], []).get_df()
                elif len(frame) > 0:
                    # evaluate both extremes in a single compute call
                    lower, upper = dd.compute(frame[variable].min(),
                                              frame[variable].max())

                y_range = (.9 * lower, 1.1 * upper)
                return frame.hvplot.box(variable, by='status', ylim=y_range)
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS
        def rf_clf(self):
            """Fit one imputer + random-forest pipeline per classification target.

            For each key of ``self.targets['classification']`` the normalized
            data is grouped by address and status, split 70/30 into train and
            test sets, and a pipeline is fitted and stored in
            ``self.pl[target]``. Test accuracies (in %) are collected in
            ``self.accuracy_df``. Errors are logged, not raised.
            """
            try:
                logger.warning("RANDOM FOREST LAUNCHED")

                # one accuracy value per target, in iteration order
                error_lst = []
                df_temp = self.df
                df_temp = self.normalize(df_temp,
                                         timestamp_col='block_timestamp')
                # if all addresses used filter for only positive transactions

                for target in self.targets['classification']:
                    # filter out joined
                    df = df_temp.copy()
                    if target == 'churned':
                        df = df[df['status'] != 'joined']

                    #logger.warning("line 205: df columns in %s:",df.columns.tolist())
                    # one row per (address, status), aggregated per groupby_dict
                    df = df.groupby(['address',
                                     'status']).agg(self.groupby_dict)
                    df = df.reset_index()
                    #logger.warning("line 222: df columns in %s:",df.tail(10))

                    df = df.compute()
                    '''
                    # only retain wanted values
                    col_values = list(self.df[self.targets['classification'][target]['target_col']].unique())
                    for val in col_values:
                        if val in self.targets['classification'][target]['cols']:
                            pass
                        else:
                            df[self.targets['classification'][target]['target_col']] = \
                            df[df[self.targets['classification'][target]['cols']] != val]
                    '''
                    X = df[self.feature_list]
                    y = df[self.targets['classification'][target]
                           ['target_col']]
                    #logger.warning('y=:%s',y.head(100))

                    # NOTE(review): no random_state is passed here, so the split
                    # presumably differs between runs - confirm this is intended.
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=0.3)
                    self.feature_list = X_train.columns.tolist()

                    # zeros are treated as missing and replaced by the median
                    # before the forest is fitted
                    self.pl[target] = Pipeline([
                        ('imp',
                         SimpleImputer(missing_values=0, strategy='median')),
                        ('rf',
                         RandomForestClassifier(n_estimators=100,
                                                random_state=42,
                                                max_depth=4,
                                                class_weight='balanced'))
                    ])
                    self.pl[target].fit(X_train, y_train)

                    y_pred = self.pl[target].predict(X_test)
                    error_lst.append(
                        round(100 * metrics.accuracy_score(y_test, y_pred), 2))

                self.accuracy_df = pd.DataFrame({
                    'Outcome':
                    list(self.targets['classification'].keys()),
                    'Accuracy':
                    error_lst,
                })
                #logger.warning('accuracy_df:%s',self.accuracy_df.head())
                #self.make_tree(target=target)

                # These run after the loop, so they report only the last target
                # processed (y_test / y_pred keep their final values).
                print('confusion matrix:\n')
                print(confusion_matrix(y_test, y_pred))
                print('classification report:\n')
                print(classification_report(y_test, y_pred))
                #logger.warning("clf model built:%s",self.pl)

            except Exception:
                logger.error("RF:", exc_info=True)

        def accuracy_table(self):
            """Return an hvplot table of ``self.accuracy_df`` (Outcome, Accuracy).

            Returns ``None`` after logging if the table cannot be built, e.g.
            when ``self.accuracy_df`` is still ``None``.
            """
            try:
                return self.accuracy_df.hvplot.table(
                    columns=['Outcome', 'Accuracy'],
                    width=250,
                    title='Prediction accuracy')

            except Exception:
                # Own label instead of the copy-pasted "RF:", so log entries
                # can be told apart from rf_clf failures.
                logger.error("accuracy table:", exc_info=True)

        def prediction_information_div(self, width=350, height=450):
            """Return the static 'Info' help box for the prediction table as a Div.

            The markup is styled with ``self.div_style`` and ``self.header_style``.
            """
            # The third list item previously opened with a malformed '<>' tag
            # while closing with '</li>'; it now opens with '<li>'.
            txt = """
            <div {}>
            <h4 {}>Info </h4>
            <ul style='margin-top:-10px;'>
            <li>
            The table shows the predicted change.</br>
            </li>
            <li>
            For desirable outcomes:
            </br> ... a positive number is good!
            </br> ... the bigger the number the better.
            </br> ... a negative number is bad!
            </br> ... the bigger the negative number the worse it is.
            </li>
            <li>
            For non-desirable outcomes:
            </br>... the inverse is true
            </li>
            <li>
            Use the datepicker(s) to select dates for the period desired
            </li>
            </ul>
            </div>

            """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def metrics_div_update(self, data):
            """Write the churn-likelihood box for ``data`` (a percentage) into
            ``self.metrics_div``.
            """
            box_attrs = """ 
                   style='width:350px;margin-right:-600px;
                   border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
               """
            template = """<div {}>
            <h4 {}>Prediction Info </h4>
            <ul style='margin-top:-10px;'>
            <li>
            {}% likely to churn
            </li>
            </ul>
            </div>"""
            self.metrics_div.text = template.format(box_attrs, self.header_style, data)

        def stats_information_div(self, width=400, height=300):
            """Return the static 'Metadata Info' help box as a Div.

            The text explains how to read the accuracy table and the
            variable-importance table.
            """
            box_attrs = """ 
                           style='width:350px;margin-left:-600px;
                           border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                       """
            template = """
            <div {}>
                   <h4 {}>Metadata Info </h4>
                   <ul>
                   <li >
                   <h4 style='margin-bottom:-2px;'>Table left:</h4>
                   - shows the outcome,</br>
                     and the accuracy in %</br>
                     <strong><i>100% is perfection!</i></strong>
                   </li>
                   <li>
                   <h4 style='margin-bottom:-2px;'>Table right:</h4>
                     - shows the desired outcome, the variables(things Aion controls)
                   </br> and their importance to the particular outcome
                   </br> ...which variable(s) have a greater impact on an outcome.
                   </br>- lower = better
                   </br>- generally only the best ranked 3 matter
                   </br>- business advice: manipulate the top ranked variables to attain desirable outcomes
                   </li>
                   </ul>
            </div>"""
            markup = template.format(box_attrs, self.header_style)
            return Div(text=markup, width=width, height=height)

        def load_prediction_df(self, start_date, end_date):
            """Load the rows to be predicted into ``self.df_predict``.

            Both bounds are converted with ``datetime.combine(..., midnight)``
            when they are ``date`` instances, then handed to
            ``self.cl.load_data`` together with the feature columns plus
            ``address`` and ``block_timestamp``.

            Args:
                start_date: Lower bound of the period.
                end_date: Upper bound of the period.
            """
            def _at_midnight(value):
                # Values that are not date instances are passed through as-is.
                if isinstance(value, date):
                    return datetime.combine(value, datetime.min.time())
                return value

            start_date = _at_midnight(start_date)
            end_date = _at_midnight(end_date)

            wanted_cols = self.feature_list + ['address', 'block_timestamp']
            loaded = self.cl.load_data(table=self.table,
                                       cols=wanted_cols,
                                       start_date=start_date,
                                       end_date=end_date)
            self.df_predict = loaded
            logger.warning('319:in load prediction: %s',
                           self.df_predict.head(5))

        def update_prediction_addresses_select(self):
            """Refresh the address selector options from ``self.df_predict``.

            The options are first reset to ``['all']``; when the prediction
            frame has rows, the unique addresses are appended after 'all'.
            """
            selector = self.prediction_address_select
            selector.options = ['all']
            if len(self.df_predict) <= 0:
                return
            # .compute() materialises the lazily evaluated unique values.
            unique_addresses = self.df_predict['address'].unique().compute()
            selector.options = ['all'] + list(unique_addresses)

        # the period for which the user wants a prediction
        def make_account_predictions(self, launch=-1):
            """Predict the likely action for each address in ``self.df_predict``.

            The prediction frame is optionally narrowed to the address chosen
            in ``self.prediction_address_select``. For every classification
            target the frame is normalised, grouped and fed to the matching
            pipeline in ``self.pl``; the outcome is stored in
            ``self.predict_df`` and ``self.metrics_div`` is refreshed.

            Args:
                launch: Stream counter supplied by the caller; not read here.

            Returns:
                An hvplot table with the columns ``address`` and
                ``likely action``, or ``None`` when any step raises (the
                error is logged instead of propagated).
            """
            try:
                logger.warning("MAKE PREDICTIONS LAUNCHED")
                df = self.df_predict

                # Narrow to one address unless the selector is unset, empty or
                # left on the 'all' sentinel.
                selected = self.prediction_address_select.value
                if selected is not None and len(selected) > 0 \
                        and selected not in ['all', '']:
                    df = df[df.address == selected]

                # Empty fallback table, returned when nothing gets predicted.
                self.predict_df = pd.DataFrame({
                    'address': [],
                    'likely action': []
                })
                for target in list(self.targets['classification'].keys()):
                    if len(df) > 0:
                        # NOTE(review): df is re-assigned, so a second target
                        # would normalise/group already grouped data - confirm
                        # this is intended if more targets are ever added.
                        df = self.normalize(df,
                                            timestamp_col='block_timestamp')
                        df = self.group_data(df,
                                             self.groupby_dict,
                                             timestamp_col='block_timestamp')
                        interest_labels = list(df['address'].unique())

                        # run model
                        df = df.fillna(0)
                        X = df[self.feature_list]
                        y_pred = self.pl[target].predict(X)
                        logger.warning('y_pred:%s', y_pred)
                        if target == 'churned':
                            y_pred_verbose = [
                                'remain' if x in ["active", 1] else "churn"
                                for x in y_pred
                            ]
                        else:
                            # Previously this name was unbound (or stale from
                            # an earlier target) for any non-'churned' target;
                            # fall back to the raw predicted labels.
                            y_pred_verbose = [str(x) for x in y_pred]

                        #---- make table for display
                        self.predict_df = pd.DataFrame({
                            'address':
                            interest_labels,
                            'likely action':
                            y_pred_verbose
                        })

                        #------ label pools
                        self.predict_df['address'] = self.predict_df[
                            'address'].map(self.poolname_verbose_trun)

                        churn_df = self.predict_df[
                            self.predict_df['likely action'] == 'churn']
                        # Guard the ratio: grouping may leave no rows at all.
                        total = len(self.predict_df)
                        perc_to_churn = round(
                            100 * len(churn_df) / total, 1) if total else 0.0
                        self.metrics_div_update(data=perc_to_churn)
                    else:
                        text = """<div {}>
                            <br/> <h3>Sorry, address not found</h3>
                            </div>""".format(self.header_style)
                        self.metrics_div.text = text
                    logger.warning("end of %s predictions", target)
                return self.predict_df.hvplot.table(
                    columns=['address', 'likely action'],
                    width=500,
                    title='Account predictions')
            except Exception:
                logger.error("prediction:", exc_info=True)

        def make_tree(self, target='churned', estimator_index=5,
                      dot_file='small_tree.dot',
                      png_path='/home/andre/Downloads/small_tree.png',
                      max_depth=None):
            """Export one tree of the fitted random forest as a PNG image.

            Args:
                target: Key into ``self.pl`` selecting the pipeline to inspect.
                estimator_index: Index into the forest's ``estimators_``; the
                    default keeps the previously hard-coded tree number 5.
                dot_file: Path of the intermediate Graphviz ``.dot`` file.
                png_path: Path handed to ``self.make_filepath`` for the PNG;
                    the default keeps the previously hard-coded location.
                max_depth: Forwarded to ``export_graphviz``. ``None`` (the
                    default) draws the full tree, exactly as before - the old
                    "limit depth to 3 levels" comment was never implemented.

            Errors are logged rather than raised.
            """
            try:
                if not self.pl:
                    self.rf_clf()
                # Pick a single tree out of the forest.
                tree_small = self.pl[target].named_steps['rf'].estimators_[
                    estimator_index]
                # Write the tree as a .dot description first ...
                export_graphviz(tree_small,
                                out_file=dot_file,
                                max_depth=max_depth,
                                feature_names=self.feature_list,
                                rounded=True,
                                precision=1)

                # ... then render that description to a png image.
                (graph, ) = pydot.graph_from_dot_file(dot_file)
                filepath = self.make_filepath(png_path)
                graph.write_png(filepath)
                logger.warning("TREE SAVED")
            except Exception:
                logger.error("make tree:", exc_info=True)

        def make_feature_importances(self):
            """Tabulate the random-forest feature importances per target.

            For every classification target the features are listed in
            ascending order of importance (rounded to 4 places) and numbered
            so that the most important feature carries rank 1.

            Returns:
                An hvplot table with the columns ``outcome``, ``feature``,
                ``importance`` and ``rank_within_outcome``, or ``None`` when
                any step raises (the error is logged).
            """
            try:
                if not self.pl:
                    self.rf_clf()

                outcomes, features, scores, ranks = [], [], [], []
                for target in self.targets['classification'].keys():
                    logger.warning('make feature importances for :%s', target)
                    raw = list(
                        self.pl[target].named_steps['rf'].feature_importances_)

                    # (feature, importance) pairs, least important first.
                    ordered = sorted(
                        ((name, round(score, 4))
                         for name, score in zip(self.feature_list, raw)),
                        key=itemgetter(1))

                    n_features = len(raw)
                    outcomes += [target] * n_features
                    features += [pair[0] for pair in ordered]
                    scores += [pair[1] for pair in ordered]
                    # Ranks count down n..1 to line up with the ascending sort.
                    ranks += list(range(n_features, 0, -1))

                df = pd.DataFrame.from_dict({
                    'outcome': outcomes,
                    'feature': features,
                    'importance': scores,
                    'rank_within_outcome': ranks
                })
                logger.warning('MAKE FEATURE IMPORTANCES FINISHED')
                shown_columns = [
                    'outcome', 'feature', 'importance', 'rank_within_outcome'
                ]
                return df.hvplot.table(
                    columns=shown_columns,
                    width=600,
                    title="Variables ranked by importance (for each output)")

            except Exception:
                logger.error("Feature importances:", exc_info=True)

        ####################################################
        #               GRAPHS
    def update(attrname, old, new):
        """Change callback registered on both date pickers.

        Reloads the prediction data for the picked period, refreshes the
        address selector and fires both streams. The callback arguments are
        not read.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        period_start, period_end = datepicker_start.value, datepicker_end.value
        thistab.load_prediction_df(period_start, period_end)
        thistab.update_prediction_addresses_select()
        # A new trigger value is what gets sent through the launch stream.
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        stream_select_variable.event(variable=thistab.inspected_variable)
        thistab.notification_updater("ready")

    def update_address_predictions(attrname, old, new):
        """Change callback for the address selector: re-fire the launch stream.

        The callback arguments are not read.
        """
        busy_message = "Calculations underway. Please be patient"
        thistab.notification_updater(busy_message)
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_select_variable(attrname, old, new):
        """Change callback for the variable selector.

        Copies the widget's current value to ``thistab.inspected_variable``
        and sends it through ``stream_select_variable``. The callback
        arguments are not read.
        """
        busy_message = "Calculations underway. Please be patient"
        thistab.notification_updater(busy_message)
        thistab.inspected_variable = select_variable.value
        stream_select_variable.event(variable=thistab.inspected_variable)
        thistab.notification_updater("ready")

    # Build the whole tab: data, streams, widgets, plots, callbacks and layout.
    # Any failure is logged and replaced by the tab_error_flag() placeholder.
    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())

        cols = hyp_variables + [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        # first_date_range / last_date_range bound what the pickers allow.
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        # Initial prediction window: the 5 days ending 50 days before the
        # configured last_date.
        last_date = dashboard_config['dates']['last_date']
        last_date = last_date - timedelta(days=50)
        first_date = last_date - timedelta(days=5)
        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_variable = Select(title='Filter by variable',
                                 value=thistab.inspected_variable,
                                 options=thistab.feature_list)

        # search by address checkboxes
        # (options start empty; update_prediction_addresses_select fills them)
        thistab.prediction_address_select = Select(title='Filter by address',
                                                   value='all',
                                                   options=[])
        reset_prediction_address_button = Button(label="reset address(es)",
                                                 button_type="success")

        # ----------------------------------- LOAD DATA
        # load model-making data
        # The model data is the 60 days that end where the prediction window
        # starts; rf_clf() presumably fits the classifier(s) on it - confirm.
        end = datepicker_start.value
        start = end - timedelta(days=60)
        thistab.load_df(start, end)
        thistab.rf_clf()
        # load data for period to be predicted
        thistab.load_prediction_df(datepicker_start.value,
                                   datepicker_end.value)
        thistab.update_prediction_addresses_select()

        # tables
        # Only the prediction table and the variable plot are stream-driven;
        # the features and accuracy tables get no streams.
        hv_account_prediction_table = hv.DynamicMap(
            thistab.make_account_predictions, streams=[stream_launch])
        account_prediction_table = renderer.get_plot(
            hv_account_prediction_table)

        hv_features_table = hv.DynamicMap(thistab.make_feature_importances)
        features_table = renderer.get_plot(hv_features_table)

        hv_accuracy_table = hv.DynamicMap(thistab.accuracy_table)
        accuracy_table = renderer.get_plot(hv_accuracy_table)


        hv_variable_plot = hv.DynamicMap(thistab.box_plot,
                                 streams=[stream_select_variable])\
            .opts(plot=dict(width=800, height=500))

        variable_plot = renderer.get_plot(hv_variable_plot)

        # add callbacks
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        thistab.prediction_address_select.on_change(
            'value', update_address_predictions)
        reset_prediction_address_button.on_click(thistab.reset_checkboxes)
        select_variable.on_change('value', update_select_variable)

        # put the controls in a single element
        # NOTE(review): the same date pickers, address selector and reset
        # button are placed in both boxes below - confirm the layout library
        # supports one widget instance appearing twice.
        controls = WidgetBox(select_variable, datepicker_start, datepicker_end,
                             thistab.prediction_address_select,
                             reset_prediction_address_button)

        controls_prediction = WidgetBox(datepicker_start, datepicker_end,
                                        thistab.prediction_address_select,
                                        reset_prediction_address_button)

        # Page layout, one inner list per row, top to bottom.
        grid = gridplot(
            [[thistab.notification_div['top']], [Spacer(width=20, height=70)],
             [thistab.section_headers['churn']], [Spacer(width=20, height=70)],
             [accuracy_table.state,
              thistab.stats_information_div()], [features_table.state],
             [thistab.section_headers['variable behavior']],
             [Spacer(width=20, height=30)], [variable_plot.state, controls],
             [thistab.section_headers['predictions']],
             [Spacer(width=20, height=30)],
             [
                 account_prediction_table.state, thistab.metrics_div,
                 controls_prediction
             ], [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title='predictions: accounts by value')
        return tab

    except Exception:
        # Log the full traceback and hand back the error placeholder built by
        # tab_error_flag for this tab's title.
        logger.error('rendering err:', exc_info=True)
        text = 'predictions: accounts by value'
        return tab_error_flag(text)

Beispiel #10

Datei anzeigen

def account_activity_tab(DAYS_TO_LOAD=90,panel_title=None):
    class Thistab(Mytab):
        def __init__(self, table, cols=None, dedup_cols=None):
            """Set up filter state, styling and the static page divs.

            Args:
                table: Name of the table this tab reads from.
                cols: Columns to load; defaults to a fresh empty list.
                dedup_cols: Forwarded to ``Mytab.__init__``; defaults to a
                    fresh empty list.
            """
            # None sentinels replace the former mutable ``[]`` defaults, which
            # were shared between every instance created without arguments.
            cols = [] if cols is None else cols
            dedup_cols = [] if dedup_cols is None else dedup_cols
            Mytab.__init__(self, table, cols, dedup_cols,panel_title=panel_title)
            self.table = table
            self.cols = cols
            self.period = menus['period'][0]

            # Current filter selections, initialised to the first menu entry.
            self.update_type = menus['update_type'][0]
            self.status = menus['status'][0]
            self.account_type = menus['account_type'][0]


            self.trigger = 0

            self.df_warehouse = None

            # correlation
            self.variable = 'aion_fork'

            # Thresholds on |r| used to label a relationship's strength.
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ style='width:350px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """

            self.header_style = """ style='color:blue;text-align:center;' """
            self.feature_list = hyp_variables.copy()
            self.groupby_dict = groupby_dict
            self.pym = PythonMongo('aion')

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                                    position:relative;background:black;margin-bottom:200px">
                                                    <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                              </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'account activity': self.section_header_div(text='Account activity:{}'.format(self.section_divider),
                                               width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                'relationships': self.section_header_div(text='Relationships:{}'.format(self.section_divider),
                                                 width=600, html_header='h2', margin_top=5, margin_bottom=-155),
            }

        # ----------------------  DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150):
            """Build a coloured section heading wrapped in a ``Div``.

            Args:
                text: Heading text.
                html_header: Tag name used for the heading element.
                width: Width passed to the returned ``Div``.
                margin_top: Top margin in pixels.
                margin_bottom: Size of the negative bottom margin in pixels;
                    the sign is ignored.

            Returns:
                A ``Div`` of height 15 containing the heading markup.
            """
            # The template used to prepend '-' to the value, so the negative
            # default and the negative values passed by callers rendered as
            # "--150px", which is not a valid CSS length. Positive inputs
            # still render with a single minus sign, as before.
            bottom_offset = -abs(margin_bottom)
            text = """<div style="margin-top:{}px;margin-bottom:{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, bottom_offset, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def clean_data(self, df):
            """Return ``df`` with missing and infinite entries replaced by 0.

            ``fillna`` produces the frame that is then edited, so the two
            mask assignments do not touch the caller's object.
            """
            cleaned = df.fillna(0)
            for unbounded in (-inf, inf):
                cleaned[cleaned == unbounded] = 0
            return cleaned

        def load_df(self, start_date, end_date, cols=None, timestamp_col='block_timestamp'):
            """Load the tab's data for a period by delegating to ``self.df_load``.

            Args:
                start_date: Lower bound of the period.
                end_date: Upper bound of the period.
                cols: Columns to load; defaults to ``self.cols`` so the
                    method can be called with dates only.
                timestamp_col: Timestamp column name forwarded to
                    ``df_load``. It used to be accepted but ignored in favour
                    of a hard-coded 'block_timestamp', which is now merely
                    the default.

            Errors are logged rather than raised.
            """
            try:
                if cols is None:
                    cols = self.cols
                self.df_load(start_date, end_date, cols=cols,
                             timestamp_col=timestamp_col)
            except Exception:
                logger.warning('load df',exc_info=True)

        def prep_data(self):
            """Index ``self.df`` by ``block_timestamp`` and store it as ``self.df1``.

            Errors are logged rather than raised; ``self.df1`` is then left
            unchanged.
            """
            try:
                logger.warning('%s',self.df['block_timestamp'].head())
                # make timestamp into index
                self.df1 = self.df.set_index('block_timestamp')
            except Exception:
                # The label used to read 'load df' (copied from load_df),
                # which pointed at the wrong method when this step failed.
                logger.warning('prep data',exc_info=True)

        def plot_account_activity(self,launch=-1):
            """Plot the per-period record count and its percentage change.

            ``self.df1`` is filtered by the selected update and account types
            and to rows with ``amount >= 0``, then resampled by
            ``self.period`` counting ``address``.

            Args:
                launch: Stream counter supplied by the caller; not read here.

            Returns:
                Two hvplot line charts combined with ``+``, or ``None`` when
                any step raises (the error is logged).
            """
            try:
                frame = self.df1
                if self.update_type != 'all':
                    frame = frame[frame['update_type'] == self.update_type]
                if self.account_type != 'all':
                    frame = frame[frame['account_type'] == self.account_type]

                logger.warning('df columns:%s',frame.columns)

                frame = frame[frame.amount >= 0]
                counts = frame.resample(self.period).agg({'address':'count'})
                # Move the timestamp back to a column, then materialise.
                counts = counts.reset_index().compute()
                counts = counts.rename(index=str,columns={'address':'period_activity'})

                delta = counts['period_activity'].pct_change(fill_method='ffill')
                counts['activity_delta(%)'] = delta.multiply(100)
                counts = counts.fillna(0)

                logger.warning('df in balance after resample:%s',counts.tail(10))

                activity_plot = counts.hvplot.line(
                    x='block_timestamp', y=['period_activity'],
                    title='# of transactions')
                delta_plot = counts.hvplot.line(
                    x='block_timestamp', y=['activity_delta(%)'],
                    title='% change in # of transactions')
                return activity_plot + delta_plot
            except Exception:
                logger.warning('plot account activity',exc_info=True)

        def plot_account_status(self, launch=-1):
            """Plot how many rows carry the selected status per period.

            ``self.df1`` is filtered by the selected account type and status,
            then resampled by ``self.period`` counting ``status``.

            Args:
                launch: Stream counter supplied by the caller; not read here.

            Returns:
                Two hvplot line charts (count and percentage change) combined
                with ``+``, or ``None`` when any step raises (the error is
                logged).
            """
            try:
                state = self.status
                source = self.df1
                if self.account_type != 'all':
                    source = self.df1[self.df1['account_type'] == self.account_type]

                matching = source[source['status'] == state]
                counts = matching.resample(self.period).agg({'status': 'count'})

                # Move the timestamp back to a column, then materialise.
                counts = counts.reset_index().compute()
                change = counts['status'].pct_change(fill_method='ffill')
                counts['perc_change'] = change
                counts.perc_change = counts.perc_change.multiply(100)
                counts = counts.fillna(0)

                value_label = '# '+state
                gc.collect()
                count_title = 'accounts {} by period'.format(state)
                change_title = 'percentage {} change by period'.format(state)
                count_plot = counts.hvplot.line(
                    x='block_timestamp', y=['status'], value_label=value_label,
                    title=count_title)
                change_plot = counts.hvplot.line(
                    x='block_timestamp', y=['perc_change'], value_label='%',
                    title=change_title)
                return count_plot + change_plot
            except Exception:
                logger.error('plot account status', exc_info=True)



        def title_div(self, text, width=700):
            """Return a ``Div`` of height 15 showing ``text`` as a coloured h2."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Build the static help panel on how to read the correlations.

            Args:
                width: Width passed to the returned ``Div``.
                height: Height passed to the returned ``Div``.

            Returns:
                A ``Div`` holding the formatted HTML help text.
            """
            box_style = """ 
                           style='width:350px; margin-left:-500px;
                           border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                       """
            template = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """
            # Placeholders, in order: container style, header style.
            markup = template.format(box_style, self.header_style)
            return Div(text=markup, width=width, height=height)

        def hist(self,launch):
            """Return a 50-bin histogram of ``self.variable`` from ``self.corr_df``.

            Args:
                launch: Stream counter supplied by the caller; not read here.

            Returns:
                The hvplot histogram, or ``None`` when plotting raises (the
                error is logged).
            """
            try:
                plot_options = dict(y=self.variable, bins=50, alpha=0.3,
                                    width=350, xaxis=False)
                return self.corr_df.hvplot.hist(**plot_options)
            except Exception:
                logger.warning('histogram', exc_info=True)


        def correlation_table(self,launch):
            """Tabulate the linear relationship of ``self.variable`` to each column.

            Every other column of ``self.corr_df`` is regressed on
            ``self.variable`` with ``linregress``. A result with p >= 0.05 is
            labelled 'Not significant'; otherwise |r| is compared with the
            weak, moderate and strong thresholds to produce a label.

            Args:
                launch: Stream counter supplied by the caller; not read here.

            Returns:
                An hvplot table, or ``None`` when any step raises (the error
                is logged).
            """
            try:
                corr_dict = {
                    'Variable 1':[],
                    'Variable 2':[],
                    'Relationship':[],
                    'r':[],
                    'p-value':[]
                }

                source = self.corr_df
                logger.warning(' df:%s',source.head(10))
                reference = source[self.variable].tolist()
                for col in source.columns.tolist():
                    logger.warning('col :%s', col)
                    if col == self.variable:
                        continue

                    logger.warning('%s:%s', col, self.variable)
                    other = source[col].tolist()
                    slope, intercept, rvalue, pvalue, std_err = linregress(reference, other)
                    logger.warning('slope:%s,intercept:%s,rvalue:%s,pvalue:%s,std_err:%s',
                                 slope, intercept, rvalue, pvalue, std_err)

                    if not pvalue < 0.05:
                        txt = 'Not significant'
                    elif abs(rvalue) <= self.weak_thresh:
                        txt = 'none'
                    else:
                        if rvalue > 0:
                            direction = 'positive'
                        if rvalue < 0:
                            direction = 'negative'
                        magnitude = abs(rvalue)
                        if magnitude > self.strong_thresh:
                            strength = 'strong'
                        elif magnitude > self.mod_thresh:
                            strength = 'moderate'
                        else:
                            strength = 'weak'
                        txt = "{} {}".format(strength,direction)

                    corr_dict['Variable 1'].append(self.variable)
                    corr_dict['Variable 2'].append(col)
                    corr_dict['Relationship'].append(txt)
                    corr_dict['r'].append(round(rvalue,4))
                    corr_dict['p-value'].append(round(pvalue,4))

                # The dict already has exactly the table's columns, in order.
                table_df = pd.DataFrame(corr_dict)
                logger.warning('df:%s',table_df.head(23))
                shown_columns = ['Variable 1', 'Variable 2','Relationship','r','p-value']
                return table_df.hvplot.table(columns=shown_columns,
                                             width=700,height=400,title='Correlation between variables')
            except Exception:
                logger.warning('correlation table', exc_info=True)


        def matrix_plot(self,launch=-1):
            """Scatter ``self.variable`` against every other resampled column.

            ``self.df1`` is optionally filtered by update type, averaged per
            ``self.period`` and materialised. The result is stored in
            ``self.corr_df`` and its column names become the options of the
            ``variable_select`` widget from the enclosing scope.

            Args:
                launch: Stream counter supplied by the caller; not read here.

            Returns:
                A three-column grid of hvplot scatter plots, or ``None`` when
                any step raises (the error is logged).
            """
            try:
                if self.update_type != 'all':
                    df = self.df1[self.df1['update_type'] == self.update_type]
                else:
                    df = self.df1

                df = df.resample(self.period).mean()
                df = df.reset_index()
                df = df.drop('block_timestamp',axis=1)
                df = df.fillna(0)
                df = df.compute()

                # A block of column assignments used to sit here. Four were
                # self-assignments (no-ops); the other two overwrote
                # 'bitcoin_market_cap' and 'ethereum_market_cap' with
                # 'aion_market_cap', so both correlated perfectly with the
                # Aion column. The columns are now left as resampled.

                self.corr_df = df.copy()
                cols_lst = df.columns.tolist()
                cols_temp = cols_lst.copy()
                # Plot the inspected variable against all the others only.
                if self.variable in cols_temp:
                    cols_temp.remove(self.variable)
                variable_select.options = cols_lst
                logger.warning('line 305 cols temp:%s',cols_temp)
                logger.warning('line 306 self.variable:%s',self.variable)
                logger.warning('line 307 df columns:%s',df.columns)

                p = df.hvplot.scatter(x=self.variable,y=cols_temp,width=400,
                                      subplots=True,shared_axes=False,xaxis=False).cols(3)

                return p

            except Exception:
                logger.error('matrix plot', exc_info=True)


    def update(attrname, old, new):
        """Change callback: reload data, re-read every filter, refresh all plots.

        The callback arguments are not read; the current widget values are
        used instead.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        # load_df was called with the two dates only although cols and
        # timestamp_col are part of its signature; pass them explicitly.
        thistab.load_df(datepicker_start.value, datepicker_end.value,
                        cols=thistab.cols, timestamp_col='block_timestamp')
        thistab.prep_data()
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.variable = variable_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_launch_matrix.event(launch=thistab.trigger)
        # Fire the correlation stream as every other callback here does, so
        # its consumers are refreshed after matrix_plot rebuilds corr_df.
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready.")

    def update_resample(attr,old,new):
        """Change callback for the aggregation period selector.

        Stores ``new`` as the period, re-reads the other filter widgets and
        fires all three launch streams.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.period = new
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.variable = variable_select.value
        thistab.trigger = thistab.trigger + 1
        for stream in (stream_launch, stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_account_type(attr, old, new):
        """Change callback: store ``new`` as the account type, fire all streams."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.account_type = new
        thistab.trigger = thistab.trigger + 1
        for stream in (stream_launch, stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_update_type(attr, old, new):
        """Change callback: store ``new`` as the update type, fire all streams."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.update_type = new
        thistab.trigger = thistab.trigger + 1
        for stream in (stream_launch, stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_variable(attr, old, new):
        """Change callback: store ``new`` as the inspected variable.

        Only the matrix and correlation streams are fired.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.variable = new
        thistab.trigger = thistab.trigger + 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_status(attr, old, new):
        """Change callback: store ``new`` as the status filter, fire all streams."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data()
        thistab.status = new
        thistab.trigger = thistab.trigger + 1
        for stream in (stream_launch, stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        
        cols = list(set(hyp_variables + ['address','update_type','account_type','balance',
                                         'status','block_timestamp','timestamp_of_first_event']))
        thistab = Thistab(table='account_ext_warehouse',cols=cols)
        # STATIC DATES
        # format dates
        first_date_range = "2018-04-25 00:00:00"
        first_date_range = datetime.strptime(first_date_range, thistab.DATEFORMAT)
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = datetime_to_date(last_date - timedelta(days=DAYS_TO_LOAD))
        '''
        thistab.df = thistab.pym.load_df(start_date=first_date, end_date=last_date,
                            cols=cols,table='account_ext_warehouse',timestamp_col='block_timestamp')
        '''
        thistab.load_df(start_date=first_date, end_date=last_date,cols=cols,
                                     timestamp_col='block_timestamp')
        thistab.prep_data()

        # MANAGE STREAM
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch',launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr',launch=-1)()


        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start", min_date=first_date_range,
                                      max_date=last_date_range, value=first_date)
        datepicker_end = DatePicker(title="End", min_date=first_date_range,
                                    max_date=last_date_range, value=last_date)

        period_select = Select(title='Select aggregation period',
                               value=thistab.period,
                               options=menus['period'])

        variable_select = Select(title='Select variable',
                                 value='aion_fork',
                                 options=sorted(hyp_variables))
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])

        # --------------------- PLOTS----------------------------------
        width = 800
        hv_account_status = hv.DynamicMap(thistab.plot_account_status,
                                           streams=[stream_launch]).opts(plot=dict(width=width, height=400))
        hv_account_activity = hv.DynamicMap(thistab.plot_account_activity,
                                            streams=[stream_launch]).opts(plot=dict(width=width, height=400))
        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_hist_plot = hv.DynamicMap(thistab.hist,streams=[stream_launch_corr])

        account_status = renderer.get_plot(hv_account_status)
        account_activity = renderer.get_plot(hv_account_activity)
        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        hist_plot = renderer.get_plot(hv_hist_plot)

        # handle callbacks
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        period_select.on_change('value',update_resample)
        update_type_select.on_change('value',update_update_type)
        account_type_select.on_change('value',update_account_type)
        variable_select.on_change('value',update_variable)
        status_select.on_change('value',update_status)


        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(
            datepicker_start,
            datepicker_end,
            period_select,
            update_type_select,
            account_type_select,
            status_select,
            variable_select)


        # create the dashboards
        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=50)],
            [thistab.section_headers['relationships']],
            [Spacer(width=20, height=30)],
            [matrix_plot.state,controls],
            [corr_table.state, thistab.corr_information_div()],
            [hist_plot.state],
            [thistab.section_headers['account activity']],
            [Spacer(width=20, height=30)],
            [account_status.state],
            [account_activity.state],
            [thistab.notification_div['bottom']]
            ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=thistab.panel_title)
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(thistab.panel_title)

Beispiel #11

Datei anzeigen

Datei: projects.py Projekt: andre-aion/analytics_demo

def eda_projects_tab(panel_title):
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[],
                                               variable_2=[],
                                               relationship=[],
                                               lag=[],
                                               r=[],
                                               p_value=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """Set up state, menus and header/notification divs for the tab.

            Args:
                table: table name; stored on the instance and forwarded to
                    ``Mytab.__init__``.
                cols: column list; stored on the instance and forwarded to
                    ``Mytab.__init__``.
                dedup_cols: forwarded to ``Mytab.__init__``. ``None`` (the
                    default) is replaced by a fresh empty list.
            """
            # A literal ``[]`` default is created once and shared by every
            # call, so build a new list per instance instead.
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None  # raw frame, assigned by the loading code
            self.df1 = None  # prepared frame, assigned by prep_data()
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            self.trigger = 0
            # Column -> aggregation applied by prep_data() after resampling.
            self.groupby_dict = {
                'project_duration': 'sum',
                'project_start_delay': 'mean',
                'project_end_delay': 'mean',
                'project_owner_age': 'mean',
                'project_owner_gender': 'mean',
                'milestone_duration': 'sum',
                'milestone_start_delay': 'mean',
                'milestone_end_delay': 'mean',
                'milestone_owner_age': 'mean',
                'milestone_owner_gender': 'mean',
                'task_duration': 'sum',
                'task_start_delay': 'sum',
                'task_end_delay': 'mean',
                'task_owner_age': 'mean',
                'task_owner_gender': 'mean'
            }
            self.feature_list = list(self.groupby_dict.keys())
            self.lag_variable = 'task_duration'
            self.lag_days = "1,2,3"  # comma-separated lags, split in lags_plot()
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = self.variables[0]

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            # Filter selections read by filter_df(); 'all' disables a filter.
            self.status = 'all'
            self.pm_gender = 'all'
            self.m_gender = 'all'
            self.t_gender = 'all'
            self.type = 'all'

            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'type': [
                    'all', 'research', 'reconciliation', 'audit', 'innovation',
                    'construction', 'manufacturing', 'conference'
                ],
                'gender': ['all', 'male', 'female'],
                'variables': list(self.groupby_dict.keys()),
                'history_periods':
                ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
            }
            self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'}
            self.timestamp_col = 'project_startdate_actual'
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                    <div style="text-align:center;width:{}px;height:{}px;
                           position:relative;background:black;margin-bottom:200px">
                           <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            # Both banners start with the same text; notification_updater()
            # rewrites them together.
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=lag_section_head_txt,
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(text='Pre-transform distribution:',
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'correlations':
                self.section_header_div(text='Correlations:',
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a ``Div`` holding ``text`` wrapped in a coloured heading tag.

            Args:
                text: heading text.
                html_header: tag name used for the heading (e.g. ``'h2'``).
                width: width passed to the ``Div``.
                margin_top: value inserted into ``margin-top:{}px``.
                margin_bottom: value inserted into ``margin-bottom:-{}px``.
            """
            # NOTE(review): the template already carries a literal '-' before
            # the bottom margin, so the negative default (and the negative
            # values passed from __init__) render as 'margin-bottom:--150px'.
            # Left untouched here because changing it would alter the layout.
            template = ('<div style="margin-top:{top}px;margin-bottom:-{bottom}px;">'
                        '<{tag} style="color:#4221cc;">{body}</{tag}></div>')
            markup = template.format(top=margin_top,
                                     bottom=margin_bottom,
                                     tag=html_header,
                                     body=text)
            return Div(text=markup, width=width, height=15)

        def notification_updater(self, text):
            """Show ``text`` in every notification banner of this tab.

            The message is wrapped in a black banner and assigned to the
            ``text`` attribute of each entry in ``self.notification_div``.
            """
            banner = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for div in self.notification_div.values():
                div.text = banner

        def reset_adoption_dict(self, variable):
            """Replace the entry for ``variable`` with a new empty list.

            NOTE(review): ``significant_effect_dict`` is not created in this
            class's ``__init__``; presumably the base class provides it.
            """
            emptied = []
            self.significant_effect_dict[variable] = emptied

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a ``Div`` showing ``text`` as a coloured ``<h2>`` title."""
            markup = '<h2 style="color:#4221cc;">{heading}</h2>'.format(heading=text)
            return Div(text=markup, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Return a ``Div`` with the static "how to interpret" help box.

            The box uses a locally defined style (not ``self.div_style``) and
            ``self.header_style`` for its heading.
            """
            box_style = """ 
                style='width:350px; margin-left:-600px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            html = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """.format(box_style, self.header_style)
            return Div(text=html, width=width, height=height)

        # /////////////////////////////////////////////////////////////
        def filter_df(self, df1):
            """Return ``df1`` restricted to the rows matching the active filters.

            Each filter is skipped while its selection is ``'all'``; otherwise
            rows are kept where the column equals the selected value.

            NOTE(review): the loading code recodes the gender columns to 1/2
            while ``menus['gender']`` offers 'male'/'female'; confirm what
            values the gender selections actually hold.
            """
            active_filters = (
                ('status', self.status),
                ('project_owner_gender', self.pm_gender),
                ('milestone_owner_gender', self.m_gender),
                ('task_owner_gender', self.t_gender),
                ('type', self.type),
            )
            for column, wanted in active_filters:
                if wanted == 'all':
                    continue
                # Attribute-style column access, exactly as the original did.
                df1 = df1[getattr(df1, column) == wanted]
            return df1

        def prep_data(self, df1):
            """Build the aggregated frame used by the plots and store it in ``self.df1``.

            Steps: index ``df1`` by ``self.timestamp_col``, apply
            ``filter_df``, cast the ``groupby_dict`` columns to float, group
            by ``'project'``, resample with ``self.resample_period`` and
            aggregate per ``groupby_dict``. When ``self.lag`` is positive every
            feature except ``self.variable`` is shifted by that many rows, and
            rows containing NaN are dropped.

            Args:
                df1: raw frame containing ``self.timestamp_col``, ``'project'``
                    and the ``groupby_dict`` columns.

            Returns:
                None. Any exception is logged and swallowed, in which case
                ``self.df1`` keeps its previous value.
            """
            try:
                df1 = df1.set_index(self.timestamp_col)
                logger.warning('LINE 195 df:%s', df1.head())

                df = self.filter_df(df1.copy())
                logger.warning('LINE 199: length before:%s', len(df))

                # Keep the grouping key aside: it must not go through the
                # float cast applied to the numeric columns.
                project_col = df[['project']]
                df = df[list(self.groupby_dict.keys())]
                logger.warning('LINE 218: columns:%s', df.head())
                df = df.astype(float)
                df = pd.concat([df, project_col], axis=1)
                df = df.groupby('project').resample(self.resample_period).agg(
                    self.groupby_dict)
                logger.warning('LINE 201: length after:%s', len(df))

                df = df.reset_index()
                lag = int(self.lag)
                if lag > 0:
                    # Shift every feature except the selected variable.
                    for feature in self.feature_list:
                        if feature != self.variable:
                            df[feature] = df[feature].shift(lag)
                df = df.dropna()
                self.df1 = df
                # Log the frame that was just prepared (the original logged
                # the untouched raw frame ``self.df`` here).
                logger.warning('line 184- prep data: df:%s', self.df1.head(10))

            except Exception:
                logger.error('prep data', exc_info=True)

        def lags_plot(self, launch):
            """Scatter ``self.variable`` against ``self.lag_variable`` and its lags.

            ``self.lag_days`` is a comma-separated string; each entry that
            parses as an integer adds a column ``<lag_variable>_<entry>``
            holding the lag variable shifted by that many rows. Entries that
            are not integers are logged and skipped. ``lags_corr`` is called
            on the resulting frame for its side effect.

            Args:
                launch: stream trigger value; not used in the computation.

            Returns:
                The hvplot scatter, or None when an exception was logged.
            """
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                cols = [self.lag_variable]
                for day in self.lag_days.split(','):
                    # Only a malformed entry is expected here; anything else
                    # goes to the outer handler instead of being reported as
                    # "not an integer" (the original used a bare ``except:``).
                    try:
                        shift_by = int(day)
                    except ValueError:
                        logger.warning('%s is not an integer', day)
                        continue
                    label = self.lag_variable + '_' + day
                    df[label] = df[self.lag_variable].shift(shift_by)
                    cols.append(label)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                logger.warning('in lags plot: df:%s', df.head(10))
                return df.hvplot(x=self.variable,
                                 y=cols,
                                 kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Correlate ``self.variable`` with every other column of ``df``.

            One row per column is streamed into the shared ``lags_corr_src``
            data source (rollover set to the number of rows just produced),
            and a ``DataTable`` bound to that source is returned. The lag is
            read from the trailing ``_<n>`` of the column name; when that part
            is not an integer the string ``'None'`` is recorded.

            Returns:
                The ``DataTable``, or None when an exception was logged.
            """
            try:
                field_names = ('variable_1', 'variable_2', 'relationship',
                               'lag', 'r', 'p_value')
                corr_dict_data = {name: [] for name in field_names}
                reference = df[self.variable].tolist()
                skipped = [self.timestamp_col, self.variable]
                for col in df.columns:
                    if col in skipped:
                        continue
                    # find lag
                    suffix = col.split('_')[-1]
                    try:
                        lag = int(suffix)
                    except Exception:
                        lag = 'None'

                    other = df[col].tolist()
                    _, _, rvalue, pvalue, txt = self.corr_label(reference, other)
                    corr_dict_data['variable_1'].append(self.variable)
                    corr_dict_data['variable_2'].append(col)
                    corr_dict_data['relationship'].append(txt)
                    corr_dict_data['lag'].append(lag)
                    corr_dict_data['r'].append(round(rvalue, 4))
                    corr_dict_data['p_value'].append(round(pvalue, 4))

                lags_corr_src.stream(corr_dict_data,
                                     rollover=(len(corr_dict_data['lag'])))
                titles = (
                    ("variable_1", "variable 1"),
                    ("variable_2", "variable 2"),
                    ("relationship", "relationship"),
                    ("lag", "lag(days)"),
                    ("r", "r"),
                    ("p_value", "p_value"),
                )
                columns = [TableColumn(field=field, title=title)
                           for field, title in titles]
                return DataTable(source=lags_corr_src,
                                 columns=columns,
                                 width=500,
                                 height=280)
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """Tabulate ``corr_label`` results of ``self.variable`` vs each feature.

            Works on ``self.df1`` with the timestamp column dropped.

            Args:
                launch: stream trigger value; not used in the computation.

            Returns:
                An hvplot table, or None when an exception was logged.
            """
            try:
                column_order = ['Variable 1', 'Variable 2', 'Relationship',
                                'r', 'p-value']
                rows = {name: [] for name in column_order}

                frame = self.df1.drop(self.timestamp_col, axis=1)
                reference = frame[self.variable].tolist()

                for feature in self.feature_list:
                    logger.warning('col :%s', feature)
                    if feature == self.variable:
                        continue
                    logger.warning('%s:%s', feature, self.variable)
                    other = frame[feature].tolist()
                    _, _, rvalue, pvalue, txt = self.corr_label(reference, other)
                    rows['Variable 1'].append(self.variable)
                    rows['Variable 2'].append(feature)
                    rows['Relationship'].append(txt)
                    rows['r'].append(round(rvalue, 4))
                    rows['p-value'].append(round(pvalue, 4))

                table_df = pd.DataFrame(rows, columns=column_order)
                return table_df.hvplot.table(
                    columns=column_order,
                    width=550,
                    height=200,
                    title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """Tabulate ``mann_whitneyu_label`` results of ``self.variable`` vs each feature.

            Works on ``self.df1`` with the timestamp column dropped.

            Args:
                launch: stream trigger value; not used in the computation.

            Returns:
                An hvplot table, or None when an exception was logged.
            """
            try:
                column_order = ['Variable 1', 'Variable 2', 'Relationship',
                                'stat', 'p-value']
                rows = {name: [] for name in column_order}

                frame = self.df1.drop(self.timestamp_col, axis=1)
                reference = frame[self.variable].tolist()

                for feature in self.feature_list:
                    logger.warning('col :%s', feature)
                    if feature == self.variable:
                        continue
                    logger.warning('%s:%s', feature, self.variable)
                    other = frame[feature].tolist()
                    stat, pvalue, txt = self.mann_whitneyu_label(reference, other)
                    rows['Variable 1'].append(self.variable)
                    rows['Variable 2'].append(feature)
                    rows['Relationship'].append(txt)
                    rows['stat'].append(round(stat, 4))
                    rows['p-value'].append(round(pvalue, 4))

                table_df = pd.DataFrame(rows, columns=column_order)
                return table_df.hvplot.table(
                    columns=column_order,
                    width=550,
                    height=200,
                    title='Non parametric relationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            """Return a grid of histograms, one per entry of ``self.feature_list``.

            Reads ``self.df`` (not the prepared ``self.df1``).

            Args:
                launch: stream trigger value; not used in the computation.

            Returns:
                The hvplot layout in 4 columns, or None when an exception
                was logged.
            """
            try:
                return self.df.hvplot.hist(y=self.feature_list,
                                           subplots=True,
                                           shared_axes=False,
                                           bins=25,
                                           alpha=0.3,
                                           width=300).cols(4)
            except Exception:
                # Logged at error level, like every other plot method of this
                # class (the original used warning level here only).
                logger.error('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter ``self.variable`` against every other feature of ``self.df1``.

            Args:
                launch: stream trigger value; not used in the computation.

            Returns:
                A 4-column hvplot scatter layout. When ``self.df1`` is still
                None, a small placeholder scatter is returned instead.
                None when an exception was logged.
            """
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)

                df = self.df1

                if df is not None:
                    if self.timestamp_col in df.columns:
                        df = df.drop(self.timestamp_col, axis=1)

                    df = df.fillna(0)

                    # Plot the selected variable against all the others.
                    cols_temp = self.feature_list.copy()
                    if self.variable in cols_temp:
                        cols_temp.remove(self.variable)

                    p = df.hvplot.scatter(x=self.variable,
                                          y=cols_temp,
                                          width=330,
                                          subplots=True,
                                          shared_axes=False,
                                          xaxis=False).cols(4)
                else:
                    # No prepared data yet. The original called ``df.hvplot``
                    # on None here, which always raised; plot a real
                    # placeholder frame instead.
                    placeholder = pd.DataFrame({'x': [0, 0, 0],
                                                'y': [0, 0, 0]})
                    p = placeholder.hvplot.scatter(x='x', y='y', width=330)

                return p

            except Exception:
                logger.error('matrix plot', exc_info=True)

        def multiline(self, launch=1):
            """Overlay one line per distinct x-variable value, showing the mean of y over time.

            ``self.df`` is grouped by ``multiline_vars['x']``, resampled with
            ``self.resample_period`` and ``multiline_vars['y']`` is averaged.

            Args:
                launch: stream trigger value; not used in the computation.

            Returns:
                The overlay of line plots, or None when an exception was
                logged (which includes the case of no groups at all).
            """
            try:
                y_name = self.multiline_vars['y']
                x_name = self.multiline_vars['x']
                frame = self.df.copy()[[x_name, y_name, self.timestamp_col]]
                frame = frame.set_index(self.timestamp_col)
                frame = frame.groupby(x_name).resample(self.resample_period).agg(
                    {y_name: 'mean'})
                frame = frame.reset_index()
                # One line per distinct value of the x variable.
                for position, group in enumerate(frame[x_name].unique()):
                    subset = frame[frame[x_name] == group].fillna(0)
                    logger.warning('LINE 428:%s - %s:', group, subset.head())
                    is_first = position == 0
                    # NOTE(review): lines after the first are created with
                    # width=2 while the first uses 1200; kept as in the
                    # original, but confirm this is intended.
                    curve = subset.hvplot.line(x=self.timestamp_col,
                                               y=y_name,
                                               width=1200 if is_first else 2,
                                               height=500).relabel(group)
                    if is_first:
                        p = curve
                    else:
                        p *= curve
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    def update_variable(attr, old, new):
        """Store the newly selected variable, rebuild the data and refresh the plots.

        The original only assigned ``thistab.variable`` when ``new`` was one
        of three display labels, so any other selection was silently ignored,
        and it ran ``prep_data`` before the assignment. ``prep_data`` reads
        ``thistab.variable`` to decide which columns to lag, so the variable
        is now set first.

        Args:
            attr, old: unused.
            new: the newly selected value.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        # Legacy display labels keep their original mapping; every other
        # value is used as the column name directly.
        label_to_column = {
            'milestone owner gender': 'm_gender_code',
            'project owner gender': 'pm_gender_code',
            'task owner gender': 't_gender_code',
        }
        thistab.variable = label_to_column.get(new, new)
        thistab.prep_data(thistab.df)

        # NOTE(review): ``adoption_variables`` and ``section_head_updater``
        # are not defined in the visible Thistab class; presumably the base
        # class provides them. Confirm.
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """Store the new lag variable, rebuild the data and fire the lag stream.

        Args:
            attr, old: unused.
            new: the newly selected lag variable.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        # Only the lag-variable stream is fired here.
        for stream in (stream_launch_lags_var,):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        """Copy all filter widget values onto ``thistab`` and refresh the plots.

        The callback arguments are ignored; every filter is re-read from its
        widget, then the data is rebuilt and the matrix and correlation
        streams are fired.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        selections = (
            ('pm_gender', pm_gender_select),
            ('m_gender', m_gender_select),
            ('t_gender', t_gender_select),
            ('status', status_select),
            ('type', type_select),
        )
        for field, widget in selections:
            setattr(thistab, field, widget.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):
        """Read the lag from ``lag_select``, rebuild the data and refresh the plots.

        The callback arguments are ignored; the value is taken from the
        widget and converted to int.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Reload the data for the date range in the date pickers and refresh the plots.

        Mirrors the initial load in this tab function: the original gender
        labels are kept in ``manager_gender`` (the default x variable of
        ``multiline``) before the three gender columns are recoded to 1
        (``'male'``) or 2 (anything else), and an empty result is left
        untouched. The original reload did neither, so ``manager_gender``
        disappeared after the first date change.

        The callback arguments are ignored; dates are read from the widgets.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        if len(thistab.df) > 0:
            # Must happen before the recode below overwrites the labels.
            thistab.df['manager_gender'] = thistab.df['project_owner_gender']
            for column in ('project_owner_gender', 'milestone_owner_gender',
                           'task_owner_gender'):
                thistab.df[column] = thistab.df[column].apply(
                    lambda x: 1 if x == 'male' else 2)
            thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Store the new resample period, rebuild the data and refresh all plots.

        Args:
            attrname, old: unused.
            new: the newly selected resample period.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        # Same firing order as before: matrix, correlation, then launch.
        for stream in (stream_launch_matrix, stream_launch_corr, stream_launch):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Take the lag list from ``lags_input`` and re-trigger the lag plots.

        Takes no arguments. The raw text is stored in ``thistab.lag_days``;
        it is parsed later by ``lags_plot``.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        for stream in (stream_launch_lags_var, stream_launch):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        """Copy the multiline x/y selections onto ``thistab`` and fire ``stream_launch``.

        The callback arguments are ignored; both values are re-read from
        their widgets.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars.update(x=multiline_x_select.value,
                                      y=multiline_y_select.value)
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'project_composite1'
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=30)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        if len(thistab.df) > 0:
            thistab.df['manager_gender'] = thistab.df['project_owner_gender']
            thistab.df['project_owner_gender'] = thistab.df[
                'project_owner_gender'].apply(lambda x: 1
                                              if x == 'male' else 2)
            thistab.df['milestone_owner_gender'] = thistab.df[
                'milestone_owner_gender'].apply(lambda x: 1
                                                if x == 'male' else 2)
            thistab.df['task_owner_gender'] = thistab.df[
                'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            logger.warning('LINE 527:columns %s', list(thistab.df.columns))

            thistab.prep_data(thistab.df)

        # MANAGE STREAM
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        type_select = Select(title='Select project type',
                             value=thistab.type,
                             options=thistab.menus['type'])

        status_select = Select(title='Select project status',
                               value=thistab.status,
                               options=thistab.menus['status'])

        pm_gender_select = Select(title="Select project owner's gender",
                                  value=thistab.pm_gender,
                                  options=thistab.menus['gender'])

        m_gender_select = Select(title="Select milestone owner's gender",
                                 value=thistab.m_gender,
                                 options=thistab.menus['gender'])

        t_gender_select = Select(title="Select task owner's gender",
                                 value=thistab.t_gender,
                                 options=thistab.menus['gender'])

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        multiline_y_select = Select(title='Select comparative DV(y)',
                                    value=thistab.multiline_vars['y'],
                                    options=[
                                        'remuneration', 'delay_start',
                                        'delay_end', 'project_duration'
                                    ])

        multiline_x_select = Select(
            title='Select comparative IV(x)',
            value=thistab.multiline_vars['x'],
            options=['manager_gender', 'type', 'status'])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src,
                                    columns=columns,
                                    width=500,
                                    height=200)

        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        # hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])
        hv_multiline = hv.DynamicMap(thistab.multiline,
                                     streams=[stream_launch])

        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)
        multiline = renderer.get_plot(hv_multiline)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        pm_gender_select.on_change('value', update_IVs)
        m_gender_select.on_change('value', update_IVs)
        t_gender_select.on_change('value', update_IVs)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        status_select.on_change('value', update_IVs)
        type_select.on_change('value', update_IVs)

        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_lag = WidgetBox(lags_input, lags_input_button,
                                 lag_variable_select)

        controls_multiline = WidgetBox(multiline_x_select, multiline_y_select)

        controls_page = WidgetBox(datepicker_start, datepicker_end,
                                  variable_select, type_select, status_select,
                                  resample_select, pm_gender_select,
                                  m_gender_select, t_gender_select)
        controls_gender = WidgetBox(pm_gender_select, m_gender_select,
                                    t_gender_select)

        # create the dashboards

        grid = gridplot(
            [[thistab.notification_div['top']], [Spacer(width=20, height=70)],
             [thistab.section_headers['relationships']],
             [Spacer(width=20, height=30)], [matrix_plot.state, controls_page],
             [thistab.section_headers['correlations']],
             [Spacer(width=20, height=30)],
             [corr_table.state,
              thistab.corr_information_div()],
             [thistab.title_div('Compare levels in a variable', 400)],
             [Spacer(width=20, height=30)],
             [multiline.state, controls_multiline],
             [thistab.section_headers['lag']], [Spacer(width=20, height=30)],
             [lags_plot.state, controls_lag], [lags_corr_table],
             [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #12

Datei anzeigen

Datei: inventory.py Projekt: andre-aion/various_foods_on_a_blanket

def kpi_inventory_tab(panel_title, credentials):
    class Thistab(KPI):
        def __init__(self, table, credentials, cols=None):
            """Set up the inventory KPI tab: filters, dates, cards and headers.

            Args:
                table: table name forwarded to ``KPI.__init__`` and kept on
                    ``self.table``.
                credentials: forwarded unchanged to ``KPI.__init__``.
                cols: column names forwarded to ``KPI.__init__``; ``None``
                    (the default) is treated as an empty list.
            """
            # A fresh list per instance instead of one shared mutable default.
            cols = [] if cols is None else cols
            KPI.__init__(self,
                         table,
                         name='inventory',
                         cols=cols,
                         credentials=credentials)
            self.table = table
            self.df = None

            # setup selects: one 'all'-defaulted Select per filterable column
            self.menus = {'gender': ['all', 'Male', 'Female']}
            self.select = {}
            self.select_values = {}
            for key, options in self.menus.items():
                self.select[key] = Select(title='Select {}'.format(key),
                                          value='all',
                                          options=options)
                self.select_values[key] = 'all'
            self.timestamp_col = 'timestamp_delivered'
            self.variable = 'delivery_amount'
            self.groupby_dict = {'delivery_amount': 'sum'}
            self.multiline_resample_period = 'D'
            # NOTE(review): pop_start_date is not assigned anywhere in this
            # class; presumably KPI.__init__ provides it -- confirm.
            self.pop = {
                'history_periods': 3,
                'aggregate': 'mean',
                'start': datetime(2015, 1, 5, 0, 0, 0),
                'end': self.pop_start_date + timedelta(days=8)
            }
            self.cols = cols

            self.load_data_start_date = datetime(2014, 1, 1, 0, 0, 0)
            self.load_data_end_date = datetime.now()

            self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0)

            # ------- DIVS setup begin
            # page_width has to exist before the cards div is sized from it.
            self.page_width = 1200

            # cards
            self.KPI_card_div = self.initialize_cards(self.page_width,
                                                      height=350)

            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                          position:relative;background:black;margin-bottom:200px">
                          <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards':
                self.section_header_div(text='Period to date({}):{}'.format(
                    self.variable, self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'pop':
                self.section_header_div(text='Period over period:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                # The placeholder was missing, so the divider never rendered.
                'dow':
                self.section_header_div(
                    text='Compare days of the week:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a ``Div`` holding ``text`` wrapped in a coloured header tag.

            Args:
                text: header text placed inside the tag.
                html_header: tag name used for the opening and closing tag.
                width: width passed to the ``Div`` (its height is fixed at 15).
                margin_top: number written after ``margin-top:``.
                margin_bottom: number written after ``margin-bottom:-``.

            NOTE(review): the template already contains a minus sign before
            the bottom margin, so a negative ``margin_bottom`` (including the
            default) renders as ``--150px`` -- confirm that is intended.
            """
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

        def section_header_div_updater(self, which_header, update_text):
            text = """<div style="margin-top:150 px;margin-bottom:--150px;">
                      <h2 style="color:#4221cc;">{}</h2></div>""" \
                .format(update_text)
            self.section_headers[which_header].text = text

        # ----------------------  DIVS ----------------------------

        def reset_checkboxes(self, value='all', checkboxgroup=''):
            try:
                self.checkboxgroup[checkboxgroup].value = value
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        def information_div(self, width=400, height=300):
            """Return a ``Div`` with the "How to interpret relationships" box.

            The list items in the template are empty placeholders. The box
            style is inlined here and the heading attributes come from
            ``self.header_style``.
            """
            box_style = """ 
                          style='width:350px;margin-right:-800px;
                          border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                      """
            markup = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                 <li>
                </li>
                 <li>
                </li>
            </ul>
            </div>

            """.format(box_style, self.header_style)
            return Div(text=markup, width=width, height=height)

        # ------------------- LOAD AND SETUP DATA -----------------------
        def filter_df(self, df):
            try:
                for item in self.select_values.keys():
                    if self.select_values[item] != 'all':
                        df = df[df[item] == self.select_values[item]]
                return df

            except Exception:
                logger.error('filters', exc_info=True)

        def set_select_menus(self, df):
            try:
                for item in self.select.keys():
                    if item in df.columns and len(df) > 0:
                        lst = list(set(df[item].values))
                        lst.append('all')
                        sorted(lst)

                        self.select[item].options = lst

            except Exception:
                logger.error('set filters menus', exc_info=True)

        # -------------------- CARDS -----------------------------------------
        def initialize_cards(self, width, height=250):
            """Return a ``Div`` holding four blank KPI cards in a flex row.

            Each card gets a design key picked at random from
            ``KPI_card_css``. On any exception the error is logged and
            ``None`` is returned implicitly.
            """
            try:
                # One blank card per period (year, quarter, month, week).
                cards = ''.join(
                    self.card(title='',
                              data='',
                              card_design=random.choice(
                                  list(KPI_card_css.keys())))
                    for _ in range(4))

                markup = """<div style="margin-top:100px;display:flex; flex-direction:row;">
                       {}
                       </div>""".format(cards)
                return Div(text=markup, width=width, height=height)
            except Exception:
                logger.error('initialize cards', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------

        def graph_periods_to_date(self, df1, timestamp_filter_col, variable):
            try:
                dct = {}
                for idx, period in enumerate(
                    ['week', 'month', 'quarter', 'year']):
                    if df1 is not None:
                        df = self.period_to_date(
                            df1,
                            timestamp=dashboard_config['dates']['last_date'],
                            timestamp_filter_col=timestamp_filter_col,
                            period=period)

                        # get unique instances
                        # df = df[[variable]]
                        df = df.drop_duplicates(keep='first')
                        # logger.warning('post duplicates dropped:%s', df.head(10))
                        data = 0
                        if self.groupby_dict[variable] == 'sum':
                            data = int(df[variable].sum())
                        elif self.groupby_dict[variable] == 'mean':
                            data = "{}%".format(round(df[variable].mean(), 3))
                        else:
                            data = int(df[variable].count())
                        del df
                        gc.collect()
                        dct[period] = data
                    else:
                        dct[period] = 0

                self.update_cards(dct)

            except Exception:
                logger.error('graph periods to date', exc_info=True)

        def period_over_period(self, df, start_date, end_date, period,
                               history_periods, timestamp_col):
            def label_qtr_pop(y):
                try:
                    curr_quarter = int((y.month - 1) / 3 + 1)
                    start = datetime(y.year, 3 * curr_quarter - 2, 1)
                    if isinstance(y, date):
                        start = start.date()
                    return abs((start - y).days)
                except Exception:
                    logger.error('df label quarter', exc_info=True)

            try:
                # make columns for each history  period
                if len(df) == 0:
                    dfi = pd.date_range(self.pop['start'],
                                        self.pop['end'],
                                        freq='D')
                    dfi.rename(self.timestamp_col)
                    df = pd.DataFrame(random, index=dfi)
                    df[self.variable] = 0
                    print('LINE 239:%s', df.head())
                df = df.rename(columns={
                    self.variable: '0_periods_prev',
                    self.timestamp_col: 'date'
                })
                df.set_index('date', inplace=True)

                for count in range(1, history_periods + 1):
                    label = f"{count}_periods_prev"
                    print('LINE 252,label', label)
                    print('LINE 253', df.head())
                    try:
                        if period == 'month':
                            df[label] = df.shift(periods=30)
                        elif period == 'year':
                            df[label] = df.shift(periods=365)
                        elif period == 'week':
                            df[label] = df.shift(periods=7)
                        elif period == 'quarter':
                            df[label] = df.shift(periods=90)
                        df = df.fillna(0)
                    except Exception:
                        df[label] = 0
                    print('LINE 265, COUNT:', count)

                return df
            except Exception:
                logger.error('graph period over period', exc_info=True)

        def graph_period_over_period(self, period):
            try:
                periods = [period]
                start_date = self.pop['start']
                end_date = self.pop['end']
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date,
                                                  datetime.min.time())
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                cols = [self.variable, self.timestamp_col]
                df = self.load_df(start_date,
                                  end_date,
                                  cols=cols,
                                  timestamp_col=self.timestamp_col)

                for idx, period in enumerate(periods):
                    df_period = self.period_over_period(
                        df,
                        start_date=start_date,
                        end_date=end_date,
                        period=period,
                        history_periods=self.pop['history_periods'],
                        timestamp_col=self.timestamp_col)

                    title = "{} over {}".format(period, period)
                    plotcols = list(df.columns)
                    plotcols = plotcols.remove(self.variable)

                    if idx == 0:
                        p = df_period.hvplot.bar('date',
                                                 plotcols,
                                                 rot=45,
                                                 title=title,
                                                 stacked=False,
                                                 width=1200,
                                                 height=500)
                    else:
                        p += df_period.hvplot.bar('date',
                                                  plotcols,
                                                  rot=45,
                                                  title=title,
                                                  stacked=False,
                                                  width=1200,
                                                  height=500)
                return p

            except Exception:
                logger.error('period over period to date', exc_info=True)

        def pop_week(self, launch=-1):
            try:
                return self.graph_period_over_period('week')
            except Exception:
                logger.error('pop week', exc_info=True)

        def pop_month(self, launch=-1):
            try:
                return self.graph_period_over_period('month')
            except Exception:
                logger.error('pop month', exc_info=True)

        def pop_quarter(self, launch=-1):
            try:
                return self.graph_period_over_period('quarter')
            except Exception:
                logger.error('pop quarter', exc_info=True)

        def pop_year(self, launch=-1):
            try:
                return self.graph_period_over_period('year')
            except Exception:
                logger.error('pop year', exc_info=True)

        def multiline_dow(self, launch=1):
            """Plot the mean of the comparison variable per weekday and period.

            Rows of ``self.df`` are labelled with their resample period
            (``self.multiline_resample_period``) and weekday name, averaged
            per (weekday, period) pair and drawn as one line per weekday.

            Args:
                launch: stream value; not read by the body.

            Returns:
                The hvplot line chart, or ``None`` after logging if anything
                fails.

            NOTE(review): ``self.multiline_vars`` is not assigned in this
            class; presumably the base class provides it -- confirm.
            """
            # 'D' is this class's default resample period; it was missing
            # from the map, so the default always raised KeyError.
            period_names = {
                'D': 'day',
                'W': 'week',
                'M': 'month',
                'Q': 'Qtr',
                'Y': 'year'
            }
            try:
                df = self.df.copy()
                resample_period = period_names[self.multiline_resample_period]
                yvar = self.multiline_vars['y']
                xvar = 'day_of_week'
                df[resample_period] = df[self.timestamp_col].dt.to_period(
                    self.multiline_resample_period)
                # Period objects are stringified so they can be used as
                # plain axis labels.
                df[resample_period] = df[resample_period].astype('str')
                df[xvar] = df[self.timestamp_col].dt.day_name()
                df = df.groupby([xvar, resample_period]).agg({yvar: 'mean'})
                df = df.reset_index()
                p = df.hvplot.line(resample_period,
                                   yvar,
                                   by='day_of_week',
                                   width=1200,
                                   height=500)
                p.opts(xrotation=45)
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    def update(attrname, old, new):
        """Callback for the filter selects.

        Copies every select's current value into ``thistab.select_values``,
        recomputes the period-to-date cards, refreshes the 'cards' and 'pop'
        headers and fires ``stream_launch``.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        for name in thistab.select_values:
            thistab.select_values[name] = thistab.select[name].value
        thistab.graph_periods_to_date(thistab.df, thistab.timestamp_col,
                                      thistab.variable)
        for header in ('cards', 'pop'):
            thistab.section_header_updater(header)
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_variable(attrname, old, new):
        """Callback for the variable select.

        Stores the selected variable on ``thistab``, recomputes the
        period-to-date cards for it, rewrites the 'cards' header text and
        fires ``stream_launch``.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.variable = variable_select.value
        thistab.graph_periods_to_date(thistab.df,
                                      thistab.timestamp_col,
                                      thistab.variable)
        thistab.section_header_div_updater('cards', thistab.variable)
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_period_over_period():
        """Button callback: apply the period-over-period controls.

        Stores the selected number of history periods and the two date-picker
        values on ``thistab`` and fires ``stream_launch``.

        NOTE(review): the plots read ``thistab.pop['start']`` and
        ``thistab.pop['end']``, which this callback does not change; the
        picker values only land in ``pop_start_date`` / ``pop_end_date`` --
        confirm whether the pickers are meant to move the plotted window.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        # Select values are strings; the plotting code uses this count in
        # range(), so store it as an int like the initial value.
        thistab.pop['history_periods'] = int(history_periods_select.value)
        thistab.pop_start_date = datepicker_pop_start.value  # trigger period over period
        thistab.pop_end_date = datepicker_pop_end.value  # trigger period
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_history_periods(attrname, old, new):
        """Callback storing the number of comparative periods.

        Reads ``pop_number_select`` (the ``new`` argument is not used),
        stores the count on ``thistab.pop`` and fires ``stream_launch``.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        # Select values are strings; keep the count an int so that
        # range(1, n + 1) in the plotting code works.
        thistab.pop['history_periods'] = int(pop_number_select.value)
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Main body of the tab factory: build the tab state, load the data, create
    # widgets and plots, wire the callbacks and return the finished Panel.
    # Any failure is logged and replaced by the generic error tab.
    try:
        table = 'inventory_warehouse'
        cols = ['delivery_amount', 'timestamp_delivered', 'gender']
        thistab = Thistab(table, credentials=credentials, cols=cols)
        # -------------------------------------  SETUP   ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()

        last_date = dashboard_config['dates']['last_date']
        first_date = datetime(2014, 1, 1, 0, 0, 0)

        # NOTE(review): cols is narrowed here to the variable and timestamp
        # only, so 'gender' is not among the columns requested from load_df.
        cols = [thistab.variable, thistab.timestamp_col]
        thistab.df = thistab.load_df(first_date, last_date, cols,
                                     thistab.timestamp_col)
        thistab.set_select_menus(thistab.df)
        thistab.graph_periods_to_date(
            thistab.df,
            timestamp_filter_col=thistab.timestamp_col,
            variable=thistab.variable)

        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------

        # After the 3rd of the month the comparison window is the 7 days
        # ending on the last day of the previous month.
        daynum = datetime.now().day
        if daynum > 3:
            thistab.pop_end_date = datetime.now().date() - timedelta(
                days=daynum)
            thistab.pop_start_date = thistab.pop_end_date - timedelta(days=7)
        else:
            # NOTE(review): pop_end_date is not assigned on this path within
            # this function; presumably the base class sets it -- confirm.
            thistab.pop_start_date = thistab.first_date_in_period(
                thistab.pop_end_date, 'week')

        logger.warning('LINE 500: POP Start: END %s:%s',
                       thistab.pop_start_date, thistab.pop_end_date)

        # NOTE(review): both stream classes are defined under the same name
        # 'Launch', and stream_launch_multiline is not used in this block.
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_multiline = streams.Stream.define('Launch', launch=-1)()

        history_periods_select = Select(
            title='Select # of comparative periods',
            value=str(thistab.pop['history_periods']),
            options=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'])

        datepicker_pop_start = DatePicker(title="Period start",
                                          min_date=first_date_range,
                                          max_date=last_date_range,
                                          value=thistab.load_data_start_date)

        datepicker_pop_end = DatePicker(title="Period end",
                                        min_date=first_date_range,
                                        max_date=last_date_range,
                                        value=thistab.load_data_end_date)

        # NOTE(review): pop_number_select is created but neither placed in the
        # layout nor given a callback in this block.
        pop_number_select = Select(
            title='Select # of comparative periods',
            value=str(5),
            options=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'])
        pop_button = Button(label="Select dates/periods, then click me!",
                            width=15,
                            button_type="success")

        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=[thistab.variable] +
                                 list(thistab.select_values.keys()))

        # ---------------------------------  GRAPHS ---------------------------
        # All three period-over-period plots are driven by stream_launch.
        hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)

        hv_pop_month = hv.DynamicMap(thistab.pop_month,
                                     streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)

        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter,
                                       streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)

        # -------------------------------- CALLBACKS ------------------------
        # datepicker_start.on_change('value', update)
        # datepicker_end.on_change('value', update)
        for key in thistab.select_values.keys():
            thistab.select[key].on_change('value', update)

        variable_select.on_change('value', update_variable)
        pop_button.on_click(update_period_over_period)

        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element
        controls = WidgetBox(variable_select, thistab.select['gender'])

        controls_pop = WidgetBox(datepicker_pop_start, datepicker_pop_end,
                                 history_periods_select, pop_button)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['cards']],
                         [Spacer(width=20, height=2)],
                         [thistab.KPI_card_div, controls],
                         [thistab.section_headers['pop']],
                         [Spacer(width=20, height=25)],
                         [pop_week.state, controls_pop], [pop_month.state],
                         [pop_quarter.state],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        # Log the full traceback and show the placeholder error tab instead.
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #13

Datei anzeigen

Datei: rentals.py Projekt: andre-aion/analytics_demo

def kpi_bcc_rentals_visitor_tab(panel_title):
    class Thistab(KPI):
        def __init__(self, table, cols=None):
            """Configure state, filter widgets and header divs for the rentals KPI tab.

            Args:
                table: table name; stored on the instance and passed on to
                    ``KPI.__init__``.
                cols: optional list of column names forwarded to
                    ``KPI.__init__``. A new empty list is used when omitted.
            """
            # Build the default per call so instances never share one list object.
            cols = [] if cols is None else cols
            KPI.__init__(self, table, name='rentals', cols=cols)
            self.table = table
            self.df = None
            self.pym = PythonMongo('aion')

            self.checkboxgroup = {
                'category': [],
                'item': [],
                'area': [],
                'visit_duration': [],
            }

            self.multiline_vars = {
                'y': 'visit_duration',
            }

            # Aggregation looked up per variable by graph_periods_to_date.
            self.groupby_dict = {
                'item': 'count',
                'area': 'count',
                'category': 'count',
                'status': 'count',
                'gender': 'count',
                'visit_duration': 'sum',
            }

            # setup selects: every filter starts at 'all', which filter_df skips
            self.select_values = {}
            self.select_menus = {}
            self.select = {}
            for item in ['area', 'item', 'category', 'status', 'gender']:
                self.select_values[item] = 'all'
                self.select_menus[item] = ['all']
                self.select[item] = Select(title='Select ' + item, value='all',
                                           options=self.select_menus[item])
            self.timestamp_col = 'visit_start'

            self.variable = 'item'

            self.multiline_resample_period = 'M'

            # period-to-date start: 1 January of the current year
            self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0)

            # cards
            # NOTE(review): self.page_width is read here before this method
            # assigns it below, so it has to be provided by KPI.__init__ already
            # -- confirm against the KPI base class.
            self.KPI_card_div = self.initialize_cards(self.page_width, height=350)

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                          position:relative;background:black;margin-bottom:200px">
                          <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards': self.section_header_div(text='Period to date:{}'.format(self.section_divider),
                                                 width=600, html_header='h2', margin_top=5,
                                                 margin_bottom=-155),
                'pop': self.section_header_div(text='Period over period:{}'.format(self.section_divider),
                                               width=600, html_header='h2', margin_top=5, margin_bottom=-155),
                # The placeholder was missing here, so the divider passed to
                # format() was silently dropped from this one header.
                'dow': self.section_header_div(text='Compare days of the week:{}'.format(self.section_divider),
                                               width=600, html_header='h2', margin_top=5, margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150):
            """Wrap ``text`` in a coloured heading and return it as a Div.

            Args:
                text: heading text placed inside the ``html_header`` tag.
                html_header: tag name used for the heading element.
                width: width handed to the Div.
                margin_top: value substituted into the ``margin-top`` style.
                margin_bottom: value substituted into the ``margin-bottom``
                    style. The template already writes a ``-`` in front of it.

            Returns:
                A Div of height 15 containing the generated markup.
            """
            markup = (
                '<div style="margin-top:{}px;margin-bottom:-{}px;">'
                '<{} style="color:#4221cc;">{}</{}></div>'
            ).format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=markup, width=width, height=15)


        # ----------------------  DIVS ----------------------------

        def reset_checkboxes(self, value='all',checkboxgroup=''):
            """Assign ``value`` to the ``value`` attribute of one checkbox group.

            Args:
                value: new value to store on the group.
                checkboxgroup: key into ``self.checkboxgroup``.

            Any failure (unknown key, target without a settable ``value``) is
            logged and swallowed.
            """
            try:
                group = self.checkboxgroup[checkboxgroup]
                group.value = value
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        def information_div(self, width=400, height=300):
            """Build the 'How to interpret relationships' help box.

            The list items are empty placeholders. ``self.header_style`` is
            substituted into the ``<h4>`` tag; it is not assigned in this
            class's ``__init__``, so it presumably comes from the KPI base
            class -- verify.

            Args:
                width: width handed to the Div.
                height: height handed to the Div.

            Returns:
                A Div containing the help-box markup.
            """
            div_style = """ 
                          style='width:350px;margin-right:-800px;
                          border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                      """
            # A stray "e7" fragment used to sit in front of one closing </li>
            # and would have been rendered as visible text; it is removed.
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                 <li>
                </li>
                 <li>
                </li>
            </ul>
            </div>

            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ------------------- LOAD AND SETUP DATA -----------------------
        def filter_df(self, df):
            """Apply every active select filter to ``df``.

            For each column in ``self.select_values`` whose chosen value is not
            ``'all'``, only rows equal to that value are kept.

            Args:
                df: frame to filter.

            Returns:
                The filtered frame, or ``None`` when filtering raised (the
                error is logged).
            """
            try:
                for column, wanted in self.select_values.items():
                    if wanted == 'all':
                        continue
                    df = df[df[column] == wanted]
                return df

            except Exception:
                logger.error('filters', exc_info=True)

        def set_select_menus(self,df):
            """Refresh the options of every select widget from the values in ``df``.

            For each key of ``self.select`` that is also a column of a
            non-empty ``df``, the widget's options become the distinct column
            values plus ``'all'``, in sorted order. Errors are logged and
            swallowed.

            Args:
                df: frame whose columns provide the menu entries.
            """
            try:
                if len(df) == 0:
                    return
                for item, widget in self.select.items():
                    if item not in df.columns:
                        continue
                    logger.warning('LINE 151: item: %s', item)
                    # The previous code called sorted() and threw the result
                    # away, leaving the menu in arbitrary set order. key=str
                    # keeps the sort from failing on mixed value types.
                    lst = sorted(set(df[item].values) | {'all'}, key=str)
                    logger.warning('LINE 157: LIST: %s',lst)

                    widget.options = lst

            except Exception:
                logger.error('set filters menus', exc_info=True)


        def load_df_pym(self, req_startdate, req_enddate, cols, timestamp_col):
            """Return filtered rows for a date range, reusing ``self.df`` when possible.

            When ``self.df`` is set and its ``timestamp_col`` values span the
            whole requested range, the rows are sliced from it. Otherwise they
            are fetched through ``self.pym.load_df``. The result is passed
            through ``filter_df`` once and used to refresh the select menus.

            Args:
                req_startdate: inclusive start of the requested range.
                req_enddate: inclusive end of the requested range.
                cols: columns requested from ``self.pym.load_df``.
                timestamp_col: name of the timestamp column.

            Returns:
                The filtered frame, or ``None`` when an error was raised (the
                error is logged).
            """
            try:
                df = None
                if self.df is not None:
                    # get min and max of loaded df
                    loaded_min = self.df[timestamp_col].min()
                    loaded_max = self.df[timestamp_col].max()

                    if loaded_min <= req_startdate and loaded_max >= req_enddate:
                        in_range = ((self.df[timestamp_col] >= req_startdate) &
                                    (self.df[timestamp_col] <= req_enddate))
                        df = self.df[in_range]

                if df is None:
                    # nothing cached, or the cache does not cover the request
                    df = self.pym.load_df(req_startdate, req_enddate, table=self.table,
                                          cols=cols, timestamp_col=timestamp_col)

                # Filter exactly once; the cached branch used to be filtered
                # here and a second time further down.
                df = self.filter_df(df)
                self.set_select_menus(df)

                return df

            except Exception:
                logger.error('load_df', exc_info=True)

        # -------------------- CARDS -----------------------------------------
        def initialize_cards(self, width, height=250):
            """Create the Div holding four empty KPI cards in a flex row.

            One card is rendered per period (year, quarter, month, week), each
            with a design key picked at random from ``KPI_card_css``.

            Args:
                width: width handed to the Div.
                height: height handed to the Div.

            Returns:
                The Div, or ``None`` when building it raised (the error is
                logged).
            """
            try:
                cards_html = ''.join(
                    self.card(title='', data='',
                              card_design=random.choice(list(KPI_card_css.keys())))
                    for _ in ('year', 'quarter', 'month', 'week')
                )

                markup = """<div style="margin-top:100px;display:flex; flex-direction:row;">
                       {}
                       </div>""".format(cards_html)
                return Div(text=markup, width=width, height=height)
            except Exception:
                logger.error('initialize cards', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------


        def graph_periods_to_date(self, df1, timestamp_filter_col, variable):
            """Compute the period-to-date figure per period and update the cards.

            For week, month, quarter and year, ``period_to_date`` is applied to
            ``df1`` up to ``dashboard_config['dates']['last_date']``. The
            ``variable`` column is de-duplicated and then aggregated according
            to ``self.groupby_dict[variable]``: 'sum' and the fallback count
            give an int, 'mean' gives a string with a trailing '%'. The
            resulting dict is handed to ``update_cards``.

            Args:
                df1: source frame.
                timestamp_filter_col: timestamp column passed to
                    ``period_to_date``.
                variable: column to aggregate.

            Errors are logged and swallowed.
            """
            try:
                figures = {}
                for period in ('week', 'month', 'quarter', 'year'):
                    to_date = self.period_to_date(df1, timestamp=dashboard_config['dates']['last_date'],
                                                  timestamp_filter_col=timestamp_filter_col, period=period)

                    # get unique instances
                    distinct = to_date[[variable]].drop_duplicates(keep='first')
                    aggregation = self.groupby_dict[variable]
                    if aggregation == 'sum':
                        figure = int(distinct[variable].sum())
                    elif aggregation == 'mean':
                        figure = "{}%".format(round(distinct[variable].mean(),3))
                    else:
                        figure = int(distinct[variable].count())
                    # drop the per-period frames before moving on
                    del to_date, distinct
                    gc.collect()
                    figures[period] = figure

                self.update_cards(figures)

            except Exception:
                logger.error('graph periods to date', exc_info=True)

        def period_over_period(self, df, start_date, end_date, period,
                               history_periods=2, timestamp_col='timestamp_of_first_event'):
            """Stack the current period's rows with rows from earlier periods.

            The rows in ``df`` are labelled as the current period in a
            ``'period'`` column. The date window is then shifted back with
            ``shift_period_range`` and, for every earlier window that still
            starts on or after ``self.initial_date``, the matching rows are
            loaded, labelled ``'<n> <period>(s) prev'`` and appended.

            Args:
                df: rows of the current period.
                start_date: start of the current window (time part is dropped).
                end_date: end of the current window (time part is dropped).
                period: period name used in labels and passed to the helpers.
                history_periods: loop bound for the look-back; an int or a
                    numeric string. The loop runs while the counter, starting
                    at 1, is below this value.
                timestamp_col: name of the timestamp column.

            Returns:
                The combined frame, or ``None`` when an error was raised (the
                error is logged).
            """
            try:
                # filter cols if necessary
                string = '0 {}(s) prev(current)'.format(period)

                # filter out the dates greater than today
                df_current = df.copy()
                df_current['period'] = string
                # label the days being compared with the same label
                df_current = self.label_dates_pop(df_current, period, timestamp_col)
                # zero out time information
                start = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0)
                end = datetime(end_date.year, end_date.month, end_date.day, 0, 0, 0)

                cols = list(df.columns)
                counter = 1
                if isinstance(history_periods, str):
                    history_periods = int(history_periods)
                # make dataframes for request no. of periods
                start, end = self.shift_period_range(period, start, end)
                while counter < history_periods and start >= self.initial_date:
                    # load data
                    if period == 'quarter':
                        logger.warning('start:end %s:%s', start, end)

                    df_temp = self.load_df_pym(start, end, cols, timestamp_col)
                    # load_df_pym returns None on failure, so test for that
                    # before touching the frame; a failed window is skipped
                    # instead of aborting the whole comparison.
                    if df_temp is not None and len(df_temp) > 1:
                        df_temp[timestamp_col] = pd.to_datetime(df_temp[timestamp_col])
                        # Label in the same 'period' column the current rows
                        # use. Writing to df_temp[period] created a column
                        # named after the period ('week', ...) instead.
                        df_temp['period'] = '{} {}(s) prev'.format(counter, period)
                        # relabel days to get matching day of week,doy, dom, for different periods
                        df_temp = self.label_dates_pop(df_temp, period, timestamp_col)

                        df_current = pd.concat([df_current, df_temp])
                        del df_temp
                        gc.collect()
                    # shift the loading window
                    counter += 1
                    start, end = self.shift_period_range(period, start, end)
                return df_current
            except Exception:
                logger.error('period over period', exc_info=True)

        def graph_period_over_period(self,period):
            """Build the grouped bar chart comparing ``period`` with earlier ones.

            Rows between ``self.pop_start_date`` and ``self.pop_end_date`` are
            loaded, combined with earlier periods through
            ``period_over_period``, counted per ``dayset``/``period`` and
            spread into one column per period label. Those new columns are
            plotted against ``dayset``.

            Args:
                period: period name passed to the helpers and used in the title.

            Returns:
                The hvplot bar chart, or ``None`` when an error was raised (the
                error is logged).
            """
            try:
                midnight = datetime.min.time()
                start_date = self.pop_start_date
                end_date = self.pop_end_date
                # date values are promoted to datetimes at 00:00:00
                if isinstance(start_date,date):
                    start_date = datetime.combine(start_date, midnight)
                if isinstance(end_date,date):
                    end_date = datetime.combine(end_date, midnight)

                df = self.load_df_pym(start_date, end_date,
                                      cols=[self.variable, self.timestamp_col],
                                      timestamp_col=self.timestamp_col)

                frame = self.period_over_period(df, start_date=start_date, end_date=end_date,
                                                period=period, history_periods=self.pop_history_periods,
                                                timestamp_col=self.timestamp_col)
                logger.warning('LINE 274 start:end=%s:%s,%s,%s len(Df),df.head',start_date,end_date,len(df),df.head())
                if len(frame) > 0:
                    counted = frame.groupby(['dayset','period']).agg({self.variable:'count'})
                    frame = counted.reset_index()
                else:
                    frame = frame.rename(index=str,columns={'day':'dayset'})

                # columns present before the split; anything added by the
                # split is what gets plotted
                columns_before = list(frame.columns)
                logger.warning('Line 179:%s', frame.head(10))
                frame = self.split_period_into_columns(frame,col_to_split='period',
                                                       value_to_copy=self.variable)
                logger.warning('line 180 df_period columns:%s',frame.head(50))
                columns_after = list(frame.columns)

                plotcols = list(np.setdiff1d(columns_after, columns_before))
                frame,plotcols = self.pop_include_zeros(df_period=frame,plotcols=plotcols,period=period)

                if 'dayset' not in frame.columns:
                    frame['dayset'] = 0 if len(frame) > 0 else ''

                return frame.hvplot.bar('dayset',plotcols,rot=45,
                                        title="{} over {}".format(period,period),
                                        stacked=False)

            except Exception:
                logger.error('period over period to date', exc_info=True)

        def pop_week(self, launch=-1):
            """Return the week-over-week chart.

            ``launch`` is accepted but not used. Returns ``None`` when an
            error was raised (the error is logged).
            """
            try:
                chart = self.graph_period_over_period('week')
            except Exception:
                logger.error('pop week', exc_info=True)
            else:
                return chart

        def pop_month(self, launch=-1):
            """Return the month-over-month chart.

            ``launch`` is accepted but not used. Returns ``None`` when an
            error was raised (the error is logged).
            """
            try:
                chart = self.graph_period_over_period('month')
            except Exception:
                logger.error('pop month', exc_info=True)
            else:
                return chart

        def pop_quarter(self, launch=-1):
            """Return the quarter-over-quarter chart.

            ``launch`` is accepted but not used. Returns ``None`` when an
            error was raised (the error is logged).
            """
            try:
                chart = self.graph_period_over_period('quarter')
            except Exception:
                logger.error('pop quarter', exc_info=True)
            else:
                return chart

        def pop_year(self, launch=-1):
            """Return the year-over-year chart.

            ``launch`` is accepted but not used. Returns ``None`` when an
            error was raised (the error is logged).
            """
            try:
                chart = self.graph_period_over_period('year')
            except Exception:
                logger.error('pop year', exc_info=True)
            else:
                return chart

        def multiline_dow(self, launch=1):
            """Plot the mean of the chosen variable per day of the week.

            ``self.df`` is copied, tagged with the resample period taken from
            ``self.multiline_resample_period`` and with the day name, then
            averaged per (day of week, period). The line plot is grouped by
            the period column. ``launch`` is accepted but not used.

            Returns:
                The hvplot line plot, or ``None`` when an error was raised
                (the error is logged).
            """
            try:
                df = self.df.copy()
                # pandas period alias -> name of the column holding the period
                dct = {
                    'Y':'year',
                    'M':'month',
                    'W':'week',
                    'Q':'Qtr'
                }
                resample_period =  dct[self.multiline_resample_period]
                yvar = self.multiline_vars['y']
                xvar = 'day_of_week'
                df[resample_period] = df[self.timestamp_col].dt.to_period(self.multiline_resample_period)
                df[xvar] = df[self.timestamp_col].dt.day_name()
                df = df.groupby([xvar,resample_period]).agg({yvar: 'mean'})
                df = df.reset_index()

                # After the groupby only xvar, the period column and yvar are
                # left. The timestamp column is gone, so the x axis has to be
                # the day-of-week column.
                p = df.hvplot.line(x=xvar, y=yvar,groupby=resample_period, width=1200, height=500)
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    def update(attrname, old, new):
        """Bokeh ``on_change`` callback for the filter selects.

        Copies the current values of the area, category, gender and item
        selects into ``thistab.select_values``, recomputes the period-to-date
        cards, refreshes the section headers and fires ``stream_launch`` so
        the dynamic plots redraw. The three parameters are the standard
        ``on_change`` arguments and are not used.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        for item in ['area','category','gender','item']:
            thistab.select_values[item] = thistab.select[item].value

        # graph_periods_to_date has three required parameters; the variable
        # was not passed before, which raised a TypeError on every change.
        thistab.graph_periods_to_date(thistab.df,
                                      timestamp_filter_col=thistab.timestamp_col,
                                      variable=thistab.variable)
        thistab.section_header_updater('cards')
        thistab.section_header_updater('pop')
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_variable(attrname, old, new):
        """Bokeh ``on_change`` callback for the variable select.

        Stores the selected variable on ``thistab``, recomputes the
        period-to-date cards for it, refreshes the section headers and fires
        ``stream_launch`` so the dynamic plots redraw. The three parameters
        are the standard ``on_change`` arguments and are not used.
        """
        thistab.notification_updater("Calculations underway. Please be patient")
        thistab.variable = variable_select.value
        # Use this tab's own timestamp column, as the initial render and the
        # loaders do, rather than the hard-coded 'block_timestamp'.
        thistab.graph_periods_to_date(thistab.df,
                                      timestamp_filter_col=thistab.timestamp_col,
                                      variable=thistab.variable)
        thistab.section_header_updater('cards',label='')
        thistab.section_header_updater('pop',label='')
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_period_over_period(attrname, old, new):
        """Bokeh ``on_change`` callback for the period-over-period controls.

        Copies the history-period count and the period start/end dates from
        their widgets onto ``thistab`` and fires ``stream_launch`` so the
        dynamic plots redraw. The three parameters are not used.
        """
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")
        thistab.pop_history_periods = history_periods_select.value
        # the new window is picked up when the stream event fires below
        thistab.pop_start_date = datepicker_period_start.value
        thistab.pop_end_date = datepicker_period_end.value
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        notify("ready")


    def update_history_periods(attrname, old, new):
        """Bokeh ``on_change`` callback for ``pop_number_select``.

        Stores the selected number of comparison periods on ``thistab`` and
        fires ``stream_launch`` so the dynamic plots redraw. The three
        parameters are not used.
        """
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        notify("ready")

    def update_multiline(attrname, old, new):
        """Bokeh ``on_change`` callback for the day-of-week plot controls.

        Copies the selected y variable and resample period onto ``thistab``
        and fires ``stream_launch`` so the dynamic plots redraw. The three
        parameters are not used.
        """
        notify = thistab.notification_updater
        notify("Calculations in progress! Please wait.")
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.multiline_resample_period = multiline_resample_period_select.value
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        notify("Ready!")

    try:
        table = 'bcc_composite'
        cols = cols_to_load['guest'] + cols_to_load['rental']
        thistab = Thistab(table, cols)
        # -------------------------------------  SETUP   ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()

        last_date = dashboard_config['dates']['last_date']
        first_date = datetime(2014,1,1,0,0,0)

        thistab.df = thistab.load_df_pym(first_date, last_date,cols,thistab.timestamp_col)
        thistab.graph_periods_to_date(thistab.df,timestamp_filter_col=thistab.timestamp_col,
                                      variable=thistab.variable)
        thistab.section_header_updater('cards')
        thistab.section_header_updater('pop')

        '''
        df_temp = thistab.df[(thistab.df['visit_start'].dt.year == 2019) & (thistab.df['visit_start'].dt.month == 8)]

        logger.warning('LINE 416: df_temp:%s,%s',len(df_temp),df_temp.head(30))
        '''

        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        datepicker_start = DatePicker(title="Start", min_date=first_date_range,
                                      max_date=last_date_range, value=first_date)
        datepicker_end = DatePicker(title="End", min_date=first_date_range,
                                    max_date=last_date_range, value=last_date)

        thistab.pop_end_date = datetime.now().date()
        daynum = thistab.pop_end_date.day
        if daynum < 3:
            thistab.pop_end_date = datetime.now().date() - timedelta(days=daynum)
            thistab.pop_start_date = thistab.pop_end_date - timedelta(days=7)
        else:
            thistab.pop_start_date = thistab.first_date_in_period(thistab.pop_end_date, 'week')

        stream_launch = streams.Stream.define('Launch',launch=-1)()

        datepicker_period_start = DatePicker(title="Period start", min_date=first_date_range,
                                             max_date=last_date_range, value=thistab.pop_start_date)
        datepicker_period_end = DatePicker(title="Period end", min_date=first_date_range,
                                           max_date=last_date_range, value=thistab.pop_end_date)

        history_periods_select = Select(title='Select # of comparative periods',
                                        value='2',
                                        options=thistab.menus['history_periods'])

        datepicker_pop_start = DatePicker(title="Period start", min_date=first_date_range,
                                        max_date=last_date_range, value=thistab.pop_start_date)

        datepicker_pop_end = DatePicker(title="Period end", min_date=first_date_range,
                                        max_date=last_date_range, value=thistab.pop_end_date)

        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(5),
                                   options=thistab.menus['history_periods'])
        pop_button = Button(label="Select dates/periods, then click me!", width=15, button_type="success")

        variable_select = Select(title='Select variable', value=thistab.variable,
                                 options=thistab.menus['bcc']['rental'])

        multiline_y_select = Select(title='Select comparative DV(y)',
                                    value=thistab.multiline_vars['y'],
                                    options=['price', 'amount', 'visit_duration'])

        multiline_resample_period_select = Select(title='Select comparative DV(y)',
                                    value=thistab.multiline_resample_period,
                                    options=['W','M','Q','Y'])

        # ---------------------------------  GRAPHS ---------------------------
        hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)

        hv_pop_month = hv.DynamicMap(thistab.pop_month, streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)

        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter, streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)

        hv_multiline_dow = hv.DynamicMap(thistab.multiline_dow, streams=[stream_launch])
        multiline_dow = renderer.get_plot(hv_multiline_dow)

        # -------------------------------- CALLBACKS ------------------------
        #datepicker_start.on_change('value', update)
        #datepicker_end.on_change('value', update)
        for item in ['area','category','gender','item']:
            thistab.select[item].on_change('value',update)
        
        history_periods_select.on_change('value',update_period_over_period)
        datepicker_period_start.on_change('value',update_period_over_period)
        datepicker_period_end.on_change('value',update_period_over_period)
        pop_number_select.on_change('value',update_history_periods)
        variable_select.on_change('value', update_variable)
        multiline_y_select.on_change('value', update_multiline)
        multiline_resample_period_select.on_change('value', update_multiline)

        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element
        controls = WidgetBox(datepicker_start,datepicker_end, 
                             thistab.select['area'],
                             thistab.select['category'],
                             thistab.select['item'],
                             thistab.select['gender'],
                             thistab.select['status'],
                             )
        controls_pop = WidgetBox(datepicker_pop_start,
                                 datepicker_pop_end,
                                 history_periods_select,
                                 pop_button)

        controls_multiline = WidgetBox(multiline_y_select,
                                       multiline_resample_period_select)

        # create the dashboards
        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.section_headers['cards']],
            [Spacer(width=20, height=2)],
            [thistab.KPI_card_div,controls],
            [thistab.section_headers['pop']],
            [Spacer(width=20, height=25)],
            [pop_week.state,controls_pop],
            [pop_month.state],
            [pop_quarter.state],
            [thistab.section_headers['dow']],
            [Spacer(width=20, height=30)],
            [multiline_dow.state, controls_multiline],
            [thistab.notification_div['bottom']]
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #14

Datei anzeigen

Datei: cryptocurrency.py Projekt: andre-aion/analytics_demo

def cryptocurrency_clustering_tab(panel_title):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """
            Initialise state, styling and header Divs for the clustering tab.

            Params:
                table: table name; forwarded to Mytab.__init__ and kept on self.table
                cols: column list; forwarded to Mytab.__init__ and kept on self.cols
                dedup_cols: forwarded to Mytab.__init__; None (the default) is
                    replaced by a new empty list
            """
            # None instead of a literal [] default, so separate calls never
            # share one list object.
            dedup_cols = [] if dedup_cols is None else dedup_cols
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0

            # NOTE: this is the shared groupby_dict object from the enclosing
            # scope (not a copy); set_groupby_dict() adds keys to it in place.
            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.kmean_model = {}  # fitted KMeans models keyed by k (filled by optimalK)

            self.div_style = """ style='width:350px; margin-left:25px;
                            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                            """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.k = '1'
            self.max_clusters_menu = [str(k) for k in range(1, 12)]

            self.launch_cluster_table = False  # launch cluster
            self.cryptos = None
            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                        position:relative;background:black;margin-bottom:200px">
                        <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                  </div>""".format(self.page_width, 50, 'Welcome')
            # Banner Divs shown at the top and bottom of the tab.
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'Crypto families':
                self.section_header_div(text='Crypto families:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

        # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """
            Build a Div containing a coloured section heading.

            Params:
                text: heading text
                html_header: tag name used for the heading element
                width: Div width (the height is fixed at 15)
                margin_top: inserted as the wrapper's margin-top, in px
                margin_bottom: inserted after a literal '-' in the wrapper's
                    margin-bottom, so a negative value (such as the default)
                    is rendered with a double minus sign
            Returns: the Div
            """
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a Div (height 15) showing ``text`` as a coloured <h2> heading."""
            return Div(text='<h2 style="color:#4221cc;">{}</h2>'.format(text),
                       width=width,
                       height=15)

        def information_div(self, width=400, height=150):
            """
            Build the static help panel explaining how to read the clusters.

            The panel markup is formatted with a local box style and
            self.header_style.

            Params:
                width: Div width
                height: Div height
            Returns: the Div
            """
            box_style = """ 
               style='width:350px;
               border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
           """
            template = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                A cluster is statistical grouping of items based on a composite similarity of the variables under review. 
                
                </li>
                <li>
                I have highlighted the peers in our cluster (aion_cluster), and simply labeled the other clusters with numbers.
                </li>
            </ul>
            </div>

            """
            return Div(text=template.format(box_style, self.header_style),
                       width=width,
                       height=height)

        # ////////////////// HELPER FUNCTIONS ////////////////////
        def set_groupby_dict(self):
            """
            Give every column in self.cols an aggregation in self.groupby_dict.

            Columns already present are left alone. For the others the
            aggregation is 'sum' when string_contains_list(markers, col) is
            truthy and 'mean' otherwise (presumably the helper checks whether
            the column name contains one of the markers -- verify).
            Errors are logged, not raised.
            """
            try:
                markers = ['mention', 'hashtags', 'tweets', 'replies', 'favorites']
                for col in self.cols:
                    if col in self.groupby_dict.keys():
                        continue
                    is_summed = string_contains_list(markers, col)
                    self.groupby_dict[col] = 'sum' if is_summed else 'mean'

            except Exception:
                logger.error('set groupby dict', exc_info=True)

        # /////////////////////////////////////////////////////////////
        def optimalK(self, data, nrefs=3, maxClusters=10):
            """
            Calculates KMeans optimal K using Gap Statistic from Tibshirani, Walther, Hastie.

            For every k in range(1, maxClusters) a KMeans model is fitted to
            ``data`` and stored in self.kmean_model[k]; its inertia is compared
            with the mean inertia of ``nrefs`` models fitted to uniformly
            random reference data of the same shape.

            Params:
                data: ndarry of shape (n_samples, n_features)
                nrefs: number of sample reference datasets to create
                maxClusters: exclusive upper bound of the cluster counts tested
            Returns: (optimalK, resultsdf) where resultsdf has one
                'clusterCount'/'gap' row per tested k; None if an error
                occurred (the error is logged).
            """
            try:
                # Drive the loop from maxClusters so that the gaps array and the
                # tested k values always have the same length.
                cluster_counts = list(range(1, maxClusters))
                gaps = np.zeros((len(cluster_counts), ))
                for gap_index, k in enumerate(cluster_counts):
                    logger.warning('starting for k=%s', k)
                    # Holder for reference dispersion results
                    refDisps = np.zeros(nrefs)

                    # For n references, generate a random sample and run
                    # kmeans on it, recording the resulting dispersion
                    for i in range(nrefs):
                        logger.warning('nref=%s', i)

                        # Create new random reference set
                        randomReference = np.random.random_sample(
                            size=data.shape)

                        # Fit to it
                        km = KMeans(k)
                        km.fit(randomReference)
                        refDisps[i] = km.inertia_

                    # Fit cluster to original data and create dispersion
                    self.kmean_model[k] = KMeans(k, random_state=42)
                    self.kmean_model[k].fit(data)

                    # The dispersion must come from the model fitted on the real
                    # data, not from the last random-reference model.
                    origDisp = self.kmean_model[k].inertia_

                    # Gap statistic for this k
                    gaps[gap_index] = np.log(np.mean(refDisps)) - np.log(origDisp)

                # Built in one step; DataFrame.append no longer exists in
                # pandas >= 2.0.
                resultsdf = pd.DataFrame({
                    'clusterCount': cluster_counts,
                    'gap': gaps
                })

                # Plus 1 because index 0 means 1 cluster is optimal,
                # index 2 means 3 clusters are optimal, and so on.
                return (gaps.argmax() + 1, resultsdf)

            except Exception:
                logger.error('optimal', exc_info=True)
                return None

        def cluster_table(self, launch):
            """
            Cluster the cryptos in self.df and return a table of assignments.

            self.df is aggregated per crypto with self.groupby_dict, the
            feature columns are standardised, optimalK() picks k, and each
            crypto is labelled 'cluster_<i>'. The cluster that contains 'aion'
            is labelled 'aion_cluster' instead. The name lists per cluster are
            passed to write_clusters().

            Params:
                launch: stream value supplied by the caller; not used here
            Returns: an hvplot table with 'crypto' and 'cluster' columns, or
                None if anything failed (the error is logged).
            """
            try:
                # prep
                df = self.df.groupby(['crypto']).agg(self.groupby_dict)
                df = df.compute()
                logger.warning('df after groupby:%s', df)

                self.cryptos = df.index.tolist()
                logger.warning('self.cryptos:%s', self.cryptos)

                X = StandardScaler().fit_transform(df[self.feature_list])
                result = self.optimalK(X,
                                       nrefs=3,
                                       maxClusters=len(
                                           self.max_clusters_menu))
                if result is None:
                    # optimalK logged the underlying error; leave self.k alone.
                    logger.error('cluster table: optimal k not available')
                    return None
                self.k, _ = result
                logger.warning('Optimal k is:%s ', self.k)

                model = self.kmean_model[self.k]
                # Labels of each point
                labels = model.labels_

                # Row positions of the members of each cluster
                members_by_cluster = {
                    'cluster_' + str(i): np.where(labels == i)[0].tolist()
                    for i in range(model.n_clusters)
                }

                # Position of aion, used to spot its cluster; None when aion is
                # not in the data, in which case every cluster keeps its number.
                aion_idx = (self.cryptos.index('aion')
                            if 'aion' in self.cryptos else None)

                cluster_column = [''] * len(self.cryptos)
                # Built from scratch so that it only ever holds crypto names;
                # copying members_by_cluster would leave the aion cluster's
                # original 'cluster_<i>' key behind with raw row positions.
                mydict_verbose = {}
                for key, members in members_by_cluster.items():
                    if aion_idx in members:
                        key = 'aion_cluster'
                    mydict_verbose[key] = [self.cryptos[i] for i in members]
                    for i in members:
                        cluster_column[i] = key

                # save to redis
                self.write_clusters(mydict_verbose)
                logger.warning('line 229: cluster labels:%s', mydict_verbose)

                df = pd.DataFrame.from_dict({
                    'crypto': self.cryptos,
                    'cluster': cluster_column
                })
                self.launch_cluster_table = False
                cols = ['crypto', 'cluster']
                return df.hvplot.table(columns=cols,
                                       width=500,
                                       height=1200,
                                       title='Cluster table')
            except Exception:
                logger.error('cluster table', exc_info=True)
                return None

        def write_clusters(self, my_dict):
            """
            Save the cluster membership dict through self.redis.

            A copy of ``my_dict`` is extended with a 'timestamp' (now,
            formatted with self.DATEFORMAT) and the 'features' list, then
            saved under the key 'clusters:cryptocurrencies'. Failures are
            logged and not raised, so a storage problem does not stop the
            caller.

            Params:
                my_dict: mapping of cluster label to member names
            """
            try:
                # write to redis
                cluster_dct = my_dict.copy()
                cluster_dct['timestamp'] = datetime.now().strftime(
                    self.DATEFORMAT)
                cluster_dct['features'] = self.feature_list
                save_params = 'clusters:cryptocurrencies'
                self.redis.save(cluster_dct,
                                save_params,
                                "",
                                "",
                                type='checkpoint')
                logger.warning('%s saved to redis', save_params)
            except Exception:
                # `except Exception` rather than a bare except, so that
                # KeyboardInterrupt / SystemExit still propagate.
                logger.error('write clusters', exc_info=True)

    def update(attrname, old, new):
        """
        Date-picker callback: reload the data and refresh the cluster table.

        The (attrname, old, new) arguments are required by the on_change
        callback signature and are not used.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value,
                        datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.trigger += 1
        # Kept for backward compatibility; nothing in this tab subscribes to it.
        stream_launch_elbow_plot.event(launch=thistab.trigger)
        # The cluster table DynamicMap listens on this stream, so this event is
        # what actually recomputes the table for the new date range.
        stream_launch_cluster_table.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    # Build the tab: load the data, create streams, widgets and the cluster
    # table plot, and arrange everything in a grid. Any failure is logged and
    # replaced by the panel returned from tab_error_flag().
    try:
        # SETUP
        table = 'external_daily'
        #cols = list(groupby_dict.keys()) + ['crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        # first_date_range / last_date_range bound the date pickers;
        # first_date / last_date are the initially selected window (340 days).
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=340)
        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.cols = sorted(list(thistab.df.columns))

        # MANAGE STREAMS
        # Both streams carry a single 'launch' parameter starting at -1.
        stream_launch_elbow_plot = streams.Stream.define('Launch_elbow_plot',
                                                         launch=-1)()
        stream_launch_cluster_table = streams.Stream.define(
            'Launch_cluster_table', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        # Changing either date runs update().
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)

        # PLOTS
        # The table is recomputed by thistab.cluster_table whenever
        # stream_launch_cluster_table fires.
        hv_cluster_table = hv.DynamicMap(thistab.cluster_table,
                                         streams=[stream_launch_cluster_table])
        # NOTE(review): `renderer` is not assigned in this function --
        # presumably a module-level HoloViews renderer; confirm.
        cluster_table = renderer.get_plot(hv_cluster_table)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end)

        # create the dashboards

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.information_div(), controls],
                         [thistab.section_headers['Crypto families']],
                         [Spacer(width=20, height=30)], [cluster_table.state],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #15

Datei anzeigen

Datei: blockminer.py Projekt: andre-aion/analytics_demo

def blockminer_tab(page_width):

    # Data source for the top-N table: starts empty with the three columns
    # (percentage, address, block_count) that view_topN() streams into.
    topN_src = ColumnDataSource(
        data=dict(percentage=[], address=[], block_count=[]))

    class This_tab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            """
            Initialise the block-miner tab state and its notification Divs.

            Params:
                table: table name; forwarded to Mytab.__init__ and kept on self.table
                cols: forwarded to Mytab.__init__
                dedup_cols: forwarded to Mytab.__init__
            """
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.df2 = None
            self.key_tab = 'blockminer'
            self.n = 20  # number of miners shown in the top-N table

            # ------- DIVS setup begin
            self.page_width = page_width
            banner_template = """<hr/>
                  <div style="text-align:center;width:{}px;height:{}px;
                         position:relative;background:black;margin-bottom:200px">
                         <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                  </div>"""
            banner = banner_template.format(self.page_width, 50, 'Welcome')
            # Same banner at the top and bottom; only the Div heights differ.
            self.notification_div = {
                position: Div(text=banner,
                              width=self.page_width,
                              height=div_height)
                for position, div_height in (('top', 20), ('bottom', 10))
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {}

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """
            Return a Div (height 15) holding ``text`` as a coloured heading.

            Params:
                text: heading text
                html_header: heading tag name
                width: Div width
                margin_top: wrapper margin-top in px
                margin_bottom: placed after a literal '-' in the wrapper's
                    margin-bottom; a negative value therefore yields a double
                    minus sign in the generated style
            """
            fields = (margin_top, margin_bottom, html_header, text, html_header)
            html = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""".format(
                *fields)
            return Div(text=html, width=width, height=15)

        def load_this_data(self, start_date, end_date):
            """
            Load mined-block rows for the date range and return the bar plot.

            Both dates are combined with midnight to form datetimes before
            being handed to self.df_load(); the result of prep_dataset() is
            returned.

            Params:
                start_date: first day of the range
                end_date: last day of the range
            """
            midnight = datetime.min.time()
            end_date = datetime.combine(end_date, midnight)
            start_date = datetime.combine(start_date, midnight)

            logger.warning('load_data start date:%s', start_date)
            logger.warning('load_data end date:%s', end_date)

            # load only mined blocks and remove the double entry
            self.df_load(
                start_date,
                end_date,
                supplemental_where="AND update_type = 'mined_block' AND amount >= 0",
                cols=['address', 'amount', 'block_time'])
            logger.warning('after load:%s', self.df.head(30))

            return self.prep_dataset(start_date, end_date)

        def prep_dataset(self, start_date, end_date):
            """
            Aggregate self.df1 into blocks-per-address and return a bar plot.

            self.df1 is rewritten step by step into a frame with 'address',
            'block_count' and 'percentage' columns (plus the columns added by
            the two reset_index() calls), which view_topN() reads afterwards.

            Params:
                start_date: only logged
                end_date: not used
            Returns: an hvplot bar chart of block_count per address, or None
                if an error occurred (the error is logged).
            """
            try:
                logger.warning("prep dataset start date:%s", start_date)

                self.df1 = self.df1[['address', 'block_time']]
                # Map each address through self.poolname_verbose_trun
                # (presumably a readable/truncated pool name -- verify).
                self.df1['address'] = self.df1['address']\
                    .map(self.poolname_verbose_trun)
                # Count the rows (blocks) per address.
                self.df1 = self.df1.groupby(['address'
                                             ]).agg({'block_time': 'count'})
                self.df1 = self.df1.reset_index()
                self.df1 = self.df1.rename(
                    columns={'block_time': 'block_count'})
                # Share of all blocks in percent, rounded to one decimal.
                self.df1['percentage'] = 100*self.df1['block_count']\
                                         /self.df1['block_count'].sum()
                self.df1['percentage'] = self.df1['percentage'].map(
                    lambda x: round(x, 1))
                # NOTE(review): second reset_index() adds the old index as a
                # column -- confirm this is intended.
                self.df1 = self.df1.reset_index()
                logger.warning("topN column:%s", self.df1.columns.tolist())
                #logger.warning('END prep dataset DF1:%s', self.df1.head())

                return self.df1.hvplot.bar(
                    'address',
                    'block_count',
                    rot=90,
                    height=600,
                    width=self.page_width,
                    title='# of blocks mined by miner address',
                    hover_cols=['percentage'])

            except Exception:
                logger.error('prep dataset:', exc_info=True)

        def view_topN(self):
            """
            Stream the self.n rows with the largest percentage into topN_src.

            Returns: a new DataTable bound to topN_src, or None if an error
                occurred (the error is logged).
            """
            logger.warning("top n called:%s", self.n)
            try:
                logger.warning('top N:%s', self.n)
                top_rows = self.df1.nlargest(self.n, 'percentage')
                top_rows = top_rows.compute()
                logger.warning('in view top n :%s', top_rows.head(10))

                # rollover=self.n caps the source at the newest self.n rows.
                topN_src.stream(
                    {
                        'percentage': top_rows.percentage.tolist(),
                        'address': top_rows.address.tolist(),
                        'block_count': top_rows.block_count.tolist(),
                    },
                    rollover=self.n)

                headings = (
                    ("address", "Address"),
                    ("percentage", "percentage"),
                    ("block_count", "# of blocks"),
                )
                table_n = DataTable(
                    source=topN_src,
                    columns=[
                        TableColumn(field=field, title=title)
                        for field, title in headings
                    ],
                    width=300,
                    height=600)

                gc.collect()
                return table_n
            except Exception:
                logger.error('view_topN:', exc_info=True)

        def set_n(self, n):
            """
            Store ``n`` as the number of top miners to show (self.n).

            Params:
                n: an int, or a value int() can convert (e.g. the string value
                    of a Select widget). A value that cannot be converted is
                    logged and self.n is left unchanged.
            """
            if isinstance(n, int):
                # Ints were previously skipped without being stored.
                self.n = n
                return
            try:
                self.n = int(n)
            except (TypeError, ValueError, OverflowError):
                logger.error('set_n', exc_info=True)

        # ####################################################
        #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            """Return a Div of the given size containing ``text`` unchanged."""
            return Div(text=text, width=width, height=height)

        def title_div(self, text, width=700):
            """Return a Div (height 20) showing ``text`` as a green <h2> heading."""
            return Div(text='<h2 style="color:green;">{}</h2>'.format(text),
                       width=width,
                       height=20)

        def notification_updater_2(self, text):
            """
            Show ``text`` as a red <h3> message in the notification Div(s).

            self.notification_div is a dict of Divs in this class, so every
            Div in it is updated; a single Div-like object is still accepted.

            Params:
                text: message to display
            """
            markup = '<h3  style="color:red">{}</h3>'.format(text)
            target = self.notification_div
            # A dict has no .text attribute; update the Divs it holds instead.
            divs = target.values() if isinstance(target, dict) else [target]
            for div in divs:
                div.text = markup

        def spacing_div(self, width=20, height=100):
            """Return an empty Div of the given size, used as layout spacing."""
            blank = Div(text='', width=width, height=height)
            return blank

        def spacing_paragraph(self, width=20, height=100):
            """Return an empty Paragraph of the given size, used as layout spacing."""
            blank = Paragraph(text='', width=width, height=height)
            return blank

    def update(attrname, old, new):
        """
        Date-picker callback: push the new dates and rebuild the top-N table.

        The (attrname, old, new) arguments come from the on_change callback
        signature and are not used.
        """
        this_tab.notification_updater(
            "Calculations underway. Please be patient")
        # Each event() call notifies the stream's subscribers separately.
        # NOTE(review): presumably load_this_data therefore runs once per
        # event (twice per date change) -- confirm.
        stream_start_date.event(start_date=datepicker_start.value)
        stream_end_date.event(end_date=datepicker_end.value)
        this_tab.set_n(topN_select.value)
        this_tab.view_topN()
        this_tab.notification_updater("ready")

    # update based on selected top n
    def update_topN():
        """Apply the selected top-N value and restream the top-N table data."""
        this_tab.notification_updater("Calculations in progress! Please wait.")
        logger.warning('topN selected value:%s', topN_select.value)
        # The Select value is passed as-is; set_n() converts it to an int.
        this_tab.set_n(topN_select.value)
        this_tab.view_topN()
        this_tab.notification_updater("ready")

    try:
        # create class and get date range
        cols = ['address', 'block_timestamp', 'block_time']
        this_tab = This_tab('account_ext_warehouse', cols, [])

        #STATIC DATES
        first_date_range = "2018-04-23 00:00:00"
        first_date_range = datetime.strptime(first_date_range,
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = last_date_range
        first_date = datetime_to_date(last_date - timedelta(days=60))

        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_start_date = streams.Stream.define('Start_date',
                                                  start_date=first_date)()
        stream_end_date = streams.Stream.define('End_date',
                                                end_date=last_date)()

        # create a text widget for top N
        topN_select = Select(title='Top N',
                             value=str(this_tab.n),
                             options=menu)

        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        # ALL MINERS.
        # --------------------- ALL  MINERS ----------------------------------
        hv_bar_plot = hv.DynamicMap(
            this_tab.load_this_data,
            streams=[stream_start_date, stream_end_date],
            datashade=True)

        renderer = hv.renderer('bokeh')
        bar_plot = renderer.get_plot(hv_bar_plot)

        # --------------------- TOP N MINERS -----------------------------------
        # set up data source for the ton N miners table
        this_tab.view_topN()
        columns = [
            TableColumn(field="address", title="Address"),
            TableColumn(field="percentage", title="percentage"),
            TableColumn(field="block_count", title="# of blocks")
        ]
        topN_table = DataTable(source=topN_src,
                               columns=columns,
                               width=400,
                               height=600)

        # add callbacks
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        topN_select.on_change("value", lambda attr, old, new: update_topN())

        download_button = Button(label='Save Table to CSV',
                                 button_type="success")
        download_button.callback = CustomJS(
            args=dict(source=topN_src),
            code=open(
                join(dirname(__file__),
                     "../../../static/js/topN_download.js")).read())

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end, download_button,
                             topN_select)

        # create the dashboards
        grid = gridplot([[this_tab.notification_div['top']],
                         [Spacer(width=20, height=70)], [
                             topN_table,
                             controls,
                         ], [bar_plot.state]])

        # Make a tab with the layout
        tab = Panel(child=grid, title='miners: blocks')

        return tab

    except Exception:
        logger.error("Blockminer", exc_info=True)

        return tab_error_flag('miners: blocks')

Beispiel #16

Datei anzeigen

Datei: cryptocurrency.py Projekt: andre-aion/analytics_demo

def cryptocurrency_eda_tab(cryptos, panel_title):
    # Data source created empty with the columns variable_1, variable_2,
    # relationship, lag, r and p_value.
    # NOTE(review): presumably filled with lag-correlation results later in
    # this function -- confirm.
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[],
                                               variable_2=[],
                                               relationship=[],
                                               lag=[],
                                               r=[],
                                               p_value=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """
            Initialise state, thresholds, styling and header Divs for the EDA tab.

            Params:
                table: table name; forwarded to Mytab.__init__ and kept on self.table
                cols: column list; forwarded to Mytab.__init__ and kept on self.cols
                dedup_cols: forwarded to Mytab.__init__; None (the default) is
                    replaced by a new empty list
            """
            # None instead of a literal [] default, so separate calls never
            # share one list object.
            dedup_cols = [] if dedup_cols is None else dedup_cols
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0

            # NOTE: this is the shared groupby_dict object from the enclosing
            # scope, not a copy.
            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.variable = 'fork'
            self.crypto = 'all'
            self.lag_variable = 'push'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                            style='width:350px; margin-left:-600px;
                            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """

            self.header_style = """ style='color:blue;text-align:center;' """
            # track variable for AI for significant effects
            self.adoption_variables = {
                'user': [],
                'developer': ['watch', 'fork']
            }

            self.significant_effect_dict = {}
            self.reset_adoption_dict(self.variable)
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                           <div style="text-align:center;width:{}px;height:{}px;
                                  position:relative;background:black;margin-bottom:200px">
                                  <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                           </div>""".format(self.page_width, 50, 'Welcome')
            # Banner Divs shown at the top and bottom of the tab.
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.lag_section_head_txt = 'Lag relationships:'
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=self.lag_section_head_txt,
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(
                    text='Pre transform distribution:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                # This title has no placeholder, so the former
                # .format(self.section_divider) call had no effect and is gone.
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:',
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                # NOTE(review): same text as 'non_linear' below -- possibly a
                # copy/paste leftover; confirm the intended title.
                'correlations':
                self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600,
                    html_header='h3',
                    margin_top=5,
                    margin_bottom=-155),
                'non_linear':
                self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600,
                    html_header='h3',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """
            Create the Div (height 15) for one section heading.

            Params:
                text: heading text
                html_header: heading tag name
                width: Div width
                margin_top: wrapper margin-top in px
                margin_bottom: written after a literal '-' in the wrapper's
                    margin-bottom, so negative values produce a double minus
                    sign in the style attribute
            Returns: the Div
            """
            heading_html = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""".format(
                margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=heading_html, width=width, height=15)

        def notification_updater(self, text):
            """
            Show ``text`` in every notification Div.

            The message is wrapped in a black, centred banner that is
            self.page_width pixels wide and written to the ``text`` attribute
            of each value in self.notification_div.
            """
            banner = """<div style="text-align:center;background:black;width:{}px;">
                           <h4 style="color:#fff;">
                           {}</h4></div>""".format(self.page_width, text)
            for div in self.notification_div.values():
                div.text = banner

        def reset_adoption_dict(self, variable):
            """Reset the significant-effect list stored for ``variable`` to a new empty list."""
            self.significant_effect_dict.update({variable: []})

        def section_header_updater(self,
                                   text,
                                   section,
                                   html_header='h3',
                                   margin_top=150,
                                   margin_bottom=-150):
            """
            Replace the markup of an existing section header.

            Params:
                text: new heading text
                section: key of the header in self.section_headers
                html_header: heading tag name
                margin_top: wrapper margin-top in px
                margin_bottom: written after a literal '-' in the wrapper's
                    margin-bottom (a negative value gives a double minus sign)
            """
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""
            header = self.section_headers[section]
            header.text = template.format(margin_top, margin_bottom,
                                          html_header, text, html_header)

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a Div (height 15) with ``text`` rendered as a coloured <h2>."""
            title_html = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=title_html, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """
            Build the static help panel on interpreting correlations.

            The markup is formatted with self.div_style and self.header_style.

            Params:
                width: Div width
                height: Div height
            Returns: the Div
            """
            template = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """
            return Div(text=template.format(self.div_style, self.header_style),
                       width=width,
                       height=height)

        # /////////////////////////////////////////////////////////////
        def prep_data(self, df1):
            """Resample ``df1`` per crypto and store the lag-adjusted result in ``self.df1``.

            Steps: record the column names in ``self.cols``; cast ``timestamp``
            (written back into ``df1``) and index on it; optionally keep only
            ``self.crypto``; materialise with ``compute()``; group by ``crypto``
            and resample at ``self.resample_period`` using ``self.groupby_dict``;
            when ``self.lag`` is positive, shift every feature column except
            ``self.variable`` by that many rows; finally drop rows with NaN.

            Any exception is logged and swallowed, leaving ``self.df1`` untouched.
            """
            try:
                self.cols = list(df1.columns)

                df1['timestamp'] = df1['timestamp'].astype('M8[us]')
                indexed = df1.set_index('timestamp')
                if self.crypto != 'all':
                    indexed = indexed[indexed.crypto == self.crypto]
                frame = indexed.compute()

                resampler = frame.groupby('crypto').resample(
                    self.resample_period)
                frame = resampler.agg(self.groupby_dict).reset_index()

                # lag every feature except the currently selected variable
                features = self.feature_list.copy()
                lag = int(self.lag)
                if lag > 0:
                    for feature in features:
                        if feature == self.variable:
                            continue
                        frame[feature] = frame[feature].shift(lag)

                self.df1 = frame.dropna()

            except Exception:
                logger.error('prep data', exc_info=True)

        def set_groupby_dict(self):
            """Placeholder hook: performs no work and returns ``None``."""
            return None

        #   ///////////////// PLOTS /////////////////////

        def lags_plot(self, launch):
            """Scatter ``self.variable`` against ``self.lag_variable`` and its lagged copies.

            ``self.lag_days`` is a comma-separated string; each entry that parses
            as an integer adds a column ``<lag_variable>_<entry>`` holding the lag
            variable shifted by that many rows. Entries that do not parse are
            logged and skipped. The lag correlations are refreshed through
            ``self.lags_corr`` before plotting.

            Args:
                launch: stream counter supplied by the DynamicMap; not read here.

            Returns:
                The hvplot scatter object, or ``None`` if anything failed (the
                error is logged).
            """
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                df = df.compute()
                cols = [self.lag_variable]
                for day in self.lag_days.split(','):
                    # Only the integer parse is guarded: a failure while shifting
                    # is a real error and must not be reported as "not an integer".
                    try:
                        shift_by = int(day)
                    except ValueError:
                        logger.warning('%s is not an integer', day)
                        continue
                    label = self.lag_variable + '_' + day
                    df[label] = df[self.lag_variable].shift(shift_by)
                    cols.append(label)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                return df.hvplot(x=self.variable,
                                 y=cols,
                                 kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Correlate ``self.variable`` with every other column of ``df`` and stream the rows.

            For each column other than ``'timestamp'`` and ``self.variable`` the
            result of ``self.corr_label`` is unpacked into r, p-value and a
            relationship label. The lag is the integer after the last ``'_'`` in
            the column name, or the string ``'None'`` when that suffix is not an
            integer. The rows replace the content of ``lags_corr_src``.

            Returns:
                A DataTable bound to ``lags_corr_src``, or ``None`` on error
                (the error is logged).
            """
            try:
                target_values = df[self.variable].tolist()
                rows = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'lag': [],
                    'r': [],
                    'p_value': []
                }
                excluded = ['timestamp', self.variable]
                for col in df.columns:
                    if col in excluded:
                        continue
                    # find lag
                    suffix = col.split('_')[-1]
                    try:
                        lag = int(suffix)
                    except Exception:
                        lag = 'None'

                    other_values = df[col].tolist()
                    _, _, rvalue, pvalue, txt = self.corr_label(
                        target_values, other_values)
                    rows['variable_1'].append(self.variable)
                    rows['variable_2'].append(col)
                    rows['relationship'].append(txt)
                    rows['lag'].append(lag)
                    rows['r'].append(round(rvalue, 4))
                    rows['p_value'].append(round(pvalue, 4))

                # rollover equal to the row count keeps only the new rows
                lags_corr_src.stream(rows, rollover=(len(rows['lag'])))
                table_columns = [
                    TableColumn(field=field, title=title)
                    for field, title in (
                        ("variable_1", "variable 1"),
                        ("variable_2", "variable 2"),
                        ("relationship", "relationship"),
                        ("lag", "lag(days)"),
                        ("r", "r"),
                        ("p_value", "p_value"),
                    )
                ]
                return DataTable(source=lags_corr_src,
                                 columns=table_columns,
                                 width=900,
                                 height=400)
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """Build the correlation table between ``self.variable`` and the other features.

            For every entry of ``self.feature_list`` other than ``self.variable``
            the result of ``self.corr_label`` is unpacked into r, p-value and a
            relationship label. When ``self.variable`` is one of
            ``self.adoption_variables['developer']``, features whose label
            contains any of ``self.relationships_to_check`` are recorded in
            ``self.significant_effect_dict`` and the de-duplicated list is saved
            through ``self.redis.save`` together with a timestamp.

            Args:
                launch: stream counter supplied by the DynamicMap; not read here.

            Returns:
                An hvplot table, or ``None`` if anything failed (the error is
                logged).
            """
            try:
                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'r': [],
                    'p-value': []
                }
                # prep df
                df = self.df1.drop('timestamp', axis=1)

                a = df[self.variable].tolist()
                # loop-invariant, so evaluated once
                tracks_adoption = (
                    self.variable in self.adoption_variables['developer'])

                for col in self.feature_list:
                    if col == self.variable:
                        continue
                    b = df[col].tolist()
                    _, _, rvalue, pvalue, txt = self.corr_label(a, b)
                    # add to dict
                    corr_dict['Variable 1'].append(self.variable)
                    corr_dict['Variable 2'].append(col)
                    corr_dict['Relationship'].append(txt)
                    corr_dict['r'].append(round(rvalue, 4))
                    corr_dict['p-value'].append(round(pvalue, 4))

                    # update significant effect variables
                    if tracks_adoption and any(
                            relationship in txt for relationship in
                            self.relationships_to_check):
                        self.significant_effect_dict.setdefault(
                            self.variable, []).append(col)

                if tracks_adoption:
                    # The key is absent when no feature qualified and nothing
                    # reset it beforehand; treat that as "no features" rather
                    # than failing the whole table with a KeyError.
                    recorded = self.significant_effect_dict.get(
                        self.variable, [])
                    tmp_dct = {
                        'features': list(set(recorded)),
                        'timestamp': datetime.now().strftime(self.DATEFORMAT)
                    }
                    # write to redis
                    save_params = 'adoption_features:developer' + '-' + self.variable
                    self.redis.save(tmp_dct,
                                    save_params,
                                    "",
                                    "",
                                    type='checkpoint')

                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'r': corr_dict['r'],
                    'p-value': corr_dict['p-value']
                })
                return df.hvplot.table(columns=[
                    'Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'
                ],
                                       width=550,
                                       height=400,
                                       title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """Build the table of ``self.mann_whitneyu_label`` results per feature.

            ``self.variable`` is compared with every other entry of
            ``self.feature_list`` using the columns of ``self.df1``; each result
            is unpacked into a statistic, a p-value and a relationship label.

            Args:
                launch: stream counter supplied by the DynamicMap; not read here.

            Returns:
                An hvplot table, or ``None`` if anything failed (the error is
                logged).
            """
            try:
                rows = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'stat': [],
                    'p-value': []
                }
                frame = self.df1.drop('timestamp', axis=1)
                target_values = frame[self.variable].tolist()

                for feature in self.feature_list:
                    if feature == self.variable:
                        continue
                    other_values = frame[feature].tolist()
                    stat, pvalue, txt = self.mann_whitneyu_label(
                        target_values, other_values)
                    rows['Variable 1'].append(self.variable)
                    rows['Variable 2'].append(feature)
                    rows['Relationship'].append(txt)
                    rows['stat'].append(round(stat, 4))
                    rows['p-value'].append(round(pvalue, 4))

                table_columns = [
                    'Variable 1', 'Variable 2', 'Relationship', 'stat',
                    'p-value'
                ]
                table_df = pd.DataFrame(
                    {name: rows[name] for name in table_columns})
                return table_df.hvplot.table(
                    columns=table_columns,
                    width=550,
                    height=400,
                    title='Non parametricrelationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            """Return one histogram per entry of ``self.feature_list``, four per row.

            ``launch`` is the stream counter and is not read. On failure the
            exception is logged at warning level and ``None`` is returned.
            """
            try:
                histograms = self.df.hvplot.hist(y=self.feature_list,
                                                 subplots=True,
                                                 shared_axes=False,
                                                 bins=25,
                                                 alpha=0.3,
                                                 width=300)
                return histograms.cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter ``self.variable`` against each other feature, four plots per row.

            Works on ``self.df1`` with the ``timestamp`` column dropped (when
            present) and missing values replaced by 0. ``launch`` is the stream
            counter and is not read.

            Returns:
                The hvplot layout, or ``None`` if anything failed (the error is
                logged).
            """
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)
                frame = self.df1
                if 'timestamp' in frame.columns:
                    frame = frame.drop('timestamp', axis=1)
                frame = frame.fillna(0)

                # y-axis columns: the features minus the selected variable
                y_cols = self.feature_list.copy()
                try:
                    y_cols.remove(self.variable)
                except ValueError:
                    pass

                scatter = frame.hvplot.scatter(x=self.variable,
                                               y=y_cols,
                                               width=330,
                                               subplots=True,
                                               shared_axes=False,
                                               xaxis=False)
                return scatter.cols(4)

            except Exception:
                logger.error('matrix plot', exc_info=True)

        '''
        def regression(self,df):
            try:

            except Exception:
                logger.error('matrix plot', exc_info=True)
        '''

    def update_variable(attr, old, new):
        """Widget callback: switch the analysed variable to ``new`` and refresh the plots.

        ``attr`` and ``old`` are supplied by the widget and are not read.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        # Store the selection before preparing the data: prep_data reads
        # thistab.variable to decide which column is left unshifted, so the
        # previous order prepared lagged data for the old selection.
        thistab.variable = new
        thistab.prep_data(thistab.df)
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.lag_section_head_txt = 'Lag relationships between {} and...'.format(
            thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """Widget callback: use ``new`` as the lag variable and redraw the lag plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch_lags_var.event(launch=launch_id)
        thistab.notification_updater("Ready!")

    def update_crypto(attr, old, new):
        """Widget callback: re-read the crypto and lag selectors, then refresh the plots.

        The values come from ``crypto_select`` and ``lag_select`` directly; the
        callback arguments are not read.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.crypto = crypto_select.value
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch_matrix.event(launch=launch_id)
        stream_launch_corr.event(launch=launch_id)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):  # update lag & cryptocurrency
        """Widget callback: apply the lag chosen in ``lag_select`` and refresh the plots."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch_matrix.event(launch=launch_id)
        stream_launch_corr.event(launch=launch_id)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Date-picker callback: reload the picked date range and refresh the plots."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        start, end = datepicker_start.value, datepicker_end.value
        thistab.df_load(start, end, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch_matrix.event(launch=launch_id)
        stream_launch_corr.event(launch=launch_id)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Widget callback: resample with period ``new`` and refresh the plots."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch_matrix.event(launch=launch_id)
        stream_launch_corr.event(launch=launch_id)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Button callback: take the lag list typed into ``lags_input`` and redraw the lag plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch_lags_var.event(launch=launch_id)
        thistab.notification_updater("Ready!")

    # Build the tab: load the data, create streams, widgets and plots, wire the
    # callbacks and assemble the layout. Any failure is logged and the error
    # tab produced by tab_error_flag is returned instead.
    try:
        # SETUP
        table = 'external_daily'
        # NOTE(review): `cols` is not passed on (Thistab receives an empty list
        # below) and `groupby_dict` must come from an enclosing scope — confirm.
        cols = list(groupby_dict.keys()) + ['timestamp', 'crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        # initial window: the 200 days ending two days before the configured last date
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=200)
        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        # date comes out stream in milliseconds
        #stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        # each stream carries a `launch` counter; the callbacks bump it to force a redraw
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        # NOTE(review): the initial value 'fork' is hard-coded — confirm it is
        # one of thistab.feature_list.
        variable_select = Select(title='Select variable',
                                 value='fork',
                                 options=thistab.feature_list)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        crypto_select = Select(title='Select cryptocurrency',
                               value='all',
                               options=['all'] + thistab.items)

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        # table bound to lags_corr_src, the source that thistab.lags_corr streams into
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src,
                                    columns=columns,
                                    width=500,
                                    height=280)

        width = 800

        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        # both tables listen to stream_launch_corr, so one event refreshes the pair
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        #hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])

        # render each DynamicMap; the `.state` of the result is what goes into the layout
        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        crypto_select.on_change('value', update_crypto)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end, variable_select,
                             lag_select, crypto_select, resample_select)

        controls_lag = WidgetBox(lag_variable_select, lags_input,
                                 lags_input_button)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [matrix_plot.state, controls],
                         [thistab.section_headers['relationships']],
                         [Spacer(width=20, height=30)],
                         [thistab.section_headers['correlations']],
                         [Spacer(width=20, height=30)],
                         [corr_table.state,
                          thistab.corr_information_div()],
                         [thistab.section_headers['non_linear']],
                         [Spacer(width=20, height=30)], [nonpara_table.state],
                         [thistab.section_headers['lag']],
                         [Spacer(width=20, height=30)],
                         [lags_plot.state, controls_lag], [lags_corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        # log the failure and fall back to the error tab
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #17

Datei anzeigen

def KPI_user_adoption_tab(DAYS_TO_LOAD=90):
    class Thistab(KPI):
        def __init__(self, table, cols=None):
            """Set up the tab state, the notification banners and the section headers.

            Args:
                table: table name; stored on the instance and passed to
                    ``KPI.__init__``.
                cols: columns passed to ``KPI.__init__``; a new empty list is
                    used when omitted.
            """
            # A fresh list per call: a literal [] default would be one object
            # shared by every instance created without `cols`.
            cols = [] if cols is None else cols
            KPI.__init__(self, table, name='social_media', cols=cols)
            self.table = table
            self.df = None

            self.checkboxgroup = {'account_type': [], 'update_type': []}

            # NOTE(review): self.page_width is read here but only assigned a few
            # lines below, so it has to be set by KPI.__init__ already — confirm.
            self.KPI_card_div = self.initialize_cards(self.page_width,
                                                      height=350)

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                          position:relative;background:black;margin-bottom:200px">
                          <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards':
                self.section_header_div(text='Period to date:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'pop':
                self.section_header_div(text='Period over period:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a 15px-high Div holding ``text`` as a coloured section heading.

            Args:
                text: heading text to display.
                html_header: tag name used for the heading element.
                width: width passed to the Div.
                margin_top: value placed after ``margin-top:``.
                margin_bottom: value placed after ``margin-bottom:-``.
            """
            # NOTE(review): the template already contains a '-' before the bottom
            # margin, so a negative margin_bottom renders with a double minus.
            template = ('<div style="margin-top:{}px;margin-bottom:-{}px;">'
                        '<{} style="color:#4221cc;">{}</{}></div>')
            header_html = template.format(margin_top, margin_bottom,
                                          html_header, text, html_header)
            return Div(text=header_html, width=width, height=15)

        # ----------------------  DIVS ----------------------------

        def reset_checkboxes(self, value='all', checkboxgroup=''):
            """Set ``.value`` of the entry stored under ``checkboxgroup`` to ``value``.

            A failed lookup or assignment is logged and swallowed.
            """
            try:
                group = self.checkboxgroup[checkboxgroup]
                group.value = value
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        def information_div(self, width=400, height=300):
            """Return a Div with the "How to interpret relationships" box (list items are empty).

            The wrapper uses a style string defined in this method; the heading
            uses ``self.header_style``.
            """
            box_style = """ 
                          style='width:350px;margin-right:-800px;
                          border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                      """
            box_html = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                <li>
                </li>
                 <li>
                </li>
                 <li>
                </li>
            </ul>
            </div>

            """.format(box_style, self.header_style)
            return Div(text=box_html, width=width, height=height)

        # -------------------- CARDS -----------------------------------------
        def initialize_cards(self, width, height=250):
            """Return a Div with four empty KPI cards laid out in a flex row.

            Each card gets a design key picked at random from ``KPI_card_css``.
            On failure the exception is logged and ``None`` is returned.
            """
            try:
                cards_html = ''
                # one blank card each for year, quarter, month and week
                for _ in range(4):
                    design = random.choice(list(KPI_card_css.keys()))
                    cards_html += self.card(title='',
                                            data='',
                                            card_design=design)

                text = """<div style="margin-top:100px;display:flex; flex-direction:row;">
                       {}
                       </div>""".format(cards_html)
                return Div(text=text, width=width, height=height)
            except Exception:
                logger.error('initialize cards', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------

        def graph_periods_to_date(self, df1, filter_col):
            """Count distinct addresses for each period-to-date and push them to the cards.

            ``df1`` is restricted to ``self.account_type`` unless that is
            ``'all'``. For week, month, quarter and year the frame returned by
            ``self.period_to_date`` (anchored at the configured last date and
            filtered on ``filter_col``) is reduced to its unique ``address``
            rows, and the row counts are handed to ``self.update_cards``.

            Errors are logged and swallowed.
            """
            try:
                if self.account_type != 'all':
                    df1 = df1[df1.account_type == self.account_type]

                counts = {}
                for period in ['week', 'month', 'quarter', 'year']:
                    period_df = self.period_to_date(
                        df1,
                        timestamp=dashboard_config['dates']['last_date'],
                        timestamp_filter_col=filter_col,
                        period=period)
                    # get unique instances
                    addresses = period_df[['address']].compute()
                    addresses = addresses.drop_duplicates(keep='first')
                    counts[period] = len(addresses)
                    # drop the frames before the next period is loaded
                    del period_df, addresses
                    gc.collect()
                self.update_cards(counts)

            except Exception:
                logger.error('graph periods to date', exc_info=True)

        def graph_period_over_period(self, period):
            """Build the period-over-period bar chart(s) for ``period``.

            Loads the rows between ``self.pop_start_date`` and
            ``self.pop_end_date``, optionally restricts them to
            ``self.account_type``, and for each period still in ``periods``
            plots the per-``dayset`` counts of the current and historical
            periods side by side.

            Returns the hvplot object, or ``None`` when an exception was logged
            (including the case where ``periods`` ends up empty and ``p`` was
            never assigned).
            """
            try:
                periods = [period]
                start_date = self.pop_start_date
                end_date = self.pop_end_date
                # normalise dates to datetimes at midnight
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date,
                                                  datetime.min.time())
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                cols = ['account_type', 'timestamp_of_first_event', 'day']
                df = self.load_df(start_date=start_date,
                                  end_date=end_date,
                                  cols=cols,
                                  timestamp_col='timestamp_of_first_event')
                # drop a period when the selected range is longer than it
                if abs(start_date - end_date).days > 7:
                    if 'week' in periods:
                        periods.remove('week')
                if abs(start_date - end_date).days > 31:
                    if 'month' in periods:
                        periods.remove('month')
                if abs(start_date - end_date).days > 90:
                    if 'quarter' in periods:
                        periods.remove('quarter')

                if self.account_type != 'all':
                    df = df[df.account_type == self.account_type]

                # col for when list is empty
                self.variable = 'account_type'

                # NOTE: the loop variable below rebinds the `period` parameter
                for idx, period in enumerate(periods):
                    df_period = self.period_over_period(
                        df,
                        start_date=start_date,
                        end_date=end_date,
                        period=period,
                        history_periods=self.pop_history_periods,
                        timestamp_col='timestamp_of_first_event')

                    groupby_cols = ['dayset', 'period']
                    if len(df_period) > 0:
                        # count rows per (dayset, period) before materialising
                        df_period = df_period.groupby(groupby_cols).agg(
                            {'account_type': 'count'})
                        df_period = df_period.reset_index()
                        df_period = df_period.compute()
                    else:
                        df_period = df_period.compute()
                        df_period = df_period.rename(index=str,
                                                     columns={'day': 'dayset'})
                    prestack_cols = list(df_period.columns)
                    logger.warning('Line 179:%s', df_period.head(10))
                    df_period = self.split_period_into_columns(
                        df_period,
                        col_to_split='period',
                        value_to_copy='account_type')
                    logger.warning('line 180 df_period columns:%s',
                                   df_period.head(50))
                    poststack_cols = list(df_period.columns)
                    title = "{} over {}".format(period, period)

                    # columns added by the split are the series to plot
                    plotcols = list(np.setdiff1d(poststack_cols,
                                                 prestack_cols))
                    df_period, plotcols = self.pop_include_zeros(
                        df_period=df_period, plotcols=plotcols, period=period)
                    # first chart starts the layout, later ones are appended
                    if idx == 0:
                        p = df_period.hvplot.bar('dayset',
                                                 plotcols,
                                                 rot=45,
                                                 title=title,
                                                 stacked=False)
                    else:
                        p += df_period.hvplot.bar('dayset',
                                                  plotcols,
                                                  rot=45,
                                                  title=title,
                                                  stacked=False)
                return p

            except Exception:
                logger.error('period over period to date', exc_info=True)

    def update_account(attrname, old, new):
        """Widget callback: switch the account type and redraw cards and plots.

        ``new`` is the freshly selected value; ``attrname`` and ``old`` are
        part of the callback signature but are not used.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.account_type = new
        thistab.graph_periods_to_date(thistab.df, 'timestamp_of_first_event')
        # refresh both section headers, in the same order as before
        for section in ('cards', 'pop'):
            thistab.section_header_updater(section)
        # bump the counter and push it through the stream to re-run the plots
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch.event(launch=launch_id)
        thistab.notification_updater("ready")

    def update_period_over_period(attrname, old, new):
        """Widget callback: copy the period-over-period controls onto the tab.

        Reads the number of comparison periods and both period date pickers,
        then fires the launch stream so the period-over-period plots re-run.
        The callback arguments themselves are not used.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.pop_history_periods = history_periods_select.value
        # the two dates delimit the period being compared
        thistab.pop_start_date = datepicker_period_start.value
        thistab.pop_end_date = datepicker_period_end.value
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch.event(launch=launch_id)
        thistab.notification_updater("ready")

    def update_history_periods(attrname, old, new):
        """Widget callback: take the period count from ``pop_number_select``.

        Stores the widget's current value on the tab and fires the launch
        stream. The callback arguments themselves are not used.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.pop_history_periods = pop_number_select.value
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch.event(launch=launch_id)
        thistab.notification_updater("ready")

    # Build the tab: load data, create widgets, wire callbacks and lay
    # everything out. Any failure is logged and replaced by an error tab.
    try:
        cols = [
            'address', 'account_type', 'update_type', 'balance',
            'timestamp_of_first_event'
        ]
        thistab = Thistab(table='account_ext_warehouse', cols=cols)
        # -------------------------------------  SETUP   ----------------------------
        # format dates
        # first/last_date_range bound the date pickers; first/last_date are
        # the initially selected values (Jan 1st of last_date's year onwards)
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()

        last_date = dashboard_config['dates']['last_date']
        first_date = datetime(last_date.year, 1, 1, 0, 0, 0)

        thistab.df = thistab.load_df(first_date, last_date, cols,
                                     'timestamp_of_first_event')
        thistab.graph_periods_to_date(thistab.df,
                                      filter_col='timestamp_of_first_event')
        thistab.section_header_updater('cards')
        thistab.section_header_updater('pop')

        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        # default period-over-period window: from the first date of the
        # 'week' period containing last_date (per first_date_in_period) to
        # last_date
        thistab.pop_end_date = last_date
        thistab.pop_start_date = thistab.first_date_in_period(
            thistab.pop_end_date, 'week')
        # stream whose 'launch' value is bumped by the callbacks to re-run
        # the DynamicMaps below
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        datepicker_period_start = DatePicker(title="Period start",
                                             min_date=first_date_range,
                                             max_date=last_date_range,
                                             value=thistab.pop_start_date)
        datepicker_period_end = DatePicker(title="Period end",
                                           min_date=first_date_range,
                                           max_date=last_date_range,
                                           value=thistab.pop_end_date)

        history_periods_select = Select(
            title='Select # of comparative periods',
            value='2',
            options=thistab.menus['history_periods'])
        account_type_select = Select(title='Select account type',
                                     value='all',
                                     options=thistab.menus['account_type'])
        # NOTE(review): same title/options as history_periods_select and it
        # is never added to the layout below - possibly a leftover; confirm
        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(thistab.pop_history_periods),
                                   options=thistab.menus['history_periods'])

        # ---------------------------------  GRAPHS ---------------------------
        # one DynamicMap per period granularity, all driven by stream_launch
        hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)

        hv_pop_month = hv.DynamicMap(thistab.pop_month,
                                     streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)

        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter,
                                       streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)

        # -------------------------------- CALLBACKS ------------------------
        # NOTE(review): datepicker_start / datepicker_end are displayed but
        # have no callback attached (the registrations below are disabled)
        #datepicker_start.on_change('value', update)
        #datepicker_end.on_change('value', update)
        account_type_select.on_change('value', update_account)
        history_periods_select.on_change('value', update_period_over_period)
        datepicker_period_start.on_change('value', update_period_over_period)
        datepicker_period_end.on_change('value', update_period_over_period)
        pop_number_select.on_change('value', update_history_periods)

        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             account_type_select)
        controls_pop = WidgetBox(datepicker_period_start,
                                 datepicker_period_end, history_periods_select)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['cards']],
                         [Spacer(width=20, height=2)],
                         [thistab.KPI_card_div, controls],
                         [thistab.section_headers['pop']],
                         [Spacer(width=20, height=25)],
                         [pop_week.state, controls_pop], [pop_month.state],
                         [pop_quarter.state],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title='KPI: user adoption')
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        # NOTE(review): the error tab is titled 'KPI accounts' while the
        # normal tab is titled 'KPI: user adoption' - confirm which is meant
        return tab_error_flag('KPI accounts')

Beispiel #18

Datei anzeigen

Datei: accounts.py Projekt: andre-aion/analytics_demo

def accounts_tsa_tab(panel_title):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            """Initialise tab state, filter defaults and the header/notification Divs.

            ``table``, ``cols`` and ``dedup_cols`` are forwarded to
            ``Mytab.__init__``; ``table`` and ``cols`` are also kept on the
            instance.
            """
            Mytab.__init__(self, table, cols, dedup_cols)

            # ------- data handles
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.df_grouped = ''
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.groupby_dict = {}
            self.load_data_flag = False
            self.max_loaded_date = None
            self.min_loaded_date = None

            # ------- modelling state
            self.rf = {}  # random forest
            self.pl = {}  # for rf pipeline
            self.cl = PythonClickhouse('aion')
            self.forecast_days = 30
            self.interest_var = 'amount'
            self.trigger = -1

            # ------- filter defaults ('all' disables the corresponding filter)
            self.status = 'all'
            self.update_type = 'all'
            self.account_type = 'all'
            self.address = 'all'

            # list of tier specific addresses for prediction
            self.address_list = []
            self.addresses = []
            self.address_select = Select(title='Filter by address',
                                         value='all',
                                         options=[])

            # ------- inline HTML style snippets (whitespace kept verbatim)
            self.div_style = (
                " style='width:300px; margin-left:25px;\n"
                "                        border:1px solid #ddd;"
                "border-radius:3px;background:#efefef50;' \n"
                "                        ")
            self.header_style = """ style='color:blue;text-align:center;' """

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                            position:relative;background:black;margin-bottom:200px">
                            <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            top_div = Div(text=txt, width=self.page_width, height=20)
            bottom_div = Div(text=txt, width=self.page_width, height=10)
            self.notification_div = {'top': top_div, 'bottom': bottom_div}

            self.section_divider = '-----------------------------------'
            forecast_title = 'Forecasts:{}'.format(self.section_divider)
            self.section_headers = {
                'forecast':
                self.section_header_div(text=forecast_title,
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a ``Div`` holding ``text`` wrapped in an ``html_header`` tag.

            ``margin_top`` and ``margin_bottom`` are interpolated into the
            inline style of the surrounding ``<div>``; ``width`` is passed to
            the ``Div`` and its height is fixed at 15.
            """
            # NOTE(review): the template already puts a '-' in front of the
            # margin-bottom placeholder, so the negative default renders as
            # 'margin-bottom:--150px' - confirm whether that is intended.
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

            # ####################################################
            #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            """Return a ``Div`` showing ``text`` with the given width and height."""
            return Div(text=text, width=width, height=height)

        def title_div(self, text, width=700):
            """Return a 15px-high ``Div`` with ``text`` rendered as a coloured ``<h2>``."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def reset_checkboxes(self):
            """Clear the remembered address and put the address selector back on 'all'.

            Any failure is logged and swallowed.
            """
            try:
                # targets are assigned left to right, matching the original order
                self.address_selected, self.address_select.value = "", "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self, start_date, end_date):
            """Make sure ``self.df`` covers ``[start_date, end_date]``.

            Args:
                start_date: start of the range; strings are parsed with
                    ``self.DATEFORMAT``.
                end_date: end of the range; strings are parsed with
                    ``self.DATEFORMAT``.

            If a frame is already loaded and its ``block_timestamp`` range
            contains the requested range, nothing is reloaded. Otherwise
            ``self.df_load`` is called, missing values are replaced with 0
            and ``self.addresses`` is rebuilt as ``['all'] + unique addresses``.
            Any failure is logged and swallowed.
            """
            try:
                logger.warning("data load begun")
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)

                first_load = self.df is None
                if not first_load:
                    self.max_loaded_date = self.df.block_timestamp.max(
                    ).compute()
                    self.min_loaded_date = self.df.block_timestamp.min(
                    ).compute()
                    if (start_date >= self.min_loaded_date
                            and end_date <= self.max_loaded_date):
                        logger.warning("data already loaded - %s",
                                       self.df.tail(10))
                        return

                # presumably df_load stores the loaded frame on self.df
                # (the lines below rely on that) - verify against Mytab
                self.df_load(start_date, end_date, cols=self.cols)
                self.df = self.df.fillna(0)

                # Iterating a DataFrame yields its column labels, so the
                # values have to be taken from the 'address' column itself.
                address_frame = self.df[['address']].compute()
                self.addresses = ['all'] + list(
                    address_frame['address'].unique())
                logger.warning("data loaded - %s", self.df.tail(10))

                # NOTE(review): as in the original, the filter is only
                # applied on the very first load, not on later reloads.
                if first_load:
                    self.df = self.filter(self.df)

            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add a ``<col>_diff`` percentage-change column for each target column.

            Does nothing when ``self.df`` is ``None`` or empty. Otherwise the
            frame is computed, one ``pct_change`` column (NaN replaced by 0)
            is added per entry of ``self.targets`` and the result is stored
            back on ``self.df`` as a 15-partition frame. Any failure is
            logged and swallowed.
            """
            try:
                if self.df is None or len(self.df) == 0:
                    return

                df = self.df.compute()
                # NOTE(review): self.targets is not set in this class;
                # presumably provided by Mytab - confirm.
                for col in self.targets:
                    col_new = col + '_diff'
                    df[col_new] = df[col].pct_change().fillna(0)
                    logger.warning('diff col added : %s', col_new)

                # The former `self.df = self.df.fillna(self.df.mean())` was
                # removed: its result was overwritten by the assignment
                # below, so it only added work and a possible failure point.
                self.df = dd.dataframe.from_pandas(df, npartitions=15)
                # logger.warning('POST DELTA:%s',self.df1.tail(20))

            except Exception:
                logger.error('make delta', exc_info=True)

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Return a box plot of ``variable`` grouped by ``status``.

            The y-axis runs from 0.9 * min to 1.1 * max of the column; both
            bounds stay 0 when the frame is empty or missing. When no frame
            is loaded, a frame obtained from ``SD`` is plotted instead.
            Failures are logged and ``None`` is returned implicitly.
            """
            try:
                # logger.warning("difficulty:%s", self.df.tail(30))
                lower, upper = 0, 0
                frame = self.df
                if frame is None:
                    frame = SD('filter', [variable, 'status'], []).get_df()
                elif len(frame) > 0:
                    # evaluate both aggregates in a single compute call
                    lower, upper = dd.compute(frame[variable].min(),
                                              frame[variable].max())

                y_range = (.9 * lower, 1.1 * upper)
                return frame.hvplot.box(variable, by='status', ylim=y_range)
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS

        def filter(self, df):
            """Return ``df`` restricted to the tab's current filter selections.

            A ``freq`` column (a copy of ``address``) is added first. Then,
            for each of ``status``, ``account_type``, ``update_type`` and
            ``address``, rows are kept only where the column equals the
            corresponding attribute on ``self``; the value ``'all'`` disables
            that filter.

            Returns:
                The filtered frame, or ``None`` (after logging) on failure.
            """
            try:
                df = df.assign(freq=df.address)
                # (column, selected value) pairs; the column name was
                # previously misspelled 'acccount_type', which made every
                # account-type filter fail.
                selections = (
                    ('status', self.status),
                    ('account_type', self.account_type),
                    ('update_type', self.update_type),
                    ('address', self.address),
                )
                for column, wanted in selections:
                    if wanted != 'all':
                        df = df[df[column] == wanted]

                return df
            except Exception:
                logger.error("filter:", exc_info=True)

        def _prophet_forecast_layout(self, column, agg_func, value_label,
                                     **plot_opts):
            """Fit Prophet on a daily aggregate of ``column`` and plot the result.

            Args:
                column: column of ``self.df`` to aggregate per day.
                agg_func: aggregation applied to ``column`` (e.g. ``'mean'``).
                value_label: label passed to every hvplot call.
                **plot_opts: extra keyword arguments passed to every hvplot
                    call (e.g. ``legend=False``).

            Returns:
                A layout of two overlays: the forecast (``yhat`` as a line,
                its bounds as scatters) next to the ``trend`` and ``weekly``
                components.
            """
            logger.warning('df columns:%s', list(self.df.columns))
            df = self.df.set_index('block_timestamp')
            df = df.resample('D').agg({column: agg_func})
            df = df.reset_index()
            df = df.compute()
            # replace any missing daily values with 0 before fitting
            df = df.fillna(0)

            # Prophet expects the time column as 'ds' and the value as 'y'
            df = df.rename(columns={'block_timestamp': 'ds', column: 'y'})
            df = df[['ds', 'y']]
            logger.warning('df:%s', df.head())
            logger.warning('df:%s', df.tail())

            m = Prophet()
            m.fit(df)
            future = m.make_future_dataframe(periods=self.forecast_days)
            forecast = m.predict(future)
            logger.warning(
                'forecast tail:%s',
                forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
            logger.warning('forecast columns:%s', list(forecast.columns))

            shared = dict(x='ds', height=250, value_label=value_label)
            shared.update(plot_opts)

            # forecast: point estimate as a line, bounds as scatter overlays
            p = forecast.hvplot.line(y='yhat', width=600,
                                     **shared).relabel('yhat')
            for col in ('yhat_lower', 'yhat_upper'):
                p *= forecast.hvplot.scatter(y=col, width=600,
                                             **shared).relabel(col)

            # model components, overlaid as lines
            q = forecast.hvplot.line(y='trend', width=550,
                                     **shared).relabel('trend')
            q *= forecast.hvplot.line(y='weekly', width=550,
                                      **shared).relabel('weekly')

            return p + q

        def tsa_amount(self, launch):
            """Forecast the daily mean ``amount``; stream callback.

            ``launch`` is the stream value and is not used. Failures are
            logged and ``None`` is returned implicitly.
            """
            try:
                return self._prophet_forecast_layout('amount',
                                                     'mean',
                                                     '$',
                                                     legend=False)
            except Exception:
                logger.error("tsa amount:", exc_info=True)

        def tsa_freq(self, launch):
            """Forecast the daily number of unique addresses; stream callback.

            ``launch`` is the stream value and is not used. Failures are
            logged and ``None`` is returned implicitly.
            """
            try:
                return self._prophet_forecast_layout('address', 'nunique',
                                                     '#')
            except Exception:
                logger.error("tsa freq:", exc_info=True)

        ####################################################
        #               GRAPHS
    def update(attrname, old, new):
        """Widget callback: copy the current selections onto the tab and relaunch.

        Reads the update type, status, account type, forecast horizon and
        address widgets, then fires the launch stream. The callback
        arguments themselves are not used.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        # mirror the widget state on the tab object
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.address = thistab.address_select.value
        # bump the counter and push it through the stream
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch.event(launch=launch_id)
        thistab.notification_updater("ready")

    def update_load(attrname, old, new):
        """Widget callback: reload data for the selected dates and redraw.

        Calls ``thistab.load_df`` with the two date pickers' values and then
        fires the launch stream, like the other callbacks do, so the
        forecast plots are rebuilt from the reloaded frame instead of
        staying stale until another widget changes. The callback arguments
        themselves are not used.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    # Build the tab: create widgets, load data, wire callbacks and lay
    # everything out. Any failure is logged and replaced by an error tab.
    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())

        cols = [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type', 'amount'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        # *_date_range bound the pickers; first/last_date are the initially
        # selected values (the 60 days up to the configured last date)
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = last_date - timedelta(days=60)
        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        # NOTE(review): stream_select_variable is not used anywhere in the
        # visible part of this function
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        # NOTE(review): `menus` is not defined in this function; presumably a
        # module-level name - confirm
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])
        # search by address checkboxes
        # NOTE(review): created before load_df runs, so thistab.addresses is
        # still the empty list set in __init__ at this point
        thistab.checkboxes = CheckboxButtonGroup(labels=thistab.addresses,
                                                 active=[0])

        # ----------------------------------- LOAD DATA
        # load model-making data
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        # load data for period to be predicted

        # tables
        # both DynamicMaps re-run whenever stream_launch fires
        hv_tsa_amount = hv.DynamicMap(thistab.tsa_amount,
                                      streams=[stream_launch])
        tsa_amount = renderer.get_plot(hv_tsa_amount)

        hv_tsa_freq = hv.DynamicMap(thistab.tsa_freq, streams=[stream_launch])
        tsa_freq = renderer.get_plot(hv_tsa_freq)

        # add callbacks
        # date changes reload data; every other widget goes through update
        datepicker_start.on_change('value', update_load)
        datepicker_end.on_change('value', update_load)
        thistab.address_select.on_change('value', update)
        select_forecast_days.on_change('value', update)
        update_type_select.on_change('value', update)
        account_type_select.on_change('value', update)
        status_select.on_change('value', update)

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             thistab.address_select, select_forecast_days,
                             update_type_select, account_type_select,
                             status_select, thistab.checkboxes)

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [Spacer(width=20, height=30)],
                         [tsa_amount.state, controls], [tsa_freq.state],
                         [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #19

Datei anzeigen

Datei: projects.py Projekt: andre-aion/analytics_demo

def KPI_projects_tab(panel_title, DAYS_TO_LOAD=90):
    timeline_source = ColumnDataSource(data=dict(
        Item=[], Start=[], End=[], Color=[], start=[], end=[], ID=[], ID1=[]))

    class Thistab(KPI):
        def __init__(self, table, cols=None):
            """Initialise state, menus and widgets for the projects KPI tab.

            Args:
                table: Table name; stored on ``self.table`` and forwarded to
                    ``KPI.__init__``.
                cols: Optional list of columns forwarded to ``KPI.__init__``.
                    ``None`` (the default) is replaced by a fresh empty list.
            """
            # A literal ``[]`` default is created once and shared by every
            # call, so build a new list per instance instead.
            cols = [] if cols is None else cols
            KPI.__init__(self, table, name='project', cols=cols)
            self.table = table
            self.df = None
            self.df_pop = None

            self.checkboxgroup = {}
            self.period_to_date_cards = {}
            # 1 January of the current year
            self.ptd_startdate = datetime(datetime.today().year, 1, 1, 0, 0, 0)

            self.timestamp_col = 'project_startdate_actual'
            self.pym = PythonMongo('aion')
            # Aggregation name per variable. The three *_end_delay entries
            # used to read ' mean' (leading space); presumably these values
            # are consumed as pandas aggregation names, where only 'mean'
            # is valid - confirm against the KPI base class.
            self.groupby_dict = {
                'project': 'sum',
                'project_duration': 'sum',
                'project_start_delay': 'mean',
                'project_end_delay': 'mean',
                'milestone': 'sum',
                'milestone_duration': 'sum',
                'milestone_start_delay': 'mean',
                'milestone_end_delay': 'mean',
                'task': 'sum',
                'task_duration': 'sum',
                'task_start_delay': 'mean',
                'task_end_delay': 'mean',
            }

            # option lists offered by the tab's selection widgets
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'type': [
                    'all', 'research', 'reconciliation', 'audit', 'innovation',
                    'construction', 'manufacturing', 'conference'
                ],
                'gender': ['all', 'male', 'female'],
                'variables':
                list(self.groupby_dict.keys()),
                'history_periods':
                ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
            }

            # current filter selections; 'all' disables a filter (see filter_df)
            self.status = 'all'
            self.pm_gender = 'all'
            self.m_gender = 'all'
            self.t_gender = 'all'
            self.type = 'all'
            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = self.variables[0]
            self.groupby_var = 'project'

            self.chord_data = {
                'rename': {
                    'project_owner': 'source',
                    'milestone_owner': 'target',
                    'remuneration': 'value'
                },
                # quantile used by chord_diagram to keep only the top links
                'percentile_threshold': .75,
            }

            self.percentile_threshold = 10

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                         position:relative;background:black;margin-bottom:200px">
                                         <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                   </div>""".format(self.page_width, 50,
                                                    'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'cards':
                self.section_header_div(text='Period to date:{}'.format(
                    self.section_divider),
                                        width=1000,
                                        html_header='h2',
                                        margin_top=50,
                                        margin_bottom=5),
                'pop':
                self.section_header_div(text='Period over period:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'chord':
                self.section_header_div(text='Relationships:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'timeline':
                self.section_header_div(text='Project timeline:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }
            self.KPI_card_div = self.initialize_cards(self.page_width,
                                                      height=350)
            # fixed initial x-range of the timeline figure
            start = datetime(2014, 1, 1, 0, 0, 0)
            end = datetime(2019, 5, 15, 0, 0, 0)
            self.tools = [
                BoxZoomTool(),
                ResetTool(),
                PanTool(),
                SaveTool(),
                WheelZoomTool()
            ]
            # state and figure backing the project timeline section
            self.timeline_vars = {
                'projects':
                '',
                'project':
                '',
                'types': ['all', 'milestone', 'task', 'project'],
                'type':
                'all',
                'DF':
                None,
                'G':
                figure(title=None,
                       x_axis_type='datetime',
                       width=1200,
                       height=900,
                       y_range=[],
                       x_range=Range1d(start, end),
                       toolbar_location=None),
                'toolbar_box':
                ToolbarBox()
            }

            # ----- UPDATED DIVS END

        # ----------------------  DIVS ----------------------------
        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Wrap a heading in a styled ``Div`` of fixed height 15.

            Args:
                text: Heading text placed inside the header tag.
                html_header: Tag name used for the heading (e.g. ``'h2'``).
                width: Width passed to ``Div``.
                margin_top: Value written into ``margin-top`` (px).
                margin_bottom: Value written after the literal ``-`` of
                    ``margin-bottom`` (px).

            Returns:
                The ``Div`` holding the rendered markup.
            """
            # NOTE(review): the template already contains a minus sign before
            # the bottom margin, so a negative ``margin_bottom`` (including
            # the default) renders as ``--150px`` - confirm this is intended.
            template = ('<div style="margin-top:{}px;margin-bottom:-{}px;">'
                        '<{} style="color:#4221cc;">{}</{}></div>')
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

        def information_div(self, width=400, height=300):
            """Return a ``Div`` with the interpretation help box.

            The markup is a heading followed by four empty list items;
            ``self.div_style`` and ``self.header_style`` are substituted into
            the outer ``div`` and the ``h4`` tag respectively.

            Args:
                width: Width passed to ``Div``.
                height: Height passed to ``Div``.
            """
            markup = """
            <div {}>
                <h4 {}>How to interpret sentiment score</h4>
                <ul style='margin-top:-10px;'>
                    <li>
                    </li>
                    <li>
                    </li>
                    <li>
                    </li>
                    <li>
                    </li>

                </ul>
            </div>

            """.format(self.div_style, self.header_style)
            return Div(text=markup, width=width, height=height)

        def initialize_cards(self, width, height=250):
            """Create the ``Div`` holding four blank KPI cards in a flex row.

            One card is rendered per period (year, quarter, month, week),
            each with a design key picked at random from ``KPI_card_css``.

            Args:
                width: Width passed to ``Div``.
                height: Height passed to ``Div``.

            Returns:
                The ``Div``, or ``None`` when an exception was logged.
            """
            try:
                cards = ''.join(
                    self.card(title='',
                              data='',
                              card_design=random.choice(
                                  list(KPI_card_css.keys())))
                    for _ in ('year', 'quarter', 'month', 'week'))

                wrapper = """<div style="margin-top:100px;display:flex; flex-direction:row;">
                {}
                </div>"""
                return Div(text=wrapper.format(cards),
                           width=width,
                           height=height)
            except Exception:
                logger.error('initialize cards', exc_info=True)

        def load_df(self, req_startdate, req_enddate, table, cols,
                    timestamp_col):
            """Return rows for the requested window, preferring ``self.df``.

            When ``self.df`` is set and its ``timestamp_col`` values span the
            whole requested window, the matching slice of ``self.df`` is
            returned. Otherwise the request is delegated to
            ``self.pym.load_df``.

            Args:
                req_startdate: Inclusive lower bound of the window.
                req_enddate: Inclusive upper bound of the window.
                table: Table name forwarded to ``self.pym.load_df``.
                cols: Column list forwarded to ``self.pym.load_df``.
                timestamp_col: Column the window is applied to.

            Returns:
                The loaded/sliced frame, or ``None`` when an exception was
                logged.
            """
            try:
                cached = self.df
                if cached is not None:
                    stamps = cached[timestamp_col]
                    earliest = stamps.min()
                    latest = stamps.max()

                    # serve from memory only if the cache covers both ends
                    if earliest <= req_startdate and latest >= req_enddate:
                        in_window = ((stamps >= req_startdate)
                                     & (stamps <= req_enddate))
                        return cached[in_window]

                return self.pym.load_df(req_startdate,
                                        req_enddate,
                                        table=table,
                                        cols=cols,
                                        timestamp_col=timestamp_col)

            except Exception:
                logger.error('load_df', exc_info=True)

        def filter_df(self, df1):
            """Apply the tab's active selections to ``df1``.

            Each selection whose value is not ``'all'`` keeps only the rows
            whose matching column equals that value. Filters are applied in
            the order status, project-owner gender, milestone-owner gender,
            task-owner gender, type.

            Args:
                df1: Frame to filter.

            Returns:
                The filtered frame (``df1`` itself when every selection is
                ``'all'``).
            """
            selections = (
                ('status', self.status),
                ('project_owner_gender', self.pm_gender),
                ('milestone_owner_gender', self.m_gender),
                ('task_owner_gender', self.t_gender),
                ('type', self.type),
            )
            for column, wanted in selections:
                if wanted != 'all':
                    df1 = df1[getattr(df1, column) == wanted]
            return df1

        def period_to_date(self,
                           df,
                           timestamp=None,
                           timestamp_filter_col=None,
                           cols=None,
                           period='week'):
            """Return the rows between the start of ``period`` and ``timestamp``.

            The start of the period is obtained from
            ``self.first_date_in_period(timestamp, period)``. The filter
            column is parsed with ``self.DATEFORMAT_PTD`` on a copy of
            ``df``, so the caller's frame is left untouched.

            Args:
                df: Source frame.
                timestamp: Inclusive upper bound. Defaults to the current
                    time truncated to the hour.
                timestamp_filter_col: Name of the column to parse and filter.
                cols: Optional list of columns to keep in the result; an
                    empty list or ``None`` keeps every column.
                period: Period name handed to ``first_date_in_period``.

            Returns:
                The filtered frame, or ``None`` when an exception was logged.
            """
            try:
                if timestamp is None:
                    now = datetime.now()
                    timestamp = datetime(now.year, now.month, now.day,
                                         now.hour, 0, 0)

                start = self.first_date_in_period(timestamp, period)

                # Parse on a copy: the previous in-place assignment rewrote
                # the column of the frame owned by the caller.
                df = df.copy()
                df[timestamp_filter_col] = pd.to_datetime(
                    df[timestamp_filter_col], format=self.DATEFORMAT_PTD)

                df = df[(df[timestamp_filter_col] >= start)
                        & (df[timestamp_filter_col] <= timestamp)]
                if cols is not None and len(cols) > 0:
                    df = df[cols]
                return df
            except Exception:
                logger.error('period to date', exc_info=True)

        def period_over_period(self,
                               df,
                               start_date,
                               end_date,
                               period,
                               history_periods=2,
                               timestamp_col='timestamp_of_first_event'):
            """Stack the current window with labelled earlier windows.

            The filtered copy of ``df`` is labelled
            ``'0 <period>(s) prev(current)'``. The window is then shifted
            back with ``self.shift_period_range`` and, for each earlier
            window (while ``counter < history_periods`` and the window start
            is not before ``self.initial_date``), data is loaded with
            ``self.load_df``, filtered, labelled ``'<n> <period>(s) prev'``
            and appended.

            Args:
                df: Frame for the current window.
                start_date: Start of the current window (time is zeroed).
                end_date: End of the current window (time is zeroed).
                period: Period name used in labels and passed to the
                    labelling/shifting helpers.
                history_periods: Number of periods including the current
                    one; may be an int or a numeric string.
                timestamp_col: Timestamp column used for loading/labelling.

            Returns:
                The concatenated frame, or ``None`` when an exception was
                logged.
            """
            try:
                string = '0 {}(s) prev(current)'.format(period)

                df_current = df.copy()
                df_current = self.filter_df(df_current)
                logger.warning('df current:%s', df_current.head(10))
                df_current['period'] = string

                # label the days being compared with the same label
                if len(df_current) > 0:
                    df_current = self.label_dates_pop(df_current, period,
                                                      timestamp_col)
                    # For a derived variable (e.g. 'task_duration') keep only
                    # the variable, the labels and its parent entities.
                    hierarchy = (
                        ('project', ['project']),
                        ('milestone', ['milestone', 'project']),
                        ('task', ['task', 'milestone', 'project']),
                    )
                    for level, parents in hierarchy:
                        if level in self.variable:
                            if self.variable != level:
                                df_current = df_current[
                                    [self.variable, 'period', 'dayset'] +
                                    parents]
                            break

                # zero out time information
                start = datetime(start_date.year, start_date.month,
                                 start_date.day, 0, 0, 0)
                end = datetime(end_date.year, end_date.month, end_date.day, 0,
                               0, 0)

                # NOTE(review): history frames are reduced to the columns of
                # the incoming ``df``; unless ``df`` already carries 'period'
                # and 'dayset' this discards the labels added below - confirm.
                cols = list(df.columns)
                counter = 1
                if isinstance(history_periods, str):
                    history_periods = int(history_periods)
                # make dataframes for request no. of periods
                start, end = self.shift_period_range(period, start, end)
                while counter < history_periods and start >= self.initial_date:
                    df_temp = self.load_df(start,
                                           end,
                                           table=self.table,
                                           cols=[],
                                           timestamp_col=timestamp_col)
                    # load_df yields None after a logged failure; check
                    # before filtering so one bad window does not abort the
                    # whole comparison.
                    if df_temp is not None:
                        df_temp = self.filter_df(df_temp)
                        if len(df_temp) > 1:
                            df_temp[timestamp_col] = pd.to_datetime(
                                df_temp[timestamp_col])

                            string = '{} {}(s) prev'.format(counter, period)
                            # label period
                            df_temp['period'] = string
                            # relabel days to get matching day of week,doy, dom, for different periods
                            df_temp = self.label_dates_pop(
                                df_temp, period, timestamp_col)
                            df_temp = df_temp[cols]

                            df_current = pd.concat([df_current, df_temp])
                            del df_temp
                            gc.collect()
                    # shift the loading window
                    counter += 1
                    start, end = self.shift_period_range(period, start, end)
                return df_current
            except Exception:
                logger.error('period over period', exc_info=True)

            # label dates for period over period (pop)

        def pop_include_zeros(self, df_period, plotcols, period):
            """Ensure every expected period column exists, zero-filled if absent.

            Adds ``'0 <period>(s) prev(current)'`` and
            ``'<i> <period>(s) prev'`` for ``i`` in
            ``1 .. self.pop_history_periods - 1`` to ``df_period`` (as
            all-zero columns) and to ``plotcols`` when they are missing.
            ``self.pop_history_periods`` is converted to ``int`` in place if
            it is a string.

            Args:
                df_period: Frame with one column per period; modified in
                    place.
                plotcols: List of column names to plot; extended in place.
                period: Period name used to build the labels.

            Returns:
                ``(df_period, sorted_plotcols)`` where ``sorted_plotcols``
                keeps only names containing ``'prev'`` or ``'curr'``; or
                ``None`` when an exception was logged.
            """
            try:
                # check for no data on original dates
                current_label = '0 {}(s) prev(current)'.format(period)
                if current_label not in plotcols:
                    df_period[current_label] = [0] * len(df_period)
                    plotcols.append(current_label)

                    logger.warning('line 218 cols to plot:%s', plotcols)

                if isinstance(self.pop_history_periods, str):
                    self.pop_history_periods = int(self.pop_history_periods)

                # Build history labels the same way period_over_period does.
                # Deriving them from ``plotcols[0][1:]`` produced bogus
                # '<i> <period>(s) prev(current)' columns whenever the
                # current-period column came first.
                for i in range(1, self.pop_history_periods):
                    history_label = '{} {}(s) prev'.format(i, period)
                    if history_label not in plotcols:
                        df_period[history_label] = [0] * len(df_period)
                        plotcols.append(history_label)

                clean_plotcols = [
                    col for col in plotcols if 'prev' in col or 'curr' in col
                ]

                logger.warning(
                    'LINE 340 plotcols at end of pop include zeros:%s',
                    clean_plotcols)

                return df_period, sorted(clean_plotcols)
            except Exception:
                logger.error('pop include zeros', exc_info=True)

        def label_dates_pop(self, df, period, timestamp_col):
            """Add a ``dayset`` column giving each row's position in its period.

            ``week`` uses the day of week, ``month`` the day of month,
            ``year`` the day of year and ``quarter`` the absolute day
            difference to the first day of the row's quarter. Any other
            ``period`` leaves ``df`` without a new column.

            Args:
                df: Frame to label; modified in place.
                period: One of the period names above.
                timestamp_col: Datetime column the label is derived from.

            Returns:
                ``df``, or ``None`` when an exception was logged.
            """

            def label_qtr_pop(y):
                # absolute number of days between ``y`` and its quarter start
                try:
                    quarter = (y.month - 1) // 3 + 1
                    quarter_start = datetime(y.year, 3 * quarter - 2, 1)
                    return abs((quarter_start - y).days)
                except Exception:
                    logger.error('df label quarter', exc_info=True)

            labelers = {
                'week': lambda stamps: stamps.dt.dayofweek,
                'month': lambda stamps: stamps.dt.day,
                'year': lambda stamps: stamps.dt.dayofyear,
                'quarter': lambda stamps: stamps.apply(label_qtr_pop),
            }

            try:
                logger.warning('df columns:%s', list(df.columns))
                labeler = labelers.get(period)
                if labeler is not None:
                    df['dayset'] = labeler(df[timestamp_col])

                return df
            except Exception:
                logger.error('label data ', exc_info=True)

        def get_groupby_pop_df(self, df, variable, groupby_cols):
            """Aggregate ``variable`` over ``groupby_cols``.

            Only frames that are not ``None``, non-empty and contain a
            ``dayset`` column are aggregated; anything else is returned
            unchanged. ``project``/``milestone``/``task`` are counted,
            ``remuneration`` is summed and every other variable is averaged.
            After aggregation the ``self.groupby_var`` column is dropped
            when present and different from ``self.variable``.

            Args:
                df: Frame to aggregate (may be ``None``).
                variable: Column to aggregate.
                groupby_cols: Columns to group by.

            Returns:
                The aggregated (or untouched) frame, or ``None`` when an
                exception was logged.
            """
            try:
                usable = (df is not None and len(df) > 0
                          and 'dayset' in df.columns)
                if usable:
                    if variable in ('project', 'milestone', 'task'):
                        how = 'count'
                    elif variable in ('remuneration', ):
                        how = 'sum'
                    else:
                        how = 'mean'
                    grouped = df.groupby(groupby_cols).agg({variable: how})
                    df = grouped.reset_index()

                    # clean up
                    drop_group_col = (self.groupby_var in df.columns
                                      and self.variable != self.groupby_var)
                    if drop_group_col:
                        df = df.drop([self.groupby_var], axis=1)

                return df
            except Exception:
                logger.error('get groupby card data', exc_info=True)

        def get_groupby_card_data(self, df, variable):
            """Return the text shown on a KPI card for ``variable``.

            * ``project``: number of distinct projects.
            * ``milestone``: distinct milestones per project, summed.
            * ``task``: task rows per (project, milestone), summed.
            * task duration/delay variables: mean per
              (``self.groupby_var``, project, milestone), summed, in hours.
            * ``milestone_duration``: mean per (``self.groupby_var``,
              project), summed, in days.
            * ``project_duration`` and remaining ``*delay*`` variables: mean
              per ``self.groupby_var``, summed, in days.
            * ``remuneration``: sum formatted as dollars.

            Args:
                df: Frame restricted to the period of interest.
                variable: Name of the variable to summarise.

            Returns:
                The formatted string, or ``None`` for an unsupported
                variable or when an exception was logged.
            """
            try:

                def unique_keys(*keys):
                    # ``self.groupby_var`` may equal 'project'; grouping by
                    # the same column twice breaks ``reset_index``.
                    return list(dict.fromkeys(keys))

                if variable in ['project']:
                    data = len(df[variable].unique())
                    data = "{} {}s".format(data, variable)
                elif variable in ['milestone']:
                    df = df.groupby(['project']).agg({variable: 'nunique'})
                    data = df[variable].sum()
                    data = "{} {}s".format(data, variable)
                elif variable in ['task']:
                    df = df.groupby(['project',
                                     'milestone']).agg({variable: 'count'})
                    data = df[variable].sum()
                    data = "{} {}s".format(data, variable)
                elif variable in [
                        'task_duration', 'task_start_delay', 'task_end_delay',
                        'task_start_end'
                ]:
                    # Must be tested before the generic 'delay' branch below,
                    # which previously swallowed the task delay variables.
                    # 'task_start_end' is kept for backward compatibility.
                    keys = unique_keys(self.groupby_var, 'project',
                                       'milestone')
                    df = df.groupby(keys).agg({variable: 'mean'})
                    df = df.reset_index()
                    data = "{} hours".format(round(df[variable].sum(), 2))
                elif variable in ['milestone_duration']:
                    keys = unique_keys(self.groupby_var, 'project')
                    df = df.groupby(keys).agg({variable: 'mean'})
                    df = df.reset_index()
                    data = "{} days".format(round(df[variable].sum(), 2))
                elif variable in ['project_duration'] or 'delay' in variable:
                    df = df.groupby([self.groupby_var]).agg({variable: 'mean'})
                    df = df.reset_index()
                    data = "{} days".format(round(df[variable].sum(), 2))
                elif variable in ['remuneration']:
                    data = df[variable].sum()
                    data = "${:,.2f}".format(data)
                else:
                    # previously surfaced as an UnboundLocalError on ``data``
                    logger.warning('get groupby card data: unsupported %s',
                                   variable)
                    data = None

                return data
            except Exception:
                logger.error('get groupby card data', exc_info=True)

        # -------------------- GRAPHS -------------------------------------------
        def graph_periods_to_date(self, df2, timestamp_filter_col, variable):
            """Refresh the period-to-date cards for ``variable``.

            A filtered copy of ``df2`` is cut down to the week, month,
            quarter and year ending at
            ``dashboard_config['dates']['last_date']``; duplicates are
            dropped, the card text is computed per period and the resulting
            mapping is handed to ``self.update_cards``. Exceptions are
            logged and swallowed.

            Args:
                df2: Source frame; it is copied before use.
                timestamp_filter_col: Column used for the period filter.
                variable: Variable summarised on the cards.
            """
            df1 = df2.copy()
            try:
                df1 = self.filter_df(df1)
                card_values = {}
                for period in ('week', 'month', 'quarter', 'year'):
                    period_df = self.period_to_date(
                        df1,
                        timestamp=dashboard_config['dates']['last_date'],
                        timestamp_filter_col=timestamp_filter_col,
                        period=period)
                    period_df = period_df.drop_duplicates(keep='first')

                    # groupby to eliminate repetition
                    card_values[period] = self.get_groupby_card_data(
                        period_df, variable)

                    # release the per-period frame before the next iteration
                    del period_df
                    gc.collect()

                self.update_cards(card_values)

            except Exception:
                logger.error('graph periods to date', exc_info=True)

        def graph_period_over_period(self, period):
            """Build the period-over-period bar chart for ``self.variable``.

            Reads ``self.pop_start_date``, ``self.pop_end_date``,
            ``self.df_pop``, ``self.timestamp_col`` and
            ``self.pop_history_periods``. For each period left in
            ``periods`` the data is stacked with ``period_over_period``,
            aggregated by ``dayset``/``period``, pivoted with
            ``split_period_into_columns``, padded by ``pop_include_zeros``
            and drawn with ``hvplot.bar``.

            Args:
                period: Period name to compare. It is dropped from the work
                    list when the selected date span is longer than 7
                    (week), 31 (month) or 90 (quarter) days.

            Returns:
                The hvplot object, or ``None`` when an exception was logged.
            """
            try:

                periods = [period]
                start_date = self.pop_start_date
                end_date = self.pop_end_date
                # normalise both bounds to midnight datetimes
                if isinstance(start_date, date):
                    start_date = datetime.combine(start_date,
                                                  datetime.min.time())
                if isinstance(end_date, date):
                    end_date = datetime.combine(end_date, datetime.min.time())
                # NOTE(review): ``today`` is not used further down in this method
                today = datetime.combine(datetime.today().date(),
                                         datetime.min.time())

                df = self.df_pop.copy()
                df = self.filter_df(df)
                #logger.warning('LINE 363 -df:%s',df.head())

                # NOTE(review): ``cols`` is built but not used further down
                cols = [self.variable, self.timestamp_col]
                if self.variable != 'project':
                    cols.append('project')

                # drop periods shorter than the selected date span
                if abs(start_date - end_date).days > 7:
                    if 'week' in periods:
                        periods.remove('week')
                if abs(start_date - end_date).days > 31:
                    if 'month' in periods:
                        periods.remove('month')
                if abs(start_date - end_date).days > 90:
                    if 'quarter' in periods:
                        periods.remove('quarter')
                # NOTE(review): if ``periods`` is empty here the loop never
                # binds ``p`` and ``return p`` raises inside the try block,
                # which is logged and yields None - confirm this is intended.
                for idx, period in enumerate(periods):
                    df_period = self.period_over_period(
                        df,
                        start_date=start_date,
                        end_date=end_date,
                        period=period,
                        history_periods=self.pop_history_periods,
                        timestamp_col=self.timestamp_col)

                    groupby_cols = ['dayset', 'period']
                    if len(df_period) > 0:
                        logger.warning('LINE 473:%s', list(df_period.columns))
                        df_period = self.get_groupby_pop_df(
                            df_period,
                            variable=self.variable,
                            groupby_cols=groupby_cols)
                        df_period = df_period.reset_index()
                    else:
                        # empty result: make sure a 'dayset' column exists
                        if not 'day' in df_period.columns:
                            df_period['dayset'] = ""
                        else:
                            df_period = df_period.rename(
                                index=str, columns={'day': 'dayset'})

                        logger.warning('LINE 478:%s', list(df_period.columns))

                    # columns that appear only after the split are the
                    # per-period series to plot
                    prestack_cols = list(df_period.columns)
                    df_period = self.split_period_into_columns(
                        df_period,
                        col_to_split='period',
                        value_to_copy=self.variable)

                    # short term fix: filter out the unnecessary first day added by a corrupt quarter functionality
                    if period == 'quarter':
                        if 'dayset' in df_period.columns:
                            min_day = df_period['dayset'].min()
                            df_period = df_period[
                                df_period['dayset'] > min_day]

                    poststack_cols = list(df_period.columns)

                    title = "{} over {}".format(period, period)
                    plotcols = list(np.setdiff1d(poststack_cols,
                                                 prestack_cols))

                    # include current period if not extant
                    df_period, plotcols = self.pop_include_zeros(
                        df_period, plotcols=plotcols, period=period)

                    # axis label by unit of the variable
                    # NOTE(review): ``ylabel`` stays unbound for any variable
                    # not listed in the branches below
                    if self.variable in [
                            'task_start_delay', 'task_end_delay',
                            'task_duration'
                    ]:
                        ylabel = 'hours'
                    elif self.variable in [
                            'project_duration', 'milestone_duration',
                            'project_start_delay', 'project_end_delay',
                            'milestone_start_delay', 'milestone_end_delay'
                    ]:
                        ylabel = 'days'
                    elif self.variable in ['project', 'task', 'milestone']:
                        ylabel = '#'
                    elif self.variable == 'remuneration':
                        ylabel = '$'

                    if 'dayset' not in df_period.columns:
                        leng = len(df_period)
                        if leng > 0:
                            df_period['dayset'] = 0
                            logger.warning('LINE 549')
                        else:
                            logger.warning('LINE 551')
                            df_period['dayset'] = ''

                    logger.warning('LINE 552: df columns:%s',
                                   list(df_period.columns))

                    # first period creates the plot, later ones are appended
                    if idx == 0:
                        p = df_period.hvplot.bar('dayset',
                                                 plotcols,
                                                 rot=45,
                                                 title=title,
                                                 stacked=False,
                                                 width=1200,
                                                 height=400,
                                                 value_label=ylabel)
                    else:
                        p += df_period.hvplot.bar('dayset',
                                                  plotcols,
                                                  rot=45,
                                                  title=title,
                                                  stacked=False,
                                                  width=1200,
                                                  height=400,
                                                  value_label=ylabel)
                return p

            except Exception:
                logger.error('period over period to date', exc_info=True)

        def chord_diagram(self, launch):
            """Build a chord diagram of remuneration between owners.

            Nodes are the distinct ``milestone_owner`` values of ``self.df``
            (labelled with the ``type`` recorded for that owner). Links go
            from ``project_owner`` to ``milestone_owner``, weighted by
            summed ``remuneration`` rescaled to per-mille of the total (at
            least 1). Only links at or above the
            ``self.chord_data['percentile_threshold']`` quantile are kept.

            Args:
                launch: Not used in the body; presumably the value of the
                    stream that triggers a redraw - confirm against caller.

            Returns:
                The ``hv.Chord`` element, or ``None`` when an exception was
                logged.
            """
            try:

                def normalize_value(x, total):
                    # share of the total in per-mille, never below 1
                    scaled = int((x / total) * 1000)
                    return scaled if scaled > 0 else 1

                df = self.df.copy()

                # --------------  nodes
                names = list(set(df['milestone_owner'].tolist()))
                person_type_dict = dict(zip(df.milestone_owner, df.type))
                name_dict = {name: idx for idx, name in enumerate(names)}

                node_records = [{
                    'OwnerID': name_dict[name],
                    'index': idx,
                    'Type': person_type_dict[name]
                } for idx, name in enumerate(names)]
                nodes = hv.Dataset(pd.DataFrame(node_records), 'index')

                # --------- make the links
                # NOTE(review): ids exist only for milestone owners, so a
                # project owner who owns no milestone raises KeyError here
                # (logged below, method returns None) - confirm.
                link_records = [{
                    'source': name_dict[row['project_owner']],
                    'target': name_dict[row['milestone_owner']],
                    'value': row['remuneration']
                } for _, row in df.iterrows()]

                links = pd.DataFrame(link_records)
                # one row per (source, target) pair
                links = links.groupby(['source',
                                       'target'])['value'].sum().reset_index()
                total = links['value'].sum()
                links['value'] = links['value'].apply(
                    lambda x: normalize_value(x, total))

                # filter for top percentile
                cutoff = links['value'].quantile(
                    self.chord_data['percentile_threshold'])
                links = links[links['value'] >= cutoff]

                chord_ = hv.Chord((links, nodes), ['source', 'target'],
                                  ['value'])
                chord_.opts(
                    opts.Chord(cmap='Category20',
                               edge_cmap='Category20',
                               edge_color=dim('source').str(),
                               labels='Type',
                               node_color=dim('index').str(),
                               width=1000,
                               height=1000))

                return chord_

            except Exception:
                logger.error('chord diagram', exc_info=True)

        def timeline(self, project, type='milestone'):
            """Rebuild the timeline table for one project and push it to the plot source.

            Args:
                project: value matched against the ``project`` column of ``self.df``.
                type: ``'all'`` builds a combined milestone + task table; any other
                    value is used as a column-name prefix (``<type>_startdate_proposed``,
                    ``<type>_enddate_proposed``) and as the item column itself.

            Side effects:
                Stores the table in ``self.timeline_vars['DF']``, replaces the
                y-range factors of ``self.timeline_vars['G']`` and overwrites
                ``timeline_source.data``.

            Returns:
                None. Any exception is logged and swallowed.
            """
            try:
                DF = self.df.copy()
                # NOTE(review): this compares the granularity with the project name,
                # so the filter is applied unless the two strings are equal — confirm
                # that this is the intended condition.
                if type != project:
                    DF = DF[DF['project'] == project]

                if type == 'all':
                    # drop the '_proposed' suffix from the four date columns
                    rename_dct = {
                        'milestone_enddate_proposed': 'milestone_enddate',
                        'milestone_startdate_proposed': 'milestone_startdate',
                        'task_enddate_proposed': 'task_enddate',
                        'task_startdate_proposed': 'task_startdate',
                    }
                    DF = DF.rename(index=str, columns=rename_dct)

                    # one row per (milestone, task) with earliest start / latest end
                    DF = DF.groupby(['milestone', 'task']).agg({
                        'milestone_startdate':
                        'min',
                        'milestone_enddate':
                        'max',
                        'task_startdate':
                        'min',
                        'task_enddate':
                        'max',
                    })
                    DF = DF.reset_index()

                    # melt to get milestone and task into one column
                    df = pd.melt(DF,
                                 value_vars=['milestone', 'task'],
                                 id_vars=[
                                     'milestone_startdate',
                                     'milestone_enddate', 'task_startdate',
                                     'task_enddate'
                                 ],
                                 value_name='Item',
                                 var_name='type')

                    # collapse duplicates created by the melt
                    df = df.groupby(['Item', 'type']).agg({
                        'milestone_startdate':
                        'min',
                        'milestone_enddate':
                        'max',
                        'task_startdate':
                        'min',
                        'task_enddate':
                        'max'
                    }).reset_index()
                    # melt the two end-date columns into a single 'End' column
                    df = pd.melt(
                        df,
                        id_vars=[
                            'Item', 'type', 'milestone_startdate',
                            'task_startdate'
                        ],
                        value_vars=['milestone_enddate', 'task_enddate'],
                        value_name='End',
                        var_name='enddate_type')
                    # filter out where tasks label dates and vice versa
                    df1 = df[(df['type'] == 'task')
                             & (df['enddate_type'] == 'task_enddate')]
                    df = df[(df['type'] == 'milestone')
                            & (df['enddate_type'] == 'milestone_enddate')]
                    df = pd.concat([df1, df])
                    df = df.drop('enddate_type', axis=1)

                    # do startdate
                    df = pd.melt(
                        df,
                        id_vars=['Item', 'type', 'End'],
                        value_vars=['milestone_startdate', 'task_startdate'],
                        value_name='Start',
                        var_name='startdate_type')
                    # filter out where tasks label dates and vice versa
                    df1 = df[(df['type'] == 'task')
                             & (df['startdate_type'] == 'task_startdate')]
                    df = df[(df['type'] == 'milestone')
                            & (df['startdate_type'] == 'milestone_startdate')]
                    df = pd.concat([df1, df])
                    df = df.drop('startdate_type', axis=1)
                    # label colors: milestones black, everything else green
                    df['Color'] = df['type'].apply(
                        lambda x: 'black' if x == 'milestone' else 'green')
                    # order rows by start date and renumber the index from 0
                    df = df.sort_values(by=['Start']).reset_index()
                    df = df.drop('index', axis=1)
                    #logger.warning('LINE 605 - df:%s',df.head(50))
                    DF = df
                    # debug separator written to stdout
                    print(
                        '##################################################################################'
                    )
                else:
                    # single granularity: the column named by `type` becomes 'Item'
                    start_str = type + '_startdate_proposed'
                    end_str = type + '_enddate_proposed'
                    # group milestone
                    rename_dct = {
                        start_str: 'Start',
                        end_str: 'End',
                        type: 'Item'
                    }
                    DF = DF.rename(index=str, columns=rename_dct)
                    DF = DF[['Item', 'Start', 'End']]
                    DF = DF.groupby(['Item']).agg({
                        'Start': 'min',
                        'End': 'max'
                    })
                    DF = DF.reset_index()

                    # one randomly chosen configured color per item
                    color_list = []
                    for item in DF.Item.tolist():
                        color_list.append(
                            random.choice(dashboard_config['colors']))
                    DF['Color'] = np.array(color_list)

                # string versions of the dates (used by the hover tooltip fields
                # @start / @end in timeline_plot)
                DF['start'] = DF['Start'].dt.strftime('%Y-%m-%d')
                DF['end'] = DF['End'].dt.strftime('%Y-%m-%d')
                # ID / ID1 are the bottom / top edges of each quad in timeline_plot
                DF['ID'] = DF.index + 0.6
                DF['ID1'] = DF.index + 1.4

                logger.warning('LINE 648 %s', DF)
                self.timeline_vars['DF'] = DF
                # update source
                data = dict(Item=DF.Item.tolist(),
                            Start=DF.Start.tolist(),
                            End=DF.End.tolist(),
                            Color=DF.Color.tolist(),
                            start=DF.start.tolist(),
                            end=DF.end.tolist(),
                            ID=DF.ID.tolist(),
                            ID1=DF.ID1.tolist())
                # The trick: empty the y_range factors first, then assign the new values

                self.timeline_vars['G'].y_range.factors = []
                self.timeline_vars['G'].y_range.factors = DF.Item.tolist()
                #self.timeline_vars['G'].x_range.factors = []
                #self.timeline_vars['G'].x_range.factors = sorted(DF.Start.tolist())

                timeline_source.data = data

            except Exception:
                logger.error('timeline', exc_info=True)

        def timeline_plot(self, DF):
            """Draw the timeline quads on the shared figure and pad its x-range.

            Args:
                DF: table with ``Start`` and ``End`` columns (as stored in
                    ``self.timeline_vars['DF']`` by ``timeline``); it is only used
                    here to compute the x-axis limits.

            Returns:
                The figure held in ``self.timeline_vars['G']``, or ``None`` when an
                exception was logged.
            """
            try:
                hover = HoverTool(tooltips="Task: @Item<br>\
                Start: @start<br>\
                End: @end")
                figure = self.timeline_vars['G']
                # one quad per row: Start..End horizontally, ID..ID1 vertically
                figure.quad(left='Start',
                            right='End',
                            bottom='ID',
                            top='ID1',
                            source=timeline_source,
                            color="Color")

                self.tools = [hover] + self.tools
                figure.tools = self.tools
                toolbar_box = ToolbarBox()
                toolbar_box.toolbar = Toolbar(tools=self.tools)
                toolbar_box.toolbar_location = "above"
                self.timeline_vars['toolbar_box'] = toolbar_box

                # Pad both ends of the axis by ten days.  The original assigned
                # ``x_range.start`` twice, so the upper bound was never set and the
                # lower bound ended up past the last end date.
                padding = timedelta(days=10)
                figure.x_range.start = DF.Start.min() - padding
                figure.x_range.end = DF.End.max() + padding

                return figure
            except Exception:
                logger.error('timeline', exc_info=True)

    def update(attrname, old, new):
        """Widget callback: copy all filter selections onto ``thistab`` and redraw.

        The ``attrname`` / ``old`` / ``new`` arguments are not used; the current
        values are read directly from the select widgets.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")

        # gender filters (project / milestone / task owner)
        thistab.pm_gender = pm_gender_select.value
        thistab.m_gender = m_gender_select.value
        thistab.t_gender = t_gender_select.value

        thistab.type = type_select.value
        thistab.variable = variable_select.value

        # the first level whose name appears in the variable becomes the grouping
        # column; groupby_var is left untouched when none matches
        for level in ('project', 'milestone', 'task'):
            if level in thistab.variable:
                thistab.groupby_var = level
                break

        thistab.status = status_select.value
        thistab.graph_periods_to_date(thistab.df,
                                      thistab.timestamp_col,
                                      variable=thistab.variable)

        # bump the counter and fire the stream so the dynamic maps re-render
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_pop_dates():
        """Button callback: reload the period-over-period frame for the picked dates."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")

        thistab.pop_history_periods = pop_number_select.value
        # trigger period over period
        thistab.pop_start_date = datepicker_pop_start.value
        thistab.pop_end_date = datepicker_pop_end.value

        load_kwargs = dict(start_date=thistab.pop_start_date,
                           end_date=thistab.pop_end_date,
                           cols=[],
                           table=thistab.table,
                           timestamp_col='startdate_actual')
        thistab.df_pop = thistab.pym.load_df(**load_kwargs)

        # fire the stream so the period-over-period graphs re-render
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_history_periods(attrname, old, new):
        """Select callback: store the number of comparative periods and redraw.

        The callback arguments are ignored; the value is read from the widget.
        """
        busy_message = "Calculations underway. Please be patient"
        thistab.notification_updater(busy_message)

        thistab.pop_history_periods = pop_number_select.value

        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)

        thistab.notification_updater("ready")

    def update_timeline(attrname, old, new):
        """Select callback: remember the chosen project/granularity and rebuild the timeline."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")

        settings = thistab.timeline_vars
        settings['project'] = timeline_project_select.value
        settings['type'] = timeline_type_select.value

        thistab.timeline(thistab.timeline_vars['project'],
                         thistab.timeline_vars['type'])

        thistab.notification_updater("ready")

    # Build the whole tab: load data, create widgets and graphs, wire callbacks and
    # assemble the layout.  Any failure is logged and replaced by the error tab.
    try:
        cols = []
        thistab = Thistab(table='project_composite', cols=cols)
        # -------------------------------------  SETUP   ----------------------------
        # format dates
        first_date_range = thistab.initial_date
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        # data window starts on April 1st of the configured last date's year
        first_date = datetime(last_date.year, 4, 1, 0, 0, 0)

        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         table=thistab.table,
                                         cols=[],
                                         timestamp_col=thistab.timestamp_col)
        thistab.graph_periods_to_date(
            thistab.df,
            timestamp_filter_col=thistab.timestamp_col,
            variable=thistab.variable)
        # initial period-over-period window: the five days up to last_date
        thistab.pop_end_date = last_date
        thistab.pop_start_date = last_date - timedelta(days=5)
        thistab.df_pop = thistab.pym.load_df(
            start_date=thistab.pop_start_date,
            end_date=thistab.pop_end_date,
            cols=[],
            table=thistab.table,
            timestamp_col=thistab.timestamp_col)

        # sorted unique project names; the first one is the initial selection
        thistab.timeline_vars['projects'] = sorted(
            list(set(thistab.df['project'].tolist())))
        thistab.timeline_vars['project'] = thistab.timeline_vars['projects'][0]

        # MANAGE STREAM
        # date comes out stream in milliseconds
        # --------------------------------CREATE WIDGETS ---------------------------------

        # stream fired by the callbacks to make the dynamic maps below re-render
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        datepicker_pop_start = DatePicker(title="Period start",
                                          min_date=first_date_range,
                                          max_date=last_date_range,
                                          value=thistab.pop_start_date)

        datepicker_pop_end = DatePicker(title="Period end",
                                        min_date=first_date_range,
                                        max_date=last_date_range,
                                        value=thistab.pop_end_date)

        pop_number_select = Select(title='Select # of comparative periods',
                                   value=str(thistab.pop_history_periods),
                                   options=thistab.menus['history_periods'])
        # NOTE(review): this button is wired to update_pop_dates below but is not
        # added to any WidgetBox or grid row in this block — confirm it is displayed.
        pop_dates_button = Button(label="Select dates, then click me!",
                                  width=15,
                                  button_type="success")

        type_select = Select(title='Select project type',
                             value=thistab.type,
                             options=thistab.menus['type'])

        status_select = Select(title='Select project status',
                               value=thistab.status,
                               options=thistab.menus['status'])

        pm_gender_select = Select(title="Select project owner's gender",
                                  value=thistab.pm_gender,
                                  options=thistab.menus['gender'])

        m_gender_select = Select(title="Select milestone owner's gender",
                                 value=thistab.m_gender,
                                 options=thistab.menus['gender'])

        t_gender_select = Select(title="Select task owner's gender",
                                 value=thistab.t_gender,
                                 options=thistab.menus['gender'])

        variable_select = Select(title='Select variable of interest',
                                 value=thistab.variable,
                                 options=thistab.menus['variables'])

        timeline_project_select = Select(
            title='Select project',
            value=thistab.timeline_vars['project'],
            options=thistab.timeline_vars['projects'])

        # NOTE(review): the widget starts on 'all' while the first timeline() call
        # below uses thistab.timeline_vars['type'] — confirm the two agree.
        timeline_type_select = Select(title='Select granularity',
                                      value='all',
                                      options=thistab.timeline_vars['types'])

        # ---------------------------------  GRAPHS ---------------------------
        # every dynamic map is driven by stream_launch and rendered once here
        hv_pop_week = hv.DynamicMap(thistab.pop_week, streams=[stream_launch])
        pop_week = renderer.get_plot(hv_pop_week)

        hv_pop_month = hv.DynamicMap(thistab.pop_month,
                                     streams=[stream_launch])
        pop_month = renderer.get_plot(hv_pop_month)

        hv_pop_quarter = hv.DynamicMap(thistab.pop_quarter,
                                       streams=[stream_launch])
        pop_quarter = renderer.get_plot(hv_pop_quarter)

        hv_pop_year = hv.DynamicMap(thistab.pop_year, streams=[stream_launch])
        pop_year = renderer.get_plot(hv_pop_year)

        hv_chord = hv.DynamicMap(thistab.chord_diagram,
                                 streams=[stream_launch])
        chord = renderer.get_plot(hv_chord)

        # timeline() fills timeline_vars['DF'], which timeline_plot() then draws
        thistab.timeline(thistab.timeline_vars['project'],
                         thistab.timeline_vars['type'])
        timeline = thistab.timeline_plot(DF=thistab.timeline_vars['DF'])

        # -------------------------------- CALLBACKS ------------------------

        type_select.on_change('value', update)
        pop_dates_button.on_click(update_pop_dates)  # lags array
        status_select.on_change('value', update)
        pm_gender_select.on_change('value', update)
        m_gender_select.on_change('value', update)
        t_gender_select.on_change('value', update)
        variable_select.on_change('value', update)
        pop_number_select.on_change('value', update_history_periods)
        timeline_project_select.on_change('value', update_timeline)
        timeline_type_select.on_change('value', update_timeline)
        # -----------------------------------LAYOUT ----------------------------
        # put the controls in a single element
        controls_top = WidgetBox(
            variable_select,
            type_select,
            status_select,
            pm_gender_select,
            m_gender_select,
            t_gender_select,
        )

        controls_pop = WidgetBox(datepicker_pop_start, datepicker_pop_end,
                                 pop_number_select)
        # the toolbar box is created inside timeline_plot()
        controls_timeline = WidgetBox(thistab.timeline_vars['toolbar_box'],
                                      timeline_project_select,
                                      timeline_type_select)

        # one list per grid row, top to bottom
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['cards']],
                         [thistab.KPI_card_div, controls_top],
                         [thistab.section_headers['pop']],
                         [Spacer(width=20, height=25)],
                         [pop_week.state, controls_pop], [pop_month.state],
                         [pop_quarter.state], [pop_year.state],
                         [thistab.section_headers['chord']],
                         [Spacer(width=20, height=25)], [chord.state],
                         [thistab.section_headers['timeline']],
                         [Spacer(width=20, height=25)],
                         [timeline, controls_timeline],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        # log with traceback and hand back the error tab instead of raising
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)

Beispiel #20

Datei anzeigen

Datei: search.py Projekt: andre-aion/various_foods_on_a_blanket

def twitter_loader_tab(panel_title):
    class TwitterLoader():
        def __init__(self,search_term = 'beiber'):
            """Set up search state, menu options, widgets and header divs.

            Args:
                search_term: initial search topic.
            """
            # TWITTER SETUP
            self.api = None
            self.topic = search_term

            # menu entries are strings; the first entry of each list is the default
            message_choices = [str(n) for n in range(100, 10, -10)]
            message_choices.append('5000')
            time_choices = ['40000']
            time_choices.extend(str(n) for n in range(30, 100000, 3000))
            self.options = {
                'messages': message_choices,
                'time': time_choices,
            }
            self.limits = {
                'messages': int(message_choices[0]),
                'time': int(time_choices[0]),  # secs
            }
            self.hidden_path = dashboard_config['hidden_path']

            # the loading window ends now and reaches back limits['time'] seconds
            started_at = datetime.now(timezone.utc).timestamp()
            stopped_at = datetime.now(timezone.utc).timestamp() - self.limits['time']
            self.timestamp = {
                'start_loading': started_at,
                'stop_loading': stopped_at,
            }
            self.DATEFORMAT = "%Y-%d-%m %H:%M:%S"
            self.df = None

            # column name -> list of values, one entry per parsed message
            message_fields = (
                'message_ID',
                'human_readable_creation_date',
                'creation_date',
                'text',
                'user_ID',
                'user_creation_date',
                'user_name',
                'user_screen_name',
            )
            self.messages_dict = {field: [] for field in message_fields}

            window_select = Select(title='Select rolling mean window',
                                   value='1',
                                   options=[str(x) for x in range(1,20,2)])
            self.selects = {'window': window_select}
            self.selects_values = {'window': int(window_select.value)}

            # resample periods: '300S', '500S', ...; the first is the default
            period_menu = [str(val)+'S' for val in range(300,3000,200)]
            self.resample_period = {'menu': period_menu}
            self.resample_period['value'] = period_menu[0]

            # DIV VISUAL SETUP
            self.trigger = -1
            self.html_header = 'h2'
            self.margin_top = 150
            self.margin_bottom = -150

            self.div_style = """ 
                           style='width:350px; margin-left:25px;
                           border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                       """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.page_width = 1250
            txt = """<hr/>
                               <div style="text-align:center;width:{}px;height:{}px;
                                      position:relative;background:black;margin-bottom:200px">
                                      <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                               </div>""".format(self.page_width, 50, 'Welcome')
            # the same banner is shown at the top and bottom of the page
            top_banner = Div(text=txt, width=self.page_width, height=20)
            bottom_banner = Div(text=txt, width=self.page_width, height=10)
            self.notification_div = {
                'top': top_banner,
                'bottom': bottom_banner,
            }

            self.section_divider = '-----------------------------------'
            twitter_header = self.section_header_div(text='Twitter search results:',
                                                     width=600, html_header='h2', margin_top=155,
                                                     margin_bottom=-155)
            self.section_headers = {'twitter': twitter_header}

            # ----- UPDATED DIVS END



        # ----------------------  DIVS ----------------------------

        def section_header_div(self, text, html_header='h2', width=600, margin_top=150, margin_bottom=-150):
            """Return a Div holding ``text`` wrapped in an ``html_header`` tag.

            Args:
                text: header text.
                html_header: tag name used for the header element.
                width: width passed to the Div.
                margin_top: value written into the ``margin-top`` style.
                margin_bottom: value written after ``margin-bottom:-`` in the style.
            """
            # NOTE(review): the template already contains a '-' before the bottom
            # margin, so the negative default renders as 'margin-bottom:--150px'
            # — confirm whether that is intended.
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""
            markup = template.format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=markup, width=width, height=15)

        def notification_updater(self, text):
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                         position:relative;background:black;">
                                         <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                   </div>""".format(self.page_width, 50, text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt


        def title_div(self, text, width=700):
            """Return a Div of the given width showing ``text`` as an ``<h2>`` title."""
            markup = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=markup, width=width, height=15)

        # //////////////////////////  DIVS SETUP END   /////////////////////////////////

        # /////////////////////////// UTILS BEGIN ///////////////////////////


        def twitter_datetime_to_epoch(self, ts):
            ts = datetime.strptime(ts, '%a %b %d %H:%M:%S %z %Y')
            ts_epoch = ts.timestamp()
            ts = datetime.strftime(ts, self.DATEFORMAT)
            ts = datetime.strptime(ts,self.DATEFORMAT)
            return ts, ts_epoch

        def write_to_file(self):
            """Write ``self.df`` to a tab-separated file named after the search settings.

            The file name combines the topic, the time limit and the message
            limit. Failures are logged with a traceback and not re-raised.
            """
            try:
                filename = """{}_searches_for_last_{}sec_or_last_{}messages.csv""".format(
                    self.topic, self.limits['time'], self.limits['messages'])
                self.df.to_csv(filename, sep='\t', index=False)
            except Exception:
                # `except Exception` (not a bare except) so KeyboardInterrupt and
                # SystemExit still propagate, matching the other methods here
                logger.error('Error writing to file', exc_info=True)

        # /////////////////////////// UTILS END /////////////////////
        def reset_data(self):
            self.messages_dict = {
                'message_ID': [],
                'human_readable_creation_date':[],
                'creation_date': [],
                'text': [],
                'user_ID': [],
                'user_creation_date': [],
                'user_name': [],
                'user_screen_name': []
            }
            self.df = None

        def get_credentials(self, filename='twitter_credentials.json'):
            """Create ``self.api`` from a JSON credentials file if it is not set yet.

            Args:
                filename: file name appended to ``self.hidden_path``; the result is
                    resolved relative to this module's directory.

            The JSON must provide ``consumer_key``, ``consumer_secret``,
            ``access_token_key`` and ``access_token_secret``. Failures are logged
            with a traceback and ``self.api`` is left unchanged.
            """
            try:
                filename = self.hidden_path +filename
                filepath = join(dirname(__file__),filename)
                print(filepath)
                if self.api is None:
                    with open(filepath, 'r') as f:
                        credentials_dict = json.load(f)
                    self.api = twitter.Api(
                        consumer_key=credentials_dict['consumer_key'],
                        consumer_secret=credentials_dict['consumer_secret'],
                        access_token_key=credentials_dict['access_token_key'],
                        access_token_secret=credentials_dict['access_token_secret'],
                    )
                logger.info('CREDENTIALS LOADED')
            except Exception:
                # Log the reason (missing file, bad JSON, missing key, ...) instead
                # of printing a bare message; `except Exception` lets
                # KeyboardInterrupt / SystemExit propagate.
                logger.error('credentials not loaded', exc_info=True)



        def construct_query(self):
            """Build the raw query string for the search call.

            A comma-separated ``self.topic`` becomes its terms joined by ``%20``
            (surrounding whitespace and empty terms are dropped); a topic without
            commas is used as-is. ``count`` is ``self.limits['messages']`` and the
            result type is always ``recent``.

            Returns:
                The query string, or the fixed fallback query if building it fails.
            """
            try:
                if ',' in self.topic:
                    # The original loop unpacked enumerate() as (topic, count),
                    # i.e. swapped, so comparing the term with 0 raised TypeError
                    # and every multi-term search fell back to the default query.
                    terms = [term.strip() for term in self.topic.split(',')]
                    search = '%20'.join(term for term in terms if term)
                else:
                    search = self.topic

                qry = 'q=' + search
                qry += '&count={}'.format(self.limits['messages'])
                qry += '&result_type=recent'
                logger.warning('QUERY CONSTRUCTED:%s',qry)
                print(qry)
                return qry

            except Exception:
                logger.error('error constructing query',exc_info=True)
                return "q=beiber&count=100&result_type=recent"


        def load_data_about_topic(self):
            """Run the search for the current topic and refresh the loading window.

            Credentials are loaded first when ``self.api`` is not set. After the
            search, ``start_loading`` is set to the current UTC timestamp and
            ``stop_loading`` to that minus ``self.limits['time']`` seconds.

            Returns:
                The search results, or ``None`` if anything failed (the error is
                logged with a traceback).
            """
            try:
                if self.api is None:
                    self.get_credentials()
                qry = self.construct_query()
                results = self.api.GetSearch(raw_query=qry)
                started = datetime.now(timezone.utc).timestamp()
                self.timestamp['start_loading'] = started
                self.timestamp['stop_loading'] = started - self.limits['time']
                logger.warning('# of results retreived:%s',len(results))
                return results
            except Exception:
                # `except Exception` instead of a bare except so interpreter-exit
                # signals are not swallowed
                logger.error('error in loading data', exc_info=True)
                return None

        # parse, truncate to requested records or seconds, make a dataframe from groupby
        def parse_results(self,results):
            """Copy the fields of each result into ``self.messages_dict`` and build ``self.df``.

            Args:
                results: sequence of search results; each item must expose
                    ``created_at``, ``id``, ``text`` and ``user`` (with
                    ``created_at``, ``id``, ``name``, ``screen_name``). ``None`` or
                    an empty sequence yields an empty dataframe.

            Parsing stops after ``self.limits['messages']`` items unless that limit
            is 5000, which the menu labels as unbounded. At least one item is
            parsed when any are present. Errors are logged and not re-raised.
            """
            try:
                logger.warning('start:end= %s:  %s',self.timestamp['start_loading'],self.timestamp['stop_loading'])

                limit = self.limits['messages']
                unbounded = limit == 5000  # 5000 = "unbounded" menu entry
                store = self.messages_dict

                # The original `while` loop indexed results[0] unconditionally
                # (failing on empty/None input) and only tested the message limit
                # after the end of the results had already been reached.
                for messages_count, res in enumerate(results or [], start=1):
                    tweet_ts, ts_epoch = self.twitter_datetime_to_epoch(res.created_at)
                    store['message_ID'].append(res.id)
                    store['creation_date'].append(ts_epoch)
                    store['human_readable_creation_date'].append(tweet_ts)
                    store['text'].append(res.text)

                    user = res.user
                    _, ts_epoch_user = self.twitter_datetime_to_epoch(user.created_at)
                    store['user_ID'].append(user.id)
                    store['user_creation_date'].append(ts_epoch_user)
                    store['user_name'].append(user.name)
                    store['user_screen_name'].append(user.screen_name)

                    if not unbounded and messages_count >= limit:
                        break

                # make a dataframe
                self.df = pd.DataFrame.from_dict(self.messages_dict)
                logger.warning('df:, length=%s,%s',len(self.df),self.df.head())

            except Exception:
                logger.error('error in parsing results', exc_info=True)

        def munge_data(self):
            try:
                if self.df is not None:
                    # groupby user, then sort by message time
                    self.df = self.df.sort_values(by=['creation_date','user_ID'])
                else:
                    self.df = pd.DataFrame.from_dict(self.messages_dict)
            except:
                logger.error('munge data', exc_info=True)

        def run(self):
            """Load, parse and sort the messages for the current topic.

            Any exception escaping the three steps is logged and swallowed.
            """
            try:
                fetched = self.load_data_about_topic()
                self.parse_results(fetched)
                self.munge_data()
                #self.write_to_file()
            except Exception:
                logger.error('run', exc_info=True)

        # #################################### PLOTS ######################################
        def sentiment_analysis(self,launch = 1):
            """Plot mean pos/neg/neu sentiment per resample period.

            Args:
                launch: stream value; not used in the computation.

            Returns:
                A line plot of the three score columns over time, or ``None`` when
                an exception was logged.
            """
            try:
                cols = ['pos', 'neg', 'neu']
                # Copy the two needed columns so the score columns are added to a
                # private frame (no chained-assignment warning).  The original
                # pre-filled the score columns with 0 and then immediately
                # overwrote them; that dead step is dropped.
                df = self.df[['text','human_readable_creation_date']].copy()

                # presumably sentiment_analyzer_scores returns a (pos, neg, neu)
                # triple per text — verify against its definition
                df['pos'], df['neg'], df['neu'] = zip(*df['text'].map(sentiment_analyzer_scores))
                df = df.fillna(0)
                logger.warning('resample period:%s',self.resample_period['value'])
                df = df.set_index('human_readable_creation_date').resample(self.resample_period['value'])\
                    .agg({'pos': 'mean',
                          'neg': 'mean',
                          'neu': 'mean'})
                df = df.reset_index()
                # periods without messages aggregate to NaN; show them as 0
                df = df.fillna(0)
                logger.warning('LINE 307, df:%s',df.head(30))

                p = df.hvplot.line(x='human_readable_creation_date', y=cols, width=1200, height=600)
                return p
            except Exception:
                # label fixed: the original logged this failure as 'run'
                logger.error('sentiment analysis', exc_info=True)
                
                
        def visual(self,launch=1):
            """Return a table of the messages created at or after ``stop_loading``.

            ``launch`` is the stream value and is not used. Returns ``None`` when
            an exception was logged.
            """
            try:
                recent = self.df[self.df.creation_date >= self.timestamp['stop_loading']]
                shown_columns = [
                    'message_ID',
                    'creation_date',
                    'human_readable_creation_date',
                    'text',
                    'user_ID',
                    'user_creation_date',
                    'user_name',
                    'user_screen_name',
                ]
                return recent.hvplot.table(columns=shown_columns,
                                           width=1200, height=2000)
            except Exception:
                logger.error('output data', exc_info=True)

        def jitter(self, launch=1):
            """Plot the gap between each message and the next one against creation time.

            ``launch`` is the stream value and is not used. Returns ``None`` when
            an exception was logged.
            """
            try:
                gaps = self.df.set_index('human_readable_creation_date')
                gaps = gaps[['creation_date']]
                # diff(periods=-1) is "this row minus the next"; negate it to get
                # "next minus this"
                backward = gaps['creation_date'].diff(periods=-1)
                gaps['jitter'] = backward
                gaps['jitter'] = gaps['jitter'] * -1
                # the last row has no successor, so its NaN gap is dropped
                gaps = gaps.dropna().reset_index()
                return gaps.hvplot.line(x='creation_date',y='jitter',width=1200,height=600)
            except Exception:
                logger.error('output data', exc_info=True)


        def rolling_mean(self,launch=1):
            """Scatter-plot the rolling mean of message counts per resample period.

            ``launch`` is the stream value and is not used. Returns ``None`` when
            an exception was logged.
            """
            try:
                indexed = self.df.set_index('human_readable_creation_date')
                # messages per resample period
                counts = indexed.resample(self.resample_period['value']).agg({'message_ID':'count'})
                # smooth with the selected window size
                window = self.selects_values['window']
                smoothed = counts['message_ID'].rolling(window).mean()
                renamed = smoothed.reset_index().rename(
                    columns={'message_ID':'messages',
                             'human_readable_creation_date':'date'})
                return renamed.hvplot.scatter(x='date', y='messages', width=1200, height=500)
            except Exception:
                logger.error('time series analysis', exc_info=True)

    def update_tweet_search():
        """Callback: run a new search with the current inputs and refresh the graphs.

        Reads the message limit, time limit and search term from ``inputs``,
        re-runs the loader and fires the table/jitter and sentiment streams.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.reset_data()
        thistab.limits['messages'] = int(inputs['messages_limit'].value)
        thistab.limits['time'] = int(inputs['time_limit'].value)
        thistab.topic = inputs['search_term'].value
        thistab.run()
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        # The sentiment stream is defined with a single `launch` parameter (and is
        # fired with `launch=` in update_resample_period); the original passed
        # `launch_this=`, which is not a parameter of that stream.
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_rolling_mean(attr,old,new):
        """Select callback: store the chosen window size and redraw the rolling mean.

        The callback arguments are ignored; the value is read from the widget.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")

        window_widget = thistab.selects['window']
        thistab.selects_values['window'] = int(window_widget.value)

        thistab.trigger += 1
        stream_launch_rolling_mean.event(launch=thistab.trigger)

        thistab.notification_updater("Ready!")

    def update_resample_period(attr, old, new):
        """Store the newly selected resample period and redraw the sentiment plot.

        Bokeh ``on_change`` callback.  ``new`` is the widget's new value and is
        stored as ``thistab.resample_period['value']``; ``attr`` and ``old`` are
        not used.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.resample_period['value'] = new
        thistab.trigger = thistab.trigger + 1
        # NOTE(review): rolling_mean also reads resample_period, but its refresh
        # is disabled below — confirm that is intended.
        #stream_launch_rolling_mean.event(launch=thistab.trigger)
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    # Build the whole tab inside one try block: any failure during setup is
    # logged and replaced by the error tab returned in the except clause.
    try:
        # SETUP
        # Instantiate the loader and call run() once before any plot is built.
        thistab = TwitterLoader()
        thistab.run()

        # MANAGE STREAM
        # Three separate stream instances, each with a single `launch`
        # parameter (default -1), so groups of plots can be refreshed
        # independently by the callbacks above.
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_rolling_mean = streams.Stream.define('Launch', launch=-1)()
        stream_launch_sentiment = streams.Stream.define('Launch', launch=-1)()


        # DYNAMIC GRAPHS/OUTPUT
        # Each DynamicMap wraps a loader method and is tied to one stream;
        # renderer.get_plot() yields the object whose `.state` is placed in
        # the layout below.
        hv_visual = hv.DynamicMap(thistab.visual,streams=[stream_launch])
        visual = renderer.get_plot(hv_visual)

        # The jitter plot shares stream_launch with the main visual.
        hv_jitter = hv.DynamicMap(thistab.jitter, streams=[stream_launch])
        jitter = renderer.get_plot(hv_jitter)

        hv_rolling_mean = hv.DynamicMap(thistab.rolling_mean, streams=[stream_launch_rolling_mean])
        rolling_mean = renderer.get_plot(hv_rolling_mean)

        hv_sentiment_analysis = hv.DynamicMap(thistab.sentiment_analysis, streams=[stream_launch_sentiment])
        sentiment_analysis = renderer.get_plot(hv_sentiment_analysis)
        
        # CREATE WIDGETS
        # Initial values and option lists come from the loader instance.
        # The Select values are passed as strings (note the str() calls).
        inputs = {
            'search_term' : TextInput(title='Enter search term. For list, use commas',value=thistab.topic),

            'messages_limit' : Select(title='Select messages limit (5000 = unbounded)',
                                   value= str(thistab.limits['messages']),
                                   options=thistab.options['messages']),

            'time_limit' : Select(title='Select time limit (seconds)',
                                    value=str(thistab.limits['time']),
                                    options=thistab.options['time']),
            'resample' :  Select(title='Select resample period',
                                  value=thistab.resample_period['value'],
                                  options=thistab.resample_period['menu'])


        }
        tweet_search_button = Button(label='Enter filters/inputs, then press me', button_type="success")

        # WIDGET CALLBACK
        # The search only runs on button click; the window and resample
        # selects react immediately to a value change.
        tweet_search_button.on_click(update_tweet_search)
        # The 'window' select is taken from thistab.selects (not created here).
        thistab.selects['window'].on_change('value',update_rolling_mean)
        inputs['resample'].on_change('value',update_resample_period)
        

        # COMPOSE LAYOUT
        # group controls (filters/input elements)
        controls_tweet_search = WidgetBox(
            inputs['search_term'],
            inputs['messages_limit'],
            inputs['time_limit'],
            tweet_search_button,
        )

        controls_rolling_mean = WidgetBox(
            thistab.selects['window'],
        )

        controls_resample_period = WidgetBox(
            inputs['resample']
        )

        # One list per grid row: notification bar, then for each section a
        # title, a spacer and the plot (with its controls where it has any),
        # and finally the bottom notification bar.
        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.title_div('Sentiment analysis of tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [sentiment_analysis.state, controls_resample_period],
            [thistab.title_div('Smooth graphs:', 1000)],
            [Spacer(width=20, height=30)],
            [rolling_mean.state, controls_rolling_mean],
            [thistab.title_div('Time between tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [jitter.state],
            [thistab.title_div('Twitter search results (use filters on right, then click button):', 1000)],
            [Spacer(width=20, height=30)],
            [visual.state, controls_tweet_search],

            [thistab.notification_div['bottom']],
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        # Log the full traceback and return the error tab built by
        # tab_error_flag for the same panel title instead of raising.
        logger.error('Twitter loader:', exc_info=True)
        return tab_error_flag(panel_title)