Esempio n. 1
0
    def init_climate_input_wx(self, planet_climate):
        """Create the input widgets for the climate simulation.

        The six text boxes are pre-filled from the ``A``, ``B``, ``Q``, ``D``,
        ``S2`` and ``C`` attributes of ``planet_climate``.  The two buttons
        are wired to ``self.update_planet_climate`` and
        ``self._update_energy_in``.

        :param planet_climate: object exposing the attributes listed above
        :return: ``(simulate_button, [box1, box2, box3],
                 (float_input, general_input))``; the two dicts map a
                 parameter name to the widget that edits it
        """
        def text_box(label, fmt, number):
            # Numeric parameters are displayed as pre-formatted strings.
            return TextInput(title=label, value=fmt.format(number))

        # Free-form numeric parameters, keyed by model attribute name.
        float_input = {
            'A': text_box('Planetary IR energy out (W/m^2)', '{:.2f}',
                          planet_climate.A),
            'B': text_box('Atmosphere IR adjustment (W/m^2)', '{:.1f}',
                          planet_climate.B),
            'Q': text_box('Incoming solar (W/m^2) [Divided by 4]', '{:.2f}',
                          planet_climate.Q),
            'D': text_box('Energy transport towards poles (1/C)', '{:.1f}',
                          planet_climate.D),
            'S2': text_box('S2 (what is this for?)', '{:.3f}',
                           planet_climate.S2),
            'C': text_box('Planetary heat capacity (C/yr)', '{:.1f}',
                          planet_climate.C),
        }

        # Non-float settings: grid resolution and the starting temperature.
        general_input = {
            'nlats': Slider(start=40, end=180, step=1, value=70,
                            title='Number of latitudes in model'),
            'init_condition': Select(title='Initial planet temperature',
                                     value='normal',
                                     options=['normal', 'warm', 'cold']),
        }

        run_button = Button(label='Simulate Climate', button_type='success')
        run_button.on_click(self.update_planet_climate)
        solar_refresh = Button(label='Refresh Solar Input')
        solar_refresh.on_click(self._update_energy_in)

        # Only the first box gets an explicit width (a third of the plot).
        first_box = WidgetBox(
            children=[
                float_input['A'], float_input['B'], float_input['Q'],
                solar_refresh
            ],
            width=int(self._plot_width / 3))
        second_box = WidgetBox(float_input['D'], float_input['S2'],
                               float_input['C'])
        third_box = WidgetBox(general_input['nlats'],
                              general_input['init_condition'])

        widget_boxes = [first_box, second_box, third_box]
        return run_button, widget_boxes, (float_input, general_input)
Esempio n. 2
0
    def _generate_device_plot(self, device_events):
        """Build the horizontal-bar figure for one device plus its sync button.

        :param device_events: mapping with the keys ``'events'`` (converted
            to a data source via ``self._convert_events_to_datasource``),
            ``'n_rows'`` and ``'name'``
        :return: ``(figure, WidgetBox)`` where the box wraps a hidden,
            disabled button
        """
        events_source = self._convert_events_to_datasource(
            device_events['events'])

        # Bump very small row counts: 0 becomes 1 and 1 becomes 2.
        requested_rows = device_events['n_rows']
        row_count = {0: 1, 1: 2}.get(requested_rows, requested_rows)
        device_name = device_events['name']

        figure_options = dict(
            title="{}".format(device_name),
            plot_height=20 * row_count + 60,
            plot_width=1200,
            tools=self._tools,
            sizing_mode='stretch_both',
            active_scroll='xwheel_zoom',
        )
        device_figure = figure(**figure_options)

        bar_options = dict(
            left='start',
            right='end',
            y='height',
            color='color',
            height=0.85,
            source=events_source,
            hover_fill_alpha=0.5,
            line_join='round',
            line_cap='round',
            hover_line_color='red',
        )
        device_figure.hbar(**bar_options)

        # x spans one iteration, y spans the (adjusted) number of rows.
        device_figure.x_range = Range1d(0, self._iteration_time,
                                        bounds="auto")
        device_figure.y_range = Range1d(0, row_count)

        # Hide the y axis; keep only banded grid rows at unit intervals.
        device_figure.yaxis.visible = False
        y_grid = device_figure.ygrid
        y_grid.ticker = SingleIntervalTicker(interval=1)
        y_grid.grid_line_color = None
        y_grid.band_fill_alpha = 0.1
        y_grid.band_fill_color = "gray"

        sync_button = Button(label=" Sync",
                             width=20,
                             button_type='primary',
                             disabled=True)
        sync_button.css_classes = ['xl-hidden']
        range_updater = CustomJS(args={'me': device_figure},
                                 code=self._js_update_ranges)
        sync_button.js_on_click(range_updater)

        # Changes to the x-range start run the JS callback with the button.
        range_listener = CustomJS(args={'button': sync_button},
                                  code=self._js_on_change_callback)
        device_figure.x_range.js_on_change('start', range_listener)

        return device_figure, WidgetBox(sync_button)
def make_wb(sizing_mode):
    """Return a 400px-wide WidgetBox holding five widgets from make_widgets.

    :param sizing_mode: forwarded to ``make_widgets`` and to the box itself
    :return: WidgetBox with the oscars, genre, director, x_axis and y_axis
        widgets, in that order
    """
    available = make_widgets(sizing_mode)
    wanted = ('oscars', 'genre', 'director', 'x_axis', 'y_axis')
    return WidgetBox(
        children=[available[key] for key in wanted],
        sizing_mode=sizing_mode,
        width=400,
    )
def slider():
    """Build the sine-wave demo: four sliders driving one line plot.

    A single CustomJS callback recomputes ``y = B + A*sin(k*x + phi)`` in the
    browser from the current slider values and re-emits the data source.

    :return: ``[widgets, plot]`` - a WidgetBox with the amplitude, frequency,
        phase and offset sliders, and the figure they control
    """
    x = np.linspace(0, 10, 100)
    y = np.sin(x)
    source = ColumnDataSource(data=dict(x=x, y=y))
    plot = figure(y_range=(-10, 10),
                  tools='',
                  toolbar_location=None,
                  title="Sliders example")
    plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)
    # Every JS local is declared with `var`: undeclared assignments create
    # implicit globals and throw a ReferenceError in strict mode.
    callback = CustomJS(args=dict(source=source),
                        code="""
        var data = source.data;
        var A = amp.value;
        var k = freq.value;
        var phi = phase.value;
        var B = offset.value;
        var x = data['x'];
        var y = data['y'];
        for (var i = 0; i < x.length; i++) {
            y[i] = B + A*Math.sin(k*x[i]+phi);
        }
        source.change.emit();
    """)
    # Each slider is registered in callback.args after creation because the
    # callback object has to exist before the slider that references it.
    amp_slider = Slider(start=0.1,
                        end=10,
                        value=1,
                        step=.1,
                        title="Amplitude",
                        callback=callback,
                        callback_policy='mouseup')
    callback.args["amp"] = amp_slider
    freq_slider = Slider(start=0.1,
                         end=10,
                         value=1,
                         step=.1,
                         title="Frequency",
                         callback=callback)
    callback.args["freq"] = freq_slider
    phase_slider = Slider(start=0,
                          end=6.4,
                          value=0,
                          step=.1,
                          title="Phase",
                          callback=callback)
    callback.args["phase"] = phase_slider
    offset_slider = Slider(start=-5,
                           end=5,
                           value=0,
                           step=.1,
                           title="Offset",
                           callback=callback)
    callback.args["offset"] = offset_slider
    widgets = WidgetBox(amp_slider, freq_slider, phase_slider, offset_slider)
    return [widgets, plot]
Esempio n. 5
0
def classification_tab():
    """Build the 'Message Site Classification' tab.

    Four pretrained models are loaded (Bayes / SVM for each of two site
    pairs).  Pressing "Predict" runs the model selected by the two radio
    groups on the text area content and writes the result to a paragraph.

    :return: Panel wrapping a row that contains the controls
    """
    pairs = [
        ["stackoverflow.com", "academia.stackexchange.com"],
        ["stackoverflow.com", "softwareengineering.stackexchange.com"],
    ]

    # Pretrained classification models, grouped per algorithm; within each
    # tuple index 0 is the SO/Academia model and index 1 the SO/SE model.
    nbsoac = load("app/models/10k_so_ac_bayes_model.joblib")
    nbsose = load("app/models/10k_so_se_bayes_model.joblib")
    svmsoac = load("app/models/10k_so_ac_SVM_model.joblib")
    svmsose = load("app/models/10k_so_se_SVM_model.joblib")
    bayes_models = (nbsoac, nbsose)
    svm_models = (svmsoac, svmsose)

    learning_type = RadioButtonGroup(
        labels=["Bayes", "Support Vector Machine"], active=0)
    site_pair = RadioButtonGroup(
        labels=["Stack Overflow/Academia",
                "Stack Overflow/Software Engineering"],
        active=0)
    tai = TextAreaInput(value="", rows=6, title="Enter a post message:")
    predict = Button(label="Predict", button_type="success")
    p = Paragraph(text="""Your Site Prediction will be displayed here""",
                  width=300, height=50)

    def make_prediction():
        # Anything other than index 0 selects the second option of a group.
        algorithm_index = learning_type.active
        pair_index = site_pair.active
        candidates = bayes_models if algorithm_index == 0 else svm_models
        chosen = candidates[0] if pair_index == 0 else candidates[1]

        label = chosen.predict([tai.value])[0]
        # NOTE(review): `label - 1` presumes the models emit labels 1 and 2;
        # confirm against the training code.
        p.text = "Message belongs to site: " + pairs[pair_index][label - 1]

    predict.on_click(make_prediction)

    # All controls live in one box, laid out in a single row.
    controls = WidgetBox(learning_type, site_pair, tai, predict, p)
    return Panel(child=row(controls), title='Message Site Classification')
Esempio n. 6
0
def bkapp(doc):
    """Attach a small blog-post classifier UI to ``doc``.

    The submit handler cleans the entered text, vectorises it with the
    module-level ``transformer`` and shows the ``model`` prediction.

    :param doc: Bokeh document that receives the widget box as a root
    """
    post_input = TextInput(title = "Submit Blog Post:")
    submit_button = Button(label="Submit", button_type="success")
    result_box = Paragraph(text="Blog entry here")

    def update_data(event):
        cleaned_text = ' '.join(clean(str(post_input.value)))
        predicted = model.predict(transformer.transform([cleaned_text]))
        # A prediction equal to 1 is reported as 'Male', anything else as
        # 'Female'.
        pred_text = 'Male' if int(predicted) == 1 else 'Female'
        result_box.text = "{}".format({'prediction': pred_text})

    submit_button.on_click(update_data)
    doc.add_root(WidgetBox(children = [post_input, submit_button, result_box]))
    def plot(self, x=None, y=None, z=None, name=''):
        """
        Assemble the interactive layout: circle plot beside its controls.

        :param x: str, dataframe key
        :param y: str, dataframe key
        :param z: str, dataframe key (not read by this method)
        :param name: str, name of plot, passed to ``_set_output_file``
        :return: row layout with the plot column and the widget column
        """
        self._set_output_file(name)
        self._turn_values_negative(y)

        # Expose the selected columns under the fixed names 'x' and 'y'.
        self.df['x'] = self.df[x]
        self.df['y'] = self.df[y]
        source = ColumnDataSource(self.df)

        circle_plot = self.circle_plot(x=x, y=y, source=source)
        # The line plot is not placed in the layout; the call is retained
        # because it may have side effects on the instance.
        self.line_plot(x=x, y=y, source=source)

        xrange_slider = self._get_xaxis_slider(circle_plot)
        xaxis_selecter, yaxis_selecter = self._get_source_selecters(
            x=x, y=y, source=source)
        data_table = self._get_data_table(source=source)

        controls = WidgetBox(yaxis_selecter, xaxis_selecter, xrange_slider,
                             data_table)

        plot_column = column(circle_plot, sizing_mode='scale_width')
        control_column = column(controls, sizing_mode='scale_width')
        return row([plot_column, control_column], sizing_mode='scale_width')
Esempio n. 8
0
File: ui.py Progetto: razz0/kisaviz
def daily_most_common_ticks():
    """
    A Table view of the most common ticks per day.

    Every item returned by ``kisa.get_daily_popular_ticks(start_date,
    end_date)`` is unpacked as ``(date, entry)`` where ``entry[0]`` is the
    species and ``entry[1]`` the count.  When no items are returned the table
    is simply empty.

    :return: Panel
    """
    ticks = kisa.get_daily_popular_ticks(start_date, end_date)

    # Materialise once so the result may be any iterable, including a
    # generator.
    rows = [(day, entry[0], entry[1]) for day, entry in ticks]
    if rows:
        dates, species, counts = (list(col) for col in zip(*rows))
    else:
        # zip(*[]) yields nothing, so a three-way unpack would raise
        # ValueError; fall back to empty columns instead.
        dates, species, counts = [], [], []

    tick_source = ColumnDataSource({
        'date': dates,
        'species': species,
        'count': counts,
    })

    columns = [TableColumn(field="date", title="Päivämäärä"),
               TableColumn(field="species", title="Päivän yleisin laji"),
               TableColumn(field="count", title="Pinnojen lukumäärä")]
    table = DataTable(source=tick_source, columns=columns, width=PLOT_WIDTH, height=PLOT_HEIGHT)
    layout = WidgetBox(table)

    return Panel(child=layout, title="Päivien yleisimmät")
Esempio n. 9
0
def forecasting_bcc_rentals_visitor_tab(panel_title):
    """Build the forecasting tab for BCC rentals/visitors.

    Loads the ``bcc_composite`` table, prepares it, and lays out a Prophet
    forecast plot next to its controls.  Any exception raised while building
    the tab is logged and replaced by ``tab_error_flag(panel_title)``.

    :param panel_title: title used for the returned Panel
    :return: Panel containing the dashboard grid, or the error-flag tab
    """
    class Thistab(Mytab):
        """State holder and plot factory for this tab."""

        def __init__(self, table, cols, dedup_cols=None):
            """Set up defaults, menus, select widgets and header divs.

            :param table: name of the table to load
            :param cols: columns kept by ``filter_df``
            :param dedup_cols: forwarded to ``Mytab.__init__``; ``None``
                means an empty list (avoids a shared mutable default)
            """
            Mytab.__init__(self, table, cols,
                           [] if dedup_cols is None else dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            self.trigger = 0
            # column -> aggregation applied after groupby/resample
            self.groupby_dict = {
                'category': 'nunique',
                'item': 'nunique',
                'area': 'nunique',
                'visit_duration': 'mean',
                'age': 'mean',
                'gender_coded': 'mean',
                'status_coded': 'mean',
                'rental_employee_gender_coded': 'mean',
                'rental_employee_age': 'mean',
                'rental_tab': 'sum'
            }

            # prep_data reads this attribute, but nothing in this class set
            # it before the first call.  Keep a value the base class may
            # have provided, otherwise use 'D' (the resample select's
            # initial value).
            self.resample_period = getattr(self, 'resample_period', 'D')

            self.feature_list = ['age', 'rental_employee_age', 'rental_tab']
            self.tsa_variable = 'rental_tab'
            self.forecast_days = 40
            self.lag_variable = 'visit_duration'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = 'rental_tab'

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            self.pym = PythonMongo('aion')
            self.menus = {
                'item': ['all'],
                'category': ['all'],
                'status': ['all', 'guest', 'member'],
                'gender': ['all', 'male', 'female'],
                'variables':
                list(self.groupby_dict.keys()),
                'history_periods':
                ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
                'area': ['all', 'bar', 'rentals'],
                'tsa': ['rental_tab', 'visit_duration']
            }
            self.select = {}
            self.select['area'] = Select(title='Select BCC area',
                                         value='all',
                                         options=self.menus['area'])

            self.select['item'] = Select(title='Select item',
                                         value='all',
                                         options=self.menus['item'])

            self.select['status'] = Select(title='Select visitor status',
                                           value='all',
                                           options=self.menus['status'])

            self.select['gender'] = Select(title="Select visitor gender",
                                           value='all',
                                           options=self.menus['gender'])

            self.select['category'] = Select(title="Select category",
                                             value='all',
                                             options=self.menus['category'])

            # NOTE(review): this select reuses the category title and
            # options - confirm whether that is intended.
            self.select['rental_employee_gender'] = Select(
                title="Select category",
                value='all',
                options=self.menus['category'])

            # Current value of every select; all start at 'all'.
            self.select_values = {}
            for item in self.select.keys():
                self.select_values[item] = 'all'

            self.multiline_vars = {'x': 'gender', 'y': 'rental_tab'}
            self.timestamp_col = 'visit_start'
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                    <div style="text-align:center;width:{}px;height:{}px;
                           position:relative;background:black;margin-bottom:200px">
                           <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=lag_section_head_txt,
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(text='Pre-transform distribution:',
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'correlations':
                self.section_header_div(text='Correlations:',
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
                'forecast':
                self.section_header_div(text='Forecasts:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a Div holding ``text`` wrapped in a coloured header tag.

            NOTE(review): the template already contains a minus sign before
            the bottom margin, so the negative values passed by every caller
            render as ``margin-bottom:--155px``.  Left unchanged because
            fixing it would alter the current page layout - confirm intent.
            """
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Show ``text`` in both the top and bottom notification divs."""
            txt = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a Div holding ``text`` as a coloured ``<h2>``."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Return a Div explaining how to read the relationship output."""
            div_style = """ 
                style='width:350px; margin-left:-600px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////

        def load_df(self, req_startdate, req_enddate, table, cols,
                    timestamp_col):
            """Return rows between the two dates, reusing ``self.df`` if able.

            When the frame already held spans the requested window it is
            sliced; otherwise the request goes to ``self.pym.load_df``.
            Returns ``None`` (after logging) if anything raises.

            NOTE(review): the cache check reads ``self.df[timestamp_col]`` as
            a column, while ``prep_data`` stores a timestamp-indexed frame -
            confirm that callers use this before ``prep_data`` runs.
            """
            try:
                # get min and max of loaded df
                if self.df is not None:
                    loaded_min = self.df[timestamp_col].min()
                    loaded_max = self.df[timestamp_col].max()

                    if loaded_min <= req_startdate and loaded_max >= req_enddate:
                        df = self.df[(self.df[timestamp_col] >= req_startdate)
                                     & (self.df[timestamp_col] <= req_enddate)]
                        return df
                return self.pym.load_df(req_startdate,
                                        req_enddate,
                                        table=table,
                                        cols=cols,
                                        timestamp_col=timestamp_col)

            except Exception:
                logger.error('load_df', exc_info=True)

        def filter_df(self, df1):
            """Restrict ``df1`` to ``self.cols`` and apply the select filters.

            Returns ``None`` (after logging) if anything raises.

            NOTE(review): rows are filtered only for ``groupby_dict`` entries
            whose aggregation is ``'count'``; no entry defined in
            ``__init__`` uses it, so no rows are dropped at present.
            """
            try:
                df1 = df1[self.cols]

                for key, value in self.groupby_dict.items():
                    if value == 'count':
                        if self.select_values[key] != 'all':
                            df1 = df1[df1[key] == self.select_values[key]]
                return df1

            except Exception:
                logger.error('filter', exc_info=True)

        def prep_data(self, df):
            """Filter and encode ``df``, then store the prepared frames.

            Sets ``self.df`` (indexed by ``self.timestamp_col``) and
            ``self.df1`` (grouped by ``'name'``, resampled by
            ``self.resample_period`` and aggregated with
            ``self.groupby_dict``).  Returns ``None``; errors are logged.
            """
            try:
                # The frame stored by an earlier call is indexed by the
                # timestamp.  Callbacks pass that frame back in, so restore
                # the column first; otherwise the column selection and
                # set_index below raise KeyError.
                if df.index.name == self.timestamp_col:
                    df = df.reset_index()

                df = self.filter_df(df)
                # set up code columns
                codes = {
                    'gender': {
                        'male': 1,
                        'female': 2,
                        'other': 3
                    },
                    'status': {
                        'guest': 1,
                        'member': 2
                    }
                }
                for col in df.columns:
                    coded_col = col + '_coded'
                    # substring match: also covers rental_employee_gender
                    if 'gender' in col:
                        df[coded_col] = df[col].map(codes['gender'])
                    if 'status' == col:
                        df[coded_col] = df[col].map(codes['status'])

                self.df = df.set_index(self.timestamp_col)
                # groupby and resample
                self.df1 = self.df.groupby('name').resample(
                    self.resample_period).agg(self.groupby_dict)
                self.df1 = self.df1.reset_index()
                self.df1 = self.df1.fillna(0)

                logger.warning('LINE 288 df:%s', self.df1.head(10))

            except Exception:
                logger.error('prep data', exc_info=True)

        def tsa(self, launch):
            """Fit Prophet on the daily mean of ``self.tsa_variable``.

            :param launch: stream trigger value; not read in the body
            :return: forecast overlay (yhat line plus bound scatters) laid
                out next to the trend/weekly overlay, or ``None`` (after
                logging) if anything raises
            """
            try:
                df = self.df.resample('D').agg({self.tsa_variable: 'mean'})
                df = df.reset_index()
                label = self.tsa_variable + '_diff'
                df[label] = df[self.tsa_variable].diff()
                df = df.fillna(0)

                # Prophet expects the columns 'ds' and 'y'
                rename = {self.timestamp_col: 'ds', self.tsa_variable: 'y'}
                df = df.rename(columns=rename)
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)

                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())
                print(list(forecast.columns))
                # first column starts the overlay as a line, bounds are
                # added as scatters
                for idx, col in enumerate(['yhat', 'yhat_lower',
                                           'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds',
                                                 y=col,
                                                 width=600,
                                                 height=250,
                                                 value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(x='ds',
                                                     y=col,
                                                     width=600,
                                                     height=250,
                                                     value_label='$',
                                                     legend=False).relabel(col)

                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds',
                                                 y=col,
                                                 width=550,
                                                 height=250,
                                                 value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds',
                                                  y=col,
                                                  width=550,
                                                  height=250,
                                                  value_label='$',
                                                  legend=False).relabel(col)

                return p + q
            except Exception:
                logger.error("TSA:", exc_info=True)

    # ------------------------ widget callbacks ------------------------
    # The callbacks close over names (thistab, the streams, the widgets)
    # that are bound inside the try block below.

    def update_variable(attr, old, new):
        """Switch the analysed variable and relaunch matrix/corr streams."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.variable = new
        # NOTE(review): section_head_updater is not defined in Thistab;
        # presumably inherited from Mytab - confirm.
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        """Copy the select values into the tab state and re-prepare data."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        for item in thistab.select_values.keys():
            thistab.select_values[item] = thistab.select[item].value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Reload data for the chosen date range and re-prepare it."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)

        thistab.df['gender_code'] = thistab.df['gender'].apply(
            lambda x: 1 if x == 'male' else 2)
        # prep_data stores df/df1 itself and returns None, so its result
        # must not be assigned to df1.
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Apply a new resample period and re-prepare the data."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        # prep_data stores df/df1 itself and returns None (see update).
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Read the lag text input and relaunch the lag streams.

        NOTE(review): no widget in this function is wired to this handler.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        """Store the chosen multiline x/y variables and relaunch."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars['x'] = multiline_x_select.value
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_forecast(attrname, old, new):
        """Store the forecast horizon/variable and relaunch the forecast."""
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.tsa_variable = forecast_variable_select.value
        thistab.trigger += 1
        stream_launch_tsa.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'bcc_composite'
        cols = cols_to_load['guest'] + cols_to_load['rental']
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=1)
        first_date = last_date - timedelta(days=1000)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)

        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_tsa = streams.Stream.define('Launch_tsa', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])

        forecast_variable_select = Select(title='Select forecast variable',
                                          value=thistab.menus['tsa'][0],
                                          options=thistab.menus['tsa'])

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        multiline_y_select = Select(
            title='Select comparative DV(y)',
            value=thistab.multiline_vars['y'],
            options=['price', 'amount', 'visit_duration'])

        multiline_x_select = Select(title='Select comparative IV(x)',
                                    value=thistab.multiline_vars['x'],
                                    options=[
                                        'category', 'gender',
                                        'rental_employee_gender', 'status',
                                        'item'
                                    ])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------

        # forecast plot, re-rendered whenever stream_launch_tsa fires
        hv_tsa = hv.DynamicMap(thistab.tsa, streams=[stream_launch_tsa])
        tsa = renderer.get_plot(hv_tsa)

        # handle callbacks
        variable_select.on_change('value', update_variable)
        resample_select.on_change('value', update_resample)
        thistab.select['area'].on_change('value', update_IVs)
        thistab.select['gender'].on_change('value', update_IVs)
        thistab.select['rental_employee_gender'].on_change('value', update_IVs)
        thistab.select['item'].on_change('value', update_IVs)
        thistab.select['category'].on_change('value', update_IVs)
        thistab.select['status'].on_change('value', update_IVs)
        select_forecast_days.on_change('value', update_forecast)
        forecast_variable_select.on_change('value', update_forecast)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)

        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_tsa = WidgetBox(datepicker_start, datepicker_end,
                                 variable_select, thistab.select['status'],
                                 resample_select, thistab.select['gender'],
                                 thistab.select['category'],
                                 thistab.select['area'],
                                 forecast_variable_select,
                                 select_forecast_days)

        # create the dashboards

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [tsa.state, controls_tsa],
                         [Spacer(width=20, height=30)],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
Esempio n. 10
0
def pm_risk_assessment_tab(panel_title):
    """Build the risk-assessment dashboard tab.

    Returns a Panel titled ``panel_title`` that lays out an action table,
    the risk matrix with its selectors and a solution/risk correlation
    table.  If building the tab raises, the error is logged and the result
    of ``tab_error_flag(panel_title)`` is returned instead.
    """
    # Backing data of the risk-matrix table: the severity labels plus one
    # column per likelihood rating.  Filled by Thistab.risk_matrix().
    risk_matrix_src = ColumnDataSource(data=dict(Severity=[],
                                                 Unlikely=[],
                                                 Seldom=[],
                                                 Occaisional=[],
                                                 Likely=[],
                                                 Definite=[]))

    # Backing data of the correlation table.  Filled by Thistab.corr().
    corr_src = ColumnDataSource(data=dict(
        variable_1=[], variable_2=[], relationship=[], r=[], p_value=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """Set up state, data connections and widgets for the risk tab.

            Args:
                table: name of the backing table, forwarded to ``Mytab``.
                cols: columns to load, forwarded to ``Mytab``.
                dedup_cols: de-duplication columns forwarded to ``Mytab``;
                    ``None`` (the default) stands for an empty list.
            """
            # Build a fresh list per instance: a literal ``[]`` default is
            # created once and shared by every call that relies on it.
            dedup_cols = [] if dedup_cols is None else dedup_cols
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None  # full risk dataframe, set by load_df()
            self.df1 = None  # rows of the selected matrix, set by set_risks()
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            # counter pushed through the HoloViews streams to force a redraw
            self.trigger = 0

            self.groupby_dict = {}

            self.div_style = """ style='width:350px; margin-left:25px;
                                    border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                                    """

            self.header_style = """ style='color:blue;text-align:center;' """
            self.variable = 'delay_end'

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            self.status = 'all'
            self.gender = 'all'
            self.type = 'all'
            # label -> numeric rating; the labels double as the row and
            # column names of the risk matrix
            self.ratings = {
                'severity': {
                    'Insignificant': 1,
                    'Minor': 2,
                    'Moderate': 3,
                    'Critical': 4,
                    'Catastrophic': 5
                },
                'likelihood': {
                    'Unlikely': 1,
                    'Seldom': 2,
                    'Occaisional': 3,
                    'Likely': 4,
                    'Definite': 5
                }
            }

            self.variables = {
                'severity': list(self.ratings['severity'].keys()),
                'likelihood': list(self.ratings['likelihood'].keys()),
            }
            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'gender': ['all', 'male', 'female'],
            }
            self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'}
            self.timestamp_col = 'analysis_date'

            # selection state; populated by load_df() / set_risks()
            self.risks = []
            self.risk = ''
            self.matrices = []
            self.matrix = ''
            self.risk_select = Select(title='Select risk',
                                      value=self.risk,
                                      options=self.risks)
            # composite-score cut-offs used by action_table()
            self.risk_threshold = {'acceptable': 8, 'doubtful': 15}

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                                position:relative;background:black;margin-bottom:200px">
                                                <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                          </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=lag_section_head_txt,
                                        width=1000,
                                        html_header='h2',
                                        margin_top=50,
                                        margin_bottom=5),
                'distribution':
                self.section_header_div(text='Pre-transform distribution',
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'matrix':
                self.section_header_div(text='Risk Matrix:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'risk_solution':
                self.section_header_div(
                    text='Risk Matrix vs Solution :{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a Div that shows ``text`` as a coloured section heading.

            Args:
                text: heading text.
                html_header: HTML tag wrapped around the text (e.g. 'h2').
                width: width passed to the Div.
                margin_top: value written into the wrapper's ``margin-top``.
                margin_bottom: value written after ``margin-bottom:-``.
            """
            # NOTE(review): the template itself supplies a "-" in front of
            # margin_bottom, so a negative argument (including the default)
            # is emitted as e.g. "margin-bottom:--150px" - confirm whether
            # callers are meant to pass a positive magnitude.
            template = ('<div style="margin-top:{}px;margin-bottom:-{}px;">'
                        '<{} style="color:#4221cc;">{}</{}></div>')
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

        def load_df(self):
            """Load the risk collections from Mongo and join them into self.df.

            Reads 'risk_matrix', 'risk', 'risk_analysis', 'risk_severity' and
            'risk_likelihood', joins them into one frame with numeric
            ``severity`` / ``likelihood`` columns plus their product
            ``composite``, then initialises the matrix and risk selections.
            Nothing is set when 'risk_matrix' is empty.  Any exception is
            logged and swallowed.
            """
            try:
                risk_matrx = json_normalize(
                    list(self.pym.db['risk_matrix'].find()))
                logger.warning('LINE 169:RISK MATIRX:%s', risk_matrx.head())
                if len(risk_matrx) > 0:
                    # presumably drop_cols removes the listed columns -
                    # helper is defined outside this block
                    risk_matrx = drop_cols(risk_matrx, ['desc'])
                    logger.warning('LINE 159:RISK MATIRX:%s',
                                   risk_matrx.head())

                    risk = json_normalize(list(self.pym.db['risk'].find()))
                    risk = risk.rename(columns={'matrix': 'matrix_id'})
                    analysis = json_normalize(
                        list(self.pym.db['risk_analysis'].find()))
                    analysis = drop_cols(analysis, ['_id'])
                    analysis = analysis.rename(columns={'risk': 'risk_id'})

                    # attach every analysis row to the risk it rates
                    risk = risk.merge(analysis,
                                      how='inner',
                                      left_on='_id',
                                      right_on='risk_id')
                    # the risk '_id' is dropped here so that the '_id' used
                    # in the next merge is the matrix id
                    risk = drop_cols(risk, [
                        '_id', 'likelihood_comment', 'severity_comment',
                        'desc', 'risk_id'
                    ])
                    logger.warning('LINE 167:RISK:%s', risk.head())
                    logger.warning('LINE 169:RISK MATIRX:%s',
                                   risk_matrx.head())

                    # attach the owning matrix to every rated risk
                    risk = risk_matrx.merge(risk,
                                            how='inner',
                                            left_on='_id',
                                            right_on='matrix_id')

                    df = drop_cols(risk, ['_id', 'matrix_id', 'analyst'])
                    df = df.rename(columns={'name': 'matrix'})
                    dfs = {}
                    # replace the severity / likelihood references by the
                    # numeric 'value' of the referenced lookup row
                    for component in ['severity', 'likelihood']:
                        table = 'risk_' + component
                        dfs[component] = json_normalize(
                            list(self.pym.db[table].find()))

                        dfs[component] = drop_cols(dfs[component],
                                                   ['desc', 'level'])
                        df = df.merge(dfs[component],
                                      how='left',
                                      left_on=component,
                                      right_on='_id')
                        # the reference column is dropped before 'value'
                        # takes over its name
                        df = drop_cols(df, ['_id', 'project', component])
                        df = df.rename(columns={'value': component})
                        # unmatched references count as a rating of 0
                        df[component] = df[component].fillna(0)
                    df['composite'] = df.severity * df.likelihood

                    # set selection variables
                    logger.warning('LINE 154 df:%s', df)
                    self.df = df
                    self.matrices = list(df['matrix'].unique())
                    # raises IndexError (logged below) when the joins left
                    # no rows
                    self.matrix = self.matrices[0]
                    self.set_risks(df, matrix=self.matrix)

            except Exception:
                logger.error('load df', exc_info=True)

        def set_risks(self, df, matrix):
            """Restrict the working data to one matrix and refresh the risk menu.

            Args:
                df: dataframe with at least 'matrix' and 'risk' columns.
                matrix: value of the 'matrix' column to keep.

            Sets ``self.risks``, ``self.risk`` (first risk, or '' when the
            matrix has none), the options of ``self.risk_select`` and
            ``self.df1``.  Any exception is logged and swallowed.
            """
            try:
                df = df[df.matrix == matrix]
                self.risks = list(df['risk'].unique())
                # A matrix without risks must not abort the refresh half-way:
                # indexing [0] on an empty list used to raise before the menu
                # options and df1 were updated, leaving stale state behind.
                self.risk = self.risks[0] if self.risks else ''
                self.risk_select.options = self.risks

                self.df1 = df
            except Exception:
                logger.error('set risks', exc_info=True)

        # //////////////  DIVS   //////////////////

        def title_div(self, text, width=700):
            """Return a Div of the given width showing ``text`` as an <h2> title."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        # ////////////// GRAPHS ////////////////////

        def action_table(self, launch):
            """Render a table with the recommended action per (matrix, risk).

            Severity and likelihood are averaged per (matrix, risk); their
            product is labelled using ``self.risk_threshold``.  The method
            also calls ``self.risk_matrix()`` so the matrix table is
            refreshed in the same pass.

            Args:
                launch: stream parameter supplied by the DynamicMap; it is
                    not read here.

            Returns:
                An hvplot table, or None when an exception was logged.
            """
            try:

                def label_action(x):
                    # below 'acceptable' -> go; below 'doubtful' -> last
                    # resort; otherwise stop
                    if x < self.risk_threshold['acceptable']:
                        return 'Proceed (risk is acceptable)'
                    elif x < self.risk_threshold['doubtful']:
                        return 'Proceed, if no other options are available'
                    else:
                        # label previously read "Do no proceed"
                        return 'Do not proceed (Risk unacceptable)'

                df = self.df
                df = df.groupby(['matrix', 'risk']).agg({
                    'likelihood': 'mean',
                    'severity': 'mean'
                })
                df = df.reset_index()
                df['composite'] = df.likelihood * df.severity
                df['action'] = df['composite'].map(label_action)
                self.risk_matrix()
                return df.hvplot.table(
                    columns=[
                        'matrix', 'risk', 'severity', 'likelihood', 'action'
                    ],
                    width=1000,
                )
            except Exception:
                logger.error('action table', exc_info=True)

        def risk_matrix(self):
            """Rebuild the risk matrix for the currently selected risk.

            Averages severity and likelihood of ``self.risk`` within
            ``self.df1`` (truncated to int), fills ``risk_matrix_src`` with a
            severity x likelihood grid of rating products and marks the cell
            that matches the selected risk with a 'BINGO ' prefix.  Also
            calls ``self.corr()``.

            Returns:
                A DataTable bound to ``risk_matrix_src``, or None when an
                exception was logged.
            """
            try:
                # filter
                df = self.df1
                df = df.groupby(['matrix', 'risk']).agg({
                    'likelihood': 'mean',
                    'severity': 'mean'
                })
                df = df.reset_index()
                df = df[df['risk'] == self.risk]
                severity_value = int(df['severity'].mean())
                likelihood_value = int(df['likelihood'].mean())
                logger.warning('severity=%s,likelihood=%s', severity_value,
                               likelihood_value)

                # make the matrix: one table row per severity label, one
                # table column per likelihood label
                severity_ratings = self.ratings['severity']
                likelihood_ratings = self.ratings['likelihood']
                dct = {
                    'Severity': list(severity_ratings.keys()),
                }

                for val_col, likelihood_rating in likelihood_ratings.items():
                    dct[val_col] = []
                    for severity_rating in severity_ratings.values():
                        val = likelihood_rating * severity_rating
                        # Compare like with like.  The previous test matched
                        # the likelihood index against the severity value
                        # (and vice versa), which marked the transposed cell
                        # whenever the two ratings differed.
                        if (severity_rating == severity_value
                                and likelihood_rating == likelihood_value):
                            logger.warning('CONDITIONS MET')
                            txt = 'BINGO ' + str(val)
                        else:
                            txt = val

                        dct[val_col].append(txt)

                logger.warning('LINE 288 %s - length=%s', val_col,
                               len(dct[val_col]))

                # rollover == number of rows, so the new grid replaces the
                # previous one instead of being appended below it
                risk_matrix_src.stream(dct, rollover=(len(dct['Severity'])))
                columns = [
                    TableColumn(field="Severity", title='severity'),
                    TableColumn(
                        field="Unlikely",
                        title='unlikely',
                        formatter=dashboard_config['formatters']['Unlikely']),
                    TableColumn(
                        field="Seldom",
                        title='seldom',
                        formatter=dashboard_config['formatters']['Seldom']),
                    TableColumn(field="Occaisional",
                                title='occaisional',
                                formatter=dashboard_config['formatters']
                                ['Occaisional']),
                    TableColumn(
                        field="Likely",
                        title='likely',
                        formatter=dashboard_config['formatters']['Likely']),
                    TableColumn(
                        field="Definite",
                        title='definite',
                        formatter=dashboard_config['formatters']['Definite']),
                ]
                risk_matrix_table = DataTable(source=risk_matrix_src,
                                              columns=columns,
                                              width=800,
                                              height=500)
                self.corr()
                return risk_matrix_table
            except Exception:
                logger.error('risk matrix', exc_info=True)

        def correlate_solution_risk(self, launch):
            """Overlay scatter plots of solution against the risk scores.

            Loads 'project_composite1', averages severity, likelihood and
            solution per project, and plots solution against severity,
            likelihood and their product.

            Args:
                launch: stream parameter supplied by the DynamicMap; it is
                    not read here.

            Returns:
                The overlaid scatter plots, or None when an exception was
                logged.
            """
            try:
                wanted_fields = {
                    'severity': 1,
                    'likelihood': 1,
                    'solution': 1,
                    'project_owner_gender': 1,
                    'project': 1
                }
                records = list(self.pym.db['project_composite1'].find(
                    {}, wanted_fields))
                df = json_normalize(records)
                # first element of each 'solution' entry, scaled by 10
                df['solution'] = df.solution.apply(lambda x: x[0] * 10)

                per_project = df.groupby(['project']).agg({
                    'severity': 'mean',
                    'likelihood': 'mean',
                    'solution': 'mean'
                })
                df = per_project.reset_index()
                df['composite'] = df.severity * df.likelihood
                logger.warning('df:%s', df.head(20))

                overlay = None
                for col in ('severity', 'likelihood', 'composite'):
                    scatter = df.hvplot.scatter(x='solution', y=col)
                    overlay = scatter if overlay is None else overlay * scatter
                return overlay
            except Exception:
                logger.error('correlate solution risk', exc_info=True)

        def risk_information_div(self, width=400, height=300):
            """Return a Div explaining the colour legend of the risk matrix.

            The markup is filled with ``self.div_style`` and
            ``self.header_style``.
            """
            legend_html = """
                   <div {}>
                   <h4 {}>How to interpret Risk assessment matrix:</h4>
                   <ul style='margin-top:-10px;'>
                       <li>
                       Red: Unacceptable risk. Do NOT proceed.
                       </li>
                       <li>
                       Yellow: Risky. Proceed only after ensuring better options aren't reasonable available
                       </li>
                       <li>
                       Green: Acceptable risk. Proceed.
                       </li>
                   </ul>
                   </div>
    
                   """.format(self.div_style, self.header_style)
            return Div(text=legend_html, width=width, height=height)

        # calculate the correlation produced by the lags vector
        def corr(self):
            """Correlate solution scores with the risk scores per project.

            Loads 'project_composite1', averages severity, likelihood and
            solution per project, and runs ``self.corr_label`` for solution
            against composite, severity and likelihood.  The three result
            rows are streamed into ``corr_src``.

            Returns:
                A DataTable bound to ``corr_src``, or None when an exception
                was logged.
            """
            try:
                table_data = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'r': [],
                    'p_value': []
                }
                wanted_fields = {
                    'severity': 1,
                    'likelihood': 1,
                    'solution': 1,
                    'project_owner_gender': 1,
                    'project': 1
                }
                records = list(self.pym.db['project_composite1'].find(
                    {}, wanted_fields))
                df = json_normalize(records)
                # first element of each 'solution' entry, scaled by 10
                df['solution'] = df.solution.apply(lambda x: x[0] * 10)

                per_project = df.groupby(['project']).agg({
                    'severity': 'mean',
                    'likelihood': 'mean',
                    'solution': 'mean'
                })
                df = per_project.reset_index()
                df['composite'] = df.severity * df.likelihood
                logger.warning('df:%s', df.head(20))

                solution_scores = df['solution'].tolist()
                for col in ['composite', 'severity', 'likelihood']:
                    logger.warning('column:%s', col)
                    other_scores = df[col].tolist()
                    # slope and intercept are returned but not reported
                    _slope, _intercept, rvalue, pvalue, txt = self.corr_label(
                        solution_scores, other_scores)
                    row = {
                        'variable_1': 'solution',
                        'variable_2': col,
                        'relationship': txt,
                        'r': round(rvalue, 3),
                        'p_value': round(pvalue, 3)
                    }
                    for field, value in row.items():
                        table_data[field].append(value)

                # rollover == number of rows produced above, so each call
                # replaces the previous results
                corr_src.stream(table_data, rollover=3)
                table_columns = [
                    TableColumn(field="variable_1", title="variable 1"),
                    TableColumn(field="variable_2", title="variable 2"),
                    TableColumn(field="relationship", title="relationship"),
                    TableColumn(field="r", title="r"),
                    TableColumn(field="p_value", title="p_value"),
                ]
                return DataTable(source=corr_src,
                                 columns=table_columns,
                                 width=900,
                                 height=400)
            except Exception:
                logger.error(' corr', exc_info=True)

    def update_matrix(attrname, old, new):
        """Bokeh 'value' callback of the matrix selector.

        Switches the active matrix, refreshes the risk list and fires the
        action-table and matrix streams with an incremented trigger.
        """
        notify = thistab.notification_updater
        notify("Calculations in progress! Please wait.")
        thistab.matrix = matrix_select.value
        thistab.set_risks(thistab.df, matrix=thistab.matrix)
        thistab.trigger += 1
        for stream in (stream_launch_action_table, stream_launch_matrix):
            stream.event(launch=thistab.trigger)
        notify("Ready!")

    def update_risk(attrname, old, new):
        """Bokeh 'value' callback of the risk selector.

        Stores the selected risk, fires the matrix stream with an
        incremented trigger and rebuilds the risk matrix directly.
        """
        notify = thistab.notification_updater
        notify("Calculations in progress! Please wait.")
        thistab.risk = thistab.risk_select.value
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        thistab.risk_matrix()
        notify("Ready!")

    try:
        # SETUP
        table = 'project_composite'
        thistab = Thistab(table, [], [])
        thistab.load_df()
        thistab.corr()

        # MANAGE STREAM
        stream_launch_action_table = streams.Stream.define('Launch',
                                                           launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch', launch=-1)()
        stream_launch_risk_solution = streams.Stream.define('Launch',
                                                            launch=-1)()

        # MAKE TABLES
        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="Severity", title="severity"),
            TableColumn(field="Unlikely",
                        title='unlikely',
                        formatter=dashboard_config['formatters']['Unlikely']),
            TableColumn(field="Seldom",
                        title='seldom',
                        formatter=dashboard_config['formatters']['Seldom']),
            TableColumn(
                field="Occaisional",
                title='occaisional',
                formatter=dashboard_config['formatters']['Occaisional']),
            TableColumn(field="Likely",
                        title='likely',
                        formatter=dashboard_config['formatters']['Likely']),
            TableColumn(field="Definite",
                        title='definite',
                        formatter=dashboard_config['formatters']['Definite']),
        ]

        risk_matrix = DataTable(source=risk_matrix_src,
                                columns=columns,
                                width=800,
                                height=500)

        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        corr_table = DataTable(source=corr_src,
                               columns=columns,
                               width=500,
                               height=280)

        width = 800

        hv_action_table = hv.DynamicMap(thistab.action_table,
                                        streams=[stream_launch_action_table])
        action_table = renderer.get_plot(hv_action_table)

        hv_risk_solution = hv.DynamicMap(thistab.correlate_solution_risk,
                                         streams=[stream_launch_risk_solution])
        risk_solution = renderer.get_plot(hv_risk_solution)

        # CREATE WIDGETS
        matrix_select = Select(title='Select matrix',
                               value=thistab.matrix,
                               options=thistab.matrices)

        # handle callbacks
        matrix_select.on_change('value', update_matrix)
        thistab.risk_select.on_change('value', update_risk)

        # create the dashboards
        controls = WidgetBox(matrix_select, thistab.risk_select)

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.title_div('Determine action', 400)],
                         [Spacer(width=20, height=30)], [action_table.state],
                         [thistab.section_headers['matrix']],
                         [Spacer(width=20, height=30)],
                         [risk_matrix, controls],
                         [thistab.section_headers['risk_solution']],
                         [Spacer(width=20, height=30)], [corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
Esempio n. 11
0
def crypto_clusters_eda_tab(cryptos, panel_title):
    global groupby_dict
    global features
    global cluster_dct
    #global source

    redis = PythonRedis()
    cluster_dct = redis.simple_load('clusters:cryptocurrencies')
    if cluster_dct is not None:
        groupby_dict = {}
        for var in cluster_dct['features']:
            groupby_dict[var] = 'sum'

        features = cluster_dct['features']
        source = {}
        for feature in features:
            source[feature] = ColumnDataSource(
                data=dict(xs=[], ys=[], labels=[], colors=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """Set up state and notification widgets for the clusters tab.

            Args:
                table: name of the backing table, forwarded to ``Mytab``.
                cols: columns to load, forwarded to ``Mytab``.
                dedup_cols: de-duplication columns forwarded to ``Mytab``;
                    ``None`` (the default) stands for an empty list.
            """
            # Build a fresh list per instance: a literal ``[]`` default is
            # created once and shared by every call that relies on it.
            dedup_cols = [] if dedup_cols is None else dedup_cols
            Mytab.__init__(self,
                           table,
                           cols,
                           dedup_cols,
                           panel_title=panel_title)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None  # resampled per-cluster frame, set by prep_data()
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = [
                'watch', 'fork', 'issue', 'release', 'push', 'tw_mentions',
                'tw_positive', 'tw_compound', 'tw_neutral', 'tw_negative',
                'tw_emojis_positive', 'tw_emojis_compound',
                'tw_emojis_negative', 'tw_emojis_count', 'tw_reply_hashtags'
            ]
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0
            txt = """<div style="text-align:center;background:black;width:100%;">
                                                                           <h1 style="color:#fff;">
                                                                           {}</h1></div>""".format(
                'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=1400, height=20),
                'bottom': Div(text=txt, width=1400, height=10),
            }
            # cluster definitions and derived settings prepared by the
            # enclosing function from the cached clustering result
            self.cluster_dct = cluster_dct
            self.groupby_dict = groupby_dict
            self.features = features
            self.crypto = 'all'

            self.div_style = """ style='width:350px; margin-left:25px;
                                    border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                                    """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.significant_effect_dict = {}
            self.section_headers = {
                'ts':
                self.section_header_div(
                    'Comparison of clusters across variables:---------------------',
                    width=600)
            }
            self.timestamp_col = None
            self.colors = None

        # ----------------------  DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=1400):
            """Return a Div showing ``text`` wrapped in the ``html_header`` tag."""
            markup = '<{} style="color:#4221cc;">{}</{}>'.format(
                html_header, text, html_header)
            return Div(text=markup, width=width, height=15)

        def information_div(self, width=400, height=300):
            """Return the help Div for this tab.

            The markup is filled with ``self.div_style`` and
            ``self.header_style``; the list items are currently empty
            placeholders.
            """
            help_html = """
               <div {}>
               <h4 {}>How to interpret relationships </h4>
               <ul style='margin-top:-10px;'>
                   <li>
                   </li>
                   <li>
                   </li>
                   <li>
                   </li>
                   <li>
                   </li>
                    <li>
                   </li>
                    <li>
                   </li>
               </ul>
               </div>

               """.format(self.div_style, self.header_style)
            return Div(text=help_html, width=width, height=height)

        # ////////////////////////// UPDATERS ///////////////////////
        def section_head_updater(self, section, txt):
            """Replace the text of one section header Div.

            Args:
                section: key into ``self.section_headers``.
                txt: new markup for that header.

            Any exception (e.g. an unknown section) is logged and swallowed.
            """
            try:
                # The Divs live in the section_headers dict.  The previous
                # code subscripted the section_header_div *method*, which
                # always raised TypeError and never updated anything.
                self.section_headers[section].text = txt
            except Exception:
                logger.error('', exc_info=True)

        def notification_updater(self, text):
            """Show ``text`` as the banner of every notification Div."""
            banner = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for div in self.notification_div.values():
                div.text = banner

        # /////////////////////////// LOAD CLUSTERS  //////////////////////
        def prep_data(self, df, timestamp_col):
            """Aggregate the feature columns per cluster and resample period.

            Each crypto is replaced by the cluster that lists it in
            ``self.cluster_dct`` (unlisted cryptos keep their own name); the
            frame is then grouped by cluster, resampled with
            ``self.resample_period`` and aggregated with
            ``self.groupby_dict``.  The result, indexed by ``timestamp_col``,
            is stored in ``self.df1``.  Any exception is logged and
            swallowed.

            Args:
                df: frame holding the feature columns, 'crypto' and
                    ``timestamp_col``; ``.compute()`` is called on it, so
                    presumably a dask dataframe - confirm against df_load.
                timestamp_col: name of the timestamp column.
            """
            # entries of cluster_dct that describe the clustering run
            # rather than list the members of a cluster
            metadata_keys = ['timestamp', 'variables', 'features']

            def label_cluster(x):
                for key, values in self.cluster_dct.items():
                    if key not in metadata_keys:
                        if x in values:
                            return key
                return x

            try:
                # select by the column that is actually used below, not by a
                # hard-coded 'timestamp'
                cols = self.features + ['crypto', timestamp_col]
                df = df[cols]
                # groupby and resample
                df['crypto'] = df['crypto'].map(lambda x: label_cluster(x))
                df = df.rename(columns={'crypto': 'cluster'})
                df = df.compute()
                df[timestamp_col] = pd.to_datetime(df[timestamp_col],
                                                   errors='coerce')
                df.set_index(timestamp_col, inplace=True)
                df = df.groupby('cluster').resample(self.resample_period).agg(
                    self.groupby_dict)
                # flatten the (cluster, timestamp) index, then index by time
                df.reset_index(inplace=True)
                df.set_index(timestamp_col, inplace=True)
                self.timestamp_col = timestamp_col
                self.df1 = df

            except Exception:
                logger.error('prep data', exc_info=True)

        def graph_ts(self):
            """Push one line per cluster into the data source of each feature.

            Does nothing when ``self.df1`` has not been prepared.  Any
            exception is logged and swallowed.
            """
            try:
                if self.df1 is None:
                    return
                frame = self.df1.copy()
                clusters = frame['cluster'].unique()
                # one palette colour per cluster, in order of appearance
                self.colors = [''] * len(clusters)
                for position in range(len(clusters)):
                    self.colors[position] = dashboard_config['colors'][position]
                if self.features is None:
                    return
                for feature in self.features:
                    # pivot into columns for cluster
                    wide = frame[['cluster', feature]].pivot(columns='cluster')
                    # NOTE(review): the sources are created with 'xs'/'ys'
                    # columns but are filled with 'x'/'y' here - confirm
                    # which names the glyphs read.
                    source[feature].data = dict(
                        x=[wide.index.values] * len(clusters),
                        y=[wide[name].values for name in wide],
                        labels=clusters,
                        colors=self.colors)
            except Exception:
                logger.error('graph ts', exc_info=True)

        def graph_chartify(self, timestamp_col):
            """Return a chartify line chart with one line per cluster.

            Args:
                timestamp_col: column used for the x axis.

            Returns:
                The chart for the first entry of ``self.features``, or None
                when ``self.df1`` is unset, there are no features, or an
                exception was logged.
            """
            try:
                if self.df1 is None:
                    return None
                frame = self.df1.copy().reset_index()

                for feature in self.features:
                    chart = chartify.Chart(blank_labels=True,
                                           x_axis_type='datetime')
                    chart.set_title("CHARTIFY")
                    # Data must be sorted by x column
                    ordered = frame.sort_values(timestamp_col)
                    chart.plot.line(data_frame=ordered,
                                    x_column=timestamp_col,
                                    y_column=feature,
                                    color_column='cluster')
                    # NOTE(review): returning inside the loop means only the
                    # first feature is ever charted - confirm intent.
                    return chart

            except Exception:
                logger.error('graph chartify', exc_info=True)

    def update():
        """Reload the selected date range, then re-aggregate and redraw."""
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value,
                        datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        notify("Ready!")

    def update_resample(attrname, old, new):
        """Apply the newly selected summary period and redraw the plots.

        Registered with ``resample_select.on_change('value', ...)``; the three
        arguments are supplied by that callback mechanism and are not used
        (the current value is read from the widget itself).
        """
        busy_message = "Calculations underway. Please be patient"
        thistab.notification_updater(busy_message)
        thistab.resample_period = resample_select.value
        # No reload here: the already loaded frame is re-prepared.
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        thistab.notification_updater("ready")

    # Build the tab: load the initial data, create widgets and one figure per
    # feature, wire the callbacks and assemble everything into a Panel.
    # Any failure is logged and an error tab is returned instead.
    try:
        table = 'external_daily'
        thistab = Thistab(table, [], [])

        # setup dates
        # NOTE(review): first_date_range is a datetime while last_date_range is
        # a date; both are passed to the same DatePicker bounds -- confirm the
        # widget accepts the mix.
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = dashboard_config['dates']['current_year_start']
        # initial function call
        thistab.df_load(first_date,
                        last_date,
                        timestamp_col='timestamp',
                        cols=[])
        thistab.prep_data(thistab.df, timestamp_col='timestamp')

        # MANAGE STREAMS ---------------------------------------------------------

        # CREATE WIDGETS ----------------------------------------------------------------
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        load_dates_button = Button(
            label="Select dates/periods, then click me!",
            width=20,
            height=8,
            button_type="success")

        resample_select = Select(title='Select summary period',
                                 value=thistab.resample_period,
                                 options=thistab.menus['resample_periods'])

        # -------------------------------- PLOTS ---------------------------
        # graph_ts() is called before the figures exist; the figures below
        # read from the per-feature entries of `source`.
        thistab.graph_ts()
        p = {}
        # NOTE(review): `features` and `source` are not defined in this part
        # of the function; presumably they come from an enclosing scope.
        for feature in features:
            p[feature] = figure(x_axis_type="datetime",
                                plot_width=1400,
                                plot_height=400,
                                title=feature)

            # One line per entry of the 'x'/'y' columns, labelled and coloured
            # from the 'labels'/'colors' columns of the same source.
            p[feature].multi_line(
                xs='x',
                ys='y',
                legend='labels',
                line_color='colors',
                line_width=5,
                hover_line_color='colors',
                hover_line_alpha=1.0,
                source=source[feature],
            )
            p[feature].add_tools(
                HoverTool(show_arrow=False,
                          line_policy='next',
                          tooltips=[
                              ('freq', '$y'),
                          ]))

        # ch = thistab.graph_chartify(timestamp_col='timestamp')
        # -------------------------------- CALLBACKS ------------------------

        load_dates_button.on_click(update)  # lags array
        resample_select.on_change('value', update_resample)

        # -----------------------------------LAYOUT ----------------------------
        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_left = WidgetBox(datepicker_start, load_dates_button)

        controls_right = WidgetBox(datepicker_end)

        # Each inner list is one row of the grid.
        grid_data = [
            #[ch.figure],
            [thistab.notification_div['top']],
            [controls_left, controls_right],
            [thistab.section_headers['ts'], resample_select],
        ]
        # One full-width row per feature figure.
        for feature in features:
            grid_data.append([p[feature]])
            logger.warning('p:%s', p[feature])

        grid_data.append([thistab.notification_div['bottom']])

        grid = gridplot(grid_data)

        # Make a tab with the layout
        tab = Panel(child=grid, title=thistab.panel_title)
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        # NOTE(review): if Thistab(...) itself raised, `thistab` is unbound
        # here and this line fails with a NameError instead of returning the
        # error tab.
        return tab_error_flag(thistab.panel_title)
Esempio n. 12
0
def tab2():
    """Build the "cases by race" tab: a grouped bar chart driven by a date picker.

    Reads ``cdph-race-ethnicity.csv`` from the working directory, keeps the
    rows whose ``age`` is ``'all'`` and, for one date at a time, plots
    ``confirmed_cases_percent`` next to ``population_percent`` for each race.
    The chart initially shows 2020-10-01; picking a date replaces the bars.

    Returns:
        A ``Panel`` holding the date picker and the plot.
    """
    data = pd.read_csv('cdph-race-ethnicity.csv')
    data['date_time'] = pd.to_datetime(data['date'])
    # Most recent date present in the file; shown in the source label so the
    # label cannot go stale when the CSV is refreshed.
    max_date = data['date_time'].max().date()
    data = data[(data['age'] == 'all')]
    percentages = ['confirmed cases', 'general population']
    regions = ['asian', 'black', "cdph-other", 'latino', 'other', 'white']
    # Nested categorical axis: one (race, measure) factor per bar.
    x = [(race, percent) for race in regions for percent in percentages]

    def create_dataset(df):
        """Return a data source with the two percentages interleaved per row.

        NOTE(review): values are paired with ``x`` purely by position, so the
        rows of ``df`` are assumed to be in the same order as ``regions``.
        """
        pairs = zip(df['confirmed_cases_percent'], df['population_percent'])
        counts = tuple(value for pair in pairs for value in pair)
        return ColumnDataSource(data=dict(x=x, counts=counts))

    def create_plot(source):
        """Return the bar chart figure reading from ``source``."""
        p = figure(
            x_range=FactorRange(*x),
            title=
            'Comparison of the percent of cases by race to the general population',
            y_axis_label='Percentage')
        palette = ["#CAB2D6", "#e84d60"]
        # Colour by the second level of the factor (the measure), hence
        # start=1, end=2.
        p.vbar(x='x',
               top='counts',
               width=0.9,
               source=source,
               line_color="white",
               fill_color=factor_cmap('x',
                                      palette=palette,
                                      factors=percentages,
                                      start=1,
                                      end=2))
        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        p.legend.location = "top_left"
        p.legend.orientation = "horizontal"
        p.add_tools(
            HoverTool(tooltips=[('Race, category', "@x"),
                                ('Percentage', "@counts")], ))
        mytext = Label(
            x=20,
            y=-150,
            x_units='screen',
            text=(
                "Source of data: coming from a continual Times survey of California's 58 county health\n "
                " agencies and three city agencies, published on https://www.latimes.com/projects/california-coronavirus-cases-tracking-outbreak/"
                " , access from Github repository https://github.com/datadesk/california-coronavirus-data/blob/master/cdph-race-ethnicity.csv"
                f"  Date of last update: {max_date}"),
            render_mode='css',
            y_units='screen',
            border_line_color='black',
            border_line_alpha=1.0,
            background_fill_color='white',
            background_fill_alpha=1.0,
        )
        p.add_layout(mytext)
        return p

    def callback(attr, old, new):
        """Replace the plotted values with those of the newly picked date."""
        new_src = create_dataset(
            data[(data['date_time'] == date_picker.value)])
        src.data.update(new_src.data)

    # Initial Plot
    src = create_dataset(data[(data['date_time'] == '2020-10-01')])
    p = create_plot(src)
    date_picker = DatePicker(
        title='Click to choose a date (blank means no data)',
        min_date="2020-05-14",
        max_date=date.today())
    date_picker.on_change('value', callback)
    controls = WidgetBox(date_picker)
    layout = row(controls, p)
    tab = Panel(child=layout, title='Percentage of confirmed cases by race')
    return tab
Esempio n. 13
0
            'indices': {}
        }
    }
    source2.selected = {
        '0d': {
            'flag': False,
            'indices': []
        },
        '1d': {
            'indices': []
        },
        '2d': {
            'indices': {}
        }
    }


# Run on_reset_click whenever the reset button is pressed.
reset.on_click(on_reset_click)

# Page layout: a narrow box holding the reset button, then the two plots
# side by side.
widgetBox = WidgetBox(children=[reset], width=150)
row = Row(children=[widgetBox, plot1, plot2])

# Attach the layout to the current document (done on import as well as when
# run as a script).
document = curdoc()
document.add_root(row)

if __name__ == "__main__":
    # Script mode: push the document to a session, open it and keep the
    # process alive until the session is closed.
    print("\npress ctrl-C to exit")
    session = push_session(document)
    session.show()
    session.loop_until_closed()
# NOTE(review): the two Column(...) expressions below are neither assigned
# nor added to a layout in the visible code, so the objects they create are
# discarded -- confirm whether they are leftovers.
Column(w1['genre'],
       w1['director'],
       w1['x_axis'],
       w1['y_axis'],
       sizing_mode=sizing_mode)
Column(w2['genre'],
       w2['director'],
       w2['x_axis'],
       w2['y_axis'],
       sizing_mode=sizing_mode)

layout = Column(Row(intro, sizing_mode=sizing_mode),
                Row(WidgetBox(w1['reviews'],
                              w1['genre'],
                              w1['oscars'],
                              w1['director'],
                              w1['x_axis'],
                              w1['y_axis'],
                              sizing_mode=sizing_mode),
                    WidgetBox(w2['y_axis'],
                              w2['reviews'],
                              w2['genre'],
                              w2['oscars'],
                              w2['director'],
                              w2['x_axis'],
                              sizing_mode=sizing_mode),
                    sizing_mode=sizing_mode),
                Row(WidgetBox(Button(label="Left column",
                                     icon=check,
                                     sizing_mode=sizing_mode),
                              sizing_mode=sizing_mode),
Esempio n. 15
0
def accounts_tsa_tab(panel_title):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            """Set up state, widgets and header divs for the account forecast tab.

            Every attribute is assigned exactly once; the values are the ones
            that were previously left in place after repeated assignments
            (notably ``interest_var`` ends up as ``'amount'``).

            Args:
                table: table name, forwarded to ``Mytab.__init__`` and kept on
                    ``self.table``.
                cols: column list, forwarded to ``Mytab.__init__`` and kept on
                    ``self.cols``.
                dedup_cols: forwarded to ``Mytab.__init__`` unchanged.
            """
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')

            self.forecast_days = 30
            self.interest_var = 'amount'
            self.trigger = -1
            # 'all' means "do not filter on this field" (see filter()).
            self.status = 'all'
            self.update_type = 'all'
            self.account_type = 'all'

            self.pl = {}  # for rf pipeline
            self.div_style = """ style='width:300px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """
            self.header_style = """ style='color:blue;text-align:center;' """

            # list of tier specific addresses for prediction
            self.address_list = []
            self.address_select = Select(title='Filter by address',
                                         value='all',
                                         options=[])
            self.address = 'all'
            self.load_data_flag = False
            self.groupby_dict = {}
            self.addresses = []

            # Date span covered by the data currently held in self.df.
            self.max_loaded_date = None
            self.min_loaded_date = None

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                            position:relative;background:black;margin-bottom:200px">
                            <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'forecast':
                self.section_header_div(text='Forecasts:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a ``Div`` showing ``text`` as a coloured section heading.

            Args:
                text: heading text (inserted into the HTML as-is).
                html_header: tag name used for the heading, e.g. ``'h2'``.
                width: width passed to the ``Div``.
                margin_top: top margin of the wrapper, in pixels.
                margin_bottom: bottom margin of the wrapper, in pixels. Pass
                    the signed value (the default is negative); it is written
                    into the CSS unchanged.

            Returns:
                A ``Div`` of height 15 containing the heading markup.
            """
            # The sign comes from margin_bottom itself. A literal '-' in the
            # template would turn the negative default into '--150px', which
            # is not valid CSS.
            text = (
                '<div style="margin-top:{}px;margin-bottom:{}px;">'
                '<{} style="color:#4221cc;">{}</{}></div>'
            ).format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

            # ####################################################
            #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            """Return a ``Div`` of the given size whose content is ``text``."""
            return Div(text=text, width=width, height=height)

        def title_div(self, text, width=700):
            """Return a ``Div`` of height 15 showing ``text`` as a coloured ``<h2>``."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def reset_checkboxes(self):
            """Clear the remembered address and put the address selector back on 'all'.

            Errors are logged, not raised.
            """
            try:
                self.address_selected = ""
                selector = self.address_select
                selector.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self, start_date, end_date):
            """Make sure ``self.df`` covers ``[start_date, end_date]``, loading if needed.

            When data is already held and its ``block_timestamp`` range
            contains the requested span, nothing is reloaded. Otherwise the
            range is loaded through ``self.df_load``, missing values are
            replaced by 0 and ``self.addresses`` is rebuilt as ``'all'``
            followed by the distinct values of the ``address`` column.

            Args:
                start_date: start of the wanted range; a string is parsed
                    with ``self.DATEFORMAT``.
                end_date: end of the wanted range; a string is parsed with
                    ``self.DATEFORMAT``.

            Returns:
                None. Errors are logged, not raised.
            """
            try:
                logger.warning("data load begun")
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)

                first_load = self.df is None
                if not first_load:
                    self.max_loaded_date = self.df.block_timestamp.max(
                    ).compute()
                    self.min_loaded_date = self.df.block_timestamp.min(
                    ).compute()
                    if (start_date >= self.min_loaded_date
                            and end_date <= self.max_loaded_date):
                        logger.warning("data already loaded - %s",
                                       self.df.tail(10))
                        return

                self.df_load(start_date, end_date, cols=self.cols)
                self.df = self.df.fillna(0)
                # Distinct address *values*. Iterating the frame itself would
                # only yield its column names.
                unique_addresses = self.df['address'].unique().compute()
                self.addresses = ['all'] + list(unique_addresses)
                logger.warning("data loaded - %s", self.df.tail(10))
                # NOTE(review): as before, filter() is applied on the very
                # first load only, not on later reloads -- confirm intended.
                if first_load:
                    self.df = self.filter(self.df)

            except Exception:
                logger.error('load_df', exc_info=True)
            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add a ``<col>_diff`` percentage-change column for each target column.

            ``self.df`` is materialised with ``compute()``; for every name in
            ``self.targets`` a ``<name>_diff`` column holding ``pct_change()``
            (missing values replaced by 0) is added, and the result is stored
            back on ``self.df`` split into 15 partitions.

            NOTE(review): ``self.targets`` is not set in this class's
            ``__init__``; presumably the base class provides it.

            Returns:
                None. Does nothing when ``self.df`` is ``None`` or empty.
                Errors are logged, not raised.
            """
            try:
                if self.df is None or len(self.df) == 0:
                    return
                df = self.df.compute()
                for col in self.targets:
                    col_new = col + '_diff'
                    df[col_new] = df[col].pct_change().fillna(0)
                    logger.warning('diff col added : %s', col_new)
                # The former mean-fill of self.df is dropped: its result was
                # overwritten by this assignment before it could be used.
                self.df = dd.dataframe.from_pandas(df, npartitions=15)

            except Exception:
                logger.error('make delta', exc_info=True)

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Return a box plot of ``variable`` grouped by ``status``.

            The y-range runs from 0.9 x the column minimum to 1.1 x the
            column maximum. When ``self.df`` is ``None`` a frame is obtained
            from ``SD('filter', ...)`` instead, and both limits stay at 0
            (they also stay at 0 when ``self.df`` is empty).

            Args:
                variable: name of the column to plot.

            Returns:
                The hvplot box plot, or ``None`` when an error was logged.
            """
            try:
                lower = upper = 0
                frame = self.df
                if frame is None:
                    frame = SD('filter', [variable, 'status'], []).get_df()
                elif len(frame) > 0:
                    # Evaluate both aggregates in a single compute call.
                    lower, upper = dd.compute(frame[variable].min(),
                                              frame[variable].max())

                y_limits = (.9 * lower, 1.1 * upper)
                return frame.hvplot.box(variable, by='status', ylim=y_limits)
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS

        def filter(self, df):
            """Return ``df`` with a ``freq`` column added and the active filters applied.

            ``freq`` is a copy of the ``address`` column. Rows are then
            restricted by ``self.status``, ``self.account_type``,
            ``self.update_type`` and ``self.address``; a setting equal to
            ``'all'`` leaves that column unfiltered.

            Args:
                df: frame with ``address``, ``status``, ``account_type`` and
                    ``update_type`` columns.

            Returns:
                The filtered frame, or ``None`` when an error was logged.
            """
            try:
                df = df.assign(freq=df.address)
                criteria = (
                    ('status', self.status),
                    # Column name spelled as in the loaded column list.
                    ('account_type', self.account_type),
                    ('update_type', self.update_type),
                    ('address', self.address),
                )
                for column, wanted in criteria:
                    if wanted != 'all':
                        df = df[df[column] == wanted]

                return df
            except Exception:
                logger.error("filter:", exc_info=True)

        def _forecast_layout(self, column, how, value_label, log_tag,
                             **plot_opts):
            """Fit Prophet on a daily aggregate of ``column`` and plot the forecast.

            ``self.df`` is indexed by ``block_timestamp``, resampled per day
            with ``{column: how}``, materialised, and missing values are
            replaced by 0. The result is renamed to the ``ds``/``y`` columns
            that are handed to ``Prophet.fit``; the forecast extends
            ``self.forecast_days`` periods past the data.

            Args:
                column: column of ``self.df`` to aggregate and forecast.
                how: aggregation applied per day (e.g. ``'mean'``).
                value_label: ``value_label`` passed to every hvplot call.
                log_tag: prefix used when logging a failure.
                **plot_opts: extra keyword arguments for every hvplot call.

            Returns:
                ``yhat`` (line) overlaid with ``yhat_lower``/``yhat_upper``
                (scatter), laid out next to an overlay of the ``trend`` and
                ``weekly`` lines; ``None`` when an error was logged.
            """
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({column: how})
                df = df.reset_index()
                df = df.compute()
                df = df.fillna(0)

                df = df.rename(columns={'block_timestamp': 'ds', column: 'y'})
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                model = Prophet()
                model.fit(df)

                future = model.make_future_dataframe(
                    periods=self.forecast_days)
                forecast = model.predict(future)
                logger.warning(
                    'forecast:%s',
                    forecast[['ds', 'yhat', 'yhat_lower',
                              'yhat_upper']].tail())
                logger.warning('forecast columns:%s', list(forecast.columns))

                def draw(kind, col, width):
                    # One forecast column as an hvplot element named after it.
                    plot = getattr(forecast.hvplot, kind)
                    return plot(x='ds',
                                y=col,
                                width=width,
                                height=250,
                                value_label=value_label,
                                **plot_opts).relabel(col)

                prediction = draw('line', 'yhat', 600)
                for col in ('yhat_lower', 'yhat_upper'):
                    prediction *= draw('scatter', col, 600)

                components = (draw('line', 'trend', 550) *
                              draw('line', 'weekly', 550))

                return prediction + components
            except Exception:
                logger.error("%s:", log_tag, exc_info=True)

        def tsa_amount(self, launch):
            """Forecast the daily mean of ``amount``.

            Args:
                launch: value of the launch stream; unused, it only exists so
                    that a stream event re-invokes this callback.

            Returns:
                The forecast layout, or ``None`` when an error was logged.
            """
            return self._forecast_layout('amount',
                                         'mean',
                                         '$',
                                         'tsa amount',
                                         legend=False)

        def tsa_freq(self, launch):
            """Forecast the daily number of distinct ``address`` values.

            Args:
                launch: value of the launch stream; unused, it only exists so
                    that a stream event re-invokes this callback.

            Returns:
                The forecast layout, or ``None`` when an error was logged.
            """
            return self._forecast_layout('address', 'nunique', '#',
                                         'tsa freq')

        ####################################################
        #               GRAPHS
    def update(attrname, old, new):
        """Copy the current widget selections onto ``thistab`` and re-trigger the plots.

        Registered as an ``on_change('value', ...)`` handler; the three
        arguments come from that mechanism and are not used. The plots are
        refreshed by sending an incremented ``launch`` value to
        ``stream_launch``.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        selections = (
            ('update_type', update_type_select),
            ('status', status_select),
            ('account_type', account_type_select),
        )
        for attribute, widget in selections:
            setattr(thistab, attribute, widget.value)
        # The select widget holds the number of days as a string.
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.address = thistab.address_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_load(attrname, old, new):
        """Load data for the date range currently shown in the two date pickers.

        Registered as the ``on_change('value', ...)`` handler of both date
        pickers; the three arguments are not used.
        """
        busy_message = "Calculations underway. Please be patient"
        thistab.notification_updater(busy_message)
        start, end = datepicker_start.value, datepicker_end.value
        thistab.load_df(start, end)
        thistab.notification_updater("ready")

    # Build the tab: create the helper object, streams and widgets, load the
    # data, render the two forecast plots and assemble the layout. Any
    # failure is logged and an error tab is returned instead.
    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())

        cols = [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type', 'amount'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        # Initial window: the 60 days up to last_date.
        first_date = last_date - timedelta(days=60)
        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        # NOTE(review): stream_select_variable is created but not used in
        # the rest of this function.
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])
        # search by address checkboxes
        # NOTE(review): this runs before load_df() below, so
        # thistab.addresses is still the empty list set in __init__ and
        # active=[0] points at a label that does not exist -- confirm whether
        # the checkboxes should be built after loading.
        thistab.checkboxes = CheckboxButtonGroup(labels=thistab.addresses,
                                                 active=[0])

        # ----------------------------------- LOAD DATA
        # load model-making data
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        # load data for period to be predicted

        # tables
        # Each DynamicMap calls its method again whenever stream_launch fires.
        hv_tsa_amount = hv.DynamicMap(thistab.tsa_amount,
                                      streams=[stream_launch])
        tsa_amount = renderer.get_plot(hv_tsa_amount)

        hv_tsa_freq = hv.DynamicMap(thistab.tsa_freq, streams=[stream_launch])
        tsa_freq = renderer.get_plot(hv_tsa_freq)

        # add callbacks
        # Date changes reload data; every other widget re-runs the forecasts.
        datepicker_start.on_change('value', update_load)
        datepicker_end.on_change('value', update_load)
        thistab.address_select.on_change('value', update)
        select_forecast_days.on_change('value', update)
        update_type_select.on_change('value', update)
        account_type_select.on_change('value', update)
        status_select.on_change('value', update)

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             thistab.address_select, select_forecast_days,
                             update_type_select, account_type_select,
                             status_select, thistab.checkboxes)

        # Each inner list is one row of the grid; `.state` is the rendered
        # plot object placed in the layout.
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [Spacer(width=20, height=30)],
                         [tsa_amount.state, controls], [tsa_freq.state],
                         [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
Esempio n. 16
0
def cryptocurrency_eda_tab(cryptos, panel_title):
    """Build the cryptocurrency exploratory-data-analysis dashboard tab.

    Args:
        cryptos: List of cryptocurrency names; it is concatenated after an
            'all' entry to form the options of the cryptocurrency Select.
        panel_title: Title given to the returned Panel.

    Returns:
        A Panel wrapping the dashboard grid, or the result of
        ``tab_error_flag(panel_title)`` if building the tab raises.
    """
    # Data source behind the lag-correlation DataTable; Thistab.lags_corr
    # streams fresh rows into it.
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[],
                                               variable_2=[],
                                               relationship=[],
                                               lag=[],
                                               r=[],
                                               p_value=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """Initialise tab state, thresholds and the header/notification Divs.

            Args:
                table: Source table name; forwarded to ``Mytab.__init__``.
                cols: Column names; forwarded to ``Mytab.__init__``.
                dedup_cols: De-duplication columns; forwarded to
                    ``Mytab.__init__``. ``None`` (the default) is treated as
                    an empty list.
            """
            # A literal ``[]`` default would be one list shared by every call.
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0

            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.variable = 'fork'
            self.crypto = 'all'
            self.lag_variable = 'push'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]
            # prep_data reads self.resample_period, which this constructor
            # never assigned. Fall back to daily (the initial value of the
            # resample Select) unless the base class already provided one.
            if not hasattr(self, 'resample_period'):
                self.resample_period = 'D'

            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                            style='width:350px; margin-left:-600px;
                            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """

            self.header_style = """ style='color:blue;text-align:center;' """
            # track variable for AI for significant effects
            self.adoption_variables = {
                'user': [],
                'developer': ['watch', 'fork']
            }

            self.significant_effect_dict = {}
            self.reset_adoption_dict(self.variable)
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                           <div style="text-align:center;width:{}px;height:{}px;
                                  position:relative;background:black;margin-bottom:200px">
                                  <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                           </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.lag_section_head_txt = 'Lag relationships:'
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=self.lag_section_head_txt,
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(
                    text='Pre transform distribution:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                # The original called .format() on this placeholder-free
                # string, which was a no-op; the rendered text is unchanged.
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:',
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                # NOTE(review): this header carries the same text as
                # 'non_linear' below - looks like a copy/paste; confirm the
                # intended wording before changing it.
                'correlations':
                self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600,
                    html_header='h3',
                    margin_top=5,
                    margin_bottom=-155),
                'non_linear':
                self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600,
                    html_header='h3',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a Div containing a coloured section heading.

            Args:
                text: Heading text.
                html_header: HTML tag name used for the heading element.
                width: Width passed to the Div.
                margin_top: Value substituted into the ``margin-top`` style.
                margin_bottom: Value substituted after the literal ``-`` in
                    the ``margin-bottom`` style.
            """
            # NOTE(review): the template already contains a '-' before the
            # bottom margin, so a negative margin_bottom (the default and what
            # the visible callers pass) renders as e.g. '--150px'. Left as-is
            # because changing it would alter the rendered layout.
            template = (
                '<div style="margin-top:{top}px;margin-bottom:-{bottom}px;">'
                '<{tag} style="color:#4221cc;">{body}</{tag}></div>')
            markup = template.format(top=margin_top,
                                     bottom=margin_bottom,
                                     tag=html_header,
                                     body=text)
            return Div(text=markup, width=width, height=15)

        def notification_updater(self, text):
            """Display ``text`` in every Div stored in ``self.notification_div``."""
            banner = """<div style="text-align:center;background:black;width:{}px;">
                           <h4 style="color:#fff;">
                           {}</h4></div>""".format(self.page_width, text)
            for div in self.notification_div.values():
                div.text = banner

        def reset_adoption_dict(self, variable):
            """Reset the significant-effect feature list kept for ``variable``."""
            self.significant_effect_dict.update({variable: []})

        def section_header_updater(self,
                                   text,
                                   section,
                                   html_header='h3',
                                   margin_top=150,
                                   margin_bottom=-150):
            """Replace the markup of the section header stored under ``section``.

            Args:
                text: New heading text.
                section: Key into ``self.section_headers``.
                html_header: HTML tag name used for the heading element.
                margin_top: Value substituted into the ``margin-top`` style.
                margin_bottom: Value substituted after the literal ``-`` in
                    the ``margin-bottom`` style.
            """
            # NOTE(review): as in the header template used at construction,
            # a negative margin_bottom renders with a doubled minus sign.
            template = (
                '<div style="margin-top:{top}px;margin-bottom:-{bottom}px;">'
                '<{tag} style="color:#4221cc;">{body}</{tag}></div>')
            header = self.section_headers[section]
            header.text = template.format(top=margin_top,
                                          bottom=margin_bottom,
                                          tag=html_header,
                                          body=text)

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a Div that renders ``text`` as a coloured ``<h2>`` title."""
            markup = '<h2 style="color:#4221cc;">{title}</h2>'.format(
                title=text)
            return Div(text=markup, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Return a Div with guidance on reading the relationship tables.

            The container and heading are styled with ``self.div_style`` and
            ``self.header_style`` respectively.
            """
            help_html = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """.format(self.div_style, self.header_style)
            return Div(text=help_html, width=width, height=height)

        # /////////////////////////////////////////////////////////////
        def prep_data(self, df1):
            """Filter, resample and lag the loaded data; store it in ``self.df1``.

            Args:
                df1: Frame with 'timestamp' and 'crypto' columns plus the
                    columns named in ``self.groupby_dict``. ``.compute()`` is
                    called on it, so it is presumably a dask DataFrame -
                    TODO confirm against the loader.

            Any exception is logged and swallowed; ``self.df1`` is then left
            at its previous value.
            """
            try:
                self.cols = list(df1.columns)

                df1['timestamp'] = df1['timestamp'].astype('M8[us]')
                df = df1.set_index('timestamp')
                if self.crypto != 'all':
                    df = df[df.crypto == self.crypto]
                df = df.compute()
                df = df.groupby('crypto').resample(self.resample_period).agg(
                    self.groupby_dict)
                df = df.reset_index()

                lag = int(self.lag)
                if lag > 0:
                    # Shift inside each crypto's rows. After reset_index the
                    # frame stacks one crypto after another, so an ungrouped
                    # shift would carry the tail of one crypto into the head
                    # of the next.
                    by_crypto = df.groupby('crypto')
                    for feature in self.feature_list:
                        if feature != self.variable:
                            df[feature] = by_crypto[feature].shift(lag)
                # Rows made incomplete by the shift (or by resampling) go.
                df = df.dropna()
                self.df1 = df

            except Exception:
                logger.error('prep data', exc_info=True)

        def set_groupby_dict(self):
            """Placeholder hook: performs no work and returns ``None``."""
            return None

        #   ///////////////// PLOTS /////////////////////

        def lags_plot(self, launch):
            """Scatter ``self.variable`` against lagged copies of the lag variable.

            One extra column ``<lag_variable>_<n>`` is added for every integer
            ``n`` in the comma-separated ``self.lag_days`` string; entries
            that are not integers are skipped with a warning. The lag
            correlation source is refreshed via ``self.lags_corr`` before
            plotting.

            Args:
                launch: Stream counter supplied by the DynamicMap; unused.

            Returns:
                The hvplot scatter, or ``None`` if an error was logged.
            """
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                df = df.compute()
                cols = [self.lag_variable]
                for day in self.lag_days.split(','):
                    # Tolerate "1, 2, 3" style input and keep labels free of
                    # stray spaces.
                    day = day.strip()
                    try:
                        periods = int(day)
                    except ValueError:
                        # Only a failed integer parse is reported this way;
                        # other errors reach the outer handler.
                        logger.warning('%s is not an integer', day)
                        continue
                    label = self.lag_variable + '_' + day
                    df[label] = df[self.lag_variable].shift(periods)
                    cols.append(label)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                return df.hvplot(x=self.variable,
                                 y=cols,
                                 kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Stream correlation rows for ``self.variable`` vs. each other column.

            Every column of ``df`` other than 'timestamp' and ``self.variable``
            is passed with the target column to ``self.corr_label``. The lag
            is read from the text after the column name's last underscore,
            or recorded as the string 'None' when that is not an integer.

            Args:
                df: Frame containing ``self.variable`` and the columns to
                    compare it with.

            Returns:
                A DataTable bound to ``lags_corr_src``, or ``None`` if an
                error was logged.
            """
            try:
                field_titles = [
                    ("variable_1", "variable 1"),
                    ("variable_2", "variable 2"),
                    ("relationship", "relationship"),
                    ("lag", "lag(days)"),
                    ("r", "r"),
                    ("p_value", "p_value"),
                ]
                rows = {field: [] for field, _ in field_titles}
                target_values = df[self.variable].tolist()
                excluded = ['timestamp', self.variable]
                for col in df.columns:
                    if col in excluded:
                        continue
                    # find lag
                    pieces = col.split('_')
                    try:
                        lag = int(pieces[-1])
                    except Exception:
                        lag = 'None'

                    other_values = df[col].tolist()
                    _slope, _intercept, rvalue, pvalue, txt = self.corr_label(
                        target_values, other_values)
                    rows['variable_1'].append(self.variable)
                    rows['variable_2'].append(col)
                    rows['relationship'].append(txt)
                    rows['lag'].append(lag)
                    rows['r'].append(round(rvalue, 4))
                    rows['p_value'].append(round(pvalue, 4))

                # rollover is set to the number of rows just streamed.
                lags_corr_src.stream(rows, rollover=(len(rows['lag'])))
                table_columns = [
                    TableColumn(field=field, title=title)
                    for field, title in field_titles
                ]
                return DataTable(source=lags_corr_src,
                                 columns=table_columns,
                                 width=900,
                                 height=400)
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """Tabulate ``self.corr_label`` results of the variable vs. each feature.

            When ``self.variable`` is one of the 'developer' adoption
            variables, features whose relationship text contains one of
            ``self.relationships_to_check`` are accumulated in
            ``self.significant_effect_dict`` and a de-duplicated list is
            saved through ``self.redis.save``.

            Args:
                launch: Stream counter supplied by the DynamicMap; unused.

            Returns:
                An hvplot table, or ``None`` if an error was logged.
            """
            try:
                headers = [
                    'Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'
                ]
                corr_dict = {header: [] for header in headers}
                # prep df
                frame = self.df1.drop('timestamp', axis=1)

                target_values = frame[self.variable].tolist()
                tracked = self.variable in self.adoption_variables['developer']

                for col in self.feature_list:
                    if col == self.variable:
                        continue
                    _slope, _intercept, rvalue, pvalue, txt = self.corr_label(
                        target_values, frame[col].tolist())
                    # add to dict
                    corr_dict['Variable 1'].append(self.variable)
                    corr_dict['Variable 2'].append(col)
                    corr_dict['Relationship'].append(txt)
                    corr_dict['r'].append(round(rvalue, 4))
                    corr_dict['p-value'].append(round(pvalue, 4))

                    # update significant effect variables
                    if tracked and any(level in txt for level in
                                       self.relationships_to_check):
                        self.significant_effect_dict.setdefault(
                            self.variable, []).append(col)

                if tracked:
                    features = list(
                        set(self.significant_effect_dict[self.variable]))
                    payload = {
                        'features': features,
                        'timestamp': datetime.now().strftime(self.DATEFORMAT)
                    }
                    # write to redis
                    save_params = 'adoption_features:developer' + '-' + self.variable
                    self.redis.save(payload,
                                    save_params,
                                    "",
                                    "",
                                    type='checkpoint')

                results = pd.DataFrame(corr_dict)
                return results.hvplot.table(
                    columns=headers,
                    width=550,
                    height=400,
                    title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """Tabulate ``self.mann_whitneyu_label`` results per feature.

            ``self.variable`` is compared with every other entry of
            ``self.feature_list`` using the columns of ``self.df1``
            ('timestamp' is dropped first). ``mann_whitneyu_label`` is
            defined outside this view; it is expected to return
            ``(stat, pvalue, text)``.

            Args:
                launch: Stream counter supplied by the DynamicMap; unused.

            Returns:
                An hvplot table, or ``None`` if an error was logged.
            """
            try:
                headers = [
                    'Variable 1', 'Variable 2', 'Relationship', 'stat',
                    'p-value'
                ]
                corr_dict = {header: [] for header in headers}
                # prep df
                df = self.df1.drop('timestamp', axis=1)

                a = df[self.variable].tolist()
                for col in self.feature_list:
                    if col == self.variable:
                        continue
                    b = df[col].tolist()
                    stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                    corr_dict['Variable 1'].append(self.variable)
                    corr_dict['Variable 2'].append(col)
                    corr_dict['Relationship'].append(txt)
                    corr_dict['stat'].append(round(stat, 4))
                    corr_dict['p-value'].append(round(pvalue, 4))

                # Title previously read 'parametricrelationship' (missing
                # space) in the rendered table.
                return pd.DataFrame(corr_dict).hvplot.table(
                    columns=headers,
                    width=550,
                    height=400,
                    title='Non parametric relationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            """Return per-feature histograms of ``self.df`` laid out four per row.

            Args:
                launch: Stream counter supplied by the caller; unused.

            Returns:
                The hvplot layout, or ``None`` if an error was logged.
            """
            try:
                histograms = self.df.hvplot.hist(y=self.feature_list,
                                                 subplots=True,
                                                 shared_axes=False,
                                                 bins=25,
                                                 alpha=0.3,
                                                 width=300)
                return histograms.cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter ``self.variable`` against every other feature of ``self.df1``.

            Missing values are replaced with 0 and the 'timestamp' column,
            when present, is dropped before plotting.

            Args:
                launch: Stream counter supplied by the DynamicMap; unused.

            Returns:
                The hvplot layout (four plots per row), or ``None`` if an
                error was logged.
            """
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)
                frame = self.df1
                if 'timestamp' in frame.columns:
                    frame = frame.drop('timestamp', axis=1)
                frame = frame.fillna(0)

                # Everything except the selected variable goes on the y axes.
                y_columns = list(self.feature_list)
                try:
                    y_columns.remove(self.variable)
                except ValueError:
                    pass

                scatter = frame.hvplot.scatter(x=self.variable,
                                               y=y_columns,
                                               width=330,
                                               subplots=True,
                                               shared_axes=False,
                                               xaxis=False)
                return scatter.cols(4)

            except Exception:
                logger.error('matrix plot', exc_info=True)

        '''
        def regression(self,df):
            try:

            except Exception:
                logger.error('matrix plot', exc_info=True)
        '''

    def update_variable(attr, old, new):
        """``on_change`` callback for the variable Select.

        Args:
            attr: Name of the changed property (unused).
            old: Previous value (unused).
            new: Newly selected variable name.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        # Select the new variable BEFORE re-preparing the data: prep_data
        # leaves thistab.variable out of the lag shift, so running it first
        # would lag the new variable and leave the old one unshifted.
        thistab.variable = new
        thistab.prep_data(thistab.df)
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.lag_section_head_txt = 'Lag relationships between {} and...'.format(
            thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """``on_change`` callback for the lag-variable Select.

        Stores ``new`` as the lag variable, re-prepares the data and fires
        the lag-plot stream.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch_lags_var.event(launch=launch_id)
        thistab.notification_updater("Ready!")

    def update_crypto(attr, old, new):
        """``on_change`` callback for the cryptocurrency Select.

        Reads the current crypto and lag widget values, re-prepares the data
        and fires the matrix and correlation streams.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.crypto = crypto_select.value
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):
        """``on_change`` callback for the single-lag Select.

        Reads the lag widget value, re-prepares the data and fires the
        matrix and correlation streams.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """``on_change`` callback for both date pickers.

        Reloads data for the currently selected date range, re-prepares it
        and fires the matrix and correlation streams.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        start, end = datepicker_start.value, datepicker_end.value
        thistab.df_load(start, end, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """``on_change`` callback for the resample-period Select.

        Stores ``new`` as the resample period, re-prepares the data and
        fires the matrix and correlation streams.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Click handler for the lags button.

        Copies the text of the lags input into ``thistab.lag_days`` and
        fires the lag-plot stream.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch_lags_var.event(launch=launch_id)
        thistab.notification_updater("Ready!")

    # Build the tab: load data, create streams/widgets/plots, wire callbacks
    # and compose the layout. Any failure is logged and replaced by the
    # error tab.
    try:
        # SETUP
        table = 'external_daily'
        # NOTE(review): `cols` is computed but not used below - Thistab is
        # given an empty column list instead. Confirm whether that is intended.
        cols = list(groupby_dict.keys()) + ['timestamp', 'crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        # Initial window: the 200 days ending two days before the configured
        # last date.
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=200)
        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        # date comes out stream in milliseconds
        #stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        # Each stream carries a `launch` counter; the update callbacks call
        # .event(launch=...) on them to re-run the attached DynamicMaps.
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value='fork',
                                 options=thistab.feature_list)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        crypto_select = Select(title='Select cryptocurrency',
                               value='all',
                               options=['all'] + thistab.items)

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        # Table bound to the shared lags_corr_src; it refreshes whenever
        # Thistab.lags_corr streams new rows into that source.
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src,
                                    columns=columns,
                                    width=500,
                                    height=280)

        # NOTE(review): `width` is not referenced again in this function.
        width = 800

        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        # Both tables below listen to the same correlation stream.
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        #hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])

        # Render the HoloViews objects; their `.state` is placed in the grid.
        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        crypto_select.on_change('value', update_crypto)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end, variable_select,
                             lag_select, crypto_select, resample_select)

        controls_lag = WidgetBox(lag_variable_select, lags_input,
                                 lags_input_button)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [matrix_plot.state, controls],
                         [thistab.section_headers['relationships']],
                         [Spacer(width=20, height=30)],
                         [thistab.section_headers['correlations']],
                         [Spacer(width=20, height=30)],
                         [corr_table.state,
                          thistab.corr_information_div()],
                         [thistab.section_headers['non_linear']],
                         [Spacer(width=20, height=30)], [nonpara_table.state],
                         [thistab.section_headers['lag']],
                         [Spacer(width=20, height=30)],
                         [lags_plot.state, controls_lag], [lags_corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        # Log with traceback and hand back the generic error tab instead.
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)
Esempio n. 17
0
def slider():
    """Build a sine-wave plot with amplitude/frequency/phase/offset sliders.

    All four sliders share one CustomJS callback that recomputes ``y`` in
    the data source from the current slider values.

    Returns:
        A Row holding the slider WidgetBox and the plot Column.
    """
    xs = np.linspace(0, 10, 500)
    source = ColumnDataSource(data=dict(x=xs, y=np.sin(xs)))

    plot = figure(y_range=(-10, 10),
                  tools='',
                  toolbar_location=None,
                  responsive='box')
    plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

    js_code = """
        var data = source.get('data');
        var A = amp.get('value')
        var k = freq.get('value')
        var phi = phase.get('value')
        var B = offset.get('value')
        x = data['x']
        y = data['y']
        for (i = 0; i < x.length; i++) {
            y[i] = B + A*Math.sin(k*x[i]+phi);
        }
        source.trigger('change');
    """
    callback = CustomJS(args=dict(source=source), code=js_code)

    # (name exposed to the JS callback, Slider keyword arguments)
    slider_specs = [
        ("amp", dict(start=0.1, end=10, value=1, step=.1,
                     title="Amplitude", callback_policy='mouseup')),
        ("freq", dict(start=0.1, end=10, value=1, step=.1,
                      title="Frequency")),
        ("phase", dict(start=0, end=6.4, value=0, step=.1, title="Phase")),
        ("offset", dict(start=-5, end=5, value=0, step=.1, title="Offset")),
    ]
    widgets = []
    for js_name, options in slider_specs:
        widget = Slider(callback=callback, **options)
        # Register the slider so the JS code can read its value by name.
        callback.args[js_name] = widget
        widgets.append(widget)

    return Row(WidgetBox(*widgets, responsive='box'),
               Column(plot, responsive='box'),
               responsive='box')
Esempio n. 18
0
def account_predictive_tab(page_width=1200):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            """Initialise model state, shared inline styles and the page divs.

            Args:
                table: table name; stored and forwarded to ``Mytab.__init__``.
                cols: column list; stored and forwarded to ``Mytab.__init__``.
                dedup_cols: forwarded unchanged to ``Mytab.__init__``.
            """
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"

            # ------- data holders
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.df_grouped = ''
            self.accuracy_df = None
            self.load_data_flag = False
            # for normalizing for classification periods of different lengths
            self.day_diff = 1

            # ------- model state
            self.rf = {}  # random forest
            self.clf = None
            self.pl = {}  # for rf pipeline
            self.cl = PythonClickhouse('aion')
            self.feature_list = hyp_variables
            # every feature is aggregated with its mean when rows are grouped
            self.groupby_dict = {col: 'mean' for col in self.feature_list}

            self.targets = {
                'classification': {
                    'churned': {
                        'cols': ['churned', 'active'],
                        'target_col': 'status'
                    }
                },
                'regression': {
                    'aion_fork': {
                        'cols': [1, 0],
                        'target_col': 'aion_fork'
                    }
                }
            }
            self.interest_var = 'address'
            self.trigger = -1
            self.status = 'all'
            self.inspected_variable = 'amount'

            # list of tier specific addresses for prediction
            self.address_list = []
            self.prediction_address_selected = ""

            # ------- shared inline styles (each was previously assigned twice;
            # only the last assignment was ever visible, so that one is kept)
            self.div_style = """ style='width:300px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """
            self.header_style = """ style='color:blue;text-align:center;' """
            self.metrics_div = Div(text='', width=400, height=300)

            # ------- DIVS setup begin
            self.page_width = page_width
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                                                       position:relative;background:black;margin-bottom:200px">
                                                                       <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                                                 </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            half_page = int(self.page_width * .5)
            self.section_headers = {
                'churn':
                self.section_header_div(
                    text=
                    'Churned accounts: prediction model accuracy, variable ranking:{}'
                    .format('----'),
                    width=half_page,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'variable behavior':
                self.section_header_div(
                    text='Variable behavior:{}'.format(self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'predictions':
                self.section_header_div(
                    text='Select date range to make predictions:{}'.format(
                        self.section_divider),
                    width=half_page,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a 15px-high ``Div`` showing ``text`` as a coloured heading.

            Args:
                text: heading text placed inside the header tag.
                html_header: tag name used for the heading (e.g. ``'h2'``).
                width: width passed to the ``Div``.
                margin_top: value written into ``margin-top`` (px).
                margin_bottom: value written after ``margin-bottom:-`` (px).
            """
            # NOTE(review): the template already contains a minus sign before the
            # bottom margin, so the negative defaults/callers produce
            # ``margin-bottom:--150px`` -- confirm whether one minus was intended.
            template = ('<div style="margin-top:{}px;margin-bottom:-{}px;">'
                        '<{} style="color:#4221cc;">{}</{}></div>')
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

            # ####################################################
            #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            """Wrap ``text`` in a ``Div`` of the requested width and height."""
            return Div(text=text, width=width, height=height)

        def title_div(self, text, width=700):
            """Return a 15px-high ``Div`` rendering ``text`` as a coloured ``<h2>``."""
            heading_markup = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading_markup, width=width, height=15)

        def reset_checkboxes(self):
            """Clear the remembered address and put the address select back on "all".

            Any failure is logged and swallowed so a UI click never propagates
            an exception.
            """
            try:
                # forget the stored selection first, then reset the widget itself
                self.prediction_address_selected = ""
                address_select = self.prediction_address_select
                address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self,
                    start_date="2018-04-25 00:00:00",
                    end_date="2018-12-10 00:00:00"):
            """Load ``self.df`` for a period and replace missing values with 0.

            Args:
                start_date: period start; a string is parsed with
                    ``self.DATEFORMAT``, anything else is passed through.
                end_date: period end; handled like ``start_date``.

            Errors are logged and swallowed.
            """
            try:
                period = [
                    datetime.strptime(bound, self.DATEFORMAT)
                    if isinstance(bound, str) else bound
                    for bound in (start_date, end_date)
                ]
                self.df_load(*period)
                self.df = self.df.fillna(0)
            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add a ``<name>_diff`` percent-change column for each key of ``self.targets``.

            Does nothing when ``self.df`` is ``None`` or empty. Any exception is
            logged and swallowed. The only call visible in ``load_df`` is
            commented out.
            """
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        # presumably self.df is a dask frame; compute() yields the
                        # in-memory frame that pct_change is applied to
                        df = self.df.compute()
                        # NOTE(review): iterating a dict yields its keys, and
                        # ``self.targets`` is built in ``__init__`` with the keys
                        # 'classification' and 'regression'; ``df[col]`` therefore
                        # looks those names up as columns -- confirm the intent.
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            # the first row has no predecessor, so its change is NaN
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        # NOTE(review): this result is overwritten by the next
                        # statement, which rebuilds self.df from ``df``.
                        self.df = self.df.fillna(self.df.mean())
                        # NOTE(review): if ``dd`` is ``dask.dataframe``, ``from_pandas``
                        # lives directly on it -- verify ``dd.dataframe`` resolves.
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))

            except Exception:
                logger.error('make delta', exc_info=True)

        def split_df(self, df, target):
            """Split ``df`` into one sub-frame per class of a classification target.

            The rows of ``df`` whose target column equals each configured class
            value are stored in ``self.df1`` under that value.

            Args:
                df: frame containing the target column.
                target: key into ``self.targets['classification']``.

            Raises:
                KeyError: if ``target`` is not a configured classification target.
            """
            # The original read ``self.target`` (no such attribute), used the
            # whole spec dict as both the value list and the column name, and
            # stored the boolean mask instead of the filtered rows.
            spec = self.targets['classification'][target]
            target_col = spec['target_col']
            for val in spec['cols']:
                self.df1[val] = df[df[target_col] == val]
            logger.warning(
                "Finished split into churned and retained dataframes")

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Return a box plot of ``variable`` grouped by ``status``.

            The y-range runs from 0.9 x the column minimum to 1.1 x the column
            maximum; both stay 0 when the frame is empty. When ``self.df`` is
            ``None`` a frame obtained from ``SD(...).get_df()`` is plotted
            instead. Errors are logged and ``None`` is returned.
            """
            try:
                frame = self.df
                lower, upper = 0, 0
                if frame is None:
                    frame = SD('filter', [variable, 'status'], []).get_df()
                elif len(frame) > 0:
                    # one pass for both extremes
                    lower, upper = dd.compute(frame[variable].min(),
                                              frame[variable].max())

                y_limits = (.9 * lower, 1.1 * upper)
                return frame.hvplot.box(variable, by='status', ylim=y_limits)
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS
        def rf_clf(self):
            """Train one random-forest pipeline per classification target.

            For every key of ``self.targets['classification']`` the loaded data
            is normalised, averaged per (address, status), split 70/30, and a
            median-imputer + ``RandomForestClassifier`` pipeline is fitted and
            stored in ``self.pl[target]``. Test-set accuracies (in %) are
            collected into ``self.accuracy_df``. Any exception is logged and
            swallowed.
            """
            try:
                logger.warning("RANDOM FOREST LAUNCHED")

                # one accuracy value per target, in iteration order
                error_lst = []
                df_temp = self.df
                df_temp = self.normalize(df_temp,
                                         timestamp_col='block_timestamp')
                # if all addresses used filter for only positive transactions

                for target in self.targets['classification']:
                    # filter out joined
                    df = df_temp.copy()
                    if target == 'churned':
                        df = df[df['status'] != 'joined']

                    #logger.warning("line 205: df columns in %s:",df.columns.tolist())
                    # collapse to one row per (address, status) using the
                    # per-feature aggregation in self.groupby_dict
                    df = df.groupby(['address',
                                     'status']).agg(self.groupby_dict)
                    df = df.reset_index()
                    #logger.warning("line 222: df columns in %s:",df.tail(10))

                    df = df.compute()
                    '''
                    # only retain wanted values
                    col_values = list(self.df[self.targets['classification'][target]['target_col']].unique())
                    for val in col_values:
                        if val in self.targets['classification'][target]['cols']:
                            pass
                        else:
                            df[self.targets['classification'][target]['target_col']] = \
                            df[df[self.targets['classification'][target]['cols']] != val]
                    '''
                    X = df[self.feature_list]
                    y = df[self.targets['classification'][target]
                           ['target_col']]
                    #logger.warning('y=:%s',y.head(100))

                    # no random_state is passed here, so the split (and hence the
                    # reported accuracy) can differ between runs
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=0.3)
                    self.feature_list = X_train.columns.tolist()

                    # the imputer is configured to treat 0 as the missing marker
                    self.pl[target] = Pipeline([
                        ('imp',
                         SimpleImputer(missing_values=0, strategy='median')),
                        ('rf',
                         RandomForestClassifier(n_estimators=100,
                                                random_state=42,
                                                max_depth=4,
                                                class_weight='balanced'))
                    ])
                    self.pl[target].fit(X_train, y_train)

                    y_pred = self.pl[target].predict(X_test)
                    error_lst.append(
                        round(100 * metrics.accuracy_score(y_test, y_pred), 2))

                self.accuracy_df = pd.DataFrame({
                    'Outcome':
                    list(self.targets['classification'].keys()),
                    'Accuracy':
                    error_lst,
                })
                #logger.warning('accuracy_df:%s',self.accuracy_df.head())
                #self.make_tree(target=target)

                # NOTE(review): y_test / y_pred are the loop variables, so only the
                # last target is reported here (and they are undefined if the loop
                # never ran).
                print('confusion matrix:\n')
                print(confusion_matrix(y_test, y_pred))
                print('classification report:\n')
                print(classification_report(y_test, y_pred))
                #logger.warning("clf model built:%s",self.pl)

            except Exception:
                logger.error("RF:", exc_info=True)

        def accuracy_table(self):
            """Return an hvplot table of ``self.accuracy_df`` (Outcome / Accuracy).

            Errors (for example ``self.accuracy_df`` still being ``None``) are
            logged and ``None`` is returned.
            """
            try:
                return self.accuracy_df.hvplot.table(
                    columns=['Outcome', 'Accuracy'],
                    width=250,
                    title='Prediction accuracy')

            except Exception:
                # previously logged as "RF:", which pointed at the wrong method
                logger.error("accuracy table:", exc_info=True)

        def prediction_information_div(self, width=350, height=450):
            """Return a ``Div`` with static help text for the prediction table.

            Args:
                width: width passed to the ``Div``.
                height: height passed to the ``Div``.
            """
            # The third list item previously opened with a mangled ``<>`` tag,
            # leaving its closing ``</li>`` unmatched; it is an ``<li>`` again.
            txt = """
            <div {}>
            <h4 {}>Info </h4>
            <ul style='margin-top:-10px;'>
            <li>
            The table shows the predicted change.</br>
            </li>
            <li>
            For desirable outcomes:
            </br> ... a positive number is good!
            </br> ... the bigger the number the better.
            </br> ... a negative number is bad!
            </br> ... the bigger the negative number the worse it is.
            </li>
            <li>
            For non-desirable outcomes:
            </br>... the inverse is true
            </li>
            <li>
            Use the datepicker(s) to select dates for the period desired
            </li>
            </ul>
            </div>

            """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def metrics_div_update(self, data):
            """Write the "likely to churn" summary into ``self.metrics_div``.

            Args:
                data: value shown in front of ``% likely to churn``.
            """
            template = """<div {}>
            <h4 {}>Prediction Info </h4>
            <ul style='margin-top:-10px;'>
            <li>
            {}% likely to churn
            </li>
            </ul>
            </div>"""
            box_style = """ 
                   style='width:350px;margin-right:-600px;
                   border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
               """
            self.metrics_div.text = template.format(box_style,
                                                    self.header_style, data)

        def stats_information_div(self, width=400, height=300):
            """Return a ``Div`` with static help text for the accuracy/ranking tables.

            Args:
                width: width passed to the ``Div``.
                height: height passed to the ``Div``.
            """
            box_style = """ 
                           style='width:350px;margin-left:-600px;
                           border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                       """
            template = """
            <div {}>
                   <h4 {}>Metadata Info </h4>
                   <ul>
                   <li >
                   <h4 style='margin-bottom:-2px;'>Table left:</h4>
                   - shows the outcome,</br>
                     and the accuracy in %</br>
                     <strong><i>100% is perfection!</i></strong>
                   </li>
                   <li>
                   <h4 style='margin-bottom:-2px;'>Table right:</h4>
                     - shows the desired outcome, the variables(things Aion controls)
                   </br> and their importance to the particular outcome
                   </br> ...which variable(s) have a greater impact on an outcome.
                   </br>- lower = better
                   </br>- generally only the best ranked 3 matter
                   </br>- business advice: manipulate the top ranked variables to attain desirable outcomes
                   </li>
                   </ul>
            </div>"""
            markup = template.format(box_style, self.header_style)
            return Div(text=markup, width=width, height=height)

        def load_prediction_df(self, start_date, end_date):
            """Load the feature columns for the prediction period into ``self.df_predict``.

            Args:
                start_date: period start; a ``date`` (or ``datetime``) is
                    converted to a ``datetime`` at midnight of that day.
                end_date: period end; converted the same way.
            """
            midnight = datetime.min.time()
            if isinstance(start_date, date):
                start_date = datetime.combine(start_date, midnight)
            if isinstance(end_date, date):
                end_date = datetime.combine(end_date, midnight)

            wanted_cols = self.feature_list + ['address', 'block_timestamp']
            self.df_predict = self.cl.load_data(table=self.table,
                                                cols=wanted_cols,
                                                start_date=start_date,
                                                end_date=end_date)
            preview = self.df_predict.head(5)
            logger.warning('319:in load prediction: %s', preview)

        def update_prediction_addresses_select(self):
            """Refresh the address select with 'all' plus the addresses in ``self.df_predict``."""
            # always fall back to the bare 'all' option first
            self.prediction_address_select.options = ['all']
            if len(self.df_predict) <= 0:
                return
            addresses = self.df_predict['address'].unique().compute()
            self.prediction_address_select.options = ['all'] + list(addresses)

        # the period for which the user wants a prediction
        def make_account_predictions(self, launch=-1):
            """Predict churn for the loaded prediction period and return it as a table.

            Args:
                launch: not read in the body; the method is driven by the
                    ``Launch`` stream, which supplies this keyword.

            Returns:
                An hvplot table with the columns ``address`` and
                ``likely action``, or ``None`` when an exception was logged.
                ``self.metrics_div`` is updated as a side effect.
            """
            try:
                logger.warning("MAKE PREDICTIONS LAUNCHED")
                # NOTE(review): this value is replaced by the loop variable below
                target = list(self.targets['classification'].keys())[0]
                # make
                df = self.df_predict
                #logger.warning("line 363%s",df.head(10))
                # make list of address for prediction select
                # filter if prediction for certain addresses
                #logger.warning('address selected:%s',self.prediction_address_select.value)
                # narrow to one address unless the select is unset, empty or 'all'
                if self.prediction_address_select.value is not None:
                    if len(self.prediction_address_select.value) > 0:
                        if self.prediction_address_select.value not in [
                                'all', ''
                        ]:
                            df = df[df.address ==
                                    self.prediction_address_select.value]

                #logger.warning('line 409 predict-df post filter:%s', df.head(20))
                # make table for display
                # empty default so the returned table exists even without data
                self.predict_df = pd.DataFrame({
                    'address': [],
                    'likely action': []
                })
                for target in list(self.targets['classification'].keys()):
                    if len(df) > 0:

                        # NOTE(review): ``df`` is reassigned inside the loop, so a
                        # second target would receive already normalised and
                        # grouped data.
                        df = self.normalize(df,
                                            timestamp_col='block_timestamp')
                        df = self.group_data(df,
                                             self.groupby_dict,
                                             timestamp_col='block_timestamp')
                        interest_labels = list(df['address'].unique())

                        # run model
                        df = df.fillna(0)
                        X = df[self.feature_list]
                        #logger.warning("df before prediction:%s",X.tail(10))
                        y_pred = self.pl[target].predict(X)
                        logger.warning('y_pred:%s', y_pred)
                        # NOTE(review): y_pred_verbose is only defined for the
                        # 'churned' target; any other target would hit a NameError
                        # below (caught and logged by the outer except).
                        if target == 'churned':
                            y_pred_verbose = [
                                'remain' if x in ["active", 1] else "churn"
                                for x in y_pred
                            ]

                        #---- make table for display
                        # assumes one grouped row per unique address so both lists
                        # have the same length -- TODO confirm against group_data
                        self.predict_df = pd.DataFrame({
                            'address':
                            interest_labels,
                            'likely action':
                            y_pred_verbose
                        })

                        #------ label pools
                        self.predict_df['address'] = self.predict_df[
                            'address'].map(self.poolname_verbose_trun)
                        #logger.warning('self.predict_df:%s',self.predict_df)

                        churn_df = self.predict_df[
                            self.predict_df['likely action'] == 'churn']
                        perc_to_churn = round(
                            100 * len(churn_df) / len(self.predict_df), 1)
                        # drops the last two characters: 'churned' -> 'churn'
                        txt = target[:-2]
                        # NOTE(review): ``text`` is built here but never used in
                        # this branch; only metrics_div_update() changes the div.
                        text = """<div {}>
                        <h3>Percentage likely to {}:</h3>
                        <strong 'style=color:black;'>{}%</strong></div>""".format(
                            self.header_style, txt, perc_to_churn)
                        self.metrics_div_update(data=perc_to_churn)
                    else:

                        text = """<div {}>
                            <br/> <h3>Sorry, address not found</h3>
                            </div>""".format(self.header_style)
                        self.metrics_div.text = text
                    logger.warning("end of %s predictions", target)
                return self.predict_df.hvplot.table(
                    columns=['address', 'likely action'],
                    width=500,
                    title='Account predictions')
            except Exception:
                logger.error("prediction:", exc_info=True)

        def make_tree(self,
                      target='churned',
                      png_path='/home/andre/Downloads/small_tree.png',
                      dot_path='small_tree.dot',
                      estimator_index=5):
            """Export one tree of a fitted forest to a dot file and a PNG image.

            Trains the pipelines first when none exist yet. Errors are logged
            and swallowed.

            Args:
                target: key of the pipeline in ``self.pl`` to take the tree from.
                png_path: path handed to ``self.make_filepath`` for the PNG.
                    The default keeps the previously hard-coded location.
                dot_path: intermediate Graphviz dot file.
                estimator_index: index into the forest's ``estimators_``.
            """
            try:
                if not self.pl:
                    self.rf_clf()
                # Pick a single tree out of the forest. Nothing here limits its
                # depth; it is whatever the forest was trained with.
                forest = self.pl[target].named_steps['rf']
                tree_small = forest.estimators_[estimator_index]
                export_graphviz(tree_small,
                                out_file=dot_path,
                                feature_names=self.feature_list,
                                rounded=True,
                                precision=1)

                # Render the dot file as a png image
                (graph, ) = pydot.graph_from_dot_file(dot_path)
                filepath = self.make_filepath(png_path)
                graph.write_png(filepath)
                logger.warning("TREE SAVED")
            except Exception:
                logger.error("make tree:", exc_info=True)

        def make_feature_importances(self):
            """Return a table ranking the features of every fitted forest.

            Trains the pipelines first when none exist yet. Within each outcome
            the rows are ordered by ascending importance, and the rank counts
            down so that the most important feature gets rank 1. Errors are
            logged and ``None`` is returned.
            """
            try:
                if not self.pl:
                    self.rf_clf()

                outcomes, features, scores, ranks = [], [], [], []
                for target in self.targets['classification']:
                    logger.warning('make feature importances for :%s', target)
                    forest = self.pl[target].named_steps['rf']
                    raw_scores = list(forest.feature_importances_)

                    # (feature, rounded importance) pairs, least important first
                    ordered = sorted(
                        ((name, round(score, 4))
                         for name, score in zip(self.feature_list, raw_scores)),
                        key=itemgetter(1))

                    n_scores = len(raw_scores)
                    outcomes.extend([target] * n_scores)
                    features.extend(pair[0] for pair in ordered)
                    scores.extend(pair[1] for pair in ordered)
                    # n, n-1, ..., 1 to match the ascending importance order
                    ranks.extend(range(n_scores, 0, -1))

                df = pd.DataFrame.from_dict({
                    'outcome': outcomes,
                    'feature': features,
                    'importance': scores,
                    'rank_within_outcome': ranks
                })
                logger.warning('MAKE FEATURE IMPORTANCES FINISHED')
                table_columns = [
                    'outcome', 'feature', 'importance', 'rank_within_outcome'
                ]
                return df.hvplot.table(
                    columns=table_columns,
                    width=600,
                    title="Variables ranked by importance (for each output)")

            except Exception:
                logger.error("Feature importances:", exc_info=True)

        ####################################################
        #               GRAPHS
    def update(attrname, old, new):
        """Date-picker callback: reload the prediction data and refresh every view."""
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")

        period_start, period_end = datepicker_start.value, datepicker_end.value
        thistab.load_prediction_df(period_start, period_end)
        thistab.update_prediction_addresses_select()

        # a new trigger value makes the prediction stream fire again
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        stream_select_variable.event(variable=thistab.inspected_variable)
        notify("ready")

    def update_address_predictions(attrname, old, new):
        """Address-select callback: re-run the predictions for the new selection."""
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")
        # a new trigger value makes the prediction stream fire again
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        notify("ready")

    def update_select_variable(attrname, old, new):
        """Variable-select callback: remember the choice and redraw the box plot."""
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")
        chosen = select_variable.value
        thistab.inspected_variable = chosen
        stream_select_variable.event(variable=thistab.inspected_variable)
        notify("ready")

    # Build the tab: create the Thistab helper, the widgets and streams, train
    # the model, render the tables/plot and lay everything out in one Panel.
    # Any failure is logged and an error tab is returned instead.
    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())

        cols = hyp_variables + [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        # bounds offered by both date pickers
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        # default prediction window: the 5 days ending 50 days before the
        # configured last date
        last_date = dashboard_config['dates']['last_date']
        last_date = last_date - timedelta(days=50)
        first_date = last_date - timedelta(days=5)
        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_variable = Select(title='Filter by variable',
                                 value=thistab.inspected_variable,
                                 options=thistab.feature_list)

        # search by address checkboxes
        # stored on thistab because its methods read and update this widget
        thistab.prediction_address_select = Select(title='Filter by address',
                                                   value='all',
                                                   options=[])
        reset_prediction_address_button = Button(label="reset address(es)",
                                                 button_type="success")

        # ----------------------------------- LOAD DATA
        # load model-making data
        # training window: the 60 days that end where the prediction period starts
        end = datepicker_start.value
        start = end - timedelta(days=60)
        thistab.load_df(start, end)
        thistab.rf_clf()
        # load data for period to be predicted
        thistab.load_prediction_df(datepicker_start.value,
                                   datepicker_end.value)
        thistab.update_prediction_addresses_select()

        # tables
        # re-evaluated whenever stream_launch emits a new ``launch`` value
        hv_account_prediction_table = hv.DynamicMap(
            thistab.make_account_predictions, streams=[stream_launch])
        account_prediction_table = renderer.get_plot(
            hv_account_prediction_table)

        hv_features_table = hv.DynamicMap(thistab.make_feature_importances)
        features_table = renderer.get_plot(hv_features_table)

        hv_accuracy_table = hv.DynamicMap(thistab.accuracy_table)
        accuracy_table = renderer.get_plot(hv_accuracy_table)


        # re-evaluated whenever stream_select_variable emits a new ``variable``
        hv_variable_plot = hv.DynamicMap(thistab.box_plot,
                                 streams=[stream_select_variable])\
            .opts(plot=dict(width=800, height=500))

        variable_plot = renderer.get_plot(hv_variable_plot)

        # add callbacks
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        thistab.prediction_address_select.on_change(
            'value', update_address_predictions)
        reset_prediction_address_button.on_click(thistab.reset_checkboxes)
        select_variable.on_change('value', update_select_variable)

        # put the controls in a single element
        # NOTE(review): the date pickers, address select and reset button are
        # placed in both boxes below -- confirm the same widget instances are
        # meant to appear twice in the layout.
        controls = WidgetBox(select_variable, datepicker_start, datepicker_end,
                             thistab.prediction_address_select,
                             reset_prediction_address_button)

        controls_prediction = WidgetBox(datepicker_start, datepicker_end,
                                        thistab.prediction_address_select,
                                        reset_prediction_address_button)

        # one inner list per grid row, top to bottom
        grid = gridplot(
            [[thistab.notification_div['top']], [Spacer(width=20, height=70)],
             [thistab.section_headers['churn']], [Spacer(width=20, height=70)],
             [accuracy_table.state,
              thistab.stats_information_div()], [features_table.state],
             [thistab.section_headers['variable behavior']],
             [Spacer(width=20, height=30)], [variable_plot.state, controls],
             [thistab.section_headers['predictions']],
             [Spacer(width=20, height=30)],
             [
                 account_prediction_table.state, thistab.metrics_div,
                 controls_prediction
             ], [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title='predictions: accounts by value')
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        # fall back to an error tab carrying the same title
        text = 'predictions: accounts by value'
        return tab_error_flag(text)
Esempio n. 19
0
    def init_climate_wx(self):
        """Build the albedo and greenhouse control widgets and wire their callbacks.

        Returns:
            list: ``[albedo_wx, tau_wx]`` -- a ``WidgetBox`` with the four
            albedo sliders and a ``WidgetBox`` with the greenhouse dropdown,
            the greenhouse slider and the refresh button.
        """

        def _unit_slider(title, value):
            # All albedo/fraction sliders share the 0..1 range in 0.05 steps.
            return Slider(start=0, end=1, step=0.05, value=value, title=title)

        def _albedo_handler(attr_name):
            # Make an on_change handler that stores the new slider value on
            # ``self.<attr_name>`` and then recomputes and redraws the albedo.
            def _handler(attr, old, new):
                setattr(self, attr_name, new)
                self.alpha = self.calc_albedo()
                self._update_albedo_line()

            return _handler

        cloud_frac_slider = _unit_slider('Cloud Fraction', self.f_cloud)
        cloud_albedo_slider = _unit_slider('Cloud Albedo', self.A_cloud)
        land_frac_slider = _unit_slider('Land Fraction', self.f_land)
        land_albedo_slider = _unit_slider('Land Albedo', self.A_land)

        tau_star_opts = [('Mars', '0.125'), ('Earth (100 ppm CO2)', '0.66'),
                         ('Earth (200 ppm CO2)', '0.75'),
                         ('Earth (400 ppm CO2)', '0.84'),
                         ('Earth (800 ppm CO2)', '0.93'),
                         ('Earth (1600 ppm CO2)', '1.02'),
                         ('Earth (3200 ppm CO2)', '1.12'), ('Titan', '3'),
                         ('Venus', '125')]

        greenhouse_dropdown = Dropdown(label='Preset Greenhouse Effect',
                                       button_type='primary',
                                       menu=tau_star_opts)

        # The slider holds the base-10 exponent: its handler stores 10**value
        # in self.tau_star and the dropdown writes log10(preset) into it. The
        # initial value is therefore converted as well (the linear value used
        # to be passed straight in) and rounded onto the slider's 0.1 step.
        tau_star_slider = Slider(start=-1,
                                 end=np.log10(150),
                                 step=0.1,
                                 value=round(float(np.log10(self.tau_star)),
                                             1),
                                 title='Atmosphere Greenhouse Effect (10^x)')

        refresh_s0_button = Button(label='Refresh Solar In & Calculate '
                                   'Hab. Zone')

        def _tau_slider_handler(attr, old, new):
            self.tau_star = 10**new
            self._update_greenhouse_line()

        def _refresh_s0_handler():
            refresh_s0_button.disabled = True
            try:
                self._update_Ts_plot()
            finally:
                # re-enable even when the refresh raises, so the button
                # cannot get stuck in the disabled state
                refresh_s0_button.disabled = False

        def _tau_dropdown_handler(attr, old, new):
            slide_value = np.log10(float(new))
            tau_star_slider.value = slide_value
            # called explicitly so the model is updated even when the slider
            # value did not actually change
            _tau_slider_handler(None, None, slide_value)

        cloud_albedo_slider.on_change('value', _albedo_handler('A_cloud'))
        cloud_frac_slider.on_change('value', _albedo_handler('f_cloud'))
        land_albedo_slider.on_change('value', _albedo_handler('A_land'))
        land_frac_slider.on_change('value', _albedo_handler('f_land'))
        tau_star_slider.on_change('value', _tau_slider_handler)
        refresh_s0_button.on_click(_refresh_s0_handler)
        greenhouse_dropdown.on_change('value', _tau_dropdown_handler)

        albedo_wx = WidgetBox(land_albedo_slider, land_frac_slider,
                              cloud_albedo_slider, cloud_frac_slider)
        tau_wx = WidgetBox(greenhouse_dropdown, tau_star_slider,
                           refresh_s0_button)

        return [albedo_wx, tau_wx]
Esempio n. 20
0
def buildPlot():
    """Build the "Historical Volatility" plot, its controls, and the root layout.

    Relies on names defined elsewhere in the module: ``fruit_df``, ``ddOpts``,
    ``descDf``, ``mu``, ``ts_to_epoch`` and ``calc_range``. Every press of
    "Add Line" plots the rolling mean of the linear-fitted ``PriceVolatility``
    for the current widget selection and appends summary statistics for that
    line to the module-level ``descDf``.

    Returns:
        The outermost ``VBox`` holding the input widgets, the plot with its
        statistics table, the legend/visibility column and the debug console.
    """
    #####################Setup
    # Grab graph colors
    colors = SEABORN_PALETTES['bright']

    # Grab the distinct fruit qualities and discard float entries.
    # A filtered copy is built: popping from the list while walking a
    # snapshot of it shifts the indices and removes the wrong entries.
    quals = fruit_df.reset_index()
    quals = quals['FruitQuality'].unique().tolist()
    unique_FQs = [q for q in quals if not isinstance(q, float)]

    # a little math to get the epoch time to set the initial x range
    minDate = ts_to_epoch(fruit_df['Date'].min())
    maxDate = ts_to_epoch(fruit_df['Date'].max())

    ###########Create and format the plot
    plot = figure(
        x_axis_type="datetime",
        plot_width=600,
        plot_height=400,
        tools=[PanTool(),
               WheelZoomTool(),
               SaveTool(),
               BoxZoomTool()],
        x_range=DataRange1d(
            start=minDate, end=maxDate
        ),  # sets the initial date range to the limits of the data
        y_range=DataRange1d(start=0, end=1),
        name='the_plot',
        toolbar_location='above')
    # some styling
    plot.title.text = "Historical Volatility"
    plot.xaxis.axis_label = "Trade Date"
    plot.yaxis.axis_label = "Vol"
    plot.background_fill_color = '#EAEBF0'
    plot.xgrid.grid_line_color = 'white'
    plot.ygrid.grid_line_color = 'white'
    plot.xaxis.axis_line_color = 'white'
    plot.xaxis.major_tick_line_color = 'white'
    plot.xaxis.minor_tick_line_color = 'white'
    plot.yaxis.axis_line_color = 'white'
    plot.yaxis.major_tick_line_color = 'white'
    plot.yaxis.minor_tick_line_color = 'white'
    plot.toolbar.logo = None

    # renderers of every line added so far, in insertion order
    lines = []

    ##############Create the widgets

    # a console style window to show debug messages TODO: add on/off functionality
    debug = PreText(text="", width=1200, height=500)

    # echoes the latest debug message in a place more visible for the user
    user_message = Paragraph(text='')

    # Asset_Class, Product, and From dropdown boxes. Each starts on its first option.
    asCls = Select(title="Asset Class",
                   options=ddOpts['Asset_Class'].unique().tolist())
    asCls.value = asCls.options[0]
    prod = Select(title="Products",
                  options=ddOpts[ddOpts['Asset_Class'] == asCls.value]
                  ['Product'].unique().tolist())
    prod.value = prod.options[0]
    whereFrom = Select(title="From",
                       options=ddOpts[(ddOpts['Asset_Class'] == asCls.value)
                                      & (ddOpts['Product'] == prod.value)]
                       ['From'].unique().tolist())
    whereFrom.value = whereFrom.options[0]
    FQslider = Slider(title='Fruit Quality',
                      start=min(unique_FQs),
                      end=max(unique_FQs),
                      step=1)

    # the amount of days back to look for the data
    days_back = TextInput(title='Days ago', value='365')
    days_back_buttons = RadioButtonGroup(
        labels=['10', '30', '90', '180', '365', '730'], active=4)

    # the date to linear fit to
    fixed_date_buttons = RadioButtonGroup(
        labels=['30', '60', '90', '120', '180', '365'], active=2)
    fixed_date = TextInput(title='Days to Exp', value='90')

    # the amount of days with which to calculate the rolling mean
    rolling_days_buttons = RadioButtonGroup(labels=['1', '2', '5', '10'],
                                            active=0)
    rolling_days = TextInput(title='Rolling Mean Days', value='1')

    # checkbox group, one entry per plotted line, that toggles line visibility
    line_onOff = CheckboxGroup(width=400, name='line_onOff')

    # the associated colors to act as a legend for line_onOff
    legendDiv = Div(width=50)

    # button to add a line
    addLine = Button(label="Add Line")

    # an html rendered table of statistics for each line
    descriptions = Div(text='', width=500)

    # resizes the plot
    rszButton = Button(label='resize')

    ##########Define functions associated with the widgets

    def updateDebug(inString):
        """Show ``inString`` to the user and append it, timestamped, to the debug console."""
        inString = str(inString)
        user_message.text = inString
        oldText = debug.text
        newText = ("*- " + str(datetime.now()) + " : " + inString)
        debug.text = oldText + '\n' + newText

    def _legend_html():
        """Return the HTML for one colour swatch per plotted line."""
        swatches = ''.join(
            '<br><div style="background-color: %s; float:up; padding: 4px 4px 4px 4px"></div><br>'
            % line.glyph.line_color for line in lines)
        return '<div>' + swatches + '</div>'

    def _force_int_text(widget):
        """Normalise a text input to an integer string, or '0' when it is not an integer."""
        try:
            widget.value = str(int(widget.value))
        except ValueError:
            widget.value = '0'
            updateDebug('please type an integer')

    # changes the available products to match the user selected asset class
    def asClsChange(attrname, old, new):
        prod.options = ddOpts[ddOpts['Asset_Class'] ==
                              asCls.value]['Product'].unique().tolist()
        prod.value = prod.options[0]

    # changes the available "From" entries to match the user selected product
    def prodChange(attrname, old, new):
        whereFrom.options = ddOpts[(ddOpts['Asset_Class'] == asCls.value) & (
            ddOpts['Product'] == prod.value)]['From'].unique().tolist()
        whereFrom.value = whereFrom.options[0]

    # links the days back button and text box
    def days_back_buttonChange(attrname, old, new):
        days_back.value = days_back_buttons.labels[days_back_buttons.active]

    # checks that the users input is an int
    def days_backChange(attrname, old, new):
        _force_int_text(days_back)

    # links the fixed date button and text box
    def fixed_date_buttonChange(attrname, old, new):
        fixed_date.value = fixed_date_buttons.labels[fixed_date_buttons.active]

    # checks that the users input is an int
    def fixed_dateChange(attrname, old, new):
        _force_int_text(fixed_date)

    # links the rolling days button and text box
    def rolling_days_buttonsChange(attrname, old, new):
        rolling_days.value = rolling_days_buttons.labels[
            rolling_days_buttons.active]

    # checks that the users input is an int
    def rolling_daysChange(attrname, old, new):
        _force_int_text(rolling_days)

    # fits the plot to the plotted lines, or to the default ranges when none are shown
    def resize():
        if not line_onOff.active or not line_onOff.labels:
            plot.x_range.start = ts_to_epoch(fruit_df['Date'].min())
            plot.x_range.end = ts_to_epoch(fruit_df['Date'].max())
            plot.y_range.start = 0
            plot.y_range.end = 100
        else:
            xmin, xmax, ymin, ymax = calc_range(lines)
            plot.x_range.start = xmin
            plot.x_range.end = xmax
            plot.y_range.start = ymin
            plot.y_range.end = ymax

    # turn lines on or off
    def line_onOffChange(attrname, old, new):
        active = set(line_onOff.active)
        for i, _ in enumerate(line_onOff.labels):
            lines[i].glyph.visible = i in active
        legendDiv.text = _legend_html()
        resize()

    # adds a line to the graph
    def grphUpdt():
        global descDf

        # debug messages; the start time is kept to report the total time taken
        updateDebug("Starting")
        updateDebug("total dataframe size: " + str(fruit_df.shape))
        stTime = datetime.now()

        # the value to linear fit to
        fit_to = int(fixed_date.value)

        # rolling-mean window; the validators fall back to '0' on bad input and
        # negative integers pass validation, neither of which is a usable window
        window = int(rolling_days.value)
        if window < 1:
            updateDebug('Rolling Mean Days must be at least 1')
            return

        # earliest date (exclusive) to include
        cutoff = date.today() - timedelta(days=int(days_back.value))

        # stays empty if the FQ lookup below fails, which the empty check then reports
        graphData = pd.DataFrame({
            'Date': [],
            'PriceVolatility': [],
            'Days_to_Exp': []
        })

        # grab the appropriate subset of the whole dataframe based on the users input into the widgets
        updateDebug("querying the data..")

        try:
            workingDf = fruit_df.loc[asCls.value, prod.value, whereFrom.value]
        except KeyError:
            updateDebug(
                'no data with that combination of Asset Class, Product, From')
            return

        try:
            workingDf = workingDf[[
                'Date', 'PriceVolatility', 'Days_to_Exp'
            ]][(workingDf['Date'] > cutoff)]
        except KeyError:
            updateDebug(
                'no data with that combination of Asset Class, Product, From, and days back'
            )
            return
        updateDebug("done breaking down df")

        # a hook in the case that the users inputs resulted in an empty dataframe
        if workingDf.empty:
            updateDebug(
                'no data with that combination of Asset Class, Product, From, and days back'
            )
            return

        # whittle down the data to only contain the user specified FQ
        try:
            graphData = workingDf.loc[int(FQslider.value)].copy()
        except KeyError:
            updateDebug('no data with that FQ')

        # another empty graph hook
        if graphData.empty:
            updateDebug(
                'no data with that combination of Asset Class, Product, Contract Category, FQ, and days back'
            )
            return
        updateDebug('grabed correct FQs')

        # calculate linear fit on the current subset
        updateDebug('calculating linear fit...')
        graphData = mu.linearFit(fit_to=fit_to,
                                 group_on_column='Date',
                                 df=graphData,
                                 fit_column='Days_to_Exp',
                                 on_columns=['PriceVolatility'])
        updateDebug('finished with linear fit')

        # a few more debug messages
        updateDebug(
            "working df qry: Asset_Class = %s and Product = %s and From = %s and Date > %s "
            % (asCls.value, prod.value, whereFrom.value, str(cutoff)))
        updateDebug("graph data shape: " + str(workingDf.shape))

        # need more rows than the window, otherwise the rolling mean has no complete window
        if graphData.shape[0] > window:

            this_legend = '%s - %s FQ: %s Days to Exp: %s From: %s Rolling Days: %s' % (
                prod.value, whereFrom.value, int(
                    FQslider.value), fixed_date.value, str(cutoff),
                rolling_days.value)

            # rolling mean of the fitted values; the first window-1 entries are
            # incomplete windows (NaN) and are dropped
            smoothed = graphData['PriceVolatility'].rolling(
                window=window).mean()[window - 1:]

            # add the line and remember its renderer; colours cycle through the
            # palette, so they repeat once there are more lines than colours
            updateDebug('adding line to plot')
            lines.append(
                plot.line(graphData.index.values[window - 1:],
                          smoothed,
                          line_width=3,
                          color=colors[len(lines) % len(colors)]))
            updateDebug("updated graph")

            # one row of describe() statistics for this line, labelled with its
            # legend; built from the smoothed series directly so no temporary
            # column has to be added to and dropped from graphData
            stats = smoothed.rename(this_legend).to_frame().describe(
                percentiles=[]).transpose()
            descDf = stats if descDf is None else pd.concat([descDf, stats])

            descDf = descDf.round(1)
            descriptions.text = descDf.to_html().replace('\\n', '')

            # add the name of the line to the checkbox so that it can be turned off and on
            line_onOff.labels.append(this_legend)
            line_onOff.active.append(len(line_onOff.labels) - 1)
            legendDiv.text = _legend_html()
            ##leaving this in case we get around to figuring out the hover tool
            ##formats the date values for the hover tool, currently commented out until we, or bokeh, fix the hover tool for multiple lines
            #formDates= pd.to_datetime(graphData['Date'] ,format="%m-%d-%Y")
            #lines[-1].data_source.data['formDates'] = formDates.apply(lambda x: x.strftime('%m-%d-%Y'))

            ##Displays the amount of time it took to draw the line, as well as the number of points in the graph
            updateDebug("updated y vals, with rolling mean calculated")
            updateDebug(
                str(datetime.now() - stTime) + " FOR " +
                str(len(lines[-1].data_source.data['x'])) + " points")
        else:
            updateDebug("There's no data to display")

    #######Link widgets to their associated functions
    asCls.on_change('value', asClsChange)
    prod.on_change('value', prodChange)
    days_back_buttons.on_change('active', days_back_buttonChange)
    days_back.on_change('value', days_backChange)
    fixed_date_buttons.on_change('active', fixed_date_buttonChange)
    fixed_date.on_change('value', fixed_dateChange)
    rolling_days_buttons.on_change('active', rolling_days_buttonsChange)
    rolling_days.on_change('value', rolling_daysChange)
    line_onOff.on_change('active', line_onOffChange)
    addLine.on_click(grphUpdt)
    rszButton.on_click(resize)

    # Formatting
    fixed_date_box = WidgetBox(fixed_date, fixed_date_buttons)
    days_back_box = WidgetBox(days_back, days_back_buttons)
    rolling_days_box = WidgetBox(rolling_days, rolling_days_buttons)
    widgets = [
        asCls, prod, whereFrom, FQslider, days_back_box, fixed_date_box,
        rolling_days_box, addLine, rszButton, user_message
    ]
    plot_w_description = VBox(plot, descriptions, width=700)
    pwd_w_leg = HBox(plot_w_description,
                     VBox(legendDiv),
                     VBox(line_onOff),
                     width=plot_w_description.width + line_onOff.width + 100,
                     name='div_to_save')
    input_box = VBox(*widgets, width=400, height=1200)
    total_box = HBox(VBox(input_box),
                     VBox(pwd_w_leg),
                     width=input_box.width + pwd_w_leg.width + 100,
                     height=1200)
    tot_w_debug = VBox(total_box, VBox(HBox(debug)))

    resize()
    return tot_w_debug
Esempio n. 21
0
        'rank': list(rank.values()),
        'year': [db.column_names] * len(rank),
        'color': Category20[14]
    })
# Rank chart: multi_line draws from the 'year' and 'rank' columns of
# `ranksource`, coloured by its 'color' column.
p = Figure(plot_width=1000,
           plot_height=300,
           x_axis_label='Year',
           y_axis_label='Rank',
           toolbar_location='above',
           title='재배면적 상위 10개종목 순위변화')
p.multi_line('year', 'rank', alpha=1, color='color', source=ranksource)
# Hover tooltip shows the `crop` field of the hovered line.
p.add_tools(HoverTool(tooltips=[('Crop', '@crop')]))

# Crop selector: choosing a crop makes the chart data follow the selection.
select = Select(title="Crop", value="감자", options=crops)


def update_crop(attr, old, new):
    """Reload ``source`` with the data for the crop currently chosen in ``select``.

    Bokeh ``on_change`` callback; ``attr``, ``old`` and ``new`` are accepted to
    satisfy the callback signature and are not used.
    """
    chosen_crop = str(select.value)
    source.data = get_data(chosen_crop)


# Run update_crop every time the selected crop changes.
select.on_change('value', update_crop)

# Heading placed above the crop selector in `chartwidget`.
seconddiv = Div(text="""<h1>읍면동별 작물 시계열 차트</h1>""", width=1000)

# Page layout, top to bottom: ranking widgets, rank chart, crop selector
# block, then the p1..p4 figures; the column is registered as the document root.
rankwidget = WidgetBox(titlediv, data_table, para, cums_table)
chartwidget = WidgetBox(seconddiv, select)
layout = column(rankwidget, p, chartwidget, p1, p2, p3, p4)
curdoc().add_root(layout)
Esempio n. 22
0
    animate_start = animate_start + 2000
    p_trade_feed.x_range.start = animate_start
    p_trade_feed.x_range.end = animate_start + 600000


# Handle returned by add_periodic_callback in animate(); None until playback
# has been started once. animate() passes it to remove_periodic_callback.
callback_id = None


def animate():
    """Toggle playback when the play/pause button is clicked.

    When the button shows "Play", switch it to "Pause" and register
    ``animate_update`` as a periodic callback. Otherwise switch it back to
    "Play", reset the module-level ``start``/``end`` to 14:00 and 15:00 UTC on
    2019-03-03 (epoch milliseconds) and remove the periodic callback.
    """
    global callback_id, start, end

    if button.label != '► Play':
        # Currently playing: pause, rewind the window bounds, stop the callback.
        button.label = '► Play'
        window_open = datetime(2019, 3, 3, 14, 0, 0, tzinfo=timezone.utc)
        window_close = datetime(2019, 3, 3, 15, 0, 0, tzinfo=timezone.utc)
        start = window_open.timestamp() * 1000
        end = window_close.timestamp() * 1000
        curdoc().remove_periodic_callback(callback_id)
        return

    # Currently paused: start playing.
    button.label = '❚❚ Pause'
    callback_id = curdoc().add_periodic_callback(animate_update, 1)


# Play/pause toggle; animate() flips the label and starts or stops the
# periodic callback.
button = Button(label='► Play', width=60)
button.on_click(animate)

# Two-column page: the left column ends with the play button, the right
# column holds the remaining three plots.
left = Column(p_trade_feed, p_candlestick, p_trade_feed_hist,
              WidgetBox(button))
right = Column(p_order_book, p_trade_feed_distribution, p_buy_sell_pie)

curdoc().add_root(Row(left, right))
Esempio n. 23
0
def twitter_loader_tab(panel_title):
    """Build the Twitter search/sentiment tab.

    Creates a ``TwitterLoader``, runs an initial search, renders three
    HoloViews dynamic plots (sentiment over time, time between tweets, raw
    results table) and lays them out with the search controls.

    Args:
        panel_title: title given to the returned panel.

    Returns:
        A ``Panel`` wrapping the layout, or ``tab_error_flag(panel_title)``
        when anything in the setup raises.
    """
    class TwitterLoader():
        """Holds the Twitter API handle, the search settings and the fetched tweets."""

        def __init__(self, search_term='beiber'):
            # TWITTER SETUP
            self.api = None
            self.topic = search_term

            self.options = {'messages': [str(x) for x in range(10, 1000, 50)]}
            self.limits = {
                'messages': int(self.options['messages'][0]),
            }
            self.hidden_path = dashboard_config['hidden_path']
            self.DATEFORMAT = "%Y-%d-%m %H:%M:%S"
            self.df = None
            min_date = datetime.today() - timedelta(days=7)
            print(min_date)
            self.selects = {
                'window':
                Select(title='Select rolling mean window',
                       value='1',
                       options=[str(x) for x in range(1, 20, 2)]),
                'date_since':
                DatePicker(title="Tweets since:",
                           min_date=min_date,
                           max_date=datetime.today(),
                           value=min_date)
            }
            self.selects_values = {
                'window': int(self.selects['window'].value),
                'date_since': self.selects['date_since'].value
            }
            # resample choices: '30Min', '60Min', ... '330Min'
            self.resample_period = {'menu': []}
            for val in range(30, 350, 30):
                self.resample_period['menu'].append(str(val) + 'Min')
            self.resample_period['value'] = self.resample_period['menu'][0]
            # DIV VISUAL SETUP
            self.trigger = -1
            self.html_header = 'h2'
            self.margin_top = 150
            self.margin_bottom = -150

            self.div_style = """ 
                           style='width:350px; margin-left:25px;
                           border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                       """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.page_width = 1250
            txt = """<hr/>
                               <div style="text-align:center;width:{}px;height:{}px;
                                      position:relative;background:black;margin-bottom:200px">
                                      <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                               </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'twitter':
                self.section_header_div(text='Twitter search results:',
                                        width=600,
                                        html_header='h2',
                                        margin_top=155,
                                        margin_bottom=-155),
            }

            # ----- UPDATED DIVS END

        # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a ``Div`` containing ``text`` wrapped in an ``html_header`` tag."""
            # NOTE(review): the template already has a '-' before the
            # margin-bottom placeholder, so the negative defaults render as
            # "margin-bottom:--150px" - confirm whether that is intended.
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Show ``text`` in both the top and bottom notification banners."""
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                         position:relative;background:black;">
                                         <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                   </div>""".format(self.page_width, 50, text)
            for banner in self.notification_div.values():
                banner.text = txt

        def title_div(self, text, width=700):
            """Return a ``Div`` showing ``text`` as an ``h2`` heading."""
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        # //////////////////////////  DIVS SETUP END   /////////////////////////////////

        # /////////////////////////// UTILS BEGIN ///////////////////////////

        def twitter_datetime_to_epoch(self, ts):
            """Parse a Twitter timestamp string.

            Returns:
                ``(naive_datetime, epoch_seconds)``: the timestamp round-tripped
                through ``self.DATEFORMAT`` (which drops the timezone), and the
                POSIX timestamp of the timezone-aware value.
            """
            ts = datetime.strptime(ts, '%a %b %d %H:%M:%S %z %Y')
            # datetime has no created_at(); timestamp() gives the epoch value
            ts_epoch = ts.timestamp()
            ts = datetime.strftime(ts, self.DATEFORMAT)
            ts = datetime.strptime(ts, self.DATEFORMAT)
            return ts, ts_epoch

        def write_to_file(self):
            """Write ``self.df`` to a tab-separated file named after the topic and limits."""
            try:
                # self.limits only defines 'messages' in this class; fall back
                # to a placeholder instead of failing on the missing 'time' key
                filename = """{}_searches_for_last_{}sec_or_last_{}messages.csv""".format(
                    self.topic, self.limits.get('time', 'NA'),
                    self.limits['messages'])
                self.df.to_csv(filename, sep='\t', index=False)
            except Exception:
                logger.error('Error writing to file', exc_info=True)

        # /////////////////////////// UTILS END /////////////////////
        def reset_data(self):
            """Forget the previously fetched tweets."""
            self.df = None

        def get_credentials(self, filename='twitter_credentials.json'):
            """Create ``self.api`` from the credentials JSON (if not yet created) and verify it.

            The file is looked up at ``self.hidden_path + filename`` relative to
            this module's directory. Failures are reported, not raised.
            """
            try:
                filename = self.hidden_path + filename
                filepath = join(dirname(__file__), filename)
                print(filepath)
                if self.api is None:
                    with open(filepath, 'r') as f:
                        credentials_dict = json.load(f)
                    auth = tw.OAuthHandler(credentials_dict['consumer_key'],
                                           credentials_dict['consumer_secret'])
                    auth.set_access_token(
                        credentials_dict['access_token_key'],
                        credentials_dict['access_token_secret'],
                    )
                    self.api = tw.API(auth, wait_on_rate_limit=True)
                logger.info('CREDENTIALS LOADED')
                try:
                    self.api.verify_credentials()
                    print("Authentication OK")
                except Exception:
                    print("Error during authentication")
            except Exception:
                print('credentials not loaded')
                logger.error('credentials not loaded', exc_info=True)

        def load_data_about_topic(self):
            """Search for ``self.topic`` and store the results in ``self.df``.

            Pages backwards with ``max_id`` until more than
            ``self.limits['messages']`` tweets are collected or a page comes
            back empty. ``self.df`` gets the columns ``created_at`` and
            ``text``. Errors are logged, not raised; ``self.df`` is left
            unchanged when nothing is found or the load fails.
            """
            try:
                if self.api is None:
                    self.get_credentials()
                logger.warning('LINE 186:%s,messages=%s', self.topic,
                               self.limits['messages'])
                # initialize a list to hold all the tweepy Tweets
                alltweets = []

                # make initial request for most recent tweets
                new_tweets = self.api.search(q=self.topic,
                                             count=self.limits['messages'])

                # save most recent tweets
                alltweets.extend(new_tweets)

                # nothing matched: there is no oldest id to page back from
                if not alltweets:
                    logger.warning('no tweets found for topic:%s', self.topic)
                    return

                # save the id of the oldest tweet less one
                oldest = alltweets[-1].id - 1

                # keep grabbing tweets until the limit is passed or none are left
                done = False
                while not done:
                    print(f"getting tweets before {oldest}")

                    # all subsequent requests use the max_id param to prevent duplicates
                    new_tweets = self.api.search(q=self.topic,
                                                 count=100,
                                                 max_id=oldest,
                                                 tweet_mode='extended')

                    # save most recent tweets
                    alltweets.extend(new_tweets)
                    done = (len(alltweets) > self.limits['messages']
                            or not new_tweets)
                    # update the id of the oldest tweet less one
                    oldest = alltweets[-1].id - 1

                    print(f"...{len(alltweets)} tweets downloaded so far")

                # transform the tweepy tweets into rows of [created_at, text]
                results = []
                for tweet in alltweets:
                    try:
                        # tweets fetched with tweet_mode='extended' carry their
                        # text in full_text; the first page carries it in text
                        body = getattr(tweet, 'full_text', None)
                        if body is None:
                            body = tweet.text
                        results.append([tweet.created_at, body])
                    except AttributeError:
                        print("skipped this one")

                self.df = pd.DataFrame(data=results,
                                       columns=['created_at', 'text'])
                logger.warning('LINE 211 self.df:%s', self.df.head(20))
            except Exception:
                logger.error('error in loading data', exc_info=True)

        def run(self):
            """Fetch tweets for the current topic; errors are logged."""
            try:
                self.load_data_about_topic()
                # self.write_to_file()

            except Exception:
                logger.error('run', exc_info=True)

        # #################################### PLOTS ######################################
        def sentiment_analysis(self, launch=1):
            """Return a line plot of mean pos/neg/neu scores per resample period.

            ``launch`` is not used in the body; it only receives the stream
            value that triggers a redraw. Returns ``None`` (after logging) on
            error.
            """
            try:
                # work on a copy so the score columns are not written onto a
                # slice of self.df
                df = self.df[['text', 'created_at']].copy()
                cols = ['pos', 'neg', 'neu']
                for col in cols:
                    if col not in df.columns:  # create only once
                        df[col] = 0

                # presumably sentiment_analyzer_scores returns a
                # (pos, neg, neu) triple per text - verify against its definition
                df['pos'], df['neg'], df['neu'] = zip(
                    *df['text'].map(sentiment_analyzer_scores))
                df = df.fillna(0)
                logger.warning('resample period:%s',
                               self.resample_period['value'])
                df = df.set_index('created_at').resample(self.resample_period['value']) \
                    .agg({'pos': 'mean',
                          'neg': 'mean',
                          'neu': 'mean'})
                df = df.reset_index()
                df = df.fillna(0)
                logger.warning('LINE 307, df:%s', df.head(30))

                p = df.hvplot.line(x='created_at',
                                   y=cols,
                                   width=1200,
                                   height=600)
                return p
            except Exception:
                logger.error('run', exc_info=True)

        def visual(self, launch=1):
            """Return a table of the fetched tweets; ``None`` (after logging) on error."""
            try:
                p = self.df.hvplot.table(columns=['created_at', 'text'],
                                         width=1200,
                                         height=2000)
                return p
            except Exception:
                logger.error('output data', exc_info=True)

        def jitter(self, launch=1):
            """Return a line plot of the time gap between consecutive tweets.

            Returns ``None`` (after logging) on error.
            """
            try:
                df = self.df.copy()
                # difference to the next row, sign-flipped
                df['jitter'] = df['created_at'].diff(periods=-1)
                df['jitter'] = df['jitter'] * -1
                df = df.dropna()

                p = df.hvplot.line(x='created_at',
                                   y='jitter',
                                   width=1200,
                                   height=600)
                return p
            except Exception:
                logger.error('output data', exc_info=True)

    def update_tweet_search():
        """Button callback: rerun the search with the current inputs and redraw all plots."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.reset_data()
        thistab.limits['messages'] = int(inputs['messages_limit'].value)
        thistab.topic = inputs['search_term'].value
        thistab.run()
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        # the stream is defined with a single `launch` parameter, so the event
        # has to use that keyword
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample_period(attr, old, new):
        """Select callback: store the new resample period and redraw the sentiment plot."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.resample_period['value'] = new
        thistab.trigger += 1
        # stream_launch_rolling_mean.event(launch=thistab.trigger)
        stream_launch_sentiment.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        thistab = TwitterLoader()
        thistab.run()

        # MANAGE STREAM
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_rolling_mean = streams.Stream.define('Launch',
                                                           launch=-1)()
        stream_launch_sentiment = streams.Stream.define('Launch', launch=-1)()

        # DYNAMIC GRAPHS/OUTPUT
        hv_visual = hv.DynamicMap(thistab.visual, streams=[stream_launch])
        visual = renderer.get_plot(hv_visual)

        hv_jitter = hv.DynamicMap(thistab.jitter, streams=[stream_launch])
        jitter = renderer.get_plot(hv_jitter)

        hv_sentiment_analysis = hv.DynamicMap(
            thistab.sentiment_analysis, streams=[stream_launch_sentiment])
        sentiment_analysis = renderer.get_plot(hv_sentiment_analysis)

        # CREATE WIDGETS
        inputs = {
            'search_term':
            TextInput(title='Enter search term. For list, use commas',
                      value=thistab.topic),
            'messages_limit':
            Select(title='Select messages limit (5000 = unbounded)',
                   value=str(thistab.limits['messages']),
                   options=thistab.options['messages']),
            'resample':
            Select(title='Select resample period',
                   value=thistab.resample_period['value'],
                   options=thistab.resample_period['menu'])
        }
        tweet_search_button = Button(
            label='Enter filters/inputs, then press me', button_type="success")

        # WIDGET CALLBACK
        tweet_search_button.on_click(update_tweet_search)
        inputs['resample'].on_change('value', update_resample_period)

        # COMPOSE LAYOUT
        # group controls (filters/input elements)
        controls_tweet_search = WidgetBox(
            inputs['search_term'],
            inputs['messages_limit'],
            tweet_search_button,
        )

        controls_rolling_mean = WidgetBox(thistab.selects['window'], )

        controls_resample_period = WidgetBox(inputs['resample'])

        grid = gridplot([
            [thistab.notification_div['top']],
            [Spacer(width=20, height=70)],
            [thistab.title_div('Sentiment analysis of tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [sentiment_analysis.state, controls_resample_period],
            [thistab.title_div('Time between tweets:', 1000)],
            [Spacer(width=20, height=30)],
            [jitter.state],
            [
                thistab.title_div(
                    'Twitter search results (use filters on right, then click button):',
                    1000)
            ],
            [Spacer(width=20, height=30)],
            [visual.state, controls_tweet_search],
            [thistab.notification_div['bottom']],
        ])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('Twitter loader:', exc_info=True)
        return tab_error_flag(panel_title)
Esempio n. 24
0
def aion_analytics(doc):
    """Build the tab-selection landing page and attach it to ``doc``.

    A 'Tab Selection' panel is created containing a checkbox group, a launch
    button and an information box. Pressing the button builds every ticked
    dashboard tab that has not been launched before and adds it to the
    ``Tabs`` root of ``doc``.

    Any exception raised while the page is being set up is logged and
    swallowed, so the function always returns ``None``.

    Args:
        doc: document object; only ``add_root`` and ``clear`` are called on it.
    """
    class SelectionTab:
        """State shared by the callbacks defined below."""

        def __init__(self):
            self.selected_tabs = []
            self.tablist = []
            # titles of the tabs that have already been launched
            self.selected_tracker = []
            self.div_style = """ style='width:300px; margin-left:-200%;
                       border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                       """
            self.page_width = 1200

        def notification_updater(self, text):
            """Return ``text`` wrapped in the black notification banner markup."""
            txt = """<div style="text-align:center;background:black;width:100%;">
                     <h4 style="color:#fff;">
                     {}</h4></div>""".format(text)
            return txt

        def get_selections(self, checkboxes):
            """Store and return the labels of the currently active checkboxes."""
            self.selected_tabs = [
                checkboxes.labels[i] for i in checkboxes.active
            ]
            return self.selected_tabs

    selection_tab = SelectionTab()
    # SETUP BOKEH OBJECTS
    try:
        tablist = []
        TABS = Tabs(tabs=tablist)

        def banner(message):
            # Page-wide black banner markup used for the notification div.
            return """
                <div style="text-align:center;background:black;width:{}px;margin-bottom:100px;">
                        <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                </div>""".format(selection_tab.page_width, message)

        @gen.coroutine
        def load_callstack(tablist):
            """Build every selected, not-yet-launched tab and refresh TABS."""
            lst = selection_tab.get_selections(selection_checkboxes)

            # (checkbox label, factory) pairs, in the order the tabs are added.
            # Each factory is only called when its tab is actually requested.
            tab_loaders = (
                ('EDA: crypto clusters',
                 lambda title: crypto_clusters_eda_tab(cryptocurrencies,
                                                       panel_title=title)),
                ('clustering: cryptocurrencies',
                 lambda title: cryptocurrency_clustering_tab(title)),
                ('predictions: accounts by value',
                 lambda title: account_predictive_tab(
                     page_width=selection_tab.page_width)),
                ('Forecasting: Rentals',
                 lambda title: forecasting_bcc_rentals_visitor_tab(title)),
                ('KPI: Rentals',
                 lambda title: kpi_bcc_rentals_visitor_tab(panel_title=title)),
                ('EDA: Rentals',
                 lambda title: eda_bcc_rentals_visitor_tab(panel_title=title)),
            )

            for title, build_tab in tab_loaders:
                if title not in lst:
                    continue
                if title in selection_tab.selected_tracker:
                    continue  # already launched once
                panel = yield build_tab(title)
                # list.append() accepts positional arguments only
                selection_tab.selected_tracker.append(title)
                if panel not in tablist:
                    tablist.append(panel)

            # De-duplicate while keeping insertion order so the tabs do not
            # get shuffled on every launch.
            unique_tabs = []
            for panel in tablist:
                if panel not in unique_tabs:
                    unique_tabs.append(panel)
            TABS.update(tabs=unique_tabs)

        @gen.coroutine
        def select_tabs():
            """Button callback: show a loading banner, then load the tabs."""
            notification_div.text = banner('Tabs are loading')
            yield load_callstack(tablist)
            notification_div.text = banner(
                'Welcome to BCC Data Science Portal')

        @gen.coroutine
        def update_selected_tabs():
            """Clear the document and rebuild it with only the selection tab."""
            notification_div.text = banner('Refresh underway')

            doc.clear()
            # local list: the enclosing ``tablist`` is left untouched
            tablist = []
            selection_checkboxes.active = []

            mgmt = Panel(child=grid, title='Tab Selection')
            tablist.append(mgmt)
            TABS.update(tabs=tablist)
            doc.add_root(TABS)
            yield load_callstack(tablist)
            notification_div.text = banner(
                'Welcome to Aion Data Science Portal')

        # -----------------------
        txt = """
                <div {}>
                <h3 style='color:blue;text-align:center'>Info:</h3>
                <ul style='margin-top:-10px;height:200px;'>
                <li>
                Select the tab(s) you want activated
                </li>
                <li>
                Then click the 'launch activity' button.
                </li>
                </ul>
            </div>
            """.format(selection_tab.div_style)

        information_div = Div(text=txt, width=400, height=250)
        footer_div = Div(text="""<hr/><div style="width:{}px;height:{}px;
                              position:relative;background:black;"></div>""".
                         format(selection_tab.page_width, 50),
                         width=selection_tab.page_width,
                         height=100)
        txt = """
            <div style="text-align:center;background:black;width:{}px;margin-bottom:100px;">
                    <h1 style="color:#fff;margin-bottom:300px">{}</h1>
            </div>""".format(selection_tab.page_width,
                             'Welcome to Aion Data Science Portal')
        notification_div = Div(text=txt,
                               width=selection_tab.page_width,
                               height=40)

        # choose startup tabs
        selection_checkboxes = CheckboxGroup(
            labels=labels, active=[DEFAULT_CHECKBOX_SELECTION])
        run_button = Button(label='Launch tabs', button_type="success")
        run_button.on_click(select_tabs)

        # setup layout
        controls = WidgetBox(selection_checkboxes, run_button)

        # create the dashboards
        grid = gridplot(
            [[notification_div],
             [Spacer(width=50, height=2, sizing_mode='scale_width')],
             [controls, information_div], [footer_div]])

        # setup launch tabs
        mgmt = Panel(child=grid, title='Tab Selection')

        tablist.append(mgmt)
        TABS.update(tabs=tablist)
        doc.add_root(TABS)
    except Exception:
        logger.error("TABS:", exc_info=True)
Esempio n. 25
0
def tab2():
    """Build the 'Percentage of confirmed cases by race' panel.

    Reads ``cdph-race-ethnicity.csv``, keeps the rows whose ``age`` is
    ``'all'`` and draws a grouped bar chart of confirmed-case percentage
    against population percentage. A date picker swaps in the rows of the
    chosen date.

    Returns:
        A ``Panel`` holding the chart and the date picker side by side.
    """
    percentages = ['confirmed cases', 'general population']
    regions = ['asian', 'black', "cdph-other", 'latino', 'other', 'white']
    # one (race, category) factor per bar, race-major order
    x = []
    for race in regions:
        for percent in percentages:
            x.append((race, percent))

    data = pd.read_csv('cdph-race-ethnicity.csv')
    data['date_time'] = pd.to_datetime(data['date'])
    data = data[data['age'] == 'all']

    def create_dataset(df):
        # Interleave the two percentage columns row by row into a flat tuple.
        paired = zip(df['confirmed_cases_percent'], df['population_percent'])
        counts = tuple(value for pair in paired for value in pair)
        return ColumnDataSource(data=dict(x=x, counts=counts))

    def create_plot(source):
        bar_colors = factor_cmap('x',
                                 factors=percentages,
                                 palette=["#c9d9d3", "#718dbf"],
                                 start=1,
                                 end=2)
        p = figure(x_range=FactorRange(*x),
                   y_axis_label='Percentage',
                   plot_width=1030)

        p.title.text = "Confirmed_case% VS Population% by races"
        p.title.align = "center"
        p.title.text_font_size = "20px"

        p.vbar(x='x',
               top='counts',
               width=0.9,
               source=source,
               line_color="white",
               fill_color=bar_colors)

        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        p.legend.location = "top_left"
        p.legend.orientation = "horizontal"

        hover = HoverTool(tooltips=[('Race, category', "@x"),
                                    ('Percentage', "@counts")])
        p.add_tools(hover)

        # italic footnotes, added below the plot in this order
        footnotes = (
            "Data published by latimes.com/coronavirustracker; download data "
            "from https://github.com/datadesk/california-coronavirus-data/"
            "cdph-race-ethnicity.csv in GitHub",
            "Data Source: California Department of Public Health "
            "https://www.cdph.ca.gov/Programs/CID/DCDC/Pages/COVID-19/"
            "Race-Ethnicity.aspx",
            "Date of last update: 2020-10-14",
        )
        for note in footnotes:
            p.add_layout(Title(text=note, text_font_style="italic"), 'below')
        return p

    def callback(attr, old, new):
        # Replace the plotted data with the rows of the picked date.
        chosen = data[data['date_time'] == date_picker.value]
        src.data.update(create_dataset(chosen).data)

    src = create_dataset(data[data['date_time'] == '2020-10-01'])
    p = create_plot(src)

    date_picker = DatePicker(title='Choose a date',
                             min_date="2020-05-14",
                             max_date='2020-10-14')
    date_picker.on_change('value', callback)

    layout = row(p, WidgetBox(date_picker))
    return Panel(child=layout, title='Percentage of confirmed cases by race')
Esempio n. 26
0
def eda_projects_tab(panel_title):
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[],
                                               variable_2=[],
                                               relationship=[],
                                               lag=[],
                                               r=[],
                                               p_value=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """Initialise filter state, menus and header divs for the tab.

            Args:
                table: forwarded to ``Mytab.__init__`` and kept on
                    ``self.table``.
                cols: forwarded to ``Mytab.__init__`` and kept on
                    ``self.cols``.
                dedup_cols: forwarded to ``Mytab.__init__``; ``None`` (the
                    default) is replaced by a new empty list.
            """
            # A literal ``[]`` default would be one list shared by every
            # instance, so build a fresh one per call instead.
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            self.trigger = 0
            # aggregation applied to each feature column in prep_data()
            self.groupby_dict = {
                'project_duration': 'sum',
                'project_start_delay': 'mean',
                'project_end_delay': 'mean',
                'project_owner_age': 'mean',
                'project_owner_gender': 'mean',
                'milestone_duration': 'sum',
                'milestone_start_delay': 'mean',
                'milestone_end_delay': 'mean',
                'milestone_owner_age': 'mean',
                'milestone_owner_gender': 'mean',
                'task_duration': 'sum',
                'task_start_delay': 'sum',
                'task_end_delay': 'mean',
                'task_owner_age': 'mean',
                'task_owner_gender': 'mean'
            }
            self.feature_list = list(self.groupby_dict.keys())
            self.lag_variable = 'task_duration'
            self.lag_days = "1,2,3"  # comma separated shifts used by lags_plot()
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = self.variables[0]

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            # current filter selections; 'all' disables the filter
            self.status = 'all'
            self.pm_gender = 'all'
            self.m_gender = 'all'
            self.t_gender = 'all'
            self.type = 'all'

            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'type': [
                    'all', 'research', 'reconciliation', 'audit', 'innovation',
                    'construction', 'manufacturing', 'conference'
                ],
                'gender': ['all', 'male', 'female'],
                'variables':
                list(self.groupby_dict.keys()),
                'history_periods':
                ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
            }
            self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'}
            self.timestamp_col = 'project_startdate_actual'
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                    <div style="text-align:center;width:{}px;height:{}px;
                           position:relative;background:black;margin-bottom:200px">
                           <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=lag_section_head_txt,
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(text='Pre-transform distribution:',
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'correlations':
                self.section_header_div(text='Correlations:',
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a ``Div`` holding ``text`` as a coloured section heading.

            Args:
                text: heading text.
                html_header: tag name used for the heading element.
                width: width passed to the ``Div``.
                margin_top: value substituted into ``margin-top``.
                margin_bottom: value substituted after the literal ``-`` of
                    ``margin-bottom``.
            """
            # NOTE(review): the template already carries a '-' before the
            # bottom margin, so a negative ``margin_bottom`` (the default and
            # every caller in this class) renders as ``margin-bottom:--150px``.
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

        def notification_updater(self, text):
            txt = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def reset_adoption_dict(self, variable):
            self.significant_effect_dict[variable] = []

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a ``Div`` of the given width showing ``text`` as an h2 heading."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Return a ``Div`` with the 'How to interpret relationships' help box.

            Args:
                width: width passed to the ``Div``.
                height: height passed to the ``Div``.
            """
            # box styling local to this div (shifted left of its grid cell)
            box_style = """ 
                style='width:350px; margin-left:-600px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            template = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """
            html = template.format(box_style, self.header_style)
            return Div(text=html, width=width, height=height)

        # /////////////////////////////////////////////////////////////
        def filter_df(self, df1):
            if self.status != 'all':
                df1 = df1[df1.status == self.status]
            if self.pm_gender != 'all':
                df1 = df1[df1.project_owner_gender == self.pm_gender]
            if self.m_gender != 'all':
                df1 = df1[df1.milestone_owner_gender == self.m_gender]
            if self.t_gender != 'all':
                df1 = df1[df1.task_owner_gender == self.t_gender]

            if self.type != 'all':
                df1 = df1[df1.type == self.type]
            return df1

        def prep_data(self, df1):
            """Filter, aggregate and lag ``df1`` and store the result on ``self.df1``.

            Steps: index by ``self.timestamp_col``, apply ``filter_df``, cast
            the feature columns to float, aggregate per project and resample
            period with ``self.groupby_dict``, shift every feature except
            ``self.variable`` by ``self.lag`` rows when the lag is positive,
            then drop rows containing NaN.

            ``self.resample_period`` is not set in this class; presumably the
            base class provides it -- TODO confirm.

            Errors are logged and swallowed; nothing is returned.

            Args:
                df1: frame holding ``self.timestamp_col``, a ``project``
                    column and every key of ``self.groupby_dict``.
            """
            try:
                df1 = df1.set_index(self.timestamp_col)
                logger.warning('LINE 195 df:%s', df1.head())

                df = self.filter_df(df1.copy())
                logger.warning('LINE 199: length before:%s', len(df))

                # keep the grouping key aside while the features are cast
                project_col = df[['project']]
                df = df[list(self.groupby_dict.keys())]
                logger.warning('LINE 218: columns:%s', df.head())
                df = df.astype(float)
                df = pd.concat([df, project_col], axis=1)
                df = df.groupby('project').resample(self.resample_period).agg(
                    self.groupby_dict)
                logger.warning('LINE 201: length after:%s', len(df))

                df = df.reset_index()
                lag = int(self.lag)
                if lag > 0:
                    # handle lag for all variables but the one under study
                    for feature in self.feature_list:
                        if feature != self.variable:
                            df[feature] = df[feature].shift(lag)
                df = df.dropna()
                self.df1 = df
                # log the frame that was just produced (not the raw self.df,
                # which may still be None)
                logger.warning('line 184- prep data: df:%s',
                               self.df1.head(10))

            except Exception:
                logger.error('prep data', exc_info=True)

        def lags_plot(self, launch):
            """Scatter ``self.variable`` against lagged copies of ``self.lag_variable``.

            One shifted column is added per entry of the comma separated
            ``self.lag_days``; entries that are not integers are logged and
            skipped. ``lags_corr`` is called on the resulting frame before
            plotting.

            Args:
                launch: not used in the body; presumably the stream trigger
                    value -- TODO confirm.

            Returns:
                The hvplot scatter, or ``None`` when an error was logged.
            """
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                cols = [self.lag_variable]
                for day in self.lag_days.split(','):
                    # Only the integer parse is guarded; any other failure is
                    # a real error and is reported by the outer handler.
                    try:
                        periods = int(day)
                    except ValueError:
                        logger.warning('%s is not an integer', day)
                        continue
                    label = self.lag_variable + '_' + day
                    df[label] = df[self.lag_variable].shift(periods)
                    cols.append(label)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                logger.warning('in lags plot: df:%s', df.head(10))
                return df.hvplot(x=self.variable,
                                 y=cols,
                                 kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Correlate ``self.variable`` with every other column of ``df``.

            The rows are streamed into the enclosing ``lags_corr_src`` data
            source, with rollover equal to the number of rows, and a
            ``DataTable`` bound to that source is returned. The lag reported
            for a column is the integer after its last ``_``, or the string
            ``'None'`` when that suffix is not an integer.

            Returns:
                The ``DataTable``, or ``None`` when an error was logged.
            """
            try:
                field_names = ('variable_1', 'variable_2', 'relationship',
                               'lag', 'r', 'p_value')
                corr_dict_data = {name: [] for name in field_names}

                reference = df[self.variable].tolist()
                for col in df.columns:
                    if col in [self.timestamp_col, self.variable]:
                        continue
                    # find lag
                    suffix = col.split('_')[-1]
                    try:
                        lag = int(suffix)
                    except Exception:
                        lag = 'None'

                    other = df[col].tolist()
                    slope, intercept, rvalue, pvalue, txt = self.corr_label(
                        reference, other)
                    row = (self.variable, col, txt, lag, round(rvalue, 4),
                           round(pvalue, 4))
                    for name, value in zip(field_names, row):
                        corr_dict_data[name].append(value)

                lags_corr_src.stream(corr_dict_data,
                                     rollover=(len(corr_dict_data['lag'])))

                titles = (
                    ("variable_1", "variable 1"),
                    ("variable_2", "variable 2"),
                    ("relationship", "relationship"),
                    ("lag", "lag(days)"),
                    ("r", "r"),
                    ("p_value", "p_value"),
                )
                columns = [
                    TableColumn(field=field, title=title)
                    for field, title in titles
                ]
                return DataTable(source=lags_corr_src,
                                 columns=columns,
                                 width=500,
                                 height=280)
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """Tabulate ``corr_label`` results of ``self.variable`` vs each other feature.

            Works on ``self.df1`` with the timestamp column dropped.

            Args:
                launch: not used in the body; presumably the stream trigger
                    value -- TODO confirm.

            Returns:
                An hvplot table, or ``None`` when an error was logged.
            """
            try:
                headings = ['Variable 1', 'Variable 2', 'Relationship', 'r',
                            'p-value']
                corr_dict = {heading: [] for heading in headings}

                # prep df
                frame = self.df1.drop(self.timestamp_col, axis=1)
                a = frame[self.variable].tolist()

                for col in self.feature_list:
                    logger.warning('col :%s', col)
                    if col == self.variable:
                        continue
                    logger.warning('%s:%s', col, self.variable)
                    b = frame[col].tolist()
                    slope, intercept, rvalue, pvalue, txt = self.corr_label(
                        a, b)
                    # add to dict
                    corr_dict['Variable 1'].append(self.variable)
                    corr_dict['Variable 2'].append(col)
                    corr_dict['Relationship'].append(txt)
                    corr_dict['r'].append(round(rvalue, 4))
                    corr_dict['p-value'].append(round(pvalue, 4))

                table_df = pd.DataFrame(corr_dict)
                return table_df.hvplot.table(
                    columns=[
                        'Variable 1', 'Variable 2', 'Relationship', 'r',
                        'p-value'
                    ],
                    width=550,
                    height=200,
                    title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """Tabulate ``mann_whitneyu_label`` results of ``self.variable`` vs each other feature.

            Works on ``self.df1`` with the timestamp column dropped.

            Args:
                launch: not used in the body; presumably the stream trigger
                    value -- TODO confirm.

            Returns:
                An hvplot table, or ``None`` when an error was logged.
            """
            try:
                headings = ['Variable 1', 'Variable 2', 'Relationship',
                            'stat', 'p-value']
                corr_dict = {heading: [] for heading in headings}

                # prep df
                frame = self.df1.drop(self.timestamp_col, axis=1)
                a = frame[self.variable].tolist()

                for col in self.feature_list:
                    logger.warning('col :%s', col)
                    if col == self.variable:
                        continue
                    logger.warning('%s:%s', col, self.variable)
                    b = frame[col].tolist()
                    stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                    corr_dict['Variable 1'].append(self.variable)
                    corr_dict['Variable 2'].append(col)
                    corr_dict['Relationship'].append(txt)
                    corr_dict['stat'].append(round(stat, 4))
                    corr_dict['p-value'].append(round(pvalue, 4))

                table_df = pd.DataFrame(corr_dict)
                return table_df.hvplot.table(
                    columns=[
                        'Variable 1', 'Variable 2', 'Relationship', 'stat',
                        'p-value'
                    ],
                    width=550,
                    height=200,
                    title='Non parametric relationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            """Return one histogram per feature of ``self.df``, laid out four per row.

            Args:
                launch: not used in the body; presumably the stream trigger
                    value -- TODO confirm.

            Returns:
                The hvplot layout, or ``None`` when an error was logged.
            """
            try:
                options = dict(y=self.feature_list,
                               subplots=True,
                               shared_axes=False,
                               bins=25,
                               alpha=0.3,
                               width=300)
                histograms = self.df.hvplot.hist(**options)
                return histograms.cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter ``self.variable`` against every other feature of ``self.df1``.

            When ``self.df1`` has not been prepared yet, a small placeholder
            scatter is returned instead.

            Args:
                launch: not used in the body; presumably the stream trigger
                    value -- TODO confirm.

            Returns:
                The hvplot scatter (grid or placeholder), or ``None`` when an
                error was logged.
            """
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)

                df = self.df1

                if df is None:
                    # No data yet: plot from a dummy frame. Calling
                    # ``df.hvplot`` here would fail because ``df`` is None.
                    placeholder = pd.DataFrame({
                        'x': [0, 0, 0],
                        'y': [0, 0, 0]
                    })
                    return placeholder.hvplot.scatter(x='x',
                                                      y='y',
                                                      width=330)

                if self.timestamp_col in df.columns:
                    df = df.drop(self.timestamp_col, axis=1)

                df = df.fillna(0)

                # every feature except the one on the x axis
                cols_temp = self.feature_list.copy()
                if self.variable in cols_temp:
                    cols_temp.remove(self.variable)

                return df.hvplot.scatter(x=self.variable,
                                         y=cols_temp,
                                         width=330,
                                         subplots=True,
                                         shared_axes=False,
                                         xaxis=False).cols(4)

            except Exception:
                logger.error('matrix plot', exc_info=True)

        def multiline(self, launch=1):
            """Overlay one line per distinct x-variable value of the resampled mean y-variable.

            The x and y variable names come from ``self.multiline_vars``; the
            data is ``self.df`` grouped by the x variable and resampled with
            ``self.resample_period`` on ``self.timestamp_col``.

            Args:
                launch: not used in the body; presumably the stream trigger
                    value -- TODO confirm.

            Returns:
                The overlaid hvplot lines, or ``None`` when an error was
                logged (which includes the case of no groups at all).
            """
            try:
                xvar = self.multiline_vars['x']
                yvar = self.multiline_vars['y']

                df = self.df.copy()[[xvar, yvar, self.timestamp_col]]
                df = df.set_index(self.timestamp_col)
                grouped = df.groupby(xvar).resample(self.resample_period)
                df = grouped.agg({yvar: 'mean'}).reset_index()

                for idx, line in enumerate(df[xvar].unique()):
                    subset = df[df[xvar] == line].fillna(0)
                    logger.warning('LINE 428:%s - %s:', line, subset.head())
                    # NOTE(review): only the first curve gets width=1200; the
                    # rest are created with width=2 -- confirm this is
                    # intended.
                    curve_width = 1200 if idx == 0 else 2
                    curve = subset.hvplot.line(x=self.timestamp_col,
                                               y=yvar,
                                               width=curve_width,
                                               height=500).relabel(line)
                    if idx == 0:
                        p = curve
                    else:
                        p *= curve
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    def update_variable(attr, old, new):
        """Bokeh ``on_change`` handler for the main variable selector.

        Stores the newly selected variable on ``thistab`` and relaunches the
        matrix and correlation streams.  The three gender menu labels are
        translated to their ``*_gender_code`` column names; every other
        selection is stored unchanged.  (Previously only the gender labels
        were stored, so any other selection left ``thistab.variable`` at its
        old value and the plots were redrawn for the wrong variable.)

        Args:
            attr: name of the changed widget property (unused).
            old: previous widget value (unused).
            new: newly selected menu entry.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        # NOTE(review): prep_data still runs before the variable is stored,
        # as in the original -- confirm it does not depend on thistab.variable.
        thistab.prep_data(thistab.df)

        # menu label -> column name for the gender entries
        gender_codes = {
            'milestone owner gender': 'm_gender_code',
            'project owner gender': 'pm_gender_code',
            'task owner gender': 't_gender_code',
        }
        thistab.variable = gender_codes.get(new, new)

        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """Handle a change of the lag-variable selector.

        Records ``new`` as ``thistab.lag_variable``, re-runs ``prep_data`` on
        the current frame, and fires the lag-plot stream with the next
        trigger value.  ``attr`` and ``old`` are unused.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)

        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch_lags_var.event(launch=launch_id)

        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        """Handle a change of any filter selector (gender, status, type).

        All five filter widgets are re-read regardless of which one fired,
        the data is prepared again, and the matrix and correlation streams
        are relaunched.  The callback arguments themselves are unused.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")

        # (attribute on thistab, widget supplying its value)
        filter_widgets = (
            ('pm_gender', pm_gender_select),
            ('m_gender', m_gender_select),
            ('t_gender', t_gender_select),
            ('status', status_select),
            ('type', type_select),
        )
        for attribute, widget in filter_widgets:
            setattr(thistab, attribute, widget.value)

        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):
        """Handle a change of the single-lag selector.

        Converts the selector's current value to ``int``, stores it as
        ``thistab.lag``, prepares the data again and relaunches the matrix
        and correlation streams.  The callback arguments are unused; the
        value is read from ``lag_select`` directly.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        selected_lag = int(lag_select.value)
        thistab.lag = selected_lag
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Reload the data when either date picker changes.

        Loads the frame for the currently selected start/end dates, applies
        the same gender preparation as the initial load, re-runs
        ``prep_data`` and relaunches the matrix and correlation streams.

        The initial load keeps a copy of the unencoded project-owner gender
        in ``manager_gender`` (one of the ``multiline_x_select`` options);
        the reload previously dropped that column, so it is recreated here.

        The callback arguments are unused; dates are read from the pickers.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        # NOTE(review): unlike the initial load there is no empty-frame guard
        # here; confirm what load_df returns when the range has no rows.

        # keep the text labels for the multiline comparison before encoding
        thistab.df['manager_gender'] = thistab.df['project_owner_gender']

        # encode gender columns: 'male' -> 1, anything else -> 2
        gender_columns = ('project_owner_gender', 'milestone_owner_gender',
                          'task_owner_gender')
        for gender_col in gender_columns:
            thistab.df[gender_col] = thistab.df[gender_col].apply(
                lambda x: 1 if x == 'male' else 2)

        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Handle a change of the resample-period selector.

        Stores ``new`` as ``thistab.resample_period``, prepares the data
        again, and relaunches the matrix, correlation and multiline streams
        in that order.  ``attrname`` and ``old`` are unused.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        streams_to_fire = (stream_launch_matrix, stream_launch_corr,
                           stream_launch)
        for stream in streams_to_fire:
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Button handler: take the lags typed into ``lags_input``.

        Copies the text-input value to ``thistab.lag_days``, logs it, and
        relaunches the lag-plot and multiline streams.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        for stream in (stream_launch_lags_var, stream_launch):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        """Handle a change of either multiline selector.

        Both the x and y selections are re-read from their widgets into
        ``thistab.multiline_vars`` and the multiline stream is relaunched.
        The callback arguments are unused.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        axis_widgets = (('x', multiline_x_select), ('y', multiline_y_select))
        for axis, widget in axis_widgets:
            thistab.multiline_vars[axis] = widget.value

        launch_id = thistab.trigger + 1
        thistab.trigger = launch_id
        stream_launch.event(launch=launch_id)
        thistab.notification_updater("Ready!")

    # Build the tab: load the initial data window, create streams, widgets and
    # plots, register the callbacks defined above, and assemble the layout.
    # Any exception is logged and replaced by tab_error_flag(panel_title).
    try:
        # SETUP
        table = 'project_composite1'
        thistab = Thistab(table, [], [])

        # setup dates
        # first/last_date_range bound the date pickers; the initial window is
        # the 30 days ending two days before the configured last_date.
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=30)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        if len(thistab.df) > 0:
            # keep an unencoded copy of the project-owner gender before the
            # columns below are replaced by numeric codes
            thistab.df['manager_gender'] = thistab.df['project_owner_gender']
            # encoding: 'male' -> 1, every other value -> 2
            thistab.df['project_owner_gender'] = thistab.df[
                'project_owner_gender'].apply(lambda x: 1
                                              if x == 'male' else 2)
            thistab.df['milestone_owner_gender'] = thistab.df[
                'milestone_owner_gender'].apply(lambda x: 1
                                                if x == 'male' else 2)
            thistab.df['task_owner_gender'] = thistab.df[
                'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            logger.warning('LINE 527:columns %s', list(thistab.df.columns))

            thistab.prep_data(thistab.df)

        # MANAGE STREAM
        # One stream per DynamicMap group; callbacks fire them with an
        # incrementing `launch` value to force a re-render.
        # NOTE(review): stream_launch_hist is not used again in this block
        # (the histogram DynamicMap below is commented out), and it shares the
        # stream name 'Launch' with stream_launch.
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        type_select = Select(title='Select project type',
                             value=thistab.type,
                             options=thistab.menus['type'])

        status_select = Select(title='Select project status',
                               value=thistab.status,
                               options=thistab.menus['status'])

        pm_gender_select = Select(title="Select project owner's gender",
                                  value=thistab.pm_gender,
                                  options=thistab.menus['gender'])

        m_gender_select = Select(title="Select milestone owner's gender",
                                 value=thistab.m_gender,
                                 options=thistab.menus['gender'])

        t_gender_select = Select(title="Select task owner's gender",
                                 value=thistab.t_gender,
                                 options=thistab.menus['gender'])

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        multiline_y_select = Select(title='Select comparative DV(y)',
                                    value=thistab.multiline_vars['y'],
                                    options=[
                                        'remuneration', 'delay_start',
                                        'delay_end', 'project_duration'
                                    ])

        # 'manager_gender' is the unencoded column created during the initial
        # load above
        multiline_x_select = Select(
            title='Select comparative IV(x)',
            value=thistab.multiline_vars['x'],
            options=['manager_gender', 'type', 'status'])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        # NOTE(review): lags_corr_src is not defined anywhere in this block;
        # it must come from an enclosing or module scope -- confirm.
        lags_corr_table = DataTable(source=lags_corr_src,
                                    columns=columns,
                                    width=500,
                                    height=200)

        # Each DynamicMap re-invokes its Thistab method when its stream fires.
        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        # hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])
        hv_multiline = hv.DynamicMap(thistab.multiline,
                                     streams=[stream_launch])

        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        # NOTE(review): nonpara_table is rendered here but never placed in the
        # grid below.
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)
        multiline = renderer.get_plot(hv_multiline)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        pm_gender_select.on_change('value', update_IVs)
        m_gender_select.on_change('value', update_IVs)
        t_gender_select.on_change('value', update_IVs)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        status_select.on_change('value', update_IVs)
        type_select.on_change('value', update_IVs)

        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_lag = WidgetBox(lags_input, lags_input_button,
                                 lag_variable_select)

        controls_multiline = WidgetBox(multiline_x_select, multiline_y_select)

        controls_page = WidgetBox(datepicker_start, datepicker_end,
                                  variable_select, type_select, status_select,
                                  resample_select, pm_gender_select,
                                  m_gender_select, t_gender_select)
        # NOTE(review): controls_gender is built but not used in the grid; the
        # same three selectors are already part of controls_page.
        controls_gender = WidgetBox(pm_gender_select, m_gender_select,
                                    t_gender_select)

        # create the dashboards

        # One inner list per grid row, top to bottom.
        grid = gridplot(
            [[thistab.notification_div['top']], [Spacer(width=20, height=70)],
             [thistab.section_headers['relationships']],
             [Spacer(width=20, height=30)], [matrix_plot.state, controls_page],
             [thistab.section_headers['correlations']],
             [Spacer(width=20, height=30)],
             [corr_table.state,
              thistab.corr_information_div()],
             [thistab.title_div('Compare levels in a variable', 400)],
             [Spacer(width=20, height=30)],
             [multiline.state, controls_multiline],
             [thistab.section_headers['lag']], [Spacer(width=20, height=30)],
             [lags_plot.state, controls_lag], [lags_corr_table],
             [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        # Any failure above yields the error placeholder tab instead.
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
Esempio n. 27
0
# Script fragment: `p` and `source` are created before this excerpt.
# Hide the y-axis grid lines of the figure.
p.ygrid.grid_line_color = None

# Browser-side callback shared by both text inputs. The JS reads the current
# CLTV and cost widget values, writes Math.round(CLTV / cost) into the first
# x entry of the data source, and signals a change. `y` is fetched in the JS
# but not modified.
# NOTE(review): `x` and `y` are assigned without `var` in the JS, and
# `source.trigger('change')` / the `callback=` widget argument look like an
# older Bokeh API -- confirm against the Bokeh version in use.
callback = CustomJS(args=dict(source=source),
                    code="""
    var data = source.data;
    var f = CLTV.value;
    var g = cost.value;
    x = data['x']
    y = data['y']
    x[0] = Math.round(f/g)
    source.trigger('change');
""")

# The widgets are registered in callback.args only after construction, because
# each widget needs the callback object and the callback needs the widgets.
text_input1 = TextInput(value="", title="CLTV", callback=callback)
callback.args["CLTV"] = text_input1

text_input2 = TextInput(value="", title="Cost", callback=callback)
callback.args["cost"] = text_input2

# Static placeholder text shown in the Div.
text = 'Text for option A will go here.'

div = Div(text=text, width=200, height=100)
# div.js_on_change(text, callback)
# callback.args["cost"] = text_input2

widgets = WidgetBox(text_input1, text_input2)

# Lay the three elements out two per row, write the page to bar.html and show it.
grid = gridplot([widgets, p, div], ncols=2, plot_width=250, plot_height=250)
output_file("bar.html")
show(grid)
def eda_country_indexes_tab(panel_title):
    class Thistab(Mytab):
        """State and helpers for the country-indexes tab.

        Loads the whole ``table`` collection through ``self.pym`` into
        ``self.df`` and reshapes its columns into ``self.df1`` (a ``country``
        column plus one column per index label).
        """

        def __init__(self, table, cols, dedup_cols=None):
            """Set up connections, menus and the notification/header divs.

            Args:
                table: name of the collection to read.
                cols: column list forwarded to ``Mytab``.
                dedup_cols: de-duplication columns forwarded to ``Mytab``;
                    ``None`` (the default) means an empty list.
            """
            # A fresh list per call instead of a shared mutable default.
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            self.trigger = 0

            self.groupby_dict = {}

            self.div_style = """ style='width:350px; margin-left:25px;
                                    border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                                    """

            self.header_style = """ style='color:blue;text-align:center;' """
            self.countries = []
            self.country = 'Barbados'

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'gender': ['all', 'male', 'female'],
            }
            self.multiline_vars = {'x': '', 'y': ''}
            self.timestamp_col = 'timestamp'

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                                position:relative;background:black;margin-bottom:200px">
                                                <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                          </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'info': self.section_header_div(text='Country indexes')
            }

            # ----- UPDATED DIVS END

        # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Return a ``Div`` holding ``text`` wrapped in an ``html_header`` tag.

            NOTE(review): the template already contains a minus sign before
            the bottom margin, so the default ``margin_bottom=-150`` renders
            as ``margin-bottom:--150px`` -- confirm this is intended.
            """
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def load_df(self):
            """Read every document of ``self.table`` into ``self.df``.

            Documents are flattened with ``json_normalize`` and missing values
            are filled with 0.  ``self.countries`` is reset to an empty list.
            Returns ``None``; failures are logged, not raised.
            """
            try:
                df = json_normalize(
                    list(self.pym.db[self.table].find({}, {'_id': False})))
                df = df.fillna(0)
                logger.warning('LINE 96:  country indicator:%s', df.head())
                self.countries = []
                self.df = df

            except Exception:
                logger.error('load', exc_info=True)

        def get_row_column_labels(self, txt):
            """Split a dotted column name into ``(first part, last part)``.

            The first part is recorded in ``self.countries`` (kept sorted) if
            it is new; hyphens in the last part are replaced by underscores.
            """
            x = txt.split('.')
            if x[0] not in self.countries:
                self.countries.append(x[0])
                # sort in place: sorted() returned a new list that was dropped
                self.countries.sort()
            x[-1] = x[-1].replace('-', '_')
            return x[0], x[-1]

        def melt_df(self):
            """Reshape ``self.df`` columns into ``self.df1``.

            For every column except ``'timestamp'`` the first-row value is
            appended under the column's last name part, and the first name
            part is appended to the ``'country'`` list.  Failures are logged
            and leave ``self.df1`` unchanged.

            NOTE(review): the ``'country'`` list grows once per column while
            each label list grows only for its own columns, so the lists have
            equal length only when all columns share one label; otherwise the
            DataFrame construction raises and is logged.  Confirm the
            intended shape.
            """
            try:
                temp_dct = {'country': []}

                for col in self.df.columns:
                    if col == 'timestamp':
                        continue
                    row, column = self.get_row_column_labels(col)
                    temp_dct['country'].append(row)
                    try:
                        val = self.df[[col]].values[0]
                    except Exception:
                        # e.g. no rows: fall back to a zero value
                        val = [0]
                    temp_dct.setdefault(column, []).append(val[0])

                self.df1 = pd.DataFrame.from_dict(temp_dct)
            except Exception:
                logger.error('melt coins', exc_info=True)

        def plot_country_rows(self, launch):
            """Stream callback: make sure ``self.df1`` has been built.

            NOTE(review): nothing is returned, although this method is used
            as a ``DynamicMap`` callback -- the plot itself appears to be
            unimplemented.
            """
            try:
                if self.df1 is None:
                    self.melt_df()

            except Exception:
                logger.error('plot', exc_info=True)

    def update_country(attrname, old, new):
        """Bokeh ``on_change`` handler for the country selector.

        Stores the newly selected value as ``thistab.country`` and relaunches
        the table stream.  The value is taken from ``new``: the selector
        widget is a local of the enclosing function and the visible
        ``Thistab`` class never defines a ``country_select`` attribute, so
        the previous ``thistab.country_select.value`` lookup would fail.

        Args:
            attrname: name of the changed widget property (unused).
            old: previous value (unused).
            new: newly selected value.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.country = new
        thistab.trigger += 1
        stream_launch_action_table.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'country_indexes'
        thistab = Thistab(table, [], [])
        thistab.load_df()

        # MANAGE STREAM
        stream_launch_action_table = streams.Stream.define('Launch',
                                                           launch=-1)()

        # MAKE TABLES
        # --------------------- PLOTS---------------------------------

        hv_action_table = hv.DynamicMap(thistab.plot_country_rows,
                                        streams=[stream_launch_action_table])
        action_table = renderer.get_plot(hv_action_table)

        # CREATE WIDGETS
        country_select = Select(title='Select matrix',
                                value=thistab.load_df(),
                                options=thistab.countries)

        # handle callbacks
        country_select.on_change('value', update_country)

        # create the dashboards
        controls = WidgetBox()

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.title_div('info', 400)],
                         [Spacer(width=20, height=30)], [action_table.state],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
Esempio n. 29
0
    if (mode == 'None') {
        data['x'] = [];
        data['y'] = [];
    }
    else {
        if (mode == 'Linear') { interp = linear; }
        else if (mode == 'Step (before)') { interp = step; step.mode = 'before'; }
        else if (mode == 'Step (center)') { interp = step; step.mode = 'center'; }
        else if (mode == 'Step (after)')  { interp = step; step.mode = 'after';  }

        for (i=0; i < %d; i++) {
            data['x'][i] = i * dx
            data['y'][i] = interp.compute(data['x'][i])
        }
    }

    source.trigger('change')

""" % (N, N))

# Choices offered in the dropdown; the selection is handed to `callback`.
interpolation_modes = [
    'None',
    'Linear',
    'Step (before)',
    'Step (center)',
    'Step (after)',
]
mode = Select(title='Interpolation Mode',
              value='None',
              options=interpolation_modes,
              callback=callback)

# Write the page to a standalone HTML file, then show the selector above `p`.
output_file("transform_interpolator.html", title="Example Transforms")

mode_box = WidgetBox(mode, width=300)
show(Column(mode_box, p))
Esempio n. 30
0
                                                 port=server_port,
                                                 dir='compute'),
                        url_predict=generate_url(MYIP,
                                                 port=server_port,
                                                 dir='predict')))


def redraw():
    """Point ``static_source`` at the ``'results'`` entry of ``default_data``."""
    results = default_data['results']
    static_source.data = results


# Ask the server which fields exist; each entry of 'results' is used as the
# keyword arguments of one Slider.
fields_url = generate_url(MYIP, port=server_port, dir='fields')
field_data = req.post(url=fields_url, timeout=20).json()

# One slider per field description, each wired to the shared callback.
sliders = WidgetBox(
    children=[
        Slider(**dict(f, callback=callback)) for f in field_data['results']
    ],
    width=30)

#scatter = Scatter3d(x='x', y='y', z='z', color='color', data_source=static_source)
plot = figure(title='PCA Plot',
              tools="pan,reset,save,wheel_zoom",
              plot_width=400,
              plot_height=300,
              responsive=True)
plot.scatter(x='x', y='y', color='color', source=static_source)

# Fetch the default prediction payload from the server.
default_url = generate_url(MYIP, port=server_port, dir=['predict', 'default'])
def_cont = req.post(url=default_url, timeout=20).json()