コード例 #1
0
ファイル: test_datepicker.py プロジェクト: not-so-rabh/bokeh
    def test_js_on_change_executes(self, bokeh_model_page) -> None:
        """Pick a day in the pop-up calendar and verify the JS callback ran.

        A ``CustomJS`` callback records ``cb_obj.value`` on every ``value``
        change; the test checks both the recorded value and the text shown
        in the input element.
        """
        picker = DatePicker(
            title='Select date',
            value=date(2019, 9, 20),
            min_date=date(2019, 9, 1),
            max_date="2019-09-30",
            css_classes=["foo"],
        )
        recorder = CustomJS(code=RECORD("value", "cb_obj.value"))
        picker.js_on_change('value', recorder)

        page = bokeh_model_page(picker)
        driver = page.driver

        # The input is clicked twice before the calendar is queried.
        text_input = driver.find_element_by_css_selector('.foo input')
        for _ in range(2):
            text_input.click()

        day_cell = driver.find_element_by_css_selector(
            'span[aria-label="September 16, 2019"]')
        assert day_cell.is_displayed()
        day_cell.click()

        assert page.results['value'] == '2019-09-16'

        shown = driver.find_element_by_css_selector('.bk-input')
        assert shown.get_attribute('value') == '2019-09-16'

        assert page.has_no_console_errors()
コード例 #2
0
    def test_js_on_change_executes(self, bokeh_model_page):
        """Click day 16 in the calendar and verify the JS callback ran.

        The recorded ``value`` and the input's displayed text are both
        expected to read ``'Mon Sep 16 2019'``.
        """
        picker = DatePicker(
            title='Select date',
            value=datetime(2019, 9, 20),
            min_date=datetime(2019, 9, 1),
            max_date=datetime.utcnow(),
            css_classes=["foo"],
        )
        recorder = CustomJS(code=RECORD("value", "cb_obj.value"))
        picker.js_on_change('value', recorder)

        page = bokeh_model_page(picker)
        driver = page.driver

        # Open the calendar, then click the button for the 16th.
        driver.find_element_by_css_selector('.foo input').click()
        driver.find_element_by_css_selector(
            'button[data-pika-day="16"]').click()

        assert page.results['value'] == 'Mon Sep 16 2019'

        shown = driver.find_element_by_css_selector('.bk-input')
        assert shown.get_attribute('value') == 'Mon Sep 16 2019'

        assert page.has_no_console_errors()
コード例 #3
0
 def modify_doc(doc):
     """Populate *doc* with a date picker stacked above a small plot.

     The picker's Python-side ``value`` callback writes ``[old, new]`` into
     the ``val`` column of the data source; the plot carries a
     ``CustomAction`` whose JS callback records ``s.data``.
     """
     source = ColumnDataSource(dict(x=[1, 2], y=[1, 1], val=["a", "b"]))

     plot = Plot(
         plot_height=400,
         plot_width=400,
         x_range=Range1d(0, 1),
         y_range=Range1d(0, 1),
         min_border=0,
     )
     record_data = CustomJS(args=dict(s=source), code=RECORD("data", "s.data"))
     plot.add_tools(CustomAction(callback=record_data))
     plot.add_glyph(source, Circle(x='x', y='y', size=20))

     picker = DatePicker(
         title='Select date',
         value=datetime(2019, 9, 20),
         min_date=datetime(2019, 9, 1),
         max_date=datetime.utcnow(),
         css_classes=["foo"],
     )

     def on_value_change(attr, old, new):
         # Store the transition so it can be read back from the data source.
         source.data['val'] = [old, new]

     picker.on_change('value', on_value_change)
     doc.add_root(column(picker, plot))
コード例 #4
0
ファイル: test_datepicker.py プロジェクト: sliu-baz/bokeh
    def test_disabled_dates(self, bokeh_model_page) -> None:
        """Check which calendar cells carry the ``flatpickr-disabled`` class.

        ``disabled_dates`` holds one single day (the 14th) and one range
        (16th through 18th); the neighbouring days must stay enabled.
        """
        picker = DatePicker(
            title='Select date',
            value=date(2019, 9, 20),
            min_date=date(2019, 9, 1),
            max_date="2019-09-30",
            disabled_dates=["2019-09-14", ("2019-09-16", date(2019, 9, 18))],
            css_classes=["foo"],
        )

        page = bokeh_model_page(picker)

        page.driver.find_element_by_css_selector('.foo label').click()

        # (selector, expected to be disabled) in the order originally checked.
        expectations = (
            ('span[aria-label="September 13, 2019"]', False),
            ('span[aria-label="September 14, 2019"]', True),
            ('span[aria-label="September 15, 2019"]', False),
            ('span[aria-label="September 16, 2019"]', True),
            ('span[aria-label="September 17, 2019"]', True),
            ('span[aria-label="September 18, 2019"]', True),
            ('span[aria-label="September 19, 2019"]', False),
        )
        for selector, expect_disabled in expectations:
            cell = page.driver.find_element_by_css_selector(selector)
            is_disabled = "flatpickr-disabled" in cell.get_attribute("class")
            assert is_disabled == expect_disabled

        assert page.has_no_console_errors()
コード例 #5
0
    def test_basic(self, bokeh_model_page):
        """Render a DatePicker and check that its label shows the title."""
        picker = DatePicker(
            title='Select date',
            value=datetime(2019, 9, 20),
            min_date=datetime(2019, 9, 1),
            max_date=datetime.utcnow(),
            css_classes=["foo"],
        )

        page = bokeh_model_page(picker)

        label = page.driver.find_element_by_css_selector('.foo label')
        assert label.text == "Select date"

        assert page.has_no_console_errors()
コード例 #6
0
ファイル: test_datepicker.py プロジェクト: sliu-baz/bokeh
    def test_widget_disabled(self, bokeh_model_page) -> None:
        """A picker built with ``disabled=True`` renders a disabled input."""
        picker = DatePicker(
            title='Select date',
            value=date(2019, 9, 20),
            min_date=date(2019, 9, 1),
            max_date="2019-09-30",
            disabled=True,
            css_classes=["foo"],
        )

        page = bokeh_model_page(picker)

        text_input = page.driver.find_element_by_css_selector('.flatpickr-input')
        assert text_input.get_attribute("disabled") == "true"

        assert page.has_no_console_errors()
コード例 #7
0
ファイル: controlbox.py プロジェクト: torsli/scorigami
    def __init__(self, team_list, interval):
        """Create the team selector, the two date pickers and the option boxes.

        Args:
            team_list: Options offered by the team ``Select``.
            interval: Indexable pair; ``str()`` of each entry is parsed with
                the ``%Y%m%d`` format. One day is added to the second entry
                before it is used as the upper bound.
        """
        self.team = Select(title='Choose a Team',
                           options=team_list,
                           value='All')

        date_format = '%Y%m%d'
        first_day = dt.strptime(str(interval[0]), date_format)
        last_day = dt.strptime(str(interval[1]), date_format) + timedelta(1)

        # Both pickers share the same selectable range.
        bounds = dict(min_date=first_day, max_date=last_day)
        self.dp1 = DatePicker(title='Start Date:', value=first_day, **bounds)
        self.dp2 = DatePicker(title='End Date:', value=last_day, **bounds)

        # All three checkbox groups start with nothing ticked.
        self.gradient = CheckboxGroup(labels=['Show gradient'], active=[])
        self.movbox = CheckboxGroup(
            labels=['Show margin of victory grid'], active=[])
        self.upperbox = CheckboxGroup(
            labels=['Split wins and losses'], active=[])
        self.ticker = Div(text='')
コード例 #8
0
ファイル: test_datepicker.py プロジェクト: sliu-baz/bokeh
    def test_basic(self, bokeh_model_page) -> None:
        """Check the label text and that the calendar is not rendered inline."""
        picker = DatePicker(
            title='Select date',
            value=date(2019, 9, 20),
            min_date=date(2019, 9, 1),
            max_date="2019-09-30",
            css_classes=["foo"],
        )

        page = bokeh_model_page(picker)

        label = page.driver.find_element_by_css_selector('.foo label')
        assert label.text == "Select date"

        calendar = page.driver.find_element_by_css_selector('.flatpickr-calendar')
        assert "inline" not in calendar.get_attribute("class")

        assert page.has_no_console_errors()
コード例 #9
0
ファイル: test_datepicker.py プロジェクト: zebulon2/bokeh
    def test_widget_disabled(self, bokeh_model_page: BokehModelPage) -> None:
        """A picker built with ``disabled=True`` renders a disabled input."""
        picker = DatePicker(
            title='Select date',
            value=date(2019, 9, 20),
            min_date=date(2019, 9, 1),
            max_date="2019-09-30",
            disabled=True,
        )

        page = bokeh_model_page(picker)

        text_input = find_element_for(page.driver, picker, '.flatpickr-input')
        assert text_input.get_attribute("disabled") == "true"

        assert page.has_no_console_errors()
コード例 #10
0
ファイル: test_datepicker.py プロジェクト: zebulon2/bokeh
    def test_basic(self, bokeh_model_page: BokehModelPage) -> None:
        """Check the label text and that the calendar is not rendered inline."""
        picker = DatePicker(
            title='Select date',
            value=date(2019, 9, 20),
            min_date=date(2019, 9, 1),
            max_date="2019-09-30",
        )

        page = bokeh_model_page(picker)

        label = find_element_for(page.driver, picker, "label")
        assert label.text == "Select date"

        calendar = find_element_for(page.driver, picker, '.flatpickr-calendar')
        assert "inline" not in calendar.get_attribute("class")

        assert page.has_no_console_errors()
コード例 #11
0
ファイル: test_datepicker.py プロジェクト: zebulon2/bokeh
    def _test_server_update_disabled(
            self, bokeh_server_page: BokehServerPage) -> None:
        """Server-side ``value`` callback updates ``disabled_dates``.

        Selecting 16 September triggers the Python callback, which stores
        ``[old, new]`` in the data source and disables 15 September. The
        leading underscore means the name does not match pytest's default
        ``test_*`` collection pattern.
        """
        picker = DatePicker(
            title='Select date',
            value=date(2019, 9, 20),
            min_date=date(2019, 9, 1),
            max_date="2019-09-30",
        )

        def modify_doc(doc):
            source = ColumnDataSource(dict(x=[1, 2], y=[1, 1], val=["a", "b"]))
            plot = Plot(
                height=400,
                width=400,
                x_range=Range1d(0, 1),
                y_range=Range1d(0, 1),
                min_border=0,
            )
            # JS hook, stored in the plot's tags, that records the source data.
            record_data = CustomJS(
                name="custom-action",
                args=dict(s=source),
                code=RECORD("data", "s.data"),
            )
            plot.tags.append(record_data)
            plot.add_glyph(source, Circle(x='x', y='y', size=20))

            def on_value_change(attr, old, new):
                source.data['val'] = [old, new]
                picker.disabled_dates = ["2019-09-15"]

            picker.on_change('value', on_value_change)
            doc.add_root(column(picker, plot))

        page = bokeh_server_page(modify_doc)

        # The input is clicked twice before the calendar is queried.
        text_input = find_element_for(page.driver, picker, "input")
        for _ in range(2):
            text_input.click()

        chosen_day = find_element_for(
            page.driver, picker, 'span[aria-label="September 16, 2019"]')
        assert chosen_day.is_displayed()
        chosen_day.click()

        page.eval_custom_action()

        blocked_day = find_element_for(
            page.driver, picker, 'span[aria-label="September 15, 2019"]')
        assert "flatpickr-disabled" in blocked_day.get_attribute("class")

        assert page.results['data']['val'] == ['2019-09-20', '2019-09-16']
コード例 #12
0
ファイル: test_datepicker.py プロジェクト: zebulon2/bokeh
    def test_enabled_dates(self, bokeh_model_page: BokehModelPage) -> None:
        """Only the days listed in ``enabled_dates`` stay selectable.

        ``enabled_dates`` holds one single day (the 14th) and one range
        (16th through 18th); every other checked day must carry the
        ``flatpickr-disabled`` class.
        """
        picker = DatePicker(
            title='Select date',
            value=date(2019, 9, 20),
            min_date=date(2019, 9, 1),
            max_date="2019-09-30",
            enabled_dates=["2019-09-14", ("2019-09-16", date(2019, 9, 18))],
        )

        page = bokeh_model_page(picker)

        find_element_for(page.driver, picker, "label").click()

        # (selector, expected to be disabled) in the order originally checked.
        expectations = (
            ('span[aria-label="September 13, 2019"]', True),
            ('span[aria-label="September 14, 2019"]', False),
            ('span[aria-label="September 15, 2019"]', True),
            ('span[aria-label="September 16, 2019"]', False),
            ('span[aria-label="September 17, 2019"]', False),
            ('span[aria-label="September 18, 2019"]', False),
            ('span[aria-label="September 19, 2019"]', True),
        )
        for selector, expect_disabled in expectations:
            cell = find_element_for(page.driver, picker, selector)
            is_disabled = "flatpickr-disabled" in cell.get_attribute("class")
            assert is_disabled == expect_disabled

        assert page.has_no_console_errors()
コード例 #13
0
def make_header(date_folder, time_folder, res_folder):
    """Build the header widgets for the results page.

    Args:
        date_folder: Initial value of the date picker; today's date is used
            when this is ``None``.
        time_folder: Initial value of the time ``Select``.
        res_folder: Directory that is walked to find the sub-directory names
            passed to the picker as ``enabled_dates``.

    Returns:
        Tuple ``(date_picker, select_time, select_algorithm, div)``.
    """
    # 1.1 - date picker: choose the date of the results
    today = str(date.today())
    date_default = today if date_folder is None else date_folder

    # Take the first non-empty directory listing that walk() yields and stop
    # there instead of walking the whole tree. Falls back to an empty list
    # (rather than raising IndexError) when res_folder has no sub-directories.
    # NOTE(review): confirm how DatePicker treats an empty enabled_dates list.
    date_available = next(
        (dirs for _, dirs, _ in walk(res_folder) if dirs), [])

    date_picker = DatePicker(title='Select date:',
                             value=date_default,
                             min_date="2020-09-27",
                             max_date=today,
                             enabled_dates=date_available)

    # 1.2 - select: choose the time of the results
    select_time = Select(title="Select time:", value=time_folder)

    # 1.3 - select: choose the algorithm
    select_algorithm = Select(title="Select algorithm:")

    # 1.4 - div: show the experiment summary
    div = Div()

    return date_picker, select_time, select_algorithm, div
コード例 #14
0
# Introductory HTML blurb shown on the page; the single "{}" placeholder is
# filled with latest_date (defined outside this excerpt).
intro = Div(
    text=
    """This is a project for <b>visualization of covid-19 data</b>. Basically 3 questions can be answered by 
selecting the date in the <b>date picker</b>: <br>(1) Number of new covid-19 cases in CA on a particular day <br>
(2) For a particular day, what is the %percent cases by race compared to their representation in the
general population? <br>(3) For a particular day, what is the %percent deaths by race compared to their representation in the
general population? <br> The first question is answered in a <b>scatter</b> with one point and the second and third ones are answered in a <b>bar 
chart</b>. If there is no data in the figure, that means there is no available data for that figure on the selected date. <br> All data used is collected from <a href="https://github.com/datadesk/california-coronavirus-data">A Github 
repository having data from <b>LA-Times</b></a>. 
The latest update date is {}""".format(latest_date),
    width=1000,
    height=100)

# Date picker limited to 2020-08-01 .. 2020-12-31, initially on the first day.
date_picker = DatePicker(title="",
                         value="2020-08-01",
                         min_date="2020-08-01",
                         max_date="2020-12-31")

# Answers: the initially selected date is the latest date in the data.
selected_date = latest_date
# Caption for the first figure, naming the selected date.
description = Div(
    text="""New cases on {} (Hover for more information)""".format(
        selected_date),
    width=1000,
    height=10)
# Figure 1 inputs: the cumulative-case and date columns of data_total
# (data_total is defined outside this excerpt).
total_cases = data_total['confirmed_cases']
dates = data_total['date']
new_cases = []
for i in range(len(dates)):
    if dates[i] == selected_date:
コード例 #15
0
ファイル: bokeh_test.py プロジェクト: v-popov/simple_app
    def __init__(self):
        """Create the placeholder data, the demand plot and all input widgets.

        Everything is stored on ``self``; the widgets are additionally
        collected in the ``self.widgets`` dict under string keys.
        """
        # Placeholder preview frame: a constant date and zero demand, repeated
        # DF_NUM_PREVIEW_ROWS times.
        self.input_df = pd.DataFrame({
            'x': ['2010-01-01'] * DF_NUM_PREVIEW_ROWS,
            'y': [0] * DF_NUM_PREVIEW_ROWS
        })
        self.forecasted_df = None
        self.datefmt = DateFormatter(format='%m-%d-%Y')
        self.inputs = None
        self.x_range = [0, 10]
        self.demand_plot = figure(
            x_range=self.x_range,
            x_axis_type="datetime",
            tools=["pan", 'wheel_zoom'])  #,wheel_zoom,box_zoom,reset,resize")

        # The plot's line is backed by a data source built from the
        # placeholder frame.
        self.plot_data_source = ColumnDataSource(
            data=self.input_df)  #dict(x=[0], y=[0])
        self.line1 = self.demand_plot.line(x='x',
                                           y='y',
                                           source=self.plot_data_source,
                                           line_color='blue',
                                           name='line1')
        # Axis cosmetics: day-level tick format, no minor ticks, about ten
        # ticks, labels rotated by 30 degrees.
        self.demand_plot.xaxis.formatter = DatetimeTickFormatter(
            days="%d %b %Y", hours="")
        self.demand_plot.axis.minor_tick_line_color = None
        self.demand_plot.xaxis[
            0].ticker.desired_num_ticks = 10  #num_minor_ticks = 0
        self.demand_plot.xaxis.major_label_orientation = radians(
            30)  # from math import radians

        # Set up widgets
        # Step 1: where the data comes from, plus the upload control.
        self.data_source_selector = Select(
            title='Step 1/5: Select Data',
            value='Not Selected',
            options=['Not Selected', 'Use Example Data', 'Upload Data'])
        self.file_input = FileInput(accept='.csv,.xlsx')
        self.data_table = DataTable(
            height=DATATABLE_PREVIEW_HEIGHT,
            width=DATATABLE_PREVIEW_WIDTH,
            fit_columns=False,
            index_position=None,
            margin=(0, 15, 0, 15),  #aspect_ratio=0.5,
            #default_size=50
        )
        self.data_preview_paragraph = Paragraph(text='Data Preview:',
                                                margin=(0, 15, 0, 15))
        # Steps 2-4: column selectors; each starts with only the
        # 'Not Selected' option.
        self.values_col_selector = Select(
            title='Step 2/5: Select column with demand values',
            value='Not Selected',
            options=['Not Selected'])
        self.product_id_col_selector = Select(
            title='Step 3/5: Select column with product ID',
            value='Not Selected',
            options=['Not Selected'])
        self.date_col_selector = Select(title="Step 4/5: Select date column",
                                        value='Not Selected',
                                        options=['Not Selected'])
        # Last-observation picker: today's date is both the initial value and
        # the latest selectable date.
        self.last_date_picker = DatePicker(
            title='Select the date of last observation',
            max_date=datetime.datetime.date(pd.to_datetime("today")),
            value=datetime.datetime.date(pd.to_datetime("today")))
        # Weekday check boxes, none ticked initially.
        self.workdays_checkboxgroup = CheckboxGroup(
            labels=["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"],
            active=[],
            inline=True,
            margin=(0, 15, 0, 0))
        self.workdays_apply_button = Button(label='Select Business Days',
                                            button_type='primary')
        # NOTE(review): 'v1'/'v2' look like placeholder options -- confirm
        # they are replaced once data is loaded.
        self.product_selector_plotting = Select(
            title='Select Product to Display',
            value='v1',
            options=['v1', 'v2'])
        self.prediction_button = Button(
            label='Forecast Demand for Selected Product ID',
            button_type='primary')
        self.default_info_msg = 'This window will contain additional information,\nas you interact with the app.'
        self.info_paragraph = PreText(
            text='Details:\n{}'.format(self.default_info_msg))
        # self.text = TextInput(title='title', value='my sine wave')
        # self.offset = Slider(title='offset', value=0.0, start=-5.0, end=5.0, step=0.1)

        # Name -> widget lookup. info_paragraph is not included, and
        # product_selector_plotting is registered under 'product_selector'.
        self.widgets = {
            'data_source_selector': self.data_source_selector,
            'file_input': self.file_input,
            'values_col_selector': self.values_col_selector,
            'product_id_col_selector': self.product_id_col_selector,
            'data_preview_paragraph': self.data_preview_paragraph,
            'data_table': self.data_table,
            'product_selector': self.product_selector_plotting,
            'demand_plot': self.demand_plot,
            'date_col_selector': self.date_col_selector,
            'last_date_picker': self.last_date_picker,
            'workdays_checkboxgroup': self.workdays_checkboxgroup,
            'workdays_apply_button': self.workdays_apply_button,
            'prediction_button': self.prediction_button,
            #'': self.,
        }

        # Column names and product ids start out unset/empty.
        self.values_colname = None
        self.product_id_colname = None
        self.date_colname = None
        self.product_ids = []
コード例 #16
0
def account_predictive_tab(page_width=1200):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            """Initialise model state, HTML style snippets and header divs.

            Args:
                table: Stored as ``self.table`` and forwarded to ``Mytab``.
                cols: Stored as ``self.cols`` and forwarded to ``Mytab``.
                dedup_cols: Forwarded to ``Mytab.__init__`` only.
            """
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')
            self.feature_list = hyp_variables

            # Per model type: outcome name -> accepted values ('cols') and the
            # column holding the outcome ('target_col').
            self.targets = {
                'classification': {
                    'churned': {
                        'cols': ['churned', 'active'],
                        'target_col': 'status'
                    }
                },
                'regression': {
                    'aion_fork': {
                        'cols': [1, 0],
                        'target_col': 'aion_fork'
                    }
                }
            }
            self.interest_var = 'address'
            self.trigger = -1
            self.status = 'all'

            self.clf = None
            self.pl = {}  # for rf pipeline
            # NOTE(review): div_style is assigned again further down; that
            # later value (different indentation inside the string) wins.
            self.div_style = """ style='width:300px; margin-left:25px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            self.header_style = """ style='color:blue;text-align:center;' """

            # list of tier specific addresses for prediction
            self.address_list = []
            self.prediction_address_selected = ""
            self.load_data_flag = False
            # NOTE(review): day_diff was already set to 1 above.
            self.day_diff = 1
            # Aggregation spec: every feature column is averaged.
            self.groupby_dict = {}
            for col in self.feature_list:
                self.groupby_dict[col] = 'mean'

            self.div_style = """ style='width:300px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """
            self.metrics_div = Div(text='', width=400, height=300)
            self.accuracy_df = None
            self.inspected_variable = 'amount'

            # ------- DIVS setup begin
            self.page_width = page_width
            # Banner markup; the placeholders are width, height and text.
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                                                       position:relative;background:black;margin-bottom:200px">
                                                                       <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                                                 </div>""".format(
                self.page_width, 50, 'Welcome')
            # Top and bottom banners share the markup and differ in height.
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            # Section title divs; the 'churn' header appends a literal '----'
            # while the other two append self.section_divider.
            self.section_headers = {
                'churn':
                self.section_header_div(
                    text=
                    'Churned accounts: prediction model accuracy, variable ranking:{}'
                    .format('----'),
                    width=int(self.page_width * .5),
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'variable behavior':
                self.section_header_div(text='Variable behavior:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'predictions':
                self.section_header_div(
                    text='Select date range to make predictions:{}'.format(
                        self.section_divider),
                    width=int(self.page_width * .5),
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Wrap *text* in a coloured heading tag and return it as a ``Div``.

            Args:
                text: Heading text.
                html_header: Tag name used for the opening and closing tag.
                width: Width handed to the ``Div``.
                margin_top: Number placed in the ``margin-top`` declaration.
                margin_bottom: Number placed after the literal ``-`` in the
                    ``margin-bottom`` declaration.

            NOTE(review): the template already contains a ``-`` before the
            margin-bottom placeholder, so a negative value such as the default
            ``-150`` is rendered as ``--150px``. Confirm that is intended.
            """
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""
            markup = template.format(
                margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=markup, width=width, height=15)

            # ####################################################
            #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            """Return a ``Div`` showing *text* with the given width and height."""
            return Div(text=text, width=width, height=height)

        def title_div(self, text, width=700):
            """Return a 15px-high ``Div`` holding *text* as a coloured ``<h2>``."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def reset_checkboxes(self):
            """Clear the selected prediction address and reset its selector.

            Sets ``prediction_address_selected`` to an empty string and the
            selector widget's value to ``"all"``. Any exception is logged
            with its traceback and not re-raised.
            """
            try:
                self.prediction_address_selected = ""
                selector = self.prediction_address_select
                selector.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self,
                    start_date="2018-04-25 00:00:00",
                    end_date="2018-12-10 00:00:00"):
            """Load ``self.df`` for a date range and replace missing values by 0.

            Args:
                start_date: Range start; a string is parsed with
                    ``self.DATEFORMAT``, any other value is passed through.
                end_date: Range end, handled the same way.

            Any exception is logged with its traceback and not re-raised.
            """
            try:
                # Strings are parsed, everything else is forwarded untouched.
                window = [
                    datetime.strptime(bound, self.DATEFORMAT)
                    if isinstance(bound, str) else bound
                    for bound in (start_date, end_date)
                ]
                self.df_load(*window)
                self.df = self.df.fillna(0)
            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add ``<name>_diff`` percent-change columns and rebuild ``self.df``.

            Does nothing when ``self.df`` is ``None`` or empty. Any exception
            is logged with its traceback and not re-raised.
            """
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        # presumably self.df is a dask dataframe materialised
                        # here -- the result is handed to from_pandas below.
                        df = self.df.compute()
                        # NOTE(review): iterating self.targets yields its
                        # keys, so df[col] looks up columns named after those
                        # keys -- confirm this is the intended column list.
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        # NOTE(review): this result is overwritten by the
                        # assignment on the next line.
                        self.df = self.df.fillna(self.df.mean())
                        # NOTE(review): assumes `dd` exposes a `dataframe`
                        # attribute -- verify against the file's import.
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))

            except Exception:
                logger.error('make delta', exc_info=True)

        def split_df(self, df, target):
            """Store one boolean mask per outcome value of *target* in ``self.df1``.

            Args:
                df: Frame containing the target's ``target_col`` column.
                target: Key into ``self.targets['classification']``.

            For every value listed under the target's ``'cols'`` entry,
            ``self.df1[value]`` is set to ``df[target_col] == value``.
            """
            # Fix: the original read the non-existent ``self.target`` and used
            # the whole spec dict as both the value list and the column name.
            spec = self.targets['classification'][target]
            target_col = spec['target_col']
            for val in spec['cols']:
                # NOTE(review): this stores the comparison mask, not the
                # filtered rows -- confirm which one callers expect.
                self.df1[val] = df[target_col] == val
            logger.warning(
                "Finished split into churned and retained dataframes")

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Return a box plot of *variable* grouped by ``status``.

            The y-limits are 0.9 x the minimum and 1.1 x the maximum of the
            column when ``self.df`` has rows, and ``(0, 0)``-based otherwise.
            When ``self.df`` is ``None`` a frame obtained from ``SD`` is
            plotted instead. Any exception is logged and ``None`` is returned.
            """
            try:
                lowest, highest = 0, 0
                frame = self.df
                if frame is None:
                    frame = SD('filter', [variable, 'status'], []).get_df()
                elif len(frame) > 0:
                    column = frame[variable]
                    lowest, highest = dd.compute(column.min(), column.max())

                y_limits = (.9 * lowest, 1.1 * highest)
                return frame.hvplot.box(variable, by='status', ylim=y_limits)
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS
        def rf_clf(self):
            """Fit one imputer + random-forest pipeline per classification target.

            The fitted pipelines are stored in ``self.pl`` keyed by target
            name, and ``self.accuracy_df`` receives one row per target with
            its test accuracy in percent. Any exception is logged with its
            traceback and not re-raised.
            """
            try:
                logger.warning("RANDOM FOREST LAUNCHED")

                # Despite the name, this collects accuracy percentages.
                error_lst = []
                df_temp = self.df
                df_temp = self.normalize(df_temp,
                                         timestamp_col='block_timestamp')
                # if all addresses used filter for only positive transactions

                for target in self.targets['classification']:
                    # filter out joined
                    df = df_temp.copy()
                    if target == 'churned':
                        df = df[df['status'] != 'joined']

                    #logger.warning("line 205: df columns in %s:",df.columns.tolist())
                    # One row per (address, status), aggregated as specified
                    # by self.groupby_dict.
                    df = df.groupby(['address',
                                     'status']).agg(self.groupby_dict)
                    df = df.reset_index()
                    #logger.warning("line 222: df columns in %s:",df.tail(10))

                    df = df.compute()
                    '''
                    # only retain wanted values
                    col_values = list(self.df[self.targets['classification'][target]['target_col']].unique())
                    for val in col_values:
                        if val in self.targets['classification'][target]['cols']:
                            pass
                        else:
                            df[self.targets['classification'][target]['target_col']] = \
                            df[df[self.targets['classification'][target]['cols']] != val]
                    '''
                    X = df[self.feature_list]
                    y = df[self.targets['classification'][target]
                           ['target_col']]
                    #logger.warning('y=:%s',y.head(100))

                    # 70/30 split; no random_state is passed here.
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=0.3)
                    self.feature_list = X_train.columns.tolist()

                    # The imputer is configured to treat 0 as the missing
                    # value and replace it with the median.
                    self.pl[target] = Pipeline([
                        ('imp',
                         SimpleImputer(missing_values=0, strategy='median')),
                        ('rf',
                         RandomForestClassifier(n_estimators=100,
                                                random_state=42,
                                                max_depth=4,
                                                class_weight='balanced'))
                    ])
                    self.pl[target].fit(X_train, y_train)

                    y_pred = self.pl[target].predict(X_test)
                    error_lst.append(
                        round(100 * metrics.accuracy_score(y_test, y_pred), 2))

                self.accuracy_df = pd.DataFrame({
                    'Outcome':
                    list(self.targets['classification'].keys()),
                    'Accuracy':
                    error_lst,
                })
                #logger.warning('accuracy_df:%s',self.accuracy_df.head())
                #self.make_tree(target=target)

                # NOTE(review): these reports sit after the loop, so they use
                # y_test / y_pred of the last target only.
                print('confusion matrix:\n')
                print(confusion_matrix(y_test, y_pred))
                print('classification report:\n')
                print(classification_report(y_test, y_pred))
                #logger.warning("clf model built:%s",self.pl)

            except Exception:
                logger.error("RF:", exc_info=True)

        def accuracy_table(self):
            """Render ``self.accuracy_df`` as an hvplot table.

            Returns:
                The object returned by ``accuracy_df.hvplot.table`` for the
                ``Outcome`` and ``Accuracy`` columns, or ``None`` when
                building it raised (the error is logged with its traceback).
            """
            try:
                return self.accuracy_df.hvplot.table(
                    columns=['Outcome', 'Accuracy'],
                    width=250,
                    title='Prediction accuracy')

            except Exception:
                # Labelled with this method's name; the previous "RF:" label
                # made failures here look like failures in rf_clf.
                logger.error("accuracy table:", exc_info=True)

        def prediction_information_div(self, width=350, height=450):
            """Return a ``Div`` with static help text for the prediction table.

            Args:
                width: Width handed to the ``Div``.
                height: Height handed to the ``Div``.

            The markup is styled with ``self.div_style`` (outer box) and
            ``self.header_style`` (heading).
            """
            # Fix: the third bullet used to open with a stray "<>" while
            # closing with "</li>"; it now opens with "<li>" like the others.
            txt = """
            <div {}>
            <h4 {}>Info </h4>
            <ul style='margin-top:-10px;'>
            <li>
            The table shows the predicted change.</br>
            </li>
            <li>
            For desirable outcomes:
            </br> ... a positive number is good!
            </br> ... the bigger the number the better.
            </br> ... a negative number is bad!
            </br> ... the bigger the negative number the worse it is.
            </li>
            <li>
            For non-desirable outcomes:
            </br>... the inverse is true
            </li>
            <li>
            Use the datepicker(s) to select dates for the period desired
            </li>
            </ul>
            </div>

            """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def metrics_div_update(self, data):
            """Rewrite the metrics panel with the latest churn percentage.

            Args:
                data: Value interpolated in front of "% likely to churn".

            The generated HTML is assigned to ``self.metrics_div.text``;
            nothing is returned.
            """
            box_style = """ 
                   style='width:350px;margin-right:-600px;
                   border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
               """
            template = """<div {}>
            <h4 {}>Prediction Info </h4>
            <ul style='margin-top:-10px;'>
            <li>
            {}% likely to churn
            </li>
            </ul>
            </div>"""
            self.metrics_div.text = template.format(box_style,
                                                    self.header_style, data)

        def stats_information_div(self, width=400, height=300):
            """Build the static panel describing the accuracy and feature tables.

            Args:
                width: Width passed to the ``Div``.
                height: Height passed to the ``Div``.

            Returns:
                A ``Div`` holding the explanatory HTML, styled with a local
                box style and ``self.header_style``.
            """
            panel_style = """ 
                           style='width:350px;margin-left:-600px;
                           border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                       """
            html = """
            <div {}>
                   <h4 {}>Metadata Info </h4>
                   <ul>
                   <li >
                   <h4 style='margin-bottom:-2px;'>Table left:</h4>
                   - shows the outcome,</br>
                     and the accuracy in %</br>
                     <strong><i>100% is perfection!</i></strong>
                   </li>
                   <li>
                   <h4 style='margin-bottom:-2px;'>Table right:</h4>
                     - shows the desired outcome, the variables(things Aion controls)
                   </br> and their importance to the particular outcome
                   </br> ...which variable(s) have a greater impact on an outcome.
                   </br>- lower = better
                   </br>- generally only the best ranked 3 matter
                   </br>- business advice: manipulate the top ranked variables to attain desirable outcomes
                   </li>
                   </ul>
            </div>""".format(panel_style, self.header_style)
            return Div(text=html, width=width, height=height)

        def load_prediction_df(self, start_date, end_date):
            """Load the rows to be predicted into ``self.df_predict``.

            Args:
                start_date: Start of the period; ``date`` instances (which
                    includes ``datetime``) are converted to midnight datetimes.
                end_date: End of the period, converted the same way.

            The feature columns plus 'address' and 'block_timestamp' are
            requested from ``self.cl.load_data`` for ``self.table``.
            """

            def _at_midnight(value):
                # Anything that is not a date is handed through untouched.
                if isinstance(value, date):
                    return datetime.combine(value, datetime.min.time())
                return value

            start_date = _at_midnight(start_date)
            end_date = _at_midnight(end_date)
            wanted_cols = self.feature_list + ['address', 'block_timestamp']
            self.df_predict = self.cl.load_data(table=self.table,
                                                cols=wanted_cols,
                                                start_date=start_date,
                                                end_date=end_date)
            logger.warning('319:in load prediction: %s',
                           self.df_predict.head(5))

        def update_prediction_addresses_select(self):
            """Refresh the address selector from the loaded prediction frame.

            The options are first reset to ``['all']``; when
            ``self.df_predict`` has rows, the unique addresses are appended.
            """
            selector = self.prediction_address_select
            selector.options = ['all']
            if len(self.df_predict) == 0:
                return
            addresses = self.df_predict['address'].unique().compute()
            selector.options = ['all'] + list(addresses)

        # the period for which the user wants a prediction
        def make_account_predictions(self, launch=-1):
            """Predict the likely action per account and return it as a table.

            Works on ``self.df_predict``, optionally narrowed to the address
            chosen in ``self.prediction_address_select``. For every
            classification target the frame is normalised and grouped, run
            through the fitted pipeline in ``self.pl`` and stored in
            ``self.predict_df``; the share of rows labelled 'churn' is pushed
            to the metrics div.

            Args:
                launch: Value of the Launch stream that re-triggers this
                    callback; it is not read by the body.

            Returns:
                An hvplot table with 'address' and 'likely action' columns, or
                None when an error was logged.
            """
            try:
                logger.warning("MAKE PREDICTIONS LAUNCHED")
                df = self.df_predict

                # None, '' and 'all' all mean "do not filter by address".
                selected = self.prediction_address_select.value
                if selected is not None and len(selected) > 0 \
                        and selected != 'all':
                    df = df[df.address == selected]

                # Start from an empty table so there is something to render
                # even when no target yields predictions.
                self.predict_df = pd.DataFrame({
                    'address': [],
                    'likely action': []
                })
                for target in self.targets['classification']:
                    if len(df) > 0:
                        # Keep the filtered frame intact: each target starts
                        # from the same raw rows instead of re-normalising the
                        # previous target's grouped output.
                        grouped = self.normalize(
                            df, timestamp_col='block_timestamp')
                        grouped = self.group_data(
                            grouped,
                            self.groupby_dict,
                            timestamp_col='block_timestamp')
                        interest_labels = list(grouped['address'].unique())

                        # run model
                        grouped = grouped.fillna(0)
                        X = grouped[self.feature_list]
                        y_pred = self.pl[target].predict(X)
                        logger.warning('y_pred:%s', y_pred)
                        if target == 'churned':
                            y_pred_verbose = [
                                'remain' if x in ["active", 1] else "churn"
                                for x in y_pred
                            ]
                        else:
                            # No wording is defined for other targets; show
                            # the raw labels rather than failing on an unbound
                            # (or stale) y_pred_verbose.
                            y_pred_verbose = [str(x) for x in y_pred]

                        # ---- make table for display
                        self.predict_df = pd.DataFrame({
                            'address':
                            interest_labels,
                            'likely action':
                            y_pred_verbose
                        })

                        # ------ label pools
                        self.predict_df['address'] = self.predict_df[
                            'address'].map(self.poolname_verbose_trun)

                        total = len(self.predict_df)
                        churn_count = len(self.predict_df[
                            self.predict_df['likely action'] == 'churn'])
                        # Guard the ratio: grouping can leave no rows at all.
                        perc_to_churn = round(100 * churn_count / total,
                                              1) if total else 0.0
                        self.metrics_div_update(data=perc_to_churn)
                    else:

                        text = """<div {}>
                            <br/> <h3>Sorry, address not found</h3>
                            </div>""".format(self.header_style)
                        self.metrics_div.text = text
                    logger.warning("end of %s predictions", target)
                return self.predict_df.hvplot.table(
                    columns=['address', 'likely action'],
                    width=500,
                    title='Account predictions')
            except Exception:
                logger.error("prediction:", exc_info=True)

        def make_tree(self,
                      target='churned',
                      estimator_index=5,
                      dot_path='small_tree.dot',
                      png_path='/home/andre/Downloads/small_tree.png'):
            """Export one tree of a fitted forest as a PNG image.

            The pipelines are fitted first (via ``self.rf_clf``) when
            ``self.pl`` is empty. The chosen estimator is written as a DOT
            file and then converted to PNG.

            Args:
                target: Key into ``self.pl`` selecting the pipeline.
                estimator_index: Index into the 'rf' step's ``estimators_``.
                dot_path: Where the intermediate DOT file is written.
                png_path: Path handed to ``self.make_filepath`` to obtain the
                    PNG destination.

            Errors are logged, not raised; nothing is returned.
            """
            try:
                if not self.pl:
                    self.rf_clf()
                # Pull a single tree out of the forest.
                forest = self.pl[target].named_steps['rf']
                single_tree = forest.estimators_[estimator_index]
                export_graphviz(single_tree,
                                out_file=dot_path,
                                feature_names=self.feature_list,
                                rounded=True,
                                precision=1)

                (graph, ) = pydot.graph_from_dot_file(dot_path)
                filepath = self.make_filepath(png_path)
                graph.write_png(filepath)
                logger.warning("TREE SAVED")
            except Exception:
                logger.error("make tree:", exc_info=True)

        def make_feature_importances(self):
            """Tabulate feature importances for every classification target.

            Fits the pipelines first when ``self.pl`` is empty. Within each
            outcome the features are listed in ascending importance and the
            rank counts down, so the last (most important) row gets rank 1.

            Returns:
                An hvplot table, or None when an error was logged.
            """
            try:
                if not self.pl:
                    self.rf_clf()

                headings = ('outcome', 'feature', 'importance',
                            'rank_within_outcome')
                table = {heading: [] for heading in headings}
                for target in self.targets['classification'].keys():
                    logger.warning('make feature importances for :%s', target)
                    forest = self.pl[target].named_steps['rf']
                    scores = list(forest.feature_importances_)

                    # (feature, rounded score) pairs, least important first
                    ascending = sorted(
                        ((name, round(score, 4))
                         for name, score in zip(self.feature_list, scores)),
                        key=lambda pair: pair[1])

                    n_scores = len(scores)
                    table['outcome'].extend([target] * n_scores)
                    table['feature'].extend(pair[0] for pair in ascending)
                    table['importance'].extend(pair[1] for pair in ascending)
                    table['rank_within_outcome'].extend(
                        range(n_scores, 0, -1))

                df = pd.DataFrame.from_dict(table)
                logger.warning('MAKE FEATURE IMPORTANCES FINISHED')
                return df.hvplot.table(
                    columns=list(headings),
                    width=600,
                    title="Variables ranked by importance (for each output)")

            except Exception:
                logger.error("Feature importances:", exc_info=True)

        ####################################################
        #               GRAPHS
    def update(attrname, old, new):
        """Date-picker callback: reload the prediction period and redraw.

        The three arguments follow the widget ``on_change`` signature and are
        not read; the current picker values are used instead.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        period_start = datepicker_start.value
        period_end = datepicker_end.value
        thistab.load_prediction_df(period_start, period_end)
        thistab.update_prediction_addresses_select()
        # Bump the counter so the Launch stream emits a fresh value.
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        stream_select_variable.event(variable=thistab.inspected_variable)
        thistab.notification_updater("ready")

    def update_address_predictions(attrname, old, new):
        """Address-selector callback: re-run the account predictions.

        The arguments follow the widget ``on_change`` signature and are unused.
        """
        busy_message = "Calculations underway. Please be patient"
        thistab.notification_updater(busy_message)
        # A new trigger value makes the Launch stream fire again.
        thistab.trigger = thistab.trigger + 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_select_variable(attrname, old, new):
        """Variable-selector callback: store the choice and redraw its plot.

        The arguments follow the widget ``on_change`` signature and are unused;
        the value is read from ``select_variable`` directly.
        """
        busy_message = "Calculations underway. Please be patient"
        thistab.notification_updater(busy_message)
        chosen = select_variable.value
        thistab.inspected_variable = chosen
        stream_select_variable.event(variable=thistab.inspected_variable)
        thistab.notification_updater("ready")

    # Build the whole tab: data object, widgets, streams, tables, callbacks
    # and layout. Any failure is logged and replaced by an error tab.
    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())

        cols = hyp_variables + [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        # first/last_date_range bound what the pickers allow; first_date and
        # last_date are the initial 5-day selection, ending 50 days before the
        # configured last date.
        # NOTE(review): presumably dashboard_config's last_date is a
        # date/datetime, since a timedelta is subtracted from it -- confirm.
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        last_date = last_date - timedelta(days=50)
        first_date = last_date - timedelta(days=5)
        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_variable = Select(title='Filter by variable',
                                 value=thistab.inspected_variable,
                                 options=thistab.feature_list)

        # search by address checkboxes
        thistab.prediction_address_select = Select(title='Filter by address',
                                                   value='all',
                                                   options=[])
        reset_prediction_address_button = Button(label="reset address(es)",
                                                 button_type="success")

        # ----------------------------------- LOAD DATA
        # load model-making data
        # The classifier is fitted on the 60 days that end where the
        # prediction period starts.
        end = datepicker_start.value
        start = end - timedelta(days=60)
        thistab.load_df(start, end)
        thistab.rf_clf()
        # load data for period to be predicted
        thistab.load_prediction_df(datepicker_start.value,
                                   datepicker_end.value)
        thistab.update_prediction_addresses_select()

        # tables
        # Only the prediction table is tied to a stream; the feature and
        # accuracy tables are built without one.
        hv_account_prediction_table = hv.DynamicMap(
            thistab.make_account_predictions, streams=[stream_launch])
        account_prediction_table = renderer.get_plot(
            hv_account_prediction_table)

        hv_features_table = hv.DynamicMap(thistab.make_feature_importances)
        features_table = renderer.get_plot(hv_features_table)

        hv_accuracy_table = hv.DynamicMap(thistab.accuracy_table)
        accuracy_table = renderer.get_plot(hv_accuracy_table)


        hv_variable_plot = hv.DynamicMap(thistab.box_plot,
                                 streams=[stream_select_variable])\
            .opts(plot=dict(width=800, height=500))

        variable_plot = renderer.get_plot(hv_variable_plot)

        # add callbacks
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        thistab.prediction_address_select.on_change(
            'value', update_address_predictions)
        reset_prediction_address_button.on_click(thistab.reset_checkboxes)
        select_variable.on_change('value', update_select_variable)

        # put the controls in a single element
        # NOTE(review): the date pickers, address selector and reset button
        # are placed in both boxes below -- confirm the layout accepts one
        # widget in two containers.
        controls = WidgetBox(select_variable, datepicker_start, datepicker_end,
                             thistab.prediction_address_select,
                             reset_prediction_address_button)

        controls_prediction = WidgetBox(datepicker_start, datepicker_end,
                                        thistab.prediction_address_select,
                                        reset_prediction_address_button)

        # One inner list per grid row, top to bottom.
        grid = gridplot(
            [[thistab.notification_div['top']], [Spacer(width=20, height=70)],
             [thistab.section_headers['churn']], [Spacer(width=20, height=70)],
             [accuracy_table.state,
              thistab.stats_information_div()], [features_table.state],
             [thistab.section_headers['variable behavior']],
             [Spacer(width=20, height=30)], [variable_plot.state, controls],
             [thistab.section_headers['predictions']],
             [Spacer(width=20, height=30)],
             [
                 account_prediction_table.state, thistab.metrics_div,
                 controls_prediction
             ], [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title='predictions: accounts by value')
        return tab

    except Exception:
        # Fall back to a placeholder tab carrying the same title.
        logger.error('rendering err:', exc_info=True)
        text = 'predictions: accounts by value'
        return tab_error_flag(text)
コード例 #17
0
def cryptocurrency_eda_tab(cryptos, panel_title):
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[],
                                               variable_2=[],
                                               relationship=[],
                                               lag=[],
                                               r=[],
                                               p_value=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """Set up state, thresholds and header divs for the EDA tab.

            Args:
                table: Table name, stored and forwarded to ``Mytab``.
                cols: Column names, stored and forwarded to ``Mytab``.
                dedup_cols: Forwarded to ``Mytab``; ``None`` (the default) is
                    replaced by a fresh empty list so no list object is shared
                    between instances.
            """
            dedup_cols = [] if dedup_cols is None else dedup_cols
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0

            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.variable = 'fork'
            self.crypto = 'all'
            self.lag_variable = 'push'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            # NOTE(review): presumably correlation-strength cut-offs used by
            # the labelling helper -- confirm against its implementation.
            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                            style='width:350px; margin-left:-600px;
                            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """

            self.header_style = """ style='color:blue;text-align:center;' """
            # track variable for AI for significant effects
            self.adoption_variables = {
                'user': [],
                'developer': ['watch', 'fork']
            }

            self.significant_effect_dict = {}
            self.reset_adoption_dict(self.variable)
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                           <div style="text-align:center;width:{}px;height:{}px;
                                  position:relative;background:black;margin-bottom:200px">
                                  <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                           </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            self.lag_section_head_txt = 'Lag relationships:'
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=self.lag_section_head_txt,
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(
                    text='Pre transform distribution:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                # This title has no placeholder, so the former
                # .format(self.section_divider) call changed nothing; the
                # rendered text is unchanged.
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:',
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                # NOTE(review): same title as 'non_linear' below -- looks like
                # a copy/paste; confirm the intended wording.
                'correlations':
                self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600,
                    html_header='h3',
                    margin_top=5,
                    margin_bottom=-155),
                'non_linear':
                self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600,
                    html_header='h3',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Create a coloured section heading wrapped in a ``Div``.

            Args:
                text: Heading text.
                html_header: Tag name used for the heading element.
                width: Width passed to the ``Div``.
                margin_top: Number written into the margin-top rule.
                margin_bottom: Number written after the template's literal '-'.

            Returns:
                A ``Div`` of height 15 holding the heading markup.
            """
            # NOTE(review): the template already carries a '-' in front of the
            # bottom margin, so a negative margin_bottom renders as "--150px".
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

        def notification_updater(self, text):
            """Show ``text`` in every notification banner.

            Args:
                text: Message placed inside the banner's <h4>.
            """
            banner = """<div style="text-align:center;background:black;width:{}px;">
                           <h4 style="color:#fff;">
                           {}</h4></div>""".format(self.page_width, text)
            for div in self.notification_div.values():
                div.text = banner

        def reset_adoption_dict(self, variable):
            """Clear the recorded significant effects for ``variable``.

            Args:
                variable: Key in ``self.significant_effect_dict`` to reset; it
                    is created when absent.
            """
            self.significant_effect_dict[variable] = list()

        def section_header_updater(self,
                                   text,
                                   section,
                                   html_header='h3',
                                   margin_top=150,
                                   margin_bottom=-150):
            """Replace the markup of an existing section heading.

            Args:
                text: New heading text.
                section: Key into ``self.section_headers``.
                html_header: Tag name used for the heading element.
                margin_top: Number written into the margin-top rule.
                margin_bottom: Number written after the template's literal '-'.
            """
            # NOTE(review): as the template hard-codes a '-' before the bottom
            # margin, negative values come out as "--150px".
            template = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>"""
            heading = self.section_headers[section]
            heading.text = template.format(margin_top, margin_bottom,
                                           html_header, text, html_header)

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a ``Div`` of height 15 showing ``text`` as an <h2> title.

            Args:
                text: Title text.
                width: Width passed to the ``Div``.
            """
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Build the static panel explaining how to read the relationships.

            Args:
                width: Width passed to the ``Div``.
                height: Height passed to the ``Div``.

            Returns:
                A ``Div`` whose HTML is formatted with ``self.div_style`` and
                ``self.header_style``.
            """
            guide = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """.format(self.div_style, self.header_style)
            return Div(text=guide, width=width, height=height)

        # /////////////////////////////////////////////////////////////
        def prep_data(self, df1):
            """Resample the raw frame per crypto and store it in ``self.df1``.

            Steps: index by 'timestamp', optionally keep only ``self.crypto``,
            materialise with ``compute()``, aggregate each crypto over
            ``self.resample_period`` using ``self.groupby_dict``, lag every
            feature except ``self.variable`` by ``self.lag`` rows, and drop
            rows containing NaN.

            Args:
                df1: Frame with 'timestamp' and 'crypto' columns; its
                    'timestamp' column is converted in place.

            Errors are logged, not raised; ``self.df1`` is then left as it was.
            """
            try:
                self.cols = list(df1.columns)

                df1['timestamp'] = df1['timestamp'].astype('M8[us]')
                df = df1.set_index('timestamp')
                if self.crypto != 'all':
                    df = df[df.crypto == self.crypto]
                df = df.compute()
                df = df.groupby('crypto').resample(self.resample_period).agg(
                    self.groupby_dict)

                df = df.reset_index()
                lag = int(self.lag)
                if lag > 0:
                    for feature in self.feature_list:
                        if feature != self.variable:
                            # Shift within each crypto so one coin's values do
                            # not slide into the first rows of the next.
                            df[feature] = df.groupby('crypto')[feature].shift(
                                lag)
                self.df1 = df.dropna()

            except Exception:
                logger.error('prep data', exc_info=True)

        def set_groupby_dict(self):
            """Placeholder hook: performs no work and returns None."""
            return None

        #   ///////////////// PLOTS /////////////////////

        def lags_plot(self, launch):
            """Scatter ``self.variable`` against lagged copies of the lag variable.

            For each entry of the comma-separated ``self.lag_days`` a column
            named ``<lag_variable>_<entry>`` is added, holding the lag variable
            shifted by that many rows. Entries that are not integers are
            skipped with a warning. The lag correlations are refreshed through
            ``self.lags_corr`` before plotting.

            Args:
                launch: Not read by the body.

            Returns:
                The hvplot scatter, or None when an error was logged.
            """
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                df = df.compute()
                cols = [self.lag_variable]
                for day in self.lag_days.split(','):
                    # Only the integer parse is guarded; any other failure is
                    # a real error and goes to the outer handler.
                    try:
                        offset = int(day)
                    except ValueError:
                        logger.warning('%s is not an integer', day)
                        continue
                    label = self.lag_variable + '_' + day
                    df[label] = df[self.lag_variable].shift(offset)
                    cols.append(label)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                return df.hvplot(x=self.variable,
                                 y=cols,
                                 kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            """Correlate ``self.variable`` with every other column of ``df``.

            Each column other than 'timestamp' and ``self.variable`` is passed
            with the reference variable to ``self.corr_label``; the results
            are streamed into ``lags_corr_src``, replacing what it held.

            Args:
                df: Frame holding ``self.variable`` and the lagged columns.

            Returns:
                A ``DataTable`` bound to ``lags_corr_src``, or None when an
                error was logged.
            """
            try:
                fields = ('variable_1', 'variable_2', 'relationship', 'lag',
                          'r', 'p_value')
                stats = {field: [] for field in fields}
                reference = df[self.variable].tolist()
                for name in df.columns:
                    if name in ['timestamp', self.variable]:
                        continue
                    # The lag is the integer after the last underscore, when
                    # there is one.
                    pieces = name.split('_')
                    try:
                        lag = int(pieces[-1])
                    except Exception:
                        lag = 'None'

                    other = df[name].tolist()
                    _slope, _intercept, rvalue, pvalue, txt = self.corr_label(
                        reference, other)
                    stats['variable_1'].append(self.variable)
                    stats['variable_2'].append(name)
                    stats['relationship'].append(txt)
                    stats['lag'].append(lag)
                    stats['r'].append(round(rvalue, 4))
                    stats['p_value'].append(round(pvalue, 4))

                # rollover equal to the batch size keeps only the new rows
                lags_corr_src.stream(stats, rollover=len(stats['lag']))
                titles = (
                    ("variable_1", "variable 1"),
                    ("variable_2", "variable 2"),
                    ("relationship", "relationship"),
                    ("lag", "lag(days)"),
                    ("r", "r"),
                    ("p_value", "p_value"),
                )
                columns = [
                    TableColumn(field=field, title=title)
                    for field, title in titles
                ]
                return DataTable(source=lags_corr_src,
                                 columns=columns,
                                 width=900,
                                 height=400)
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            """Tabulate the correlation of ``self.variable`` with each feature.

            Uses ``self.df1`` without its 'timestamp' column. When the variable
            is one of the 'developer' adoption variables, features whose label
            mentions one of ``self.relationships_to_check`` are accumulated in
            ``self.significant_effect_dict`` and the de-duplicated list is
            saved through ``self.redis``.

            Args:
                launch: Not read by the body.

            Returns:
                An hvplot table, or None when an error was logged.
            """
            try:
                headings = ('Variable 1', 'Variable 2', 'Relationship', 'r',
                            'p-value')
                rows = {heading: [] for heading in headings}

                frame = self.df1.drop('timestamp', axis=1)
                reference = frame[self.variable].tolist()

                for col in self.feature_list:
                    if col == self.variable:
                        continue
                    other = frame[col].tolist()
                    _slope, _intercept, rvalue, pvalue, txt = self.corr_label(
                        reference, other)
                    rows['Variable 1'].append(self.variable)
                    rows['Variable 2'].append(col)
                    rows['Relationship'].append(txt)
                    rows['r'].append(round(rvalue, 4))
                    rows['p-value'].append(round(pvalue, 4))

                    # remember features with a notable relationship
                    if self.variable not in self.adoption_variables[
                            'developer']:
                        continue
                    if any(level in txt
                           for level in self.relationships_to_check):
                        self.significant_effect_dict.setdefault(
                            self.variable, []).append(col)

                if self.variable in self.adoption_variables['developer']:
                    effects = list(
                        set(self.significant_effect_dict[self.variable].copy()))
                    payload = {
                        'features': effects,
                        'timestamp': datetime.now().strftime(self.DATEFORMAT)
                    }
                    # write to redis
                    redis_key = 'adoption_features:developer' + '-' + self.variable
                    self.redis.save(payload,
                                    redis_key,
                                    "",
                                    "",
                                    type='checkpoint')

                df = pd.DataFrame({heading: rows[heading]
                                   for heading in headings})
                return df.hvplot.table(columns=list(headings),
                                       width=550,
                                       height=400,
                                       title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            """Tabulate Mann-Whitney U results for ``self.variable``.

            ``self.variable`` is compared with every other column named in
            ``self.feature_list`` through ``self.mann_whitneyu_label`` and the
            outcome is rendered as an hvplot table.

            Args:
                launch: Stream trigger value. It is not read here.

            Returns:
                The hvplot table, or ``None`` when an exception was logged.
            """
            try:
                columns = [
                    'Variable 1', 'Variable 2', 'Relationship', 'stat',
                    'p-value'
                ]
                corr_dict = {name: [] for name in columns}

                df = self.df1
                # Same guard as matrix_plot: do not fail when the timestamp
                # column has already been removed.
                if 'timestamp' in df.columns:
                    df = df.drop('timestamp', axis=1)

                a = df[self.variable].tolist()
                for col in self.feature_list:
                    if col == self.variable:
                        continue
                    b = df[col].tolist()
                    stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                    corr_dict['Variable 1'].append(self.variable)
                    corr_dict['Variable 2'].append(col)
                    corr_dict['Relationship'].append(txt)
                    corr_dict['stat'].append(round(stat, 4))
                    corr_dict['p-value'].append(round(pvalue, 4))

                # The dict already has the final column layout, so it can be
                # handed to pandas directly.
                df = pd.DataFrame(corr_dict)
                return df.hvplot.table(
                    columns=columns,
                    width=550,
                    height=400,
                    title='Non parametric relationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            """Draw a histogram for each entry of ``self.feature_list``.

            The histograms come from ``self.df.hvplot.hist`` and the result
            is arranged with ``.cols(4)``. ``launch`` is accepted but not
            read. Any exception is logged and ``None`` is returned.
            """
            try:
                histogram_options = dict(
                    y=self.feature_list,
                    subplots=True,
                    shared_axes=False,
                    bins=25,
                    alpha=0.3,
                    width=300,
                )
                layout = self.df.hvplot.hist(**histogram_options)
                return layout.cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            """Scatter ``self.variable`` against every other feature.

            Works on ``self.df1`` with the ``timestamp`` column removed (when
            present) and missing values replaced by 0. ``launch`` is accepted
            but not read. Any exception is logged and ``None`` is returned.
            """
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)

                frame = self.df1
                if 'timestamp' in frame.columns:
                    frame = frame.drop('timestamp', axis=1)
                frame = frame.fillna(0)

                # y columns: all features except the one used on the x axis
                # (only its first occurrence is removed).
                y_columns = self.feature_list.copy()
                try:
                    y_columns.remove(self.variable)
                except ValueError:
                    pass

                scatter_options = dict(
                    x=self.variable,
                    y=y_columns,
                    width=330,
                    subplots=True,
                    shared_axes=False,
                    xaxis=False,
                )
                return frame.hvplot.scatter(**scatter_options).cols(4)

            except Exception:
                logger.error('matrix plot', exc_info=True)

        '''
        def regression(self,df):
            try:

            except Exception:
                logger.error('matrix plot', exc_info=True)
        '''

    def update_variable(attr, old, new):
        """Callback for a change of the analysed variable.

        Stores ``new`` on ``thistab``, refreshes the lag section header text
        and fires the matrix and correlation streams. ``attr`` and ``old``
        are not used.
        """
        notify = thistab.notification_updater
        notify("Calculations in progress! Please wait.")
        # NOTE(review): the data is re-prepared before the new variable is
        # stored, unlike the sibling callbacks - confirm this is intended.
        thistab.prep_data(thistab.df)
        thistab.variable = new
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        header_template = 'Lag relationships between {} and...'
        thistab.lag_section_head_txt = header_template.format(thistab.variable)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        notify("Ready!")

    def update_lag_plot_variable(attr, old, new):
        """Callback for a change of the lag variable.

        Stores ``new`` as ``thistab.lag_variable``, re-prepares the data and
        fires ``stream_launch_lags_var``. ``attr`` and ``old`` are not used.
        """
        notify = thistab.notification_updater
        notify("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        launch_stream = stream_launch_lags_var
        launch_stream.event(launch=thistab.trigger)
        notify("Ready!")

    def update_crypto(attr, old, new):
        """Callback for a change of the cryptocurrency selector.

        Reads the current cryptocurrency and lag from their widgets,
        re-prepares the data and fires the matrix and correlation streams.
        The callback arguments are not used.
        """
        notify = thistab.notification_updater
        notify("Calculations in progress! Please wait.")
        thistab.crypto = crypto_select.value
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        notify("Ready!")

    def update_lag(attr, old, new):
        """Callback for a change of the single-lag selector.

        Reads the lag from ``lag_select`` as an int, re-prepares the data and
        fires the matrix and correlation streams. The callback arguments are
        not used.
        """
        notify = thistab.notification_updater
        notify("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        notify("Ready!")

    def update(attrname, old, new):
        """Callback for a change of either date picker.

        Reloads the data for the currently selected date range, re-prepares
        it and fires the matrix and correlation streams. The callback
        arguments are not used.
        """
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")
        range_start = datepicker_start.value
        range_end = datepicker_end.value
        thistab.df_load(range_start, range_end, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        notify("Ready!")

    def update_resample(attrname, old, new):
        """Callback for a change of the resample period.

        Stores ``new`` as ``thistab.resample_period``, re-prepares the data
        and fires the matrix and correlation streams. ``attrname`` and
        ``old`` are not used.
        """
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        notify("Ready!")

    def update_lags_selected():
        """Button callback: take the lags typed into ``lags_input``.

        Copies the text input value to ``thistab.lag_days`` and fires
        ``stream_launch_lags_var``.
        """
        notify = thistab.notification_updater
        notify("Calculations in progress! Please wait.")
        entered_lags = lags_input.value
        thistab.lag_days = entered_lags
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        notify("Ready!")

    try:
        # SETUP
        table = 'external_daily'
        cols = list(groupby_dict.keys()) + ['timestamp', 'crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=200)
        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        # date comes out stream in milliseconds
        #stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value='fork',
                                 options=thistab.feature_list)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        crypto_select = Select(title='Select cryptocurrency',
                               value='all',
                               options=['all'] + thistab.items)

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src,
                                    columns=columns,
                                    width=500,
                                    height=280)

        width = 800

        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        #hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])

        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        crypto_select.on_change('value', update_crypto)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end, variable_select,
                             lag_select, crypto_select, resample_select)

        controls_lag = WidgetBox(lag_variable_select, lags_input,
                                 lags_input_button)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [matrix_plot.state, controls],
                         [thistab.section_headers['relationships']],
                         [Spacer(width=20, height=30)],
                         [thistab.section_headers['correlations']],
                         [Spacer(width=20, height=30)],
                         [corr_table.state,
                          thistab.corr_information_div()],
                         [thistab.section_headers['non_linear']],
                         [Spacer(width=20, height=30)], [nonpara_table.state],
                         [thistab.section_headers['lag']],
                         [Spacer(width=20, height=30)],
                         [lags_plot.state, controls_lag], [lags_corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)
コード例 #18
0
# Sixteen options labelled "Option 1" .. "Option 16".
multi_choice = MultiChoice(
    options=["Option {}".format(number) for number in range(1, 17)])

# Numeric sliders.
slider = Slider(value=10, start=0, end=100, step=0.5)

range_slider = RangeSlider(value=[10, 90], start=0, end=100, step=0.5)

# Date sliders.
date_slider = DateSlider(
    value=date(2016, 1, 1),
    start=date(2015, 1, 1),
    end=date(2017, 12, 31),
)

date_range_slider = DateRangeSlider(
    value=(date(2016, 1, 1), date(2016, 12, 31)),
    start=date(2015, 1, 1),
    end=date(2017, 12, 31),
)

# Remaining input widgets.
spinner = Spinner(value=100)

color_picker = ColorPicker(color="red", title="Choose color:")

date_picker = DatePicker(value=date(2017, 8, 1))

# Text widgets.
paragraph = Paragraph(text="some text")

div = Div(text="some <b>text</b>")

pre_text = PreText(text="some text")

def mk_tab(color):
    """Return a Panel holding a petal length/width scatter drawn in *color*.

    The panel title is "Tab 1: " followed by the capitalized color name.
    """
    scatter_plot = figure(width=300, height=300)
    scatter_plot.scatter(
        flowers["petal_length"],
        flowers["petal_width"],
        color=color,
        fill_alpha=0.2,
        size=12,
    )
    tab_title = "Tab 1: {}".format(color.capitalize())
    return Panel(title=tab_title, child=scatter_plot)

# One tab per color, in red / green / blue order.
tabs = Tabs(tabs=[mk_tab(name) for name in ("red", "green", "blue")])

source = ColumnDataSource(data=mpg)
コード例 #19
0
def crypto_clusters_eda_tab(cryptos, panel_title):
    """Build the dashboard tab that compares crypto clusters over time.

    Args:
        cryptos: Stored on the tab object as ``items``.
        panel_title: Title handed to the tab object.

    Returns:
        A ``Panel`` wrapping the layout, or the result of
        ``tab_error_flag`` when building the tab raised.
    """
    # These module-level names are (re)bound below and read by the nested
    # Thistab class.
    global groupby_dict
    global features
    global cluster_dct
    #global source

    redis = PythonRedis()
    cluster_dct = redis.simple_load('clusters:cryptocurrencies')
    # When nothing is stored under the key, groupby_dict, features and
    # source are NOT assigned here.
    if cluster_dct is not None:
        # Every clustered feature is aggregated with 'sum'.
        groupby_dict = {}
        for var in cluster_dct['features']:
            groupby_dict[var] = 'sum'

        features = cluster_dct['features']
        # One (initially empty) data source per feature plot.
        source = {}
        for feature in features:
            source[feature] = ColumnDataSource(
                data=dict(xs=[], ys=[], labels=[], colors=[]))

    class Thistab(Mytab):
        """State and plotting helpers for the cluster comparison tab."""

        def __init__(self, table, cols, dedup_cols=None):
            """Initialise tab state, notification divs and section headers.

            Args:
                table: Table name, passed to ``Mytab`` and kept on ``self``.
                cols: Column list, passed to ``Mytab`` and kept on ``self``.
                dedup_cols: Passed to ``Mytab``; defaults to a fresh empty
                    list so no list object is shared between instances.
            """
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self,
                           table,
                           cols,
                           dedup_cols,
                           panel_title=panel_title)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = [
                'watch', 'fork', 'issue', 'release', 'push', 'tw_mentions',
                'tw_positive', 'tw_compound', 'tw_neutral', 'tw_negative',
                'tw_emojis_positive', 'tw_emojis_compound',
                'tw_emojis_negative', 'tw_emojis_count', 'tw_reply_hashtags'
            ]
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0
            txt = """<div style="text-align:center;background:black;width:100%;">
                                                                           <h1 style="color:#fff;">
                                                                           {}</h1></div>""".format(
                'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=1400, height=20),
                'bottom': Div(text=txt, width=1400, height=10),
            }
            self.cluster_dct = cluster_dct
            self.groupby_dict = groupby_dict
            self.features = features
            self.crypto = 'all'

            self.div_style = """ style='width:350px; margin-left:25px;
                                    border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                                    """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.significant_effect_dict = {}
            self.section_headers = {
                'ts':
                self.section_header_div(
                    'Comparison of clusters across variables:---------------------',
                    width=600)
            }
            self.timestamp_col = None
            self.colors = None

        # ----------------------  DIVS ----------------------------
        def section_header_div(self, text, html_header='h2', width=1400):
            """Return a Div showing *text* inside an ``html_header`` tag."""
            text = '<{0} style="color:#4221cc;">{1}</{0}>'.format(
                html_header, text)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=300):
            """Return the (currently empty) interpretation help Div."""
            txt = """
               <div {}>
               <h4 {}>How to interpret relationships </h4>
               <ul style='margin-top:-10px;'>
                   <li>
                   </li>
                   <li>
                   </li>
                   <li>
                   </li>
                   <li>
                   </li>
                    <li>
                   </li>
                    <li>
                   </li>
               </ul>
               </div>

               """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ////////////////////////// UPDATERS ///////////////////////
        def section_head_updater(self, section, txt):
            """Replace the text of the section header stored under *section*.

            Errors (for example an unknown section key) are logged and
            swallowed.
            """
            try:
                # The Div objects live in self.section_headers;
                # section_header_div is the factory method and cannot be
                # subscripted.
                self.section_headers[section].text = txt
            except Exception:
                logger.error('', exc_info=True)

        def notification_updater(self, text):
            """Show *text* in every notification Div."""
            txt = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for notification in self.notification_div.values():
                notification.text = txt

        # /////////////////////////// LOAD CLUSTERS  //////////////////////
        def prep_data(self, df, timestamp_col):
            """Label rows by cluster, resample per cluster, store as ``df1``.

            Args:
                df: Frame holding the feature columns plus ``crypto`` and
                    ``timestamp``. ``df.compute()`` is called on it, so it is
                    presumably a lazy (dask-style) frame - confirm with the
                    loader.
                timestamp_col: Name of the column to parse as datetime and
                    use as the index.

            Errors are logged and swallowed; ``self.df1`` is then left
            unchanged.
            """
            # Keys of the cluster dict that are metadata, not cluster names.
            # 'features' is read from the same dict when the tab is built.
            metadata_keys = ('timestamp', 'variables', 'features')

            def label_cluster(x):
                """Map a crypto name to its cluster; unknown names pass through."""
                for key, values in self.cluster_dct.items():
                    if key not in metadata_keys and x in values:
                        return key
                return x

            try:
                cols = self.features + ['crypto', 'timestamp']
                df = df[cols]
                # groupby and resample
                df['crypto'] = df['crypto'].map(label_cluster)
                df = df.rename(columns={'crypto': 'cluster'})
                df = df.compute()
                df[timestamp_col] = pd.to_datetime(df[timestamp_col],
                                                   errors='coerce')
                df.set_index(timestamp_col, inplace=True)
                df = df.groupby('cluster').resample(self.resample_period).agg(
                    self.groupby_dict)
                # Flatten the (cluster, timestamp) index, then index by
                # timestamp only so 'cluster' becomes a regular column.
                df.reset_index(inplace=True)
                df.set_index(timestamp_col, inplace=True)
                self.timestamp_col = timestamp_col
                self.df1 = df

            except Exception:
                logger.error('prep data', exc_info=True)

        def graph_ts(self):
            """Push one line per cluster into each feature's data source.

            Does nothing when ``self.df1`` has not been prepared yet. Errors
            are logged and swallowed.
            """
            try:
                #global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    clusters = df['cluster'].unique()
                    # One configured color per cluster, by position.
                    self.colors = [
                        dashboard_config['colors'][idx]
                        for idx in range(len(clusters))
                    ]
                    if self.features is not None:
                        for feature in self.features:
                            df1 = df[['cluster', feature]]
                            # pivot into columns for cluster
                            df1 = df1.pivot(columns='cluster')
                            data = dict(x=[df1.index.values] * len(clusters),
                                        y=[df1[name].values for name in df1],
                                        labels=clusters,
                                        colors=self.colors)
                            source[feature].data = data
            except Exception:
                logger.error('graph ts', exc_info=True)

        def graph_chartify(self, timestamp_col):
            """Return a chartify line chart colored by cluster.

            NOTE(review): the ``return`` sits inside the feature loop, so
            only the first entry of ``self.features`` is ever charted -
            confirm whether that is intended. Returns ``None`` when
            ``self.df1`` is missing, ``self.features`` is empty, or an error
            was logged.
            """
            try:
                # global source
                if self.df1 is not None:
                    df = self.df1.copy()
                    df = df.reset_index()

                    for feature in self.features:
                        ch = chartify.Chart(blank_labels=True,
                                            x_axis_type='datetime')
                        ch.set_title("CHARTIFY")
                        ch.plot.line(
                            # Data must be sorted by x column
                            data_frame=df.sort_values(timestamp_col),
                            x_column=timestamp_col,
                            y_column=feature,
                            color_column='cluster')
                        return ch

            except Exception:
                logger.error('graph chartify', exc_info=True)

    def update():
        """Button callback: reload data for the picked dates and redraw.

        Loads the selected date range, re-prepares it and refreshes the
        per-feature data sources through ``thistab.graph_ts``.
        """
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")
        range_start = datepicker_start.value
        range_end = datepicker_end.value
        thistab.df_load(range_start, range_end, timestamp_col='timestamp')
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        notify("Ready!")

    def update_resample(attrname, old, new):
        """Callback for a change of the summary period selector.

        Takes the period from ``resample_select`` (the callback arguments
        are not used), re-prepares the loaded data and redraws.
        """
        notify = thistab.notification_updater
        notify("Calculations underway. Please be patient")
        chosen_period = resample_select.value
        thistab.resample_period = chosen_period
        thistab.prep_data(thistab.df, 'timestamp')
        thistab.graph_ts()
        notify("ready")

    try:
        table = 'external_daily'
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = dashboard_config['dates']['current_year_start']
        # initial function call
        thistab.df_load(first_date,
                        last_date,
                        timestamp_col='timestamp',
                        cols=[])
        thistab.prep_data(thistab.df, timestamp_col='timestamp')

        # MANAGE STREAMS ---------------------------------------------------------

        # CREATE WIDGETS ----------------------------------------------------------------
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        load_dates_button = Button(
            label="Select dates/periods, then click me!",
            width=20,
            height=8,
            button_type="success")

        resample_select = Select(title='Select summary period',
                                 value=thistab.resample_period,
                                 options=thistab.menus['resample_periods'])

        # -------------------------------- PLOTS ---------------------------
        thistab.graph_ts()
        p = {}
        for feature in features:
            p[feature] = figure(x_axis_type="datetime",
                                plot_width=1400,
                                plot_height=400,
                                title=feature)

            p[feature].multi_line(
                xs='x',
                ys='y',
                legend='labels',
                line_color='colors',
                line_width=5,
                hover_line_color='colors',
                hover_line_alpha=1.0,
                source=source[feature],
            )
            p[feature].add_tools(
                HoverTool(show_arrow=False,
                          line_policy='next',
                          tooltips=[
                              ('freq', '$y'),
                          ]))

        # ch = thistab.graph_chartify(timestamp_col='timestamp')
        # -------------------------------- CALLBACKS ------------------------

        load_dates_button.on_click(update)  # lags array
        resample_select.on_change('value', update_resample)

        # -----------------------------------LAYOUT ----------------------------
        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_left = WidgetBox(datepicker_start, load_dates_button)

        controls_right = WidgetBox(datepicker_end)

        grid_data = [
            #[ch.figure],
            [thistab.notification_div['top']],
            [controls_left, controls_right],
            [thistab.section_headers['ts'], resample_select],
        ]
        for feature in features:
            grid_data.append([p[feature]])
            logger.warning('p:%s', p[feature])

        grid_data.append([thistab.notification_div['bottom']])

        grid = gridplot(grid_data)

        # Make a tab with the layout
        tab = Panel(child=grid, title=thistab.panel_title)
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(thistab.panel_title)
コード例 #20
0
# Layout
path_input_section = column(
    input_dir_paths_div,
    row(clm_in_div, clm_input_dir_text_input),
    row(cases_div, cases_dir_text_input),
    row(output_div, output_dir_text_input),
)

###############################################################################
# Simulation period
###############################################################################
period_div = Div(
    text="<h2>Simulation period</h2>",
    sizing_mode="stretch_width",
)

# Start date: selectable from 1900-01-01 through 2020-12-30.
start_date_picker = DatePicker(
    title='Select simulation start date:',
    value="2000-01-01",
    min_date="1900-01-01",
    max_date="2020-12-30",
)
start_date_picker.on_change('value', _input_changed)

# End date: bounds are one day later than those of the start picker.
end_date_picker = DatePicker(
    title='Select simulation end date:',
    value="2001-01-01",
    min_date="1900-01-02",
    max_date="2020-12-31",
)
end_date_picker.on_change('value', _input_changed)

dates_section = column(
    period_div,
    row(start_date_picker, end_date_picker),
)


def _check_dates():
    global start_date, end_date
    start_date = datetime.strptime(start_date_picker.value, '%Y-%m-%d')
コード例 #21
0
)
# NOTE(review): reads the first column of the SECOND row - confirm that this
# is the row holding the most recent date.
last_date = data_race.iloc[1, 0]

# Page header. The text is user-facing HTML: typos were corrected and the
# stray quote after the closing </a> tag was removed.
title1 = Div(
    text=
    """This is a visualization of covid-19 data. These data show: <br>1. The number of new covid-19 cases in CA on a particular day in August.
<br>2. The percentage of cases by race compared to their representation in the general population on a particular day. <br>3. The percentage of deaths by race compared to their representation in the general population on a particular day.
<br>All data was collected from 'Los Angeles Times Data and Graphics Department', url=<a href="https://github.com/datadesk/california-coronavirus-data">https://github.com/datadesk/california-coronavirus-data</a>
<br><b>Last updated on:{}</b>""".format(last_date),
    width=1000,
    height=120)

# date pickers
date_picker = DatePicker(title='Select date',
                         value="2020-08-15",
                         min_date="2020-08-01",
                         max_date="2020-08-31")
date_picker2 = DatePicker(title='Select date',
                          value="2020-08-16",
                          min_date="2020-05-14",
                          max_date=last_date)
date1 = pd.to_datetime(date_picker.value)
date2 = pd.to_datetime(date_picker2.value)
# Parse the date columns so they can be compared with date strings below.
data_total['date'] = pd.to_datetime(data_total['date'])
data_race['date'] = pd.to_datetime(data_race['date'])

# p1 aug plot: rows of August 2020 only
Aug = data_total[(data_total["date"] >= "2020-08-01")
                 & (data_total["date"] < "2020-09-01")]
output_file("datetime.html")
data_got = get_data2()
コード例 #22
0
ファイル: stocks.py プロジェクト: dmgoyer/stocks
def update_data():
    """Reload stock data for the current widget values and rescale the axes.

    Reads the ticker symbol and the start/end dates from the module-level
    widgets, fetches a fresh data source via ``stocks_data``, pushes its
    columns into the shared ``source`` and resets both the price range and
    the extra 'Volume' range of the ``price`` figure to the new extremes.
    """
    symbol = ticker_input.value
    first_day = start.value
    last_day = end.value
    fresh = stocks_data(symbol, first_day, last_day)
    source.data.update(fresh.data)

    # The price axis must span the close price and both moving averages.
    price_columns = ('Close', 'MA_50', 'MA_200')
    price.y_range.start = min(fresh.data[name].min() for name in price_columns)
    price.y_range.end = max(fresh.data[name].max() for name in price_columns)

    volume_range = price.extra_y_ranges['Volume']
    volume_range.start = fresh.data['Volume'].min()
    volume_range.end = fresh.data['Volume'].max()

# Input widgets: ticker symbol box and the button that shows the chart.
ticker_input = TextInput(value='GE', title='Company Symbol: ')
button = Button(label='Show', button_type='success')

# Date range pickers: default window is the last 13 weeks, bounded by
# 2010-01-01 and today.
start = DatePicker(title='Start Date: ', value=date.today()-timedelta(weeks=13), min_date=date(2010, 1, 1), max_date=date.today(), width=375)
end = DatePicker(title='End Date: ', value=date.today(), min_date=date(2010, 1, 1), max_date=date.today(), width=375)
# Initial data source built from the widgets' default values.
source = stocks_data(ticker_input.value, start.value, end.value)

tool_bar = 'crosshair, reset, save'
# Price-axis bounds: extremes across the close price and both moving averages.
minimum = min(source.data['Close'].min(), source.data['MA_50'].min(), source.data['MA_200'].min())
maximum = max(source.data['Close'].max(), source.data['MA_50'].max(), source.data['MA_200'].max())

price = figure(plot_height=400, plot_width=750, title="Stock Price", tools=tool_bar, x_axis_type='datetime', x_axis_label = 'Date', y_axis_label = 'Adjusted Close Price ($)')
# First axis: close price and its 50- and 200-period moving averages.
price.line('Date', 'Close', source=source, legend_label='Close', color='black') # Close
price.line('Date', 'MA_50', source=source, legend_label='MA(50)', color='blue') # MA_50
price.line('Date', 'MA_200', source=source, legend_label='MA(200)', color='pink') # MA_200
price.y_range = Range1d(minimum, maximum)
# Second axis: a separate range named 'Volume', scaled to the volume column.
price.extra_y_ranges = {'Volume': Range1d(source.data['Volume'].min(), source.data['Volume'].max())}
コード例 #23
0
ファイル: accounts.py プロジェクト: andre-aion/analytics_demo
def accounts_tsa_tab(panel_title):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            """Initialise state, filter defaults and static divs for the tab.

            Args:
                table: table name, forwarded to ``Mytab.__init__`` and kept
                    on ``self.table``.
                cols: column names to load, kept on ``self.cols``.
                dedup_cols: forwarded unchanged to ``Mytab.__init__``.
            """
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            # Format used by load_df to parse string dates.
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')

            # Number of days the forecast plots extend beyond the data.
            self.forecast_days = 30
            # NOTE(review): overwritten with 'amount' a few lines below.
            self.interest_var = 'address'
            # Counter bumped by the update callback to re-trigger the plots.
            self.trigger = -1
            # Filter defaults; 'all' disables the corresponding filter.
            self.status = 'all'
            self.update_type = 'all'
            # NOTE(review): duplicate of the status assignment above.
            self.status = 'all'
            self.account_type = 'all'
            self.interest_var = 'amount'

            self.pl = {}  # for rf pipeline
            # NOTE(review): div_style is assigned again further down; the
            # later value (different embedded whitespace) is the one kept.
            self.div_style = """ style='width:300px; margin-left:25px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            self.header_style = """ style='color:blue;text-align:center;' """

            # list of tier specific addresses for prediction
            self.address_list = []
            self.address_select = Select(title='Filter by address',
                                         value='all',
                                         options=[])
            self.address = 'all'
            self.load_data_flag = False
            # NOTE(review): day_diff was already set to 1 above.
            self.day_diff = 1
            self.groupby_dict = {}
            self.addresses = []

            self.div_style = """ style='width:300px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """
            # Bounds of the currently loaded data; refreshed by load_df.
            self.max_loaded_date = None
            self.min_loaded_date = None

            # ------- DIVS setup begin
            self.page_width = 1200
            # Banner markup shared by the top and bottom notification divs.
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                            position:relative;background:black;margin-bottom:200px">
                            <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            # Section heading divs, keyed by section name.
            self.section_headers = {
                'forecast':
                self.section_header_div(text='Forecasts:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self, text, html_header='h2', width=600,
                               margin_top=150, margin_bottom=-150):
            """Build a coloured section heading wrapped in a Bokeh ``Div``.

            Args:
                text: heading text.
                html_header: HTML tag used for the heading (e.g. ``'h2'``).
                width: width passed to the ``Div``.
                margin_top: value substituted into ``margin-top``.
                margin_bottom: value substituted into ``margin-bottom``.

            Returns:
                A ``Div`` of height 15 containing the generated markup.
            """
            # NOTE(review): the template already contains a '-' before the
            # bottom-margin placeholder, so the negative default (and the
            # negative value passed from __init__) renders as
            # "margin-bottom:--150px". Confirm which sign is intended.
            template = ('<div style="margin-top:{}px;margin-bottom:-{}px;">'
                        '<{} style="color:#4221cc;">{}</{}></div>')
            markup = template.format(margin_top, margin_bottom, html_header,
                                     text, html_header)
            return Div(text=markup, width=width, height=15)

            # ####################################################
            #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            """Return a ``Div`` showing ``text`` with the given width and height."""
            return Div(text=text, width=width, height=height)

        def title_div(self, text, width=700):
            """Return a ``Div`` of height 15 with ``text`` as a coloured ``<h2>``."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def reset_checkboxes(self):
            """Reset the address selection to its default state.

            Clears ``self.address_selected`` and sets the address ``Select``
            widget back to ``"all"``. Any failure is logged, not raised.
            """
            # NOTE(review): despite the name, self.checkboxes is not touched here.
            try:
                self.address_selected = ""
                self.address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self, start_date, end_date):
            """Load data for the requested period unless it is already loaded.

            Args:
                start_date: period start; a string is parsed with
                    ``self.DATEFORMAT``, anything else is used as given.
                end_date: period end; same handling as ``start_date``.

            Side effects:
                Updates ``self.df`` (via ``self.df_load``), ``self.addresses``
                and, when data was already present, ``self.min_loaded_date``
                and ``self.max_loaded_date``. The freshly loaded frame is
                passed through ``self.filter`` only on the very first load,
                as before.

            Any exception is logged and swallowed so the UI keeps running.
            """
            try:
                logger.warning("data load begun")
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)

                first_load = self.df is None
                if not first_load:
                    self.max_loaded_date = self.df.block_timestamp.max(
                    ).compute()
                    self.min_loaded_date = self.df.block_timestamp.min(
                    ).compute()
                    # Nothing to do when the requested window is already covered.
                    if (start_date >= self.min_loaded_date
                            and end_date <= self.max_loaded_date):
                        logger.warning("data already loaded - %s",
                                       self.df.tail(10))
                        return

                self.df_load(start_date, end_date, cols=self.cols)
                self.df = self.df.fillna(0)
                # Collect the distinct address VALUES. Iterating the frame
                # itself (the previous list(df)) yields only column names.
                df = self.df[['address']].compute()
                self.addresses = ['all'] + df['address'].unique().tolist()
                logger.warning("data loaded - %s", self.df.tail(10))
                if first_load:
                    self.df = self.filter(self.df)

            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            """Add a ``<col>_diff`` percent-change column per target column.

            Materialises ``self.df``, appends ``pct_change()`` of every column
            named in ``self.targets`` (missing values replaced by 0) and
            stores the result back on ``self.df`` split into 15 partitions.
            Does nothing when ``self.df`` is ``None`` or empty. Any exception
            is logged, not raised.
            """
            # NOTE(review): self.targets is not set in this class's __init__;
            # presumably provided by Mytab - confirm.
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        # NOTE(review): this assignment is overwritten by the
                        # very next line, so the mean-fill never takes effect.
                        self.df = self.df.fillna(self.df.mean())
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))

            except Exception:
                logger.error('make delta', exc_info=True)

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            """Return a box plot of ``variable`` grouped by ``status``.

            The y-axis runs from 0.9x the minimum to 1.1x the maximum of the
            variable. With no frame loaded, a frame obtained from ``SD`` is
            plotted instead and the limits stay at zero. Failures are logged
            and ``None`` is returned.
            """
            try:
                frame = self.df
                lower, upper = 0, 0
                if frame is None:
                    frame = SD('filter', [variable, 'status'], []).get_df()
                elif len(frame) > 0:
                    # Evaluate both extremes in a single compute call.
                    lower, upper = dd.compute(frame[variable].min(),
                                              frame[variable].max())

                y_limits = (.9 * lower, 1.1 * upper)
                return frame.hvplot.box(variable, by='status', ylim=y_limits)
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS

        def filter(self, df):
            """Restrict ``df`` to the rows matching the tab's active filters.

            A ``freq`` column (copy of ``address``) is added first. Then each
            of ``status``, ``account_type``, ``update_type`` and ``address``
            is applied as an equality filter unless the matching attribute on
            ``self`` is ``'all'``.

            Args:
                df: frame with at least the four filter columns.

            Returns:
                The filtered frame, or ``None`` when filtering failed (the
                error is logged).
            """
            try:
                df = df.assign(freq=df.address)
                # (column, wanted value) pairs; 'all' means "do not filter".
                # The account-type filter previously read the misspelled
                # column "acccount_type", so it always raised.
                criteria = (
                    ('status', self.status),
                    ('account_type', self.account_type),
                    ('update_type', self.update_type),
                    ('address', self.address),
                )
                for column, wanted in criteria:
                    if wanted != 'all':
                        df = df[df[column] == wanted]

                return df
            except Exception:
                logger.error("filter:", exc_info=True)

        def _daily_prophet_frame(self, column, how):
            """Aggregate ``self.df`` per day into Prophet's ``ds``/``y`` layout.

            Args:
                column: column of ``self.df`` to aggregate.
                how: aggregation name passed to ``agg`` (e.g. ``'mean'``).

            Returns:
                An in-memory frame with columns ``ds`` (day) and ``y``
                (aggregated value, missing values replaced by 0).
            """
            logger.warning('df columns:%s', list(self.df.columns))
            df = self.df.set_index('block_timestamp')
            df = df.resample('D').agg({column: how})
            df = df.reset_index()
            df = df.compute()
            df = df.fillna(0)

            df = df.rename(columns={'block_timestamp': 'ds', column: 'y'})
            logger.warning('df:%s', df.head())
            df = df[['ds', 'y']]
            logger.warning('df:%s', df.tail())
            return df

        def _forecast_layout(self, df, value_label, **plot_opts):
            """Fit Prophet on ``df`` and return forecast and component plots.

            Args:
                df: frame with ``ds`` and ``y`` columns.
                value_label: axis label passed to every plot.
                **plot_opts: extra keyword options forwarded to every
                    ``hvplot`` call (e.g. ``legend=False``).

            Returns:
                A layout of two overlays: the forecast (``yhat`` as a line,
                its bounds as scatter) and the ``trend``/``weekly`` components.
            """
            m = Prophet()
            m.fit(df)

            future = m.make_future_dataframe(periods=self.forecast_days)
            forecast = m.predict(future)
            print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
            print(list(forecast.columns))

            def layer(plot_fn, col, width):
                # One labelled curve of the forecast frame.
                return plot_fn(x='ds',
                               y=col,
                               width=width,
                               height=250,
                               value_label=value_label,
                               **plot_opts).relabel(col)

            p = layer(forecast.hvplot.line, 'yhat', 600)
            for col in ('yhat_lower', 'yhat_upper'):
                p *= layer(forecast.hvplot.scatter, col, 600)

            q = layer(forecast.hvplot.line, 'trend', 550)
            q *= layer(forecast.hvplot.line, 'weekly', 550)

            return p + q

        def tsa_amount(self, launch):
            """Forecast the daily mean ``amount``.

            Args:
                launch: value delivered by the launch stream; it only serves
                    to re-trigger the plot and is not read.

            Returns:
                The forecast layout, or ``None`` when anything failed (the
                error is logged).
            """
            try:
                df = self._daily_prophet_frame('amount', 'mean')
                return self._forecast_layout(df, '$', legend=False)
            except Exception:
                # Previously logged as "box plot:", which pointed at the
                # wrong method.
                logger.error("tsa_amount:", exc_info=True)

        def tsa_freq(self, launch):
            """Forecast the daily number of distinct addresses.

            Args:
                launch: value delivered by the launch stream; it only serves
                    to re-trigger the plot and is not read.

            Returns:
                The forecast layout, or ``None`` when anything failed (the
                error is logged).
            """
            try:
                df = self._daily_prophet_frame('address', 'nunique')
                return self._forecast_layout(df, '#')
            except Exception:
                # Previously logged as "box plot:", which pointed at the
                # wrong method.
                logger.error("tsa_freq:", exc_info=True)

        ####################################################
        #               GRAPHS
    def update(attrname, old, new):
        """Widget callback: copy the widget values onto the tab and redraw.

        The three arguments are supplied by the ``on_change`` hook and are
        not read; current values are taken from the widgets directly.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.address = thistab.address_select.value
        # Bump the counter and emit it on the launch stream.
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_load(attrname, old, new):
        """Date-picker callback: reload data for the currently picked period.

        The three arguments are supplied by the ``on_change`` hook and are
        not read.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        thistab.notification_updater("ready")

    # Build the whole tab; on any failure log it and return the error tab.
    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())

        cols = [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type', 'amount'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        # NOTE(review): first_date_range is a datetime while last_date_range
        # is a date; both are handed to DatePicker as range bounds.
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        # Default window: the 60 days ending at the configured last date.
        first_date = last_date - timedelta(days=60)
        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])
        # search by address checkboxes
        # NOTE(review): built before load_df runs below, so thistab.addresses
        # is still the empty list from __init__ here - confirm intended order.
        thistab.checkboxes = CheckboxButtonGroup(labels=thistab.addresses,
                                                 active=[0])

        # ----------------------------------- LOAD DATA
        # load model-making data
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        # load data for period to be predicted

        # Forecast plots; re-evaluated whenever stream_launch emits an event.
        hv_tsa_amount = hv.DynamicMap(thistab.tsa_amount,
                                      streams=[stream_launch])
        tsa_amount = renderer.get_plot(hv_tsa_amount)

        hv_tsa_freq = hv.DynamicMap(thistab.tsa_freq, streams=[stream_launch])
        tsa_freq = renderer.get_plot(hv_tsa_freq)

        # add callbacks: date pickers reload data, the other widgets redraw
        datepicker_start.on_change('value', update_load)
        datepicker_end.on_change('value', update_load)
        thistab.address_select.on_change('value', update)
        select_forecast_days.on_change('value', update)
        update_type_select.on_change('value', update)
        account_type_select.on_change('value', update)
        status_select.on_change('value', update)

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             thistab.address_select, select_forecast_days,
                             update_type_select, account_type_select,
                             status_select, thistab.checkboxes)

        # Page layout, top to bottom.
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [Spacer(width=20, height=30)],
                         [tsa_amount.state, controls], [tsa_freq.state],
                         [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
コード例 #24
0
                <h2> Global visualization </h2>
                Select the country of your choice and the desired range on the bottom graph.""",
                width=900)

# Panel 2
# Plot 1: selection widgets
# Country selector; options are the sorted entries of COUNTRIES_INDEX.
widget_country_2 = Select(title="Country",
                          value=country,
                          options=sorted(COUNTRIES_INDEX),
                          width=170)
# Number of clusters: 3, 4 or 5.
widget_nb_clusters = Select(title="Number of clusters",
                            value=nbCluster,
                            options=["3", "4", "5"],
                            width=170)
# Period selection. The start picker is limited to 2013; the end picker
# allows dates up to 2017-04-30.
widget_start = DatePicker(title="Start date",
                          min_date=datetime(2013, 1, 1),
                          max_date=datetime(2013, 12, 31),
                          value=datetime(2013, 1, 1))
widget_end = DatePicker(title="End date",
                        min_date=datetime(2013, 1, 1),
                        max_date=datetime(2017, 4, 30),
                        value=datetime(2013, 12, 31))
# Day-type choice: all days, weekdays only or weekends only.
widget_type_date = Select(title="Type of date",
                          value=type_date,
                          options=["All", "WeekDay", "WeekEnd"],
                          width=170)
# Metric choice: "DTW" or "Euclidian".
widget_metric = Select(title="Metric",
                       value=metric_init,
                       options=["DTW", "Euclidian"],
                       width=170)
widget_transformation = Select(title="Data transformation",
                               value=transf_init,
コード例 #25
0
# Per-race figures built from the race data, its source and last-update stamp.
race_table = raceTable(data_race, t_source, t_last_update)

# Column data backing the table; the 'race' labels are hard-coded.
# NOTE(review): assumes race_table's columns follow this label order - confirm.
tableData = dict(
    race=['Asian', 'Black', 'Cdph-other', 'Latino', 'White', 'Other'],
    confirmed_cases_percent=race_table.confirm,
    deaths_percent=race_table.death,
    population_percent=race_table.percent,
    last_update=race_table.last_update,
    source=race_table.source)
source2 = ColumnDataSource1(tableData)
# Only four of the six data columns are displayed; last_update and source
# are shown in the text area below instead.
columns = [
    TableColumn(field="race", title="Race"),
    TableColumn(field='confirmed_cases_percent',
                title="Confirmed_cases_percent"),
    TableColumn(field="deaths_percent", title="Deaths_percent"),
    TableColumn(field="population_percent", title="Population_percent")
]
# TableColumn(field="last_update",title="Last_update"),TableColumn(field="source",title="Source")
table = DataTable(source=source2, columns=columns, width=width, height=500)
# Date picker limited to 2020-05-01 .. 2020-11-02; update1 runs on change.
date_picker = DatePicker(title='Select a date',
                         value="2020-10-26",
                         min_date="2020-05-01",
                         max_date="2020-11-02")
date_picker.on_change('value', update1)
# Text area showing the data source and last-update stamp.
pre3 = TextAreaInput(value='Source: ' + t_source + '\nLast update: ' +
                     t_last_update,
                     rows=3)
# Single-column page layout, added to the current document.
layout = column(pre, dropdown_state, textbox, p, pre2, date_picker, pre3,
                table)
curdoc().add_root(layout)
コード例 #26
0
ファイル: main.py プロジェクト: spmcelrath/demos
        labels.visible = True
    else:
        print("SYMBOLS OFF")
        labels.visible = False
    if 0 in attr:
        print("LEGEND ON")
        plot.legend.visible = True
    else:
        print("LEGEND OFF")
        plot.legend.visible = False


# Run check_btn_callback whenever a button of the group is clicked.
checkbox_button_group.on_click(check_btn_callback)

# Date picker limited to 2019-01-22 .. 2020-11-22, starting on the first day.
date_picker = DatePicker(value="2019-01-22",
                         min_date="2019-01-22",
                         max_date="2020-11-22")

# Sliders: animation speed (1..10) and the value range (-1..1) for which
# links are shown.
as_slider = Slider(start=1, end=10, value=5, step=.1, title="animation speed")
l_slider = RangeSlider(start=-1.00,
                       end=1.00,
                       value=(0.75, 1.0),
                       step=0.01,
                       title="show links for")
spacer = Spacer(width=240, height=700)
# Widgets collected into the controls list, in this order.
controls = [
    spacer, date_picker, af_button, ab_button, as_slider, l_slider,
    checkbox_button_group
]

index_select = Select(title="Benchmark:",
コード例 #27
0
        labels=["Radio Option 7", "Radio Option 8", "Radio Option 9"],
        button_type="success",
        active=2),
    TextInput(placeholder="TextInput 1"),
    TextInput(placeholder="TextInput 2"),
    TextInput(placeholder="TextInput 3"),
    AutocompleteInput(placeholder="AutocompleteInput 1 ...",
                      completions=["aaa", "aab", "aac", "baa", "caa"]),
    AutocompleteInput(placeholder="AutocompleteInput 2 ...",
                      completions=["AAA", "AAB", "AAC", "BAA", "CAA"]),
    AutocompleteInput(placeholder="AutocompleteInput 3 ...",
                      completions=["000", "001", "002", "100", "200"]),
    AutocompleteInput(placeholder="AutocompleteInput 4 ...",
                      completions=["foo", "foobar", "fuzzymatch", "foozzy"],
                      fuzzy_threshold=4),
    DatePicker(value=date(2018, 9, 1)),
    DatePicker(value=date(2018, 9, 2)),
    DatePicker(value=date(2018, 9, 3)),
)

#Slider(value=10, start=0, end=100, step=0.5),
#RangeSlider(value=[20, 30], start=0, end=100, step=0.5),
#DateSlider(value=date(2018, 9, 1), start=date(2018, 1, 1), end=date(2018, 12, 31)),
#DateRangeSlider(value=(date(2018, 9, 1), date(2018, 9, 30)), start=date(2018, 1, 1), end=date(2018, 12, 31)),

#CheckboxGroup(labels=["Checkbox Option 1", "Checkbox Option 2", "Checkbox Option 3"], active=[0, 1]),
#CheckboxGroup(labels=["Checkbox Option 4", "Checkbox Option 5", "Checkbox Option 6"], active=[1, 2]),
#CheckboxGroup(labels=["Checkbox Option 7", "Checkbox Option 8", "Checkbox Option 9"], active=[0, 2]),

#CheckboxGroup(labels=["Checkbox Option 1", "Checkbox Option 2", "Checkbox Option 3"], active=[0, 1], inline=True),
#CheckboxGroup(labels=["Checkbox Option 4", "Checkbox Option 5", "Checkbox Option 6"], active=[1, 2], inline=True),
コード例 #28
0
def plotHistogram(fileName,
                  initData,
                  stations,
                  dateRange,
                  bokehPlaceholderId='bokehContent'):
    """Render the histogram page and return it as an HTML string.

    Builds the filter widgets, a multi-line plot and a data table, wires the
    "Add" toggle to a JavaScript callback that fetches ``/histogram`` and
    appends the returned series, then embeds the Bokeh script and div into
    the HTML template read from ``fileName``.

    Args:
        fileName: path of the HTML template, read with ``readHtmlFile``.
        initData: mapping with ``'bins'`` (x values) and ``'values'``
            (y values) for the initial series.
        stations: station names offered in the start/end station selects.
            The list is not modified; "All" is prepended to a copy.
        dateRange: two-item sequence ``(first, last)`` bounding both date
            pickers.
        bokehPlaceholderId: id of the element that receives the Bokeh div.

    Returns:
        The template HTML with the Bokeh script and div inserted.
    """
    # ss and es are placeholders for testing; values of the other controls
    # (age, user type, gender) fetched from initData are to be added later.
    data = {
        'xs': [initData['bins']],
        'ys': [initData['values']],
        'ss': [1, 2],
        'es': [3, 4]
    }

    source = ColumnDataSource(data=data)
    # Prepend "All" on a copy. Inserting into the caller's list added one
    # more "All" entry on every call.
    station_options = ["All"] + list(stations)
    selectSS = Select(title="Start Station:",
                      value="All",
                      options=station_options)
    selectES = Select(title="End Station:",
                      value="All",
                      options=station_options)

    selectUT = Select(title="User Type:",
                      value="All",
                      options=["All", "Subscriber", "Customer"])
    selectGender = Select(title="Gender:",
                          value="All",
                          options=["All", "Male", "Female"])
    sliderAge = Slider(start=8, end=100, value=30, step=5, title="Age")

    startDP = DatePicker(title="Start Date:",
                         min_date=dateRange[0],
                         max_date=dateRange[1],
                         value=dateRange[0])
    endDP = DatePicker(title="End Date:",
                       min_date=dateRange[0],
                       max_date=dateRange[1],
                       value=dateRange[1])
    binSize = TextInput(value="15", title="Bin Size (Days):")
    AddButton = Toggle(label="Add", type="success")
    DeleteButton = Toggle(label="delete", type="success")

    # Columns for the values of the other controls are to be added later.
    columns = [
        TableColumn(field="ss", title="Start Station"),
        TableColumn(field="es", title="End Station")
    ]
    data_table = DataTable(source=source,
                           columns=columns,
                           width=650,
                           height=300)

    # Objects made available to the JavaScript callback by name.
    model = dict(source=source,
                 selectSS=selectSS,
                 selectES=selectES,
                 startDP=startDP,
                 endDP=endDP,
                 binSize=binSize,
                 selectUT=selectUT,
                 selectGender=selectGender,
                 sliderAge=sliderAge)
    plot = Figure(plot_width=650, plot_height=400, x_axis_type="datetime")
    # NOTE(review): 'width' and 'color' are not columns of `data` above -
    # confirm where these per-line attributes are meant to come from.
    plot.multi_line('xs',
                    'ys',
                    source=source,
                    line_width='width',
                    line_alpha=0.6,
                    line_color='color')

    callback = CustomJS(args=model,
                        code="""
            //alert("callback");
            var startStation = selectSS.get('value');
            var endStation = selectES.get('value');
            var startDate = startDP.get('value');
            
            if ( typeof(startDate) !== "number")
                startDate = startDate.getTime();
                
            var endDate = endDP.get('value');
            
            if ( typeof(endDate) !== "number")
                endDate = endDate.getTime();            
            
            var binSize = binSize.get('value');
            //alert(startStation + " " + endStation + " " + startDate + " " + endDate + " " + binSize);
            var xmlhttp;
            xmlhttp = new XMLHttpRequest();
            
            xmlhttp.onreadystatechange = function() {
                if (xmlhttp.readyState == XMLHttpRequest.DONE ) {
                    if(xmlhttp.status == 200){
                        var data = source.get('data');
                        var result = JSON.parse(xmlhttp.responseText);
                        var temp=[];
                        
                        for(var date in result.x) {
                            temp.push(new Date(result.x[date]));
                        }
                        
                        data['xs'].push(temp);
                        data['ys'].push(result.y);
                        source.trigger('change');
                    }
                    else if(xmlhttp.status == 400) {
                        alert(400);
                    }
                    else {
                        alert(xmlhttp.status);
                    }
                }
            };
        var params = {ss:startStation, es:endStation, sd:startDate, ed:endDate, bs: binSize};
        url = "/histogram?" + jQuery.param( params );
        xmlhttp.open("GET", url, true);
        xmlhttp.send();
        """)

    AddButton.callback = callback
    #DeleteButton.on_click(callback1)
    layout1 = vform(startDP, endDP, binSize)
    layout2 = vform(plot, DeleteButton, data_table)
    layout3 = vform(selectSS, selectES, selectUT, selectGender, sliderAge,
                    AddButton)
    layout = hplot(layout1, layout2, layout3)
    script, div = components(layout)
    html = readHtmlFile(fileName)
    html = insertScriptIntoHeader(html, script)
    # Honour the caller-supplied placeholder id. The default equals the
    # value that used to be hard-coded here.
    html = appendElementContent(html, div, "div", bokehPlaceholderId)

    return html
コード例 #29
0
ファイル: rentals.py プロジェクト: andre-aion/analytics_demo
def forecasting_bcc_rentals_visitor_tab(panel_title):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=None):
            """Initialise tab state, menus, select widgets and header divs.

            Args:
                table: Passed to ``Mytab.__init__`` and kept on ``self.table``.
                cols: Passed to ``Mytab.__init__`` and kept on ``self.cols``.
                dedup_cols: Passed to ``Mytab.__init__``. ``None`` (default)
                    becomes a fresh empty list, so no list object is shared
                    between instances.
            """
            if dedup_cols is None:
                dedup_cols = []
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''
            # prep_data() reads self.resample_period, but this initialiser
            # never assigned it. Fall back to daily resampling unless the base
            # class already provided a value (presumably it does not -- TODO
            # confirm against Mytab).
            if not hasattr(self, 'resample_period'):
                self.resample_period = 'D'

            self.cl = PythonClickhouse('aion')

            self.trigger = 0
            # column -> aggregation used when the data is grouped/resampled
            self.groupby_dict = {
                'category': 'nunique',
                'item': 'nunique',
                'area': 'nunique',
                'visit_duration': 'mean',
                'age': 'mean',
                'gender_coded': 'mean',
                'status_coded': 'mean',
                'rental_employee_gender_coded': 'mean',
                'rental_employee_age': 'mean',
                'rental_tab': 'sum'
            }

            self.feature_list = ['age', 'rental_employee_age', 'rental_tab']
            self.tsa_variable = 'rental_tab'
            self.forecast_days = 40
            self.lag_variable = 'visit_duration'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.variables = sorted(self.groupby_dict)
            self.variable = 'rental_tab'

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            self.pym = PythonMongo('aion')
            self.menus = {
                'item': ['all'],
                'category': ['all'],
                'status': ['all', 'guest', 'member'],
                'gender': ['all', 'male', 'female'],
                'variables': list(self.groupby_dict),
                'history_periods': [str(n) for n in range(1, 11)],
                'area': ['all', 'bar', 'rentals'],
                'tsa': ['rental_tab', 'visit_duration']
            }
            self.select = {}
            self.select['area'] = Select(title='Select BCC area',
                                         value='all',
                                         options=self.menus['area'])

            self.select['item'] = Select(title='Select item',
                                         value='all',
                                         options=self.menus['item'])

            self.select['status'] = Select(title='Select visitor status',
                                           value='all',
                                           options=self.menus['status'])

            self.select['gender'] = Select(title="Select visitor gender",
                                           value='all',
                                           options=self.menus['gender'])

            self.select['category'] = Select(title="Select category",
                                             value='all',
                                             options=self.menus['category'])

            # This widget used to duplicate the category select (same title
            # and same menu); give it its own title and the gender menu.
            self.select['rental_employee_gender'] = Select(
                title="Select rental employee gender",
                value='all',
                options=self.menus['gender'])

            # every select starts on 'all'
            self.select_values = dict.fromkeys(self.select, 'all')

            self.multiline_vars = {'x': 'gender', 'y': 'rental_tab'}
            self.timestamp_col = 'visit_start'
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                    <div style="text-align:center;width:{}px;height:{}px;
                           position:relative;background:black;margin-bottom:200px">
                           <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=lag_section_head_txt,
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(text='Pre-transform distribution:',
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'correlations':
                self.section_header_div(text='Correlations:',
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
                'forecast':
                self.section_header_div(text='Forecasts:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            """Build a 15px-high ``Div`` containing a coloured section heading.

            Args:
                text: Heading text placed inside the header tag.
                html_header: Tag name used for the heading, e.g. ``'h2'``.
                width: Width passed to the ``Div``.
                margin_top: Top margin of the wrapper, in pixels.
                margin_bottom: Size of the bottom margin in pixels. The margin
                    is always rendered as a negative value; the sign of the
                    argument is ignored.

            Returns:
                The ``Div`` holding the generated markup.
            """
            # The template used to hard-code a '-' in front of the value, so
            # the negative default (-150) rendered as 'margin-bottom:--150px',
            # which is not a valid CSS length. Normalising the sign keeps the
            # output for positive arguments unchanged.
            bottom = -abs(margin_bottom)
            text = (
                '<div style="margin-top:{}px;margin-bottom:{}px;">'
                '<{} style="color:#4221cc;">{}</{}></div>'
            ).format(margin_top, bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            """Write ``text``, wrapped in a banner, into every notification div."""
            banner = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for div in self.notification_div.values():
                div.text = banner

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            """Return a 15px-high ``Div`` showing ``text`` as an ``<h2>`` coloured #4221cc."""
            heading = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=heading, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            """Return a ``Div`` with a fixed help text on reading relationships.

            The box uses its own inline style (local to this method, not
            ``self.div_style``) and ``self.header_style`` for the heading.
            """
            # Inline style attribute for the outer box, spelled out with
            # explicit newlines so the exact whitespace is visible.
            box_attrs = (
                " \n"
                "                style='width:350px; margin-left:-600px;\n"
                "                border:1px solid #ddd;border-radius:3px;background:#efefef50;' \n"
                "            "
            )
            template = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """
            markup = template.format(box_attrs, self.header_style)
            return Div(text=markup, width=width, height=height)

        # /////////////////////////////////////////////////////////////

        def load_df(self, req_startdate, req_enddate, table, cols,
                    timestamp_col):
            """Return rows between the requested dates, reusing ``self.df`` if possible.

            When ``self.df`` is set and its ``timestamp_col`` values span the
            requested range, the matching rows of ``self.df`` are returned.
            Otherwise the request is passed on to ``self.pym.load_df``. Any
            exception is logged and the method then returns ``None``.
            """
            try:
                cached = self.df
                if cached is not None:
                    # bounds of what is already held in memory
                    earliest = cached[timestamp_col].min()
                    latest = cached[timestamp_col].max()

                    if earliest <= req_startdate and latest >= req_enddate:
                        in_range = ((cached[timestamp_col] >= req_startdate)
                                    & (cached[timestamp_col] <= req_enddate))
                        return cached[in_range]

                # cache missing or too narrow: fetch from the backing store
                return self.pym.load_df(req_startdate,
                                        req_enddate,
                                        table=table,
                                        cols=cols,
                                        timestamp_col=timestamp_col)

            except Exception:
                logger.error('load_df', exc_info=True)

        def filter_df(self, df1):
            """Restrict ``df1`` to ``self.cols`` and apply the select filters.

            For every ``groupby_dict`` entry whose aggregation is ``'count'``,
            rows are kept only where the column equals the chosen select value,
            unless that value is ``'all'``. Any exception is logged and the
            method then returns ``None``.
            """
            # NOTE(review): none of the aggregations assigned to groupby_dict
            # in __init__ is 'count', so unless that dict is changed elsewhere
            # this loop leaves the frame unfiltered -- confirm intent.
            try:
                subset = df1[self.cols]

                for column, aggregation in self.groupby_dict.items():
                    if aggregation != 'count':
                        continue
                    if self.select_values[column] != 'all':
                        subset = subset[subset[column] ==
                                        self.select_values[column]]
                return subset

            except Exception:
                logger.error('filter', exc_info=True)

        def prep_data(self, df):
            """Filter ``df``, add coded columns and rebuild the resampled frame.

            Side effects: ``self.df`` becomes the filtered frame indexed by
            ``self.timestamp_col``; ``self.df1`` becomes that frame grouped by
            ``'name'``, resampled with ``self.resample_period``, aggregated
            with ``self.groupby_dict``, re-indexed and NaN-filled with 0.

            Args:
                df: Frame to prepare; it is first passed through ``filter_df``.

            Returns:
                ``self.df1``. Callers that store the return value therefore
                keep the prepared frame instead of overwriting it with
                ``None``. If a step fails the error is logged and the previous
                ``self.df1`` is returned unchanged.
            """
            try:
                df = self.filter_df(df)
                # value -> numeric code lookups for the *_coded columns
                codes = {
                    'gender': {
                        'male': 1,
                        'female': 2,
                        'other': 3
                    },
                    'status': {
                        'guest': 1,
                        'member': 2
                    }
                }
                for col in df.columns:
                    coded_col = col + '_coded'
                    # substring match: applies to any column whose name
                    # contains 'gender'
                    if 'gender' in col:
                        df[coded_col] = df[col].map(codes['gender'])
                    # exact match only for the status column
                    if 'status' == col:
                        df[coded_col] = df[col].map(codes['status'])

                self.df = df.set_index(self.timestamp_col)
                # groupby and resample; assign df1 once so a failing step
                # cannot leave a half-built frame behind
                resampled = self.df.groupby('name').resample(
                    self.resample_period).agg(self.groupby_dict)
                self.df1 = resampled.reset_index().fillna(0)

                logger.warning('LINE 288 df:%s', self.df1.head(10))

            except Exception:
                logger.error('prep data', exc_info=True)
            return self.df1

        def tsa(self, launch):
            """Fit Prophet on the daily mean of ``self.tsa_variable`` and plot it.

            ``self.df`` is resampled to daily means, renamed to Prophet's
            ``ds``/``y`` columns and fitted; the forecast covers
            ``self.forecast_days`` extra periods. ``launch`` is not read by the
            body.

            Returns:
                ``p + q``: the yhat line overlaid with scatter plots of its
                lower/upper bounds, plus an overlay of the trend and weekly
                lines. On any exception the error is logged and ``None`` is
                returned.
            """
            try:
                daily = self.df.resample('D').agg({self.tsa_variable: 'mean'})
                daily = daily.reset_index()
                # the diff column is dropped again by the ['ds', 'y'] selection
                daily[self.tsa_variable + '_diff'] = \
                    daily[self.tsa_variable].diff()
                daily = daily.fillna(0)

                daily = daily.rename(columns={
                    self.timestamp_col: 'ds',
                    self.tsa_variable: 'y'
                })
                history = daily[['ds', 'y']]
                logger.warning('df:%s', history.tail())
                model = Prophet()
                model.fit(history)

                horizon = model.make_future_dataframe(
                    periods=self.forecast_days)
                forecast = model.predict(horizon)
                print(forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())
                print(list(forecast.columns))

                def layer(kind, col, width):
                    # one relabelled hvplot element of the given kind
                    draw = getattr(forecast.hvplot, kind)
                    return draw(x='ds',
                                y=col,
                                width=width,
                                height=250,
                                value_label='$',
                                legend=False).relabel(col)

                # forecast line with its bounds drawn as scatter points
                p = layer('line', 'yhat', 600)
                for bound in ('yhat_lower', 'yhat_upper'):
                    p *= layer('scatter', bound, 600)

                # trend and weekly components as lines
                q = layer('line', 'trend', 550)
                q *= layer('line', 'weekly', 550)

                return p + q
            except Exception:
                logger.error("TSA:", exc_info=True)

    def update_variable(attr, old, new):
        """Make ``new`` the analysed variable and fire the matrix/corr streams.

        ``attr`` and ``old`` are not read.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.variable = new
        # section_head_updater is not defined in the visible Thistab body;
        # presumably inherited from Mytab -- TODO confirm.
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        """Copy every select widget's value, re-prepare data, fire matrix/corr.

        The callback arguments are not read; values come from the widgets.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        for name in thistab.select_values:
            thistab.select_values[name] = thistab.select[name].value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        for stream in (stream_launch_matrix, stream_launch_corr):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        """Reload data for the date-picker range and fire the matrix/corr streams.

        The callback arguments are not read; the range comes straight from
        ``datepicker_start`` and ``datepicker_end``.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)

        thistab.df['gender_code'] = thistab.df['gender'].apply(
            lambda x: 1 if x == 'male' else 2)
        # prep_data() updates thistab.df1 itself. Rebinding df1 from its
        # return value (as this callback used to do) replaced the freshly
        # prepared frame with None.
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        """Switch the resample period to ``new``, re-prepare data, fire streams.

        ``attrname`` and ``old`` are not read.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        # prep_data() updates thistab.df1 itself. Rebinding df1 from its
        # return value (as this callback used to do) replaced the freshly
        # prepared frame with None.
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        """Store the lag text from ``lags_input`` and fire the lag/launch streams."""
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        for stream in (stream_launch_lags_var, stream_launch):
            stream.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        """Copy the multiline x/y selections into ``thistab`` and fire ``stream_launch``.

        The callback arguments are not read; values come from the widgets.
        """
        thistab.notification_updater("Calculations in progress! Please wait.")
        axes = thistab.multiline_vars
        axes['x'] = multiline_x_select.value
        axes['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_forecast(attrname, old, new):
        """Read the forecast horizon and variable from their selects, fire the TSA stream.

        The callback arguments are not read; values come from the widgets.
        """
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        horizon = int(select_forecast_days.value)
        thistab.forecast_days = horizon
        thistab.tsa_variable = forecast_variable_select.value
        thistab.trigger += 1
        stream_launch_tsa.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'bcc_composite'
        cols = cols_to_load['guest'] + cols_to_load['rental']
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=1)
        first_date = last_date - timedelta(days=1000)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)

        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_tsa = streams.Stream.define('Launch_tsa', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])

        forecast_variable_select = Select(title='Select forecast variable',
                                          value=thistab.menus['tsa'][0],
                                          options=thistab.menus['tsa'])

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        multiline_y_select = Select(
            title='Select comparative DV(y)',
            value=thistab.multiline_vars['y'],
            options=['price', 'amount', 'visit_duration'])

        multiline_x_select = Select(title='Select comparative IV(x)',
                                    value=thistab.multiline_vars['x'],
                                    options=[
                                        'category', 'gender',
                                        'rental_employee_gender', 'status',
                                        'item'
                                    ])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------

        # tables
        hv_tsa = hv.DynamicMap(thistab.tsa, streams=[stream_launch_tsa])
        tsa = renderer.get_plot(hv_tsa)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        resample_select.on_change('value', update_resample)
        thistab.select['area'].on_change('value', update_IVs)
        thistab.select['gender'].on_change('value', update_IVs)
        thistab.select['rental_employee_gender'].on_change('value', update_IVs)
        thistab.select['item'].on_change('value', update_IVs)
        thistab.select['category'].on_change('value', update_IVs)
        thistab.select['status'].on_change('value', update_IVs)
        select_forecast_days.on_change('value', update_forecast)
        forecast_variable_select.on_change('value', update_forecast)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)

        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_tsa = WidgetBox(datepicker_start, datepicker_end,
                                 variable_select, thistab.select['status'],
                                 resample_select, thistab.select['gender'],
                                 thistab.select['category'],
                                 thistab.select['area'],
                                 forecast_variable_select,
                                 select_forecast_days)

        # create the dashboards

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [tsa.state, controls_tsa],
                         [Spacer(width=20, height=30)],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
コード例 #30
0
from bokeh.models import DatePicker, HBox
from bokeh.io import curdoc

from datetime import datetime

# Date picker limited to 2014-11-01 .. now, starting on 1 January of the
# current year.
beginning = DatePicker(
    title="Begin Date",
    min_date=datetime(2014, 11, 1),
    max_date=datetime.now(),
    value=datetime(datetime.now().year, 1, 1),
)


def cb(attr, old, new):
    """Print the new value; ``attr`` and ``old`` are ignored."""
    print(new)


# Call cb whenever the picker's 'value' property changes.
beginning.on_change('value', cb)

# Add the picker, wrapped in an HBox, as a root of the current document.
curdoc().add_root(HBox(children=[beginning]))