import json
import logging

import pandas as pd
from causalimpact import CausalImpact


def main():
    with open("/path/to/glowroot/data.json", 'rt') as file:
        glowroot_data = json.load(file)

        x = list()
        y = list()
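        # Each Glowroot data point is an [epoch-millisecond timestamp, value] pair.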
        for point in glowroot_data["dataSeries"][0]["data"]:
            x.append(point[0])
            y.append(point[1])

        data_frame = pd.DataFrame({
            "timestamp": pd.to_datetime(x, unit="ms"),
            "y": y
        })
        data_frame = data_frame.set_index("timestamp")
        logging.info(data_frame)
        pre_period = [
            pd.to_datetime(1573661277259, unit="ms"),
            pd.to_datetime(1573661647328, unit="ms")
        ]
        post_period = [
            pd.to_datetime(1573661652328, unit="ms"),
            pd.to_datetime(1573661932369, unit="ms")
        ]

        causal_impact = CausalImpact(data_frame,
                                     pre_period,
                                     post_period,
                                     prior_level_sd=0.1)
        logging.info(causal_impact.summary())
        causal_impact.plot()
Example #2
def test_plotter(monkeypatch, rand_data, pre_int_period, post_int_period):
    plotter_mock = mock.Mock()
    fit_mock = mock.Mock()
    process_mock = mock.Mock()
    summarize_mock = mock.Mock()
    monkeypatch.setattr('causalimpact.main.CausalImpact._fit_model', fit_mock)
    monkeypatch.setattr('causalimpact.main.CausalImpact._summarize_inferences',
                        summarize_mock)
    monkeypatch.setattr(
        'causalimpact.main.CausalImpact._process_posterior_inferences',
        process_mock)
    monkeypatch.setattr('causalimpact.main.plotter', plotter_mock)
    ci = CausalImpact(rand_data,
                      pre_int_period,
                      post_int_period,
                      model_args={'fit_method': 'vi'})
    ci.inferences = 'inferences'
    ci.pre_data = 'pre_data'
    ci.post_data = 'post_data'
    ci.plot()
    plotter_mock.plot.assert_called_with(
        'inferences',
        'pre_data',
        'post_data',
        panels=['original', 'pointwise', 'cumulative'],
        figsize=(10, 7))
Example #3
def test_plot_cumulative_panel(rand_data, pre_int_period, post_int_period, monkeypatch):
    ci = CausalImpact(rand_data, pre_int_period, post_int_period)
    ax_mock = mock.Mock()
    plotter_mock = mock.Mock()
    plotter_mock.subplot.return_value = ax_mock
    plot_mock = mock.Mock(return_value=plotter_mock)
    monkeypatch.setattr(plot.Plot, '_get_plotter', plot_mock)

    ci.plot(panels=['cumulative'])
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(15, 12))
    plotter_mock.subplot.assert_any_call(1, 1, 1, sharex=ax_mock)
    ax_args = ax_mock.plot.call_args

    inferences = ci.inferences.iloc[1:, :]

    assert_array_equal(inferences['post_cum_effects'], ax_args[0][0])
    assert ax_args[0][1] == 'b--'
    assert ax_args[1] == {'label': 'Cumulative Effect'}

    ax_mock.axvline.assert_called_with(ci.post_period[0] - 1, c='k', linestyle='--')

    ax_args = ax_mock.fill_between.call_args_list[0]
    assert_array_equal(ax_args[0][0], inferences['post_cum_effects'].index)
    assert_array_equal(ax_args[0][1], inferences['post_cum_effects_lower'])
    assert_array_equal(ax_args[0][2], inferences['post_cum_effects_upper'])
    assert ax_args[1] == {'facecolor': 'blue', 'interpolate': True, 'alpha': 0.25}

    ax_mock.axhline.assert_called_with(y=0, color='k', linestyle='--')

    ax_mock.grid.assert_called_with(True, linestyle='--')
    ax_mock.legend.assert_called()

    plotter_mock.show.assert_called_once()
Example #4
def test_plot_raises_when_not_initialized(rand_data, pre_int_period,
                                          post_int_period, monkeypatch):
    ci = CausalImpact(rand_data, pre_int_period, post_int_period)
    ci.summary_data = None
    plotter_mock = mock.Mock()
    plot_mock = mock.Mock(return_value=plotter_mock)
    monkeypatch.setattr(plot.Plot, '_get_plotter', plot_mock)
    with pytest.raises(RuntimeError):
        ci.plot()
Example #5
def test_plot_raises_wrong_input_panel(rand_data, pre_int_period,
                                       post_int_period, monkeypatch):
    ci = CausalImpact(rand_data, pre_int_period, post_int_period)
    plotter_mock = mock.Mock()
    plot_mock = mock.Mock(return_value=plotter_mock)
    monkeypatch.setattr(plot.Plot, '_get_plotter', plot_mock)
    with pytest.raises(ValueError) as excinfo:
        ci.plot(panels=['test'])
    assert str(
        excinfo.value) == ('"test" is not a valid panel. Valid panels are: '
                           '"original", "pointwise", "cumulative".')
Example #6
def test_plot_multi_panels(rand_data, pre_int_period, post_int_period,
                           monkeypatch):
    ci = CausalImpact(rand_data, pre_int_period, post_int_period)
    ax_mock = mock.Mock()
    ax_mock.get_xticklabels.return_value = 'xticklabels'
    plotter_mock = mock.Mock()
    plotter_mock.subplot.return_value = ax_mock
    plot_mock = mock.Mock(return_value=plotter_mock)
    monkeypatch.setattr(plot.Plot, '_get_plotter', plot_mock)

    ci.plot(panels=['original', 'pointwise'], figsize=(10, 10))
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(10, 10))
    plotter_mock.subplot.assert_any_call(2, 1, 1)
    plotter_mock.subplot.assert_any_call(2, 1, 2, sharex=ax_mock)
    plotter_mock.setp.assert_called_once_with('xticklabels', visible=False)
    assert ax_mock.plot.call_count == 3
    plotter_mock.show.assert_called_once()

    ax_mock.reset_mock()
    plot_mock.reset_mock()
    plotter_mock.reset_mock()

    ci.plot(panels=['original', 'cumulative'], figsize=(10, 10))
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(10, 10))
    plotter_mock.subplot.assert_any_call(2, 1, 1)
    plotter_mock.subplot.assert_any_call(2, 1, 2, sharex=ax_mock)
    plotter_mock.setp.assert_called_once_with('xticklabels', visible=False)
    assert ax_mock.plot.call_count == 3
    plotter_mock.show.assert_called_once()

    ax_mock.reset_mock()
    plot_mock.reset_mock()
    plotter_mock.reset_mock()

    ci.plot(panels=['pointwise', 'cumulative'], figsize=(10, 10))
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(10, 10))
    plotter_mock.subplot.assert_any_call(2, 1, 1, sharex=ax_mock)
    plotter_mock.subplot.assert_any_call(2, 1, 2, sharex=ax_mock)
    plotter_mock.setp.assert_called_once_with('xticklabels', visible=False)
    assert ax_mock.plot.call_count == 2
    plotter_mock.show.assert_called_once()

    ax_mock.reset_mock()
    plot_mock.reset_mock()
    plotter_mock.reset_mock()

    ci.plot(panels=['pointwise', 'cumulative', 'original'], figsize=(10, 10))
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(10, 10))
    plotter_mock.subplot.assert_any_call(3, 1, 1)
    plotter_mock.subplot.assert_any_call(3, 1, 2, sharex=ax_mock)
    plotter_mock.subplot.assert_any_call(3, 1, 3, sharex=ax_mock)
    plotter_mock.setp.assert_called_with('xticklabels', visible=False)
    assert ax_mock.plot.call_count == 4
    plotter_mock.show.assert_called_once()
Example #7
def test_plot_cumulative_panel_date_index_no_freq(date_rand_data,
                                                  pre_str_period,
                                                  post_str_period,
                                                  monkeypatch):
    dd = date_rand_data.copy()
    dd = dd.drop(dd.index[10:20])  # drop a block of rows so the DatetimeIndex has no inferable frequency
    ci = CausalImpact(dd, pre_str_period, post_str_period)
    ax_mock = mock.Mock()
    plotter_mock = mock.Mock()
    plotter_mock.subplot.return_value = ax_mock
    plot_mock = mock.Mock(return_value=plotter_mock)
    monkeypatch.setattr(plot.Plot, '_get_plotter', plot_mock)

    ci.plot(panels=['cumulative'])
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(15, 12))
    plotter_mock.subplot.assert_any_call(1, 1, 1, sharex=ax_mock)
    ax_args = ax_mock.plot.call_args

    inferences = ci.inferences.iloc[1:, :]

    assert_array_equal(inferences['post_cum_effects'], ax_args[0][0])
    assert ax_args[0][1] == 'b--'
    assert ax_args[1] == {'label': 'Cumulative Effect'}

    date_ = datetime.strptime(ci.post_period[0], "%Y%m%d")
    date_ = date_ + timedelta(days=-1)
    date_ = Timestamp(date_.strftime("%Y-%m-%d %H:%M:%S"))
    ax_mock.axvline.assert_called_with(date_, c='k', linestyle='--')

    ax_args = ax_mock.fill_between.call_args_list[0]
    assert_array_equal(ax_args[0][0], inferences['post_cum_effects'].index)
    assert_array_equal(ax_args[0][1], inferences['post_cum_effects_lower'])
    assert_array_equal(ax_args[0][2], inferences['post_cum_effects_upper'])
    assert ax_args[1] == {
        'facecolor': 'blue',
        'interpolate': True,
        'alpha': 0.25
    }

    ax_mock.axhline.assert_called_with(y=0, color='k', linestyle='--')

    ax_mock.grid.assert_called_with(True, linestyle='--')
    ax_mock.legend.assert_called()

    plotter_mock.show.assert_called_once()
Example #8
def test_plot_original_panel_date_index(date_rand_data, pre_str_period,
                                        post_str_period, monkeypatch):
    ci = CausalImpact(date_rand_data, pre_str_period, post_str_period)
    ax_mock = mock.Mock()
    plotter_mock = mock.Mock()
    plotter_mock.subplot.return_value = ax_mock
    plot_mock = mock.Mock(return_value=plotter_mock)
    monkeypatch.setattr(plot.Plot, '_get_plotter', plot_mock)

    ci.plot(panels=['original'])
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(15, 12))
    plotter_mock.subplot.assert_any_call(1, 1, 1)
    ax_args = ax_mock.plot.call_args_list

    assert_array_equal(ci.data.iloc[:, 0], ax_args[0][0][0])
    assert ax_args[0][0][1] == 'k'
    assert ax_args[0][1] == {'label': 'y'}

    inferences = ci.inferences.iloc[1:, :]

    assert_array_equal(inferences['preds'], ax_args[1][0][0])
    assert ax_args[1][0][1] == 'b--'
    assert ax_args[1][1] == {'label': 'Predicted'}

    date_ = datetime.strptime(ci.post_period[0], "%Y%m%d")
    date_ = date_ + timedelta(days=-1)
    date_ = Timestamp(date_.strftime("%Y-%m-%d %H:%M:%S"))
    ax_mock.axvline.assert_called_with(date_, c='k', linestyle='--')

    ax_args = ax_mock.fill_between.call_args_list[0]
    assert_array_equal(ax_args[0][0], inferences['preds'].index)
    assert_array_equal(ax_args[0][1], inferences['preds_lower'])
    assert_array_equal(ax_args[0][2], inferences['preds_upper'])
    assert ax_args[1] == {
        'facecolor': 'blue',
        'interpolate': True,
        'alpha': 0.25
    }

    ax_mock.grid.assert_called_with(True, linestyle='--')
    ax_mock.legend.assert_called()

    plotter_mock.show.assert_called_once()
Example #9
def graph_check_all_bsts_control(data_mean, intervention, X_names, title):
    experiment = int(list(intervention)[0])
    intervention_data = INTERVENTION_CALENDAR[experiment]
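    # Each INTERVENTION_CALENDAR entry carries the pre-period (index 1), the
    # post-period (index 2) and the intervention end date (index 3).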
    pre_period = intervention_data[1]
    post_period = intervention_data[2]
    end_intervention_date = intervention_data[3]
    data, time = prepare_data_control_bsts(data_mean.set_index('timestamp'),
                                           intervention, X_names)
    x = data.copy()
    x = x.rename(columns={'y': 'Observation', 'x1': 'Random. Control Group'})
    ci = CausalImpact(data,
                      pre_period,
                      post_period,
                      prior_level_sd=None,
                      standarize=True)
    font = {'family': 'Arial', 'size': 18}
    ci.plot(figsize=(7, 9),
            end_intervention_date=end_intervention_date,
            title=title)
    matplotlib.rc('font', **font)
    return ci, x
Example #10
def graph_check_cumulative_bsts(data_mean, X_names):
    visual = data_mean.set_index('timestamp')
    intervention_list = visual['INTERVENTION'].unique()
    # Set up the matplotlib figure
    f, axes = plt.subplots(3, 3, figsize=(15, 9))
    dataframe2 = pd.DataFrame()
    for i, intervention in enumerate(intervention_list):
        if list(intervention)[-1] == 'L':
            pass  # nothing to plot for interventions whose code ends in 'L'
        else:
            experiment = int(list(intervention)[0])
            intervention_data = INTERVENTION_CALENDAR[experiment]
            pre_period = intervention_data[1]
            post_period = intervention_data[2]
            end_intervention_date = intervention_data[3]

            # get position for the plot
            row = int(list(intervention)[-1]) - 1
            column = int(list(intervention)[0]) - 1
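            # Row comes from the trailing digit of the intervention code and column
            # from its leading experiment number (both zero-based); '2T4' and '2T5'
            # are special-cased below.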

            if intervention == '2T4':
                row = 0
                column = 1
            elif intervention == '2T5':
                row = 1
                column = 1

            ax = axes[row, column]
            data = prepare_data_synthetic_bsts(
                data_mean.set_index('timestamp'), intervention, X_names)
            ci = CausalImpact(data,
                              pre_period,
                              post_period,
                              prior_level_sd=None,
                              standarize=True)
            ax = ci.plot(figsize=(5, 3),
                         end_intervention_date=end_intervention_date,
                         panels=['cumulative'],
                         add_axes=ax)
            ax.set_title(intervention)

            # get data
            table = ci.summary_data
            pi_value = ci.p_value
            rel_effect = round(table.loc['rel_effect', 'average'] * 100, 2)
            rel_lower = round(table.loc['rel_effect_lower', 'average'] * 100, 2)
            rel_upper = round(table.loc['rel_effect_upper', 'average'] * 100, 2)
            effect = f'{rel_effect}\n[{rel_lower},{rel_upper}]'
            table_df = pd.DataFrame({
                'id': [intervention],
                'effect': [effect],
                'p_value': [pi_value]
            })
            dataframe2 = pd.concat([dataframe2, table_df], ignore_index=True)
    print(dataframe2)
    plt.show()
    font = {'family': 'Arial', 'size': 10}
    matplotlib.rc('font', **font)
Example #11
def main():
    max_width_(width=1200)

    image = Image.open('causal_impact_explainer_logo.png')
    st.sidebar.image(image, caption='', use_column_width=True)
    st.title(
        "Mega sequía: midiendo el impacto económico :volcano: :earth_americas:"
    )
    texto(
        """Esta aplicación ayuda a explorar los resultados de la librería Causal Impact para medir el impacto económico generado por la mega sequía que ocurre en Chile. Estos resultados son generados usando datos del banco central, donde usamos como control series económicas no afectadas por la sequía que ayudan a reconstruir un escenario contrafactual donde respondemos: qué hubiese pasado en la economía si no hubiese habido mega sequía.""",
        nfont=17)
    disclaimer()

    link_libreria()
    st.markdown(
        '## Los datos provienen del  Producto Interno Bruto de Chile entre los años 2013-19'
    )

    st.markdown('### Choose Causal Impact parameters')

    chosen_df = load_dataframe()
    col1, col2 = st.beta_columns(2)
    with col1:
        time_var = st.selectbox("Choose the time variable",
                                chosen_df.columns,
                                index=0,
                                key='time_variable')  #date
        alpha = st.number_input("Significance level",
                                0.01,
                                0.5,
                                value=0.05,
                                step=0.01,
                                key='significance_level')

    with col2:
        y_var = st.selectbox("Choose the outcome variable (y)",
                             chosen_df.columns,
                             index=2,
                             key='analysis_variable')

        df_experiment = chosen_df.copy()
        df_experiment[time_var] = df_experiment[time_var].apply(pd.to_datetime)
        df_experiment.sort_values(time_var, inplace=True)
        df_experiment.index = range(len(df_experiment))

        min_date = df_experiment[time_var].min().date()
        last_date = df_experiment[time_var].max().date()
        mid_point = int(len(df_experiment) / 2)
        intervention_time = st.slider('Fecha del inicio de la sequía',
                                      min_date,
                                      last_date,
                                      value=df_experiment.loc[mid_point + 20,
                                                              time_var].date(),
                                      key='training_period')

    beg_pre_period, end_pre_period = min_date, intervention_time
    beg_eval_period, end_eval_period = intervention_time, last_date
    beg_eval_period = beg_eval_period + pd.DateOffset(months=1)

    st.sidebar.markdown("#### Select the control variables")
    x_vars = sorted(list([
        col for col in chosen_df.columns
        if col != y_var and col != time_var and col != 'group'
    ]),
                    key=len)
    selected_x_vars = st.sidebar.multiselect(
        "Las variables de control no deben haber sido afectadas por la sequía",
        x_vars,
        default=x_vars)

    strftime_format = "%Y-%m-%d"
    parameters = {
        "alpha": alpha,
        "beg_pre_period": beg_pre_period.strftime(strftime_format),
        "end_pre_period": end_pre_period.strftime(strftime_format),
        "beg_eval_period": beg_eval_period.strftime(strftime_format),
        "end_eval_period": end_eval_period.strftime(strftime_format),
        "selected_x_vars": selected_x_vars,
        "y_var": y_var,
        "time_var": time_var,
    }

    with st.beta_expander('Show dataframe'):
        st.write(df_experiment.head(5))

    with st.beta_expander('Plotting variables'):
        vars_to_plot = st.multiselect("Variables to plot",
                                      list(df_experiment.columns),
                                      default=y_var)

        plot_vars(
            df_experiment,
            vars_to_plot,
            time_var,
            beg_pre_period=parameters['beg_pre_period'],
            end_pre_period=parameters['end_pre_period'],
            beg_eval_period=parameters['beg_eval_period'],
            end_eval_period=parameters['end_eval_period'],
        )

        col_mutual_info, col_precip, col_tdw = st.beta_columns(3)
        with col_mutual_info:
            find_mutual_info(df_experiment, time_var, y_var,
                             parameters['end_pre_period'])
        with col_tdw:
            find_dynamic_time_warp(df_experiment, time_var, y_var,
                                   parameters['end_pre_period'])
        with col_precip:
            find_mutual_info_precipitaciones(df_experiment, time_var, y_var,
                                             parameters['end_pre_period'])

        texto(' ')

    with st.beta_expander("Estimate Causal Impact model"):

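        # Rebuild the period boundaries as 'YYYYMM01' strings (year + month + first
        # day of the month) before handing them to CausalImpact.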
        beg_pre_period = parameters['beg_pre_period'].split(
            '-')[0] + parameters['beg_pre_period'].split('-')[1] + '01'
        end_pre_period = parameters['end_pre_period'].split(
            '-')[0] + parameters['end_pre_period'].split('-')[1] + '01'

        beg_eval_period = parameters['beg_eval_period'].split(
            '-')[0] + parameters['beg_eval_period'].split('-')[1] + '01'
        end_eval_period = parameters['end_eval_period'].split(
            '-')[0] + parameters['end_eval_period'].split('-')[1] + '01'

        pre_period = [beg_pre_period, end_pre_period]
        post_period = [beg_eval_period, end_eval_period]

        df_experiment[time_var] = df_experiment[time_var].dt.strftime(
            '%m/%d/%Y')
        df_experiment[y_var] = df_experiment[y_var].astype(float)

        mapa_nombres = {
            var_x: f'x_{idx}'
            for idx, var_x in enumerate(parameters['selected_x_vars'])
        }
        mapa_nombres[y_var] = 'y'
        df_toci = df_experiment[[time_var, y_var] +
                                parameters['selected_x_vars']].set_index(
                                    time_var).copy()
        df_toci.rename(columns=mapa_nombres, inplace=True)
        texto('Puede tardar varios segundos', 11)
        if st.checkbox('Run Causal Impact', value=False):

            ci = CausalImpact(df_toci,
                              pre_period,
                              post_period,
                              alpha=alpha,
                              model_args={'nseasons': 12})

            results = ci.inferences.copy()
            results.reset_index(inplace=True)
            results.rename(columns={'index': time_var}, inplace=True)

            print_column_description(results,
                                     time_var,
                                     min_date=parameters["beg_eval_period"])
            ci.plot()
            st.pyplot()

            efecto_acumulado_total = results['post_cum_effects_means'].values[
                -1]
            valor_promedio = df_toci['y'].mean()
            porcentaje = 100 * efecto_acumulado_total / valor_promedio
            col1, col2 = st.beta_columns([1, 2])
            with col1:
                texto(' ', 40)
                texto('      El efecto acumulado:', 25)
                texto(
                    'Corresponde al efecto acumulado desde la intervención en adelante'
                )
            with col2:
                estadisticos(efecto_acumulado_total, porcentaje)

            df_toci_plot = df_toci.copy()
            df_toci_plot.reset_index(inplace=True)
            df_toci_plot.rename(columns={'index': time_var}, inplace=True)
Example #12
    def test_plot(self, monkeypatch):
        causal = CausalImpact()

        params = {'alpha': 0.05, 'post_period': [2, 4], 'pre_period': [0, 1]}

        inferences_mock = {
            'point_pred': 'points predicted',
            'response': 'y obs',
            'point_pred_lower': 'lower predictions',
            'point_pred_upper': 'upper predictions'
        }

        class Inferences(object):
            @property
            def iloc(self):
                class Iloc(object):
                    def __getitem__(*args, **kwargs):
                        class EnhancedDict(dict):
                            @property
                            def index(self):
                                return [0, 1]

                            @property
                            def point_effect(self):
                                return 'lift'

                            @property
                            def point_effect_lower(self):
                                return 'point effect lower'

                            @property
                            def point_effect_upper(self):
                                return 'point effect upper'

                            @property
                            def cum_effect(self):
                                return 'cum effect'

                            @property
                            def cum_effect_upper(self):
                                return 'cum effect upper'

                            @property
                            def cum_effect_lower(self):
                                return 'cum effect lower'

                        return EnhancedDict(inferences_mock)

                return Iloc()

        class Data(object):
            @property
            def index(self):
                return 'index'

            @property
            def shape(self):
                return [(1, 2)]

        plot_mock = mock.Mock()
        fill_mock = mock.Mock()
        show_mock = mock.Mock()
        np_zeros_mock = mock.Mock()
        np_zeros_mock.side_effect = lambda x: [0, 0]

        get_lib_mock = mock.Mock(return_value=plot_mock)
        monkeypatch.setattr('causalimpact.analysis.get_matplotlib',
                            get_lib_mock)

        monkeypatch.setattr('numpy.zeros', np_zeros_mock)

        causal.params = params
        causal.inferences = Inferences()
        causal.data = Data()

        causal.plot(panels=['original', 'pointwise', 'cumulative'])
        causal.plot(panels=['pointwise', 'cumulative'])

        causal.plot(panels=['original'])

        plot_mock.plot.assert_any_call('y obs',
                                       'k',
                                       label='endog',
                                       linewidth=2)
        plot_mock.plot.assert_any_call('points predicted',
                                       'r--',
                                       label='model',
                                       linewidth=2)

        plot_mock.fill_between.assert_any_call([0, 1],
                                               'lower predictions',
                                               'upper predictions',
                                               facecolor='gray',
                                               interpolate=True,
                                               alpha=0.25)

        causal.plot(panels=['pointwise'])

        plot_mock.plot.assert_any_call('lift', 'r--', linewidth=2)
        plot_mock.plot.assert_any_call('index', [0, 0], 'g-', linewidth=2)

        causal.plot(panels=['cumulative'])

        plot_mock.plot.assert_any_call([0, 1],
                                       'cum effect',
                                       'r--',
                                       linewidth=2)
        plot_mock.plot.assert_any_call('index', [0, 0], 'g-', linewidth=2)
Example #13
# Causal Impact
# 8. Using a custom model
# Going with this library in the end
# pip install pycausalimpact
# https://github.com/dafiti/causalimpact/blob/master/examples/getting_started.ipynb

from causalimpact import CausalImpact

# x_test, x_train =
# y_test, y_train =

# Prediction periods
# Only the pre_period needs exog supplied
pre_period = ['2019-12-01', '2019-12-31']
post_period = ['2019-12-01', '2019-12-31']

# prior_level_sd=None, nseasons=[{'period': 52}]
ci = CausalImpact(data=nq, pre_period=pre_period, post_period=post_period, model=model)

# Visualization
ci.plot(figsize=(14, 8))

# Model summary
ci.summary()
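
A minimal sketch of the custom model the snippet above passes but never builds, assuming the pycausalimpact API from the linked notebook, with `nq` holding the response in its first column and any exogenous regressors in the remaining columns (the pre-period slice is illustrative):

from statsmodels.tsa.statespace.structural import UnobservedComponents

# Fit a local-level structural model on the pre-period portion of `nq`;
# pycausalimpact accepts it through the `model` argument used above.
pre_nq = nq.loc[pre_period[0]:pre_period[1]]
model = UnobservedComponents(endog=pre_nq.iloc[:, 0],
                             level='llevel',
                             exog=pre_nq.iloc[:, 1:])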

Example #14
                 axis=1)

# Rename things, for neatness
data = data.rename(columns={
    'close': 'close_voo',
})

print(data)

# Check if the SP500 looks like a solid input for our synthetic control
# data.plot()
# plt.savefig('summary.svg')

# Define periods. The article came out around 2:30 PM EDT on May 9th, so take the market close on the Friday before as the end of the pre-treatment period
pre_period = [
    pd.Timestamp('2020-05-01 13:30:00+00:00'),
    pd.Timestamp('2020-05-08 20:00:00+00:00')
]
post_period = [
    pd.Timestamp('2020-05-11 13:30:00+00:00'),
    pd.Timestamp('2020-05-15 19:50:00+00:00')
]

# Shove it into CausalImpact
amc_data = data[['close_amc', 'close_voo', 'date']]
amc_data = amc_data.set_index('date')

ci = CausalImpact(amc_data, pre_period, post_period, prior_level_sd=None)
ci.plot()
print(ci.summary('report', 5))
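
As a quick numeric complement to the visual control check above (a sketch; the column names and periods follow this snippet), the pre-period correlation between the AMC series and the VOO control indicates how plausible the synthetic control is:

pre_slice = amc_data.loc[pre_period[0]:pre_period[1], ['close_amc', 'close_voo']]
print(pre_slice.corr())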
Example #15
t2 = 32

df['close_252d_rolling'] = df['close'].rolling(t1).mean()
df['close_21d_rolling'] = df['close'].rolling(t2).mean()

pre_period = [df.index[0], '2020/10/19 22:00']  # Define pre-event period
post_period = ['2020/10/20 01:00', df.index[-1]]  # Define post-event period

pre_period_df = df[df.index <= '2020/10/19 22:00']
post_period_df = df[df.index >= '2020/10/20 01:00']
print('Pre-Event Statistics')
print(pre_period_df.describe())
print('Post-Event Statistics')
print(post_period_df.describe())

ci = CausalImpact(df['close'], pre_period, post_period)
##########################
# STILL NEEDS FIXING FROM HERE ON
##########################
ci.plot(figsize=(12, 6))
ci.plot(panels=['original', 'pointwise'], figsize=(12, 8))
print(ci.summary())

ci.trained_model.params
print(ci.trained_model.summary())
_ = ci.trained_model.plot_diagnostics(figsize=(14, 6))

ci.trained_model.specification

df['close'].plot(figsize=(12, 4))
Example #16
def main():
    st.title("""AB-Testing Tool """)
    html_temp = """
    <div style="background-color:orange;padding:10px">
    <h2 style="color:black;text-align:center;">Online Marketing Campaigns</h2>
    </div>
    """

    print('----Data Imports ------')
    df = pd.read_csv('./main/streamlit/data/fake_data.csv')  ###1
    cup_df = pd.read_csv('./main/streamlit/data/fake_data_cuped.csv')  ###2

    test_f = {'Control_Matrix': 'Control', 'Variant_BT': 'Test'}

    df['test_flag'] = df['Variant'].replace(test_f)
    cup_df['test_flag'] = cup_df['Variant'].replace(test_f)
    test = df[df['test_flag'] == 'Test']
    control = df[df['test_flag'] == 'Control']

    test_cuped = cup_df[cup_df['test_flag'] == 'Test']
    control_cuped = cup_df[cup_df['test_flag'] == 'Control']

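    # Build a synthetic pre/post series for the Causal Impact demo below: X is an
    # ARMA-generated covariate, y tracks 1.2 * X plus Gaussian noise, and a +5 lift
    # is injected from index 70 onward (the post-intervention window).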
    np.random.seed(12345)
    ar = np.r_[1, 0.9]
    ma = np.array([1])
    arma_process = ArmaProcess(ar, ma)
    X = 100 + arma_process.generate_sample(nsample=100)
    y = 1.2 * X + np.random.normal(size=100)
    y[70:] += 5

    pre_post_data = pd.DataFrame({'y': y, 'X': X}, columns=['y', 'X'])  ###3
    pre_period = [0, 69]
    post_period = [70, 99]

    print('======================================================')
    print('----------- Sample Size Estimation--------------------')
    print('======================================================')
    st.markdown(html_temp, unsafe_allow_html=True)
    detectable_change = [
        0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10
    ]
    MENU = [
        'Sample-Size-Estimation', 'Stat Base Measurement',
        'Analysis & Recommendation'
    ]

    choice = st.sidebar.radio(''' Click here ''', MENU)
    if choice == 'Sample-Size-Estimation':
        mean_sales = st.sidebar.number_input('Base-Mean', 1)
        std_sales = st.sidebar.number_input('Base-StdDev', 1)
        alpha = st.sidebar.number_input('Alpha_Value', 0.05)
        power = st.sidebar.number_input('Power_Value', 0.8)
        k = min_detectable_data_prep(mean_sales, std_sales, detectable_change)
        k['require_sample_size'] = np.vectorize(sample_size_calculator)(
            k['mu_base'], k['mu_hat'], k['std_base'])
        st.subheader(
            'Sample Sizes for different scenario of Minimum Detectable Effect')
        st.write("""
                    Enter your  data into the sidebar and choose what will be Base Mean of KPI & Base Std Deviation of KPI.
                    Below table shows the different sample sizes for different MDE(Minimum detectable Effect) """
                 )
        st.dataframe(k)
        k['effect_in_%'] = (k['detectable_effect'] * 100)
        sns.pointplot(
            x=k['effect_in_%'],
            y=k['require_sample_size'],
            color='blue',
        )
        st.pyplot()
    elif choice == 'Stat Base Measurement':
        METRIC = st.sidebar.selectbox('Choose the metric', ['Pvs_per_session'])
        METHOD = st.sidebar.selectbox('Choose the method', [
            'Post (Control) Vs Post (Test)', 'Pre (Test) Vs Post(Test)',
            'CUPED', 'Post (Control) Vs Post (Test) NonParametric'
        ])
        if METHOD == 'Post (Control) Vs Post (Test)':
            print('---Step-1:Distribution Plot---')
            plt.figure()
            ax1 = sns.distplot(test[METRIC], hist=False, kde=True)
            ax2 = sns.distplot(control[METRIC], hist=False, kde=True)
            plt.axvline(np.mean(test[METRIC]),
                        color='b',
                        linestyle='dashed',
                        label='TEST',
                        linewidth=5)
            plt.axvline(np.mean(control[METRIC]),
                        color='orange',
                        linestyle='dashed',
                        label='CONTROL',
                        linewidth=5)
            plt.legend(labels=['TEST', 'CONTROL'])
            st.subheader('Distribution Comparison(Density Plot)')
            st.pyplot()
            sns.boxplot(data=[test[METRIC], control[METRIC]], showmeans=True)
            st.subheader('Distribution Comparison(Box Plot)')
            st.pyplot()
            print('--Step-2:T-Test for Mean Comparison--')
            st.subheader(
                "Mean comparison between Test & Control distributions using Welch's t-test"
            )
            r = t_distribution_ci(df,
                                  metric=METRIC,
                                  control='Control',
                                  test='Test',
                                  alpha=0.05)
            st.dataframe(r)
            if r['p-value'].iloc[0] > 0.1:
                st.markdown('''### Inference ''')
                st.write(
                    '''We fail to reject the null hypothesis: there is no statistically
        significant difference between the test and control means. The plots above
        compare the test and control distributions of the metric.''')
            else:
                st.markdown('''### Inference ''')
                st.write(
                    '''We reject the null hypothesis: there is a statistically significant
        difference between the test and control means. The plots above compare the
        test and control distributions of the metric.''')

        elif METHOD == 'Pre (Test) Vs Post(Test)':
            figsize = (20, 6)
            ci = CausalImpact(pre_post_data, pre_period, post_period)
            print(ci.summary())
            print(ci.summary(output='report'))
            pre_post_report = ci.summary_data
            pre_post_report['p_value'] = ci.p_value
            pre_post_report['significance'] = np.where(
                pre_post_report['p_value'] > 0.1, 'Not Significant',
                'Significant')
            st.subheader('Causal Inference Analysis')
            ci.plot()
            st.pyplot()
            st.subheader('Causal Inference statistical output')
            st.write(ci.summary(output='report'))
            st.dataframe(pre_post_report)
        elif METHOD == 'CUPED':
            cup_df = CUPED(cup_df, KPI=METRIC)
            test_cuped = cup_df[cup_df['test_flag'] == 'Test']
            control_cuped = cup_df[cup_df['test_flag'] == 'Control']
            cup_r = t_distribution_ci(cup_df,
                                      metric='CUPED-adjusted_metric',
                                      control='Control',
                                      test='Test',
                                      alpha=0.05)
            cor_df = cup_r.corr()
            st.subheader('Pre Vs Post Correlation to understand Variance')
            sns.jointplot(cup_df[METRIC],
                          cup_df[METRIC + '_pre_experiment'],
                          kind="reg",
                          stat_func=r2)
            st.pyplot()
            ax1 = sns.distplot(test_cuped['CUPED-adjusted_metric'],
                               hist=False,
                               kde=True)
            ax2 = sns.distplot(control_cuped['CUPED-adjusted_metric'],
                               hist=False,
                               kde=True)
            plt.axvline(np.mean(test_cuped['CUPED-adjusted_metric']),
                        color='b',
                        linestyle='dashed',
                        label='TEST',
                        linewidth=5)
            plt.axvline(np.mean(control_cuped['CUPED-adjusted_metric']),
                        color='orange',
                        linestyle='dashed',
                        label='CONTROL',
                        linewidth=5)
            plt.legend(labels=['TEST', 'CONTROL'])
            st.subheader(
                'CUPED-Distribution Comparison(Density Plot) after removing variance'
            )
            st.pyplot()
            st.subheader(
                "CUPED: mean comparison between Test & Control distributions using Welch's t-test after removing variance"
            )
            st.dataframe(cup_r)
        elif METHOD == 'Post (Control) Vs Post (Test) NonParametric':
            print('---Step-1:Distribution Plot---')
            plt.figure()
            ax1 = sns.distplot(test[METRIC], hist=False, kde=True)
            ax2 = sns.distplot(control[METRIC], hist=False, kde=True)
            plt.axvline(np.mean(test[METRIC]),
                        color='b',
                        linestyle='dashed',
                        label='TEST',
                        linewidth=5)
            plt.axvline(np.mean(control[METRIC]),
                        color='orange',
                        linestyle='dashed',
                        label='CONTROL',
                        linewidth=5)
            plt.legend(labels=['TEST', 'CONTROL'])
            st.subheader('Distribution Comparison(Density Plot)')
            st.pyplot()
            sns.boxplot(data=[test[METRIC], control[METRIC]], showmeans=True)
            st.subheader('Distribution Comparison(Box Plot)')
            st.pyplot()
            print('--Step-2: Mann-Whitney U Test for Distribution Comparison--')
            st.subheader(
                "Comparison between Test & Control distributions using the Mann-Whitney U test"
            )
            df[METRIC] = df[METRIC].astype('float')
            r = mann_whitney_u_test(df,
                                    metric=METRIC,
                                    control='Control',
                                    test='Test',
                                    test_flag='test_flag',
                                    alpha=0.05)
            st.dataframe(r)
            if r['p-value'].iloc[0] > 0.1:
                st.markdown('''### Inference ''')
                st.write(
                    '''We fail to reject the null hypothesis: the Mann-Whitney U test finds no
        statistically significant difference between the test and control distributions.''')
            else:
                st.markdown('''### Inference ''')
                st.write(
                    '''We reject the null hypothesis: the Mann-Whitney U test finds a
        statistically significant difference between the test and control distributions.''')
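
The `CUPED()` helper used in the 'CUPED' branch above is not shown in this listing. A minimal sketch of the standard CUPED adjustment it presumably performs, assuming a pre-experiment column named `<metric>_pre_experiment` as in the snippet (the function body is a hypothetical stand-in, not the tool's actual implementation):

import numpy as np
import pandas as pd

def CUPED(frame: pd.DataFrame, KPI: str) -> pd.DataFrame:
    """Sketch of a CUPED adjustment; column names follow the snippet above."""
    pre = frame[KPI + '_pre_experiment']
    post = frame[KPI]
    # theta is the regression slope of the experiment metric on its pre-experiment value.
    theta = np.cov(post, pre)[0, 1] / np.var(pre, ddof=1)
    out = frame.copy()
    # Remove the variance already explained by the pre-experiment metric.
    out['CUPED-adjusted_metric'] = post - theta * (pre - pre.mean())
    return out

# Usage mirroring the call above:
# cup_df = CUPED(cup_df, KPI='Pvs_per_session')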