def test_summarizer(monkeypatch, rand_data, pre_int_period, post_int_period):
    """Verify that ``CausalImpact.summary`` forwards to the summarizer module.

    The model-fitting, posterior-processing and inference-summarizing
    methods are replaced with mocks, so only the delegation done by
    ``summary`` is exercised: a default call must hand ``'summary'`` and
    ``2`` to the summarizer, and a non-integer ``digits`` must raise
    ``ValueError`` with a fixed message.
    """
    summarizer_stub = mock.Mock()
    # The patches are independent of each other, so their order is irrelevant.
    replacements = (
        ('causalimpact.main.CausalImpact._fit_model', mock.Mock()),
        ('causalimpact.main.CausalImpact._summarize_inferences', mock.Mock()),
        ('causalimpact.main.CausalImpact._process_posterior_inferences',
         mock.Mock()),
        ('causalimpact.main.summarizer', summarizer_stub),
    )
    for target, stub in replacements:
        monkeypatch.setattr(target, stub)

    impact = CausalImpact(rand_data,
                          pre_int_period,
                          post_int_period,
                          model_args={'fit_method': 'vi'})
    impact.summary_data = 'summary_data'
    impact.p_value = 0.5
    impact.alpha = 0.05

    impact.summary()
    summarizer_stub.summary.assert_called_with(
        'summary_data', 0.5, 0.05, 'summary', 2)

    expected_message = ('Input value for digits must be integer. Received '
                        '"<class \'str\'>" instead.')
    with pytest.raises(ValueError) as excinfo:
        impact.summary(digits='1')
    assert str(excinfo.value) == expected_message
    def test_summary_w_report_output(self, monkeypatch, inference_input,
                                     summary_report_filename):
        """Check that ``summary(output='report')`` emits the expected report.

        ``textwrap.dedent`` is replaced by a mock whose side effect appends
        every whitespace-normalised message it receives to a temporary file.
        The accumulated file content is then compared with the
        whitespace-normalised content of ``summary_report_filename``.
        """
        import shutil  # local import: only needed for temp-dir cleanup

        inferences_df = pd.DataFrame(inference_input)
        causal = CausalImpact()
        causal.params = {'alpha': 0.05, 'post_period': [2, 4]}
        causal.inferences = inferences_df

        # Context manager so the reference file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(summary_report_filename) as report_file:
            expected = re.sub(r'\s+', ' ', report_file.read()).strip()

        tmpdir = mkdtemp()
        try:
            tmp_file = os.path.join(tmpdir, 'summary_test')

            def dedent_side_effect(msg):
                # Collapse all whitespace runs so layout differences between
                # the generated and the reference report do not matter.
                msg = re.sub(r'\s+', ' ', msg).strip()
                with open(tmp_file, 'a') as file_obj:
                    file_obj.write(msg)
                return msg

            dedent_mock = mock.Mock(side_effect=dedent_side_effect)
            monkeypatch.setattr('textwrap.dedent', dedent_mock)

            causal.summary(output='report')

            with open(tmp_file, 'r') as result_file:
                result_str = result_file.read()
            assert result_str == expected
        finally:
            # mkdtemp() leaves removal of the directory to the caller.
            shutil.rmtree(tmpdir, ignore_errors=True)
    def test_summary_wrong_argument_raises(self, inference_input):
        """``summary`` must raise ``ValueError`` for an unsupported ``output``."""
        frame = pd.DataFrame(inference_input)
        impact = CausalImpact()

        # Minimal state assigned directly on the instance before calling
        # ``summary``.
        impact.params = {'alpha': 0.05, 'post_period': [2, 4]}
        impact.inferences = frame

        with pytest.raises(ValueError):
            impact.summary(output='wrong_argument')
Beispiel #4
0
def causal_impact_analysis(ori_data, when_fi_started):
    """Run a CausalImpact analysis on a series split at a given instant.

    Args:
        ori_data: Indexable sequence of ``[timestamp_ms, value]`` points.
            NOTE(review): presumably ordered by timestamp -- confirm against
            the callers.
        when_fi_started: Timestamp (same unit as the points) from which the
            post-intervention period starts.

    Returns:
        A tuple ``(summary, report, p, prob, relative_effect)``. ``summary``
        and ``report`` are the texts returned by ``CausalImpact.summary``;
        the three numbers are parsed out of ``summary`` and are ``-1`` when
        the corresponding pattern is not found.
    """
    x = [point[0] for point in ori_data]
    y = [point[1] for point in ori_data]

    # Index of the first point at or after the intervention. ``0`` doubles
    # as the "not found yet" marker (kept from the original logic), so a
    # match at index 0 is superseded by the next matching point.
    # enumerate() replaces the former ori_data.index(point) linear search.
    post_period_index = 0
    for index, timestamp in enumerate(x):
        if post_period_index == 0 and when_fi_started <= timestamp:
            post_period_index = index

    data_frame = pd.DataFrame({
        "timestamp": pd.to_datetime(x, unit="ms"),
        "y": y
    })
    data_frame = data_frame.set_index("timestamp")
    pre_period = [
        pd.to_datetime(ori_data[0][0], unit="ms"),
        pd.to_datetime(ori_data[post_period_index - 1][0], unit="ms")
    ]
    post_period = [
        pd.to_datetime(ori_data[post_period_index][0], unit="ms"),
        pd.to_datetime(ori_data[-1][0], unit="ms")
    ]

    causal_impact = CausalImpact(data_frame,
                                 pre_period,
                                 post_period,
                                 prior_level_sd=0.1)
    summary = causal_impact.summary()
    report = causal_impact.summary(output='report')
    logging.info(summary)
    logging.info(report)

    # Each value keeps its -1 default when the summary text does not match,
    # instead of failing with AttributeError on a None match object.
    relative_effect = -1  # Relative effect on average in the posterior area
    pattern_re = re.compile(
        r'Relative effect \(s\.d\.\)\s+-?(0\.\d+|[1-9]\d*\.\d+)%\s+\((0\.\d+|[1-9]\d*\.\d+)%\)'
    )
    match = pattern_re.search(summary)
    if match:
        # NOTE(review): group(2) is the parenthesised "(s.d.)" number, while
        # group(1) is the effect itself (without its sign). Kept as-is for
        # backward compatibility -- confirm which one callers expect.
        relative_effect = float(match.group(2))

    p = -1  # Posterior tail-area probability
    prob = -1  # Posterior prob. of a causal effect
    pattern_p_value = re.compile(
        r'Posterior tail-area probability p: (0\.\d+|[1-9]\d*\.\d+)\sPosterior prob. of a causal effect: (0\.\d+|[1-9]\d*\.\d+)%'
    )
    match = pattern_p_value.search(summary)
    if match:
        p = float(match.group(1))
        prob = float(match.group(2))

    # causal_impact.plot(panels=['original'], figsize=(12, 4))

    return summary, report, p, prob, relative_effect
def main():
    """Load a Glowroot JSON export and run a CausalImpact analysis on it.

    The first data series of the export becomes a DataFrame indexed by
    timestamp, the model is built with hard-coded pre/post periods, and the
    summary is logged before the result is plotted.
    """
    # Keep the file open only while it is parsed, not during model fitting
    # and plotting.
    with open("/path/to/glowroot/data.json", 'rt') as file:
        glowroot_data = json.load(file)

    points = glowroot_data["dataSeries"][0]["data"]
    x = [point[0] for point in points]
    y = [point[1] for point in points]

    data_frame = pd.DataFrame({
        "timestamp": pd.to_datetime(x, unit="ms"),
        "y": y
    })
    data_frame = data_frame.set_index("timestamp")
    logging.info(data_frame)

    # Period boundaries are epoch timestamps in milliseconds.
    pre_period = [
        pd.to_datetime(1573661277259, unit="ms"),
        pd.to_datetime(1573661647328, unit="ms")
    ]
    post_period = [
        pd.to_datetime(1573661652328, unit="ms"),
        pd.to_datetime(1573661932369, unit="ms")
    ]

    causal_impact = CausalImpact(data_frame,
                                 pre_period,
                                 post_period,
                                 prior_level_sd=0.1)
    logging.info(causal_impact.summary())
    causal_impact.plot()
    def test_summary(self, inference_input):
        """Check that ``summary(path=...)`` writes the expected CSV table.

        The expected summary table is built as a DataFrame, written to a
        temporary CSV file and read back; its text must equal the text of
        the file produced by ``summary``.
        """
        import shutil  # local import: only needed for temp-dir cleanup

        inferences_df = pd.DataFrame(inference_input)
        causal = CausalImpact()

        params = {'alpha': 0.05, 'post_period': [2, 4]}

        causal.params = params
        causal.inferences = inferences_df

        expected = [
            [3, 7],
            [3, 7],
            [[3, 3], [7, 7]],
            [' ', ' '],
            [0, 0],
            [[0, 0], [0, 0]],
            [' ', ' '],
            ['-2.8%', '-2.8%'],
            [['0.0%', '-11.1%'], ['0.0%', '-11.1%']],
            [' ', ' '],
            ['0.0%', ' '],
            ['100.0%', ' '],
        ]

        expected = pd.DataFrame(expected,
                                columns=['Average', 'Cumulative'],
                                index=[
                                    'Actual', 'Predicted', '95% CI', ' ',
                                    'Absolute Effect', '95% CI', ' ',
                                    'Relative Effect', '95% CI', " ",
                                    "P-value", "Prob. of Causal Effect"
                                ])

        tmpdir = mkdtemp()
        try:
            result_file = os.path.join(tmpdir, 'tmp_test_summary')
            expected_file = os.path.join(tmpdir, 'tmp_expected')

            # Round-trip the expected frame through to_csv so both sides are
            # compared in the same serialised form.
            expected.to_csv(expected_file)
            with open(expected_file) as handle:
                expected_str = handle.read()

            causal.summary(path=result_file)

            with open(result_file) as handle:
                result = handle.read()
            assert result == expected_str
        finally:
            # mkdtemp() leaves removal of the directory to the caller.
            shutil.rmtree(tmpdir, ignore_errors=True)
def causal_impact_analysis(ori_data, when_fi_started):
    """Run a CausalImpact analysis on a series split at a given instant.

    Args:
        ori_data: Indexable sequence of ``[timestamp_ms, value]`` points.
            NOTE(review): presumably ordered by timestamp -- confirm against
            the callers.
        when_fi_started: Timestamp (same unit as the points) from which the
            post-intervention period starts.

    Returns:
        A tuple ``(summary, report, p, prob)``. ``summary`` and ``report``
        are the texts returned by ``CausalImpact.summary``; ``p`` and
        ``prob`` are parsed out of ``summary`` and are ``-1`` when the
        pattern is not found.
    """
    x = [point[0] for point in ori_data]
    y = [point[1] for point in ori_data]

    # Index of the first point at or after the intervention. ``0`` doubles
    # as the "not found yet" marker (kept from the original logic), so a
    # match at index 0 is superseded by the next matching point.
    # enumerate() replaces the former ori_data.index(point) linear search.
    post_period_index = 0
    for index, timestamp in enumerate(x):
        if post_period_index == 0 and when_fi_started <= timestamp:
            post_period_index = index

    data_frame = pd.DataFrame({
        "timestamp": pd.to_datetime(x, unit="ms"),
        "y": y
    })
    data_frame = data_frame.set_index("timestamp")
    pre_period = [
        pd.to_datetime(ori_data[0][0], unit="ms"),
        pd.to_datetime(ori_data[post_period_index - 1][0], unit="ms")
    ]
    post_period = [
        pd.to_datetime(ori_data[post_period_index][0], unit="ms"),
        pd.to_datetime(ori_data[-1][0], unit="ms")
    ]

    causal_impact = CausalImpact(data_frame,
                                 pre_period,
                                 post_period,
                                 prior_level_sd=0.1)

    # Build the summary text once and reuse it for parsing and returning.
    summary = causal_impact.summary()
    report = causal_impact.summary(output='report')

    p = -1  # Posterior tail-area probability
    prob = -1  # Posterior prob. of a causal effect
    pattern = re.compile(
        r'Posterior tail-area probability p: (0\.\d+|[1-9]\d*\.\d+)\sPosterior prob. of a causal effect: (0\.\d+|[1-9]\d*\.\d+)%'
    )
    match = pattern.search(summary)
    # Keep the -1 defaults when the text does not match, instead of failing
    # with AttributeError on a None match object.
    if match:
        p = float(match.group(1))
        prob = float(match.group(2))
    # causal_impact.plot()

    return summary, report, p, prob
Beispiel #8
0
# Causal Impact
# 8. Using a custom model
# Going with this approach after all
# pip install pycausalimpact
# https://github.com/dafiti/causalimpact/blob/master/examples/getting_started.ipynb

from causalimpact import CausalImpact

# x_test, x_train =
# y_test, y_train =

# Prediction periods
# exog needs to be supplied only for the pre_period
# NOTE(review): both periods cover exactly the same dates; the post period
# presumably should start after the pre period ends -- confirm the ranges.
pre_period = ['2019-12-01', '2019-12-31']
post_period = ['2019-12-01', '2019-12-31']

# prior_level_sd=None, nseasons=[{'period': 52}]
# All arguments are passed by keyword: the previous call put positional
# arguments after keyword arguments, which is a SyntaxError.
ci = CausalImpact(data=nq,
                  pre_period=pre_period,
                  post_period=post_period,
                  model=model)

# Visualization
ci.plot(figsize=(14, 8))

# Model summary
ci.summary()





                 axis=1)

# Give the generic 'close' column the explicit name 'close_voo'
data = data.rename(columns={'close': 'close_voo'})

print(data)

# Check if the SP500 looks like a solid input for our synthetic control
# data.plot()
# plt.savefig('summary.svg')

# Define periods. The article came out ~2:30 EDT on May 9th, so the treatment
# is taken to start at market close on the Friday before.
pre_period = [
    pd.Timestamp(stamp)
    for stamp in ('2020-05-01 13:30:00+00:00', '2020-05-08 20:00:00+00:00')
]
post_period = [
    pd.Timestamp(stamp)
    for stamp in ('2020-05-11 13:30:00+00:00', '2020-05-15 19:50:00+00:00')
]

# Feed the date-indexed columns into CausalImpact
amc_data = data[['close_amc', 'close_voo', 'date']].set_index('date')

ci = CausalImpact(amc_data, pre_period, post_period, prior_level_sd=None)
ci.plot()
print(ci.summary('report', 5))
Beispiel #10
0
t2 = 32

# Rolling means of the closing price over windows of t1 and t2 rows.
# NOTE(review): t1 is defined outside this excerpt, and the '21d' column is
# computed with t2 = 32 -- confirm the windows match the column names.
df['close_252d_rolling'] = df['close'].rolling(t1).mean()
df['close_21d_rolling'] = df['close'].rolling(t2).mean()

pre_period = [df.index[0], '2020/10/19 22:00']  # Define pre-event period
post_period = ['2020/10/20 01:00', df.index[-1]]  # Define post-event period

# Split the frame at the same boundaries and print descriptive statistics
# for each side of the event.
pre_period_df = df[df.index <= '2020/10/19 22:00']
post_period_df = df[df.index >= '2020/10/20 01:00']
print('Pre-Event Statistics')
print(pre_period_df.describe())
print('Post-Event Statistics')
print(post_period_df.describe())

# Model only the 'close' series; no other columns are passed in.
ci = CausalImpact(df['close'], pre_period, post_period)
##########################
# FROM HERE ON STILL NEEDS FIXING
##########################
ci.plot(figsize=(12, 6))
ci.plot(panels=['original', 'pointwise'], figsize=(12, 8))
print(ci.summary())

# Bare expression: its value is discarded when run as a plain script
# (it is only displayed in an interactive session).
ci.trained_model.params
print(ci.trained_model.summary())
_ = ci.trained_model.plot_diagnostics(figsize=(14, 6))

# Bare expression, same remark as for ``params`` above.
ci.trained_model.specification

df['close'].plot(figsize=(12, 4))
Beispiel #11
0
def _plot_test_vs_control(test_values, control_values):
    """Render the density plot and the box plot comparing test with control."""
    plt.figure()
    sns.distplot(test_values, hist=False, kde=True)
    sns.distplot(control_values, hist=False, kde=True)
    # Dashed vertical lines mark the mean of each group.
    plt.axvline(np.mean(test_values),
                color='b',
                linestyle='dashed',
                label='TEST',
                linewidth=5)
    plt.axvline(np.mean(control_values),
                color='orange',
                linestyle='dashed',
                label='CONTROL',
                linewidth=5)
    plt.legend(labels=['TEST', 'CONTROL'])
    st.subheader('Distribution Comparison(Density Plot)')
    st.pyplot()
    sns.boxplot(data=[test_values, control_values], showmeans=True)
    st.subheader('Distribution Comparison(Box Plot)')
    st.pyplot()


def _write_inference(p_value):
    """Write the inference paragraph for ``p_value`` (threshold 0.1)."""
    st.markdown('''### Inference ''')
    if p_value > 0.1:
        st.write(
            '''According to the null hypothesis, there is no difference between the means.
        The plot above shows the distribution of the difference of the means that
        we would expect under the null hypothesis.''')
    else:
        st.write(
            '''According to the null hypothesis, there is siginificant difference between the means.
        The plot above shows the distribution of the difference of the means that
        we would expect under the null hypothesis.''')


def main():
    """Streamlit entry point of the AB-testing tool.

    Loads the two CSV data sets, builds a synthetic pre/post series, and
    renders one of the sidebar menu pages:

    * ``Sample-Size-Estimation``: required sample size per detectable effect.
    * ``Stat Base Measurement``: test-vs-control comparison with the method
      chosen in the sidebar (t-test, CausalImpact pre/post, CUPED, or
      Mann-Whitney U).

    ``Analysis & Recommendation`` is listed in the menu but renders nothing.
    """
    st.title("""AB-Testing Tool """)
    html_temp = """
    <div style="background-color:orange;padding:10px">
    <h2 style="color:black;text-align:center;">Online Marketing Campaigns</h2>
    </div>
    """

    print('----Data Imports ------')
    df = pd.read_csv('./main/streamlit/data/fake_data.csv')  ###1
    cup_df = pd.read_csv('./main/streamlit/data/fake_data_cuped.csv')  ###2

    test_f = {'Control_Matrix': 'Control', 'Variant_BT': 'Test'}

    df['test_flag'] = df['Variant'].replace(test_f)
    cup_df['test_flag'] = cup_df['Variant'].replace(test_f)
    test = df[df['test_flag'] == 'Test']
    # ``control`` must come from ``df``: it used to be overwritten with the
    # CUPED control subset, so the plain comparisons mixed two data sets.
    # The CUPED subsets are built inside the CUPED branch, after adjustment.
    control = df[df['test_flag'] == 'Control']

    # Synthetic series with a +5 shift from index 70 on, used by the
    # 'Pre (Test) Vs Post(Test)' method.
    np.random.seed(12345)
    ar = np.r_[1, 0.9]
    ma = np.array([1])
    arma_process = ArmaProcess(ar, ma)
    X = 100 + arma_process.generate_sample(nsample=100)
    y = 1.2 * X + np.random.normal(size=100)
    y[70:] += 5

    pre_post_data = pd.DataFrame({'y': y, 'X': X}, columns=['y', 'X'])  ###3
    pre_period = [0, 69]
    post_period = [70, 99]

    print('======================================================')
    print('----------- Sample Size Estimation--------------------')
    print('======================================================')
    st.markdown(html_temp, unsafe_allow_html=True)
    detectable_change = [
        0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10
    ]
    MENU = [
        'Sample-Size-Estimation', 'Stat Base Measurement',
        'Analysis & Recommendation'
    ]

    choice = st.sidebar.radio(''' Click here ''', MENU)
    if choice == 'Sample-Size-Estimation':
        mean_sales = st.sidebar.number_input('Base-Mean', 1)
        std_sales = st.sidebar.number_input('Base-StdDev', 1)
        # NOTE(review): alpha and power are collected from the sidebar but
        # are not passed to sample_size_calculator -- confirm intent.
        alpha = st.sidebar.number_input('Alpha_Value', 0.05)
        power = st.sidebar.number_input('Power_Value', 0.8)
        k = min_detectable_data_prep(mean_sales, std_sales, detectable_change)
        k['require_sample_size'] = np.vectorize(sample_size_calculator)(
            k['mu_base'], k['mu_hat'], k['std_base'])
        st.subheader(
            'Sample Sizes for different scenario of Minimum Detectable Effect')
        st.write("""
                    Enter your  data into the sidebar and choose what will be Base Mean of KPI & Base Std Deviation of KPI.
                    Below table shows the different sample sizes for different MDE(Minimum detectable Effect) """
                 )
        st.dataframe(k)
        k['effect_in_%'] = (k['detectable_effect'] * 100)
        sns.pointplot(
            x=k['effect_in_%'],
            y=k['require_sample_size'],
            color='blue',
        )
        st.pyplot()
    elif choice == 'Stat Base Measurement':
        METRIC = st.sidebar.selectbox('Choose the metric', ['Pvs_per_session'])
        METHOD = st.sidebar.selectbox('Choose the method', [
            'Post (Control) Vs Post (Test)', 'Pre (Test) Vs Post(Test)',
            'CUPED', 'Post (Control) Vs Post (Test) NonParametric'
        ])
        if METHOD == 'Post (Control) Vs Post (Test)':
            print('---Step-1:Distribution Plot---')
            _plot_test_vs_control(test[METRIC], control[METRIC])
            print('--Step-2:T-Test for Mean Comparison--')
            st.subheader(
                'Mean comparison between Test & Control Distribution using Welsh T-Test'
            )
            r = t_distribution_ci(df,
                                  metric=METRIC,
                                  control='Control',
                                  test='Test',
                                  alpha=0.05)
            st.dataframe(r)
            _write_inference(r['p-value'].iloc[0])

        elif METHOD == 'Pre (Test) Vs Post(Test)':
            ci = CausalImpact(pre_post_data, pre_period, post_period)
            print(ci.summary())
            print(ci.summary(output='report'))
            # The p-value and its significance label are added as extra
            # columns on the summary table itself.
            pre_post_report = ci.summary_data
            pre_post_report['p_value'] = ci.p_value
            pre_post_report['siginificance'] = np.where(
                pre_post_report['p_value'] > 0.1, 'Not Significant',
                'Significant')
            st.subheader('Causal Inference Analysis')
            ci.plot()
            st.pyplot()
            st.subheader('Causal Inference statistical output')
            st.write(ci.summary(output='report'))
            st.dataframe(pre_post_report)
        elif METHOD == 'CUPED':
            cup_df = CUPED(cup_df, KPI=METRIC)
            test_cuped = cup_df[cup_df['test_flag'] == 'Test']
            control_cuped = cup_df[cup_df['test_flag'] == 'Control']
            cup_r = t_distribution_ci(cup_df,
                                      metric='CUPED-adjusted_metric',
                                      control='Control',
                                      test='Test',
                                      alpha=0.05)
            st.subheader('Pre Vs Post Correlation to understand Variance')
            sns.jointplot(cup_df[METRIC],
                          cup_df[METRIC + '_pre_experiment'],
                          kind="reg",
                          stat_func=r2)
            st.pyplot()
            sns.distplot(test_cuped['CUPED-adjusted_metric'],
                         hist=False,
                         kde=True)
            sns.distplot(control_cuped['CUPED-adjusted_metric'],
                         hist=False,
                         kde=True)
            plt.axvline(np.mean(test_cuped['CUPED-adjusted_metric']),
                        color='b',
                        linestyle='dashed',
                        label='TEST',
                        linewidth=5)
            plt.axvline(np.mean(control_cuped['CUPED-adjusted_metric']),
                        color='orange',
                        linestyle='dashed',
                        label='CONTROL',
                        linewidth=5)
            plt.legend(labels=['TEST', 'CONTROL'])
            st.subheader(
                'CUPED-Distribution Comparison(Density Plot) after removing variance'
            )
            st.pyplot()
            st.subheader(
                'CUPED-Mean comparison between Test & Control Distribution using Welsh T-Test after removing variance'
            )
            st.dataframe(cup_r)
        elif METHOD == 'Post (Control) Vs Post (Test) NonParametric':
            print('---Step-1:Distribution Plot---')
            # The control series used to be passed as ``[METRIC]`` (a list
            # holding the column name); it must be ``control[METRIC]``.
            _plot_test_vs_control(test[METRIC], control[METRIC])
            print('--Step-2:T-Test for Mean Comparison--')
            st.subheader(
                'Mean comparison between Test & Control Distribution using Welsh T-Test'
            )
            df[METRIC] = df[METRIC].astype('float')
            r = mann_whitney_u_test(df,
                                    metric=METRIC,
                                    control='Control',
                                    test='Test',
                                    test_flag='test_flag',
                                    alpha=0.05)
            st.dataframe(r)
            _write_inference(r['p-value'].iloc[0])
Beispiel #12
0
# pip install pycausalimpact

import numpy as np
import pandas as pd
from statsmodels.tsa.arima_process import ArmaProcess
from causalimpact import CausalImpact

# Build a synthetic data set: X comes from an ArmaProcess sample and y depends
# linearly on X plus normal noise.
N_SAMPLES = 1000
CHANGE_START = 800  # first index of the post-intervention period

np.random.seed(0)
ar = np.r_[1, 0.9]
ma = np.array([1])
arma_process = ArmaProcess(ar, ma)

X = arma_process.generate_sample(nsample=N_SAMPLES) + 50
y = 1.6 * X + np.random.normal(size=N_SAMPLES)

# Shift the response by +10 from the change point onwards
y[CHANGE_START:] += 10

data = pd.DataFrame({'y': y, 'X': X}, columns=['y', 'X'])
pre_period = [0, CHANGE_START - 1]
post_period = [CHANGE_START, N_SAMPLES - 1]

ci = CausalImpact(data, pre_period, post_period)
summary_text = ci.summary()
print(summary_text)
report_text = ci.summary(output='report')
print(report_text)
ci.plot()