Example #1
 def test_gap(self):
     post_period = [120, 200]
     impact = CausalImpact(data, pre_period, post_period, model_args)
     impact.run()
     assert np.all(
         pd.isnull(impact.inferences.loc[101:119,
                                         impact.inferences.columns[2:]]))
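These test snippets reference pytest fixtures (data, pre_period, post_period, model_args) whose definitions are not shown. A minimal sketch of plausible values, inferred from the shapes and arguments used in the examples (the actual fixtures are defined elsewhere in each test suite):

import numpy as np
import pandas as pd
from causalimpact import CausalImpact

# Plausible fixture values inferred from the examples; the real test suites
# define these as pytest fixtures rather than module-level globals.
data = pd.DataFrame(np.random.randn(200, 3), columns=["y", "x1", "x2"])
pre_period = [0, 100]        # rows used to fit the counterfactual model
post_period = [101, 199]     # rows over which the effect is estimated
model_args = {"niter": 100}  # model options, e.g. number of iterations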
Example #2
 def test_pre_period_starts_after_beginning_of_data(self, data):
     pre_period = [3, 100]
     impact = CausalImpact(data, pre_period, [101, 199])
     impact.run()
     np.testing.assert_array_equal(impact.inferences.response.values,
                                   data.y.values)
     assert np.all(pd.isnull(impact.inferences.iloc[0:pre_period[0], 2:]))
Example #3
 def test_late_pre_period(self):
     pre_period = [3, 100]
     impact = CausalImpact(data, pre_period, post_period, model_args)
     impact.run()
     np.testing.assert_array_equal(impact.inferences.response.values,
                                   data.y.values)
     assert np.all(pd.isnull(impact.inferences.iloc[0:pre_period[0], 2:]))
Example #4
 def test_gap_between_pre_and_post_periods(self, data, pre_period):
     post_period = [120, 199]
     impact = CausalImpact(data, pre_period, post_period)
     impact.run()
     assert np.all(
         pd.isnull(impact.inferences.loc[101:119,
                                         impact.inferences.columns[2:]]))
Example #5
 def test_early_post_period(self):
     post_period = [101, 197]
     impact = CausalImpact(data, pre_period, post_period, model_args)
     impact.run()
     np.testing.assert_array_equal(impact.inferences.response.values,
                                   data.y.values)
     assert np.all(pd.isnull(impact.inferences.iloc[-2:, 2:]))
Example #6
    def test_missing_values_in_pre_period_y(self, pre_period, post_period):
        """Test that all columns in the result series except those associated
        with point predictions have missing values at the time points where
        the input time series has missing values."""
        data = pd.DataFrame(np.random.randn(200, 3), columns=["y", "x1", "x2"])
        data.iloc[95:100, 0] = np.nan

        impact = CausalImpact(data, pre_period, post_period)
        impact.run()

        predicted_cols = [
            impact.inferences.columns.get_loc(col)
            for col in impact.inferences.columns
            if ("response" not in col and "point_effect" not in col)
        ]

        effect_cols = [
            impact.inferences.columns.get_loc(col)
            for col in impact.inferences.columns if "point_effect" in col
        ]

        response_cols = [
            impact.inferences.columns.get_loc(col)
            for col in impact.inferences.columns if "response" in col
        ]

        assert np.all(np.isnan(impact.inferences.iloc[95:100, response_cols]))
        assert not np.any(
            np.isnan(impact.inferences.iloc[95:100, predicted_cols]))
        assert not np.any(np.isnan(impact.inferences.iloc[:95, :]))
        assert not np.any(np.isnan(impact.inferences.iloc[101:, :]))
Example #7
 def test_post_period_finishes_before_end_of_data(self, data, pre_period):
     post_period = [101, 197]
     impact = CausalImpact(data, pre_period, post_period)
     impact.run()
     np.testing.assert_array_equal(impact.inferences.response.values,
                                   data.y.values)
     assert np.all(pd.isnull(impact.inferences.iloc[-2:, 2:]))
Example #8
    def test_unlabelled_pandas_series(self, expected_columns, pre_period,
                                      post_period):
        model_args = {"niter": 123, 'standardize_data': False}
        alpha = 0.05
        data = pd.DataFrame(np.random.randn(200, 3))
        causal_impact = CausalImpact(data.values, pre_period, post_period,
                                     model_args, None, None, alpha, "MLE")

        causal_impact.run()
        actual_columns = list(causal_impact.inferences.columns)
        assert actual_columns == expected_columns
Example #9
 def test_late_early_and_gap(self):
     pre_period = [3, 80]
     post_period = [120, 197]
     impact = CausalImpact(data, pre_period, post_period, model_args)
     impact.run()
     assert np.all(
         pd.isnull(impact.inferences.loc[:2,
                                         impact.inferences.columns[2:]]))
     assert np.all(
         pd.isnull(impact.inferences.loc[81:119,
                                         impact.inferences.columns[2:]]))
     assert np.all(
         pd.isnull(impact.inferences.loc[198:,
                                         impact.inferences.columns[2:]]))
Example #10
 def test_late_start_early_finish_and_gap_between_periods(self, data):
     pre_period = [3, 80]
     post_period = [120, 197]
     impact = CausalImpact(data, pre_period, post_period)
     impact.run()
     assert np.all(
         pd.isnull(impact.inferences.loc[:2,
                                         impact.inferences.columns[2:]]))
     assert np.all(
         pd.isnull(impact.inferences.loc[81:119,
                                         impact.inferences.columns[2:]]))
     assert np.all(
         pd.isnull(impact.inferences.loc[198:,
                                         impact.inferences.columns[2:]]))
Example #11
    def test_other_formats(self):
        # Test other data formats
        pre_period = [1, 100]
        post_period = [101, 200]
        model_args = {"niter": 100}

        # labelled dataframe
        data = pd.DataFrame(np.random.randn(200, 3), columns=["a", "b", "c"])
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        actual_columns = impact.inferences.columns
        assert actual_columns[0] == "response"

        # numpy array
        data = np.random.randn(200, 3)
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        actual_columns = impact.inferences.columns
        assert actual_columns[0] == "response"

        # list of lists
        data = [[n, n + 2] for n in range(200)]
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        actual_columns = impact.inferences.columns
        assert actual_columns[0] == "response"
Example #12
    def test_other_formats(self, expected_columns, pre_period, post_period):
        # Test other data formats
        model_args = {"niter": 100, "standardize_data": True}

        # labelled dataframe
        data = pd.DataFrame(np.random.randn(200, 3), columns=["a", "b", "c"])
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        actual_columns = list(impact.inferences.columns)
        assert actual_columns == expected_columns

        # numpy array
        data = np.random.randn(200, 3)
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        actual_columns = list(impact.inferences.columns)
        assert actual_columns == expected_columns

        # list of lists
        data = np.random.randn(200, 2).tolist()
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        actual_columns = list(impact.inferences.columns)
        assert actual_columns == expected_columns
Example #13
 def test_missing_pre_period_data(self):
     data.iloc[3:5, 0] = np.nan
     impact = CausalImpact(data, pre_period, post_period, model_args)
     impact.run()
     assert len(impact.inferences) == len(data)
Example #14
 def test_frame_no_exog(self):
     data = np.random.randn(200)
     impact = CausalImpact(data, pre_period, post_period, model_args)
     with pytest.raises(ValueError):
         impact.run()
Example #15
 def test_missing_input(self):
     with pytest.raises(SyntaxError):
         impact = CausalImpact()
         impact.run()
Example #16
 def test_post_period_bigger_than_data_index_max(self, data):
     pre_period = [0, 100]
     post_period = [101, 300]
     impact = CausalImpact(data, pre_period, post_period)
     impact.run()
     assert impact.params['post_period'] == [101, 199]
Example #17
 def test_pre_period_lower_than_data_index_min(self, data):
     pre_period = [-1, 100]
     post_period = [101, 199]
     impact = CausalImpact(data, pre_period, post_period)
     impact.run()
     assert impact.params['pre_period'] == [0, 100]
Example #18
inputs2 = pd.merge(wiki_views_t2, wiki_views_c, on='Date', how='left')
inputs3 = pd.merge(wiki_views_t3, wiki_views_c, on='Date', how='left')
inputs4 = pd.merge(wiki_views_t4, wiki_views_c, on='Date', how='left')
inputs5 = pd.merge(wiki_views_t5, wiki_views_c, on='Date', how='left')
inputs6 = pd.merge(wiki_views_t6, wiki_views_c, on='Date', how='left')
inputs7 = pd.merge(wiki_views_t7, wiki_views_c, on='Date', how='left')


#### 4 CONDUCT CAUSAL IMPACT MEASUREMENT TO IDENTIFY SIGNIFICANT LIFT


# Draft is April 26 - April 28
pre_period = [pd.to_datetime(date) for date in ["2018-02-05", "2018-04-19"]]
post_period = [pd.to_datetime(date) for date in ["2018-04-26", "2018-05-05"]]
impact = CausalImpact(inputs1, pre_period, post_period)
impact.run()
# R-package equivalent of the two lines above:
#   impact <- CausalImpact(inputs1, pre.period, post.period)
#   plot(impact)
inputs1['Date']
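Once run() has finished, the estimated lift can be inspected. A minimal follow-up sketch, assuming the Python port mirrors the R package's summary()/plot() interface (the method names here are an assumption, not shown in this excerpt):

# Assumed interface, mirroring the R package's summary()/plot():
impact.summary()   # point estimate and interval for the post-period lift
impact.plot()      # observed vs. counterfactual, pointwise and cumulative effects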


# Debugging the "upper y" error message: check library versions
import statsmodels
import statsmodels.api as sm

pd.__version__
statsmodels.__version__

df = pd.DataFrame(
    {'y': [150, 200, 225, 150, 175],
     'x1': [150, 249, 150, 125, 325],
     'x2': [275, 125, 249, 275, 250]})
Example #19
 def test_frame_w_no_exog(self, pre_period, post_period):
     data = np.random.randn(200)
     impact = CausalImpact(data, pre_period, post_period, {})
     with pytest.raises(ValueError) as excinfo:
         impact.run()
     assert str(excinfo.value) == 'data contains no exogenous variables'
Example #20
 def test_missing_pre_period_data(self, data, pre_period, post_period):
     model_data = data.copy()
     model_data.iloc[3:5, 0] = np.nan
     impact = CausalImpact(model_data, pre_period, post_period)
     impact.run()
     assert len(impact.inferences) == len(model_data)