예제 #1
0
    def _run_with_data(self, data, pre_period, post_period, model_args, alpha,
                       estimation):
        """Fit a model on the pre-period rows of ``data`` and store the results.

        The model is stored on ``self.model`` (first unfitted, then fitted)
        and the ``"series"`` entry of the compiled inferences is stored on
        ``self.inferences``.

        Args:
            data: table accessed through ``.shape``, ``.copy()`` and
                label-based ``.loc`` slicing (presumably a pandas DataFrame
                -- confirm against caller). It must not consist of exactly
                one column.
            pre_period: pair ``[start, end]`` of row labels bounding the
                pre-intervention rows.
            post_period: pair ``[start, end]`` of row labels bounding the
                post-intervention rows.
            model_args: mapping providing at least the keys
                ``"standardize_data"`` and ``"niter"``.
            alpha: forwarded unchanged to ``compile_posterior_inferences``.
            estimation: forwarded unchanged to ``model_fit`` and
                ``compile_posterior_inferences``.

        Raises:
            ValueError: if ``data`` has exactly one column.
        """
        # Exactly one column: no exogenous values were provided.
        if data.shape[1] == 1:
            raise ValueError("data contains no exogenous variables")

        # Slice the two periods out of a copy of the input.
        working = data.copy()
        pre_frame = working.loc[slice(pre_period[0], pre_period[1]), :]
        post_frame = working.loc[slice(post_period[0], post_period[1]), :]

        # (0, 1) is passed on as the standardization parameters unless
        # standardization is requested, in which case the helper's outputs
        # replace both period frames and the parameters.
        std_params = (0, 1)
        if model_args["standardize_data"]:
            scaled = standardize_all_variables(
                working, pre_period, post_period)
            pre_frame, post_frame, std_params = (
                scaled["data_pre"],
                scaled["data_post"],
                scaled["orig_std_params"],
            )

        # Build the model on the pre-period only, then fit it.
        unfitted = construct_model(pre_frame, model_args)
        self.model = unfitted  # stays in place if fitting raises below

        fitted = model_fit(unfitted, estimation, model_args["niter"])
        self.model = fitted

        # The original (uncopied) ``data`` is what gets passed here.
        posterior = compile_posterior_inferences(
            fitted, data, pre_frame, post_frame, None, alpha, std_params,
            estimation)

        # "append" to 'CausalImpact' object
        self.inferences = posterior["series"]
예제 #2
0
    def _run_with_data(self, data, pre_period, post_period, model_args, alpha,
                       estimation):
        """Fit a model on the pre-period rows of ``data`` and store the results.

        The constructed model is stored on ``self.model`` and the
        ``"series"`` entry of the compiled inferences on ``self.inferences``.

        Args:
            data: pandas DataFrame (accessed through ``.shape``, ``.iloc``,
                ``.index``, ``.copy()`` and ``.loc``). It must not consist
                of exactly one column.
            pre_period: mutable pair ``[start, end]`` of row labels bounding
                the pre-intervention rows. ``pre_period[0]`` is updated IN
                PLACE when column 1 of ``data`` starts with missing values.
            post_period: pair ``[start, end]`` of row labels bounding the
                post-intervention rows.
            model_args: mapping providing at least the keys
                ``"standardize_data"`` and ``"niter"``.
            alpha: forwarded unchanged to ``compile_posterior_inferences``.
            estimation: forwarded unchanged to ``model_fit`` and
                ``compile_posterior_inferences``.

        Raises:
            ValueError: if ``data`` has exactly one column.
        """
        # Zoom in on data in modeling range
        if data.shape[1] == 1:  # no exogenous values provided
            raise ValueError("data contains no exogenous variables")

        # Move the start of the pre-period forward to the first non-null
        # row of column 1 when that column begins with missing values.
        # The previous code used pd.isnull() here, which located the first
        # *null* row instead: leading gaps were never skipped, and a single
        # missing value in mid-series dragged the pre-period start onto it.
        # NOTE(review): the check inspects column position 1; confirm that
        # this, rather than column 0, is the series whose leading gaps
        # should be skipped.
        valid_rows = pd.notnull(data.iloc[:, 1]).to_numpy().nonzero()[0]
        if valid_rows.size > 0 and valid_rows[0] > 0:
            pre_period[0] = max(pre_period[0], data.index[valid_rows[0]])

        data_modeling = data.copy()
        df_pre = data_modeling.loc[pre_period[0]:pre_period[1], :]
        df_post = data_modeling.loc[post_period[0]:post_period[1], :]

        # Standardize all variables; (0, 1) is passed on as the
        # standardization parameters when no standardization is requested.
        orig_std_params = (0, 1)
        if model_args["standardize_data"]:
            sd_results = standardize_all_variables(data_modeling, pre_period,
                                                   post_period)
            df_pre = sd_results["data_pre"]
            df_post = sd_results["data_post"]
            orig_std_params = sd_results["orig_std_params"]

        # Construct model and perform inference
        ucm_model = construct_model(self, df_pre, model_args)
        res = model_fit(self, ucm_model, estimation, model_args["niter"])

        inferences = compile_posterior_inferences(res, data, df_pre, df_post, None,
                                                  alpha, orig_std_params,
                                                  estimation)

        # "append" to 'CausalImpact' object
        # Note: the constructed model, not the fit result ``res``, is what
        # gets stored on ``self.model``.
        self.inferences = inferences["series"]
        self.model = ucm_model
예제 #3
0
    def _run_with_ucm(self, ucm_model, post_period_response, alpha, model_args,
                      estimation):
        """Run an impact analysis on top of a ucm model.

        Stores the result on ``self.params`` (``"pre_period"`` and
        ``"post_period"`` entries), ``self.data``, ``self.inferences`` and
        ``self.model``.

        Args:
            ucm_model: Model as returned by UnobservedComponents(),
                in which the data during the post-period was set to NA
            post_period_response: observed data during the post-intervention
                period
            alpha: tail-probabilities of posterior intervals
            model_args: mapping; only ``model_args["niter"]`` is read here.
            estimation: forwarded unchanged to ``model_fit`` and
                ``compile_posterior_inferences``.
        """
        # NOTE: the period boundaries (needed for plot()) are not inferred
        # from the observation vector; the infer_period_indices_from_data()
        # path is disabled. They are derived from the length of
        # post_period_response instead.

        # Everything except the trailing len(post_period_response) entries
        # of the model's original endog is treated as the pre-period.
        observed = ucm_model.data.orig_endog
        pre_frame = pd.DataFrame(observed[:-len(post_period_response)])
        post_frame = pd.DataFrame(post_period_response)

        # Pre-period values followed by the observed post-period response.
        stacked = np.concatenate([pre_frame.values, post_frame.values])
        data = pd.DataFrame(stacked)

        results = model_fit(ucm_model, estimation, model_args["niter"])

        # Compile posterior inferences; (0, 1) is passed as the
        # standardization parameters since nothing is standardized here.
        posterior = compile_posterior_inferences(
            results, data, pre_frame, None, post_frame, alpha, (0, 1),
            estimation)

        # Number of observations before the post-period; also used as the
        # position of the first post-period row.
        boundary = results.model.nobs - len(post_frame)

        self.params["pre_period"] = [0, boundary - 1]
        self.params["post_period"] = [boundary, -1]
        self.data = pd.concat([pre_frame, post_frame])
        self.inferences = posterior["series"]
        self.model = results