def model_local_level(endog=None, params=None, direct=False):
    """Build a local level model with a diffuse initial state.

    Parameters
    ----------
    endog : array_like, optional
        Observed series. Defaults to ``10.2394`` followed by nine ones.
    params : sequence of two floats, optional
        ``(sigma2_y, sigma2_mu)``; used as the ``obs_cov`` and ``state_cov``
        entries respectively. Defaults to ``[1.993, 8.253]``.
    direct : bool, optional
        If True, the representation is assembled by hand on a
        ``KalmanSmoother``; otherwise it is taken from an
        ``UnobservedComponents`` model with the ``'llevel'`` specification.

    Returns
    -------
    mod
        The ``UnobservedComponents`` instance, or None when ``direct``.
    ssm
        The state space representation, initialized as diffuse.
    """
    endog = np.r_[10.2394, [1] * 9] if endog is None else endog
    params = [1.993, 8.253] if params is None else params
    sigma2_y, sigma2_mu = params

    if not direct:
        # Let the high-level model build the representation for us
        mod = UnobservedComponents(endog, 'llevel')
        mod.update(params)
        ssm = mod.ssm
        ssm.initialize(Initialization(ssm.k_states, 'diffuse'))
        return mod, ssm

    # Hand-built representation: one observed series, one state
    ssm = KalmanSmoother(k_endog=1, k_states=1, k_posdef=1)
    ssm.bind(endog)
    ssm.initialize(
        Initialization(ssm.k_states, initialization_type='diffuse'))
    # NOTE: setting ssm.filter_univariate = True should not be required

    # Every system matrix of the local level model is a scalar here
    system_matrices = (
        ('design', 1),
        ('obs_cov', sigma2_y),
        ('transition', 1),
        ('selection', 1),
        ('state_cov', sigma2_mu),
    )
    for name, value in system_matrices:
        ssm[name, :] = value

    return None, ssm
def model_local_linear_trend(endog=None, params=None, direct=False):
    """Build a local linear trend model with a diffuse initial state.

    Parameters
    ----------
    endog : array_like, optional
        Observed series. Defaults to ``10.2394, 4.2039, 6.123123`` followed
        by seven ones.
    params : sequence of three floats, optional
        ``(sigma2_y, sigma2_mu, sigma2_beta)``; ``sigma2_y`` fills
        ``obs_cov`` and the other two form the diagonal of ``state_cov``.
        Defaults to ``[1.993, 8.253, 2.334]``.
    direct : bool, optional
        If True, the representation is assembled by hand on a
        ``KalmanSmoother``; otherwise it is taken from an
        ``UnobservedComponents`` model with the ``'lltrend'`` specification.

    Returns
    -------
    mod
        The ``UnobservedComponents`` instance, or None when ``direct``.
    ssm
        The state space representation, initialized as diffuse.
    """
    if endog is None:
        endog = np.r_[10.2394, 4.2039, 6.123123, [1] * 7]
    if params is None:
        params = [1.993, 8.253, 2.334]
    sigma2_y, sigma2_mu, sigma2_beta = params

    if not direct:
        # Let the high-level model build the representation for us
        mod = UnobservedComponents(endog, 'lltrend')
        mod.update(params)
        ssm = mod.ssm
        ssm.initialize(Initialization(ssm.k_states, 'diffuse'))
        return mod, ssm

    # Hand-built representation: one observed series, two states
    ssm = KalmanSmoother(k_endog=1, k_states=2, k_posdef=2)
    ssm.bind(endog)
    ssm.initialize(
        Initialization(ssm.k_states, initialization_type='diffuse'))
    # NOTE: setting ssm.filter_univariate = True should not be required

    # System matrices for the local linear trend model
    ssm['design', 0, 0] = 1
    ssm['obs_cov', 0, 0] = sigma2_y
    ssm['transition'] = np.array([[1, 1], [0, 1]])
    ssm['selection'] = np.eye(2)
    ssm['state_cov'] = np.diag([sigma2_mu, sigma2_beta])

    return None, ssm
Example #3
0
def test_forecast_exog():
    """Forecasting must accept scalar, 1-dim and 2-dim `exog` of valid size
    and raise ``ValueError`` for `exog` of the wrong size."""
    nobs = 100
    endog = np.ones(nobs) * 2.0
    exog = np.ones(nobs)

    res = UnobservedComponents(
        endog, 'irregular', exog=exog).smooth([1.0, 2.0])

    # 1-step-ahead, valid: scalar, 1-dim and 2-dim inputs
    for exog_fcast in (1., np.ones(1), np.ones((1, 1))):
        assert_allclose(res.forecast(1, exog=exog_fcast), 2.)

    # h-steps-ahead, valid: 1-dim and 2-dim inputs
    h = 10
    for exog_fcast in (np.ones(h), np.ones((h, 1))):
        assert_allclose(res.forecast(h, exog=exog_fcast), 2.)

    # h-steps-ahead, invalid: too few rows or too many columns
    for bad_exog in (1., [1, 2], np.ones((h, 2))):
        assert_raises(ValueError, res.forecast, h, exog=bad_exog)
Example #4
0
def test_mle_reg():
    """The regression coefficient must come out near 0.5 whether it is
    estimated by MLE or carried in the state vector."""
    endog = np.arange(100)*1.0
    exog = endog*2
    # Make the fit not-quite-perfect
    endog[::2] += 0.01
    endog[1::2] -= 0.01

    # Fit once per `mle_regression` setting, keeping warnings out of the way
    fitted = {}
    with warnings.catch_warnings(record=True):
        for mle_regression in (False, True):
            mod = UnobservedComponents(endog, irregular=True, exog=exog,
                                       mle_regression=mle_regression)
            fitted[mle_regression] = mod.fit(disp=-1)

    in_state = fitted[False].regression_coefficients.filtered[0, -1]
    assert_allclose(in_state, 0.5, atol=1e-5)
    assert_allclose(fitted[True].params[1], 0.5, atol=1e-5)
Example #5
0
def test_specifications():
    """Check the warnings and errors raised for model specifications.

    Warnings are recorded with the ``always`` filter enabled so the checks do
    not depend on the ambient warning filters or on per-module registries,
    and the expected message is looked up among all recorded warnings rather
    than assumed to be the first one.
    """
    # Clear warnings
    structural.__warningregistry__ = {}

    endog = [1, 2]

    def build(*args, **kwargs):
        """Construct the model and return it with the warning messages."""
        with warnings.catch_warnings(record=True) as w:
            # Without this, an already-seen or filtered warning would not be
            # recorded and the message checks below could not succeed.
            warnings.simplefilter('always')
            mod = UnobservedComponents(endog, *args, **kwargs)
        return mod, [str(item.message) for item in w]

    # Test that when nothing specified, a warning is issued and the model that
    # is fit is one with irregular=True and nothing else.
    mod, messages = build()
    message = ("Specified model does not contain a stochastic element;"
               " irregular component added.")
    assert message in messages
    assert_equal(mod.trend_specification, 'irregular')

    # Test an invalid string trend specification
    with pytest.raises(ValueError):
        UnobservedComponents(endog, 'invalid spec')

    # Test that if a trend component is specified without a level component,
    # a warning is issued and a deterministic level component is added
    mod, messages = build(trend=True, irregular=True)
    message = ("Trend component specified without level component;"
               " deterministic level component added.")
    assert message in messages
    assert_equal(mod.trend_specification, 'deterministic trend')

    # Test that if a string specification is provided, a warning is issued if
    # the boolean attributes are also specified
    trend_attributes = [
        'irregular', 'trend', 'stochastic_level', 'stochastic_trend'
    ]
    for attribute in trend_attributes:
        mod, messages = build('deterministic trend', **{attribute: True})

        message = ("Value of `%s` may be overridden when the trend"
                   " component is specified using a model string." %
                   attribute)
        assert message in messages

    # Test that a seasonal with period less than two is invalid
    with pytest.raises(ValueError):
        UnobservedComponents(endog, seasonal=1)
Example #6
0
 def get_model(y, x, **kwargs):
     """
     Build the UnobservedComponents model for the given data.

     :param y: array used as the endogenous series
     :param x: array passed to the model as ``exog``
     :param kwargs: extra model parameters forwarded unchanged
     :return: UnobservedComponents model object
     """
     model = UnobservedComponents(y, exog=x, **kwargs)
     return model
Example #7
0
def test_forecast():
    """Forecasts from a deterministic-constant model with a regressor must
    continue the linear pattern of the sample."""
    nobs, steps = 50, 10
    endog = np.arange(nobs) + 10
    exog = np.arange(nobs)

    mod = UnobservedComponents(endog, exog=exog, level='dconstant', seasonal=4)
    res = mod.smooth([1e-15, 0, 1])

    # Out-of-sample regressor simply continues the in-sample sequence
    future_exog = np.arange(nobs, nobs + steps)
    actual = res.forecast(steps, exog=future_exog[:, np.newaxis])
    assert_allclose(actual, future_exog + 10)
Example #8
0
def test_custom_model_input_validation(rand_data, pre_int_period,
                                       post_int_period):
    """`CausalImpact` must reject custom models of the wrong type or with the
    `level`, `exog` or `data` attribute unset, each with its own message."""

    def rejection_message(model):
        # Build the analysis with `model` and return the ValueError text
        with pytest.raises(ValueError) as excinfo:
            CausalImpact(rand_data,
                         pre_int_period,
                         post_int_period,
                         model=model)
        return str(excinfo.value)

    assert rejection_message(
        'test') == 'Input model must be of type UnobservedComponents.'

    # (attribute to break, value to assign, expected error message)
    broken_attributes = (
        ('level', False, 'Model must have level attribute set.'),
        ('exog', None, 'Model must have exog attribute set.'),
        ('data', None, 'Model must have data attribute set.'),
    )
    for name, value, expected in broken_attributes:
        # A fresh model each time so only one attribute is broken
        ucm = UnobservedComponents(rand_data.iloc[:101, 0],
                                   level='llevel',
                                   exog=rand_data.iloc[:101, 1:])
        setattr(ucm, name, value)
        assert rejection_message(ucm) == expected
Example #9
0
def test_specifications():
    """Check the warnings and errors raised for model specifications."""
    # Clear warnings
    structural.__warningregistry__ = {}

    endog = [1, 2]

    # Nothing specified: warn, and fall back to an irregular-only model
    with pytest.warns(SpecificationWarning,
                      match='irregular component added'):
        mod = UnobservedComponents(endog)
        assert_equal(mod.trend_specification, 'irregular')

    # An unknown string trend specification is rejected
    with pytest.raises(ValueError):
        UnobservedComponents(endog, 'invalid spec')

    # Trend without level: warn, and add a deterministic level component
    with pytest.warns(SpecificationWarning,
                      match='Trend component specified without'):
        mod = UnobservedComponents(endog, trend=True, irregular=True)
        assert_equal(mod.trend_specification, 'deterministic trend')

    # String specification combined with a boolean attribute: warn that the
    # boolean may be overridden
    for attribute in ('irregular', 'trend', 'stochastic_level',
                      'stochastic_trend'):
        with pytest.warns(SpecificationWarning,
                          match='may be overridden when the trend'):
            UnobservedComponents(endog, 'deterministic trend',
                                 **{attribute: True})

    # A seasonal period less than two is invalid
    with pytest.raises(ValueError):
        UnobservedComponents(endog, seasonal=1)
Example #10
0
def test_causal_cto_w_custom_model_and_seasons(rand_data, pre_int_period,
                                               post_int_period):
    """A custom model's frequency-domain seasonal setup must be kept by
    `CausalImpact`."""
    first, last = pre_int_period[0], pre_int_period[1]
    pre_data = rand_data.loc[first:last, :]

    seasons = [{'period': 4}, {'period': 3}]
    model = UnobservedComponents(endog=pre_data.iloc[:, 0],
                                 level='llevel',
                                 exog=pre_data.iloc[:, 1:],
                                 freq_seasonal=seasons)

    ci = CausalImpact(rand_data, pre_int_period, post_int_period, model=model)

    assert ci.model.freq_seasonal_periods == [4, 3]
    assert ci.model.freq_seasonal_harmonics == [2, 1]
Example #11
0
 def run(self):
     """Fit the BSTS model to the data.

     Builds a local linear trend ``UnobservedComponents`` model on the rows
     of ``self.data`` up to label ``self.data_inter - 1`` and stores the
     model in ``self.model`` and the fit result in ``self.fit``.
     """
     last_label = self.data_inter - 1
     endog = self.data.loc[:last_label, self._obs_col()].values
     exog = self.data.loc[:last_label, self._reg_cols()].values

     self.model = UnobservedComponents(
         endog,
         exog=exog,
         level='local linear trend',
         seasonal=self.model_args['n_seasons'],
     )
     self.fit = self.model.fit(maxiter=self.model_args['max_iter'])
Example #12
0
def test_recreate_model():
    """A model rebuilt from `_get_init_kwds` must be equivalent to the
    original, for every trend specification string."""
    nobs = 100
    endog = np.ones(nobs) * 2.0
    exog = np.ones(nobs)

    levels = [
        'irregular', 'ntrend', 'fixed intercept', 'deterministic constant',
        'dconstant', 'local level', 'llevel', 'random walk', 'rwalk',
        'fixed slope', 'deterministic trend', 'dtrend',
        'local linear deterministic trend', 'lldtrend',
        'random walk with drift', 'rwdrift', 'local linear trend',
        'lltrend', 'smooth trend', 'strend', 'random trend', 'rtrend']

    # Note: have to add in some stochastic component, otherwise we have
    # problems with entirely deterministic models
    stochastic_components = (
        # level + stochastic seasonal
        {'seasonal': 2, 'stochastic_seasonal': True},
        # level + autoregressive
        {'autoregressive': 1},
        # level + stochastic cycle
        {'cycle': True, 'stochastic_cycle': True, 'damped_cycle': True},
    )

    for level in levels:
        for component in stochastic_components:
            mod = UnobservedComponents(endog, level=level, exog=exog,
                                       **component)
            mod2 = UnobservedComponents(endog, exog=exog,
                                        **mod._get_init_kwds())
            check_equivalent_models(mod, mod2)
Example #13
0
    def _construct_default_model(self):
        """Build the default local level model from the pre-intervention data.

        The normalized pre-intervention data is used when available. The
        first column is the response; any remaining columns are regressors.

        Returns
        -------
          model: `UnobservedComponents` built using pre-intervention data as training
              data.
        """
        if self.normed_pre_data is None:
            data = self.pre_data
        else:
            data = self.normed_pre_data

        response = data.iloc[:, 0]
        # Without extra columns there is nothing to regress on
        regressors = data.iloc[:, 1:] if data.shape[1] > 1 else None
        return UnobservedComponents(endog=response, level='llevel',
                                    exog=regressors)
Example #14
0
def test_misc_exog():
    """Smoke tests for models with `exog` (numpy and pandas inputs, endog
    with leading missing values) and for invalid out-of-sample `exog`."""
    nobs = 20
    k_endog = 1
    np.random.seed(1208)
    endog = np.random.normal(size=(nobs, k_endog))
    endog[:4, 0] = np.nan
    exog1 = np.random.normal(size=(nobs, 1))
    exog2 = np.random.normal(size=(nobs, 2))

    index = pd.date_range('1970-01-01', freq='QS', periods=nobs)
    endog_pd = pd.DataFrame(endog, index=index)
    exog1_pd = pd.Series(exog1.squeeze(), index=index)
    exog2_pd = pd.DataFrame(exog2, index=index)

    inputs = [
        (endog, exog1),
        (endog, exog2),
        (endog, exog2),
        (endog_pd, exog1_pd),
        (endog_pd, exog2_pd),
        (endog_pd, exog2_pd),
    ]
    models = [UnobservedComponents(y, 'llevel', exog=x) for y, x in inputs]

    for mod in models:
        # Smoke tests
        mod.start_params
        res = mod.fit(disp=False)
        res.summary()
        res.predict()
        res.predict(dynamic=True)
        res.get_prediction()

        oos_exog = np.random.normal(size=(1, mod.k_exog))
        res.forecast(steps=1, exog=oos_exog)
        res.get_forecast(steps=1, exog=oos_exog)

        # Smoke tests for invalid exog: each of these sizes is exercised
        # against a one-step forecast and must be rejected
        for bad_size in (1, (2, mod.k_exog), (1, mod.k_exog + 1)):
            oos_exog = np.random.normal(size=bad_size)
            with pytest.raises(ValueError):
                res.forecast(steps=1, exog=oos_exog)

    # Test invalid model specifications
    with pytest.raises(ValueError):
        UnobservedComponents(endog, 'llevel', exog=np.zeros((10, 4)))
def test_apply_results():
    """Applying results from one sample to another dataset must reproduce
    the results of a model built directly on that dataset."""
    endog = np.arange(100)
    exog = np.ones_like(endog)
    params = [1., 1., 0.1, 1.]

    first_half = slice(None, 50)
    second_half = slice(50, None)

    res1 = UnobservedComponents(
        endog[first_half], 'llevel', exog=exog[first_half]).smooth(params)
    res2 = UnobservedComponents(
        endog[second_half], 'llevel', exog=exog[second_half]).smooth(params)

    res3 = res2.apply(endog[first_half], exog=exog[first_half])

    assert_equal(res1.specification, res3.specification)

    summary_attrs = [
        'nobs', 'llf', 'llf_obs', 'loglikelihood_burn',
        'cov_params_default'
    ]
    filter_smoother_attrs = [
        'filtered_state', 'filtered_state_cov', 'predicted_state',
        'predicted_state_cov', 'forecasts', 'forecasts_error',
        'forecasts_error_cov', 'standardized_forecasts_error',
        'forecasts_error_diffuse_cov', 'predicted_diffuse_state_cov',
        'scaled_smoothed_estimator', 'scaled_smoothed_estimator_cov',
        'smoothing_error', 'smoothed_state', 'smoothed_state_cov',
        'smoothed_state_autocov', 'smoothed_measurement_disturbance',
        'smoothed_state_disturbance',
        'smoothed_measurement_disturbance_cov',
        'smoothed_state_disturbance_cov'
    ]
    for attr in summary_attrs + filter_smoother_attrs:
        assert_equal(getattr(res3, attr), getattr(res1, attr))

    future_exog = np.ones(10)
    assert_allclose(res3.forecast(10, exog=future_exog),
                    res1.forecast(10, exog=future_exog))
Example #16
0
def test_mle_reg(use_exact_diffuse):
    """Check the regression coefficient with and without `mle_regression`.

    Both variants must recover a coefficient close to 0.5. The number of
    diffuse observations (exact diffuse initialization) or the likelihood
    burn-in (otherwise) is checked for each variant as well.

    Parameters
    ----------
    use_exact_diffuse : bool
        Forwarded to ``UnobservedComponents`` and used to select which
        diagnostics are checked.
    """
    endog = np.arange(100) * 1.0
    exog = endog * 2
    # Make the fit not-quite-perfect
    endog[::2] += 0.01
    endog[1::2] -= 0.01

    # Recording the warnings keeps fit-time warnings out of the test output
    with warnings.catch_warnings(record=True):
        mod1 = UnobservedComponents(endog,
                                    irregular=True,
                                    exog=exog,
                                    mle_regression=False,
                                    use_exact_diffuse=use_exact_diffuse)
        res1 = mod1.fit(disp=-1)

        mod2 = UnobservedComponents(endog,
                                    irregular=True,
                                    exog=exog,
                                    mle_regression=True,
                                    use_exact_diffuse=use_exact_diffuse)
        res2 = mod2.fit(disp=-1)

    assert_allclose(res1.regression_coefficients.filtered[0, -1],
                    0.5,
                    atol=1e-5)
    assert_allclose(res2.params[1], 0.5, atol=1e-5)

    # When the regression component is part of the state vector with exact
    # diffuse initialization, we have two diffuse observations
    if use_exact_diffuse:
        assert_equal(res1.nobs_diffuse, 2)
        assert_equal(res2.nobs_diffuse, 0)
    else:
        assert_equal(res1.loglikelihood_burn, 1)
        assert_equal(res2.loglikelihood_burn, 0)
Example #17
0
def test_causal_cto_w_custom_model(rand_data, pre_int_period, post_int_period):
    """A custom local level model passed to `CausalImpact` must be kept and
    trained on the pre-intervention data."""
    first, last = pre_int_period[0], pre_int_period[1]
    pre_data = rand_data.loc[first:last, :]
    custom_model = UnobservedComponents(endog=pre_data.iloc[:, 0],
                                        level='llevel',
                                        exog=pre_data.iloc[:, 1:])

    ci = CausalImpact(rand_data, pre_int_period, post_int_period,
                      model=custom_model)

    # The model held by the analysis keeps the custom specification
    kept = ci.model
    assert kept.endog_names == 'y'
    assert kept.exog_names == ['x1', 'x2']
    assert kept.k_endog == 1
    assert kept.level
    assert kept.trend_specification == 'local level'

    # Training produced a results wrapper covering the whole pre-period
    trained = ci.trained_model
    assert isinstance(trained, UnobservedComponentsResultsWrapper)
    assert trained.nobs == len(pre_data)
Example #18
0
def construct_model(data, model_args=None):
    """Specify the local level model for the given data.

    The first column of ``data`` is the response. When more columns are
    present they are used as static regressors.

    Args:
      data: time series of response variable and optional covariates
        (accessed through ``.iloc`` and ``.columns``)
      model_args: optional mapping of additional model arguments; only the
        ``"dynamic_regression"`` key is read here. ``None`` is treated as an
        empty mapping.

    Returns:
      An Unobserved Components Model, as returned by UnobservedComponents()

    Raises:
      NotImplementedError: if ``model_args["dynamic_regression"]`` is truthy
        and ``data`` has covariate columns.
    """
    from statsmodels.tsa.statespace.structural import UnobservedComponents

    # A `None` default avoids sharing one mutable dict across calls
    if model_args is None:
        model_args = {}

    y = data.iloc[:, 0]

    observations_ill_conditioned(y)

    # LocalLevel specification of statespace
    ss = {"endog": y.values, "level": "llevel"}

    # Covariates present: add them as a static regression
    if len(data.columns) > 1:
        if model_args.get("dynamic_regression"):
            raise NotImplementedError(
                "Dynamic regression is not implemented.")
        ss["exog"] = data.iloc[:, 1:].values

    return UnobservedComponents(**ss)
Example #19
0
def get_referenced_model(model, endog, exog):
    """
    Builds an `UnobservedComponents` model using as reference the input `model`. This is
    mainly used for building models to make simulations of time series.

    Args
    ----
      model: `UnobservedComponents`.
          Template model that is used as reference to build a new one with new `endog`
          and `exog` variables.
      endog: pandas.Series.
          New endog value to be used in model.
      exog: pandas.Series.
          New exog value to be used in model.

    Returns
    -------
      ref_model: `UnobservedComponents`.
          New model built from input `model` setup.
    """
    # One spec per frequency-domain seasonal component of the template
    freq_seasonal = [
        {'period': period, 'harmonics': harmonics}
        for period, harmonics in zip(model.freq_seasonal_periods,
                                     model.freq_seasonal_harmonics)
    ]

    # The template stores cycle bounds as frequencies while the constructor
    # takes periods, so convert with period = 2 * pi / frequency. A zero
    # lower frequency means the period is unbounded above.
    cycle_bounds = model.cycle_frequency_bound
    lower_cycle_bound = 2 * np.pi / cycle_bounds[1]
    if cycle_bounds[0] > 0:
        upper_cycle_bound = 2 * np.pi / cycle_bounds[0]
    else:
        upper_cycle_bound = np.inf

    args = {
        'endog': endog,
        'exog': exog,
        'level': model.level,
        'trend': model.trend,
        'seasonal': model.seasonal_periods,
        'freq_seasonal': freq_seasonal,
        'cycle': model.cycle,
        # The constructor keyword is `autoregressive`; the order was
        # previously passed under the key `ar`.
        'autoregressive': model.ar_order,
        'irregular': model.irregular,
        'stochastic_level': model.stochastic_level,
        'stochastic_trend': model.stochastic_trend,
        'stochastic_seasonal': model.stochastic_seasonal,
        'stochastic_freq_seasonal': model.stochastic_freq_seasonal,
        'stochastic_cycle': model.stochastic_cycle,
        'damped_cycle': model.damped_cycle,
        'cycle_period_bounds': (lower_cycle_bound, upper_cycle_bound),
    }
    ref_model = UnobservedComponents(**args)
    return ref_model
Example #20
0
    def _get_default_model(self):
        """Build the default local level model from the pre-intervention data
        and `self.model_args`.

        The normalized pre-intervention data is used when available. The
        first column is the response; any remaining columns are regressors.
        The ``'nseasons'`` entry of `self.model_args`, if present, is passed
        as the frequency-domain seasonal specification.

        Returns
        -------
          model: `UnobservedComponents` built using pre-intervention data as training
              data.
        """
        if self.normed_pre_data is None:
            data = self.pre_data
        else:
            data = self.normed_pre_data

        response = data.iloc[:, 0]
        # Without extra columns there is nothing to regress on
        regressors = data.iloc[:, 1:] if data.shape[1] > 1 else None
        return UnobservedComponents(
            endog=response,
            level='llevel',
            exog=regressors,
            freq_seasonal=self.model_args.get('nseasons'),
        )
Example #21
0
    def __init__(self, endog, **kwargs):
        """Create one `UnobservedComponents` model per leading index of `endog`.

        `endog` must be 3-dimensional. The optional ``exog`` keyword must be
        2- or 3-dimensional: a 3-dimensional `exog` is sliced per series
        along its first axis, otherwise the same `exog` is given to every
        model. The remaining keyword arguments are forwarded to each model.
        """
        exog = kwargs.pop("exog", None)
        self.exog = exog
        self.kwargs = kwargs
        assert endog.ndim == 3
        assert exog is None or exog.ndim in (2, 3)

        self.n_models = endog.shape[0]
        self.models = []

        # Only a 3-dimensional exog carries a separate slice per series
        exog_is_per_series = exog is not None and exog.ndim == 3
        for series_idx in range(self.n_models):
            series_exog = exog[series_idx, ...] if exog_is_per_series else exog
            self.models.append(
                UnobservedComponents(
                    endog[series_idx, ...], exog=series_exog, **kwargs
                )
            )
    def run(self, return_df=False):
        """Fit the BSTS model to the data.

        Builds a local linear trend ``UnobservedComponents`` model on the
        rows of ``self.data`` up to label ``self.data_inter - 1``, fits it,
        then runs the estimate helpers. Returns ``self.result`` when
        `return_df` is true and None otherwise.
        """
        last_label = self.data_inter - 1
        endog = self.data.loc[:last_label, self._obs_col()].values
        exog = self.data.loc[:last_label, self._reg_cols()].values

        self._model = UnobservedComponents(
            endog,
            exog=exog,
            level='local linear trend',
            seasonal=self.model_args['n_seasons'],
        )
        self._fit = self._model.fit(maxiter=self.model_args['max_iter'])

        # Post-processing steps, in the order they must run
        self._get_estimates()
        self._get_difference_estimates()
        self._get_cumulative_estimates()

        return self.result if return_df else None
Example #23
0
    def simulated_y(self):
        """
        Simulate several responses for `y` from the trained parameters, so that
        lower and upper boundaries can be processed for different metrics. The
        result is computed once and cached in `self._simulated_y`.

        Returns
        -------
          simulations: np.array
              Array where each row is a simulation of the response variable whose shape is
              (n simulations, n points in post period).
        """
        if self._simulated_y is not None:
            return self._simulated_y

        # For more information about the `trend` and how it works, please refer to:
        # https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.structural.UnobservedComponents.html
        trend = self.model.trend_specification
        n_post = len(self.post_data)
        placeholder_y = np.zeros(n_post)
        if self.mu_sig is None:
            exog_data = self.post_data
        else:
            exog_data = self.normed_post_data
        X = exog_data.iloc[:, 1:] if exog_data.shape[1] > 1 else None
        model = UnobservedComponents(placeholder_y, level=trend, exog=X)

        # `params` is related to the parameters found when fitting the Kalman filter
        # from the observed time series.
        trained = self.trained_model
        params = trained.params
        last_state = trained.predicted_state[..., -1]
        last_state_cov = trained.predicted_state_cov[..., -1]

        simulations = []
        for _ in range(self.n_sims):
            # Each simulation starts from a draw around the last predicted state
            initial_state = np.random.multivariate_normal(
                last_state, last_state_cov)
            sim = model.simulate(params, n_post, initial_state=initial_state)
            if self.mu_sig:
                # Undo the scaling using the stored pair of values
                sim = sim * self.mu_sig[1] + self.mu_sig[0]
            simulations.append(sim)

        self._simulated_y = np.array(simulations)
        return self._simulated_y
Example #24
0
def test_simulated_y_custom_model():
    """Simulations from a custom model must have the expected shape and a
    mean within the expected band."""
    np.random.seed(1)
    n_points = 100
    intervention_idx = 70

    # Regressor is an ARMA sample shifted to 100; response is 1.2 * X + noise
    arma_process = ArmaProcess(np.r_[1, 0.9], np.array([1]))
    X = 100 + arma_process.generate_sample(nsample=n_points)
    y = 1.2 * X + np.random.normal(size=(n_points))
    data = pd.DataFrame({'y': y, 'X': X}, columns=['y', 'X'])

    normed_pre_data, _ = standardize(data.iloc[:intervention_idx])
    pre_rows = slice(0, intervention_idx)
    model = UnobservedComponents(
        endog=normed_pre_data['y'].iloc[pre_rows],
        level='llevel',
        exog=normed_pre_data['X'].iloc[pre_rows])

    ci = CausalImpact(data, [0, 69], [70, 99], model=model)

    sims = ci.simulated_y
    assert sims.shape == (1000, 30)

    lower, upper = np.percentile(sims.mean(axis=1), [5, 95])
    assert lower > 119
    assert upper < 121
Example #25
0
    def run(self, max_iter=1000, return_df=False):
        """Fit the BSTS model to the data.

        The model is a local linear trend ``UnobservedComponents`` built on
        the rows of ``self.data`` up to label ``self._inter_index - 1``.

        :param int max_iter: max number of iterations in UnobservedComponents.fit (maximum likelihood estimator)
        :param bool return_df: set to `True` if you want this method to return the dataframe of model results

        :return: None or pandas.DataFrame of results
        """
        last_label = self._inter_index - 1
        endog = self.data.loc[:last_label, self._obs_col()].values
        exog = self.data.loc[:last_label, self._reg_cols()].values

        self._model = UnobservedComponents(
            endog,
            exog=exog,
            level='local linear trend',
            seasonal=self.n_seasons,
        )
        self._fit = self._model.fit(maxiter=max_iter)

        # Post-processing steps, in the order they must run
        self._get_estimates()
        self._get_difference_estimates()
        self._get_cumulative_estimates()

        return self.result if return_df else None
Example #26
0
def test_start_params():
    """Starting parameters must be sensible with multiple exogenous and
    autoregressive components."""
    # Parameters
    nobs = int(1e4)
    beta = np.r_[10, -2]
    phi = np.r_[0.5, 0.1]

    # Generate data: an AR(2) process plus a constant and linear regressor
    np.random.seed(1234)
    exog = np.column_stack((np.ones(nobs), np.arange(nobs)*1.0))
    eps = np.random.normal(size=nobs)
    ar_path = np.zeros(nobs+2)
    for t in range(1, nobs):
        ar_path[t+1] = phi[0] * ar_path[t] + phi[1] * ar_path[t-1] + eps[t]
    endog = ar_path[2:] + np.dot(exog, beta)

    # Now just test that the starting parameters are approximately what they
    # ought to be (could make this arbitrarily precise by increasing nobs,
    # but that would slow down the test for no real gain)
    mod = UnobservedComponents(endog, exog=exog, autoregressive=2)
    expected = [1., 0.5, 0.1, 10, -2]
    assert_allclose(mod.start_params, expected, atol=1e-1)
Example #27
0
def test_matrices_somewhat_complicated_model():
    """Compare the state space matrices of a model with trend, two
    frequency-domain seasonals and a damped cycle to hand-built answers."""
    values = dta.copy()

    model = UnobservedComponents(values['unemp'],
                                 level='lltrend',
                                 freq_seasonal=[{'period': 4},
                                                {'period': 9, 'harmonics': 3}],
                                 cycle=True,
                                 cycle_period_bounds=[2, 30],
                                 damped_cycle=True,
                                 stochastic_freq_seasonal=[True, False],
                                 stochastic_cycle=True
                                 )
    # Selected parameters
    irregular_var = 1
    level_var, trend_var = 3, 4
    freq_seasonal_var_0 = 5
    cycle_var, cycle_freq, cycle_damp = 6, 2*np.pi/30., .9
    params = [irregular_var, level_var, trend_var, freq_seasonal_var_0,
              cycle_var, cycle_freq, cycle_damp]
    model.update(params)

    # Check scalar properties; the terms are grouped by component
    # (trend, first seasonal, second seasonal, cycle)
    assert_equal(model.k_states, 2 + 4 + 6 + 2)
    assert_equal(model.k_state_cov, 2 + 1 + 0 + 1)
    assert_equal(model.loglikelihood_burn, 2 + 4 + 6 + 2)
    assert_allclose(model.ssm.k_posdef, 2 + 4 + 0 + 2)
    assert_equal(model.k_params, len(params))

    # Check the statespace model matrices against hand-constructed answers.
    # Each component contributes repeated [1, 0] pairs: 1, 2, 3 and 1 of them.
    pairs_per_component = (1, 2, 3, 1)
    expected_design = np.concatenate(
        [[1, 0] * n_pairs for n_pairs in pairs_per_component]).reshape(1, 14)
    assert_allclose(model.ssm.design[:, :, 0], expected_design)

    def rotation(angle):
        # 2x2 block [[cos, sin], [-sin, cos]] for the given angle
        return np.array([[np.cos(angle), np.sin(angle)],
                         [-np.sin(angle), np.cos(angle)]])

    trend_block = np.array([[1, 1],
                            [0, 1]])
    seasonal_4_block = np.array([[0, 1, 0, 0],
                                 [-1, 0, 0, 0],
                                 [0, 0, -1,  0],
                                 [0, 0,  0, -1]])
    # Second seasonal: one rotation per harmonic down the diagonal
    seasonal_9_block = np.zeros((6, 6))
    harmonic_angles = (2*np.pi*1/9., 2*np.pi*2/9., 2*np.pi/3.)
    for j, angle in enumerate(harmonic_angles):
        seasonal_9_block[2*j:2*j + 2, 2*j:2*j + 2] = rotation(angle)
    cycle_block = .9 * rotation(2*np.pi/30.)

    expected_transition = __direct_sum([
        trend_block, seasonal_4_block, seasonal_9_block, cycle_block])
    assert_allclose(
        model.ssm.transition[:, :, 0], expected_transition, atol=1e-7)

    # Since the second seasonal term is not stochastic,
    # the dimensionality of the state disturbance is 14 - 6 = 8
    expected_selection = np.zeros((14, 14 - 6))
    expected_selection[0:2, 0:2] = np.eye(2)
    expected_selection[2:6, 2:6] = np.eye(4)
    expected_selection[-2:, -2:] = np.eye(2)
    assert_allclose(model.ssm.selection[:, :, 0], expected_selection)

    expected_state_cov = __direct_sum([
        np.diag(params[1:3]),
        np.eye(4)*params[3],
        np.eye(2)*params[4]
    ])
    assert_allclose(model.ssm.state_cov[:, :, 0], expected_state_cov)
Example #28
0
def run_ucm(name):
    """Run the UnobservedComponents checks for one reference result set.

    Looks up ``name`` on ``results_structural`` and, for every model
    specification listed there, builds the model on the ``unemp`` column of
    ``dta`` and checks the start-parameter transform round trip, the cycle
    frequency bounds, the log-likelihood at the reference parameters and the
    log-likelihood reached by maximum likelihood estimation.

    Parameters
    ----------
    name : str
        Attribute of ``results_structural``.  The attribute is read as a
        mapping with the keys ``'models'``, ``'kwargs'``, ``'params'`` and
        ``'llf'``, plus the optional tolerances ``'rtol'`` and ``'atol'``.
    """
    true = getattr(results_structural, name)

    for model in true['models']:
        # Model-specific arguments, overridden by the shared ones
        kwargs = model.copy()
        kwargs.update(true['kwargs'])

        # Make a copy of the data
        values = dta.copy()

        freq = kwargs.pop('freq', None)
        if freq is not None:
            values.index = pd.date_range(start='1959-01-01', periods=len(dta),
                                         freq=freq)

        # Test pandas exog
        if 'exog' in kwargs:
            # Default value here is pd.Series object
            exog = np.log(values['realgdp'])

            # Also allow a check with a 1-dim numpy array
            if kwargs['exog'] == 'numpy':
                exog = exog.values.squeeze()

            kwargs['exog'] = exog

        # Create the model
        mod = UnobservedComponents(values['unemp'], **kwargs)

        # Smoke test for starting parameters, untransform, transform
        # Also test that transform and untransform are inverses
        mod.start_params
        roundtrip = mod.transform_params(
            mod.untransform_params(mod.start_params))
        assert_allclose(mod.start_params, roundtrip)

        # Fit the model at the true parameters
        res_true = mod.filter(true['params'])

        # Check that the cycle bounds were computed correctly
        freqstr = freq[0] if freq is not None else values.index.freqstr[0]
        if 'cycle_period_bounds' in kwargs:
            cycle_period_bounds = kwargs['cycle_period_bounds']
        elif freqstr == 'A':
            cycle_period_bounds = (1.5, 12)
        elif freqstr == 'Q':
            cycle_period_bounds = (1.5*4, 12*4)
        elif freqstr == 'M':
            cycle_period_bounds = (1.5*12, 12*12)
        else:
            # If we have no information on data frequency, require the
            # cycle frequency to be between 0 and pi
            cycle_period_bounds = (2, np.inf)

        # Test that the cycle frequency bound is correct
        # (a period bound p maps to the frequency 2*pi / p, so the order of
        # the two bounds is swapped)
        assert_equal(mod.cycle_frequency_bound,
                     (2*np.pi / cycle_period_bounds[1],
                      2*np.pi / cycle_period_bounds[0]))

        # Test that the likelihood is correct
        rtol = true.get('rtol', 1e-7)
        atol = true.get('atol', 0)
        assert_allclose(res_true.llf, true['llf'], rtol=rtol, atol=atol)

        # Optional smoke test for plot_components
        try:
            import matplotlib.pyplot as plt
            try:
                from pandas.plotting import register_matplotlib_converters
                register_matplotlib_converters()
            except ImportError:
                pass
            fig = plt.figure()
            try:
                res_true.plot_components(fig=fig)
            finally:
                # One figure is created per model specification; close it so
                # figures do not pile up over the loop.
                plt.close(fig)
        except ImportError:
            pass

        # Now fit the model via MLE
        with warnings.catch_warnings(record=True):
            res = mod.fit(disp=-1)
            # If we found a higher likelihood, no problem; otherwise check
            # that we're very close to that found by R
            if res.llf <= true['llf']:
                assert_allclose(res.llf, true['llf'], rtol=1e-4)

            # Smoke test for summary
            res.summary()
Example #29
0
    def fit(self, X, y=None):
        """Preprocess ``X`` as requested by ``self.mode`` and fit the model.

        ``self.mode`` is a base model name optionally followed by
        preprocessing suffixes.  ``'*f'`` caps the series at its 75th
        percentile; ``'*ln'`` applies ``log(x + 1)`` and ``'*bc'`` a Box-Cox
        transform (``'*ln'`` wins if both are present).  The suffixes are
        applied in that order and may appear in any order in the string.
        The base names handled here are ``'ll'``, ``'lla'``, ``'lls'``,
        ``'llt'``, ``'llc'``, ``'arima'`` and ``'rw1'``.

        Parameters
        ----------
        X : array-like
            The series to model.
        y : object, optional
            Not used by this method.

        Returns
        -------
        self
            ``self.X`` holds the preprocessed series, ``self.res_`` the
            fitted result (``None`` for ``'rw1'``) and ``self.converged``
            the convergence flag.  ``self.transformer`` is set when the
            Box-Cox transform is used.
        """
        self.X = X
        mode = self.mode
        # Set up front so the attribute exists whichever base model is used;
        # only the 'lla' branch fits with exogenous regressors.
        self.k_exog = None

        # Perform top percentile ceiling
        if '*f' in self.mode:
            self.X = np.minimum(self.X, np.percentile(self.X, 75))
            mode = mode.partition('*f')[0]
        # Perform transformation if specified by *transformation.
        # The suffix is stripped from the running ``mode`` (not from
        # ``self.mode``) so that a previously removed '*f' stays removed, and
        # the transform is applied to ``self.X`` so the ceiling is kept.
        if '*ln' in self.mode:
            self.X = np.log(np.array(self.X) + 1)
            mode = mode.partition('*ln')[0]
        elif '*bc' in self.mode:
            transformer = pm.preprocessing.BoxCoxEndogTransformer()
            # NOTE(review): pmdarima documents ``fit_transform`` as returning
            # a ``(y, X)`` pair -- confirm that storing the raw return value
            # is what downstream code expects.
            self.X = transformer.fit_transform(y=self.X)
            self.transformer = transformer
            mode = mode.partition('*bc')[0]

        try:
            if mode == 'll':
                # Local Level
                model = LocalLevel(self.X)
                self.res_ = model.fit(disp=False)
            elif mode == 'lla':
                # Local level with the two previous observations as
                # regressors.
                # NOTE(review): this branch is built from the raw ``X``, not
                # the preprocessed ``self.X`` -- confirm that is intended.
                endog = X[2:]
                exog = np.column_stack((X[1:-1], X[:-2]))
                self.k_exog = exog.shape[1]
                model = UnobservedComponents(endog=endog,
                                             exog=exog,
                                             level='local level')
                self.res_ = model.fit(disp=False)
            elif mode == 'lls':
                model = SARIMAX(endog=self.X,
                                order=(2, 0, 0),
                                trend='c',
                                measurement_error=True)
                self.res_ = model.fit(disp=False)
            elif mode == 'llt':
                # Local Linear Trend
                model = UnobservedComponents(endog=self.X,
                                             level='local linear trend')
                self.res_ = model.fit(disp=False)
            elif mode == 'llc':
                # Local Level Cycle
                model = UnobservedComponents(endog=self.X,
                                             level='local level',
                                             cycle=True,
                                             stochastic_cycle=True)
                self.res_ = model.fit(disp=False)
            elif mode == 'arima':
                self.res_ = pm.auto_arima(self.X,
                                          start_p=1,
                                          start_q=1,
                                          start_P=1,
                                          start_Q=1,
                                          max_p=5,
                                          max_q=5,
                                          max_P=5,
                                          max_Q=5,
                                          seasonal=True,
                                          stepwise=True,
                                          suppress_warnings=True,
                                          D=10,
                                          max_D=10,
                                          error_action='ignore')
            elif mode == 'rw1':
                # For RW model
                self.res_ = None
                self.converged = False
        except np.linalg.LinAlgError:
            # Some kalman filter error ==> Use random walk
            print(f'Convergence failed for {mode}')
            self.converged = False
            return self
        try:
            self.converged = self.res_.mle_retvals['converged']
        except AttributeError:
            # Raised when the result has no ``mle_retvals`` (for example the
            # ``None`` placeholder stored by 'rw1', which already set the
            # flag above).
            if mode == 'arima':
                self.converged = True  # auto ARIMA from pmdarima should always converge
        return self
Example #30
0
def test_custom_model_fit(rand_data, pre_int_period, post_int_period,
                          monkeypatch):
    """Check the keyword arguments CausalImpact passes to a custom model.

    Each scenario builds an ``UnobservedComponents`` model on the
    pre-intervention data, replaces its ``fit`` with a shared mock, runs
    ``CausalImpact`` with that model and asserts on the arguments of the
    most recent ``fit`` call: ``bounds``, ``disp``, ``nseasons``,
    ``standardize`` and, when it was passed, ``prior_level_sd``.
    """
    fit_mock = mock.Mock()
    # Stub out the posterior processing step; presumably required because
    # the mocked ``fit`` returns no usable results -- TODO confirm.
    monkeypatch.setattr(
        'causalimpact.main.CausalImpact._process_posterior_inferences',
        mock.Mock())

    # Pre-intervention rows: column 0 is used as endog, the rest as exog.
    pre_data = rand_data.loc[pre_int_period[0]:pre_int_period[1], :]
    model = UnobservedComponents(endog=pre_data.iloc[:, 0],
                                 level='llevel',
                                 exog=pre_data.iloc[:, 1:])

    model.fit = fit_mock

    # No extra options: ``disp=False`` is expected, and the second bound is
    # expected to be 0.01 scaled down/up by 1.2 (presumably derived from a
    # default ``prior_level_sd`` of 0.01 -- confirm against CausalImpact).
    CausalImpact(rand_data, pre_int_period, post_int_period, model=model)
    fit_mock.assert_called_with(bounds=[(None, None), (0.01 / 1.2, 0.01 * 1.2),
                                        (None, None), (None, None)],
                                disp=False,
                                nseasons=[],
                                standardize=True)

    # An explicit ``disp=True`` is expected to reach ``fit``.
    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 model=model,
                 disp=True)
    fit_mock.assert_called_with(bounds=[(None, None), (0.01 / 1.2, 0.01 * 1.2),
                                        (None, None), (None, None)],
                                disp=True,
                                nseasons=[],
                                standardize=True)

    # An explicit ``prior_level_sd=0.01`` is expected to give the same bounds
    # and to be passed on to ``fit`` as well.
    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 model=model,
                 disp=True,
                 prior_level_sd=0.01)
    fit_mock.assert_called_with(bounds=[(None, None), (0.01 / 1.2, 0.01 * 1.2),
                                        (None, None), (None, None)],
                                disp=True,
                                prior_level_sd=0.01,
                                nseasons=[],
                                standardize=True)

    # ``prior_level_sd=None``: every bound is expected to be unconstrained.
    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 model=model,
                 disp=True,
                 prior_level_sd=None)
    fit_mock.assert_called_with(bounds=[(None, None), (None, None),
                                        (None, None), (None, None)],
                                disp=True,
                                prior_level_sd=None,
                                nseasons=[],
                                standardize=True)

    # Same model plus a ``freq_seasonal`` component: five bounds are
    # expected, the second one built from ``prior_level_sd=0.001``.
    model = UnobservedComponents(endog=pre_data.iloc[:, 0],
                                 level='llevel',
                                 exog=pre_data.iloc[:, 1:],
                                 freq_seasonal=[{
                                     'period': 3
                                 }])
    model.fit = fit_mock

    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 model=model,
                 disp=True,
                 prior_level_sd=0.001)
    fit_mock.assert_called_with(bounds=[
        (None, None), (0.001 / 1.2, 0.001 * 1.2), (None, None), (None, None),
        (None, None)
    ],
                                disp=True,
                                prior_level_sd=0.001,
                                nseasons=[],
                                standardize=True)

    # Model specified through boolean flags, with a single exog column:
    # three bounds are expected and the prior-derived one comes first.
    model = UnobservedComponents(endog=pre_data.iloc[:, 0],
                                 level=True,
                                 exog=pre_data.iloc[:, 1],
                                 trend=True,
                                 seasonal=3,
                                 stochastic_level=True)
    model.fit = fit_mock

    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 model=model,
                 disp=True,
                 prior_level_sd=0.001)
    fit_mock.assert_called_with(bounds=[(0.001 / 1.2, 0.001 * 1.2),
                                        (None, None), (None, None)],
                                disp=True,
                                prior_level_sd=0.001,
                                nseasons=[],
                                standardize=True)

    # Keep only the 'y' and 'x1' columns, so exog is the single column that
    # follows 'y'; three bounds are expected for the 'llevel' model.
    new_pre_data = rand_data.loc[pre_int_period[0]:pre_int_period[1],
                                 ['y', 'x1']]
    model = UnobservedComponents(endog=new_pre_data.iloc[:, 0],
                                 level='llevel',
                                 exog=new_pre_data.iloc[:, 1:])

    model.fit = fit_mock

    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 model=model,
                 disp=False)
    fit_mock.assert_called_with(bounds=[(None, None), (0.01 / 1.2, 0.01 * 1.2),
                                        (None, None)],
                                disp=False,
                                nseasons=[],
                                standardize=True)

    # 'dtrend' level: two bounds are expected, both unconstrained.
    model = UnobservedComponents(endog=new_pre_data.iloc[:, 0],
                                 level='dtrend',
                                 exog=new_pre_data.iloc[:, 1:])
    model.fit = fit_mock

    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 model=model,
                 disp=False)
    fit_mock.assert_called_with(bounds=[(None, None), (None, None)],
                                disp=False,
                                nseasons=[],
                                standardize=True)

    # 'lltrend' level: four bounds are expected, with the 0.01-based bound
    # in second position.
    model = UnobservedComponents(endog=new_pre_data.iloc[:, 0],
                                 level='lltrend',
                                 exog=new_pre_data.iloc[:, 1:])
    model.fit = fit_mock

    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 model=model,
                 disp=False)
    fit_mock.assert_called_with(bounds=[(None, None), (0.01 / 1.2, 0.01 * 1.2),
                                        (None, None), (None, None)],
                                disp=False,
                                nseasons=[],
                                standardize=True)