Ejemplo n.º 1
0
    def recursive_coefficients(self):
        """
        Recursively estimated regression coefficients.

        Returns
        -------
        out : Bunch
            Container with the following attributes:

            - `filtered`: a time series array with the filtered estimate of
              the component (rows ``0:k_exog`` of `filtered_state`)
            - `filtered_cov`: a time series array with the filtered estimate
              of the variance/covariance of the component
            - `smoothed`: a time series array with the smoothed estimate of
              the component, or None when `smoothed_state` is None
            - `smoothed_cov`: a time series array with the smoothed estimate
              of the variance/covariance of the component, or None when
              `smoothed_state_cov` is None
            - `offset`: an integer giving the offset in the state vector where
              this component begins (always 0 in this implementation)
        """
        # The coefficients are taken from the first `k_exog` state entries.
        offset = 0
        coef = slice(offset, offset + self.specification.k_exog)

        out = Bunch(
            filtered=self.filtered_state[coef],
            filtered_cov=self.filtered_state_cov[coef, coef],
            smoothed=None,
            smoothed_cov=None,
            offset=offset,
        )

        # Smoothed output is optional; fill it in only when it exists.
        smoothed_state = self.smoothed_state
        if smoothed_state is not None:
            out.smoothed = smoothed_state[coef]

        smoothed_state_cov = self.smoothed_state_cov
        if smoothed_state_cov is not None:
            out.smoothed_cov = smoothed_state_cov[coef, coef]

        return out
Ejemplo n.º 2
0
def partial_project(endog, exog):
    """Project ``endog`` on ``exog`` by least squares and partial out ``exog``.

    Parameters
    ----------
    endog : ndarray
        Variables from which the effect of ``exog`` is partialled out.
    exog : ndarray
        Variables on which the ``endog`` variables are projected.

    Returns
    -------
    res : Bunch
        Container with

        - params : least squares coefficients of the projection of ``endog``
          on ``exog``, computed with the pseudoinverse of ``exog``
        - fittedvalues : predicted values of ``endog`` given ``exog``
        - resid : ``endog`` minus the fitted values, i.e. ``endog`` with the
          effect of ``exog`` partialled out

    Notes
    -----
    This is a no-frills helper mainly for internal calculations: no error
    checking or array conversion is performed.
    """
    coefs = np.linalg.pinv(exog).dot(endog)
    projection = exog.dot(coefs)
    return Bunch(
        params=coefs,
        fittedvalues=projection,
        resid=endog - projection,
    )
Ejemplo n.º 3
0
    def x13_seasonal_decompose(self):
        """
        Collect the adjusted, trend-cycle, irregular, seasonal and calendar
        series from the analysis results.

        Returns
        -------
        res : Bunch
            A Bunch object with the following elements:

            - seasadj : pandas.Series
                The data after the final factors have been removed following
                seasonal decomposition (i.e. the original data without the
                seasonal and calendar adjustment factors).
            - trend : pandas.Series
                The final trend-cycle component of the decomposition.
            - irregular : pandas.Series
                The final irregular component of the decomposition.
            - seasonal : pandas.Series
                The final seasonal adjustment factors (outlier adjustments,
                moving-holiday effects and similar adjustments are handled in
                the prior adjustment and are included in these factors).
            - calendar : pandas.Series or None
                If trading is True, the final calendar adjustment factors
                (mainly the trading-day adjustment); None if trading is
                False.
        """
        # Three components are read straight from the stored analysis result.
        adjusted = self.analysis_res.seasadj
        trend_cycle = self.analysis_res.trend
        remainder = self.analysis_res.irregular

        # The other two are produced by dedicated helper methods.
        seasonal_factors = self._seasonal_resolution()
        calendar_factors = self._calendar_resolution()

        return Bunch(
            seasadj=adjusted,
            trend=trend_cycle,
            irregular=remainder,
            seasonal=seasonal_factors,
            calendar=calendar_factors,
        )
Ejemplo n.º 4
0
def _package_fixed_and_free_params_info(fixed_params, spec_ar_lags,
                                        spec_ma_lags):
    """
    Split the AR and MA lags of a specification into fixed and free groups.

    Parameters
    ----------
    fixed_params : dict
        Maps parameter names to their fixed values. The lag is parsed from
        the text after the last "." of each name, with any leading "L"
        characters stripped (e.g. a name ending in ".L2" gives lag 2). Names
        starting with "ar" / "ma" are collected as fixed AR / MA parameters;
        other names are not collected, but their suffix must still parse as
        an integer.
    spec_ar_lags : list of int
        SARIMAXSpecification.ar_lags
    spec_ma_lags : list of int
        SARIMAXSpecification.ma_lags

    Returns
    -------
    Bunch with
    (lags) fixed_ar_lags, fixed_ma_lags, free_ar_lags, free_ma_lags;
    (ix) fixed_ar_ix, fixed_ma_ix, free_ar_ix, free_ma_ix;
    (params) fixed_ar_params, fixed_ma_params

    Notes
    -----
    The fixed lags and fixed params are sorted by lag. Each ``*_ix`` array
    has integer dtype and holds ``lag - 1`` for the matching ``*_lags`` list.
    """
    # unpack fixed lags and params
    fixed_ar_lags_and_params = []
    fixed_ma_lags_and_params = []
    for key, val in fixed_params.items():
        lag = int(key.split(".")[-1].lstrip("L"))
        if key.startswith("ar"):
            fixed_ar_lags_and_params.append((lag, val))
        elif key.startswith("ma"):
            fixed_ma_lags_and_params.append((lag, val))

    # sort so that lags and params line up in increasing lag order
    fixed_ar_lags_and_params.sort()
    fixed_ma_lags_and_params.sort()

    fixed_ar_lags = [lag for lag, _ in fixed_ar_lags_and_params]
    fixed_ar_params = np.array([val for _, val in fixed_ar_lags_and_params])

    fixed_ma_lags = [lag for lag, _ in fixed_ma_lags_and_params]
    fixed_ma_params = np.array([val for _, val in fixed_ma_lags_and_params])

    # unpack free lags; the lookup sets are built once rather than once per
    # candidate lag
    fixed_ar_lag_set = set(fixed_ar_lags)
    fixed_ma_lag_set = set(fixed_ma_lags)
    free_ar_lags = [lag for lag in spec_ar_lags
                    if lag not in fixed_ar_lag_set]
    free_ma_lags = [lag for lag in spec_ma_lags
                    if lag not in fixed_ma_lag_set]

    # get ix for indexing purposes: `ar_ix`, and `ma_ix` below, are to account
    # for non-consecutive lags; for indexing purposes, must have dtype int
    free_ar_ix = np.array(free_ar_lags, dtype=int) - 1
    free_ma_ix = np.array(free_ma_lags, dtype=int) - 1
    fixed_ar_ix = np.array(fixed_ar_lags, dtype=int) - 1
    fixed_ma_ix = np.array(fixed_ma_lags, dtype=int) - 1

    return Bunch(
        # lags
        fixed_ar_lags=fixed_ar_lags, fixed_ma_lags=fixed_ma_lags,
        free_ar_lags=free_ar_lags, free_ma_lags=free_ma_lags,
        # ixs
        fixed_ar_ix=fixed_ar_ix, fixed_ma_ix=fixed_ma_ix,
        free_ar_ix=free_ar_ix, free_ma_ix=free_ma_ix,
        # fixed params
        fixed_ar_params=fixed_ar_params, fixed_ma_params=fixed_ma_params,
    )
Ejemplo n.º 5
0
def plot_data(request):
    """
    Bundle the settings in ``request.param`` with freshly generated data.

    ``request.param`` is read as eight values, in order: lags, trend,
    seasonal, nexog, period, missing, use_pandas, hold_back. The data come
    from ``gen_data(250, nexog, use_pandas)``; ``use_pandas`` and ``nexog``
    are only used to generate the data and are not stored in the result.
    """
    settings = request.param
    lags, trend, seasonal = settings[:3]
    nexog, period, missing, use_pandas, hold_back = settings[3:]

    generated = gen_data(250, nexog, use_pandas)

    return Bunch(
        trend=trend,
        lags=lags,
        seasonal=seasonal,
        period=period,
        endog=generated.endog,
        exog=generated.exog,
        missing=missing,
        hold_back=hold_back,
    )
def get_sarimax_models(endog, filter_univariate=False, **kwargs):
    """
    Build a concentrated and a matching non-concentrated SARIMAX model.

    Parameters
    ----------
    endog : array_like
        Data passed to both ``sarimax.SARIMAX`` constructors.
    filter_univariate : bool, optional
        Value assigned to ``ssm.filter_univariate`` on both models.
    **kwargs
        Forwarded to both ``sarimax.SARIMAX`` constructors; ``tolerance``
        defaults to 0 when not given.

    Returns
    -------
    Bunch
        With fields ``mod_conc``, ``params_conc``, ``mod_orig``,
        ``params_orig``, ``res_conc``, ``res_orig`` and ``scale``.
    """
    kwargs.setdefault('tolerance', 0)

    # Concentrated model: smooth with the last start parameter set to 1 and
    # read the scale estimate off the results.
    concentrated = sarimax.SARIMAX(endog, **kwargs)
    concentrated.ssm.filter_concentrated = True
    concentrated.ssm.filter_univariate = filter_univariate
    conc_params = concentrated.start_params
    conc_params[-1] = 1
    conc_results = concentrated.smooth(conc_params)
    scale = conc_results.scale

    # Non-concentrated model: same parameters, except that the trailing
    # entries (one, plus one more when `measurement_error` is truthy) are
    # multiplied by the estimated scale.
    original = sarimax.SARIMAX(endog, **kwargs)
    original.ssm.filter_univariate = filter_univariate
    orig_params = conc_params.copy()
    n_scaled = 1 + kwargs.get('measurement_error', False)
    orig_params[-n_scaled:] = scale * conc_params[-n_scaled:]
    orig_results = original.smooth(orig_params)

    return Bunch(
        mod_conc=concentrated,
        params_conc=conc_params,
        mod_orig=original,
        params_orig=orig_params,
        res_conc=conc_results,
        res_orig=orig_results,
        scale=scale,
    )
Ejemplo n.º 7
0
    def __init__(self, model, params, filter_results, cov_type='opg',
                 **kwargs):
        """
        Initialize the results, then adjust the degrees of freedom, store the
        model specification and trim the forecast-related arrays when the
        model has a constraint matrix.
        """
        super(RecursiveLSResults, self).__init__(
            model, params, filter_results, cov_type, **kwargs)

        # The params stored here are not MLE params, so the degrees of
        # freedom set by the parent class are replaced.
        n_burn = max(self.loglikelihood_burn, self.k_diffuse_states)
        self.df_model = n_burn - self.model.k_constraints
        self.df_resid = self.nobs_effective - self.df_model

        # Keep the keyword arguments the model reports for its constructor
        self._init_kwds = self.model._get_init_kwds()

        # Keep the model specification
        self.specification = Bunch(
            k_exog=self.model.k_exog,
            k_constraints=self.model.k_constraints,
        )

        # Without a constraint matrix there is nothing left to adjust
        if self.model._r_matrix is None:
            return

        # Remove the "faux" endog from the constraints by keeping only the
        # first row of each forecast-related array
        forecast_attrs = (
            'forecasts',
            'forecasts_error',
            'forecasts_error_cov',
            'standardized_forecasts_error',
            'forecasts_error_diffuse_cov',
        )
        for attr in forecast_attrs:
            trimmed = getattr(self, attr)[:1]
            setattr(self, attr, trimmed)
Ejemplo n.º 8
0
def perfect_fit_data(request):
    """
    Build data in which ``endog`` is an exact function of the regressors.

    ``endog`` equals ``x + x**2`` for 1000 seeded standard-normal draws
    ``x``; ``exog`` holds ``x`` and ``x**2`` passed through
    ``sm.add_constant``. ``const`` is an array shaped like ``endog`` filled
    with 3.2. ``request`` is not used.
    """
    from statsmodels.tools.tools import Bunch

    rng = np.random.RandomState(1249328932)
    x = rng.standard_normal((1000, 1))
    x_squared = x**2

    response = x + x_squared
    design = sm.add_constant(np.c_[x, x_squared])
    offset = 3.2 * np.ones_like(response)

    return Bunch(endog=response, exog=design, const=offset)
Ejemplo n.º 9
0
    def _fit_once(self):
        """
        Run one smoothing pass with the model's fixed parameters.

        Returns
        -------
        SmoothingResultsWrapper
            Wraps a SmoothingResults built from this model and a Bunch
            holding the fitted values, residuals, level / trend / season
            arrays, the smoothing settings and the first forecast of level
            and trend.
        """
        alpha, gamma, delta, damp = self._fixed_params[:4]
        initial = self.initial
        trend, season = self.trendtype, self.seasontype
        nobs = self.nobs
        y = self.data.endog
        period = self.period

        # Level and trend carry one extra slot for the initial value.
        level = np.zeros(nobs + 1)
        slope = np.zeros(nobs + 1)
        # The seasonal array gets `period` extra slots (initial data and
        # forecasts) when there is a seasonal period.
        n_seasonal = nobs + period if period else nobs
        seasonal = np.zeros(n_seasonal)

        # Fill in the starting values
        if not period:
            level, slope = _init_nonseasonal_params(initial, level, slope, y,
                                                    gamma, trend)
        else:
            level, slope, seasonal = _init_seasonal_params(
                initial, level, slope, seasonal, period, gamma, y, season)

        # The smoothing routine is looked up by (season, trend)
        smoother = _compute_smoothing[(season, trend)]
        level, slope, seasonal = smoother(y, level, slope, seasonal, alpha,
                                          gamma, damp, period, delta, nobs)

        # Special case for Brown linear: both right-hand sides are computed
        # from the untransformed arrays before either name is rebound.
        if trend.startswith('b'):
            level, slope = (2 * level - slope,
                            alpha / (1 - alpha) * (level - slope))

        # The fitted-value routine is looked up the same way
        fitter = _compute_fitted[(season, trend)]
        fitted = fitter(level[:nobs], slope[:nobs], seasonal[:nobs], damp)
        # NOTE: could compute other residuals for the non-linear model
        resid = y - fitted

        summary = Bunch(
            fitted=fitted,
            resid=resid,
            _level=level,
            _trend=slope,
            _season=seasonal,
            trendtype=trend,
            seasontype=season,
            damp=damp,
            period=period,
            alpha=alpha,
            gamma=gamma,
            delta=delta,
            # the last entries are saved as the first forecast
            _forecast_level=level[-1],
            _forecast_trend=slope[-1],
        )
        return SmoothingResultsWrapper(SmoothingResults(self, summary))
    def fit(self, method='pinv'):
        """
        Minimal implementation of WLS optimized for performance.

        Parameters
        ----------
        method : str, optional
            Method used to estimate the parameters.

              * "pinv" uses the Moore-Penrose pseudoinverse of the weighted
                regressors.
              * "qr" uses the QR factorization of the weighted regressors.
              * any other value uses ``numpy.linalg.lstsq``.

        Returns
        -------
        results : Bunch
            The fewest terms needed to implement iterative estimation in
            models. Currently

              * params : Estimated parameters
              * fittedvalues : Fit values using original data
              * resid : Residuals using original data
              * model : this model instance
              * scale : scale computed using weighted residuals

        Notes
        -----
        Does not perform any checks on the input data.

        See Also
        --------
        statsmodels.regression.linear_model.WLS
        """
        wexog = self.wexog
        wendog = self.wendog

        if method == 'pinv':
            params = np.linalg.pinv(wexog).dot(wendog)
        elif method == 'qr':
            q_factor, r_factor = np.linalg.qr(wexog)
            params = np.linalg.solve(r_factor, np.dot(q_factor.T, wendog))
        else:
            # only the solution of the least squares problem is needed
            params = np.linalg.lstsq(wexog, wendog, rcond=-1)[0]

        # Fitted values and residuals use the original (unweighted) data
        fitted_values = self.exog.dot(params)
        resid = self.endog - fitted_values

        # The scale uses the weighted residuals and rows minus columns of
        # the weighted regressors
        weighted_resid = wendog - wexog.dot(params)
        df_resid = wexog.shape[0] - wexog.shape[1]
        scale = np.dot(weighted_resid, weighted_resid) / df_resid

        return Bunch(
            params=params,
            fittedvalues=fitted_values,
            resid=resid,
            model=self,
            scale=scale,
        )
Ejemplo n.º 11
0
    def __init__(self, model, params, filter_results, cov_type='opg',
                 **kwargs):
        """
        Initialize the results and store the model specification.
        """
        super(RecursiveLSResults, self).__init__(
            model, params, filter_results, cov_type, **kwargs)

        # Wald tests require a `df_resid` attribute
        self.df_resid = np.inf

        # Keep the keyword arguments the model reports for its constructor
        self._init_kwds = self.model._get_init_kwds()

        # Keep the model specification
        self.specification = Bunch(k_exog=self.model.k_exog)
Ejemplo n.º 12
0
    def recursive_coefficients(self):
        """
        Regression coefficient estimates obtained recursively.

        Returns
        -------
        out : Bunch
            Holds these attributes:

            - `filtered`: a time series array with the filtered estimate of
              the component
            - `filtered_cov`: a time series array with the filtered estimate
              of the variance/covariance of the component
            - `smoothed`: a time series array with the smoothed estimate of
              the component; None if `smoothed_state` is None
            - `smoothed_cov`: a time series array with the smoothed estimate
              of the variance/covariance of the component; None if
              `smoothed_state_cov` is None
            - `offset`: an integer giving the offset in the state vector where
              this component begins (0 in this implementation)
        """
        spec = self.specification
        offset = 0
        # Slice covering the `k_exog` coefficient entries of the state
        block = slice(offset, offset + spec.k_exog)

        out = Bunch(filtered=self.filtered_state[block],
                    filtered_cov=self.filtered_state_cov[block, block],
                    smoothed=None,
                    smoothed_cov=None,
                    offset=offset)

        # Smoothed quantities are attached only when they are available
        state = self.smoothed_state
        if state is not None:
            out.smoothed = state[block]
        state_cov = self.smoothed_state_cov
        if state_cov is not None:
            out.smoothed_cov = state_cov[block, block]
        return out
Ejemplo n.º 13
0
    def test_all(self):
        """
        Fit a discretized gamma model to grouped counts and compare the fit,
        predicted probabilities, chi-square test and bootstrap with
        reference numbers.
        """
        # expand frequencies to observations, (no freq_weights yet)
        freq = [46, 76, 24, 9, 1]
        y = np.repeat(np.arange(5), freq)
        nobs = len(y)

        # results from article table 7
        expected = Bunch(
            params=[3.52636, 0.425617],
            llf=-187.469,
            chi2=1.701208,  # chisquare test
            df_model=2,
            p=0.4272,  # p-value for chi2
            aic=378.938,
            probs=[46.48, 73.72, 27.88, 6.5, 1.42],
        )

        distr = DiscretizedCount(stats.gamma)
        model = DiscretizedModel(y, distr=distr)
        fitted = model.fit(start_params=[1, 1])

        assert_allclose(fitted.params, expected.params, rtol=1e-5)
        assert_allclose(fitted.llf, expected.llf, atol=6e-3)
        assert_allclose(fitted.aic, expected.aic, atol=6e-3)
        assert_equal(fitted.df_model, expected.df_model)

        # Keep as many cells as the reference and put the remaining
        # probability mass into the last cell (in place)
        probs = model.predict(fitted.params, which="probs")
        probs_trunc = probs[:len(expected.probs)]
        probs_trunc[-1] += 1 - probs_trunc.sum()
        expected_counts = probs_trunc * nobs
        assert_allclose(expected_counts, expected.probs, atol=6e-2)

        assert_allclose(np.sum(freq), expected_counts.sum(), rtol=1e-10)
        chi2_test = stats.chisquare(freq, expected_counts,
                                    ddof=len(fitted.params))
        # regression test, numbers from running test
        # close but not identical to article
        assert_allclose(chi2_test.statistic, 1.70409356, rtol=1e-7)
        assert_allclose(chi2_test.pvalue, 0.42654100, rtol=1e-7)

        # smoke test for summary
        fitted.summary()

        # seed right before the bootstrap
        np.random.seed(987146)
        boot = fitted.bootstrap()
        # only loose check, small default n_rep=100, agreement at around 3%
        assert_allclose(fitted.params, boot[0], rtol=0.05)
        assert_allclose(fitted.bse, boot[1], rtol=0.05)
Ejemplo n.º 14
0
    def __init__(self, model, params, filter_results, cov_type='opg',
                 cov_kwds=None, **kwargs):
        """
        Initialize the results, store the model specification and build the
        VAR / VMA coefficient matrices.

        `coefficient_matrices_var` (`coefficient_matrices_vma`) stays None
        unless the model has `k_ar > 0` (`k_ma > 0`); otherwise it is an
        array of shape (number of lags, k_endog, k_endog).
        """
        super(VARMAXResults, self).__init__(model, params, filter_results,
                                            cov_type, cov_kwds, **kwargs)

        self.specification = Bunch(
            # Set additional model parameters
            error_cov_type=self.model.error_cov_type,
            measurement_error=self.model.measurement_error,
            enforce_stationarity=self.model.enforce_stationarity,
            enforce_invertibility=self.model.enforce_invertibility,
            trend_offset=self.model.trend_offset,
            order=self.model.order,
            # Model order
            k_ar=self.model.k_ar,
            k_ma=self.model.k_ma,
            # Trend / Regression
            trend=self.model.trend,
            k_trend=self.model.k_trend,
            k_exog=self.model.k_exog,
        )

        def _lag_matrices(flat_params, n_lags):
            # Rearrange a flat parameter vector into one
            # (k_endog, k_endog) matrix per lag.
            k_endog = self.model.k_endog
            stacked = flat_params.reshape(k_endog * n_lags, k_endog).T
            return stacked.reshape(k_endog, k_endog, n_lags).T

        # Polynomials / coefficient matrices
        self.coefficient_matrices_var = None
        self.coefficient_matrices_vma = None

        if self.model.k_ar > 0:
            var_params = np.array(self.params[self.model._params_ar])
            self.coefficient_matrices_var = _lag_matrices(
                var_params, self.model.k_ar)

        if self.model.k_ma > 0:
            vma_params = np.array(self.params[self.model._params_ma])
            self.coefficient_matrices_vma = _lag_matrices(
                vma_params, self.model.k_ma)
Ejemplo n.º 15
0
    def __init__(self, model, params, filter_results, cov_type='opg',
                 **kwargs):
        """
        Initialize the results, adjust the degrees of freedom and store the
        model specification.
        """
        super(RecursiveLSResults, self).__init__(
            model, params, filter_results, cov_type, **kwargs)

        # The params stored here are not MLE params, so the degrees of
        # freedom set by the parent class are replaced.
        n_burn = max(self.loglikelihood_burn, self.k_diffuse_states)
        self.df_model = n_burn - self.model.k_constraints
        self.df_resid = self.nobs_effective - self.df_model

        # Keep the keyword arguments the model reports for its constructor
        self._init_kwds = self.model._get_init_kwds()

        # Keep the model specification
        self.specification = Bunch(
            k_exog=self.model.k_exog,
            k_constraints=self.model.k_constraints,
        )
Ejemplo n.º 16
0
    def results(self, params):
        """
        Construct results for a given parameter vector.

        Parameters
        ----------
        params : ndarray
            Model parameters

        Returns
        -------
        Bunch
            With fields ``params``, ``fittedvalues`` and ``resid`` (both from
            the original data), ``model`` (this instance) and ``scale``
            (weighted residual sum of squares divided by the number of rows
            minus the number of columns of the weighted regressors).

        Notes
        -----
        Allows results to be constructed from either existing parameters or
        from parameters estimated using ``fit``.
        """
        predicted = self.exog.dot(params)
        weighted_resid = self.wendog - self.wexog.dot(params)
        dof = self.wexog.shape[0] - self.wexog.shape[1]

        return Bunch(
            params=params,
            fittedvalues=predicted,
            resid=self.endog - predicted,
            model=self,
            scale=np.dot(weighted_resid, weighted_resid) / dof,
        )
Ejemplo n.º 17
0
 def test_acorr_breusch_godfrey_multidim(self):
     """A results object with 2-d residuals must raise a ValueError."""
     resid_2d = np.empty((100, 2))
     fake_results = Bunch(resid=resid_2d)
     expected_message = 'Model resid must be a 1d array'
     with pytest.raises(ValueError, match=expected_message):
         smsdia.acorr_breusch_godfrey(fake_results)
Ejemplo n.º 18
0
def _spg_optim(func, grad, start, project, maxiter=1e4, M=10, ctol=1e-3,
               maxiter_nmls=200, lam_min=1e-30, lam_max=1e30, sig1=0.1,
               sig2=0.9, gam=1e-4):
    """
    Minimize a differentiable function on a convex domain with the spectral
    projected gradient method.

    Parameters
    ----------
    func : real valued function
        The objective function to be minimized.
    grad : real array-valued function
        The gradient of the objective function
    start : array_like
        The starting point; it is copied with ``start.copy()``.
    project : function
        In-place projection of the argument to the domain
        of func.
    ... See notes regarding additional arguments

    Returns
    -------
    rslt : Bunch
        rslt.params is the final iterate, other fields (``Converged``,
        ``objective_values``, ``Message``) describe convergence status.

    Notes
    -----
    This can be an effective heuristic algorithm for problems where no
    guaranteed algorithm for computing a global minimizer is known.

    There are a number of tuning parameters, but these generally
    should not be changed except for `maxiter` (positive integer) and
    `ctol` (small positive real).  See the Birgin et al reference for
    more information about the tuning parameters.

    Reference
    ---------
    E. Birgin, J.M. Martinez, and M. Raydan. Spectral projected
    gradient methods: Review and perspectives. Journal of Statistical
    Software (preprint).  Available at:
    http://www.ime.usp.br/~egbirgin/publications/bmr5.pdf
    """

    def _outcome(converged, point, message):
        # All three exits report the same fields.
        return Bunch(Converged=converged,
                     params=point,
                     objective_values=obj_hist,
                     Message=message)

    lam = min(10 * lam_min, lam_max)

    params = start.copy()
    gval = grad(params)
    obj_hist = [func(params)]

    for _ in range(int(maxiter)):

        # Convergence check: distance between the current point and the
        # projection of a unit gradient step
        proj_step = params - gval
        project(proj_step)
        proj_step -= params
        if np.max(np.abs(proj_step)) < ctol:
            return _outcome(True, params, "Converged successfully")

        # The line search direction
        direction = params - lam * gval
        project(direction)
        direction -= params

        # Carry out the nonmonotone line search
        alpha, params_new, fval, gval_new = _nmono_linesearch(
            func, grad, params, direction, obj_hist,
            M=M, sig1=sig1, sig2=sig2, gam=gam, maxiter=maxiter_nmls)

        if alpha is None:
            return _outcome(False, params, "Failed in nmono_linesearch")

        obj_hist.append(fval)

        # Update the spectral step length from the changes in the iterate
        # and in the gradient
        s = params_new - params
        y = gval_new - gval
        sy = (s * y).sum()
        lam = (lam_max if sy <= 0
               else max(lam_min, min((s * s).sum() / sy, lam_max)))

        params, gval = params_new, gval_new

    return _outcome(False, params, "spg_optim did not converge")
Ejemplo n.º 19
0
def innovations_mle(endog,
                    order=(0, 0, 0),
                    seasonal_order=(0, 0, 0, 0),
                    demean=True,
                    enforce_invertibility=True,
                    start_params=None,
                    minimize_kwargs=None):
    """
    Estimate SARIMA parameters by MLE using innovations algorithm.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the SARIMA coefficients. Default is True.
    enforce_invertibility : bool, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. The
        AR polynomial must be stationary. If `enforce_invertibility=True` the
        MA polynomial must be invertible. If not provided, default starting
        parameters are computed using the Hannan-Rissanen method.
    minimize_kwargs : dict, optional
        Arguments to pass to scipy.optimize.minimize. The dict (and its
        `options` entry) is copied, so the caller's objects are not modified;
        `options['maxiter']` defaults to 100 when not given.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes four components: `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments; `minimize_kwargs`,
        containing the keyword arguments actually passed to `minimize`
        (including the defaulted `options`); `start_params`, containing the
        starting parameters before they are untransformed for `minimize`;
        and `minimize_results`, containing the output from `minimize`.

    Raises
    ------
    ValueError
        If `start_params` is given and implies a non-stationary AR process,
        or a non-invertible MA process with `enforce_invertibility=True`.

    Notes
    -----
    The primary reference is [1]_, section 5.2.

    Note: we do not include `enforce_stationarity` as an argument, because this
    function requires stationarity.

    TODO: support concentrating out the scale (should be easy: use sigma2=1
          and then compute sigma2=np.sum(u**2 / v) / len(u); would then need to
          redo llf computation in the Cython function).

    TODO: add support for fixed parameters

    TODO: add support for secondary optimization that does not enforce
          stationarity / invertibility, starting from first step's parameters

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog,
                                order=order,
                                seasonal_order=seasonal_order,
                                enforce_stationarity=True,
                                enforce_invertibility=enforce_invertibility)
    endog = spec.endog
    if spec.is_integrated:
        warnings.warn('Provided `endog` series has been differenced to'
                      ' eliminate integration prior to ARMA parameter'
                      ' estimation.')
        endog = diff(endog,
                     k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
    if demean:
        endog = endog - endog.mean()

    # `p` is updated by the objective function and holds the final estimate
    p = SARIMAXParams(spec=spec)

    if start_params is None:
        sp = SARIMAXParams(spec=spec)

        # Estimate starting parameters via Hannan-Rissanen
        hr, hr_results = hannan_rissanen(endog,
                                         ar_order=spec.ar_order,
                                         ma_order=spec.ma_order,
                                         demean=False)
        if spec.seasonal_periods == 0:
            # If no seasonal component, then `hr` gives starting parameters
            sp.params = hr.params
        else:
            # If we do have a seasonal component, estimate starting parameters
            # for the seasonal lags using the residuals from the previous step
            # NOTE(review): the result of this construction is unused; it is
            # kept because building the specification may validate its
            # inputs - confirm before removing.
            _ = SARIMAXSpecification(
                endog,
                seasonal_order=seasonal_order,
                enforce_stationarity=True,
                enforce_invertibility=enforce_invertibility)

            ar_order = np.array(spec.seasonal_ar_lags) * spec.seasonal_periods
            ma_order = np.array(spec.seasonal_ma_lags) * spec.seasonal_periods
            seasonal_hr, seasonal_hr_results = hannan_rissanen(
                hr_results.resid,
                ar_order=ar_order,
                ma_order=ma_order,
                demean=False)

            # Set the starting parameters
            sp.ar_params = hr.ar_params
            sp.ma_params = hr.ma_params
            sp.seasonal_ar_params = seasonal_hr.ar_params
            sp.seasonal_ma_params = seasonal_hr.ma_params
            sp.sigma2 = seasonal_hr.sigma2

        # Then, require starting parameters to be stationary and invertible;
        # offending estimates are replaced by zeros
        if not sp.is_stationary:
            sp.ar_params = [0] * sp.k_ar_params
            sp.seasonal_ar_params = [0] * sp.k_seasonal_ar_params

        if not sp.is_invertible and spec.enforce_invertibility:
            sp.ma_params = [0] * sp.k_ma_params
            sp.seasonal_ma_params = [0] * sp.k_seasonal_ma_params

        start_params = sp.params
    else:
        # User-supplied starting parameters are validated, not repaired
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params
        if not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process. Innovations algorithm requires a'
                             ' stationary process.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    def obj(params):
        # Negative loglikelihood at the constrained version of `params`
        p.params = spec.constrain_params(params)

        return -arma_innovations.arma_loglike(
            endog,
            ar_params=-p.reduced_ar_poly.coef[1:],
            ma_params=p.reduced_ma_poly.coef[1:],
            sigma2=p.sigma2)

    # Untransform the starting parameters
    unconstrained_start_params = spec.unconstrain_params(start_params)

    # Perform the minimization. Work on copies so that neither the caller's
    # `minimize_kwargs` nor its nested `options` dict is modified when the
    # default `maxiter` is filled in.
    minimize_kwargs = {} if minimize_kwargs is None else dict(minimize_kwargs)
    options = dict(minimize_kwargs.get('options') or {})
    options.setdefault('maxiter', 100)
    minimize_kwargs['options'] = options
    minimize_results = minimize(obj, unconstrained_start_params,
                                **minimize_kwargs)

    # TODO: show warning if convergence failed.

    # Reverse the transformation to get the optimal parameters
    p.params = spec.constrain_params(minimize_results.x)

    # Construct other results
    other_results = Bunch({
        'spec': spec,
        'minimize_results': minimize_results,
        'minimize_kwargs': minimize_kwargs,
        'start_params': start_params
    })

    return p, other_results
Ejemplo n.º 20
0
def innovations(endog, ma_order=0, demean=True):
    """
    Estimate MA parameters using innovations algorithm.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ma_order : int, optional
        Maximum moving average order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the moving average coefficients. Default is True.

    Returns
    -------
    parameters : list of SARIMAXParams objects
        List elements correspond to estimates at different `ma_order`. For
        example, parameters[0] is an `SARIMAXParams` instance corresponding to
        `ma_order=0`.
    other_results : Bunch
        Includes one component, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the requested specification has seasonal or otherwise
        non-consecutive MA orders.

    Notes
    -----
    The primary reference is [1]_, section 5.1.3.

    This procedure assumes that the series is stationary.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Specification built from the caller's arguments; this is the one that is
    # reported back in `other_results` (it must not be rebound in the loop
    # below, where a separate spec is created for every intermediate order).
    max_spec = SARIMAXSpecification(endog, ma_order=ma_order)
    endog = max_spec.endog

    if not max_spec.is_ma_consecutive:
        raise ValueError('Innovations estimation unavailable for models with'
                         ' seasonal or otherwise non-consecutive MA orders.')

    # Forward `demean` explicitly: `acovf` removes the mean by default, which
    # would otherwise silently override a caller's `demean=False`.
    sample_acovf = acovf(endog, fft=True, demean=demean)
    theta, v = innovations_algo(sample_acovf, nobs=max_spec.ma_order + 1)
    # Row i of `theta` holds the MA(i) coefficients in its first i entries
    ma_params = [theta[i, :i] for i in range(1, max_spec.ma_order + 1)]
    sigma2 = v

    out = []
    for i in range(max_spec.ma_order + 1):
        # Each intermediate order gets its own (data-free) specification
        p = SARIMAXParams(spec=SARIMAXSpecification(ma_order=i))
        if i == 0:
            # MA(0): the only parameter is the innovation variance
            p.params = sigma2[i]
        else:
            p.params = np.r_[ma_params[i - 1], sigma2[i]]
        out.append(p)

    # Construct other results
    other_results = Bunch({
        'spec': max_spec,
    })

    return out, other_results
Ejemplo n.º 21
0
def arma_order_select_ic(y, max_ar=4, max_ma=2, ic='bic', trend='c',
                         model_kw=None, fit_kw=None):
    """
    Returns information criteria for many ARMA models

    Parameters
    ----------
    y : array-like
        Time-series data
    max_ar : int
        Maximum number of AR lags to use. Default 4.
    max_ma : int
        Maximum number of MA lags to use. Default 2.
    ic : str, list
        Information criteria to report. Either a single string or a list
        of different criteria is possible.
    trend : str
        The trend to use when fitting the ARMA models.
    model_kw : dict, optional
        Keyword arguments to be passed to the ``ARMA`` model. Default is no
        extra arguments.
    fit_kw : dict, optional
        Keyword arguments to be passed to ``ARMA.fit``. Default is no extra
        arguments.

    Returns
    -------
    obj : Results object
        Each ic is an attribute with a DataFrame for the results. The AR order
        used is the row index. The ma order used is the column index. The
        minimum orders are available as ``ic_min_order``.

    Raises
    ------
    ValueError
        If `ic` is neither a string nor a list or tuple.

    Examples
    --------

    >>> from statsmodels.tsa.arima_process import arma_generate_sample
    >>> import statsmodels.api as sm
    >>> import numpy as np

    >>> arparams = np.array([.75, -.25])
    >>> maparams = np.array([.65, .35])
    >>> arparams = np.r_[1, -arparams]
    >>> maparams = np.r_[1, maparams]
    >>> nobs = 250
    >>> np.random.seed(2014)
    >>> y = arma_generate_sample(arparams, maparams, nobs)
    >>> res = sm.tsa.arma_order_select_ic(y, ic=['aic', 'bic'], trend='nc')
    >>> res.aic_min_order
    >>> res.bic_min_order

    Notes
    -----
    This method can be used to tentatively identify the order of an ARMA
    process, provided that the time series is stationary and invertible. This
    function computes the full exact MLE estimate of each model and can
    therefore be a little slow. An implementation using approximate estimates
    will be provided in the future. In the meantime, consider passing
    {method : 'css'} to fit_kw.

    The (0, 0) model with ``trend='nc'`` is not fit, and its entries are set
    to NaN; the same is done for any order whose fit returns no result.
    """
    from pandas import DataFrame

    # Avoid mutable default arguments; a caller-supplied dict is passed
    # through unchanged.
    model_kw = {} if model_kw is None else model_kw
    fit_kw = {} if fit_kw is None else fit_kw

    ar_range = lrange(0, max_ar + 1)
    ma_range = lrange(0, max_ma + 1)
    if isinstance(ic, string_types):
        ic = [ic]
    elif not isinstance(ic, (list, tuple)):
        raise ValueError("Need a list or a tuple for ic if not a string.")

    # results[k, p, q] holds criterion ic[k] for the ARMA(p, q) fit
    results = np.zeros((len(ic), max_ar + 1, max_ma + 1))

    for ar in ar_range:
        for ma in ma_range:
            # Nothing to estimate: no AR, no MA and no trend
            if ar == 0 and ma == 0 and trend == 'nc':
                results[:, ar, ma] = np.nan
                continue

            mod = _safe_arma_fit(y, (ar, ma), model_kw, trend, fit_kw)
            if mod is None:
                # The fit did not produce a result for this order
                results[:, ar, ma] = np.nan
                continue

            for i, criteria in enumerate(ic):
                results[i, ar, ma] = getattr(mod, criteria)

    dfs = [DataFrame(res, columns=ma_range, index=ar_range) for res in results]

    res = dict(zip(ic, dfs))

    # add the minimums to the results dict
    # (collected separately so that `res` is not modified while iterating)
    min_res = {}
    for i, result in iteritems(res):
        # Positions where the table attains its overall minimum; the first
        # match (in row-major order) is reported as (ar order, ma order)
        mins = np.where(result.min().min() == result)
        min_res.update({i + '_min_order' : (mins[0][0], mins[1][0])})
    res.update(min_res)

    return Bunch(**res)
Ejemplo n.º 22
0
def statespace(endog,
               exog=None,
               order=(0, 0, 0),
               seasonal_order=(0, 0, 0, 0),
               include_constant=True,
               enforce_stationarity=True,
               enforce_invertibility=True,
               concentrate_scale=False,
               start_params=None,
               fit_kwargs=None):
    """
    Estimate SARIMAX parameters using state space methods.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If `include_constant=True`, a constant
        column is added to it (or it is created as a column of ones if no
        `exog` is given).
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True.
    enforce_stationarity : boolean, optional
        Whether or not to transform the AR parameters to enforce stationarity
        in the autoregressive component of the model. Default is True.
    enforce_invertibility : boolean, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    concentrate_scale : boolean, optional
        Whether or not to concentrate the scale (variance of the error term)
        out of the likelihood. This reduces the number of parameters estimated
        by maximum likelihood by one. Default is False.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. If
        `enforce_stationarity=True` the AR polynomial must be stationary. If
        `enforce_invertibility=True` the MA polynomial must be invertible. If
        not provided, the choice of starting parameters is left to the state
        space model's `fit` method.
    fit_kwargs : dict, optional
        Arguments to pass to the state space model's `fit` method. The
        dictionary is not modified; `disp=0` is used unless `disp` is given.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes two components, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments; and
        `statespace_results`, corresponding to the results from the underlying
        state space model and Kalman filter / smoother.

    Raises
    ------
    ValueError
        If `start_params` imply a non-stationary AR process while
        `enforce_stationarity=True`, or a non-invertible MA process while
        `enforce_invertibility=True`.

    Notes
    -----
    The primary reference is [1]_.

    References
    ----------
    .. [1] Durbin, James, and Siem Jan Koopman. 2012.
       Time Series Analysis by State Space Methods: Second Edition.
       Oxford University Press.

    """
    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the specification
    spec = SARIMAXSpecification(endog,
                                exog=exog,
                                order=order,
                                seasonal_order=seasonal_order,
                                enforce_stationarity=enforce_stationarity,
                                enforce_invertibility=enforce_invertibility,
                                concentrate_scale=concentrate_scale)
    endog = spec.endog
    exog = spec.exog
    p = SARIMAXParams(spec=spec)

    # Check start parameters
    # (`sp` is a scratch parameter object used only for this validation)
    if start_params is not None:
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params

        if spec.enforce_stationarity and not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process with `enforce_stationarity=True`.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    # Create and fit the state space model
    mod = SARIMAX(endog,
                  exog=exog,
                  order=spec.order,
                  seasonal_order=spec.seasonal_order,
                  enforce_stationarity=spec.enforce_stationarity,
                  enforce_invertibility=spec.enforce_invertibility,
                  concentrate_scale=spec.concentrate_scale)
    # Work on a copy so that the default `disp` setting is not written into
    # the caller's dictionary
    fit_kwargs = {} if fit_kwargs is None else dict(fit_kwargs)
    fit_kwargs.setdefault('disp', 0)
    res_ss = mod.fit(start_params=start_params, **fit_kwargs)

    # Construct results
    p.params = res_ss.params
    res = Bunch({
        'spec': spec,
        'statespace_results': res_ss,
    })

    return p, res
Ejemplo n.º 23
0
def hannan_rissanen(endog,
                    ar_order=0,
                    ma_order=0,
                    demean=True,
                    initial_ar_order=None,
                    unbiased=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int
        Autoregressive order
    ma_order : int
        Moving average order
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals. If not given, it is set to the larger of
        ``floor(log(nobs)**2)`` and twice the largest AR / MA lag.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. If None (the
        default), the correction is applied only when the estimated
        coefficients from the previous step imply a stationary and invertible
        process. If True, a `ValueError` is raised when they do not. If
        False, the correction is never applied.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step.

    Raises
    ------
    ValueError
        If `unbiased=True` and the parameters estimated in the second step
        are non-stationary or non-invertible.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The three steps are only carried out when the model has an MA component.
    Without one, the AR coefficients come from a single least-squares
    regression (or, with no AR component either, only the variance is
    computed) and `unbiased` has no effect.

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    # Parameter container that is filled in by whichever branch runs below
    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(
            np.floor(np.log(nobs)**2).astype(int),
            2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Compute lagged endog
    # (`ar_ix`, and `ma_ix` below, are to account for non-consecutive lags;
    # for indexing purposes, must have dtype int)
    # Lags are 1-based while columns are 0-based, hence the `- 1`.
    ar_ix = np.array(spec.ar_lags, dtype=int) - 1
    lagged_endog = lagmat(endog, max_ar_order, trim='both')[:, ar_ix]

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        # With nothing to fit, the (possibly demeaned) series is the residual
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        # Regress the series on its own lags, dropping the first
        # `max_ar_order` observations for which the lags are unavailable
        mod = OLS(endog[max_ar_order:], lagged_endog)
        res = mod.fit()
        resid = res.resid
        p.ar_params = res.params
        p.sigma2 = res.scale
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals
        initial_ar_params, _ = yule_walker(endog,
                                           order=initial_ar_order,
                                           method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        # Residuals of the long AR fit; these start at observation
        # `initial_ar_order`
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        ma_ix = np.array(spec.ma_lags, dtype=int) - 1
        lagged_resid = lagmat(resid, max_ma_order, trim='both')[:, ma_ix]

        # Step 2: estimate ARMA model via least squares
        # The lagged residuals are only available from observation
        # `initial_ar_order + max_ma_order` onwards, whereas `lagged_endog`
        # starts at observation `max_ar_order`; `ix` is the number of leading
        # rows of `lagged_endog` to drop so that both regressors line up with
        # the dependent variable.
        ix = initial_ar_order + max_ma_order - max_ar_order
        mod = OLS(endog[initial_ar_order + max_ma_order:],
                  np.c_[lagged_endog[ix:], lagged_resid])
        res = mod.fit()
        # Regressor columns are ordered as [AR lags, MA lags]
        p.ar_params = res.params[:spec.k_ar_params]
        p.ma_params = res.params[spec.k_ar_params:]
        resid = res.resid
        p.sigma2 = res.scale

        # Step 3: bias correction (if requested)
        # (`unbiased=None` means "apply it when possible"; `unbiased=True`
        # additionally raises when it is not possible)
        if unbiased is True or unbiased is None:
            if p.is_stationary and p.is_invertible:
                Z = np.zeros_like(endog)
                # NOTE: these two initial values of `V` and `W` are never
                # read; both names are reassigned by the `lfilter` calls below
                V = np.zeros_like(endog)
                W = np.zeros_like(endog)

                ar_coef = p.ar_poly.coef
                ma_coef = p.ma_poly.coef

                # Recursively compute `Z` from the step-2 estimates; entries
                # before max(max_ar_order, max_ma_order) are left at zero
                for t in range(nobs):
                    if t >= max(max_ar_order, max_ma_order):
                        # Note: in the case of non-consecutive lag orders, the
                        # polynomials have the appropriate zeros so we don't
                        # need to subset `endog[t - max_ar_order:t]` or
                        # Z[t - max_ma_order:t]
                        tmp_ar = np.dot(-ar_coef[1:],
                                        endog[t - max_ar_order:t][::-1])
                        tmp_ma = np.dot(ma_coef[1:],
                                        Z[t - max_ma_order:t][::-1])
                        Z[t] = endog[t] - tmp_ar - tmp_ma

                # Auxiliary series obtained by filtering `Z`; their lags are
                # the regressors of the correction regression
                V = lfilter([1], ar_coef, Z)
                W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

                lagged_V = lagmat(V, max_ar_order, trim='both')
                lagged_W = lagmat(W, max_ma_order, trim='both')

                # Drop leading rows from whichever lag matrix is longer so
                # that both start at observation max(max_ar_order,
                # max_ma_order), then keep only the included lags
                exog = np.c_[lagged_V[max(max_ma_order - max_ar_order, 0):,
                                      ar_ix],
                             lagged_W[max(max_ar_order - max_ma_order, 0):,
                                      ma_ix]]

                mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
                res_unbias = mod_unbias.fit()

                # The regression coefficients are additive corrections to the
                # step-2 estimates
                p.ar_params = (p.ar_params +
                               res_unbias.params[:spec.k_ar_params])
                p.ma_params = (p.ma_params +
                               res_unbias.params[spec.k_ar_params:])

                # Recompute sigma2
                # (uses the step-2 regression data with the corrected
                # parameters)
                resid = mod.endog - mod.exog.dot(np.r_[p.ar_params,
                                                       p.ma_params])
                p.sigma2 = np.inner(resid, resid) / len(resid)
            elif unbiased is True:
                raise ValueError('Cannot perform third step of Hannan-Rissanen'
                                 ' estimation to remove paramater bias,'
                                 ' because parameters estimated from the'
                                 ' second step are non-stationary or'
                                 ' non-invertible')

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })

    return p, other_results
Ejemplo n.º 24
0
def gls(endog,
        exog=None,
        order=(0, 0, 0),
        seasonal_order=(0, 0, 0, 0),
        include_constant=None,
        n_iter=None,
        max_iter=50,
        tolerance=1e-8,
        arma_estimator='innovations_mle',
        arma_estimator_kwargs=None):
    """
    Estimate ARMAX parameters by GLS.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not included, then `include_constant`
        must be True, and then `exog` will only include the constant column.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True if the specified model does not
        include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GLS a specific number of times. Default is
        to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
    max_iter : int, optional
        Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
        is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GLS iterations. If
        `n_iter` is set, this argument has no effect.
        Default is 1e-8.
    arma_estimator : str, optional
        The estimator used for estimating the ARMA model. This option should
        not generally be used, unless the default method is failing or is
        otherwise unsuitable. Not all values will be valid, depending on the
        specified model orders (`order` and `seasonal_order`). Possible values
        are:
        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.
        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Arguments to pass to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`, 'arma_estimator_kwargs',
        and `arma_results`. `params`, `differences` and `arma_results` are
        lists with one entry per iteration, where entry 0 corresponds to the
        initial OLS step (for which `differences` and `arma_results` hold
        None). `converged` is None when `n_iter` is given.

    Raises
    ------
    ValueError
        If `include_constant=True` is requested for an integrated model.

    Notes
    -----
    The primary reference is [1]_, section 6.6. In particular, the
    implementation follows the iterative procedure described in section 6.6.2.
    Construction of the transformed variables used to compute the GLS estimator
    described in section 6.6.1 is done via an application of the innovations
    algorithm (rather than explicit construction of the transformation matrix).

    Note that if the specified model includes integration, both the `endog` and
    `exog` series will be differenced prior to estimation and a warning will
    be issued to alert the user.

    A warning is also issued if the iterations fail to converge within
    `max_iter` iterations (only when `n_iter` is not given).

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Handle n_iter
    # (an infinite tolerance is used so that the requested number of
    # iterations is determined by `max_iter` alone)
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # Default for include_constant is True if there is no integration and
    # False otherwise
    integrated = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not integrated
    elif include_constant and integrated:
        raise ValueError('Cannot include a constant in an integrated model.')

    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the SARIMAX specification
    spec = SARIMAXSpecification(endog,
                                exog=exog,
                                order=order,
                                seasonal_order=seasonal_order)
    endog = spec.endog
    exog = spec.exog

    # Handle integration
    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        # section 6.6.2 and Example 6.6.3), but maybe there are some cases
        # where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.')
        endog = diff(endog,
                     k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
        exog = diff(exog,
                    k_diff=spec.diff,
                    k_seasonal_diff=spec.seasonal_diff,
                    seasonal_periods=spec.seasonal_periods)
    # Stack `endog` (first column) and `exog` so that both can be
    # transformed in a single pass in the GLS step
    augmented = np.c_[endog, exog]

    # Validate arma_estimator
    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    # Step 1: OLS
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    exog_params = res_ols.params
    resid = res_ols.resid

    # 0th iteration parameters
    # (regression coefficients from OLS, all ARMA coefficients set to zero)
    p = SARIMAXParams(spec=spec)
    p.exog_params = exog_params
    if spec.max_ar_order > 0:
        p.ar_params = np.zeros(spec.k_ar_params)
    if spec.max_seasonal_ar_order > 0:
        p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
    if spec.max_ma_order > 0:
        p.ma_params = np.zeros(spec.k_ma_params)
    if spec.max_seasonal_ma_order > 0:
        p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
    p.sigma2 = res_ols.scale

    ar_params = p.ar_params
    seasonal_ar_params = p.seasonal_ar_params
    ma_params = p.ma_params
    seasonal_ma_params = p.seasonal_ma_params
    sigma2 = p.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    # (index 0 of each list corresponds to the OLS step above)
    arma_results = [None]
    differences = [None]
    parameters = [p]
    converged = False if n_iter is None else None
    # Ensures `i` is defined (as zero iterations) even if the loop is empty
    i = 0
    for i in range(1, max_iter + 1):
        prev = exog_params

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            p_arma, res_arma = yule_walker(resid,
                                           ar_order=spec.ar_order,
                                           demean=False,
                                           **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            p_arma, res_arma = burg(resid,
                                    ar_order=spec.ar_order,
                                    demean=False,
                                    **arma_estimator_kwargs)
        elif arma_estimator == 'innovations':
            out, res_arma = innovations(resid,
                                        ma_order=spec.ma_order,
                                        demean=False,
                                        **arma_estimator_kwargs)
            # `innovations` returns estimates for every order up to
            # `ma_order`; keep the one for the requested order
            p_arma = out[-1]
        elif arma_estimator == 'hannan_rissanen':
            p_arma, res_arma = hannan_rissanen(resid,
                                               ar_order=spec.ar_order,
                                               ma_order=spec.ma_order,
                                               demean=False,
                                               **arma_estimator_kwargs)
        else:
            # For later iterations, use a "warm start" for parameter estimates
            # (speeds up estimation and convergence)
            start_params = (None if i == 1 else np.r_[ar_params, ma_params,
                                                      seasonal_ar_params,
                                                      seasonal_ma_params,
                                                      sigma2])
            # Note: in each case, we do not pass in the order of integration
            # since we have already differenced the series
            tmp_order = (spec.order[0], 0, spec.order[2])
            tmp_seasonal_order = (spec.seasonal_order[0], 0,
                                  spec.seasonal_order[2],
                                  spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                p_arma, res_arma = innovations_mle(
                    resid,
                    order=tmp_order,
                    seasonal_order=tmp_seasonal_order,
                    demean=False,
                    start_params=start_params,
                    **arma_estimator_kwargs)
            else:
                p_arma, res_arma = statespace(
                    resid,
                    order=tmp_order,
                    seasonal_order=tmp_seasonal_order,
                    include_constant=False,
                    start_params=start_params,
                    **arma_estimator_kwargs)

        ar_params = p_arma.ar_params
        seasonal_ar_params = p_arma.seasonal_ar_params
        ma_params = p_arma.ma_params
        seasonal_ma_params = p_arma.seasonal_ma_params
        sigma2 = p_arma.sigma2
        arma_results.append(res_arma)

        # Step 3: GLS
        # Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.6.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
        # by a matrix T. However, explicitly constructing T and then
        # performing the left-multiplications does not scale well when nobs is
        # large. Instead, we can retrieve the transformed variables as the
        # residuals of the innovations algorithm (the `normalize=True`
        # argument applies a Prais-Winsten-type normalization to the first few
        # observations to ensure homoskedasticity). Brockwell and Davis
        # mention that they also take this approach in practice.
        # The transformation must reflect the complete error process, so the
        # coefficients are taken from the reduced-form polynomials: these
        # include any seasonal terms and carry explicit zeros for lags that
        # are excluded from a non-consecutive specification.
        tmp, _ = arma_innovations.arma_innovations(
            augmented,
            ar_params=-p_arma.reduced_ar_poly.coef[1:],
            ma_params=p_arma.reduced_ma_poly.coef[1:],
            normalize=True)
        u = tmp[:, 0]
        x = tmp[:, 1:]

        # OLS on transformed variables
        mod_gls = OLS(u, x)
        res_gls = mod_gls.fit()
        exog_params = res_gls.params
        # Residuals are computed on the untransformed data; they feed the
        # ARMA step of the next iteration
        resid = endog - np.dot(exog, exog_params)

        # Construct the parameter vector for the iteration
        p = SARIMAXParams(spec=spec)
        p.exog_params = exog_params
        if spec.max_ar_order > 0:
            p.ar_params = ar_params
        if spec.max_seasonal_ar_order > 0:
            p.seasonal_ar_params = seasonal_ar_params
        if spec.max_ma_order > 0:
            p.ma_params = ma_params
        if spec.max_seasonal_ma_order > 0:
            p.seasonal_ma_params = seasonal_ma_params
        p.sigma2 = sigma2
        parameters.append(p)

        # Check for convergence
        # (based on the change in the regression coefficients only)
        difference = np.abs(exog_params - prev)
        differences.append(difference)
        if n_iter is None and np.all(difference < tolerance):
            converged = True
            break
    else:
        # Only reached when the loop ran out of iterations without `break`
        if n_iter is None:
            warnings.warn('Feasible GLS failed to converge in %d iterations.'
                          ' Consider increasing the maximum number of'
                          ' iterations using the `max_iter` argument or'
                          ' reducing the required tolerance using the'
                          ' `tolerance` argument.' % max_iter)

    # Construct final results
    p = parameters[-1]
    other_results = Bunch({
        'spec': spec,
        'params': parameters,
        'converged': converged,
        'differences': differences,
        'iterations': i,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })

    return p, other_results
def durbin_levinson(endog, ar_order=0, demean=True, adjusted=False):
    """
    Fit AR models of every order up to `ar_order` by Durbin-Levinson recursion.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Largest autoregressive order to estimate. Default is 0.
    demean : bool, optional
        Passed to the autocovariance computation; whether the mean is
        estimated and removed before the autocovariances are computed.
        Default is True.
    adjusted : bool, optional
        Whether to use the "adjusted" autocovariance estimator, which uses
        n - h degrees of freedom rather than n. This option can result in
        a non-positive definite autocovariance matrix. Default is False.

    Returns
    -------
    parameters : list of SARIMAXParams objects
        One entry per order, from 0 up to and including the maximum order.
        For example, parameters[0] is the `SARIMAXParams` instance for
        `ar_order=0` (variance only).
    other_results : Bunch
        Has a single entry, `spec`: the `SARIMAXSpecification` instance that
        was built for the highest order fitted.

    Raises
    ------
    ValueError
        If the requested AR lag structure is not consecutive.

    Notes
    -----
    The primary reference is [1]_, section 2.5.1.

    This procedure assumes that the series is stationary.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    max_spec = SARIMAXSpecification(endog, ar_order=ar_order)
    endog = max_spec.endog

    # The recursion below only handles lags 1, 2, ..., p without gaps
    if not max_spec.is_ar_consecutive:
        raise ValueError(
            'Durbin-Levinson estimation unavailable for models with seasonal'
            ' or otherwise non-consecutive AR orders.')

    max_order = max_spec.ar_order
    gamma = acovf(endog, adjusted=adjusted, fft=True, demean=demean,
                  nlag=max_order)

    if max_order == 0:
        # Nothing to recurse over: only the lag-0 autocovariance is needed
        ar_params = [None]
        sigma2 = [gamma[0]]
    else:
        # Row k of `phi` holds the AR(k + 1) coefficients in its first k + 1
        # entries; `innov_var[k]` is the innovation variance at order k
        phi = np.zeros((max_order, max_order))
        innov_var = np.zeros(max_order + 1)

        innov_var[0] = gamma[0]
        phi[0, 0] = gamma[1] / gamma[0]
        innov_var[1] = innov_var[0] * (1 - phi[0, 0]**2)

        for k in range(1, max_order):
            prev = phi[k - 1, :k]
            # Newest coefficient of the order k + 1 fit
            last_coef = ((gamma[k + 1] - np.dot(prev, gamma[k:0:-1]))
                         / innov_var[k])
            phi[k, k] = last_coef
            # Update the lower-order coefficients from the previous row
            phi[k, :k] = prev - last_coef * prev[::-1]
            innov_var[k + 1] = innov_var[k] * (1 - last_coef**2)

        ar_params = [None]
        ar_params.extend(phi[k, :k + 1] for k in range(max_order))
        sigma2 = innov_var

    # One parameter object per order; `spec` is left bound to the last one
    out = []
    for order in range(max_order + 1):
        spec = SARIMAXSpecification(ar_order=order)
        p = SARIMAXParams(spec=spec)
        if order > 0:
            p.params = np.r_[ar_params[order], sigma2[order]]
        else:
            p.params = sigma2[order]
        out.append(p)

    other_results = Bunch({'spec': spec})
    return out, other_results
Ejemplo n.º 26
0
import numpy as np

from statsmodels.tools.tools import Bunch


# Container for one set of hard-coded reference values; attributes are
# attached to it below.
pls5 = Bunch()

# Nested Bunch describing a single smooth term.
# NOTE(review): the field names (term, bs_dim, by, sp, BD, xp) look like a
# dump of an R mgcv smooth object - confirm the provenance.
pls5.smooth = Bunch()
pls5.smooth.term = 'times'
pls5.smooth.bs_dim = 7
pls5.smooth.dim = 1
pls5.smooth.by = 'NA'
pls5.smooth.label = 's(times)'
pls5.smooth.sp = 1
# 36 values filled column-major (order='F') into a 6 x 6 matrix.
pls5.smooth.BD = np.array([
    -0.0322305472050642, 0.0332895629742452, -0.00907144581575865,
    0.00386174436551668, -0.00624916066961505, 0.0181385348730838,
    0.0292384327901831, -0.0717740723184547, 0.054914261809955,
    -0.0158383049768667, 0.00626042823599089, -0.0103543594891998,
    -0.011074996356557, 0.0526128870346165, -0.0930190975208449,
    0.0595200902721069, -0.0135721686724522, 0.00600098849448633,
    0.00625687187895293, -0.0145166841858048, 0.0594020303618183,
    -0.0946831790103269, 0.0511018949689336, -0.0114519440129956,
    -0.00994300967116444, 0.00619931821053046, -0.0156728054209229,
    0.0550549724656574, -0.0669161912708059, 0.0271416199423184,
    0.0177532485636497, -0.00581101171513275, 0.00344705658575325,
    -0.00791532311608746, 0.0293751974079486, -0.0294748398076931
    ]).reshape(6, 6, order='F')

pls5.smooth.xp = np.array([
    2.4, 11.2, 17.8, 24.8, 31.2, 41, 57.6
Ejemplo n.º 27
0
def x13_arima_select_order(endog,
                           maxorder=(2, 1),
                           maxdiff=(2, 1),
                           diff=None,
                           exog=None,
                           log=None,
                           outlier=True,
                           trading=False,
                           forecast_years=None,
                           start=None,
                           freq=None,
                           print_stdout=False,
                           x12path=None,
                           prefer_x13=True):
    """
    Perform automatic seasonal ARIMA order identification using x12/x13 ARIMA.

    Parameters
    ----------
    endog : array-like, pandas.Series
        The series to model. It is best to use a pandas object with a
        DatetimeIndex or PeriodIndex. However, you can pass an array-like
        object. If your object does not have a dates index then ``start`` and
        ``freq`` are not optional.
    maxorder : tuple
        The maximum order of the regular and seasonal ARMA polynomials to
        examine during the model identification. The order for the regular
        polynomial must be greater than zero and no larger than 4. The
        order for the seasonal polynomial may be 1 or 2.
    maxdiff : tuple
        The maximum orders for regular and seasonal differencing in the
        automatic differencing procedure. Acceptable inputs for regular
        differencing are 1 and 2. The maximum order for seasonal differencing
        is 1. If ``diff`` is specified then ``maxdiff`` should be None.
        Otherwise, ``diff`` will be ignored. See also ``diff``.
    diff : tuple
        Fixes the orders of differencing for the regular and seasonal
        differencing. Regular differencing may be 0, 1, or 2. Seasonal
        differencing may be 0 or 1. ``maxdiff`` must be None, otherwise
        ``diff`` is ignored.
    exog : array-like
        Exogenous variables.
    log : bool or None
        If None, it is automatically determined whether to log the series or
        not. If False, logs are not taken. If True, logs are taken.
    outlier : bool
        Whether or not outliers are tested for and corrected, if detected.
    trading : bool
        Whether or not trading day effects are tested for.
    forecast_years : int
        Number of forecasts produced. The default is one year.
    start : str, datetime
        Must be given if ``endog`` does not have date information in its index.
        Anything accepted by pandas.DatetimeIndex for the start value.
    freq : str
        Must be given if ``endog`` does not have date information in its
        index. Anything accepted by pandas.DatetimeIndex for the freq value.
    print_stdout : bool
        The stdout from X12/X13 is suppressed. To print it out, set this
        to True. Default is False.
    x12path : str or None
        The path to x12 or x13 binary. If None, the program will attempt
        to find x13as or x12a on the PATH or by looking at X13PATH or X12PATH
        depending on the value of prefer_x13.
    prefer_x13 : bool
        If True, will look for x13as first and will fallback to the X13PATH
        environmental variable. If False, will look for x12a first and will
        fallback to the X12PATH environmental variable. If x12path points
        to the path for the X12/X13 binary, it does nothing.

    Returns
    -------
    results : Bunch
        A bunch object that has the following attributes:

        - order : tuple
          The regular order
        - sorder : tuple
          The seasonal order
        - include_mean : bool
          Whether to include a mean or not
        - results : str
          The full results from the X12/X13 analysis
        - stdout : str
          The captured stdout from the X12/X13 analysis

    Notes
    -----
    This works by creating a specification file, writing it to a temporary
    directory, invoking X12/X13 in a subprocess, and reading the output back
    in.

    The selected model is read from the line of the X12/X13 output that
    starts with "Final automatic model choice : ". If that line is absent,
    the lookup fails with an ``AttributeError``.
    """
    # Forward print_stdout as well; previously it was accepted here but
    # never passed on, so setting it had no effect.
    results = x13_arima_analysis(endog,
                                 x12path=x12path,
                                 exog=exog,
                                 log=log,
                                 outlier=outlier,
                                 trading=trading,
                                 forecast_years=forecast_years,
                                 maxorder=maxorder,
                                 maxdiff=maxdiff,
                                 diff=diff,
                                 start=start,
                                 freq=freq,
                                 print_stdout=print_stdout,
                                 prefer_x13=prefer_x13)
    report = results.results

    # The lookbehind keeps only the text following the marker on that line
    model = re.search(r"(?<=Final automatic model choice : ).*", report)
    order = model.group()

    # A mean is included only when a "Constant" appears in the report and
    # the report does not state that the mean is insignificant
    mean_rejected = re.search("Mean is not significant", report) is not None
    has_constant = re.search("Constant", report) is not None
    include_mean = has_constant and not mean_rejected

    order, sorder = _clean_order(order)
    res = Bunch(order=order,
                sorder=sorder,
                include_mean=include_mean,
                results=report,
                stdout=results.stdout)
    return res
Ejemplo n.º 28
0
    -.00018443722054,
    -.03257408922788,
    -.00018443722054,
    .00205106413403,
    -.3943459697384,
    -.03257408922788,
    -.3943459697384,
    140.50692606398]).reshape(3, 3)

# Nine hard-coded values reshaped into a symmetric 3 x 3 matrix.
# NOTE(review): the name suggests a covariance matrix exported from Stata
# (presumably Driscoll-Kraay with 4 lags) - confirm against the generator.
cov_dk4_stata = np.array([
    .00018052657317,
    -.00035661054613,
    -.06728261073866,
    -.00035661054613,
    .0024312795189,
    -.32394785247278,
    -.06728261073866,
    -.32394785247278,
    148.60456447156]).reshape(3, 3)


# Gather the reference arrays defined in this module into a single Bunch so
# they can be read as attributes (e.g. results.cov_dk4_stata).
results = Bunch(
    cov_clu_stata=cov_clu_stata,
    cov_pnw0_stata=cov_pnw0_stata,
    cov_pnw1_stata=cov_pnw1_stata,
    cov_pnw4_stata=cov_pnw4_stata,
    cov_dk0_stata=cov_dk0_stata,
    cov_dk1_stata=cov_dk1_stata,
    cov_dk4_stata=cov_dk4_stata
)
Ejemplo n.º 29
0
def _window_ols(y, x, window=None, window_type=None, min_periods=None):
    """
    Minimal replacement for pandas ols that provides the required features

    Parameters
    ----------
    y : pd.Series
        Endogenous variable
    x : pd.DataFrame
        Exogenous variables. A copy is taken and an 'intercept' column of
        ones is always added to it.
    window : {None, int}
        Number of observations in a rolling window. If None and
        ``window_type`` selects a rolling or expanding fit, the full length
        of ``y`` is used. If None and ``window_type`` is also None, a single
        full-sample fit is done.
    window_type : {str, int}
        One of 'full_sample', 'rolling' or 'expanding', or the corresponding
        module constant FULL_SAMPLE, ROLLING or EXPANDING. If None, it
        becomes 'rolling' when ``window`` is given.
    min_periods : {None, int}
        Minimum number of non-missing observations needed for a fit, and
        the first right edge used by an expanding fit. Defaults to 1. For
        a rolling fit it is overridden with ``window``.

    Returns
    -------
    results : Bunch
        Bunch containing parameters (beta), R-squared (r2), nobs and
        residuals (resid). For rolling and expanding fits only the rows
        where a regression was actually estimated are returned.

    Raises
    ------
    ValueError
        If ``window_type`` is not one of the recognized values.
    """
    # Must return beta, r2, resid, nobs
    if window_type == FULL_SAMPLE:
        window_type = 'full_sample'
    elif window_type == ROLLING:
        window_type = 'rolling'
    elif window_type == EXPANDING:
        window_type = 'expanding'

    if window_type in ('rolling', 'expanding') and window is None:
        window = y.shape[0]
    min_periods = 1 if min_periods is None else min_periods
    window_type = 'full_sample' if window is None else window_type
    window_type = 'rolling' if window_type is None else window_type
    if window_type == 'rolling':
        # A rolling fit is only produced from a completely filled window
        min_periods = window

    if window_type not in ('full_sample', 'rolling', 'expanding'):
        raise ValueError('Unknown window_type')

    # Work on a copy so the caller's frame does not gain the intercept column
    x = x.copy()
    x['intercept'] = 1.0

    bunch = Bunch()
    if window_type == 'full_sample':
        # Drop every row with a missing value in y or in any column of x.
        # ``axis`` must be given by keyword: pandas 2.0 made it keyword-only.
        missing = y.isnull() | x.isnull().any(axis=1)
        y = y.loc[~missing]
        x = x.loc[~missing]

        res = OLS(y, x).fit()
        bunch['beta'] = res.params
        bunch['r2'] = res.rsquared
        bunch['nobs'] = res.nobs
        bunch['resid'] = res.resid
        return bunch

    index = y.index
    columns = x.columns
    n = y.shape[0]
    k = x.shape[1]

    beta = pd.DataFrame(np.zeros((n, k)), columns=columns, index=index)
    r2 = pd.Series(np.zeros(n), index=index)
    # Builtin int/bool replace the np.int/np.bool aliases, which were
    # removed in NumPy 1.24 and were plain aliases of the builtins.
    nobs = r2.copy().astype(int)
    resid = r2.copy()
    valid = r2.copy().astype(bool)

    if window_type == 'rolling':
        start = window
    else:
        start = min_periods
    for i in range(start, n + 1):
        # i is right edge, as in y[:i] for expanding
        if window_type == 'rolling':
            left = max(0, i - window)
            sel = slice(left, i)
        else:
            sel = slice(i)
        # Positional slicing, consistent with the .iloc[i - 1] writes below
        _y = y.iloc[sel]
        _x = x.iloc[sel]
        missing = _y.isnull() | _x.isnull().any(axis=1)
        if missing.any():
            if (~missing).sum() < min_periods:
                continue
            _y = _y.loc[~missing]
            _x = _x.loc[~missing]
        # Need more observations than regressors to fit
        if _y.shape[0] <= _x.shape[1]:
            continue
        # Expanding fits are skipped when the newest observation is missing
        if window_type == 'expanding' and missing.values[-1]:
            continue
        res = OLS(_y, _x).fit()
        valid.iloc[i - 1] = True
        beta.iloc[i - 1] = res.params
        r2.iloc[i - 1] = res.rsquared
        nobs.iloc[i - 1] = int(res.nobs)
        # Only the residual of the most recent observation is kept
        resid.iloc[i - 1] = res.resid.iloc[-1]

    bunch['beta'] = beta.loc[valid]
    bunch['r2'] = r2.loc[valid]
    bunch['nobs'] = nobs.loc[valid]
    bunch['resid'] = resid.loc[valid]
    return bunch
Ejemplo n.º 30
0
def yule_walker(endog, ar_order=0, demean=True, unbiased=False):
    """
    Estimate AR parameters using Yule-Walker equations.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
    unbiased : bool, optional
        Whether to use the "unbiased" autocovariance estimator, which uses
        n - h degrees of freedom rather than n. Note that despite the name, it
        is only truly unbiased if the process mean is known (rather than
        estimated) and for some processes it can result in a non-positive
        definite autocovariance matrix. Default is False.

    Returns
    -------
    parameters : SARIMAXParams object
        Holds the estimated AR coefficients and innovation variance.
    other_results : Bunch
        Includes one component, `spec`, which is the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the requested AR lag structure is not consecutive.

    Notes
    -----
    The primary reference is [1]_, section 5.1.1.

    This procedure assumes that the series is stationary.

    For a description of the effect of the "unbiased" estimate of the
    autocovariance function, see 2.4.2 of [1]_.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order)
    endog = spec.endog
    p = SARIMAXParams(spec=spec)

    # Only plain AR(p) lag structures are supported
    if not spec.is_ar_consecutive:
        raise ValueError(
            'Yule-Walker estimation unavailable for models with seasonal or'
            ' non-consecutive AR orders.')

    # Select the autocovariance estimator used by the solver
    if unbiased:
        acov_method = 'unbiased'
    else:
        acov_method = 'mle'

    ar_coefs, innovation_std = linear_model.yule_walker(
        endog, order=ar_order, demean=demean, method=acov_method)
    p.ar_params = ar_coefs
    # The solver's second return value is squared to give the variance
    p.sigma2 = innovation_std**2

    return p, Bunch({'spec': spec})
Ejemplo n.º 31
0
    .85678082704544, 1.022847533226, 1.0930491685867, 1.0342184305191,
    1.2070096731186, 1.2472279071808, 1.0886085033417, 1.3604420423508,
    1.1053978204727, 2.0939025878906, 1.0898643732071, 1.3238569498062,
    1.5171576738358, .77435439825058, 1.3360253572464, 1.5512014627457,
    1.3569095134735, 1.4669530391693, 1.9312930107117, 1.52878677845,
    2.3952746391296, .80755305290222, -.2365039139986, .85178333520889,
    1.1858888864517
])

# Six hard-coded reference values; the second entry is deliberately NaN.
# NOTE(review): presumably Stata information-criteria output in the order
# (N, ll(null), ll(model), df, AIC, BIC) - confirm against the source log.
icstats = np.array(
    [202, np.nan, -240.21658671417, 4, 488.43317342834, 501.66624421795])

# Gather the reference values defined in this module into a single Bunch so
# they can be read as attributes (e.g. results.llf, results.params).
results = Bunch(llf=llf,
                nobs=nobs,
                k=k,
                k_exog=k_exog,
                sigma=sigma,
                chi2=chi2,
                df_model=df_model,
                k_ar=k_ar,
                k_ma=k_ma,
                params=params,
                cov_params=cov_params,
                xb=xb,
                y=y,
                resid=resid,
                yr=yr,
                mse=mse,
                stdp=stdp,
                icstats=icstats)
import numpy as np

from statsmodels.tools.tools import Bunch

# Hard-coded reference results for one configuration, stored as attributes
# on a Bunch.
# NOTE(review): the name suggests a quantile regression at q = 0.75 with an
# Epanechnikov kernel and Hall-Sheather bandwidth, exported from Stata -
# confirm against the script that generated these numbers.
epanechnikov_hsheather_q75 = Bunch()
# Two rows of six values each.
# NOTE(review): columns look like (coef, std err, t, P>|t|, CI low, CI high)
# - TODO confirm.
epanechnikov_hsheather_q75.table = np.array(
    [[.6440143, .0122001, 52.79, 0.000, .6199777, .6680508],
     [62.39648, 13.5509, 4.60, 0.000, 35.69854, 89.09443]])
epanechnikov_hsheather_q75.psrsquared = 0.6966
epanechnikov_hsheather_q75.rank = 2
epanechnikov_hsheather_q75.sparsity = 223.784434936344
epanechnikov_hsheather_q75.bwidth = .1090401129546568
# The Stata 12 value is kept commented out below; the active value is a
# rounded-down replacement (see the TODO on that line).
# epanechnikov_hsheather_q75.kbwidth = 59.62067927472172  # Stata 12 results
epanechnikov_hsheather_q75.kbwidth = 59.30  # TODO: why do we need lower tol?
epanechnikov_hsheather_q75.df_m = 1
epanechnikov_hsheather_q75.df_r = 233
epanechnikov_hsheather_q75.f_r = .0044685860313942
epanechnikov_hsheather_q75.N = 235
epanechnikov_hsheather_q75.q_v = 745.2352905273438
epanechnikov_hsheather_q75.q = .75
epanechnikov_hsheather_q75.sum_rdev = 43036.06956481934
epanechnikov_hsheather_q75.sum_adev = 13058.50008841318
epanechnikov_hsheather_q75.convcode = 0

biweight_bofinger = Bunch()
biweight_bofinger.table = np.array(
    [[.5601805, .0136491, 41.04, 0.000, .533289, .5870719],
     [81.48233, 15.1604, 5.37, 0.000, 51.61335, 111.3513]])
biweight_bofinger.psrsquared = 0.6206
biweight_bofinger.rank = 2
biweight_bofinger.sparsity = 216.8218989750115