Esempio n. 1
0
    def fit(self, method="ols", structural=None, dfk=None, maxlag=None,
            ic=None, trend="c"):
        """
        Fit the VAR model equation by equation.

        Each column of the (lag-trimmed) endogenous data is regressed with
        GLS on the same design matrix, which holds, from left to right, the
        trend terms, the exogenous variables (if any) and the lagged
        endogenous variables.

        Parameters
        ----------
        method : str
            "ols" fit equation by equation with OLS
            "yw" fit with yule walker
            "mle" fit with unconditional maximum likelihood
            Only OLS is currently implemented; this argument is not used.
        structural : str, optional
            If 'BQ' (case-insensitive), the Blanchard-Quah identification
            scheme is applied to the lag coefficients.  This imposes long
            run restrictions.  Experimental.
        dfk : int or bool, optional
            Small-sample bias correction.  If None, dfk = 0.
            If True, dfk is the number of columns of the design matrix
            (trend terms + exogenous variables + neqs * nlags).  The user
            can also provide a number for dfk.  Omega is divided by
            (avobs - dfk).
        maxlag : int, optional
            The number of lags that are fit for each equation.  The default
            is 12 * (nobs/100.)**(1./4), rounded to the nearest integer.
        ic : str {"aic","bic","hq", "fpe"} or None, optional
            Information criterion for lag length selection.
            Not yet implemented for VAR; this argument is not used.
        trend : str {"c", "ct", "ctt", "nc"}
            "c" - add constant
            "ct" - constant and trend
            "ctt" - constant, linear and quadratic trend
            "nc" - no constant, no trend
            Note that these are prepended to the columns of the dataset.

        Returns
        -------
        VARMAResults
            Built as ``VARMAResults(self, results, params)`` where `results`
            is the list of per-equation GLS fits and `params` stacks their
            coefficients row by row (or holds the normalized lag
            coefficients when ``structural='BQ'``).

        Raises
        ------
        ValueError
            If `trend` is not one of "c", "ct", "ctt", "nc".

        Notes
        -----
        Sets ``self.dfk``, ``self.avobs``, ``self.laglen``,
        ``self.trendorder``, ``self.Y`` and ``self.X`` as side effects.

        Not sure what to do with structural. Restrictions would be on
        coefficients or on omega.  So should it be short run (array),
        long run (array), or sign (str)?  Recursive?
        """
        # Validate trend up front so that an unknown value fails with a
        # clear message instead of an unbound local later on.
        trend_orders = {'nc': 0, 'c': 1, 'ct': 2, 'ctt': 3}
        if trend not in trend_orders:
            raise ValueError("trend %r not understood" % (trend,))
        trendorder = trend_orders[trend]

        nobs = int(self.nobs)
        nvars = int(self.nvars)
        neqs = int(self.neqs)
        endog = self.endog

        # The default lag length must be resolved before anything is
        # derived from it; cast to int because it is used for slicing.
        if maxlag is None:
            maxlag = round(12*(nobs/100.)**(1/4.))
        laglen = int(maxlag)
        self.laglen = laglen #TODO: change when IC selection is sorted

        # available obs (sample - pre-sample)
        # need to recompute after lag length selection
        avobs = nobs - laglen
        self.avobs = avobs

        Y = endog[laglen:, :]

        # Make lagged endogenous RHS: block `lag` holds endog lagged by
        # lag + 1 periods.
        #NOTE: this loop is faster than lagmat
        X = np.zeros((avobs, nvars*laglen))
        for lag in range(laglen):
            X[:, lag*nvars:(lag+1)*nvars] = \
                    endog[(laglen-1)-lag:(nobs-1)-lag, :]

        # Prepend Exogenous variables
        if self.exog is not None:
            X = np.column_stack((self.exog[laglen:, :], X))

        # Handle constant, etc.
        X = add_trend(X, prepend=True, trend=trend)
        self.trendorder = trendorder

        # Resolve the degrees-of-freedom correction now that the design
        # matrix for this fit exists.
        if dfk is None:
            self.dfk = 0
        elif dfk is True:
            self.dfk = X.shape[1] #TODO: change when we accept
                                  # equations for endog and exog
        else:
            self.dfk = dfk

        self.Y = Y
        self.X = X

        # Index of the first lagged-endogenous column in X / params.
        lagstart = trendorder
        if self.exog is not None:
            lagstart += self.exog.shape[1]

        #NOTE: just use GLS directly; a block-diagonal (SUR-like) system
        # was tried and is slow.
        #TODO: For coefficient restrictions, will have to use SUR
        results = [GLS(y, X).fit() for y in Y.T]
        params = np.vstack([res.params for res in results])

        #TODO: make a separate SVAR class or this is going to get really messy
        if structural and structural.lower() == 'bq':
            # Only the lag coefficients enter the long-run restriction;
            # skip the trend and exogenous columns.
            lag_params = params[:, lagstart:]
            # phi[i] is the (neqs x neqs) coefficient matrix of lag i + 1
            phi = np.swapaxes(lag_params.reshape(neqs, laglen, neqs), 1, 0)
            I_phi_inv = np.linalg.inv(np.eye(neqs) - phi.sum(0))
            # (avobs x neqs) residuals, one column per equation
            resid = np.column_stack([res.resid for res in results])
            omega = np.dot(resid.T, resid)/(avobs - self.dfk)
            shock_var = chain_dot(I_phi_inv, omega, I_phi_inv.T)
            R = np.linalg.cholesky(shock_var)
            phi_normalize = np.dot(I_phi_inv, R)
            params = np.zeros_like(phi)
            #TODO: apply a dot product along an axis?
            for i in range(laglen):
                params[i] = np.dot(phi_normalize, phi[i])
            # Flatten back to (neqs, laglen*neqs) once every lag is done.
            params = np.swapaxes(params, 1, 0).reshape(neqs, laglen*neqs)
        return VARMAResults(self, results, params)
Esempio n. 2
0
    def fit(self, maxlag=None, method='ols', ic=None, trend='c', demean=True,
            penalty=False,
            start_params=None, solver=None, maxiter=35, full_output=1, disp=1,
            callback=None, **kwargs):
        """
        Fit an AR(p) process by OLS, Yule-Walker or maximum likelihood.

        Parameters
        ----------
        maxlag : int, optional
            The number of lags to fit.  The default is
            12 * (nobs/100.)**(1./4), rounded to the nearest integer.
        method : str {'ols', 'yw', 'mle', 'umle'}, optional
            ols - Ordinary Least Squares
            yw - Yule-Walker
            mle - conditional maximum likelihood
            umle - unconditional maximum likelihood
        ic : str or None, optional
            Not used by the code in this method.
        trend : str {"c", "ct", "ctt", "nc"}
            "c" - add constant
            "ct" - constant and trend
            "ctt" - constant, linear and quadratic trend
            "nc" - no constant, no trend
            The trend terms are prepended to the design matrix.
        demean : bool
            If True, the mean of `endog` is subtracted from a copy of it
            before fitting and stored as ``self.endog_mean``.
        penalty : bool
            Whether or not to use a penalty function.  Default is False,
            though this is ignored at the moment and the penalty is always
            used if appropriate.  See notes.
        start_params : array-like, optional
            A first guess on the parameters.  Defaults is a vector of zeros
            for method='mle' and ``np.array([0])`` for method='umle'.
        solver : str or None, optional
            For method='mle': 'newton' (newton-raphson, the default),
            'bfgs' or 'ncg'.
            For method='umle': 'bfgs-b' or 'tnc' (constrained) or 'powell'
            (unconstrained).
            See notes.
        maxiter : int, optional
            The maximum number of function evaluations. Default is 35.
        full_output, disp, callback
            Forwarded unchanged to the parent class ``fit`` when
            method='mle'.
        tol : float
            The convergence tolerance.  Default is 1e-08.  Not a named
            argument here; presumably forwarded through **kwargs.

        Raises
        ------
        ValueError
            If `trend` is not one of "c", "ct", "ctt", "nc".

        Notes
        -----
        Sets ``self.penalty``, ``self.avobs``, ``self.laglen``,
        ``self.trendorder``, ``self.Y`` and ``self.X`` as side effects.
        For method='mle' the result of the parent class ``fit`` is
        returned; the other methods store their estimates on
        ``self.params``.

        The unconstrained solvers use a quadratic penalty (regardless if
        penalty kwd is True or False) in order to ensure that the solution
        stays within (-1,1).  The constrained solvers default to using a bound
        of (-.999,.999).

        See also
        --------
        scikits.statsmodels.model.LikelihoodModel.fit for more information
        on using the solvers.

        The below is the docstring from
        scikits.statsmodels.LikelihoodModel.fit
        """
        self.penalty = penalty
        method = method.lower()
        nobs = self.nobs
        if maxlag is None:
            # cast to int: the lag length is used as a slice index below
            maxlag = int(round(12*(nobs/100.)**(1/4.)))
        avobs = nobs - maxlag
        self.avobs = avobs
        laglen = maxlag
        self.laglen = laglen
        if demean:
            endog = self.endog.copy() # have to copy if demeaning
            mean = endog.mean()
            endog -= mean
            self.endog_mean = mean
        else:
            endog = self.endog
        # LHS
        Y = endog[laglen:, :]
        # make lagged RHS
        X = lagmat(endog, maxlag=laglen, trim='both')[:, 1:]
        if self.exog is not None:
            X = np.column_stack((self.exog[laglen:, :], X))
        # Handle constant, etc.  Reject unknown trends explicitly rather
        # than leaving trendorder unbound.
        trend_orders = {'nc': 0, 'c': 1, 'ct': 2, 'ctt': 3}
        if trend not in trend_orders:
            raise ValueError("trend %r not understood" % (trend,))
        trendorder = trend_orders[trend]
        X = add_trend(X, prepend=True, trend=trend)
        self.trendorder = trendorder

        self.Y = Y
        self.X = X

        if solver:
            solver = solver.lower()
        #TODO: allow user-specified penalty function
        if method == "mle":
            if not solver: # make default?
                solver = 'newton'
            # `is None` rather than truthiness: start_params may be an array
            if start_params is None:
                start_params = np.zeros((X.shape[1]))
            if solver in ['newton', 'bfgs', 'ncg']:
                return super(AR, self).fit(start_params=start_params,
                    method=solver, maxiter=maxiter, full_output=full_output,
                    disp=disp, callback=callback, **kwargs)
        elif method == "umle":
            #TODO: move this stuff up to LikelihoodModel.fit
            minfunc = lambda params: -self.loglike(params)
            bounds = [(-.999, .999)]   # assume stationarity
            if start_params is None:
                start_params = np.array([0]) # assumes AR(1)
            # Dispatch on the *solver*; `method` is always "umle" here.
            # NOTE(review): no default solver is defined for umle, so
            # nothing is estimated when solver is None - confirm intent.
            if solver == 'bfgs-b':
                retval = optimize.fmin_l_bfgs_b(minfunc, start_params,
                        approx_grad=True, bounds=bounds)
                self.params = retval[0]
                # retval[1] is the minimum of -loglike, so flip the sign
                self.llf = -retval[1]
            if solver == 'tnc':
                retval = optimize.fmin_tnc(minfunc, start_params,
                        approx_grad=True, bounds=bounds)
                self.params = retval[0]
            if solver == 'powell':
                retval = optimize.fmin_powell(minfunc, start_params)
                # indexing with None prepends an axis; presumably meant to
                # turn a 0-d result into a 1-d array - TODO confirm
                self.params = retval[None]
            #TODO: write regression tests for Pauli's branch so that
            # new line_search and optimize.nonlin can get put in.
            # http://projects.scipy.org/scipy/ticket/791
        elif method == "ols":
            arfit = OLS(Y, X).fit()
            params = arfit.params
            omega = None
            self.params = params
        elif method == "yw":
            params, omega = sm.regression.yule_walker(endog, order=maxlag,
                    method="mle", demean=False)
            self.params = params