def fit(self, method="ols", structural=None, dfk=None, maxlag=None, ic=None,
        trend="c"):
    """
    Fit the VAR model equation by equation.

    Parameters
    ----------
    method : str
        "ols" fit equation by equation with OLS
        "yw" fit with yule walker
        "mle" fit with unconditional maximum likelihood
        Only OLS is currently implemented; the argument is accepted but
        not consulted.
    structural : str, optional
        If 'BQ' - Blanchard - Quah identification scheme is used.
        This imposes long run restrictions.  Experimental.
    dfk : int or bool, optional
        Small-sample bias correction.  If None, dfk = 0.
        If True, dfk is the number of columns of the regressor matrix
        built by this call (lagged endogenous variables, exogenous
        variables and trend terms).  The user can also provide a number
        for dfk.  Omega is divided by (avobs - dfk).
    maxlag : int, optional
        The number of lags that are fit for each equation.
        The default is round(12 * (nobs/100.)**(1./4)).
    ic : str {"aic","bic","hq", "fpe"} or None, optional
        Information criterion for lag length selection.
        Not yet implemented for VAR; the argument is ignored.
    trend : str {"c", "ct", "ctt", "nc"}
        "c" - add constant
        "ct" - constant and trend
        "ctt" - constant, linear and quadratic trend
        "nc" - no constant, no trend
        Note that these are prepended to the columns of the dataset.

    Returns
    -------
    VARMAResults
        Built from this model, the list of per-equation regression
        results and the (neqs x number of regressors) coefficient array.

    Raises
    ------
    ValueError
        If `trend` is not one of the supported strings, or if `maxlag`
        is negative or leaves no observations to estimate on.

    Notes
    -----
    Sets `dfk`, `avobs`, `laglen`, `trendorder`, `Y` and `X` on the model.

    Not sure what to do with structural. Restrictions would be on
    coefficients or on omega.  So should it be short run (array), long
    run (array), or sign (str)?  Recursive?
    """
    # Validate before touching any state so a bad call leaves the model
    # unchanged.
    trend_orders = {"nc": 0, "c": 1, "ct": 2, "ctt": 3}
    if trend not in trend_orders:
        raise ValueError("trend %r not understood; use one of 'c', 'ct', "
                         "'ctt' or 'nc'" % (trend,))
    trendorder = trend_orders[trend]

    nobs = int(self.nobs)
    # The default lag length has to be resolved before anything that
    # depends on it (avobs, slicing, the shape of X) is computed.
    if maxlag is None:
        maxlag = int(round(12 * (nobs / 100.) ** (1 / 4.)))
    else:
        maxlag = int(maxlag)
    if maxlag < 0 or maxlag >= nobs:
        raise ValueError("maxlag must satisfy 0 <= maxlag < nobs; got "
                         "maxlag=%d with nobs=%d" % (maxlag, nobs))

    #TODO: change when IC selection is sorted
    laglen = maxlag
    avobs = nobs - laglen   # available obs (sample - pre-sample)
    self.avobs = avobs
    self.laglen = laglen

    nvars = int(self.nvars)
    neqs = int(self.neqs)
    endog = self.endog
    Y = endog[laglen:, :]

    # Make lagged endogenous RHS: block `lag` holds endog lagged by
    # lag + 1 periods.
    #NOTE: this loop is faster than lagmat
    X = np.zeros((avobs, nvars * laglen))
    for lag in range(laglen):
        X[:, lag * nvars:(lag + 1) * nvars] = \
            endog[(laglen - 1) - lag:(nobs - 1) - lag, :]

    # Prepend exogenous variables
    if self.exog is not None:
        X = np.column_stack((self.exog[laglen:, :], X))

    # Handle constant, etc.
    X = add_trend(X, prepend=True, trend=trend)

    # dfk=True needs the regressor matrix of *this* fit, so it can only
    # be resolved once X exists.
    #TODO: change when we accept equations for endog and exog
    if dfk is None:
        self.dfk = 0
    elif dfk is True:
        self.dfk = X.shape[1]
    else:
        self.dfk = dfk

    self.trendorder = trendorder
    self.Y = Y
    self.X = X

    # Column index of the first lag coefficient: trend terms and exogenous
    # variables are prepended ahead of the lags.
    lagstart = trendorder
    if self.exog is not None:
        lagstart += self.exog.shape[1]

    #NOTE: just use GLS directly, one regression per equation
    results = [GLS(y, X).fit() for y in Y.T]
    params = np.vstack([res.params for res in results])

    #TODO: For coefficient restrictions, will have to use SUR
    #TODO: make a separate SVAR class or this is going to get really messy
    if structural and structural.lower() == 'bq':
        # Only the lag coefficients enter the long-run restriction.
        lag_params = params[:, lagstart:]
        # phi[i] is the (neqs x neqs) coefficient matrix of lag i + 1
        phi = np.swapaxes(lag_params.reshape(neqs, laglen, neqs), 1, 0)
        I_phi_inv = np.linalg.inv(np.eye(neqs) - phi.sum(0))
        resid = np.column_stack([res.resid for res in results])
        omega = np.dot(resid.T, resid) / (avobs - self.dfk)
        shock_var = chain_dot(I_phi_inv, omega, I_phi_inv.T)
        R = np.linalg.cholesky(shock_var)
        phi_normalize = np.dot(I_phi_inv, R)
        normalized = np.zeros_like(phi)
        #TODO: apply a dot product along an axis?
        for i in range(laglen):
            normalized[i] = np.dot(phi_normalize, phi[i])
        # NOTE(review): trend/exog coefficients are left as estimated so
        # params keeps one column per column of X -- confirm that this is
        # what VARMAResults expects for the structural case.
        params = params.copy()
        params[:, lagstart:] = np.swapaxes(normalized, 1, 0).reshape(
            neqs, laglen * neqs)

    return VARMAResults(self, results, params)

def fit(self, maxlag=None, method='ols', ic=None, trend='c', demean=True,
        penalty=False, start_params=None, solver=None, maxiter=35,
        full_output=1, disp=1, callback=None, **kwargs):
    """
    Fit an AR(p) process by OLS, Yule-Walker or maximum likelihood.

    Parameters
    ----------
    maxlag : int, optional
        Number of lags to fit.  Default is round(12*(nobs/100.)**(1/4.)).
    method : str {'ols', 'yw', 'mle', 'umle'}, optional
        ols - Ordinary Least Squares
        yw - Yule-Walker
        mle - conditional maximum likelihood
        umle - unconditional maximum likelihood
    ic : str or None, optional
        Accepted but currently ignored.
    trend : str {'c', 'ct', 'ctt', 'nc'}
        Deterministic terms prepended to the regressors: constant,
        constant and trend, constant with linear and quadratic trend,
        or none.
    demean : bool
        If True, the overall mean of `endog` is subtracted before fitting
        and stored as `endog_mean`.
    penalty : bool
        Whether or not to use a penalty function.  Default is False,
        though this is ignored at the moment and the penalty is always
        used if appropriate.  See notes.
    start_params : array-like, optional
        A first guess on the parameters.  For 'mle' the default is a
        vector of zeros; for 'umle' it is a single zero (AR(1)).
    solver : str or None, optional
        For method='mle': 'newton' (the default), 'bfgs' or 'ncg'.
        For method='umle': 'bfgs-b', 'tnc' or 'powell'; there is no
        default, so one must be given.
    maxiter : int, optional
        The maximum number of function evaluations.  Default is 35.
        Used by method='mle' only.
    full_output, disp, callback, **kwargs
        Passed through to the parent class `fit` for method='mle'
        (for example a convergence tolerance `tol`).

    Returns
    -------
    For method='mle' the return value of the parent class `fit`.
    The other methods store the estimate in `params` and return None.

    Raises
    ------
    ValueError
        If `method` or `trend` is not recognised, or `solver` is not
        supported for the chosen method.

    Notes
    -----
    The unconstrained solvers use a quadratic penalty (regardless if
    penalty kwd is True or False) in order to ensure that the solution
    stays within (-1,1).  The constrained solvers default to using a bound
    of (-.999,.999).

    Sets `penalty`, `avobs`, `laglen`, `trendorder`, `Y` and `X` on the
    model (and `endog_mean` when demeaning).

    See also
    --------
    scikits.statsmodels.model.LikelihoodModel.fit for more information
    on using the solvers.

    The below is the docstring from scikits.statsmodels.LikelihoodModel.fit
    """
    # Validate every option up front so a bad call fails with a clear
    # message and leaves the model untouched.
    method = method.lower()
    if method not in ("ols", "yw", "mle", "umle"):
        raise ValueError("method %r not understood; use 'ols', 'yw', "
                         "'mle' or 'umle'" % (method,))
    trend_orders = {"nc": 0, "c": 1, "ct": 2, "ctt": 3}
    if trend not in trend_orders:
        raise ValueError("trend %r not understood; use one of 'c', 'ct', "
                         "'ctt' or 'nc'" % (trend,))
    trendorder = trend_orders[trend]

    if solver:
        solver = solver.lower()
    if method == "mle":
        if not solver:  # make default?
            solver = 'newton'
        supported = ('newton', 'bfgs', 'ncg')
    elif method == "umle":
        supported = ('bfgs-b', 'tnc', 'powell')
    else:
        supported = None
    if supported is not None and solver not in supported:
        raise ValueError("solver %r is not supported for method %r; use "
                         "one of %s" % (solver, method, ", ".join(supported)))

    self.penalty = penalty
    nobs = self.nobs
    # The lag length is used for slicing, so it must be an integer.
    if maxlag is None:
        maxlag = int(round(12 * (nobs / 100.) ** (1 / 4.)))
    else:
        maxlag = int(maxlag)
    avobs = nobs - maxlag
    self.avobs = avobs
    laglen = maxlag
    self.laglen = laglen

    if demean:
        # Subtracting out of place leaves self.endog untouched and also
        # works when endog has an integer dtype.
        mean = self.endog.mean()
        endog = self.endog - mean
        self.endog_mean = mean
    else:
        endog = self.endog

    # LHS
    Y = endog[laglen:, :]
    # make lagged RHS
    X = lagmat(endog, maxlag=laglen, trim='both')[:, 1:]
    if self.exog is not None:
        X = np.column_stack((self.exog[laglen:, :], X))
    # Handle constant, etc.
    X = add_trend(X, prepend=True, trend=trend)
    self.trendorder = trendorder
    self.Y = Y
    self.X = X

    #TODO: allow user-specified penalty function
    if method == "mle":
        # `is None` rather than truthiness: an array start vector has no
        # unambiguous truth value.
        if start_params is None:
            start_params = np.zeros(X.shape[1])
        return super(AR, self).fit(start_params=start_params,
                                   method=solver, maxiter=maxiter,
                                   full_output=full_output, disp=disp,
                                   callback=callback, **kwargs)
    elif method == "umle":
        #TODO: move this stuff up to LikelihoodModel.fit
        minfunc = lambda params: -self.loglike(params)
        if start_params is None:
            start_params = np.zeros(1)  # assumes AR(1)
        start_params = np.atleast_1d(start_params)
        # assume stationarity: one bound per parameter
        bounds = [(-.999, .999)] * len(start_params)
        if solver == 'bfgs-b':
            retval = optimize.fmin_l_bfgs_b(minfunc, start_params,
                                            approx_grad=True, bounds=bounds)
            self.params = retval[0]
            # minfunc is the negative log-likelihood, so flip the sign of
            # the minimum to recover the log-likelihood itself.
            self.llf = -retval[1]
        elif solver == 'tnc':
            retval = optimize.fmin_tnc(minfunc, start_params,
                                       approx_grad=True, bounds=bounds)
            self.params = retval[0]
        else:  # 'powell'
            retval = optimize.fmin_powell(minfunc, start_params)
            # A scalar result for a single parameter is promoted to 1-D;
            # an already 1-D result is stored unchanged.
            self.params = np.atleast_1d(retval)
        #TODO: write regression tests for Pauli's branch so that
        # new line_search and optimize.nonlin can get put in.
        # http://projects.scipy.org/scipy/ticket/791
    elif method == "ols":
        self.params = OLS(Y, X).fit().params
    else:  # "yw"
        params, _ = sm.regression.yule_walker(endog, order=maxlag,
                                              method="mle", demean=False)
        self.params = params