def _estimate_svar(self, start_params, lags, maxiter, maxfun, trend='c',
                   solver="nm", override=False):
    """
    Fit the reduced-form VAR by least squares, then solve for A and B.

    Parameters
    ----------
    start_params : object
        Passed through unchanged to ``self._solve_AB``.
    lags : int
        Lag order used to build the regressor matrix.
    maxiter, maxfun : object
        Passed through unchanged to ``self._solve_AB``.
    trend : str or None
        Trend specification handed to ``util.get_trendorder`` and
        ``util.get_var_endog``.
    solver : str
        Passed through unchanged to ``self._solve_AB``.
    override : bool
        Passed through unchanged to ``self._solve_AB``.

    Returns
    -------
    SVARResults
        Results object built from the full ``self.endog``, the regressor
        matrix, the least-squares coefficients, the residual covariance
        and the solved ``A``/``B`` matrices with their masks.

    Notes
    -----
    The residual covariance is stored on ``self.sigma_u`` before
    ``self._solve_AB`` is called.
    """
    n_trend = util.get_trendorder(trend)
    endog = self.endog
    regressors = util.get_var_endog(endog, lags, trend=trend,
                                    has_constant='raise')
    targets = endog[lags:]

    # Least squares on the stacked system (Lutkepohl p75); noted in the
    # original as about 5x faster than the stated closed-form formula.
    var_params = np.linalg.lstsq(regressors, targets, rcond=-1)[0]
    residuals = targets - np.dot(regressors, var_params)

    # Covariance estimate of the white-noise process u with a
    # degrees-of-freedom correction: T - K*p - (number of trend terms).
    # Ref: Lutkepohl p.75
    # TODO: should give users the option to use a dof correction or not
    dof = len(targets) - (self.neqs * lags + n_trend)
    omega = np.dot(residuals.T, residuals) / dof
    self.sigma_u = omega

    A, B = self._solve_AB(start_params, override=override, solver=solver,
                          maxiter=maxiter, maxfun=maxfun)

    return SVARResults(endog, regressors, var_params, omega, lags,
                       names=self.endog_names, trend=trend,
                       dates=self.data.dates, model=self, A=A, B=B,
                       A_mask=self.A_mask, B_mask=self.B_mask)
def format_data_for_estimate_var(endog_list, exog_list, lag):
    """
    Stack VAR regressors and targets of several series into flat lists.

    Parameters
    ----------
    endog_list : sequence
        One endogenous array per series.
    exog_list : sequence
        Either empty (no exogenous data) or one exogenous array per
        series, each with the same length as the matching endog.
    lag : int
        Lag order passed to ``util.get_var_endog``.

    Returns
    -------
    target_data : list
        Rows of ``endog[-nobs:]`` for every series, in input order.
    explanatory_data : list
        Rows of the matching regressor matrices, in input order.

    Raises
    ------
    ValueError
        If an endog/exog pair differs in length, or a series does not
        have more observations than ``lag``.
    """
    k_trend = 1
    target_data = []
    explanatory_data = []
    has_exog = len(exog_list) > 0

    for i, endog in enumerate(endog_list):
        exog = exog_list[i] if has_exog else None
        nobs = len(endog) - lag

        if exog is not None and len(endog) != len(exog):
            raise ValueError(
                "Endog has length {} but exog has length {} - i={}".format(
                    len(endog), len(exog), i))
        if nobs <= 0:
            raise ValueError(
                "Endog only has {} observations but requesting with {} lags - i={}"
                .format(len(endog), lag, i))

        z = util.get_var_endog(endog[-nobs - lag:], lag,
                               has_constant='raise')

        if exog is not None:
            tail_exog = exog[-nobs:]
            x = np.column_stack((
                util.get_var_endog(tail_exog, 0, trend='nc',
                                   has_constant='raise'),
                tail_exog,
            ))
            # Column layout after merging: [trend | exog | lagged endog].
            lagged = z
            n_x = x.shape[1]
            z = np.empty((x.shape[0], n_x + lagged.shape[1]))
            z[:, :k_trend] = lagged[:, :k_trend]
            z[:, k_trend:k_trend + n_x] = x
            z[:, k_trend + n_x:] = lagged[:, k_trend:]

        for col in range(k_trend):
            # A column that increases by exactly 1 is treated as a linear
            # trend and shifted by the lag order.
            if (np.diff(z[:, col]) == 1).all():
                z[:, col] += lag
            # Same shift for a column whose square root increases by 1
            # (quadratic trend).
            if (np.diff(np.sqrt(z[:, col])) == 1).all():
                z[:, col] = (np.sqrt(z[:, col]) + lag)**2

        explanatory_data.extend(z)
        target_data.extend(endog[-nobs:])

    return target_data, explanatory_data
def _estimate_svar(self, start_params, lags, maxiter, maxfun, trend="c",
                   solver="nm", override=False):
    """
    Fit the reduced-form VAR by least squares, then solve for A and B.

    Parameters
    ----------
    start_params : object
        Passed through unchanged to ``self._solve_AB``.
    lags : int
        Lag order used to build the regressor matrix.
    maxiter, maxfun : object
        Passed through unchanged to ``self._solve_AB``.
    trend : str or None
        Trend specification handed to ``util.get_trendorder`` and
        ``util.get_var_endog``.
    solver : str
        Passed through unchanged to ``self._solve_AB``.
    override : bool
        Passed through unchanged to ``self._solve_AB``.

    Returns
    -------
    SVARResults
        Results object built from ``self.endog``, the regressor matrix,
        the least-squares coefficients, the residual covariance and the
        solved ``A``/``B`` matrices with their masks.

    Notes
    -----
    The residual covariance is stored on ``self.sigma_u`` before
    ``self._solve_AB`` is called.
    """
    k_trend = util.get_trendorder(trend)
    y = self.endog
    z = util.get_var_endog(y, lags, trend=trend)
    y_sample = y[lags:]

    # Lutkepohl p75, about 5x faster than stated formula.
    # rcond is passed explicitly: omitting it triggers a FutureWarning on
    # NumPy 1.14-1.x and silently switches the singular-value cutoff on
    # NumPy 2.x.  rcond=-1 keeps the legacy (machine-precision) cutoff.
    var_params = np.linalg.lstsq(z, y_sample, rcond=-1)[0]
    resid = y_sample - np.dot(z, var_params)

    # Unbiased estimate of covariance matrix $\Sigma_u$ of the white noise
    # process $u$
    # equivalent definition
    # .. math:: \frac{1}{T - Kp - 1} Y^\prime (I_T - Z (Z^\prime Z)^{-1}
    # Z^\prime) Y
    # Ref: Lutkepohl p.75
    # df_resid right now is T - Kp - 1, which is a suggested correction
    avobs = len(y_sample)
    df_resid = avobs - (self.neqs * lags + k_trend)

    sse = np.dot(resid.T, resid)
    # TODO: should give users the option to use a dof correction or not
    omega = sse / df_resid
    self.sigma_u = omega

    A, B = self._solve_AB(start_params, override=override, solver=solver,
                          maxiter=maxiter, maxfun=maxfun)
    A_mask = self.A_mask
    B_mask = self.B_mask

    return SVARResults(
        y,
        z,
        var_params,
        omega,
        lags,
        names=self.endog_names,
        trend=trend,
        dates=self.data.dates,
        model=self,
        A=A,
        B=B,
        A_mask=A_mask,
        B_mask=B_mask,
    )
def _estimate_var(self, lags, offset=0, trend="c"):
    """
    Estimate a VAR of order ``lags`` by least squares.

    Parameters
    ----------
    lags : int
        Lag order used to build the regressor matrix.
    offset : int
        Periods to drop from beginning-- for order selection so it's an
        apples-to-apples comparison
    trend : str or None
        Trend specification handed to ``util.get_trendorder`` and
        ``util.get_var_endog``.

    Returns
    -------
    VARResultsWrapper
        Wrapper around the ``VARResults`` built from the fit.

    Raises
    ------
    ValueError
        If ``offset`` is negative.

    Notes
    -----
    Sets ``self.k_trend`` as a side effect.
    """
    # have to do this again because select_order doesn't call fit
    self.k_trend = k_trend = util.get_trendorder(trend)

    if offset < 0:  # pragma: no cover
        raise ValueError("offset must be >= 0")

    y = self.y[offset:]
    z = util.get_var_endog(y, lags, trend=trend)
    y_sample = y[lags:]

    # Lutkepohl p75, about 5x faster than stated formula.
    # rcond is passed explicitly: omitting it triggers a FutureWarning on
    # NumPy 1.14-1.x and silently switches the singular-value cutoff on
    # NumPy 2.x.  rcond=-1 keeps the legacy (machine-precision) cutoff.
    params = np.linalg.lstsq(z, y_sample, rcond=-1)[0]
    resid = y_sample - np.dot(z, params)

    # Unbiased estimate of covariance matrix $\Sigma_u$ of the white noise
    # process $u$
    # equivalent definition
    # .. math:: \frac{1}{T - Kp - 1} Y^\prime (I_T - Z (Z^\prime Z)^{-1}
    # Z^\prime) Y
    # Ref: Lutkepohl p.75
    # df_resid right now is T - Kp - 1, which is a suggested correction
    avobs = len(y_sample)
    df_resid = avobs - (self.neqs * lags + k_trend)

    sse = np.dot(resid.T, resid)
    omega = sse / df_resid

    varfit = VARResults(
        y,
        z,
        params,
        omega,
        lags,
        names=self.endog_names,
        trend=trend,
        dates=self.data.dates,
        model=self,
    )
    return VARResultsWrapper(varfit)
def _estimate_var(self, lags, offset=0, trend='c'):
    """
    Estimate a VAR of order ``lags`` by least squares.

    Parameters
    ----------
    lags : int
        Lag order used to build the regressor matrix.
    offset : int
        Periods to drop from beginning-- for order selection so it's an
        apples-to-apples comparison
    trend : str or None
        Trend specification handed to ``util.get_trendorder`` and
        ``util.get_var_endog``.

    Returns
    -------
    VARResultsWrapper
        Wrapper around the ``VARResults`` built from the fit.

    Raises
    ------
    ValueError
        If ``offset`` is negative.

    Notes
    -----
    Sets ``self.k_trend`` as a side effect.
    """
    # have to do this again because select_order doesn't call fit
    self.k_trend = k_trend = util.get_trendorder(trend)

    if offset < 0:  # pragma: no cover
        raise ValueError('offset must be >= 0')

    y = self.y[offset:]
    z = util.get_var_endog(y, lags, trend=trend)
    y_sample = y[lags:]

    # Lutkepohl p75, about 5x faster than stated formula.
    # rcond is passed explicitly: omitting it triggers a FutureWarning on
    # NumPy 1.14-1.x and silently switches the singular-value cutoff on
    # NumPy 2.x.  rcond=-1 keeps the legacy (machine-precision) cutoff.
    params = np.linalg.lstsq(z, y_sample, rcond=-1)[0]
    resid = y_sample - np.dot(z, params)

    # Unbiased estimate of covariance matrix $\Sigma_u$ of the white noise
    # process $u$
    # equivalent definition
    # .. math:: \frac{1}{T - Kp - 1} Y^\prime (I_T - Z (Z^\prime Z)^{-1}
    # Z^\prime) Y
    # Ref: Lutkepohl p.75
    # df_resid right now is T - Kp - 1, which is a suggested correction
    avobs = len(y_sample)
    df_resid = avobs - (self.neqs * lags + k_trend)

    sse = np.dot(resid.T, resid)
    omega = sse / df_resid

    varfit = VARResults(y, z, params, omega, lags,
                        names=self.endog_names, trend=trend,
                        dates=self.data.dates, model=self)
    return VARResultsWrapper(varfit)
def predict(self, params, start=None, end=None, lags=1, trend='c'):
    """
    Returns in-sample predictions or forecasts

    Parameters
    ----------
    params : ndarray
        Coefficient array; the first ``k_trend`` rows are read as the
        trend coefficients and the remainder is reshaped to
        ``(lags, neqs, neqs)``.
    start : int or None
        First period to predict.  Defaults to ``lags`` when None.
    end : int or None
        Last period to predict; resolved by
        ``self._get_prediction_index``.
    lags : int
        Lag order of the model.
    trend : str or None
        Trend specification handed to ``util.get_trendorder`` and
        ``util.get_var_endog``.

    Returns
    -------
    ndarray
        Predicted values, one row per period; an empty array when the
        resolved range contains no periods.

    Raises
    ------
    ValueError
        If the resolved ``end`` lies before the resolved ``start``.
    """
    # Bind k_ar before the default for ``start`` is resolved; it was
    # previously read before assignment, so start=None always failed.
    k_ar = lags
    if start is None:
        start = k_ar

    # Handle start, end
    start, end, out_of_sample, prediction_index = (
        self._get_prediction_index(start, end))

    if end < start:
        raise ValueError("end is before start")
    if end == start + out_of_sample:
        return np.array([])

    k_trend = util.get_trendorder(trend)
    k = self.neqs

    predictedvalues = np.zeros((end + 1 - start + out_of_sample, k))
    if k_trend != 0:
        intercept = params[:k_trend]
        predictedvalues += intercept

    y = self.y
    X = util.get_var_endog(y, lags, trend=trend, has_constant='raise')
    fittedvalues = np.dot(X, params)

    # Fitted values start at observation k_ar, so shift the requested
    # window by k_ar when copying the in-sample part.
    fv_start = start - k_ar
    pv_end = min(len(predictedvalues), len(fittedvalues) - fv_start)
    fv_end = min(len(fittedvalues), end - k_ar + 1)
    predictedvalues[:pv_end] = fittedvalues[fv_start:fv_end]

    if not out_of_sample:
        return predictedvalues

    # fit out of sample
    # NOTE(review): ``intercept`` is only bound when k_trend != 0, so an
    # out-of-sample forecast with no trend terms raises NameError here.
    # The right fallback depends on what ``forecast`` accepts -- confirm.
    y = y[-k_ar:]
    coefs = params[k_trend:].reshape((k_ar, k, k)).swapaxes(1, 2)
    predictedvalues[pv_end:] = forecast(y, coefs, intercept, out_of_sample)
    return predictedvalues
def _estimate_var(self, lags, offset=0, trend='c'):
    """
    Estimate a VAR of order ``lags`` by constrained least squares (SLSQP).

    The coefficient matrix minimises the element-wise squared error
    between ``z @ c`` and the sample of ``endog``, optionally subject to
    box bounds (``self.bounds``) and to zero restrictions derived from
    ``self.adjacency``.

    Parameters
    ----------
    lags : int
        Lags of the endogenous variable.
    offset : int
        Periods to drop from beginning-- for order selection so it's an
        apples-to-apples comparison
    trend : str or None
        Trend specification handed to ``util.get_trendorder`` and
        ``util.get_var_endog``.

    Returns
    -------
    VARResultsWrapper
        Wrapper around the ``VARResults`` built from the fit.

    Raises
    ------
    ValueError
        If ``offset`` is negative.

    Notes
    -----
    Sets ``self.k_trend`` as a side effect.  The optimiser is run with
    ``disp=True`` and therefore prints its convergence report.
    """
    # have to do this again because select_order doesn't call fit
    self.k_trend = k_trend = util.get_trendorder(trend)

    if offset < 0:  # pragma: no cover
        raise ValueError('offset must be >= 0')

    nobs = self.n_totobs - lags - offset
    endog = self.endog[offset:]
    exog = None if self.exog is None else self.exog[offset:]
    z = util.get_var_endog(endog, lags, trend=trend,
                           has_constant='raise')
    if exog is not None:
        # TODO: currently only deterministic terms supported (exoglags==0)
        # and since exoglags==0, x will be an array of size 0.
        x = util.get_var_endog(exog[-nobs:], 0, trend="nc",
                               has_constant="raise")
        x_inst = exog[-nobs:]
        x = np.column_stack((x, x_inst))
        del x_inst  # free memory
        # Column layout after merging: [trend | exog | lagged endog].
        temp_z = z
        z = np.empty((x.shape[0], x.shape[1] + z.shape[1]))
        z[:, :self.k_trend] = temp_z[:, :self.k_trend]
        z[:, self.k_trend:self.k_trend + x.shape[1]] = x
        z[:, self.k_trend + x.shape[1]:] = temp_z[:, self.k_trend:]
        del temp_z, x  # free memory

    # the following modification of z is necessary to get the same results
    # as JMulTi for the constant-term-parameter...
    for i in range(self.k_trend):
        if (np.diff(z[:, i]) == 1).all():  # modify the trend-column
            z[:, i] += lags
        # make the same adjustment for the quadratic term
        if (np.diff(np.sqrt(z[:, i])) == 1).all():
            z[:, i] = (np.sqrt(z[:, i]) + lags)**2

    y_sample = endog[lags:]

    # --- constrained least-squares fit of the coefficient matrix c ---
    num_rows = z.shape[1]
    num_nodes = y_sample.shape[1]
    num_coefs = num_rows * num_nodes

    def loss(c):
        # The optimiser works on a flat vector; restore the matrix shape.
        c = c.reshape((num_rows, num_nodes))
        return np.sum(np.square(np.dot(z, c) - y_sample))

    # scipy.optimize.minimize expects a 1-D starting point; recent SciPy
    # releases reject a 2-D x0 outright.
    c0 = np.zeros(num_coefs)

    # The same (lower, upper) pair is applied to every coefficient.
    if self.bounds is not None:
        bnds = np.tensordot(np.ones(num_coefs), self.bounds, axes=0)
    else:
        bnds = None

    # Zero restrictions are only needed when the graph is not fully
    # connected; otherwise the fit is bounds-only.
    cons = ()
    if self.adjacency is not None and not self.adjacency.all():
        # H flags the coefficients that must be zero.  Rows belonging to
        # deterministic terms (trend and exog) are left unconstrained;
        # each lag block repeats the complement of the transposed
        # adjacency matrix.  Sizing the leading block from num_rows keeps
        # H aligned with c for any trend/exog configuration (it is one
        # row for trend='c' without exog, as before).
        n_deterministic = num_rows - lags * num_nodes
        H = np.vstack(
            [np.zeros((n_deterministic, num_nodes), dtype=int)]
            + [1 - self.adjacency.T] * lags
        )
        cons = {
            'type': 'eq',
            'fun': lambda c: np.sum(
                np.square(H * c.reshape((num_rows, num_nodes)))),
        }

    res = minimize(loss, c0, method='SLSQP', constraints=cons,
                   bounds=bnds, options={'disp': True})
    params = res.x.reshape((num_rows, num_nodes))

    resid = y_sample - np.dot(z, params)

    # Unbiased estimate of covariance matrix $\Sigma_u$ of the white noise
    # process $u$
    # equivalent definition
    # .. math:: \frac{1}{T - Kp - 1} Y^\prime (I_T - Z (Z^\prime Z)^{-1}
    # Z^\prime) Y
    # Ref: Lutkepohl p.75
    # df_resid right now is T - Kp - 1, which is a suggested correction
    avobs = len(y_sample)
    if exog is not None:
        # Exogenous columns count as additional deterministic regressors.
        k_trend += exog.shape[1]
    df_resid = avobs - (self.neqs * lags + k_trend)

    sse = np.dot(resid.T, resid)
    omega = sse / df_resid

    varfit = VARResults(endog, z, params, omega, lags,
                        names=self.endog_names, trend=trend,
                        dates=self.data.dates, model=self, exog=self.exog)
    return VARResultsWrapper(varfit)
# Scratch script: rebuilds by hand the regressor matrix that VAR's
# estimation step would build, for a fixed lag order.
exog = np.hstack(exog)
var_model = VAR(data, exog)

# Fixed settings for the experiment.
p = 1
maxlags = 5
k_trend = util.get_trendorder('c')

n_totobs = len(data)
lags = p
offset = maxlags + 1 - p
nobs = n_totobs - lags - offset

# Drop the leading observations from both series.
data, exog = data[offset:], exog[offset:]
print(data)

# Hand-rolled lag matrix: each row holds the previous `lags`
# observations, most recent first, flattened.
lag_rows = []
for t in range(lags, len(data)):
    lag_rows.append(data[t - lags:t][::-1].ravel())
Z = np.array(lag_rows)
print(Z)

z = util.get_var_endog(data, lags, trend='c', has_constant='raise')
print(z.shape)

# The script stops here; everything below is unreachable while this
# exit() call is in place.
exit()

x_inst = exog[-nobs:]
x = util.get_var_endog(x_inst, 0, trend="nc", has_constant="raise")
x = np.column_stack((x, x_inst))

# Column layout after merging: [trend | exog | lagged endog].
temp_z = z
n_x = x.shape[1]
z = np.empty((x.shape[0], n_x + temp_z.shape[1]))
z[:, :k_trend] = temp_z[:, :k_trend]
z[:, k_trend:k_trend + n_x] = x
z[:, k_trend + n_x:] = temp_z[:, k_trend:]

y_sample = data[lags:]
params = np.linalg.lstsq(z, y_sample, rcond=1e-15)[0]
print(z.shape)