def LL_calc(self, panel):
    """Compute the total panel log-likelihood at the current arguments.

    Pipeline: ARMA-filter the regression residuals, apply random effects
    (individual and time), build the log-variance from the GARCH recursion
    plus heteroskedasticity factors, then delegate the per-observation LL
    to `cll.LL`. Side effects: stores intermediate matrices on `self` via
    `add_variables`, sets `self.dlnv_pos` and `self.LL_all`, and applies
    the tobit adjustment in place.

    Returns the summed log-likelihood over included observations, or None
    if the ARMA matrices could not be computed or the LL overflowed.
    """
    X = panel.XIV
    matrices = self.arma_calc(panel)
    if matrices is None:
        # ARMA polynomial inversion failed at these parameters.
        return None
    AMA_1, AMA_1AR, GAR_1, GAR_1MA = matrices
    # Idea for IV: calculate Z*u throughout. Maximize total sum of LL.
    u = panel.Y - cf.dot(X, self.args.args_d['beta'])
    # ARMA-filtered residuals:
    e = panel.arma_dot.dot(AMA_1AR, u, self)
    # Add individual and time random effects, masked to included obs:
    e_RE = (e + self.re_obj_i.RE(e, panel) + self.re_obj_t.RE(e, panel)) * panel.included[3]
    # Tiny constant where e_RE == 0 guards downstream log/division:
    e_REsq = (e_RE**2 + (e_RE == 0) * 1e-18)
    grp = self.variance_RE(panel, e_REsq)  # experimental
    W_omega = cf.dot(panel.W_a, self.args.args_d['omega'])
    if panel.options.RE_in_GARCH.value:
        # GARCH recursion on the RE-adjusted residuals:
        lnv_ARMA = self.garch(panel, GAR_1MA, e_RE)
    else:
        lnv_ARMA = self.garch(panel, GAR_1MA, e)
    lnv = W_omega + lnv_ARMA
    lnv += grp
    LL_full, v, v_inv, self.dlnv_pos = cll.LL(panel, lnv, e_REsq, e_RE)
    self.tobit(panel, LL_full)
    LL = np.sum(LL_full * panel.included[3])
    self.LL_all = np.sum(LL_full)
    self.add_variables(panel, matrices, u, e, lnv_ARMA, lnv, v, W_omega, grp, e_RE, e_REsq, v_inv, LL_full)
    if abs(LL) > 1e+100:
        # Numerically exploded likelihood — treat as invalid point.
        return None
    return LL
def standardize(self, panel, reverse_difference=False):
    """Attach standardized (ARIMA-E-GARCH transformed, random-effects
    adjusted) versions of X, Y and the error terms to self.

    If `reverse_difference` is True and the ARIMA difference order d > 0,
    the standardized variables are converted back to the original,
    undifferenced order. This may be useful if the predicted values are to
    be used in another differenced regression.

    Idempotent: returns immediately if already standardized.
    """
    if hasattr(self, 'Y_st'):
        return  # already done
    # NOTE(review): the original computed `m` (lost_obs / intercept /
    # mean of Y) and unpacked panel.X.shape here, but never used either;
    # removed as dead code.
    # BUGFIX: `standardize_variable(panel, X, norm=False,
    # reverse_difference=False)` — the original passed `reverse_difference`
    # as the third positional argument, where it bound to `norm`. Pass it
    # by keyword so it reaches the intended parameter.
    self.Y_st, self.Y_st_long = self.standardize_variable(
        panel, panel.Y, reverse_difference=reverse_difference)
    self.X_st, self.X_st_long = self.standardize_variable(
        panel, panel.X, reverse_difference=reverse_difference)
    self.XIV_st, self.XIV_st_long = self.standardize_variable(
        panel, panel.XIV, reverse_difference=reverse_difference)
    # Predictions in standardized and raw space:
    self.Y_pred_st = cf.dot(self.X_st, self.args.args_d['beta'])
    self.Y_pred = cf.dot(panel.X, self.args.args_d['beta'])
    # Long (stacked) versions for output/diagnostics; self.e_norm is
    # assumed to have been set elsewhere — TODO confirm against caller.
    self.e_norm_long = self.stretch_variable(panel, self.e_norm)
    self.Y_pred_st_long = self.stretch_variable(panel, self.Y_pred_st)
    self.Y_pred_long = cf.dot(panel.input.X, self.args.args_d['beta'])
    self.e_long = panel.input.Y - self.Y_pred_long
    # Fit statistics; presumably called for side effects on self — the
    # original discarded the return values. TODO confirm.
    self.goodness_of_fit(panel, False)
    self.goodness_of_fit(panel, True)
def set_instrumentals(self):
    """Set the instrumental-variable regressor matrix self.XIV.

    With a single instrument column in Z, no IV estimation is done and
    XIV is simply X. Otherwise both X and XIV are replaced by Z.

    NOTE(review): the original contained a block after the `return` that
    computed a 2SLS projection Z(Z'Z)^-1 Z'X; it was unreachable and
    referenced an undefined name `panel`, so it has been removed.
    """
    if self.input.Z.shape[1] == 1:
        self.XIV = self.X
    else:
        # NOTE(review): assigning X = Z as well mirrors the (removed)
        # unreachable 2SLS code — verify this is the intended behavior.
        self.XIV = self.X = self.Z
def square_and_norm(X):
    """Return X'X normalized to unit length.

    Comparable to a correlation matrix, except that the means are not
    subtracted before squaring.
    """
    _, _, k = X.shape
    # Column-wise root-sum-of-squares over the first two (N, T) axes:
    col_norms = np.sqrt((X**2).sum(axis=(0, 1)))
    col_norms.resize((k, 1))
    scale = col_norms * col_norms.T
    # Small epsilon avoids division by zero for all-zero columns:
    return cf.dot(X, X) / (scale + 1e-200)
def OLS(panel, X, Y, add_const=False, return_rsq=False, return_e=False, c=None, robust_se_lags=0):
    """Run pooled OLS of Y on X over the panel.

    If `add_const`, a constant column (the inclusion mask `c`) is
    PREPENDED as the first regressor (the original docstring incorrectly
    said "last"). `c` defaults to panel.included[3] and masks out excluded
    observations.

    Returns:
        beta                          (default)
        (beta, Rsq)                   if return_rsq
        (beta, e)                     residuals, if return_e
        (beta, se_robust, se)         if robust_se_lags > 0

    Raises:
        RuntimeError: when X'X is singular, naming the offending variables.
    """
    if c is None:
        c = panel.included[3]
    k = X.shape[2]
    # NOTE(review): removed unused locals N, T and NT = panel.NT.
    if add_const:
        # The constant column is the inclusion mask itself, placed first.
        X = np.concatenate((c, X), 2)
        k = k + 1
    # Zero out excluded observations:
    X = X * c
    Y = Y * c
    XX = cf.dot(X, X)
    XY = cf.dot(X, Y)
    try:
        beta = np.linalg.solve(XX, XY)
    except np.linalg.LinAlgError:
        s = get_singular_list(panel, X)
        raise RuntimeError(
            "The following variables caused singularity runtime and must be removed: " + s)
    if return_rsq or return_e or robust_se_lags:
        e = (Y - cf.dot(X, beta)) * c
        if return_rsq:
            v0 = panel.var(e, included=c)
            v1 = panel.var(Y, included=c)
            Rsq = 1 - v0 / v1
            return beta, Rsq
        elif return_e:
            return beta, e * c
        elif robust_se_lags:
            XXInv = np.linalg.inv(XX)
            se_robust, se, V = robust_se(panel, robust_se_lags, XXInv, X * e)
            return beta, se_robust.reshape(k, 1), se.reshape(k, 1)
    return beta
def set_GARCH(panel, initargs, u, m):
    """Initialize GARCH starting values in `initargs` from residuals u.

    Filters u through the ARMA matrices when available, computes the
    volatility proxy h, and (when m > 0) pins the first gamma/psi starting
    values to 0. The correlogram is computed but currently unused.
    """
    matrices = logl.set_garch_arch(panel, initargs)
    if matrices is not None:
        _, AMA_1AR, _, _ = matrices
        filtered = cf.dot(AMA_1AR, u) * panel.included[3]
    else:
        filtered = u
    h = h_func(filtered, panel, initargs)
    if m > 0:
        # Kept for parity with the original: computed but not used —
        # starting values are hard-pinned to 0 below.
        corr_v = stat.correlogram(panel, h, 1, center=True)[1:]
        initargs['gamma'][0] = 0
        initargs['psi'][0] = 0
def LL_calc(self, panel):
    """Compute the total panel log-likelihood at the current arguments.

    Variant that builds the Gaussian LL inline (instead of delegating to
    `cll.LL`): ARMA-filters the residuals, applies random effects, forms
    the clamped log-variance from the GARCH recursion, and sums the
    per-observation LL over included observations. Stores intermediates on
    self via `add_variables` and sets `self.dlnv_pos`.

    Returns the summed LL, or None if the ARMA matrices could not be
    computed or the LL overflowed.
    """
    # NOTE(review): the `panel` argument is immediately shadowed by
    # self.panel; the parameter is kept only for interface compatibility.
    panel = self.panel
    X = panel.XIV
    matrices = set_garch_arch(panel, self.args.args_d)
    if matrices is None:
        return None  # ARMA/GARCH polynomial inversion failed
    AMA_1, AMA_1AR, GAR_1, GAR_1MA = matrices
    # NOTE(review): removed the original's unused `(N, T, k) = X.shape`.
    # Idea for IV: calculate Z*u throughout. Maximize total sum of LL.
    u = panel.Y - cf.dot(X, self.args.args_d['beta'])
    e = cf.dot(AMA_1AR, u)
    e_RE = (e + self.re_obj_i.RE(e, panel) + self.re_obj_t.RE(e, panel)) * panel.included[3]
    # Tiny constant where e_RE == 0 guards downstream division:
    e_REsq = (e_RE**2 + (e_RE == 0) * 1e-18)
    grp = self.variance_RE(panel, e_REsq)  # experimental
    W_omega = cf.dot(panel.W_a, self.args.args_d['omega'])
    if panel.options.RE_in_GARCH.value:
        lnv_ARMA = self.garch(panel, GAR_1MA, e_RE)
    else:
        lnv_ARMA = self.garch(panel, GAR_1MA, e)
    lnv = W_omega + lnv_ARMA  # 'N x T x k' * 'k x 1' -> 'N x T x 1'
    lnv += grp
    # Clamp log-variance to [-100, 100] to avoid overflow in exp();
    # dlnv_pos marks the unclamped region (derivative non-zero there).
    self.dlnv_pos = (lnv < 100) * (lnv > -100)
    lnv = np.maximum(np.minimum(lnv, 100), -100)
    v = np.exp(lnv) * panel.a[3]
    v_inv = np.exp(-lnv) * panel.a[3]
    LL = self.LL_const - 0.5 * (lnv + (e_REsq) * v_inv)
    self.tobit(panel, LL)
    LL = np.sum(LL * panel.included[3])
    self.add_variables(matrices, u, e, lnv_ARMA, lnv, v, W_omega, grp, e_RE, e_REsq, v_inv)
    if abs(LL) > 1e+100:
        return None  # numerically exploded likelihood
    return LL
def robust_cluster_weights(panel, XErr, cluster_dim, whites):
    """Cluster-robust weighting matrix along one panel dimension.

    cluster_dim 0 clusters by group (returns 0 if there is only one
    group); cluster_dim 1 clusters by time. The White term `whites` is
    subtracted from the cluster cross-product.
    """
    N, T, k = XErr.shape
    if cluster_dim == 0:  # group clusters
        if N <= 1:
            return 0
        cluster_mean = panel.mean(XErr, 0)
    elif cluster_dim == 1:  # time clusters
        cluster_mean = random_effects.mean_time(panel, XErr, True)
        T, m, k = cluster_mean.shape
        cluster_mean = cluster_mean.reshape((T, k))
    return cf.dot(cluster_mean, cluster_mean) - whites
def newey_west_wghts(L, XErr):
    """Newey-West autocorrelation-consistent weighting matrix.

    Sums Bartlett-weighted lagged cross-products of XErr (shape N x T x k)
    up to lag min(L, T) - 1, symmetrizing each term.

    NOTE(review): the original computed `a = min(L, T)` inside a bare
    try/except and never used it; removed as dead code.
    """
    N, T, k = XErr.shape
    S = np.zeros((k, k))
    for i in range(1, min(L, T)):
        # Bartlett-style weight. Note this is 1 - (i+1)/L rather than the
        # textbook 1 - i/(L+1) — TODO confirm the intended kernel.
        w = 1 - (i + 1) / (L)
        XX = cf.dot(XErr[:, i:], XErr[:, 0:T - i])
        S += w * (XX + XX.T)
    return S
def standardize_variable(self, panel, X, norm=False, reverse_difference=False):
    """Apply the ARMA filter and random-effects transform to X.

    Optionally un-differences the result (`reverse_difference`, when the
    inverse difference operator exists) and normalizes by the inverse
    conditional volatility (`norm`). Returns the transformed panel matrix
    and its long (stacked) version.
    """
    filtered = panel.arma_dot.dot(self.AMA_1AR, X, self)
    filtered = (filtered
                + self.re_obj_i.RE(filtered, panel, False)
                + self.re_obj_t.RE(filtered, panel, False))
    if panel.Ld_inv is not None and reverse_difference:
        filtered = cf.dot(panel.Ld_inv, filtered) * panel.a[3]
    if norm:
        filtered = filtered * self.v_inv05
    return filtered, self.stretch_variable(panel, filtered)
def lag_variables(self):
    """Apply the ARIMA difference operator of order d to X, Y and Z.

    Builds the T x T differencing matrix (I - L)^d and a matching inverse
    (cumulative-sum) operator, stores them on self, and replaces the data
    matrices with their differenced versions. When d == 0 only self.I is
    set and nothing else changes.
    """
    T = self.max_T
    d = self.pqdkm[2]
    self.I = np.diag(np.ones(T))
    if d == 0:
        return
    # First-difference operator I - L (L = one-period lag matrix):
    lag_mat = np.diag(np.ones(T - 1), -1)
    first_diff = self.I - lag_mat
    diff_op = first_diff
    undiff_op = np.tril(np.ones((T, T)))  # inverse of (I - L)
    for _ in range(1, d):
        diff_op = cf.dot(first_diff, diff_op)
        undiff_op = np.cumsum(undiff_op, 0)
    self.Ld_inv = undiff_op
    # Difference the data (intercept column recreated for X and Z):
    self.X = self.lag_variable(self.X, diff_op, d, True)
    self.Y = self.lag_variable(self.Y, diff_op, d, False)
    if self.Z is not None:
        self.Z = self.lag_variable(self.Z, diff_op, d, True)
def solve_mult(args, b, I):
    """Solve X*a = b for a, where X is a lower-banded matrix with ones on
    the main diagonal and `args` along the sub-diagonal bands.

    Returns (X^-1 * b, X^-1), or (None, None) if the banded solve fails
    or yields NaNs.

    NOTE(review): removed unused locals X2, w and r from the original, and
    narrowed the bare `except:` to `except Exception`.
    """
    n = len(b)
    q = len(args)
    # Banded storage for scipy.linalg.solve_banded with bands (q, 0):
    # row 0 is the main diagonal, row i+1 the i-th sub-diagonal.
    X = np.zeros((q + 1, n))
    X[0, :] = 1
    for i in range(q):
        X[i + 1, :n - i - 1] = args[i]
    try:
        X_1 = scipy.linalg.solve_banded((q, 0), X, I)
        if np.any(np.isnan(X_1)):
            return None, None
        X_1b = cf.dot(X_1, b)
    except Exception:
        # Best-effort contract: any failure signals "not invertible".
        return None, None
    return X_1b, X_1
def robust_se(panel, L, hessin, XErr, nw_only=True):
    """Returns the maximum robust standard errors considering all combinations of sums
    of different combinations of clusters and newy-west

    Builds the White, Newey-West (lag L) and, when there are multiple
    groups, both cluster weighting matrices, then takes the most
    conservative (largest) variance per coefficient across the candidate
    combinations. Returns (se_robust, se_std, V) where V is the full
    covariance of the combination with the largest total variance.

    NOTE(review): `nw_only` is accepted but never used — confirm intent.
    NOTE(review): the cluster weights receive `w` (the diagonal variance
    vector) as the `whites` argument rather than the full matrix `W` —
    looks suspicious; verify against robust_cluster_weights.
    """
    # White (heteroskedasticity-only) sandwich:
    w, W = sandwich_var(hessin, cf.dot(XErr, XErr))  #whites
    # Newey-West autocorrelation-consistent sandwich:
    nw, NW = sandwich_var(hessin, newey_west_wghts(L, XErr))  #newy-west
    if panel.N > 1:
        c0, C0 = sandwich_var(hessin, robust_cluster_weights(panel, XErr, 0, w))  #cluster dim 1
        c1, C1 = sandwich_var(hessin, robust_cluster_weights(panel, XErr, 1, w))  #cluster dim 2
    else:
        # Single group: no cluster corrections available.
        c0, c1, C0, C1 = 0, 0, 0 * W, 0 * W
    # Candidate correction combinations (last row = White only):
    v = np.array([nw, nw + c0, nw + c1, nw + c1 + c0, w * 0])
    V = np.array([NW, NW + C0, NW + C1, NW + C1 + C0, W * 0])
    V = V + W
    # Per-coefficient maximum variance across candidates (axis 0):
    s = np.max(w + v, 0)
    se_robust = np.maximum(s, 0)**0.5
    # Index of the candidate with the largest total variance; its full
    # covariance matrix is returned (may not match the per-coefficient max).
    i = np.argmax(np.sum(w + v, 1))
    se_std = np.maximum(w, 0)**0.5
    return se_robust, se_std, V[i]
def correl(X, panel=None, covar=False):
    """Correlation (or, with covar=True, covariance) matrix of X.

    With a panel, X is assumed three-dimensional (N x T x k) and is masked
    by panel.included[3] with panel.NT as the observation count; otherwise
    X is a plain 2-D (N x k) matrix.
    """
    if panel is not None:
        X = X * panel.included[3]
        k = X.shape[2]
        n_obs = panel.NT
        mean = np.sum(np.sum(X, 0), 0).reshape((1, k)) / n_obs
    else:
        n_obs, k = X.shape
        mean = np.sum(X, 0).reshape((1, k)) / n_obs
    cov = cf.dot(X, X) / n_obs - mean.T * mean
    if covar:
        return cov
    std = (np.diag(cov)**0.5).reshape((1, k))
    std = std.T * std
    std[np.isnan(std)] = 0
    # Guard against zero/negative scale; those entries are forced to 0.
    corr = (std > 0) * cov / (std + (std == 0) * 1e-100)
    corr[std <= 0] = 0
    return corr
def get(self, ll, DLL_e=None, dLL_lnv=None, return_G=False):
    """Assemble the analytical gradient of the log-likelihood.

    Builds the derivatives of the residuals and of the log-variance with
    respect to every parameter group (beta, rho, lambda, gamma, psi,
    omega, mu, z), chains them with the outer derivatives DLL_e/dLL_lnv
    (computed via cll.gradient when not supplied), and sums into the
    gradient vector g. With return_G=True also returns the per-observation
    gradient array G. Stores all intermediates on self for reuse by the
    Hessian computation.
    """
    self.callback(perc=0.05, text='', task='gradient')
    (self.DLL_e, self.dLL_lnv) = (DLL_e, dLL_lnv)
    panel = self.panel
    incl = self.panel.included[3]
    re_obj_i, re_obj_t = ll.re_obj_i, ll.re_obj_t
    u, e, h_e_val, lnv_ARMA, h_val, v = ll.u, ll.e, ll.h_e_val, ll.lnv_ARMA, ll.h_val, ll.v
    p, q, d, k, m = panel.pqdkm
    nW = panel.nW
    if DLL_e is None:
        # Outer derivatives of the LL w.r.t. e and lnv:
        dLL_lnv, DLL_e = cll.gradient(ll, self.panel)
    # ARIMA: residual derivatives w.r.t. rho, lambda and beta.
    de_rho = self.arima_grad(p, u, ll, -1, ll.AMA_1)
    de_lambda = self.arima_grad(q, e, ll, -1, ll.AMA_1)
    de_beta = -self.panel.arma_dot.dot(ll.AMA_1AR, panel.XIV, ll) * panel.a[3]
    (self.de_rho, self.de_lambda, self.de_beta) = (de_rho, de_lambda, de_beta)
    # Add the random-effects chain terms (individual and time):
    self.de_rho_RE = cf.add((de_rho, re_obj_i.dRE(de_rho, ll.e, 'rho', panel), re_obj_t.dRE(de_rho, ll.e, 'rho', panel)), True)
    self.de_lambda_RE = cf.add((de_lambda, re_obj_i.dRE(de_lambda, ll.e, 'lambda', panel), re_obj_t.dRE(de_lambda, ll.e, 'lambda', panel)), True)
    self.de_beta_RE = cf.add((de_beta, re_obj_i.dRE(de_beta, ll.e, 'beta', panel), re_obj_t.dRE(de_beta, ll.e, 'beta', panel)), True)
    # Log-variance derivatives through the GARCH recursion:
    dlnv_sigma_rho, dlnv_sigma_rho_G, dvRE_rho, d_rho_input = self.garch_arima_grad(ll, de_rho, self.de_rho_RE, 'rho')
    dlnv_sigma_lambda, dlnv_sigma_lambda_G, dvRE_lambda, d_lambda_input = self.garch_arima_grad(ll, de_lambda, self.de_lambda_RE, 'lambda')
    dlnv_sigma_beta, dlnv_sigma_beta_G, dvRE_beta, d_beta_input = self.garch_arima_grad(ll, de_beta, self.de_beta_RE, 'beta')
    (self.dlnv_sigma_rho, self.dlnv_sigma_lambda, self.dlnv_sigma_beta) = (dlnv_sigma_rho, dlnv_sigma_lambda, dlnv_sigma_beta)
    (self.dlnv_sigma_rho_G, self.dlnv_sigma_lambda_G, self.dlnv_sigma_beta_G) = (dlnv_sigma_rho_G, dlnv_sigma_lambda_G, dlnv_sigma_beta_G)
    (self.dvRE_rho, self.dvRE_lambda, self.dvRE_beta) = (dvRE_rho, dvRE_lambda, dvRE_beta)
    (self.d_rho_input, self.d_lambda_input, self.d_beta_input) = (d_rho_input, d_lambda_input, d_beta_input)
    # GARCH: gamma/psi/mu/z derivatives (None when not applicable).
    (dlnv_gamma, dlnv_psi, dlnv_mu, dlnv_z_G, dlnv_z) = (None, None, None, None, None)
    if panel.N > 1:
        dlnv_mu = cf.prod((ll.dlnvRE_mu, incl))
    else:
        dlnv_mu = None
    if m > 0:
        dlnv_gamma = self.arima_grad(k, lnv_ARMA, ll, 1, ll.GAR_1)
        dlnv_psi = self.arima_grad(m, h_val, ll, 1, ll.GAR_1)
        if not ll.h_z_val is None:
            dlnv_z_G = cf.dot(ll.GAR_1MA, ll.h_z_val)
            (N, T, k) = dlnv_z_G.shape
            dlnv_z = dlnv_z_G
    (self.dlnv_gamma, self.dlnv_psi, self.dlnv_mu, self.dlnv_z_G, self.dlnv_z) = (dlnv_gamma, dlnv_psi, dlnv_mu, dlnv_z_G, dlnv_z)
    # LL
    # Final derivatives: chain rule dLL/dtheta = dlnv/dtheta * dLL/dlnv
    # + de/dtheta * dLL/de (the latter only for beta/rho/lambda).
    dLL_beta = cf.add((cf.prod((dlnv_sigma_beta, dLL_lnv)), cf.prod((self.de_beta_RE, DLL_e))), True)
    dLL_rho = cf.add((cf.prod((dlnv_sigma_rho, dLL_lnv)), cf.prod((self.de_rho_RE, DLL_e))), True)
    dLL_lambda = cf.add((cf.prod((dlnv_sigma_lambda, dLL_lnv)), cf.prod((self.de_lambda_RE, DLL_e))), True)
    dLL_gamma = cf.prod((dlnv_gamma, dLL_lnv))
    dLL_psi = cf.prod((dlnv_psi, dLL_lnv))
    self.dlnv_omega = panel.W_a
    dLL_omega = cf.prod((self.dlnv_omega, dLL_lnv))
    dLL_mu = cf.prod((self.dlnv_mu, dLL_lnv))
    dLL_z = cf.prod((self.dlnv_z, dLL_lnv))
    # Stack per-parameter gradients and sum over (N, T) observations:
    G = cf.concat_marray((dLL_beta, dLL_rho, dLL_lambda, dLL_gamma, dLL_psi, dLL_omega, dLL_mu, dLL_z))
    g = np.sum(G, (0, 1))
    # For debugging:
    #print (g)
    #gn=debug.grad_debug(ll,panel,0.00001)#debugging
    #if np.sum((g-gn)**2)>10000000:
    #	a=0
    #print(gn)
    #a=debug.grad_debug_detail(ll, panel, 0.00000001, 'LL', 'beta',0)
    #dLLeREn,deREn=debug.LL_calc_custom(ll, panel, 0.0000001)
    self.callback(perc=0.08, text='', task='gradient')
    if return_G:
        return g, G
    else:
        return g
def sandwich_var(hessin, V):
    """Sandwich variance estimate hessin * V * hessin.

    Returns (diagonal variance vector, full covariance matrix).
    """
    full = cf.dot(cf.dot(hessin, V), hessin)
    return np.diag(full), full
def lag_variable(self, X, Ld, d, recreate_intercept):
    """Apply the differencing matrix Ld to X.

    Zeroes the d leading observations lost to differencing, and restores
    the intercept column to 1 when requested and the model has one.
    """
    differenced = cf.dot(Ld, X) * self.a[3]
    if self.input.has_intercept and recreate_intercept:
        differenced[:, :, 0] = 1
    differenced[:, :d] = 0
    return differenced