def test_panel_other_lsdv(data):
    """PanelOLS with two 'other' effects matches an explicit LSDV regression.

    Builds the dummy-variable design by hand from the model's categorical
    effect codes and checks that every covariance configuration (unadjusted,
    robust, one-way clustered, cluster-by-time, cluster-by-entity) agrees
    with an equivalent OLS fit via IV2SLS with no instruments.
    """
    mod = PanelOLS(data.y, data.x, other_effects=data.c)
    assert 'Num Other Effects: 2' in str(mod)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)

    y = mod.dependent.dataframe.copy()
    x = mod.exog.dataframe.copy()
    c = mod._other_effect_cats.dataframe.copy()
    d = []
    d_columns = []
    for i, col in enumerate(c):
        s = c[col].copy()
        # Drop one dummy per effect when a constant is present (and always
        # for effects after the first) to avoid perfect collinearity.
        dummies = pd.get_dummies(s.astype(np.int64),
                                 drop_first=(mod.has_constant or i > 0))
        # NOTE: the original used `c` as the comprehension variable, shadowing
        # the `c` DataFrame above; renamed to `cat_val` for clarity.
        dummies.columns = [s.name + '_val_' + str(cat_val)
                           for cat_val in dummies.columns]
        d_columns.extend(list(dummies.columns))
        d.append(dummies.values)
    d = np.column_stack(d)
    if mod.has_constant:
        # Demean the dummies against the constant so the augmented design
        # matches what PanelOLS estimates internally.
        z = np.ones_like(y)
        d = d - z @ lstsq(z, d)[0]
    xd = np.c_[x.values, d]
    xd = pd.DataFrame(xd, index=x.index,
                      columns=list(x.columns) + list(d_columns))

    # Equivalent plain OLS on the dummy-augmented regressors.
    ols_mod = IV2SLS(y, xd, None, None)
    res2 = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(res, res2, test_fit=False)
    res3 = mod.fit(cov_type='unadjusted', auto_df=False,
                   count_effects=False, debiased=False)
    assert_results_equal(res, res3)

    res = mod.fit(cov_type='robust', auto_df=False, count_effects=False,
                  debiased=False)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)

    # One-way clustered covariance with each supplied cluster variable.
    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False,
                  count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)

    clusters = data.vc2
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False,
                  count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)

    # cluster_time=True is equivalent to clustering on the time ids.
    res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False,
                  count_effects=False, debiased=False)
    clusters = pd.DataFrame(mod.dependent.time_ids,
                            index=mod.dependent.index,
                            columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)

    # cluster_entity=True is equivalent to clustering on the entity ids.
    res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False,
                  count_effects=False, debiased=False)
    clusters = pd.DataFrame(mod.dependent.entity_ids,
                            index=mod.dependent.index,
                            columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_panel_both_lsdv(data):
    """PanelOLS with entity AND time effects matches an explicit LSDV fit.

    Constructs entity and time dummies by hand, demeans them against the
    constant when present, and verifies parameter estimates and every
    covariance configuration against an equivalent OLS (IV2SLS without
    instruments) on the dummy-augmented design.
    """
    # Keyword set shared by every PanelOLS fit in this test.
    fit_kw = dict(auto_df=False, count_effects=False, debiased=False)

    mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True)
    panel_res = mod.fit(**fit_kw)

    dep = mod.dependent.dataframe
    exog = mod.exog.dataframe
    entity_d = mod.dependent.dummies('entity', drop_first=mod.has_constant)
    time_d = mod.dependent.dummies('time', drop_first=True)
    dummies = np.column_stack([entity_d.values, time_d.values])
    if mod.has_constant:
        # Project the constant out of the dummies to avoid collinearity.
        const = np.ones_like(dep)
        dummies = dummies - const @ lstsq(const, dummies)[0]
    aug = np.column_stack([exog.values, dummies])
    aug_cols = list(exog.columns) + list(entity_d.columns) + list(time_d.columns)
    aug = pd.DataFrame(aug, index=exog.index, columns=aug_cols)

    ols_mod = IV2SLS(dep, aug, None, None)
    ols_res = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(panel_res, ols_res, test_fit=False)
    assert_allclose(panel_res.rsquared_inclusive, ols_res.rsquared)

    panel_res = mod.fit(cov_type='robust', **fit_kw)
    ols_res = ols_mod.fit(cov_type='robust')
    assert_results_equal(panel_res, ols_res, test_fit=False)

    # One-way clustering with each supplied cluster variable.
    for clust_var in (data.vc1, data.vc2):
        reformatted = mod.reformat_clusters(clust_var)
        panel_res = mod.fit(cov_type='clustered', clusters=clust_var, **fit_kw)
        ols_res = ols_mod.fit(cov_type='clustered',
                              clusters=reformatted.dataframe)
        assert_results_equal(panel_res, ols_res, test_fit=False)

    # cluster_time=True should equal clustering on the time ids.
    panel_res = mod.fit(cov_type='clustered', cluster_time=True, **fit_kw)
    time_clust = pd.DataFrame(mod.dependent.time_ids,
                              index=mod.dependent.index,
                              columns=['var.clust'])
    ols_res = ols_mod.fit(cov_type='clustered', clusters=time_clust)
    assert_results_equal(panel_res, ols_res, test_fit=False)

    # cluster_entity=True should equal clustering on the entity ids.
    panel_res = mod.fit(cov_type='clustered', cluster_entity=True, **fit_kw)
    entity_clust = pd.DataFrame(mod.dependent.entity_ids,
                                index=mod.dependent.index,
                                columns=['var.clust'])
    ols_res = ols_mod.fit(cov_type='clustered', clusters=entity_clust)
    assert_results_equal(panel_res, ols_res, test_fit=False)
def test_panel_time_lsdv_weighted(large_data):
    """Weighted PanelOLS with time effects matches an explicit weighted LSDV fit."""
    mod = PanelOLS(large_data.y, large_data.x, time_effects=True,
                   weights=large_data.w)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    w = mod.weights.dataframe
    d = mod.dependent.dummies('time', drop_first=mod.has_constant)
    d_cols = d.columns
    d = d.values
    if mod.has_constant:
        # Weighted projection: demean the dummies against the constant in
        # the sqrt(w)-transformed space so they match the weighted estimator.
        z = np.ones_like(y)
        root_w = np.sqrt(w.values)
        wd = root_w * d
        wz = root_w * z
        d = d - z @ lstsq(wz, wd)[0]
    xd = np.c_[x.values, d]
    xd = pd.DataFrame(xd, index=x.index,
                      columns=list(x.columns) + list(d_cols))
    # Equivalent weighted OLS on the dummy-augmented design.
    ols_mod = IV2SLS(y, xd, None, None, weights=w)
    res2 = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(res, res2, test_fit=False)
    res = mod.fit(cov_type='robust', auto_df=False, count_effects=False,
                  debiased=False)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)
    # One-way clustered covariance with each supplied cluster variable.
    clusters = large_data.vc1
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False,
                  count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    clusters = large_data.vc2
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False,
                  count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # cluster_time=True is equivalent to clustering on the time ids.
    res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False,
                  count_effects=False, debiased=False)
    clusters = pd.DataFrame(mod.dependent.time_ids,
                            index=mod.dependent.index,
                            columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
    # cluster_entity=True is equivalent to clustering on the entity ids.
    res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False,
                  count_effects=False, debiased=False)
    clusters = pd.DataFrame(mod.dependent.entity_ids,
                            index=mod.dependent.index,
                            columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
def fit(self, cov_type='robust', debiased=True, **cov_config):
    """
    Estimate model parameters

    Parameters
    ----------
    cov_type : str, optional
        Name of covariance estimator.  One of 'robust', 'heteroskedastic'
        (synonyms) or 'kernel'.
    debiased : bool, optional
        Flag indicating whether to debias the covariance estimator using
        a degree of freedom adjustment
    **cov_config
        Additional covariance-specific options.  See Notes.

    Returns
    -------
    results : LinearFactorModelResults
        Results class with parameter estimates, covariance and test
        statistics

    Notes
    -----
    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    # Slice boundaries into the stacked moment vector: s1/s2/s3 mark the
    # beta, risk-premium and alpha segments (see _boundaries).
    nobs, nf, nport, nrf, s1, s2, s3 = self._boundaries()
    excess_returns = not self._risk_free
    f = self.factors.ndarray
    p = self.portfolios.ndarray
    nport = p.shape[1]

    # Step 1, n regressions to get B
    fc = np.c_[np.ones((nobs, 1)), f]
    b = lstsq(fc, p)[0]  # nf+1 by np
    eps = p - fc @ b
    if excess_returns:
        betas = b[1:].T
    else:
        # Retain a unit column so a zero-beta (risk-free) rate is estimated.
        betas = b.T.copy()
        betas[:, 0] = 1.0

    # Step 2: cross-sectional (GLS when sigma supplied) regression of mean
    # returns on betas to obtain risk premia.
    sigma_m12 = self._sigma_m12
    lam = lstsq(sigma_m12 @ betas, sigma_m12 @ p.mean(0)[:, None])[0]
    expected = betas @ lam
    pricing_errors = p - expected.T
    # Moments
    alphas = pricing_errors.mean(0)[:, None]
    moments = self._moments(eps, betas, lam, alphas, pricing_errors)
    # Jacobian
    jacobian = self._jacobian(betas, lam, alphas)

    if cov_type not in ('robust', 'heteroskedastic', 'kernel'):
        raise ValueError('Unknown weight: {0}'.format(cov_type))
    if cov_type in ('robust', 'heteroskedastic'):
        cov_est = HeteroskedasticCovariance
    else:  # 'kernel':
        cov_est = KernelCovariance
    cov_est = cov_est(moments, jacobian=jacobian, center=False,
                      debiased=debiased, df=fc.shape[1], **cov_config)

    # VCV
    full_vcv = cov_est.cov
    alpha_vcv = full_vcv[s2:, s2:]
    # Wald J-statistic testing that all pricing errors (alphas) are zero.
    stat = float(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas)
    jstat = WaldTestStatistic(stat, 'All alphas are 0', nport - nf - nrf,
                              name='J-statistic')

    total_ss = ((p - p.mean(0)[None, :]) ** 2).sum()
    residual_ss = (eps ** 2).sum()
    r2 = 1 - residual_ss / total_ss
    rp = lam
    rp_cov = full_vcv[s1:s2, s1:s2]
    # Drop the synthetic unit column when a risk-free rate was estimated.
    betas = betas if excess_returns else betas[:, 1:]
    params = np.c_[alphas, betas]
    param_names = []
    for portfolio in self.portfolios.cols:
        param_names.append('alpha-{0}'.format(portfolio))
        for factor in self.factors.cols:
            param_names.append('beta-{0}-{1}'.format(portfolio, factor))
    if not excess_returns:
        param_names.append('lambda-risk_free')
    for factor in self.factors.cols:
        param_names.append('lambda-{0}'.format(factor))

    # Pivot vcv to remove unnecessary and have correct order
    order = np.reshape(np.arange(s1), (nport, nf + 1))
    order[:, 0] = np.arange(s2, s3)
    order = order.ravel()
    order = np.r_[order, s1:s2]
    full_vcv = full_vcv[order][:, order]

    factor_names = list(self.factors.cols)
    rp_names = factor_names[:]
    if not excess_returns:
        rp_names.insert(0, 'risk_free')
    res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp,
                   rp_cov=rp_cov, alphas=alphas, alpha_vcv=alpha_vcv,
                   jstat=jstat, rsquared=r2, total_ss=total_ss,
                   residual_ss=residual_ss, param_names=param_names,
                   portfolio_names=self.portfolios.cols,
                   factor_names=factor_names, name=self._name,
                   cov_type=cov_type, model=self, nobs=nobs,
                   rp_names=rp_names, cov_est=cov_est)
    return LinearFactorModelResults(res)
def fit(self, *, cov_type: str = 'robust', debiased: bool = False,
        lsmr_options: dict = None, use_cache: bool = True,
        **cov_config: Any):
    """
    Estimate model parameters

    Parameters
    ----------
    cov_type : str, optional
        Name of covariance estimator to use. Supported covariance
        estimators are:

        * 'unadjusted', 'homoskedastic' - Classic homoskedastic inference
        * 'robust', 'heteroskedastic' - Heteroskedasticity robust inference
        * 'kernel' - Heteroskedasticity and autocorrelation robust
          inference
        * 'cluster' - One-way cluster dependent inference.
          Heteroskedasticity robust

    debiased : bool, optional
        Flag indicating whether to debiased the covariance estimator using
        a degree of freedom adjustment.
    lsmr_options : dict
        Dictionary of options to pass to scipy.sparse.linalg.lsmr
    use_cache : bool
        Flag indicating whether the variables, once purged from the
        absorbed variables and interactions, should be stored in the cache,
        and retrieved if available. Cache can dramatically speed up
        re-fitting large models when the set of absorbed variables and
        interactions are identical.
    **cov_config
        Additional parameters to pass to covariance estimator. The list
        of optional parameters differ according to ``cov_type``. See the
        documentation of the alternative covariance estimators for the
        complete list of available commands.

    Returns
    -------
    results : AbsorbingLSResults
        Results container

    Notes
    -----
    Additional covariance parameters depend on specific covariance used.
    The see the docstring of specific covariance estimator for a list of
    supported options. Defaults are used if no covariance configuration
    is provided.

    If use_cache is True, then variables are hashed based on their
    contents using either a 64 bit value (if xxhash is installed) or
    a 256 bit value. This allows variables to be reused in different
    models if the set of absorbing variables and interactions is held
    constant.

    See also
    --------
    linearmodels.iv.covariance.HomoskedasticCovariance
    linearmodels.iv.covariance.HeteroskedasticCovariance
    linearmodels.iv.covariance.KernelCovariance
    linearmodels.iv.covariance.ClusteredCovariance
    """
    # Purge the absorbed effects only once; subsequent fits reuse the
    # stored residualized variables.
    if self._absorbed_dependent is None:
        self._first_time_fit(use_cache, lsmr_options)

    self._x = exog_resid = to_numpy(self.absorbed_exog)
    dep_resid = to_numpy(self.absorbed_dependent)
    if self._exog.shape[1] == 0:
        # No non-absorbed regressors: nothing to estimate.
        params = empty((0, 1))
    else:
        if exog_resid.shape[1]:
            # Fail early if any regressor was fully absorbed.
            check_absorbed(exog_resid, self.exog.cols)
        params = lstsq(exog_resid, dep_resid)[0]
        self._num_params += exog_resid.shape[1]

    cov_estimator = COVARIANCE_ESTIMATORS[cov_type]
    cov_config['debiased'] = debiased
    # kappa=0.0 selects the pure-OLS branch of the IV covariance code.
    cov_config['kappa'] = 0.0
    # Copy before mutating so the caller's cov_config is not altered;
    # 'center' is not supported by these estimators and is removed.
    cov_config_copy = {k: v for k, v in cov_config.items()}
    if 'center' in cov_config_copy:
        del cov_config_copy['center']
    cov_estimator = cov_estimator(exog_resid, dep_resid, exog_resid,
                                  params, **cov_config_copy)

    results = {'kappa': 0.0, 'liml_kappa': 0.0}
    pe = self._post_estimation(params, cov_estimator, cov_type)
    results.update(pe)
    results['df_model'] = self._num_params
    return AbsorbingLSResults(results, self)
def test_linear_model_parameters_risk_free_gls(data):
    """Replicate GLS LinearFactorModel (risk_free=True) moments, Jacobian and cov by hand."""
    mod = LinearFactorModel(data.portfolios, data.factors, risk_free=True)
    p = mod.portfolios.ndarray
    sigma = np.cov(p.T)
    # Symmetric inverse square root of sigma via its eigendecomposition.
    val, vec = np.linalg.eigh(sigma)
    sigma_m12 = vec @ np.diag(1.0 / np.sqrt(val)) @ vec.T
    sigma_inv = np.linalg.inv(sigma)
    mod = LinearFactorModel(data.portfolios, data.factors, risk_free=True,
                            sigma=sigma)
    assert 'using GLS' in str(mod)
    res = mod.fit()
    f = mod.factors.ndarray
    p = mod.portfolios.ndarray
    n = f.shape[0]
    # Moment conditions: beta blocks, risk premia (incl. risk-free), alphas.
    moments = np.zeros(
        (n, p.shape[1] * (f.shape[1] + 1) + f.shape[1] + 1 + p.shape[1]))
    fc = np.c_[np.ones((n, 1)), f]
    betas = lstsq(fc, p)[0]
    eps = p - fc @ betas
    loc = 0
    # Time-series regression moments: eps_i * [1, f] per portfolio.
    for i in range(eps.shape[1]):
        for j in range(fc.shape[1]):
            moments[:, loc] = eps[:, i] * fc[:, j]
            loc += 1
    # bc prepends a unit column so a zero-beta rate is estimated.
    bc = np.c_[np.ones((p.shape[1], 1)), betas[1:, :].T]
    lam = lstsq(sigma_m12 @ bc, sigma_m12 @ p.mean(0)[:, None])[0]
    pricing_errors = p - (bc @ lam).T
    # GLS cross-sectional moments for the risk premia.
    for i in range(lam.shape[0]):
        lam_error = pricing_errors @ sigma_inv @ bc[:, [i]]
        moments[:, loc] = lam_error.squeeze()
        loc += 1
    alphas = p.mean(0)[:, None] - bc @ lam
    moments[:, loc:] = pricing_errors - alphas.T
    mod_moments = mod._moments(eps, bc, lam, alphas, pricing_errors)

    assert_allclose(res.betas, bc[:, 1:])
    assert_allclose(res.risk_premia, lam.squeeze())
    assert_allclose(res.alphas, alphas.squeeze())
    assert_allclose(moments, mod_moments)

    # Hand-built Jacobian of the stacked moment conditions.
    m = moments.shape[1]
    jac = np.eye(m)
    block1 = p.shape[1] * (f.shape[1] + 1)
    # 1,1
    jac[:block1, :block1] = np.kron(np.eye(p.shape[1]), fc.T @ fc / n)
    # 2, 1
    loc = 0
    nport, nf = p.shape[1], f.shape[1]
    block2 = block1 + nf + 1
    bct = sigma_inv @ bc
    at = sigma_inv @ alphas
    for i in range(nport):
        block = np.zeros((nf + 1, nf + 1))
        for j in range(nf + 1):  # rows
            for k in range(1, nf + 1):  # cols
                block[j, k] = bct[i][j] * lam[k]
                if j == k:
                    block[j, k] -= at[i]
        jac[block1:block2, loc:loc + nf + 1] = block
        loc += nf + 1
    # 2, 2
    jac[block1:block2, block1:block2] = bc.T @ sigma_inv @ bc
    # 3,1
    block = np.zeros((nport, nport * (nf + 1)))
    row = col = 0
    for _ in range(nport):
        for j in range(nf + 1):
            if j != 0:
                block[row, col] = lam[j]
            col += 1
        row += 1
    jac[-nport:, :(nport * (nf + 1))] = block
    # 3, 2
    jac[-nport:, (nport * (nf + 1)):(nport * (nf + 1)) + nf + 1] = bc
    # 3, 3: already done since eye
    mod_jac = mod._jacobian(bc, lam, alphas)
    assert_allclose(mod_jac[:block1], jac[:block1])
    assert_allclose(mod_jac[block1:block2, :block1],
                    jac[block1:block2, :block1])
    assert_allclose(mod_jac[block1:block2, block1:block2],
                    jac[block1:block2, block1:block2])
    assert_allclose(mod_jac[block1:block2, block2:],
                    jac[block1:block2, block2:])
    assert_allclose(mod_jac[block2:], jac[block2:])

    # Sandwich covariance, then pivot rows/cols into the reported order
    # (alpha first per portfolio, then betas, then risk premia).
    s = moments.T @ moments / (n - (nf + 1))
    ginv = np.linalg.inv(jac)
    cov = ginv @ s @ ginv.T / n
    order = np.zeros((nport, nf + 1), dtype=np.int64)
    order[:, 0] = np.arange(block2, block2 + nport)
    for i in range(nf):
        order[:, i + 1] = (nf + 1) * np.arange(nport) + (i + 1)
    order = np.r_[order.ravel(), block1:block2]
    cov = cov[order][:, order]
    cov = (cov + cov.T) / 2
    assert_allclose(cov, res.cov)

    # J-statistic from the alpha sub-covariance (every (nf+1)-th entry).
    acov = cov[:block1:(nf + 1), :block1:(nf + 1)]
    jstat = float(alphas.T @ np.linalg.pinv(acov) @ alphas)
    assert_allclose(res.cov.values[:block1:(nf + 1), :block1:(nf + 1)], acov)
    assert_allclose(res.j_statistic.stat, jstat, rtol=1e-1)
    assert_allclose(res.j_statistic.pval,
                    1 - stats.chi2(nport - nf - 1).cdf(jstat), rtol=1e-2)
    get_all(res)
def test_linear_model_parameters(data):
    """Replicate LinearFactorModel moments, Jacobian, covariance and J-stat by hand."""
    mod = LinearFactorModel(data.portfolios, data.factors)
    res = mod.fit()
    f = mod.factors.ndarray
    p = mod.portfolios.ndarray
    n = f.shape[0]
    # Moment conditions: beta blocks, risk premia, alphas (no risk-free).
    moments = np.zeros(
        (n, p.shape[1] * (f.shape[1] + 1) + f.shape[1] + p.shape[1]))
    fc = np.c_[np.ones((n, 1)), f]
    betas = lstsq(fc, p)[0]
    eps = p - fc @ betas
    loc = 0
    # Time-series regression moments: eps_i * [1, f] per portfolio.
    for i in range(eps.shape[1]):
        for j in range(fc.shape[1]):
            moments[:, loc] = eps[:, i] * fc[:, j]
            loc += 1
    b = betas[1:, :].T
    lam = lstsq(b, p.mean(0)[:, None])[0]
    pricing_errors = p - (b @ lam).T
    # Cross-sectional (OLS) moments for the risk premia.
    for i in range(lam.shape[0]):
        lam_error = (p - (b @ lam).T) @ b[:, [i]]
        moments[:, loc] = lam_error.squeeze()
        loc += 1
    alphas = pricing_errors.mean(0)[:, None]
    moments[:, loc:] = pricing_errors - alphas.T
    mod_moments = mod._moments(eps, b, lam, alphas, pricing_errors)

    assert_allclose(res.betas, b)
    assert_allclose(res.risk_premia, lam.squeeze())
    assert_allclose(res.alphas, alphas.squeeze())
    assert_allclose(moments, mod_moments)

    # Hand-built Jacobian of the stacked moment conditions.
    m = moments.shape[1]
    jac = np.eye(m)
    block1 = p.shape[1] * (f.shape[1] + 1)
    # 1,1
    jac[:block1, :block1] = np.kron(np.eye(p.shape[1]), fc.T @ fc / n)
    # 2, 1
    loc = 0
    nport, nf = p.shape[1], f.shape[1]
    block2 = block1 + nf
    for i in range(nport):
        block = np.zeros((nf, nf + 1))
        for j in range(nf):  # rows
            for k in range(1, nf + 1):  # cols
                block[j, k] = b[i][j] * lam[k - 1]
                if j + 1 == k:
                    block[j, k] -= alphas[i]
        jac[block1:block2, loc:loc + nf + 1] = block
        loc += nf + 1
    # 2, 2
    jac[block1:block2, block1:block2] = b.T @ b
    # 3,1
    block = np.zeros((nport, nport * (nf + 1)))
    row = col = 0
    for _ in range(nport):
        for j in range(nf + 1):
            if j != 0:
                block[row, col] = lam[j - 1]
            col += 1
        row += 1
    jac[-nport:, :(nport * (nf + 1))] = block
    # 3, 2
    jac[-nport:, (nport * (nf + 1)):(nport * (nf + 1)) + nf] = b
    # 3, 3: already done since eye
    mod_jac = mod._jacobian(b, lam, alphas)
    assert_allclose(mod_jac[:block1], jac[:block1])
    assert_allclose(mod_jac[block1:block2, :block1],
                    jac[block1:block2, :block1])
    assert_allclose(mod_jac[block1:block2, block1:block2],
                    jac[block1:block2, block1:block2])
    assert_allclose(mod_jac[block1:block2, block2:],
                    jac[block1:block2, block2:])
    assert_allclose(mod_jac[block2:], jac[block2:])

    # Sandwich covariance, then pivot rows/cols into the reported order
    # (alpha first per portfolio, then betas, then risk premia).
    s = moments.T @ moments / (n - (nf + 1))
    ginv = np.linalg.inv(jac)
    cov = ginv @ s @ ginv.T / n
    order = np.zeros((nport, nf + 1), dtype=np.int64)
    order[:, 0] = np.arange(block2, block2 + nport)
    for i in range(nf):
        order[:, i + 1] = (nf + 1) * np.arange(nport) + (i + 1)
    order = np.r_[order.ravel(), block1:block2]
    cov = cov[order][:, order]
    cov = (cov + cov.T) / 2
    assert_allclose(cov, res.cov)

    # J-statistic from the alpha sub-covariance (every (nf+1)-th entry).
    acov = cov[:block1:(nf + 1), :block1:(nf + 1)]
    jstat = float(alphas.T @ np.linalg.pinv(acov) @ alphas)
    assert_allclose(res.j_statistic.stat, jstat)
    assert_allclose(res.j_statistic.pval,
                    1 - stats.chi2(nport - nf).cdf(jstat))
    get_all(res)

    # Repeat the covariance check with a Bartlett-kernel HAC estimator.
    res = LinearFactorModel(data.portfolios,
                            data.factors).fit(cov_type='kernel',
                                              debiased=False)
    std_mom = moments / moments.std(0)[None, :]
    mom = std_mom.sum(1)
    bw = kernel_optimal_bandwidth(mom)
    w = kernel_weight_bartlett(bw, n - 1)
    s = _cov_kernel(moments, w)
    cov = ginv @ s @ ginv.T / n
    cov = cov[order][:, order]
    cov = (cov + cov.T) / 2
    assert_allclose(cov, res.cov)