def test_attr_dict():
    ad = AttrDict()
    ad["one"] = "one"
    ad[1] = 1
    ad[("a", 2)] = ("a", 2)
    assert list(ad.keys()) == ["one", 1, ("a", 2)]
    assert len(ad) == 3

    ad2 = ad.copy()
    assert list(ad2.keys()) == list(ad.keys())
    assert ad.get("one", None) == "one"
    assert ad.get("two", False) is False

    k, v = ad.popitem()
    assert k == "one"
    assert v == "one"

    items = ad.items()
    assert (1, 1) in items
    assert (("a", 2), ("a", 2)) in items
    assert len(items) == 2

    values = ad.values()
    assert 1 in values
    assert ("a", 2) in values
    assert len(values) == 2

    ad2 = AttrDict()
    ad2[1] = 3
    ad2["one"] = "one"
    ad2["a"] = "a"
    ad.update(ad2)
    assert ad[1] == 3
    assert "a" in ad

    ad.__str__()
    with pytest.raises(AttributeError):
        ad.__private_dict__ = None
    with pytest.raises(AttributeError):
        ad.some_other_key
    with pytest.raises(KeyError):
        ad["__private_dict__"] = None

    del ad[1]
    assert 1 not in ad.keys()
    ad.new_value = "new_value"
    assert "new_value" in ad.keys()
    assert ad.new_value == ad["new_value"]

    for key in ad.keys():
        if isinstance(key, str):
            assert key in dir(ad)

    new_value = ad.pop("new_value")
    assert new_value == "new_value"

    del ad.one
    assert "one" not in ad.keys()

    ad.clear()
    assert list(ad.keys()) == []
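For context, the behavior exercised above is easiest to see as a compact sketch of an AttrDict-like class: insertion-ordered keys, item and attribute access that mirror each other for string keys, and a reserved backing-store name. This is an illustrative reimplementation consistent with the assertions in the test, not the library's actual class; the name MiniAttrDict is hypothetical, and the dict-protocol methods the test also exercises (get, pop, popitem, items, values, copy, update, clear) are omitted for brevity.

class MiniAttrDict:
    """Minimal sketch of an attribute-accessible ordered mapping (hypothetical)."""

    def __init__(self, **kwargs):
        # Contents live in a plain dict stored under a reserved name.
        # Writing through self.__dict__ bypasses __setattr__ below.
        self.__dict__["__private_dict__"] = dict(**kwargs)

    def __getitem__(self, key):
        return self.__dict__["__private_dict__"][key]

    def __setitem__(self, key, value):
        if key == "__private_dict__":
            raise KeyError("__private_dict__ is reserved")
        self.__dict__["__private_dict__"][key] = value

    def __delitem__(self, key):
        del self.__dict__["__private_dict__"][key]

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails; mirror item access.
        try:
            return self.__dict__["__private_dict__"][key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        if key == "__private_dict__":
            raise AttributeError("__private_dict__ is reserved")
        self.__dict__["__private_dict__"][key] = value

    def __delattr__(self, key):
        del self.__dict__["__private_dict__"][key]

    def __dir__(self):
        # Expose string keys as attributes, as the `key in dir(ad)` check expects.
        keys = [k for k in self.__dict__["__private_dict__"] if isinstance(k, str)]
        return sorted(set(super().__dir__()) | set(keys))

    def keys(self):
        return self.__dict__["__private_dict__"].keys()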
def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
        cov_type='robust', debiased=True, **cov_config):
    """
    Estimate model parameters

    Parameters
    ----------
    center : bool, optional
        Flag indicating to center the moment conditions before computing
        the weighting matrix.
    use_cue : bool, optional
        Flag indicating to use the continuously updating estimator
    steps : int, optional
        Number of steps to use when estimating parameters.  2 corresponds
        to the standard efficient GMM estimator. Higher values will
        iterate until convergence or up to the number of steps given
    disp : int, optional
        Number of iterations between printed updates. 0 or negative
        values suppress output
    max_iter : int, positive, optional
        Maximum number of iterations when minimizing objective
    cov_type : str, optional
        Name of covariance estimator
    debiased : bool, optional
        Flag indicating whether to debias the covariance estimator using
        a degree of freedom adjustment
    **cov_config
        Additional covariance-specific options.  See Notes.

    Returns
    -------
    results : GMMFactorModelResults
        Results class with parameter estimates, covariance and test statistics

    Notes
    -----
    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    nobs, n = self.portfolios.shape
    k = self.factors.shape[1]
    excess_returns = not self._risk_free
    nrf = int(not bool(excess_returns))
    # 1. Starting values from the 2-pass estimator
    mod = LinearFactorModel(self.portfolios, self.factors,
                            risk_free=self._risk_free)
    res = mod.fit()
    betas = np.asarray(res.betas).ravel()
    lam = np.asarray(res.risk_premia)
    mu = self.factors.ndarray.mean(0)
    sv = np.r_[betas, lam, mu][:, None]
    g = self._moments(sv, excess_returns)
    g -= g.mean(0)[None, :] if center else 0

    if cov_type not in ('robust', 'heteroskedastic', 'kernel'):
        raise ValueError('Unknown weight: {0}'.format(cov_type))
    if cov_type in ('robust', 'heteroskedastic'):
        weight_est = HeteroskedasticWeight
        cov_est = HeteroskedasticCovariance
    else:  # 'kernel'
        weight_est = KernelWeight
        cov_est = KernelCovariance
    weight_est = weight_est(g, center=center, **cov_config)
    w = weight_est.w(g)

    args = (excess_returns, w)

    # 2. Step 1 using w = inv(s) from the starting values
    callback = callback_factory(self._j, args, disp=disp)
    res = minimize(self._j, sv, args=args, callback=callback,
                   options={'disp': bool(disp), 'maxiter': max_iter})
    params = res.x
    last_obj = res.fun
    iters = 1
    # 3. Step 2 (and later steps) using the previous step's estimates
    if not use_cue:
        while iters < steps:
            iters += 1
            g = self._moments(params, excess_returns)
            w = weight_est.w(g)
            args = (excess_returns, w)

            # Re-minimize with the updated weighting matrix
            callback = callback_factory(self._j, args, disp=disp)
            res = minimize(self._j, params, args=args, callback=callback,
                           options={'disp': bool(disp), 'maxiter': max_iter})
            params = res.x
            obj = res.fun
            if np.abs(obj - last_obj) < 1e-6:
                break
            last_obj = obj
    else:
        args = (excess_returns, weight_est)
        obj = self._j_cue
        callback = callback_factory(obj, args, disp=disp)
        res = minimize(obj, params, args=args, callback=callback,
                       options={'disp': bool(disp), 'maxiter': max_iter})
        params = res.x

    # 4. Compute final S and G for inference
    g = self._moments(params, excess_returns)
    s = g.T @ g / nobs
    jac = self._jacobian(params, excess_returns)
    cov_est = cov_est(g, jacobian=jac, center=center, debiased=debiased,
                      df=self.factors.shape[1], **cov_config)

    full_vcv = cov_est.cov
    sel = slice((n * k), (n * k + k + nrf))
    rp = params[sel]
    rp_cov = full_vcv[sel, sel]
    sel = slice(0, (n * (k + 1)), (k + 1))
    alphas = g.mean(0)[sel, None]
    alpha_vcv = s[sel, sel] / nobs
    stat = self._j(params, excess_returns, w)
    jstat = WaldTestStatistic(stat, 'All alphas are 0', n - k - nrf,
                              name='J-statistic')

    # R2 calculation
    betas = np.reshape(params[:(n * k)], (n, k))
    resids = self.portfolios.ndarray - self.factors.ndarray @ betas.T
    resids -= resids.mean(0)[None, :]
    residual_ss = (resids ** 2).sum()
    total = self.portfolios.ndarray
    total = total - total.mean(0)[None, :]
    total_ss = (total ** 2).sum()
    r2 = 1.0 - residual_ss / total_ss

    param_names = []
    for portfolio in self.portfolios.cols:
        for factor in self.factors.cols:
            param_names.append('beta-{0}-{1}'.format(portfolio, factor))
    if not excess_returns:
        param_names.append('lambda-risk_free')
    param_names.extend(['lambda-{0}'.format(f) for f in self.factors.cols])
    param_names.extend(['mu-{0}'.format(f) for f in self.factors.cols])
    rp_names = list(self.factors.cols)[:]
    if not excess_returns:
        rp_names.insert(0, 'risk_free')
    params = np.c_[alphas, betas]

    # 5. Return values
    res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp,
                   rp_cov=rp_cov, alphas=alphas, alpha_vcv=alpha_vcv,
                   jstat=jstat, rsquared=r2, total_ss=total_ss,
                   residual_ss=residual_ss, param_names=param_names,
                   portfolio_names=self.portfolios.cols,
                   factor_names=self.factors.cols, name=self._name,
                   cov_type=cov_type, model=self, nobs=nobs,
                   rp_names=rp_names, iter=iters, cov_est=cov_est)

    return GMMFactorModelResults(res)
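A hedged usage sketch of the iterated GMM fit above. The class name LinearFactorModelGMM and its constructor signature are assumptions based on the surrounding code, and the data is synthetic; treat this as an illustration of the keyword options rather than the library's documented API.

import numpy as np

# Synthetic excess returns on 10 portfolios driven by 3 factors
rs = np.random.RandomState(0)
factors = rs.standard_normal((750, 3))
portfolios = factors @ rs.standard_normal((3, 10)) + rs.standard_normal((750, 10))

mod = LinearFactorModelGMM(portfolios, factors)  # hypothetical constructor
# Iterate up to 10 steps with a HAC weighting/covariance estimator;
# disp=0 suppresses the per-iteration output.
res = mod.fit(steps=10, disp=0, cov_type='kernel', kernel='bartlett', bandwidth=12)
print(res.jstat)  # J-statistic for the null that all alphas are 0
print(res.rp)     # estimated risk premia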
def test_attr_dict():
    ad = AttrDict()
    ad['one'] = 'one'
    ad[1] = 1
    ad[('a', 2)] = ('a', 2)
    assert list(ad.keys()) == ['one', 1, ('a', 2)]
    assert len(ad) == 3

    ad2 = ad.copy()
    assert list(ad2.keys()) == list(ad.keys())
    assert ad.get('one', None) == 'one'
    assert ad.get('two', False) is False

    k, v = ad.popitem()
    assert k == 'one'
    assert v == 'one'

    items = ad.items()
    assert (1, 1) in items
    assert (('a', 2), ('a', 2)) in items
    assert len(items) == 2

    values = ad.values()
    assert 1 in values
    assert ('a', 2) in values
    assert len(values) == 2

    ad2 = AttrDict()
    ad2[1] = 3
    ad2['one'] = 'one'
    ad2['a'] = 'a'
    ad.update(ad2)
    assert ad[1] == 3
    assert 'a' in ad

    ad.__str__()
    with pytest.raises(AttributeError):
        ad.__ordered_dict__ = None
    with pytest.raises(AttributeError):
        ad.some_other_key
    with pytest.raises(KeyError):
        ad['__ordered_dict__'] = None

    del ad[1]
    assert 1 not in ad.keys()
    ad.new_value = 'new_value'
    assert 'new_value' in ad.keys()
    assert ad.new_value == ad['new_value']

    for key in ad.keys():
        if isinstance(key, str):
            assert key in dir(ad)

    new_value = ad.pop('new_value')
    assert new_value == 'new_value'

    del ad.one
    assert 'one' not in ad.keys()

    ad.clear()
    assert list(ad.keys()) == []
def fit(self, cov_type='robust', debiased=True, **cov_config):
    """
    Estimate model parameters

    Parameters
    ----------
    cov_type : str, optional
        Name of covariance estimator
    debiased : bool, optional
        Flag indicating whether to debias the covariance estimator using
        a degree of freedom adjustment
    **cov_config : dict
        Additional covariance-specific options.  See Notes.

    Returns
    -------
    results : LinearFactorModelResults
        Results class with parameter estimates, covariance and test statistics

    Notes
    -----
    Supported covariance estimators are:

    * 'robust' - Heteroskedasticity-robust covariance estimator
    * 'kernel' - Heteroskedasticity and Autocorrelation consistent (HAC)
      covariance estimator

    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    p = self.portfolios.ndarray
    f = self.factors.ndarray
    nportfolio = p.shape[1]
    nobs, nfactor = f.shape
    fc = np.c_[np.ones((nobs, 1)), f]
    rp = f.mean(0)[:, None]
    fe = f - f.mean(0)
    b = np.linalg.pinv(fc) @ p
    eps = p - fc @ b
    alphas = b[:1].T

    nloading = (nfactor + 1) * nportfolio
    xpxi = np.eye(nloading + nfactor)
    xpxi[:nloading, :nloading] = np.kron(np.eye(nportfolio),
                                         np.linalg.pinv(fc.T @ fc / nobs))
    f_rep = np.tile(fc, (1, nportfolio))
    eps_rep = np.tile(eps, (nfactor + 1, 1))  # 1 2 3 ... 25 1 2 3 ...
    eps_rep = eps_rep.ravel(order='F')
    eps_rep = np.reshape(eps_rep, (nobs, (nfactor + 1) * nportfolio), order='F')
    xe = f_rep * eps_rep
    xe = np.c_[xe, fe]
    if cov_type in ('robust', 'heteroskedastic'):
        cov_est = HeteroskedasticCovariance(xe, inv_jacobian=xpxi,
                                            center=False, debiased=debiased,
                                            df=fc.shape[1])
        rp_cov_est = HeteroskedasticCovariance(fe, jacobian=np.eye(f.shape[1]),
                                               center=False, debiased=debiased,
                                               df=1)
    elif cov_type == 'kernel':
        cov_est = KernelCovariance(xe, inv_jacobian=xpxi, center=False,
                                   debiased=debiased, df=fc.shape[1],
                                   **cov_config)
        bw = cov_est.bandwidth
        _cov_config = {k: v for k, v in cov_config.items()}
        _cov_config['bandwidth'] = bw
        rp_cov_est = KernelCovariance(fe, jacobian=np.eye(f.shape[1]),
                                      center=False, debiased=debiased, df=1,
                                      **_cov_config)
    else:
        raise ValueError('Unknown cov_type: {0}'.format(cov_type))
    full_vcv = cov_est.cov
    rp_cov = rp_cov_est.cov
    vcv = full_vcv[:nloading, :nloading]

    # Rearrange VCV
    order = np.reshape(np.arange((nfactor + 1) * nportfolio),
                       (nportfolio, nfactor + 1))
    order = order.T.ravel()
    vcv = vcv[order][:, order]

    # Return values
    alpha_vcv = vcv[:nportfolio, :nportfolio]
    stat = float(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas)
    jstat = WaldTestStatistic(stat, 'All alphas are 0', nportfolio,
                              name='J-statistic')
    params = b.T
    betas = b[1:].T
    residual_ss = (eps ** 2).sum()
    e = p - p.mean(0)[None, :]
    total_ss = (e ** 2).sum()
    r2 = 1 - residual_ss / total_ss
    param_names = []
    for portfolio in self.portfolios.cols:
        param_names.append('alpha-{0}'.format(portfolio))
        for factor in self.factors.cols:
            param_names.append('beta-{0}-{1}'.format(portfolio, factor))
    for factor in self.factors.cols:
        param_names.append('lambda-{0}'.format(factor))

    res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp,
                   rp_cov=rp_cov, alphas=alphas, alpha_vcv=alpha_vcv,
                   jstat=jstat, rsquared=r2, total_ss=total_ss,
                   residual_ss=residual_ss, param_names=param_names,
                   portfolio_names=self.portfolios.cols,
                   factor_names=self.factors.cols, name=self._name,
                   cov_type=cov_type, model=self, nobs=nobs,
                   rp_names=self.factors.cols, cov_est=cov_est)
    return LinearFactorModelResults(res)
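The estimator above regresses each portfolio on a constant and the traded factors and takes the factor sample means as risk premia. A hedged usage sketch follows; TradedFactorModel is an assumed name for the class that owns this fit method, and the data is synthetic.

import numpy as np

# Synthetic returns on 25 portfolios driven by 3 traded factors
rs = np.random.RandomState(12345)
factors = rs.standard_normal((500, 3))
portfolios = factors @ rs.standard_normal((3, 25)) + rs.standard_normal((500, 25))

mod = TradedFactorModel(portfolios, factors)  # hypothetical constructor
res = mod.fit(cov_type='robust')  # heteroskedasticity-robust covariance
hac = mod.fit(cov_type='kernel', kernel='parzen', bandwidth=12)  # HAC variant
print(res.jstat)  # Wald test that all alphas are jointly zero
print(res.rp)     # risk premia: the factor sample means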
def fit(self, cov_type='robust', debiased=True, **cov_config):
    """
    Estimate model parameters

    Parameters
    ----------
    cov_type : str, optional
        Name of covariance estimator
    debiased : bool, optional
        Flag indicating whether to debias the covariance estimator using
        a degree of freedom adjustment
    **cov_config
        Additional covariance-specific options.  See Notes.

    Returns
    -------
    results : LinearFactorModelResults
        Results class with parameter estimates, covariance and test statistics

    Notes
    -----
    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    nobs, nf, nport, nrf, s1, s2, s3 = self._boundaries()
    excess_returns = not self._risk_free
    f = self.factors.ndarray
    p = self.portfolios.ndarray
    nport = p.shape[1]

    # Step 1: nport time-series regressions to get the loadings B
    fc = np.c_[np.ones((nobs, 1)), f]
    b = lstsq(fc, p)[0]  # (nf + 1) by nport
    eps = p - fc @ b
    if excess_returns:
        betas = b[1:].T
    else:
        betas = b.T.copy()
        betas[:, 0] = 1.0

    # Step 2: GLS cross-sectional regression of mean returns on the betas
    sigma_m12 = self._sigma_m12
    lam = lstsq(sigma_m12 @ betas, sigma_m12 @ p.mean(0)[:, None])[0]

    expected = betas @ lam
    pricing_errors = p - expected.T
    # Moments
    alphas = pricing_errors.mean(0)[:, None]
    moments = self._moments(eps, betas, lam, alphas, pricing_errors)
    # Jacobian
    jacobian = self._jacobian(betas, lam, alphas)

    if cov_type not in ('robust', 'heteroskedastic', 'kernel'):
        raise ValueError('Unknown weight: {0}'.format(cov_type))
    if cov_type in ('robust', 'heteroskedastic'):
        cov_est = HeteroskedasticCovariance
    else:  # 'kernel'
        cov_est = KernelCovariance
    cov_est = cov_est(moments, jacobian=jacobian, center=False,
                      debiased=debiased, df=fc.shape[1], **cov_config)

    # VCV
    full_vcv = cov_est.cov
    alpha_vcv = full_vcv[s2:, s2:]
    stat = float(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas)
    jstat = WaldTestStatistic(stat, 'All alphas are 0', nport - nf - nrf,
                              name='J-statistic')

    total_ss = ((p - p.mean(0)[None, :]) ** 2).sum()
    residual_ss = (eps ** 2).sum()
    r2 = 1 - residual_ss / total_ss
    rp = lam
    rp_cov = full_vcv[s1:s2, s1:s2]
    betas = betas if excess_returns else betas[:, 1:]
    params = np.c_[alphas, betas]
    param_names = []
    for portfolio in self.portfolios.cols:
        param_names.append('alpha-{0}'.format(portfolio))
        for factor in self.factors.cols:
            param_names.append('beta-{0}-{1}'.format(portfolio, factor))
    if not excess_returns:
        param_names.append('lambda-risk_free')
    for factor in self.factors.cols:
        param_names.append('lambda-{0}'.format(factor))

    # Pivot the VCV to drop unneeded entries and restore the correct order
    order = np.reshape(np.arange(s1), (nport, nf + 1))
    order[:, 0] = np.arange(s2, s3)
    order = order.ravel()
    order = np.r_[order, s1:s2]
    full_vcv = full_vcv[order][:, order]

    factor_names = list(self.factors.cols)
    rp_names = factor_names[:]
    if not excess_returns:
        rp_names.insert(0, 'risk_free')

    res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp,
                   rp_cov=rp_cov, alphas=alphas, alpha_vcv=alpha_vcv,
                   jstat=jstat, rsquared=r2, total_ss=total_ss,
                   residual_ss=residual_ss, param_names=param_names,
                   portfolio_names=self.portfolios.cols,
                   factor_names=factor_names, name=self._name,
                   cov_type=cov_type, model=self, nobs=nobs,
                   rp_names=rp_names, cov_est=cov_est)
    return LinearFactorModelResults(res)
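The second step above is a GLS cross-sectional regression: premultiplying both sides of the least-squares problem by sigma_m12, a square root of the inverse residual covariance, makes lstsq minimize the Sigma-weighted pricing errors. A self-contained sketch of that step under those assumptions; the function and argument names here are hypothetical.

import numpy as np

def gls_lambda(betas, mean_returns, sigma):
    """Sketch of the step-2 GLS risk-premia estimator:
    lambda = argmin (rbar - B @ lam)' inv(Sigma) (rbar - B @ lam)."""
    # One valid Sigma^{-1/2}: with inv(Sigma) = L @ L.T (Cholesky), use
    # W = L.T so that W.T @ W = inv(Sigma).
    chol = np.linalg.cholesky(np.linalg.inv(sigma))
    w = chol.T
    # lstsq on the transformed system reproduces the GLS solution
    return np.linalg.lstsq(w @ betas, w @ mean_returns, rcond=None)[0]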
def generate_data(missing, datatype, const=False, ntk=(971, 7, 5),
                  other_effects=0, rng=None):
    if rng is None:
        np.random.seed(12345)
    else:
        np.random.set_state(rng.get_state())

    n, t, k = ntk
    k += const
    x = standard_normal((k, t, n))
    beta = np.arange(1, k + 1)[:, None, None] / k
    y = (x * beta).sum(0) + standard_normal((t, n)) + 2 * standard_normal((1, n))
    w = np.random.chisquare(5, (t, n)) / 5
    c = None
    if other_effects == 1:
        cats = ['Industries']
    else:
        cats = ['cat.' + str(i) for i in range(other_effects)]
    if other_effects:
        c = np.random.randint(0, 4, (other_effects, t, n))

    vcats = ['varcat.' + str(i) for i in range(2)]
    vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
    vc1 = vc2[[0]]

    if const:
        x[0] = 1.0

    if missing > 0:
        locs = np.random.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = np.random.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan

    if datatype in ('pandas', 'xarray'):
        entities = ['firm' + str(i) for i in range(n)]
        time = date_range('1-1-1900', periods=t, freq='A-DEC')
        var_names = ['x' + str(i) for i in range(k)]
        # y = DataFrame(y, index=time, columns=entities)
        y = panel_to_frame(y[None], items=['y'], major_axis=time,
                           minor_axis=entities, swap=True)
        w = panel_to_frame(w[None], items=['w'], major_axis=time,
                           minor_axis=entities, swap=True)
        w = w.reindex(y.index)
        x = panel_to_frame(x, items=var_names, major_axis=time,
                           minor_axis=entities, swap=True)
        x = x.reindex(y.index)
        c = panel_to_frame(c, items=cats, major_axis=time,
                           minor_axis=entities, swap=True)
        c = c.reindex(y.index)
        vc1 = panel_to_frame(vc1, items=vcats[:1], major_axis=time,
                             minor_axis=entities, swap=True)
        vc1 = vc1.reindex(y.index)
        vc2 = panel_to_frame(vc2, items=vcats, major_axis=time,
                             minor_axis=entities, swap=True)
        vc2 = vc2.reindex(y.index)

    if datatype == 'xarray':
        # TODO: This is broken now, need to transform the MultiIndex to xarray 3d
        import xarray as xr
        x = xr.DataArray(PanelData(x).values3d,
                         coords={'entities': entities, 'time': time,
                                 'vars': var_names},
                         dims=['vars', 'time', 'entities'])
        y = xr.DataArray(PanelData(y).values3d,
                         coords={'entities': entities, 'time': time,
                                 'vars': ['y']},
                         dims=['vars', 'time', 'entities'])
        w = xr.DataArray(PanelData(w).values3d,
                         coords={'entities': entities, 'time': time,
                                 'vars': ['w']},
                         dims=['vars', 'time', 'entities'])
        if c.shape[1] > 0:
            c = xr.DataArray(PanelData(c).values3d,
                             coords={'entities': entities, 'time': time,
                                     'vars': c.columns},
                             dims=['vars', 'time', 'entities'])
        vc1 = xr.DataArray(PanelData(vc1).values3d,
                           coords={'entities': entities, 'time': time,
                                   'vars': vc1.columns},
                           dims=['vars', 'time', 'entities'])
        vc2 = xr.DataArray(PanelData(vc2).values3d,
                           coords={'entities': entities, 'time': time,
                                   'vars': vc2.columns},
                           dims=['vars', 'time', 'entities'])

    if rng is not None:
        rng.set_state(np.random.get_state())

    return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)
def _str_extra(self) -> AttrDict:
    return AttrDict(Debiased=self._debiased, Center=self._center)
def __init__(self, center: bool = False, debiased: bool = False) -> None:
    self._center = center
    self._debiased = debiased
    self._bandwidth: Optional[float] = 0
    self._name = "Homoskedastic (Unadjusted) Weighting"
    self._config = AttrDict(center=center, debiased=debiased)
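For orientation, one plausible shape for the weight computation that a configuration like this drives: the GMM weighting matrix is the inverse of an (optionally centered, optionally debiased) moment covariance. This is a sketch of the general pattern only, not the library's actual computation; the function name and the degree-of-freedom adjustment are assumptions.

import numpy as np

def weight_from_moments(moments, center=False, debiased=False):
    """Sketch only: inverse moment covariance as a GMM weighting matrix."""
    nobs = moments.shape[0]
    g = moments - moments.mean(0) if center else moments
    s = g.T @ g / nobs
    if debiased:
        s *= nobs / (nobs - 1)  # simple df adjustment (assumption)
    return np.linalg.inv(s)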
def lsdv_config(request):
    weights, entity_effects, time_effects, other_effects = request.param
    return AttrDict(weights=weights, entity_effects=entity_effects,
                    time_effects=time_effects, other_effects=other_effects)
def generate_data(
    missing,
    datatype,
    const=False,
    ntk=(971, 7, 5),
    other_effects=0,
    rng=None,
    num_cats=4,
):
    if rng is None:
        np.random.seed(12345)
    else:
        np.random.set_state(rng.get_state())

    n, t, k = ntk
    k += const
    x = standard_normal((k, t, n))
    beta = np.arange(1, k + 1)[:, None, None] / k
    y = (x * beta).sum(0) + standard_normal((t, n)) + 2 * standard_normal((1, n))
    w = np.random.chisquare(5, (t, n)) / 5
    c = None
    if other_effects == 1:
        cats = ["Industries"]
    else:
        cats = ["cat." + str(i) for i in range(other_effects)]
    if other_effects:
        if not isinstance(num_cats, list):
            num_cats = [num_cats] * other_effects
        c = []
        for i in range(other_effects):
            nc = num_cats[i]
            c.append(np.random.randint(0, nc, (1, t, n)))
        c = np.concatenate(c, 0)

    vcats = ["varcat." + str(i) for i in range(2)]
    vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
    vc1 = vc2[[0]]

    if const:
        x[0] = 1.0

    if missing > 0:
        locs = np.random.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = np.random.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan

    if datatype in ("pandas", "xarray"):
        entities = ["firm" + str(i) for i in range(n)]
        time = date_range("1-1-1900", periods=t, freq="A-DEC")
        var_names = ["x" + str(i) for i in range(k)]
        # y = DataFrame(y, index=time, columns=entities)
        y = panel_to_frame(y[None], items=["y"], major_axis=time,
                           minor_axis=entities, swap=True)
        w = panel_to_frame(w[None], items=["w"], major_axis=time,
                           minor_axis=entities, swap=True)
        w = w.reindex(y.index)
        x = panel_to_frame(x, items=var_names, major_axis=time,
                           minor_axis=entities, swap=True)
        x = x.reindex(y.index)
        c = panel_to_frame(c, items=cats, major_axis=time,
                           minor_axis=entities, swap=True)
        c = c.reindex(y.index)
        vc1 = panel_to_frame(vc1, items=vcats[:1], major_axis=time,
                             minor_axis=entities, swap=True)
        vc1 = vc1.reindex(y.index)
        vc2 = panel_to_frame(vc2, items=vcats, major_axis=time,
                             minor_axis=entities, swap=True)
        vc2 = vc2.reindex(y.index)

    if datatype == "xarray":
        # TODO: This is broken now, need to transform the MultiIndex to xarray 3d
        import xarray as xr

        x = xr.DataArray(
            PanelData(x).values3d,
            coords={"entities": entities, "time": time, "vars": var_names},
            dims=["vars", "time", "entities"],
        )
        y = xr.DataArray(
            PanelData(y).values3d,
            coords={"entities": entities, "time": time, "vars": ["y"]},
            dims=["vars", "time", "entities"],
        )
        w = xr.DataArray(
            PanelData(w).values3d,
            coords={"entities": entities, "time": time, "vars": ["w"]},
            dims=["vars", "time", "entities"],
        )
        if c.shape[1] > 0:
            c = xr.DataArray(
                PanelData(c).values3d,
                coords={"entities": entities, "time": time, "vars": c.columns},
                dims=["vars", "time", "entities"],
            )
        vc1 = xr.DataArray(
            PanelData(vc1).values3d,
            coords={"entities": entities, "time": time, "vars": vc1.columns},
            dims=["vars", "time", "entities"],
        )
        vc2 = xr.DataArray(
            PanelData(vc2).values3d,
            coords={"entities": entities, "time": time, "vars": vc2.columns},
            dims=["vars", "time", "entities"],
        )

    if rng is not None:
        rng.set_state(np.random.get_state())

    return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)
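A hedged example of driving the generator above. The keyword values are arbitrary; the attribute names match the returned AttrDict, while the comments describe shapes implied by the code rather than a documented contract.

# Hypothetical usage: two extra effects with per-effect category counts,
# 10% missing values, and a pandas layout.
data = generate_data(missing=0.1, datatype="pandas", const=True,
                     ntk=(100, 5, 3), other_effects=2, num_cats=[4, 6])
y, x, c = data.y, data.x, data.c  # stacked long-format DataFrames
print(list(c.columns))            # ['cat.0', 'cat.1'] effect categories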