def sigma(self, eps: Float64Array, x: Sequence[Float64Array]) -> Float64Array:
    """
    Estimate the covariance matrix of the system residuals.

    Parameters
    ----------
    eps : ndarray
        Residuals from the system of equations. Rows are observations.
    x : list[ndarray]
        Regressor matrices, one per equation in the system. Only the
        number of columns of each matrix is used.

    Returns
    -------
    ndarray
        Covariance of the column-demeaned residuals. When the estimator is
        debiased, element (i, j) is rescaled by
        ``nobs / (nobs - sqrt(k_i * k_j))`` where ``k_i`` is the number of
        regressors in equation ``i``.
    """
    nobs = eps.shape[0]
    centered = eps - eps.mean(0)
    cov = centered.T @ centered / nobs
    if not self._debiased:
        # The unit scale applied on this path leaves the values untouched
        return cov

    # Column vector holding sqrt(k_i); its outer product is sqrt(k_i * k_j)
    root_k = sqrt(array([regressors.shape[1] for regressors in x]))[:, None]
    adjustment = nobs / (nobs - root_k @ root_k.T)
    # In-place so the adjustment must conform to the shape of ``cov``
    cov *= adjustment
    return cov
def kernel_optimal_bandwidth(x: Float64Array, kernel: str = "bartlett") -> int:
    """
    Compute a data-based optimal bandwidth for a kernel estimator.

    Parameters
    ----------
    x : ndarray
        Array of data to use when computing optimal bandwidth
    kernel : str, optional
        Name of kernel to use.  Supported kernels include:

        * 'bartlett', 'newey-west' : Bartlett's kernel
        * 'parzen', 'gallant' : Parzen's kernel
        * 'qs', 'quadratic-spectral', 'andrews' : Quadratic spectral kernel

    Returns
    -------
    int
        Optimal bandwidth. Set to nobs - 1 if computed bandwidth is larger.

    Raises
    ------
    ValueError
        If ``kernel`` is not one of the supported names.

    Notes
    -----

    .. todo::

      * Explain mathematics involved
      * References

    See Also
    --------
    linearmodels.iv.covariance.kernel_weight_bartlett,
    linearmodels.iv.covariance.kernel_weight_parzen,
    linearmodels.iv.covariance.kernel_weight_quadratic_spectral
    """
    nobs = x.shape[0]
    series = x.squeeze()

    # Per kernel family: accepted names, q, c, and the exponent used for the
    # preliminary number of autocovariance lags.
    families = (
        (("bartlett", "newey-west"), 1, 1.1447, 2 / 9),
        (("qs", "andrews", "quadratic-spectral"), 2, 1.3221, 2 / 25),
        (("gallant", "parzen"), 2, 2.6614, 4 / 25),
    )
    for names, q, c, exponent in families:
        if kernel in names:
            break
    else:
        raise ValueError("Unknown kernel: {0}".format(kernel))
    m_star = int(ceil(4 * (nobs / 100) ** exponent))

    # Autocovariances at lags 0, 1, ..., m_star (no demeaning is applied)
    autocov = empty(m_star + 1)
    autocov[0] = series.T @ series / nobs
    for lag in range(1, m_star + 1):
        autocov[lag] = series[lag:].T @ series[:-lag] / nobs

    higher_lags = autocov[1:]
    s0 = autocov[0] + 2 * higher_lags.sum()
    sq = 2 * npsum(higher_lags * arange(1, m_star + 1) ** q)

    rate = 1 / (2 * q + 1)
    gamma = c * ((sq / s0) ** 2) ** rate
    bandwidth = int(ceil(gamma * nobs ** rate))
    # Never report a bandwidth beyond nobs - 1
    return min(bandwidth, nobs - 1)
def _optimal_bandwidth(self, moments: Float64Array) -> float:
    """
    Return the bandwidth used in estimation, computing it if needed.

    A predefined bandwidth takes precedence and is returned without
    touching ``self._bandwidth``.  Otherwise the bandwidth is stored on the
    instance before being returned: ``nobs - 2`` when optimal selection is
    switched off, or the value from ``kernel_optimal_bandwidth`` applied to
    the row sums of the moments after each column is divided by its
    standard deviation.

    Parameters
    ----------
    moments : ndarray
        Moment conditions; rows are observations.

    Returns
    -------
    float
        The bandwidth to use.
    """
    predefined = self._predefined_bw
    if predefined is not None:
        return predefined

    if self._optimal_bw:
        # Put the columns on a common scale, then collapse to one series
        scaled = moments / moments.std(0)[None, :]
        combined = scaled.sum(1)
        self._bandwidth = kernel_optimal_bandwidth(combined, kernel=self.kernel)
    else:
        self._bandwidth = moments.shape[0] - 2

    assert self._bandwidth is not None
    return self._bandwidth
def w(self, moments: Float64Array) -> Float64Array:
    """
    Score/moment condition weighting matrix based on a kernel covariance

    Parameters
    ----------
    moments : ndarray
        Moment conditions (nobs by nmoments)

    Returns
    -------
    ndarray
        Inverse of the kernel covariance of the moment conditions, which
        are demeaned column by column first when centering is enabled
    """
    centered = moments - moments.mean(0)[None, :] if self._center else moments
    return inv(self._kernel_cov(centered))
def w(self, moments: Float64Array) -> Float64Array:
    """
    Score/moment condition weighting matrix from the moment outer product

    Parameters
    ----------
    moments : ndarray
        Moment conditions (nobs by nmoments)

    Returns
    -------
    ndarray
        Inverse of the symmetrized average outer product of the moment
        conditions, which are demeaned column by column first when
        centering is enabled
    """
    if self._center:
        centered = moments - moments.mean(0)[None, :]
    else:
        centered = moments
    outer = centered.T @ centered / centered.shape[0]
    # Average with the transpose so the matrix passed to inv is symmetric
    symmetric = (outer + outer.T) / 2.0
    return inv(symmetric)
def has_constant(
    x: Float64Array, x_rank: Optional[int] = None
) -> Tuple[bool, Optional[int]]:
    """
    Check whether an array contains a constant or spans one.

    Parameters
    ----------
    x: ndarray
        Array to be checked for a constant (n,k)
    x_rank : {int, None}
        Rank of x if previously computed.  If None, this value will be
        computed.

    Returns
    -------
    const : bool
        Flag indicating whether x contains a constant or has column span with
        a constant
    loc : {int, None}
        Column location of constant.  When several columns qualify in one of
        the direct checks, the first such column is reported.  None when no
        constant is found.
    """
    # 1. A column made up entirely of ones.
    ones_cols = np.all(x == 1, axis=0)
    if np.any(ones_cols):
        # Take the first match: converting the full index array with int()
        # fails as soon as more than one column qualifies.
        return True, int(np.flatnonzero(ones_cols)[0])

    # 2. Any other column with zero range that is not identically zero.
    const_cols = (np.ptp(x, axis=0) == 0) & ~np.all(x == 0, axis=0)
    if np.any(const_cols):
        return True, int(np.flatnonzero(const_cols)[0])

    # 3. A constant in the column span: adding a column of ones does not
    #    raise the rank, or x is itself rank deficient.
    n = x.shape[0]
    aug_rank = np.linalg.matrix_rank(np.c_[np.ones((n, 1)), x])
    rank = np.linalg.matrix_rank(x) if x_rank is None else x_rank

    has_const = (aug_rank == rank) and x.shape[0] > x.shape[1]
    has_const = has_const or rank < min(x.shape)
    loc = None
    if has_const:
        # Report the column with the smallest variance relative to its scale
        normed_var = x.var(0) / np.abs(x).max(0)
        loc = int(np.argmin(normed_var))

    return bool(has_const), loc
def weight_matrix(self, x: Float64Array, z: Float64Array, eps: Float64Array) -> Float64Array:
    """
    Compute the moment covariance as the error variance times z'z / nobs.

    Parameters
    ----------
    x : ndarray
        Model regressors (exog and endog), (nobs by nvar)
    z : ndarray
        Model instruments (exog and instruments), (nobs by ninstr)
    eps : ndarray
        Model errors (nobs by 1)

    Returns
    -------
    ndarray
        Covariance of GMM moment conditions.
    """
    nobs, nvar = x.shape
    demeaned = eps - eps.mean(0)
    s2 = demeaned.T @ demeaned / nobs
    # ``*`` and ``@`` share precedence, so z' is scaled by s2 before the product
    w = (s2 * z.T) @ z / nobs
    if self._debiased:
        # Degree-of-freedom correction based on the number of regressors
        w *= nobs / (nobs - nvar)
    return w
def _post_estimation(
    self,
    params: Float64Array,
    cov_estimator: Union[
        HomoskedasticCovariance,
        HeteroskedasticCovariance,
        KernelCovariance,
        ClusteredCovariance,
    ],
    cov_type: str,
) -> Dict[str, Any]:
    """
    Assemble the post-estimation quantities for a set of parameters.

    Parameters
    ----------
    params : ndarray
        Estimated parameters.
    cov_estimator : {HomoskedasticCovariance, HeteroskedasticCovariance,
                     KernelCovariance, ClusteredCovariance}
        Covariance estimator.  Its ``cov``, ``debiased``, ``s2`` and
        ``config`` attributes are read, and the estimator itself is stored
        in the output.
    cov_type : str
        Covariance type label, stored unchanged in the output.

    Returns
    -------
    dict
        Parameters, residuals and weighted residuals, the parameter
        covariance, sums of squares, R-squared computed against both the
        weighted dependent variable and the absorbed dependent variable,
        the F-statistic, fitted values, absorbed effects and the
        covariance configuration.
    """
    columns = self._columns
    index = self._index
    eps = self.resids(params)
    fitted_values = self._dependent.ndarray - eps
    fitted = DataFrameWrapper(
        fitted_values,
        index=self._dependent.rows,
        columns=["fitted_values"],
    )
    assert isinstance(self._absorbed_dependent, DataFrame)
    absorbed_effects = DataFrameWrapper(
        self._absorbed_dependent.to_numpy() - fitted_values,
        columns=["absorbed_effects"],
        index=self._dependent.rows,
    )

    weps = self.wresids(params)
    cov = cov_estimator.cov
    debiased = cov_estimator.debiased

    residual_ss = (weps.T @ weps)[0, 0]

    w = self.weights.ndarray
    root_w = sqrt(w)
    e = self._dependent.ndarray * root_w
    if self.has_constant:
        e = e - root_w * average(self._dependent.ndarray, weights=w)

    # .item() extracts the single element explicitly; calling float() on an
    # array with ndim > 0 is deprecated in recent NumPy releases.
    total_ss = float((e.T @ e).item())
    r2 = max(1 - residual_ss / total_ss, 0.0)

    e = self._absorbed_dependent.to_numpy()  # already scaled by root_w
    # If absorbing contains a constant, but exog does not, no need to demean
    assert isinstance(self._absorbed_exog, DataFrame)
    if self._const_col is not None:
        col = self._const_col
        x = self._absorbed_exog.to_numpy()[:, col : col + 1]
        # Remove the projection of e on the constant column
        mu = (lstsq(x, e, rcond=None)[0]).squeeze()
        e = e - x * mu

    absorbed_total_ss = float((e.T @ e).item())
    r2_absorbed = max(1 - residual_ss / absorbed_total_ss, 0.0)

    fstat = self._f_statistic(params, cov, debiased)
    out = {
        "params": Series(params.squeeze(), columns, name="parameter"),
        "eps": SeriesWrapper(eps.squeeze(), index=index, name="residual"),
        "weps": SeriesWrapper(weps.squeeze(), index=index, name="weighted residual"),
        "cov": DataFrame(cov, columns=columns, index=columns),
        "s2": float(cov_estimator.s2),
        "debiased": debiased,
        "residual_ss": float(residual_ss),
        "total_ss": float(total_ss),
        "r2": float(r2),
        "fstat": fstat,
        "vars": columns,
        "instruments": [],
        "cov_config": cov_estimator.config,
        "cov_type": cov_type,
        "method": self._method,
        "cov_estimator": cov_estimator,
        "fitted": fitted,
        "original_index": self._original_index,
        "absorbed_effects": absorbed_effects,
        "absorbed_r2": r2_absorbed,
    }
    return out