def sigma(self, eps: NDArray, x: Sequence[NDArray]) -> NDArray:
    """
    Estimate residual covariance.

    Parameters
    ----------
    eps : ndarray
        The residuals from the system of equations.
    x : list[ndarray]
        A list of the regressor matrices for each equation in the system.

    Returns
    -------
    ndarray
        The estimated covariance matrix of the residuals.
    """
    nobs = eps.shape[0]
    eps = eps - eps.mean(0)
    sigma = eps.T @ eps / nobs
    scale = 1.0
    if self._debiased:
        k = array(list(map(lambda a: a.shape[1], x)))[:, None]
        k = sqrt(k)
        scale = nobs / (nobs - k @ k.T)
    sigma *= scale
    return sigma
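A minimal sketch of the debiased scaling applied above, assuming a two-equation system with k1 and k2 regressors per equation; the arrays are simulated for illustration and the computation mirrors the body of sigma rather than calling the method.

import numpy as np

# Illustrative only: two equations, 500 observations, 3 and 5 regressors.
rs = np.random.RandomState(0)
nobs, k1, k2 = 500, 3, 5
eps = rs.standard_normal((nobs, 2))
eps = eps - eps.mean(0)
sigma = eps.T @ eps / nobs
# Debiased scale: element (i, j) is nobs / (nobs - sqrt(k_i * k_j))
k = np.sqrt(np.array([[k1], [k2]], dtype=float))
scale = nobs / (nobs - k @ k.T)
print(sigma * scale)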
def kernel_optimal_bandwidth(x: NDArray, kernel: str = "bartlett") -> int:
    """
    Compute the optimal bandwidth for a kernel covariance estimator.

    Parameters
    ----------
    x : ndarray
        Array of data to use when computing optimal bandwidth
    kernel : str, optional
        Name of kernel to use. Supported kernels include:

        * 'bartlett', 'newey-west' : Bartlett's kernel
        * 'parzen', 'gallant' : Parzen's kernel
        * 'qs', 'quadratic-spectral', 'andrews' : Quadratic spectral kernel

    Returns
    -------
    int
        Optimal bandwidth. Set to nobs - 1 if computed bandwidth is larger.

    Notes
    -----
    .. todo::

        * Explain mathematics involved
        * References

    See Also
    --------
    linearmodels.iv.covariance.kernel_weight_bartlett
    linearmodels.iv.covariance.kernel_weight_parzen
    linearmodels.iv.covariance.kernel_weight_quadratic_spectral
    """
    t = x.shape[0]
    x = x.squeeze()
    if kernel in ("bartlett", "newey-west"):
        q, c = 1, 1.1447
        m_star = int(ceil(4 * (t / 100) ** (2 / 9)))
    elif kernel in ("qs", "andrews", "quadratic-spectral"):
        q, c = 2, 1.3221
        m_star = int(ceil(4 * (t / 100) ** (2 / 25)))
    elif kernel in ("gallant", "parzen"):
        q, c = 2, 2.6614
        m_star = int(ceil(4 * (t / 100) ** (4 / 25)))
    else:
        raise ValueError("Unknown kernel: {0}".format(kernel))
    sigma = empty(m_star + 1)
    sigma[0] = x.T @ x / t
    for i in range(1, m_star + 1):
        sigma[i] = x[i:].T @ x[:-i] / t
    s0 = sigma[0] + 2 * sigma[1:].sum()
    sq = 2 * npsum(sigma[1:] * arange(1, m_star + 1) ** q)
    rate = 1 / (2 * q + 1)
    gamma = c * ((sq / s0) ** 2) ** rate
    m = gamma * t ** rate
    return min(int(ceil(m)), t - 1)
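A minimal usage sketch, assuming kernel_optimal_bandwidth above is in scope together with its module-level imports (ceil from math; empty, arange and sum-as-npsum from numpy); the AR(1) series is simulated purely for illustration.

import numpy as np

# Simulate a persistent AR(1) series so a positive bandwidth is selected.
rs = np.random.RandomState(12345)
e = rs.standard_normal(1000)
x = np.zeros(1000)
for i in range(1, 1000):
    x[i] = 0.7 * x[i - 1] + e[i]

bw = kernel_optimal_bandwidth(x, kernel="bartlett")
print(bw)  # A small positive integer; the exact value is data-dependent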
def _optimal_bandwidth(self, moments: NDArray) -> float:
    """Compute optimal bandwidth used in estimation if needed"""
    if self._predefined_bw is not None:
        return self._predefined_bw
    elif not self._optimal_bw:
        self._bandwidth = moments.shape[0] - 2
    else:
        m = moments / moments.std(0)[None, :]
        m = m.sum(1)
        self._bandwidth = kernel_optimal_bandwidth(m, kernel=self.kernel)
    assert self._bandwidth is not None
    return self._bandwidth
@classmethod
def from_array(
    cls,
    values: NDArray,
    items: Sequence[Label],
    major_axis: Sequence[Label],
    minor_axis: Sequence[Label],
) -> "_Panel":
    index = list(product(minor_axis, major_axis))
    index = MultiIndex.from_tuples(index)
    i, j, k = len(items), len(major_axis), len(minor_axis)
    values = np.swapaxes(values.copy(), 0, 2).ravel()
    values = np.reshape(values, ((j * k), i))
    df = DataFrame(values, index=index, columns=items)
    return cls(df)
def drop(self, locs: NDArray) -> None:
    """
    Drop observations from the panel.

    Parameters
    ----------
    locs : ndarray
        Boolean array indicating observations to drop with reference to
        the dataframe view of the data
    """
    self._frame = self._frame.loc[~locs.ravel()]
    self._frame = self._minimize_multiindex(self._frame)
    # Reset panel and shape after a drop
    self._panel = self._shape = None
    self._k, self._t, self._n = self.shape
def panel_to_frame(
    x: NDArray,
    items: Sequence[Label],
    major_axis: Sequence[Label],
    minor_axis: Sequence[Label],
    swap: bool = False,
) -> DataFrame:
    """
    Construct a multiindex DataFrame using Panel-like arguments

    Parameters
    ----------
    x : ndarray
        3-d array with shape (nitems, nmajor, nminor)
    items : list-like
        List like object with item labels
    major_axis : list-like
        List like object with major_axis labels
    minor_axis : list-like
        List like object with minor_axis labels
    swap : bool
        If True, swap the major and minor axes

    Notes
    -----
    This function is equivalent to

        Panel(x, items, major_axis, minor_axis).to_frame()

    If `swap` is True, it is equivalent to

        Panel(x, items, major_axis, minor_axis).swapaxes(1, 2).to_frame()
    """
    nmajor = np.arange(len(major_axis))
    nminor = np.arange(len(minor_axis))
    final_levels = [major_axis, minor_axis]
    mi = MultiIndex.from_product([nmajor, nminor])
    if x is not None:
        shape = x.shape
        x = x.reshape((shape[0], shape[1] * shape[2])).T
    df = DataFrame(x, columns=items, index=mi)
    if swap:
        df.index = mi.swaplevel()
        df.sort_index(inplace=True)
        final_levels = [minor_axis, major_axis]
    df.index = df.index.set_levels(final_levels, level=[0, 1])
    df.index.names = ["major", "minor"]
    return df
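A minimal usage sketch, assuming panel_to_frame above is in scope along with its module-level numpy and pandas imports; the labels and values below are invented for illustration.

import numpy as np

# 2 items, 3 major (e.g. time) labels, 2 minor (e.g. entity) labels.
values = np.arange(12.0).reshape((2, 3, 2))
df = panel_to_frame(
    values,
    items=["var1", "var2"],
    major_axis=[2000, 2001, 2002],
    minor_axis=["a", "b"],
)
print(df)  # Rows indexed by (major, minor); one column per item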
def _post_estimation(
    self, params: NDArray, cov_estimator: CovarianceEstimator, cov_type: str
) -> Dict[str, Any]:
    columns = self._columns
    index = self._index
    eps = self.resids(params)
    y = self.dependent.pandas
    fitted = DataFrame(asarray(y) - eps, y.index, ["fitted_values"])
    weps = self.wresids(params)
    cov = cov_estimator.cov
    debiased = cov_estimator.debiased

    residual_ss = weps.T @ weps
    w = self.weights.ndarray
    e = self._wy
    if self.has_constant:
        e = e - sqrt(self.weights.ndarray) * average(self._y, weights=w)

    total_ss = float(e.T @ e)
    r2 = 1 - residual_ss / total_ss

    fstat = self._f_statistic(params, cov, debiased)
    out = {
        "params": Series(params.squeeze(), columns, name="parameter"),
        "eps": Series(eps.squeeze(), index=index, name="residual"),
        "weps": Series(weps.squeeze(), index=index, name="weighted residual"),
        "cov": DataFrame(cov, columns=columns, index=columns),
        "s2": float(cov_estimator.s2),
        "debiased": debiased,
        "residual_ss": float(residual_ss),
        "total_ss": float(total_ss),
        "r2": float(r2),
        "fstat": fstat,
        "vars": columns,
        "instruments": self._instr_columns,
        "cov_config": cov_estimator.config,
        "cov_type": cov_type,
        "method": self._method,
        "cov_estimator": cov_estimator,
        "fitted": fitted,
        "original_index": self._original_index,
    }
    return out
def w(self, moments: NDArray) -> NDArray:
    """
    Score/moment condition weighting matrix

    Parameters
    ----------
    moments : ndarray
        Moment conditions (nobs by nmoments)

    Returns
    -------
    ndarray
        Weighting matrix computed from moment conditions
    """
    if self._center:
        moments = moments - moments.mean(0)[None, :]
    out = self._kernel_cov(moments)
    return inv(out)
def w(self, moments: NDArray) -> NDArray:
    """
    Score/moment condition weighting matrix

    Parameters
    ----------
    moments : ndarray
        Moment conditions (nobs by nmoments)

    Returns
    -------
    ndarray
        Weighting matrix computed from moment conditions
    """
    if self._center:
        moments = moments - moments.mean(0)[None, :]
    nobs = moments.shape[0]
    out = moments.T @ moments / nobs
    return inv((out + out.T) / 2.0)
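A minimal sketch of the computation in the w method directly above, using simulated moment conditions rather than the class; the symmetrization (out + out.T) / 2 guards against tiny asymmetries from floating-point rounding before inversion.

import numpy as np

rs = np.random.RandomState(1)
moments = rs.standard_normal((250, 4))  # nobs by nmoments, illustrative only
moments = moments - moments.mean(0)[None, :]
nobs = moments.shape[0]
out = moments.T @ moments / nobs
w = np.linalg.inv((out + out.T) / 2.0)
print(w.shape)  # (4, 4)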
def has_constant(
    x: NDArray, x_rank: Optional[int] = None
) -> Tuple[bool, Optional[int]]:
    """
    Determine whether an array contains a constant column or spans a constant.

    Parameters
    ----------
    x : ndarray
        Array to be checked for a constant (n,k)
    x_rank : {int, None}
        Rank of x if previously computed. If None, this value will be
        computed.

    Returns
    -------
    const : bool
        Flag indicating whether x contains a constant or has column span
        with a constant
    loc : int
        Column location of constant
    """
    if np.any(np.all(x == 1, axis=0)):
        loc: Optional[int] = int(np.argwhere(np.all(x == 1, axis=0)))
        return True, loc

    if np.any((np.ptp(x, axis=0) == 0) & ~np.all(x == 0, axis=0)):
        loc_arr = (np.ptp(x, axis=0) == 0) & ~np.all(x == 0, axis=0)
        loc = int(np.argwhere(loc_arr))
        return True, loc

    n = x.shape[0]
    aug_rank = np.linalg.matrix_rank(np.c_[np.ones((n, 1)), x])
    rank = np.linalg.matrix_rank(x) if x_rank is None else x_rank
    has_const = (aug_rank == rank) and x.shape[0] > x.shape[1]
    has_const = has_const or rank < min(x.shape)
    loc = None
    if has_const:
        normed_var = x.var(0) / np.abs(x).max(0)
        loc = int(np.argmin(normed_var))
    return bool(has_const), loc
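A minimal usage sketch, assuming has_constant above is in scope; the array is illustrative, with a column of ones in the second position.

import numpy as np

rs = np.random.RandomState(3)
x = np.column_stack(
    [rs.standard_normal(50), np.ones(50), rs.standard_normal(50)]
)
const, loc = has_constant(x)
print(const, loc)  # True 1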
def weight_matrix(self, x: NDArray, z: NDArray, eps: NDArray) -> NDArray:
    """
    Parameters
    ----------
    x : ndarray
        Model regressors (exog and endog), (nobs by nvar)
    z : ndarray
        Model instruments (exog and instruments), (nobs by ninstr)
    eps : ndarray
        Model errors (nobs by 1)

    Returns
    -------
    ndarray
        Covariance of GMM moment conditions.
    """
    nobs, nvar = x.shape
    mu = eps.mean(0)
    s2 = (eps - mu).T @ (eps - mu) / nobs
    w = s2 * z.T @ z / nobs
    w *= 1 if not self._debiased else nobs / (nobs - nvar)
    return w
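A minimal sketch of the weight-matrix formula above (the error variance times Z'Z / nobs, with an optional small-sample adjustment), computed directly on simulated arrays rather than through the class; all shapes and values are illustrative.

import numpy as np

rs = np.random.RandomState(2)
nobs, nvar, ninstr = 200, 3, 5
x = rs.standard_normal((nobs, nvar))
z = rs.standard_normal((nobs, ninstr))
eps = rs.standard_normal((nobs, 1))

mu = eps.mean(0)
s2 = ((eps - mu).T @ (eps - mu) / nobs).item()
w = s2 * z.T @ z / nobs
w_debiased = w * (nobs / (nobs - nvar))  # small-sample (debiased) adjustment
print(w.shape, w_debiased.shape)  # (5, 5) (5, 5)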
def _post_estimation(
    self,
    params: NDArray,
    cov_estimator: Union[
        HomoskedasticCovariance,
        HeteroskedasticCovariance,
        KernelCovariance,
        ClusteredCovariance,
    ],
    cov_type: str,
) -> Dict[str, Any]:
    columns = self._columns
    index = self._index
    eps = self.resids(params)
    fitted = DataFrame(
        self._dependent.ndarray - eps,
        index=self._dependent.rows,
        columns=["fitted_values"],
    )
    absorbed_effects = DataFrame(
        to_numpy(self._absorbed_dependent) - to_numpy(fitted),
        columns=["absorbed_effects"],
        index=self._dependent.rows,
    )

    weps = self.wresids(params)
    cov = cov_estimator.cov
    debiased = cov_estimator.debiased

    residual_ss = (weps.T @ weps)[0, 0]
    w = self.weights.ndarray
    root_w = sqrt(w)
    e = self._dependent.ndarray * root_w
    if self.has_constant:
        e = e - root_w * average(self._dependent.ndarray, weights=w)

    total_ss = float(e.T @ e)
    r2 = max(1 - residual_ss / total_ss, 0.0)

    e = to_numpy(self._absorbed_dependent)  # already scaled by root_w
    # If absorbing contains a constant, but exog does not, no need to demean
    if self._const_col is not None:
        col = self._const_col
        x = to_numpy(self._absorbed_exog)[:, col:col + 1]
        mu = (lstsq(x, e, rcond=None)[0]).squeeze()
        e = e - x * mu

    absorbed_total_ss = float(e.T @ e)
    r2_absorbed = max(1 - residual_ss / absorbed_total_ss, 0.0)

    fstat = self._f_statistic(params, cov, debiased)
    out = {
        "params": Series(params.squeeze(), columns, name="parameter"),
        "eps": Series(eps.squeeze(), index=index, name="residual"),
        "weps": Series(weps.squeeze(), index=index, name="weighted residual"),
        "cov": DataFrame(cov, columns=columns, index=columns),
        "s2": float(cov_estimator.s2),
        "debiased": debiased,
        "residual_ss": float(residual_ss),
        "total_ss": float(total_ss),
        "r2": float(r2),
        "fstat": fstat,
        "vars": columns,
        "instruments": [],
        "cov_config": cov_estimator.config,
        "cov_type": cov_type,
        "method": self._method,
        "cov_estimator": cov_estimator,
        "fitted": fitted,
        "original_index": self._original_index,
        "absorbed_effects": absorbed_effects,
        "absorbed_r2": r2_absorbed,
    }
    return out