Esempio n. 1
0
    def sigma(self, eps: NDArray, x: Sequence[NDArray]) -> NDArray:
        """
        Estimate the covariance matrix of the system residuals.

        Parameters
        ----------
        eps : ndarray
            Residuals from the system of equations, one row per observation.
        x : list[ndarray]
            Regressor matrices, one for each equation in the system. Only the
            number of columns of each matrix is used.

        Returns
        -------
        ndarray
            Covariance of the column-demeaned residuals. When the estimator
            is debiased, element (i, j) is additionally multiplied by
            ``nobs / (nobs - sqrt(k_i * k_j))`` where ``k_i`` is the number of
            columns of ``x[i]``.
        """
        nobs = eps.shape[0]
        centered = eps - eps.mean(0)
        cov = centered.T @ centered / nobs

        adjustment = 1.0
        if self._debiased:
            # Column vector holding sqrt(k_i); its outer product gives
            # sqrt(k_i * k_j) for every pair of equations.
            root_k = sqrt(array([regressors.shape[1] for regressors in x])[:, None])
            adjustment = nobs / (nobs - root_k @ root_k.T)
        cov *= adjustment

        return cov
Esempio n. 2
0
def kernel_optimal_bandwidth(x: NDArray, kernel: str = "bartlett") -> int:
    """
    Compute a data-dependent bandwidth for a kernel covariance estimator.

    Parameters
    ----------
    x : ndarray
        Array of data to use when computing optimal bandwidth
    kernel : str, optional
        Name of kernel to use.  Supported kernels include:

          * 'bartlett', 'newey-west' : Bartlett's kernel
          * 'parzen', 'gallant' : Parzen's kernel
          * 'qs', 'quadratic-spectral', 'andrews' : Quadratic spectral kernel

    Returns
    -------
    int
        Optimal bandwidth. Set to nobs - 1 if computed bandwidth is larger.

    Raises
    ------
    ValueError
        If ``kernel`` is not one of the supported names.

    Notes
    -----

    .. todo::

      * Explain mathematics involved
      * References

    See Also
    --------
    linearmodels.iv.covariance.kernel_weight_bartlett,
    linearmodels.iv.covariance.kernel_weight_parzen,
    linearmodels.iv.covariance.kernel_weight_quadratic_spectral
    """
    nobs = x.shape[0]
    x = x.squeeze()

    # Each entry: (accepted names, q, c, exponent used to pick the lag count)
    kernel_specs = (
        (("bartlett", "newey-west"), 1, 1.1447, 2 / 9),
        (("qs", "andrews", "quadratic-spectral"), 2, 1.3221, 2 / 25),
        (("gallant", "parzen"), 2, 2.6614, 4 / 25),
    )
    for names, q, c, power in kernel_specs:
        if kernel in names:
            break
    else:
        raise ValueError("Unknown kernel: {0}".format(kernel))
    n_lags = int(ceil(4 * (nobs / 100) ** power))

    # Uncentered autocovariances of x at lags 0, 1, ..., n_lags
    autocov = empty(n_lags + 1)
    autocov[0] = x.T @ x / nobs
    for lag in range(1, n_lags + 1):
        autocov[lag] = x[lag:].T @ x[:-lag] / nobs

    lagged = autocov[1:]
    s0 = autocov[0] + 2 * lagged.sum()
    sq = 2 * npsum(lagged * arange(1, n_lags + 1) ** q)
    rate = 1 / (2 * q + 1)
    gamma = c * ((sq / s0) ** 2) ** rate
    bandwidth = int(ceil(gamma * nobs ** rate))
    # Never return a bandwidth of nobs or more
    return min(bandwidth, nobs - 1)
Esempio n. 3
0
 def _optimal_bandwidth(self, moments: NDArray) -> float:
     """
     Return the bandwidth to use, computing and caching it when required.

     A predefined bandwidth is returned as-is. Otherwise the bandwidth is
     either the number of rows of ``moments`` minus 2, or the value that
     ``kernel_optimal_bandwidth`` selects for the row sums of the
     standardized moments; the result is stored in ``self._bandwidth``.
     """
     if self._predefined_bw is not None:
         return self._predefined_bw

     if self._optimal_bw:
         # Scale every column by its standard deviation, then collapse the
         # columns into a single series for the bandwidth selector.
         scaled = moments / moments.std(0)[None, :]
         chosen = kernel_optimal_bandwidth(scaled.sum(1), kernel=self.kernel)
     else:
         chosen = moments.shape[0] - 2
     self._bandwidth = chosen

     assert self._bandwidth is not None
     return self._bandwidth
Esempio n. 4
0
    def from_array(
        cls,
        values: NDArray,
        items: Sequence[Label],
        major_axis: Sequence[Label],
        minor_axis: Sequence[Label],
    ) -> "_Panel":
        """
        Build an instance from a 3-d array and the labels of its three axes.

        Parameters
        ----------
        values : ndarray
            3-d data; the reshaping below treats it as having shape
            (len(items), len(major_axis), len(minor_axis)).
        items : list-like
            Labels used as the columns of the resulting frame.
        major_axis : list-like
            Labels forming the second level of the row index.
        minor_axis : list-like
            Labels forming the first level of the row index.

        Returns
        -------
        _Panel
            ``cls`` called with a DataFrame whose rows are ordered by minor
            label first and major label second.
        """
        row_labels = MultiIndex.from_tuples(list(product(minor_axis, major_axis)))
        n_items, n_major, n_minor = len(items), len(major_axis), len(minor_axis)
        # Bring the minor axis to the front so that flattening walks the data
        # in the same (minor, major) order as the row labels built above.
        reordered = np.swapaxes(values.copy(), 0, 2)
        table = np.reshape(reordered.ravel(), (n_major * n_minor, n_items))

        return cls(DataFrame(table, index=row_labels, columns=items))
Esempio n. 5
0
    def drop(self, locs: NDArray) -> None:
        """
        Remove observations from the panel in place.

        Parameters
        ----------
        locs : ndarray
            Boolean array marking the rows of the dataframe view of the data
            that should be removed; it is flattened before use.
        """
        keep = ~locs.ravel()
        self._frame = self._frame.loc[keep]
        self._frame = self._minimize_multiindex(self._frame)

        # Clear the cached panel and shape, then refresh the stored
        # dimensions from the ``shape`` attribute.
        self._panel = None
        self._shape = None
        dims = self.shape
        self._k, self._t, self._n = dims
Esempio n. 6
0
def panel_to_frame(
    x: NDArray,
    items: Sequence[Label],
    major_axis: Sequence[Label],
    minor_axis: Sequence[Label],
    swap: bool = False,
) -> DataFrame:
    """
    Construct a multiindex DataFrame using Panel-like arguments

    Parameters
    ----------
    x : ndarray
        3-d array with size nitem, nmajor, nminor. ``None`` is also accepted,
        in which case the frame is built without data.
    items : list-like
        List like object with item labels
    major_axis : list-like
        List like object with major_axis labels
    minor_axis : list-like
        List like object with minor_axis labels
    swap : bool
        Swap the major and minor axes

    Returns
    -------
    DataFrame
        Frame with one column per item and a two-level row index whose
        levels are named "major" and "minor".

    Notes
    -----
    This function is equivalent to

    Panel(x, items, major_axis, minor_axis).to_frame()

    if `swap` is True, it is equivalent to

    Panel(x, items, major_axis, minor_axis).swapaxes(1,2).to_frame()
    """
    nmajor = np.arange(len(major_axis))
    nminor = np.arange(len(minor_axis))
    final_levels = [major_axis, minor_axis]
    # Build the index from integer positions first; the real labels are
    # attached at the end so that sorting (when swapping) is by position.
    mi = MultiIndex.from_product([nmajor, nminor])
    if x is not None:
        shape = x.shape
        x = x.reshape((shape[0], shape[1] * shape[2])).T
    df = DataFrame(x, columns=items, index=mi)
    if swap:
        df.index = mi.swaplevel()
        df.sort_index(inplace=True)
        final_levels = [minor_axis, major_axis]
    # ``set_levels`` returns a new index: the ``inplace`` argument and the
    # positional ``level`` argument were removed in pandas 2.0.
    df.index = df.index.set_levels(final_levels, level=[0, 1])
    df.index.names = ["major", "minor"]
    return df
Esempio n. 7
0
    def _post_estimation(self, params: NDArray,
                         cov_estimator: CovarianceEstimator,
                         cov_type: str) -> Dict[str, Any]:
        """
        Assemble the dictionary of post-estimation results.

        Parameters
        ----------
        params : ndarray
            Estimated parameters; passed to ``resids``, ``wresids`` and
            ``_f_statistic``.
        cov_estimator : CovarianceEstimator
            Covariance estimator whose ``cov``, ``debiased``, ``s2`` and
            ``config`` attributes are read.
        cov_type : str
            Covariance type label, stored unchanged under ``"cov_type"``.

        Returns
        -------
        dict
            Parameters, residuals, weighted residuals, covariance, sums of
            squares, R-squared, F-statistic, fitted values and the model
            metadata listed in the literal below.
        """
        columns = self._columns
        index = self._index
        eps = self.resids(params)
        y = self.dependent.pandas
        fitted = DataFrame(asarray(y) - eps, y.index, ["fitted_values"])
        weps = self.wresids(params)
        cov = cov_estimator.cov
        debiased = cov_estimator.debiased

        residual_ss = weps.T @ weps

        w = self.weights.ndarray
        e = self._wy
        if self.has_constant:
            # Remove the weighted mean of y, scaled by the root weights
            e = e - sqrt(w) * average(self._y, weights=w)

        # The products above are single-element arrays.  Calling float() on
        # an array with ndim > 0 is deprecated since NumPy 1.25, so the
        # scalar is extracted explicitly with .item().
        total_ss = float(asarray(e.T @ e).item())
        r2 = 1 - residual_ss / total_ss

        fstat = self._f_statistic(params, cov, debiased)
        out = {
            "params": Series(params.squeeze(), columns, name="parameter"),
            "eps": Series(eps.squeeze(), index=index, name="residual"),
            "weps": Series(weps.squeeze(),
                           index=index,
                           name="weighted residual"),
            "cov": DataFrame(cov, columns=columns, index=columns),
            "s2": float(cov_estimator.s2),
            "debiased": debiased,
            "residual_ss": float(asarray(residual_ss).item()),
            "total_ss": float(total_ss),
            "r2": float(asarray(r2).item()),
            "fstat": fstat,
            "vars": columns,
            "instruments": self._instr_columns,
            "cov_config": cov_estimator.config,
            "cov_type": cov_type,
            "method": self._method,
            "cov_estimator": cov_estimator,
            "fitted": fitted,
            "original_index": self._original_index,
        }

        return out
Esempio n. 8
0
    def w(self, moments: NDArray) -> NDArray:
        """
        Weighting matrix for the score/moment conditions (kernel version)

        Parameters
        ----------
        moments : ndarray
            Moment conditions (nobs by nmoments)

        Returns
        -------
        ndarray
            Inverse of the kernel covariance of the moment conditions, which
            are demeaned column by column first when centering is enabled.
        """
        centered = moments
        if self._center:
            centered = moments - moments.mean(0)[None, :]

        return inv(self._kernel_cov(centered))
Esempio n. 9
0
    def w(self, moments: NDArray) -> NDArray:
        """
        Weighting matrix for the score/moment conditions

        Parameters
        ----------
        moments : ndarray
            Moment conditions (nobs by nmoments)

        Returns
        -------
        ndarray
            Inverse of the symmetrized average outer product of the moment
            conditions, which are demeaned column by column first when
            centering is enabled.
        """
        demeaned = moments - moments.mean(0)[None, :] if self._center else moments
        cov = demeaned.T @ demeaned / demeaned.shape[0]
        # Average with the transpose so the matrix being inverted is
        # exactly symmetric.
        symmetric = (cov + cov.T) / 2.0

        return inv(symmetric)
Esempio n. 10
0
def has_constant(
    x: NDArray, x_rank: Optional[int] = None
) -> Tuple[bool, Optional[int]]:
    """
    Check whether an array contains a constant or spans one.

    Parameters
    ----------
    x: ndarray
        Array to be checked for a constant (n,k)
    x_rank : {int, None}
        Rank of x if previously computed.  If None, this value will be
        computed.

    Returns
    -------
    const : bool
        Flag indicating whether x contains a constant or has column span with
        a constant
    loc : {int, None}
        Column location of constant. When several columns qualify, the first
        one is reported. None when no constant is found.
    """
    # A column made up entirely of ones
    all_ones = np.all(x == 1, axis=0)
    if np.any(all_ones):
        # flatnonzero(...)[0] selects the first match, so more than one
        # matching column no longer raises when converting to int.
        return True, int(np.flatnonzero(all_ones)[0])

    # A column holding any other single non-zero value
    const_nonzero = (np.ptp(x, axis=0) == 0) & ~np.all(x == 0, axis=0)
    if np.any(const_nonzero):
        return True, int(np.flatnonzero(const_nonzero)[0])

    # No explicit constant column: compare the rank of x with the rank of x
    # augmented by a column of ones.
    n = x.shape[0]
    aug_rank = np.linalg.matrix_rank(np.c_[np.ones((n, 1)), x])
    rank = np.linalg.matrix_rank(x) if x_rank is None else x_rank

    has_const = (aug_rank == rank) and x.shape[0] > x.shape[1]
    has_const = has_const or rank < min(x.shape)
    loc: Optional[int] = None
    if has_const:
        # Report the column with the smallest variance relative to its
        # largest absolute value.
        normed_var = x.var(0) / np.abs(x).max(0)
        loc = int(np.argmin(normed_var))

    return bool(has_const), loc
Esempio n. 11
0
    def weight_matrix(self, x: NDArray, z: NDArray, eps: NDArray) -> NDArray:
        """
        Covariance of the moment conditions built from a single error variance

        Parameters
        ----------
        x : ndarray
            Model regressors (exog and endog), (nobs by nvar)
        z : ndarray
            Model instruments (exog and instruments), (nobs by ninstr)
        eps : ndarray
            Model errors (nobs by 1)

        Returns
        -------
        ndarray
            Covariance of GMM moment conditions.
        """
        nobs, nvar = x.shape
        centered = eps - eps.mean(0)
        s2 = centered.T @ centered / nobs
        # s2 scales z' before the product with z is formed
        weights = (s2 * z.T) @ z / nobs
        if self._debiased:
            weights *= nobs / (nobs - nvar)
        return weights
Esempio n. 12
0
    def _post_estimation(
        self,
        params: NDArray,
        cov_estimator: Union[HomoskedasticCovariance,
                             HeteroskedasticCovariance, KernelCovariance,
                             ClusteredCovariance, ],
        cov_type: str,
    ) -> Dict[str, Any]:
        """
        Assemble the dictionary of post-estimation results.

        Parameters
        ----------
        params : ndarray
            Estimated parameters; passed to ``resids``, ``wresids`` and
            ``_f_statistic``.
        cov_estimator : covariance estimator
            Estimator whose ``cov``, ``debiased``, ``s2`` and ``config``
            attributes are read.
        cov_type : str
            Covariance type label, stored unchanged under ``"cov_type"``.

        Returns
        -------
        dict
            Parameters, residuals, covariance, sums of squares, fitted
            values, the absorbed effects, and two R-squared values (one
            relative to the dependent variable, one relative to the absorbed
            dependent variable), both floored at zero.
        """
        columns = self._columns
        index = self._index
        eps = self.resids(params)
        fitted = DataFrame(
            self._dependent.ndarray - eps,
            index=self._dependent.rows,
            columns=["fitted_values"],
        )
        absorbed_effects = DataFrame(
            to_numpy(self._absorbed_dependent) - to_numpy(fitted),
            columns=["absorbed_effects"],
            index=self._dependent.rows,
        )

        weps = self.wresids(params)
        cov = cov_estimator.cov
        debiased = cov_estimator.debiased

        residual_ss = (weps.T @ weps)[0, 0]

        w = self.weights.ndarray
        root_w = sqrt(w)
        e = self._dependent.ndarray * root_w
        if self.has_constant:
            e = e - root_w * average(self._dependent.ndarray, weights=w)

        # e.T @ e is a single-element array.  Calling float() on an array
        # with ndim > 0 is deprecated since NumPy 1.25, so the scalar is
        # extracted explicitly with .item().
        total_ss = float((e.T @ e).item())
        r2 = max(1 - residual_ss / total_ss, 0.0)

        e = to_numpy(self._absorbed_dependent)  # already scaled by root_w
        # If absorbing contains a constant, but exog does not, no need to demean
        if self._const_col is not None:
            col = self._const_col
            x = to_numpy(self._absorbed_exog)[:, col:col + 1]
            mu = (lstsq(x, e, rcond=None)[0]).squeeze()
            e = e - x * mu

        absorbed_total_ss = float((e.T @ e).item())
        r2_absorbed = max(1 - residual_ss / absorbed_total_ss, 0.0)

        fstat = self._f_statistic(params, cov, debiased)
        out = {
            "params": Series(params.squeeze(), columns, name="parameter"),
            "eps": Series(eps.squeeze(), index=index, name="residual"),
            "weps": Series(weps.squeeze(),
                           index=index,
                           name="weighted residual"),
            "cov": DataFrame(cov, columns=columns, index=columns),
            "s2": float(cov_estimator.s2),
            "debiased": debiased,
            "residual_ss": float(residual_ss),
            "total_ss": float(total_ss),
            "r2": float(r2),
            "fstat": fstat,
            "vars": columns,
            "instruments": [],
            "cov_config": cov_estimator.config,
            "cov_type": cov_type,
            "method": self._method,
            "cov_estimator": cov_estimator,
            "fitted": fitted,
            "original_index": self._original_index,
            "absorbed_effects": absorbed_effects,
            "absorbed_r2": r2_absorbed,
        }

        return out