def test_clustered_cov_debiased(data):
    """Compare the debiased clustered covariance with a hand-built reference.

    The reference sums ``xhat * e`` within each cluster, accumulates the
    outer products of those per-cluster sums, and applies the small-sample
    scaling before comparing against ``s`` and ``cov`` of the estimator.
    """
    cov_est = ClusteredCovariance(
        data.x,
        data.y,
        data.z,
        data.params,
        debiased=True,
        clusters=data.clusters,
    )
    assert cov_est.debiased is True
    assert cov_est.config["debiased"] is True
    assert_equal(cov_est.config["clusters"], data.clusters)

    # Per-cluster sums of the score contributions xhat * e.
    ngroups = len(np.unique(data.clusters))
    cluster_sums = np.zeros((ngroups, data.nvar))
    scores = data.xhat * data.e
    for obs, group in enumerate(data.clusters):
        cluster_sums[group] += scores[obs]

    # Sum of outer products of the cluster sums, accumulated row by row.
    outer_total = np.zeros((data.nvar, data.nvar))
    for row in cluster_sums:
        outer_total += np.outer(row, row)

    # This is a strange choice: the expected value combines a
    # (nobs - 1) / (nobs - nvar) factor with ngroups / (ngroups - 1).
    dof_scale = (data.nobs - 1) / (data.nobs - data.nvar)
    s = outer_total / data.nobs * dof_scale * ngroups / (ngroups - 1)
    assert_allclose(cov_est.s, s)
    assert_allclose(cov_est.cov, data.vinv @ s @ data.vinv / data.nobs)

    # The string forms must report the configuration.
    summary = str(cov_est)
    assert "Debiased: True" in summary
    assert "Num Clusters: {0}".format(len(cluster_sums)) in summary
    assert "id" in repr(cov_est)
def test_clustered_cov_errors(data):
    """Expect ValueError when only the first 10 cluster labels are passed."""
    truncated_clusters = data.clusters[:10]
    with pytest.raises(ValueError):
        ClusteredCovariance(
            data.x,
            data.y,
            data.z,
            data.params,
            clusters=truncated_clusters,
        )
def test_asymptotic(self, data):
    """Compare the default clustered covariance with a hand-built reference.

    With no ``debiased`` argument the test expects ``debiased`` to be False
    and ``s`` to equal the sum of outer products of the per-cluster sums of
    ``xhat * e`` divided by ``nobs``, with no further scaling.
    """
    cov_est = ClusteredCovariance(
        data.x,
        data.y,
        data.z,
        data.params,
        clusters=data.clusters,
    )
    assert cov_est._kappa == 1
    assert cov_est.debiased is False
    assert cov_est.config['debiased'] is False
    assert_equal(cov_est.config['clusters'], data.clusters)
    assert_allclose(cov_est.s2, data.s2)

    # Per-cluster sums of the score contributions xhat * e.
    ngroups = len(np.unique(data.clusters))
    cluster_sums = np.zeros((ngroups, data.nvar))
    scores = data.xhat * data.e
    for obs, group in enumerate(data.clusters):
        cluster_sums[group] += scores[obs]

    # Sum of outer products of the cluster sums, accumulated row by row.
    outer_total = np.zeros((data.nvar, data.nvar))
    for row in cluster_sums:
        outer_total += np.outer(row, row)

    s = outer_total / data.nobs
    assert_allclose(cov_est.s, s)
    assert_allclose(cov_est.cov, data.vinv @ s @ data.vinv / data.nobs)

    # The string form must report the configuration.
    summary = str(cov_est)
    assert 'Debiased: False' in summary
    assert 'Num Clusters: {0}'.format(len(cluster_sums)) in summary