def test_fully_absorb(random_gen):
    """Fitting must fail when exog consists only of dummies of the absorbed variable."""
    nobs = 1000
    groups = random_gen.randint(0, 10, size=nobs)
    regressors = random_gen.standard_normal((nobs, 3))
    dependent = random_gen.standard_normal(nobs)

    # Column order matters downstream: x0, x1, x2, then c, then y.
    columns = {}
    for i in range(3):
        columns[f"x{i}"] = pd.Series(regressors[:, i])
    columns["c"] = pd.Series(groups, dtype="category")
    columns["y"] = pd.Series(dependent)
    frame = pd.DataFrame(columns)

    # The regressors are the full dummy set of the very category being absorbed.
    category_dummies = pd.get_dummies(frame["c"], drop_first=False)
    model = AbsorbingLS(
        frame["y"], category_dummies, absorb=frame[["c"]], drop_absorbed=True
    )
    with pytest.raises(ValueError, match="All columns in exog"):
        model.fit()
def test_lsmr_options(random_gen):
    """``lsmr_options`` warns with FutureWarning and cannot be combined with ``absorb_options``."""
    nobs = 1000
    groups = random_gen.randint(0, 10, size=nobs)
    regressors = random_gen.standard_normal((nobs, 3))
    dependent = random_gen.standard_normal(nobs)

    columns = {}
    for i in range(3):
        columns[f"x{i}"] = pd.Series(regressors[:, i])
    columns["c"] = pd.Series(groups, dtype="category")
    columns["y"] = pd.Series(dependent)
    frame = pd.DataFrame(columns)

    # The first three columns are x0, x1 and x2.
    exog = frame.iloc[:, :3]
    model = AbsorbingLS(frame["y"], exog, absorb=frame[["c"]], drop_absorbed=True)

    with pytest.warns(FutureWarning, match="lsmr_options"):
        model.fit(lsmr_options={})

    with pytest.raises(ValueError, match="absorb_options cannot"):
        model.fit(lsmr_options={}, absorb_options={})
def test_center_cov_arg():
    """Passing ``center=True`` to ``fit`` must not leave a "center" entry in ``cov_config``."""
    simulated = generate_data(
        2, True, 2, factor_format="pandas", ncont=0, cont_interactions=1
    )
    model = AbsorbingLS(
        simulated.y,
        simulated.x,
        absorb=simulated.absorb,
        interactions=simulated.interactions,
    )
    fitted = model.fit(center=True)
    assert "center" not in fitted.cov_config
def test_smoke(data):
    """Fit a model from the ``data`` fixture and check the summary renders as text."""
    model = AbsorbingLS(
        data.y,
        data.x,
        absorb=data.absorb,
        interactions=data.interactions,
        weights=data.weights,
    )
    fitted = model.fit()
    assert isinstance(fitted.summary, Summary)
    assert isinstance(str(fitted.summary), str)
def test_cache():
    """Track how ``_VARIABLE_CACHE`` grows as models with different absorb sets are fit.

    The first two fits use different absorb inputs and each adds exactly one
    entry; refitting with an already-seen configuration adds none.
    """
    simulated = generate_data(
        2, True, 2, factor_format="pandas", ncont=0, cont_interactions=1
    )

    def cache_size():
        # Current number of entries in the module-level cache.
        return len(_VARIABLE_CACHE)

    size_at_start = cache_size()

    # Fit using only the first absorb column.
    mod = AbsorbingLS(
        simulated.y,
        simulated.x,
        absorb=simulated.absorb.iloc[:, :1],
        interactions=simulated.interactions,
    )
    mod.fit()
    size_after_partial = cache_size()

    # Fit using the complete absorb frame.
    mod = AbsorbingLS(
        simulated.y,
        simulated.x,
        absorb=simulated.absorb,
        interactions=simulated.interactions,
    )
    mod.fit()
    size_after_full = cache_size()

    assert size_after_full - size_after_partial == 1
    assert size_after_partial - size_at_start == 1

    # An identical configuration must not add another entry.
    mod = AbsorbingLS(
        simulated.y,
        simulated.x,
        absorb=simulated.absorb,
        interactions=simulated.interactions,
    )
    mod.fit()
    size_after_repeat = cache_size()
    assert size_after_repeat - size_after_full == 0
def test_drop_absorb(random_gen):
    """Check handling of regressors that are absorbed by the absorb variable.

    With ``drop_absorbed=True`` a warning is issued and only x0..x2 are kept;
    without it, fitting raises ``AbsorbingEffectError``.
    """
    nobs = 1000
    groups = random_gen.randint(0, 10, size=nobs)
    regressors = random_gen.standard_normal((nobs, 3))
    dependent = random_gen.standard_normal(nobs)

    columns = {}
    for i in range(3):
        columns[f"x{i}"] = pd.Series(regressors[:, i])
    columns["c"] = pd.Series(groups, dtype="category")
    columns["y"] = pd.Series(dependent)
    frame = pd.DataFrame(columns)

    endog = frame["y"]
    # Append the first two dummies of "c" to x0..x2.
    leading_dummies = pd.get_dummies(frame["c"]).iloc[:, :2]
    exog = pd.concat([frame.iloc[:, :3], leading_dummies], axis=1)

    model = AbsorbingLS(endog, exog, absorb=frame[["c"]], drop_absorbed=True)
    with pytest.warns(AbsorbingEffectWarning):
        fitted = model.fit()
    assert len(fitted.params) == 3
    assert all(f"x{i}" in fitted.params for i in range(3))
    assert isinstance(str(fitted.summary), str)

    # Without drop_absorbed the same design is rejected.
    model = AbsorbingLS(endog, exog, absorb=frame[["c"]])
    with pytest.raises(AbsorbingEffectError):
        model.fit()

    # Using only the two dummy columns as regressors is rejected as well.
    model = AbsorbingLS(endog, exog.iloc[:, -2:], absorb=frame[["c"]])
    with pytest.raises(AbsorbingEffectError):
        model.fit()
def test_against_ols(ols_data):
    """Compare ``AbsorbingLS`` results with an explicit OLS fit.

    The absorbed terms (continuous absorb columns, category dummies and
    interaction terms) are materialized as dense columns, reduced to full
    column rank when needed, and appended to the regressors of a plain
    ``_OLS`` model. Both sets of results are then compared with
    ``assert_results_equal``.
    """
    mod = AbsorbingLS(
        ols_data.y,
        ols_data.x,
        absorb=ols_data.absorb,
        interactions=ols_data.interactions,
        weights=ols_data.weights,
    )
    res = mod.fit()

    absorb = []
    has_dummy = False
    if ols_data.absorb is not None:
        absorb.append(ols_data.absorb.cont.to_numpy())
        has_dummy = ols_data.absorb.cat.shape[1] > 0
        if has_dummy:
            dummies = dummy_matrix(ols_data.absorb.cat, precondition=False)[0]
            assert isinstance(dummies, sp.csc_matrix)
            # toarray() replaces the `.A` shorthand, which SciPy's sparse
            # array classes do not support.
            absorb.append(dummies.toarray())
    if ols_data.interactions is not None:
        for interact in ols_data.interactions:
            absorb.append(interact.sparse.toarray())

    _x = ols_data.x
    if absorb:
        absorb = np.column_stack(absorb)
        # A zero-range column in x (a constant) together with dummies:
        # pass the absorbed block through annihilate against a column of
        # ones, or against the root weights when the model is weighted.
        # NOTE(review): presumably this removes the constant's span from the
        # absorbed block -- confirm against annihilate's definition.
        if np.any(np.ptp(_x, 0) == 0) and has_dummy:
            if ols_data.weights is None:
                absorb = annihilate(absorb, np.ones((absorb.shape[0], 1)))
            else:
                root_w = np.sqrt(mod.weights.ndarray)
                wabsorb = annihilate(root_w * absorb, root_w)
                absorb = (1.0 / root_w) * wabsorb

        rank = np.linalg.matrix_rank(absorb)
        if rank < absorb.shape[1]:
            # Rank deficient: rotate by the eigenvectors of absorb'absorb,
            # ordered by decreasing eigenvalue, and keep the first `rank`
            # rotated columns.
            a, b = np.linalg.eig(absorb.T @ absorb)
            order = np.argsort(a)[::-1]
            a, b = a[order], b[:, order]
            z = absorb @ b
            absorb = z[:, :rank]
        _x = np.column_stack([_x, absorb])

    ols_mod = _OLS(ols_data.y, _x, weights=ols_data.weights)
    ols_res = ols_mod.fit()
    assert_results_equal(ols_res, res)
def test_options(random_gen):
    """Exercise ``absorb_options`` with the default, "lsmr" and "hdfe" methods.

    The default and "lsmr" fits only need to complete; the "hdfe" fit on a
    model whose absorb frame holds "x2" alongside "c" is expected to raise
    ``RuntimeError``.
    """
    nobs = 1000
    groups = random_gen.randint(0, 10, size=nobs)
    regressors = random_gen.standard_normal((nobs, 3))
    dependent = random_gen.standard_normal(nobs)

    columns = {}
    for i in range(3):
        columns[f"x{i}"] = pd.Series(regressors[:, i])
    columns["c"] = pd.Series(groups, dtype="category")
    columns["y"] = pd.Series(dependent)
    frame = pd.DataFrame(columns)

    endog = frame["y"]
    exog = frame.iloc[:, :3]

    model = AbsorbingLS(endog, exog, absorb=frame[["c"]], drop_absorbed=True)
    model.fit(absorb_options={"drop_singletons": False})
    model.fit(absorb_options={"atol": 1e-7, "btol": 1e-7}, method="lsmr")

    # Move x2 out of the regressors and into the absorb frame.
    model = AbsorbingLS(
        endog,
        exog[["x0", "x1"]],
        absorb=frame[["x2", "c"]],
        drop_absorbed=True,
    )
    with pytest.raises(RuntimeError, match="HDFE has been"):
        model.fit(absorb_options={"atol": 1e-7, "btol": 1e-7}, method="hdfe")