def test_fully_absorb(random_gen):
    """Fitting must fail when every exog column is absorbed.

    The regressors are the complete set of dummies for the one categorical
    variable that is also passed as ``absorb``, with ``drop_absorbed=True``.
    ``fit`` is expected to raise ``ValueError`` matching
    "All columns in exog".
    """
    nobs = 1000
    categories = random_gen.randint(0, 10, size=nobs)
    regressors = random_gen.standard_normal((nobs, 3))
    dependent = random_gen.standard_normal(nobs)

    columns = {f"x{i}": pd.Series(regressors[:, i]) for i in range(3)}
    columns["c"] = pd.Series(categories, dtype="category")
    columns["y"] = pd.Series(dependent)
    frame = pd.DataFrame(columns)

    # Full dummy set for "c": each column lies in the span of the absorbed effect.
    all_dummies = pd.get_dummies(frame["c"], drop_first=False)
    model = AbsorbingLS(
        frame["y"], all_dummies, absorb=frame[["c"]], drop_absorbed=True
    )
    with pytest.raises(ValueError, match="All columns in exog"):
        model.fit()
def test_lsmr_options(random_gen):
    """Exercise the ``lsmr_options`` keyword of ``fit``.

    Passing ``lsmr_options`` on its own must emit a ``FutureWarning``;
    passing it together with ``absorb_options`` must raise ``ValueError``.
    """
    nobs = 1000
    categories = random_gen.randint(0, 10, size=nobs)
    regressors = random_gen.standard_normal((nobs, 3))
    dependent = random_gen.standard_normal(nobs)

    frame = pd.DataFrame(
        {
            **{f"x{i}": pd.Series(regressors[:, i]) for i in range(3)},
            "c": pd.Series(categories, dtype="category"),
            "y": pd.Series(dependent),
        }
    )

    # The first three columns are the continuous regressors x0..x2.
    exog = frame.iloc[:, :3]
    model = AbsorbingLS(frame["y"], exog, absorb=frame[["c"]], drop_absorbed=True)

    with pytest.warns(FutureWarning, match="lsmr_options"):
        model.fit(lsmr_options={})

    with pytest.raises(ValueError, match="absorb_options cannot"):
        model.fit(lsmr_options={}, absorb_options={})
# Example #3
# 0
def test_center_cov_arg():
    """``center`` passed to ``fit`` must not appear in the result's ``cov_config``."""
    simulated = generate_data(
        2, True, 2, factor_format="pandas", ncont=0, cont_interactions=1
    )
    model = AbsorbingLS(
        simulated.y,
        simulated.x,
        absorb=simulated.absorb,
        interactions=simulated.interactions,
    )
    fitted = model.fit(center=True)
    assert "center" not in fitted.cov_config
def test_smoke(data):
    """Fit a model with absorb, interactions and weights; check the summary renders."""
    fitted = AbsorbingLS(
        data.y,
        data.x,
        absorb=data.absorb,
        interactions=data.interactions,
        weights=data.weights,
    ).fit()

    # The summary must be a Summary instance and convertible to text.
    assert isinstance(fitted.summary, Summary)
    rendered = str(fitted.summary)
    assert isinstance(rendered, str)
def test_cache():
    """Track how ``_VARIABLE_CACHE`` grows across successive fits.

    Fitting with the first absorb column only, and then with the full absorb
    frame, must each add exactly one cache entry.  Refitting with the full
    absorb frame a second time must add none.
    """
    sim = generate_data(
        2, True, 2, factor_format="pandas", ncont=0, cont_interactions=1
    )

    def fit_and_measure(absorb):
        # Fit a model using ``absorb`` and report the cache size while the
        # model object is still referenced.
        model = AbsorbingLS(
            sim.y, sim.x, absorb=absorb, interactions=sim.interactions
        )
        model.fit()
        return len(_VARIABLE_CACHE)

    initial = len(_VARIABLE_CACHE)
    after_subset = fit_and_measure(sim.absorb.iloc[:, :1])
    after_full = fit_and_measure(sim.absorb)
    assert after_full - after_subset == 1
    assert after_subset - initial == 1

    after_repeat = fit_and_measure(sim.absorb)
    assert after_repeat - after_full == 0
def test_drop_absorb(random_gen):
    """Check handling of exog columns that are absorbed by the effects.

    With ``drop_absorbed=True`` the two dummy columns are expected to be
    dropped with an ``AbsorbingEffectWarning``, leaving only ``x0``..``x2``.
    Without it, ``fit`` must raise ``AbsorbingEffectError``, both for the
    mixed regressor set and for the two dummy columns on their own.
    """
    nobs = 1000
    categories = random_gen.randint(0, 10, size=nobs)
    regressors = random_gen.standard_normal((nobs, 3))
    dependent = random_gen.standard_normal(nobs)

    columns = dict(
        (f"x{i}", pd.Series(regressors[:, i])) for i in range(3)
    )
    columns.update(c=pd.Series(categories, dtype="category"), y=pd.Series(dependent))
    frame = pd.DataFrame(columns)

    endog = frame["y"]
    # Three continuous regressors followed by the first two dummies of "c".
    leading_dummies = pd.get_dummies(frame["c"]).iloc[:, :2]
    exog = pd.concat([frame.iloc[:, :3], leading_dummies], axis=1)

    model = AbsorbingLS(endog, exog, absorb=frame[["c"]], drop_absorbed=True)
    with pytest.warns(AbsorbingEffectWarning):
        fitted = model.fit()
    assert len(fitted.params) == 3
    assert all(f"x{i}" in fitted.params for i in range(3))
    assert isinstance(str(fitted.summary), str)

    # Same regressors, but absorbed columns are no longer dropped.
    model = AbsorbingLS(endog, exog, absorb=frame[["c"]])
    with pytest.raises(AbsorbingEffectError):
        model.fit()

    # Only the two dummy columns as regressors.
    model = AbsorbingLS(endog, exog.iloc[:, -2:], absorb=frame[["c"]])
    with pytest.raises(AbsorbingEffectError):
        model.fit()
def test_against_ols(ols_data):
    """Compare ``AbsorbingLS`` with OLS that includes the absorbed terms explicitly.

    The absorbed continuous variables, the dummy matrix of the categorical
    variables and the interaction matrices are stacked into a dense array,
    reduced to full column rank when necessary, appended to ``x`` and passed
    to ``_OLS``.  The two sets of results are then compared with
    ``assert_results_equal``.

    Parameters
    ----------
    ols_data
        Fixture exposing ``y``, ``x``, ``absorb``, ``interactions`` and
        ``weights``; ``absorb``, ``interactions`` and ``weights`` may be
        ``None``.
    """
    mod = AbsorbingLS(
        ols_data.y,
        ols_data.x,
        absorb=ols_data.absorb,
        interactions=ols_data.interactions,
        weights=ols_data.weights,
    )
    res = mod.fit()

    absorb = []
    has_dummy = False
    if ols_data.absorb is not None:
        absorb.append(ols_data.absorb.cont.to_numpy())
        has_dummy = ols_data.absorb.cat.shape[1] > 0
        if has_dummy:
            dummies = dummy_matrix(ols_data.absorb.cat, precondition=False)[0]
            assert isinstance(dummies, sp.csc_matrix)
            # ``toarray()`` replaces the ``.A`` shortcut, which has been
            # removed from SciPy sparse matrices in newer releases.
            absorb.append(dummies.toarray())
    if ols_data.interactions is not None:
        # NOTE(review): assumes ``interact.sparse`` is a SciPy sparse matrix,
        # as the dense conversion it receives here implies -- confirm.
        absorb.extend(
            interact.sparse.toarray() for interact in ols_data.interactions
        )

    _x = ols_data.x
    if absorb:
        absorb = np.column_stack(absorb)
        # A zero-range (constant) column in x together with dummies: process
        # the absorbed block with ``annihilate`` against a constant (or the
        # root weights).  ``annihilate`` is defined elsewhere; presumably it
        # residualizes its first argument on its second -- TODO confirm.
        if np.any(np.ptp(_x, 0) == 0) and has_dummy:
            if ols_data.weights is None:
                absorb = annihilate(absorb, np.ones((absorb.shape[0], 1)))
            else:
                root_w = np.sqrt(mod.weights.ndarray)
                wabsorb = annihilate(root_w * absorb, root_w)
                # Undo the weighting applied before annihilation.
                absorb = (1.0 / root_w) * wabsorb
        rank = np.linalg.matrix_rank(absorb)
        if rank < absorb.shape[1]:
            # Rank deficient: rotate by the eigenvectors of absorb'absorb,
            # ordered by decreasing eigenvalue, and keep the first ``rank``
            # rotated columns.
            a, b = np.linalg.eig(absorb.T @ absorb)
            order = np.argsort(a)[::-1]
            a, b = a[order], b[:, order]
            z = absorb @ b
            absorb = z[:, :rank]
        _x = np.column_stack([_x, absorb])

    ols_mod = _OLS(ols_data.y, _x, weights=ols_data.weights)
    ols_res = ols_mod.fit()

    assert_results_equal(ols_res, res)
# Example #8
# 0
def test_options(random_gen):
    """Exercise ``absorb_options`` and ``method`` arguments of ``fit``.

    Fits with ``drop_singletons`` disabled and with LSMR tolerances must run.
    Requesting ``method="hdfe"`` when a continuous variable (``x2``) is among
    the absorbed variables must raise ``RuntimeError`` matching
    "HDFE has been".
    """
    nobs = 1000
    categories = random_gen.randint(0, 10, size=nobs)
    regressors = random_gen.standard_normal((nobs, 3))
    dependent = random_gen.standard_normal(nobs)

    columns = {f"x{i}": pd.Series(regressors[:, i]) for i in range(3)}
    columns["c"] = pd.Series(categories, dtype="category")
    columns["y"] = pd.Series(dependent)
    frame = pd.DataFrame(columns)

    endog = frame["y"]
    exog = frame.iloc[:, :3]
    tolerances = {"atol": 1e-7, "btol": 1e-7}

    model = AbsorbingLS(endog, exog, absorb=frame[["c"]], drop_absorbed=True)
    model.fit(absorb_options={"drop_singletons": False})
    model.fit(absorb_options=dict(tolerances), method="lsmr")

    model = AbsorbingLS(
        endog, exog[["x0", "x1"]], absorb=frame[["x2", "c"]], drop_absorbed=True
    )
    with pytest.raises(RuntimeError, match="HDFE has been"):
        model.fit(absorb_options=dict(tolerances), method="hdfe")