コード例 #1
0
def test_interaction_cont_only(cont):
    """Check an Interaction that is built from continuous data only.

    ``cont`` comes from a fixture defined outside this block; here it is
    used through ``.shape``, ``.to_numpy()`` and ``assert_frame_equal``.
    """
    inter = Interaction(cont=cont)

    # The observation count and the stored continuous data mirror the input.
    assert inter.nobs == cont.shape[0]
    assert_frame_equal(cont, inter.cont)

    # The sparse form must be a csc_matrix whose dense values equal the input.
    dense_input = cont.to_numpy()
    sparse_form = inter.sparse
    assert isinstance(sparse_form, csc_matrix)
    assert_allclose(dense_input, sparse_form.A)
コード例 #2
0
def test_interaction_cat_only(cat):
    """Check an Interaction that is built from categorical data only.

    The sparse form is compared against
    ``category_interaction(category_product(cat), precondition=False)``.
    """
    inter = Interaction(cat=cat)

    # The observation count and the stored categorical data mirror the input.
    assert inter.nobs == cat.shape[0]
    assert_frame_equal(cat, inter.cat)

    # Reference matrix computed directly with the helper functions.
    product = category_product(cat)
    reference = category_interaction(product, precondition=False)

    sparse_form = inter.sparse
    assert isinstance(sparse_form, csc_matrix)
    assert_allclose(reference.A, sparse_form.A)
コード例 #3
0
def interact(request):
    """Build the Interaction objects requested through ``request.param``.

    Returns ``None`` when ``request.param`` is falsy. Otherwise returns a
    list holding ``request.param`` Interaction instances, each created from
    newly drawn categorical and continuous data. All draws share a single
    ``RandomState`` seeded with 0.
    """
    count = request.param
    if not count:
        return None

    rng = np.random.RandomState(0)

    def _draw_one():
        # Draw the categorical data before the continuous data so the
        # shared random stream is consumed in a fixed order.
        cat_part = random_cat(4, 100, frame=True, rs=rng)
        cont_part = random_cont(100, rs=rng)
        return Interaction(cat_part, cont_part)

    return [_draw_one() for _ in range(count)]
コード例 #4
0
def test_interaction_cat_cont(cat, cont):
    """Check an Interaction built from both categorical and continuous data.

    The expected dense matrix is assembled one continuous column at a time:
    the dense categorical interaction matrix is copied and its nonzero
    entries are overwritten with that column's values. The per-column
    results are then stacked side by side.
    """
    interact = Interaction(cat=cat, cont=cont)
    assert interact.nobs == cat.shape[0]
    assert_frame_equal(cat, interact.cat)
    assert_frame_equal(cont, interact.cont)

    # Dense form of the categorical interaction matrix, reused per column.
    base = category_interaction(category_product(cat), precondition=False).A
    blocks = []
    for i in range(cont.shape[1]):
        block = base.copy()
        # Replace the nonzero entries with the i-th continuous column.
        block[np.where(block)] = cont.iloc[:, i].to_numpy()
        blocks.append(block)
    expected = np.column_stack(blocks)

    actual = interact.sparse
    assert isinstance(actual, csc_matrix)
    # Compare the same object that was just type-checked, matching the
    # sibling tests, instead of fetching interact.sparse a second time.
    assert_allclose(expected, actual.A)
コード例 #5
0
def test_interaction_cat_cont_convert(cat, cont):
    """Check that ``cat`` given as a NumPy array matches ``cat`` given as is.

    Both Interaction objects receive the same ``cont``; only the form of the
    categorical input differs, and their dense sparse values are compared.
    """
    from_original = Interaction(cat, cont)
    from_array = Interaction(cat.to_numpy(), cont)

    expected = from_original.sparse.A
    actual = from_array.sparse.A
    assert_allclose(expected, actual)
コード例 #6
0
def test_empty_interaction():
    """Check an Interaction created from ``nobs`` alone.

    Its sparse form is expected to be a csc_matrix with ``nobs`` rows and
    zero columns.
    """
    nobs = 100
    empty = Interaction(nobs=nobs)
    assert isinstance(empty.sparse, csc_matrix)
    assert empty.sparse.shape == (nobs, 0)
コード例 #7
0
def test_interaction_cat_bad_nobs():
    """Check that two degenerate constructions raise ``ValueError``.

    The two cases are a call with no arguments at all, and a call whose
    ``cat`` and ``cont`` arrays both have 100 rows and zero columns.
    """
    # Case 1: nothing is supplied.
    with pytest.raises(ValueError):
        Interaction()

    # Case 2: both inputs are present but have no columns.
    no_columns = (100, 0)
    with pytest.raises(ValueError):
        Interaction(cat=np.empty(no_columns), cont=np.empty(no_columns))
コード例 #8
0
def test_interaction_from_frame(cat, cont):
    """Check that ``Interaction.from_frame`` matches the explicit constructor.

    The frame passed to ``from_frame`` is ``cat`` and ``cont`` joined
    column-wise; the dense sparse values of both objects must agree.
    """
    base = Interaction(cat=cat, cont=cont)
    # axis is passed by keyword: pandas 2.0 rejects positional arguments
    # after ``objs`` in ``concat``.
    combined = pd.concat([cat, cont], axis=1)
    interact = Interaction.from_frame(combined)
    assert_allclose(base.sparse.A, interact.sparse.A)
コード例 #9
0
def generate_data(
    k=3,
    const=True,
    nfactors=1,
    factor_density=10,
    nobs=2000,
    cont_interactions=1,
    factor_format="interaction",
    singleton_interaction=False,
    weighted=False,
    ncont=0,
):
    """
    Generate simulated regression data with absorbed factors and interactions.

    All random draws come from one ``RandomState`` seeded with 1234567890,
    so identical arguments produce identical data.

    Parameters
    ----------
    k : int
        Number of standard-normal regressor columns in ``x``.
    const : bool
        If True, a column of ones is prepended to ``x``.
    nfactors : int
        Number of categorical factor columns. Each one adds a per-category
        standard-normal effect to ``y``.
    factor_density : int
        Each categorical variable takes ``nobs // factor_density`` distinct
        integer codes.
    nobs : int
        Number of observations.
    cont_interactions : int
        Number of interaction terms. Each pairs one categorical column
        (``fact{i}``) with one continuous column (``effect_{i}``), and the
        continuous values are added to ``y``.
    factor_format : str
        ``"interaction"`` wraps the factor columns in an ``Interaction``.
        ``"pandas"`` converts each interaction term into a DataFrame. Any
        other value leaves the factors as a DataFrame and the interaction
        terms as ``Interaction`` objects.
    singleton_interaction : bool
        If True and there is at least one interaction term, only the first
        term is returned instead of a list.
    weighted : bool
        If True, ``weights`` is a one-column DataFrame of chi-square(10)
        draws divided by 10; otherwise it is None.
    ncont : int
        Number of standard-normal continuous columns appended after the
        categorical factor columns.

    Returns
    -------
    AttrDict
        Holds ``y``, ``x``, ``absorb`` (the factors, or None when there are
        none), ``interactions`` (or None when there are none) and
        ``weights``.
    """
    rs = np.random.RandomState(1234567890)

    # Regressors, then shocks; the draw order fixes the random stream.
    x = rs.standard_normal((nobs, k))
    if const:
        x = np.column_stack([np.ones(nobs), x])
    e = rs.standard_normal(nobs)
    y = x.sum(1) + e

    factors = []
    for _ in range(nfactors):
        # Every factor uses the same density, so no per-factor lookup is needed.
        ncat = nobs // factor_density
        fact = rs.randint(ncat, size=nobs)
        effects = rs.standard_normal(ncat)
        y += effects[fact]
        factors.append(pd.Series(pd.Categorical(fact)))
    for _ in range(ncont):
        cont = rs.standard_normal(size=nobs)
        factors.append(pd.Series(cont))

    if factors:
        # axis is passed by keyword: pandas 2.0 rejects it positionally.
        factors = pd.concat(factors, axis=1)
        if factor_format == "interaction":
            if nfactors and ncont:
                # The categorical columns come first, the continuous ones after.
                factors = Interaction(factors.iloc[:, :nfactors],
                                      factors.iloc[:, nfactors:])
            elif nfactors:
                factors = Interaction(factors, None)
            else:
                factors = Interaction(None, factors)
    else:
        factors = None

    interactions = []
    for i in range(cont_interactions):
        ncat = nobs // factor_density
        fact = rs.randint(ncat, size=nobs)
        effects = rs.standard_normal(nobs)
        y += effects
        df = pd.DataFrame(pd.Series(pd.Categorical(fact)),
                          columns=["fact{0}".format(i)])
        df_eff = pd.DataFrame(effects[:, None],
                              columns=["effect_{0}".format(i)])
        interactions.append(Interaction(df, df_eff))
    if factor_format == "pandas":
        interactions = [
            pd.concat([term.cat, term.cont], axis=1) for term in interactions
        ]
    # An empty list is reported as None.
    interactions = interactions or None
    if interactions and singleton_interaction:
        interactions = interactions[0]

    if weighted:
        weights = pd.DataFrame(rs.chisquare(10, size=(nobs, 1)) / 10)
    else:
        weights = None

    return AttrDict(y=y,
                    x=x,
                    absorb=factors,
                    interactions=interactions,
                    weights=weights)