def test_interaction_cont_only(cont):
    """Continuous-only interaction: sparse form equals the raw values."""
    interaction = Interaction(cont=cont)

    # Metadata is taken directly from the continuous input
    assert interaction.nobs == cont.shape[0]
    assert_frame_equal(cont, interaction.cont)

    # With no categorical part the sparse matrix is the data itself
    sparse = interaction.sparse
    assert isinstance(sparse, csc_matrix)
    dense_expected = cont.to_numpy()
    assert_allclose(dense_expected, sparse.toarray())
def test_interaction_cat_only(cat):
    """Categorical-only interaction: sparse form equals the dummy matrix."""
    interaction = Interaction(cat=cat)

    # Metadata is taken directly from the categorical input
    assert interaction.nobs == cat.shape[0]
    assert_frame_equal(cat, interaction.cat)

    # Reference: dummies of the product of all categorical columns
    combined = category_product(cat)
    expected = category_interaction(combined, precondition=False)

    sparse = interaction.sparse
    assert isinstance(sparse, csc_matrix)
    # NOTE(review): .A is kept on the helper output since its return type is
    # not visible in this file; the asserted csc_matrix uses toarray()
    assert_allclose(expected.A, sparse.toarray())
def interact(request):
    """
    Build ``request.param`` random Interaction objects.

    Returns None when ``request.param`` is falsy. Otherwise returns a list
    whose length is ``request.param``; every element pairs the output of
    ``random_cat(4, 100, frame=True)`` with the output of ``random_cont(100)``,
    all drawn from a single RandomState seeded with 0.

    NOTE(review): presumably registered as a parametrized pytest fixture;
    the decorator is not visible here -- confirm.
    """
    count = request.param
    if not count:
        return None

    rs = np.random.RandomState(0)
    # Arguments evaluate left to right, so the categorical draw still
    # precedes the continuous draw within each iteration.
    return [
        Interaction(random_cat(4, 100, frame=True, rs=rs), random_cont(100, rs=rs))
        for _ in range(count)
    ]
def test_interaction_cat_cont(cat, cont):
    """Mixed interaction: dummy matrix with its nonzeros set to each cont column."""
    interaction = Interaction(cat=cat, cont=cont)

    assert interaction.nobs == cat.shape[0]
    assert_frame_equal(cat, interaction.cat)
    assert_frame_equal(cont, interaction.cont)

    # Dense dummy matrix for the product of the categorical columns
    dummies = category_interaction(category_product(cat), precondition=False).A

    # One block per continuous column: overwrite the nonzero entries of a
    # copy of the dummy matrix with that column's values
    blocks = []
    for col in range(cont.shape[1]):
        scaled = dummies.copy()
        scaled[np.where(scaled)] = cont.iloc[:, col].to_numpy()
        blocks.append(scaled)
    expected = np.column_stack(blocks)

    sparse = interaction.sparse
    assert isinstance(sparse, csc_matrix)
    # The property is read again for the value comparison, as before
    assert_allclose(expected, interaction.sparse.toarray())
def test_interaction_cat_cont_convert(cat, cont):
    """Passing the categorical data as an ndarray matches the frame input."""
    from_frame = Interaction(cat, cont)
    from_array = Interaction(cat.to_numpy(), cont)

    reference = from_frame.sparse.toarray()
    converted = from_array.sparse.toarray()
    assert_allclose(reference, converted)
def test_empty_interaction():
    """An interaction given only nobs yields a sparse matrix with no columns."""
    nobs = 100
    empty = Interaction(nobs=nobs)

    assert isinstance(empty.sparse, csc_matrix)
    assert empty.sparse.shape == (nobs, 0)
def test_interaction_cat_bad_nobs():
    """Interaction raises ValueError without inputs or with zero-column inputs."""
    # No cat, cont or nobs supplied
    with pytest.raises(ValueError):
        Interaction()

    # Both inputs have 100 rows but no columns
    shape = (100, 0)
    with pytest.raises(ValueError):
        Interaction(cat=np.empty(shape), cont=np.empty(shape))
def test_interaction_from_frame(cat, cont):
    """Interaction.from_frame on a combined frame matches explicit cat/cont."""
    base = Interaction(cat=cat, cont=cont)

    # axis must be passed by keyword: pandas >= 2.0 accepts only ``objs``
    # positionally in concat, so the former ``concat(..., 1)`` raises TypeError
    combined = pd.concat([cat, cont], axis=1)
    interact = Interaction.from_frame(combined)

    assert_allclose(base.sparse.toarray(), interact.sparse.toarray())
def generate_data(
    k=3,
    const=True,
    nfactors=1,
    factor_density=10,
    nobs=2000,
    cont_interactions=1,
    factor_format="interaction",
    singleton_interaction=False,
    weighted=False,
    ncont=0,
):
    """
    Simulate regression data with absorbed effects and interactions.

    All random draws come from one RandomState with a fixed seed, so the
    output is deterministic for a given set of arguments.

    Parameters
    ----------
    k : int
        Number of standard-normal regressor columns in ``x``.
    const : bool
        If True, prepend a column of ones to ``x``.
    nfactors : int
        Number of categorical factors. Each adds a random per-category
        effect to ``y``.
    factor_density : int
        The number of categories of each factor is ``nobs // factor_density``.
    nobs : int
        Number of observations.
    cont_interactions : int
        Number of interactions built from one categorical column and one
        continuous column. The continuous values are also added to ``y``.
    factor_format : str
        "interaction" wraps the absorbed factors in an Interaction. "pandas"
        converts each interaction into a DataFrame. With any other value the
        absorbed factors stay a DataFrame and the interactions stay
        Interaction objects.
    singleton_interaction : bool
        If True and interactions exist, return only the first interaction
        instead of a list.
    weighted : bool
        If True, return chi-square(10) / 10 weights as a (nobs, 1) DataFrame.
    ncont : int
        Number of standard-normal continuous columns appended after the
        categorical factors.

    Returns
    -------
    AttrDict
        Fields ``y``, ``x``, ``absorb``, ``interactions`` and ``weights``.
        ``absorb``, ``interactions`` and ``weights`` are None when there is
        nothing to return for them.
    """
    rs = np.random.RandomState(1234567890)
    density = [factor_density] * max(nfactors, cont_interactions)

    x = rs.standard_normal((nobs, k))
    if const:
        x = np.column_stack([np.ones(nobs), x])
    e = rs.standard_normal(nobs)
    y = x.sum(axis=1) + e

    # Absorbed variables: categorical factors first, continuous columns after
    factors = []
    for i in range(nfactors):
        ncat = nobs // density[min(i, len(density) - 1)]
        fact = rs.randint(ncat, size=nobs)
        effects = rs.standard_normal(ncat)
        y += effects[fact]
        factors.append(pd.Series(pd.Categorical(fact)))
    for _ in range(ncont):
        factors.append(pd.Series(rs.standard_normal(size=nobs)))

    if factors:
        # axis is keyword-only in pandas >= 2.0; a positional 1 raises TypeError
        factors = pd.concat(factors, axis=1)
        if factor_format == "interaction":
            if nfactors and ncont:
                factors = Interaction(
                    factors.iloc[:, :nfactors], factors.iloc[:, nfactors:]
                )
            elif nfactors:
                factors = Interaction(factors, None)
            else:
                factors = Interaction(None, factors)
    else:
        factors = None

    # Categorical-by-continuous interactions
    interactions = []
    for i in range(cont_interactions):
        ncat = nobs // density[min(i, len(density) - 1)]
        fact = rs.randint(ncat, size=nobs)
        effects = rs.standard_normal(nobs)
        y += effects
        df = pd.DataFrame(
            pd.Series(pd.Categorical(fact)), columns=["fact{0}".format(i)]
        )
        df_eff = pd.DataFrame(effects[:, None], columns=["effect_{0}".format(i)])
        interactions.append(Interaction(df, df_eff))

    if factor_format == "pandas":
        # Flatten each interaction into a single frame: cat columns, then cont
        interactions = [
            pd.concat([inter.cat, inter.cont], axis=1) for inter in interactions
        ]

    interactions = interactions or None
    if interactions and singleton_interaction:
        interactions = interactions[0]

    if weighted:
        weights = pd.DataFrame(rs.chisquare(10, size=(nobs, 1)) / 10)
    else:
        weights = None

    return AttrDict(
        y=y, x=x, absorb=factors, interactions=interactions, weights=weights
    )