def test_absorbing_regressors(cat, cont, interact, weights):
    """Compare ``AbsorbingRegressor`` output with a hand-built reference.

    The reference matrix is stacked column-wise from the dummy matrix of
    ``cat``, the columns of ``cont`` and the sparse form of every
    interaction; when ``weights`` is given its rows are scaled by
    ``sqrt(weights)``.  Both the sparse structure (shape, ``indptr``,
    ``indices``) and the dense values must match ``areg.regressors``, and
    the accumulated column count must equal ``areg.approx_rank``.
    """
    areg = AbsorbingRegressor(cat=cat,
                              cont=cont,
                              interactions=interact,
                              weights=weights)
    rank = areg.approx_rank
    expected_rank = 0

    expected = []
    for i, col in enumerate(cat):
        # Every categorical after the first counts one level fewer.
        expected_rank += pd.Series(cat[col].cat.codes).nunique() - (i > 0)
    expected.append(dummy_matrix(cat, precondition=False)[0])
    expected_rank += cont.shape[1]
    expected.append(csc_matrix(cont))
    if interact is not None:
        for inter in interact:
            interact_mat = inter.sparse
            expected_rank += interact_mat.shape[1]
            expected.append(interact_mat)
    expected = sp.hstack(expected, format="csc")
    if weights is not None:
        # Left-multiplying by diag(sqrt(w)) scales each row of the reference.
        expected = (sp.diags(np.sqrt(weights)).dot(expected)).asformat("csc")
    actual = areg.regressors
    assert expected.shape == actual.shape
    assert_array_equal(expected.indptr, actual.indptr)
    assert_array_equal(expected.indices, actual.indices)
    # ``toarray()`` rather than the ``.A`` shorthand: it is available on both
    # sparse matrices and sparse arrays, while ``.A`` is deprecated on arrays.
    assert_allclose(expected.toarray(), actual.toarray())
    assert expected_rank == rank
# Example no. 2
# 0
def test_absorbing_regressors_hash(cat, cont, interact, weights):
    """Verify ``AbsorbingRegressor.hash`` against an independently built tuple.

    One single-element tuple is produced per categorical column (from its
    category codes) and per continuous column (from its values); the hashes
    of every interaction and, if present, of the weights are added, and the
    sorted collection is compared with ``areg.hash``.
    """
    areg = AbsorbingRegressor(
        cat=cat, cont=cont, interactions=interact, weights=weights
    )

    # Column hashes: categorical codes first, then continuous values.
    parts = [
        (hasher.single(cat[name].cat.codes.to_numpy().data),) for name in cat
    ]
    parts += [(hasher.single(cont[name].to_numpy().data),) for name in cont]
    parts.sort()

    if interact is not None:
        parts.extend(h for term in interact for h in term.hash)
    if weights is not None:
        parts.append((hasher.single(weights.data),))

    reference = tuple(sorted(parts))
    assert reference == areg.hash
def test_empty_absorbing_regressor():
    """An ``AbsorbingRegressor`` built with no arguments is empty.

    Its regressor matrix must have shape ``(0, 0)`` and its hash must be the
    empty tuple.
    """
    empty = AbsorbingRegressor()
    regressor_shape = empty.regressors.shape
    assert regressor_shape == (0, 0)
    assert empty.hash == ()