def test_absorbing_regressors(cat, cont, interact, weights):
    """Compare ``AbsorbingRegressor.regressors`` and ``approx_rank`` to a hand-built reference.

    The reference matrix is assembled by horizontally stacking, in order, the
    dummy matrix for ``cat``, the sparse version of ``cont`` and the sparse
    matrix of every interaction.  When ``weights`` is given, each row is then
    scaled by the square root of its weight.

    Parameters
    ----------
    cat
        Categorical data; iterated column by column and passed to
        ``dummy_matrix``.
    cont
        Continuous data; its column count is added to the expected rank.
    interact
        Iterable of interaction objects exposing ``sparse``, or ``None``.
    weights
        Observation weights, or ``None`` for no weighting.
    """
    areg = AbsorbingRegressor(
        cat=cat, cont=cont, interactions=interact, weights=weights
    )
    rank = areg.approx_rank

    expected_rank = 0
    expected = []
    # Each categorical column contributes its number of distinct codes; every
    # column after the first contributes one fewer (presumably because its
    # dummies are collinear with those already counted -- TODO confirm).
    for i, col in enumerate(cat):
        expected_rank += pd.Series(cat[col].cat.codes).nunique() - (i > 0)
    # dummy_matrix consumes the whole categorical frame, so it is appended once.
    expected.append(dummy_matrix(cat, precondition=False)[0])

    expected_rank += cont.shape[1]
    expected.append(csc_matrix(cont))

    if interact is not None:
        for inter in interact:
            interact_mat = inter.sparse
            expected_rank += interact_mat.shape[1]
            expected.append(interact_mat)

    expected = sp.hstack(expected, format="csc")
    if weights is not None:
        # Left-multiplying by diag(sqrt(w)) scales row i by sqrt(weights[i]).
        expected = (sp.diags(np.sqrt(weights)).dot(expected)).asformat("csc")

    actual = areg.regressors
    assert expected.shape == actual.shape
    # Identical sparsity structure first, then identical stored values.
    assert_array_equal(expected.indptr, actual.indptr)
    assert_array_equal(expected.indices, actual.indices)
    # ``.toarray()`` replaces the ``.A`` shortcut, which newer SciPy releases
    # no longer provide on sparse matrices.
    assert_allclose(expected.toarray(), actual.toarray())
    assert expected_rank == rank
def test_absorbing_regressors_hash(cat, cont, interact, weights):
    """Check that ``AbsorbingRegressor.hash`` equals a hash tuple rebuilt by hand.

    The reference is made of one single-element tuple per categorical column
    (hashing the buffer of its category codes), one per continuous column
    (hashing the buffer of its values), every entry of each interaction's
    ``hash`` and, when ``weights`` is supplied, one for the weights buffer.
    The collected entries are sorted and frozen into a tuple before comparing.
    """
    areg = AbsorbingRegressor(
        cat=cat, cont=cont, interactions=interact, weights=weights
    )

    # Per-column hashes: categorical columns first, then continuous ones.
    cat_entries = [
        (hasher.single(cat[name].cat.codes.to_numpy().data),) for name in cat
    ]
    cont_entries = [
        (hasher.single(cont[name].to_numpy().data),) for name in cont
    ]
    collected = sorted(cat_entries + cont_entries)

    if interact is not None:
        for inter in interact:
            collected += inter.hash

    if weights is not None:
        collected.append((hasher.single(weights.data),))

    hashes = tuple(sorted(collected))
    assert hashes == areg.hash
def test_empty_absorbing_regressor():
    """An ``AbsorbingRegressor`` built without arguments is empty.

    Its ``regressors`` matrix has shape ``(0, 0)`` and its ``hash`` is the
    empty tuple.
    """
    empty_areg = AbsorbingRegressor()
    regressor_shape = empty_areg.regressors.shape
    assert regressor_shape == (0, 0)
    assert empty_areg.hash == ()