def test_threshold_encoder_with_all_columns_under_threshold():
    """Check the encoding produced with a very low threshold (0.01).

    The 2x2 input is expected to yield three indicator columns: one for the
    single value seen in the first feature and one for each of the two
    values seen in the second feature.

    NOTE(review): the test name says "under threshold", yet the expected
    output keeps a column for every category -- presumably each category's
    frequency exceeds 0.01. Confirm the intended naming against
    ThresholdOneHotEncoder.
    """
    encoder = ThresholdOneHotEncoder(threshold=0.01)
    X = np.array([[1, 2], [1, 3]])
    # toarray() gives a plain ndarray instead of the legacy np.matrix that
    # todense() returns.
    Xt = encoder.fit_transform(X).toarray()
    expected = np.array([
        [1, 1, 0],
        [1, 0, 1],
    ])
    # Observed value first, expected second, so that a failure report labels
    # the two arrays correctly.
    assert_array_equal(Xt, expected)
def test_threshold_encoder_with_no_columns_over_threshold():
    """Check the encoding produced with a very high threshold (1000).

    For the 2x2 input the expected result is a 2x2 matrix of zeros.

    NOTE(review): presumably no category reaches the threshold and one
    all-zero column remains per input feature -- confirm against
    ThresholdOneHotEncoder.
    """
    encoder = ThresholdOneHotEncoder(threshold=1000)
    X = np.array([[1, 2], [1, 3]])
    # toarray() gives a plain ndarray instead of the legacy np.matrix that
    # todense() returns.
    Xt = encoder.fit_transform(X).toarray()
    expected = np.array([
        [0, 0],
        [0, 0],
    ])
    # Observed value first, expected second, so that a failure report labels
    # the two arrays correctly.
    assert_array_equal(Xt, expected)
def test_threshold_encoder(X, X_expected_categories, X_expected, max_categories, threshold):
    """Fit the encoder on ``X`` and compare its categories and output.

    The test verifies three things:

    * ``fit_transform`` returns a ``scipy.sparse.csr_matrix``;
    * the fitted ``categories_`` match ``X_expected_categories`` entry by
      entry;
    * the densified output equals ``X_expected``.

    NOTE(review): the arguments are presumably supplied by a pytest
    parametrization or fixtures defined outside this block -- confirm.
    """
    encoder = ThresholdOneHotEncoder(
        threshold=threshold,
        max_categories=max_categories,
    )
    encoded = encoder.fit_transform(X)
    assert isinstance(encoded, sp.csr_matrix)

    # Check the count first: zip() below would silently stop at the shorter
    # sequence and hide a length mismatch.
    assert len(encoder.categories_) == len(X_expected_categories)
    category_pairs = zip(encoder.categories_, X_expected_categories)
    for fitted, wanted in category_pairs:
        np.testing.assert_array_equal(fitted, wanted)

    np.testing.assert_array_equal(encoded.toarray(), X_expected)