Ejemplo n.º 1
0
def test_one_hot_encoder_drop_manual():
    """Check OneHotEncoder when the category to drop is listed per feature.

    The encoder is built with an explicit ``drop`` list holding one category
    for each column.  The test verifies three things: the dense encoding of
    the samples, that ``drop_idx_`` resolves back to the requested
    categories, and that ``inverse_transform`` restores the original samples.
    """
    cats_to_drop = ['def', 12, 3, 56]
    samples = [
        ['abc', 12, 2, 55],
        ['def', 12, 1, 55],
        # This row equals ``cats_to_drop``, so it is expected to encode to
        # an all-zero row.
        ['def', 12, 3, 56],
    ]
    expected_encoding = [
        [1, 0, 1, 1],
        [0, 1, 0, 1],
        [0, 0, 0, 0],
    ]

    encoder = OneHotEncoder(drop=cats_to_drop)
    encoded = encoder.fit_transform(samples)
    dense_encoding = encoded.toarray()
    assert_array_equal(dense_encoding, expected_encoding)

    # Map each feature's drop index back to the category it refers to.
    resolved_drops = [
        categories[index]
        for categories, index in zip(encoder.categories_, encoder.drop_idx_)
    ]
    assert_array_equal(resolved_drops, cats_to_drop)

    # Round trip: decoding the dense matrix must give back the input rows.
    original = np.array(samples, dtype=object)
    assert_array_equal(original, encoder.inverse_transform(dense_encoding))
Ejemplo n.º 2
0
def test_one_hot_encoder_inverse(sparse_, drop):
    """Check that ``OneHotEncoder.inverse_transform`` undoes ``fit_transform``.

    Parameters
    ----------
    sparse_ : forwarded as the ``sparse`` argument of every encoder built
        here (presumably supplied by pytest parametrization that is not
        visible in this chunk).
    drop : forwarded as the ``drop`` argument of the first two encoders; the
        unknown-category scenarios only run when it is ``None``.

    The final check feeds a matrix of the wrong shape to whichever encoder
    was fitted last and expects a ``ValueError``.
    """
    mixed_rows = [['abc', 2, 55], ['def', 1, 55], ['abc', 3, 55]]
    numeric_rows = [[2, 55], [1, 55], [3, 55]]

    def fit_and_check_round_trip(encoder, rows, expected):
        # Fit on ``rows``, decode the encoding and compare with ``expected``.
        # The fitted encoder is handed back so the caller can keep using it.
        encoded = encoder.fit_transform(rows)
        assert_array_equal(encoder.inverse_transform(encoded), expected)
        return encoder

    # Mixed string/integer input comes back as an object array.
    last_encoder = fit_and_check_round_trip(
        OneHotEncoder(sparse=sparse_, drop=drop),
        mixed_rows,
        np.array(mixed_rows, dtype=object),
    )

    # Purely numerical input is compared against a plain numeric array.
    last_encoder = fit_and_check_round_trip(
        OneHotEncoder(sparse=sparse_, categories='auto', drop=drop),
        numeric_rows,
        np.array(numeric_rows),
    )

    if drop is None:
        # Unknown-category scenarios are limited to drop=None because drop
        # is incompatible with handle_unknown=ignore.
        expected_mixed = np.array(mixed_rows, dtype=object)
        # 3 is not among the listed categories [1, 2] for the second column.
        expected_mixed[2, 1] = None
        last_encoder = fit_and_check_round_trip(
            OneHotEncoder(
                sparse=sparse_,
                handle_unknown='ignore',
                categories=[['abc', 'def'], [1, 2], [54, 55, 56]],
            ),
            mixed_rows,
            expected_mixed,
        )

        # With an otherwise numerical output, the result is still an object
        # array once unknown values are present.
        expected_numeric = np.array(numeric_rows, dtype=object)
        # 3 is unknown in the first column; 55 is unknown in the second
        # column (only 54 and 56 are listed), so that whole column is None.
        expected_numeric[2, 0] = None
        expected_numeric[:, 1] = None
        last_encoder = fit_and_check_round_trip(
            OneHotEncoder(
                sparse=sparse_,
                categories=[[1, 2], [54, 56]],
                handle_unknown='ignore',
            ),
            numeric_rows,
            expected_numeric,
        )

    # A matrix with an incorrect shape must be rejected.
    wrong_shape = np.array([[0, 1, 1], [1, 0, 1]])
    shape_error = re.escape('Shape of the passed X data is not correct')
    with pytest.raises(ValueError, match=shape_error):
        last_encoder.inverse_transform(wrong_shape)