Example #1
0
def test_onehot_fit_handle_unknown(client):
    X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]})
    Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]})
    X = dask_cudf.from_cudf(X, npartitions=2)

    enc = OneHotEncoder(handle_unknown='error', categories=Y)
    with pytest.raises(KeyError):
        enc.fit(X)

    enc = OneHotEncoder(handle_unknown='ignore', categories=Y)
    enc.fit(X)
Example #2
0
def test_onehot_transform_handle_unknown(client):
    X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]})
    Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]})
    X = dask_cudf.from_cudf(X, npartitions=2)
    Y = dask_cudf.from_cudf(Y, npartitions=2)

    enc = OneHotEncoder(handle_unknown='error', sparse=False)
    enc = enc.fit(X)
    with pytest.raises(KeyError):
        enc.transform(Y).compute()

    enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
    enc = enc.fit(X)
    ohe = enc.transform(Y)
    ref = cp.array([[0., 0., 1., 0.], [0., 1., 0., 1.]])
    cp.testing.assert_array_equal(ohe.compute(), ref)
Example #3
0
def test_onehot_inverse_transform_handle_unknown(client):
    X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]})
    X = dask_cudf.from_cudf(X, npartitions=2)
    Y_ohe = cp.array([[0., 0., 1., 0.], [0., 1., 0., 1.]])
    Y_ohe = da.from_array(Y_ohe)

    enc = OneHotEncoder(handle_unknown='ignore')
    enc = enc.fit(X)
    df = enc.inverse_transform(Y_ohe)
    ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]})
    assert_frame_equal(df.compute().to_pandas(), ref.to_pandas())