예제 #1
0
def test_inverse_transform(orig_label, ord_label, expected_reverted,
                           bad_ord_label, use_fit_transform, client):
    """Check LabelEncoder.inverse_transform on partitioned inputs.

    Each label argument is converted with ``dask_cudf.from_cudf`` using one
    partition per entry reported by ``client.has_what()``.

    Parameters
    ----------
    orig_label
        Labels the encoder is fitted on.
    ord_label
        Encoded values handed to ``inverse_transform``.
    expected_reverted
        Values that decoding ``ord_label`` is expected to yield.
    bad_ord_label
        Encoded values for which ``inverse_transform`` must raise
        ``ValueError``.
    use_fit_transform
        When truthy the encoder is fitted through ``fit_transform``;
        otherwise through ``fit``.
    client
        Object exposing ``has_what()`` (presumably the Dask client
        fixture -- confirm against the test configuration).
    """
    worker_count = len(client.has_what())

    def distribute(series):
        # One partition per worker entry reported by the client.
        return dask_cudf.from_cudf(series, npartitions=worker_count)

    train_ddf = distribute(orig_label)
    codes_ddf = distribute(ord_label)
    expected_ddf = distribute(expected_reverted)
    unseen_codes_ddf = distribute(bad_ord_label)

    # Fit through whichever entry point the parametrization selects.
    encoder = LabelEncoder()
    fit_method = encoder.fit_transform if use_fit_transform else encoder.fit
    fit_method(train_ddf)
    assert encoder._fitted is True

    # Decode and drop the index so rows are compared positionally.
    decoded = encoder.inverse_transform(codes_ddf)
    actual = decoded.compute().reset_index(drop=True)
    expected = expected_ddf.compute()

    assert len(actual) == len(expected)
    # Every row must survive the equality filter.
    matching_rows = actual[actual == expected]
    assert len(actual) == len(matching_rows)

    # Values outside the fitted range must raise ValueError.
    with pytest.raises(ValueError, match='y contains previously unseen label'):
        encoder.inverse_transform(unseen_codes_ddf).compute()
예제 #2
0
def test_empty_input(empty, ord_label, client):
    """Check LabelEncoder behaviour when it is fitted on the ``empty`` input.

    Parameters
    ----------
    empty
        Input the encoder is fitted on; ``fit_transform`` of it is expected
        to produce a zero-length result.
    ord_label
        Encoded values for which ``inverse_transform`` must raise
        ``ValueError`` after fitting on ``empty``.
    client
        Object exposing ``has_what()`` (presumably the Dask client
        fixture -- confirm against the test configuration).
    """
    partitions = len(client.has_what())
    # Convert both inputs, one partition per worker entry from the client.
    empty_ddf, codes_ddf = (
        dask_cudf.from_cudf(series, npartitions=partitions)
        for series in (empty, ord_label)
    )

    # fit() alone must mark the encoder as fitted.
    fitted_encoder = LabelEncoder()
    fitted_encoder.fit(empty_ddf)
    assert fitted_encoder._fitted is True

    # Decoding against this encoder must raise ValueError.
    with pytest.raises(ValueError, match='y contains previously unseen label'):
        fitted_encoder.inverse_transform(codes_ddf).compute()

    # fit_transform() on a fresh encoder: fitted flag set, no rows returned.
    fresh_encoder = LabelEncoder()
    encoded = fresh_encoder.fit_transform(empty_ddf).compute()
    assert fresh_encoder._fitted is True
    assert len(encoded) == 0