예제 #1
0
def test_labelencoder_unfitted():
    """ `.transform()` on an encoder that was never fitted must raise
    `NotFittedError`
    """
    values = cudf.Series(np.random.choice(10, size=(10,)))
    encoder = LabelEncoder()
    # A freshly constructed encoder must not report itself as fitted.
    assert not encoder._fitted

    with pytest.raises(NotFittedError):
        encoder.transform(values)
예제 #2
0
def test_labelencoder_unseen():
    """ Encoding a label that was absent at fit time must raise `KeyError`
    """
    # Training labels are drawn from 0..9, so -1 can never be among them.
    seen = cudf.Series(np.random.choice(10, size=(10,)))
    encoder = LabelEncoder().fit(seen)
    assert encoder._fitted

    unseen = cudf.Series([-1])
    with pytest.raises(KeyError):
        encoder.transform(unseen)
예제 #3
0
def test_unfitted_inverse_transform():
    """ Try calling `.inverse_transform()` without fitting first
    """
    df = cudf.Series(np.random.choice(10, (10, )))
    le = LabelEncoder()
    assert not le._fitted

    # Exercise `.inverse_transform()` itself: calling `.transform()` here
    # would only repeat the unfitted-transform check and leave the inverse
    # path untested.
    # NOTE(review): test_labelencoder_unfitted expects NotFittedError for the
    # unfitted case -- confirm RuntimeError is what this library version
    # raises from `.inverse_transform()`.
    with pytest.raises(RuntimeError):
        le.inverse_transform(df)
예제 #4
0
def test_labelencoder_transform(length, cardinality):
    """ Fit on the whole series, then encode only its first half

    Both the raw and the encoded half are passed through
    `_df_to_similarity_mat`; each result is compared element-wise with its
    own transpose, and the two resulting masks must agree everywhere.
    Presumably this checks that encoded values coincide exactly where the
    raw values do -- confirm against `_df_to_similarity_mat`.
    """
    values = cudf.Series(np.random.choice(cardinality, size=(length,)))
    encoder = LabelEncoder().fit(values)
    assert encoder._fitted

    half = values.shape[0] // 2
    head = values.iloc[0:half]
    head_encoded = encoder.transform(head)

    raw_mat = _df_to_similarity_mat(head)
    enc_mat = _df_to_similarity_mat(head_encoded)

    enc_mask = enc_mat == enc_mat.T
    raw_mask = raw_mat == raw_mat.T
    assert (enc_mask == raw_mask).all()