def test_labelencoder_unfitted():
    """Calling `.transform()` on a never-fitted encoder must be rejected.

    A freshly constructed `LabelEncoder` reports `_fitted` as falsy, and
    transforming any series with it is expected to raise `NotFittedError`.
    """
    random_labels = np.random.choice(10, (10, ))
    series = cudf.Series(random_labels)

    encoder = LabelEncoder()
    assert not encoder._fitted

    with pytest.raises(NotFittedError):
        encoder.transform(series)
def test_labelencoder_unseen():
    """Encoding a label that was absent during fitting must raise `KeyError`.

    The encoder is fitted on values drawn from ``range(10)``, so ``-1`` can
    never be among the labels it has seen.
    """
    training_labels = np.random.choice(10, (10, ))
    training_series = cudf.Series(training_labels)

    encoder = LabelEncoder().fit(training_series)
    assert encoder._fitted

    unseen_series = cudf.Series([-1])
    with pytest.raises(KeyError):
        encoder.transform(unseen_series)
def test_unfitted_inverse_transform():
    """ Try calling `.inverse_transform()` without fitting first

    A freshly constructed `LabelEncoder` reports `_fitted` as falsy; asking
    it to decode labels in that state is expected to raise `NotFittedError`,
    the same error `test_labelencoder_unfitted` expects from `.transform()`.
    """
    df = cudf.Series(np.random.choice(10, (10, )))
    le = LabelEncoder()
    assert not le._fitted

    # Call `inverse_transform` itself: invoking `transform` here would only
    # repeat the unfitted-transform test and leave the inverse path untested.
    with pytest.raises(NotFittedError):
        le.inverse_transform(df)
def test_labelencoder_transform(length, cardinality):
    """Fit on a full series, then encode only its first half.

    `length` is the number of random samples drawn and `cardinality` is the
    exclusive upper bound passed to `np.random.choice`. The check compares
    equality structure rather than concrete codes: wherever the helper
    output for the original subset equals its own transpose, the helper
    output for the encoded subset must too, and vice versa.
    """
    samples = np.random.choice(cardinality, (length, ))
    series = cudf.Series(samples)

    encoder = LabelEncoder().fit(series)
    assert encoder._fitted

    half = series.shape[0] // 2
    first_half = series.iloc[0:half]
    encoded_half = encoder.transform(first_half)

    # NOTE(review): `_df_to_similarity_mat` is defined elsewhere in this
    # file; presumably it yields a square array — confirm against the helper.
    original_mat = _df_to_similarity_mat(first_half)
    encoded_mat = _df_to_similarity_mat(encoded_half)

    encoded_pattern = encoded_mat == encoded_mat.T
    original_pattern = original_mat == original_mat.T
    assert (encoded_pattern == original_pattern).all()