def test_error_if_input_df_contains_categories_not_present_in_fit_df(
        df_enc, df_enc_rare):
    """Transforming data with categories unseen during fit emits a UserWarning.

    The encoder is built with its default arguments and fitted on ``df_enc``;
    ``df_enc_rare`` is the dataset that contains categories not present in
    the training dataset.
    """
    # Set-up happens outside the ``pytest.warns`` block so that only a warning
    # raised by ``transform`` can satisfy the check; a warning emitted while
    # constructing or fitting the encoder must not make the test pass.
    encoder = CountFrequencyEncoder()
    encoder.fit(df_enc)

    with pytest.warns(UserWarning):
        encoder.transform(df_enc_rare)
def test_no_error_triggered_when_df_contains_unseen_categories_and_errors_is_encode(
    df_enc, df_enc_rare
):
    """With ``errors="encode"``, unseen categories raise neither error nor warning.

    ``df_enc_rare`` contains categories not present in the training dataset
    ``df_enc`` (unseen categories).
    """
    # The "error" filter turns any warning into an exception, so the test
    # fails if fit or transform warns. It must be installed *inside*
    # ``catch_warnings`` so the previous filter state is restored on exit;
    # setting it beforehand would leak the filter into every later test.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        encoder = CountFrequencyEncoder(errors="encode")
        encoder.fit(df_enc)
        encoder.transform(df_enc_rare)
def test_warning_when_df_contains_unseen_categories(df_enc, df_enc_rare):
    """With ``errors="ignore"``, unseen categories trigger exactly one UserWarning.

    ``df_enc_rare`` contains categories not present in the training dataset
    ``df_enc``. The single warning captured during ``transform`` must carry
    the expected message text.
    """
    transformer = CountFrequencyEncoder(errors="ignore")
    transformer.fit(df_enc)

    expected_warning = (
        "During the encoding, NaN values were introduced in the feature(s) var_A."
    )

    # capture every UserWarning emitted while transforming
    with pytest.warns(UserWarning) as caught:
        transformer.transform(df_enc_rare)

    # exactly one warning, and its text is the expected one
    assert len(caught) == 1
    first_warning = caught[0]
    assert first_warning.message.args[0] == expected_warning
def test_error_when_df_contains_unseen_categories(df_enc, df_enc_rare):
    """With ``errors="raise"``, unseen categories make ``transform`` raise ValueError.

    ``df_enc_rare`` contains categories not present in the training dataset
    ``df_enc``. The test also re-checks that ``errors="encode"`` produces
    neither an error nor a warning on the same data.
    """
    strict_encoder = CountFrequencyEncoder(errors="raise")
    strict_encoder.fit(df_enc)

    expected_error = (
        "During the encoding, NaN values were introduced in the feature(s) var_A."
    )

    # errors="raise": transform must fail with the exact message
    with pytest.raises(ValueError) as exc_info:
        strict_encoder.transform(df_enc_rare)
    assert str(exc_info.value) == expected_error

    # errors="encode": any warning is promoted to an exception, so reaching
    # the end of this block means nothing was raised or warned
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        lenient_encoder = CountFrequencyEncoder(errors="encode")
        lenient_encoder.fit(df_enc)
        lenient_encoder.transform(df_enc_rare)
def test_transform_raises_error_if_df_contains_na(df_enc, df_enc_na):
    """``transform`` raises ValueError when the dataset to transform contains NA.

    The encoder is built with its default arguments and fitted on ``df_enc``;
    ``df_enc_na`` is the dataset containing NA that is passed to ``transform``.
    """
    # Construct and fit outside the ``pytest.raises`` block: a ValueError
    # raised during set-up must fail the test instead of being mistaken for
    # the error expected from ``transform``.
    encoder = CountFrequencyEncoder()
    encoder.fit(df_enc)

    with pytest.raises(ValueError):
        encoder.transform(df_enc_na)
# Example #6
# 0
def convertFromField36ToField40WithCountFrequencyEncoder(array):
    """Overwrite columns 44-48 of ``array`` with their encoded values, in place.

    The five columns selected by ``array[:, 44:49]`` are copied into a
    DataFrame, a ``CountFrequencyEncoder`` configured with
    ``encoding_method='frequency'`` is fitted on and applied to that frame,
    and the result is written back into the same column range.

    Nothing is returned; the caller's ``array`` is mutated.
    NOTE(review): assumes ``array`` is a 2-D NumPy-style array with at least
    49 columns -- confirm against callers.
    """
    column_window = slice(44, 49)

    frame = pd.DataFrame(array[:, column_window])

    frequency_encoder = CountFrequencyEncoder(encoding_method='frequency')
    frequency_encoder.fit(frame)

    # write the encoded columns back over the originals
    array[:, column_window] = frequency_encoder.transform(frame)
def test_fit_raises_error_if_df_contains_na(errors, df_enc_na):
    """``fit`` raises ValueError when the training dataset contains NA.

    ``errors`` is forwarded to the encoder's ``errors`` argument.
    NOTE(review): presumably supplied by a parametrization that is not
    visible here -- confirm.
    """
    na_sensitive_encoder = CountFrequencyEncoder(errors=errors)

    # only the fit call itself is expected to raise
    with pytest.raises(ValueError):
        na_sensitive_encoder.fit(df_enc_na)