Ejemplo n.º 1
0
def test_categorical_encoder_unfitted_fail():
    """A never-fitted CategoricalEncoder reports unfitted and refuses to transform."""
    encoder = column_encoders.CategoricalEncoder(["col_1"])

    # A freshly constructed encoder must not claim to be fitted.
    assert not encoder.is_fitted()

    frame = pd.DataFrame({"col_1": ['a', 'b']})
    # Transforming before fit() is expected to raise NotFittedError.
    with pytest.raises(column_encoders.NotFittedError):
        encoder.transform(frame)
Ejemplo n.º 2
0
def test_categorical_encoder_numeric_nan():
    """Fitting on a numeric column that contains a null must not raise TypeError."""
    frame = pd.DataFrame({'brand': [1, 2, 3, None]})
    try:
        # Construction stays inside the try so a TypeError from either step
        # is reported as a test failure rather than an error.
        encoder = column_encoders.CategoricalEncoder("brand")
        encoder.fit(frame)
    except TypeError:
        pytest.fail(
            "fitting categorical encoder on integers with nulls should not fail"
        )
Ejemplo n.º 3
0
def test_categorical_encoder_max_token():
    """After fitting on ``labels``, ``max_tokens`` is 2 even though 1e4 was requested."""
    # NOTE(review): presumably max_tokens is reduced to the number of distinct
    # labels in the module-level `df` -- confirm against CategoricalEncoder.fit.
    requested_encoder = column_encoders.CategoricalEncoder(['labels'],
                                                           max_tokens=1e4)
    fitted_encoder = requested_encoder.fit(df)
    assert fitted_encoder.max_tokens == 2
Ejemplo n.º 4
0
def test_categorical_encoder_numeric_transform():
    """Numeric categories encode to 1, 2, 3 and missing values (NaN/None) to 0."""
    frame = pd.DataFrame({'brand': [1, 2, 3, 1, 2, 1, np.nan, None]})
    encoder = column_encoders.CategoricalEncoder("brand").fit(frame)

    encoded = encoder.transform(frame)
    # One row per input value; the last two rows are the missing entries.
    expected = np.array([[1], [2], [3], [1], [2], [1], [0], [0]])

    assert np.array_equal(encoded, expected)
Ejemplo n.º 5
0
import numpy as np
import pandas as pd
import pytest

from datawig import column_encoders

# Shared fixture frame with three rows: 'features' holds space-separated
# five-token strings (rows two and three are identical) and 'labels' holds
# two distinct values.
df = pd.DataFrame({
    'features': [
        'xwcxG pQldP Cel0n 5LaWO 2cjTu', '2cjTu YizDY u1aEa Cel0n SntTK',
        '2cjTu YizDY u1aEa Cel0n SntTK'
    ],
    'labels': ['xwcxG', 'SntTK', 'SntTK']
})

# Encoders fitted once at import time on the fixture frame.
# NOTE(review): presumably reused by tests further down the file -- confirm.
categorical_encoder = column_encoders.CategoricalEncoder(['labels'],
                                                         max_tokens=3).fit(df)
sequential_encoder = column_encoders.SequentialEncoder(['features'],
                                                       max_tokens=50,
                                                       seq_len=3).fit(df)


# CategoricalEncoder Tests
def test_categorical_encoder_unfitted_fail():
    """A never-fitted CategoricalEncoder reports unfitted and refuses to transform."""
    encoder = column_encoders.CategoricalEncoder(["col_1"])

    # A freshly constructed encoder must not claim to be fitted.
    assert not encoder.is_fitted()

    frame = pd.DataFrame({"col_1": ['a', 'b']})
    # Transforming before fit() is expected to raise NotFittedError.
    with pytest.raises(column_encoders.NotFittedError):
        encoder.transform(frame)