Beispiel #1
0
def test_dict_list_tuple():
    """Check the medium Encoderizer output shape on a mixed-type frame.

    The frame holds text, string/int categorical, numeric, dict, list and
    tuple columns; each three-row pattern is repeated four times (12 rows).
    """
    repeats = 4
    sentences = [
        "[text encoding is fun]",
        "this is a text encoding {'pizza': 'example'}",
        "hopefully it works and [we'll have passed] the test",
    ]
    # Column order is kept as-is: it may influence the encoded layout.
    columns = {
        "text_col": sentences * repeats,
        "categorical_str_col": ["control", "treatment", "control"] * repeats,
        "categorical_int_col": [0, 1, 2] * repeats,
        "numeric_col": [5, 22, 69] * repeats,
        "dict_col": [{"a": 4}, {"b": 1}, {"c": 3}] * repeats,
        "list_col": [[1, 2], [1, 3], [2]] * repeats,
        "tuple_col": [(1, 2), (1, 3), (2, )] * repeats,
    }
    frame = pd.DataFrame(columns)

    transformed = encoder.Encoderizer(size="medium").fit_transform(frame)

    expected_shape = (12, 244)
    assert transformed.shape == expected_shape
def test_simple():
    """Check the Encoderizer output shape for a plain list of sentences.

    Three raw strings are passed directly (no DataFrame) to an Encoderizer
    configured with a single column name, ``"text"``.
    """
    sentences = [
        "text encoding is fun",
        "this is a text encoding example",
        "hopefully it works and we'll have passed the test",
    ]

    text_encoderizer = encoder.Encoderizer(col_names=["text"])
    encoded = text_encoderizer.fit_transform(sentences)

    expected_shape = (3, 31)
    assert encoded.shape == expected_shape
def test_dict_list():
    """Check the medium Encoderizer output shape with dict and list columns.

    The frame has a text column, a constant string column, a dict column
    containing a ``None`` entry and a column of string lists; each
    three-row pattern is repeated four times (12 rows).
    """
    repeats = 4
    sentences = [
        "[text encoding is fun]",
        "this is a text encoding {'pizza': 'example'}",
        "hopefully it works and [we'll have passed] the test",
    ]
    token_lists = [["this", "is", "text"], ["more", "text"], ["text"]]
    columns = {
        "text": sentences * repeats,
        "constant_str": ["test", "test", "test"] * repeats,
        "dicts": [{"a": 4}, None, {"b": 1}] * repeats,
        "lists": token_lists * repeats,
    }
    frame = pd.DataFrame(columns)

    transformed = encoder.Encoderizer(size="medium").fit_transform(frame)

    expected_shape = (12, 210)
    assert transformed.shape == expected_shape
def test_pandas():
    """Check the medium Encoderizer output shape on a frame with missing values.

    Besides the text column, the frame carries columns that mix real values
    with ``None`` / ``np.nan`` (ints, strings, floats and dicts); each
    three-row pattern is repeated four times (12 rows).
    """
    repeats = 4
    sentences = [
        "text encoding is fun",
        "this is a text encoding example",
        "hopefully it works and we'll have passed the test",
    ]
    columns = {
        "text": sentences * repeats,
        "constant_int": [np.nan, 2, 2] * repeats,
        "constant_str": ["test", "test", "test"] * repeats,
        "constant_null": [None, "test", np.nan] * repeats,
        "numbers": [0.123, None, 0.535] * repeats,
        "dicts": [{"a": 4}, None, {"b": 1}] * repeats,
    }
    frame = pd.DataFrame(columns)

    transformed = encoder.Encoderizer(size="medium").fit_transform(frame)

    expected_shape = (12, 180)
    assert transformed.shape == expected_shape