Esempio n. 1
0
def test_categorical_mixed_type_levels():
    """Expand a categorical whose categories mix integers and strings.

    The input frame has a float column (``fruits``) and a categorical
    column (``mixed``) with the levels ``500`` and ``'cat'`` plus one
    missing entry. With ``dummy_na=False`` the expected output has one
    indicator column per level and no NaN indicator column; the row with
    the missing category is expected to be zero in both indicators.
    """
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
    # removed in NumPy 2.0 and raises AttributeError there.
    fruits = pd.Series([1.0, np.nan, 3.0], dtype='float', name='fruits')
    mixed = pd.Series([500, np.nan, 'cat'], dtype='category', name='mixed')
    raw = pd.concat([fruits, mixed], axis=1)

    expander = DataFrameETL(cols_to_expand='auto', dummy_na=False)
    tfm = expander.fit_transform(raw)

    # Columns: fruits, mixed_500, mixed_cat
    exp = np.array([[1, 1, 0], [np.nan, 0, 0], [3, 0, 1]])
    assert_almost_equal(tfm, exp)
    assert expander.columns_ == ['fruits', 'mixed_500', 'mixed_cat']
Esempio n. 2
0
def test_categorical_looks_like_int():
    """Expand a categorical column whose categories are all integers.

    The fit frame has a float column (``fruits``) and a categorical
    column (``intcat``) with the integer levels ``500`` and ``1000``.
    With ``dummy_na='expanded'`` the expected output has one indicator
    column per level plus an ``intcat_NaN`` indicator, which is all zeros
    here because ``intcat`` has no missing values.
    """
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
    # removed in NumPy 2.0 and raises AttributeError there.
    fruits = pd.Series([1.0, np.nan, 3.0], dtype='float', name='fruits')
    intcat = pd.Series([500, 1000, 1000], dtype='category', name='intcat')
    raw = pd.concat([fruits, intcat], axis=1)

    expander = DataFrameETL(cols_to_expand='auto', dummy_na='expanded')
    tfm = expander.fit_transform(raw)

    # Columns: fruits, intcat_500, intcat_1000, intcat_NaN
    exp = np.array([[1, 1, 0, 0], [np.nan, 0, 1, 0], [3, 0, 1, 0]])
    assert_almost_equal(tfm, exp)
    assert expander.columns_ == \
        ['fruits', 'intcat_500', 'intcat_1000', 'intcat_NaN']
Esempio n. 3
0
def test_pickle(data_raw):
    """Check that a fitted transformer survives a pickle round trip.

    The transformer is fitted on ``data_raw``, serialized, and restored;
    the restored copy must transform the same data into an array with the
    same shape and (almost) equal values as the original fit output.
    """
    fitted = DataFrameETL(
        cols_to_drop=['pid'],
        cols_to_expand=['djinn_type', 'fruits', 'animal'],
        dummy_na='all',
    )
    expected_array = fitted.fit_transform(data_raw)

    # Serialize to bytes and restore a fresh copy of the transformer.
    payload = pickle.dumps(fitted)
    restored = pickle.loads(payload)

    # The restored transformer must reproduce the original output.
    result = restored.transform(data_raw)
    assert result.shape == expected_array.shape
    assert_almost_equal(result, expected_array)