Ejemplo n.º 1
0
def test_ohe_preserves_custom_index(index):
    """Check that ``OneHotEncoder.fit_transform`` keeps the input frame's index.

    Builds a five-row frame with one string column and one integer column,
    encodes it, and verifies the result has the same index and no missing
    values.

    Args:
        index: Index applied to the five-row input frame. Supplied by the
            caller (presumably a pytest fixture or parametrization -- confirm).
    """
    n_rows = 5
    data = {
        "categories": [f"cat_{i}" for i in range(n_rows)],
        "numbers": np.arange(n_rows),
    }
    frame = pd.DataFrame(data, index=index)

    encoder = OneHotEncoder()
    encoded = encoder.fit_transform(frame)

    # The encoded output must carry exactly the index of the input ...
    pd.testing.assert_index_equal(encoded.index, frame.index)
    # ... and must not contain a missing value in any cell.
    has_missing = encoded.isna().any(axis=None)
    assert not has_missing
Ejemplo n.º 2
0
def test_ohe_output_bools():
    """Check the logical types of the columns produced by ``OneHotEncoder``.

    The input has a boolean column, a three-valued string column and an
    integer column. After encoding, the ``integers`` column is expected to
    report the ``Integer`` logical type, every other column ``Boolean``, and
    the output is expected to have five columns in total.
    """
    n_rows = 100
    features = pd.DataFrame({
        "bool": [bool(i % 2) for i in range(n_rows)],
        "categorical": ["dog"] * 20 + ["cat"] * 40 + ["fish"] * 40,
        "integers": list(range(n_rows)),
    })
    X = ww.DataTable(features)
    y = ww.DataColumn(pd.Series([i % 2 for i in range(n_rows)]))

    encoder = OneHotEncoder()
    output = encoder.fit_transform(X, y)

    logical_types = output.types["Logical Type"]
    for name, logical_type in logical_types.items():
        # Only the untouched integer column keeps a non-boolean type.
        expected = "Integer" if name == "integers" else "Boolean"
        assert str(logical_type) == expected
    assert len(output.columns) == 5
Ejemplo n.º 3
0
 def check_df_equality(random_seed):
     """Assert that two ``fit_transform`` passes give equal data frames.

     A single ``OneHotEncoder(top_n=4)`` built with ``random_seed`` is fitted
     and applied twice to ``df``; both outputs are converted with
     ``to_dataframe()`` and compared.

     Args:
         random_seed: Value forwarded to ``OneHotEncoder``'s ``random_seed``.

     Note:
         ``df`` is not defined in this function; it is resolved from the
         surrounding scope.
     """
     encoder = OneHotEncoder(random_seed=random_seed, top_n=4)
     first_out = encoder.fit_transform(df)
     first_frame = first_out.to_dataframe()
     second_out = encoder.fit_transform(df)
     second_frame = second_out.to_dataframe()
     assert_frame_equal(first_frame, second_frame)
Ejemplo n.º 4
0
 def check_df_equality(random_state):
     """Assert that two ``fit_transform`` passes give equal outputs.

     A single ``OneHotEncoder(top_n=4)`` built with ``random_state`` is fitted
     and applied twice to ``df``; the two results are compared with
     ``pd.testing.assert_frame_equal``.

     Args:
         random_state: Value forwarded to ``OneHotEncoder``'s ``random_state``.

     Note:
         ``df`` is not defined in this function; it is resolved from the
         surrounding scope.
     """
     encoder = OneHotEncoder(random_state=random_state, top_n=4)
     first_pass = encoder.fit_transform(df)
     second_pass = encoder.fit_transform(df)
     pd.testing.assert_frame_equal(first_pass, second_pass)