def test_ohe_preserves_custom_index(index):
    """Check that one-hot encoding keeps the index of the input frame.

    A five-row frame with one string column and one integer column is built
    on ``index`` and passed through ``OneHotEncoder.fit_transform``. The
    result must carry an index equal to the input's and contain no missing
    values.

    Args:
        index: Index to attach to the five-row input frame (presumably
            supplied by a pytest parametrization or fixture -- confirm).
    """
    data = {
        "categories": [f"cat_{i}" for i in range(5)],
        "numbers": np.arange(5),
    }
    df = pd.DataFrame(data, index=index)

    encoder = OneHotEncoder()
    new_df = encoder.fit_transform(df)

    pd.testing.assert_index_equal(new_df.index, df.index)

    # axis=None reduces over every cell, giving a single flag for the frame.
    has_missing = new_df.isna().any(axis=None)
    assert not has_missing
def test_ohe_output_bools():
    """Check the logical types of the columns produced by the encoder.

    The input has a boolean column, a three-valued string column and an
    integer column, 100 rows each. After ``fit_transform`` the column named
    ``integers`` must report the logical type "Integer", every other column
    must report "Boolean", and the output must have five columns in total.
    """
    n_rows = 100
    frame = pd.DataFrame({
        "bool": [bool(i % 2) for i in range(n_rows)],
        "categorical": ["dog"] * 20 + ["cat"] * 40 + ["fish"] * 40,
        "integers": list(range(n_rows)),
    })
    X = ww.DataTable(frame)

    target = pd.Series([i % 2 for i in range(n_rows)])
    y = ww.DataColumn(target)

    encoder = OneHotEncoder()
    output = encoder.fit_transform(X, y)

    logical_types = output.types["Logical Type"]
    for name, types in logical_types.items():
        # Only the untouched numeric column is allowed a non-boolean type.
        expected = "Integer" if name == "integers" else "Boolean"
        assert str(types) == expected

    assert len(output.columns) == 5
def check_df_equality(random_seed):
    """Assert that two fit/transform passes with one encoder match.

    One ``OneHotEncoder`` (``top_n=4``) is created with the given seed and
    run twice over ``df``, a name taken from the surrounding scope. Each
    result is converted with ``to_dataframe()`` and the two frames are
    compared with ``assert_frame_equal``.

    Args:
        random_seed: Value forwarded to the encoder's ``random_seed``
            argument.
    """
    encoder = OneHotEncoder(top_n=4, random_seed=random_seed)

    # The generator is consumed in order during unpacking, so the two
    # fit_transform calls run one after the other on the same encoder.
    first, second = (
        encoder.fit_transform(df).to_dataframe() for _ in range(2)
    )
    assert_frame_equal(first, second)
def check_df_equality(random_state):
    """Assert that two fit/transform passes with one encoder match.

    One ``OneHotEncoder`` (``top_n=4``) is created with the given random
    state and run twice over ``df``, a name taken from the surrounding
    scope. The two outputs are compared with
    ``pd.testing.assert_frame_equal``.

    Args:
        random_state: Value forwarded to the encoder's ``random_state``
            argument.
    """
    encoder = OneHotEncoder(top_n=4, random_state=random_state)

    # The generator is consumed in order during unpacking, so the two
    # fit_transform calls run one after the other on the same encoder.
    first, second = (encoder.fit_transform(df) for _ in range(2))
    pd.testing.assert_frame_equal(first, second)