def test_onehotencode_no_drop_first():
    """Encode "Born" with drop_first=False and expect every category column.

    The stage is first applied to the three-row frame (which fits it) and
    then re-applied, already fitted, to a single-row frame.
    """
    df = _one_categ_df()
    onehotencode = OneHotEncode("Born", drop_first=False)
    res_df = onehotencode(df, verbose=True)

    # The source column is replaced by one dummy column per category.
    assert "Born" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    assert "Born_Greece" in res_df.columns
    assert res_df["Born_Greece"][1] == 0
    assert res_df["Born_Greece"][2] == 0
    assert res_df["Born_Greece"][3] == 1

    # check when fitted
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_UK" in res_df2.columns
    assert res_df2["Born_UK"][1] == 0
    # Membership must be checked on the fitted-run result (res_df2), not on
    # the result of the first run.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
    assert "Born_Greece" in res_df2.columns
    assert res_df2["Born_Greece"][1] == 1
def test_onehotencode_large():
    """Fit the encoder on the small frame, then apply it to a 7-row frame.

    The fitted run is expected to produce the same dummy columns as the
    first run ("Born_UK" and "Born_USA", no "Born_Greece").
    """
    df = _one_categ_df()
    onehotencode = OneHotEncode("Born")
    res_df = onehotencode(df, verbose=True)

    assert "Born" not in res_df.columns
    assert "Born_Greece" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0

    # check when fitted
    df2 = _one_categ_df_large()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert len(res_df2) == 7
    assert "Born" not in res_df2.columns
    assert "Born_Greece" not in res_df2.columns
    # Assert column presence on the fitted-run result before indexing into
    # it, so a missing column fails as an assertion rather than a KeyError.
    assert "Born_UK" in res_df2.columns
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_UK"][3] == 0
    assert res_df2["Born_USA"][3] == 0
    assert res_df2["Born_UK"][2] == 1
    assert res_df2["Born_USA"][1] == 1
def test_onehotencode_col_subset():
    """Encode with columns=["Born", "Cat"] and col_subset=True.

    The test expects "Born" to be encoded and "Name" to be left untouched,
    both on the first (fitting) run and on the later fitted run.
    """
    df = _two_categ_df()
    onehotencode = OneHotEncode(columns=["Born", "Cat"], col_subset=True)
    res_df = onehotencode(df)

    assert "Born" not in res_df.columns
    assert "Born_Greece" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    # "Name" was not requested, so it must survive un-encoded.
    assert "Name" in res_df.columns
    assert "Name_Bob" not in res_df.columns
    assert "Name_Jack" not in res_df.columns
    assert "Name_Yan" not in res_df.columns

    # check when fitted
    df2 = _two_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_Greece" not in res_df2.columns
    assert "Born_UK" in res_df2.columns
    assert res_df2["Born_UK"][1] == 0
    # The checks below target the fitted-run result (res_df2); checking
    # res_df here would only repeat the assertions already made above.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
    assert "Name" in res_df2.columns
    assert "Name_Bob" not in res_df2.columns
    assert "Name_Jack" not in res_df2.columns
    assert "Name_Yan" not in res_df2.columns
def test_onehotencode_with_dummy_na():
    """Encode a frame containing a missing "Born" value with dummy_na=True.

    The test expects "Born_UK" and "Born_USA" columns and no "Born_nan"
    column, both on the first (fitting) run and on the later fitted run.
    """
    df = _one_categ_df_with_nan()
    onehotencode = OneHotEncode("Born", dummy_na=True)
    res_df = onehotencode(df)

    assert "Born" not in res_df.columns
    assert "Born_nan" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0

    # check when fitted
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_nan" not in res_df2.columns
    # Membership must be checked on the fitted-run result (res_df2), not on
    # the result of the first run.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
    assert "Born_UK" in res_df2.columns
    assert res_df2["Born_UK"][1] == 0
def test_onehotencode_one_with_drop_first_colname(verbose):
    """Encode "Born" with drop_first="UK" and expect no "Born_UK" column.

    Args:
        verbose: Forwarded to the first (fitting) application of the stage.
    """
    df = _one_categ_df()
    onehotencode = OneHotEncode("Born", drop_first="UK")
    res_df = onehotencode(df, verbose=verbose)

    # The explicitly named category is the one dropped; the others remain.
    assert "Born" not in res_df.columns
    assert "Born_UK" not in res_df.columns
    assert "Born_Greece" in res_df.columns
    assert res_df["Born_Greece"][1] == 0
    assert res_df["Born_Greece"][2] == 0
    assert res_df["Born_Greece"][3] == 1
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0

    # check when fitted
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_UK" not in res_df2.columns
    assert "Born_Greece" in res_df2.columns
    assert res_df2["Born_Greece"][1] == 1
    # Membership must be checked on the fitted-run result (res_df2), not on
    # the result of the first run.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
def test_onehotencode_two():
    """Encode a two-column categorical frame with a default OneHotEncode().

    The test expects both "Born" and "Name" to be encoded, with one dummy
    column dropped for each ("Born_Greece" and "Name_Bob"), on the first
    (fitting) run and again on the later fitted run.
    """
    df = _two_categ_df()
    onehotencode = OneHotEncode()
    res_df = onehotencode(df)

    assert "Born" not in res_df.columns
    assert "Born_Greece" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    assert "Name" not in res_df.columns
    assert "Name_Bob" not in res_df.columns
    assert "Name_Jack" in res_df.columns
    assert res_df["Name_Jack"][1] == 0
    assert res_df["Name_Jack"][2] == 1
    assert res_df["Name_Jack"][3] == 0
    assert "Name_Yan" in res_df.columns
    assert res_df["Name_Yan"][1] == 0
    assert res_df["Name_Yan"][2] == 0
    assert res_df["Name_Yan"][3] == 1

    # check when fitted
    df2 = _two_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_Greece" not in res_df2.columns
    assert "Born_UK" in res_df2.columns
    assert res_df2["Born_UK"][1] == 0
    # The checks below target the fitted-run result (res_df2); checking
    # res_df here would only repeat the assertions already made above.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
    assert "Name" not in res_df2.columns
    assert "Name_Bob" not in res_df2.columns
    assert "Name_Jack" in res_df2.columns
    assert res_df2["Name_Jack"][1] == 0
    assert "Name_Yan" in res_df2.columns
    assert res_df2["Name_Yan"][1] == 0