Ejemplo n.º 1
0
def test_parser():
    """Check that ``encode`` lists the string column "B" in ``params["cat_cols"]``.

    The frame holds an integer column "A" and a two-valued string column "B".
    No ``cat_cols`` entry is supplied up front, and ``target_label`` names a
    column ("C") that does not exist in the frame.
    """
    row_count = 100
    greetings = ["bye" if idx % 2 else "hello" for idx in range(row_count)]
    frame = pd.DataFrame({"A": list(range(row_count)), "B": greetings})
    config = {"train_df": frame, "target_label": "C"}

    Encoder().encode(params=config)

    # The test reads "cat_cols" back from the same dict it passed in.
    assert "B" in config["cat_cols"]
Ejemplo n.º 2
0
def test_categorical_encoding():
    """Check encoding of "Profession" when ``one_hot`` is False.

    After ``encode`` runs, the frame stored under ``params["train_df"]`` is
    expected to contain a "ProfessionEncoded" column whose entry ``[0]`` is 0.
    """
    config = dict(
        train_df=pd.read_csv("datasets/encoding/testnew.csv"),
        target_label="Price",
        cat_cols=["Profession"],
        one_hot=False,
    )

    Encoder().encode(params=config)

    # Read the frame back from the params dict rather than reusing the
    # object loaded from disk.
    encoded_frame = config["train_df"]
    assert "ProfessionEncoded" in encoded_frame.columns
    assert encoded_frame["ProfessionEncoded"][0] == 0
Ejemplo n.º 3
0
def test_ignore_cat_col():
    """Check that no "Profession_HOD" column exists after encoding.

    "Profession" is listed in ``cat_cols`` with ``one_hot`` set to True, and
    the module-level ``ord_dict`` is passed alongside it.
    """
    # NOTE(review): presumably ord_dict also covers "Profession", which is why
    # the one-hot column is expected to be absent — confirm against ord_dict.
    config = dict(
        train_df=pd.read_csv("datasets/encoding/testnew.csv"),
        target_label="Price",
        cat_cols=["Profession"],
        ord_dict=ord_dict,
        one_hot=True,
    )

    Encoder().encode(params=config)

    resulting_columns = config["train_df"].columns
    assert "Profession_HOD" not in resulting_columns
Ejemplo n.º 4
0
def test_one_hot_encoding():
    """Check one-hot output for the "Test" column.

    With ``cat_cols`` set to ["Test", "Labels"] and ``one_hot`` set to True,
    the resulting frame is expected to contain a "Test_Tata" column whose
    entry ``[1]`` equals 1.
    """
    config = dict(
        train_df=pd.read_csv("datasets/encoding/testnew.csv"),
        target_label="Price",
        cat_cols=["Test", "Labels"],
        ord_dict=ord_dict,
        one_hot=True,
    )

    Encoder().encode(params=config)

    encoded_frame = config["train_df"]
    assert "Test_Tata" in encoded_frame.columns
    assert encoded_frame["Test_Tata"][1] == 1
Ejemplo n.º 5
0
def test_empty_weight_mapping():
    """Expect ``ValueError`` when the ordinal mapping for "Size" is ``None``.

    The "Price" column is dropped from the loaded frame, and a copy of the
    module-level ``ord_dict`` is used so that the shared mapping is not
    modified by this test.
    """
    train_csv = pd.read_csv("datasets/encoding/testnew.csv")
    train_csv.drop(["Price"], axis=1, inplace=True)
    # Replacing a top-level key on the copy leaves the original dict intact.
    ord_dict1 = ord_dict.copy()
    ord_dict1["Size"] = None
    params = {
        "train_df": train_csv,
        "target_label": "Price",
        "ord_dict": ord_dict1,
    }
    encoder = Encoder()
    # Only the call under test sits inside the context manager, so an error
    # raised while constructing the encoder cannot satisfy the expectation.
    with pytest.raises(ValueError):
        encoder.encode(params=params)
Ejemplo n.º 6
0
def test_mapping():
    """Check that "ProfessionEncoded" reflects the weights in ``ord_dict``.

    The "Price" column is dropped before encoding. Afterwards the encoded
    column must hold exactly three distinct values, its entry ``[2]`` must be
    3, and its set of unique values must match the weights listed under
    ``ord_dict["Profession"]``.
    """
    frame = pd.read_csv("datasets/encoding/testnew.csv")
    frame.drop(columns=["Price"], inplace=True)
    config = {"train_df": frame, "target_label": "Price", "ord_dict": ord_dict}

    Encoder().encode(params=config)

    encoded = config["train_df"]["ProfessionEncoded"]
    assert encoded.nunique() == 3
    assert encoded[2] == 3

    # Compare as multisets so ordering of the unique values is irrelevant.
    expected_weights = Counter(config["ord_dict"]["Profession"].values())
    observed_weights = Counter(encoded.unique())
    assert expected_weights == observed_weights
 def __init__(
     self,
     train_df_path=None,
     test_df_path=None,
     steps=None,
     config_file=None,
     params=None,
     custom_reader=None,
 ):
     """Initialise the parent class with a fixed sequence of pipeline steps.

     All arguments except ``steps`` are forwarded unchanged to the parent
     initialiser.

     NOTE(review): the ``steps`` argument is accepted but never used; the
     hard-coded sequence below is always passed on instead. Confirm that
     ignoring a caller-supplied value is intentional.
     """
     # Order matters: this is the order in which the list is handed to the
     # parent class.
     fixed_pipeline = [
         Parser().parse_dataset,
         NullValuesHandler().execute,
         Encoder().encode,
         HandleOutlier().handle_outliers,
         Scaler().execute,
         SelectKBest().fit_transform,
         Split().train_test_split,
     ]
     super().__init__(
         train_df_path=train_df_path,
         test_df_path=test_df_path,
         steps=fixed_pipeline,
         config_file=config_file,
         params=params,
         custom_reader=custom_reader,
     )
Ejemplo n.º 8
0
def test_target_label_warning():
    """Expect a ``UserWarning`` when ``params`` has no ``"target_label"`` entry."""
    train_csv = pd.read_csv("datasets/encoding/testnew.csv")
    params = {"train_df": train_csv, "ord_dict": ord_dict}
    encoder = Encoder()
    # Only the call under test sits inside the context manager, so a warning
    # emitted while constructing the encoder cannot satisfy the expectation.
    with pytest.warns(UserWarning):
        encoder.encode(params=params)
Ejemplo n.º 9
0
def test_empty_df():
    """Expect ``ValueError`` when ``params`` carries no ``"train_df"`` entry."""
    params = {"target_label": "Price", "ord_dict": ord_dict}
    encoder = Encoder()
    # Only the call under test sits inside the context manager, so an error
    # raised while constructing the encoder cannot satisfy the expectation.
    with pytest.raises(ValueError):
        encoder.encode(params=params)