Exemple #1
0
 def test_empty_weight_mapping(self):
     """Expect ``ValueError`` when the ordinal mapping for a column is ``None``.

     The test works on a private copy of the shared ``params`` in which
     ``ord_dict["Size"]`` is ``None``, so the ``params`` object used by the
     other tests is left untouched.
     """
     train_csv = pd.read_csv("datasets/encoding/testnew.csv")
     # Copy both dict levels: assigning into params["ord_dict"] directly
     # would leak the None mapping into every test that runs afterwards.
     local_params = dict(params)
     local_params["ord_dict"] = dict(params["ord_dict"])
     local_params["ord_dict"]["Size"] = None
     # Construction stays inside the context because the error may be
     # raised either by the constructor or by encode().
     with pytest.raises(ValueError):
         encoder = EncodeData(train_df=train_csv,
                              target_label="Price",
                              params=local_params)
         encoder.encode()
Exemple #2
0
 def test_mapping(self):
     """Check the ``ProfessionEncoded`` column against the configured codes.

     Asserts that the column holds three distinct values, that the entry
     at label ``2`` is ``3``, and that the distinct values are exactly the
     codes listed in ``params["ord_dict"]["Profession"]``.
     """
     frame = pd.read_csv("datasets/encoding/testnew.csv")
     result = EncodeData(
         train_df=frame,
         target_label="Price",
         params=params,
     ).encode()
     encoded = result[0]["ProfessionEncoded"]
     assert encoded.nunique() == 3
     assert encoded[2] == 3
     expected_codes = Counter(params["ord_dict"]["Profession"].values())
     assert expected_codes == Counter(encoded.unique())
def test_ignore_cat_col():
    """Check that no ``Profession_HOD`` column exists after encoding.

    ``Profession`` is passed in ``cat_cols`` with ``one_hot`` enabled,
    alongside the shared ``ord_dict``.
    """
    config = dict(
        train_df=pd.read_csv("datasets/encoding/testnew.csv"),
        target_label="Price",
        cat_cols=["Profession"],
        ord_dict=ord_dict,
        one_hot=True,
    )
    EncodeData().encode(params=config)
    # Read the frame back from the dict rather than from a local variable,
    # in case encode() rebinds config["train_df"].
    resulting_columns = config["train_df"].columns
    assert "Profession_HOD" not in resulting_columns
def test_one_hot_encoding():
    """Check that one-hot encoding yields a ``Test_Tata`` column.

    ``Test`` and ``Labels`` are passed in ``cat_cols`` with ``one_hot``
    enabled; the entry of ``Test_Tata`` at label ``1`` must equal ``1``.
    """
    config = dict(
        train_df=pd.read_csv("datasets/encoding/testnew.csv"),
        target_label="Price",
        cat_cols=["Test", "Labels"],
        ord_dict=ord_dict,
        one_hot=True,
    )
    EncodeData().encode(params=config)
    assert "Test_Tata" in config["train_df"].columns
    tata_flags = config["train_df"]["Test_Tata"]
    assert tata_flags[1] == 1
def test_empty_weight_mapping():
    """Expect ``ValueError`` when ``ord_dict`` maps ``Size`` to ``None``.

    The ``Price`` column is dropped from the frame before encoding, while
    ``target_label`` is still set to ``"Price"``.
    """
    frame = pd.read_csv("datasets/encoding/testnew.csv")
    frame.drop(["Price"], axis=1, inplace=True)
    # Shallow copy so the extra "Size" key is not added to the shared
    # ord_dict.
    broken_mapping = ord_dict.copy()
    broken_mapping["Size"] = None
    config = dict(
        train_df=frame,
        target_label="Price",
        ord_dict=broken_mapping,
    )
    with pytest.raises(ValueError):
        EncodeData().encode(params=config)
def test_mapping():
    """Check the ``ProfessionEncoded`` column against the configured codes.

    After encoding, the column must hold three distinct values, the entry
    at label ``2`` must be ``3``, and the distinct values must be exactly
    the codes listed for ``Profession`` in the ordinal mapping.
    """
    frame = pd.read_csv("datasets/encoding/testnew.csv")
    frame.drop(["Price"], axis=1, inplace=True)
    config = dict(train_df=frame, target_label="Price", ord_dict=ord_dict)
    EncodeData().encode(params=config)
    # Look the column up through the dict after encode(), in case the
    # encoder rebinds config["train_df"].
    encoded = config["train_df"]["ProfessionEncoded"]
    assert encoded.nunique() == 3
    assert encoded[2] == 3
    expected_codes = Counter(config["ord_dict"]["Profession"].values())
    assert expected_codes == Counter(encoded.unique())
def test_warning():
    """Expect a ``UserWarning`` when ``params`` carries no ``target_label``."""
    config = dict(
        train_df=pd.read_csv("datasets/encoding/testnew.csv"),
        ord_dict=ord_dict,
    )
    with pytest.warns(UserWarning):
        EncodeData().encode(params=config)
def test_empty_df():
    """Expect ``ValueError`` when ``params`` carries no ``train_df``."""
    config = dict(target_label="Price", ord_dict=ord_dict)
    with pytest.raises(ValueError):
        EncodeData().encode(params=config)
Exemple #9
0
from preprocessy.encoding import EncodeData
import pandas as pd

# Ordinal mapping: each listed "Profession" category is paired with an
# integer code.
ord_dict = {"Profession": {"Student": 1, "Teacher": 2, "HOD": 3}}

# Options handed to EncodeData; only the ordinal mapping is supplied here.
params = {"ord_dict": ord_dict}

# Sample dataset; the path is relative to the current working directory.
train_csv = pd.read_csv("datasets/encoding/testnew.csv")
# Disabled debugging leftovers (dtype dump and removal of two unnamed
# columns):
# print(train_csv.dtypes)
# train_csv = train_csv.drop(['Unnamed: 5','Unnamed: 6'],axis=1)
# Note that no target_label is passed to the constructor.
k = EncodeData(train_df=train_csv, params=params)
train = k.encode()

# Print whatever encode() returned.
# NOTE(review): the return shape is not visible here — confirm against
# EncodeData.encode.
print(train)
Exemple #10
0
 def test_warning(self):
     """Expect a ``UserWarning`` when no ``target_label`` is passed."""
     frame = pd.read_csv("datasets/encoding/testnew.csv")
     with pytest.warns(UserWarning):
         EncodeData(train_df=frame, params=params).encode()
Exemple #11
0
 def test_empty_df(self):
     """Expect ``ValueError`` when no ``train_df`` is passed."""
     with pytest.raises(ValueError):
         EncodeData(target_label="Price", params=params).encode()