Beispiel #1
0
def test_invalid_user_cat_cols():
    """Expect ValueError when ``cat_cols`` lists a column the frame lacks.

    The frame only has columns "A" and "B"; the params name "C" both as
    the target label and as the sole user-supplied categorical column.
    """
    frame = pd.DataFrame(
        {
            "A": list(range(100)),
            # Alternating values, starting with "hello" at index 0.
            "B": ["hello", "bye"] * 50,
        }
    )
    params = dict(train_df=frame, target_label="C", cat_cols=["C"])
    parser = Parser()
    with pytest.raises(ValueError):
        parser.parse_dataset(params=params)
Beispiel #2
0
def test_parser():
    """Check that parsing adds the string column "B" to ``params["cat_cols"]``.

    ``cat_cols`` is not supplied by the caller, so the key must be written
    into ``params`` by ``parse_dataset`` itself.
    """
    frame = pd.DataFrame(
        {
            "A": list(range(100)),
            # Alternating values, starting with "hello" at index 0.
            "B": ["hello", "bye"] * 50,
        }
    )
    params = dict(train_df=frame, target_label="C")
    Parser().parse_dataset(params=params)
    detected = params["cat_cols"]
    assert "B" in detected
Beispiel #3
0
def test_empty_weight_mapping():
    """Expect ValueError when an ``ord_dict`` entry has no weight mapping.

    A copy of the module-level ``ord_dict`` is used so that setting the
    "Size" entry to ``None`` does not rebind that key in the original.
    """
    train_csv = pd.read_csv("datasets/encoding/testnew.csv")
    # NOTE(review): "Price" is dropped from the frame but still passed as
    # target_label below -- confirm this is intended for this scenario.
    train_csv.drop(["Price"], axis=1, inplace=True)
    ord_dict1 = ord_dict.copy()
    ord_dict1["Size"] = None
    params = {
        "train_df": train_csv,
        "target_label": "Price",
        "ord_dict": ord_dict1,
    }
    # Build the parser outside the context manager so that only the call
    # under test can satisfy the expected ValueError.
    parser = Parser()
    with pytest.raises(ValueError):
        parser.parse_dataset(params=params)
 def __init__(
     self,
     train_df_path=None,
     test_df_path=None,
     steps=None,
     config_file=None,
     params=None,
     custom_reader=None,
 ):
     """Initialise the pipeline with a fixed, ordered list of steps.

     Every argument except ``steps`` is forwarded unchanged to the parent
     constructor. The parent always receives the step list built here.
     """
     # NOTE(review): a caller-supplied ``steps`` value is never used; it is
     # replaced by the list below. Presumably deliberate for a preset
     # pipeline -- confirm before relying on the parameter.
     preset_steps = [
         Parser().parse_dataset,
         NullValuesHandler().execute,
         Encoder().encode,
         HandleOutlier().handle_outliers,
         Scaler().execute,
         SelectKBest().fit_transform,
         Split().train_test_split,
     ]
     parent_kwargs = {
         "train_df_path": train_df_path,
         "test_df_path": test_df_path,
         "steps": preset_steps,
         "config_file": config_file,
         "params": params,
         "custom_reader": custom_reader,
     }
     super().__init__(**parent_kwargs)
Beispiel #5
0
def test_empty_df():
    """Expect ValueError when ``params`` carries no ``train_df`` entry."""
    params = {"target_label": "Price", "ord_dict": ord_dict}
    # Build the parser outside the context manager so that only the call
    # under test can satisfy the expected ValueError.
    parser = Parser()
    with pytest.raises(ValueError):
        parser.parse_dataset(params=params)
Beispiel #6
0
def test_target_label_warning():
    """Expect a UserWarning when ``params`` has no ``target_label`` entry."""
    train_csv = pd.read_csv("datasets/encoding/testnew.csv")
    params = {"train_df": train_csv, "ord_dict": ord_dict}
    # Build the parser outside the context manager so that a warning from
    # the constructor cannot satisfy the expectation by accident.
    parser = Parser()
    with pytest.warns(UserWarning):
        parser.parse_dataset(params=params)