def test_invalid_user_cat_cols():
    """Parsing must fail when the params name a column the frame lacks.

    The frame only has columns "A" and "B", while both ``target_label`` and
    ``cat_cols`` refer to "C"; ``parse_dataset`` is expected to raise
    ``ValueError`` for this input.
    """
    n_rows = 100
    numeric_col = list(range(n_rows))
    # Even rows are "hello", odd rows are "bye".
    text_col = ["bye" if idx % 2 else "hello" for idx in range(n_rows)]
    frame = pd.DataFrame({"A": numeric_col, "B": text_col})

    params = {
        "train_df": frame,
        "target_label": "C",
        "cat_cols": ["C"],
    }
    parser = Parser()

    with pytest.raises(ValueError):
        parser.parse_dataset(params=params)
def test_parser():
    """The string column "B" must end up listed in ``params["cat_cols"]``.

    ``cat_cols`` is not supplied by the test, so the entry checked by the
    final assertion has to be written into ``params`` by ``parse_dataset``.
    """
    n_rows = 100
    columns = {
        "A": list(range(n_rows)),
        # Even rows are "hello", odd rows are "bye".
        "B": ["bye" if idx % 2 else "hello" for idx in range(n_rows)],
    }
    params = {"train_df": pd.DataFrame(columns), "target_label": "C"}

    Parser().parse_dataset(params=params)

    detected_cat_cols = params["cat_cols"]
    assert "B" in detected_cat_cols
def test_empty_weight_mapping():
    """``parse_dataset`` must raise ValueError for an empty ordinal mapping.

    The module-level ``ord_dict`` is copied and its "Size" entry replaced
    with ``None``; parsing with that mapping is expected to fail.  The
    "Price" column is removed from the frame before parsing even though
    ``target_label`` still names it.
    """
    # Non-mutating drop: build the frame without "Price" in one expression.
    train_df = pd.read_csv("datasets/encoding/testnew.csv").drop(
        columns=["Price"]
    )

    # Copy so the shared module-level mapping is left untouched.
    broken_ord_dict = ord_dict.copy()
    broken_ord_dict["Size"] = None

    params = {
        "train_df": train_df,
        "target_label": "Price",
        "ord_dict": broken_ord_dict,
    }

    # Build the parser outside the context manager so that only
    # parse_dataset() can satisfy the expected ValueError.
    parser = Parser()
    with pytest.raises(ValueError):
        parser.parse_dataset(params=params)
def __init__(
    self,
    train_df_path=None,
    test_df_path=None,
    steps=None,
    config_file=None,
    params=None,
    custom_reader=None,
):
    """Initialise the pipeline with its built-in sequence of steps.

    Every argument except ``steps`` is forwarded by keyword to the parent
    initialiser unchanged.

    NOTE(review): the caller-supplied ``steps`` value is never used; the
    parent always receives the built-in list below.  Confirm this is the
    intended behaviour for this pipeline.
    """
    # Each step is a bound method of a freshly created handler, created in
    # the order in which the steps are listed.
    parse_step = Parser().parse_dataset
    null_step = NullValuesHandler().execute
    encode_step = Encoder().encode
    outlier_step = HandleOutlier().handle_outliers
    scale_step = Scaler().execute
    select_step = SelectKBest().fit_transform
    split_step = Split().train_test_split

    builtin_steps = [
        parse_step,
        null_step,
        encode_step,
        outlier_step,
        scale_step,
        select_step,
        split_step,
    ]

    super().__init__(
        train_df_path=train_df_path,
        test_df_path=test_df_path,
        steps=builtin_steps,
        config_file=config_file,
        params=params,
        custom_reader=custom_reader,
    )
def test_empty_df():
    """``parse_dataset`` must raise ValueError when no ``train_df`` is given.

    The params contain only ``target_label`` and ``ord_dict``.
    """
    params = {"target_label": "Price", "ord_dict": ord_dict}

    # Build the parser outside the context manager so that only
    # parse_dataset() can satisfy the expected ValueError.
    parser = Parser()
    with pytest.raises(ValueError):
        parser.parse_dataset(params=params)
def test_target_label_warning():
    """``parse_dataset`` must emit a UserWarning when ``target_label`` is absent.

    The params contain the training frame and ``ord_dict`` but no
    ``target_label`` key.
    """
    train_df = pd.read_csv("datasets/encoding/testnew.csv")
    params = {"train_df": train_df, "ord_dict": ord_dict}

    # Build the parser outside the context manager so that only a warning
    # raised by parse_dataset() can satisfy the expectation.
    parser = Parser()
    with pytest.warns(UserWarning):
        parser.parse_dataset(params=params)