def test_multiple_args(self):
    """Expect an exception when ``drop`` and ``fill_missing`` are both given."""
    self.dataframe = pd.read_csv("datasets/encoding/testnew.csv")
    conflicting = {
        "df": self.dataframe,
        "drop": True,
        "fill_missing": "mean",
    }
    with pytest.raises(Exception):
        NullValuesHandler().execute(conflicting)
def test_categorical_replace(test_input2):
    """Both frames must hold exactly eight ``"xyz"`` entries in ``Capitals``.

    The train and test frames are each loaded from the same CSV before the
    handler is run on ``test_input2``.
    """
    frame_keys = ("train_df", "test_df")
    for key in frame_keys:
        test_input2[key] = pd.read_csv("datasets/encoding/test2.csv")

    NullValuesHandler().execute(params=test_input2)

    for key in frame_keys:
        frame = test_input2[key]
        xyz_rows = frame[frame.Capitals == "xyz"]
        assert len(xyz_rows["Capitals"]) == 8
def test_categorical_drops(test_input1):
    """Check both frames after the handler has run on ``test_input1``.

    For ``train_df`` and ``test_df`` alike, the ``Other Capitals`` column
    must not contain the string ``"0"`` and the frame must not have more
    rows than it had before ``execute`` was called.
    """
    frame_keys = ("train_df", "test_df")
    rows_before = {key: test_input1[key].shape[0] for key in frame_keys}

    NullValuesHandler().execute(params=test_input1)

    for key in frame_keys:
        frame = test_input1[key]
        # ``in`` applied to a Series looks at its index labels, not its
        # data, so membership is tested against the values explicitly.
        assert not frame["Other Capitals"].isin(["0"]).any()
        assert frame.shape[0] <= rows_before[key]
def test_drop_col():
    """``Distance`` must be absent from both frames after a column drop."""
    params = dict(
        train_df=dataframe1,
        test_df=dataframe1,
        drop=True,
        column_list=["Distance"],
    )
    NullValuesHandler().execute(params=params)
    for key in ("train_df", "test_df"):
        assert "Distance" not in params[key].columns
def test_auto():
    """With only the two frames supplied, each frame's row count must change."""
    dataframe1 = pd.read_csv("datasets/encoding/test2.csv")
    params = {"train_df": dataframe1, "test_df": dataframe1}

    train_rows_before = params["train_df"].shape[0]
    test_rows_before = params["test_df"].shape[0]

    NullValuesHandler().execute(params=params)

    train_rows_after = params["train_df"].shape[0]
    test_rows_after = params["test_df"].shape[0]
    assert train_rows_after != train_rows_before
    assert test_rows_after != test_rows_before
def __init__(
    self,
    train_df_path=None,
    test_df_path=None,
    steps=None,
    config_file=None,
    params=None,
    custom_reader=None,
):
    """Initialise the parent with a fixed, ordered list of step callables.

    All arguments except ``steps`` are forwarded unchanged to the parent
    initialiser. The ``steps`` argument is accepted but not used: the
    sequence built below is always passed instead.
    """
    # Bound methods are created in execution order, one fresh object each.
    parse = Parser().parse_dataset
    handle_nulls = NullValuesHandler().execute
    encode = Encoder().encode
    handle_outliers = HandleOutlier().handle_outliers
    scale = Scaler().execute
    select_features = SelectKBest().fit_transform
    split = Split().train_test_split

    super().__init__(
        train_df_path=train_df_path,
        test_df_path=test_df_path,
        steps=[
            parse,
            handle_nulls,
            encode,
            handle_outliers,
            scale,
            select_features,
            split,
        ],
        config_file=config_file,
        params=params,
        custom_reader=custom_reader,
    )
def test_output(test_input3):
    """No nulls may remain in ``Distance`` for either frame after ``execute``."""
    NullValuesHandler().execute(params=test_input3)
    for key in ("train_df", "test_df"):
        null_flags = test_input3[key].isnull().any()
        assert not null_flags["Distance"]
def test_incorrect_input_type(error, test_input):
    """Running the handler on ``test_input`` must raise ``error``."""
    with pytest.raises(error):
        NullValuesHandler().execute(params=test_input)
def test_null_dataframe():
    """An empty parameter dict must raise ``ValueError``."""
    empty_params = {}
    with pytest.raises(ValueError):
        NullValuesHandler().execute(empty_params)
def test_incorrect_input_type(self):
    """Check the error raised for a bad value in each ``execute`` argument."""
    # "df": a NumPy array instead of a DataFrame.
    not_a_frame = np.random.random((5, 5))
    with pytest.raises(TypeError):
        NullValuesHandler().execute({"df": not_a_frame})

    # "drop": a string value.
    self.dataframe = pd.read_csv("datasets/encoding/testnew.csv")
    with pytest.raises(TypeError):
        NullValuesHandler().execute({"df": self.dataframe, "drop": "nice"})

    # NOTE(review): the cases below use the bare name ``dataframe`` rather
    # than ``self.dataframe``; it is not defined in this function, so it
    # must come from an outer scope -- confirm this is intended.

    # "fill_missing": an int, then the string "sum".
    with pytest.raises(TypeError):
        NullValuesHandler().execute({"df": dataframe, "fill_missing": 3})
    with pytest.raises(ArgumentsError):
        NullValuesHandler().execute({"df": dataframe, "fill_missing": "sum"})

    # "fill_values": a list, then a dict keyed by "Label".
    with pytest.raises(TypeError):
        NullValuesHandler().execute({"df": dataframe, "fill_values": [5]})
    with pytest.raises(ArgumentsError):
        NullValuesHandler().execute(
            {"df": dataframe, "fill_values": {"Label": 10}}
        )
def test_multiple_args(self):
    """Combining any two handling options must raise ``ArgumentsError``.

    Each pairing of ``drop``, ``fill_missing`` and ``fill_values`` is passed
    to ``execute`` together with the ``dataframe`` from the enclosing scope.
    """
    conflicting_options = (
        {"drop": True, "fill_missing": "mean"},
        {"drop": True, "fill_values": {"Test": "Tata"}},
        {"fill_missing": "mean", "fill_values": {"Test": "Tata"}},
    )
    for options in conflicting_options:
        with pytest.raises(ArgumentsError):
            NullValuesHandler().execute({"df": dataframe, **options})
def test_none_args(self):
    """Passing only ``df`` must raise ``ArgumentsError``."""
    with pytest.raises(ArgumentsError):
        NullValuesHandler().execute({"df": dataframe})
def test_drop_col(self):
    """``Distance`` must be absent from ``params["df"]`` after a column drop."""
    params = dict(df=dataframe, drop=True, column_list=["Distance"])
    NullValuesHandler().execute(params=params)
    remaining_columns = params["df"].columns
    assert "Distance" not in remaining_columns
def test_incorrect_input_type(self):
    """A string value for ``drop`` must raise ``TypeError``."""
    self.dataframe = pd.read_csv("datasets/encoding/testnew.csv")
    bad_params = {"df": self.dataframe, "drop": "nice"}
    with pytest.raises(TypeError):
        NullValuesHandler().execute(bad_params)
def test_output(self):
    """Check that each handling option leaves no nulls behind.

    ``drop`` is checked over the whole frame; ``fill_missing`` and
    ``fill_values`` are checked on the ``Distance`` column.
    """
    # Every scenario runs on its own copy of the shared frame so that a
    # change made during one call cannot influence a later assertion.
    params = {"df": dataframe.copy(), "drop": True}
    NullValuesHandler().execute(params=params)
    assert not params["df"].isnull().values.any()

    fill_options = (
        {"fill_missing": "mean"},
        {"fill_values": {"Distance": 0}},
    )
    for option in fill_options:
        params = {"df": dataframe.copy(), **option}
        NullValuesHandler().execute(params=params)
        assert not params["df"].isnull().any()["Distance"]
def test_multiple_args(test_input):
    """Running the handler on ``test_input`` must raise ``ArgumentsError``."""
    with pytest.raises(ArgumentsError):
        NullValuesHandler().execute(params=test_input)
def test_output(test_input1):
    """No nulls may remain in ``Distance`` of ``df`` after ``execute``."""
    NullValuesHandler().execute(params=test_input1)
    null_flags = test_input1["df"].isnull().any()
    assert not null_flags["Distance"]