def test_incorrect_input_key():
    """Expect ``KeyError`` when ``handle_outliers`` is asked for ``cols=["Place"]``."""
    # NOTE(review): presumably "Place" is not a column of the module-level
    # ``train_df`` -- confirm against the fixture definition.
    bad_params = {"train_df": train_df, "cols": ["Place"]}
    with pytest.raises(KeyError):
        # The handler is built inside the context, so an error raised by its
        # constructor is covered by the expectation as well.
        HandleOutlier().handle_outliers(params=bad_params)
def test_replace_output(test_input):
    """Check the train frame after ``handle_outliers`` has run on ``test_input``.

    Asserts that the frame keeps the shape of the module-level ``train_df``,
    that the integer ``-999`` appears in the ``Distance`` column, and that the
    string ``"-999"`` appears in neither ``Capitals`` nor ``Other Capitals``.
    """
    HandleOutlier().handle_outliers(params=test_input)

    # Read the frame back from the params dict only after the handler has run.
    processed = test_input["train_df"]

    assert processed.shape == train_df.shape
    assert -999 in processed["Distance"].values
    for column in ("Capitals", "Other Capitals"):
        assert "-999" not in processed[column].values
def test_true_arguments():
    """Expect ``ArgumentsError`` when ``replace`` is True, for each column selector.

    Two scenarios are exercised: selecting columns through ``cols`` and through
    ``cat_cols``. They share one test function because two module-level
    functions with the same name would shadow each other, leaving only the
    last one visible to pytest.
    """
    column_selections = (
        {"cols": ["Distance"]},
        {"cat_cols": ["Capitals", "Other Capitals"]},
    )
    for selection in column_selections:
        # Key order matches the original literals: frame, selector, flag.
        params = {"train_df": train_df, **selection, "replace": True}
        # A fresh handler per scenario, built inside the expectation so the
        # guarded region is the same as in the original tests.
        with pytest.raises(ArgumentsError):
            outlier = HandleOutlier()
            outlier.handle_outliers(params=params)
def test_false_arguments():
    """Expect a ``UserWarning`` when ``remove_outliers`` and ``replace`` are both False."""
    params = {
        "train_df": train_df,
        "cols": ["Distance"],
        "remove_outliers": False,
        "replace": False,
    }
    with pytest.warns(UserWarning):
        # Construction stays inside the context so a warning emitted by the
        # constructor would still satisfy the expectation, as before.
        HandleOutlier().handle_outliers(params=params)
def __init__(
    self,
    train_df_path=None,
    test_df_path=None,
    steps=None,
    config_file=None,
    params=None,
    custom_reader=None,
):
    """Set up the pipeline with a fixed sequence of steps.

    Every argument except ``steps`` is forwarded unchanged to the parent
    initialiser.

    NOTE(review): the ``steps`` argument is accepted but always replaced by
    the built-in sequence below -- confirm that ignoring a caller-supplied
    value is intentional.
    """
    # Step objects are instantiated in pipeline order; only their bound
    # methods are handed on.
    default_steps = [
        Parser().parse_dataset,
        NullValuesHandler().execute,
        Encoder().encode,
        HandleOutlier().handle_outliers,
        Scaler().execute,
        SelectKBest().fit_transform,
        Split().train_test_split,
    ]
    super().__init__(
        train_df_path=train_df_path,
        test_df_path=test_df_path,
        steps=default_steps,
        config_file=config_file,
        params=params,
        custom_reader=custom_reader,
    )
def test_incorrect_input_type(test_input):
    """Expect ``TypeError`` when ``handle_outliers`` receives ``test_input``."""
    # NOTE(review): ``test_input`` is presumably supplied by a parametrize
    # decorator or fixture that is not visible here -- confirm.
    with pytest.raises(TypeError):
        HandleOutlier().handle_outliers(params=test_input)
def test_all(test_input):
    """Check both frames after ``handle_outliers`` has run on ``test_input``.

    Asserts that ``-999`` appears in the ``Distance`` column of the train and
    the test frame, and that the two frames compare equal.
    """
    HandleOutlier().handle_outliers(params=test_input)

    # Look each frame up by key inside the loop so the lookups happen in the
    # same order as the individual assertions did.
    for split in ("train_df", "test_df"):
        assert -999 in test_input[split]["Distance"].values

    assert test_input["train_df"].equals(test_input["test_df"])
def test_removeoutliers_output(test_input):
    """Check that the train frame's shape differs from ``train_df`` after handling."""
    HandleOutlier().handle_outliers(params=test_input)

    # Read the processed frame's shape first, then the module-level
    # reference, mirroring the original evaluation order.
    resulting_shape = test_input["train_df"].shape
    reference_shape = train_df.shape
    assert resulting_shape != reference_shape