def test_max_sigma_functionality():
    """Assert that the max_sigma parameter works as intended."""
    # Test 3 different values for max_sigma and the number of rows they drop
    X_1 = Pruner(max_sigma=1).fit_transform(X_bin)
    X_2 = Pruner(max_sigma=4).fit_transform(X_bin)
    X_3 = Pruner(max_sigma=8).fit_transform(X_bin)
    assert len(X_1) < len(X_2) < len(X_3)


def test_add_complete_dataset():
    """Assert that atom accepts transformers for the complete dataset."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.add(StandardScaler())
    assert check_scaling(atom.dataset)

    len_dataset = len(atom.dataset)
    atom.add(Pruner())
    assert len(atom.dataset) != len_dataset


def test_add_train_only():
    """Assert that atom accepts transformers for the train set only."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.add(StandardScaler(), train_only=True)
    assert check_scaling(atom.X_train) and not check_scaling(atom.X_test)

    len_train, len_test = len(atom.train), len(atom.test)
    atom.add(Pruner(), train_only=True)
    assert len(atom.train) != len_train and len(atom.test) == len_test


# NOTE: the parametrize values below are assumed for illustration; the original
# strategy list is not shown in this section. Extend it with every
# estimator-based strategy under test.
@pytest.mark.parametrize("strategy", ["iForest", "EE", "LOF"])
def test_strategies(strategy):
    """Assert that all strategies requiring an estimator work."""
    pruner = Pruner(strategy=strategy)
    X, y = pruner.transform(X_bin, y_bin)
    assert len(X) < len(X_bin)
    assert hasattr(pruner, strategy.lower())


def test_drop_outlier_in_target():
    """Assert that the method works as intended for target columns as well."""
    X, y = Pruner(max_sigma=2, include_target=True).transform(X10, y10)
    assert len(y) + 2 == len(y10)


def test_categorical_cols_are_ignored():
    """Assert that categorical columns are returned untouched."""
    feature_2 = np.array(X10_str)[:, 2]
    X, y = Pruner(method="min_max", max_sigma=2).transform(X10_str, y10)
    assert all(i == j for i, j in zip(X["Feature 2"], feature_2))


def test_value_pruner():
    """Assert that the method works as intended when method is a numerical value."""
    X = Pruner(method=-99, max_sigma=2).transform(X10)
    assert X.iloc[3, 0] == -99
    assert X.iloc[5, 1] == -99


def test_min_max_pruner():
    """Assert that the method works as intended when method="min_max"."""
    X = Pruner(method="min_max", max_sigma=2).transform(X10)
    assert X.iloc[3, 0] == 0.23  # Max of column
    assert X.iloc[5, 1] == 2  # Min of column


def test_drop_pruner():
    """Assert that rows with outliers are dropped when method="drop"."""
    X = Pruner(method="drop", max_sigma=2).transform(X10)
    assert len(X) + 2 == len(X10)


def test_kwargs_parameter_pruner():
    """Assert that the kwargs are passed to the strategy estimator."""
    pruner = Pruner(strategy="iForest", n_estimators=50)
    pruner.transform(X10)
    assert pruner.iforest.get_params()["n_estimators"] == 50


def test_invalid_max_sigma_parameter():
    """Assert that an error is raised for an invalid max_sigma parameter."""
    pruner = Pruner(max_sigma=0)
    pytest.raises(ValueError, pruner.transform, X_bin)


def test_invalid_method_for_non_z_score():
    """Assert that an error is raised for an invalid method and strategy combination."""
    pruner = Pruner(strategy="iforest", method="min_max")
    pytest.raises(ValueError, pruner.transform, X_bin)


def test_invalid_method_parameter():
    """Assert that an error is raised for an invalid method parameter."""
    pruner = Pruner(method="invalid")
    pytest.raises(ValueError, pruner.transform, X_bin)


def test_invalid_strategy_parameter():
    """Assert that an error is raised for an invalid strategy parameter."""
    pruner = Pruner(strategy="invalid")
    pytest.raises(ValueError, pruner.transform, X_bin)


def test_sets_are_kept_equal():
    """Assert that the train and test sets always keep the same rows."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    len_train, len_test = len(atom.train), len(atom.test)
    atom.add(Pruner())
    assert len(atom.train) < len_train and len(atom.test) < len_test