def test_plot_components(show):
    """Assert that the plot_components method works as intended."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    pytest.raises(PermissionError, atom.plot_components)  # No PCA in pipeline
    atom.feature_selection(strategy="PCA", n_features=10)
    pytest.raises(ValueError, atom.plot_components, show=0)  # Show is invalid
    atom.plot_components(show=show, display=False)
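

# Assumed parametrization for the `scoring` argument of the test below;
# the concrete values are illustrative, not taken from the original suite.
@pytest.mark.parametrize("scoring", [None, "auc"])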
def test_plot_rfecv(scoring):
    """Assert that the plot_rfecv method works as intended."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    pytest.raises(PermissionError, atom.plot_rfecv)  # No RFECV in pipeline
    atom.run("lr", metric="precision")
    atom.feature_selection(strategy="RFECV", n_features=10, scoring=scoring)
    atom.plot_rfecv(display=False)
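

# Assumed parametrization for the `features` argument of the test below:
# a single column index and a pair, the shapes plot_partial_dependence
# accepts. Values are illustrative.
@pytest.mark.parametrize("features", [2, (1, 2)])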
def test_plot_partial_dependence(features):
    """Assert that the plot_partial_dependence method works as intended."""
    # For binary classification tasks
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    pytest.raises(NotFittedError, atom.plot_partial_dependence)
    atom.run(["Tree", "LGB"], metric="f1")

    # More than 3 features
    with pytest.raises(ValueError, match=r".*Maximum 3 allowed.*"):
        atom.plot_partial_dependence(features=[0, 1, 2, 3], display=False)

    # Triple feature
    with pytest.raises(ValueError, match=r".*should be single or in pairs.*"):
        atom.lgb.plot_partial_dependence(features=[(0, 1, 2), 2], display=False)

    # Pair for multi-model
    with pytest.raises(ValueError, match=r".*when plotting multiple models.*"):
        atom.plot_partial_dependence(features=[(0, 2), 2], display=False)

    # Unknown feature
    with pytest.raises(ValueError, match=r".*not found in the dataset.*"):
        atom.plot_partial_dependence(features=["test", 2], display=False)

    # Invalid index
    with pytest.raises(ValueError, match=r".*got index.*"):
        atom.plot_partial_dependence(features=[120, 2], display=False)

    # Different features for multiple models
    atom.branch = "branch_2"
    atom.feature_selection(strategy="pca", n_features=5)
    atom.run(["tree2"])
    with pytest.raises(ValueError, match=r".*models use the same features.*"):
        atom.plot_partial_dependence(features=(0, 1), display=False)

    atom.delete("Tree2")  # Drop model created for test
    atom.branch.delete()  # Drop branch created for test

    atom.plot_partial_dependence(display=False)
    atom.lgb.plot_feature_importance(show=5, display=False)
    atom.lgb.plot_partial_dependence(display=False)

    # For multiclass classification tasks
    atom = ATOMClassifier(X_class, y_class, random_state=1)
    atom.run(["Tree", "LGB"], metric="f1_macro")

    # Invalid target int
    with pytest.raises(ValueError, match=r".*classes, got .*"):
        atom.plot_partial_dependence(target=5, display=False)

    # Invalid target str
    with pytest.raises(ValueError, match=r".*not found in the mapping.*"):
        atom.plot_partial_dependence(target="Yes", display=False)

    atom.lgb.plot_partial_dependence(features, target=2, title="title", display=False)
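

# Assumed parametrization for the `show_params` argument of the test below.
@pytest.mark.parametrize("show_params", [True, False])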
def test_plot_pipeline(show_params):
    """Assert that the plot_pipeline method works as intended."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.impute()
    atom.prune()
    atom.feature_selection("univariate", n_features=10)
    atom.successive_halving(["Tree", "AdaB"])
    pytest.raises(ValueError, atom.plot_pipeline, branch="invalid")
    atom.plot_pipeline(show_params=show_params, title="Pipeline plot", display=False)


def test_default_solver_from_task():
    """Assert that the solver is inferred from the task when a model is selected."""
    # For classification tasks
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.feature_selection(strategy="rfe", solver="lgb", n_features=8)
    assert type(atom.pipeline[0].solver).__name__ == "LGBMClassifier"

    # For regression tasks
    atom = ATOMRegressor(X_reg, y_reg, random_state=1)
    atom.feature_selection(strategy="rfe", solver="lgb", n_features=25)
    assert type(atom.pipeline[0].solver).__name__ == "LGBMRegressor"


def test_default_solver_univariate():
    """Assert that the default solver is selected for strategy="univariate"."""
    # For classification tasks
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.feature_selection(strategy="univariate", solver=None, n_features=8)
    assert atom.pipeline[0].solver.__name__ == "f_classif"

    # For regression tasks
    atom = ATOMRegressor(X_reg, y_reg, random_state=1)
    atom.feature_selection(strategy="univariate", solver=None, n_features=8)
    assert atom.pipeline[0].solver.__name__ == "f_regression"


def test_transform_data_multiple_branches():
    """Assert that the data is transformed with multiple branches."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.prune()
    atom.branch = "branch_2"
    atom.balance()
    atom.feature_generation(strategy="dfs", n_features=5)
    atom.branch = "branch_3"
    atom.feature_selection(strategy="sfm", solver="lgb", n_features=20)
    atom.save(FILE_DIR + "atom_2", save_data=False)

    atom2 = ATOMLoader(FILE_DIR + "atom_2", data=(X_bin, y_bin), transform_data=True)
    for branch in atom._branches:
        assert atom2._branches[branch].data.equals(atom._branches[branch].data)


def test_transform_data():
    """Assert that the data is transformed correctly."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.prune(columns=slice(3, 10))
    atom.apply(lambda x: x + 2, column="mean radius")
    atom.feature_generation(strategy="dfs", n_features=5)
    atom.feature_selection(strategy="sfm", solver="lgb", n_features=10)
    atom.save(FILE_DIR + "atom", save_data=False)

    atom2 = ATOMLoader(FILE_DIR + "atom", data=(X_bin, y_bin), transform_data=True)
    assert atom2.dataset.shape == atom.dataset.shape

    atom3 = ATOMLoader(FILE_DIR + "atom", data=(X_bin, y_bin), transform_data=False)
    assert atom3.dataset.shape == merge(X_bin, y_bin).shape


def test_plot_pca():
    """Assert that the plot_pca method works as intended."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    pytest.raises(PermissionError, atom.plot_pca)  # No PCA in pipeline
    atom.feature_selection(strategy="PCA", n_features=10)
    atom.plot_pca(display=False)


def test_default_scoring(cls):
    """Assert that the scoring is atom's metric when it exists."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.run("lr", metric="recall")
    atom.feature_selection(strategy="sfs", solver="lgb", n_features=25)
    assert atom.pipeline[0].kwargs["scoring"].name == "recall"


def test_winner_solver_after_run():
    """Assert that the solver is the winning model after run."""
    atom = ATOMClassifier(X_class, y_class, random_state=1)
    atom.run("LR")
    atom.feature_selection(strategy="SFM", solver=None, n_features=8)
    assert atom.pipeline[0].solver is atom.winner.estimator


def test_feature_selection_attrs():
    """Assert that feature_selection attaches only used attributes."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.feature_selection(strategy="univariate", n_features=8)
    assert hasattr(atom, "univariate")
    assert not hasattr(atom, "RFE")
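

# The tests above rely on shared objects defined elsewhere in the test suite:
# pytest, NotFittedError, ATOMClassifier/ATOMRegressor/ATOMLoader, the
# X_bin/y_bin, X_class/y_class, X_reg/y_reg datasets, FILE_DIR, and merge().
# A minimal, assumed sketch of how such datasets could be built from
# scikit-learn's toy datasets (not the project's actual fixture module):
#
#     from sklearn.datasets import load_breast_cancer, load_diabetes, load_wine
#
#     X_bin, y_bin = load_breast_cancer(return_X_y=True, as_frame=True)
#     X_class, y_class = load_wine(return_X_y=True, as_frame=True)
#     X_reg, y_reg = load_diabetes(return_X_y=True, as_frame=True)
#     FILE_DIR = "./files/"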