Example #1
0
def test_plot_components(show):
    """Check that plot_components rejects bad calls and plots otherwise."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # No PCA step has been added to the pipeline yet.
    with pytest.raises(PermissionError):
        atom.plot_components()

    atom.feature_selection(strategy="PCA", n_features=10)

    # show=0 is not an accepted value.
    with pytest.raises(ValueError):
        atom.plot_components(show=0)

    atom.plot_components(show=show, display=False)
Example #2
0
def test_plot_rfecv(scoring):
    """Check that plot_rfecv needs an RFECV step and plots once it exists."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # No RFECV step has been added to the pipeline yet.
    with pytest.raises(PermissionError):
        atom.plot_rfecv()

    atom.run("lr", metric="precision")
    atom.feature_selection(
        strategy="RFECV",
        n_features=10,
        scoring=scoring,
    )
    atom.plot_rfecv(display=False)
Example #3
0
def test_plot_partial_dependence(features):
    """Assert that the plot_partial_dependence method works as intended.

    The first part runs on a binary classification task and walks
    through the error paths of the ``features`` argument followed by a
    few successful plots. The second part runs on a multiclass task and
    checks the ``target`` argument before plotting the ``features``
    received by this test.
    """
    # For binary classification tasks
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    # Plotting before any model has been run must raise NotFittedError
    pytest.raises(NotFittedError, atom.plot_partial_dependence)
    atom.run(["Tree", "LGB"], metric="f1")

    # Four features are requested; the error states that 3 is the maximum
    with pytest.raises(ValueError, match=r".*Maximum 3 allowed.*"):
        atom.plot_partial_dependence(features=[0, 1, 2, 3], display=False)

    # A tuple of three features is rejected: only singles or pairs are valid
    with pytest.raises(ValueError, match=r".*should be single or in pairs.*"):
        atom.lgb.plot_partial_dependence(features=[(0, 1, 2), 2],
                                         display=False)

    # A feature pair is rejected when called from atom with multiple models
    with pytest.raises(ValueError, match=r".*when plotting multiple models.*"):
        atom.plot_partial_dependence(features=[(0, 2), 2], display=False)

    # A feature name that is not in the dataset
    with pytest.raises(ValueError, match=r".*not found in the dataset.*"):
        atom.plot_partial_dependence(features=["test", 2], display=False)

    # A feature index that is rejected as invalid
    with pytest.raises(ValueError, match=r".*got index.*"):
        atom.plot_partial_dependence(features=[120, 2], display=False)

    # Switch to "branch_2", apply PCA there and run an extra model; the
    # plot must then complain that the models don't use the same features
    atom.branch = "branch_2"
    atom.feature_selection(strategy="pca", n_features=5)
    atom.run(["tree2"])
    with pytest.raises(ValueError, match=r".*models use the same features.*"):
        atom.plot_partial_dependence(features=(0, 1), display=False)

    atom.delete("Tree2")  # Drop the model that was only needed for the check above
    atom.branch.delete()  # Drop the branch that was only needed for the check above
    # Successful plots without an explicit features argument
    atom.plot_partial_dependence(display=False)
    # NOTE(review): presumably called so that the next plot can rely on the
    # model's feature importance when features is omitted - confirm
    atom.lgb.plot_feature_importance(show=5, display=False)
    atom.lgb.plot_partial_dependence(display=False)

    # For multiclass classification tasks
    atom = ATOMClassifier(X_class, y_class, random_state=1)
    atom.run(["Tree", "LGB"], metric="f1_macro")

    # An integer target that is rejected given the number of classes
    with pytest.raises(ValueError, match=r".*classes, got .*"):
        atom.plot_partial_dependence(target=5, display=False)

    # A string target that is not present in the mapping
    with pytest.raises(ValueError, match=r".*not found in the mapping.*"):
        atom.plot_partial_dependence(target="Yes", display=False)

    # Successful plot for the features passed to this test, with target=2
    atom.lgb.plot_partial_dependence(features,
                                     target=2,
                                     title="title",
                                     display=False)
Example #4
0
def test_plot_pipeline(show_params):
    """Check that plot_pipeline rejects an unknown branch and plots otherwise."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Apply a few pipeline steps and run successive halving on two models.
    atom.impute()
    atom.prune()
    atom.feature_selection("univariate", n_features=10)
    atom.successive_halving(["Tree", "AdaB"])

    # A branch name that is rejected as invalid.
    with pytest.raises(ValueError):
        atom.plot_pipeline(branch="invalid")

    plot_options = {
        "show_params": show_params,
        "title": "Pipeline plot",
        "display": False,
    }
    atom.plot_pipeline(**plot_options)
Example #5
0
def test_default_solver_from_task():
    """Check that a solver given as "lgb" resolves to the estimator of the task."""
    # Classification: the solver is expected to be an LGBMClassifier.
    classifier = ATOMClassifier(X_bin, y_bin, random_state=1)
    classifier.feature_selection(strategy="rfe", solver="lgb", n_features=8)
    clf_solver = classifier.pipeline[0].solver
    assert type(clf_solver).__name__ == "LGBMClassifier"

    # Regression: the solver is expected to be an LGBMRegressor.
    regressor = ATOMRegressor(X_reg, y_reg, random_state=1)
    regressor.feature_selection(strategy="rfe", solver="lgb", n_features=25)
    reg_solver = regressor.pipeline[0].solver
    assert type(reg_solver).__name__ == "LGBMRegressor"
Example #6
0
def test_default_solver_univariate():
    """Check the solver chosen for strategy="univariate" when solver is None."""
    # Classification: f_classif is the expected solver.
    classifier = ATOMClassifier(X_bin, y_bin, random_state=1)
    classifier.feature_selection(
        strategy="univariate",
        solver=None,
        n_features=8,
    )
    clf_solver = classifier.pipeline[0].solver
    assert clf_solver.__name__ == "f_classif"

    # Regression: f_regression is the expected solver.
    regressor = ATOMRegressor(X_reg, y_reg, random_state=1)
    regressor.feature_selection(
        strategy="univariate",
        solver=None,
        n_features=8,
    )
    reg_solver = regressor.pipeline[0].solver
    assert reg_solver.__name__ == "f_regression"
Example #7
0
def test_transform_data_multiple_branches():
    """Check that loading with transform_data=True matches every branch.

    The instance is saved without its data and loaded again with the raw
    data; each branch of the loaded instance must hold the same data as
    the corresponding branch of the saved one.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Each branch receives its own transformations.
    atom.prune()
    atom.branch = "branch_2"
    atom.balance()
    atom.feature_generation(strategy="dfs", n_features=5)
    atom.branch = "branch_3"
    atom.feature_selection(strategy="sfm", solver="lgb", n_features=20)

    save_path = FILE_DIR + "atom_2"
    atom.save(save_path, save_data=False)

    atom2 = ATOMLoader(save_path, data=(X_bin, y_bin), transform_data=True)
    for name in atom._branches:
        loaded_data = atom2._branches[name].data
        saved_data = atom._branches[name].data
        assert loaded_data.equals(saved_data)
Example #8
0
def test_transform_data():
    """Check the effect of transform_data when loading a saved instance.

    With transform_data=True the loaded dataset must have the shape of
    the saved instance's dataset; with transform_data=False it must have
    the shape of the merged raw data.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.prune(columns=slice(3, 10))
    atom.apply(lambda x: x + 2, column="mean radius")
    atom.feature_generation(strategy="dfs", n_features=5)
    atom.feature_selection(strategy="sfm", solver="lgb", n_features=10)

    save_path = FILE_DIR + "atom"
    atom.save(save_path, save_data=False)

    # Loading with the transformations applied to the provided data.
    transformed = ATOMLoader(
        save_path,
        data=(X_bin, y_bin),
        transform_data=True,
    )
    assert transformed.dataset.shape == atom.dataset.shape

    # Loading with the provided data left as is.
    untransformed = ATOMLoader(
        save_path,
        data=(X_bin, y_bin),
        transform_data=False,
    )
    assert untransformed.dataset.shape == merge(X_bin, y_bin).shape
Example #9
0
def test_plot_pca():
    """Check that plot_pca needs a PCA step and plots once it exists."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # No PCA step has been added to the pipeline yet.
    with pytest.raises(PermissionError):
        atom.plot_pca()

    atom.feature_selection(strategy="PCA", n_features=10)
    atom.plot_pca(display=False)
Example #10
0
def test_default_scoring(cls):
    """Check that the scoring defaults to atom's metric when one exists."""
    # NOTE(review): `cls` is not used in the body; presumably it is supplied
    # by a parametrization that is not visible here - confirm.
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.run("lr", metric="recall")
    atom.feature_selection(strategy="sfs", solver="lgb", n_features=25)

    selector = atom.pipeline[0]
    scorer = selector.kwargs["scoring"]
    assert scorer.name == "recall"
Example #11
0
def test_winner_solver_after_run():
    """Check that solver=None picks the winning model's estimator after a run."""
    atom = ATOMClassifier(X_class, y_class, random_state=1)
    atom.run("LR")
    atom.feature_selection(strategy="SFM", solver=None, n_features=8)

    # Identity check: the solver must be the very same estimator object.
    chosen_solver = atom.pipeline[0].solver
    assert chosen_solver is atom.winner.estimator
Example #12
0
def test_feature_selection_attrs():
    """Check that feature_selection attaches only the attributes it used."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.feature_selection(strategy="univariate", n_features=8)

    # The strategy that ran is exposed on atom...
    univariate_attached = hasattr(atom, "univariate")
    assert univariate_attached

    # ...while a strategy that did not run is not.
    assert not hasattr(atom, "RFE")