def test_shape_property():
    """Check the ``shape`` property of the current branch.

    Covers a tabular dataset and the multidimensional ``mnist`` dataset.
    """
    # Tabular data: one more column than X_bin (presumably the target
    # column that atom appends -- confirm against the implementation).
    tabular = ATOMClassifier(X_bin, y_bin, random_state=1)
    expected_rows, n_features = len(X_bin), X_bin.shape[1]
    assert tabular.branch.shape == (expected_rows, n_features + 1)

    # Multidimensional data: the shape has three entries instead of two.
    images = ATOMClassifier(*mnist, random_state=1)
    assert images.branch.shape == (70000, (28, 28, 1), 2)
def test_plot_partial_dependence(features):
    """Exercise ``plot_partial_dependence`` on binary and multiclass tasks.

    The test first walks through the error paths (unfitted instance,
    invalid feature selections, models trained on different features,
    invalid targets) and then checks that valid calls run without
    raising.

    Parameters
    ----------
    features
        Feature selection forwarded to the final multiclass plot call.
    """
    # ---------------- Binary classification ----------------
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    with pytest.raises(NotFittedError):
        atom.plot_partial_dependence()
    atom.run(["Tree", "LGB"], metric="f1")

    # Each entry: (plot callable, invalid features, expected error regex).
    invalid_feature_calls = (
        # More than 3 features
        (atom.plot_partial_dependence, [0, 1, 2, 3], r".*Maximum 3 allowed.*"),
        # Triple feature
        (
            atom.lgb.plot_partial_dependence,
            [(0, 1, 2), 2],
            r".*should be single or in pairs.*",
        ),
        # Pair for multi-model
        (
            atom.plot_partial_dependence,
            [(0, 2), 2],
            r".*when plotting multiple models.*",
        ),
        # Unknown feature
        (atom.plot_partial_dependence, ["test", 2], r".*not found in the dataset.*"),
        # Invalid index
        (atom.plot_partial_dependence, [120, 2], r".*got index.*"),
    )
    for plot, bad_features, pattern in invalid_feature_calls:
        with pytest.raises(ValueError, match=pattern):
            plot(features=bad_features, display=False)

    # Different features for multiple models: train an extra model on a
    # second branch whose features were replaced through PCA.
    atom.branch = "branch_2"
    atom.feature_selection(strategy="pca", n_features=5)
    atom.run(["tree2"])
    with pytest.raises(ValueError, match=r".*models use the same features.*"):
        atom.plot_partial_dependence(features=(0, 1), display=False)

    # Clean up the model and branch that were created only for this check.
    atom.delete("Tree2")
    atom.branch.delete()

    # Valid calls must run without raising.
    atom.plot_partial_dependence(display=False)
    atom.lgb.plot_feature_importance(show=5, display=False)
    atom.lgb.plot_partial_dependence(display=False)

    # ---------------- Multiclass classification ----------------
    atom = ATOMClassifier(X_class, y_class, random_state=1)
    atom.run(["Tree", "LGB"], metric="f1_macro")

    # Invalid target int
    with pytest.raises(ValueError, match=r".*classes, got .*"):
        atom.plot_partial_dependence(target=5, display=False)

    # Invalid target str
    with pytest.raises(ValueError, match=r".*not found in the mapping.*"):
        atom.plot_partial_dependence(target="Yes", display=False)

    atom.lgb.plot_partial_dependence(
        features,
        target=2,
        title="title",
        display=False,
    )
def test_task_assignment():
    """Check the value of ``task`` for binary, multiclass and regression data."""
    binary = ATOMClassifier(X_bin, y_bin, random_state=1)
    assert binary.task == "binary classification"

    multiclass = ATOMClassifier(X_class, y_class, random_state=1)
    assert multiclass.task == "multiclass classification"

    regression = ATOMRegressor(X_reg, y_reg, random_state=1)
    assert regression.task == "regression"
def test_bagging_attribute_types():
    """Check that bagging results are stored as python types (not numpy)."""
    # Single metric: a list of scores and a float mean.
    single = ATOMClassifier(X_bin, y_bin, random_state=1)
    single.run("LGB", n_calls=5, bagging=5)
    lgb = single.lgb
    assert isinstance(lgb.metric_bagging, list)
    assert isinstance(lgb.mean_bagging, float)

    # Multiple metrics: tuples of scores and a list of means.
    multi = ATOMClassifier(X_bin, y_bin, random_state=1)
    multi.run("LGB", metric=("f1", "auc", "recall"), bagging=5)
    lgb = multi.lgb
    assert isinstance(lgb.metric_bagging[0], tuple)
    assert isinstance(lgb.mean_bagging, list)
def test_data():
    """Check that data can be supplied again when loading a saved instance.

    The instance is saved with ``save_data=False`` and the same data is
    handed to ``ATOMLoader``; both datasets must then be equal.
    """
    target_file = FILE_DIR + "atom"

    original = ATOMClassifier(X_bin, y_bin, random_state=1)
    original.save(target_file, save_data=False)

    restored = ATOMLoader(target_file, data=(X_bin, y_bin))
    assert restored.dataset.equals(original.dataset)
def test_branch_setter_change():
    """Check that assigning an existing branch name switches back to it."""
    atom = ATOMClassifier(X10_nan, y10, random_state=1)

    # Add a cleaning step on a second branch only.
    atom.branch = "branch_2"
    atom.clean()

    # Back on master, the pipeline must not contain that step.
    atom.branch = "master"
    assert atom.pipeline.empty
def test_vote_scoring_with_weights():
    """Assert that the voting score is the weighted mean of the model scores.

    With weights ``[1, 2]`` the expected score is
    ``(tree + 2 * lgb) / 3``.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.run(["Tree", "LGB"])
    atom.voting(weights=[1, 2])

    expected = (atom.tree.scoring("r2") + 2 * atom.lgb.scoring("r2")) / 3

    # Compare with a tolerance: the expected value is recomputed here, so
    # a different order of floating-point operations in the implementation
    # could otherwise fail an exact equality check.
    assert atom.vote.scoring("r2") == pytest.approx(expected)
def test_X_test_setter():
    """Check that assigning to ``X_test`` replaces the test feature set."""
    sentinel = 999
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    modified = atom.X_test
    modified.iloc[0, 0] = sentinel
    atom.X_test = modified

    assert atom.X_test.iloc[0, 0] == sentinel
def test_setter_error_unequal_column_names():
    """Assert that an error is raised with different column names.

    The replacement frame has the same number of columns as the training
    set, but its first column is swapped for one named ``new_column``.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Build the invalid frame outside the raises-context so that only the
    # setter itself can satisfy the expected ValueError.
    new_X = atom.train.drop(atom.train.columns[0], axis=1)
    new_X.insert(0, "new_column", 1)

    with pytest.raises(ValueError, match="the same columns"):
        atom.train = new_X
def test_pipeline_parameter_None():
    """Check ``transform`` with ``pipeline=None`` keeps all ten rows.

    Per the original note, only the imputer is expected to be applied in
    this mode, so the pruning step does not drop any rows.
    """
    atom = ATOMClassifier(X10_nan, y10, random_state=1)
    atom.impute(strat_num="median")
    atom.prune(max_sigma=2)

    transformed = atom.transform(X10_nan, pipeline=None)
    assert len(transformed) == 10
def test_X_train_setter():
    """Check that assigning to ``X_train`` replaces the training feature set."""
    sentinel = 999
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    modified = atom.X_train
    modified.iloc[0, 0] = sentinel
    atom.X_train = modified

    assert atom.X_train.iloc[0, 0] == sentinel
def test_calibrate_reset_predictions():
    """Check that calibrating clears the stored prediction attributes."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.run("MNB")

    # NOTE(review): reading score_test presumably populates the cached
    # prediction attribute that calibrate() has to reset -- confirm.
    print(atom.mnb.score_test)

    atom.calibrate()
    cached = atom.mnb._pred_attrs[9]
    assert cached is None
def test_plot_components(show):
    """Check that ``plot_components`` works as intended.

    Parameters
    ----------
    show
        Value forwarded to the ``show`` parameter of the valid plot call.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Without PCA in the pipeline the plot is not available.
    with pytest.raises(PermissionError):
        atom.plot_components()

    atom.feature_selection(strategy="PCA", n_features=10)

    # show=0 is rejected as invalid.
    with pytest.raises(ValueError):
        atom.plot_components(show=0)

    atom.plot_components(show=show, display=False)
def test_repr():
    """Check the branch overview in the string representation of atom."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.scale()

    # A single branch is shown inline.
    single_branch_repr = str(atom)
    assert "Branches: master" in single_branch_repr

    # With more than one branch, each one is listed on its own line.
    atom.branch = "branch_2"
    multi_branch_repr = str(atom)
    assert "Branches:\n >>> master\n >>> branch_2 !" in multi_branch_repr
def test_pipeline_parameter_True():
    """Check ``transform`` with ``pipeline=True`` returns fewer than ten rows.

    Per the original note, both transformers (imputer and pruner) are
    expected to be applied in this mode.
    """
    atom = ATOMClassifier(X10_nan, y10, random_state=1)
    atom.impute(strat_num="median")
    atom.prune(max_sigma=2)

    transformed = atom.transform(X10_nan, pipeline=True)
    assert len(transformed) < 10
def test_basetransformer_params_are_attached():
    """Check which ``random_state`` ends up on transformers added to atom.

    A transformer left at its default gets atom's value (1); one with an
    explicit ``random_state`` keeps its own value (2).
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.add(PCA())  # random_state left to default
    atom.add(PCA(random_state=2))  # random_state set explicitly

    # Pipeline position i is expected to hold random_state == i + 1.
    for position, expected_state in enumerate((1, 2)):
        params = atom.pipeline[position].get_params()
        assert params["random_state"] == expected_state
def test_apply_not_callable():
    """Check that ``apply`` raises a TypeError when func is not callable."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    not_a_function = RandomForestClassifier()

    with pytest.raises(TypeError):
        atom.apply(func=not_a_function, column=0)
def test_pipeline_parameter_False():
    """Check ``transform`` with ``pipeline=False`` returns a list.

    Per the original note, no transformer is applied in this mode and the
    input is handed back unchanged.
    """
    atom = ATOMClassifier(X10_nan, y10, random_state=1)
    atom.impute(strat_num="median")
    atom.prune(max_sigma=2)

    untouched = atom.transform(X10_nan, pipeline=False)
    assert isinstance(untouched, list)
def test_train_test_provided_with_parameter_y():
    """Check that train and test sets can be combined with the ``y`` parameter.

    The column named through ``y`` must become the target.
    """
    target_column = "mean texture"
    atom = ATOMClassifier(bin_train, bin_test, y=target_column, random_state=1)
    assert atom.target == target_column
def test_calibrate():
    """Check that ``calibrate`` wraps the winning estimator.

    Calling it before any model is fitted must raise ``NotFittedError``.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Nothing has been fitted yet.
    with pytest.raises(NotFittedError):
        atom.calibrate()

    atom.run("LR")
    atom.calibrate()

    estimator_name = type(atom.winner.estimator).__name__
    assert estimator_name == "CalibratedClassifierCV"
def test_plot_rfecv(scoring):
    """Check that ``plot_rfecv`` works as intended.

    Parameters
    ----------
    scoring
        Value forwarded to the ``scoring`` parameter of the RFECV feature
        selection step.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Without RFECV in the pipeline the plot is not available.
    with pytest.raises(PermissionError):
        atom.plot_rfecv()

    atom.run("lr", metric="precision")
    atom.feature_selection(
        strategy="RFECV",
        n_features=10,
        scoring=scoring,
    )
    atom.plot_rfecv(display=False)
def test_scoring_metric_is_given():
    """Check that ``scoring`` runs for explicitly specified metrics.

    The test only verifies that the calls do not raise.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.run(["GNB", "PA"])

    # Per the original notes, each metric covers a different code path:
    #   "logloss" -> _ProbaScorer
    #   "ap"      -> _ThresholdScorer
    #   "cm"      -> special case
    for metric_name in ("logloss", "ap", "cm"):
        atom.scoring(metric_name)
def test_delete_default():
    """Check that ``delete`` without arguments removes all models.

    Afterwards the models, the metric and the results must all be empty.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.run(["LR", "LDA"])

    atom.delete()

    assert not atom.models and not atom.metric
    assert atom.results.empty
def test_vote_decision_function_prediction_attrs():
    """Check that the voting model exposes decision functions as arrays."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.run(models=["lsvm", "pa"])
    atom.voting()

    # Same check for the training and the test set, in that order.
    for data_set in ("train", "test"):
        values = getattr(atom.vote, f"decision_function_{data_set}")
        assert isinstance(values, np.ndarray)
def test_attrs_are_passed():
    """Check which attributes a new branch receives from its parent.

    ``idx`` must be a distinct object on the new branch, while the
    ``adasyn`` attribute created by ``balance`` must be the very same
    object on both branches.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.balance()
    atom.branch = "branch_2"

    parent, child = atom.master, atom.branch_2
    assert child.idx is not parent.idx
    assert child.adasyn is parent.adasyn
def test_stack_scoring():
    """Assert that the scoring method of the stacking model works as intended.

    Without arguments ``scoring`` returns a formatted string for the
    metric used in the run; with a metric name it returns the numeric
    score.
    """
    atom = ATOMClassifier(X_bin, y_bin, verbose=2, random_state=1)
    atom.run(["Tree", "RF"])
    atom.stacking()

    assert atom.stack.scoring() == "f1: 0.957"

    # Compare the hard-coded score with a tolerance: bit-exact equality on
    # a 16-digit float can break on a last-digit rounding difference.
    assert atom.stack.scoring("recall") == pytest.approx(0.9852941176470589)
def test_setter_error_unequal_columns():
    """Assert that an error is raised when the setter has unequal columns.

    The replacement frame is the training set with one extra column
    inserted at the front.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Build the invalid frame outside the raises-context so that only the
    # setter itself can satisfy the expected ValueError.
    new_X = atom.train
    new_X.insert(0, "new_column", 1)

    with pytest.raises(ValueError, match="number of columns"):
        atom.train = new_X
def test_stack_predictions_multiclass():
    """Check the stacking model's prediction methods on a multiclass task.

    ``predict`` must return a numpy array and ``score`` a ``np.float64``.
    """
    base_models = ["Tree", "PA"]

    atom = ATOMClassifier(X_class, y_class, random_state=1)
    atom.run(["Tree", "PA"])
    atom.stacking(models=base_models, passthrough=True)

    predictions = atom.stack.predict(X_class)
    assert isinstance(predictions, np.ndarray)

    score = atom.stack.score(X_class, y_class)
    assert isinstance(score, np.float64)
def test_setter_error_unequal_target_names():
    """Assert that an error is raised with different target names.

    The replacement target is the training target renamed to
    ``different_name``.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Prepare the renamed target outside the raises-context so that only
    # the setter itself can satisfy the expected ValueError.
    new_y_train = atom.y_train
    new_y_train.name = "different_name"

    with pytest.raises(ValueError, match="the same name"):
        atom.y_train = new_y_train
def test_getitem():
    """Check that atom supports subscripting by integer and by column name.

    An integer returns the transformer at that pipeline position; a
    string returns the matching dataset column.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.clean()
    atom.impute()

    # Position 1 is the second step added above: the imputer.
    second_step = atom[1]
    assert type(second_step).__name__ == "Imputer"

    column = "mean radius"
    assert atom[column].equals(atom.dataset[column])