def test_breast_cancer():
    """Check shapes and container types returned by ``demos.load_breast_cancer``.

    The default call is expected to yield a ``ww.DataTable`` / ``ww.DataColumn``
    pair, while ``return_pandas=True`` is expected to yield a
    ``pd.DataFrame`` / ``pd.Series`` pair. Both variants must expose
    569 rows and 30 feature columns.
    """
    # (keyword arguments, expected feature container, expected target container)
    scenarios = (
        ({}, ww.DataTable, ww.DataColumn),
        ({"return_pandas": True}, pd.DataFrame, pd.Series),
    )
    for load_kwargs, features_cls, target_cls in scenarios:
        features, target = demos.load_breast_cancer(**load_kwargs)
        assert features.shape == (569, 30)
        assert target.shape == (569, )
        assert isinstance(features, features_cls)
        assert isinstance(target, target_cls)
def test_graph_two_way_partial_dependence(test_pipeline):
    """Check the figure built by ``graph_partial_dependence`` for two features.

    The pipeline fixture is fitted on the breast cancer data, then the
    resulting figure's title, trace count, trace name and x/y/z contents are
    compared against a direct ``partial_dependence`` call made with the same
    arguments. The test is skipped when plotly cannot be imported.
    """
    X, y = load_breast_cancer()
    go = pytest.importorskip(
        'plotly.graph_objects',
        reason='Skipping plotting test because plotly not installed')

    feature_pair = ('mean radius', 'mean area')
    resolution = 20

    test_pipeline.fit(X, y)
    figure = graph_partial_dependence(test_pipeline,
                                      X,
                                      features=feature_pair,
                                      grid_resolution=resolution)
    assert isinstance(figure, go.Figure)

    figure_spec = figure.to_dict()
    expected_title = "Partial Dependence of 'mean radius' vs. 'mean area'"
    assert figure_spec['layout']['title']['text'] == expected_title
    assert len(figure_spec['data']) == 1
    trace = figure_spec['data'][0]
    assert trace['name'] == "Partial Dependence"

    # The plotted axes must match the raw partial dependence table exactly.
    expected = partial_dependence(test_pipeline,
                                  X,
                                  features=feature_pair,
                                  grid_resolution=resolution)
    axis_expectations = (
        ('x', expected.index),
        ('y', expected.columns),
        ('z', expected.values),
    )
    for axis_key, expected_values in axis_expectations:
        assert np.array_equal(trace[axis_key], expected_values)
def test_pipeline_has_classes_property(
        logistic_regression_binary_pipeline_class,
        logistic_regression_multiclass_pipeline_class, problem_type, use_ints):
    """Check that ``classes_`` is unavailable before fitting and correct after.

    ``problem_type`` selects the binary (breast cancer) or multiclass (wine)
    setup; ``use_ints`` switches the target from string labels to integer
    codes. Both are presumably supplied by parametrization outside this
    view -- TODO confirm.
    """
    if problem_type == "binary":
        load_data = load_breast_cancer
        pipeline_class = logistic_regression_binary_pipeline_class
        label_codes = {'malignant': 0, 'benign': 1}
        int_answer, str_answer = [0, 1], ["benign", "malignant"]
    elif problem_type == "multi":
        load_data = load_wine
        pipeline_class = logistic_regression_multiclass_pipeline_class
        label_codes = {"class_0": 0, "class_1": 1, "class_2": 2}
        int_answer, str_answer = [0, 1, 2], ["class_0", "class_1", "class_2"]

    X, y = load_data(return_pandas=True)
    pipeline = pipeline_class(
        parameters={"Logistic Regression Classifier": {"n_jobs": 1}})

    if use_ints:
        y = y.map(label_codes)
        answer = int_answer
    else:
        answer = str_answer

    # Accessing the property on an unfitted pipeline must raise.
    with pytest.raises(
            AttributeError,
            match="Cannot access class names before fitting the pipeline."):
        pipeline.classes_

    pipeline.fit(X, y)
    observed_classes = pd.Series(pipeline.classes_)
    expected_classes = pd.Series(answer)
    pd.testing.assert_series_equal(observed_classes, expected_classes)
def test_partial_dependence_string_feature_name(logistic_regression_binary_pipeline_class):
    """Check ``partial_dependence`` when the feature is given by its string name.

    After fitting the binary logistic regression pipeline on the breast cancer
    data, the result for ``"mean radius"`` must have the three expected
    columns, one row per grid point, and no null entries.
    """
    X, y = load_breast_cancer()
    pipeline_parameters = {"Logistic Regression Classifier": {"n_jobs": 1}}
    pipeline = logistic_regression_binary_pipeline_class(parameters=pipeline_parameters)
    pipeline.fit(X, y)

    grid_points = 20
    result = partial_dependence(pipeline, X, features="mean radius", grid_resolution=grid_points)

    expected_columns = ["feature_values", "partial_dependence", "class_label"]
    assert list(result.columns) == expected_columns
    for column_name in ("partial_dependence", "feature_values"):
        assert len(result[column_name]) == grid_points
    assert not result.isnull().any(axis=None)
def test_invalid_targets_regression_pipeline(target_type, dummy_regression_pipeline_class):
    """Check that a regression pipeline refuses to fit non-numeric targets.

    ``target_type`` picks the kind of invalid target:

    * ``"bool"``: breast cancer data with labels mapped to ``False``/``True``.
    * ``"category"``: wine data with the target cast to a categorical dtype.
    * anything else: wine data with its target left as loaded.

    In every case ``fit`` is expected to raise ``ValueError``.
    """
    # Load exactly one dataset per case; the branches are mutually exclusive,
    # so the wine data is no longer read and then discarded on the bool path.
    if target_type == "bool":
        X, y = load_breast_cancer(return_pandas=True)
        y = y.map({"malignant": False, "benign": True})
    else:
        X, y = load_wine(return_pandas=True)
        if target_type == "category":
            y = pd.Series(y).astype("category")

    mock_regression_pipeline = dummy_regression_pipeline_class(parameters={})
    with pytest.raises(ValueError, match="Regression pipeline can only handle numeric target data"):
        mock_regression_pipeline.fit(X, y)
def test_woodwork_classification_pipeline(
        logistic_regression_binary_pipeline_class):
    """Check that a fitted binary pipeline produces no null predictions.

    The pipeline is fitted on the default output of ``load_breast_cancer()``
    (presumably woodwork containers, given the test name -- TODO confirm) and
    both ``predict`` and ``predict_proba`` are checked for missing values.
    """
    X, y = load_breast_cancer()
    pipeline_parameters = {"Logistic Regression Classifier": {"n_jobs": 1}}
    mock_pipeline = logistic_regression_binary_pipeline_class(
        parameters=pipeline_parameters)
    mock_pipeline.fit(X, y)

    predictions = mock_pipeline.predict(X)
    assert not pd.isnull(predictions).any()

    # predict_proba output is reduced twice with .any(), as in a 2-D result.
    probabilities = mock_pipeline.predict_proba(X)
    assert not pd.isnull(probabilities).any().any()