Beispiel #1
0
def test_breast_cancer():
    """Check shapes and container types returned by the breast cancer demo loader.

    The loader is exercised twice: once with its default arguments, where
    woodwork containers are expected, and once with ``return_pandas=True``,
    where pandas containers are expected. Both calls must report 569 rows
    and 30 feature columns.
    """
    n_rows, n_features = 569, 30

    # Default call: woodwork DataTable / DataColumn.
    features, target = demos.load_breast_cancer()
    assert features.shape == (n_rows, n_features)
    assert target.shape == (n_rows, )
    assert isinstance(features, ww.DataTable)
    assert isinstance(target, ww.DataColumn)

    # Explicit pandas request: DataFrame / Series.
    features, target = demos.load_breast_cancer(return_pandas=True)
    assert features.shape == (n_rows, n_features)
    assert target.shape == (n_rows, )
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, pd.Series)
Beispiel #2
0
def test_graph_two_way_partial_dependence(test_pipeline):
    """Check the figure produced for a two-feature partial dependence plot.

    The pipeline fixture is fitted on the breast cancer data, then
    ``graph_partial_dependence`` is called for the feature pair
    ('mean radius', 'mean area'). The resulting figure must be a plotly
    ``Figure`` with the expected title and a single trace whose x/y/z data
    match the index/columns/values returned by ``partial_dependence`` for
    the same arguments.
    """
    X, y = load_breast_cancer()

    # Skip (rather than fail) when plotly is unavailable.
    go = pytest.importorskip(
        'plotly.graph_objects',
        reason='Skipping plotting test because plotly not installed')

    feature_pair = ('mean radius', 'mean area')
    resolution = 20

    fitted = test_pipeline
    fitted.fit(X, y)
    figure = graph_partial_dependence(fitted,
                                      X,
                                      features=feature_pair,
                                      grid_resolution=resolution)
    assert isinstance(figure, go.Figure)

    rendered = figure.to_dict()
    expected_title = "Partial Dependence of 'mean radius' vs. 'mean area'"
    assert rendered['layout']['title']['text'] == expected_title

    traces = rendered['data']
    assert len(traces) == 1
    surface = traces[0]
    assert surface['name'] == "Partial Dependence"

    # The plotted trace must mirror the raw partial dependence result.
    expected = partial_dependence(fitted,
                                  X,
                                  features=feature_pair,
                                  grid_resolution=resolution)
    assert np.array_equal(surface['x'], expected.index)
    assert np.array_equal(surface['y'], expected.columns)
    assert np.array_equal(surface['z'], expected.values)
def test_pipeline_has_classes_property(
        logistic_regression_binary_pipeline_class,
        logistic_regression_multiclass_pipeline_class, problem_type, use_ints):
    """Verify ``classes_`` raises before fitting and lists the labels afterwards.

    Args:
        logistic_regression_binary_pipeline_class: Fixture called with
            ``parameters=`` to build the pipeline for the "binary" case.
        logistic_regression_multiclass_pipeline_class: Fixture called with
            ``parameters=`` to build the pipeline for the "multi" case.
        problem_type: Either "binary" (breast cancer data) or "multi"
            (wine data). Any other value fails the test immediately.
        use_ints: When truthy, the string labels are mapped to integers
            before fitting and the expected classes are integers.
    """
    parameters = {"Logistic Regression Classifier": {"n_jobs": 1}}

    if problem_type == "binary":
        X, y = load_breast_cancer(return_pandas=True)
        pipeline = logistic_regression_binary_pipeline_class(
            parameters=parameters)
        if use_ints:
            y = y.map({'malignant': 0, 'benign': 1})
            answer = [0, 1]
        else:
            answer = ["benign", "malignant"]
    elif problem_type == "multi":
        X, y = load_wine(return_pandas=True)
        pipeline = logistic_regression_multiclass_pipeline_class(
            parameters=parameters)
        if use_ints:
            y = y.map({"class_0": 0, "class_1": 1, "class_2": 2})
            answer = [0, 1, 2]
        else:
            answer = ["class_0", "class_1", "class_2"]
    else:
        # Without this branch an unexpected parametrization would surface
        # later as an unrelated UnboundLocalError on `pipeline`.
        pytest.fail("Unsupported problem_type: {!r}".format(problem_type))

    # Accessing classes_ on an unfitted pipeline must raise.
    with pytest.raises(
            AttributeError,
            match="Cannot access class names before fitting the pipeline."):
        pipeline.classes_

    pipeline.fit(X, y)
    pd.testing.assert_series_equal(pd.Series(pipeline.classes_),
                                   pd.Series(answer))
Beispiel #4
0
def test_partial_dependence_string_feature_name(logistic_regression_binary_pipeline_class):
    """Check ``partial_dependence`` when the feature is given by its string name.

    After fitting on the breast cancer data, the result for "mean radius"
    with ``grid_resolution=20`` must have exactly the columns
    feature_values / partial_dependence / class_label, 20 entries in the
    two numeric columns, and no null values anywhere.
    """
    grid_size = 20
    X, y = load_breast_cancer()

    model = logistic_regression_binary_pipeline_class(
        parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
    model.fit(X, y)

    result = partial_dependence(model,
                                X,
                                features="mean radius",
                                grid_resolution=grid_size)

    expected_columns = ["feature_values", "partial_dependence", "class_label"]
    assert list(result.columns) == expected_columns
    assert len(result["partial_dependence"]) == grid_size
    assert len(result["feature_values"]) == grid_size
    assert not result.isnull().any(axis=None)
Beispiel #5
0
def test_invalid_targets_regression_pipeline(target_type, dummy_regression_pipeline_class):
    """Check that fitting a regression pipeline on a non-numeric target raises.

    The wine data is loaded first. For "category" its target is cast to a
    categorical Series; for "bool" the data is replaced by the breast cancer
    set with its labels mapped to booleans. Any other ``target_type`` uses
    the wine target unchanged. Fitting must raise ``ValueError`` in every
    case.
    """
    X, y = load_wine(return_pandas=True)

    if target_type == "category":
        y = pd.Series(y).astype("category")
    elif target_type == "bool":
        X, y = load_breast_cancer(return_pandas=True)
        y = y.map({"malignant": False, "benign": True})

    regressor = dummy_regression_pipeline_class(parameters={})
    expected_message = "Regression pipeline can only handle numeric target data"
    with pytest.raises(ValueError, match=expected_message):
        regressor.fit(X, y)
def test_woodwork_classification_pipeline(
        logistic_regression_binary_pipeline_class):
    """Fit a binary pipeline on the default breast cancer data and check outputs.

    Neither ``predict`` nor ``predict_proba`` may return any null values
    when called on the training features.
    """
    features, target = load_breast_cancer()

    estimator_params = {"Logistic Regression Classifier": {"n_jobs": 1}}
    classifier = logistic_regression_binary_pipeline_class(
        parameters=estimator_params)
    classifier.fit(features, target)

    predictions = classifier.predict(features)
    assert not pd.isnull(predictions).any()

    # predict_proba output is reduced twice to reach a single boolean.
    probabilities = classifier.predict_proba(features)
    assert not pd.isnull(probabilities).any().any()