def test_plot_partial_dependence_multiclass(pyplot):
    # Test partial dependence plot function on multi-class input.
    #
    # NOTE(review): ``pyplot`` is not used in the body; presumably it is a
    # fixture that only sets up matplotlib -- confirm in conftest.
    grid_resolution = 25

    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, iris.target)

    fig, axs = plot_partial_dependence(clf, iris.data, [0, 1],
                                       label=0,
                                       grid_resolution=grid_resolution)
    assert len(axs) == 2
    # ``has_data`` is a method and has to be called: the bare bound method
    # object is always truthy, which would make this assertion vacuous.
    assert all(ax.has_data() for ax in axs)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, target)

    fig, axs = plot_partial_dependence(clf, iris.data, [0, 1],
                                       label='setosa',
                                       grid_resolution=grid_resolution)
    assert len(axs) == 2
    assert all(ax.has_data() for ax in axs)

    # label not in gbrt.classes_
    assert_raises(ValueError, plot_partial_dependence,
                  clf, iris.data, [0, 1], label='foobar',
                  grid_resolution=grid_resolution)

    # label not provided
    assert_raises(ValueError, plot_partial_dependence,
                  clf, iris.data, [0, 1],
                  grid_resolution=grid_resolution)
def test_plot_partial_dependence_input(pyplot):
    # Input checks of plot_partial_dependence: every call below is expected
    # to raise a ValueError.
    gbc = GradientBoostingClassifier(n_estimators=10, random_state=1)

    # not fitted yet
    assert_raises(ValueError, plot_partial_dependence, gbc, X, [0])

    gbc.fit(X, y)

    # X sliced down to zero columns
    X_without_columns = np.array(X)[:, :0]
    assert_raises(ValueError, plot_partial_dependence,
                  gbc, X_without_columns, [0])

    # first argument must be an instance of BaseGradientBoosting
    assert_raises(ValueError, plot_partial_dependence, {}, X, [0])

    # invalid ``features`` arguments, checked in this order
    invalid_features = (
        [-1],              # must be larger than -1
        [100],             # too large feature value
        ['foobar'],        # str feature but no feature_names
        [{'foo': 'bar'}],  # not valid features value
    )
    for features in invalid_features:
        assert_raises(ValueError, plot_partial_dependence, gbc, X, features)
def test_partial_dependecy_input():
    # Input validation of partial_dependence: every call below is expected
    # to raise a ValueError.
    gbc = GradientBoostingClassifier(n_estimators=10, random_state=1)
    gbc.fit(X, y)

    # neither grid nor X is given
    assert_raises(ValueError, partial_dependence,
                  gbc, [0], grid=None, X=None)

    # both grid and X are given
    assert_raises(ValueError, partial_dependence,
                  gbc, [0], grid=[0, 1], X=X)

    # first argument must be an instance of BaseGradientBoosting
    assert_raises(ValueError, partial_dependence, {}, [0], X=X)

    # Gradient boosting estimator must be fit
    unfitted = GradientBoostingClassifier()
    assert_raises(ValueError, partial_dependence, unfitted, [0], X=X)

    # target variables -1 and 100 are both rejected
    for target_variables in ([-1], [100]):
        assert_raises(ValueError, partial_dependence,
                      gbc, target_variables, X=X)

    # wrong ndim for grid
    grid_3d = np.random.rand(10, 2, 1)
    assert_raises(ValueError, partial_dependence, gbc, [0], grid=grid_3d)
def test_plot_partial_dependence_multiclass_error(pyplot, params, err_msg):
    # Fit a gradient boosting classifier on the iris data and check that
    # plot_partial_dependence called with ``params`` raises a ValueError
    # whose message matches ``err_msg``.
    dataset = load_iris()

    gbc = GradientBoostingClassifier(n_estimators=10, random_state=1)
    gbc.fit(dataset.data, dataset.target)

    expected_error = pytest.raises(ValueError, match=err_msg)
    with expected_error:
        plot_partial_dependence(gbc, dataset.data, **params)
def test_partial_dependence_multiclass():
    # Partial dependence of a multi-class classifier fitted on iris.
    n_grid_points = 25

    gbc = GradientBoostingClassifier(n_estimators=10, random_state=1)
    gbc.fit(iris.data, iris.target)
    expected_shape = (gbc.n_classes_, n_grid_points)

    averaged, grid_axes = partial_dependence(gbc, [0],
                                             X=iris.data,
                                             grid_resolution=n_grid_points)

    # one row per class, one column per grid point
    assert averaged.shape == expected_shape

    # a single target feature was requested, hence a single axis
    assert len(grid_axes) == 1
    assert grid_axes[0].shape[0] == n_grid_points
def test_warning_recursion_non_constant_init():
    # A GBDT fitted with a non-constant init predictor must trigger a
    # UserWarning when partial dependence is computed with the recursion
    # method.
    expected_msg = 'Using recursion method with a non-constant init predictor'

    gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
    gbc.fit(X, y)

    # The same check runs twice in a row.
    # NOTE(review): presumably this verifies that the warning is emitted on
    # every call and not only on the first one -- confirm.
    for _ in range(2):
        with pytest.warns(UserWarning, match=expected_msg):
            partial_dependence(gbc, X, [0], method='recursion')
def test_plot_partial_dependence_multiclass(pyplot):
    # Test partial dependence plot function on multi-class input, once with
    # integer class labels and once with string class labels.

    def check_display(disp):
        # Assertions shared by both displays: drawn on the current figure,
        # a 1x2 layout of axes and lines, no contours, and target index 0.
        assert disp.figure_ is pyplot.gcf()
        assert disp.axes_.shape == (1, 2)
        assert disp.lines_.shape == (1, 2)
        assert disp.contours_.shape == (1, 2)
        assert all(c is None for c in disp.contours_.flat)
        assert disp.target_idx == 0

    grid_resolution = 25
    features = [0, 1]

    clf_int = GradientBoostingClassifier(n_estimators=10, random_state=1)
    iris = load_iris()
    clf_int.fit(iris.data, iris.target)

    disp_target_0 = plot_partial_dependence(
        clf_int, iris.data, features,
        target=0, grid_resolution=grid_resolution)
    check_display(disp_target_0)

    # now with symbol labels
    symbol_target = iris.target_names[iris.target]
    clf_symbol = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf_symbol.fit(iris.data, symbol_target)

    disp_symbol = plot_partial_dependence(
        clf_symbol, iris.data, features,
        target='setosa', grid_resolution=grid_resolution)
    check_display(disp_symbol)

    # both displays must hold the same partial dependence results
    paired_results = zip(disp_target_0.pd_results, disp_symbol.pd_results)
    for (avg_preds_int, values_int), (avg_preds_symbol, values_symbol) \
            in paired_results:
        assert_allclose(avg_preds_int, avg_preds_symbol)
        assert_allclose(values_int, values_symbol)

    # check that the pd plots are different for another target
    disp_target_1 = plot_partial_dependence(
        clf_int, iris.data, features,
        target=1, grid_resolution=grid_resolution)
    _, y_target_0 = disp_target_0.lines_[0, 0].get_data()
    _, y_target_1 = disp_target_1.lines_[0, 0].get_data()
    assert any(y_target_0 != y_target_1)
def test_partial_dependence_classifier():
    # Partial dependence of a binary classifier: automatic grid, explicit
    # grid, and refits with trivial and non-trivial sample weights.
    gbc = GradientBoostingClassifier(n_estimators=10, random_state=1)
    gbc.fit(X, y)

    pdp_ref, axes_ref = partial_dependence(gbc, [0], X=X, grid_resolution=5)

    # only 4 grid points instead of 5 because only 4 unique X[:,0] vals
    assert pdp_ref.shape == (1, 4)
    assert axes_ref[0].shape[0] == 4

    # now with our own grid
    first_column = np.asarray(X)[:, 0]
    own_grid = np.unique(first_column)
    pdp_own_grid, axes_own_grid = partial_dependence(gbc, [0], grid=own_grid)

    # no axes are returned when the grid is supplied by the caller
    assert axes_own_grid is None
    assert_array_equal(pdp_ref, pdp_own_grid)

    # with trivial (no-op) sample weights
    unit_weights = np.ones(len(y))
    gbc.fit(X, y, sample_weight=unit_weights)
    pdp_unit, axes_unit = partial_dependence(gbc, [0], X=X, grid_resolution=5)

    assert pdp_unit.shape == (1, 4)
    assert axes_unit[0].shape[0] == 4
    assert_allclose(pdp_unit, pdp_ref)

    # with non-trivial sample weights
    gbc.fit(X, y, sample_weight=sample_weight)
    pdp_weighted, axes_weighted = partial_dependence(gbc, [0], X=X,
                                                     grid_resolution=5)

    assert pdp_weighted.shape == (1, 4)
    assert axes_weighted[0].shape[0] == 4

    # every grid point must move by more than 10% relative to the
    # unit-weight fit
    relative_change = np.abs(pdp_weighted - pdp_unit) / np.abs(pdp_unit)
    assert np.all(relative_change > 0.1)