def test_plot_partial_dependence_multiclass_error(pyplot, params, err_msg):
    # Fit a small gradient boosting classifier on iris and check that
    # forwarding `params` to plot_partial_dependence raises a ValueError
    # whose message matches `err_msg`.
    # NOTE(review): `params` and `err_msg` are injected from outside this
    # function (presumably a parametrize decorator not visible here).
    dataset = load_iris()
    classifier = GradientBoostingClassifier(n_estimators=10, random_state=1)
    classifier.fit(dataset.data, dataset.target)

    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(classifier, dataset.data, **params)
def test_plot_partial_dependence_with_same_axes(pyplot, clf_boston, boston):
    # The first call to plot_partial_dependence creates two new axes inside
    # the space of the axes passed in, so the figure ends up holding three
    # axes in total.
    #
    # The API currently does not let a second call reuse that same axes: it
    # would again create two new axes in the space, giving five axes. To
    # draw on the existing plots, the generated axes must be passed to the
    # second call instead:
    #
    #   disp1 = plot_partial_dependence(...)
    #   disp2 = plot_partial_dependence(..., ax=disp1.axes_)
    _, shared_ax = pyplot.subplots()

    def _plot_on_shared_ax():
        # Both calls of this test use exactly the same arguments.
        return plot_partial_dependence(
            clf_boston, boston.data, ['CRIM', 'ZN'],
            grid_resolution=25,
            feature_names=boston.feature_names,
            ax=shared_ax,
        )

    # First use of the axes is legitimate.
    _plot_on_shared_ax()

    expected_error = ("The ax was already used in another plot function, "
                      "please set ax=display.axes_ instead")
    with pytest.raises(ValueError, match=expected_error):
        _plot_on_shared_ax()
def test_plot_partial_dependence_passing_numpy_axes(pyplot, clf_boston,
                                                    boston):
    # Passing the axes array of a first display as `ax` to a second call
    # draws the second estimator's curves on top of the first ones.
    names = boston.feature_names.tolist()
    common_kwargs = {"grid_resolution": 25, "feature_names": names}

    first = plot_partial_dependence(clf_boston, boston.data,
                                    ['CRIM', 'ZN'], **common_kwargs)
    assert first.axes_.shape == (1, 2)
    assert first.axes_[0, 0].get_ylabel() == "Partial dependence"
    assert first.axes_[0, 1].get_ylabel() == ""
    # one curve per axes after the first call
    for col in (0, 1):
        assert len(first.axes_[0, col].get_lines()) == 1

    linear = LinearRegression()
    linear.fit(boston.data, boston.target)

    second = plot_partial_dependence(linear, boston.data, ['CRIM', 'ZN'],
                                     ax=first.axes_, **common_kwargs)
    assert np.all(first.axes_ == second.axes_)
    # the second call added one more curve to each of the same axes
    for col in (0, 1):
        assert len(second.axes_[0, col].get_lines()) == 2
def test_plot_partial_dependence_dataframe(pyplot, clf_boston, boston):
    # Smoke test: plotting with a pandas DataFrame as input data and string
    # feature identifiers runs without raising. Skipped without pandas.
    pandas = pytest.importorskip('pandas')
    frame = pandas.DataFrame(boston.data, columns=boston.feature_names)
    column_names = frame.columns.tolist()

    plot_partial_dependence(clf_boston, frame, ['TAX', 'AGE'],
                            grid_resolution=25,
                            feature_names=column_names)
def test_plot_partial_dependence_fig_deprecated(pyplot):
    # Make sure the fig object is correctly used if not None, and that
    # passing it emits the deprecation FutureWarning.
    X, y = make_regression(n_samples=50, random_state=0)
    regressor = LinearRegression()
    regressor.fit(X, y)

    figure = pyplot.figure()
    deprecation_msg = ("The fig parameter is deprecated in version 0.22 "
                       "and will be removed in version 0.24")

    with pytest.warns(FutureWarning, match=deprecation_msg):
        plot_partial_dependence(regressor, X, [0, 1], target=0,
                                grid_resolution=25, fig=figure)

    # the figure that was passed in must still be the current one
    assert pyplot.gcf() is figure
def test_plot_partial_dependence_feature_name_reuse(pyplot, clf_boston,
                                                    boston):
    # A second call that plots onto the axes of the first one, without
    # passing feature names, must not change the x labels set by the first
    # call.
    names = boston.feature_names
    first = plot_partial_dependence(clf_boston, boston.data, [0, 1],
                                    grid_resolution=10,
                                    feature_names=names)

    plot_partial_dependence(clf_boston, boston.data, [0, 1],
                            grid_resolution=10, ax=first.axes_)

    xlabels = [axis.get_xlabel() for axis in first.axes_.ravel()]
    for position, label in enumerate(xlabels):
        assert label == names[position]
def test_plot_partial_dependence_custom_axes(pyplot, clf_boston, boston):
    # Plot a one-way and a two-way partial dependence onto a list of two
    # user-created axes and check that exactly those axes are drawn on.
    fig, (ax1, ax2) = pyplot.subplots(1, 2)
    disp = plot_partial_dependence(
        clf_boston, boston.data, ['CRIM', ('CRIM', 'ZN')],
        grid_resolution=25,
        feature_names=boston.feature_names.tolist(),
        ax=[ax1, ax2],
    )

    assert fig is disp.figure_
    assert disp.bounding_ax_ is None
    assert disp.axes_.shape == (2, )
    assert disp.axes_[0] is ax1
    assert disp.axes_[1] is ax2

    # line (single feature) on the first axes
    line_ax = disp.axes_[0]
    assert line_ax.get_xlabel() == "CRIM"
    assert line_ax.get_ylabel() == "Partial dependence"

    avg_preds, values = disp.pd_results[0]
    plotted_x, plotted_y = disp.lines_[0].get_data()
    assert_allclose(plotted_x, values[0])
    assert_allclose(plotted_y, avg_preds[disp.target_idx].ravel())

    # contour (feature pair) on the second axes
    contour_ax = disp.axes_[1]
    contour = disp.contours_[1]
    assert_allclose(contour.levels, np.linspace(*disp.pdp_lim[2], num=8))
    assert contour_ax.get_xlabel() == "CRIM"
    assert contour_ax.get_ylabel() == "ZN"
def test_plot_partial_dependence_multiclass(pyplot):
    # Test partial dependence plot function on multi-class input, once with
    # the integer iris labels and once with the class names as labels.
    resolution = 25
    iris = load_iris()

    def _check_first_class_display(disp):
        # Assertions shared by both displays; must run right after the
        # plot so that pyplot.gcf() still refers to that display's figure.
        assert disp.figure_ is pyplot.gcf()
        assert disp.axes_.shape == (1, 2)
        assert disp.lines_.shape == (1, 2)
        assert disp.contours_.shape == (1, 2)
        assert all(c is None for c in disp.contours_.flat)
        assert disp.target_idx == 0

    clf_int = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf_int.fit(iris.data, iris.target)
    disp_target_0 = plot_partial_dependence(clf_int, iris.data, [0, 1],
                                            target=0,
                                            grid_resolution=resolution)
    _check_first_class_display(disp_target_0)

    # now with symbol labels
    symbol_target = iris.target_names[iris.target]
    clf_symbol = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf_symbol.fit(iris.data, symbol_target)
    disp_symbol = plot_partial_dependence(clf_symbol, iris.data, [0, 1],
                                          target='setosa',
                                          grid_resolution=resolution)
    _check_first_class_display(disp_symbol)

    # integer and symbol labels must give the same results
    paired_results = zip(disp_target_0.pd_results, disp_symbol.pd_results)
    for (avg_int, values_int), (avg_symbol, values_symbol) in paired_results:
        assert_allclose(avg_int, avg_symbol)
        assert_allclose(values_int, values_symbol)

    # check that the pd plots are different for another target
    disp_target_1 = plot_partial_dependence(clf_int, iris.data, [0, 1],
                                            target=1,
                                            grid_resolution=resolution)
    y_target_0 = disp_target_0.lines_[0, 0].get_data()[1]
    y_target_1 = disp_target_1.lines_[0, 0].get_data()[1]
    assert any(y_target_0 != y_target_1)
def test_plot_partial_dependence(grid_resolution, pyplot, clf_boston, boston):
    # Test partial dependence plot function: two single features and one
    # feature pair, given by index.
    # NOTE(review): `grid_resolution` is injected from outside this function
    # (presumably a parametrize decorator not visible here).
    names = boston.feature_names
    disp = plot_partial_dependence(clf_boston, boston.data,
                                   [0, 1, (0, 1)],
                                   grid_resolution=grid_resolution,
                                   feature_names=names,
                                   contour_kw={"cmap": "jet"})

    fig = pyplot.gcf()
    assert disp.figure_ is fig
    assert len(fig.get_axes()) == 4
    assert disp.bounding_ax_ is not None

    # layout: one row, three columns; lines for the single features,
    # a contour for the pair
    for grid in (disp.axes_, disp.lines_, disp.contours_):
        assert grid.shape == (1, 3)
    assert disp.lines_[0, 2] is None
    assert disp.contours_[0, 0] is None
    assert disp.contours_[0, 1] is None

    assert disp.features == [(0, ), (1, ), (0, 1)]
    assert np.all(disp.feature_names == names)

    assert len(disp.deciles) == 2
    for col in (0, 1):
        expected_deciles = mquantiles(boston.data[:, col],
                                      prob=np.arange(0.1, 1.0, 0.1))
        assert_allclose(disp.deciles[col], expected_deciles)

    # single feature positions
    single_feature_checks = zip([(0, 0), (0, 1)],
                                ["Partial dependence", ""])
    for i, (pos, ylabel) in enumerate(single_feature_checks):
        ax = disp.axes_[pos]
        assert ax.get_ylabel() == ylabel
        assert ax.get_xlabel() == names[i]
        assert_allclose(ax.get_ylim(), disp.pdp_lim[1])

        avg_preds, values = disp.pd_results[i]
        assert avg_preds.shape == (1, grid_resolution)

        plotted_x, plotted_y = disp.lines_[pos].get_data()
        assert_allclose(plotted_x, values[0])
        assert_allclose(plotted_y, avg_preds[disp.target_idx].ravel())

    # two feature position
    pair_ax = disp.axes_[0, 2]
    contour = disp.contours_[0, 2]
    assert_allclose(contour.levels, np.linspace(*disp.pdp_lim[2], num=8))
    assert contour.get_cmap().name == "jet"
    assert pair_ax.get_xlabel() == names[0]
    assert pair_ax.get_ylabel() == names[1]
def test_plot_partial_dependence_str_features(pyplot, clf_boston, boston,
                                              input_type,
                                              feature_names_type):
    # Check plotting with features given as strings, for several input
    # container types and feature-name container types, laid out in a
    # single column.
    # NOTE(review): `input_type` and `feature_names_type` are injected from
    # outside this function (presumably parametrize decorators not visible
    # here).
    if input_type == 'list':
        X = boston.data.tolist()
    elif input_type == 'dataframe':
        pandas = pytest.importorskip("pandas")
        X = pandas.DataFrame(boston.data, columns=boston.feature_names)
    else:
        X = boston.data

    feature_names = (
        None if feature_names_type is None
        else _convert_container(boston.feature_names, feature_names_type)
    )

    # check with str features and array feature names and single column
    disp = plot_partial_dependence(clf_boston, X,
                                   [('CRIM', 'ZN'), 'ZN'],
                                   grid_resolution=25,
                                   feature_names=feature_names,
                                   n_cols=1, line_kw={"alpha": 0.8})

    fig = pyplot.gcf()
    assert len(fig.get_axes()) == 3
    assert disp.figure_ is fig

    # two rows, one column: contour on top, line below
    for grid in (disp.axes_, disp.lines_, disp.contours_):
        assert grid.shape == (2, 1)
    assert disp.lines_[0, 0] is None
    assert disp.contours_[1, 0] is None

    # line
    line_ax = disp.axes_[1, 0]
    assert line_ax.get_xlabel() == "ZN"
    assert line_ax.get_ylabel() == "Partial dependence"

    line = disp.lines_[1, 0]
    avg_preds, values = disp.pd_results[1]
    assert line.get_alpha() == 0.8

    plotted_x, plotted_y = line.get_data()
    assert_allclose(plotted_x, values[0])
    assert_allclose(plotted_y, avg_preds[disp.target_idx].ravel())

    # contour
    contour_ax = disp.axes_[0, 0]
    contour = disp.contours_[0, 0]
    assert_allclose(contour.levels, np.linspace(*disp.pdp_lim[2], num=8))
    assert contour_ax.get_xlabel() == "CRIM"
    assert contour_ax.get_ylabel() == "ZN"
def test_plot_partial_dependence_incorrent_num_axes(pyplot, clf_boston,
                                                    boston, nrows, ncols):
    # Two features are plotted, so passing a collection with a different
    # number of axes must raise, whether it is a list, a tuple or an array,
    # and both through the function and through the display's plot method.
    # NOTE(review): `nrows` and `ncols` are injected from outside this
    # function (presumably a parametrize decorator not visible here).
    _, axes = pyplot.subplots(nrows, ncols)
    flat_axes = axes.ravel()
    candidates = [list(flat_axes), tuple(flat_axes), axes]

    expected_error = "Expected ax to have 2 axes, got {}".format(
        nrows * ncols)

    def _plot(**extra):
        # Same plot for the reference display and for the failing calls.
        return plot_partial_dependence(
            clf_boston, boston.data, ['CRIM', 'ZN'],
            grid_resolution=5,
            feature_names=boston.feature_names,
            **extra
        )

    # valid display, used below to exercise its `plot` method
    disp = _plot()

    for candidate in candidates:
        with pytest.raises(ValueError, match=expected_error):
            _plot(ax=candidate)

        # with axes object
        with pytest.raises(ValueError, match=expected_error):
            disp.plot(ax=candidate)
def test_plot_partial_dependence_multioutput(pyplot, target):
    # Test partial dependence plot function on multi-output input.
    # NOTE(review): `target` is injected from outside this function
    # (presumably a parametrize decorator not visible here).
    X, y = multioutput_regression_data
    regressor = LinearRegression().fit(X, y)

    disp = plot_partial_dependence(regressor, X, [0, 1], target=target,
                                   grid_resolution=25)

    assert len(pyplot.gcf().get_axes()) == 3
    assert disp.target_idx == target
    assert disp.bounding_ax_ is not None

    # only the first axes carries the y label; x labels are the feature
    # indices rendered as strings
    expected = zip([(0, 0), (0, 1)], ["Partial dependence", ""])
    for i, (pos, ylabel) in enumerate(expected):
        axis = disp.axes_[pos]
        assert axis.get_ylabel() == ylabel
        assert axis.get_xlabel() == "{}".format(i)
def test_plot_partial_dependence_error(pyplot, data, params, err_msg):
    # Fit a linear regression on `data` and check that forwarding `params`
    # to plot_partial_dependence raises a ValueError matching `err_msg`.
    # NOTE(review): `data`, `params` and `err_msg` are injected from outside
    # this function (presumably parametrize decorators not visible here).
    X, y = data
    fitted = LinearRegression().fit(X, y)

    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(fitted, X, **params)