def write(session: Dict[str, Any]):
    """Render the "Global Effects" page with an optional partial dependence plot.

    Parameters
    ----------
    session : dict
        Must provide ``"X_train"`` and ``"X_valid"`` (objects exposing
        ``.columns``) and ``"m"``, the estimator handed to
        ``plot_partial_dependence``.
    """
    no_plot_choice = "Don't plot partial dependence"

    st.title("Global Effects")
    st.markdown("#### Partial Dependence of Predictions on a Feature")
    feat = st.selectbox(
        "Please select a feature",
        [no_plot_choice] + sorted(session["X_train"].columns),
    )
    # Nothing to draw until the user picks an actual feature.
    if feat == no_plot_choice:
        return

    dataset = st.selectbox(
        "Please select dataset on which to calculate partial dependence",
        ["Test", "Train"],
    )
    # The "Test" choice is served from the "X_valid" session entry.
    X = session["X_train"] if dataset == "Train" else session["X_valid"]
    plot_partial_dependence(
        session["m"],
        X,
        features=[feat],
        feature_names=X.columns,
        grid_resolution=20,
    )
    plt.tight_layout()
    st.pyplot()
def test_plot_partial_dependence_multiclass(pyplot):
    """Check partial dependence plotting on multi-class input.

    The plot is produced once for a classifier fit on integer labels and
    once for a classifier fit on string labels; each time the current figure
    must hold two axes that actually contain data.
    """
    iris = load_iris()
    grid_resolution = 25

    # integer class labels
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, iris.target)
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target=0,
                            grid_resolution=grid_resolution)
    axs = pyplot.gcf().get_axes()
    assert len(axs) == 2
    # has_data is a method: without the call the bound method object is
    # always truthy and the assertion could never fail.
    assert all(ax.has_data() for ax in axs)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, target)
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target='setosa',
                            grid_resolution=grid_resolution)
    axs = pyplot.gcf().get_axes()
    assert len(axs) == 2
    assert all(ax.has_data() for ax in axs)
def test_plot_partial_dependence_incorrent_num_axes(pyplot, clf_diabetes,
                                                    diabetes, nrows, ncols):
    """An ``ax`` argument with the wrong number of axes must raise ValueError.

    The same wrongly-sized grid is offered as a list, a tuple and the raw
    array, both to ``plot_partial_dependence`` and to ``disp.plot``.
    """
    grid_resolution = 5
    fig, axes = pyplot.subplots(nrows, ncols)
    axes_formats = [list(axes.ravel()), tuple(axes.ravel()), axes]
    msg = "Expected ax to have 2 axes, got {}".format(nrows * ncols)

    # Keyword arguments shared by every call below.
    common = dict(
        grid_resolution=grid_resolution,
        feature_names=diabetes.feature_names,
    )

    # Reference display, built without a user-supplied ``ax``.
    disp = plot_partial_dependence(
        clf_diabetes, diabetes.data, ["age", "bmi"], **common
    )

    for ax_format in axes_formats:
        # through the plotting function
        with pytest.raises(ValueError, match=msg):
            plot_partial_dependence(
                clf_diabetes, diabetes.data, ["age", "bmi"],
                ax=ax_format, **common
            )

        # through the display object
        with pytest.raises(ValueError, match=msg):
            disp.plot(ax=ax_format)
def test_plot_partial_dependence_multioutput():
    """Check partial dependence plotting on multi-output input.

    Each of the two outputs is plotted in turn; the current figure must then
    hold two axes that actually contain data.
    """
    import matplotlib.pyplot as plt  # noqa

    (X, y), _ = multioutput_regression_data
    clf = LinearRegression()
    clf.fit(X, y)
    grid_resolution = 25

    try:
        for target in (0, 1):
            plot_partial_dependence(clf, X, [0, 1],
                                    target=target,
                                    grid_resolution=grid_resolution)
            axs = plt.gcf().get_axes()
            assert len(axs) == 2
            # has_data is a method: without the call the bound method object
            # is always truthy and the assertion could never fail.
            assert all(ax.has_data() for ax in axs)
    finally:
        # Close the figures even when an assertion fails so they do not leak
        # into later tests.
        plt.close('all')
def make_pdp_all(self):
    """Plot partial dependence for the top-N features, save it and show it.

    Reads ``self.model``, ``self.X``, ``self.top_n_feature_indicies`` and
    ``self.pretty_features``. The image is written to
    ``charts/partial_dependence_all`` under ``ROOT_IMGS_DIRECTORY``.
    """
    # Partial Dependence Plots
    print("Creating partial_dependence_all plot. This will take a moment.")
    fig, _ = plt.subplots(figsize=(16, 12), facecolor='white')
    plot_partial_dependence(self.model,
                            self.X,
                            self.top_n_feature_indicies,
                            feature_names=self.pretty_features,
                            fig=fig,
                            line_kw={
                                'c': '#40FF40',
                                'linewidth': 8
                            },
                            n_jobs=-1)
    save_image_path = os.path.join(ROOT_IMGS_DIRECTORY,
                                   'charts/partial_dependence_all')
    # ``papertype`` and ``frameon`` are intentionally not passed: both were
    # only ever given as None here and have been removed from matplotlib.
    plt.savefig(save_image_path,
                dpi=None,
                facecolor='w',
                edgecolor='w',
                orientation='portrait',
                format=None,
                transparent=False,
                bbox_inches=None,
                pad_inches=0.1,
                metadata=None)
    print(f"barpartial_dependence_all plot saved to {save_image_path}.")
    plt.show()
def partial_dependence_titanic(pth):
    """Fit a random forest on Titanic data and save partial dependence plots.

    Plots the partial dependence on ``Pclass``, ``Age`` and ``Fare``, writes
    the figure to ``pth`` and shows it.

    Parameters
    ----------
    pth : path-like
        Destination handed to ``fig.savefig``.
    """
    from data.real_data import get_titanic

    df = get_titanic(original=True)
    df = df.dropna(subset=['Pclass', 'Age', 'Fare', 'Sex'])

    # Take an explicit copy and assign the encoded column back. An in-place
    # ``replace`` on a column pulled from a selection of ``df`` acts on a
    # temporary object, so the encoding may never reach ``X``.
    X = df[['Pclass', 'Age', 'Fare', 'Parch', 'Sex']].copy()
    X['Sex'] = X['Sex'].replace({'male': 1, 'female': 0})
    y = df['Survived']

    clf = RandomForestClassifier(200,
                                 min_samples_split=20,
                                 min_samples_leaf=5,
                                 max_features=3).fit(X, y)

    fig, _ = plt.subplots(ncols=3, figsize=(8, 4))
    plot_partial_dependence(clf, X, ['Pclass', 'Age', 'Fare'],
                            fig=fig,
                            grid_resolution=50)
    # Re-read the figure and its axes after plotting.
    fig = plt.gcf()
    axes = fig.get_axes()
    axes[1].set_ylabel('Partial dependence Survived')
    axes[1].set_xticks([1, 2, 3])
    axes[1].set_xticklabels([1, 2, 3])
    fig.tight_layout()
    fig.savefig(pth, bbox_inches='tight')
    plt.show()
def pdp(model, X, features):
    """Draw average partial dependence plots for targets 1 through 5.

    One ``plot_partial_dependence`` call is made per target, in ascending
    order, and all resulting figures are shown at the end.
    """
    print('Начинает работать алгоритм pdp/ice')
    for class_target in range(1, 6):
        plot_partial_dependence(model, X, features,
                                kind='average',
                                target=class_target)
    plt.show()
def plotpdpOfDistanceToTrueResultSklearn(data, subplots, pr):
    '''
    Saves and plots the indicated PDP plots, calculated with sklearn.

    :param data: pandas dataframe with datasets where each row represents a dataset
    :param subplots: indicates columns to examine in pdp plot
    :param pr: Predictor of ML-System
    '''
    pr.setReturnDistanceOfClass(True)
    data = pr.encode(data)
    pr.standardColumnsNoResultColumn()
    plot_partial_dependence(pr, data, subplots, feature_names=pr.standardColumns)

    for i, subplotXLabel in enumerate(subplots):
        ax = plt.gcf().axes[i]
        spreadfourSubplotsHorizontally(ax, i)
        ticks(ax, pr, subplotXLabel, "x")
        # Title the subplot being processed. ``plt.title`` would target the
        # figure's *current* axes, so every iteration would overwrite the
        # title of one and the same subplot.
        ax.set_title("PDP for " + subplotXLabel)

    plt.gcf().set_size_inches(30, 7)
    save("plot_partial_dependence textBruttoClient", plt=plt)
    writeDictToFile(pr.encodingDictionary, pr.decodedColumns)
def test_plot_partial_dependence_multioutput():
    """Check partial dependence plotting on multi-output input.

    Each of the two outputs is plotted in turn; the current figure must then
    hold two axes that actually contain data.
    """
    import matplotlib.pyplot as plt  # noqa

    (X, y), _ = multioutput_regression_data
    clf = LinearRegression()
    clf.fit(X, y)
    grid_resolution = 25

    try:
        for target in (0, 1):
            plot_partial_dependence(clf, X, [0, 1],
                                    target=target,
                                    grid_resolution=grid_resolution)
            axs = plt.gcf().get_axes()
            assert len(axs) == 2
            # has_data is a method: without the call the bound method object
            # is always truthy and the assertion could never fail.
            assert all(ax.has_data() for ax in axs)
    finally:
        # Run the cleanup even when an assertion fails.
        close_figure()
def test_plot_partial_dependence_with_same_axes(pyplot, clf_boston, boston):
    """Plotting twice into the same single ``ax`` must raise ValueError."""
    # The first call splits the space of the passed-in axes into two new
    # axes, so the figure ends up with three axes in total. The API does not
    # let a second call reuse that same axes, since it would again create two
    # new axes (five overall). The supported way is to feed the generated
    # axes into the second call:
    #   disp1 = plot_partial_dependence(...)
    #   disp2 = plot_partial_dependence(..., ax=disp1.axes_)
    grid_resolution = 25
    fig, ax = pyplot.subplots()

    shared_kwargs = dict(
        grid_resolution=grid_resolution,
        feature_names=boston.feature_names,
        ax=ax,
    )
    plot_partial_dependence(clf_boston, boston.data, ['CRIM', 'ZN'],
                            **shared_kwargs)

    msg = ("The ax was already used in another plot function, please set "
           "ax=display.axes_ instead")

    with pytest.raises(ValueError, match=msg):
        plot_partial_dependence(clf_boston, boston.data, ['CRIM', 'ZN'],
                                **shared_kwargs)
def test_plot_partial_dependence_multiclass(pyplot):
    """Check partial dependence plotting on multi-class input.

    The plot is produced once for a classifier fit on integer labels and
    once for a classifier fit on string labels; each time the current figure
    must hold two axes that actually contain data.
    """
    iris = load_iris()
    grid_resolution = 25

    # integer class labels
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, iris.target)
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target=0,
                            grid_resolution=grid_resolution)
    axs = pyplot.gcf().get_axes()
    assert len(axs) == 2
    # has_data is a method: without the call the bound method object is
    # always truthy and the assertion could never fail.
    assert all(ax.has_data() for ax in axs)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, target)
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target='setosa',
                            grid_resolution=grid_resolution)
    axs = pyplot.gcf().get_axes()
    assert len(axs) == 2
    assert all(ax.has_data() for ax in axs)
def make_pdp_pair(self, feat_idx_lst=None):
    """Plot partial dependence for a pair of features, save it and show it.

    Reads ``self.model``, ``self.X`` and ``self.pretty_features``. The image
    is written to ``charts/partial_dependence_pair`` under
    ``ROOT_IMGS_DIRECTORY``.

    Parameters
    ----------
    feat_idx_lst : list, optional
        Features handed to ``plot_partial_dependence``. Defaults to
        ``[0, 35]``.
    """
    # Build the default per call: a list literal in the signature would be
    # one shared object across all calls.
    if feat_idx_lst is None:
        feat_idx_lst = [0, 35]

    print("Creating partial_dependence_pair plot.")
    fig, ax = plt.subplots(figsize=(12, 6), facecolor='white')
    plt.title("Top Features Partial Dependence Plots", fontsize='large')
    ax.set_facecolor('whitesmoke')
    plot_partial_dependence(self.model,
                            self.X,
                            feat_idx_lst,
                            feature_names=self.pretty_features,
                            fig=fig,
                            line_kw={
                                'c': '#40FF40',
                                'linewidth': 10
                            },
                            n_jobs=-1)
    plt.tight_layout(pad=1.08, h_pad=None, w_pad=None, rect=None)
    save_image_path = os.path.join(ROOT_IMGS_DIRECTORY,
                                   'charts/partial_dependence_pair')
    # ``papertype`` and ``frameon`` are intentionally not passed: both were
    # only ever given as None here and have been removed from matplotlib.
    plt.savefig(save_image_path,
                dpi=None,
                facecolor='w',
                edgecolor='w',
                orientation='portrait',
                format=None,
                transparent=False,
                bbox_inches=None,
                pad_inches=0.1,
                metadata=None)
    print(f"barpartial_dependence_pair plot saved to {save_image_path}.")
    plt.show()
def test_plot_partial_dependence_passing_numpy_axes(pyplot, clf_boston,
                                                    boston):
    """Passing ``disp.axes_`` back as ``ax`` must draw onto the same axes."""
    grid_resolution = 25
    feature_names = boston.feature_names.tolist()

    # First plot: one line per axes, y-label only on the left one.
    disp1 = plot_partial_dependence(clf_boston, boston.data,
                                    ['CRIM', 'ZN'],
                                    grid_resolution=grid_resolution,
                                    feature_names=feature_names)
    assert disp1.axes_.shape == (1, 2)
    left_ax, right_ax = disp1.axes_[0, 0], disp1.axes_[0, 1]
    assert left_ax.get_ylabel() == "Partial dependence"
    assert right_ax.get_ylabel() == ""
    for axis in (left_ax, right_ax):
        assert len(axis.get_lines()) == 1

    # Second plot of another estimator on the axes generated above.
    lr = LinearRegression()
    lr.fit(boston.data, boston.target)
    disp2 = plot_partial_dependence(lr, boston.data,
                                    ['CRIM', 'ZN'],
                                    grid_resolution=grid_resolution,
                                    feature_names=feature_names,
                                    ax=disp1.axes_)

    assert np.all(disp1.axes_ == disp2.axes_)
    for column in (0, 1):
        assert len(disp2.axes_[0, column].get_lines()) == 2
def production_plotting(model, X, axes):
    '''
    Draws partial dependence plots for the two production metrics
    (number of colonies and yield per colony) and labels them.

    Parameters
    ----------
    model - The instantiated and fit model to be used for plotting.
    X - The data used to build the grid of values that partial
        dependence cycles through.
    axes - Indexable holding the axes to draw on; entries 0 and 1 are
        labelled here.

    Returns
    ----------
    None
    '''
    plot_partial_dependence(estimator=model, X=X, features=[0, 1], ax=axes)

    # (axes, x-label, tick positions, tick labels, title) for each panel
    panels = (
        (axes[0],
         'Number of Colonies (in Thousands)',
         [0, 50000, 100000, 150000, 200000, 250000],
         ['0', '50', '100', '150', '200', '250'],
         'Colonies per State'),
        (axes[1],
         'Yield per Colony (in Lbs)',
         [40, 80, 120],
         ['40', '80', '120'],
         'Production per Colony'),
    )
    for panel, xlabel, tick_positions, tick_labels, title in panels:
        panel.set_xlabel(xlabel, fontsize=15)
        panel.set_ylabel('Partial Dependence', fontsize=15)
        panel.set_xticks(tick_positions)
        panel.set_xticklabels(tick_labels)
        panel.set_title(title, fontsize=20)
def test_plot_partial_dependence_passing_numpy_axes(pyplot, clf_diabetes,
                                                    diabetes, kind, lines):
    """Passing ``disp.axes_`` back as ``ax`` must draw onto the same axes.

    ``lines`` is the number of lines one plot of the given ``kind`` is
    expected to add to each axes.
    """
    grid_resolution = 25
    feature_names = diabetes.feature_names
    shared_kwargs = dict(
        kind=kind,
        grid_resolution=grid_resolution,
        feature_names=feature_names,
    )

    # First plot: y-label only on the left axes.
    disp1 = plot_partial_dependence(
        clf_diabetes, diabetes.data, ["age", "bmi"], **shared_kwargs
    )
    assert disp1.axes_.shape == (1, 2)
    left_ax, right_ax = disp1.axes_[0, 0], disp1.axes_[0, 1]
    assert left_ax.get_ylabel() == "Partial dependence"
    assert right_ax.get_ylabel() == ""
    for axis in (left_ax, right_ax):
        assert len(axis.get_lines()) == lines

    # Second plot of another estimator on the axes generated above.
    lr = LinearRegression()
    lr.fit(diabetes.data, diabetes.target)
    disp2 = plot_partial_dependence(
        lr, diabetes.data, ["age", "bmi"], ax=disp1.axes_, **shared_kwargs
    )

    assert np.all(disp1.axes_ == disp2.axes_)
    for column in (0, 1):
        assert len(disp2.axes_[0, column].get_lines()) == 2 * lines
def test_plot_partial_dependence_multiclass_error(pyplot, params, err_msg):
    """Plotting with ``params`` on a multi-class model must raise ValueError
    with a message matching ``err_msg``."""
    iris = load_iris()
    features, labels = iris.data, iris.target
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(features, labels)

    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(clf, features, **params)
def test_plot_partial_dependence_error(data, params, err_msg):
    """Plotting with ``params`` must raise ValueError matching ``err_msg``."""
    X, y = data
    estimator = LinearRegression()
    estimator.fit(X, y)

    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(estimator, X, **params)

    close_figure()
def test_plot_partial_dependence_error(data, params, err_msg):
    """Plotting with ``params`` must raise ValueError matching ``err_msg``."""
    import matplotlib.pyplot as plt  # noqa

    X, y = data
    estimator = LinearRegression()
    estimator.fit(X, y)

    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(estimator, X, **params)

    plt.close()
def test_plot_partial_dependence_error(data, params, err_msg):
    """Plotting with ``params`` must raise ValueError matching ``err_msg``."""
    import matplotlib.pyplot as plt  # noqa

    X, y = data
    estimator = LinearRegression()
    estimator.fit(X, y)

    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(estimator, X, **params)

    plt.close()
def plot_boston_pd(estimator, X, var_name):
    """Draw the average partial dependence of ``var_name`` on a fresh figure,
    title the figure and show it."""
    figure, axis = plt.subplots()
    plot_partial_dependence(
        estimator=estimator,
        X=X,
        features=[var_name],
        kind="average",
        ax=axis,
    )
    heading = f"Partial Dependence Plot ({var_name})"
    figure.suptitle(heading)
    figure.show()
def pdp(est, x, feature, feature_names, no, target):
    """Show partial dependence plots for ``target``.

    Parameters
    ----------
    est : estimator handed to ``plot_partial_dependence``.
    x : data handed to ``plot_partial_dependence``.
    feature : sequence of features to plot.
    feature_names : names forwarded as ``feature_names``.
    no : int
        ``-1`` plots every entry of ``feature``; any other value plots
        ``feature[:no]``.
    target : forwarded as ``target``.
    """
    selected = feature if no == -1 else feature[:no]
    # feature_names goes by keyword: positional use breaks where the argument
    # is keyword-only, while the keyword form works either way.
    plot_partial_dependence(est, x, selected,
                            feature_names=feature_names,
                            target=target)
    fig = plt.gcf()
    fig.suptitle('Partial dependence')
    # Leave room at the top for the suptitle.
    plt.subplots_adjust(top=0.9)
    plt.show()
def test_plot_partial_dependence_does_not_override_ylabel(
        pyplot, clf_diabetes, diabetes):
    # Non-regression test: a y-label that was set before plotting must not be
    # overridden.
    # See https://github.com/scikit-learn/scikit-learn/issues/15772
    _, axes = pyplot.subplots(1, 2)
    labelled_ax, untouched_ax = axes[0], axes[1]
    labelled_ax.set_ylabel("Hello world")

    plot_partial_dependence(clf_diabetes, diabetes.data, [0, 1], ax=axes)

    assert labelled_ax.get_ylabel() == "Hello world"
    assert untouched_ax.get_ylabel() == "Partial dependence"
def pdp(est, x, feature, feature_names, no):
    """Show partial dependence plots on a 24x18 inch figure.

    Parameters
    ----------
    est : estimator handed to ``plot_partial_dependence``.
    x : data handed to ``plot_partial_dependence``.
    feature : sequence of features to plot.
    feature_names : names forwarded as ``feature_names``.
    no : int
        ``-1`` plots every entry of ``feature``; any other value plots
        ``feature[:no]``.
    """
    fig = plt.figure(figsize=(24, 18))
    selected = feature if no == -1 else feature[:no]
    # feature_names goes by keyword: positional use breaks where the argument
    # is keyword-only, while the keyword form works either way.
    plot_partial_dependence(est, x, selected,
                            feature_names=feature_names,
                            fig=fig)
    fig = plt.gcf()
    fig.suptitle('Partial dependence', fontsize=30)
    # Leave room at the top for the suptitle.
    plt.subplots_adjust(top=0.95)
    plt.show()
def partial_dependency_analysis(method: Method, X: DataFrame, y: Series, features: List = None) -> None:
    """Fit the pipeline of ``method`` on ``X``/``y`` and plot partial dependence.

    When ``features`` is not given, every column of ``X`` is plotted.
    """
    selected = list(X.columns) if features is None else features

    _pipeline = method.get_pipeline()
    _pipeline.fit_transform(X, y)

    # NOTE(review): the whole ``y`` series is passed as ``target`` - confirm
    # this is what plot_partial_dependence expects here.
    plot_partial_dependence(_pipeline, X, selected, target=y)
def test_plot_partial_dependence_dataframe(pyplot, clf_boston, boston):
    """Plotting must accept a pandas DataFrame with features given by name."""
    pd = pytest.importorskip('pandas')
    frame = pd.DataFrame(boston.data, columns=boston.feature_names)
    column_names = frame.columns.tolist()
    grid_resolution = 25

    plot_partial_dependence(
        clf_boston,
        frame,
        ['TAX', 'AGE'],
        grid_resolution=grid_resolution,
        feature_names=column_names,
    )
def test_plot_partial_dependence_dataframe(pyplot, clf_diabetes, diabetes):
    """Plotting must accept a pandas DataFrame with features given by name."""
    pd = pytest.importorskip('pandas')
    frame = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
    column_names = frame.columns.tolist()
    grid_resolution = 25

    plot_partial_dependence(
        clf_diabetes,
        frame,
        ['bp', 's1'],
        grid_resolution=grid_resolution,
        feature_names=column_names,
    )
def test_plot_partial_dependence_fig(pyplot):
    """A figure passed through ``fig`` must be the one that gets drawn on."""
    (X, y), _ = regression_data
    clf = LinearRegression()
    clf.fit(X, y)

    supplied_fig = pyplot.figure()
    grid_resolution = 25
    plot_partial_dependence(clf, X, [0, 1],
                            target=0,
                            grid_resolution=grid_resolution,
                            fig=supplied_fig)

    assert pyplot.gcf() is supplied_fig
def test_plot_partial_dependence_fig(pyplot):
    """A figure passed through ``fig`` must be the one that gets drawn on."""
    (X, y), _ = regression_data
    clf = LinearRegression()
    clf.fit(X, y)

    supplied_fig = pyplot.figure()
    grid_resolution = 25
    plot_partial_dependence(clf, X, [0, 1],
                            target=0,
                            grid_resolution=grid_resolution,
                            fig=supplied_fig)

    assert pyplot.gcf() is supplied_fig
def test_plot_partial_dependence_multiclass(pyplot):
    """Check the display returned for multi-class input.

    Integer and string class labels must lead to the same partial dependence
    results, and a different target must lead to a different curve.
    """
    grid_resolution = 25
    clf_int = GradientBoostingClassifier(n_estimators=10, random_state=1)
    iris = load_iris()

    def _check_display(disp):
        # Structural expectations shared by both displays; must run right
        # after the plot call since it compares against the current figure.
        assert disp.figure_ is pyplot.gcf()
        assert disp.axes_.shape == (1, 2)
        assert disp.lines_.shape == (1, 2)
        assert disp.contours_.shape == (1, 2)
        assert disp.deciles_vlines_.shape == (1, 2)
        assert disp.deciles_hlines_.shape == (1, 2)
        assert all(c is None for c in disp.contours_.flat)
        assert disp.target_idx == 0

    # Test partial dependence plot function on multi-class input.
    clf_int.fit(iris.data, iris.target)
    disp_target_0 = plot_partial_dependence(clf_int, iris.data, [0, 1],
                                            target=0,
                                            grid_resolution=grid_resolution)
    _check_display(disp_target_0)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf_symbol = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf_symbol.fit(iris.data, target)
    disp_symbol = plot_partial_dependence(clf_symbol, iris.data, [0, 1],
                                          target='setosa',
                                          grid_resolution=grid_resolution)
    _check_display(disp_symbol)

    paired_results = zip(disp_target_0.pd_results, disp_symbol.pd_results)
    for (avg_preds_int, values_int), (avg_preds_symbol, values_symbol) in paired_results:
        assert_allclose(avg_preds_int, avg_preds_symbol)
        assert_allclose(values_int, values_symbol)

    # check that the pd plots are different for another target
    disp_target_1 = plot_partial_dependence(clf_int, iris.data, [0, 1],
                                            target=1,
                                            grid_resolution=grid_resolution)
    target_0_data_y = disp_target_0.lines_[0, 0].get_data()[1]
    target_1_data_y = disp_target_1.lines_[0, 0].get_data()[1]
    assert any(target_0_data_y != target_1_data_y)
def plot_two_ways_pdp(model, test, columns):
    """
    Draws and shows a two-way partial dependence plot for the variables
    passed in columns. A two-way partial dependence plot shows how a pair of
    variables or predictors affects the model's predictions.

    :param model: the model considered; it has to be fit before the partial
        dependence can be calculated.
    :param test: test dataset.
    :param columns: variables studied, in the form [(var1, var2)].
    :return:
    """
    sn.set()
    plt.figure()
    plot_partial_dependence(model, test, columns, n_jobs=-1)

    # Format and display the figure
    plt.tight_layout()
    plt.show()
def plot_partial_dependencies(model, test, column):
    """
    Draws and shows a one-way partial dependence plot for variables of the
    test dataset. A partial dependence plot shows how a particular variable
    or predictor affects the model's predictions.

    :param model: the model considered; it has to be fit before the partial
        dependence can be calculated.
    :param test: test dataset.
    :param column: variables studied.
    :return:
    """
    sn.set()
    plot_partial_dependence(model, test, column, n_jobs=-1)

    # Format and display the figure
    plt.tight_layout()
    plt.show()
def plot_pdp(self, learner, X, features_idx, feature_names=None):
    '''
    Draws the partial dependence plot of a learner on a 20x10 inch figure.

    Parameters:
    - learner: already trained learner to be analyzed
    - X: matrix of input data on which the learner has been trained
    - features_idx: features to be analyzed by pdp, given as column
      indexes in X
    - feature_names: names of the features of X
    '''
    canvas = plt.figure(figsize=(20, 10))
    plot_partial_dependence(
        learner,
        X,
        features_idx,
        feature_names=feature_names,
        fig=canvas,
    )
def test_plot_partial_dependence_custom_axes(pyplot, clf_boston, boston):
    """Plotting onto two user-provided axes: one line plot, one contour plot."""
    grid_resolution = 25
    fig, (ax1, ax2) = pyplot.subplots(1, 2)
    feature_names = boston.feature_names.tolist()
    disp = plot_partial_dependence(clf_boston, boston.data,
                                   ['CRIM', ('CRIM', 'ZN')],
                                   grid_resolution=grid_resolution,
                                   feature_names=feature_names,
                                   ax=[ax1, ax2])

    # The display must hold exactly the figure and axes that were passed in.
    assert fig is disp.figure_
    assert disp.bounding_ax_ is None
    assert disp.axes_.shape == (2, )
    assert disp.axes_[0] is ax1
    assert disp.axes_[1] is ax2

    # line plot on the first axes
    line_ax = disp.axes_[0]
    assert line_ax.get_xlabel() == "CRIM"
    assert line_ax.get_ylabel() == "Partial dependence"

    avg_preds, values = disp.pd_results[0]
    target_idx = disp.target_idx
    xs, ys = disp.lines_[0].get_data()[0], disp.lines_[0].get_data()[1]
    assert_allclose(xs, values[0])
    assert_allclose(ys, avg_preds[target_idx].ravel())

    # contour
    contour_ax = disp.axes_[1]
    contour = disp.contours_[1]
    expect_levels = np.linspace(*disp.pdp_lim[2], num=8)
    assert_allclose(contour.levels, expect_levels)
    assert contour_ax.get_xlabel() == "CRIM"
    assert contour_ax.get_ylabel() == "ZN"
def test_partial_dependence_overwrite_labels(
    pyplot,
    clf_diabetes,
    diabetes,
    kind,
    line_kw,
    label,
):
    """Make sure that the label of the PDP plot can be overwritten."""
    disp = plot_partial_dependence(
        clf_diabetes,
        diabetes.data,
        [0, 2],
        grid_resolution=25,
        feature_names=diabetes.feature_names,
        kind=kind,
        line_kw=line_kw,
    )

    for ax in disp.axes_.ravel():
        legend = ax.get_legend()
        if label is None:
            # no label expected, hence no legend at all
            assert legend is None
            continue
        entries = legend.get_texts()
        assert len(entries) == 1
        assert entries[0].get_text() == label
def test_plot_partial_dependence():
    """Check the partial dependence plot function.

    Two single features plus their interaction are plotted three ways:
    integer features, string features with array feature names, and string
    features with list feature names. Each plot must yield three axes that
    actually contain data.
    """
    import matplotlib.pyplot as plt  # noqa

    boston = load_boston()
    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(boston.data, boston.target)
    grid_resolution = 25

    cases = [
        # integer features, array feature names
        ([0, 1, (0, 1)], boston.feature_names),
        # str features and array feature names
        (['CRIM', 'ZN', ('CRIM', 'ZN')], boston.feature_names),
        # str features and list feature names
        (['CRIM', 'ZN', ('CRIM', 'ZN')], boston.feature_names.tolist()),
    ]

    try:
        for features, feature_names in cases:
            plot_partial_dependence(clf, boston.data, features,
                                    grid_resolution=grid_resolution,
                                    feature_names=feature_names)
            axs = plt.gcf().get_axes()
            assert len(axs) == 3
            # has_data is a method: without the call the bound method object
            # is always truthy and the assertion could never fail.
            assert all(ax.has_data() for ax in axs)
    finally:
        # Close the figures even when an assertion fails so they do not leak
        # into later tests.
        plt.close('all')
def test_plot_partial_dependence_error(pyplot, data, params, err_msg):
    """Plotting with ``params`` must raise ValueError matching ``err_msg``."""
    X, y = data
    estimator = LinearRegression()
    estimator.fit(X, y)

    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(estimator, X, **params)
def main():
    """Show partial dependence plots for the California housing dataset.

    Fits an MLPRegressor and a GradientBoostingRegressor, draws one figure of
    partial dependence plots for each, then draws a custom 3D surface of a
    two-feature partial dependence and shows all figures.
    """
    cal_housing = fetch_california_housing()
    X, y = cal_housing.data, cal_housing.target
    names = cal_housing.feature_names

    # Center target to avoid gradient boosting init bias: gradient boosting
    # with the 'recursion' method does not account for the initial estimator
    # (here the average target, by default)
    y -= y.mean()

    print("Training MLPRegressor...")
    est = MLPRegressor(activation='logistic')
    est.fit(X, y)
    print('Computing partial dependence plots...')
    # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower
    # with the brute method.
    features = [0, 5, 1, 2]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with MLPRegressor')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    print("Training GradientBoostingRegressor...")
    est = GradientBoostingRegressor(n_estimators=100, max_depth=4,
                                    learning_rate=0.1, loss='huber',
                                    random_state=1)
    est.fit(X, y)
    print('Computing partial dependence plots...')
    features = [0, 5, 1, 2, (5, 1)]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    print('Custom 3d plot via ``partial_dependence``')
    fig = plt.figure()

    target_feature = (1, 5)
    pdp, axes = partial_dependence(est, X, target_feature,
                                   grid_resolution=50)
    XX, YY = np.meshgrid(axes[0], axes[1])
    Z = pdp[0].T
    # Create the 3D axes through the figure so it is registered with it; an
    # axes built as Axes3D(fig) is not attached to the figure on newer
    # matplotlib releases and would leave the figure empty.
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1,
                           cmap=plt.cm.BuPu, edgecolor='k')
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')
    # pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle('Partial dependence of house value on median\n'
                 'age and average occupancy, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    plt.show()