def test_transform_target_regressor_2d_transformer(X, y): # Check consistency with transformer accepting only 2D array and a 1D/2D y # array. transformer = StandardScaler() regr = TransformedTargetRegressor(regressor=LinearRegression(), transformer=transformer) y_pred = regr.fit(X, y).predict(X) assert y.shape == y_pred.shape # consistency forward transform if y.ndim == 1: # create a 2D array and squeeze results y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze() else: y_tran = regr.transformer_.transform(y) _check_standard_scaled(y, y_tran) assert y.shape == y_pred.shape # consistency inverse transform assert_allclose(y, regr.transformer_.inverse_transform( y_tran).squeeze()) # consistency of the regressor lr = LinearRegression() transformer2 = clone(transformer) if y.ndim == 1: # create a 2D array and squeeze results lr.fit(X, transformer2.fit_transform(y.reshape(-1, 1)).squeeze()) else: lr.fit(X, transformer2.fit_transform(y)) y_lr_pred = lr.predict(X) assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred)) assert_allclose(regr.regressor_.coef_, lr.coef_)
def test_partial_dependence_easy_target(est, power): # If the target y only depends on one feature in an obvious way (linear or # quadratic) then the partial dependence for that feature should reflect # it. # We here fit a linear regression_data model (with polynomial features if # needed) and compute r_squared to check that the partial dependence # correctly reflects the target. rng = np.random.RandomState(0) n_samples = 200 target_variable = 2 X = rng.normal(size=(n_samples, 5)) y = X[:, target_variable]**power est.fit(X, y) averaged_predictions, values = partial_dependence( est, features=[target_variable], X=X, grid_resolution=1000) new_X = values[0].reshape(-1, 1) new_y = averaged_predictions[0] # add polynomial features if needed new_X = PolynomialFeatures(degree=power).fit_transform(new_X) lr = LinearRegression().fit(new_X, new_y) r2 = r2_score(new_y, lr.predict(new_X)) assert r2 > .99
def test_transform_target_regressor_1d_transformer(X, y): # All transformer in scikit-learn expect 2D data. FunctionTransformer with # validate=False lift this constraint without checking that the input is a # 2D vector. We check the consistency of the data shape using a 1D and 2D y # array. transformer = FunctionTransformer(func=lambda x: x + 1, inverse_func=lambda x: x - 1) regr = TransformedTargetRegressor(regressor=LinearRegression(), transformer=transformer) y_pred = regr.fit(X, y).predict(X) assert y.shape == y_pred.shape # consistency forward transform y_tran = regr.transformer_.transform(y) _check_shifted_by_one(y, y_tran) assert y.shape == y_pred.shape # consistency inverse transform assert_allclose(y, regr.transformer_.inverse_transform( y_tran).squeeze()) # consistency of the regressor lr = LinearRegression() transformer2 = clone(transformer) lr.fit(X, transformer2.fit_transform(y)) y_lr_pred = lr.predict(X) assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred)) assert_allclose(regr.regressor_.coef_, lr.coef_)
def test_transform_target_regressor_2d_transformer_multioutput(): # Check consistency with transformer accepting only 2D array and a 2D y # array. X = friedman[0] y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T transformer = StandardScaler() regr = TransformedTargetRegressor(regressor=LinearRegression(), transformer=transformer) y_pred = regr.fit(X, y).predict(X) assert y.shape == y_pred.shape # consistency forward transform y_tran = regr.transformer_.transform(y) _check_standard_scaled(y, y_tran) assert y.shape == y_pred.shape # consistency inverse transform assert_allclose(y, regr.transformer_.inverse_transform( y_tran).squeeze()) # consistency of the regressor lr = LinearRegression() transformer2 = clone(transformer) lr.fit(X, transformer2.fit_transform(y)) y_lr_pred = lr.predict(X) assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred)) assert_allclose(regr.regressor_.coef_, lr.coef_)
# Loading some example data X, y = datasets.load_boston(return_X_y=True) # Training classifiers reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10) reg2 = RandomForestRegressor(random_state=1, n_estimators=10) reg3 = LinearRegression() ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)]) reg1.fit(X, y) reg2.fit(X, y) reg3.fit(X, y) ereg.fit(X, y) xt = X[:20] plt.figure() plt.plot(reg1.predict(xt), 'gd', label='GradientBoostingRegressor') plt.plot(reg2.predict(xt), 'b^', label='RandomForestRegressor') plt.plot(reg3.predict(xt), 'ys', label='LinearRegression') plt.plot(ereg.predict(xt), 'r*', label='VotingRegressor') plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False) plt.ylabel('predicted') plt.xlabel('training samples') plt.legend(loc="best") plt.title('Comparison of individual predictions with averaged') plt.show()
# construct the dataset rnd = np.random.RandomState(42) X = rnd.uniform(-3, 3, size=100) y = np.sin(X) + rnd.normal(size=len(X)) / 3 X = X.reshape(-1, 1) # transform the dataset with KBinsDiscretizer enc = KBinsDiscretizer(n_bins=10, encode='onehot') X_binned = enc.fit_transform(X) # predict with original dataset fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True, figsize=(10, 4)) line = np.linspace(-3, 3, 1000, endpoint=False).reshape(-1, 1) reg = LinearRegression().fit(X, y) ax1.plot(line, reg.predict(line), linewidth=2, color='green', label="linear regression") reg = DecisionTreeRegressor(min_samples_split=3, random_state=0).fit(X, y) ax1.plot(line, reg.predict(line), linewidth=2, color='red', label="decision tree") ax1.plot(X[:, 0], y, 'o', c='k') ax1.legend(loc="best") ax1.set_ylabel("Regression output") ax1.set_xlabel("Input feature") ax1.set_title("Result before discretization")
x = np.arange(n) rs = check_random_state(0) y = rs.randint(-50, 50, size=(n,)) + 50. * np.log1p(np.arange(n)) # ############################################################################# # Fit IsotonicRegression and LinearRegression models ir = IsotonicRegression() y_ = ir.fit_transform(x, y) lr = LinearRegression() lr.fit(x[:, np.newaxis], y) # x needs to be 2d for LinearRegression # ############################################################################# # Plot result segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)] lc = LineCollection(segments, zorder=0) lc.set_array(np.ones(len(y))) lc.set_linewidths(np.full(n, 0.5)) fig = plt.figure() plt.plot(x, y, 'r.', markersize=12) plt.plot(x, y_, 'b.-', markersize=12) plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-') plt.gca().add_collection(lc) plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right') plt.title('Isotonic regression') plt.show()