def test_stacking_regressor_diabetes(cv, final_estimator, predict_params, passthrough): # prescale the data to avoid convergence warning without using a pipeline # for later assert X_train, X_test, y_train, _ = train_test_split(scale(X_diabetes), y_diabetes, random_state=42) estimators = [('lr', LinearRegression()), ('svr', LinearSVR())] reg = StackingRegressor(estimators=estimators, final_estimator=final_estimator, cv=cv, passthrough=passthrough) reg.fit(X_train, y_train) result = reg.predict(X_test, **predict_params) expected_result_length = 2 if predict_params else 1 if predict_params: assert len(result) == expected_result_length X_trans = reg.transform(X_test) expected_column_count = 12 if passthrough else 2 assert X_trans.shape[1] == expected_column_count if passthrough: assert_allclose(X_test, X_trans[:, -10:]) reg.set_params(lr='drop') reg.fit(X_train, y_train) reg.predict(X_test) X_trans = reg.transform(X_test) expected_column_count_drop = 11 if passthrough else 1 assert X_trans.shape[1] == expected_column_count_drop if passthrough: assert_allclose(X_test, X_trans[:, -10:])
def test_stacking_regressor_drop_estimator(): # prescale the data to avoid convergence warning without using a pipeline # for later assert X_train, X_test, y_train, _ = train_test_split(scale(X_diabetes), y_diabetes, random_state=42) estimators = [('lr', 'drop'), ('svr', LinearSVR(random_state=0))] rf = RandomForestRegressor(n_estimators=10, random_state=42) reg = StackingRegressor(estimators=[('svr', LinearSVR(random_state=0))], final_estimator=rf, cv=5) reg_drop = StackingRegressor(estimators=estimators, final_estimator=rf, cv=5) reg.fit(X_train, y_train) reg_drop.fit(X_train, y_train) assert_allclose(reg.predict(X_test), reg_drop.predict(X_test)) assert_allclose(reg.transform(X_test), reg_drop.transform(X_test))
def test_stacking_regressor_sparse_passthrough(fmt): # Check passthrough behavior on a sparse X matrix X_train, X_test, y_train, _ = train_test_split( sparse.coo_matrix(scale(X_diabetes)).asformat(fmt), y_diabetes, random_state=42 ) estimators = [('lr', LinearRegression()), ('svr', LinearSVR())] rf = RandomForestRegressor(n_estimators=10, random_state=42) clf = StackingRegressor( estimators=estimators, final_estimator=rf, cv=5, passthrough=True ) clf.fit(X_train, y_train) X_trans = clf.transform(X_test) assert_allclose_dense_sparse(X_test, X_trans[:, -10:]) assert sparse.issparse(X_trans) assert X_test.format == X_trans.format