def test_fit_predict_standard_stack(teardown):
    # This uses the "standard" protocol where the 2nd level features
    # are the out-of-fold predictions of the 1st. It also appends the
    # original data to the 2nd level features.
    # See for example: https://www.kdnuggets.com/2017/02/stacking-models-imropved-predictions.html
    X_data, y_t_data = breast_cancer.data, breast_cancer.target
    X_train, X_test, y_t_train, y_t_test = train_test_split(
        X_data, y_t_data, test_size=0.2, random_state=0
    )
    random_state = 42

    # baikal way
    x = Input()
    y_t = Input()

    y_p1 = RandomForestClassifierOOF(n_estimators=10, random_state=random_state)(
        x, y_t, compute_func="predict_proba"
    )
    y_p1 = Lambda(lambda array: array[:, 1:])(y_p1)  # remove collinear feature

    x_scaled = StandardScaler()(x)
    y_p2 = LinearSVCOOF(random_state=random_state)(
        x_scaled, y_t, compute_func="decision_function"
    )

    stacked_features = ColumnStack()([x, y_p1, y_p2])
    y_p = LogisticRegression(solver="liblinear", random_state=random_state)(
        stacked_features, y_t
    )

    model = Model(x, y_p, y_t)
    model.fit(X_train, y_t_train)
    y_pred_baikal = model.predict(X_test)

    # traditional way
    estimators = [
        ("rf", RandomForestClassifier(n_estimators=10, random_state=random_state)),
        ("svr", make_pipeline(StandardScaler(), LinearSVC(random_state=random_state))),
    ]
    clf = sklearn.ensemble.StackingClassifier(
        estimators=estimators,
        final_estimator=LogisticRegression(
            solver="liblinear", random_state=random_state
        ),
        passthrough=True,
    )
    y_pred_traditional = clf.fit(X_train, y_t_train).predict(X_test)

    assert_array_equal(y_pred_baikal, y_pred_traditional)

def dataplaceholders(self):
    # Builds the data placeholders and steps shared by the tests in this class
    # (presumably registered as a pytest fixture in the original suite).
    x1 = Input(name="x1")
    x2 = Input(name="x2")
    y1_t = Input(name="y1_t")
    x1_rescaled = StandardScaler()(x1)
    y1 = LogisticRegression()(x1_rescaled, y1_t)
    y2 = PCA()(x2)
    return x1, x2, x1_rescaled, y1, y2, y1_t

def test_fit_with_shared_step(self, teardown):
    # The same step instance is called twice with different compute functions;
    # only the first call is trainable, so the scaler is fitted exactly once.
    x = Input()
    scaler = StandardScaler()
    z = scaler(x, compute_func="transform", trainable=True)
    y = scaler(z, compute_func="inverse_transform", trainable=False)
    model = Model(x, y)

    model.fit(np.array([1, 3, 1, 3]).reshape(-1, 1))
    assert (scaler.mean_, scaler.var_) == (2.0, 1.0)

def test_fit_predict_with_shared_step(teardown):
    # Transforming and then inverse-transforming with the same fitted scaler
    # should recover the original input.
    x = Input()
    scaler = StandardScaler()
    z = scaler(x, compute_func="transform", trainable=True)
    y = scaler(z, compute_func="inverse_transform", trainable=False)
    model = Model(x, y)

    X_data = np.array([1, 3, 1, 3]).reshape(-1, 1)
    model.fit(X_data)
    assert_array_equal(model.predict(X_data), X_data)

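# Illustrative aside (not part of the original suite): the two shared-step tests
# above rely on StandardScaler's round-trip property, i.e. inverse_transform
# undoing transform once the scaler is fitted. A minimal sketch of that property
# with plain scikit-learn follows; the function name and aliases are ours, used
# only for illustration.

def _demo_standard_scaler_round_trip():
    import numpy as np
    from sklearn.preprocessing import StandardScaler as SkStandardScaler

    X = np.array([1, 3, 1, 3], dtype=float).reshape(-1, 1)
    sk_scaler = SkStandardScaler()
    Z = sk_scaler.fit_transform(X)           # standardize to mean 0, variance 1
    X_back = sk_scaler.inverse_transform(Z)  # undo the standardization
    assert np.allclose(X_back, X)            # the original values are recovered
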
def test_get_set_params_invariance(teardown):
    # Setting the parameters returned by get_params should leave them unchanged,
    # following scikit-learn's get_params/set_params contract.
    scaler = StandardScaler(name="scaler")
    regressor = LinearRegression(name="regressor")

    x = Input()
    y_t = Input()
    y_t_scaled = scaler(y_t)
    y_p_scaled = regressor(x, y_t_scaled)
    y_p = scaler(y_p_scaled, compute_func="inverse_transform", trainable=False)
    model = Model(x, y_p, y_t)

    params1 = model.get_params()
    model.set_params(**params1)
    params2 = model.get_params()
    assert params2 == params1
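
# Illustrative aside (not part of the original suite): the invariance checked
# above mirrors scikit-learn's convention, where nested parameters are addressed
# as "<step name>__<param name>". A minimal sketch of that convention with a
# plain scikit-learn Pipeline; the function name and aliases are ours, and the
# exact keys baikal's Model exposes may differ.

def _demo_sklearn_nested_params():
    from sklearn.linear_model import LinearRegression as SkLinearRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler as SkStandardScaler

    pipe = Pipeline(
        [("scaler", SkStandardScaler()), ("regressor", SkLinearRegression())]
    )
    params = pipe.get_params()
    assert params["scaler__with_mean"] is True         # nested parameter of the scaler
    assert params["regressor__fit_intercept"] is True  # nested parameter of the regressor

    pipe.set_params(regressor__fit_intercept=False)    # update a nested parameter
    assert pipe.get_params()["regressor__fit_intercept"] is False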