Exemple #1
0
def test_fit_predict_naive_stack(teardown):
    """Compare a baikal naive stack against the same stack assembled by hand.

    Two base classifiers (a logistic regression and a random forest) are fit
    on the iris data, their predictions are stacked along axis 1, and a second
    logistic regression is fit on the stacked predictions. The baikal model
    and the hand-built pipeline must produce identical final predictions.

    ``teardown`` is not referenced in the body; presumably a pytest fixture
    requested for its side effects (confirm against the suite's conftest).
    """
    seed = 123
    samples, labels = iris.data, iris.target

    # ---- baikal graph ----
    x = Input()
    y_t = Input()
    logreg_out = LogisticRegression(random_state=seed, solver="liblinear")(x, y_t)
    forest_out = RandomForestClassifier(random_state=seed)(x, y_t)
    stacked_out = Stack(axis=1)([logreg_out, forest_out])
    y = LogisticRegression(random_state=seed, solver="liblinear")(stacked_out, y_t)

    model = Model(x, y, y_t)
    model.fit(samples, labels)
    y_pred_baikal = model.predict(samples)

    # ---- hand-built equivalent ----
    def fit_then_predict(estimator, inputs):
        # Fit on `inputs` and predict on the very same data, as the baikal
        # model above is fit and queried with the same array.
        estimator.fit(inputs, labels)
        return estimator.predict(inputs)

    logreg_pred = fit_then_predict(
        LogisticRegression(random_state=seed, solver="liblinear"), samples
    )
    forest_pred = fit_then_predict(
        RandomForestClassifier(random_state=seed), samples
    )

    # Base-level predictions become the feature columns of the meta-classifier.
    meta_inputs = np.stack([logreg_pred, forest_pred], axis=1)
    y_pred_traditional = fit_then_predict(
        LogisticRegression(random_state=seed, solver="liblinear"), meta_inputs
    )

    assert_array_equal(y_pred_baikal, y_pred_traditional)
Exemple #2
0
def test_nested_model_stack(teardown):
    """Compare a nested baikal stacked model with a hand-built equivalent.

    The baikal model comes from ``make_naive_stacked_model`` (defined outside
    this function). The hand-built pipeline consists of:

    * submodel 1: PCA followed by a multinomial logistic regression;
    * submodel 2: a random forest and an extra-trees classifier whose
      predictions are stacked and fed to a multinomial logistic regression;
    * a final multinomial logistic regression on the two submodel predictions.

    Both approaches must yield identical predictions on the iris data.

    ``teardown`` is not referenced in the body; presumably a pytest fixture
    requested for its side effects (confirm against the suite's conftest).
    """
    seed = 123
    n_components = 2
    samples, labels = iris.data, iris.target

    def new_multinomial_logreg():
        # Every logistic regression in the hand-built pipeline shares this
        # exact configuration.
        return LogisticRegression(
            multi_class="multinomial", solver="lbfgs", random_state=seed
        )

    def fit_then_predict(estimator, inputs):
        # Fit on `inputs` and predict on the same data.
        estimator.fit(inputs, labels)
        return estimator.predict(inputs)

    # ----------- baikal way
    baikal_model = make_naive_stacked_model(n_components, seed, samples, labels)
    y_pred_baikal = baikal_model.predict(samples)

    # ----------- traditional way
    # Submodel 1: logistic regression on the PCA-transformed data
    first_submodel = new_multinomial_logreg()
    reducer = PCA(n_components=n_components, random_state=seed)
    reducer.fit(samples)
    reduced = reducer.transform(samples)
    first_pred = fit_then_predict(first_submodel, reduced)

    # Submodel 2 (itself a stacked model)
    forest_pred = fit_then_predict(
        RandomForestClassifier(random_state=seed), samples
    )
    trees_pred = fit_then_predict(
        ExtraTreesClassifier(random_state=seed), samples
    )
    inner_inputs = np.stack([forest_pred, trees_pred], axis=1)
    second_pred = fit_then_predict(new_multinomial_logreg(), inner_inputs)

    # Outer stacked model on top of the two submodel predictions
    outer_inputs = np.stack([first_pred, second_pred], axis=1)
    y_pred_traditional = fit_then_predict(new_multinomial_logreg(), outer_inputs)

    assert_array_equal(y_pred_baikal, y_pred_traditional)
Exemple #3
0
def test_fit_predict_naive_stack_with_proba_features(teardown):
    """Compare a baikal stack built on non-``predict`` outputs with a manual one.

    The base steps use ``predict_proba`` (logistic regression) and ``apply``
    (random forest) as compute functions. The first column of each output is
    dropped, the rest are concatenated along axis 1, and a final logistic
    regression is fit on the result. The baikal model and the hand-built
    pipeline must produce identical predictions.

    ``teardown`` is not referenced in the body; presumably a pytest fixture
    requested for its side effects (confirm against the suite's conftest).
    """
    seed = 123
    n_estimators = 5

    # Reduce to binary problem to avoid ConvergenceWarning
    keep = iris.target != 2
    samples = iris.data[keep]
    labels = iris.target[keep]

    def drop_first_column(array):
        # Keep every column except the first one.
        return array[:, 1:]

    # ---- baikal graph ----
    x = Input()
    y_t = Input()
    proba_out = LogisticRegression(random_state=seed)(
        x, y_t, compute_func="predict_proba"
    )
    apply_out = RandomForestClassifier(
        n_estimators=n_estimators, random_state=seed
    )(x, y_t, compute_func="apply")
    proba_out = Lambda(compute_func=drop_first_column)(proba_out)
    apply_out = Lambda(compute_func=drop_first_column)(apply_out)
    joined = Concatenate(axis=1)([proba_out, apply_out])
    y_p = LogisticRegression(random_state=seed)(joined, y_t)

    model = Model(x, y_p, y_t)
    model.fit(samples, labels)
    y_pred_baikal = model.predict(samples)

    # ---- hand-built equivalent ----
    base_logreg = LogisticRegression(random_state=seed)
    base_logreg.fit(samples, labels)
    proba = base_logreg.predict_proba(samples)

    base_forest = RandomForestClassifier(
        n_estimators=n_estimators, random_state=seed
    )
    base_forest.fit(samples, labels)
    applied = base_forest.apply(samples)

    meta_inputs = np.concatenate(
        [drop_first_column(proba), drop_first_column(applied)], axis=1
    )
    meta = LogisticRegression(random_state=seed)
    meta.fit(meta_inputs, labels)
    y_pred_traditional = meta.predict(meta_inputs)

    assert_array_equal(y_pred_baikal, y_pred_traditional)