Ejemplo n.º 1
0
def test_nested_model(teardown):
    """Check that a model used as a step inside another model gets fitted.

    The sub-model is expected to fail on ``predict`` before the outer model
    is fitted, and to produce the same predictions as the outer model once
    the outer model has been fitted.
    """
    features, labels = iris.data, iris.target

    # Inner graph: dimensionality reduction followed by a classifier.
    inner_in = Input()
    inner_target = Input()
    reduced = PCA(n_components=2)(inner_in)
    inner_out = LogisticRegression()(reduced, inner_target)
    submodel = Model(inner_in, inner_out, inner_target)

    # Outer graph: its only step is the inner model.
    outer_in = Input()
    outer_target = Input()
    outer_out = submodel(outer_in, outer_target)
    model = Model(outer_in, outer_out, outer_target)

    # Nothing has been fitted yet.
    with raises_with_cause(RuntimeError, NotFittedError):
        submodel.predict(features)

    model.fit(features, labels)
    outer_predictions = model.predict(features)
    inner_predictions = submodel.predict(features)

    assert_array_equal(outer_predictions, inner_predictions)
Ejemplo n.º 2
0
def test_fit_predict_naive_stack(teardown):
    """Compare a naively stacked graph model with its hand-written equivalent.

    Two first-level classifiers are fitted on the same data; their predictions
    are stacked along axis 1 and used to fit a second-level classifier. Both
    implementations must yield identical predictions.
    """
    features, labels = iris.data, iris.target
    seed = 123
    logreg_kwargs = {"random_state": seed, "solver": "liblinear"}

    # --- graph-based implementation ---
    x = Input()
    y_t = Input()
    first_level_a = LogisticRegression(**logreg_kwargs)(x, y_t)
    first_level_b = RandomForestClassifier(random_state=seed)(x, y_t)
    stacked_nodes = Stack(axis=1)([first_level_a, first_level_b])
    y = LogisticRegression(**logreg_kwargs)(stacked_nodes, y_t)

    model = Model(x, y, y_t)
    model.fit(features, labels)
    graph_predictions = model.predict(features)

    # --- step-by-step implementation ---
    first_logreg = LogisticRegression(**logreg_kwargs)
    first_logreg.fit(features, labels)
    first_logreg_out = first_logreg.predict(features)

    forest = RandomForestClassifier(random_state=seed)
    forest.fit(features, labels)
    forest_out = forest.predict(features)

    second_level_features = np.stack([first_logreg_out, forest_out], axis=1)
    second_logreg = LogisticRegression(**logreg_kwargs)
    second_logreg.fit(second_level_features, labels)
    manual_predictions = second_logreg.predict(second_level_features)

    assert_array_equal(graph_predictions, manual_predictions)
Ejemplo n.º 3
0
def test_fit_params(teardown):
    """Check that ``<step>__<param>`` fit parameters reach the named step.

    A graph model and a ``Pipeline`` with the same steps are fitted with the
    same ``logreg__sample_weight`` argument; the fitted classifier
    coefficients of both must agree.
    """
    features, labels = iris.data, iris.target
    seed = 123
    pca_kwargs = {"n_components": 2, "random_state": seed}
    logreg_kwargs = {
        "multi_class": "multinomial",
        "solver": "lbfgs",
        "random_state": seed,
    }

    # Just weigh the classes differently
    weights = labels + 1
    fit_params = dict(logreg__sample_weight=weights)

    # --- graph-based implementation ---
    x = Input()
    y_t = Input()
    projected = PCA(name="pca", **pca_kwargs)(x)
    y = LogisticRegression(name="logreg", **logreg_kwargs)(projected, y_t)

    model = Model(x, y, y_t)
    model.fit(features, labels, **fit_params)

    # --- pipeline implementation ---
    pipe = Pipeline(
        [
            ("pca", PCA(**pca_kwargs)),
            ("logreg", LogisticRegression(**logreg_kwargs)),
        ]
    )
    pipe.fit(features, labels, **fit_params)

    # assert_allclose rather than exact equality: fit_transform(...) and
    # fit(...).transform(...) give slightly different numerical results.
    graph_coef = model.get_step("logreg").coef_
    pipeline_coef = pipe.named_steps["logreg"].coef_
    assert_allclose(graph_coef, pipeline_coef)
Ejemplo n.º 4
0
def test_fit_predict_pipeline(teardown):
    """Compare a PCA -> classifier graph model with the manual equivalent.

    Both implementations are fitted and evaluated on the same data and must
    produce identical predictions.
    """
    features, labels = iris.data, iris.target
    seed = 123
    pca_kwargs = {"n_components": 2, "random_state": seed}
    logreg_kwargs = {
        "multi_class": "multinomial",
        "solver": "lbfgs",
        "random_state": seed,
    }

    # --- graph-based implementation ---
    x = Input()
    y_t = Input()
    projected = PCA(name="pca", **pca_kwargs)(x)
    y = LogisticRegression(name="logreg", **logreg_kwargs)(projected, y_t)

    model = Model(x, y, y_t)
    graph_predictions = model.fit(features, labels).predict(features)

    # --- step-by-step implementation ---
    manual_pca = PCA(**pca_kwargs)
    manual_logreg = LogisticRegression(**logreg_kwargs)
    projected_features = manual_pca.fit_transform(features)
    fitted_logreg = manual_logreg.fit(projected_features, labels)
    manual_predictions = fitted_logreg.predict(projected_features)

    assert_array_equal(graph_predictions, manual_predictions)
Ejemplo n.º 5
0
 def build_model(step):
     """Wrap ``step`` in a model with two inputs and two targets.

     The step is called once with both inputs and both targets; whatever it
     returns becomes the model output.
     """
     # The generator is consumed in order, so the four inputs are created in
     # the sequence x1, x2, y_t1, y_t2.
     x1, x2, y_t1, y_t2 = (Input() for _ in range(4))
     outputs = step([x1, x2], [y_t1, y_t2])
     return Model([x1, x2], outputs, [y_t1, y_t2])
Ejemplo n.º 6
0
def test_get_params(teardown):
    """Check the dictionary returned by ``Model.get_params``.

    The expected result maps each step name to the step itself and each
    ``<step>__<param>`` key to that parameter's value.
    """
    dummy1 = DummyEstimator(name="dummy1")
    dummy2 = DummyEstimator(x=456, y="def", name="dummy2")
    # a step without get_params/set_params
    concat = Concatenate(name="concat")

    # A meaningless graph whose only purpose is to contain a shared step
    # (dummy2 is called twice).
    x1 = Input()
    x2 = Input()
    hidden = dummy1(x1)
    joined = concat([x1, hidden])
    y1 = dummy2(joined)
    y2 = dummy2(x2, compute_func=lambda X: X * 2, trainable=False)
    model = Model([x1, x2], [y1, y2])

    expected = dict(
        dummy1=dummy1,
        dummy2=dummy2,
        concat=concat,
        dummy1__x=123,
        dummy1__y="abc",
        dummy2__x=456,
        dummy2__y="def",
    )

    assert model.get_params() == expected
Ejemplo n.º 7
0
 def dataplaceholders(self):
     """Build a small graph and return its data placeholders.

     Returns the tuple ``(x1, x2, y1, y2, y1_t)``, where ``y1`` is the
     classifier output for ``x1`` with target ``y1_t`` and ``y2`` is the PCA
     output for ``x2``.
     """
     x1, x2, y1_t = (Input(name=label) for label in ("x1", "x2", "y1_t"))
     classifier = LogisticRegression()
     y1 = classifier(x1, y1_t)
     reducer = PCA()
     y2 = reducer(x2)
     return x1, x2, y1, y2, y1_t
Ejemplo n.º 8
0
    def test_with_missing_inputs(self, teardown):
        """Building a model that omits a required input raises ValueError."""
        first = Input()
        second = Input()
        merged = Concatenate()([first, second])

        # `merged` depends on both inputs, but only `first` is declared.
        with pytest.raises(ValueError):
            Model(first, merged)
Ejemplo n.º 9
0
def build_fn():
    """Return a PCA -> classifier model.

    ``random_state`` is read from the enclosing scope. The steps are named
    "pca" and "classifier".
    """
    features = Input()
    target = Input()
    projected = PCA(random_state=random_state, name="pca")(features)
    classifier = LogisticRegression(random_state=random_state, name="classifier")
    prediction = classifier(projected, target)
    return Model(features, prediction, target)
Ejemplo n.º 10
0
def test_fit_predict_ensemble(teardown):
    """Compare a stacked ensemble graph model with its manual equivalent.

    Two first-level classifiers are fitted on the same data; their predictions
    are stacked along axis 1 and used to fit a second-level classifier. Both
    implementations must yield identical predictions.
    """
    # Reduce to binary problem to avoid ConvergenceWarning. The mask must
    # actually be applied to both the features and the targets; previously it
    # was computed but never used, so the test ran on all three classes.
    mask = iris.target != 2
    x_data = iris.data[mask]
    y_t_data = iris.target[mask]
    random_state = 123

    # baikal way
    x = Input()
    y_t = Input()
    y1 = LogisticRegression(random_state=random_state)(x, y_t)
    y2 = RandomForestClassifier(random_state=random_state)(x, y_t)
    features = Stack(axis=1)([y1, y2])
    y = LogisticRegression(random_state=random_state)(features, y_t)

    model = Model(x, y, y_t)
    model.fit(x_data, y_t_data)
    y_pred_baikal = model.predict(x_data)

    # traditional way
    logreg = sklearn.linear_model.LogisticRegression(random_state=random_state)
    logreg.fit(x_data, y_t_data)
    logreg_pred = logreg.predict(x_data)

    random_forest = sklearn.ensemble.RandomForestClassifier(random_state=random_state)
    random_forest.fit(x_data, y_t_data)
    random_forest_pred = random_forest.predict(x_data)

    features = np.stack([logreg_pred, random_forest_pred], axis=1)
    ensemble = sklearn.linear_model.LogisticRegression(random_state=random_state)
    ensemble.fit(features, y_t_data)
    y_pred_traditional = ensemble.predict(features)

    assert_array_equal(y_pred_baikal, y_pred_traditional)
Ejemplo n.º 11
0
 def dataplaceholders(self):
     """Build a small graph and return its data placeholders.

     Returns the tuple ``(x1, x2, x1_rescaled, y1, y2, y1_t)``: ``x1`` is
     scaled and then classified against ``y1_t`` to give ``y1``, while ``y2``
     is the PCA output for ``x2``.
     """
     x1, x2, y1_t = (Input(name=label) for label in ("x1", "x2", "y1_t"))
     scaler = StandardScaler()
     x1_rescaled = scaler(x1)
     classifier = LogisticRegression()
     y1 = classifier(x1_rescaled, y1_t)
     reducer = PCA()
     y2 = reducer(x2)
     return x1, x2, x1_rescaled, y1, y2, y1_t
Ejemplo n.º 12
0
 def test_predict_with_shared_step(self, teardown):
     """A step called on two different inputs computes each output separately."""
     first_in = Input()
     second_in = Input()
     # One step instance, applied to both inputs.
     doubler = Lambda(lambda value: value * 2)
     first_out = doubler(first_in)
     second_out = doubler(second_in)
     model = Model([first_in, second_in], [first_out, second_out])

     predictions = model.predict([2, 3])
     assert predictions == [4, 6]
Ejemplo n.º 13
0
 def build_fn():
     """Return a PCA -> classifier model.

     ``random_state`` is read from the enclosing scope. The steps are named
     "pca" and "logreg".
     """
     features = Input()
     target = Input()
     projected = PCA(random_state=random_state, name="pca")(features)
     classifier = LogisticRegression(
         random_state=random_state, solver="liblinear", name="logreg"
     )
     prediction = classifier(projected, target)
     return Model(features, prediction, target)
Ejemplo n.º 14
0
    def test_call_with_two_inputs(self, teardown):
        """Calling a two-input, two-output step yields two named placeholders."""
        in_a = Input()
        in_b = Input()
        out_a, out_b = DummyMIMO()([in_a, in_b])
        outputs = (out_a, out_b)

        for placeholder in outputs:
            assert isinstance(placeholder, DataPlaceholder)

        # The names asserted here end in "/0" and "/1" for the two outputs.
        expected_names = ("DummyMIMO_0:0/0", "DummyMIMO_0:0/1")
        for placeholder, expected in zip(outputs, expected_names):
            assert placeholder.name == expected
Ejemplo n.º 15
0
 def test_with_unnecessarily_defined_but_missing_target(self, teardown):
     """Fitting without a target the model declares raises ValueError."""
     features = Input()
     target = Input()
     reducer = PCA()
     # The target passed to PCA is unnecessary (see notes in Step.__call__)
     reduced = reducer(features, target, trainable=True)
     model = Model(inputs=features, outputs=reduced, targets=target)

     # fails because of the model target specification and trainable=True
     with pytest.raises(ValueError):
         model.fit(iris.data)
Ejemplo n.º 16
0
 def test_with_non_fitted_non_trainable_step(self, teardown):
     """Fitting fails when a non-trainable step has never been fitted."""
     features = Input()
     target = Input()
     reduced = PCA()(features, trainable=False)
     prediction = LogisticRegression()(reduced, target)
     model = Model(features, prediction, target)

     # PCA is flagged trainable=False but was never fitted, so calling
     # compute on it during fit raises an error.
     with raises_with_cause(RuntimeError, NotFittedError):
         model.fit(iris.data, iris.target)
Ejemplo n.º 17
0
    def test_call_twice(self, teardown):
        """Calling one step twice yields a distinctly named output per call."""
        in_a = Input()
        in_b = Input()
        shared = DummySISO()
        out_a = shared(in_a)
        out_b = shared(in_b)
        outputs = (out_a, out_b)

        for placeholder in outputs:
            assert isinstance(placeholder, DataPlaceholder)

        # The names asserted here contain ":0" for the first call and ":1"
        # for the second.
        expected_names = ("DummySISO_0:0/0", "DummySISO_0:1/0")
        for placeholder, expected in zip(outputs, expected_names):
            assert placeholder.name == expected
Ejemplo n.º 18
0
def test_stack(teardown):
    """``Stack(axis=1)`` must give the same result as ``np.stack(..., axis=1)``."""
    left_in = Input()
    right_in = Input()
    stacked = Stack(axis=1)([left_in, right_in])
    model = Model([left_in, right_in], stacked)

    left = np.array([[1, 2], [10, 20]])
    right = np.array([[3, 4], [30, 40]])
    expected = np.stack([left, right], axis=1)

    assert_array_equal(model.predict([left, right]), expected)
Ejemplo n.º 19
0
def test_concatenate(teardown):
    """``Concatenate(axis=1)`` must match ``np.concatenate(..., axis=1)``."""
    left_in = Input()
    right_in = Input()
    joined = Concatenate(axis=1)([left_in, right_in])
    model = Model([left_in, right_in], joined)

    # The two arrays differ in their number of columns.
    left = np.array([[1, 2], [10, 20]])
    right = np.array([[3, 4, 5], [30, 40, 50]])
    expected = np.concatenate([left, right], axis=1)

    assert_array_equal(model.predict([left, right]), expected)
Ejemplo n.º 20
0
    def test_with_unnecessary_inputs(self, teardown):
        """Declaring inputs the outputs do not need raises ValueError."""
        x1 = Input()
        x2 = Input()
        y_t = Input()
        h = PCA()(x1)
        y = LogisticRegression()(h, y_t)

        # Case 1: x2 is not connected to the output.
        unconnected_inputs = [x1, x2]
        with pytest.raises(ValueError):
            Model(unconnected_inputs, y, y_t)

        # Case 2: x1 is an unnecessary input upstream of h.
        upstream_inputs = [x1, h]
        with pytest.raises(ValueError):
            Model(upstream_inputs, y, y_t)
Ejemplo n.º 21
0
 def dataplaceholders(self, simple_step, shared_step):
     """Build a graph with a simple step and a step that is called twice.

     Returns the tuple ``(x1, x2, y_t, y_simple, y_shared_1, y_shared_2)``.
     The second call of ``shared_step`` is non-trainable and uses different
     compute functions.
     """
     x1, x2, y_t = (Input(name=label) for label in ("x1", "x2", "y_t"))
     y_simple = simple_step(x1, y_t)
     y_shared_1 = shared_step(x1, y_t)

     second_call_options = {
         "compute_func": "predict_proba",
         "fit_compute_func": "fit_predict_proba",
         "trainable": False,
     }
     y_shared_2 = shared_step(x2, **second_call_options)
     return x1, x2, y_t, y_simple, y_shared_1, y_shared_2
Ejemplo n.º 22
0
def test_transformed_target(teardown):
    """Fit a regressor on a log-transformed target inside the graph.

    The target data is ``exp(2 * x)``, so after the log transform the
    regressor is fitted on ``2 * x`` and its coefficient is expected to be 2.
    """
    features = Input()
    target = Input()
    log_target = Lambda(lambda values: np.log(values))(target)
    log_prediction = LinearRegression()(features, log_target)
    prediction = Lambda(lambda values: np.exp(values))(log_prediction)
    model = Model(features, prediction, target)

    feature_data = np.arange(4).reshape(-1, 1)
    target_data = np.exp(2 * feature_data).ravel()
    model.fit(feature_data, target_data)

    fitted_coef = model.get_step("LinearRegression_0").coef_
    assert_array_equal(fitted_coef, np.array([2.0]))
Ejemplo n.º 23
0
    def test_with_unnecessary_target(self, teardown):
        """Passing a target to ``fit`` is accepted even if no step trains on it."""
        features = Input()
        target = Input()
        logreg = LogisticRegression()
        prediction = logreg(features, target)
        model = Model(features, prediction, target)

        # First fit: the step is trainable and uses the target.
        model.fit(iris.data, iris.target)

        # Second fit: the target isn't required once trainable is set to
        # False, but fit won't complain if it is passed anyway.
        logreg.trainable = False
        model.fit(iris.data, iris.target)
Ejemplo n.º 24
0
    def test_with_unnecessary_target(self, teardown):
        """Passing a target to ``fit`` is accepted even if no step trains on it."""
        features = Input()
        target = Input()
        classifier = RandomForestClassifier()
        prediction = classifier(features, target)
        model = Model(features, prediction, target)

        # First fit: the step is trainable and uses the target.
        model.fit(iris.data, iris.target)

        # Second fit: the target isn't required once trainable is set to
        # False, but fit won't complain if it is passed anyway.
        classifier.trainable = False
        model.fit(iris.data, iris.target)
Ejemplo n.º 25
0
def test_plot_nested_submodels(teardown, tmp_path, levels, expand_nested):
    """Plot a model whose single step is nested ``levels`` sub-models deep.

    The test passes if ``plot_model`` completes without raising.
    """
    # Start from a plain classifier and wrap it once per level; each wrapper
    # receives the previous one together with its 1-based depth.
    outermost = LogisticRegression()
    for depth in range(1, levels + 1):
        outermost = build_submodel(outermost, depth)

    x = Input(name="x")
    y_t = Input(name="y_t")
    y_p = outermost(x, y_t)
    model = Model(x, y_p, y_t)

    target_file = tmp_path / "test_plot_model.png"
    plot_model(model, str(target_file), show=False, expand_nested=expand_nested)
Ejemplo n.º 26
0
def test_fit_params_unhashable_step():
    """A model containing an unhashable step can still be fitted."""

    class UnhashableStep(Step, sklearn.linear_model.LogisticRegression):
        # Overriding __eq__ without defining __hash__ leaves the class
        # unhashable; the comparison result itself is irrelevant here.
        def __eq__(self, other):
            return None

    features = Input()
    target = Input()
    prediction = UnhashableStep()(features, target)
    model = Model(features, prediction, target)

    # Reduce to binary problem to avoid ConvergenceWarning
    keep = iris.target != 2
    model.fit(iris.data[keep], iris.target[keep])
Ejemplo n.º 27
0
def test_fit_predict_standard_stack(teardown):
    """Compare a graph-based stacking model with ``StackingClassifier``.

    This uses the "standard" protocol where the 2nd level features are the
    out-of-fold predictions of the 1st. It also appends the original data to
    the 2nd level features.
    See for example: https://www.kdnuggets.com/2017/02/stacking-models-imropved-predictions.html
    """
    # The held-out targets are not needed: only predictions are compared.
    X_train, X_test, y_t_train, _ = train_test_split(
        breast_cancer.data, breast_cancer.target, test_size=0.2, random_state=0
    )
    seed = 42
    forest_kwargs = {"n_estimators": 10, "random_state": seed}
    final_kwargs = {"solver": "liblinear", "random_state": seed}

    # --- graph-based implementation ---
    x = Input()
    y_t = Input()

    forest_proba = RandomForestClassifierOOF(**forest_kwargs)(
        x, y_t, compute_func="predict_proba"
    )
    # remove collinear feature
    forest_feature = Lambda(lambda array: array[:, 1:])(forest_proba)

    x_scaled = StandardScaler()(x)
    svc_feature = LinearSVCOOF(random_state=seed)(
        x_scaled, y_t, compute_func="decision_function"
    )

    second_level = ColumnStack()([x, forest_feature, svc_feature])
    y_p = LogisticRegression(**final_kwargs)(second_level, y_t)

    model = Model(x, y_p, y_t)
    model.fit(X_train, y_t_train)
    graph_predictions = model.predict(X_test)

    # --- StackingClassifier implementation ---
    first_level = [
        ("rf", RandomForestClassifier(**forest_kwargs)),
        ("svr", make_pipeline(StandardScaler(), LinearSVC(random_state=seed))),
    ]
    clf = sklearn.ensemble.StackingClassifier(
        estimators=first_level,
        final_estimator=LogisticRegression(**final_kwargs),
        passthrough=True,
    )
    reference_predictions = clf.fit(X_train, y_t_train).predict(X_test)

    assert_array_equal(graph_predictions, reference_predictions)
Ejemplo n.º 28
0
def test_columnstack(teardown):
    """``ColumnStack`` must give the same result as ``np.column_stack``."""
    left_in = Input()
    right_in = Input()
    stacked = ColumnStack()([left_in, right_in])
    model = Model([left_in, right_in], stacked)

    # Two 1-D arrays of equal length.
    left = np.array([1, 10, 100])
    right = np.array([2, 20, 200])
    expected = np.column_stack([left, right])

    assert_array_equal(model.predict([left, right]), expected)
Ejemplo n.º 29
0
    def test_with_wrong_type(self, teardown):
        """A plain array as input, output or target raises ValueError."""
        x = Input()
        y_t = Input()
        y = LogisticRegression()(x, y_t)

        wrong = np.zeros((10,))

        # Substitute the array for each Model argument in turn.
        bad_argument_sets = (
            (wrong, y, y_t),
            (x, wrong, y_t),
            (x, y, wrong),
        )
        for arguments in bad_argument_sets:
            with pytest.raises(ValueError):
                Model(*arguments)
Ejemplo n.º 30
0
    def test_fit_compute(self, teardown):
        """Check which of ``fit`` and ``fit_predict`` each step call uses.

        The step called with ``fit_compute_func=None`` is expected to be
        fitted via ``fit``; the step called with default arguments is
        expected to be fitted via ``fit_predict``.
        """
        plain_fit_step = DummyEstimator()
        fit_predict_step = DummyEstimator()

        features = Input()
        target = Input()
        out_plain = plain_fit_step(features, target, fit_compute_func=None)
        out_fit_predict = fit_predict_step(features, target)
        model = Model(features, [out_plain, out_fit_predict], target)
        model.fit(iris.data, iris.target)

        assert plain_fit_step.fit_calls == 1
        assert plain_fit_step.fit_predict_calls == 0
        assert fit_predict_step.fit_calls == 0
        assert fit_predict_step.fit_predict_calls == 1