예제 #1
0
def test_KerasClassifier_loss_invariance(y, y_type):
    """Test that KerasClassifier can use both
    categorical_crossentropy and sparse_categorical_crossentropy
    with either one-hot encoded targets or sparse targets.

    A classifier using ``categorical_crossentropy`` is always trained.
    Unless ``y_type`` is "multilabel-indicator", a second classifier using
    ``sparse_categorical_crossentropy`` is trained with the same schedule
    and the predictions of the two classifiers are required to match.

    Parameters
    ----------
    y : array-like
        Target data; only ``y.shape[0]`` is read directly here, to size
        ``X``. Presumably supplied by a pytest parametrization that is not
        visible in this chunk.
    y_type : str
        Target type label for ``y``; "multilabel-indicator" skips the
        sparse-loss comparison.
    """
    X = np.arange(0, y.shape[0]).reshape(-1, 1)
    clf_1 = KerasClassifier(
        model=dynamic_classifier,
        hidden_layer_sizes=(100,),
        loss="categorical_crossentropy",
        random_state=0,
    )
    clf_1.fit(X, y)
    clf_1.partial_fit(X, y)
    y_1 = clf_1.predict(X)
    if y_type != "multilabel-indicator":
        # sparse_categorical_crossentropy is not compatible with
        # one-hot encoded targets, and one-hot encoded targets are not used in sklearn
        # This is a use case that does not natively succeed in Keras or sklearn estimators
        # and thus SciKeras does not intend to auto-convert data to support it
        clf_2 = KerasClassifier(
            model=dynamic_classifier,
            hidden_layer_sizes=(100,),
            loss="sparse_categorical_crossentropy",
            random_state=0,
        )
        clf_2.fit(X, y)
        # Mirror clf_1's fit + partial_fit schedule so the comparison below
        # is between identically trained models.
        clf_2.partial_fit(X, y)
        # Predict with clf_2: predicting with clf_1 again would compare
        # y_1 against itself and the assertion could never fail.
        y_2 = clf_2.predict(X)

        np.testing.assert_equal(y_1, y_2)
def test_single_output_multilabel_indicator():
    """Test that a multilabel-indicator target can be used without errors.

    Fits and predicts with a three-column binary target, then checks the
    ``target_type_`` and ``classes_`` attributes set on the classifier.
    """
    X = np.random.random(size=(100, 2))
    # randint's upper bound is exclusive: (0, 2) draws from {0, 1}, whereas
    # (0, 1) would always yield 0 and make the target constant.
    y = np.random.randint(0, 2, size=(100, 3))
    y[0, :] = 1  # i.e. not "one hot encoded"

    def build_fn():
        """Build a small two-layer model with three sigmoid outputs."""
        model = Sequential()
        model.add(Dense(10, input_shape=(2, ), activation="relu"))
        model.add(Dense(3, activation="sigmoid"))
        return model

    clf = KerasClassifier(
        model=build_fn,
        loss="categorical_crossentropy",
    )
    # check that there are no errors
    clf.fit(X, y)
    clf.predict(X)
    # check the target type
    assert clf.target_type_ == "multilabel-indicator"
    # check classes
    np.testing.assert_equal(clf.classes_, np.arange(3))
예제 #3
0
def test_not_fitted_error():
    """Check that predicting with an unfitted estimator raises NotFittedError."""
    unfitted = KerasClassifier(dynamic_classifier)
    features = np.random.rand(10, 20)
    # The predict check is in BaseWrapper so it covers
    # KerasRegressor as well
    for method_name in ("predict", "predict_proba"):
        with pytest.raises(NotFittedError):
            getattr(unfitted, method_name)(features)
예제 #4
0
def test_class_weight_param():
    """Backport of sklearn.utils.estimator_checks.check_class_weight_classifiers
    for sklearn <= 0.23.0.

    For 2- and 3-center blob datasets, class 0 is given a very large weight
    and the remaining classes a very small one; more than 87% of the test
    predictions must then be class 0.
    """
    negligible = 0.0001
    classifier = KerasClassifier(
        model=dynamic_classifier,
        model__hidden_layer_sizes=(100, ),
        epochs=50,
        random_state=0,
    )
    for blob_count in (2, 3):
        # create a very noisy dataset
        X, y = make_blobs(centers=blob_count, random_state=0, cluster_std=20)
        split = train_test_split(X, y, test_size=0.5, random_state=0)
        X_train, X_test, y_train = split[0], split[1], split[2]

        # Weight class 0 heavily; a third entry is added whenever the
        # training split does not contain exactly two classes.
        weights = {0: 1000, 1: negligible}
        if len(np.unique(y_train)) != 2:
            weights[2] = negligible

        classifier.set_params(class_weight=weights)
        classifier.fit(X_train, y_train)

        predicted = classifier.predict(X_test)
        share_of_class_zero = np.mean(predicted == 0)
        assert share_of_class_zero > 0.87
예제 #5
0
def test_callbacks_prefixes():
    """Test dispatching of callbacks using no prefix, the fit__ prefix or the predict__ prefix.

    Three counting callbacks are registered, one per prefix. After ``fit``
    the unprefixed and ``fit__`` callbacks must each have seen one train
    begin and one test begin, and the ``predict__`` callback nothing. After
    ``predict`` only the unprefixed and ``predict__`` callbacks gain a
    predict-begin count.
    """

    class CountingCallback(Callback):
        """Record how many times each ``on_*_begin`` hook has fired."""

        def __init__(self, call_logs: DefaultDict[str, int]):
            self.call_logs = call_logs

        def on_train_begin(self, logs=None):
            self.call_logs["on_train_begin"] += 1

        def on_test_begin(self, logs=None):
            self.call_logs["on_test_begin"] += 1

        def on_predict_begin(self, logs=None):
            self.call_logs["on_predict_begin"] += 1

    def get_clf() -> keras.Model:
        """Build a single-input, single-sigmoid-output sequential model."""
        net = keras.models.Sequential()
        net.add(keras.layers.InputLayer((1, )))
        net.add(keras.layers.Dense(1, activation="sigmoid"))
        return net

    shared_log = defaultdict(int)
    fit_log = defaultdict(int)
    predict_log = defaultdict(int)

    clf = KerasClassifier(
        model=get_clf,
        loss="binary_crossentropy",
        callbacks=CountingCallback(shared_log),
        fit__callbacks=CountingCallback(fit_log),
        predict__callbacks=CountingCallback(predict_log),
        validation_split=0.1,
    )

    # Counts expected once fit has run (the test-begin hook presumably
    # comes from the validation split).
    after_fit = {"on_train_begin": 1, "on_test_begin": 1}
    only_predict = {"on_predict_begin": 1}

    clf.fit([[0]] * 100, [0] * 100)
    assert shared_log == after_fit
    assert fit_log == after_fit
    assert predict_log == {}

    clf.predict([[0]])
    assert shared_log == {**after_fit, **only_predict}
    assert fit_log == after_fit
    assert predict_log == only_predict
예제 #6
0
def test_class_weight_param():
    """Backport of sklearn.utils.estimator_checks.check_class_weight_classifiers
    for sklearn <= 0.23.0.

    Tests that fit and partial_fit correctly handle the class_weight parameter:
    with class 0 weighted far above the others, more than 80% of test
    predictions must be class 0 after ``fit`` and more than 95% after the
    additional ``partial_fit`` rounds.
    """
    # (class_weight, fit epochs, partial_fit rounds) used when the training
    # split has exactly two classes; anything else uses `fallback_plan`.
    # These numbers are purely empirical, just like they are in the
    # original sklearn test.
    plans = {2: ({0: 1000, 1: 0.0001}, 4, 3)}
    fallback_plan = ({0: 1000, 1: 0.0001, 2: 0.0001}, 8, 6)

    classifier = KerasClassifier(
        model=dynamic_classifier,
        model__hidden_layer_sizes=(100, ),
        random_state=0,
    )
    for blob_count in (2, 3):
        # create a very noisy dataset
        X, y = make_blobs(centers=blob_count, random_state=0, cluster_std=20)
        X_train, X_test, y_train, _ = train_test_split(
            X, y, test_size=0.5, random_state=0
        )

        n_classes = len(np.unique(y_train))
        weights, epochs_for_fit, partial_rounds = plans.get(
            n_classes, fallback_plan
        )

        classifier.set_params(class_weight=weights)

        # run fit epochs followed by several partial_fit iterations
        classifier.set_params(fit__epochs=epochs_for_fit)
        classifier.fit(X_train, y_train)
        after_fit = classifier.predict(X_test)
        assert np.mean(after_fit == 0) > 0.8

        rounds_left = partial_rounds
        while rounds_left > 0:
            classifier.partial_fit(X_train, y_train)
            rounds_left -= 1
        after_partial = classifier.predict(X_test)
        assert np.mean(after_partial == 0) > 0.95