Example #1
0
def test_incompatible_output_dimensions():
    """Fitting a 4-column target with a single-output model must fail.

    The target has 4 columns, while the model has one output layer whose
    width is the number of distinct label values in ``y``. ``fit`` is
    expected to raise ``RuntimeError``.
    """
    # 10 samples, 20 features, 4 target columns
    n_samples, n_features, n_targets = 10, 20, 4
    X = np.random.rand(n_samples, n_features)
    y = np.random.randint(low=0, high=3, size=(n_samples, n_targets))

    def build_fn_clf(
        meta: Dict[str, Any],
        compile_kwargs: Dict[str, Any],
    ) -> Model:
        """Build and compile a small Sequential classifier."""
        hidden = Dense(20, input_shape=(20, ), activation="relu")
        # one output unit per distinct label value found in ``y``
        output = Dense(np.unique(y).size, activation="relu")

        model = Sequential()
        for layer in (hidden, output):
            model.add(layer)
        model.compile(
            optimizer="sgd",
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        return model

    classifier = KerasClassifier(model=build_fn_clf)

    with pytest.raises(RuntimeError):
        classifier.fit(X, y)
Example #2
0
def test_parameter_precedence():
    """Check which value wins when a fit parameter is given in several places.

    ``fit__class_weight`` and ``fit__custom`` are set in the constructor,
    and ``custom`` is passed again directly to ``fit``. The model's own
    ``fit`` asserts that it receives the routed ``class_weight`` and the
    ``custom`` value given as a ``fit`` keyword argument.
    """

    class TestModel(Sequential):
        def fit(self, *args, **kwargs):
            # routed built-in parameter must arrive unchanged
            assert kwargs["class_weight"] == {0: 0.5, 1: 0.5}
            # "custom" is not a Keras argument: remove it before delegating
            custom_value = kwargs.pop("custom")
            assert custom_value == "fit_keyword"
            return super().fit(*args, **kwargs)

    def get_model() -> TestModel:
        model_layers = [
            layers_mod.InputLayer((1, )),
            layers_mod.Dense(1, activation="sigmoid"),
        ]
        return TestModel(model_layers)

    X = [[1], [2]]
    y = [0, 1]

    classifier = KerasClassifier(
        get_model,
        loss="binary_crossentropy",
        # a built-in fit parameter, to make sure those can be routed too
        fit__class_weight={0: 0.5, 1: 0.5},
        fit__custom="constructor_routed",
    )

    classifier.fit(X, y, custom="fit_keyword")
Example #3
0
def test_optimizer(optimizer):
    """Tests compiling of single optimizer with options.

    Since there can only ever be a single optimizer, there is no
    ("name", optimizer, "output") option.
    Only optimizer classes will be compiled with custom options,
    all others (class names, function names) should pass through
    untouched.

    Parameters
    ----------
    optimizer
        Either an optimizer class or a string naming one.
    """
    # Single output
    X, y = make_classification()

    est = KerasClassifier(
        model=get_model,
        optimizer=optimizer,
        optimizer__learning_rate=0.15,
        optimizer__momentum=0.5,
        loss="binary_crossentropy",
    )
    est.fit(X, y)
    est_opt = est.model_.optimizer
    if not isinstance(optimizer, str):
        # optimizer classes are compiled with the routed parameters
        assert float(est_opt.momentum.value()) == pytest.approx(0.5)
        assert float(est_opt.learning_rate) == pytest.approx(0.15, abs=1e-6)
    else:
        # strings pass through untouched; the resulting optimizer must be
        # the same class Keras resolves the string to (this comparison was
        # previously evaluated but never asserted)
        assert est_opt.__class__ == optimizers_module.get(optimizer).__class__
def test_compiling_of_routed_parameters():
    """Routed parameters that are classes can themselves receive routed parameters.

    ``loss__param1`` is a list of two ``Foo`` classes. ``foo_kwarg`` is
    routed to all of them, with an index-specific value for element ``0``;
    the test asserts the index-specific value is used for that element.
    """

    X, y = make_classification()

    class Foo:
        got = {}

        def __init__(self, foo_kwarg="foo_kwarg_default"):
            self.foo_kwarg = foo_kwarg

    class MyLoss(losses_module.Loss):
        def __init__(self, param1="param1_default", *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.param1 = param1

        def __call__(self, y_true, y_pred, sample_weight=None):
            return losses_module.binary_crossentropy(y_true, y_pred)

    classifier = KerasClassifier(
        model=get_model,
        loss=MyLoss,
        loss__param1=[Foo, Foo],
        loss__param1__foo_kwarg=1,  # applies to every element
        loss__param1__0__foo_kwarg=2,  # applies to element 0 only
    )
    classifier.fit(X, y)

    compiled_params = classifier.model_.loss.param1
    assert compiled_params[0].foo_kwarg == 2
    assert compiled_params[1].foo_kwarg == 1
Example #5
0
def test_compiling_of_routed_parameters():
    """Routed parameters that are classes can themselves receive routed parameters.

    The loss here is a plain callable class (not a Keras ``Loss`` subclass).
    ``loss__param1`` is a list of two ``Foo`` classes; ``foo_kwarg`` is
    routed to all of them with an index-specific value for element ``0``.
    """

    X, y = make_classification()

    class Foo:
        got = {}

        def __init__(self, foo_kwarg):
            self.foo_kwarg = foo_kwarg

    class MyLoss:
        def __init__(self, param1):
            self.param1 = param1
            # give each instance its own ``__name__``
            self.__name__ = str(id(self))

        def __call__(self, y_true, y_pred):
            return losses_module.binary_crossentropy(y_true, y_pred)

    classifier = KerasClassifier(
        model=get_model,
        loss=MyLoss,
        loss__param1=[Foo, Foo],
        loss__param1__foo_kwarg=1,  # applies to every element
        loss__param1__0__foo_kwarg=2,  # applies to element 0 only
    )
    classifier.fit(X, y)

    compiled_params = classifier.model_.loss.param1
    assert compiled_params[0].foo_kwarg == 2
    assert compiled_params[1].foo_kwarg == 1
Example #6
0
    def test_invalid_build_fn(self):
        """``fit`` raises ``TypeError`` when ``model`` is an arbitrary object."""

        class Model:
            pass

        classifier = KerasClassifier(model=Model())
        features = np.array([[0], [1]])
        labels = np.array([0, 1])
        with pytest.raises(TypeError, match="``model`` must be"):
            classifier.fit(features, labels)
Example #7
0
def test_build_fn_deprecation():
    """Fitting with ``build_fn`` instead of ``model`` emits a ``UserWarning``."""
    expected_message = "``build_fn`` will be renamed to ``model``"
    classifier = KerasClassifier(
        build_fn=dynamic_classifier,
        model__hidden_layer_sizes=(100,),
    )
    with pytest.warns(UserWarning, match=expected_message):
        classifier.fit([[0], [1]], [0, 1])
def test_incompatible_output_dimensions():
    """Fitting a single-column target with a two-output model must fail.

    ``y`` is one-dimensional while the model has a binary head and a
    2-class head. ``fit`` is expected to raise ``ValueError``.
    """
    # 10 samples, 20 features, one integer label per sample
    X = np.random.rand(10, 20)
    y = np.random.randint(low=0, high=3, size=(10,))

    def build_fn_clf(meta: Dict[str, Any], compile_kwargs: Dict[str, Any],) -> Model:
        """Build a functional model with two output heads."""
        inputs = Input((meta["n_features_in_"],))
        hidden = Dense(100)(inputs)

        heads = [
            Dense(1, activation="sigmoid")(hidden),  # binary head
            Dense(2, activation="softmax")(hidden),  # categorical head
        ]

        model = Model([inputs], heads)
        model.compile(loss=["binary_crossentropy", "categorical_crossentropy"])
        return model

    classifier = KerasClassifier(model=build_fn_clf)

    with pytest.raises(ValueError, match="input of size"):
        classifier.fit(X, y)
 def test_metrics(self, metric):
     """Fit with a single metric and check it shows up in the history.

     The case of interest is ``accuracy``: Keras matches it to the loss
     function automatically, so it should reach Keras as a string and
     not as a retrieved function.
     """
     classifier = KerasClassifier(
         model=dynamic_classifier,
         model__hidden_layer_sizes=(100, ),
         metrics=[metric],
     )
     X, y = make_classification()
     classifier.fit(X, y)
     recorded_values = classifier.history_[metric]
     assert len(recorded_values) == 1
Example #10
0
def test_class_weight_balanced(class_weight):
    """KerasClassifier accepts ``class_weight`` in the scikit-learn format.

    First fits with ``class_weight="balanced"``. Then fits with the
    parametrized ``class_weight`` and checks, inside the Keras model's
    ``fit``, that a ``sample_weight`` argument was passed, i.e. that the
    class weights were converted to sample weights.
    """

    balanced_clf = KerasClassifier(
        model=dynamic_classifier,
        model__hidden_layer_sizes=[],
        class_weight="balanced",
    )
    balanced_clf.fit([[1], [1]], [0, 1])

    class TestModel(Sequential):
        def fit(self, *args, **kwargs):
            weights = kwargs["sample_weight"]
            # w / w equals 1 for each of the two samples
            np.testing.assert_equal(weights / weights, [1, 1])
            return super().fit(*args, **kwargs)

    def get_model() -> TestModel:
        model_layers = [
            layers_mod.InputLayer((1, )),
            layers_mod.Dense(1, activation="sigmoid"),
        ]
        return TestModel(model_layers)

    X = [[1], [1]]
    y = [0, 1]

    weighted_clf = KerasClassifier(
        get_model,
        loss="binary_crossentropy",
        class_weight=class_weight,
    )
    weighted_clf.fit(X, y)
Example #11
0
def test_loss_routed_params_iterable(loss, n_outputs_):
    """Tests compiling of loss when it is given as an iterable of losses
    mapping to outputs.

    Parameters
    ----------
    loss
        Loss to wrap in a single-element list.
    n_outputs_
        Number of copies of the target stacked column-wise.
    """

    X, y = make_classification()
    y = np.column_stack([y for _ in range(n_outputs_)]).squeeze()

    # Test iterable with global routed param
    est = KerasClassifier(
        model=get_model,
        loss=[loss],
        loss__from_logits=True,  # default is False
    )
    est.fit(X, y)
    assert est.model_.loss[0].from_logits

    # Test iterable with index-based routed param
    est = KerasClassifier(
        model=get_model,
        loss=[loss],
        loss__from_logits=True,
        loss__0__from_logits=False,  # should override above
    )
    est.fit(X, y)
    # plain truthiness, consistent with the first assertion (no ``== False``)
    assert not est.model_.loss[0].from_logits
Example #12
0
def test_loss_routed_params_dict(loss, n_outputs_):
    """Tests compiling of loss when it is given as a dict of losses
    mapping to outputs.

    Parameters
    ----------
    loss
        Loss to register under the ``"out1"`` key.
    n_outputs_
        Number of copies of the target stacked column-wise.
    """

    X, y = make_classification()
    y = np.column_stack([y for _ in range(n_outputs_)]).squeeze()

    # Test dict with global routed param
    est = KerasClassifier(
        model=get_model,
        loss={"out1": loss},
        loss__from_logits=True,  # default is False
    )
    est.fit(X, y)
    # plain truthiness instead of comparing a boolean with ``== True``
    assert est.model_.loss["out1"].from_logits

    # Test dict with key-based routed param
    est = KerasClassifier(
        model=get_model,
        loss={"out1": loss},
        loss__from_logits=True,
        loss__out1__from_logits=False,  # should override above
    )
    est.fit(X, y)
    assert not est.model_.loss["out1"].from_logits
Example #13
0
def test_callback_compiling_args_or_kwargs():
    """Callbacks can be built from routed positional and keyword arguments.

    Three callback classes are registered by name. Each asserts in its
    constructor that it received exactly the routed arguments and flags
    itself as called; the test finally checks that all three were built.
    """

    def get_clf() -> keras.Model:
        net = keras.models.Sequential()
        for layer in (
            keras.layers.InputLayer((1, )),
            keras.layers.Dense(1, activation="sigmoid"),
        ):
            net.add(layer)
        return net

    class ArgsOnlyCallback(keras.callbacks.Callback):
        def __init__(self, *args):
            assert args == ("arg0", "arg1")
            ArgsOnlyCallback.called = True
            super().__init__()

    class KwargsOnlyCallback(keras.callbacks.Callback):
        def __init__(self, **kwargs):
            assert kwargs == {"kwargname": None}
            KwargsOnlyCallback.called = True
            super().__init__()

    class ArgsAndKwargsCallback(keras.callbacks.Callback):
        def __init__(self, *args, **kwargs):
            assert args == ("arg", )
            assert kwargs == {"kwargname": None}
            ArgsAndKwargsCallback.called = True
            super().__init__()

    callback_classes = (ArgsOnlyCallback, KwargsOnlyCallback, ArgsAndKwargsCallback)

    classifier = KerasClassifier(
        model=get_clf,
        epochs=5,
        optimizer=keras.optimizers.SGD,
        optimizer__learning_rate=0.1,
        loss="binary_crossentropy",
        callbacks={
            "args": ArgsOnlyCallback,
            "kwargs": KwargsOnlyCallback,
            "argskwargs": ArgsAndKwargsCallback,
        },
        # Positional args are deliberately listed out of order: SciKeras
        # should not care about the order of the keys.
        callbacks__args__1="arg1",  # passed as an arg
        callbacks__args__0="arg0",  # passed as an arg
        callbacks__kwargs__kwargname=None,  # passed as a kwarg
        callbacks__argskwargs__0="arg",  # passed as an arg
        callbacks__argskwargs__kwargname=None,  # passed as a kwarg
    )
    classifier.fit([[1]], [1])

    for callback_cls in callback_classes:
        assert callback_cls.called
Example #14
0
def test_loss_invalid_string():
    """A misspelled loss name makes ``fit`` raise ``ValueError``."""

    X, y = make_classification()

    # "binary_crossentropr" (trailing "r") is not a loss
    misspelled_loss = "binary_crossentropr"

    classifier = KerasClassifier(
        model=get_model,
        num_hidden=20,
        loss=misspelled_loss,
    )
    with pytest.raises(ValueError, match="Unknown loss function"):
        classifier.fit(X, y)
Example #15
0
def test_loss(loss, n_outputs_):
    """Compile a single loss together with a routed ``name`` parameter.

    After fitting, the model's loss must either contain ``str(loss)`` in
    its string form or be an instance of ``loss``.
    """

    X, y = make_classification()
    # repeat the target column-wise once per output
    y = np.column_stack([y] * n_outputs_).squeeze()

    classifier = KerasClassifier(
        model=get_model,
        loss=loss,
        loss__name="custom_name",
    )
    classifier.fit(X, y)

    matches_by_name = str(loss) in str(classifier.model_.loss)
    assert matches_by_name or isinstance(classifier.model_.loss, loss)
Example #16
0
def test_optimizer_invalid_string():
    """A misspelled optimizer name makes ``fit`` raise ``ValueError``."""

    X, y = make_classification()

    # "sgf" is not an optimizer
    misspelled_optimizer = "sgf"

    classifier = KerasClassifier(
        model=get_model,
        optimizer=misspelled_optimizer,
        loss="binary_crossentropy",
    )
    with pytest.raises(ValueError, match="Unknown optimizer"):
        classifier.fit(X, y)
Example #17
0
def test_keras(c, s, a, b):
    """Run an incremental hyper-parameter search over a KerasClassifier.

    NOTE(review): the ``c, s, a, b`` arguments and the ``yield`` suggest a
    generator-style cluster test fixture; the decorator is not visible
    here, so confirm against the surrounding file.
    """
    # Mirror the mnist dataset
    X, y = make_classification(n_classes=10, n_features=784, n_informative=100)
    X = X.astype("float32")
    assert y.dtype == np.dtype("int64")

    estimator = KerasClassifier(build_fn=_keras_build_fn, lr=0.01, verbose=False)
    search_space = {"lr": loguniform(1e-3, 1e-1)}

    search = IncrementalSearchCV(
        estimator,
        search_space,
        max_iter=3,
        n_initial_parameters=5,
        decay_rate=None,
    )
    yield search.fit(X, y)

    assert search.best_score_ >= 0

    # Make sure the model trains, and scores aren't constant
    scores_by_model = {}
    for ident, history in search.model_history_.items():
        scores_by_model[ident] = [record["score"] for record in history]

    # every model should have been scored 3 times
    assert all(len(model_scores) == 3 for model_scores in scores_by_model.values())
    distinct_counts = [
        pd.Series(model_scores).nunique()
        for model_scores in scores_by_model.values()
    ]
    assert max(distinct_counts) > 1
Example #18
0
def test_metrics_two_metric_per_output(n_outputs_):
    """Metrics without the ("name", metric, "output")
    syntax should ignore all routed and custom options.

    This tests multiple (two) metrics per output, passed first as a list
    (a list of lists for multi-output models) and then as a dict keyed by
    output name.
    """

    X, y = make_classification()
    y = np.column_stack([y] * n_outputs_).squeeze()

    metric_class = metrics_module.BinaryAccuracy

    def make_pair():
        """Return two freshly built metrics named "1" and "2"."""
        return [metric_class(name="1"), metric_class(name="2")]

    single_output = n_outputs_ == 1

    # loss functions for each output and joined show up as metrics
    metric_idx = 1 + (n_outputs_ if n_outputs_ > 1 else 0)

    # For multi-output models, Keras pre-appends the output name
    expected_name = "1" if single_output else "out1_1"

    # Metrics given as a list (list of lists when there are several outputs)
    if single_output:
        metrics_ = make_pair()
    else:
        metrics_ = [make_pair() for _ in range(n_outputs_)]

    est = KerasClassifier(
        model=get_model,
        loss="binary_crossentropy",
        metrics=metrics_,
    )
    est.fit(X, y)
    assert est.model_.metrics[metric_idx].name == expected_name

    # Metrics given as a dict keyed by output name
    if single_output:
        metrics_ = {"out1": make_pair()}
    else:
        metrics_ = {f"out{i+1}": make_pair() for i in range(n_outputs_)}

    est = KerasClassifier(
        model=get_model,
        loss="binary_crossentropy",
        metrics=metrics_,
    )
    est.fit(X, y)
    assert est.model_.metrics[metric_idx].name == expected_name
Example #19
0
def test_sample_weights_all_zero():
    """``fit`` raises a user-friendly ``ValueError`` when every sample weight is zero."""
    classifier = KerasClassifier(
        model=dynamic_classifier, model__hidden_layer_sizes=(100,),
    )

    # 50 samples with 4 features each and binary labels
    n_samples, n_features = 50, 4
    X = np.random.uniform(size=(n_samples, n_features))
    y = np.random.choice(2, size=n_samples).astype("uint8")
    zero_weights = np.zeros(y.shape)

    expected_message = "only zeros were passed in sample_weight"
    with pytest.raises(ValueError, match=expected_message):
        classifier.fit(X, y, sample_weight=zero_weights)
Example #20
0
 def test_calibratedclassifiercv(self, config):
     """Tests compatibility with Scikit-learn's calibrated classifier CV."""
     # only the first and third entries of the config tuple are used here
     loader, _, build_fn, _ = CONFIG[config]
     wrapped = KerasClassifier(
         build_fn,
         epochs=1,
         model__hidden_layer_sizes=[],
     )
     calibrated = CalibratedClassifierCV(base_estimator=wrapped, cv=5)
     basic_checks(calibrated, loader)
def test_class_weight_param():
    """Backport of sklearn.utils.estimator_checks.check_class_weight_classifiers
    for sklearn <= 0.23.0.

    With class 0 weighted far above the other classes, more than 87% of
    the test predictions are expected to be class 0.
    """
    classifier = KerasClassifier(
        model=dynamic_classifier,
        model__hidden_layer_sizes=(100, ),
        epochs=50,
        random_state=0,
    )
    for n_centers in (2, 3):
        # create a very noisy dataset
        X, y = make_blobs(centers=n_centers, random_state=0, cluster_std=20)
        X_train, X_test, y_train, _ = train_test_split(
            X, y, test_size=0.5, random_state=0
        )

        # count the classes actually present in the training split
        n_classes = len(np.unique(y_train))

        # weight class 0 heavily, every other class negligibly
        class_weight = {0: 1000, 1: 0.0001}
        if n_classes != 2:
            class_weight[2] = 0.0001

        classifier.set_params(class_weight=class_weight)

        classifier.fit(X_train, y_train)
        predictions = classifier.predict(X_test)
        assert np.mean(predictions == 0) > 0.87
Example #22
0
def test_callback_param_routing_syntax(callback_kwargs: Dict[str, Any]):
    """Test support for the various parameter routing syntaxes for callbacks.

    ``callback_kwargs`` is forwarded verbatim to the ``KerasClassifier``
    constructor.
    """

    def get_clf() -> keras.Model:
        net = keras.models.Sequential()
        for layer in (
            keras.layers.InputLayer((1, )),
            keras.layers.Dense(1, activation="sigmoid"),
        ):
            net.add(layer)
        return net

    max_epochs = 5
    classifier = KerasClassifier(
        model=get_clf,
        epochs=max_epochs,
        loss="binary_crossentropy",
        metrics="acc",
        **callback_kwargs,
    )
    classifier.fit([[1], [1]], [0, 1])
    # should early stop after 1-2 epochs (depending on the TF version) since
    # we set the accuracy delta to 1
    # NOTE(review): the callback configuration is supplied by the caller and
    # is not visible here.
    assert classifier.current_epoch < max_epochs
Example #23
0
def test_metrics_invalid_string():
    """A misspelled metric name makes ``fit`` raise ``ValueError``."""

    X, y = make_classification()

    # "acccuracy" (extra `c`) is not a metric
    misspelled_metrics = ["acccuracy"]

    classifier = KerasClassifier(
        model=get_model,
        loss="binary_crossentropy",
        metrics=misspelled_metrics,
    )
    with pytest.raises(ValueError, match="Unknown metric function"):
        classifier.fit(X, y)
Example #24
0
def test_sample_weights_all_zero():
    """``fit`` raises a user-friendly ``RuntimeError`` when every sample weight is zero."""
    classifier = KerasClassifier(
        model=dynamic_classifier,
        model__hidden_layer_sizes=(100, ),
    )

    # 50 samples with 4 features each
    n_samples, n_features = 50, 4
    X = np.random.uniform(size=(n_samples, n_features))
    y = np.random.uniform(size=n_samples)
    zero_weights = np.zeros(y.shape)

    with pytest.raises(RuntimeError, match="no samples left"):
        classifier.fit(X, y, sample_weight=zero_weights)
Example #25
0
def test_loss_uncompilable():
    """Routing parameters to a loss given as a function raises ``TypeError``."""

    X, y = make_classification()

    # a plain function, not a class
    loss_function = losses_module.binary_crossentropy

    classifier = KerasClassifier(
        model=get_model,
        loss=loss_function,
        loss__from_logits=True,
    )
    expected_message = "does not accept parameters because it's not a class"
    with pytest.raises(TypeError, match=expected_message):
        classifier.fit(X, y)
Example #26
0
def test_X_dtype_changes_incremental_fit():
    """``partial_fit`` accepts ``uint8`` features after an integer fit but rejects ``float64``."""
    X = np.array([[1, 2], [2, 3]])
    y = np.array([1, 3])

    classifier = KerasClassifier(model=dynamic_classifier, hidden_layer_sizes=(100,))
    classifier.fit(X, y)

    # this dtype change is expected to be accepted
    classifier.partial_fit(X.astype(np.uint8), y)

    # this one is expected to be rejected
    X_as_float = X.astype(np.float64)
    with pytest.raises(ValueError, match="Got `X` with dtype"):
        classifier.partial_fit(X_as_float, y)
Example #27
0
def test_exclude_parameters_with_further_routing():
    """SciKeras should only route parameters to final destinations that do not contain further routing.

    For example, optimizer__xyz__abc should _not_ be passed to the Optimizer
    as Optimizer(xyz__abc=xyz__abc). The test passes when ``fit`` completes
    without raising.
    """

    def get_model() -> Sequential:
        model_layers = [
            layers_mod.InputLayer((1, )),
            layers_mod.Dense(1, activation="sigmoid"),
        ]
        return Sequential(model_layers)

    X = [[1], [2]]
    y = [0, 1]

    classifier = KerasClassifier(
        get_model,
        loss="binary_crossentropy",
        optimizer__this_should_not_pass__abc="error!",
    )

    classifier.fit(X, y)
Example #28
0
def test_target_classes_change_incremental_fit():
    """``partial_fit`` raises ``ValueError`` when ``y`` contains a class not seen in ``fit``."""
    X = np.array([[1, 2], [2, 3]])
    y = np.array([1, 3])

    classifier = KerasClassifier(model=dynamic_classifier, hidden_layer_sizes=(100,))
    classifier.fit(X, y)
    classifier.partial_fit(X.astype(np.uint8), y)

    # introduce a label that was not present during the initial fit
    y[0] = 10
    with pytest.raises(ValueError, match="Found unknown categories"):
        classifier.partial_fit(X, y)
Example #29
0
def test_metrics_uncompilable():
    """Routing parameters to a metric given as a function raises ``TypeError``."""

    X, y = make_classification()

    # ``metrics_module.get`` returns a function here, not a class
    accuracy_function = metrics_module.get("accuracy")

    classifier = KerasClassifier(
        model=get_model,
        loss="binary_crossentropy",
        metrics=[accuracy_function],
        metrics__name="custom_name",
    )
    expected_message = "does not accept parameters because it's not a class"
    with pytest.raises(TypeError, match=expected_message):
        classifier.fit(X, y)
def test_single_output_multilabel_indicator():
    """Tests that a multilabel-indicator target can be used without errors.

    The target is a random binary matrix with three columns; the first row
    is forced to all ones so the target cannot be mistaken for a one-hot
    encoded multiclass target.
    """
    X = np.random.random(size=(100, 2))
    # ``high`` is exclusive: ``randint(0, 1)`` would only ever produce zeros,
    # so use ``high=2`` to actually draw random 0/1 labels.
    y = np.random.randint(0, 2, size=(100, 3))
    y[0, :] = 1  # i.e. not "one hot encoded"

    def build_fn():
        """Build an (uncompiled) model with one sigmoid unit per label."""
        model = Sequential()
        model.add(Dense(10, input_shape=(2, ), activation="relu"))
        model.add(Dense(3, activation="sigmoid"))
        return model

    clf = KerasClassifier(
        model=build_fn,
        loss="categorical_crossentropy",
    )
    # check that there are no errors
    clf.fit(X, y)
    clf.predict(X)
    # check the target type
    assert clf.target_type_ == "multilabel-indicator"
    # check classes
    np.testing.assert_equal(clf.classes_, np.arange(3))