예제 #1
0
    def test_partial_fit_single_epoch(self):
        """Check that ``partial_fit`` always trains for exactly one epoch.

        The ``epochs`` value given to the constructor must only be
        honoured by ``fit``, never by ``partial_fit``.
        """
        dataset = fetch_california_housing()
        X = dataset.data[:100]
        y = dataset.target[:100]
        n_epochs = 9
        n_partial_fits = 4

        estimator = KerasRegressor(
            model=dynamic_regressor,
            model__hidden_layer_sizes=[100],
            epochs=n_epochs,
        )

        # Every partial_fit call must add exactly one entry to the history.
        for expected_len in range(1, n_partial_fits):
            estimator = estimator.partial_fit(X, y)
            assert len(estimator.history_["loss"]) == expected_len

        # After a full fit the history holds one entry per configured epoch.
        estimator = estimator.fit(X, y)
        assert len(estimator.history_["loss"]) == n_epochs
예제 #2
0
    def test_history(self):
        """Check that ``history_`` is a dict with str keys and list values."""
        dataset = fetch_california_housing()
        X = dataset.data[:100]
        y = dataset.target[:100]
        estimator = KerasRegressor(
            model=dynamic_regressor,
            model__hidden_layer_sizes=[],
        )

        estimator.partial_fit(X, y)

        history = estimator.history_
        assert isinstance(history, dict)
        assert all(isinstance(key, str) for key in history)
        assert all(isinstance(value, list) for value in history.values())
예제 #3
0
def test_no_attributes_set_init_no_args():
    """Run ``check_no_attributes_set_in_init`` on an estimator whose build
    function takes no arguments, then make sure it can still be fitted.
    """
    def build_fn():
        """Return a compiled two-layer network with a single input feature."""
        net = Sequential()
        net.add(layers.Dense(1, input_dim=1, activation="relu"))
        net.add(layers.Dense(1))
        net.compile(loss="mse")
        return net

    estimator = KerasRegressor(model=build_fn)
    check_no_attributes_set_in_init(estimator.__name__, estimator)
    estimator.fit([[1]], [1])
예제 #4
0
    def test_current_epoch_property(self, warm_start):
        """Exercise the public ``current_epoch`` property.

        ``current_epoch`` counts the epochs trained so far; the value of
        ``warm_start`` is expected to make no difference to that count.
        """
        # NOTE(review): load_boston was removed in scikit-learn 1.2, so this
        # test can only run against older scikit-learn releases.
        dataset = load_boston()
        X = dataset.data[:100]
        y = dataset.target[:100]
        n_epochs = 2
        n_partial_fits = 4

        estimator = KerasRegressor(
            model=dynamic_regressor,
            loss=KerasRegressor.r_squared,
            model__hidden_layer_sizes=[100],
            epochs=n_epochs,
            warm_start=warm_start,
        )

        # Every partial_fit call advances the counter by one epoch.
        for expected_epoch in range(1, n_partial_fits):
            estimator.partial_fit(X, y)
            assert estimator.current_epoch == expected_epoch

        # After fit, the counter equals the constructor's epochs value.
        estimator.fit(X, y)
        assert estimator.current_epoch == n_epochs

        # partial_fit keeps counting from a non-zero epoch.
        estimator.partial_fit(X, y)
        assert estimator.current_epoch == n_epochs + 1
예제 #5
0
 def test_partial_fit_shorthand_metric_name(self):
     """Check that ``history_`` stores metrics under their long name.

     The estimator is given the shorthand ``"mae"``; the history must
     contain ``"mean_absolute_error"`` and must not contain ``"mae"``.
     """
     X, y = fetch_california_housing(return_X_y=True)
     X, y = X[:100], y[:100]

     est = KerasRegressor(
         model=force_compile_shorthand,
         model__hidden_layer_sizes=(100,),
         metrics=["mae"],  # shorthand
     )
     est.fit(X, y)

     history = est.history_
     assert "mae" not in history and "mean_absolute_error" in history
예제 #6
0
def test_sample_weights_score():
    """Check that ``sample_weight`` passed to ``score`` changes the
    result in the expected direction.
    """
    base_estimator = KerasRegressor(
        model=dynamic_regressor,
        model__hidden_layer_sizes=(100,),
        epochs=10,
        random_state=0,
    )
    weighted = clone(base_estimator)
    unweighted = clone(base_estimator)

    # 10000 identical inputs: half with target 1, half with target -1
    n_per_target = 5000
    X = np.array([1] * (2 * n_per_target)).reshape(-1, 1)
    y = [1] * n_per_target + [-1] * n_per_target

    # both copies are trained on the same data
    weighted.fit(X, y)
    unweighted.fit(X, y)

    # put almost all of the weight on the y=1 samples
    skewed_weights = [0.999] * n_per_target + [0.001] * n_per_target

    # the skewed weights "unbalance" the score, so the unweighted
    # score is expected to be the higher of the two
    weighted_score = weighted.score(X, y, sample_weight=skewed_weights)
    unweighted_score = unweighted.score(X, y)
    assert unweighted_score > weighted_score
예제 #7
0
def test_no_loss(loss, compile):
    """Check that fitting with the given ``loss``/``compile`` combination
    raises a ``ValueError`` asking for a loss function.
    """
    def get_model(compile, meta, compile_kwargs):
        """Build a functional model with one sigmoid head per output."""
        inputs = Input(shape=(meta["n_features_in_"],))
        features = Dense(10, activation="relu")(inputs)
        outputs = []
        for i in range(meta["n_outputs_"]):
            head = Dense(1, activation="sigmoid", name=f"out{i+1}")
            outputs.append(head(features))
        model = Model(inputs, outputs)
        if compile:
            model.compile(**compile_kwargs)
        return model

    est = KerasRegressor(model=get_model, loss=loss, compile=compile)
    with pytest.raises(ValueError, match="must provide a loss function"):
        est.fit([[0], [1]], [0, 1])
예제 #8
0
 def keras_backend_r2(y_true, y_pred):
     """Evaluate a Keras-backend metric on array-likes and return a NumPy value.

     Both inputs are converted to tensors before the metric is called.

     NOTE(review): the name says R^2, but the metric called here is
     ``KerasRegressor.root_mean_squared_error`` -- confirm which is intended.
     """
     true_tensor = convert_to_tensor(y_true)
     pred_tensor = convert_to_tensor(y_pred)
     metric_value = KerasRegressor.root_mean_squared_error(true_tensor, pred_tensor)
     return metric_value.numpy()
예제 #9
0
def test_custom_loss_function():
    """Check that an estimator using a custom loss object passes ``check_pickle``."""
    check_pickle(
        KerasRegressor(
            model=dynamic_regressor,
            loss=CustomLoss(),
            model__hidden_layer_sizes=(100,),
        ),
        fetch_california_housing,
    )
예제 #10
0
def test_run_eagerly():
    """Check that an estimator created with ``run_eagerly=True`` passes ``check_pickle``."""
    check_pickle(
        KerasRegressor(
            model=dynamic_regressor,
            run_eagerly=True,
            model__hidden_layer_sizes=(100,),
        ),
        fetch_california_housing,
    )
예제 #11
0
def test_pickle_with_callbacks():
    """Check that an estimator with callbacks can be pickled.

    Callbacks hold a reference to the Keras model itself, which is what
    makes this round trip worth testing.
    """
    fitted = KerasRegressor(
        model=get_reg,
        loss="mse",
        callbacks=[keras.callbacks.Callback()],
    )
    fitted.fit([[1]], [1])

    # Round-trip and keep using the estimator; only the absence of
    # errors is being verified here.
    restored = pickle.loads(pickle.dumps(fitted))
    restored.predict([[1]])
    restored.partial_fit([[1]], [1])
예제 #12
0
def test_partial_fit_pickle(optim):
    """Check that pickling a model between ``partial_fit`` calls does not
    change how it trains.

    (this is essentially what Dask-ML does for search)
    """
    X, y = make_regression(n_features=8, n_samples=100)

    reference = KerasRegressor(
        model=dynamic_regressor,
        optimizer=optim,
        random_state=42,
        hidden_layer_sizes=[],
    )
    # A round trip must already work on the untrained clone.
    reloaded = _reload(clone(reference))

    # Both estimators have to start out from the same weights.
    reference.partial_fit(X, y)
    reloaded.partial_fit(X, y)
    assert _weights_close(reference, reloaded)

    # Keep training, reloading one of the two before every step.
    for epoch in range(1, 5):
        reference.partial_fit(X, y)
        reloaded = _reload(reloaded, epoch=epoch).partial_fit(X, y)

        # Weights must still agree ...
        assert _weights_close(reference, reloaded)

        # ... and so must the predictions.
        assert np.allclose(reference.predict(X), reloaded.predict(X))
예제 #13
0
def test_no_optimizer(compile):
    """Check that fitting with ``optimizer=None`` surfaces the Keras
    "Could not interpret optimizer identifier" ``ValueError``.
    """
    def get_model(compile, meta, compile_kwargs):
        """Build a functional model with one sigmoid head per output."""
        inputs = Input(shape=(meta["n_features_in_"],))
        features = Dense(10, activation="relu")(inputs)
        outputs = []
        for i in range(meta["n_outputs_"]):
            head = Dense(1, activation="sigmoid", name=f"out{i+1}")
            outputs.append(head(features))
        model = Model(inputs, outputs)
        if compile:
            model.compile(**compile_kwargs)
        return model

    est = KerasRegressor(
        model=get_model,
        loss="mse",
        compile=compile,
        optimizer=None,
    )
    # The error message matched here is raised by Keras itself.
    with pytest.raises(ValueError, match="Could not interpret optimizer identifier"):
        est.fit([[0], [1]], [0, 1])
예제 #14
0
def test_kerasregressor_r2_as_metric():
    """Check that the ``r_squared`` metric reported by Keras agrees with
    ``est.score`` to three decimals, both in the training history and in
    ``model_.evaluate``.
    """
    est = KerasRegressor(
        dynamic_regressor,
        metrics=[KerasRegressor.r_squared],
        epochs=10,
        random_state=0,
    )

    y = np.random.randint(low=0, high=2, size=(1000,))
    X = y.reshape((-1, 1))

    est.fit(X, y)

    score = est.score(X, y)

    # last value recorded during training
    history_r2 = est.history_["r_squared"][-1]
    np.testing.assert_almost_equal(score, history_r2, decimal=3)

    # value computed by evaluating the fitted Keras model
    evaluated = est.model_.evaluate(X, y, return_dict=True)
    np.testing.assert_almost_equal(score, evaluated["r_squared"], decimal=3)
예제 #15
0
def test_run_eagerly():
    """Check that an estimator created with ``run_eagerly=True`` passes ``check_pickle``."""
    # NOTE(review): load_boston was removed in scikit-learn 1.2, so this
    # test can only run against older scikit-learn releases.
    check_pickle(
        KerasRegressor(
            model=dynamic_regressor,
            run_eagerly=True,
            loss=KerasRegressor.r_squared,
            model__hidden_layer_sizes=(100,),
        ),
        load_boston,
    )
예제 #16
0
 def test_partial_fit_history_metric_names(self):
     """Check the metric names stored in ``history_`` across pickling.

     Custom metric names must be preserved and shorthand names must be
     stored under their full name, also after a pickle round trip.
     """
     dataset = fetch_california_housing()
     X = dataset.data[:100]
     y = dataset.target[:100]
     expected_keys = {
         "loss",
         "mean_squared_error",
         "custom_metric",
     }

     estimator = KerasRegressor(
         model=dynamic_regressor,
         model__hidden_layer_sizes=[100],
         metrics=["mse", CustomMetric(name="custom_metric")],
     )
     estimator.partial_fit(X, y)

     for _ in range(2):
         estimator = pickle.loads(pickle.dumps(estimator)).partial_fit(X, y)
         assert set(estimator.history_.keys()) == expected_keys
예제 #17
0
def test_kerasregressor_r2_as_metric_in_model():
    """Check the ``r_squared`` metric values recorded while training a model."""
    n_epochs = 25

    est = KerasRegressor(
        dynamic_regressor,
        metrics=[KerasRegressor.r_squared],
        epochs=n_epochs,
        random_state=42,
    )

    y = np.random.uniform(size=(1000,))
    X = y.reshape((-1, 1))

    est.fit(X, y)

    r2_per_epoch = np.array(est.history_["r_squared"])

    # sanity check: one value per epoch, none of them above 1
    assert np.all(r2_per_epoch <= 1) and len(r2_per_epoch) == n_epochs, r2_per_epoch
    # rough lower bound for the final value given the random seed
    assert r2_per_epoch[-1] > 0.9, r2_per_epoch
예제 #18
0
    def test_partial_fit_history_len(self):
        """Check that each ``partial_fit`` call adds one history entry.

        As per https://github.com/keras-team/keras/issues/1766 there is
        no direct measure of epochs, so the length of ``history_`` is
        used instead.
        """
        dataset = fetch_california_housing()
        X = dataset.data[:100]
        y = dataset.target[:100]
        expected_keys = {
            "loss",
            "mean_squared_error",
        }

        estimator = KerasRegressor(
            model=dynamic_regressor,
            metrics="mean_squared_error",
            model__hidden_layer_sizes=[100],
        )

        for n_calls in range(1, 11):
            estimator = estimator.partial_fit(X, y)
            assert len(estimator.history_["loss"]) == n_calls
            assert set(estimator.history_.keys()) == expected_keys
예제 #19
0
    def test_current_epoch_property(self, warm_start, epochs_prefix):
        """Exercise the public ``current_epoch`` property.

        ``current_epoch`` counts the epochs trained so far. Neither
        ``warm_start`` nor the prefix used for the ``epochs`` parameter
        should change that; the prefix is covered because ``epochs`` has
        special handling within param routing.
        """
        # NOTE(review): load_boston was removed in scikit-learn 1.2, so this
        # test can only run against older scikit-learn releases.
        dataset = load_boston()
        X = dataset.data[:10]
        y = dataset.target[:10]
        n_epochs = 2
        n_partial_fits = 3

        estimator = KerasRegressor(
            model=dynamic_regressor,
            loss=KerasRegressor.r_squared,
            model__hidden_layer_sizes=[],
            warm_start=warm_start,
        )
        epochs_param = epochs_prefix + "epochs"
        estimator.set_params(**{epochs_param: n_epochs})

        # Every partial_fit call advances the counter by one epoch.
        for expected_epoch in range(1, n_partial_fits):
            estimator.partial_fit(X, y)
            assert estimator.current_epoch == expected_epoch

        # After fit, the counter equals the configured epochs value.
        estimator.fit(X, y)
        assert estimator.current_epoch == n_epochs

        # partial_fit keeps counting from a non-zero epoch.
        estimator.partial_fit(X, y)
        assert estimator.current_epoch == n_epochs + 1
예제 #20
0
def test_target_shape_changes_incremental_fit_reg():
    """Check that ``partial_fit`` raises a ``ValueError`` when ``y`` has
    more columns than the ``y`` the regressor was fitted with.
    """
    X = np.array([[1, 2], [2, 3]])
    y = np.array([1, 3]).reshape(-1, 1)

    est = KerasRegressor(model=dynamic_regressor, hidden_layer_sizes=(100,))
    est.fit(X, y)

    with pytest.raises(ValueError, match="Detected `y` to have "):
        # same targets duplicated into a second column
        est.partial_fit(X, np.column_stack([y, y]))
예제 #21
0
    def test_partial_fit(self):
        """Check that ``partial_fit`` extends the loss history and keeps
        using the same model object.
        """
        dataset = fetch_california_housing()
        X = dataset.data[:100]
        y = dataset.target[:100]
        estimator = KerasRegressor(
            model=dynamic_regressor,
            model__hidden_layer_sizes=[100],
        )

        # The loss history grows by one entry per call.
        for expected_len in (1, 2):
            estimator.partial_fit(X, y)
            assert len(estimator.history_["loss"]) == expected_len

        # A further call must not replace the model instance.
        model_before = estimator.model_
        estimator.partial_fit(X, y)
        assert estimator.model_ is model_before, "Model memory address should remain constant"
예제 #22
0
def test_compile_model_from_params():
    """Check how constructor parameters interact with model compilation.

    An un-compiled model returned by the build function must end up with
    the estimator's ``loss``; a model the build function already compiled
    must keep the loss it was compiled with.
    """
    # NOTE(review): load_boston was removed in scikit-learn 1.2, so this
    # test can only run against older scikit-learn releases.
    dataset = load_boston()
    X = dataset.data[:100]
    y = dataset.target[:100]
    n_features = X.shape[1]

    losses = ("mean_squared_error", "mean_absolute_error")

    def build_fn(compile_with_loss=None):
        """Build a small dense network; compile it only if a loss is given."""
        model = Sequential()
        model.add(keras.layers.Dense(n_features, input_shape=(n_features,)))
        model.add(keras.layers.Activation("relu"))
        model.add(keras.layers.Dense(1))
        model.add(keras.layers.Activation("linear"))
        if compile_with_loss:
            model.compile(loss=compile_with_loss)
        return model

    # Case 1: build_fn returns an un-compiled model (compile_with_loss=None),
    # so the loss passed to the estimator is the one on the model.
    for expected_loss in losses:
        estimator = KerasRegressor(
            model=build_fn,
            loss=expected_loss,
            compile_with_loss=None,
        )
        estimator.fit(X, y)
        assert estimator.model_.loss.__name__ == expected_loss

    # Case 2: build_fn compiles the model itself, which takes precedence
    # over the estimator's own loss parameter.
    for compiled_loss in losses:
        estimator = KerasRegressor(
            model=build_fn,
            loss="binary_crossentropy",
            compile_with_loss=compiled_loss,
        )
        estimator.fit(X, y)
        assert estimator.model_.loss == compiled_loss
예제 #23
0
def continuous():
    """Yield one ``TestParams`` case per target dtype, pairing an
    ``MLPRegressor`` with a ``KerasRegressor`` on the same data.
    """
    # integer-valued targets keep the scores measurable when casting to uint8
    y = np.random.randint(low=0, high=2, size=(1000,))
    X = y.reshape(-1, 1)
    sklearn_est = MLPRegressor(**mlp_kwargs)
    scikeras_est = KerasRegressor(dynamic_regressor, **scikeras_kwargs)
    target_dtypes = ("float32", "float64", "int64", "int32", "uint8", "uint16")
    for dtype in target_dtypes:
        yield TestParams(
            sklearn_est=sklearn_est,
            scikeras_est=scikeras_est,
            X=X,
            y=y.astype(dtype),
            X_expected_dtype_keras=X.dtype,
            y_expected_dtype_keras=dtype,
            min_score=0.99,
            scorer=r2_score,
        )
예제 #24
0
def test_X_shape_change():
    """Check that ``partial_fit`` raises a ``ValueError`` when ``X`` has a
    different number of dimensions than the ``X`` used for ``fit``.
    """
    estimator = KerasRegressor(
        model=dynamic_regressor,
        hidden_layer_sizes=(100,),
    )
    X_3d = np.array([[1, 2], [3, 4]]).reshape(2, 2, 1)
    y = np.array([[0, 1, 0], [1, 0, 0]])

    estimator.fit(X=X_3d, y=y)

    with pytest.raises(ValueError, match="dimensions in X"):
        # same values, but one dimension fewer than at fit time
        estimator.partial_fit(X=X_3d.reshape(2, 2), y=y)
예제 #25
0
def test_batch_size_all_fit(length, prefix, base):
    """Check that passing -1 for the ``prefix + base`` parameter makes the
    Keras model's ``fit`` receive ``X.shape[0]`` under the ``base`` keyword.
    """
    param_name = prefix + base

    y = np.random.random((length,))
    X = y.reshape((-1, 1))
    est = KerasRegressor(dynamic_regressor, hidden_layer_sizes=[], **{param_name: -1})

    # build model_ up front so that its fit method can be wrapped
    est.initialize(X, y)

    original_fit = est.model_.fit

    def fit_with_check(**kwargs):
        """Verify the forwarded value, then delegate to the real ``fit``."""
        assert kwargs[base] == X.shape[0]
        return original_fit(**kwargs)

    with mock.patch.object(est.model_, "fit", new=fit_with_check):
        est.fit(X, y)
예제 #26
0
def test_shape_change_error():
    """Check that ``partial_fit`` raises a ``ValueError`` when ``X`` has
    fewer columns than the ``X`` used for ``fit``.
    """
    estimator = KerasRegressor(
        model=dynamic_regressor,
        loss=KerasRegressor.r_squared,
        hidden_layer_sizes=(100,),
    )
    X_full = np.array([[1, 2], [3, 4]])
    y = np.array([[0, 1, 0], [1, 0, 0]])

    estimator.fit(X=X_full, y=y)

    with pytest.raises(ValueError, match=r"but this [\w\d]+ is expecting "):
        # only the first column of X is passed this time
        estimator.partial_fit(X=X_full[:, :1], y=y)
예제 #27
0
def test_partial_fit_pickle(optim):
    """Check that pickling a model between ``partial_fit`` calls does not
    change how it trains.

    This is (essentially) what Dask-ML does for a model selection search.

    The check is simple for functional optimizers (like SGD without
    momentum) and tricky for stateful transforms (SGD w/ momentum, Adam,
    Adagrad, etc). For more detail, see
    https://github.com/adriangb/scikeras/pull/126 and links within.
    """
    X, y = make_regression(n_features=8, n_samples=100)

    reference = KerasRegressor(
        model=dynamic_regressor,
        optimizer=optim,
        random_state=42,
        hidden_layer_sizes=[],
    )
    # A round trip must already work on the untrained clone.
    reloaded = _reload(clone(reference))

    # Both estimators have to start out from the same weights.
    reference.partial_fit(X, y)
    reloaded.partial_fit(X, y)
    assert _weights_close(reference, reloaded)

    # Keep training, reloading one of the two before every step.
    for epoch in range(1, 5):
        reference.partial_fit(X, y)
        reloaded = _reload(reloaded, epoch=epoch).partial_fit(X, y)

        # Weights must still agree ...
        assert _weights_close(reference, reloaded)

        # ... and so must the predictions.
        assert np.allclose(reference.predict(X), reloaded.predict(X))
예제 #28
0
    assert y_out_scikeras.shape == y_out_sklearn.shape
    # Check dtype
    # By default, KerasRegressor (or rather it's default target_encoder)
    # always returns tf.keras.backend.floatx(). This is similar to sklearn, which always
    # returns float64, except that we avoid a pointless conversion from
    # float32 -> float64 that would just be adding noise if TF is using float32
    # internally (which is usually the case).
    assert y_out_scikeras.dtype.name == tf.keras.backend.floatx()
    scikeras_score = test_data.scorer(y_test, y_out_scikeras)
    assert scikeras_score >= test_data.min_score


# Run for both a regressor and a classifier without hidden layers ...
@pytest.mark.parametrize(
    "est",
    (
        KerasRegressor(dynamic_regressor, model__hidden_layer_sizes=[]),
        KerasClassifier(dynamic_classifier, model__hidden_layer_sizes=[]),
    ),
)
# ... and for every input dtype listed here, including ``object``.
@pytest.mark.parametrize(
    "X_dtype", ("float32", "float64", "int64", "int32", "uint8", "uint16", "object")
)
def test_input_dtype_conversion(X_dtype, est):
    """Tests that using the default transformers in SciKeras,
    `X` is not converted/modified unless it is of dtype object.
    This mimics the behavior of sklearn estimators, which
    try to cast object -> numeric.
    """
    # Targets are the integers 0..9.
    y = np.arange(0, 10, 1, int)
    # Two columns of uniform random values, cast to the dtype under test.
    X = np.random.uniform(size=(y.shape[0], 2)).astype(X_dtype)
    # NOTE(review): only this initial fit is visible here; the dtype
    # assertions described in the docstring presumably follow -- confirm.
    est.fit(X, y)  # generate model_
예제 #29
0
# slightly if X is shuffled.
# This is only required for these tests and is not really
# applicable to real world datasets
batch_size = 1000


@parametrize_with_checks(
    estimators=[
        MultiOutputClassifier(
            model=dynamic_classifier,
            batch_size=batch_size,
            model__hidden_layer_sizes=[],
        ),
        KerasRegressor(
            model=dynamic_regressor,
            batch_size=batch_size,
            model__hidden_layer_sizes=[],
        ),
    ],
)
def test_fully_compliant_estimators_low_precision(estimator, check):
    """Run the scikit-learn estimator checks that pass with sklearn's
    default tolerances and a single epoch.

    Checks listed in ``higher_precision`` are skipped here.
    """
    if check.func.__name__ in higher_precision:
        pytest.skip(
            "This test is run as part of test_fully_compliant_estimators_high_precision."
        )
    check(estimator)

예제 #30
0
class TestRandomState:
    """Tests for the ``random_state`` parameter and for how fitting and
    predicting treat TensorFlow-related environment variables.
    """

    @staticmethod
    def _set_or_clear_env(name, value):
        """Set environment variable ``name`` to ``value``; unset it when
        ``value`` is ``None``.
        """
        if value is None:
            # pop() with a default also removes a variable that is set to
            # an empty string, which a truthiness check would leave behind.
            os.environ.pop(name, None)
        else:
            os.environ[name] = value

    @staticmethod
    def _assert_env(name, expected):
        """Assert that ``name`` equals ``expected``, or is unset when
        ``expected`` is ``None``.
        """
        if expected is None:
            assert name not in os.environ
        else:
            assert os.environ[name] == expected

    @pytest.mark.parametrize(
        "random_state",
        [0, 123, np.random.RandomState(0)],
    )
    @pytest.mark.parametrize(
        "estimator",
        [
            KerasRegressor(
                model=dynamic_regressor,
                loss=KerasRegressor.r_squared,
                model__hidden_layer_sizes=(100, ),
            ),
            KerasClassifier(model=dynamic_classifier,
                            model__hidden_layer_sizes=(100, )),
        ],
    )
    def test_random_states(self, random_state, estimator):
        """Tests that the random_state parameter correctly
        engages deterministic training and prediction.
        """
        X, y = make_classification()

        # Work on a clone so the estimator instance shared between the
        # parametrized cases is never mutated.
        estimator = clone(estimator)

        # With seed: two consecutive fits must give the same predictions
        estimator.set_params(random_state=random_state)
        estimator.fit(X, y)
        y1 = estimator.predict(X)
        estimator.fit(X, y)
        y2 = estimator.predict(X)
        assert np.allclose(y1, y2)

        if isinstance(estimator, KerasRegressor):
            # Without seed, regressors should NOT
            # give the same results
            # Classifiers _may_ give the same classes
            estimator.set_params(random_state=None)
            estimator.fit(X, y)
            y1 = estimator.predict(X)
            estimator.fit(X, y)
            y2 = estimator.predict(X)
            assert not np.allclose(y1, y2)

    @pytest.mark.parametrize(
        "estimator",
        [
            KerasRegressor(
                model=dynamic_regressor,
                loss=KerasRegressor.r_squared,
                model__hidden_layer_sizes=(100, ),
            ),
            KerasClassifier(model=dynamic_classifier,
                            model__hidden_layer_sizes=(100, )),
        ],
    )
    @pytest.mark.parametrize("pyhash", [None, "0", "1"])
    @pytest.mark.parametrize("gpu", [None, "0", "1"])
    def test_random_states_env_vars(self, estimator, pyhash, gpu):
        """Tests that the random state context management correctly
        handles TF related env variables.

        ``gpu`` and ``pyhash`` are the values given to
        ``TF_DETERMINISTIC_OPS`` and ``PYTHONHASHSEED`` before fitting
        (``None`` means the variable is unset). Both must be unchanged
        after ``fit`` and again after ``predict``.
        """
        # Local import keeps this block independent of the module imports.
        from unittest import mock

        X, y = make_classification()
        expected_env = {
            "TF_DETERMINISTIC_OPS": gpu,
            "PYTHONHASHSEED": pyhash,
        }

        # Two independent copies with the same seed; the shared
        # parametrized instance itself is left untouched.
        estimators = [clone(estimator), clone(estimator)]
        for est in estimators:
            if "random_state" in est.get_params():
                est.set_params(random_state=0)

        # patch.dict snapshots os.environ and restores it on exit, so the
        # variables changed here cannot leak into other tests.
        with mock.patch.dict(os.environ):
            for name, value in expected_env.items():
                self._set_or_clear_env(name, value)

            for est in estimators:
                est.fit(X, y)
            # fitting must leave the environment as it was found
            for name, value in expected_env.items():
                self._assert_env(name, value)

            y1, y2 = (est.predict(X) for est in estimators)
            assert np.allclose(y1, y2)
            # predicting must leave the environment as it was found
            for name, value in expected_env.items():
                self._assert_env(name, value)