def test_partial_fit_pickle(optim):
    """Check that reloading a model between training steps is harmless.

    Two identically configured regressors are trained side by side with
    ``partial_fit``; one of them is passed through ``_reload`` before each
    step (this is essentially what Dask-ML does for search). Both are
    expected to keep matching weights and predictions.
    """
    X, y = make_regression(n_features=8, n_samples=100)

    reference = KerasRegressor(
        model=dynamic_regressor,
        optimizer=optim,
        random_state=42,
        hidden_layer_sizes=[],
    )

    # The untrained clone must already survive a roundtrip.
    reloaded = _reload(clone(reference))

    # Both estimators have to start out from the same model.
    reference.partial_fit(X, y)
    reloaded.partial_fit(X, y)
    assert _weights_close(reference, reloaded)

    # Keep training; the reload before each step must not change anything.
    for epoch in range(1, 5):
        reference.partial_fit(X, y)
        reloaded = _reload(reloaded, epoch=epoch)
        reloaded = reloaded.partial_fit(X, y)

        # Same weights ...
        assert _weights_close(reference, reloaded)

        # ... and same predictions.
        assert np.allclose(reference.predict(X), reloaded.predict(X))
def test_current_epoch_property(self, warm_start):
    """Test the public ``current_epoch`` property that tracks overall epochs.

    The ``warm_start`` parameter should have NO impact on this behavior.

    Parameters
    ----------
    warm_start
        Value forwarded to the estimator's ``warm_start`` parameter.
    """
    # ``load_boston`` was removed in scikit-learn 1.2. Use the California
    # housing data instead, as the other tests here do. The import is local
    # so this test does not depend on the module-level import list.
    from sklearn.datasets import fetch_california_housing

    data = fetch_california_housing()
    X, y = data.data[:100], data.target[:100]
    epochs = 2
    partial_fit_iter = 4

    estimator = KerasRegressor(
        model=dynamic_regressor,
        loss=KerasRegressor.r_squared,
        model__hidden_layer_sizes=[100],
        epochs=epochs,
        warm_start=warm_start,
    )

    # Check that each partial_fit call trains for 1 epoch
    for k in range(1, partial_fit_iter):
        estimator.partial_fit(X, y)
        assert estimator.current_epoch == k

    # Check that fit calls still train for the number of
    # epochs specified in the constructor
    estimator.fit(X, y)
    assert estimator.current_epoch == epochs

    # partial_fit is able to resume from a non-zero epoch
    estimator.partial_fit(X, y)
    assert estimator.current_epoch == epochs + 1
def test_pickle_with_callbacks():
    """Check that an estimator configured with callbacks can be pickled.

    Callbacks hold a reference to the Keras model itself, which is what
    makes this case worth testing.
    """
    callbacks = [keras.callbacks.Callback()]
    clf = KerasRegressor(model=get_reg, loss="mse", callbacks=callbacks)

    # Only the absence of errors is validated: fit, roundtrip, then use
    # the restored estimator for both prediction and further training.
    clf.fit([[1]], [1])
    payload = pickle.dumps(clf)
    clf = pickle.loads(payload)
    clf.predict([[1]])
    clf.partial_fit([[1]], [1])
def test_target_shape_changes_incremental_fit_reg():
    """Check that ``partial_fit`` rejects a ``y`` with a different column count.

    The regressor is fitted on a single-column target and then handed a
    two-column target, which is expected to raise a ``ValueError``.
    """
    X = np.array([[1, 2], [2, 3]])
    y = np.array([1, 3]).reshape(-1, 1)

    est = KerasRegressor(model=dynamic_regressor, hidden_layer_sizes=(100,))
    est.fit(X, y)

    expected_error = pytest.raises(ValueError, match="Detected `y` to have ")
    with expected_error:
        est.partial_fit(X, np.column_stack([y, y]))
def test_history(self):
    """Check that ``history_`` is a dict with string keys and list values."""
    housing = fetch_california_housing()
    X = housing.data[:100]
    y = housing.target[:100]

    estimator = KerasRegressor(
        model=dynamic_regressor,
        model__hidden_layer_sizes=[],
    )
    estimator.partial_fit(X, y)

    assert isinstance(estimator.history_, dict)
    # Every key must be a string ...
    for key in estimator.history_.keys():
        assert isinstance(key, str)
    # ... and every value a list.
    for series in estimator.history_.values():
        assert isinstance(series, list)
def test_partial_fit_single_epoch(self):
    """Check that ``partial_fit`` trains for exactly one epoch per call.

    This must hold regardless of the ``epochs`` value given to the
    constructor, while ``fit`` must still honor that value.
    """
    housing = fetch_california_housing()
    X = housing.data[:100]
    y = housing.target[:100]
    epochs = 9
    partial_fit_iter = 4

    estimator = KerasRegressor(
        model=dynamic_regressor,
        model__hidden_layer_sizes=[100],
        epochs=epochs,
    )

    # One loss entry is expected per partial_fit call.
    for expected_entries in range(1, partial_fit_iter):
        estimator = estimator.partial_fit(X, y)
        loss_history = estimator.history_["loss"]
        assert len(loss_history) == expected_entries

    # A full fit still runs for the number of epochs from the constructor.
    estimator = estimator.fit(X, y)
    loss_history = estimator.history_["loss"]
    assert len(loss_history) == epochs
def test_X_shape_change():
    """Check that changing the number of dimensions of ``X`` is rejected.

    The estimator is fitted on a 3-dimensional ``X``; a later
    ``partial_fit`` call with a 2-dimensional ``X`` is expected to raise a
    ``ValueError``.
    """
    estimator = KerasRegressor(
        model=dynamic_regressor,
        hidden_layer_sizes=(100,),
    )
    X = np.array([[1, 2], [3, 4]]).reshape(2, 2, 1)
    y = np.array([[0, 1, 0], [1, 0, 0]])

    estimator.fit(X=X, y=y)

    expected_error = pytest.raises(ValueError, match="dimensions in X")
    with expected_error:
        # Same data, but with one dimension fewer than seen during fit.
        estimator.partial_fit(X=X.reshape(2, 2), y=y)
def test_shape_change_error():
    """Check that changing the shape of ``X`` between calls is rejected.

    The estimator is fitted on two-column input; a later ``partial_fit``
    call with only the first column is expected to raise a ``ValueError``.
    """
    estimator = KerasRegressor(
        model=dynamic_regressor,
        loss=KerasRegressor.r_squared,
        hidden_layer_sizes=(100,),
    )
    X = np.array([[1, 2], [3, 4]])
    y = np.array([[0, 1, 0], [1, 0, 0]])

    estimator.fit(X=X, y=y)

    expected_error = pytest.raises(
        ValueError, match=r"but this [\w\d]+ is expecting "
    )
    with expected_error:
        # Keep only the first column so the shape differs from the fit call.
        estimator.partial_fit(X=X[:, :1], y=y)
def test_partial_fit_history_metric_names(self):
    """Check the metric names in ``history_`` across pickle roundtrips.

    Custom metric names must be preserved, and the shorthand ``"mse"``
    must be recorded under its full name ``"mean_squared_error"``.
    """
    housing = fetch_california_housing()
    X = housing.data[:100]
    y = housing.target[:100]

    estimator = KerasRegressor(
        model=dynamic_regressor,
        model__hidden_layer_sizes=[100],
        metrics=["mse", CustomMetric(name="custom_metric")],
    )
    estimator.partial_fit(X, y)

    # Make sure custom metric names are preserved and shorthand metric
    # names are saved by their full name, even after pickling.
    for _ in range(2):
        payload = pickle.dumps(estimator)
        estimator = pickle.loads(payload)
        estimator = estimator.partial_fit(X, y)
        recorded = set(estimator.history_.keys())
        assert recorded == {
            "loss",
            "mean_squared_error",
            "custom_metric",
        }
def test_current_epoch_property(self, warm_start, epochs_prefix):
    """Test the public ``current_epoch`` property that tracks overall epochs.

    The ``warm_start`` parameter should have NO impact on this behavior.

    The prefix should NOT have any impact on behavior. It is tested
    because the ``epochs`` param has special handling within param routing.

    Parameters
    ----------
    warm_start
        Value forwarded to the estimator's ``warm_start`` parameter.
    epochs_prefix
        String prepended to ``"epochs"`` to build the parameter name that
        is passed to ``set_params``.
    """
    # ``load_boston`` was removed in scikit-learn 1.2. Use the California
    # housing data instead, as the other tests here do. The import is local
    # so this test does not depend on the module-level import list.
    from sklearn.datasets import fetch_california_housing

    data = fetch_california_housing()
    X, y = data.data[:10], data.target[:10]
    epochs = 2
    partial_fit_iter = 3

    estimator = KerasRegressor(
        model=dynamic_regressor,
        loss=KerasRegressor.r_squared,
        model__hidden_layer_sizes=[],
        warm_start=warm_start,
    )
    # Set epochs through the (possibly prefixed) parameter name.
    estimator.set_params(**{epochs_prefix + "epochs": epochs})

    # Check that each partial_fit call trains for 1 epoch
    for k in range(1, partial_fit_iter):
        estimator.partial_fit(X, y)
        assert estimator.current_epoch == k

    # Check that fit calls still train for the number of
    # epochs specified in the constructor
    estimator.fit(X, y)
    assert estimator.current_epoch == epochs

    # partial_fit is able to resume from a non-zero epoch
    estimator.partial_fit(X, y)
    assert estimator.current_epoch == epochs + 1
def test_partial_fit_pickle(optim):
    """Check that model pickling does not affect training.

    This is (essentially) what Dask-ML does for a model selection search.

    The test is simple for functional optimizers (like SGD without
    momentum), and tricky for stateful transforms (SGD w/ momentum, Adam,
    Adagrad, etc).

    For more detail, see https://github.com/adriangb/scikeras/pull/126 and
    links within.
    """
    X, y = make_regression(n_features=8, n_samples=100)

    untouched = KerasRegressor(
        model=dynamic_regressor,
        optimizer=optim,
        random_state=42,
        hidden_layer_sizes=[],
    )
    roundtripped = clone(untouched)

    # A roundtrip has to work before any training has happened.
    roundtripped = _reload(roundtripped)

    # After one step each, both estimators must hold the same weights.
    untouched.partial_fit(X, y)
    roundtripped.partial_fit(X, y)
    assert _weights_close(untouched, roundtripped)

    # Train further, reloading one estimator ahead of every step.
    for step in range(4):
        untouched.partial_fit(X, y)
        roundtripped = _reload(roundtripped, epoch=step + 1)
        roundtripped = roundtripped.partial_fit(X, y)

        # The same model must be produced.
        assert _weights_close(untouched, roundtripped)

        # The predictions must agree as well.
        expected = untouched.predict(X)
        actual = roundtripped.predict(X)
        assert np.allclose(expected, actual)
def test_partial_fit_history_len(self):
    """Check that every ``partial_fit`` call adds one entry to ``history_``.

    As per https://github.com/keras-team/keras/issues/1766 there is no
    direct measure of epochs, so the length of the recorded loss history
    is used instead.
    """
    housing = fetch_california_housing()
    X = housing.data[:100]
    y = housing.target[:100]

    estimator = KerasRegressor(
        model=dynamic_regressor,
        metrics="mean_squared_error",
        model__hidden_layer_sizes=[100],
    )

    for calls_so_far in range(1, 11):
        estimator = estimator.partial_fit(X, y)
        # Exactly one loss entry per call made so far.
        assert len(estimator.history_["loss"]) == calls_so_far
        # Only the loss and the requested metric are tracked.
        recorded = set(estimator.history_.keys())
        assert recorded == {
            "loss",
            "mean_squared_error",
        }
def test_partial_fit(self):
    """Check that ``partial_fit`` extends the history and reuses the model."""
    housing = fetch_california_housing()
    X = housing.data[:100]
    y = housing.target[:100]

    estimator = KerasRegressor(
        model=dynamic_regressor,
        model__hidden_layer_sizes=[100],
    )

    # The loss history must grow by one entry per call.
    for expected_entries in (1, 2):
        estimator.partial_fit(X, y)
        assert len(estimator.history_["loss"]) == expected_entries

    # A further call must keep training the same model object.
    model_before = estimator.model_
    estimator.partial_fit(X, y)
    assert (
        estimator.model_ is model_before
    ), "Model memory address should remain constant"