def test_KerasClassifier_loss_invariance(y, y_type):
    """Test that KerasClassifier can use both
    categorical_crossentropy and sparse_categorical_crossentropy
    with either one-hot encoded targets or sparse targets.

    A classifier is trained with each loss on the same data, using the same
    ``random_state`` and the same ``fit`` + ``partial_fit`` schedule, and the
    predictions of the two classifiers are required to be equal.

    Parameters
    ----------
    y : array-like
        Target array; ``X`` is generated with one sample per row of ``y``.
        Presumably supplied by a pytest parametrization that is not visible
        in this block.
    y_type : str
        Target type label for ``y``. The sparse loss comparison is skipped
        when it equals ``"multilabel-indicator"``.
    """
    X = np.arange(0, y.shape[0]).reshape(-1, 1)
    clf_1 = KerasClassifier(
        model=dynamic_classifier,
        hidden_layer_sizes=(100,),
        loss="categorical_crossentropy",
        random_state=0,
    )
    clf_1.fit(X, y)
    clf_1.partial_fit(X, y)
    y_1 = clf_1.predict(X)
    if y_type != "multilabel-indicator":
        # sparse_categorical_crossentropy is not compatible with
        # one-hot encoded targets, and one-hot encoded targets are not used in sklearn
        # This is a use case that does not natively succeed in Keras or sklearn estimators
        # and thus SciKeras does not intend to auto-convert data to support it
        clf_2 = KerasClassifier(
            model=dynamic_classifier,
            hidden_layer_sizes=(100,),
            loss="sparse_categorical_crossentropy",
            random_state=0,
        )
        clf_2.fit(X, y)
        # Mirror clf_1's training schedule so both models have seen the same
        # number of passes over the data before their predictions are compared.
        clf_2.partial_fit(X, y)
        # Predict with clf_2: comparing clf_1 against itself would make the
        # assertion below vacuous.
        y_2 = clf_2.predict(X)
        np.testing.assert_equal(y_1, y_2)
def test_target_shape_changes_incremental_fit_clf():
    """Check that ``partial_fit`` rejects a target with a different column count.

    The classifier is first fit on a single-column target; a subsequent
    ``partial_fit`` with a two-column target must raise a ``ValueError``
    whose message matches ``"features"``.
    """
    features = np.array([[1, 2], [2, 3]])
    target_column = np.array([1, 3]).reshape(-1, 1)
    clf = KerasClassifier(model=dynamic_classifier, hidden_layer_sizes=(100,))
    clf.fit(features, target_column)

    # Same samples, but the target now has two columns instead of one.
    widened_target = np.column_stack([target_column, target_column])
    with pytest.raises(ValueError, match="features"):  # raised by transformers
        clf.partial_fit(features, widened_target)
def test_X_dtype_changes_incremental_fit():
    """Check how ``partial_fit`` reacts when the dtype of ``X`` changes.

    After an initial ``fit`` on an integer array, a ``partial_fit`` with the
    same data cast to ``uint8`` is expected to succeed, while a
    ``partial_fit`` with the data cast to ``float64`` must raise a
    ``ValueError`` whose message matches ``"Got `X` with dtype"``.
    """
    features = np.array([[1, 2], [2, 3]])
    labels = np.array([1, 3])
    clf = KerasClassifier(model=dynamic_classifier, hidden_layer_sizes=(100,))
    clf.fit(features, labels)

    # This dtype change is expected to be accepted without error.
    features_uint8 = features.astype(np.uint8)
    clf.partial_fit(features_uint8, labels)

    # This dtype change is expected to be rejected.
    features_float64 = features.astype(np.float64)
    with pytest.raises(ValueError, match="Got `X` with dtype"):
        clf.partial_fit(features_float64, labels)
def test_target_dims_changes_incremental_fit():
    """Check that ``partial_fit`` rejects a target whose dimensionality changed.

    The classifier is fit on a 1-dimensional target; calling ``partial_fit``
    with the same values reshaped into a 2-dimensional column must raise a
    ``ValueError`` reporting the dimension mismatch.
    """
    features = np.array([[1, 2], [2, 3]])
    flat_target = np.array([1, 3])
    clf = KerasClassifier(model=dynamic_classifier, hidden_layer_sizes=(100,))
    clf.fit(features, flat_target)

    # Same labels, now shaped (n_samples, 1) instead of (n_samples,).
    column_target = flat_target.reshape(-1, 1)
    expected_message = "`y` has 2 dimensions, but this "
    with pytest.raises(ValueError, match=expected_message):
        clf.partial_fit(features, column_target)
def test_target_classes_change_incremental_fit():
    """Check that ``partial_fit`` rejects a target containing an unseen class.

    The classifier is fit on the classes ``{1, 3}``. A ``partial_fit`` with
    the same target is expected to succeed; after one label is replaced by
    the previously unseen value ``10``, ``partial_fit`` must raise a
    ``ValueError`` whose message matches ``"Found unknown categories"``.
    """
    X = np.array([[1, 2], [2, 3]])
    y = np.array([1, 3])
    est = KerasClassifier(model=dynamic_classifier, hidden_layer_sizes=(100,))
    est.fit(X, y)
    est.partial_fit(X.astype(np.uint8), y)

    # Introduce a class that was not present during the initial fit. This is
    # done outside the ``pytest.raises`` block so that only the call expected
    # to raise is covered by the context manager.
    y[0] = 10
    with pytest.raises(ValueError, match="Found unknown categories"):
        est.partial_fit(X, y)
def test_KerasClassifier_transformers_can_be_reused(y, y_type, loss):
    """Test that ``partial_fit`` re-uses the target encoder created by ``fit``.

    The classifier is fit on a target containing three classes and then
    incrementally fit on a target containing only one of them. The
    ``target_encoder_`` attribute must be the very same object afterwards,
    and ``classes_`` must still contain all three original classes.

    Parameters
    ----------
    y : array-like
        Not used in the test body; presumably present because of a shared
        parametrization that is not visible in this block.
    y_type : str
        Target type label; combined with ``loss`` to detect the
        unsupported configuration.
    loss : str
        Loss passed to ``KerasClassifier``.
    """
    unsupported_combination = (
        y_type == "multilabel-indicator"
        and loss == "sparse_categorical_crossentropy"
    )
    if unsupported_combination:
        # not compatible, see test_KerasClassifier_loss_invariance
        return

    features = np.array([[1, 2, 3]]).T
    all_classes_target = np.array([1, 2, 3])
    clf = KerasClassifier(
        model=dynamic_classifier,
        hidden_layer_sizes=(100,),
        loss=loss,
        random_state=0,
    )
    clf.fit(features, all_classes_target)
    encoder_after_fit = clf.target_encoder_

    # Second batch contains only 1 out of the 3 classes.
    single_class_target = np.array([1, 1, 1])
    clf.partial_fit(features, single_class_target)
    encoder_after_partial_fit = clf.target_encoder_

    # same transformer was re-used
    assert encoder_after_partial_fit is encoder_after_fit
    assert set(clf.classes_) == set(all_classes_target)
def test_class_weight_param():
    """Backport of sklearn.utils.estimator_checks.check_class_weight_classifiers
    for sklearn <= 0.23.0.

    Tests that fit and partial_fit correctly handle the class_weight
    parameter: with class 0 weighted far more heavily than the other
    classes, the large majority of test predictions must be class 0, both
    after ``fit`` and after additional ``partial_fit`` passes.
    """
    clf = KerasClassifier(
        model=dynamic_classifier,
        model__hidden_layer_sizes=(100,),
        random_state=0,
    )
    for requested_centers in (2, 3):
        # create a very noisy dataset
        X, y = make_blobs(
            centers=requested_centers, random_state=0, cluster_std=20
        )
        X_train, X_test, y_train, _ = train_test_split(
            X, y, test_size=0.5, random_state=0
        )

        # Number of classes actually present in the training half.
        n_classes = len(np.unique(y_train))

        # run fit epochs followed by several partial_fit iterations
        # these numbers are purely empirical, just like they are in the
        # original sklearn test
        if n_classes == 2:
            class_weight, fit_epochs, partial_fit_epochs = (
                {0: 1000, 1: 0.0001},
                4,
                3,
            )
        else:
            class_weight, fit_epochs, partial_fit_epochs = (
                {0: 1000, 1: 0.0001, 2: 0.0001},
                8,
                6,
            )

        clf.set_params(class_weight=class_weight)
        clf.set_params(fit__epochs=fit_epochs)
        clf.fit(X_train, y_train)
        share_class_0 = np.mean(clf.predict(X_test) == 0)
        assert share_class_0 > 0.8

        for _ in range(partial_fit_epochs):
            clf.partial_fit(X_train, y_train)
        share_class_0 = np.mean(clf.predict(X_test) == 0)
        assert share_class_0 > 0.95
def test_partial_fit_classes_param(self):
    """Test use of `partial_fit` with the `classes` parameter
    and incomplete classes in the first pass.

    The first batch only contains classes 1 and 2, while the full set
    (1, 2, 3) is announced through ``classes``. Scoring is exercised on
    both batches after each ``partial_fit`` call; the scores themselves are
    not asserted, the calls only need to complete without raising.
    """
    clf = KerasClassifier(
        model=dynamic_classifier,
        loss="sparse_categorical_crossentropy",
        model__hidden_layer_sizes=[
            100,
        ],
    )
    features = np.array([[1, 2, 3], [4, 5, 6]]).T
    first_target = np.array([1, 2, 2])
    second_target = np.array([2, 3, 3])
    # Both batches share the same feature matrix; only the targets differ.
    batches = ((features, first_target), (features, second_target))
    all_classes = np.unique(np.concatenate([first_target, second_target]))

    # First pass: class 3 is missing from the data but declared via `classes`.
    clf.partial_fit(X=features, y=first_target, classes=all_classes)
    for batch_X, batch_y in batches:
        clf.score(batch_X, batch_y)

    # Second pass: the previously missing class now appears in the target.
    clf.partial_fit(X=features, y=second_target)
    for batch_X, batch_y in batches:
        clf.score(batch_X, batch_y)
def test_partial_fit_epoch_kwargs(kwargs):
    """Check that ``partial_fit`` rejects the given keyword arguments.

    Calling ``partial_fit`` with ``**kwargs`` must raise a ``TypeError``
    whose message matches ``"Invalid argument"``.

    Parameters
    ----------
    kwargs : dict
        Keyword arguments forwarded to ``partial_fit``; presumably supplied
        by a pytest parametrization that is not visible in this block.
    """
    clf = KerasClassifier(dynamic_classifier)
    single_sample = [[1]]
    single_label = [1]
    with pytest.raises(TypeError, match="Invalid argument"):
        clf.partial_fit(single_sample, single_label, **kwargs)