def test_embedding_default():
    """The fitted model's transform() embedding is 256-wide by default."""
    X, y = iris.data, iris.target
    clf = MLPClassifier(n_epochs=1)
    clf.fit(X, y)
    embedding = clf.transform(X)
    assert embedding.shape[1] == 256
def test_embedding_no_layers():
    """With no hidden layers, transform() has one column per class."""
    X, y = iris.data, iris.target
    clf = MLPClassifier(n_epochs=1, hidden_units=[])
    clf.fit(X, y)
    n_classes = np.unique(y).shape[0]
    assert clf.transform(X).shape[1] == n_classes
def test_multiple_layers():
    """The classifier reaches high accuracy with 0, 1, and 2 hidden layers."""
    for n_layers in range(3):
        clf = MLPClassifier(hidden_units=(8,) * n_layers, **KWARGS)
        target = iris.target_names[iris.target]
        clf.fit(iris.data, target)
        accuracy = accuracy_score(target, clf.predict(iris.data))
        # Just make sure the model doesn't crash and isn't terrible.
        assert accuracy > 0.9, (
            "low accuracy ({}) with {} layers".format(accuracy, n_layers))
def test_embedding_specific_layer():
    """transform_layer_index selects which hidden layer transform() returns."""
    X, y = iris.data, iris.target
    clf = MLPClassifier(
        n_epochs=1,
        hidden_units=(256, 8, 256),
        transform_layer_index=1,
    )
    clf.fit(X, y)
    # The middle (index-1) hidden layer is 8 units wide.
    assert clf.transform(X).shape[1] == 8
def test_persistence():
    """Test that models can be pickled and reloaded."""
    clf = MLPClassifier(random_state=42)
    labels = iris.target_names[iris.target]
    clf.fit(iris.data, labels)
    probs_before = clf.predict_proba(iris.data)
    buffer = BytesIO()
    pickle.dump(clf, buffer)
    restored = pickle.loads(buffer.getvalue())
    probs_after = restored.predict_proba(iris.data)
    # The round-tripped model must make identical predictions.
    assert_array_almost_equal(probs_before, probs_after)
def test_refitting():
    """Fitting twice resets fit-state (e.g. classes_) from the new targets."""
    X, y = iris.data, iris.target
    clf = MLPClassifier(n_epochs=1)
    clf.fit(X, y)
    assert np.array_equal(clf.classes_, np.unique(y))
    # Refit on a binarized problem; classes_ must reflect the new labels,
    # not the old multiclass ones.
    y_binary = (y == y[0]).astype(float)
    clf.fit(X, y_binary)
    assert np.array_equal(clf.classes_, np.unique(y_binary))
def test_dropout():
    """Test binary classification."""
    # Predictions must be deterministic: dropout is a train-time-only op.
    clf = MLPClassifier(keep_prob=0.5, **KWARGS)
    reference = clf.fit(X_sp, Y1).predict_proba(X_sp)
    for _ in range(100):
        assert_array_almost_equal(reference, clf.predict_proba(X_sp))
    check_predictions(MLPClassifier(keep_prob=0.5, **KWARGS), X, Y1)
    check_predictions(MLPClassifier(keep_prob=0.5, **KWARGS), X_sp, Y1)
def test_alpha_dropout_and_selu():
    """Test binary classification with SELU activation and alpha dropout."""
    # Predictions must be deterministic: dropout is a train-time-only op.
    clf = MLPClassifier(keep_prob=0.7, activation=nn.selu, **KWARGS)
    reference = clf.fit(X_sp, Y1).predict_proba(X_sp)
    for _ in range(100):
        assert_array_almost_equal(reference, clf.predict_proba(X_sp))
    check_predictions(
        MLPClassifier(keep_prob=0.7, activation=nn.selu, **KWARGS), X, Y1)
    check_predictions(
        MLPClassifier(keep_prob=0.7, activation=nn.selu, **KWARGS), X_sp, Y1)
def test_dropout_deterministic():
    """Test binary classification with dropout.

    NOTE(review): this function previously re-used the name ``test_dropout``
    already defined earlier in this file, so one of the two definitions was
    silently shadowed and never collected by pytest. Renamed so both run.
    """
    # Check that predictions are deterministic (dropout must be disabled
    # at predict time).
    clf = MLPClassifier(keep_prob=0.5, **KWARGS)
    clf.fit(X_sp, Y1)
    y_pred1 = clf.predict_proba(X_sp)
    for _ in range(100):
        y_pred_i = clf.predict_proba(X_sp)
        assert_array_almost_equal(y_pred1, y_pred_i)
    check_predictions(
        MLPClassifier(keep_prob=0.5, **KWARGS), X, Y1)
    check_predictions(
        MLPClassifier(keep_prob=0.5, **KWARGS), X_sp, Y1)
def test_prediction_gradient():
    """Test computation of prediction gradients."""
    # Binary classification.
    n_classes = 1
    mlp = MLPClassifier(n_epochs=100, random_state=42, hidden_units=(5,))
    X, y = make_classification(n_samples=1000, n_features=20,
                               n_informative=n_classes, n_redundant=0,
                               n_classes=n_classes, n_clusters_per_class=1,
                               shuffle=False)
    mlp.fit(X, y)
    grad = mlp.prediction_gradient(X)
    assert grad.shape == X.shape
    mean_grad = grad.mean(axis=0)
    # Only the informative (first) feature should carry a large gradient.
    # The thresholds 1 and 0.5 are somewhat arbitrary but should serve as
    # a regression test if nothing else.
    assert np.abs(mean_grad[0]) > 1.
    assert all(np.abs(m) < 0.5 for m in mean_grad[1:])
    # Multiclass classification: here, just check that it runs and that the
    # output has the right shape.
    n_classes = 5
    X, y = make_classification(n_samples=1000, n_features=20,
                               n_informative=n_classes, n_redundant=0,
                               n_classes=n_classes, n_clusters_per_class=1,
                               shuffle=False)
    mlp.fit(X, y)
    grad = mlp.prediction_gradient(X)
    assert grad.shape == (X.shape[0], n_classes, X.shape[1])
    # Multilabel binary classification.
    X, y = make_multilabel_classification(n_samples=1000, random_state=42,
                                          n_classes=n_classes)
    mlp.fit(X, y)
    grad = mlp.prediction_gradient(X)
    assert grad.shape == (X.shape[0], n_classes, X.shape[1])
    # Sparse inputs are not yet supported and must raise.
    X_sparse = sp.csr_matrix(X)
    mlp.fit(X_sparse, y)
    with pytest.raises(NotImplementedError):
        mlp.prediction_gradient(X_sparse)
def test_replicability():
    """Two fits with the same random_state yield identical predictions."""
    clf = MLPClassifier(keep_prob=0.5, random_state=42)
    labels = iris.target_names[iris.target]
    first = clf.fit(iris.data, labels).predict_proba(iris.data)
    second = clf.fit(iris.data, labels).predict_proba(iris.data)
    assert_array_almost_equal(first, second)