def test_multiple_layers():
    for n_layers in range(3):
        clf = MLPClassifier(hidden_units=(8,) * n_layers, **KWARGS)
        target = iris.target_names[iris.target]
        clf.fit(iris.data, target)
        y_pred = clf.predict(iris.data)
        accuracy = accuracy_score(target, y_pred)
        # Just make sure the model doesn't crash and isn't terrible.
        assert accuracy > 0.9, \
            "low accuracy ({}) with {} layers".format(accuracy, n_layers)

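# The tests in this module lean on shared fixtures (iris, X, X_sp, Y1, Y2,
# KWARGS, ...) defined at module scope. A minimal sketch of what they might
# look like -- the exact toy values and KWARGS entries below are assumptions,
# not the originals:
import numpy as np
import scipy.sparse as sp
from sklearn.datasets import load_iris

iris = load_iris()

# Tiny dense/sparse design matrices and targets for the smoke tests (assumed).
X = np.array([[-1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
X_sp = sp.csr_matrix(X)
Y1 = np.array([0, 1, 1])  # binary targets
Y2 = np.array([2, 1, 0])  # three-class targets

# Shared constructor arguments; the values are illustrative assumptions.
KWARGS = {'random_state': 42, 'n_epochs': 100}
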
def test_persistence():
    """Test that models can be pickled and reloaded."""
    clf = MLPClassifier(random_state=42)
    target = iris.target_names[iris.target]
    clf.fit(iris.data, target)
    probs1 = clf.predict_proba(iris.data)

    b = BytesIO()
    pickle.dump(clf, b)
    clf2 = pickle.loads(b.getvalue())
    probs2 = clf2.predict_proba(iris.data)

    assert_array_almost_equal(probs1, probs2)

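# The same round trip works against a file on disk; a sketch (the helper name,
# the temporary directory, and the file path are illustrative, not part of the
# test suite):
import os
import pickle
import tempfile

def pickle_round_trip(clf):
    """Pickle a fitted model to disk and load it back (illustrative helper)."""
    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d, 'mlp.pkl')
        with open(path, 'wb') as f:
            pickle.dump(clf, f)
        with open(path, 'rb') as f:
            return pickle.load(f)
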
def test_refitting():
    # Check that fitting twice works (e.g., to make sure that fit-related
    # variables are cleared appropriately when refitting).
    X, y = iris.data, iris.target
    clf = MLPClassifier(n_epochs=1)
    clf.fit(X, y)
    assert np.array_equal(clf.classes_, np.unique(y))

    y_binary = (y == y[0]).astype(float)
    clf.fit(X, y_binary)
    assert np.array_equal(clf.classes_, np.unique(y_binary))

def test_dropout():
    """Test classification with dropout (keep_prob < 1)."""
    # Check that predictions are deterministic.
    clf = MLPClassifier(keep_prob=0.5, **KWARGS)
    clf.fit(X_sp, Y1)
    y_pred1 = clf.predict_proba(X_sp)
    for _ in range(100):
        y_pred_i = clf.predict_proba(X_sp)
        assert_array_almost_equal(y_pred1, y_pred_i)

    check_predictions(MLPClassifier(keep_prob=0.5, **KWARGS), X, Y1)
    check_predictions(MLPClassifier(keep_prob=0.5, **KWARGS), X_sp, Y1)

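# `check_predictions` is a helper defined elsewhere in this file. A plausible
# sketch of its contract, assuming it fits on the toy data and expects the
# model to memorize it; the name `check_predictions_sketch` and the exact
# assertions are assumptions, not the real helper:
from numpy.testing import assert_array_equal

def check_predictions_sketch(clf, X, y):
    """Fit `clf` on (X, y) and check that it reproduces the labels."""
    n_samples = len(y)
    classes = np.unique(y)
    predicted = clf.fit(X, y).predict(X)
    assert predicted.shape == (n_samples,)
    assert_array_equal(predicted, y)  # the toy data should be memorized
    probs = clf.predict_proba(X)
    assert probs.shape == (n_samples, len(classes))
    assert np.allclose(probs.sum(axis=1), 1.0)  # rows are distributions
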
def test_cross_val_predict():
    # Make sure it works in cross_val_predict for multiclass.
    X, y = load_iris(return_X_y=True)
    y = LabelBinarizer().fit_transform(y)
    X = StandardScaler().fit_transform(X)

    mlp = MLPClassifier(n_epochs=10,
                        solver_kwargs={'learning_rate': 0.05},
                        random_state=4567).fit(X, y)

    cv = KFold(n_splits=4, random_state=457, shuffle=True)
    y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
    auc = roc_auc_score(y, y_oos, average=None)

    assert np.all(auc >= 0.96)

def test_partial_fit():
    X, y = iris.data, iris.target

    # Predict on the training set and check that it (over)fit as expected.
    clf = MLPClassifier(n_epochs=1)
    for _ in range(30):
        clf.partial_fit(X, y)
    y_pred = clf.predict(X)
    assert ((y_pred - y) ** 2).mean() < 10

    # Check that the classes argument works.
    clf = MLPClassifier(n_epochs=1)
    clf.partial_fit(X[:10], y[:10], classes=np.unique(y))
    for _ in range(30):
        clf.partial_fit(X, y)

    # Check that passing an incomplete classes argument fails.
    with pytest.raises(ValueError):
        clf = MLPClassifier(n_epochs=1)
        clf.partial_fit(X, y, classes=np.array([0, 1]))

def test_replicability():
    """Test that refitting with a fixed seed gives identical predictions."""
    clf = MLPClassifier(keep_prob=0.5, random_state=42)
    target = iris.target_names[iris.target]
    probs1 = clf.fit(iris.data, target).predict_proba(iris.data)
    probs2 = clf.fit(iris.data, target).predict_proba(iris.data)
    assert_array_almost_equal(probs1, probs2)

def test_feed_dict():
    # Note that `tf.nn.dropout` scales kept inputs up by `1 / keep_prob`
    # during training so that no compensating scale-down is needed during
    # prediction.
    # https://github.com/tensorflow/tensorflow/blob/2e152ecd67b3c5080f417260bc751e0c6bd7f1d3/tensorflow/python/ops/nn_ops.py#L1081-L1082

    # Instantiate an MLP and mock out things that would be set in fit.
    mlp = MLPClassifier(keep_prob=0.5)
    mlp.input_targets_ = "input_targets"
    mlp._input_indices = "input_indices"
    mlp._input_values = "input_values"
    mlp._input_shape = "input_shape"
    mlp._keep_prob = "t_keep_prob"
    mlp._sample_weight = "sample_weight"

    # Sparse input, targets given (training): dropout is active.
    mlp.is_sparse_ = True
    X_sparse = MagicMock()
    X_sparse_dok = MagicMock()
    X_sparse.todok.return_value = X_sparse_dok
    X_sparse_dok.nnz = 100
    y = MagicMock()
    fd = mlp._make_feed_dict(X_sparse, y)
    expected_keys = {'input_shape', 'input_values', 'input_indices',
                     'input_targets', 't_keep_prob', 'sample_weight'}
    assert set(fd.keys()) == expected_keys
    assert fd['t_keep_prob'] == 0.5

    # Sparse input, no targets (prediction): dropout is disabled.
    fd = mlp._make_feed_dict(X_sparse)
    expected_keys = {'input_shape', 'input_values', 'input_indices',
                     't_keep_prob', 'sample_weight'}
    assert set(fd.keys()) == expected_keys
    assert fd['t_keep_prob'] == 1.0

    # Dense input, targets given (training).
    mlp.is_sparse_ = False
    X_dense = MagicMock()
    fd = mlp._make_feed_dict(X_dense, y)
    expected_keys = {'input_values', 't_keep_prob', 'input_targets',
                     'sample_weight'}
    assert set(fd.keys()) == expected_keys
    assert fd['t_keep_prob'] == 0.5

    # Dense input, no targets (prediction).
    fd = mlp._make_feed_dict(X_dense)
    expected_keys = {'input_values', 't_keep_prob', 'sample_weight'}
    assert set(fd.keys()) == expected_keys
    assert fd['t_keep_prob'] == 1.0

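# The inverted-dropout scaling referenced in the comment above, in miniature
# (illustrative NumPy only, no TensorFlow; the function name is ours):
def inverted_dropout(x, keep_prob, rng):
    """Zero units with probability 1 - keep_prob and rescale survivors by
    1 / keep_prob, so the expected activation is unchanged and prediction
    needs no compensating scale-down."""
    mask = rng.uniform(size=x.shape) < keep_prob
    return x * mask / keep_prob

# e.g. inverted_dropout(np.ones(4), keep_prob=0.5, rng=np.random.default_rng(0))
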
def test_predict_3_classes():
    """Test multiclass classification."""
    check_predictions(MLPClassifier(**KWARGS), X, Y2)
    check_predictions(MLPClassifier(**KWARGS), X_sp, Y2)

def test_predict_2_classes():
    """Test binary classification."""
    check_predictions(MLPClassifier(**KWARGS), X, Y1)
    check_predictions(MLPClassifier(**KWARGS), X_sp, Y1)

def test_multilabel():
    """Test multilabel classification."""
    check_multilabel_predictions(MLPClassifier(**KWARGS), X, Y_multilabel)
    check_multilabel_predictions(MLPClassifier(**KWARGS), X_sp, Y_multilabel)

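# `check_multilabel_predictions` is the multilabel analogue of
# `check_predictions`, and `Y_multilabel` another module fixture (a binary
# indicator matrix). A sketch under the same assumptions as before; the name
# and assertions are ours, not the real helper's:
def check_multilabel_predictions_sketch(clf, X, y):
    """Fit `clf` on an indicator matrix and check per-label outputs."""
    predicted = clf.fit(X, y).predict(X)
    assert predicted.shape == y.shape
    assert_array_equal(predicted, y)  # expect the toy data to be memorized
    probs = clf.predict_proba(X)
    assert probs.shape == y.shape
    assert np.all((probs >= 0) & (probs <= 1))  # independent per-label probs
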
def test_sample_weight(make_dataset_func, dataset_kwargs):
    """Ensure we handle sample weights for all classification problems."""
    assert_sample_weights_work(
        make_dataset_func,
        dataset_kwargs,
        lambda: MLPClassifier(
            n_epochs=30, random_state=42, keep_prob=0.8, hidden_units=(128,)))

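# `assert_sample_weights_work` is shared test scaffolding (and this test is
# presumably parametrized over dataset constructors; the decorator is not
# shown here). A sketch of the core idea -- zero-weighted samples should not
# influence the fit. Everything below is an assumption about the helper, not
# its actual body:
def assert_sample_weights_work_sketch(make_dataset, dataset_kwargs, make_model):
    X, y = make_dataset(**dataset_kwargs)
    half = len(y) // 2
    sw = np.ones(len(y))
    sw[:half] = 0.0  # zero out the first half of the samples

    # Fitting with zero weights on a slice should look like fitting without
    # that slice at all (up to optimizer nondeterminism).
    clf_weighted = make_model().fit(X, y, sample_weight=sw)
    clf_subset = make_model().fit(X[half:], y[half:])
    p1 = clf_weighted.predict_proba(X)
    p2 = clf_subset.predict_proba(X)
    assert np.mean(np.abs(p1 - p2)) < 0.1  # loose tolerance (assumed)
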
def test_prediction_gradient():
    """Test computation of prediction gradients."""
    # Binary classification.
    n_classes = 1
    mlp = MLPClassifier(n_epochs=100, random_state=42, hidden_units=(5,))
    X, y = make_classification(
        n_samples=1000, n_features=20, n_informative=n_classes, n_redundant=0,
        n_classes=n_classes, n_clusters_per_class=1, shuffle=False)
    mlp.fit(X, y)
    grad = mlp.prediction_gradient(X)
    grad_means = grad.mean(axis=0)
    assert grad.shape == X.shape
    # Check that only the informative feature has a large gradient.
    # The values of 1 and 0.5 here are somewhat arbitrary but should serve as
    # a regression test if nothing else.
    assert np.abs(grad_means[0]) > 1.
    for m in grad_means[1:]:
        assert np.abs(m) < 0.5

    # Multiclass classification: here, we'll just check that it runs and that
    # the output has the right shape.
    n_classes = 5
    X, y = make_classification(
        n_samples=1000, n_features=20, n_informative=n_classes, n_redundant=0,
        n_classes=n_classes, n_clusters_per_class=1, shuffle=False)
    mlp.fit(X, y)
    grad = mlp.prediction_gradient(X)
    assert grad.shape == (X.shape[0], n_classes, X.shape[1])

    # Multilabel binary classification.
    X, y = make_multilabel_classification(
        n_samples=1000, random_state=42, n_classes=n_classes)
    mlp.fit(X, y)
    grad = mlp.prediction_gradient(X)
    assert grad.shape == (X.shape[0], n_classes, X.shape[1])

    # Raise an exception for sparse inputs, which are not yet supported.
    X_sp = sp.csr_matrix(X)
    mlp.fit(X_sp, y)
    with pytest.raises(NotImplementedError):
        mlp.prediction_gradient(X_sp)

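# A typical downstream use of `prediction_gradient` is a quick saliency or
# feature-importance check: average the absolute gradient over samples.
# Sketch only; the function name is ours, and `mlp` is assumed to be a fitted
# binary classifier as in the test above:
def gradient_feature_importance(mlp, X):
    """Mean |d prediction / d feature| per input feature (illustrative)."""
    grad = mlp.prediction_gradient(X)  # (n_samples, n_features) for binary
    return np.abs(grad).mean(axis=0)
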