def test_label_spreading_closed_form(global_dtype, Estimator, parameters, alpha):
    """Compare iterative LabelSpreading with the closed-form solution.

    The fitted ``label_distributions_`` must match the row-normalised
    ``(I - alpha * S)^-1 Y`` built from the estimator's own graph.
    ``Estimator`` and ``parameters`` are accepted but not used in this body.
    """
    num_classes = 2
    gamma = 0.1
    X, y = make_classification(n_classes=num_classes, n_samples=200, random_state=0)
    X = X.astype(global_dtype, copy=False)
    # Mark every third sample as unlabeled.
    y[::3] = -1
    n_samples = len(y)

    # adopting notation from Zhou et al (2004):
    graph_model = label_propagation.LabelSpreading(gamma=gamma).fit(X, y)
    S = graph_model._build_graph()

    # One-hot encode the labels; unlabeled samples (-1) land in the extra
    # last column, which is then dropped.
    Y = np.zeros((n_samples, num_classes + 1), dtype=X.dtype)
    Y[np.arange(n_samples), y] = 1
    Y = Y[:, :-1]

    identity = np.eye(len(S), dtype=S.dtype)
    propagator = np.linalg.inv(identity - alpha * S)
    expected = np.dot(propagator, Y)
    expected /= expected.sum(axis=1)[:, np.newaxis]

    iterative = label_propagation.LabelSpreading(
        max_iter=100, alpha=alpha, tol=1e-10, gamma=gamma
    )
    iterative.fit(X, y)
    assert_allclose(expected, iterative.label_distributions_)
def test_predict_sparse_callable_kernel():
    """Fit and score both estimators with a callable kernel returning sparse.

    Both LabelSpreading and LabelPropagation must reach a score of at
    least 0.9 on the held-out samples.
    """
    # This is a non-regression test for #15866

    # Custom sparse kernel (top-K RBF)
    def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5):
        # Honour the ``n_neighbors`` argument instead of a hard-coded 10 so
        # the parameter is not silently ignored.
        nn = NearestNeighbors(
            n_neighbors=n_neighbors, metric='euclidean', n_jobs=-1
        )
        nn.fit(X)
        W = -1 * nn.kneighbors_graph(Y, mode='distance').power(2) * gamma
        # Exponentiate only the stored entries, in place, to keep W sparse.
        np.exp(W.data, out=W.data)
        assert issparse(W)
        return W.T

    n_classes = 4
    n_samples = 500
    n_test = 10
    X, y = make_classification(
        n_classes=n_classes,
        n_samples=n_samples,
        n_features=20,
        n_informative=20,
        n_redundant=0,
        n_repeated=0,
        random_state=0,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=n_test, random_state=0
    )

    model = label_propagation.LabelSpreading(kernel=topk_rbf)
    model.fit(X_train, y_train)
    assert model.score(X_test, y_test) >= 0.9

    model = label_propagation.LabelPropagation(kernel=topk_rbf)
    model.fit(X_train, y_train)
    assert model.score(X_test, y_test) >= 0.9
def test_valid_alpha():
    """LabelSpreading.fit must raise ValueError for each listed alpha."""
    X, y = make_classification(n_classes=2, n_samples=200, random_state=0)
    invalid_alphas = (-0.1, 0, 1, 1.1, None)
    for bad_alpha in invalid_alphas:
        with pytest.raises(ValueError):
            label_propagation.LabelSpreading(alpha=bad_alpha).fit(X, y)
def test_label_spreading_closed_form():
    """Compare iterative LabelSpreading with the closed-form solution.

    For several alpha values, the fitted ``label_distributions_`` must agree
    to 4 decimals with the row-normalised ``(I - alpha * S)^-1 Y``.
    """
    num_classes = 2
    X, y = make_classification(n_classes=num_classes, n_samples=200, random_state=0)
    # Mark every third sample as unlabeled.
    y[::3] = -1
    n_samples = len(y)

    # adopting notation from Zhou et al (2004):
    S = label_propagation.LabelSpreading().fit(X, y)._build_graph()

    # One-hot encode the labels; unlabeled samples (-1) land in the extra
    # last column, which is then dropped.
    Y = np.zeros((n_samples, num_classes + 1))
    Y[np.arange(n_samples), y] = 1
    Y = Y[:, :-1]

    identity = np.eye(len(S))
    for alpha in (0.1, 0.3, 0.5, 0.7, 0.9):
        propagator = np.linalg.inv(identity - alpha * S)
        expected = np.dot(propagator, Y)
        expected /= expected.sum(axis=1)[:, np.newaxis]

        iterative = label_propagation.LabelSpreading(max_iter=10000, alpha=alpha)
        iterative.fit(X, y)
        assert_array_almost_equal(expected, iterative.label_distributions_, 4)
def test_convergence_warning():
    """Check when fit emits ConvergenceWarning for both estimators.

    With ``max_iter=1`` the warning must be raised and ``n_iter_`` must equal
    ``max_iter``; with ``max_iter=500`` no warning may be raised.
    """
    # This is a non-regression test for #5774
    X = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    y = np.array([0, 1, -1])
    estimator_classes = (
        label_propagation.LabelSpreading,
        label_propagation.LabelPropagation,
    )

    # A single iteration must trigger the warning.
    for estimator_cls in estimator_classes:
        mdl = estimator_cls(kernel='rbf', max_iter=1)
        assert_warns(ConvergenceWarning, mdl.fit, X, y)
        assert mdl.n_iter_ == mdl.max_iter

    # With 500 iterations allowed, fit must stay silent.
    for estimator_cls in estimator_classes:
        mdl = estimator_cls(kernel='rbf', max_iter=500)
        assert_no_warnings(mdl.fit, X, y)
def test_valid_alpha(global_dtype, alpha):
    """LabelSpreading.fit must raise ValueError for the given ``alpha``.

    The input features are cast to ``global_dtype`` before fitting.
    """
    features, targets = make_classification(
        n_classes=2, n_samples=200, random_state=0
    )
    features = features.astype(global_dtype)
    with pytest.raises(ValueError):
        label_propagation.LabelSpreading(alpha=alpha).fit(features, targets)
def test_convergence_speed():
    """LabelSpreading with an rbf kernel must converge in under 10 iterations.

    Also checks the predictions on the three training points.
    """
    # This is a non-regression test for #5774
    points = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    labels = np.array([0, 1, -1])
    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=5000).fit(
        points, labels
    )

    # this should converge quickly:
    assert mdl.n_iter_ < 10
    assert_array_equal(mdl.predict(points), [0, 1, 1])
def test_label_propagation_non_zero_normalizer():
    """Fitting a knn-kernel LabelSpreading on this data must emit no warning."""
    # check that we don't divide by zero in case of null normalizer
    # non-regression test for
    # https://github.com/scikit-learn/scikit-learn/pull/15946
    points = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]])
    labels = np.array([0, 1, -1, -1])
    spreading = label_propagation.LabelSpreading(
        kernel='knn', max_iter=100, n_neighbors=1
    )
    assert_no_warnings(spreading.fit, points, labels)
def test_convergence_warning():
    """Check when fit emits ConvergenceWarning for both estimators.

    With ``max_iter=1`` the warning (with the expected message) must be
    raised and ``n_iter_`` must equal ``max_iter``; with ``max_iter=500``
    no warning of any kind may be recorded.
    """
    # Imported locally so this block does not rely on a module-level import.
    import warnings

    # This is a non-regression test for #5774
    X = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    y = np.array([0, 1, -1])
    warn_msg = ('max_iter=1 was reached without convergence.')

    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1)
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1)
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    # ``pytest.warns(None)`` is deprecated and rejected by recent pytest
    # versions; record warnings with the standard library instead.
    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500)
    with warnings.catch_warnings(record=True) as record:
        # "always" ensures no warning is hidden by an existing filter.
        warnings.simplefilter("always")
        mdl.fit(X, y)
    assert len(record) == 0

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500)
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        mdl.fit(X, y)
    assert len(record) == 0
def test_convergence_warning():
    """Check when fit emits ConvergenceWarning for both estimators.

    With ``max_iter=1`` the warning (with the expected message) must be
    raised and ``n_iter_`` must equal ``max_iter``; with ``max_iter=500``
    a ConvergenceWarning is turned into an error, so none may occur.
    """
    # This is a non-regression test for #5774
    X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]])
    y = np.array([0, 1, -1])
    warn_msg = "max_iter=1 was reached without convergence."
    estimator_classes = (
        label_propagation.LabelSpreading,
        label_propagation.LabelPropagation,
    )

    # A single iteration must trigger the warning.
    for estimator_cls in estimator_classes:
        mdl = estimator_cls(kernel="rbf", max_iter=1)
        with pytest.warns(ConvergenceWarning, match=warn_msg):
            mdl.fit(X, y)
        assert mdl.n_iter_ == mdl.max_iter

    # With 500 iterations allowed, a ConvergenceWarning would fail the test.
    for estimator_cls in estimator_classes:
        mdl = estimator_cls(kernel="rbf", max_iter=500)
        with warnings.catch_warnings():
            warnings.simplefilter("error", ConvergenceWarning)
            mdl.fit(X, y)
def test_convergence_warning():
    """Check when fit emits ConvergenceWarning for both estimators.

    With ``max_iter=1`` the warning (with the expected message) must be
    raised and ``n_iter_`` must equal ``max_iter``; with ``max_iter=500``
    no warning of any kind may be recorded.
    """
    # Imported locally so this block does not rely on a module-level import.
    import warnings

    # This is a non-regression test for #5774
    X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]])
    y = np.array([0, 1, -1])
    warn_msg = "max_iter=1 was reached without convergence."

    mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=1)
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=1)
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    # ``pytest.warns(None)`` is deprecated and rejected by recent pytest
    # versions; record warnings with the standard library instead.
    mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=500)
    with warnings.catch_warnings(record=True) as record:
        # "always" ensures no warning is hidden by an existing filter.
        warnings.simplefilter("always")
        mdl.fit(X, y)
    assert not [w.message for w in record]

    mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=500)
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        mdl.fit(X, y)
    assert not [w.message for w in record]