コード例 #1
0
def test_label_spreading_closed_form(global_dtype, Estimator, parameters,
                                     alpha):
    n_classes = 2
    X, y = make_classification(n_classes=n_classes,
                               n_samples=200,
                               random_state=0)
    X = X.astype(global_dtype, copy=False)
    y[::3] = -1

    gamma = 0.1
    clf = label_propagation.LabelSpreading(gamma=gamma).fit(X, y)
    # adopting notation from Zhou et al (2004):
    S = clf._build_graph()
    Y = np.zeros((len(y), n_classes + 1), dtype=X.dtype)
    Y[np.arange(len(y)), y] = 1
    Y = Y[:, :-1]

    expected = np.dot(np.linalg.inv(np.eye(len(S), dtype=S.dtype) - alpha * S),
                      Y)
    expected /= expected.sum(axis=1)[:, np.newaxis]

    clf = label_propagation.LabelSpreading(max_iter=100,
                                           alpha=alpha,
                                           tol=1e-10,
                                           gamma=gamma)
    clf.fit(X, y)

    assert_allclose(expected, clf.label_distributions_)
コード例 #2
0
def test_predict_sparse_callable_kernel():
    # This is a non-regression test for #15866

    # Custom sparse kernel (top-K RBF)
    def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5):
        nn = NearestNeighbors(n_neighbors=10, metric='euclidean', n_jobs=-1)
        nn.fit(X)
        W = -1 * nn.kneighbors_graph(Y, mode='distance').power(2) * gamma
        np.exp(W.data, out=W.data)
        assert issparse(W)
        return W.T

    n_classes = 4
    n_samples = 500
    n_test = 10
    X, y = make_classification(n_classes=n_classes,
                               n_samples=n_samples,
                               n_features=20,
                               n_informative=20,
                               n_redundant=0,
                               n_repeated=0,
                               random_state=0)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=n_test,
                                                        random_state=0)

    model = label_propagation.LabelSpreading(kernel=topk_rbf)
    model.fit(X_train, y_train)
    assert model.score(X_test, y_test) >= 0.9

    model = label_propagation.LabelPropagation(kernel=topk_rbf)
    model.fit(X_train, y_train)
    assert model.score(X_test, y_test) >= 0.9
コード例 #3
0
def test_valid_alpha():
    n_classes = 2
    X, y = make_classification(n_classes=n_classes, n_samples=200,
                               random_state=0)
    for alpha in [-0.1, 0, 1, 1.1, None]:
        with pytest.raises(ValueError):
            label_propagation.LabelSpreading(alpha=alpha).fit(X, y)
コード例 #4
0
def test_label_spreading_closed_form():
    n_classes = 2
    X, y = make_classification(n_classes=n_classes, n_samples=200, random_state=0)
    y[::3] = -1
    clf = label_propagation.LabelSpreading().fit(X, y)
    # adopting notation from Zhou et al (2004):
    S = clf._build_graph()
    Y = np.zeros((len(y), n_classes + 1))
    Y[np.arange(len(y)), y] = 1
    Y = Y[:, :-1]
    for alpha in [0.1, 0.3, 0.5, 0.7, 0.9]:
        expected = np.dot(np.linalg.inv(np.eye(len(S)) - alpha * S), Y)
        expected /= expected.sum(axis=1)[:, np.newaxis]
        clf = label_propagation.LabelSpreading(max_iter=10000, alpha=alpha)
        clf.fit(X, y)
        assert_array_almost_equal(expected, clf.label_distributions_, 4)
コード例 #5
0
def test_convergence_warning():
    # This is a non-regression test for #5774
    X = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    y = np.array([0, 1, -1])
    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1)
    assert_warns(ConvergenceWarning, mdl.fit, X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1)
    assert_warns(ConvergenceWarning, mdl.fit, X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500)
    assert_no_warnings(mdl.fit, X, y)

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500)
    assert_no_warnings(mdl.fit, X, y)
コード例 #6
0
def test_valid_alpha(global_dtype, alpha):
    n_classes = 2
    X, y = make_classification(n_classes=n_classes,
                               n_samples=200,
                               random_state=0)
    X = X.astype(global_dtype)
    with pytest.raises(ValueError):
        label_propagation.LabelSpreading(alpha=alpha).fit(X, y)
コード例 #7
0
def test_convergence_speed():
    # This is a non-regression test for #5774
    X = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    y = np.array([0, 1, -1])
    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=5000)
    mdl.fit(X, y)

    # this should converge quickly:
    assert mdl.n_iter_ < 10
    assert_array_equal(mdl.predict(X), [0, 1, 1])
コード例 #8
0
def test_label_propagation_non_zero_normalizer():
    # check that we don't divide by zero in case of null normalizer
    # non-regression test for
    # https://github.com/scikit-learn/scikit-learn/pull/15946
    X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]])
    y = np.array([0, 1, -1, -1])
    mdl = label_propagation.LabelSpreading(kernel='knn',
                                           max_iter=100,
                                           n_neighbors=1)
    assert_no_warnings(mdl.fit, X, y)
コード例 #9
0
def test_convergence_warning():
    # This is a non-regression test for #5774
    X = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    y = np.array([0, 1, -1])
    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=1)
    warn_msg = ('max_iter=1 was reached without convergence.')
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1)
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500)
    with pytest.warns(None) as record:
        mdl.fit(X, y)
    assert len(record) == 0

    mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500)
    with pytest.warns(None) as record:
        mdl.fit(X, y)
    assert len(record) == 0
コード例 #10
0
def test_convergence_warning():
    # This is a non-regression test for #5774
    X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]])
    y = np.array([0, 1, -1])
    mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=1)
    warn_msg = "max_iter=1 was reached without convergence."
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=1)
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=500)
    with warnings.catch_warnings():
        warnings.simplefilter("error", ConvergenceWarning)
        mdl.fit(X, y)

    mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=500)
    with warnings.catch_warnings():
        warnings.simplefilter("error", ConvergenceWarning)
        mdl.fit(X, y)
コード例 #11
0
def test_convergence_warning():
    # This is a non-regression test for #5774
    X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]])
    y = np.array([0, 1, -1])
    mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=1)
    warn_msg = "max_iter=1 was reached without convergence."
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=1)
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        mdl.fit(X, y)
    assert mdl.n_iter_ == mdl.max_iter

    mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=500)
    with pytest.warns(None) as record:
        mdl.fit(X, y)
    assert not [w.message for w in record]

    mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=500)
    with pytest.warns(None) as record:
        mdl.fit(X, y)
    assert not [w.message for w in record]