def testLabelPropagationClosedForm(self):
    """Check the fitted label distributions against the closed-form
    solution of Zhu et al. (2002)."""
    n_classes = 2
    X, y = make_classification(n_classes=n_classes, n_samples=200,
                               random_state=0)
    # Treat every third sample as unlabelled.
    y[::3] = -1

    # One-hot encode with one extra column: label -1 lands in the last
    # column, which therefore flags the unlabelled samples.
    Y = np.zeros((len(y), n_classes + 1))
    Y[np.arange(len(y)), y] = 1
    unlabelled_idx = Y[:, (-1,)].nonzero()[0]
    labelled_idx = (Y[:, (-1,)] == 0).nonzero()[0]

    clf = LabelPropagation(max_iter=10000, gamma=0.1)
    clf.fit(X, y)

    # Adopting notation from Zhu et al. 2002.
    T_bar = clf._build_graph().to_numpy()
    # np.ix_ builds the same open-mesh integer index as
    # meshgrid(..., indexing='ij'): Tuu[i, j] = T_bar[u[i], u[j]].
    Tuu = T_bar[np.ix_(unlabelled_idx, unlabelled_idx)]
    Tul = T_bar[np.ix_(unlabelled_idx, labelled_idx)]

    # Drop the "unlabelled" indicator column before computing the
    # closed form: Y_u = (I - Tuu)^-1 · Tul · Y_l.
    Y = Y[:, :-1]
    Y_l = Y[labelled_idx, :]
    Y_u = np.linalg.inv(np.eye(Tuu.shape[0]) - Tuu) @ Tul @ Y_l

    expected = Y.copy()
    expected[unlabelled_idx, :] = Y_u
    # Row-normalize so each row is a probability distribution.
    expected /= expected.sum(axis=1)[:, np.newaxis]

    np.testing.assert_array_almost_equal(
        expected, clf.label_distributions_.fetch(), 4)
def testPredictSparseCallableKernel(self):
    """Non-regression test for scikit-learn issue #15866:
    LabelPropagation must accept a callable kernel returning a
    sparse affinity matrix."""

    def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5):
        """Custom sparse kernel: RBF weights restricted to each
        sample's top-K nearest neighbors.

        Returns a sparse (Y-rows x X-rows) weight matrix.
        """
        # Fix: honor the n_neighbors parameter instead of the
        # hard-coded 10 that silently ignored it; the default value
        # keeps the original behavior unchanged.
        nn = NearestNeighbors(n_neighbors=n_neighbors,
                              metric='euclidean', n_jobs=-1)
        nn.fit(X)
        # exp(-gamma * d^2) over the kNN distance graph only.
        W = -1 * mt.power(nn.kneighbors_graph(Y, mode='distance'), 2) * gamma
        W = mt.exp(W)
        assert W.issparse()
        return W.T

    n_classes = 4
    n_samples = 500
    n_test = 10
    X, y = make_classification(n_classes=n_classes,
                               n_samples=n_samples,
                               n_features=20,
                               n_informative=20,
                               n_redundant=0,
                               n_repeated=0,
                               random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=n_test, random_state=0)

    model = LabelPropagation(kernel=topk_rbf)
    model.fit(X_train, y_train)
    # The sparse callable kernel should still give high accuracy.
    assert model.score(X_test, y_test).fetch() >= 0.9