def test_lle_init_parameters(): X = np.random.rand(5, 3) clf = manifold.LocallyLinearEmbedding(eigen_solver="error") msg = "unrecognized eigen_solver 'error'" assert_raise_message(ValueError, msg, clf.fit, X) clf = manifold.LocallyLinearEmbedding(method="error") msg = "unrecognized method 'error'" assert_raise_message(ValueError, msg, clf.fit, X)
def test_lle_manifold(): rng = np.random.RandomState(0) # similar test on a slightly more complex manifold X = np.array(list(product(np.arange(18), repeat=2))) X = np.c_[X, X[:, 0] ** 2 / 18] X = X + 1e-10 * rng.uniform(size=X.shape) n_components = 2 for method in ["standard", "hessian", "modified", "ltsa"]: clf = manifold.LocallyLinearEmbedding(n_neighbors=6, n_components=n_components, method=method, random_state=0) tol = 1.5 if method == "standard" else 3 N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray() reconstruction_error = linalg.norm(np.dot(N, X) - X) assert reconstruction_error < tol for solver in eigen_solvers: clf.set_params(eigen_solver=solver) clf.fit(X) assert clf.embedding_.shape[1] == n_components reconstruction_error = linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 details = ("solver: %s, method: %s" % (solver, method)) assert reconstruction_error < tol, details assert (np.abs(clf.reconstruction_error_ - reconstruction_error) < tol * reconstruction_error), details
def test_lle_simple_grid(): # note: ARPACK is numerically unstable, so this test will fail for # some random seeds. We choose 2 because the tests pass. rng = np.random.RandomState(2) # grid of equidistant points in 2D, n_components = n_dim X = np.array(list(product(range(5), repeat=2))) X = X + 1e-10 * rng.uniform(size=X.shape) n_components = 2 clf = manifold.LocallyLinearEmbedding(n_neighbors=5, n_components=n_components, random_state=rng) tol = 0.1 N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray() reconstruction_error = linalg.norm(np.dot(N, X) - X, 'fro') assert reconstruction_error < tol for solver in eigen_solvers: clf.set_params(eigen_solver=solver) clf.fit(X) assert clf.embedding_.shape[1] == n_components reconstruction_error = linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 assert reconstruction_error < tol assert_almost_equal(clf.reconstruction_error_, reconstruction_error, decimal=1) # re-embed a noisy version of X using the transform method noise = rng.randn(*X.shape) / 100 X_reembedded = clf.transform(X + noise) assert linalg.norm(X_reembedded - clf.embedding_) < tol
def test_integer_input(): rand = np.random.RandomState(0) X = rand.randint(0, 100, size=(20, 3)) for method in ["standard", "hessian", "modified", "ltsa"]: clf = manifold.LocallyLinearEmbedding(method=method, n_neighbors=10) clf.fit(X) # this previously raised a TypeError
def test_pipeline(): # check that LocallyLinearEmbedding works fine as a Pipeline # only checks that no error is raised. # TODO check that it actually does something useful from mrex import pipeline, datasets X, y = datasets.make_blobs(random_state=0) clf = pipeline.Pipeline( [('filter', manifold.LocallyLinearEmbedding(random_state=0)), ('clf', neighbors.KNeighborsClassifier())]) clf.fit(X, y) assert .9 < clf.score(X, y)
fig = plt.figure(figsize=(15, 8)) plt.suptitle("Manifold Learning with %i points, %i neighbors" % (1000, n_neighbors), fontsize=14) ax = fig.add_subplot(251, projection='3d') ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral) ax.view_init(4, -72) methods = ['standard', 'ltsa', 'hessian', 'modified'] labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE'] for i, method in enumerate(methods): t0 = time() Y = manifold.LocallyLinearEmbedding(n_neighbors, n_components, eigen_solver='auto', method=method).fit_transform(X) t1 = time() print("%s: %.2g sec" % (methods[i], t1 - t0)) ax = fig.add_subplot(252 + i) plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral) plt.title("%s (%.2g sec)" % (labels[i], t1 - t0)) ax.xaxis.set_major_formatter(NullFormatter()) ax.yaxis.set_major_formatter(NullFormatter()) plt.axis('tight') t0 = time() Y = manifold.Isomap(n_neighbors, n_components).fit_transform(X) t1 = time() print("Isomap: %.2g sec" % (t1 - t0))
# Cluster using affinity propagation _, labels = cluster.affinity_propagation(edge_model.covariance_) n_labels = labels.max() for i in range(n_labels + 1): print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i]))) # ############################################################################# # Find a low-dimension embedding for visualization: find the best position of # the nodes (the stocks) on a 2D plane # We use a dense eigen_solver to achieve reproducibility (arpack is # initiated with random vectors that we don't control). In addition, we # use a large number of neighbors to capture the large-scale structure. node_position_model = manifold.LocallyLinearEmbedding( n_components=2, eigen_solver='dense', n_neighbors=6) embedding = node_position_model.fit_transform(X.T).T # ############################################################################# # Visualization plt.figure(1, facecolor='w', figsize=(10, 8)) plt.clf() ax = plt.axes([0., 0., 1., 1.]) plt.axis('off') # Display a graph of the partial correlations partial_correlations = edge_model.precision_.copy() d = 1 / np.sqrt(np.diag(partial_correlations)) partial_correlations *= d partial_correlations *= d[:, np.newaxis]
# ---------------------------------------------------------------------- # Isomap projection of the digits dataset print("Computing Isomap projection") t0 = time() X_iso = manifold.Isomap(n_neighbors, n_components=2).fit_transform(X) print("Done.") plot_embedding(X_iso, "Isomap projection of the digits (time %.2fs)" % (time() - t0)) # ---------------------------------------------------------------------- # Locally linear embedding of the digits dataset print("Computing LLE embedding") clf = manifold.LocallyLinearEmbedding(n_neighbors, n_components=2, method='standard') t0 = time() X_lle = clf.fit_transform(X) print("Done. Reconstruction error: %g" % clf.reconstruction_error_) plot_embedding(X_lle, "Locally Linear Embedding of the digits (time %.2fs)" % (time() - t0)) # ---------------------------------------------------------------------- # Modified Locally linear embedding of the digits dataset print("Computing modified LLE embedding") clf = manifold.LocallyLinearEmbedding(n_neighbors, n_components=2, method='modified') t0 = time() X_mlle = clf.fit_transform(X)