import numpy as np
from scipy.linalg import eigh, qr, svd
from scipy.sparse import bmat, csr_matrix
from scipy.sparse import eye as speye
from scipy.sparse.csgraph import laplacian
from sklearn.metrics.pairwise import pairwise_distances, rbf_kernel
from sklearn.neighbors import NearestNeighbors, kneighbors_graph
from sklearn.utils.extmath import stable_cumsum

# `barycenter_kneighbors_graph` and `null_space` are scikit-learn internals;
# recent releases expose them from `sklearn.manifold._locally_linear`
# (older releases from `sklearn.manifold.locally_linear`).
from sklearn.manifold._locally_linear import (barycenter_kneighbors_graph,
                                              null_space)


def ler(X, Y, n_components=2, affinity='nearest_neighbors',
        n_neighbors=None, gamma=None, mu=1.0, y_gamma=None,
        eigen_solver='auto', tol=1e-6, max_iter=100, random_state=None):
    """
    Laplacian Eigenmaps for Regression (LER)

    Parameters
    ----------
    X : ndarray, 2-dimensional
        The data matrix, shape (num_points, num_dims)

    Y : ndarray, 1 or 2-dimensional
        The response matrix, shape (num_response_points, num_responses).
        Y[i] is assumed to provide the response for X[i], so Y may have
        at most as many rows as X.

    n_components : int
        Number of dimensions for embedding. Default is 2.

    affinity : string, default : "nearest_neighbors"
        How to construct the affinity matrix on X.
        - 'nearest_neighbors' : binary kNN graph
        - 'rbf' : kNN graph with RBF-weighted edges

    n_neighbors : int, optional
        Number of neighbors for kNN graph construction on X.

    gamma : float, optional
        Scaling factor for the RBF kernel on X. Defaults to the inverse
        of the median distance between rows of X.

    mu : float, optional, default=1.0
        Influence of the Y-similarity penalty.

    y_gamma : float, optional
        Scaling factor for the RBF kernel on Y. Defaults to the inverse
        of the median distance between rows of Y.

    Returns
    -------
    embedding : ndarray, 2-dimensional
        The embedding of X, shape (num_points, n_components)
    """
    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)
    if affinity not in ('nearest_neighbors', 'rbf'):
        raise ValueError("affinity must be 'nearest_neighbors' or 'rbf'")
    # both affinity modes build a kNN graph, so n_neighbors must be valid
    # before it is used to fit NearestNeighbors below
    if n_neighbors is None or n_neighbors <= 0:
        raise ValueError("n_neighbors must be positive")
    if affinity == 'rbf' and gamma is not None and gamma <= 0:
        raise ValueError("gamma must be positive")

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1)
    nbrs.fit(X)
    X = nbrs._fit_X

    Nx, d_in = X.shape
    Ny = Y.shape[0]

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if Nx < Ny:
        raise ValueError("X should have at least as many points as Y")
    if n_neighbors >= Nx:
        raise ValueError("n_neighbors must be less than number of points")

    if Y.ndim == 1:
        Y = Y[:, None]

    if y_gamma is None:
        dists = pairwise_distances(Y)
        y_gamma = 1.0 / np.median(dists)

    if affinity == 'nearest_neighbors':
        affinity = kneighbors_graph(X, n_neighbors, include_self=True)
    else:
        if gamma is None:
            dists = pairwise_distances(X)
            gamma = 1.0 / np.median(dists)
        affinity = kneighbors_graph(X, n_neighbors, mode='distance',
                                    include_self=True)
        affinity.data = np.exp(-gamma * affinity.data ** 2)

    K = rbf_kernel(Y, gamma=y_gamma)
    lap = laplacian(affinity, normed=True)
    lapK = laplacian(K, normed=True)

    if Nx > Ny:
        # pad the Y-Laplacian so the unlabeled points form a constant block;
        # an all-zeros block is the alternative kept here for reference:
        # zeros = csr_matrix((Nx - Ny, Nx - Ny), dtype=lap.dtype)
        # lapK = bmat([[lapK, None], [None, zeros]])
        ones = csr_matrix(np.ones((Nx - Ny, Nx - Ny)), dtype=lap.dtype)
        lapK = bmat([[lapK, None], [None, ones]])

    embedding, _ = null_space(lap + mu * lapK, n_components, k_skip=1,
                              eigen_solver=eigen_solver, tol=tol,
                              max_iter=max_iter, random_state=random_state)
    return embedding
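
# --- Usage sketch (illustrative, not part of the original module) ----------
# A minimal example of `ler` on scikit-learn's S-curve, using the intrinsic
# coordinate `t` as the regression response. The `_demo_ler` name and the
# parameter choices below are arbitrary illustration, not canon.
def _demo_ler():
    from sklearn.datasets import make_s_curve
    X, t = make_s_curve(n_samples=500, random_state=0)
    emb = ler(X, t, n_components=2, n_neighbors=10)
    print('LER embedding:', emb.shape)  # expected: (500, 2)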
def locally_linear_embedding(
        X, n_neighbors, n_components, reg=1e-3, eigen_solver='auto',
        tol=1e-6, max_iter=100, method='standard', hessian_tol=1e-4,
        modified_tol=1e-12, random_state=None, n_jobs=None):
    """Perform a Locally Linear Embedding analysis on the data.

    Builds the [N x N] cost matrix M for the chosen variant ('standard',
    'hessian', 'modified', or 'ltsa') and embeds X into the bottom
    (approximately null) eigenvectors of M. Returns the pair
    (embedding, squared reconstruction error) produced by `null_space`.
    """
    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    if method not in ('standard', 'hessian', 'modified', 'ltsa'):
        raise ValueError("unrecognized method '%s'" % method)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
    nbrs.fit(X)
    X = nbrs._fit_X

    N, d_in = X.shape

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")

    if n_neighbors >= N:
        raise ValueError(
            "Expected n_neighbors <= n_samples, "
            "but n_samples = %d, n_neighbors = %d" % (N, n_neighbors))

    if n_neighbors <= 0:
        raise ValueError("n_neighbors must be positive")

    M_sparse = (eigen_solver != 'dense')

    if method == 'standard':
        W = barycenter_kneighbors_graph(
            nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=n_jobs)

        # we'll compute M = (I - W)' (I - W)
        if M_sparse:
            M = speye(*W.shape, format=W.format) - W
            M = (M.T * M).tocsr()
        else:
            M = (W.T * W - W.T - W).toarray()
            M.flat[::M.shape[0] + 1] += 1  # M = W'W - W' - W + I

    elif method == 'hessian':
        dp = n_components * (n_components + 1) // 2

        if n_neighbors <= n_components + dp:
            raise ValueError("for method='hessian', n_neighbors must be "
                             "greater than "
                             "[n_components * (n_components + 3) / 2]")

        neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
                                    return_distance=False)
        neighbors = neighbors[:, 1:]

        Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
        Yi[:, 0] = 1

        M = np.zeros((N, N), dtype=np.float64)

        use_svd = (n_neighbors > d_in)

        for i in range(N):
            Gi = X[neighbors[i]]
            Gi -= Gi.mean(0)

            # build Hessian estimator
            if use_svd:
                U = svd(Gi, full_matrices=0)[0]
            else:
                Ci = np.dot(Gi, Gi.T)
                U = eigh(Ci)[1][:, ::-1]

            Yi[:, 1:1 + n_components] = U[:, :n_components]

            j = 1 + n_components
            for k in range(n_components):
                Yi[:, j:j + n_components - k] = (U[:, k:k + 1] *
                                                 U[:, k:n_components])
                j += n_components - k

            Q, R = qr(Yi)

            w = Q[:, n_components + 1:]
            S = w.sum(0)

            S[np.where(abs(S) < hessian_tol)] = 1
            w /= S

            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
            M[nbrs_x, nbrs_y] += np.dot(w, w.T)

        if M_sparse:
            M = csr_matrix(M)

    elif method == 'modified':
        if n_neighbors < n_components:
            raise ValueError("modified LLE requires "
                             "n_neighbors >= n_components")

        neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
                                    return_distance=False)
        neighbors = neighbors[:, 1:]

        # find the eigenvectors and eigenvalues of each local covariance
        # matrix. We want V[i] to be a [n_neighbors x n_neighbors] matrix,
        # where the columns are eigenvectors
        V = np.zeros((N, n_neighbors, n_neighbors))
        nev = min(d_in, n_neighbors)
        evals = np.zeros([N, nev])

        # choose the most efficient way to find the eigenvectors
        use_svd = (n_neighbors > d_in)

        if use_svd:
            for i in range(N):
                X_nbrs = X[neighbors[i]] - X[i]
                V[i], evals[i], _ = svd(X_nbrs, full_matrices=True)
            evals **= 2
        else:
            for i in range(N):
                X_nbrs = X[neighbors[i]] - X[i]
                C_nbrs = np.dot(X_nbrs, X_nbrs.T)
                evi, vi = eigh(C_nbrs)
                evals[i] = evi[::-1]
                V[i] = vi[:, ::-1]

        # find regularized weights: this is like normal LLE.
        # because we've already computed the SVD of each covariance matrix,
        # it's faster to use this rather than np.linalg.solve
        reg = 1e-3 * evals.sum(1)

        tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))
        tmp[:, :nev] /= evals + reg[:, None]
        tmp[:, nev:] /= reg[:, None]

        w_reg = np.zeros((N, n_neighbors))
        for i in range(N):
            w_reg[i] = np.dot(V[i], tmp[i])
        w_reg /= w_reg.sum(1)[:, None]

        # calculate eta: the median of the ratio of small to large eigenvalues
        # across the points. This is used to determine s_i, below
        rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)
        eta = np.median(rho)

        # find s_i, the size of the "almost null space" for each point:
        # this is the size of the largest set of eigenvalues
        # such that Sum[v; v in set]/Sum[v; v not in set] < eta
        s_range = np.zeros(N, dtype=int)
        evals_cumsum = stable_cumsum(evals, 1)
        eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1
        for i in range(N):
            s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)
        s_range += n_neighbors - nev  # number of zero eigenvalues

        # Now calculate M.
        # This is the [N x N] matrix whose null space is the desired embedding
        M = np.zeros((N, N), dtype=np.float64)
        for i in range(N):
            s_i = s_range[i]

            # select bottom s_i eigenvectors and calculate alpha
            Vi = V[i, :, n_neighbors - s_i:]
            alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)

            # compute Householder matrix which satisfies
            #   Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)
            # using prescription from paper
            h = np.full(s_i, alpha_i) - np.dot(Vi.T, np.ones(n_neighbors))

            norm_h = np.linalg.norm(h)
            if norm_h < modified_tol:
                h *= 0
            else:
                h /= norm_h

            # Householder matrix is
            #   >> Hi = np.identity(s_i) - 2*np.outer(h,h)
            # Then the weight matrix is
            #   >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]
            # We do this much more efficiently:
            Wi = (Vi - 2 * np.outer(np.dot(Vi, h), h) +
                  (1 - alpha_i) * w_reg[i, :, None])

            # Update M as follows:
            #   >> W_hat = np.zeros( (N,s_i) )
            #   >> W_hat[neighbors[i],:] = Wi
            #   >> W_hat[i] -= 1
            #   >> M += np.dot(W_hat,W_hat.T)
            # We can do this much more efficiently:
            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
            M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
            Wi_sum1 = Wi.sum(1)
            M[i, neighbors[i]] -= Wi_sum1
            M[neighbors[i], i] -= Wi_sum1
            M[i, i] += s_i

        if M_sparse:
            M = csr_matrix(M)

    elif method == 'ltsa':
        neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
                                    return_distance=False)
        neighbors = neighbors[:, 1:]

        M = np.zeros((N, N))

        use_svd = (n_neighbors > d_in)

        for i in range(N):
            Xi = X[neighbors[i]]
            Xi -= Xi.mean(0)

            # compute n_components largest eigenvalues of Xi * Xi^T
            if use_svd:
                v = svd(Xi, full_matrices=True)[0]
            else:
                Ci = np.dot(Xi, Xi.T)
                v = eigh(Ci)[1][:, ::-1]

            Gi = np.zeros((n_neighbors, n_components + 1))
            Gi[:, 1:] = v[:, :n_components]
            Gi[:, 0] = 1. / np.sqrt(n_neighbors)

            GiGiT = np.dot(Gi, Gi.T)

            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
            M[nbrs_x, nbrs_y] -= GiGiT
            M[neighbors[i], neighbors[i]] += 1

    return null_space(M, n_components, k_skip=1, eigen_solver=eigen_solver,
                      tol=tol, max_iter=max_iter, random_state=random_state)
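
# --- Usage sketch (illustrative, not part of the original module) ----------
# Compares two LLE variants on the Swiss roll; `null_space` returns both the
# embedding and its reconstruction error, so the function yields a 2-tuple.
# The `_demo_lle` name and parameter choices are arbitrary illustration.
def _demo_lle():
    from sklearn.datasets import make_swiss_roll
    X, _ = make_swiss_roll(n_samples=800, random_state=0)
    for method in ('standard', 'ltsa'):
        emb, err = locally_linear_embedding(X, n_neighbors=12,
                                            n_components=2, method=method)
        print('LLE (%s):' % method, emb.shape, 'error=%.2e' % err)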
def ller(X, Y, n_neighbors, n_components, mu=0.5, gamma=None, reg=1e-3,
         eigen_solver='auto', tol=1e-6, max_iter=100, random_state=None):
    """
    Locally Linear Embedding for Regression (LLER)

    Parameters
    ----------
    X : ndarray, 2-dimensional
        The data matrix, shape (num_data_points, num_dims)

    Y : ndarray, 1 or 2-dimensional
        The response matrix, shape (num_response_points, num_responses).
        Y[i] is assumed to provide the response for X[i], so Y may have
        at most as many rows as X.

    n_neighbors : int
        Number of neighbors for kNN graph construction.

    n_components : int
        Number of dimensions for embedding.

    mu : float, optional
        Influence of the Y-similarity penalty.

    gamma : float, optional
        Scaling factor for RBF kernel on Y.
        Defaults to the inverse of the median distance between rows of Y.

    Returns
    -------
    embedding : ndarray, 2-dimensional
        The embedding of X, shape (num_points, n_components)

    lle_error : float
        The embedding error of X (for a fixed reconstruction matrix W)

    ller_error : float
        The embedding error of X that takes Y into account.
    """
    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    if Y.ndim == 1:
        Y = Y[:, None]

    if gamma is None:
        dists = pairwise_distances(Y)
        gamma = 1.0 / np.median(dists)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1)
    nbrs.fit(X)
    X = nbrs._fit_X

    Nx, d_in = X.shape
    Ny = Y.shape[0]

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if n_neighbors >= Nx:
        raise ValueError("n_neighbors must be less than number of points")
    if n_neighbors <= 0:
        raise ValueError("n_neighbors must be positive")
    if Nx < Ny:
        raise ValueError("X should have at least as many points as Y")

    M_sparse = (eigen_solver != 'dense')

    W = barycenter_kneighbors_graph(nbrs, n_neighbors=n_neighbors, reg=reg)

    if M_sparse:
        M = speye(*W.shape, format=W.format) - W
        M = (M.T * M).tocsr()
    else:
        M = (W.T * W - W.T - W).toarray()
        M.flat[::M.shape[0] + 1] += 1  # M = W'W - W' - W + I

    P = rbf_kernel(Y, gamma=gamma)
    L = laplacian(P, normed=False)

    # optional scaling step: put M and L on comparable scales so that
    # mu has a consistent interpretation
    M /= np.abs(M).max()
    L /= np.abs(L).max()

    if Nx > Ny:
        # pad the Y-Laplacian so the unlabeled points form a constant block;
        # an all-zeros block is the alternative kept here for reference:
        # zeros = csr_matrix((Nx - Ny, Nx - Ny), dtype=M.dtype)
        # L = bmat([[L, None], [None, zeros]])
        ones = csr_matrix(np.ones((Nx - Ny, Nx - Ny)), dtype=M.dtype)
        L = bmat([[L, None], [None, ones]])

    omega = M + mu * L
    embedding, lle_error = null_space(omega, n_components, k_skip=1,
                                      eigen_solver=eigen_solver, tol=tol,
                                      max_iter=max_iter,
                                      random_state=random_state)
    ller_error = np.trace(embedding.T.dot(L).dot(embedding))
    return embedding, lle_error, ller_error
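
# --- Usage sketch (illustrative, not part of the original module) ----------
# Runs `ller` with fewer responses than points (Ny < Nx) to exercise the
# Laplacian-padding branch; the `_demo_ller` name and the sizes below are
# arbitrary illustration, not canon.
def _demo_ller():
    from sklearn.datasets import make_s_curve
    X, t = make_s_curve(n_samples=400, random_state=0)
    emb, lle_err, ller_err = ller(X, t[:300], n_neighbors=8, n_components=2)
    print('LLER:', emb.shape, 'lle_error=%.2e' % lle_err,
          'ller_error=%.2e' % ller_err)


if __name__ == '__main__':
    # smoke-test the three embeddings on synthetic data
    _demo_ler()
    _demo_lle()
    _demo_ller()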