Esempio n. 1
0
def ler(X, Y, n_components=2, affinity='nearest_neighbors',
        n_neighbors=None, gamma=None, mu=1.0, y_gamma=None,
        eigen_solver='auto', tol=1e-6, max_iter=100, 
        random_state=None):

    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1)
    nbrs.fit(X)
    X = nbrs._fit_X

    Nx, d_in = X.shape
    Ny = Y.shape[0]

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if Nx != Ny:
        raise ValueError("X and Y must have same number of points")
    if affinity == 'nearest_neighbors':
        if n_neighbors >= Nx:
            raise ValueError("n_neighbors must be less than number of points")
        if n_neighbors == None or n_neighbors <= 0:
            raise ValueError("n_neighbors must be positive")
    elif affinity == 'rbf':
        if gamma != None and gamma <= 0:
            raise ValueError("n_neighbors must be positive")
    else:
        raise ValueError("affinity must be 'nearest_neighbors' or 'rbf' must be positive")

    if Y.ndim == 1:
        Y = Y[:, None]

    if y_gamma is None:
        dists = pairwise_distances(Y)
        y_gamma = 1.0 / median(dists)

    if affinity == 'nearest_neighbors':
        affinity = kneighbors_graph(X, n_neighbors, include_self=True)
    else:
        if gamma == None:
            dists = pairwise_distances(X)
            gamma = 1.0 / median(dists)
        affinity = kneighbors_graph(X, n_neighbors, mode='distance', include_self=True)
        affinity.data = exp(-gamma * affinity.data ** 2)

    K = rbf_kernel(Y, gamma=y_gamma)
    lap = laplacian(affinity, normed=True)
    lapK = laplacian(K, normed=True)
    embedding, _ = null_space(lap + mu * lapK, n_components,
                              k_skip=1, eigen_solver=eigen_solver,
                              tol=tol, max_iter=max_iter,
                              random_state=random_state)

    return embedding
Esempio n. 2
0
File: ler.py Progetto: all-umass/ler
def ler(X, Y, n_components=2, affinity='nearest_neighbors',
        n_neighbors=None, gamma=None, mu=1.0, y_gamma=None,
        eigen_solver='auto', tol=1e-6, max_iter=100, 
        random_state=None):
    """
    Laplacian Eigenmaps for Regression (LER)

    Parameters
    ----------
    X : ndarray, 2-dimensional
        The data matrix, shape (num_points, num_dims)

    Y : ndarray, 1 or 2-dimensional
        The response matrix, shape (num_points, num_responses).

    n_components : int
        Number of dimensions for embedding. Default is 2.

    affinity : string or callable, default : "nearest_neighbors"
        How to construct the affinity matrix.
         - 'nearest_neighbors' : construct affinity matrix by knn graph
         - 'rbf' : construct affinity matrix by rbf kernel

    n_neighbors : int, optional, default=None
        Number of neighbors for kNN graph construction on X.

    gamma : float, optional, default=None
        Scaling factor for RBF kernel on X.

    mu : float, optional, default=1.0
        Influence of the Y-similarity penalty.

    y_gamma : float, optional
        Scaling factor for RBF kernel on Y.
        Defaults to the inverse of the median distance between rows of Y.

    Returns
    -------
    embedding : ndarray, 2-dimensional
        The embedding of X, shape (num_points, n_components)
    """

    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1)
    nbrs.fit(X)
    X = nbrs._fit_X

    Nx, d_in = X.shape
    Ny = Y.shape[0]

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if Nx != Ny:
        raise ValueError("X and Y must have same number of points")
    if affinity == 'nearest_neighbors':
        if n_neighbors >= Nx:
            raise ValueError("n_neighbors must be less than number of points")
        if n_neighbors == None or n_neighbors <= 0:
            raise ValueError("n_neighbors must be positive")
    elif affinity == 'rbf':
        if gamma != None and gamma <= 0:
            raise ValueError("n_neighbors must be positive")
    else:
        raise ValueError("affinity must be 'nearest_neighbors' or 'rbf' must be positive")

    if Y.ndim == 1:
        Y = Y[:, None]

    if y_gamma is None:
        dists = pairwise_distances(Y)
        y_gamma = 1.0 / median(dists)

    if affinity == 'nearest_neighbors':
        affinity = kneighbors_graph(X, n_neighbors, include_self=True)
    else:
        if gamma == None:
            dists = pairwise_distances(X)
            gamma = 1.0 / median(dists)
        affinity = kneighbors_graph(X, n_neighbors, mode='distance', include_self=True)
        affinity.data = exp(-gamma * affinity.data ** 2)

    K = rbf_kernel(Y, gamma=y_gamma)
    lap = laplacian(affinity, normed=True)
    lapK = laplacian(K, normed=True)
    embedding, _ = null_space(lap + mu * lapK, n_components,
                              k_skip=1, eigen_solver=eigen_solver,
                              tol=tol, max_iter=max_iter,
                              random_state=random_state)

    return embedding
Esempio n. 3
0
def ler(X,
        Y,
        n_components=2,
        affinity='nearest_neighbors',
        n_neighbors=None,
        gamma=None,
        mu=1.0,
        y_gamma=None,
        eigen_solver='auto',
        tol=1e-6,
        max_iter=100,
        random_state=None):
    """
    Laplacian Eigenmaps for Regression (LER)

    Parameters
    ----------
    X : ndarray, 2-dimensional
        The data matrix, shape (num_points, num_dims)

    Y : ndarray, 1 or 2-dimensional
        The response matrix, shape (num_points, num_responses).
        Y[0:] is assumed to provide responses for X[:num_response_points]

    n_components : int
        Number of dimensions for embedding. Default is 2.

    affinity : string or callable, default : "nearest_neighbors"
        How to construct the affinity matrix.
         - 'nearest_neighbors' : construct affinity matrix by knn graph
         - 'rbf' : construct affinity matrix by rbf kernel

    n_neighbors : int, optional, default=None
        Number of neighbors for kNN graph construction on X.

    gamma : float, optional, default=None
        Scaling factor for RBF kernel on X.

    mu : float, optional, default=1.0
        Influence of the Y-similarity penalty.

    y_gamma : float, optional
        Scaling factor for RBF kernel on Y.
        Defaults to the inverse of the median distance between rows of Y.

    Returns
    -------
    embedding : ndarray, 2-dimensional
        The embedding of X, shape (num_points, n_components)
    """

    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1)
    nbrs.fit(X)
    X = nbrs._fit_X

    Nx, d_in = X.shape
    Ny = Y.shape[0]

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if Nx < Ny:
        raise ValueError("X should have at least as many points as Y")
    if affinity == 'nearest_neighbors':
        if n_neighbors >= Nx:
            raise ValueError("n_neighbors must be less than number of points")
        if n_neighbors is None or n_neighbors <= 0:
            raise ValueError("n_neighbors must be positive")
    elif affinity == 'rbf':
        if gamma is not None and gamma <= 0:
            raise ValueError("n_neighbors must be positive")
    else:
        raise ValueError("affinity must be 'nearest_neighbors' or 'rbf' must" +
                         " be positive")

    if Y.ndim == 1:
        Y = Y[:, None]

    if y_gamma is None:
        dists = pairwise_distances(Y)
        y_gamma = 1.0 / median(dists)

    if affinity == 'nearest_neighbors':
        affinity = kneighbors_graph(X, n_neighbors, include_self=True)
    else:
        if gamma is None:
            dists = pairwise_distances(X)
            gamma = 1.0 / median(dists)
        affinity = kneighbors_graph(X,
                                    n_neighbors,
                                    mode='distance',
                                    include_self=True)
        affinity.data = exp(-gamma * affinity.data**2)

    K = rbf_kernel(Y, gamma=y_gamma)
    lap = laplacian(affinity, normed=True)
    lapK = laplacian(K, normed=True)
    if Nx > Ny:
        # zeros = csr_matrix((Nx-Ny,Nx-Ny),dtype=lap.dtype)
        # lapK = bmat([[lapK, None], [None, zeros]])
        ones = csr_matrix(np.ones((Nx - Ny, Nx - Ny)), dtype=lap.dtype)
        lapK = bmat([[lapK, None], [None, ones]])
    embedding, _ = null_space(lap + mu * lapK,
                              n_components,
                              k_skip=1,
                              eigen_solver=eigen_solver,
                              tol=tol,
                              max_iter=max_iter,
                              random_state=random_state)

    return embedding
def locally_linear_embedding(
        X, n_neighbors, n_components, reg=1e-3, eigen_solver='auto', tol=1e-6,
        max_iter=100, method='standard', hessian_tol=1E-4, modified_tol=1E-12,
        random_state=None, n_jobs=None):
    
    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    if method not in ('standard', 'hessian', 'modified', 'ltsa'):
        raise ValueError("unrecognized method '%s'" % method)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
    nbrs.fit(X)
    X = nbrs._fit_X

    N, d_in = X.shape

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if n_neighbors >= N:
        raise ValueError(
            "Expected n_neighbors <= n_samples, "
            " but n_samples = %d, n_neighbors = %d" %
            (N, n_neighbors)
        )

    if n_neighbors <= 0:
        raise ValueError("n_neighbors must be positive")

    M_sparse = (eigen_solver != 'dense')

    if method == 'standard':
        W = barycenter_kneighbors_graph(nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=1)

    
        if M_sparse:
            M = eye(*W.shape, format=W.format) - W
            M = (M.T * M).tocsr()
        else:
            M = (W.T * W - W.T - W).toarray()
            M.flat[::M.shape[0] + 1] += 1  # W = W - I = W - I

    elif method == 'hessian':
        dp = n_components * (n_components + 1) // 2

        if n_neighbors <= n_components + dp:
            raise ValueError("for method='hessian', n_neighbors must be "
                             "greater than "
                             "[n_components * (n_components + 3) / 2]")

        neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
                                    return_distance=False)
        neighbors = neighbors[:, 1:]

        Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
        Yi[:, 0] = 1

        M = np.zeros((N, N), dtype=np.float64)

        use_svd = (n_neighbors > d_in)

        for i in range(N):
            Gi = X[neighbors[i]]
            Gi -= Gi.mean(0)

            # build Hessian estimator
            if use_svd:
                U = svd(Gi, full_matrices=0)[0]
            else:
                Ci = np.dot(Gi, Gi.T)
                U = eigh(Ci)[1][:, ::-1]

            Yi[:, 1:1 + n_components] = U[:, :n_components]

            j = 1 + n_components
            for k in range(n_components):
                Yi[:, j:j + n_components - k] = (U[:, k:k + 1] *
                                                 U[:, k:n_components])
                j += n_components - k

            Q, R = qr(Yi)

            w = Q[:, n_components + 1:]
            S = w.sum(0)

            S[np.where(abs(S) < hessian_tol)] = 1
            w /= S

            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
            M[nbrs_x, nbrs_y] += np.dot(w, w.T)

        if M_sparse:
            M = csr_matrix(M)

    elif method == 'modified':
        if n_neighbors < n_components:
            raise ValueError("modified LLE requires "
                             "n_neighbors >= n_components")

        neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
                                    return_distance=False)
        neighbors = neighbors[:, 1:]

        # find the eigenvectors and eigenvalues of each local covariance
        # matrix. We want V[i] to be a [n_neighbors x n_neighbors] matrix,
        # where the columns are eigenvectors
        V = np.zeros((N, n_neighbors, n_neighbors))
        nev = min(d_in, n_neighbors)
        evals = np.zeros([N, nev])

        # choose the most efficient way to find the eigenvectors
        use_svd = (n_neighbors > d_in)

        if use_svd:
            for i in range(N):
                X_nbrs = X[neighbors[i]] - X[i]
                V[i], evals[i], _ = svd(X_nbrs,
                                        full_matrices=True)
            evals **= 2
        else:
            for i in range(N):
                X_nbrs = X[neighbors[i]] - X[i]
                C_nbrs = np.dot(X_nbrs, X_nbrs.T)
                evi, vi = eigh(C_nbrs)
                evals[i] = evi[::-1]
                V[i] = vi[:, ::-1]

        # find regularized weights: this is like normal LLE.
        # because we've already computed the SVD of each covariance matrix,
        # it's faster to use this rather than np.linalg.solve
        reg = 1E-3 * evals.sum(1)

        tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))
        tmp[:, :nev] /= evals + reg[:, None]
        tmp[:, nev:] /= reg[:, None]

        w_reg = np.zeros((N, n_neighbors))
        for i in range(N):
            w_reg[i] = np.dot(V[i], tmp[i])
        w_reg /= w_reg.sum(1)[:, None]

        # calculate eta: the median of the ratio of small to large eigenvalues
        # across the points.  This is used to determine s_i, below
        rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)
        eta = np.median(rho)

        # find s_i, the size of the "almost null space" for each point:
        # this is the size of the largest set of eigenvalues
        # such that Sum[v; v in set]/Sum[v; v not in set] < eta
        s_range = np.zeros(N, dtype=int)
        evals_cumsum = stable_cumsum(evals, 1)
        eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1
        for i in range(N):
            s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)
        s_range += n_neighbors - nev  # number of zero eigenvalues

        # Now calculate M.
        # This is the [N x N] matrix whose null space is the desired embedding
        M = np.zeros((N, N), dtype=np.float64)
        for i in range(N):
            s_i = s_range[i]

            # select bottom s_i eigenvectors and calculate alpha
            Vi = V[i, :, n_neighbors - s_i:]
            alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)

            # compute Householder matrix which satisfies
            #  Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)
            # using prescription from paper
            h = np.full(s_i, alpha_i) - np.dot(Vi.T, np.ones(n_neighbors))

            norm_h = np.linalg.norm(h)
            if norm_h < modified_tol:
                h *= 0
            else:
                h /= norm_h

            # Householder matrix is
            #  >> Hi = np.identity(s_i) - 2*np.outer(h,h)
            # Then the weight matrix is
            #  >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]
            # We do this much more efficiently:
            Wi = (Vi - 2 * np.outer(np.dot(Vi, h), h) +
                  (1 - alpha_i) * w_reg[i, :, None])

            # Update M as follows:
            # >> W_hat = np.zeros( (N,s_i) )
            # >> W_hat[neighbors[i],:] = Wi
            # >> W_hat[i] -= 1
            # >> M += np.dot(W_hat,W_hat.T)
            # We can do this much more efficiently:
            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
            M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
            Wi_sum1 = Wi.sum(1)
            M[i, neighbors[i]] -= Wi_sum1
            M[neighbors[i], i] -= Wi_sum1
            M[i, i] += s_i

        if M_sparse:
            M = csr_matrix(M)

    elif method == 'ltsa':
        neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
                                    return_distance=False)
        neighbors = neighbors[:, 1:]

        M = np.zeros((N, N))

        use_svd = (n_neighbors > d_in)

        for i in range(N):
            Xi = X[neighbors[i]]
            Xi -= Xi.mean(0)

            # compute n_components largest eigenvalues of Xi * Xi^T
            if use_svd:
                v = svd(Xi, full_matrices=True)[0]
            else:
                Ci = np.dot(Xi, Xi.T)
                v = eigh(Ci)[1][:, ::-1]

            Gi = np.zeros((n_neighbors, n_components + 1))
            Gi[:, 1:] = v[:, :n_components]
            Gi[:, 0] = 1. / np.sqrt(n_neighbors)

            GiGiT = np.dot(Gi, Gi.T)

            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
            M[nbrs_x, nbrs_y] -= GiGiT
            M[neighbors[i], neighbors[i]] += 1
    
    return W
    a,b = null_space(M, n_components, k_skip=1, eigen_solver=eigen_solver,
                      tol=tol, max_iter=max_iter, random_state=random_state)
Esempio n. 5
0
File: ller.py Progetto: imgemp/ller
def ller(X,
         Y,
         n_neighbors,
         n_components,
         mu=0.5,
         gamma=None,
         reg=1e-3,
         eigen_solver='auto',
         tol=1e-6,
         max_iter=100,
         random_state=None):
    """
    Locally Linear Embedding for Regression (LLER)

    Parameters
    ----------
    X : ndarray, 2-dimensional
        The data matrix, shape (num_data_points, num_dims)

    Y : ndarray, 1 or 2-dimensional
        The response matrix, shape (num_response_points, num_responses).
        Y[0:] is assumed to provide responses for X[:num_response_points]

    n_neighbors : int
        Number of neighbors for kNN graph construction.

    n_components : int
        Number of dimensions for embedding.

    mu : float, optional
        Influence of the Y-similarity penalty.

    gamma : float, optional
        Scaling factor for RBF kernel on Y.
        Defaults to the inverse of the median distance between rows of Y.

    Returns
    -------
    embedding : ndarray, 2-dimensional
        The embedding of X, shape (num_points, n_components)

    lle_error : float
        The embedding error of X (for a fixed reconstruction matrix W)

    ller_error : float
        The embedding error of X that takes Y into account.
    """
    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    if Y.ndim == 1:
        Y = Y[:, None]

    if gamma is None:
        dists = pairwise_distances(Y)
        gamma = 1.0 / np.median(dists)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1)
    nbrs.fit(X)
    X = nbrs._fit_X

    Nx, d_in = X.shape
    Ny = Y.shape[0]

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if n_neighbors >= Nx:
        raise ValueError("n_neighbors must be less than number of points")
    if n_neighbors <= 0:
        raise ValueError("n_neighbors must be positive")
    if Nx < Ny:
        raise ValueError("X should have at least as many points as Y")

    M_sparse = (eigen_solver != 'dense')

    W = barycenter_kneighbors_graph(nbrs, n_neighbors=n_neighbors, reg=reg)

    if M_sparse:
        M = speye(*W.shape, format=W.format) - W
        M = (M.T * M).tocsr()
    else:
        M = (W.T * W - W.T - W).toarray()
        M.flat[::M.shape[0] + 1] += 1

    P = rbf_kernel(Y, gamma=gamma)
    L = laplacian(P, normed=False)
    M /= np.abs(M).max()  # optional scaling step
    L /= np.abs(L).max()
    if Nx > Ny:
        # zeros = csr_matrix((Nx-Ny,Nx-Ny),dtype=M.dtype)
        # L = bmat([[L, None], [None, zeros]])
        ones = csr_matrix(np.ones((Nx - Ny, Nx - Ny)), dtype=M.dtype)
        L = bmat([[L, None], [None, ones]])
    omega = M + mu * L
    embedding, lle_error = null_space(omega,
                                      n_components,
                                      k_skip=1,
                                      eigen_solver=eigen_solver,
                                      tol=tol,
                                      max_iter=max_iter,
                                      random_state=random_state)
    ller_error = np.trace(embedding.T.dot(L).dot(embedding))
    return embedding, lle_error, ller_error
Esempio n. 6
0
File: ller.py Progetto: imgemp/ller
def ller(X, Y, n_neighbors, n_components, mu=0.5, gamma=None,
         reg=1e-3,eigen_solver='auto', tol=1e-6, max_iter=100,
         random_state=None):
    """
    Locally Linear Embedding for Regression (LLER)

    Parameters
    ----------
    X : ndarray, 2-dimensional
        The data matrix, shape (num_data_points, num_dims)

    Y : ndarray, 1 or 2-dimensional
        The response matrix, shape (num_response_points, num_responses).
        Y[0:] is assumed to provide responses for X[:num_response_points]

    n_neighbors : int
        Number of neighbors for kNN graph construction.

    n_components : int
        Number of dimensions for embedding.

    mu : float, optional
        Influence of the Y-similarity penalty.

    gamma : float, optional
        Scaling factor for RBF kernel on Y.
        Defaults to the inverse of the median distance between rows of Y.

    Returns
    -------
    embedding : ndarray, 2-dimensional
        The embedding of X, shape (num_points, n_components)

    lle_error : float
        The embedding error of X (for a fixed reconstruction matrix W)

    ller_error : float
        The embedding error of X that takes Y into account.
    """
    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    if Y.ndim == 1:
        Y = Y[:, None]

    if gamma is None:
        dists = pairwise_distances(Y)
        gamma = 1.0 / np.median(dists)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1)
    nbrs.fit(X)
    X = nbrs._fit_X

    Nx, d_in = X.shape
    Ny = Y.shape[0]

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if n_neighbors >= Nx:
        raise ValueError("n_neighbors must be less than number of points")
    if n_neighbors <= 0:
        raise ValueError("n_neighbors must be positive")
    if Nx < Ny:
        raise ValueError("X should have at least as many points as Y")

    M_sparse = (eigen_solver != 'dense')

    W = barycenter_kneighbors_graph(
        nbrs, n_neighbors=n_neighbors, reg=reg)

    if M_sparse:
        M = speye(*W.shape, format=W.format) - W
        M = (M.T * M).tocsr()
    else:
        M = (W.T * W - W.T - W).toarray()
        M.flat[::M.shape[0] + 1] += 1

    P = rbf_kernel(Y, gamma=gamma)
    L = laplacian(P, normed=False)
    M /= np.abs(M).max()  # optional scaling step
    L /= np.abs(L).max()
    if Nx > Ny:
        # zeros = csr_matrix((Nx-Ny,Nx-Ny),dtype=M.dtype)
        # L = bmat([[L, None], [None, zeros]])
        ones = csr_matrix(np.ones((Nx-Ny,Nx-Ny)),dtype=M.dtype)
        L = bmat([[L, None], [None, ones]])
    omega = M + mu * L
    embedding, lle_error = null_space(omega, n_components, k_skip=1,
                                      eigen_solver=eigen_solver, tol=tol,
                                      max_iter=max_iter,
                                      random_state=random_state)
    ller_error = np.trace(embedding.T.dot(L).dot(embedding))
    return embedding, lle_error, ller_error