Example #1
File: dr.py Project: sdvillal/mlmusings
def kpca(x, target_dim=None, kernel=gaussian_kernel, **kernel_params):

    # Recall that in kpca, depending on the kernel, each point can span a new direction in feature space...
    num_examples, num_features = shape(x)
    if not target_dim:
        target_dim = num_features  # ...we usually do not want that many

    # Kernel matrix
    K = kernel_matrix(x, kernel=kernel, **kernel_params)

    # Center the data in feature space: K' = K - 1nK - K1n + 1nK1n
    # Some algebra can optimize this both in time and space, think...
    onen = ones((num_examples, num_examples)) / num_examples
    onenK = dot(onen, K)
    K = K - onenK - onenK.T + dot(onenK, onen)

    # Eigendecomposition
    eigenvalues, eigenvectors = eig(K)

    # Selection of the PCs
    eigenvalues, eigenvectors, inertias = select_top(eigenvalues, eigenvectors, target_dim)

    # Transform
    x = dot(diag(sqrt(eigenvalues)), eigenvectors.T).T

    return x, K, eigenvectors, eigenvalues, inertias
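
The centering step implements the standard feature-space centering K' = K - 1nK - K1n + 1nK1n. As a self-contained sanity check (a minimal sketch in plain NumPy, independent of the project's helpers), every row and column of the centered matrix should sum to zero:

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((50, 3))

# RBF kernel matrix
sq = np.sum(x**2, axis=1)
K = np.exp(-(sq[:, None] + sq[None, :] - 2 * x @ x.T))

# K' = K - 1nK - K1n + 1nK1n, exactly as in the snippet above
n = K.shape[0]
onen = np.ones((n, n)) / n
onenK = onen @ K
Kc = K - onenK - onenK.T + onenK @ onen

# Centering in feature space makes every row (and column) sum to zero
assert np.allclose(Kc.sum(axis=0), 0)
assert np.allclose(Kc.sum(axis=1), 0)

Note that onenK.T can stand in for K·1n only because K is symmetric, which is the algebraic shortcut the snippet relies on.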
Example #2
def nystrom_experiment(X, m0, mmax, kernel, dataset):
    """
    Incremental calculation of the Nyström approximation to the kernel matrix.
    For each data point the difference in Frobenius norm between the
    approximation and the full kernel matrix is plotted.

    Parameters
    ----------

    X : numpy.ndarray, 2d
        Data matrix
    m0 : int
        Initial size of Nyström subset
    mmax : int
        Maximum size of Nyström subset
    kernel : callable
        Kernel function
    dataset : str
        Either 'magic' or 'yeast'

    """
    print("\nIncremental Nyström approximation")
    inc = IncrKPCA(X, m0, mmax, kernel=kernel, nystrom=True)
    idx = inc.get_idx_array()
    n = X.shape[0]
    K = kernel_matrix(X, kernel, range(n), range(n))
    fnorms = []
    for i, L, U, L_nys, U_nys in inc:
        K_tilde = dot(U_nys, dot(diag(L_nys), U_nys.T))
        fnorm = np.sqrt(np.sum(np.sum(np.power(K - K_tilde, 2))))
        fnorms.append(fnorm)

    plotting(range(m0, m0 + len(fnorms)), fnorms, dataset, "m",
             "Frobenius norm")
Example #3
    def update_K_nm(self):
        """
        Update K_nm for one iteration by adding another column.
        """
        i = self.i
        K_ni = kernel_matrix(self.X, self.kernel, range(self.n), [self.idx[i]])
        self.K_nm = np.c_[self.K_nm, K_ni]
Example #4
def create_update_terms(X, cols, col, kernel):
    """
    Create the terms supplied to eigenvalue update algorithm

    Parameters
    ----------
    X : np.ndarray, 2d
        Data matrix
    cols : np.ndarray, 1d
        Indices of columns to create the kernel matrix
    col : int
        The additional column index
    kernel : callable
        Kernel function

    Returns
    -------
    Parameters supplied to update algorithm for
    eigendecomposition

    """
    k1 = kernel_matrix(X, kernel, cols, [col])
    k = copy(k1[-1][0])
    k1[-1] = k / 2
    k0 = deepcopy(k1)  # numpy pass by reference
    k0[-1] = k / 4
    sigma = 4 / k

    return sigma, k1, k0
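
To see why these particular scalings work: with sigma = 4/k, the difference sigma·k1k1ᵀ - sigma·k0k0ᵀ contributes exactly the new off-diagonal column and 3k/4 on the diagonal, so together with a diagonal expansion by k/4 it reproduces the bordered kernel matrix. A small numerical check (the names a and k are illustrative):

import numpy as np

rng = np.random.default_rng(1)
m = 4
a = rng.standard_normal((m, 1))   # new off-diagonal kernel column
k = 2.5                           # new diagonal kernel entry, k != 0

k1 = np.vstack([a, [[k / 2]]])
k0 = np.vstack([a, [[k / 4]]])
sigma = 4 / k

# The two symmetric rank-one terms reproduce the border of the
# expanded kernel matrix on top of a diagonal entry of k/4
border = sigma * (k1 @ k1.T - k0 @ k0.T)
expected = np.zeros((m + 1, m + 1))
expected[:m, -1] = a.ravel()
expected[-1, :m] = a.ravel()
expected[-1, -1] = 3 * k / 4      # k/4 (expansion) + 3k/4 = k
assert np.allclose(border, expected)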
Example #5
    def __init__(self,
                 X,
                 m0,
                 mmax=None,
                 kernel=kernel_error,
                 adjust=False,
                 nystrom=False,
                 maxiter=500):

        # Setup default arguments
        n = X.shape[0]
        self.X = X
        self.i = m0
        self.m0 = m0
        self.j = 0
        self.n = n
        self.maxiter = maxiter

        if mmax is None:
            mmax = n
        self.mmax = min(mmax, n)

        self.idx = np.random.permutation(n)
        self.kernel = kernel
        self.adjust = adjust
        self.nystrom = nystrom

        # Initial eigensystem
        cols = self.idx[:m0]
        K_mm = kernel_matrix(X, kernel, cols, cols)

        if self.adjust:
            self.L, self.U, self.capsig, self.K1 = init_vars(K_mm)
        else:
            self.L, self.U = linalg.eigh(K_mm)

        if self.nystrom:
            self.K_nm = kernel_matrix(X, kernel, range(n), cols)
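
Taken together with Examples #2 and #9, the intended iteration pattern looks roughly like this (a sketch only; IncrKPCA and rbf come from the same project):

inc = IncrKPCA(X, m0=20, mmax=100, kernel=rbf)    # plain incremental KPCA
for i, L, U in inc:                               # eigensystem grows by one per step
    pass

inc = IncrKPCA(X, m0=20, mmax=100, kernel=rbf, nystrom=True)
for i, L, U, L_nys, U_nys in inc:                 # Nystrom-extended eigenpairs as well
    pass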
Example #6
def test_nystrom_approx():
    datasize = 100
    fraction = 0.1

    X = get_magic_data()
    X = X[:datasize]
    cols = range(int(fraction * datasize))
    all_cols = range(datasize)

    K_mm, K_nm = kernel_matrices(X, rbf, cols)
    K = kernel_matrix(X, rbf, all_cols, all_cols)
    L, U = linalg.eigh(K_mm)
    L_nys, U_nys = nystrom_approximation(L, U, K_nm)
    K_nys = dot(U_nys, dot(diag(L_nys), U_nys.T))

    # F norm of difference
    fnorm = np.sqrt(np.sum(np.sum(np.power(K - K_nys, 2))))
    assert_less(fnorm / datasize, datasize)
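
Note that the nested-sum expression is just the Frobenius norm of the difference; it matches NumPy's built-in norm, e.g.:

import numpy as np

A = np.arange(6.0).reshape(2, 3)
assert np.isclose(np.sqrt(np.sum(np.power(A, 2))), np.linalg.norm(A, "fro"))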
Example #7
def kpca_cv_experiment(X, kernel, dataset, n_iter, kernel_label):
    """
    """

    print("\nCross-validation of kernel PCA\n------------------------------")

    K = kernel_matrix(X, kernel)
    n = K.shape[0]

    print("Number of data points: {}".format(n))
    print("Kernel: {}".format(kernel_label))

    pc, errs = kpca_cv(K, n_iter)

    print("Selected PC: {}".format(pc))

    err_mean = errs.mean(0)
    title = dataset + " " + kernel_label
    plotting(np.arange(n_iter - 1) + 1, err_mean, title, "k", "Mean error")
Example #8
def predict(weights, train_xs, test_xs, test_ys, kernel_func, dim, kernel=None):
    """
    Predict classes of test set using multi-class kernel perceptron algorithm

    :param weights:     weights fit to train set
    :param train_xs:    train set
    :param test_xs:     test set
    :param test_ys:     test classes
    :param kernel_func: kernel function
    :param dim:         dimension used for kernel function
    :param kernel:      kernel matrix

    :return: test error
    """
    K = kernel if kernel is not None else kernel_matrix(test_xs, train_xs, dim, kernel_func)
    num_correct = 0
    w = (weights @ K.T).T
    for index, x in enumerate(test_xs):
        predicted = np.argmax(w[index, :])
        correct = test_ys[index]
        if predicted == correct:
            num_correct += 1
    return 1 - num_correct / len(test_xs)
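
The prediction rule is the usual dual form: for each test point x, score every class c by the sum over training points of w_{c,i}·K(x, x_i) and take the argmax. A tiny self-contained illustration with a linear kernel (all names are illustrative; the kernel matrix follows the call above, rows indexing test points and columns indexing training points):

import numpy as np

train_xs = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
test_xs = np.array([[0.0, 0.9], [0.9, 0.1]])
weights = np.array([[1.0, 0.0, 0.0],   # dual weights, one row per class
                    [0.0, 1.0, 0.0]])

K = test_xs @ train_xs.T               # (n_test, n_train) kernel matrix
scores = (weights @ K.T).T             # (n_test, n_classes), as in predict()
print(np.argmax(scores, axis=1))       # -> [0 1]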
Example #9
def incremental_experiment(X, m0, mmax, kernel, dataset, adjust=False):
    """
    Experiment for the incremental kernel pca algorithm. For each additional
    data point the difference in Frobenius norm between incremental and batch
    calculation is plotted (termed drift).

    Parameters
    ----------

    X : numpy.ndarray, 2d
        Data matrix
    m0 : int
        Initial size of kernel matrix
    mmax : int
        Maximum size of kernel matrix
    kernel : callable
        Kernel function
    dataset : str
        Either 'magic' or 'yeast'
    adjust : bool
        Whether to adjust the mean

    """
    print("\nIncremental kernel PCA")
    inc = IncrKPCA(X, m0, mmax, adjust=adjust, kernel=kernel)
    fnorms = []
    for i, L, U in inc:
        idx = inc.get_idx_array()
        K = kernel_matrix(X, kernel, idx[:i + 1], idx[:i + 1])
        if adjust:
            K = adjust_K(K)
        K_tilde = dot(U, dot(diag(L), U.T))
        fnorm = np.sqrt(np.sum(np.sum(np.power(K - K_tilde, 2))))
        fnorms.append(fnorm)

    plotting(
        np.arange(len(fnorms)) + m0, fnorms, dataset, "m", "Frobenius norm")
Example #10
File: dr.py Project: fmonta/mayolmol
def kpca(x, target_dim=None, kernel=gaussian_kernel, shuffle=None, **kernel_params):
    # Recall that in kpca, depending on the kernel, each point can span a new direction in feature space...
    num_examples, num_features = shape(x)
    if not target_dim:
        target_dim = num_features  # ...we usually do not want that many

    # Compute the kernel matrix
    K = kernel_matrix(x, kernel=kernel, **kernel_params)
    if shuffle:
        K = shuffle_matrix(K, shuffle)

    # Center the data in feature space: K' = K - 1nK - K1n + 1nK1n
    K = center_kernel_matrix(K)

    # Eigendecomposition
    eigenvalues, eigenvectors = eig(K)

    # Selection of the PCs
    eigenvalues, eigenvectors, inertias = select_top(eigenvalues, eigenvectors, target_dim)

    # Transform
    x = dot(diag(sqrt(eigenvalues)), eigenvectors.T).T

    return x, K, eigenvectors, eigenvalues, inertias
Example #11
    def update_eig_adjust(self):
        """
        Update the kernel PCA solution including adjustment of the mean.
        """
        i = self.i
        col = self.idx[i]
        cols = self.idx[:i + 1]
        k = kernel_matrix(self.X, self.kernel, cols, [col])  # OK
        a = k[:-1, :]
        a_sum = np.sum(a)
        k_sum = np.sum(k)
        capsig2 = self.capsig + 2 * a_sum + k[-1, 0]
        C = -self.capsig / i**2 + capsig2 / (i + 1)**2
        u = self.K1 / (i * (i + 1)) - a / (i + 1) + 0.5 * C * ones((i, 1))
        u1 = 1 + u
        u2 = 1 - u
        sigma_u = 0.5

        K1 = np.r_[self.K1 + a, [[k_sum]]]
        capsig = capsig2
        v = k - (ones((i + 1, 1)) * k_sum + K1 - capsig / (i + 1)) / (i + 1)
        v1 = deepcopy(v)
        v2 = deepcopy(v)
        v0 = copy(v[-1, 0])
        v1[-1, 0] = v0 / 2
        v2[-1, 0] = v0 / 4
        sigma_k = 4 / v0

        # Apply rank one updates
        L, U = update_eigensystem(self.L, self.U, u1, sigma_u)
        if isinstance(L, np.ndarray):
            L, U = update_eigensystem(L, U, u2, -sigma_u)
        if isinstance(L, np.ndarray):
            L, U = expand_eigensystem(L, U, v0 / 4)

            # Ordering
            idx = np.argsort(L)
            L = L[idx]
            U = U[:, idx]
            U = U[idx, :]
            v1 = v1[idx, :]
            v2 = v2[idx, :]

            L, U = update_eigensystem(L, U, v1, sigma_k)

        if isinstance(L, np.ndarray):
            L, U = update_eigensystem(L, U, v2, -sigma_k)

        if isinstance(L, np.ndarray):
            # if self.nystrom:
            #     self.update_K_nm()
            K1 = K1[idx, :]
            self.idx[:i + 1] = self.idx[:i + 1][idx]  # Reorder index
            if self.nystrom:
                self.K_nm = self.K_nm[:, idx]  # Reorder columns
            self.i, self.L, self.U, self.K1 = i + 1, L, U, K1
            self.capsig = capsig
            rc = 0
        else:  # Ignore data example
            self.idx[i:-1] = self.idx[i + 1:]
            self.idx = self.idx[:-1]
            rc = 1

        return rc