Ejemplo n.º 1
0
def kernel_derivative(X, Y, K, sigma_x, sigma_y, eps):
    """
    Compute an initial estimate of the SDR matrix from kernel derivatives.

    No iterative descent is performed here: the estimate is obtained in
    closed form from the eigen-decomposition of the d x d matrix

        R = sum_i H_i' * Kxi * Ky * Kxi * H_i

    with H_i[j, :] = Kx[i, j] * (X[j] - X[i]) / sigma_x**2 and
    Kxi = inv(Kx + n*eps*I).

    arguments :
    X       -- nxd array of n samples, d features
    Y       -- nxp array of class labels
    K       -- target dimension of SDR subspace
    sigma_x -- scale factor for the Gaussian kernel associated to X
    sigma_y -- scale factor for the Gaussian kernel associated to Y
    eps     -- regularization factor for matrix inversion

    returns :
    B       -- dxK array, eigenvectors of R for its K largest eigenvalues
               (largest first)
    tr      -- sum of those K eigenvalues (trace=objective function)

    """

    n, d = X.shape

    # Gram matrix of X and its regularised inverse.
    # NOTE(review): rbf_dot is defined elsewhere; the code below relies on it
    # returning an n x n array.
    Kx = np.asarray(rbf_dot(X, X, sigma_x))
    Kxi = linalg.inv(Kx + n * eps * np.eye(n))

    # Gram matrix of Y
    Ky = rbf_dot(Y, Y, sigma_y)

    # Derivative of Kx(xi, x) w.r.t. x:
    #   Xij[i, j, :] = (X[j] - X[i]) / sigma_x**2
    #   H[i, j, :]   = Xij[i, j, :] * Kx[i, j]
    Xij = (X[np.newaxis, :, :] - X[:, np.newaxis, :]) / (sigma_x**2)
    # Kx[i, j] has to be repeated along the feature axis. Reshaping
    # np.tile(Kx, (1, d)) to (n, n, d) does not do that in C order (it
    # interleaves the columns of Kx), so broadcast a trailing axis instead.
    H = Xij * Kx[:, :, np.newaxis]

    # sum_i H(X_i)'*Kxi*Ky*Kxi*H(X_i), i.e.
    #   R[k, m] = sum_{i, j, l} H[i, j, k] * Fmat[j, l] * H[i, l, m]
    Fmat = np.dot(Kxi, np.dot(Ky, Kxi))
    # matmul broadcasts over the leading axis: FH[i] = Fmat . H[i]
    FH = np.matmul(Fmat, H)
    R = np.dot(H.reshape((n * n, d)).T, FH.reshape((n * n, d)))

    # eigh returns eigenvalues in ascending order; flip so the largest is first
    L, V = linalg.eigh(R)
    B = V[:, ::-1][:, :K]
    tr = np.sum(L[::-1][:K])

    return B, tr
Ejemplo n.º 2
0
def kernel_derivative(X, Y, K, sigma_x, sigma_y, eps):
    """
    Compute an initial estimate of the SDR matrix from kernel derivatives.

    The estimate is the set of leading eigenvectors of the d x d matrix

        R = sum_i H_i' * Kxi * Ky * Kxi * H_i

    where H_i[j, :] = Kx[i, j] * (X[j] - X[i]) / sigma_x**2 and
    Kxi = inv(Kx + n*eps*I). It is a closed-form computation; no iterative
    descent takes place in this function.

    arguments :
    X       -- nxd array of n samples, d features
    Y       -- nxp array of class labels
    K       -- target dimension of SDR subspace
    sigma_x -- scale factor for the Gaussian kernel associated to X
    sigma_y -- scale factor for the Gaussian kernel associated to Y
    eps     -- regularization factor for matrix inversion

    returns :
    B       -- dxK array, eigenvectors of R for its K largest eigenvalues
               (largest first)
    tr      -- sum of those K eigenvalues (trace=objective function)

    """

    n, d = X.shape

    # Gram matrix of X and its regularised inverse.
    # NOTE(review): rbf_dot lives outside this block; an n x n result is
    # assumed by the n x n regulariser added below.
    Kx = np.asarray(rbf_dot(X, X, sigma_x))
    Kxi = linalg.inv(Kx + n * eps * np.eye(n))

    # Gram matrix of Y
    Ky = rbf_dot(Y, Y, sigma_y)

    # Derivative of Kx(xi, x) w.r.t. x.
    # Xij[i, j, :] is the scaled difference (X[j] - X[i]) / sigma_x**2.
    Xij = (X[np.newaxis, :, :] - X[:, np.newaxis, :]) / (sigma_x**2)
    # Weight each difference by Kx[i, j]. A trailing broadcast axis is used
    # because reshaping np.tile(Kx, (1, d)) to (n, n, d) scrambles the
    # entries of Kx across the feature axis when d > 1.
    H = Xij * Kx[:, :, np.newaxis]

    # sum_i H(X_i)'*Kxi*Ky*Kxi*H(X_i)
    Fmat = np.dot(Kxi, np.dot(Ky, Kxi))
    # FH[i, j, m] = sum_l Fmat[j, l] * H[i, l, m]  (matmul batches over i)
    FH = np.matmul(Fmat, H)
    # R[k, m] = sum_{i, j} H[i, j, k] * FH[i, j, m]
    R = np.dot(H.reshape((n * n, d)).T, FH.reshape((n * n, d)))

    # Eigenvalues come back in ascending order from eigh; reverse them.
    L, V = linalg.eigh(R)
    B = V[:, ::-1][:, :K]
    tr = np.sum(L[::-1][:K])

    return B, tr
Ejemplo n.º 3
0
    def kdrobjfun1D(s):
        """
        Objective value at step size s along the current search direction.

        Reads B, dB, X, sz2, Ky, n and eps from the enclosing scope.

        arguments :
        s -- step size applied to the direction dB

        returns :
        sum of the element-wise product of Ky and inv(Kz + n*eps*I), where
        Kz is the centered, symmetrized Gram matrix of the projected data
        """
        # Take the step, then keep only the left singular vectors of the
        # result as the new projection basis.
        basis = linalg.svd(B - s * dB, full_matrices=False)[0]
        projected = np.dot(X, basis)

        gram = center_matrix(rbf_dot(projected, projected, np.sqrt(sz2)))
        gram = (gram + gram.T) / 2  # symmetrize

        regularized_inv = linalg.inv(gram + n * eps * np.eye(n))
        return np.sum(Ky * regularized_inv)
Ejemplo n.º 4
0
 def kdrobjfun1D(s):
     """
     Evaluate the line-search objective for a step of size s.

     B, dB, X, sz2, Ky, n and eps are taken from the enclosing scope.

     arguments :
     s -- step size along dB

     returns :
     np.sum(Ky * inv(Kz + n*eps*I)) with Kz the centered and symmetrized
     Gram matrix of X projected on the updated basis
     """
     stepped = B - s * dB
     # first factor of the thin SVD = left singular vectors of the stepped B
     left_vectors = linalg.svd(stepped, full_matrices=False)[0]
     proj = np.dot(X, left_vectors)

     Kproj = rbf_dot(proj, proj, np.sqrt(sz2))
     Kproj = center_matrix(Kproj)
     Kproj = (Kproj + Kproj.T) / 2

     inv_term = linalg.inv(Kproj + n * eps * np.eye(n))
     value = np.sum(Ky * inv_term)
     return value
Ejemplo n.º 5
0
Archivo: kdr.py Proyecto: aschmu/KDRPy
def kdr_optim(X, Y, K, max_loop, sigma_x, sigma_y, eps,
              eta, anl, verbose=True, tol=1e-9,
              init_deriv=False, ls_maxiter=30):
    """
    Estimate the SDR matrix by annealed gradient descent with line search.

    arguments :
    X           -- nxd array of n samples, d features
    Y           -- nxp array of class labels
    K           -- target dimension of SDR subspace
    max_loop    -- maximum number of iterations
    sigma_x     -- scale factor for the Gaussian kernel associated to X (float)
    sigma_y     -- scale factor for the Gaussian kernel associated to Y (float)
    eps         -- regularization factor for matrix inversion (float)
    eta         -- upper bound for linesearch step parameter (float)
    anl         -- maximum annealing parameter (int/float)
    verbose     -- print objective function value at each iteration ? (bool)
    tol         -- stopping criterion for gradient descent, ie
                   optim stops when ||dB||_s < tol (float) where ||.||_s is the
                   spectral norm
    init_deriv  -- use initial estimate of B from kernel_derivative ? (bool)
    ls_maxiter  -- max number of iterations during line search step size selection (int)

    returns :
    B           -- SDR matrix estimate (dxK, left singular vectors of the
                   last iterate)

    raises :
    ValueError     -- X and Y do not have the same number of rows
    AssertionError -- K > d, a non-positive scale parameter, or tol <= 0
    """
    n, d = X.shape

    if n != Y.shape[0]:
        raise ValueError('X and Y have incompatible dimensions')

    assert K <= d, 'dimension K must be lower than d !'
    assert sigma_x > 0 and sigma_y > 0, 'scale parameters must be positive!'
    assert tol > 0, 'tolerance factor must be >0'

    if init_deriv:
        print('Initialization by derivative method...\n')
        B, _ = kernel_derivative(X, Y, K, np.sqrt(anl) * sigma_x,
                                 sigma_y, eps)
    else:
        B = np.random.randn(d, K)

    # Keep only the left singular vectors of the thin SVD as the basis.
    B = linalg.svd(B, full_matrices=False)[0]

    def _unannealed_trace(B_cur, Kyo):
        # Objective evaluated with the unannealed kernel width sigma_x.
        Zc = np.dot(X, B_cur)
        Kzc = center_matrix(rbf_dot(Zc, Zc, sigma_x))
        Kzc = (Kzc + Kzc.T) / 2
        return np.sum(Kyo * linalg.inv(Kzc + eps * n * np.eye(n)))

    # The unannealed trace is only ever printed, so its Gram matrix of Y and
    # the n x n inverse are computed only when verbose output is requested.
    Kyo = None
    if verbose:
        Kyo = center_matrix(rbf_dot(Y, Y, sigma_y))
        Kyo = (Kyo + Kyo.T) / 2
        # two spaces after '=' reproduce the output of the former
        # two-argument print statement
        print('[0]trace =  %s' % (_unannealed_trace(B, Kyo),))

    ssz2 = 2 * sigma_x**2
    ssy2 = 2 * sigma_y**2
    # h runs from 0 to max_loop-1; the annealing term vanishes on the last pass
    for h in range(max_loop):
        # float() keeps the schedule exact under Python 2 integer division
        sz2 = ssz2 + (anl - 1) * ssz2 * (max_loop - h - 1) / float(max_loop)
        sy2 = ssy2 + (anl - 1) * ssy2 * (max_loop - h - 1) / float(max_loop)

        Z = np.dot(X, B)
        Kzw = rbf_dot(Z, Z, np.sqrt(sz2))
        Kz = center_matrix(Kzw)
        Kzi = linalg.inv(Kz + eps * n * np.eye(n))

        Ky = rbf_dot(Y, Y, np.sqrt(sy2))
        Ky = center_matrix(Ky)
        Ky = (Ky + Ky.T) / 2

        dB = np.zeros((d, K))
        KziKyzi = np.dot(Kzi, np.dot(Ky, Kzi))

        # Pairwise differences of each projected coordinate do not depend on
        # the feature index a, so build them once per iteration.
        Zdiffs = [zb[:, np.newaxis] - zb[np.newaxis, :] for zb in Z.T]

        for a in range(d):
            xa = X[:, a]
            XX = xa[:, np.newaxis] - xa[np.newaxis, :]
            for b in range(K):
                tt = XX * Zdiffs[b] * Kzw
                dKB = center_matrix(tt)
                dB[a, b] = np.sum(KziKyzi * dKB.T)

        nm = linalg.norm(dB, 2)
        if nm < tol:
            break
        # The line search receives the direction normalised to unit spectral
        # norm; the objective value it returns is not used here.
        B, _ = KDR_linesearch(X, Ky, sz2, B, dB / nm, eta, eps,
                              ls_maxiter=ls_maxiter)
        B = linalg.svd(B, full_matrices=False)[0]

        # compute trace with unannealed parameter
        if verbose:
            print('[%d]trace = %.6f' % (h + 1, _unannealed_trace(B, Kyo)))

    return B
Ejemplo n.º 6
0
def kdr_optim(X,
              Y,
              K,
              max_loop,
              sigma_x,
              sigma_y,
              eps,
              eta,
              anl,
              verbose=True,
              tol=1e-9,
              init_deriv=False,
              ls_maxiter=30):
    """
    Estimate the SDR matrix by annealed gradient descent with line search.

    arguments :
    X           -- nxd array of n samples, d features
    Y           -- nxp array of class labels
    K           -- target dimension of SDR subspace
    max_loop    -- maximum number of iterations
    sigma_x     -- scale factor for the Gaussian kernel associated to X (float)
    sigma_y     -- scale factor for the Gaussian kernel associated to Y (float)
    eps         -- regularization factor for matrix inversion (float)
    eta         -- upper bound for linesearch step parameter (float)
    anl         -- maximum annealing parameter (int/float)
    verbose     -- print objective function value at each iteration ? (bool)
    tol         -- stopping criterion for gradient descent, ie
                   optim stops when ||dB||_s < tol (float) where ||.||_s is the
                   spectral norm
    init_deriv  -- use initial estimate of B from kernel_derivative ? (bool)
    ls_maxiter  -- max number of iterations during line search step size selection (int)

    returns :
    B           -- SDR matrix estimate (dxK, left singular vectors of the
                   last iterate)

    raises :
    ValueError     -- X and Y do not have the same number of rows
    AssertionError -- K > d, a non-positive scale parameter, or tol <= 0
    """
    n, d = X.shape

    if n != Y.shape[0]:
        raise ValueError('X and Y have incompatible dimensions')

    assert K <= d, 'dimension K must be lower than d !'
    assert sigma_x > 0 and sigma_y > 0, 'scale parameters must be positive!'
    assert tol > 0, 'tolerance factor must be >0'

    if init_deriv:
        print('Initialization by derivative method...\n')
        B, _ = kernel_derivative(X, Y, K, np.sqrt(anl) * sigma_x, sigma_y, eps)
    else:
        B = np.random.randn(d, K)

    # Keep only the left singular vectors of the thin SVD as the basis.
    B = linalg.svd(B, full_matrices=False)[0]

    def _unannealed_trace(B_cur, Kyo):
        # Objective evaluated with the unannealed kernel width sigma_x.
        Zc = np.dot(X, B_cur)
        Kzc = center_matrix(rbf_dot(Zc, Zc, sigma_x))
        Kzc = (Kzc + Kzc.T) / 2
        return np.sum(Kyo * linalg.inv(Kzc + eps * n * np.eye(n)))

    # The unannealed trace is only reported, never used by the optimisation,
    # so skip its Gram matrix and n x n inverse unless verbose is set.
    Kyo = None
    if verbose:
        Kyo = center_matrix(rbf_dot(Y, Y, sigma_y))
        Kyo = (Kyo + Kyo.T) / 2
        print('[0]trace = {}'.format(_unannealed_trace(B, Kyo)))

    ssz2 = 2 * sigma_x**2
    ssy2 = 2 * sigma_y**2
    # h runs from 0 to max_loop-1; the annealing term vanishes on the last pass
    for h in range(max_loop):
        sz2 = ssz2 + (anl - 1) * ssz2 * (max_loop - h - 1) / max_loop
        sy2 = ssy2 + (anl - 1) * ssy2 * (max_loop - h - 1) / max_loop

        Z = np.dot(X, B)
        Kzw = rbf_dot(Z, Z, np.sqrt(sz2))
        Kz = center_matrix(Kzw)
        Kzi = linalg.inv(Kz + eps * n * np.eye(n))

        Ky = rbf_dot(Y, Y, np.sqrt(sy2))
        Ky = center_matrix(Ky)
        Ky = (Ky + Ky.T) / 2

        dB = np.zeros((d, K))
        KziKyzi = np.dot(Kzi, np.dot(Ky, Kzi))

        # Pairwise differences of each projected coordinate are independent
        # of the feature index a: compute them once per iteration.
        Zdiffs = [zb[:, np.newaxis] - zb[np.newaxis, :] for zb in Z.T]

        for a in range(d):
            xa = X[:, a]
            XX = xa[:, np.newaxis] - xa[np.newaxis, :]
            for b in range(K):
                tt = XX * Zdiffs[b] * Kzw
                dKB = center_matrix(tt)
                dB[a, b] = np.sum(KziKyzi * dKB.T)

        nm = linalg.norm(dB, 2)
        if nm < tol:
            break
        # The line search receives the direction normalised to unit spectral
        # norm; the objective value it returns is not used here.
        B, _ = KDR_linesearch(X,
                              Ky,
                              sz2,
                              B,
                              dB / nm,
                              eta,
                              eps,
                              ls_maxiter=ls_maxiter)
        B = linalg.svd(B, full_matrices=False)[0]

        # compute trace with unannealed parameter
        if verbose:
            print('[%d]trace = %.6f' % (h + 1, _unannealed_trace(B, Kyo)))

    return B