def kdrobjfun1D(s): tmpB = B - s * dB tmpB = linalg.svd(tmpB, full_matrices=False)[0] Z = np.dot(X, tmpB) Kz = rbf_dot(Z, Z, np.sqrt(sz2)) Kz = center_matrix(Kz) #np.dot(np.dot(Q,Kz), Q) Kz = (Kz + Kz.T) / 2 t = np.sum(Ky * linalg.inv(Kz + n * eps * np.eye(n))) return t
def kdrobjfun1D(s): tmpB = B - s*dB tmpB = linalg.svd(tmpB, full_matrices=False)[0] Z = np.dot(X, tmpB) Kz = rbf_dot(Z, Z, np.sqrt(sz2)) Kz = center_matrix(Kz) #np.dot(np.dot(Q,Kz), Q) Kz = (Kz + Kz.T)/2 t = np.sum(Ky*linalg.inv(Kz + n*eps*np.eye(n))) return t
def kdr_optim(X, Y, K, max_loop, sigma_x, sigma_y, eps, eta, anl, verbose = True, tol=1e-9, init_deriv = False, ls_maxiter=30): """ arguments : X -- nxd array of n samples, d features Y -- nxp array of class labels K -- target dimension of SDR subspace max_loop -- maximum number of iterations sigma_x -- scale factor for the Gaussian kernel associated to X (float) sigma_y -- scale factor for the Gaussian kernel associated to Y (float) eps -- regularization factor for matrix inversion (float) eta -- upper bound for linesearch step parameter (float) anl -- maximum annealing parameter (int/float) verbose -- print objective function value at each iteration ? (bool) tol -- stopping criterion for gradient descent, ie optim stops when ||dB||_s < tol (float) where ||.||_s is the spectral norm init_deriv -- use initial estimate of B through gradient descent ? (bool) ls_maxiter -- max number of iterations during line search step size selection (int) returns : B -- SDR matrix estimate """ n, d = X.shape if n != Y.shape[0]: raise ValueError, 'X and Y have incompatible dimensions' assert K<=d, 'dimension K must be lower than d !' assert sigma_x > 0 and sigma_y > 0, 'scale parameters must be positive!' assert tol > 0, 'tolerance factor must be >0' if init_deriv: print 'Initialization by derivative method...\n' B, t = kernel_derivative(X, Y, K, np.sqrt(anl)*sigma_x, sigma_y, eps) else: B = np.random.randn(d, K) B = linalg.svd(B, full_matrices=False)[0] """Gram matrix of Y""" Gy = rbf_dot(Y, Y, sigma_y) Kyo = center_matrix(Gy) Kyo = (Kyo + Kyo.T)/2 """objective function initial value """ Z = np.dot(X, B) Gz = rbf_dot(Z, Z, sigma_x) Kz = center_matrix(Gz) Kz = (Kz + Kz.T)/2 mz = linalg.inv(Kz + eps*n*np.eye(n)) tr = np.sum(Kyo*mz) if verbose: print '[0]trace = ', tr ssz2 = 2*sigma_x**2 ssy2 = 2*sigma_y**2 #careful h from 0 to maxloop-1, implement accordingly for h in xrange(max_loop): sz2 = ssz2+(anl-1)*ssz2*(max_loop-h-1)/max_loop sy2 = ssy2+(anl-1)*ssy2*(max_loop-h-1)/max_loop Z = np.dot(X, B) Kzw = rbf_dot(Z, Z, np.sqrt(sz2)) Kz = center_matrix(Kzw) Kzi = linalg.inv(Kz + eps*n*np.eye(n)) # Ky = rbf_dot(Y, Y, np.sqrt(sy2)) Ky = center_matrix(Ky) Ky = (Ky + Ky.T)/2 dB = np.zeros((d,K)) KziKyzi = np.dot(Kzi, np.dot(Ky, Kzi)) for a in xrange(d): Xa = np.tile(X[:,a][:,np.newaxis], (1, n)) XX = Xa - Xa.T for b in xrange(K): Zb = np.tile(Z[:,b][:,np.newaxis], (1, n)) tt = XX*(Zb - Zb.T)*Kzw dKB = center_matrix(tt) dB[a, b] = np.sum(KziKyzi*dKB.T) #np.trace(np.dot(Kzi.dot(Kyzi),dKB)) # nm = linalg.norm(dB, 2) if nm < tol: break B, tr = KDR_linesearch(X, Ky, sz2, B, dB/nm, eta, eps, ls_maxiter=ls_maxiter) B = linalg.svd(B, full_matrices=False)[0] """ compute trace with unannealed parameter""" if verbose: Z = np.dot(X, B) Kz = rbf_dot(Z, Z, sigma_x) Kz = center_matrix(Kz) #np.dot(np.dot(Q, Kz), Q) Kz = (Kz + Kz.T)/2 mz = linalg.inv(Kz + n*eps*np.eye(n)) tr = np.sum(Kyo*mz) print '[%d]trace = %.6f' % (h+1,tr) return B
def kdr_optim(X, Y, K, max_loop, sigma_x, sigma_y, eps, eta, anl, verbose=True, tol=1e-9, init_deriv=False, ls_maxiter=30): """ arguments : X -- nxd array of n samples, d features Y -- nxp array of class labels K -- target dimension of SDR subspace max_loop -- maximum number of iterations sigma_x -- scale factor for the Gaussian kernel associated to X (float) sigma_y -- scale factor for the Gaussian kernel associated to Y (float) eps -- regularization factor for matrix inversion (float) eta -- upper bound for linesearch step parameter (float) anl -- maximum annealing parameter (int/float) verbose -- print objective function value at each iteration ? (bool) tol -- stopping criterion for gradient descent, ie optim stops when ||dB||_s < tol (float) where ||.||_s is the spectral norm init_deriv -- use initial estimate of B through gradient descent ? (bool) ls_maxiter -- max number of iterations during line search step size selection (int) returns : B -- SDR matrix estimate """ n, d = X.shape if n != Y.shape[0]: raise (ValueError('X and Y have incompatible dimensions')) assert K <= d, 'dimension K must be lower than d !' assert sigma_x > 0 and sigma_y > 0, 'scale parameters must be positive!' assert tol > 0, 'tolerance factor must be >0' if init_deriv: print('Initialization by derivative method...\n') B, t = kernel_derivative(X, Y, K, np.sqrt(anl) * sigma_x, sigma_y, eps) else: B = np.random.randn(d, K) B = linalg.svd(B, full_matrices=False)[0] """Gram matrix of Y""" Gy = rbf_dot(Y, Y, sigma_y) Kyo = center_matrix(Gy) Kyo = (Kyo + Kyo.T) / 2 """objective function initial value """ Z = np.dot(X, B) Gz = rbf_dot(Z, Z, sigma_x) Kz = center_matrix(Gz) Kz = (Kz + Kz.T) / 2 mz = linalg.inv(Kz + eps * n * np.eye(n)) tr = np.sum(Kyo * mz) if verbose: print('[0]trace = {}'.format(tr)) ssz2 = 2 * sigma_x**2 ssy2 = 2 * sigma_y**2 #careful h from 0 to maxloop-1, implement accordingly for h in range(max_loop): sz2 = ssz2 + (anl - 1) * ssz2 * (max_loop - h - 1) / max_loop sy2 = ssy2 + (anl - 1) * ssy2 * (max_loop - h - 1) / max_loop Z = np.dot(X, B) Kzw = rbf_dot(Z, Z, np.sqrt(sz2)) Kz = center_matrix(Kzw) Kzi = linalg.inv(Kz + eps * n * np.eye(n)) # Ky = rbf_dot(Y, Y, np.sqrt(sy2)) Ky = center_matrix(Ky) Ky = (Ky + Ky.T) / 2 dB = np.zeros((d, K)) KziKyzi = np.dot(Kzi, np.dot(Ky, Kzi)) for a in range(d): Xa = np.tile(X[:, a][:, np.newaxis], (1, n)) XX = Xa - Xa.T for b in range(K): Zb = np.tile(Z[:, b][:, np.newaxis], (1, n)) tt = XX * (Zb - Zb.T) * Kzw dKB = center_matrix(tt) dB[a, b] = np.sum( KziKyzi * dKB.T) #np.trace(np.dot(Kzi.dot(Kyzi),dKB)) # nm = linalg.norm(dB, 2) if nm < tol: break B, tr = KDR_linesearch(X, Ky, sz2, B, dB / nm, eta, eps, ls_maxiter=ls_maxiter) B = linalg.svd(B, full_matrices=False)[0] """ compute trace with unannealed parameter""" if verbose: Z = np.dot(X, B) Kz = rbf_dot(Z, Z, sigma_x) Kz = center_matrix(Kz) #np.dot(np.dot(Q, Kz), Q) Kz = (Kz + Kz.T) / 2 mz = linalg.inv(Kz + n * eps * np.eye(n)) tr = np.sum(Kyo * mz) print('[%d]trace = %.6f' % (h + 1, tr)) return B