def kernel_derivative(X, Y, K, sigma_x, sigma_y, eps):
    """
    Compute an initial estimate of the SDR matrix from kernel derivatives.

    A d x d matrix R = sum_i H_i' * F * H_i is assembled, where
    F = Kxi * Ky * Kxi, Kxi is the regularised inverse of the Gram matrix
    of X and H_i[j, :] = (X[j] - X[i]) / sigma_x**2 * Kx[i, j].
    The estimate is built from the eigenvectors of R associated with its
    K largest eigenvalues; no iterative descent is performed here.

    arguments :
    X -- nxd array of n samples, d features
    Y -- nxp array of class labels
    K -- target dimension of SDR subspace
    sigma_x -- scale factor for the Gaussian kernel associated to X
    sigma_y -- scale factor for the Gaussian kernel associated to Y
    eps -- regularization factor for matrix inversion

    returns :
    B -- dxK array whose columns are the eigenvectors of R, ordered by
         decreasing eigenvalue
    tr -- sum of the K largest eigenvalues of R
    """
    n, d = X.shape

    # Gram matrix of X and its regularised inverse
    Kx = rbf_dot(X, X, sigma_x)
    Kxi = linalg.inv(Kx + n * eps * np.eye(n))

    # Gram matrix of Y
    Ky = rbf_dot(Y, Y, sigma_y)

    # Xij[i, j, :] = (X[j] - X[i]) / sigma_x**2
    Xij = (X[np.newaxis, :, :] - X[:, np.newaxis, :]) / (sigma_x**2)

    # H[i, j, :] = Xij[i, j, :] * Kx[i, j].
    # Kx has to be broadcast along the feature axis: reshaping
    # np.tile(Kx, (1, d)) to (n, n, d) in C order would instead select
    # Kx[i, (j*d + k) % n], which is only correct when d == 1.
    H = Xij * Kx[:, :, np.newaxis]

    # R[k, m] = sum_i sum_{j, l} H[i, j, k] * F[j, l] * H[i, l, m]
    Fmat = np.dot(Kxi, np.dot(Ky, Kxi))
    # FH[i, m, j] = sum_l H[i, l, m] * F[j, l]
    FH = np.tensordot(H, Fmat, axes=([1], [1]))
    # contract over the sample indices i and j, leaving a (d, d) matrix
    R = np.tensordot(H, FH, axes=([0, 1], [0, 2]))

    # eigh returns eigenvalues in ascending order: flip to get the top K
    L, V = linalg.eigh(R)
    B = V[:, ::-1][:, :K]
    L = L[::-1]
    tr = np.sum(L[:K])
    return B, tr
def kernel_derivative(X, Y, K, sigma_x, sigma_y, eps):
    """
    Compute an initial estimate of the SDR matrix from kernel derivatives.

    A d x d matrix R = sum_i H_i' * F * H_i is assembled, where
    F = Kxi * Ky * Kxi, Kxi is the regularised inverse of the Gram matrix
    of X and H_i[j, :] = (X[j] - X[i]) / sigma_x**2 * Kx[i, j].
    The estimate is built from the eigenvectors of R associated with its
    K largest eigenvalues; no iterative descent is performed here.

    arguments :
    X -- nxd array of n samples, d features
    Y -- nxp array of class labels
    K -- target dimension of SDR subspace
    sigma_x -- scale factor for the Gaussian kernel associated to X
    sigma_y -- scale factor for the Gaussian kernel associated to Y
    eps -- regularization factor for matrix inversion

    returns :
    B -- dxK array whose columns are the eigenvectors of R, ordered by
         decreasing eigenvalue
    tr -- sum of the K largest eigenvalues of R
    """
    n, d = X.shape

    # Gram matrix of X and its regularised inverse
    Kx = rbf_dot(X, X, sigma_x)
    Kxi = linalg.inv(Kx + n * eps * np.eye(n))

    # Gram matrix of Y
    Ky = rbf_dot(Y, Y, sigma_y)

    # Xij[i, j, :] = (X[j] - X[i]) / sigma_x**2
    Xij = (X[np.newaxis, :, :] - X[:, np.newaxis, :]) / (sigma_x**2)

    # H[i, j, :] = Xij[i, j, :] * Kx[i, j].
    # Kx has to be broadcast along the feature axis: reshaping
    # np.tile(Kx, (1, d)) to (n, n, d) in C order would instead select
    # Kx[i, (j*d + k) % n], which is only correct when d == 1.
    H = Xij * Kx[:, :, np.newaxis]

    # R[k, m] = sum_i sum_{j, l} H[i, j, k] * F[j, l] * H[i, l, m]
    Fmat = np.dot(Kxi, np.dot(Ky, Kxi))
    # FH[i, m, j] = sum_l H[i, l, m] * F[j, l]
    FH = np.tensordot(H, Fmat, axes=([1], [1]))
    # contract over the sample indices i and j, leaving a (d, d) matrix
    R = np.tensordot(H, FH, axes=([0, 1], [0, 2]))

    # eigh returns eigenvalues in ascending order: flip to get the top K
    L, V = linalg.eigh(R)
    B = V[:, ::-1][:, :K]
    L = L[::-1]
    tr = np.sum(L[:K])
    return B, tr
def kdrobjfun1D(s):
    """
    Objective value at step size s along the current search direction.

    B, dB, X, sz2, Ky, n and eps are not parameters: they are looked up
    in the surrounding scope, which is not part of this block.

    arguments :
    s -- step size applied to dB

    returns :
    t -- sum of the element-wise product of Ky with the inverse of the
         regularised, centred Gram matrix of X projected on the
         candidate basis
    """
    # move along the direction, then keep only the left singular vectors
    candidate = B - s * dB
    basis = linalg.svd(candidate, full_matrices=False)[0]

    # centred Gram matrix of the projected samples, forced symmetric
    projected = np.dot(X, basis)
    gram = center_matrix(rbf_dot(projected, projected, np.sqrt(sz2)))
    gram = (gram + gram.T) / 2

    regularised = gram + n * eps * np.eye(n)
    return np.sum(Ky * linalg.inv(regularised))
def kdrobjfun1D(s):
    """
    Evaluate the line-search objective for the step size s.

    The names B, dB, X, sz2, Ky, n and eps are read from the surrounding
    scope (not visible in this block) rather than passed as arguments.

    arguments :
    s -- step size applied to dB

    returns :
    t -- np.sum(Ky * inv(Kz + n*eps*I)), Kz being the centred and
         symmetrised Gram matrix of X projected on the stepped basis
    """
    # stepped matrix, replaced by its left singular vectors
    stepped = linalg.svd(B - s * dB, full_matrices=False)[0]

    Zs = np.dot(X, stepped)
    Kzs = rbf_dot(Zs, Zs, np.sqrt(sz2))
    Kzs = center_matrix(Kzs)
    # remove the asymmetry left by the centring step
    Kzs = (Kzs + Kzs.T) / 2

    inverse = linalg.inv(Kzs + n * eps * np.eye(n))
    return np.sum(Ky * inverse)
def kdr_optim(X, Y, K, max_loop, sigma_x, sigma_y, eps, eta, anl, verbose=True,
              tol=1e-9, init_deriv=False, ls_maxiter=30):
    """
    Estimate the SDR matrix by annealed gradient descent with line search.

    At each iteration the kernel scales are annealed from anl times their
    final squared value down to the final value, the gradient dB of the
    objective is accumulated entry by entry, a step is chosen by
    KDR_linesearch along dB / ||dB||_s, and B is replaced by the left
    singular vectors of the updated matrix.

    arguments :
    X -- nxd array of n samples, d features
    Y -- nxp array of class labels
    K -- target dimension of SDR subspace
    max_loop -- maximum number of iterations
    sigma_x -- scale factor for the Gaussian kernel associated to X (float)
    sigma_y -- scale factor for the Gaussian kernel associated to Y (float)
    eps -- regularization factor for matrix inversion (float)
    eta -- upper bound for linesearch step parameter (float)
    anl -- maximum annealing parameter (int/float)
    verbose -- print objective function value at each iteration ? (bool)
    tol -- stopping criterion for gradient descent, ie optim stops when
           ||dB||_s < tol (float) where ||.||_s is the spectral norm
    init_deriv -- initialise B with kernel_derivative instead of a random
                  matrix ? (bool)
    ls_maxiter -- max number of iterations during line search step size
                  selection (int)

    returns :
    B -- SDR matrix estimate (dxK array)

    raises :
    ValueError -- if X and Y do not have the same number of rows
    AssertionError -- if K > d, a scale parameter is not positive, or
                      tol is not positive
    """
    n, d = X.shape
    if n != Y.shape[0]:
        raise ValueError('X and Y have incompatible dimensions')
    assert K <= d, 'dimension K must be lower than d !'
    assert sigma_x > 0 and sigma_y > 0, 'scale parameters must be positive!'
    assert tol > 0, 'tolerance factor must be >0'

    if init_deriv:
        print('Initialization by derivative method...\n')
        # the trace returned alongside B is not needed here
        B, _ = kernel_derivative(X, Y, K, np.sqrt(anl) * sigma_x, sigma_y, eps)
    else:
        # random start, reduced to its left singular vectors
        B = np.random.randn(d, K)
        B = linalg.svd(B, full_matrices=False)[0]

    # Gram matrix of Y (un-annealed scale), centred and symmetrised
    Gy = rbf_dot(Y, Y, sigma_y)
    Kyo = center_matrix(Gy)
    Kyo = (Kyo + Kyo.T) / 2

    # regularisation term added before every inversion
    ridge = eps * n * np.eye(n)

    def _unannealed_trace(basis):
        # objective value for `basis`, using the un-annealed scale sigma_x
        Zu = np.dot(X, basis)
        Ku = center_matrix(rbf_dot(Zu, Zu, sigma_x))
        Ku = (Ku + Ku.T) / 2
        return np.sum(Kyo * linalg.inv(Ku + ridge))

    # objective function initial value
    tr = _unannealed_trace(B)
    if verbose:
        print('[0]trace =  %s' % (tr,))

    ssz2 = 2 * sigma_x**2
    ssy2 = 2 * sigma_y**2
    # NOTE(review): the annealed kernels are evaluated at sqrt(sz2), which
    # ends at sqrt(2) * sigma_x, while the reported trace uses sigma_x --
    # confirm this against the scale convention of rbf_dot.

    # h runs from 0 to max_loop - 1
    for h in range(max_loop):
        # float() keeps the annealing ratio exact when every input is an
        # integer and "/" is integer division (Python 2)
        sz2 = ssz2 + (anl - 1) * ssz2 * (max_loop - h - 1) / float(max_loop)
        sy2 = ssy2 + (anl - 1) * ssy2 * (max_loop - h - 1) / float(max_loop)

        Z = np.dot(X, B)
        Kzw = rbf_dot(Z, Z, np.sqrt(sz2))
        Kz = center_matrix(Kzw)
        Kzi = linalg.inv(Kz + ridge)

        Ky = rbf_dot(Y, Y, np.sqrt(sy2))
        Ky = center_matrix(Ky)
        Ky = (Ky + Ky.T) / 2

        # dB[a, b] = sum(KziKyzi * dKB.T), i.e. trace(KziKyzi . dKB)
        dB = np.zeros((d, K))
        KziKyzi = np.dot(Kzi, np.dot(Ky, Kzi))
        for a in range(d):
            # XX[i, j] = X[i, a] - X[j, a]
            Xa = np.tile(X[:, a][:, np.newaxis], (1, n))
            XX = Xa - Xa.T
            for b in range(K):
                # Zb - Zb.T holds Z[i, b] - Z[j, b]
                Zb = np.tile(Z[:, b][:, np.newaxis], (1, n))
                tt = XX * (Zb - Zb.T) * Kzw
                dKB = center_matrix(tt)
                dB[a, b] = np.sum(KziKyzi * dKB.T)

        # spectral norm of the gradient; tol > 0 keeps the division safe
        nm = linalg.norm(dB, 2)
        if nm < tol:
            break
        B, tr = KDR_linesearch(X, Ky, sz2, B, dB / nm, eta, eps,
                               ls_maxiter=ls_maxiter)
        B = linalg.svd(B, full_matrices=False)[0]

        # compute trace with unannealed parameter
        if verbose:
            tr = _unannealed_trace(B)
            print('[%d]trace = %.6f' % (h + 1, tr))
    return B
def kdr_optim(X, Y, K, max_loop, sigma_x, sigma_y, eps, eta, anl, verbose=True,
              tol=1e-9, init_deriv=False, ls_maxiter=30):
    """
    Iteratively refine the SDR matrix B with annealed kernel scales.

    Every iteration anneals the squared kernel scales, builds the gradient
    dB one entry at a time, asks KDR_linesearch for a step along
    dB / ||dB||_s and replaces B by the left singular vectors of the
    result.

    arguments :
    X -- nxd array of n samples, d features
    Y -- nxp array of class labels
    K -- target dimension of SDR subspace
    max_loop -- maximum number of iterations
    sigma_x -- scale factor for the Gaussian kernel associated to X (float)
    sigma_y -- scale factor for the Gaussian kernel associated to Y (float)
    eps -- regularization factor for matrix inversion (float)
    eta -- upper bound for linesearch step parameter (float)
    anl -- maximum annealing parameter (int/float)
    verbose -- print objective function value at each iteration ? (bool)
    tol -- stopping criterion for gradient descent, ie optim stops when
           ||dB||_s < tol (float) where ||.||_s is the spectral norm
    init_deriv -- start from the kernel_derivative estimate instead of a
                  random matrix ? (bool)
    ls_maxiter -- max number of iterations during line search step size
                  selection (int)

    returns :
    B -- SDR matrix estimate
    """
    n, d = X.shape
    if Y.shape[0] != n:
        raise ValueError('X and Y have incompatible dimensions')
    assert K <= d, 'dimension K must be lower than d !'
    assert sigma_x > 0 and sigma_y > 0, 'scale parameters must be positive!'
    assert tol > 0, 'tolerance factor must be >0'

    if not init_deriv:
        # random start, reduced to its left singular vectors
        B = linalg.svd(np.random.randn(d, K), full_matrices=False)[0]
    else:
        print('Initialization by derivative method...\n')
        B, _unused = kernel_derivative(X, Y, K, np.sqrt(anl) * sigma_x,
                                       sigma_y, eps)

    # Gram matrix of Y at the un-annealed scale, centred then symmetrised
    Kyo = center_matrix(rbf_dot(Y, Y, sigma_y))
    Kyo = (Kyo + Kyo.T) / 2

    def plain_trace(basis):
        # objective evaluated with the un-annealed scale sigma_x
        proj = np.dot(X, basis)
        gram = center_matrix(rbf_dot(proj, proj, sigma_x))
        gram = (gram + gram.T) / 2
        return np.sum(Kyo * linalg.inv(gram + eps * n * np.eye(n)))

    # objective function initial value
    tr = plain_trace(B)
    if verbose:
        print('[0]trace = {}'.format(tr))

    ssz2 = 2 * sigma_x**2
    ssy2 = 2 * sigma_y**2

    # h goes from 0 to max_loop - 1
    for h in range(max_loop):
        # squared scales shrink towards ssz2 / ssy2 as h grows
        sz2 = ssz2 + (anl - 1) * ssz2 * (max_loop - h - 1) / max_loop
        sy2 = ssy2 + (anl - 1) * ssy2 * (max_loop - h - 1) / max_loop

        Z = np.dot(X, B)
        Kzw = rbf_dot(Z, Z, np.sqrt(sz2))
        Kzi = linalg.inv(center_matrix(Kzw) + eps * n * np.eye(n))

        Ky = center_matrix(rbf_dot(Y, Y, np.sqrt(sy2)))
        Ky = (Ky + Ky.T) / 2

        weights = np.dot(Kzi, np.dot(Ky, Kzi))
        dB = np.zeros((d, K))
        for a in range(d):
            # x_diff[i, j] = X[i, a] - X[j, a]
            x_col = X[:, a]
            x_diff = x_col[:, np.newaxis] - x_col[np.newaxis, :]
            for b in range(K):
                # z_diff[i, j] = Z[i, b] - Z[j, b]
                z_col = Z[:, b]
                z_diff = z_col[:, np.newaxis] - z_col[np.newaxis, :]
                centred = center_matrix(x_diff * z_diff * Kzw)
                # same value as the trace of weights . centred
                dB[a, b] = np.sum(weights * centred.T)

        # stop once the spectral norm of the gradient falls below tol
        nm = linalg.norm(dB, 2)
        if nm < tol:
            break

        B, tr = KDR_linesearch(X, Ky, sz2, B, dB / nm, eta, eps,
                               ls_maxiter=ls_maxiter)
        B = linalg.svd(B, full_matrices=False)[0]

        # compute trace with unannealed parameter
        if verbose:
            tr = plain_trace(B)
            print('[%d]trace = %.6f' % (h + 1, tr))
    return B