def nystrom_kernel_svd(X, kernel_f, n, k, bs=512):
    """Compute top eigensystem of kernel matrix using Nystrom method.

    Arguments:
        X: data matrix of shape (n_sample, n_feature).
        kernel_f: kernel tensor function k(X, Y).
        n: number of training points.
        k: top-k eigensystem.
        bs: batch size.

    Returns:
        s: top eigenvalues of shape (k).
        U: top eigenvectors of shape (n_sample, k).
    """
    m, d = X.shape

    # Evaluate the kernel matrix K(X, X) through a small Keras graph.
    feat_in = Input(shape=(d, ), dtype='float32', name='feat-for-nystrom')
    embed = KernelEmbedding(kernel_f, X)(feat_in)
    kmat = Model(feat_in, embed).predict(X, batch_size=bs)

    # Symmetric rescaling of the subsample kernel by sqrt(n / m) on both sides.
    rescale = np.float32(np.ones((m, 1)) * np.sqrt(n) / np.sqrt(m))
    scaled_kmat = rescale * kmat * rescale.T

    # eigh yields eigenvalues in ascending order; request only the largest k
    # and flip into descending order.
    # NOTE(review): the `eigvals` keyword is deprecated in newer scipy
    # (replacement: `subset_by_index`); kept as-is to match this file's
    # scipy usage elsewhere — confirm the pinned scipy version.
    evals, evecs = sp.linalg.eigh(scaled_kmat, eigvals=(m - k, m - 1))
    s = evals[::-1][:k]
    top_vecs = evecs[:, ::-1][:, :k]
    return s, np.float32(rescale * top_vecs)
def nystrom_kernel_svd(X, kernel_f, q, bs=512):
    """Compute top eigensystem of kernel matrix using Nystrom method.

    Arguments:
        X: data matrix of shape (n_sample, n_feature).
        kernel_f: kernel tensor function k(X, Y).
        q: top-q eigensystem.
        bs: batch size.

    Returns:
        s: top eigenvalues of shape (q).
        U: (rescaled) top eigenvectors of shape (n_sample, q).
    """
    m, d = X.shape

    # Evaluate the full kernel matrix K(X, X) via a one-layer Keras model.
    feat_in = Input(shape=(d, ), dtype='float32', name='feat-for-nystrom')
    embed = KernelEmbedding(kernel_f, X)(feat_in)
    kmat = Model(feat_in, embed).predict(X, batch_size=bs)

    # Eigendecompose K / m. eigh returns ascending eigenvalues, so take the
    # top-q band and flip it into descending order.
    evals, evecs = sp.linalg.eigh(kmat / m, eigvals=(m - q, m - 1))
    s = evals[::-1][:q]

    # Rescale eigenvectors by 1 / sqrt(m).
    NU = np.float32(evecs[:, ::-1][:, :q] / np.sqrt(m))
    return s, NU
def asm_eigenpro_f(feat, phi, M, k, tau, in_rkhs=False, seed=1):
    """Assemble eigenpro map and calculate step size scale factor
    such that the update rule,
        p <- p - eta * g
    becomes,
        p <- p - scale * eta * (g - f(g))

    Arguments:
        feat: feature matrix.
        phi: feature map or kernel function.
        M: subsample size.
        k: top-k eigensystem for eigenpro.
        tau: damping factor.
        in_rkhs: True when phi is a kernel function k(x, y); False when
            phi is a (tensor) feature map.
        seed: random seed for subsampling.

    Returns:
        f: tensor function.
        scale: factor that rescales step size.
        s0: largest eigenvalue (scaled).
    """
    np.random.seed(seed)  # set random seed for subsamples
    start = time.time()
    n, D = feat.shape
    x = Input(shape=(D, ), dtype='float32', name='feat')
    # Estimate the top-k eigensystem of the operator on a subsample of size M.
    if in_rkhs:
        if n >= 10**5:
            # Large n: Nystrom approximation of the kernel eigensystem.
            _s, _V = nystrom_kernel_svd(feat, phi, M, k)  # phi is k(x, y)
        else:
            # Smaller n: randomized SVD on the kernel embedding of feat.
            kfeat = KernelEmbedding(phi, feat, input_shape=(D, ))(x)
            model = Model(x, kfeat)
            fmap = lambda _x: model.predict(_x, batch_size=1024)
            _s, _V, _sk = rsvd(feat, fmap, M, k)  # phi is a feature map
    else:
        model = Model(x, phi(x))
        fmap = lambda _x: model.predict(_x, batch_size=1024)
        _s, _V, _sk = rsvd(feat, fmap, M, k)  # phi is a feature map
    # Keep the top-k eigenvalues/vectors; _sk becomes the smallest retained
    # eigenvalue (this overwrites the rsvd value; in the Nystrom branch it is
    # defined here for the first time).
    _s, _sk, _V = _s[:k], _s[-1], _V[:, :k]
    print("SVD time: %.2f, Eigenvalue ratio: %.2f" % (time.time() - start, _s[0] / _sk))
    s = K.constant(_s)
    V = K.constant(_V)
    sk = K.constant(_sk)
    if in_rkhs:
        scale = np.sqrt(_s[0] / _sk, dtype='float32')
        # NOTE: D is reused here as the per-eigenvalue damping vector; it
        # shadows the feature dimension bound above (no longer needed).
        D = (1 - K.sqrt(tau * sk / s)) / s
        # f projects the gradient g through the top eigenspace via the
        # kernel features kfeat.
        f = lambda g, kfeat: K.dot(V * D, K.dot(K.transpose(K.dot(kfeat, V)), g))
        s0 = 2 * _s[0] / n
    else:
        scale = np.float32(_s[0] / _sk)
        D = 1 - tau * sk / s
        f = lambda g: K.dot(V * D, K.dot(K.transpose(V), g))
        s0 = np.float32(_s[0] / np.sqrt(n))
    return f, scale, s0
def nystrom_kernel_svd(X, kernel_f, m, k, bs=512):
    """Compute top eigensystem of kernel matrix using Nystrom method.

    A random subsample of size m is drawn from X; the eigensystem of the
    rescaled subsample kernel matrix is extended to all rows of X and then
    re-orthonormalized.

    Arguments:
        X: data matrix of shape (n_sample, n_feature).
        kernel_f: kernel tensor function k(X, Y).
        m: subsample size (capped at n_sample).
        k: top-k eigensystem.
        bs: batch size.

    Returns:
        s: top eigenvalues of shape (k).
        U: top eigenvectors of shape (n_sample, k).
    """
    n, d = X.shape
    m = min(m, n)  # cannot subsample more points than available
    # Uniform subsample without replacement (uses global numpy RNG state).
    inx = np.random.permutation(n)[:m]
    Xm = X[inx]
    # Assemble kernel function evaluator.
    input_shape = (d, )
    x = Input(shape=input_shape, dtype='float32', name='nystrom-kernel-feat')
    K_t = KernelEmbedding(kernel_f, Xm)(x)
    kernel_tf = Model(x, K_t)
    Kmm = kernel_tf.predict(Xm, batch_size=bs)  # m x m subsample kernel matrix
    # Symmetric rescaling by sqrt(n / m) before the decomposition.
    D = np.float32(np.ones((m, 1)) * np.sqrt(n) / np.sqrt(m))
    W = D * Kmm * D.T
    # W is symmetric, so SVD doubles as an eigendecomposition with singular
    # values already sorted descending — presumably W is PSD; TODO confirm.
    U1r, sr, _ = np.linalg.svd(W)
    s = sr[:k]
    # Extend the subsample eigenvectors to all n points: K(X, Xm) . (D U1).
    # Note: K here is the Keras backend module, not a kernel matrix.
    DU1 = K.variable(D * U1r[:, :k])
    U2_t = Lambda(lambda _K: K.dot(_K, DU1))(K_t)
    U2_tf = Model(x, U2_t)
    U2 = U2_tf.predict(X, batch_size=bs)
    # Normalize columns, then re-orthonormalize via Gram-Schmidt.
    U = U2 / np.linalg.norm(U2, axis=0, keepdims=True)
    NU = GramSchmidtProcess(U.T).T
    return s, NU
def __init__(self, kernel, centers, n_label, mem_gb, n_subsample=None, k=None, bs=None, metric='accuracy', scale=.5, seed=1):
    """Assemble learner using EigenPro iteration/kernel.

    Arguments:
        kernel: kernel tensor function k(X, Y).
        centers: kernel centers of shape (n_center, n_feature).
        n_label: number of labels.
        mem_gb: GPU memory in GB.
        n_subsample: number of subsamples for preconditioner.
        k: top-k eigensystem for preconditioner.
        bs: mini-batch size.
        metric: keras metric, e.g., 'accuracy'.
        scale: NOTE(review) appears unused in this constructor — confirm
            whether callers rely on it.
        seed: random seed.
    """
    n, d = centers.shape
    # Default subsample size grows with the training set size.
    if n_subsample is None:
        if n < 100000:
            n_subsample = 2000
        else:
            n_subsample = 10000
    mem_bytes = mem_gb * 1024**3 - 100 * 1024**2  # preserve 100MB
    # Has a factor 3 due to tensorflow implementation.
    mem_usages = (d + n_label + 3 * np.arange(n_subsample)) * n * 4
    mG = np.sum(mem_usages < mem_bytes)  # device-dependent batch size
    # Calculate batch/step size for improved EigenPro iteration.
    np.random.seed(seed)
    pinx = np.random.choice(n, n_subsample, replace=False).astype('int32')
    kf, gap, s1, beta = pre_eigenpro_f(centers[pinx], kernel, k, n, mG, alpha=.95, seed=seed)
    new_s1 = s1 / gap  # top eigenvalue after preconditioning
    if bs is None:
        bs = min(np.int32(beta / new_s1 + 1), mG)
    # Step size (eta) selection, piecewise in the batch size.
    if bs < beta / new_s1 + 1:
        eta = bs / beta
    elif bs < n:
        eta = 2 * bs / (beta + (bs - 1) * new_s1)
    else:
        eta = 0.95 * 2 / new_s1
    eta = .5 * eta  # .5 for constant related to mse loss.
    print("n_subsample=%d, mG=%d, eta=%.2f, bs=%d, s1=%.2e, beta=%.2f" % (n_subsample, mG, eta, bs, s1, beta))
    # Scale step size by the output width (cf. the mse-loss correction
    # applied to eta elsewhere in this file).
    eta = np.float32(eta * n_label)
    # Assemble kernel model.
    ix = Input(shape=(d + 1, ), dtype='float32', name='indexed-feat')
    x, index = utils.separate_index(ix)  # features, sample_id
    kfeat = KernelEmbedding(kernel, centers, input_shape=(d, ))(x)
    # Linear layer over kernel features, initialized to zero (no bias).
    y = Dense(n_label, input_shape=(n, ), activation='linear', kernel_initializer='zeros', use_bias=False)(kfeat)
    model = Model(ix, y)
    model.compile(loss='mse', optimizer=PSGD(pred_t=y, index_t=index, eta=eta, eigenpro_f=asm_eigenpro_f(kf, kfeat, pinx)), metrics=[metric])
    self.n_label = n_label
    self.seed = seed
    self.bs = bs
    self.model = model
raise Exception("Unknown kernel function - %s. \ Try Gaussian, Laplace, or Cauchy" % args_dict['kernel']) trainers = collections.OrderedDict() Trainer = collections.namedtuple('Trainer', ['model', 'x_train', 'x_test']) # Calculate step size and (Primal) EigenPro preconditioner. kf, scale, s0 = utils.asm_eigenpro_f(x_train, kernel, M, k, 1, in_rkhs=True) eta = np.float32(1.5 / s0) # 1.5 / s0 eta = eta * num_classes # correction due to mse loss # Assemble Pegasos trainer. input_shape = (D + 1, ) # n_feature, (sample) index ix = Input(shape=input_shape, dtype='float32', name='indexed-feat') x, index = utils.separate_index(ix) # features, sample_id kfeat = KernelEmbedding(kernel, x_train, input_shape=(D, ))(x) y = Dense(num_classes, input_shape=(n, ), activation='linear', kernel_initializer='zeros', use_bias=False)(kfeat) model = Model(ix, y) model.compile(loss='mse', optimizer=PSGD(pred_t=y, index_t=index, eta=eta), metrics=['accuracy']) trainers['Pegasos'] = Trainer(model=model, x_train=utils.add_index(x_train), x_test=utils.add_index(x_test)) # Assemble kernel EigenPro trainer.