Example no. 1
0
def nystrom_kernel_svd(X, kernel_f, n, k, bs=512):
    """Compute top eigensystem of kernel matrix using Nystrom method.

    Arguments:
        X: data matrix of shape (n_sample, n_feature).
        kernel_f: kernel tensor function k(X, Y).
        n: number of training points.
        k: top-k eigensystem.
        bs: batch size.

    Returns:
        s: top eigenvalues of shape (k).
        U: top eigenvectors of shape (n_sample, k).
    """

    n_sample, n_feature = X.shape

    # Wrap the kernel function in a Keras model so the Gram matrix can be
    # evaluated batch-by-batch against X.
    feat_in = Input(shape=(n_feature, ), dtype='float32',
                    name='feat-for-nystrom')
    embed = KernelEmbedding(kernel_f, X)(feat_in)
    evaluator = Model(feat_in, embed)

    gram = evaluator.predict(X, batch_size=bs)

    # Rescale the Gram matrix so its spectrum approximates that of the
    # n-point kernel operator.
    ratio = np.sqrt(n) / np.sqrt(n_sample)
    scale_col = np.float32(np.ones((n_sample, 1)) * ratio)
    scaled = scale_col * gram * scale_col.T

    # eigh returns eigenvalues in ascending order; request the top-k band
    # and flip it into descending order.
    w, V = sp.linalg.eigh(scaled, eigvals=(n_sample - k, n_sample - 1))
    top_vals = w[::-1][:k]
    top_vecs = V[:, ::-1][:, :k]

    return top_vals, np.float32(scale_col * top_vecs)
def nystrom_kernel_svd(X, kernel_f, q, bs=512):
    """Compute top eigensystem of kernel matrix using Nystrom method.

    Arguments:
        X: data matrix of shape (n_sample, n_feature).
        kernel_f: kernel tensor function k(X, Y).
        q: top-q eigensystem.
        bs: batch size.

    Returns:
        s: top eigenvalues of shape (q).
        U: (rescaled) top eigenvectors of shape (n_sample, q).
    """

    n_sample, n_feature = X.shape

    # Wrap the kernel function in a Keras model so the full Gram matrix
    # can be evaluated in mini-batches.
    feat_in = Input(shape=(n_feature, ), dtype='float32',
                    name='feat-for-nystrom')
    embed = KernelEmbedding(kernel_f, X)(feat_in)
    evaluator = Model(feat_in, embed)

    gram = evaluator.predict(X, batch_size=bs)

    # Normalize by the sample count; eigh yields ascending eigenvalues,
    # so flip to descending and keep the leading q.
    w, V = sp.linalg.eigh(gram / n_sample,
                          eigvals=(n_sample - q, n_sample - 1))
    top_vals = w[::-1][:q]
    top_vecs = V[:, ::-1][:, :q]

    return top_vals, np.float32(top_vecs / np.sqrt(n_sample))
Example no. 3
0
def asm_eigenpro_f(feat, phi, M, k, tau, in_rkhs=False, seed=1):
    """Assemble eigenpro map and calculate step size scale factor
    such that the update rule,
        p <- p - eta * g
    becomes,
        p <- p - scale * eta * (g - f(g))

    Arguments:
        feat:   feature matrix.
        phi:    feature map or kernel function.
        M:      subsample size.
        k:      top-k eigensystem for eigenpro.
        tau:    damping factor.
        in_rkhs: if True, phi is a kernel function k(x, y) and the map
            operates in the RKHS; otherwise phi is a feature map.
        seed:   random seed for the subsample selection.

    Returns:
        f:      tensor function.
        scale:  factor that rescales step size.
        s0:     largest eigenvalue.
    """

    np.random.seed(seed)  # set random seed for subsamples
    start = time.time()
    n, D = feat.shape
    x = Input(shape=(D, ), dtype='float32', name='feat')
    if in_rkhs:
        if n >= 10**5:
            # Large sample: approximate the eigensystem with Nystrom
            # instead of a randomized SVD on the full kernel matrix.
            _s, _V = nystrom_kernel_svd(feat, phi, M, k)  # phi is k(x, y)
        else:
            # Small sample: evaluate the kernel embedding and run
            # randomized SVD on it.
            kfeat = KernelEmbedding(phi, feat, input_shape=(D, ))(x)
            model = Model(x, kfeat)
            fmap = lambda _x: model.predict(_x, batch_size=1024)
            _s, _V, _sk = rsvd(feat, fmap, M, k)  # phi is a feature map
    else:
        model = Model(x, phi(x))
        fmap = lambda _x: model.predict(_x, batch_size=1024)
        _s, _V, _sk = rsvd(feat, fmap, M, k)  # phi is a feature map
    # Keep the top-k eigenpairs; _sk becomes the smallest retained
    # eigenvalue, used below to bound the preconditioned spectrum.
    _s, _sk, _V = _s[:k], _s[-1], _V[:, :k]
    print("SVD time: %.2f, Eigenvalue ratio: %.2f" %
          (time.time() - start, _s[0] / _sk))

    # Freeze the eigensystem into backend constants for the tensor map.
    s = K.constant(_s)
    V = K.constant(_V)
    sk = K.constant(_sk)

    if in_rkhs:
        scale = np.sqrt(_s[0] / _sk, dtype='float32')
        # NOTE(review): D here shadows the feature-dimension variable
        # above; from this point on it is the preconditioner diagonal.
        D = (1 - K.sqrt(tau * sk / s)) / s
        # f projects the gradient g onto the top-k eigendirections of the
        # kernel (via kfeat) and rescales them by D.
        f = lambda g, kfeat: K.dot(V * D, K.dot(K.transpose(K.dot(kfeat, V)), g
                                                ))
        s0 = 2 * _s[0] / n
    else:
        scale = np.float32(_s[0] / _sk)
        D = 1 - tau * sk / s
        f = lambda g: K.dot(V * D, K.dot(K.transpose(V), g))
        s0 = np.float32(_s[0] / np.sqrt(n))

    return f, scale, s0
Example no. 4
0
def nystrom_kernel_svd(X, kernel_f, m, k, bs=512):
    """Compute top eigensystem of kernel matrix using Nystrom method.

    Arguments:
        X: data matrix of shape (n_sample, n_feature).
        kernel_f: kernel tensor function k(X, Y).
        m: subsample factor.
        k: top-k eigensystem.
        bs: batch size.

    Returns:
        s: top eigenvalues of shape (k).
        U: top eigenvectors of shape (n_sample, k).
    """

    n, d = X.shape
    # Draw a random subsample of at most m points for the Nystrom sketch.
    m = min(m, n)
    inx = np.random.permutation(n)[:m]
    Xm = X[inx]

    # Assemble kernel function evaluator.
    input_shape = (d, )
    x = Input(shape=input_shape, dtype='float32', name='nystrom-kernel-feat')
    K_t = KernelEmbedding(kernel_f, Xm)(x)
    kernel_tf = Model(x, K_t)

    # Gram matrix on the subsample, rescaled so its spectrum matches the
    # full n-point kernel operator.
    Kmm = kernel_tf.predict(Xm, batch_size=bs)
    D = np.float32(np.ones((m, 1)) * np.sqrt(n) / np.sqrt(m))
    W = D * Kmm * D.T

    # SVD of the symmetric W yields its eigensystem in descending order.
    U1r, sr, _ = np.linalg.svd(W)
    s = sr[:k]
    DU1 = K.variable(D * U1r[:, :k])

    # Extend the subsample eigenvectors to all n points by applying the
    # kernel map, then normalize and re-orthogonalize the columns.
    U2_t = Lambda(lambda _K: K.dot(_K, DU1))(K_t)
    U2_tf = Model(x, U2_t)
    U2 = U2_tf.predict(X, batch_size=bs)
    U = U2 / np.linalg.norm(U2, axis=0, keepdims=True)
    NU = GramSchmidtProcess(U.T).T

    return s, NU
Example no. 5
0
    def __init__(self,
                 kernel,
                 centers,
                 n_label,
                 mem_gb,
                 n_subsample=None,
                 k=None,
                 bs=None,
                 metric='accuracy',
                 scale=.5,
                 seed=1):
        """Assemble learner using EigenPro iteration/kernel.

        Arguments:
            kernel: kernel tensor function k(X, Y).
            centers: kernel centers of shape (n_center, n_feature).
            n_label: number of labels.
            mem_gb: GPU memory in GB.
            n_subsample: number of subsamples for preconditioner.
            k: top-k eigensystem for preconditioner.
            bs: mini-batch size.
            metric: keras metric, e.g., 'accuracy'.
            scale: scale factor (currently unused in this constructor).
            seed: random seed.
        """

        n, d = centers.shape
        # Default subsample size grows with the dataset.
        if n_subsample is None:
            if n < 100000:
                n_subsample = 2000
            else:
                n_subsample = 10000

        mem_bytes = mem_gb * 1024**3 - 100 * 1024**2  # preserve 100MB
        # Has a factor 3 due to tensorflow implementation.
        # mG is the largest batch size whose working set fits in memory.
        mem_usages = (d + n_label + 3 * np.arange(n_subsample)) * n * 4
        mG = np.sum(mem_usages < mem_bytes)  # device-dependent batch size

        # Calculate batch/step size for improved EigenPro iteration.
        np.random.seed(seed)
        pinx = np.random.choice(n, n_subsample, replace=False).astype('int32')
        # Preconditioner built from a random subsample of the centers:
        # kf is the eigenpro map, gap/s1/beta drive the step-size math.
        kf, gap, s1, beta = pre_eigenpro_f(centers[pinx],
                                           kernel,
                                           k,
                                           n,
                                           mG,
                                           alpha=.95,
                                           seed=seed)
        new_s1 = s1 / gap

        if bs is None:
            bs = min(np.int32(beta / new_s1 + 1), mG)

        # Step size eta depends on which regime the batch size falls in
        # (small batch, mid batch, or full gradient).
        if bs < beta / new_s1 + 1:
            eta = bs / beta
        elif bs < n:
            eta = 2 * bs / (beta + (bs - 1) * new_s1)
        else:
            eta = 0.95 * 2 / new_s1
        eta = .5 * eta  # .5 for constant related to mse loss.

        print("n_subsample=%d, mG=%d, eta=%.2f, bs=%d, s1=%.2e, beta=%.2f" %
              (n_subsample, mG, eta, bs, s1, beta))
        eta = np.float32(eta * n_label)

        # Assemble kernel model.
        # Inputs carry an extra trailing column holding the sample index,
        # which PSGD uses to route per-sample updates.
        ix = Input(shape=(d + 1, ), dtype='float32', name='indexed-feat')
        x, index = utils.separate_index(ix)  # features, sample_id
        kfeat = KernelEmbedding(kernel, centers, input_shape=(d, ))(x)

        # Linear layer on top of the kernel features; zero-initialized so
        # the model starts at the zero function.
        y = Dense(n_label,
                  input_shape=(n, ),
                  activation='linear',
                  kernel_initializer='zeros',
                  use_bias=False)(kfeat)
        model = Model(ix, y)
        model.compile(loss='mse',
                      optimizer=PSGD(pred_t=y,
                                     index_t=index,
                                     eta=eta,
                                     eigenpro_f=asm_eigenpro_f(
                                         kf, kfeat, pinx)),
                      metrics=[metric])

        self.n_label = n_label
        self.seed = seed
        self.bs = bs
        self.model = model
Example no. 6
0
    raise Exception("Unknown kernel function - %s. \
                     Try Gaussian, Laplace, or Cauchy" % args_dict['kernel'])

# Registry of named trainers; each entry bundles a compiled model with its
# index-augmented train/test inputs.
trainers = collections.OrderedDict()
Trainer = collections.namedtuple('Trainer', ['model', 'x_train', 'x_test'])

# Calculate step size and (Primal) EigenPro preconditioner.
# NOTE(review): assumes x_train, kernel, M, k, num_classes, D and n are
# defined earlier in this script -- confirm against the full file.
kf, scale, s0 = utils.asm_eigenpro_f(x_train, kernel, M, k, 1, in_rkhs=True)
eta = np.float32(1.5 / s0)  # 1.5 / s0
eta = eta * num_classes  # correction due to mse loss

# Assemble Pegasos trainer.
input_shape = (D + 1, )  # n_feature, (sample) index
ix = Input(shape=input_shape, dtype='float32', name='indexed-feat')
x, index = utils.separate_index(ix)  # features, sample_id
kfeat = KernelEmbedding(kernel, x_train, input_shape=(D, ))(x)
# Zero-initialized linear layer over kernel features (starts at the zero
# function); bias-free as is standard for kernel regression.
y = Dense(num_classes,
          input_shape=(n, ),
          activation='linear',
          kernel_initializer='zeros',
          use_bias=False)(kfeat)

model = Model(ix, y)
model.compile(loss='mse',
              optimizer=PSGD(pred_t=y, index_t=index, eta=eta),
              metrics=['accuracy'])
trainers['Pegasos'] = Trainer(model=model,
                              x_train=utils.add_index(x_train),
                              x_test=utils.add_index(x_test))

# Assemble kernel EigenPro trainer.