Example #1
def prediction(x_star, y_star_true, X_train, L_inv, W, first_deri,
               kernel_parameter):
    """
    make prediction
    :param x_star: input test point
    :param y_star_true: its label
    :param X_train: training dataset
    :param L_inv: L inversion after Cholesky decomposition
    :param W:
    :param first_deri: first derivative at mode(optimal point after Newton update)
    :param kernel_parameter: specify kernel parameter
    :return: prediction label
    """
    l = 1
    # predictive mean and variance of the latent function at x_star (GPML, Algorithm 3.2)
    k_star = RBF_kernel(X_train, x_star, kernel_parameter, l)
    f_star_mean = np.dot(k_star.T, first_deri)
    v = np.dot(L_inv, np.dot(np.sqrt(W), k_star))
    k_ss = RBF_kernel(x_star, x_star, kernel_parameter, l)
    var_f_star = k_ss - np.dot(v.T, v)  # predictive variance (computed but not needed for the hard label)
    return label_function(f_star_mean) == y_star_true
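
These examples call helpers such as RBF_kernel and label_function that are defined elsewhere in the repository. The sketch below is only a plausible reconstruction consistent with how they are called (sigma as the output variance and l as the lengthscale, following the docstrings in the later examples), not the repository's actual implementation:

import numpy as np

def RBF_kernel(X1, X2, sigma, l):
    """Hypothetical squared-exponential kernel matching the call sites above."""
    X1 = np.atleast_2d(X1)
    X2 = np.atleast_2d(X2)
    # squared Euclidean distance between every pair of rows
    sq_dist = np.sum(X1**2, axis=1)[:, None] + np.sum(X2**2, axis=1)[None, :] \
              - 2 * np.dot(X1, X2.T)
    return sigma * np.exp(-0.5 * sq_dist / l**2)

def label_function(f_star_mean):
    """Hypothetical decision rule: threshold the latent mean at zero."""
    return 1 if f_star_mean > 0 else -1
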
def bayesian_opt(X_train, X_test, y_train):
    """
    compute current GP for Bayesian optimization
    :param X_train: training data
    :param X_test: test data
    :param y_train: training targets
    :return: mean of GP posterior function, standard deviation, GP posterior function
    """
    s = 0.0001  # noise variance (the noise itself is zero-mean)
    n = len(X_test)  # number of test points
    N = len(X_train)  # number of training points
    num_fun = 1  # number of posterior functions to sample

    K = RBF_kernel(X_train, X_train, 1, 1)
    K_s = RBF_kernel(X_train, X_test, 1, 1)
    K_ss = RBF_kernel(X_test, X_test, 1, 1)

    # Cholesky factor of the noisy training covariance; alpha = (K + s*I)^{-1} y
    L = np.linalg.cholesky(K + s * np.eye(N))
    m = np.linalg.solve(L, y_train)
    alpha = np.linalg.solve(L.T, m)

    # compute mean of test points for posterior
    mu_post = np.dot(K_s.T, alpha)
    v = np.linalg.solve(L, K_s)

    # compute variance for test points
    var_test = np.diag(K_ss) - np.sum(v**2, axis=0)
    stand_devi = np.sqrt(var_test)

    # draw sample functions at the test points from the GP posterior
    L_ = np.linalg.cholesky(K_ss + 1e-6 * np.eye(n) - np.dot(v.T, v))
    f_post_fun = mu_post.reshape(-1, 1) + np.dot(
        L_, np.random.normal(size=(n, num_fun)))
    #plot_BO(X_train, y_train, X_test, f_post_fun, mu_post, stand_devi)
    return mu_post, stand_devi, f_post_fun
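
A minimal usage sketch, assuming the hypothetical RBF_kernel above and a toy 1-D regression problem (the data below are illustrative, not from the original repository):

X_train = np.linspace(-4, 4, 8).reshape(-1, 1)   # toy training inputs
y_train = np.sin(X_train).ravel()                # toy training targets
X_test = np.linspace(-5, 5, 100).reshape(-1, 1)  # candidate points

mu_post, stand_devi, f_post_fun = bayesian_opt(X_train, X_test, y_train)
# mu_post / stand_devi can feed an acquisition function (e.g. UCB or EI);
# f_post_fun is one sample drawn from the GP posterior.
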
Example #3
def prediction(x_star, y_star_true, X_train, C, y, pi_vector,
               kernel_parameter):
    """
    make prediction
    :param x_star: test input
    :param y_star_true: label of test input
    :param X_train: training dataset
    :param C: num of classes
    :param y: labels of training dataset
    :param pi_vector: pi vector which computed through softmax function
    :param kernel_parameter: parameter for kernel
    :return: true or false
    """
    n = len(X_train)
    l = 1
    k_star = RBF_kernel(X_train, x_star, kernel_parameter, l)
    f_star_mean = np.zeros((C, ))
    # per-class latent mean: f*_c = k_star^T (y_c - pi_c)
    for c in range(C):
        f_star_mean[c] = np.dot(
            k_star.T, y[c * n:(c + 1) * n] - pi_vector[c * n:(c + 1) * n])
    return np.argmax(f_star_mean) == y_star_true
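
A hedged usage sketch of this multi-class prediction, mirroring the setup in Example #7 below (all variable names are taken from that example and assumed to be in scope):

correct = 0
for x_star, y_star in zip(X_test, y_test):
    correct += prediction(x_star, y_star, X_train, num_classes, y_targets,
                          pi_vector, kernel_parameter)
print('test accuracy: %.3f' % (correct / float(len(X_test))))
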
def compute_mar_likelihood(X_train, X_test, y_train, sigma, l):
    """
    compute log marginal likelihood for tuning parameters using Bayesian optimization
    :param X_train: training data
    :param X_test: test data
    :param y_train: training targets
    :param sigma: output variance
    :param l: lengthscale
    :return: log marginal likelihood
    """
    s = 0.0005  # noise variance (the noise itself is zero-mean)
    n = len(X_train)

    # choose RBF kernel in this regression case
    K_train = RBF_kernel(X_train, X_train, sigma, l)
    L = np.linalg.cholesky(K_train + s * np.eye(n))
    m = np.linalg.solve(L, y_train)
    alpha = np.linalg.solve(L.T, m)

    # compute log marginal likelihood
    log_marg_likelihood = -.5 * np.dot(y_train.T, alpha) - np.log(
        np.diagonal(L)).sum(0) - n / 2.0 * np.log(2 * np.pi)
    return log_marg_likelihood
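
Since this returns a scalar score, hyperparameter settings can be compared directly; a minimal grid-evaluation sketch (the grids are illustrative only):

import itertools

best = None
for sigma, l in itertools.product([0.5, 1.0, 2.0], [0.1, 0.5, 1.0, 2.0]):
    lml = compute_mar_likelihood(X_train, X_test, y_train, sigma, l)
    if best is None or lml > best[0]:
        best = (lml, sigma, l)
print('best (log marginal likelihood, sigma, l):', best)
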
Example #5
    num_sampling = num_train
    kernel_parameter = 1

    # plot dataset scatter at first
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    plt.subplot(2, 3, 1)
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    plt.scatter(X_test[:, 0],
                X_test[:, 1],
                c=y_test,
                cmap=cm_bright,
                alpha=0.6)
    plt.title('data points')

    # compute covariance matrix K under RBF_kernel.
    K_train = RBF_kernel(X_train, X_train, kernel_parameter, l=1)

    # sampling points for GP prior function
    x1_min = np.min(X[:, 0])
    x1_max = np.max(X[:, 0])
    x2_min = np.min(X[:, 1])
    x2_max = np.max(X[:, 1])
    X1_sampling = np.linspace(x1_min, x1_max, num_sampling).reshape(-1, 1)
    X2_sampling = np.linspace(x2_min, x2_max, num_sampling).reshape(-1, 1)
    X_sampling = np.concatenate((X1_sampling, X2_sampling), axis=1)

    # sample GP prior functions f for the likelihood
    mu_prior = np.zeros((num_sampling, 1))
    num_sampling = X_sampling.shape[0]
    f_prior = f_prior(X_sampling, mu_prior, 'rbf', num_sampling, num_funs)
    print('shape of prior function:', f_prior.shape)
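
The fragment above calls an f_prior helper defined elsewhere in the repository. A plausible sketch, assuming it draws num_funs zero-mean GP prior functions at the sampling locations via a Cholesky factor of the kernel matrix (the 'rbf' branch and the jitter term are assumptions):

def f_prior(X, mu, kernel_name, num_sampling, num_funs):
    """Hypothetical GP prior sampler matching the call above."""
    assert kernel_name == 'rbf'
    K = RBF_kernel(X, X, 1, 1)                               # prior covariance at the sampling points
    L = np.linalg.cholesky(K + 1e-6 * np.eye(num_sampling))  # jitter for numerical stability
    return mu + np.dot(L, np.random.normal(size=(num_sampling, num_funs)))
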
def tune_hyperparms_first(X_train, X_test, y_train, num_fun, sigma, l):
    """
    maximize log marginal likelihood using gradient ascent
    :param X_train: training data
    :param X_test: test data
    :param y_train: training target
    :param num_fun: number of posterior functions to sample
    :param sigma: the output variance of the RBF kernel
    :param l: lengthscale
    :return: mean, standard deviation, posterior function, maximum log marginal likelihood
    """
    s = 0.0005  # noise variance (the noise itself is zero-mean)
    log_marg_likelihood_old = 0
    tolerance = 0.001
    n = len(X_train)
    N = len(X_test)

    for i in range(10000):
        # choose RBF kernel in this regression case
        K_train = RBF_kernel(X_train, X_train, sigma, l)
        K_s = RBF_kernel(X_train, X_test, sigma, l)
        K_ss = RBF_kernel(X_test, X_test, sigma, l)

        L = np.linalg.cholesky(K_train + s * np.eye(n))
        m = np.linalg.solve(L, y_train)
        alpha = np.linalg.solve(L.T, m)

        # compute mean of test points for posterior
        mu_post = np.dot(K_s.T, alpha)
        v = np.linalg.solve(L, K_s)

        # compute variance for test points
        var_test = np.diag(K_ss) - np.sum(v**2, axis=0)
        stand_devi = np.sqrt(var_test)

        # compute log marginal likelihood
        #log_marg_likelihood = -.5 * np.dot(y_train.T, alpha) - np.diagonal(L).sum(0) - n / 2 * np.log(2 * np.pi)
        log_marg_likelihood = -.5 * np.dot(y_train.T, alpha) - np.log(
            np.diagonal(L)).sum(0) - n / 2.0 * np.log(2 * np.pi)

        # tune the hyperparameters for RBF kernel
        K_y_inv = np.dot(np.linalg.inv(L.T), np.linalg.inv(L))
        sigma, l = gradient_ascent(X_train, X_train, sigma, l,
                                   alpha.reshape(-1, 1), K_y_inv)

        error = np.sqrt(
            np.sum((log_marg_likelihood - log_marg_likelihood_old)**2))
        log_marg_likelihood_old = log_marg_likelihood
        if error <= tolerance:
            print("The hyperparameter tuning function has converged after %d iterations!" % (i + 1))
            print("The error is %s" % error)
            print("training end!")
            break

    optimal_likelihood = log_marg_likelihood
    print('optimal lengthscale is: %s' % l[0])
    print('maximum log marginal likelihood is: %s' % optimal_likelihood)
    # draw sample functions at the test points from the GP posterior
    L_ = np.linalg.cholesky(K_ss + 1e-6 * np.eye(N) - np.dot(v.T, v))
    f_post_fun = mu_post.reshape(-1, 1) + np.dot(
        L_, np.random.normal(size=(N, num_fun)))
    plt.axis([-5, 5, -3, 3])
    return mu_post, stand_devi, f_post_fun, optimal_likelihood
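
The gradient_ascent helper is defined elsewhere in the repository. Its update is presumably based on the standard gradient of the log marginal likelihood, d/d(theta) log p(y|X, theta) = 0.5 * tr((alpha alpha^T - K_y^{-1}) dK/d(theta)) (GPML, eq. 5.9). A hedged sketch of one such step for the RBF lengthscale, assuming numpy is imported as np, X_train has shape (n, d), and the kernel form k(x, x') = sigma * exp(-||x - x'||^2 / (2 l^2)) (the step size is an arbitrary choice):

def lengthscale_gradient_step(X_train, sigma, l, alpha, K_y_inv, step=0.01):
    # pairwise squared distances and the kernel matrix
    sq_dist = np.sum(X_train**2, axis=1)[:, None] + \
              np.sum(X_train**2, axis=1)[None, :] - 2 * np.dot(X_train, X_train.T)
    K = sigma * np.exp(-0.5 * sq_dist / l**2)
    dK_dl = K * sq_dist / l**3                     # derivative of K w.r.t. the lengthscale
    grad_l = 0.5 * np.trace(np.dot(np.dot(alpha, alpha.T) - K_y_inv, dK_dl))
    return l + step * grad_l                       # one gradient-ascent step on l
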
Example #7
        X = np.load('X_multi.npy')
        y = np.load('y_multi.npy')
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=.4, random_state=42)
    num_train = len(X_train)
    num_test = len(X_test)
    num_classes = np.size(np.unique(y))

    # hyper-parameters
    num_funs = num_classes  # num of GP prior functions = num of categories
    kernel_parameter = 1
    l = 1

    # compute kernel matrix K
    for c in range(num_classes):
        K_sub = RBF_kernel(X_train, X_train, kernel_parameter, l)
        if c == 0:
            K = K_sub
        else:
            K = block_diag(K, K_sub)
    # generate stacked 0/1 targets for the training dataset (one block of length num_train per class)
    y_targets = np.zeros((num_classes * num_train, ))
    index = np.arange(num_train)
    indices = y_train * num_train + index  # position of the 1 inside each class block
    y_targets[indices] = 1

    # train the model
    #model_training(K, y_targets, num_classes, num_train)
    pi_vector = model_training2(K, y_targets, num_classes, num_train)
    true_count = np.ones(num_test)
    for i in range(len(X_test)):