Esempio n. 1
0
def predict_test(X_train,
                 Y_train,
                 X_test,
                 hyps,
                 str_cov=constants.STR_GP_COV,
                 prior_mu=None,
                 debug=False):
    """
    Predict with Gaussian process regression over `X_test`, given `X_train`, `Y_train`, and fixed hyperparameters `hyps`.

    The kernel matrix over `X_train` and its inverse are obtained from `gp_common.get_kernel_inverse`, and the prediction itself is delegated to `predict_test_`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param X_test: inputs. Shape: (l, d) or (l, m, d).
    :type X_test: numpy.ndarray
    :param hyps: dictionary of hyperparameters for Gaussian process.
    :type hyps: dict.
    :param str_cov: the name of covariance function.
    :type str_cov: str., optional
    :param prior_mu: None, or prior mean function.
    :type prior_mu: NoneType, or function, optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of posterior mean function over `X_test`, posterior standard deviation function over `X_test`, and posterior covariance matrix over `X_test`. Shape: ((l, 1), (l, 1), (l, l)).
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)

    :raises: AssertionError

    """

    # Type checks come first so that the shape checks below can rely on them.
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert isinstance(X_test, np.ndarray)
    assert isinstance(hyps, dict)
    assert isinstance(str_cov, str)
    assert isinstance(debug, bool)
    assert prior_mu is None or callable(prior_mu)
    assert Y_train.ndim == 2
    utils_gp.check_str_cov(
        'predict_test', str_cov, X_train.shape, shape_X2=X_test.shape)
    assert X_train.shape[0] == Y_train.shape[0]
    assert X_train.shape[1] == X_test.shape[1]

    # The third value returned by the helper (kernel gradients) is not
    # needed for prediction, so it is discarded.
    cov_X_X, inv_cov_X_X, _ = gp_common.get_kernel_inverse(
        X_train, hyps, str_cov, debug=debug)

    return predict_test_(
        X_train,
        Y_train,
        X_test,
        cov_X_X,
        inv_cov_X_X,
        hyps,
        str_cov=str_cov,
        prior_mu=prior_mu,
        debug=debug,
    )
Esempio n. 2
0
def get_kernel_cholesky(X_train,
                        hyps,
                        str_cov,
                        is_fixed_noise=constants.IS_FIXED_GP_NOISE,
                        is_gradient=False,
                        debug=False):
    """
    Build the noisy kernel matrix over `X_train` and its lower Cholesky factor.

    The kernel matrix is `covariance.cov_main(...)` plus `hyps['noise']**2` on the diagonal, symmetrized before factorization. If the factorization raises `numpy.linalg.LinAlgError`, `1e-2` is added to the diagonal and the factorization is attempted once more; the returned kernel matrix then includes that extra term.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param hyps: dictionary of hyperparameters for Gaussian process.
    :type hyps: dict.
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param is_gradient: flag for computing and returning gradients of kernel matrix.
    :type is_gradient: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train`, lower matrix computed by Cholesky decomposition, and gradients of kernel matrix. If `is_gradient` is False, gradients of kernel matrix would be None.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)

    :raises: AssertionError

    """

    assert isinstance(X_train, np.ndarray)
    assert isinstance(hyps, dict)
    assert isinstance(str_cov, str)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(is_gradient, bool)
    assert isinstance(debug, bool)
    utils_gp.check_str_cov('get_kernel_cholesky', str_cov, X_train.shape)

    num_X = X_train.shape[0]

    kernel = covariance.cov_main(str_cov, X_train, X_train, hyps, True)
    kernel = kernel + hyps['noise']**2 * np.eye(num_X)
    # Average with the transpose so the matrix is symmetric before it is
    # factorized.
    cov_X_X = (kernel + kernel.T) / 2.0

    try:
        lower = scipy.linalg.cholesky(cov_X_X, lower=True)
    except np.linalg.LinAlgError:  # pragma: no cover
        # Retry once after adding a diagonal term.
        cov_X_X += 1e-2 * np.eye(num_X)
        lower = scipy.linalg.cholesky(cov_X_X, lower=True)

    grad_cov_X_X = None
    if is_gradient:
        grad_cov_X_X = covariance.grad_cov_main(
            str_cov, X_train, X_train, hyps, is_fixed_noise, same_X_Xs=True)

    return cov_X_X, lower, grad_cov_X_X
Esempio n. 3
0
def get_optimized_kernel(X_train, Y_train, prior_mu, str_cov,
    is_fixed_noise=constants.IS_FIXED_GP_NOISE,
    num_iters=1000,
    debug=False
):
    """
    Optimize Gaussian process hyperparameters with GPyTorch and return the resulting kernel matrix, its inverse, and the hyperparameters.

    The passed `prior_mu` and `is_fixed_noise` are validated but then overridden with None and False, respectively. Training uses Adam with a learning rate of 1e-2 on the negative exact marginal log likelihood. When `num_iters` is 0 the training loop is skipped; otherwise the loop stops once the iteration counter exceeds `num_iters` and the latest loss is within 5e-2 of the mean of the five losses recorded before it, or once the counter exceeds `10 * num_iters`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param prior_mu: prior mean function or None.
    :type prior_mu: function or NoneType
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param num_iters: the number of iterations for optimizing negative log likelihood. It should be 0, or greater than or equal to 10.
    :type num_iters: int., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train`, kernel matrix inverse, and dictionary of hyperparameters.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, dict.)

    :raises: AssertionError, NotImplementedError, ValueError

    """

    # TODO: check to input same is_fixed_noise to convert_hyps and restore_hyps
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert prior_mu is None or callable(prior_mu)
    assert isinstance(str_cov, str)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(num_iters, int)
    assert isinstance(debug, bool)
    assert Y_train.ndim == 2
    assert X_train.shape[0] == Y_train.shape[0]
    utils_gp.check_str_cov('get_optimized_kernel', str_cov, X_train.shape)
    assert num_iters == 0 or num_iters >= 10

    # TODO: prior_mu and is_fixed_noise are not working now.
    prior_mu = None
    is_fixed_noise = False

    time_start = time.time()

    if str_cov in constants.ALLOWED_GP_COV_BASE:
        num_dim = X_train.shape[1]
    elif str_cov in constants.ALLOWED_GP_COV_SET:
        num_dim = X_train.shape[2]
        raise NotImplementedError('It is not implemented yet.')

    # GPyTorch works on tensors; the targets are flattened to one dimension.
    X_train_ = torch.from_numpy(X_train).double()
    Y_train_ = torch.from_numpy(Y_train.flatten()).double()

    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = ExactGPModel(str_cov, prior_mu, X_train_, Y_train_, likelihood)

    model.train()
    likelihood.train()

    optimizer = torch.optim.Adam([{'params': model.parameters()}], lr=1e-2)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    list_neg_log_likelihoods = []
    ind_iter = 0

    # `num_iters` never changes, so this flag only decides whether the loop
    # is entered at all; the loop itself is left through `break`.
    is_training = num_iters >= 10
    while is_training:
        optimizer.zero_grad()
        neg_mll = -1.0 * mll(model(X_train_), Y_train_)
        neg_mll.backward()
        optimizer.step()

        cur_loss = neg_mll.item()
        list_neg_log_likelihoods.append(cur_loss)

        # The slice [-6:-1] holds the losses recorded just before the
        # current one.
        is_converged = (
            ind_iter > num_iters
            and np.abs(np.mean(list_neg_log_likelihoods[-6:-1]) - cur_loss) < 5e-2
        )
        if is_converged or ind_iter > 10 * num_iters:
            break

        ind_iter += 1

    model.eval()
    likelihood.eval()

    # Output scale and likelihood noise are square-rooted before they are
    # stored as 'signal' and 'noise'.
    signal = np.sqrt(model.covar_module.outputscale.item())
    lengthscales = model.covar_module.base_kernel.lengthscale.detach().numpy()[0]
    noise = np.sqrt(model.likelihood.noise.item())
    hyps = {
        'signal': signal,
        'lengthscales': lengthscales,
        'noise': noise
    }

    cov_X_X, inv_cov_X_X, _ = gp_common.get_kernel_inverse(
        X_train, hyps, str_cov, is_fixed_noise=is_fixed_noise, debug=debug)

    time_end = time.time()

    if debug:
        logger.debug('iterations to be converged: {}'.format(ind_iter))
        logger.debug('hyps optimized: {}'.format(utils_logger.get_str_hyps(hyps)))
        logger.debug('time consumed to construct gpr: {:.4f} sec.'.format(time_end - time_start))

    return cov_X_X, inv_cov_X_X, hyps
Esempio n. 4
0
def test_check_str_cov():
    """Check that `utils_gp.check_str_cov` rejects invalid arguments."""

    # Each entry is (positional arguments, keyword arguments) for a call
    # that is expected to raise AssertionError.
    cases_assertion = [
        ((1, 'se', (2, 1)), {}),
        (('test', 1, (2, 1)), {}),
        (('test', 'se', 1), {}),
        (('test', 'se', (2, 100, 100)), {}),
        (('test', 'se', (2, 100)), {'shape_X2': (2, 100, 100)}),
        (('test', 'set_se', (2, 100)), {'shape_X2': (2, 100, 100)}),
        (('test', 'set_se', (2, 100, 100)), {'shape_X2': (2, 100)}),
        (('test', 'se', (2, 1)), {'shape_X2': 1}),
    ]

    for args, kwargs in cases_assertion:
        with pytest.raises(AssertionError):
            utils_gp.check_str_cov(*args, **kwargs)

    # An unknown covariance name is expected to raise ValueError instead.
    with pytest.raises(ValueError):
        utils_gp.check_str_cov('test', 'abc', (2, 1))
Esempio n. 5
0
def get_optimized_kernel(
        X_train,
        Y_train,
        prior_mu,
        str_cov,
        str_framework='scipy',
        str_optimizer_method=constants.STR_OPTIMIZER_METHOD_GP,
        str_modelselection_method=constants.STR_MODELSELECTION_METHOD,
        is_fixed_noise=constants.IS_FIXED_GP_NOISE,
        debug=False):
    """
    This function computes the kernel matrix optimized by optimization method specified, its inverse matrix, and the optimized hyperparameters.

    It dispatches to the framework-specific implementation selected by `str_framework`. If the selected optional framework ('tensorflow' or 'gpytorch') cannot be imported, it falls back to 'scipy'. `str_optimizer_method` and `str_modelselection_method` are forwarded to the 'scipy' implementation only.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param prior_mu: prior mean function or None.
    :type prior_mu: function or NoneType
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param str_framework: the name of framework for optimizing kernel hyperparameters.
    :type str_framework: str., optional
    :param str_optimizer_method: the name of optimization method.
    :type str_optimizer_method: str., optional
    :param str_modelselection_method: the name of model selection method.
    :type str_modelselection_method: str., optional
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train`, kernel matrix inverse, and dictionary of hyperparameters.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, dict.)

    :raises: AssertionError, ValueError

    """

    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert callable(prior_mu) or prior_mu is None
    assert isinstance(str_cov, str)
    assert isinstance(str_framework, str)
    assert isinstance(str_optimizer_method, str)
    assert isinstance(str_modelselection_method, str)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(debug, bool)
    assert len(Y_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0]
    utils_gp.check_str_cov('get_optimized_kernel', str_cov, X_train.shape)
    assert str_optimizer_method in constants.ALLOWED_OPTIMIZER_METHOD_GP
    assert str_modelselection_method in constants.ALLOWED_MODELSELECTION_METHOD
    assert str_framework in constants.ALLOWED_FRAMEWORK_GP

    # Probe the optional framework. The imported names are not used here;
    # only the success of the import matters.
    try:
        if str_framework == 'tensorflow':
            import tensorflow as tf  # noqa: F401
        elif str_framework == 'gpytorch':
            import gpytorch  # noqa: F401
    except Exception:  # pragma: no cover
        # Best-effort fallback: any ordinary failure while importing the
        # optional framework switches to scipy. `Exception` rather than a
        # bare `except` lets KeyboardInterrupt and SystemExit propagate.
        str_framework = 'scipy'

    if str_framework == 'scipy':
        cov_X_X, inv_cov_X_X, hyps = gp_scipy.get_optimized_kernel(
            X_train,
            Y_train,
            prior_mu,
            str_cov,
            str_optimizer_method=str_optimizer_method,
            str_modelselection_method=str_modelselection_method,
            is_fixed_noise=is_fixed_noise,
            debug=debug)
    elif str_framework == 'tensorflow':
        from bayeso.gp import gp_tensorflow
        cov_X_X, inv_cov_X_X, hyps = gp_tensorflow.get_optimized_kernel(
            X_train,
            Y_train,
            prior_mu,
            str_cov,
            is_fixed_noise=is_fixed_noise,
            debug=debug)
    elif str_framework == 'gpytorch':
        from bayeso.gp import gp_gpytorch
        cov_X_X, inv_cov_X_X, hyps = gp_gpytorch.get_optimized_kernel(
            X_train,
            Y_train,
            prior_mu,
            str_cov,
            is_fixed_noise=is_fixed_noise,
            debug=debug)
    else:  # pragma: no cover
        raise ValueError('{}: invalid str_framework.'.format(str_framework))

    return cov_X_X, inv_cov_X_X, hyps
Esempio n. 6
0
def predict_optimized(X_train,
                      Y_train,
                      X_test,
                      str_cov=constants.STR_GP_COV,
                      prior_mu=None,
                      is_fixed_noise=constants.IS_FIXED_GP_NOISE,
                      debug=False):
    """
    Optimize the Gaussian process on `X_train` and `Y_train`, then predict over `X_test`.

    Hyperparameters, the kernel matrix, and its inverse come from `get_optimized_kernel`; the prediction itself is delegated to `predict_test_`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param X_test: inputs. Shape: (l, d) or (l, m, d).
    :type X_test: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str., optional
    :param prior_mu: None, or prior mean function.
    :type prior_mu: NoneType, or function, optional
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of posterior mean function over `X_test`, posterior standard deviation function over `X_test`, and posterior covariance matrix over `X_test`. Shape: ((l, 1), (l, 1), (l, l)).
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)

    :raises: AssertionError

    """

    # Type checks come first so that the shape checks below can rely on them.
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert isinstance(X_test, np.ndarray)
    assert isinstance(str_cov, str)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(debug, bool)
    assert prior_mu is None or callable(prior_mu)
    assert Y_train.ndim == 2
    utils_gp.check_str_cov(
        'predict_optimized', str_cov, X_train.shape, shape_X2=X_test.shape)
    assert X_train.shape[0] == Y_train.shape[0]
    assert X_train.shape[1] == X_test.shape[1]

    time_start = time.time()

    cov_X_X, inv_cov_X_X, hyps = get_optimized_kernel(
        X_train, Y_train, prior_mu, str_cov,
        is_fixed_noise=is_fixed_noise, debug=debug)

    prediction = predict_test_(
        X_train,
        Y_train,
        X_test,
        cov_X_X,
        inv_cov_X_X,
        hyps,
        str_cov=str_cov,
        prior_mu=prior_mu,
        debug=debug,
    )
    mu_Xs, sigma_Xs, Sigma_Xs = prediction

    time_end = time.time()
    if debug:
        elapsed = time_end - time_start
        logger.debug(
            'time consumed to construct gpr: {:.4f} sec.'.format(elapsed))

    return mu_Xs, sigma_Xs, Sigma_Xs
Esempio n. 7
0
def predict_test_(X_train,
                  Y_train,
                  X_test,
                  cov_X_X,
                  inv_cov_X_X,
                  hyps,
                  str_cov=constants.STR_GP_COV,
                  prior_mu=None,
                  debug=False):
    """
    Compute the Gaussian process posterior over `X_test` from a precomputed kernel matrix and its inverse.

    The posterior mean is `K_s^T K^{-1} (Y_train - mu(X_train)) + mu(X_test)` and the posterior covariance is `K_ss - K_s^T K^{-1} K_s`, where `K^{-1}` is `inv_cov_X_X`, `K_s` is the kernel between `X_train` and `X_test`, and `K_ss` is the kernel over `X_test`. The standard deviation is the square root of the covariance diagonal, clipped below at zero.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param X_test: inputs. Shape: (l, d) or (l, m, d).
    :type X_test: numpy.ndarray
    :param cov_X_X: kernel matrix over `X_train`. Shape: (n, n).
    :type cov_X_X: numpy.ndarray
    :param inv_cov_X_X: kernel matrix inverse over `X_train`. Shape: (n, n).
    :type inv_cov_X_X: numpy.ndarray
    :param hyps: dictionary of hyperparameters for Gaussian process.
    :type hyps: dict.
    :param str_cov: the name of covariance function.
    :type str_cov: str., optional
    :param prior_mu: None, or prior mean function.
    :type prior_mu: NoneType, or function, optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of posterior mean function over `X_test`, posterior standard deviation function over `X_test`, and posterior covariance matrix over `X_test`. Shape: ((l, 1), (l, 1), (l, l)).
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, numpy.ndarray)

    :raises: AssertionError

    """

    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert isinstance(X_test, np.ndarray)
    assert isinstance(cov_X_X, np.ndarray)
    assert isinstance(inv_cov_X_X, np.ndarray)
    assert isinstance(hyps, dict)
    assert isinstance(str_cov, str)
    assert isinstance(debug, bool)
    assert prior_mu is None or callable(prior_mu)
    assert Y_train.ndim == 2
    assert cov_X_X.ndim == 2
    assert inv_cov_X_X.ndim == 2
    assert cov_X_X.shape == inv_cov_X_X.shape
    utils_gp.check_str_cov(
        'predict_test_', str_cov, X_train.shape, shape_X2=X_test.shape)
    assert X_train.shape[0] == Y_train.shape[0]
    assert X_train.shape[1] == X_test.shape[1]

    prior_mu_train = utils_gp.get_prior_mu(prior_mu, X_train)
    prior_mu_test = utils_gp.get_prior_mu(prior_mu, X_test)

    cov_X_Xs = covariance.cov_main(str_cov, X_train, X_test, hyps, False)
    cov_Xs_Xs = covariance.cov_main(str_cov, X_test, X_test, hyps, True)
    # Average with the transpose so the test kernel matrix is symmetric.
    cov_Xs_Xs = (cov_Xs_Xs + cov_Xs_Xs.T) / 2.0

    # K_s^T K^{-1} is used by both the mean and the covariance.
    weights = np.dot(cov_X_Xs.T, inv_cov_X_X)

    mu_Xs = np.dot(weights, Y_train - prior_mu_train) + prior_mu_test
    Sigma_Xs = cov_Xs_Xs - np.dot(weights, cov_X_Xs)

    # Negative diagonal entries are clipped to zero before the square root.
    variances = np.maximum(np.diag(Sigma_Xs), 0.0)
    sigma_Xs = np.sqrt(variances)[:, np.newaxis]

    return mu_Xs, sigma_Xs, Sigma_Xs
Esempio n. 8
0
def get_optimized_kernel(X_train,
                         Y_train,
                         prior_mu,
                         str_cov,
                         is_fixed_noise=constants.IS_FIXED_GP_NOISE,
                         num_iters=1000,
                         debug=False):
    """
    This function computes the kernel matrix optimized by optimization method specified, its inverse matrix, and the optimized hyperparameters, using TensorFlow and TensorFlow probability.

    The passed `prior_mu` and `is_fixed_noise` are validated but then overridden with None and False, respectively (see the TODO in the body). Amplitude, length scales, and observation noise variance are optimized with Adam (learning rate 1e-2) on the negative log probability of `Y_train`. When `num_iters` is 0 the optimization loop is skipped; otherwise the loop stops once the iteration counter exceeds `num_iters` and the latest loss is within 5e-2 of the mean of the five losses recorded before it, or once the counter exceeds `10 * num_iters`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param prior_mu: prior mean function or None.
    :type prior_mu: function or NoneType
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param num_iters: the number of iterations for optimizing negative log likelihood. It should be 0, or greater than or equal to 10.
    :type num_iters: int., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train`, kernel matrix inverse, and dictionary of hyperparameters.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, dict.)

    :raises: AssertionError, NotImplementedError, ValueError

    """

    # TODO: check to input same is_fixed_noise to convert_hyps and restore_hyps
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert callable(prior_mu) or prior_mu is None
    assert isinstance(str_cov, str)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(num_iters, int)
    assert isinstance(debug, bool)
    assert len(Y_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0]
    utils_gp.check_str_cov('get_optimized_kernel', str_cov, X_train.shape)
    assert num_iters >= 10 or num_iters == 0

    # TODO: prior_mu and is_fixed_noise are not working now.
    prior_mu = None
    is_fixed_noise = False

    time_start = time.time()

    # Set-type covariance functions are rejected here; only base covariance
    # functions continue past this point.
    if str_cov in constants.ALLOWED_GP_COV_BASE:
        num_dim = X_train.shape[1]
    elif str_cov in constants.ALLOWED_GP_COV_SET:
        num_dim = X_train.shape[2]
        raise NotImplementedError('It is not implemented yet.')

    # Bijector built from Exp and a Shift by the smallest positive normal
    # float64; presumably it keeps the transformed parameters strictly
    # positive -- confirm against the TFP bijector composition semantics.
    constraint_positive = tfp.bijectors.Shift(np.finfo(np.float64).tiny)(
        tfp.bijectors.Exp())

    # Trainable hyperparameters, all initialized to 1.0; one length scale
    # per input dimension.
    var_amplitude = tfp.util.TransformedVariable(initial_value=1.0,
                                                 bijector=constraint_positive,
                                                 dtype=np.float64)

    var_length_scale = tfp.util.TransformedVariable(
        initial_value=[1.0] * num_dim,
        bijector=constraint_positive,
        dtype=np.float64)

    var_observation_noise_variance = tfp.util.TransformedVariable(
        initial_value=1.0, bijector=constraint_positive, dtype=np.float64)

    def create_kernel(str_cov):
        # Map the covariance name to a TFP kernel. The base kernel gets no
        # length scale of its own; `var_length_scale` is applied through the
        # FeatureScaled wrapper instead.
        if str_cov == 'eq' or str_cov == 'se':
            kernel_main = tfp.math.psd_kernels.ExponentiatedQuadratic(
                amplitude=var_amplitude, length_scale=None)
        elif str_cov == 'matern32':
            kernel_main = tfp.math.psd_kernels.MaternThreeHalves(
                amplitude=var_amplitude, length_scale=None)
        elif str_cov == 'matern52':
            kernel_main = tfp.math.psd_kernels.MaternFiveHalves(
                amplitude=var_amplitude, length_scale=None)
        else:
            raise NotImplementedError(
                'allowed str_cov conditions, but it is not implemented.')

        kernel = tfp.math.psd_kernels.FeatureScaled(kernel_main,
                                                    var_length_scale)

        return kernel

    # `prior_mu` is None at this point because of the override above.
    model_gp = tfp.distributions.GaussianProcess(
        kernel=create_kernel(str_cov),
        index_points=X_train,
        observation_noise_variance=var_observation_noise_variance,
        mean_fn=prior_mu)

    @tf.function()
    def log_prob_outputs():  # pragma: no cover
        # Log probability of the flattened training outputs under the model.
        return model_gp.log_prob(np.ravel(Y_train))

    optimizer = tf.optimizers.Adam(learning_rate=1e-2)
    # Collect the first trainable variable of each transformed variable;
    # these are the variables the gradients are applied to.
    trainable_variables = [
        var_.trainable_variables[0] for var_ in
        [var_amplitude, var_length_scale, var_observation_noise_variance]
    ]

    list_neg_log_likelihoods = []
    ind_iter = 0

    # `num_iters` is never modified, so the condition only decides whether
    # the loop is entered; the loop is left through `break`.
    while num_iters >= 10:
        with tf.GradientTape() as tape:
            loss = -1.0 * log_prob_outputs()

        grads = tape.gradient(loss, trainable_variables)
        optimizer.apply_gradients(zip(grads, trainable_variables))
        list_neg_log_likelihoods.append(loss)

        # The slice [-6:-1] holds the losses recorded just before the
        # current one.
        if ind_iter > num_iters and np.abs(
                np.mean(list_neg_log_likelihoods[-6:-1]) - loss) < 5e-2:
            break
        elif ind_iter > 10 * num_iters:  # pragma: no cover
            break
        else:
            ind_iter += 1

    # The amplitude is stored as-is, while the observation noise variance is
    # square-rooted before it is stored as 'noise'.
    hyps = {
        'signal': var_amplitude._value().numpy(),
        'lengthscales': var_length_scale._value().numpy(),
        'noise': np.sqrt(var_observation_noise_variance._value().numpy())
    }

    cov_X_X, inv_cov_X_X, _ = gp_common.get_kernel_inverse(
        X_train, hyps, str_cov, is_fixed_noise=is_fixed_noise, debug=debug)

    time_end = time.time()

    if debug: logger.debug('iterations to be converged: {}'.format(ind_iter))
    if debug:
        logger.debug('hyps optimized: {}'.format(
            utils_logger.get_str_hyps(hyps)))
    if debug:
        logger.debug(
            'time consumed to construct gpr: {:.4f} sec.'.format(time_end -
                                                                 time_start))

    return cov_X_X, inv_cov_X_X, hyps
Esempio n. 9
0
def get_optimized_kernel(
        X_train,
        Y_train,
        prior_mu,
        str_cov,
        str_optimizer_method=constants.STR_OPTIMIZER_METHOD_GP,
        str_modelselection_method=constants.STR_MODELSELECTION_METHOD,
        is_fixed_noise=constants.IS_FIXED_GP_NOISE,
        debug=False):
    """
    Optimize Gaussian process hyperparameters with `scipy.optimize.minimize` and return the resulting kernel matrix, its inverse, and the hyperparameters.

    The objective is `neg_log_ml` for the 'ml' model selection method and `neg_log_pseudo_l_loocv` for 'loocv'. Gradients are passed to the optimizer only for 'ml' with a base covariance function and an optimizer other than 'Nelder-Mead'.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param prior_mu: prior mean function or None.
    :type prior_mu: function or NoneType
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param str_optimizer_method: the name of optimization method.
    :type str_optimizer_method: str., optional
    :param str_modelselection_method: the name of model selection method.
    :type str_modelselection_method: str., optional
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train`, kernel matrix inverse, and dictionary of hyperparameters.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, dict.)

    :raises: AssertionError, NotImplementedError, ValueError

    """

    # TODO: check to input same is_fixed_noise to convert_hyps and restore_hyps
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert prior_mu is None or callable(prior_mu)
    assert isinstance(str_cov, str)
    assert isinstance(str_optimizer_method, str)
    assert isinstance(str_modelselection_method, str)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(debug, bool)
    assert Y_train.ndim == 2
    assert X_train.shape[0] == Y_train.shape[0]
    utils_gp.check_str_cov('get_optimized_kernel', str_cov, X_train.shape)
    assert str_optimizer_method in constants.ALLOWED_OPTIMIZER_METHOD_GP
    assert str_modelselection_method in constants.ALLOWED_MODELSELECTION_METHOD
    # TODO: fix this.
    is_gradient = str_optimizer_method != 'Nelder-Mead'

    time_start = time.time()

    if debug:
        logger.debug('str_optimizer_method: {}'.format(str_optimizer_method))
        logger.debug(
            'str_modelselection_method: {}'.format(str_modelselection_method))

    prior_mu_train = utils_gp.get_prior_mu(prior_mu, X_train)
    if str_cov in constants.ALLOWED_GP_COV_BASE:
        num_dim = X_train.shape[1]
    elif str_cov in constants.ALLOWED_GP_COV_SET:
        num_dim = X_train.shape[2]
        # Gradients are not used for set-type covariance functions.
        is_gradient = False

    if str_modelselection_method == 'ml':
        def neg_log_ml_(hyps):
            return neg_log_ml(X_train,
                              Y_train,
                              hyps,
                              str_cov,
                              prior_mu_train,
                              is_fixed_noise=is_fixed_noise,
                              is_gradient=is_gradient,
                              debug=debug)
    elif str_modelselection_method == 'loocv':
        # The leave-one-out objective is called without gradients.
        is_gradient = False

        def neg_log_ml_(hyps):
            return neg_log_pseudo_l_loocv(X_train,
                                          Y_train,
                                          hyps,
                                          str_cov,
                                          prior_mu_train,
                                          is_fixed_noise=is_fixed_noise,
                                          debug=debug)
    else:  # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_modelselection_method.'
        )

    hyps_initial = utils_covariance.get_hyps(str_cov, num_dim)
    hyps_converted = utils_covariance.convert_hyps(
        str_cov,
        hyps_initial,
        is_fixed_noise=is_fixed_noise,
    )

    # TODO: Fill this conditions
    if str_optimizer_method == 'DIRECT':  # pragma: no cover
        raise NotImplementedError(
            'get_optimized_kernel: allowed str_optimizer_method, but it is not implemented.'
        )
    if str_optimizer_method not in ('BFGS', 'L-BFGS-B', 'Nelder-Mead'):  # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_optimizer_method'
        )

    # Assemble the keyword arguments that differ between optimizers:
    # 'L-BFGS-B' takes bounds, and 'Nelder-Mead' takes no `jac`.
    kwargs_minimize = {
        'method': str_optimizer_method,
        'options': {'disp': False},
    }
    if str_optimizer_method == 'L-BFGS-B':
        kwargs_minimize['bounds'] = utils_covariance.get_range_hyps(
            str_cov, num_dim, is_fixed_noise=is_fixed_noise)
    if str_optimizer_method != 'Nelder-Mead':
        kwargs_minimize['jac'] = is_gradient

    result_minimize = scipy.optimize.minimize(neg_log_ml_, hyps_converted,
                                              **kwargs_minimize)
    if debug:
        logger.debug('scipy message: {}'.format(result_minimize.message))

    result_optimized = result_minimize.x

    hyps = utils_covariance.restore_hyps(str_cov,
                                         result_optimized,
                                         is_fixed_noise=is_fixed_noise)
    hyps, _ = utils_covariance.validate_hyps_dict(hyps, str_cov, num_dim)

    # The kernel gradients returned by the helper are not needed here.
    cov_X_X, inv_cov_X_X, _ = gp_common.get_kernel_inverse(
        X_train, hyps, str_cov, is_fixed_noise=is_fixed_noise, debug=debug)

    time_end = time.time()

    if debug:
        logger.debug('hyps optimized: {}'.format(
            utils_logger.get_str_hyps(hyps)))
        logger.debug(
            'time consumed to construct gpr: {:.4f} sec.'.format(time_end -
                                                                 time_start))
    return cov_X_X, inv_cov_X_X, hyps
Esempio n. 10
0
def neg_log_ml(X_train,
               Y_train,
               hyps,
               str_cov,
               prior_mu_train,
               is_fixed_noise=constants.IS_FIXED_GP_NOISE,
               is_cholesky=True,
               is_gradient=True,
               debug=False):
    """
    This function evaluates the negative log marginal likelihood of a Gaussian process, divided by the number of training points.

    The prior values `prior_mu_train` are subtracted from `Y_train` before the likelihood is evaluated. When `is_cholesky` is False, the kernel inverse is used instead of a Cholesky factor and gradients are never computed, so only the scalar value is returned regardless of `is_gradient`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior values computed by get_prior_mu(). Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param is_cholesky: flag for using a cholesky decomposition.
    :type is_cholesky: bool., optional
    :param is_gradient: flag for computing and returning gradients of negative log marginal likelihood. It is only honored when `is_cholesky` is True.
    :type is_gradient: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: negative log marginal likelihood, or (negative log marginal likelihood, gradients of the likelihood). Both are scaled by 1 / n.
    :rtype: float, or tuple of (float, numpy.ndarray)

    :raises: AssertionError

    """

    assert all(
        isinstance(arr, np.ndarray)
        for arr in (X_train, Y_train, hyps, prior_mu_train))
    assert isinstance(str_cov, str)
    assert all(
        isinstance(flag, bool)
        for flag in (is_fixed_noise, is_cholesky, is_gradient, debug))
    assert Y_train.ndim == 2
    assert prior_mu_train.ndim == 2

    num_data = X_train.shape[0]
    assert Y_train.shape[0] == num_data
    assert prior_mu_train.shape[0] == num_data
    utils_gp.check_str_cov('neg_log_ml', str_cov, X_train.shape)

    hyps_restored = utils_covariance.restore_hyps(
        str_cov, hyps, is_fixed_noise=is_fixed_noise)
    # Targets measured relative to the prior mean.
    centered_Y = Y_train - prior_mu_train

    grad_log_ml_ = None
    if is_cholesky:
        return_gradient = is_gradient
        cov_X_X, chol_lower, grad_cov_X_X = gp_common.get_kernel_cholesky(
            X_train,
            hyps_restored,
            str_cov,
            is_fixed_noise=is_fixed_noise,
            is_gradient=return_gradient,
            debug=debug)
        chol_factor = (chol_lower, True)

        # alpha solves K alpha = (y - m) through the Cholesky factor.
        alpha = scipy.linalg.cho_solve(chol_factor, centered_Y)

        data_fit_term = -0.5 * np.dot(centered_Y.T, alpha)
        # Sum of log-diagonal of the Cholesky factor equals 0.5 * log|K|.
        log_det_term = -1.0 * np.sum(
            np.log(np.diagonal(chol_lower) + constants.JITTER_LOG))

        if return_gradient:
            assert grad_cov_X_X is not None

            inv_cov_X_X = scipy.linalg.cho_solve(
                chol_factor, np.eye(cov_X_X.shape[0]))
            # (alpha alpha^T - K^{-1}), with a trailing singleton axis so it
            # can be contracted against the stacked kernel gradients.
            weight = np.einsum("ik,jk->ijk", alpha, alpha)
            weight -= np.expand_dims(inv_cov_X_X, axis=2)
            grad_log_ml_ = np.sum(
                0.5 * np.einsum("ijl,ijk->kl", weight, grad_cov_X_X),
                axis=1)
    else:
        # TODO: is_gradient is fixed.
        # The explicit-inverse path does not provide gradients.
        return_gradient = False
        cov_X_X, inv_cov_X_X, grad_cov_X_X = gp_common.get_kernel_inverse(
            X_train,
            hyps_restored,
            str_cov,
            is_fixed_noise=is_fixed_noise,
            is_gradient=return_gradient,
            debug=debug)

        data_fit_term = -0.5 * np.dot(np.dot(centered_Y.T, inv_cov_X_X),
                                      centered_Y)
        log_det_term = -0.5 * np.log(
            np.linalg.det(cov_X_X) + constants.JITTER_LOG)

    normalizer_term = -float(num_data) / 2.0 * np.log(2.0 * np.pi)
    log_ml_ = np.squeeze(data_fit_term + log_det_term + normalizer_term)
    # Average over training points.
    log_ml_ /= num_data

    if not return_gradient:
        return -1.0 * log_ml_
    return -1.0 * log_ml_, -1.0 * grad_log_ml_ / num_data
Esempio n. 11
0
def neg_log_pseudo_l_loocv(X_train,
                           Y_train,
                           hyps,
                           str_cov,
                           prior_mu_train,
                           is_fixed_noise=constants.IS_FIXED_GP_NOISE,
                           debug=False):
    """
    It computes a negative log pseudo-likelihood using leave-one-out cross-validation.

    The leave-one-out predictive mean and variance of every training point are derived in closed form from the matrix returned as the kernel inverse by `gp_common.get_kernel_inverse`, so no model is re-fitted per held-out point. The prior values `prior_mu_train` are subtracted from `Y_train` first, as `neg_log_ml` does. The result is averaged over the training points.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior values computed by get_prior_mu(). Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: negative log pseudo-likelihood, divided by n.
    :rtype: float

    :raises: AssertionError

    """

    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(str_cov, str)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(debug, bool)
    assert len(Y_train.shape) == 2
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    utils_gp.check_str_cov('neg_log_pseudo_l_loocv', str_cov, X_train.shape)

    num_data = X_train.shape[0]
    hyps = utils_covariance.restore_hyps(str_cov,
                                         hyps,
                                         is_fixed_noise=is_fixed_noise)

    _, inv_cov_X_X, _ = gp_common.get_kernel_inverse(
        X_train, hyps, str_cov, is_fixed_noise=is_fixed_noise, debug=debug)

    # Work with targets relative to the prior mean; with a zero prior mean
    # this is just Y_train.
    new_Y_train = Y_train - prior_mu_train

    # K^{-1} (y - m) does not depend on the held-out index, so it is computed
    # once instead of once per training point. Shape: (n, 1).
    alpha = np.dot(inv_cov_X_X, new_Y_train)
    # Diagonal of K^{-1} as a column so it broadcasts against alpha.
    diag_inv = np.diagonal(inv_cov_X_X)[:, np.newaxis]

    # Leave-one-out residual y_i - mu_i = [K^{-1} (y - m)]_i / [K^{-1}]_ii.
    loo_residuals = alpha / diag_inv
    # Leave-one-out variance 1 / [K^{-1}]_ii, with jitter guarding the
    # division.
    loo_variances = 1.0 / (diag_inv + constants.JITTER_COV)

    # Gaussian log-density of each held-out target under its leave-one-out
    # predictive distribution, summed over all training points.
    log_pseudo_l_ = np.sum(-0.5 * np.log(loo_variances) -
                           0.5 * loo_residuals**2 / loo_variances -
                           0.5 * np.log(2.0 * np.pi))

    log_pseudo_l_ /= num_data

    return -1.0 * log_pseudo_l_