def log_ml(X_train, Y_train, hyps, str_cov, prior_mu_train,
           is_fixed_noise=constants.IS_FIXED_GP_NOISE, is_cholesky=True,
           debug=False):
    """
    This function computes a log marginal likelihood of the Gaussian process
    regression model over `X_train` and `Y_train`.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: flattened hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior mean values over `X_train`. Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param is_cholesky: flag for using a Cholesky decomposition.
    :type is_cholesky: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: log marginal likelihood.
    :rtype: numpy scalar

    :raises: AssertionError
    """
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(str_cov, str)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(is_cholesky, bool)
    assert isinstance(debug, bool)
    assert len(Y_train.shape) == 2
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    _check_str_cov('log_ml', str_cov, X_train.shape)

    # Convert the flat hyperparameter vector back into a dictionary.
    hyps = utils_covariance.restore_hyps(str_cov, hyps,
                                         is_fixed_noise=is_fixed_noise)
    # Work with the residuals against the prior mean.
    new_Y_train = Y_train - prior_mu_train
    if is_cholesky:
        cov_X_X, lower = get_kernel_cholesky(X_train, hyps, str_cov,
                                             debug=debug)

        # alpha = K^{-1} (Y - prior_mu) via triangular solves on the factor.
        alpha = scipy.linalg.cho_solve((lower, True), new_Y_train)

        # Data-fit term: -0.5 * y^T K^{-1} y.
        first_term = -0.5 * np.dot(new_Y_train.T, alpha)
        # Complexity term: -0.5 * log det(K) = -sum(log diag(L)),
        # since log det(K) = 2 * sum(log diag(L)); JITTER_LOG guards log(0).
        second_term = -1.0 * np.sum(
            np.log(np.diagonal(lower) + constants.JITTER_LOG))
    else:
        # Direct-inverse path; numerically weaker than the Cholesky path.
        cov_X_X, inv_cov_X_X = get_kernel_inverse(X_train, hyps, str_cov,
                                                  debug=debug)

        first_term = -0.5 * np.dot(np.dot(new_Y_train.T, inv_cov_X_X),
                                   new_Y_train)
        second_term = -0.5 * np.log(
            np.linalg.det(cov_X_X) + constants.JITTER_LOG)

    # Normalization constant: -(n / 2) * log(2 * pi).
    third_term = -float(X_train.shape[0]) / 2.0 * np.log(2.0 * np.pi)
    log_ml_ = np.squeeze(first_term + second_term + third_term)
    return log_ml_
def log_pseudo_l_loocv(X_train, Y_train, hyps, str_cov, prior_mu_train,
                       is_fixed_noise=constants.IS_FIXED_GP_NOISE,
                       debug=False):
    """
    This function computes a log pseudo-likelihood of `Y_train` using
    leave-one-out cross-validation, without refitting the kernel: the
    leave-one-out mean and variance are read off the full precision matrix.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: flattened hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior mean values over `X_train`. Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: log pseudo-likelihood.
    :rtype: float

    :raises: AssertionError
    """
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(str_cov, str)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(debug, bool)
    assert len(Y_train.shape) == 2
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    _check_str_cov('log_pseudo_l_loocv', str_cov, X_train.shape)

    # NOTE(review): prior_mu_train is validated but never used below;
    # presumably the terms should use Y_train - prior_mu_train — confirm.
    num_data = X_train.shape[0]
    hyps = utils_covariance.restore_hyps(str_cov, hyps,
                                         is_fixed_noise=is_fixed_noise)

    cov_X_X, inv_cov_X_X = get_kernel_inverse(X_train, hyps, str_cov,
                                              debug=debug)

    log_pseudo_l = 0.0
    for ind_data in range(0, num_data):
        # The previous version also built per-iteration leave-one-out copies
        # of X_train / Y_train with np.vstack; they were never used, so the
        # O(n) copies per iteration have been removed.
        cur_Y_test = Y_train[ind_data]

        # Leave-one-out predictive mean and stddev from the full precision
        # matrix (Rasmussen & Williams, Eqs. 5.10-5.12).
        cur_mu = np.squeeze(cur_Y_test) - np.dot(
            inv_cov_X_X, Y_train)[ind_data] / inv_cov_X_X[ind_data, ind_data]
        cur_sigma = np.sqrt(
            1.0 / (inv_cov_X_X[ind_data, ind_data] + constants.JITTER_COV))

        # Log density of the held-out observation under N(cur_mu, cur_sigma^2).
        first_term = -0.5 * np.log(cur_sigma**2)
        second_term = -0.5 * (np.squeeze(cur_Y_test - cur_mu))**2 / (cur_sigma
            **2)
        third_term = -0.5 * np.log(2.0 * np.pi)
        log_pseudo_l += first_term + second_term + third_term
    return log_pseudo_l
def test_restore_hyps():
    """Checks input validation and round-trip values of restore_hyps."""
    # Each entry is (positional args, keyword args) that must be rejected.
    invalid_cases = [
        ((1.2, 2.1), {}),
        ((1.2, np.array([1.0, 1.0])), {}),
        (('se', 2.1), {}),
        (('abc', 2.1), {}),
        (('se', np.array([[1.0, 1.0], [1.0, 1.0]])), {}),
        (('se', np.array([1.0, 1.0, 1.0])), {'is_fixed_noise': 1}),
        (('se', np.array([1.0, 1.0, 1.0])), {'fixed_noise': 'abc'}),
    ]
    for cur_args, cur_kwargs in invalid_cases:
        with pytest.raises(AssertionError):
            utils_covariance.restore_hyps(*cur_args, **cur_kwargs)

    cur_hyps = np.array([0.1, 1.0, 1.0, 1.0, 1.0])

    # Free noise: layout is [noise, signal, lengthscales...].
    hyps_restored = utils_covariance.restore_hyps('se', cur_hyps)
    assert hyps_restored['noise'] == cur_hyps[0]
    assert hyps_restored['signal'] == cur_hyps[1]
    assert (hyps_restored['lengthscales'] == cur_hyps[2:]).all()

    # Fixed noise: noise falls back to the constant, layout shifts by one.
    hyps_restored = utils_covariance.restore_hyps('se', cur_hyps,
                                                  is_fixed_noise=True)
    assert hyps_restored['noise'] == constants.GP_NOISE
    assert hyps_restored['signal'] == cur_hyps[0]
    assert (hyps_restored['lengthscales'] == cur_hyps[1:]).all()
def get_optimized_kernel(
    X_train: np.ndarray,
    Y_train: np.ndarray,
    prior_mu: constants.TYPING_UNION_CALLABLE_NONE,
    str_cov: str,
    str_optimizer_method: str = constants.STR_OPTIMIZER_METHOD_GP,
    str_modelselection_method: str = constants.STR_MODELSELECTION_METHOD,
    use_ard: bool = constants.USE_ARD,
    fix_noise: bool = constants.FIX_GP_NOISE,
    debug: bool = False
) -> constants.TYPING_TUPLE_TWO_ARRAYS_DICT:
    """
    This function computes the kernel matrix optimized by optimization
    method specified, its inverse matrix, and the optimized hyperparameters.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param prior_mu: prior mean function or None.
    :type prior_mu: callable or NoneType
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param str_optimizer_method: the name of optimization method.
    :type str_optimizer_method: str., optional
    :param str_modelselection_method: the name of model selection method.
    :type str_modelselection_method: str., optional
    :param use_ard: flag for using automatic relevance determination.
    :type use_ard: bool., optional
    :param fix_noise: flag for fixing a noise.
    :type fix_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train`, kernel matrix
        inverse, and dictionary of hyperparameters.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, dict.)

    :raises: AssertionError, ValueError
    """
    # TODO: check to input same fix_noise to convert_hyps and restore_hyps
    utils_gp.validate_common_args(X_train, Y_train, str_cov, prior_mu, debug)
    assert isinstance(str_optimizer_method, str)
    assert isinstance(str_modelselection_method, str)
    assert isinstance(use_ard, bool)
    assert isinstance(fix_noise, bool)
    utils_covariance.check_str_cov('get_optimized_kernel', str_cov,
                                   X_train.shape)
    assert str_optimizer_method in constants.ALLOWED_OPTIMIZER_METHOD_GP
    assert str_modelselection_method in constants.ALLOWED_MODELSELECTION_METHOD

    # Nelder-Mead is derivative-free; every other allowed method can use
    # analytic gradients from the likelihood.
    use_gradient = bool(str_optimizer_method != 'Nelder-Mead')
    # TODO: Now, use_gradient is fixed as False.
    # use_gradient = False

    time_start = time.time()

    if debug:
        logger.debug('str_optimizer_method: %s', str_optimizer_method)
        logger.debug('str_modelselection_method: %s',
                     str_modelselection_method)
        logger.debug('use_gradient: %s', use_gradient)

    prior_mu_train = utils_gp.get_prior_mu(prior_mu, X_train)
    if str_cov in constants.ALLOWED_COV_BASE:
        num_dim = X_train.shape[1]
    elif str_cov in constants.ALLOWED_COV_SET:
        num_dim = X_train.shape[2]
        # Gradients are not used with set covariances.
        use_gradient = False

    if str_modelselection_method == 'ml':
        # The closure reads use_gradient at call time.
        neg_log_ml_ = lambda hyps: gp_likelihood.neg_log_ml(
            X_train, Y_train, hyps, str_cov, prior_mu_train,
            use_ard=use_ard, fix_noise=fix_noise,
            use_gradient=use_gradient, debug=debug)
    elif str_modelselection_method == 'loocv':
        # TODO: add use_ard.
        neg_log_ml_ = lambda hyps: gp_likelihood.neg_log_pseudo_l_loocv(
            X_train, Y_train, hyps, str_cov, prior_mu_train,
            fix_noise=fix_noise, debug=debug)
        use_gradient = False
    else:  # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_modelselection_method.'
        )

    hyps_converted = utils_covariance.convert_hyps(
        str_cov,
        utils_covariance.get_hyps(str_cov, num_dim, use_ard=use_ard),
        fix_noise=fix_noise)

    if str_optimizer_method in ['BFGS', 'SLSQP']:
        result_optimized = scipy.optimize.minimize(
            neg_log_ml_, hyps_converted, method=str_optimizer_method,
            jac=use_gradient, options={'disp': False})
    elif str_optimizer_method in ['L-BFGS-B', 'SLSQP-Bounded']:
        if str_optimizer_method == 'SLSQP-Bounded':
            str_optimizer_method = 'SLSQP'

        bounds = utils_covariance.get_range_hyps(str_cov, num_dim,
                                                 use_ard=use_ard,
                                                 fix_noise=fix_noise)
        result_optimized = scipy.optimize.minimize(
            neg_log_ml_, hyps_converted, method=str_optimizer_method,
            bounds=bounds, jac=use_gradient, options={'disp': False})
    elif str_optimizer_method in ['Nelder-Mead']:
        result_optimized = scipy.optimize.minimize(
            neg_log_ml_, hyps_converted, method=str_optimizer_method,
            options={'disp': False})
    else:  # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_optimizer_method'
        )

    # Shared post-processing, previously duplicated verbatim in every
    # optimizer branch above (the else branch raises before reaching here).
    if debug:
        logger.debug('negative log marginal likelihood: %.6f',
                     result_optimized.fun)
        logger.debug('scipy message: %s', result_optimized.message)
    result_optimized = result_optimized.x

    hyps = utils_covariance.restore_hyps(str_cov, result_optimized,
                                         use_ard=use_ard,
                                         fix_noise=fix_noise)

    hyps = utils_covariance.validate_hyps_dict(hyps, str_cov, num_dim)
    cov_X_X, inv_cov_X_X, _ = covariance.get_kernel_inverse(
        X_train, hyps, str_cov, fix_noise=fix_noise, debug=debug)

    time_end = time.time()

    if debug:
        logger.debug('hyps optimized: %s', utils_logger.get_str_hyps(hyps))
        logger.debug('time consumed to construct gpr: %.4f sec.',
                     time_end - time_start)
    return cov_X_X, inv_cov_X_X, hyps
def get_optimized_kernel(
        X_train, Y_train, prior_mu, str_cov,
        str_optimizer_method=constants.STR_OPTIMIZER_METHOD_GP,
        str_modelselection_method=constants.STR_MODELSELECTION_METHOD,
        is_fixed_noise=constants.IS_FIXED_GP_NOISE,
        debug=False):
    """
    Optimizes the Gaussian process hyperparameters via the chosen model
    selection objective, then returns the kernel matrix over `X_train`,
    its inverse, and the optimized hyperparameter dictionary.
    """
    # TODO: check to input same is_fixed_noise to convert_hyps and restore_hyps
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert callable(prior_mu) or prior_mu is None
    assert isinstance(str_cov, str)
    assert isinstance(str_optimizer_method, str)
    assert isinstance(str_modelselection_method, str)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(debug, bool)
    assert len(Y_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0]
    _check_str_cov('get_optimized_kernel', str_cov, X_train.shape)
    assert str_optimizer_method in constants.ALLOWED_OPTIMIZER_METHOD_GP
    assert str_modelselection_method in constants.ALLOWED_MODELSELECTION_METHOD

    time_start = time.time()

    if debug:
        print('[DEBUG] get_optimized_kernel in gp.py: str_optimizer_method {}'.
              format(str_optimizer_method))
        print(
            '[DEBUG] get_optimized_kernel in gp.py: str_modelselection_method {}'
            .format(str_modelselection_method))

    prior_mu_train = get_prior_mu(prior_mu, X_train)
    if str_cov in constants.ALLOWED_GP_COV_BASE:
        num_dim = X_train.shape[1]
    elif str_cov in constants.ALLOWED_GP_COV_SET:
        num_dim = X_train.shape[2]

    # Build the scalar objective to minimize (negated model-selection score).
    if str_modelselection_method == 'ml':
        def fun_objective(hyps):
            return -1.0 * log_ml(X_train, Y_train, hyps, str_cov,
                                 prior_mu_train,
                                 is_fixed_noise=is_fixed_noise, debug=debug)
    elif str_modelselection_method == 'loocv':
        def fun_objective(hyps):
            return -1.0 * log_pseudo_l_loocv(X_train, Y_train, hyps, str_cov,
                                             prior_mu_train,
                                             is_fixed_noise=is_fixed_noise,
                                             debug=debug)
    else:  # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_modelselection_method.'
        )

    # Initial point: default hyperparameters flattened into a vector.
    hyps_converted = utils_covariance.convert_hyps(
        str_cov,
        utils_covariance.get_hyps(str_cov, num_dim),
        is_fixed_noise=is_fixed_noise,
    )

    if str_optimizer_method == 'BFGS':
        result_optimized = scipy.optimize.minimize(
            fun_objective, hyps_converted, method=str_optimizer_method)
        result_optimized = result_optimized.x
    elif str_optimizer_method == 'L-BFGS-B':
        bounds = utils_covariance.get_range_hyps(
            str_cov, num_dim, is_fixed_noise=is_fixed_noise)
        result_optimized = scipy.optimize.minimize(
            fun_objective, hyps_converted, method=str_optimizer_method,
            bounds=bounds)
        result_optimized = result_optimized.x
    # TODO: Fill this conditions
    elif str_optimizer_method in ('DIRECT', 'CMA-ES'):  # pragma: no cover
        # Accepted by the assertion above, but no implementation exists yet.
        raise NotImplementedError(
            'get_optimized_kernel: allowed str_optimizer_method, but it is not implemented.'
        )
    else:  # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_optimizer_method'
        )

    hyps = utils_covariance.restore_hyps(str_cov, result_optimized,
                                         is_fixed_noise=is_fixed_noise)
    hyps, _ = utils_covariance.validate_hyps_dict(hyps, str_cov, num_dim)

    cov_X_X, inv_cov_X_X = get_kernel_inverse(X_train, hyps, str_cov,
                                              debug=debug)

    time_end = time.time()

    if debug:
        print('[DEBUG] get_optimized_kernel in gp.py: optimized hyps for gpr',
              hyps)
        print('[DEBUG] get_optimized_kernel in gp.py: time consumed',
              time_end - time_start, 'sec.')
    return cov_X_X, inv_cov_X_X, hyps
def neg_log_ml(X_train: np.ndarray, Y_train: np.ndarray, hyps: np.ndarray,
               str_cov: str, prior_mu_train: np.ndarray,
               use_ard: bool = constants.USE_ARD,
               fix_noise: bool = constants.FIX_GP_NOISE,
               use_gradient: bool = True,
               debug: bool = False) -> constants.TYPING_UNION_FLOAT_FA:
    """
    This function computes a negative log marginal likelihood.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior values computed by get_prior_mu().
        Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param use_ard: flag for automatic relevance determination.
    :type use_ard: bool., optional
    :param fix_noise: flag for fixing a noise.
    :type fix_noise: bool., optional
    :param use_gradient: flag for computing and returning gradients of
        negative log marginal likelihood.
    :type use_gradient: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: negative log marginal likelihood, or (negative log marginal
        likelihood, gradients of the likelihood).
    :rtype: float, or tuple of (float, np.ndarray)

    :raises: AssertionError
    """
    utils_gp.validate_common_args(X_train, Y_train, str_cov, None, debug)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(use_ard, bool)
    assert isinstance(fix_noise, bool)
    assert isinstance(use_gradient, bool)
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    utils_covariance.check_str_cov('neg_log_ml', str_cov, X_train.shape)

    num_X = float(X_train.shape[0])
    # use_gp=False restores a Student-t parameterization: hyps includes 'dof'.
    hyps = utils_covariance.restore_hyps(str_cov, hyps, use_ard=use_ard,
                                         fix_noise=fix_noise, use_gp=False)
    new_Y_train = Y_train - prior_mu_train
    nu = hyps['dof']

    cov_X_X, inv_cov_X_X, grad_cov_X_X = covariance.get_kernel_inverse(
        X_train, hyps, str_cov, fix_noise=fix_noise,
        use_gradient=use_gradient, debug=debug)

    # alpha = K^{-1} (Y - prior_mu); beta = (Y - prior_mu)^T K^{-1} (Y - prior_mu).
    alpha = np.dot(inv_cov_X_X, new_Y_train)
    beta = np.squeeze(np.dot(np.dot(new_Y_train.T, inv_cov_X_X), new_Y_train))

    first_term = -0.5 * num_X * np.log((nu - 2.0) * np.pi)
    # slogdet is used instead of log(det(.)) for numerical stability.
    sign_second_term, second_term = np.linalg.slogdet(cov_X_X)
    # TODO: it should be checked.
    if sign_second_term <= 0: # pragma: no cover
        second_term = 0.0

    second_term = -0.5 * second_term

    # Gamma-function ratio from the multivariate Student-t density.
    third_term = np.log(
        scipy.special.gamma(
            (nu + num_X) / 2.0) / scipy.special.gamma(nu / 2.0))
    fourth_term = -0.5 * (nu + num_X) * np.log(1.0 + beta / (nu - 2.0))

    log_ml_ = np.squeeze(first_term + second_term + third_term + fourth_term)
    # Normalized by the number of training points.
    log_ml_ /= num_X

    if use_gradient:
        assert grad_cov_X_X is not None
        # One extra slot for the gradient w.r.t. the degrees of freedom nu.
        grad_log_ml_ = np.zeros(grad_cov_X_X.shape[2] + 1)

        first_term_grad = ((nu + num_X) / (nu + beta - 2.0) *
                           np.dot(alpha, alpha.T) - inv_cov_X_X)
        nu_grad = -num_X / (2.0 * (nu - 2.0))\
            + scipy.special.digamma((nu + num_X) / 2.0)\
            - scipy.special.digamma(nu / 2.0)\
            - 0.5 * np.log(1.0 + beta / (nu - 2.0))\
            + (nu + num_X) * beta / (2.0 * (nu - 2.0)**2
                + 2.0 * beta * (nu - 2.0))

        # Gradient vector layout: if fix_noise, [nu, kernel hyps...];
        # otherwise [noise, nu, kernel hyps...].
        if fix_noise:
            grad_log_ml_[0] = nu_grad
        else:
            grad_log_ml_[1] = nu_grad

        for ind in range(0, grad_cov_X_X.shape[2]):
            cur_grad = 0.5 * np.trace(
                np.dot(first_term_grad, grad_cov_X_X[:, :, ind]))
            if fix_noise:
                grad_log_ml_[ind + 1] = cur_grad
            else:
                # ind 0 (noise) stays at slot 0; the rest shift past nu's slot.
                if ind == 0:
                    cur_ind = 0
                else:
                    cur_ind = ind + 1

                grad_log_ml_[cur_ind] = cur_grad

    if use_gradient:
        return -1.0 * log_ml_, -1.0 * grad_log_ml_ / num_X

    return -1.0 * log_ml_
def get_optimized_kernel(
        X_train, Y_train, prior_mu, str_cov,
        str_optimizer_method=constants.STR_OPTIMIZER_METHOD_GP,
        str_modelselection_method=constants.STR_MODELSELECTION_METHOD,
        is_fixed_noise=constants.IS_FIXED_GP_NOISE,
        debug=False):
    """
    This function computes the kernel matrix optimized by optimization
    method specified, its inverse matrix, and the optimized hyperparameters.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param prior_mu: prior mean function or None.
    :type prior_mu: function or NoneType
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param str_optimizer_method: the name of optimization method.
    :type str_optimizer_method: str., optional
    :param str_modelselection_method: the name of model selection method.
    :type str_modelselection_method: str., optional
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: a tuple of kernel matrix over `X_train`, kernel matrix
        inverse, and dictionary of hyperparameters.
    :rtype: tuple of (numpy.ndarray, numpy.ndarray, dict.)

    :raises: AssertionError, ValueError
    """
    # TODO: check to input same is_fixed_noise to convert_hyps and restore_hyps
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert callable(prior_mu) or prior_mu is None
    assert isinstance(str_cov, str)
    assert isinstance(str_optimizer_method, str)
    assert isinstance(str_modelselection_method, str)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(debug, bool)
    assert len(Y_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0]
    _check_str_cov('get_optimized_kernel', str_cov, X_train.shape)
    assert str_optimizer_method in constants.ALLOWED_OPTIMIZER_METHOD_GP
    assert str_modelselection_method in constants.ALLOWED_MODELSELECTION_METHOD
    # TODO: fix this.
    is_gradient = True

    time_start = time.time()

    if debug:
        print('[DEBUG] get_optimized_kernel in gp.py: str_optimizer_method {}'.
              format(str_optimizer_method))
        print(
            '[DEBUG] get_optimized_kernel in gp.py: str_modelselection_method {}'
            .format(str_modelselection_method))

    prior_mu_train = get_prior_mu(prior_mu, X_train)
    if str_cov in constants.ALLOWED_GP_COV_BASE:
        num_dim = X_train.shape[1]
    elif str_cov in constants.ALLOWED_GP_COV_SET:
        num_dim = X_train.shape[2]
        # Gradients are disabled for set covariances.
        is_gradient = False

    # NOTE: the 'ml' closure reads is_gradient at call time, so later
    # assignments to is_gradient affect it; the statement order here matters.
    if str_modelselection_method == 'ml':
        neg_log_ml_ = lambda hyps: neg_log_ml(X_train, Y_train, hyps,
            str_cov, prior_mu_train, is_fixed_noise=is_fixed_noise,
            is_gradient=is_gradient, debug=debug)
    elif str_modelselection_method == 'loocv':
        neg_log_ml_ = lambda hyps: neg_log_pseudo_l_loocv(X_train, Y_train,
            hyps, str_cov, prior_mu_train, is_fixed_noise=
            is_fixed_noise, debug=debug)
        # The LOOCV objective provides no gradients.
        is_gradient = False
    else: # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_modelselection_method.'
        )

    # Initial point: default hyperparameters flattened into a vector.
    hyps_converted = utils_covariance.convert_hyps(
        str_cov,
        utils_covariance.get_hyps(str_cov, num_dim),
        is_fixed_noise=is_fixed_noise,
    )

    if str_optimizer_method == 'BFGS':
        result_optimized = scipy.optimize.minimize(neg_log_ml_,
                                                   hyps_converted,
                                                   method=str_optimizer_method,
                                                   jac=is_gradient,
                                                   options={'disp': False})
        result_optimized = result_optimized.x
    elif str_optimizer_method == 'L-BFGS-B':
        bounds = utils_covariance.get_range_hyps(str_cov, num_dim,
                                                 is_fixed_noise=is_fixed_noise)
        result_optimized = scipy.optimize.minimize(neg_log_ml_,
                                                   hyps_converted,
                                                   method=str_optimizer_method,
                                                   bounds=bounds,
                                                   jac=is_gradient,
                                                   options={'disp': False})
        result_optimized = result_optimized.x
    # TODO: Fill this conditions
    elif str_optimizer_method == 'DIRECT': # pragma: no cover
        raise NotImplementedError(
            'get_optimized_kernel: allowed str_optimizer_method, but it is not implemented.'
        )
    elif str_optimizer_method == 'CMA-ES': # pragma: no cover
        raise NotImplementedError(
            'get_optimized_kernel: allowed str_optimizer_method, but it is not implemented.'
        )
    else: # pragma: no cover
        raise ValueError(
            'get_optimized_kernel: missing conditions for str_optimizer_method'
        )

    hyps = utils_covariance.restore_hyps(str_cov, result_optimized,
                                         is_fixed_noise=is_fixed_noise)

    hyps, _ = utils_covariance.validate_hyps_dict(hyps, str_cov, num_dim)
    cov_X_X, inv_cov_X_X, grad_cov_X_X = get_kernel_inverse(
        X_train, hyps, str_cov, is_fixed_noise=is_fixed_noise, debug=debug)

    time_end = time.time()

    if debug:
        print('[DEBUG] get_optimized_kernel in gp.py: optimized hyps for gpr',
              hyps)
        print('[DEBUG] get_optimized_kernel in gp.py: time consumed',
              time_end - time_start, 'sec.')
    return cov_X_X, inv_cov_X_X, hyps
def neg_log_pseudo_l_loocv(X_train, Y_train, hyps, str_cov, prior_mu_train,
                           is_fixed_noise=constants.IS_FIXED_GP_NOISE,
                           debug=False):
    """
    It computes a negative log pseudo-likelihood using leave-one-out
    cross-validation.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior values computed by get_prior_mu().
        Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: negative log pseudo-likelihood.
    :rtype: float

    :raises: AssertionError
    """
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(str_cov, str)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(debug, bool)
    assert len(Y_train.shape) == 2
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    _check_str_cov('neg_log_pseudo_l_loocv', str_cov, X_train.shape)

    # NOTE(review): prior_mu_train is validated but never used below;
    # presumably the terms should use Y_train - prior_mu_train — confirm.
    num_data = X_train.shape[0]
    hyps = utils_covariance.restore_hyps(str_cov, hyps,
                                         is_fixed_noise=is_fixed_noise)

    # The gradient of the kernel is not needed here.
    cov_X_X, inv_cov_X_X, _ = get_kernel_inverse(
        X_train, hyps, str_cov, is_fixed_noise=is_fixed_noise, debug=debug)

    log_pseudo_l_ = 0.0
    for ind_data in range(0, num_data):
        # The previous version also built per-iteration leave-one-out copies
        # of X_train / Y_train with np.vstack; they were never used, so the
        # O(n) copies per iteration have been removed.
        cur_Y_test = Y_train[ind_data]

        # Leave-one-out predictive mean and stddev from the full precision
        # matrix (Rasmussen & Williams, Eqs. 5.10-5.12).
        cur_mu = np.squeeze(cur_Y_test) - np.dot(
            inv_cov_X_X, Y_train)[ind_data] / inv_cov_X_X[ind_data, ind_data]
        cur_sigma = np.sqrt(
            1.0 / (inv_cov_X_X[ind_data, ind_data] + constants.JITTER_COV))

        # Log density of the held-out observation under N(cur_mu, cur_sigma^2).
        first_term = -0.5 * np.log(cur_sigma**2)
        second_term = -0.5 * (np.squeeze(cur_Y_test - cur_mu))**2 / (cur_sigma
            **2)
        third_term = -0.5 * np.log(2.0 * np.pi)
        log_pseudo_l_ += first_term + second_term + third_term

    # Average over data points and negate for minimization.
    log_pseudo_l_ /= num_data
    log_pseudo_l_ *= -1.0

    return log_pseudo_l_
def neg_log_ml(X_train, Y_train, hyps, str_cov, prior_mu_train,
               is_fixed_noise=constants.IS_FIXED_GP_NOISE, is_cholesky=True,
               is_gradient=True, debug=False):
    """
    This function computes a negative log marginal likelihood.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior values computed by get_prior_mu().
        Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param is_fixed_noise: flag for fixing a noise.
    :type is_fixed_noise: bool., optional
    :param is_cholesky: flag for using a cholesky decomposition.
    :type is_cholesky: bool., optional
    :param is_gradient: flag for computing and returning gradients of
        negative log marginal likelihood.
    :type is_gradient: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: negative log marginal likelihood, or (negative log marginal
        likelihood, gradients of the likelihood).
    :rtype: float, or tuple of (float, float)

    :raises: AssertionError
    """
    assert isinstance(X_train, np.ndarray)
    assert isinstance(Y_train, np.ndarray)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(str_cov, str)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(is_fixed_noise, bool)
    assert isinstance(is_cholesky, bool)
    assert isinstance(is_gradient, bool)
    assert isinstance(debug, bool)
    assert len(Y_train.shape) == 2
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    _check_str_cov('neg_log_ml', str_cov, X_train.shape)

    # Convert the flat hyperparameter vector back into a dictionary.
    hyps = utils_covariance.restore_hyps(str_cov, hyps,
                                         is_fixed_noise=is_fixed_noise)
    # Work with the residuals against the prior mean.
    new_Y_train = Y_train - prior_mu_train
    if is_cholesky:
        cov_X_X, lower, grad_cov_X_X = get_kernel_cholesky(
            X_train, hyps, str_cov, is_fixed_noise=is_fixed_noise,
            is_gradient=is_gradient, debug=debug)

        # alpha = K^{-1} (Y - prior_mu) via triangular solves on the factor.
        alpha = scipy.linalg.cho_solve((lower, True), new_Y_train)

        # Data-fit term: -0.5 * y^T K^{-1} y.
        first_term = -0.5 * np.dot(new_Y_train.T, alpha)
        # Complexity term: -0.5 * log det(K) = -sum(log diag(L)),
        # since log det(K) = 2 * sum(log diag(L)); JITTER_LOG guards log(0).
        second_term = -1.0 * np.sum(
            np.log(np.diagonal(lower) + constants.JITTER_LOG))

        if is_gradient:
            assert grad_cov_X_X is not None
            # d log ML / d theta = 0.5 * tr((alpha alpha^T - K^{-1}) dK/dtheta),
            # computed with einsum over the stacked gradient tensor.
            first_term_grad = np.einsum("ik,jk->ijk", alpha, alpha)
            first_term_grad -= np.expand_dims(scipy.linalg.cho_solve(
                (lower, True), np.eye(cov_X_X.shape[0])), axis=2)
            grad_log_ml_ = 0.5 * np.einsum("ijl,ijk->kl", first_term_grad,
                                           grad_cov_X_X)
            grad_log_ml_ = np.sum(grad_log_ml_, axis=1)
    else:
        # TODO: is_gradient is fixed.
        # Gradients are not supported on the direct-inverse path.
        is_gradient = False
        cov_X_X, inv_cov_X_X, grad_cov_X_X = get_kernel_inverse(
            X_train, hyps, str_cov, is_fixed_noise=is_fixed_noise,
            is_gradient=is_gradient, debug=debug)

        first_term = -0.5 * np.dot(np.dot(new_Y_train.T, inv_cov_X_X),
                                   new_Y_train)
        second_term = -0.5 * np.log(
            np.linalg.det(cov_X_X) + constants.JITTER_LOG)

    # Normalization constant: -(n / 2) * log(2 * pi).
    third_term = -float(X_train.shape[0]) / 2.0 * np.log(2.0 * np.pi)
    log_ml_ = np.squeeze(first_term + second_term + third_term)
    # Normalized by the number of training points.
    log_ml_ /= X_train.shape[0]

    if is_gradient:
        return -1.0 * log_ml_, -1.0 * grad_log_ml_ / X_train.shape[0]
    else:
        return -1.0 * log_ml_
def neg_log_ml(X_train: np.ndarray, Y_train: np.ndarray, hyps: np.ndarray,
    str_cov: str, prior_mu_train: np.ndarray,
    use_ard: bool=constants.USE_ARD,
    fix_noise: bool=constants.FIX_GP_NOISE,
    use_cholesky: bool=True,
    use_gradient: bool=True,
    debug: bool=False
) -> constants.TYPING_UNION_FLOAT_FA:
    """
    This function computes a negative log marginal likelihood.

    :param X_train: inputs. Shape: (n, d) or (n, m, d).
    :type X_train: numpy.ndarray
    :param Y_train: outputs. Shape: (n, 1).
    :type Y_train: numpy.ndarray
    :param hyps: hyperparameters for Gaussian process. Shape: (h, ).
    :type hyps: numpy.ndarray
    :param str_cov: the name of covariance function.
    :type str_cov: str.
    :param prior_mu_train: the prior values computed by get_prior_mu().
        Shape: (n, 1).
    :type prior_mu_train: numpy.ndarray
    :param use_ard: flag for automatic relevance determination.
    :type use_ard: bool., optional
    :param fix_noise: flag for fixing a noise.
    :type fix_noise: bool., optional
    :param use_cholesky: flag for using a cholesky decomposition.
    :type use_cholesky: bool., optional
    :param use_gradient: flag for computing and returning gradients of
        negative log marginal likelihood.
    :type use_gradient: bool., optional
    :param debug: flag for printing log messages.
    :type debug: bool., optional

    :returns: negative log marginal likelihood, or (negative log marginal
        likelihood, gradients of the likelihood).
    :rtype: float, or tuple of (float, np.ndarray)

    :raises: AssertionError
    """
    # TODO: add use_ard.
    utils_gp.validate_common_args(X_train, Y_train, str_cov, None, debug)
    assert isinstance(hyps, np.ndarray)
    assert isinstance(prior_mu_train, np.ndarray)
    assert isinstance(use_ard, bool)
    assert isinstance(fix_noise, bool)
    assert isinstance(use_cholesky, bool)
    assert isinstance(use_gradient, bool)
    assert len(prior_mu_train.shape) == 2
    assert X_train.shape[0] == Y_train.shape[0] == prior_mu_train.shape[0]
    utils_covariance.check_str_cov('neg_log_ml', str_cov, X_train.shape)

    # Convert the flat hyperparameter vector back into a dictionary.
    hyps = utils_covariance.restore_hyps(str_cov, hyps, use_ard=use_ard,
                                         fix_noise=fix_noise)
    # Work with the residuals against the prior mean.
    new_Y_train = Y_train - prior_mu_train
    if use_cholesky:
        cov_X_X, lower, grad_cov_X_X = covariance.get_kernel_cholesky(
            X_train, hyps, str_cov, fix_noise=fix_noise,
            use_gradient=use_gradient, debug=debug)

        # alpha = K^{-1} (Y - prior_mu) via triangular solves on the factor.
        alpha = scipy.linalg.cho_solve((lower, True), new_Y_train)

        # Data-fit term: -0.5 * y^T K^{-1} y.
        first_term = -0.5 * np.dot(new_Y_train.T, alpha)
        # Complexity term: -0.5 * log det(K) = -sum(log diag(L)),
        # since log det(K) = 2 * sum(log diag(L)); JITTER_LOG guards log(0).
        second_term = -1.0 * np.sum(
            np.log(np.diagonal(lower) + constants.JITTER_LOG))

        if use_gradient:
            assert grad_cov_X_X is not None
            # d log ML / d theta = 0.5 * tr((alpha alpha^T - K^{-1}) dK/dtheta),
            # computed with einsum over the stacked gradient tensor.
            first_term_grad = np.einsum("ik,jk->ijk", alpha, alpha)
            first_term_grad -= np.expand_dims(scipy.linalg.cho_solve(
                (lower, True), np.eye(cov_X_X.shape[0])), axis=2)
            grad_log_ml_ = 0.5 * np.einsum("ijl,ijk->kl", first_term_grad,
                                           grad_cov_X_X)
            grad_log_ml_ = np.sum(grad_log_ml_, axis=1)
    else:
        # TODO: use_gradient is fixed.
        # Gradients are not supported on the direct-inverse path.
        use_gradient = False
        cov_X_X, inv_cov_X_X, grad_cov_X_X = covariance.get_kernel_inverse(
            X_train, hyps, str_cov, fix_noise=fix_noise,
            use_gradient=use_gradient, debug=debug)

        first_term = -0.5 * np.dot(np.dot(new_Y_train.T, inv_cov_X_X),
                                   new_Y_train)
        # slogdet is used instead of log(det(.)) for numerical stability.
        sign_second_term, second_term = np.linalg.slogdet(cov_X_X)
        # TODO: It should be checked.
        if sign_second_term <= 0: # pragma: no cover
            second_term = 0.0

        second_term = -0.5 * second_term

    # Normalization constant: -(n / 2) * log(2 * pi).
    third_term = -float(X_train.shape[0]) / 2.0 * np.log(2.0 * np.pi)
    log_ml_ = np.squeeze(first_term + second_term + third_term)
    # Normalized by the number of training points.
    log_ml_ /= X_train.shape[0]

    if use_gradient:
        return -1.0 * log_ml_, -1.0 * grad_log_ml_ / X_train.shape[0]

    return -1.0 * log_ml_
def test_restore_hyps():
    """Checks input validation and round-trip values of restore_hyps."""
    # Each entry is (positional args, keyword args) that must be rejected.
    invalid_cases = [
        ((1.2, 2.1), {}),
        ((1.2, np.array([1.0, 1.0])), {}),
        (('se', 2.1), {}),
        (('abc', 2.1), {}),
        (('se', np.array([[1.0, 1.0], [1.0, 1.0]])), {}),
        (('se', np.array([1.0, 1.0, 1.0])), {'use_ard': 1}),
        (('se', np.array([1.0, 1.0, 1.0])), {'use_ard': 'abc'}),
        (('se', np.array([1.0, 1.0, 1.0])), {'fix_noise': 1}),
        (('se', np.array([1.0, 1.0, 1.0])), {'noise': 'abc'}),
        (('se', np.array([1.0, 1.0, 1.0])), {'use_gp': 1}),
        (('se', np.array([1.0, 1.0, 1.0])), {'use_gp': 'abc'}),
        (('se', np.array([0.1, 1.0, 1.0, 1.0])), {'use_ard': False}),
    ]
    for cur_args, cur_kwargs in invalid_cases:
        with pytest.raises(AssertionError):
            package_target.restore_hyps(*cur_args, **cur_kwargs)

    # GP, free noise: layout is [noise, signal, lengthscales...].
    cur_hyps = np.array([0.1, 1.0, 1.0, 1.0, 1.0])
    hyps_restored = package_target.restore_hyps('se', cur_hyps,
                                                fix_noise=False)
    assert hyps_restored['noise'] == cur_hyps[0]
    assert hyps_restored['signal'] == cur_hyps[1]
    assert (hyps_restored['lengthscales'] == cur_hyps[2:]).all()

    # GP, fixed noise: noise falls back to the constant, layout shifts by one.
    hyps_restored = package_target.restore_hyps('se', cur_hyps,
                                                fix_noise=True)
    assert hyps_restored['noise'] == constants.GP_NOISE
    assert hyps_restored['signal'] == cur_hyps[0]
    assert (hyps_restored['lengthscales'] == cur_hyps[1:]).all()

    # use_gp=False adds a 'dof' entry after the noise slot.
    cur_hyps = np.array([0.1, 100.0, 20.0, 1.0, 1.0, 1.0])
    hyps_restored = package_target.restore_hyps('se', cur_hyps,
                                                fix_noise=False, use_gp=False)
    assert hyps_restored['noise'] == cur_hyps[0]
    assert hyps_restored['dof'] == cur_hyps[1]
    assert hyps_restored['signal'] == cur_hyps[2]
    assert (hyps_restored['lengthscales'] == cur_hyps[3:]).all()

    # use_gp=False with fixed noise: 'dof' moves to the first slot.
    cur_hyps = np.array([100.0, 20.0, 1.0, 1.0, 1.0])
    hyps_restored = package_target.restore_hyps('se', cur_hyps,
                                                fix_noise=True, use_gp=False)
    assert hyps_restored['noise'] == constants.GP_NOISE
    assert hyps_restored['dof'] == cur_hyps[0]
    assert hyps_restored['signal'] == cur_hyps[1]
    assert (hyps_restored['lengthscales'] == cur_hyps[2:]).all()

    # With three hyperparameters a single lengthscale remains.
    cur_hyps = np.array([0.1, 1.0, 4.0])
    hyps_restored = package_target.restore_hyps('se', cur_hyps,
                                                fix_noise=False)
    assert hyps_restored['noise'] == cur_hyps[0]
    assert hyps_restored['signal'] == cur_hyps[1]
    assert hyps_restored['lengthscales'] == cur_hyps[2]