def initialize_gp(self, xs_train, ys_train):
    kernel = "15**2 * AnisotropicVonKarman(invLam=np.array([[1./3000.**2,0],[0,1./3000.**2]]))"
    gp_x = treegp.GPInterpolation(kernel=kernel, optimizer='anisotropic',
                                  normalize=False, white_noise=0., p0=[3000., 0., 0.],
                                  n_neighbors=4, average_fits=None, nbins=20,
                                  min_sep=None, max_sep=None)
    gp_y = treegp.GPInterpolation(kernel=kernel, optimizer='anisotropic',
                                  normalize=False, white_noise=0., p0=[3000., 0., 0.],
                                  n_neighbors=4, average_fits=None, nbins=20,
                                  min_sep=None, max_sep=None)
    gp_x.initialize(xs_train, ys_train[:, 0], y_err=None)
    gp_y.initialize(xs_train, ys_train[:, 1], y_err=None)
    self.gp_x = gp_x
    self.gp_y = gp_y
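# A minimal usage sketch (not from the original code): once initialize_gp has been
# called, the anisotropic hyperparameters can be fit with solve() and the two residual
# components interpolated at new positions with predict(). The method name
# solve_and_predict_gp and the argument xs_test (an (N, 2) array of coordinates)
# are hypothetical.
def solve_and_predict_gp(self, xs_test):
    # Fit the anisotropic two-point-function hyperparameters, then interpolate.
    self.gp_x.solve()
    self.gp_y.solve()
    dx = self.gp_x.predict(xs_test)
    dy = self.gp_y.predict(xs_test)
    return np.array([dx, dy]).T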
def test_anisotropic_limit():
    """Test that anisotropic kernels with an isotropic covariance reduce to their isotropic counterparts."""
    np.random.seed(42)

    # Isotropic vs anisotropic RBF.
    kernel1 = "RBF(0.45)"
    kernel2 = "AnisotropicRBF(scale_length=[0.45, 0.45])"
    gp1 = treegp.GPInterpolation(kernel=kernel1)
    gp2 = treegp.GPInterpolation(kernel=kernel2)
    X = np.random.rand(1000, 2)
    np.testing.assert_allclose(gp1.kernel_template.__call__(X),
                               gp2.kernel_template.__call__(X))

    # Isotropic vs anisotropic VonKarman.
    kernel3 = "VonKarman(0.45)"
    kernel4 = "AnisotropicVonKarman(scale_length=[0.45, 0.45])"
    gp3 = treegp.GPInterpolation(kernel=kernel3)
    gp4 = treegp.GPInterpolation(kernel=kernel4)
    X = np.random.rand(1000, 2)
    np.testing.assert_allclose(gp3.kernel_template.__call__(X),
                               gp4.kernel_template.__call__(X))
def vk(l):
    kernel_vk = "1 * VonKarman(length_scale=%f)" % (l)
    interp_vk = treegp.GPInterpolation(kernel=kernel_vk, normalize=False, white_noise=0.)
    ker_vk = interp_vk.kernel_template
    corr_vk = ker_vk.__call__(coord_corr, Y=np.zeros_like(coord_corr))[:, 0]
    return corr_vk
def test_gpinterp_meanify():
    optimizer = ['log-likelihood', 'anisotropic']
    npoints = [600, 2000]
    noise = 0.01
    sigma = 2.
    size = 0.5
    g1 = 0.2
    g2 = 0.2
    ker = 'AnisotropicRBF'

    # Generate 2D gaussian random fields.
    L = get_correlation_length_matrix(size, g1, g2)
    invL = np.linalg.inv(L)
    kernel = "%f**2*%s" % (sigma, ker)
    kernel += "(invLam={0!r})".format(invL)
    kernel_skl = treegp.eval_kernel(kernel)

    for n, opt in enumerate(optimizer):
        x, y, y_err = make_2d_grf(kernel_skl, noise=noise, seed=42, npoints=npoints[n])

        # Add the mean function.
        coords0, y0 = make_average(coord=x, gp=False)
        y += y0

        # Do the GP interpolation and fit the hyperparameters
        # (the truth is used as the initial guess).
        gp = treegp.GPInterpolation(kernel=kernel, optimizer=opt,
                                    normalize=True, nbins=21, min_sep=0., max_sep=3.,
                                    p0=[0.5, 0, 0],
                                    average_fits=os.path.join('inputs', 'mean_gp_stat_mean.fits'))
        gp.initialize(x, y, y_err=y_err)
        gp.solve()
        # Test that the fitted hyperparameters are close to the true hyperparameters.
        np.testing.assert_allclose(kernel_skl.theta, gp.kernel.theta, atol=5e-1)

        # Predict at the same positions as the simulated data.
        # Predictions are strictly equal to the input data in the no-noise case.
        # With noise, the pull distribution should have a mean around 0 and a
        # std < 1 (the same data are used to train and validate, and the data
        # are well sampled compared to the input correlation length).
        y_predict, y_cov = gp.predict(x, return_cov=True)
        y_std = np.sqrt(np.diag(y_cov))
        pull = y - y_predict
        pull /= np.sqrt(y_err**2 + y_std**2)
        mean_pull = np.mean(pull)
        std_pull = np.std(pull)

        # Test that the mean of the pull is close to zero and the std of the pull is below 1.
        np.testing.assert_allclose(0., mean_pull, atol=3.*(std_pull)/np.sqrt(npoints[n]))
        if std_pull > 1.:
            raise ValueError("std_pull is > 1. Current value std_pull = %f" % (std_pull))
def test_vonkarman_kernel():
    from scipy import special

    corr_length = [1., 10., 100., 1000.]
    kernel_amp = [1e-4, 1e-3, 1e-2, 1.]

    dist = np.linspace(0, 10, 100)
    coord = np.array([dist, dist]).T
    dist = np.linspace(0.01, 10, 100)
    coord_corr = np.array([dist, np.zeros_like(dist)]).T

    def _vonkarman_kernel(param, x):
        A = (x[:, 0] - x[:, 0][:, None])
        B = (x[:, 1] - x[:, 1][:, None])
        distance = np.sqrt(A * A + B * B)
        Filter = distance != 0.
        K = np.zeros_like(distance)
        K[Filter] = param[0]**2 * ((distance[Filter] / param[1])**(5. / 6.) *
                                   special.kv(-5. / 6., 2 * np.pi * distance[Filter] / param[1]))
        div = 5. / 6.
        lim0 = special.gamma(div) / (2 * (np.pi**div))
        K[~Filter] = param[0]**2 * lim0
        K /= lim0
        return K

    def _vonkarman_corr_function(param, distance):
        div = 5. / 6.
        lim0 = (2 * (np.pi**div)) / special.gamma(div)
        return (param[0]**2 * lim0 * ((distance / param[1])**(5. / 6.)) *
                special.kv(-5. / 6., 2 * np.pi * distance / param[1]))

    for corr in corr_length:
        for amp in kernel_amp:
            kernel = "%.10f * VonKarman(length_scale=%f)" % (amp**2, corr)
            interp = treegp.GPInterpolation(kernel=kernel, normalize=False, white_noise=0.)
            ker = interp.kernel_template

            ker_piff = ker.__call__(coord)
            corr_piff = ker.__call__(coord_corr, Y=np.zeros_like(coord_corr))[:, 0]
            ker_test = _vonkarman_kernel([amp, corr], coord)
            corr_test = _vonkarman_corr_function([amp, corr], dist)

            np.testing.assert_allclose(ker_piff, ker_test, atol=1e-12)
            np.testing.assert_allclose(corr_piff, corr_test, atol=1e-12)
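# For reference (derived from the reference implementations above, not from treegp
# documentation): the closed form checked by _vonkarman_corr_function is
#
#     xi(r) = A**2 * (2 * pi**(5/6) / Gamma(5/6)) * (r / l)**(5/6) * K_{-5/6}(2 * pi * r / l)
#
# where A is the kernel amplitude, l the correlation length, and K_nu the modified
# Bessel function of the second kind (scipy.special.kv). At r = 0 the normalization
# reduces the kernel value to A**2, which is what _vonkarman_kernel enforces on the
# diagonal before comparing against the treegp VonKarman kernel.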
def initialize(self, stars, logger=None):
    """Initialize both the interpolator to some state prefatory to any solve iterations
    and initialize the stars for use with this interpolator.

    :param stars:    A list of Star instances to interpolate between
    :param logger:   A logger object for logging debug info. [default: None]
    """
    if self.rows is None:
        self.nparams = len(stars[0].fit.params)
        self.rows = np.arange(0, self.nparams, 1).astype(int)
    else:
        self.nparams = len(self.rows)
        self.rows = np.array(self.rows)

    if len(self.kernel_template) == 1:
        self.kernels = [self.kernel_template[0] for i in range(self.nparams)]
    elif len(self.kernel_template) == self.nparams:
        self.kernels = [ker for ker in self.kernel_template]
    else:
        raise ValueError(
            "The number of kernels provided should be 1 (shared by all parameters) "
            "or equal to the number of params (%i); number of kernels provided: %i"
            % (self.nparams, len(self.kernel_template)))

    self.gps = []
    for i in range(self.nparams):
        gp = treegp.GPInterpolation(
            kernel=self.kernels[i],
            optimizer=self.treegp_alias[self.optimizer],
            normalize=self.normalize,
            p0=[self.l0, 0, 0],
            white_noise=self.white_noise,
            n_neighbors=self.n_neighbors,
            average_fits=self.average_fits,
            indice_meanify=i,
            nbins=self.nbins,
            min_sep=self.min_sep,
            max_sep=self.max_sep)
        self.gps.append(gp)

    self._init_theta = np.array([gp.kernel_template.theta for gp in self.gps])

    return stars
def _finish_read(self, fits, extname):
    data = fits[extname + '_kernel'].read()
    # Run fit to set up GP, but don't actually do any hyperparameter optimization.
    # Just set the GP up using the current hyperparameters.
    # Need to give back the average fits files if needed.
    init_theta = np.atleast_1d(data['INIT_THETA'][0])
    fit_theta = np.atleast_1d(data['FIT_THETA'][0])

    self._X = np.atleast_1d(data['X'][0])
    self._y = np.atleast_1d(data['Y'][0])
    self._y_err = np.atleast_1d(data['Y_ERR'][0])

    self.rows = np.atleast_1d(data['ROWS'][0])

    self._init_theta = init_theta
    self.nparams = len(init_theta)
    self.optimizer = data['OPTIMIZER'][0]

    if len(self.kernel_template) == 1:
        self.kernels = [self.kernel_template[0] for i in range(self.nparams)]
    else:
        self.kernels = [ker for ker in self.kernel_template]

    self.gps = []
    for i in range(self.nparams):
        gp = treegp.GPInterpolation(
            kernel=self.kernels[i],
            optimizer=self.treegp_alias[self.optimizer],
            normalize=self.normalize,
            p0=[3000., 0., 0.],
            white_noise=self.white_noise,
            n_neighbors=4,
            average_fits=None,
            nbins=20,
            min_sep=None,
            max_sep=None)
        gp.kernel_template.clone_with_theta(fit_theta[i])
        gp.initialize(self._X, self._y[:, i], y_err=self._y_err[:, i])
        self.gps.append(gp)
def test_gp_interp_1d():
    npoints = 40
    noise = [None, 0.1]
    # When there is no noise, a "magic" factor is needed in order to be able to
    # get a numerically positive-definite matrix and to get a GP interpolation
    # (the determinant of the kernel matrix is close to 0). This problem is
    # solved by adding a little bit of white noise when there is no noise.
    white_noise = [1e-5, 0.]
    sigma = [1., 2.]
    l = [2., 2.]
    atols_on_data = [0., 1e-3]
    kernels = ['RBF', 'VonKarman']

    for ker in kernels:
        for i in range(2):
            # Generate 1D gaussian random fields.
            kernel = "%f**2 * %s(%f)" % (sigma[i], ker, l[i])
            kernel_skl = treegp.eval_kernel(kernel)
            x, y, y_err = make_1d_grf(kernel_skl, noise=noise[i], seed=42, npoints=npoints)

            # Do the GP interpolation without hyperparameter fitting
            # (the truth is used as the kernel).
            gp = treegp.GPInterpolation(kernel=kernel, optimizer="none",
                                        white_noise=white_noise[i])
            gp.initialize(x, y, y_err=y_err)

            # Predict at the same positions as the simulated data.
            # Predictions are strictly equal to the input data in the no-noise case.
            # With noise, the pull distribution should have a mean around 0 and a
            # std < 1 (the same data are used to train and validate, and the data
            # are well sampled compared to the input correlation length).
            y_predict, y_cov = gp.predict(x, return_cov=True)
            y_std = np.sqrt(np.diag(y_cov))
            pull = y - y_predict
            if noise[i] is not None:
                pull /= np.sqrt(y_err**2 + y_std**2)
            else:
                # Test that the prediction is equal to the data at the data
                # positions. Also test that the diagonal of the predicted
                # covariance is zero at the data positions when there is no noise.
                np.testing.assert_allclose(y, y_predict, atol=3. * white_noise[i])
                np.testing.assert_allclose(np.zeros_like(y_std), y_std, atol=3. * white_noise[i])
            mean_pull = np.mean(pull)
            std_pull = np.std(pull)

            # Test that the mean of the pull is close to zero and the std of the pull is below 1.
            np.testing.assert_allclose(0., mean_pull, atol=3. * (std_pull) / np.sqrt(npoints))
            if std_pull > 1.:
                raise ValueError("std_pull is > 1. Current value std_pull = %f" % (std_pull))

            # Test that for extrapolation, the interpolation is the mean function (0 here)
            # and the diagonal of the covariance matrix is close to the hyperparameter
            # linked to the amplitude of the fluctuations of the gaussian random field.
            new_x = np.linspace(np.max(x) + 6. * l[i], np.max(x) + 7. * l[i],
                                npoints).reshape((npoints, 1))
            gp = treegp.GPInterpolation(kernel=kernel, optimizer="none",
                                        normalize=False, white_noise=white_noise[i])
            gp.initialize(x, y, y_err=y_err)
            y_predict, y_cov = gp.predict(new_x, return_cov=True)
            y_std = np.sqrt(np.diag(y_cov))

            np.testing.assert_allclose(np.zeros_like(y_predict), y_predict, atol=1e-5)
            np.testing.assert_allclose(sigma[i] * np.ones_like(y_std), y_std, atol=1e-5)
def test_hyperparameter_search_2d():
    optimizer = ['log-likelihood', 'anisotropic']
    npoints = [400, 2000]
    noise = 0.01
    sigma = 2.
    size = 0.5
    g1 = 0.2
    g2 = 0.2
    ker = 'AnisotropicRBF'

    # Generate 2D gaussian random fields.
    L = get_correlation_length_matrix(size, g1, g2)
    invL = np.linalg.inv(L)
    kernel = "%f**2*%s" % (sigma, ker)
    kernel += "(invLam={0!r})".format(invL)
    kernel_skl = treegp.eval_kernel(kernel)

    for n, opt in enumerate(optimizer):
        x, y, y_err = make_2d_grf(kernel_skl, noise=noise, seed=42, npoints=npoints[n])

        # Do the GP interpolation and fit the hyperparameters
        # (the truth is used as the initial guess).
        gp = treegp.GPInterpolation(kernel=kernel, optimizer=opt,
                                    normalize=True, nbins=21, min_sep=0., max_sep=1.,
                                    p0=[0.3, 0., 0.])
        gp.initialize(x, y, y_err=y_err)
        gp.solve()
        # Test that the fitted hyperparameters are close to the true hyperparameters.
        np.testing.assert_allclose(kernel_skl.theta, gp.kernel.theta, atol=5e-1)

        # Predict at the same positions as the simulated data.
        # Predictions are strictly equal to the input data in the no-noise case.
        # With noise, the pull distribution should have a mean around 0 and a
        # std < 1 (the same data are used to train and validate, and the data
        # are well sampled compared to the input correlation length).
        y_predict, y_cov = gp.predict(x, return_cov=True)
        y_std = np.sqrt(np.diag(y_cov))
        pull = y - y_predict
        pull /= np.sqrt(y_err**2 + y_std**2)
        mean_pull = np.mean(pull)
        std_pull = np.std(pull)

        # Test that the mean of the pull is close to zero and the std of the pull is below 1.
        np.testing.assert_allclose(0., mean_pull, atol=3.*(std_pull)/np.sqrt(npoints[n]))
        if std_pull > 1.:
            raise ValueError("std_pull is > 1. Current value std_pull = %f" % (std_pull))

        # Test that for extrapolation, the interpolation is the mean function (0 here)
        # and the diagonal of the covariance matrix is close to the hyperparameter
        # linked to the amplitude of the fluctuations of the gaussian random field.
        np.random.seed(42)
        x1 = np.random.uniform(np.max(x)+6.*size, np.max(x)+6.*size, npoints[n])
        x2 = np.random.uniform(np.max(x)+6.*size, np.max(x)+6.*size, npoints[n])
        new_x = np.array([x1, x2]).T
        y_predict, y_cov = gp.predict(new_x, return_cov=True)
        y_std = np.sqrt(np.diag(y_cov))

        np.testing.assert_allclose(np.mean(y), y_predict, atol=1e-5)
        sig = np.sqrt(np.exp(gp.kernel.theta[0]))
        np.testing.assert_allclose(sig*np.ones_like(y_std), y_std, atol=1e-5)
def test_hyperparameter_search_1d():
    optimizer = ['log-likelihood', 'two-pcf']
    npoints = [100, 2000]
    noise = 0.01
    sigma = [1., 2., 1., 2.]
    l = [0.5, 0.8, 8., 10.]
    kernels = ['RBF', 'RBF', 'VonKarman', 'VonKarman']
    max_sep = [1.75, 1.75, 1.25, 1.25]

    for n, opt in enumerate(optimizer):
        for i, ker in enumerate(kernels):
            # Generate 1D gaussian random fields.
            kernel = "%f**2 * %s(%f)" % (sigma[i], ker, l[i])
            kernel_skl = treegp.eval_kernel(kernel)
            x, y, y_err = make_1d_grf(kernel_skl, noise=noise, seed=42, npoints=npoints[n])

            # Do the GP interpolation and fit the hyperparameters
            # (the truth is used as the initial guess).
            gp = treegp.GPInterpolation(kernel=kernel, optimizer=opt,
                                        normalize=True, nbins=15, min_sep=0.1,
                                        max_sep=max_sep[i])
            gp.initialize(x, y, y_err=y_err)
            gp.solve()
            # Test that the fitted hyperparameters are close to the true hyperparameters.
            np.testing.assert_allclose(kernel_skl.theta, gp.kernel.theta, atol=7e-1)

            if opt == "two-pcf":
                xi, xi_weight, distance, coord, mask = gp.return_2pcf()
                np.testing.assert_allclose(xi, gp._optimizer._2pcf, atol=1e-10)
            if opt == "log-likelihood":
                logL = gp.return_log_likelihood()
                np.testing.assert_allclose(logL, gp._optimizer._logL, atol=1e-10)

            # Predict at the same positions as the simulated data.
            # Predictions are strictly equal to the input data in the no-noise case.
            # With noise, the pull distribution should have a mean around 0 and a
            # std < 1 (the same data are used to train and validate, and the data
            # are well sampled compared to the input correlation length).
            y_predict, y_cov = gp.predict(x, return_cov=True)
            y_std = np.sqrt(np.diag(y_cov))
            pull = y - y_predict
            mean_pull = np.mean(pull)
            std_pull = np.std(pull)

            # Test that the mean of the pull is close to zero and the std of the pull is below 1.
            np.testing.assert_allclose(0., mean_pull, atol=3.*(std_pull)/np.sqrt(npoints[n]))
            if std_pull > 1.:
                raise ValueError("std_pull is > 1. Current value std_pull = %f" % (std_pull))

            # Test that for extrapolation, the interpolation is the mean function (0 here)
            # and the diagonal of the covariance matrix is close to the hyperparameter
            # linked to the amplitude of the fluctuations of the gaussian random field.
            new_x = np.linspace(np.max(x)+6.*l[i], np.max(x)+7.*l[i],
                                npoints[n]).reshape((npoints[n], 1))
            y_predict, y_cov = gp.predict(new_x, return_cov=True)
            y_std = np.sqrt(np.diag(y_cov))

            np.testing.assert_allclose(np.mean(y)*np.ones_like(y_std), y_predict, atol=1e-5)
            sig = np.sqrt(np.exp(gp.kernel.theta[0]))
            np.testing.assert_allclose(sig*np.ones_like(y_std), y_std, atol=1e-5)
import pylab as plt
import treegp
import numpy as np
import warnings

from iminuit import Minuit

dist = np.linspace(0.0, 6, 100)
coord_corr = np.array([dist, np.zeros_like(dist)]).T

kernel_rbf = "1 * RBF(length_scale=1)"
interp_rbf = treegp.GPInterpolation(kernel=kernel_rbf, normalize=False, white_noise=0.)
ker_rbf = interp_rbf.kernel_template
corr_rbf = ker_rbf.__call__(coord_corr, Y=np.zeros_like(coord_corr))[:, 0]

def vk(l):
    kernel_vk = "1 * VonKarman(length_scale=%f)" % (l)
    interp_vk = treegp.GPInterpolation(kernel=kernel_vk, normalize=False, white_noise=0.)
    ker_vk = interp_vk.kernel_template
    corr_vk = ker_vk.__call__(coord_corr, Y=np.zeros_like(coord_corr))[:, 0]
    return corr_vk

def chi2_fct(param):
    residuals = corr_rbf - vk(param[0])
    return np.sum(residuals**2)
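# A minimal sketch (not part of the original script) of how chi2_fct might be
# minimized with iminuit (>= 2.x API assumed) to find the VonKarman length scale
# whose correlation profile best matches the RBF one. The starting value 1.0 and
# the final comparison plot are illustrative choices.
m = Minuit(lambda length_scale: chi2_fct([length_scale]), length_scale=1.0)
m.errordef = Minuit.LEAST_SQUARES  # chi2-like objective
m.migrad()
best_l = m.values["length_scale"]

plt.figure()
plt.plot(dist, corr_rbf, label="RBF, length_scale=1")
plt.plot(dist, vk(best_l), "--", label="VonKarman, length_scale=%.3f" % best_l)
plt.xlabel("distance")
plt.ylabel("correlation")
plt.legend()
plt.show()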