Example 1
    def initialize_gp(self, xs_train, ys_train):
        kernel = "15**2 * AnisotropicVonKarman(invLam=np.array([[1./3000.**2,0],[0,1./3000.**2]]))"
        gp_x = treegp.GPInterpolation(kernel=kernel,
                                      optimizer='anisotropic', 
                                      normalize=False,
                                      white_noise=0.,
                                      p0=[3000., 0.,0.],
                                      n_neighbors=4,
                                      average_fits=None,
                                      nbins=20, 
                                      min_sep=None,
                                      max_sep=None)
        gp_y = treegp.GPInterpolation(kernel=kernel,
                                      optimizer='anisotropic', 
                                      normalize=False,
                                      white_noise=0.,
                                      p0=[3000., 0.,0.],
                                      n_neighbors=4,
                                      average_fits=None,
                                      nbins=20, 
                                      min_sep=None,
                                      max_sep=None)

        gp_x.initialize(xs_train, ys_train[:, 0], y_err=None)
        gp_y.initialize(xs_train, ys_train[:, 1], y_err=None)
        
        self.gp_x = gp_x
        self.gp_y = gp_y
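
A minimal companion sketch (not part of the original class) showing how the two GPs initialized above would typically be used: solve() fits the hyperparameters and predict() interpolates, the same treegp.GPInterpolation calls used in the later examples. xs_test is a hypothetical (N, 2) array of target positions, and numpy is assumed to be imported as np.

    def predict_components(self, xs_test):
        # Hypothetical helper, not in the original class: fit the
        # hyperparameters, then interpolate each component of ys_train
        # at the new positions.
        self.gp_x.solve()
        self.gp_y.solve()
        pred_x = self.gp_x.predict(xs_test, return_cov=False)
        pred_y = self.gp_y.predict(xs_test, return_cov=False)
        return np.array([pred_x, pred_y]).T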
Example 2
def test_anisotropic_limit():
    """Test that AnisotropicRBF with isotropic covariance equals RBF"""

    np.random.seed(42)

    #test isotropic vs anisotropic RBF
    kernel1 = "RBF(0.45)"
    kernel2 = "AnisotropicRBF(scale_length=[0.45, 0.45])"

    gp1 = treegp.GPInterpolation(kernel=kernel1)
    gp2 = treegp.GPInterpolation(kernel=kernel2)

    X = np.random.rand(1000, 2)
    np.testing.assert_allclose(gp1.kernel_template.__call__(X),
                               gp2.kernel_template.__call__(X))

    #test isotropic vs anisotropic VonKarman
    kernel3 = "VonKarman(0.45)"
    kernel4 = "AnisotropicVonKarman(scale_length=[0.45, 0.45])"

    gp3 = treegp.GPInterpolation(kernel=kernel3)
    gp4 = treegp.GPInterpolation(kernel=kernel4)

    X = np.random.rand(1000, 2)
    np.testing.assert_allclose(gp3.kernel_template.__call__(X),
                               gp4.kernel_template.__call__(X))
Example 3
def vk(l):
    kernel_vk = "1 * VonKarman(length_scale=%f)" % (l)
    interp_vk = treegp.GPInterpolation(kernel=kernel_vk,
                                       normalize=False,
                                       white_noise=0.)
    ker_vk = interp_vk.kernel_template
    corr_vk = ker_vk.__call__(coord_corr, Y=np.zeros_like(coord_corr))[:, 0]
    return corr_vk
Example 4
def test_gpinterp_meanify():
    optimizer = ['log-likelihood', 'anisotropic']
    npoints = [600, 2000]
    noise = 0.01
    sigma = 2.
    size = 0.5
    g1 = 0.2
    g2 = 0.2
    ker = 'AnisotropicRBF'

    # Generate 2D gaussian random fields.
    L = get_correlation_length_matrix(size, g1, g2)
    invL = np.linalg.inv(L)
    kernel = "%f**2*%s"%((sigma, ker))
    kernel += "(invLam={0!r})".format(invL)
    kernel_skl = treegp.eval_kernel(kernel)

    for n, opt in enumerate(optimizer):

        x, y, y_err = make_2d_grf(kernel_skl,
                                  noise=noise,
                                  seed=42, npoints=npoints[n])
        # add mean function
        coords0, y0 = make_average(coord=x, gp=False)
        y += y0

        # Set up the GP interpolation, starting from the true kernel,
        # and fit the hyperparameters.
        gp = treegp.GPInterpolation(kernel=kernel, optimizer=opt,
                                    normalize=True, nbins=21, min_sep=0.,
                                    max_sep=3., p0 = [0.5, 0, 0],
                                    average_fits=os.path.join('inputs',
                                                              'mean_gp_stat_mean.fits'))
        gp.initialize(x, y, y_err=y_err)
        gp.solve()
        # test if the found hyperparameters are close to the true hyperparameters.
        np.testing.assert_allclose(kernel_skl.theta, gp.kernel.theta, atol=5e-1)

        # Predict at the same positions as the simulated data.
        # Predictions are strictly equal to the input data
        # in the case of no noise. With noise you should expect
        # a pull distribution with mean around 0
        # and std < 1 (the same data are used to train and validate,
        # and the data are well sampled compared to the input
        # correlation length).
        y_predict, y_cov = gp.predict(x, return_cov=True)
        y_std = np.sqrt(np.diag(y_cov))
        pull = y - y_predict
        pull /= np.sqrt(y_err**2 + y_std**2)
        mean_pull = np.mean(pull)
        std_pull = np.std(pull)

        # Test that the mean of the pull is close to zero and the std of the pull is below 1.
        np.testing.assert_allclose(0., mean_pull, atol=3.*(std_pull)/np.sqrt(npoints[n]))
        if std_pull > 1.:
            raise ValueError("std_pull is > 1. Current value std_pull = %f"%(std_pull))
Example 5
def test_vonkarman_kernel():
    from scipy import special

    corr_length = [1., 10., 100., 1000.]
    kernel_amp = [1e-4, 1e-3, 1e-2, 1.]
    dist = np.linspace(0, 10, 100)
    coord = np.array([dist, dist]).T

    dist = np.linspace(0.01, 10, 100)
    coord_corr = np.array([dist, np.zeros_like(dist)]).T

    def _vonkarman_kernel(param, x):
        A = (x[:, 0] - x[:, 0][:, None])
        B = (x[:, 1] - x[:, 1][:, None])
        distance = np.sqrt(A * A + B * B)
        Filter = distance != 0.
        K = np.zeros_like(distance)
        K[Filter] = param[0]**2 * (
            (distance[Filter] / param[1])**(5. / 6.) *
            special.kv(-5. / 6., 2 * np.pi * distance[Filter] / param[1]))
        div = 5. / 6.
        lim0 = special.gamma(div) / (2 * (np.pi**div))
        K[~Filter] = param[0]**2 * lim0
        K /= lim0
        return K

    def _vonkarman_corr_function(param, distance):
        div = 5. / 6.
        lim0 = (2 * (np.pi**div)) / special.gamma(div)
        return param[0]**2 * lim0 * ((distance / param[1])**(
            5. / 6.)) * special.kv(-5. / 6., 2 * np.pi * distance / param[1])

    for corr in corr_length:
        for amp in kernel_amp:
            kernel = "%.10f * VonKarman(length_scale=%f)" % ((amp**2, corr))
            interp = treegp.GPInterpolation(kernel=kernel,
                                            normalize=False,
                                            white_noise=0.)
            ker = interp.kernel_template

            ker_piff = ker.__call__(coord)
            corr_piff = ker.__call__(coord_corr,
                                     Y=np.zeros_like(coord_corr))[:, 0]

            ker_test = _vonkarman_kernel([amp, corr], coord)
            corr_test = _vonkarman_corr_function([amp, corr], dist)

            np.testing.assert_allclose(ker_piff, ker_test, atol=1e-12)
            np.testing.assert_allclose(corr_piff, corr_test, atol=1e-12)
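
For reference, the closed form implemented by the two helpers above, restated directly from the code (K_{5/6} is the modified Bessel function of the second kind, scipy.special.kv, and the prefactor normalizes the kernel so that K(0) equals the amplitude):

    K(d) = \sigma^2 \,\frac{2\pi^{5/6}}{\Gamma(5/6)}\,
           \left(\frac{d}{L}\right)^{5/6} K_{5/6}\!\left(\frac{2\pi d}{L}\right),
    \qquad K(0) = \sigma^2,

where \sigma^2 is the constant amplitude in the kernel string (amp**2 above) and L is the length_scale.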
Example 6
    def initialize(self, stars, logger=None):
        """Initialize both the interpolator to some state prefatory to any solve iterations and
        initialize the stars for use with this interpolator.

        :param stars:   A list of Star instances to interpolate between
        :param logger:  A logger object for logging debug info. [default: None]
        """
        if self.rows is None:
            self.nparams = len(stars[0].fit.params)
            self.rows = np.arange(0, self.nparams, 1).astype(int)
        else:
            self.nparams = len(self.rows)
            self.rows = np.array(self.rows)

        if len(self.kernel_template) == 1:
            self.kernels = [
                self.kernel_template[0] for i in range(self.nparams)
            ]
        elif len(self.kernel_template) == self.nparams:
            self.kernels = [ker for ker in self.kernel_template]
        else:
            raise ValueError(
                "Number of kernels provided should be 1 (same for all parameters) "
                "or equal to the number of params (%i); number of kernels provided: %i"
                % (self.nparams, len(self.kernel_template)))
        self.gps = []

        for i in range(self.nparams):

            gp = treegp.GPInterpolation(
                kernel=self.kernels[i],
                optimizer=self.treegp_alias[self.optimizer],
                normalize=self.normalize,
                p0=[self.l0, 0, 0],
                white_noise=self.white_noise,
                n_neighbors=self.n_neighbors,
                average_fits=self.average_fits,
                indice_meanify=i,
                nbins=self.nbins,
                min_sep=self.min_sep,
                max_sep=self.max_sep)
            self.gps.append(gp)

        self._init_theta = np.array(
            [gp.kernel_template.theta for gp in self.gps])

        return stars
Example 7
    def _finish_read(self, fits, extname):
        data = fits[extname + '_kernel'].read()
        # Run fit to set up GP, but don't actually do any hyperparameter optimization. Just
        # set the GP up using the current hyperparameters.
        # Need to give back average fits files if needed.

        init_theta = np.atleast_1d(data['INIT_THETA'][0])
        fit_theta = np.atleast_1d(data['FIT_THETA'][0])

        self._X = np.atleast_1d(data['X'][0])
        self._y = np.atleast_1d(data['Y'][0])
        self._y_err = np.atleast_1d(data['Y_ERR'][0])
        self.rows = np.atleast_1d(data['ROWS'][0])

        self._init_theta = init_theta
        self.nparams = len(init_theta)
        self.optimizer = data['OPTIMIZER'][0]

        if len(self.kernel_template) == 1:
            self.kernels = [
                self.kernel_template[0] for i in range(self.nparams)
            ]
        else:
            self.kernels = [ker for ker in self.kernel_template]

        self.gps = []
        for i in range(self.nparams):

            gp = treegp.GPInterpolation(
                kernel=self.kernels[i],
                optimizer=self.treegp_alias[self.optimizer],
                normalize=self.normalize,
                p0=[3000., 0., 0.],
                white_noise=self.white_noise,
                n_neighbors=4,
                average_fits=None,
                nbins=20,
                min_sep=None,
                max_sep=None)
            gp.kernel_template.clone_with_theta(fit_theta[i])
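            # Note (added comment, not in the original code): sklearn-style kernels'
            # clone_with_theta() returns a new kernel rather than modifying the kernel
            # in place, so the call above only takes effect if its result is
            # reassigned, e.g.:
            # gp.kernel_template = gp.kernel_template.clone_with_theta(fit_theta[i])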
            gp.initialize(self._X, self._y[:, i], y_err=self._y_err[:, i])
            self.gps.append(gp)
Example 8
def test_gp_interp_1d():
    npoints = 40
    noise = [None, 0.1]
    # When there is no noise, a "magic"
    # factor is needed in order to be able
    # to get a numerically positive-definite
    # matrix and to do the GP interpolation (the
    # determinant of the kernel matrix is close to 0).
    # This problem is solved by adding a little bit of
    # white noise when there is no noise.
    white_noise = [1e-5, 0.]
    sigma = [1., 2.]
    l = [2., 2.]
    atols_on_data = [0., 1e-3]
    kernels = ['RBF', 'VonKarman']

    for ker in kernels:
        for i in range(2):
            # Generate 1D gaussian random fields.
            kernel = "%f**2 * %s(%f)" % ((sigma[i], ker, l[i]))
            kernel_skl = treegp.eval_kernel(kernel)
            x, y, y_err = make_1d_grf(kernel_skl,
                                      noise=noise[i],
                                      seed=42,
                                      npoints=npoints)

            # Do the GP interpolation without hyperparameter
            # fitting (the true kernel is used directly).
            gp = treegp.GPInterpolation(kernel=kernel,
                                        optimizer="none",
                                        white_noise=white_noise[i])
            gp.initialize(x, y, y_err=y_err)

            # Predict at the same positions as the simulated data.
            # Predictions are strictly equal to the input data
            # in the case of no noise. With noise you should expect
            # a pull distribution with mean around 0
            # and std < 1 (the same data are used to train and validate,
            # and the data are well sampled compared to the input
            # correlation length).
            y_predict, y_cov = gp.predict(x, return_cov=True)
            y_std = np.sqrt(np.diag(y_cov))
            pull = y - y_predict
            if noise[i] is not None:
                pull /= np.sqrt(y_err**2 + y_std**2)
            else:
                # Test that the prediction is equal to the data at the data
                # positions. Also test that the diagonal of the predicted
                # covariance is zero at the data positions when there is no noise.
                np.testing.assert_allclose(y,
                                           y_predict,
                                           atol=3. * white_noise[i])
                np.testing.assert_allclose(np.zeros_like(y_std),
                                           y_std,
                                           atol=3. * white_noise[i])

            mean_pull = np.mean(pull)
            std_pull = np.std(pull)

            # Test that the mean of the pull is close to zero and the std of the pull is below 1.
            np.testing.assert_allclose(0.,
                                       mean_pull,
                                       atol=3. * (std_pull) / np.sqrt(npoints))
            if std_pull > 1.:
                raise ValueError(
                    "std_pull is > 1. Current value std_pull = %f" %
                    (std_pull))

            # Test that for extrapolation the interpolation returns the mean
            # function (0 here) and that the diagonal of the covariance matrix
            # is close to the hyperparameter linked to the amplitude of the
            # fluctuations of the Gaussian random field.

            new_x = np.linspace(
                np.max(x) + 6. * l[i],
                np.max(x) + 7. * l[i], npoints).reshape((npoints, 1))

            gp = treegp.GPInterpolation(kernel=kernel,
                                        optimizer="none",
                                        normalize=False,
                                        white_noise=white_noise[i])
            gp.initialize(x, y, y_err=y_err)
            y_predict, y_cov = gp.predict(new_x, return_cov=True)
            y_std = np.sqrt(np.diag(y_cov))

            np.testing.assert_allclose(np.zeros_like(y_predict),
                                       y_predict,
                                       atol=1e-5)
            np.testing.assert_allclose(sigma[i] * np.ones_like(y_std),
                                       y_std,
                                       atol=1e-5)
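
make_1d_grf is not defined in these examples. A plausible stand-in, consistent with how it is called here (1D coordinates, a Gaussian random field drawn from the given sklearn-style kernel, optional white noise), is sketched below; the coordinate range and internal details are assumptions, not the original helper:

import numpy as np

def make_1d_grf(kernel, noise=None, seed=42, npoints=40):
    # Sketch only: draw a 1D Gaussian random field whose covariance
    # is given by the (sklearn-style) kernel object.
    rng = np.random.RandomState(seed)
    x = rng.uniform(-10., 10., npoints).reshape((npoints, 1))
    K = kernel(x)                      # (npoints, npoints) covariance matrix
    y = rng.multivariate_normal(np.zeros(npoints), K)
    y_err = np.zeros(npoints)
    if noise is not None:
        y += rng.normal(scale=noise, size=npoints)
        y_err += noise
    return x, y, y_err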
Example 9
def test_hyperparameter_search_2d():
    optimizer = ['log-likelihood', 'anisotropic']
    npoints = [400, 2000]

    noise = 0.01
    sigma = 2.
    size = 0.5
    g1 = 0.2
    g2 = 0.2
    ker = 'AnisotropicRBF'

    # Generate 2D gaussian random fields.
    L = get_correlation_length_matrix(size, g1, g2)
    invL = np.linalg.inv(L)
    kernel = "%f**2*%s"%((sigma, ker))
    kernel += "(invLam={0!r})".format(invL)
    kernel_skl = treegp.eval_kernel(kernel)

    for n, opt in enumerate(optimizer):
        x, y, y_err = make_2d_grf(kernel_skl,
                                  noise=noise,
                                  seed=42, npoints=npoints[n])

        # Set up the GP interpolation, starting from the true kernel,
        # and fit the hyperparameters.
        gp = treegp.GPInterpolation(kernel=kernel, optimizer=opt,
                                    normalize=True, nbins=21, min_sep=0.,
                                    max_sep=1., p0=[0.3, 0.,0.])
        gp.initialize(x, y, y_err=y_err)
        gp.solve()
        # test if the found hyperparameters are close to the true hyperparameters.
        np.testing.assert_allclose(kernel_skl.theta, gp.kernel.theta, atol=5e-1)

        # Predict at the same positions as the simulated data.
        # Predictions are strictly equal to the input data
        # in the case of no noise. With noise you should expect
        # a pull distribution with mean around 0
        # and std < 1 (the same data are used to train and validate,
        # and the data are well sampled compared to the input
        # correlation length).
        y_predict, y_cov = gp.predict(x, return_cov=True)
        y_std = np.sqrt(np.diag(y_cov))
        pull = y - y_predict
        pull /= np.sqrt(y_err**2 + y_std**2)
        mean_pull = np.mean(pull)
        std_pull = np.std(pull)

        # Test that the mean of the pull is close to zero and the std of the pull is below 1.
        np.testing.assert_allclose(0., mean_pull, atol=3.*(std_pull)/np.sqrt(npoints[n]))
        if std_pull > 1.:
            raise ValueError("std_pull is > 1. Current value std_pull = %f"%(std_pull))

        # Test that for extrapolation the interpolation returns the mean
        # function (0 here) and that the diagonal of the covariance matrix
        # is close to the hyperparameter linked to the amplitude of the
        # fluctuations of the Gaussian random field.

        np.random.seed(42)
        x1 = np.random.uniform(np.max(x)+6.*size,
                               np.max(x)+7.*size, npoints[n])
        x2 = np.random.uniform(np.max(x)+6.*size,
                               np.max(x)+7.*size, npoints[n])
        new_x = np.array([x1, x2]).T

        y_predict, y_cov = gp.predict(new_x, return_cov=True)
        y_std = np.sqrt(np.diag(y_cov))

        np.testing.assert_allclose(np.mean(y), y_predict, atol=1e-5)
        sig = np.sqrt(np.exp(gp.kernel.theta[0]))
        np.testing.assert_allclose(sig*np.ones_like(y_std), y_std, atol=1e-5)
Example 10
def test_hyperparameter_search_1d():
    optimizer = ['log-likelihood', 'two-pcf']
    npoints = [100, 2000]
    noise = 0.01
    sigma = [1., 2., 1., 2.]
    l = [0.5, 0.8, 8., 10.]
    kernels = ['RBF', 'RBF', 'VonKarman', 'VonKarman']
    max_sep = [1.75, 1.75, 1.25, 1.25]
    
    for n, opt in enumerate(optimizer):
        for i, ker in enumerate(kernels):
            # Generate 1D gaussian random fields.
            kernel = "%f**2 * %s(%f)"%((sigma[i], ker, l[i]))
            kernel_skl = treegp.eval_kernel(kernel)
            x, y, y_err = make_1d_grf(kernel_skl,
                                      noise=noise,
                                      seed=42, npoints=npoints[n])

            # Set up the GP interpolation, starting from the true kernel,
            # and fit the hyperparameters.
            gp = treegp.GPInterpolation(kernel=kernel, optimizer=opt, 
                                        normalize=True, nbins=15, min_sep=0.1, 
                                        max_sep=max_sep[i])
            gp.initialize(x, y, y_err=y_err)
            gp.solve()
            # test if the found hyperparameters are close to the true hyperparameters.
            np.testing.assert_allclose(kernel_skl.theta, gp.kernel.theta, atol=7e-1)
            
            if opt is "two-pcf":
                xi, xi_weight, distance, coord, mask = gp.return_2pcf()
                np.testing.assert_allclose(xi, gp._optimizer._2pcf, atol=1e-10)
            if opt is "log-likelihood":
                logL = gp.return_log_likelihood()
                np.testing.assert_allclose(logL, gp._optimizer._logL, atol=1e-10)

            # Predict at the same positions as the simulated data.
            # Predictions are strictly equal to the input data
            # in the case of no noise. With noise you should expect
            # a pull distribution with mean around 0
            # and std < 1 (the same data are used to train and validate,
            # and the data are well sampled compared to the input
            # correlation length).
            y_predict, y_cov = gp.predict(x, return_cov=True)
            y_std = np.sqrt(np.diag(y_cov))
            pull = y - y_predict
            mean_pull = np.mean(pull)
            std_pull = np.std(pull)

            # Test that the mean of the pull is close to zero and the std of the pull is below 1.
            np.testing.assert_allclose(0., mean_pull, atol=3.*(std_pull)/np.sqrt(npoints[n]))
            if std_pull > 1.:
                raise ValueError("std_pull is > 1. Current value std_pull = %f"%(std_pull))

            # Test that for extrapolation the interpolation returns the mean
            # function (0 here) and that the diagonal of the covariance matrix
            # is close to the hyperparameter linked to the amplitude of the
            # fluctuations of the Gaussian random field.

            new_x = np.linspace(np.max(x)+6.*l[i], np.max(x)+7.*l[i], npoints[n]).reshape((npoints[n],1))
        
            y_predict, y_cov = gp.predict(new_x, return_cov=True)
            y_std = np.sqrt(np.diag(y_cov))
            
            np.testing.assert_allclose(np.mean(y)*np.ones_like(y_std), y_predict, atol=1e-5)
            sig = np.sqrt(np.exp(gp.kernel.theta[0]))
            np.testing.assert_allclose(sig*np.ones_like(y_std), y_std, atol=1e-5)
Example 11
import pylab as plt
import treegp
import numpy as np
import warnings
from iminuit import Minuit

dist = np.linspace(0.0, 6, 100)
coord_corr = np.array([dist, np.zeros_like(dist)]).T

kernel_rbf = "1 * RBF(length_scale=1)"
interp_rbf = treegp.GPInterpolation(kernel=kernel_rbf,
                                    normalize=False,
                                    white_noise=0.)
ker_rbf = interp_rbf.kernel_template
corr_rbf = ker_rbf.__call__(coord_corr, Y=np.zeros_like(coord_corr))[:, 0]


def vk(l):
    kernel_vk = "1 * VonKarman(length_scale=%f)" % (l)
    interp_vk = treegp.GPInterpolation(kernel=kernel_vk,
                                       normalize=False,
                                       white_noise=0.)
    ker_vk = interp_vk.kernel_template
    corr_vk = ker_vk.__call__(coord_corr, Y=np.zeros_like(coord_corr))[:, 0]
    return corr_vk


def chi2_fct(param):
    residuals = corr_rbf - vk(param[0])
    return np.sum(residuals**2)
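
A minimal sketch of how chi2_fct could be minimized with the Minuit import above (this assumes iminuit 2.x, which accepts an objective taking a single parameter array; the starting value 1.0 and the parameter name "l" are arbitrary choices, not from the original script):

# Sketch only: find the VonKarman length scale whose correlation curve
# best matches the RBF(length_scale=1) curve computed above.
m = Minuit(chi2_fct, (1.0,), name=("l",))
m.errordef = Minuit.LEAST_SQUARES
m.migrad()
print("Best-fit VonKarman length scale:", m.values["l"])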