Example 1
    def __init__(self, regression_model: Functions, **kwargs):
        self.gpr = regression_model
        self.options = self._unpack_options(**kwargs)
        self.dimensions = self.gpr.dimensions

        prior_mean = self.options['prior_mean'].reshape(-1)
        prior_cov = self.options['prior_variance']
        self.prior = Gaussian(mean=prior_mean, covariance=prior_cov)
Example 2
    def bmc(self):
        """
        Bayesian Monte Carlo - No Active Sampling
        :return:
        """
        prior_mean = self.options['prior_mean'].reshape(-1)
        prior_cov = self.options['prior_variance']
        prior = Gaussian(mean=prior_mean, covariance=prior_cov)

        budget = self.options['naive_bq_budget']

        samples = np.zeros((budget, self.gpr.dimensions))
        yv = np.zeros((budget, ))
        # yv_scaled = np.zeros((budget, ))
        intv = np.zeros((budget, ))
        log_intv = np.zeros((budget, ))

        kern = GPy.kern.RBF(
            input_dim=self.dimensions,
            variance=self.options['naive_bq_kern_variance'],
            lengthscale=self.options['naive_bq_kern_lengthscale'])
        initial_x = np.array([[0, 0]])
        initial_y = np.array(self.gpr.sample(initial_x)).reshape(1, -1)

        gpy_gp = GPy.models.GPRegression(initial_x, initial_y, kernel=kern)
        gp = GP(gpy_gp)
        model = OriginalIntegrandModel(gp=gp, prior=prior)

        for i in range(budget):

            samples[i, :] = np.random.multivariate_normal(mean=np.array([0, 0]),
                                                          cov=2 * np.eye(2))
            # yv[i] = np.array(self.gpr.log_sample(samples[i, :])).reshape(1, -1)
            yv[i] = np.array(self.gpr.sample(samples[i, :])).reshape(1, -1)
            # scaling = np.max(yv[:i+1])
            # yv_scaled[:i+1] = np.exp(yv[:i+1] - scaling)
            # y = yv_scaled[:i+1].reshape(-1, 1)
            model.update(samples[i], yv[i].reshape(-1, 1))
            if i % 10 == 0 and i > 0:

                gpy_gp.optimize()
                _ = gpy_gp.plot()
                plt.show()
                # log_intv[i] = np.log((model.integral_mean())[0]) + scaling
                # intv[i] = np.exp(log_intv[i])
                intv[i] = model.integral_mean()[0]
                log_intv[i] = np.log(intv[i])

                print(i, log_intv[i])
            if i % 100 == 0:
                self.plot_iterations(i,
                                     log_intv,
                                     true_val=self.gpr.grd_log_evidence)

        return log_intv
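
The commented-out lines above hint at the max-rescaling trick that the later examples (naive_bq, and wsabi_bq with rebase=True) use to keep exponentiated log-likelihoods in a numerically safe range. A minimal self-contained NumPy sketch of that identity (the numbers below are illustrative, not repo data):

import numpy as np

# Max-rescaling trick: log(sum_i w_i * exp(l_i)) = m + log(sum_i w_i * exp(l_i - m)),
# with m = max(l). Working with exp(l - m) keeps the quadrature/GP targets in
# (0, 1] and avoids overflow when the log-likelihoods are large.
log_lik = np.array([7100.0, 7105.0, 7110.0])  # np.exp overflows on these directly
weights = np.full(3, 1.0 / 3.0)               # illustrative quadrature weights

m = np.max(log_lik)
scaled = np.exp(log_lik - m)                  # entries lie in (0, 1]
log_integral = m + np.log(np.sum(weights * scaled))
print(log_integral)                           # ~7108.9, computed without overflow
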
Example 3
    def __init__(self, regression_model: Union[RBFGPRegression,
                                               PeriodicGPRegression],
                 **kwargs):
        self.gpr = regression_model
        self.options = self._unpack_options(**kwargs)
        self.dimensions = self.gpr.param_dim

        # Parameter prior - note that the prior is in *log-space*
        self.prior = Gaussian(mean=self.options['prior_mean'].reshape(-1),
                              covariance=self.options['prior_variance'])
        self.gp = self.gpr.model
Example 4
    def __init__(self, classification_model: SVMClassification, **kwargs):
        self.model = classification_model
        self.options = self._unpack_options(**kwargs)
        self.dimensions = self.model.param_dim
        self.prior = Gaussian(mean=self.options['prior_mean'].reshape(-1),
                              covariance=self.options['prior_variance'])
Example 5
# Set up test function and WSABI-L model.


def true_function(x):
    x = np.atleast_2d(x)
    return np.atleast_2d((((np.sin(x) + 0.5 * np.cos(3 * x))**2) /
                          ((x / 2)**2 + 0.3)).prod(axis=1))


initial_x = np.array([[0, 0]])
# Square-root (WSABI-L) warp of the initial observation
initial_y = np.sqrt(2 * true_function(initial_x))

k = GPy.kern.RBF(2, variance=2, lengthscale=2)
lik = GPy.likelihoods.Gaussian(variance=1e-10)

prior = Gaussian(mean=np.array([0, 0]), covariance=2 * np.eye(2))

gpy_gp = GPy.core.GP(initial_x, initial_y, kernel=k, likelihood=lik)
warped_gp = WsabiLGP(gpy_gp)
model = WarpedIntegrandModel(warped_gp, prior)


def true_integrand(x):
    return true_function(x) * prior(x)


# Set up plotting.

LOWER_LIMIT = -4
UPPER_LIMIT = 4
PLOTTING_RESOLUTION = 200
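
The snippet above only constructs the WSABI-L model; nothing is sampled or integrated yet. Below is a minimal sketch of how such a model is typically driven, assuming the select_batch / LOCAL_PENALISATION / update / integral_mean API used by the other examples in this file; the budget, batch criterion and optimisation cadence are placeholders, not the author's settings.

# Hypothetical driver loop for the model built above; assumes select_batch and
# LOCAL_PENALISATION are importable from the same batch-selection module as in
# the other examples in this file.
n_batches = 25  # placeholder budget

for j in range(n_batches):
    # Choose the next evaluation point under the warped-GP acquisition.
    batch = select_batch(model, 1, LOCAL_PENALISATION)
    X = np.array(batch).reshape(1, -1)
    Y = true_function(X).reshape(1, -1)
    # Following the other examples, raw (unwarped) function values are passed
    # to update(); the square-root warp is assumed to be handled inside WsabiLGP.
    model.update(X, Y)
    if j % 5 == 0:
        gpy_gp.optimize()

# Posterior mean estimate of the integral of true_function against the prior.
print(model.integral_mean()[0])
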
Example 6
    def reset_prior(self):
        self.prior = Gaussian(mean=self.options['prior_mean'].reshape(-1),
                              covariance=self.options['prior_variance'])
        logging.info("Prior reset with mean %s and variance %s",
                     self.options['prior_mean'],
                     self.options['prior_variance'])
Example 7
def wsabi(X_pred,
          y_grd,
          log_lik_handle,
          param_dim=5,
          prior_mean=np.zeros((5, 1)),
          prior_var=100 * np.eye(5)):
    # Start timing
    start = time.time()
    prior = Gaussian(mean=prior_mean.reshape(-1), covariance=prior_var)

    # Initial grid sampling
    log_phis = np.mgrid[-1:1.1:1, -1:1.1:1, -1:1.1:1, -1:1.1:1,
                        0:25:5].reshape(5, -1).T
    n = log_phis.shape[0]
    phis = log_phis.copy()
    phis[:, :-1] = np.exp(phis[:, :-1])

    # Allocate memory of the samples and results
    log_r = np.zeros((n, 1))  # The log-likelihood function
    q = np.zeros((n, 1))  # Prediction
    # var = np.zeros((n, ))  # Posterior variance

    for i in range(n):
        log_r[i, :], q[i, :], _ = log_lik_handle(phi=phis[i, :], x_pred=X_pred)
        print(phis[i, :], log_r[i, :], q[i, :])
    r = np.exp(log_r)
    # Setting up kernel - Note we only marginalise over the lengthscale terms, other hyperparameters are set to the
    # MAP values.
    kern = GPy.kern.RBF(param_dim, variance=1., lengthscale=1.)
    # kern.plot(ax=plt.gca())
    r_gp = GPy.models.GPRegression(phis[:1, :], r[:1, :], kern)
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), prior)
    r_model.update(phis[1:, :], r[1:, :])
    r_gp.optimize()

    r_int = r_model.integral_mean()[0]  # Model evidence
    log_r_int = np.log(r_int)  # Model log-evidence

    print("Estimate of model evidence: ", r_int)
    print("Model log-evidence ", log_r_int)

    # Enforce positivity in q
    q_min = np.min(q)
    if q_min < 0:
        q -= q_min
    else:
        q_min = 0

    # Do the same exponentiation and rescaling trick for q
    log_rq_x = log_r + np.log(q)
    max_log_rq = np.max(log_rq_x)
    rq = np.exp(log_rq_x - max_log_rq)
    rq_gp = GPy.models.GPRegression(phis, np.sqrt(2 * rq.reshape(-1, 1)), kern)
    rq_model = WarpedIntegrandModel(WsabiLGP(rq_gp), prior)
    rq_model.update(phis, rq)
    rq_gp.optimize()

    # Now estimate the posterior
    # rq_int = rq_model.integral_mean()[0] + q_min * r_int
    rq_int = np.exp(np.log(rq_model.integral_mean()[0]) +
                    max_log_rq) + q_min * r_int
    print("rq_int", rq_int)
    # Similar for variance
    #log_rvar_x = log_r + np.log(var)
    #max_log_rvar = np.max(log_rvar_x)
    #rvar = np.exp(log_rvar_x - max_log_rvar)
    #rvar_gp = GPy.models.GPRegression(phis[:1, :], np.sqrt(2 * rvar[0].reshape(1, 1)), kern)
    #rvar_model = WarpedIntegrandModel(WsabiLGP(rvar_gp), prior)
    #rvar_model.update(phis[1:, :], rvar[1:].reshape(-1, 1))
    #rvar_gp.optimize()

    #rvar_int = np.exp(np.log(rvar_model.integral_mean()[0]) + max_log_rvar)

    pred = rq_int / r_int
    #pred_var = rvar_int / r_int
    print('pred', pred)
    print('actual', y_grd)

    end = time.time()
    print("Total Time: ", end - start)
    return pred, None
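
For reference, the quantity computed above is the posterior predictive mean written as a ratio of two integrals over the hyperparameters phi: the numerator integrates q(phi) * r(phi) against the prior, and the denominator is the model evidence, the integral of r(phi) against the prior. The toy Monte Carlo check below illustrates that identity; r, q and the prior here are made-up stand-ins, not the repo's likelihood handles.

import numpy as np
from scipy import stats

# Toy check of the ratio-of-integrals identity:
#   E[q | data] = (integral of q(phi) r(phi) pi(phi) dphi)
#                 / (integral of r(phi) pi(phi) dphi)
rng = np.random.default_rng(0)

def r(phi):   # stand-in likelihood of the data as a function of phi
    return stats.norm.pdf(phi, loc=1.0, scale=0.5)

def q(phi):   # stand-in predictive quantity, conditional on phi
    return np.sin(phi) ** 2 + 1.0

phi = rng.normal(loc=0.0, scale=2.0, size=200_000)  # draws from the prior pi
num = np.mean(q(phi) * r(phi))  # Monte Carlo estimate of the numerator
den = np.mean(r(phi))           # Monte Carlo estimate of the evidence
print(num / den)                # posterior predictive mean of q
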
Example 8
def toy_example(num_samples=None, noise=(0, 0)):

    if num_samples is None:
        num_samples = FLAGS.num_samples

    with tf.GradientTape() as tape:

        train_xs, valid_xs, test_xs = utils.load_toy_data()
        batch_xs = train_xs[0:FLAGS.batch_size]  # [batch_size, input_dim]

        # set up your prior model, proposal and likelihood networks
        p_z = ToyPrior(mu_inital_value=2., size=FLAGS.latent_dim, name="toy_prior")

        # returns a callable Normal distribution
        p_x_given_z = ToyConditionalNormalLikelihood()

        # with tf.name_scope('proposal') as scope:
        q_z = ToyConditionalNormal(
            size=FLAGS.latent_dim,
            hidden_layer_sizes=1,
            initializers=None,
            use_bias=True,
            name="proposal")

        # initialise the network parameters to optimal (plus some specified N dist'ed noise)
        q_z.initialise_and_fix_network(batch_xs, noise)

        # returns the Normal dist proposal, and the parameters (fixed to optimal A and b)
        proposal, inference_network_params = q_z(batch_xs, stop_gradient=False)

        z = proposal.sample(sample_shape=[num_samples])
        # [num_samples, batch_size, latent_dim]
        print("z samples ", z.shape)

        # returns a Normal dist conditioned on z
        likelihood = p_x_given_z(z)

        # returns the Prior normal (p_z), and the prior parameter mu
        prior, mu = p_z()

        log_p_z = tf.reduce_sum(prior.log_prob(z), axis=-1)   # [num_samples, batch_size]
        log_q_z = tf.reduce_sum(proposal.log_prob(z), axis=-1)   # [num_samples, batch_size]
        log_p_x_given_z = tf.reduce_sum(likelihood.log_prob(batch_xs), axis=-1)  # [num_samples, batch_size]
        log_weights = log_p_z + log_p_x_given_z - log_q_z  # [num_samples, batch_size]

        # This step is crucial for replicating the IWAE bound. log of the sum, NOT sum of the log (the VAE bound - where M increases)
        log_sum_weight = tf.reduce_logsumexp(log_weights, axis=0)  # this sums over K samples, and returns us to IWAE estimator land
        log_avg_weight = log_sum_weight - tf.log(tf.to_float(num_samples))
        inference_loss = -tf.reduce_mean(log_avg_weight)
        # print("shapes", log_p_z.shape, log_p_x_given_z.shape, log_q_z.shape, log_weights.shape, log_sum_weight.shape, inference_loss.shape)

        parameters = (inference_network_params[0], inference_network_params[1], mu)
        # print("near optimal parameters: ", parameters)
        grads = tape.gradient(inference_loss, parameters)

        # Build the evidence lower bound (ELBO) or the negative loss
        # kl = tf.reduce_mean(tfd.kl_divergence(proposal, prior), axis=-1)  # analytic KL
        # log_sum_ll = tf.reduce_logsumexp(log_p_x_given_z, axis=0)  # this converts back to IWAE estimator (log of the sum)
        # expected_log_likelihood = log_sum_ll - tf.log(tf.to_float(num_samples))
        # KL_elbo = tf.reduce_mean(expected_log_likelihood - kl)

        if FLAGS.using_BQ:

            def get_log_joint(z):
                return np.reshape(p_x_given_z(z).log_prob(batch_xs).numpy() + prior.log_prob(z).numpy(), (-1, 1))

            kernel = GPy.kern.RBF(1, variance=2, lengthscale=2)
            kernel.variance.constrain_bounded(1e-5, 1e5)
            bq_likelihood = GPy.likelihoods.Gaussian(variance=1e-1)

            bq_prior = Gaussian(mean=proposal._loc.numpy().squeeze(), covariance=proposal._scale.numpy().item())

            initial_x = bq_prior.sample(5)
            initial_y = []
            for point in initial_x:
                initial_y.append(get_log_joint(np.atleast_2d(point)))
            initial_y = np.concatenate(initial_y)
            mean_function = NegativeQuadratic(1)
            gpy_gp = GPy.core.GP(initial_x, initial_y, kernel=kernel, likelihood=bq_likelihood, mean_function=mean_function)
            warped_gp = VanillaGP(gpy_gp)
            bq_model = IntegrandModel(warped_gp, bq_prior)

            for i in range(10):
                if i % 5 == 0:
                    gpy_gp.optimize_restarts(num_restarts=5)
                failed = True
                while failed:
                    try:
                        batch = select_batch(bq_model, 1, KRIGING_BELIEVER)
                        failed = False
                    except FloatingPointError:
                        gpy_gp.optimize_restarts(num_restarts=5)

                X = np.array(batch)
                Y = get_log_joint(X)

                bq_model.update(batch, Y)

            gpy_gp.optimize_restarts(num_restarts=5)

            bq_elbo = bq_model.integral_mean()

            import scipy.integrate
            def integrand(z):
                return get_log_joint(z) * np.exp(bq_prior.logpdf(np.atleast_2d(z)))
            brute_force_elbo = scipy.integrate.quad(integrand, -10, 10)

            print("BQ ", bq_elbo)
            print("ACTUAL ELBO ", brute_force_elbo)

    return grads
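
The step flagged as crucial in the comments above is the IWAE-style "log of the sum, not sum of the log". A standalone NumPy sketch of that computation, with random stand-in weights and independent of the TensorFlow graph above:

import numpy as np
from scipy.special import logsumexp

# IWAE bound: L_K = E[ log( (1/K) * sum_k w_k ) ], with importance weights
# w_k = p(z_k) p(x | z_k) / q(z_k | x). Averaging the weights in log-space via
# logsumexp mirrors tf.reduce_logsumexp(log_weights, axis=0) - log(K) above.
num_samples, batch_size = 16, 4
rng = np.random.default_rng(0)
log_weights = rng.normal(size=(num_samples, batch_size))  # stand-in for log w_k

log_avg_weight = logsumexp(log_weights, axis=0) - np.log(num_samples)
iwae_bound = log_avg_weight.mean()  # the inference loss above is the negative of this
print(iwae_bound)
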
Example 9
    def naive_bq(self) -> tuple:
        """
        Marginalise the marginal log-likelihood using naive Bayesian Quadrature
        :return:
        """
        budget = self.options['naive_bq_budget']

        # Arrays to store the sample locations, the log-likelihoods and
        # likelihoods evaluated at those locations, and the running estimate of
        # the log marginal integral
        naive_bq_samples = np.zeros((budget, self.gpr.dimensions + 2))
        naive_bq_log_y = np.zeros((budget, ))
        naive_bq_y = np.zeros((budget, ))
        log_naive_bq_int = np.zeros((budget, ))

        # Initial point - set the initial sample to the prior mean
        initial_x = np.zeros((self.dimensions + 2, 1)).reshape(1, -1) + 1e-6
        initial_y = np.array(self.gpr.log_sample(initial_x)).reshape(1, -1)

        # Prior in log space
        prior_mean = self.options['prior_mean'].reshape(1, -1)
        prior_cov = self.options['prior_variance']

        # Setting up kernel - noting the log-transformation
        kern = GPy.kern.RBF(
            self.dimensions + 2,
            variance=np.log(self.options['naive_bq_kern_variance']),
            lengthscale=np.log(self.options['naive_bq_kern_lengthscale']))

        # Initial guess for the GP for BQ
        lik = GPy.likelihoods.Gaussian(variance=1e-10)
        prior = Gaussian(mean=prior_mean.reshape(-1), covariance=prior_cov)
        gpy_gp = GPy.core.GP(initial_x, initial_y, kernel=kern, likelihood=lik)
        gp = GP(gpy_gp)
        model = OriginalIntegrandModel(gp=gp, prior=prior)
        for i in range(1, self.options['naive_bq_budget']):
            # Do active sampling
            this_x = np.array(select_batch(model, 1,
                                           LOCAL_PENALISATION)).reshape(1, -1)
            naive_bq_samples[i, :] = this_x
            naive_bq_log_y[i] = np.array(self.gpr.log_sample(this_x)).reshape(
                1, -1)

            # Compute the scaling
            log_scaling = np.max(naive_bq_log_y[:i])

            # Scaling batch by max and exponentiate
            naive_bq_y[:i] = np.exp(naive_bq_log_y[:i] - log_scaling)
            this_y = naive_bq_y[i]

            model.update(this_x, this_y)
            gpy_gp.optimize()
            naive_bq_int, _, _ = model.integral_mean(log_transform=True)
            log_naive_bq_int[i] = naive_bq_int + log_scaling
            print("samples", np.exp(this_x))
            print("eval", log_naive_bq_int[i])
            if i % 1 == 0:
                self.plot_iterations(i, naive_bq_samples, naive_bq_log_y)
                print("Step", str(i))
                print("Current estimate of Log-evidence: ",
                      log_naive_bq_int[i])
                # print("Current values of hyperparameters: ", display(gpy_gp))
                plt.plot(log_naive_bq_int[:i])
                plt.show()
        self.naive_bq_samples = naive_bq_samples
        return naive_bq_log_y[-1], log_naive_bq_int[-1]
Example 10
    def wsabi_bq(self, rebase=False):
        """
        Marginalise the marginal log-likelihood using WSABI Bayesian Quadrature
        :return:
        """
        budget = self.options['naive_bq_budget']

        # Arrays to store the sample locations, the (log-)likelihoods evaluated
        # at those locations, and the running estimates of the marginalised
        # integral
        samples = np.zeros((budget, self.gpr.dimensions))
        yv = np.zeros((budget, ))
        yv_scaled = np.zeros((budget, ))
        intv = np.zeros((budget, ))
        log_intv = np.zeros((budget, ))

        # Initial point - set the initial sample to the prior mean
        initial_x = np.zeros((self.dimensions, 1)).reshape(1, -1) + 1e-6
        if rebase:
            initial_y = np.array([1]).reshape(1, -1)
        else:
            initial_y = np.array(self.gpr.sample(initial_x)).reshape(1, -1)

        # Prior in log space
        prior_mean = self.options['prior_mean'].reshape(-1)
        prior_cov = self.options['prior_variance']
        prior = Gaussian(mean=prior_mean, covariance=prior_cov)

        # Setting up kernel - noting the log-transformation
        kern = GPy.kern.RBF(
            self.dimensions,
            variance=self.options['naive_bq_kern_variance'],
            lengthscale=self.options['naive_bq_kern_lengthscale'])

        # Initial guess for the GP for BQ
        gpy_gp = GPy.models.GPRegression(
            initial_x,
            initial_y,
            kernel=kern,
        )
        warped_gp = WsabiLGP(gpy_gp)
        model = WarpedIntegrandModel(warped_gp, prior=prior)

        if rebase:
            for i in range(budget):
                samples[i, :] = np.array(
                    select_batch(model, 1, LOCAL_PENALISATION)).reshape(1, -1)
                yv[i] = np.array(self.gpr.log_sample(samples[i, :])).reshape(
                    1, -1)
                scaling = np.max(yv[:i + 1])
                yv_scaled[:i + 1] = np.exp(yv[:i + 1] - scaling)
                x = samples[:i + 1]
                y = yv_scaled[:i + 1].reshape(-1, 1)
                if i % 20 == 0 and i > 0:
                    # Create a new model since all x and y data have been replaced due to rebasing
                    gpy_gp = GPy.models.GPRegression(
                        x,
                        y,
                        kernel=kern,
                    )
                    warped_gp = WsabiLGP(gpy_gp)
                    model = WarpedIntegrandModel(warped_gp, prior)
                    gpy_gp.optimize()
                    log_intv[i] = np.log((model.integral_mean())[0]) + scaling
                    intv[i] = np.exp(log_intv[i])
                if i % 100 == 0:
                    self.plot_iterations(i,
                                         log_intv,
                                         true_val=self.gpr.grd_log_evidence)

        else:
            for i in range(budget):
                samples[i, :] = np.array(
                    select_batch(model, 1, LOCAL_PENALISATION)).reshape(1, -1)
                yv[i] = np.array(self.gpr.sample(samples[i, :])).reshape(1, -1)
                model.update(samples[i, :], yv[i])
                gpy_gp.optimize()
                intv[i] = (model.integral_mean())[0]
                log_intv[i] = np.log(intv[i])

                if i % 100 == 0:
                    self.plot_iterations(i,
                                         log_intv,
                                         true_val=self.gpr.grd_log_evidence)
                    gpy_gp.plot()
                    plt.show()

        return yv[-1], intv[-1]
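
Both branches above lean on the WSABI-L square-root warp implemented by WsabiLGP: rather than modelling the non-negative integrand ell directly, the GP models g = sqrt(2 * (ell - alpha)), and the integrand is recovered as alpha + 0.5 * g**2. A tiny NumPy illustration of the warp and its inverse; the values and the 0.8 * min offset are placeholders in the spirit of WSABI-L, not code from this repo.

import numpy as np

# WSABI-L style square-root warp:
#   g(x) = sqrt(2 * (ell(x) - alpha))   <=>   ell(x) = alpha + 0.5 * g(x) ** 2
# Fitting the GP to g keeps the implied integrand non-negative by construction.
ell = np.array([0.3, 1.7, 5.2])    # example positive integrand values
alpha = 0.8 * ell.min()            # illustrative offset choice

g = np.sqrt(2.0 * (ell - alpha))   # warped targets a WSABI-L GP would see
ell_back = alpha + 0.5 * g ** 2    # unwarping recovers the original values
print(np.allclose(ell, ell_back))  # True
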
Example 11
def wsabi(gpy_gp: GPy.core.GP, x, kern=None, noise=None):
    """Initial grid sampling, followed by WSABI quadrature"""
    from ratio.posterior_mc_inference import PosteriorMCSampler
    budget = 50
    x = np.array(x).reshape(1, 1)
    #sampler = PosteriorMCSampler(gpy_gp)

    #log_params = np.log(sampler.hmc(num_iters=budget, mode='gpy'))
    #print(log_params)
    log_params = np.empty((budget, 3))
    #for i in range(budget):
    #   log_params[i, :] = scipy.stats.multivariate_normal.rvs(mean=np.array([0, 0, 0]), cov=4*np.eye(3))
    log_params = np.mgrid[0:4.1:1, 0:4.1:1, -4:-0.1:1.5].reshape(3, -1).T
    budget = log_params.shape[0]

    if kern is None:
        kern = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)
    if noise is None:
        noise = 1e-3

    prior = Gaussian(mean=np.array([0, 0, 0]), covariance=4 * np.eye(3))
    log_phis = np.empty((budget, 3))
    log_liks = np.empty((budget, ))
    pred_means = np.empty((budget, ))
    pred_vars = np.empty((budget, ))

    for i in range(log_params.shape[0]):
        log_phi = log_params[i, :]
        _set_model(gpy_gp, np.exp(log_phi))
        log_lik = gpy_gp.log_likelihood()
        #print('params', log_phi, log_lik)
        log_phis[i] = log_phi
        log_liks[i] = log_lik
        pred_means[i], pred_vars[i] = gpy_gp.predict_noiseless(x)

    if np.max(log_liks) > 15.:
        # For highly peaked likelihoods, we do not use quadrature and simply use MAP estimate
        idx = log_liks.argmax()
        return pred_means[idx], pred_vars[idx], kern, noise

    r_gp = GPy.models.GPRegression(
        log_params[:1, :],
        np.sqrt(2 * np.exp(log_liks[0])).reshape(1, -1), kern)
    r_gp.Gaussian_noise.variance = noise
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), prior)
    r_model.update(log_phis[1:, :], np.exp(log_liks[1:]).reshape(1, -1))
    #print(r_gp.X, r_gp.Y)
    #from IPython.display import display
    #display(r_gp)
    r_gp.optimize()
    r_int = r_model.integral_mean()[0]

    q_min = np.min(pred_means)
    pred_means -= q_min

    rq = np.exp(log_liks) * pred_means
    rq_gp = GPy.models.GPRegression(log_phis[:1, :],
                                    np.sqrt(2 * rq[0]).reshape(1, -1), kern)
    rq_model = WarpedIntegrandModel((WsabiLGP(rq_gp)), prior)
    rq_model.update(log_phis[1:, :], rq[1:].reshape(1, -1))
    rq_gp.optimize()
    rq_int = rq_model.integral_mean()[0] + q_min * r_int

    rvar = np.exp(log_liks) * pred_vars
    rvar_gp = GPy.models.GPRegression(log_phis[:1, :],
                                      np.sqrt(2 * rvar[0]).reshape(1, -1),
                                      kern)
    rvar_model = WarpedIntegrandModel((WsabiLGP(rvar_gp)), prior)
    rvar_model.update(log_phis[1:, :], rvar[1:].reshape(1, -1))
    rvar_gp.optimize()
    rvar_int = rvar_model.integral_mean()[0]

    return rq_int / r_int, rvar_int / r_int, r_gp.kern, r_gp.Gaussian_noise.variance
Example 12
def plot_gauss_mix(r: GaussMixture, q: GaussMixture):
    r.plot(label=r'$r(\phi) = p(z_d|\phi)$')
    q.plot(label=r'$q(\phi) = p(y_*|z_d, \phi)$')

    plt.xlabel(r"$\phi$")
    plt.legend()


if __name__ == "__main__":

    r = GaussMixture(means=[-1, 2], covariances=[0.7, 2], weights=[0.1, 0.2])

    q = GaussMixture([0.5, 1.5, -1.5, -0.3, 0.2], [100, 1, 0.4, 0.2, 1],
                     weights=[3, 0.5, 0.5, 0.2, -0.1])
    prior = Gaussian(mean=np.array([[0]]), covariance=np.array([[1]]))

    prediction = predictive_integral(r, q, prior_mean=0, prior_var=1)
    evidence = evidence_integral(r, prior_mean=0, prior_var=1)
    print(prediction, evidence, prediction / evidence)
    num, den, ratio = approx_integrals(prior, q, r)

    #plot_gauss_mix(r, q)
    #plt.show()

    #naive_bq = NaiveBQ(r, q, prior, num_batches=203, batch_size=1, plot_iterations=False,
    #                   display_step=50,
    #                   true_prediction_integral=num, true_evidence_integral=den)
    #naive_bq.quadrature()
    #naive_bq.plot_result()
    #plt.show()