Example #1
    def optimize_policy(self, maxiter=50, restarts=1):
        """
        Optimize policy parameters. Adapted from https://github.com/nrontsis/PILCO
        :param maxiter: max optimizer iterations
        :param restarts: number of random restarts
        """
        mgpr_trainable_params = self.mgpr.trainable_parameters
        for param in mgpr_trainable_params:
            set_trainable(param, False)

        if not self.optimizer:
            self.optimizer = gpflow.optimizers.Scipy()
        self.optimizer.minimize(self.training_loss, self.trainable_variables, options=dict(maxiter=maxiter))
        restarts -= 1

        best_parameter_values = [param.numpy() for param in self.trainable_parameters]
        best_reward = self.compute_reward()
        for restart in range(restarts):
            self.controller.randomize()
            self.optimizer.minimize(self.training_loss, self.trainable_variables, options=dict(maxiter=maxiter))
            reward = self.compute_reward()
            if reward > best_reward:
                best_parameter_values = [param.numpy() for param in self.trainable_parameters]
                best_reward = reward

        for i, param in enumerate(self.trainable_parameters):
            param.assign(best_parameter_values[i])
        for param in mgpr_trainable_params:
            set_trainable(param, True)
Example #2
def gpr_and_vgp(data, kernel, likelihood):
    vgp = gpflow.models.VGP(data, kernel, likelihood)
    gpr = gpflow.models.GPR(data, kernel)
    gpr.likelihood.variance.assign(likelihood.variance)
    set_trainable(vgp, False)
    vgp.q_mu.trainable = True
    vgp.q_sqrt.trainable = True
    return gpr, vgp
Example #3
def test_make_trainable(model):
    """
    Checks whether `set_trainable()` can make parameters that are *not*
    trainable trainable again.
    """
    set_trainable(model, False)
    assert len(model.trainable_variables) == 0
    set_trainable(model, True)
    assert len(model.trainable_variables) == len(model.parameters)
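The `model` fixture is not shown above; a minimal, self-contained sketch of the same check, assuming a plain GPR on toy data:

import numpy as np
import gpflow
from gpflow.utilities import set_trainable

# Stand-in for the pytest `model` fixture: a GPR with a squared-exponential kernel.
X = np.random.rand(10, 1)
Y = np.sin(X)
model = gpflow.models.GPR((X, Y), kernel=gpflow.kernels.SquaredExponential())

set_trainable(model, False)   # freeze every parameter in the model
assert len(model.trainable_variables) == 0
set_trainable(model, True)    # ...and make them all trainable again
assert len(model.trainable_variables) == len(model.parameters)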
Example #4
def sgpr_and_svgp(data, inducing_variable, kernel, likelihood):
    svgp = gpflow.models.SVGP(kernel, likelihood, inducing_variable)
    sgpr = gpflow.models.SGPR(data,
                              kernel,
                              inducing_variable=inducing_variable)
    sgpr.likelihood.variance.assign(Setup.likelihood_variance)
    set_trainable(svgp, False)
    svgp.q_mu.trainable = True
    svgp.q_sqrt.trainable = True
    return sgpr, svgp
Example #5
def create_model():
    kernel = create_kernel()
    model = gpflow.models.SVGP(
        kernel=kernel,
        likelihood=gpflow.likelihoods.Gaussian(variance_lower_bound=0.0),
        inducing_variable=Data.Z,
        q_diag=True,
    )
    set_trainable(model.q_mu, False)
    return model
Example #6
    def train(self, x, y, sigma_n=None, sigma_f=1e-6):
        if (y.size) < 2:
            raise RuntimeError('y.size must be at least 2')

        notnan = np.logical_not(np.isnan(y))
        x = x.reshape(y.size, -1)[notnan]
        y = y[notnan]

        self.ymean = np.mean(y)
        self.yvar = np.var(y)
        self.yscale = np.sqrt(self.yvar)

        self.xtrain = x.reshape(y.size, -1)
        self.ytrain = (y.reshape(y.size, 1) - self.ymean) / self.yscale
        print(self.xtrain.shape)

        self.xtrain = self.xtrain.astype(np.float64)
        self.ytrain = self.ytrain.astype(np.float64)

        self.ndim = self.xtrain.shape[-1]

        l = np.empty(self.ndim)

        kern = list()

        for k in range(self.ndim):
            # TODO: guess this in a finer fashion via FFT in all directions
            l[k] = 0.3 * (np.max(self.xtrain[:, k]) -
                          np.min(self.xtrain[:, k]))
            kern.append(
                gpflow.kernels.SquaredExponential(lengthscales=l[k],
                                                  variance=1.0,
                                                  active_dims=[k]))
            if k == 0:
                # Guess a bit more broadly
                kern[k].variance.assign(3.0)
            else:
                # Need only one y scale
                set_trainable(kern[k].variance, False)

        kerns = gpflow.kernels.Product(kern)

        self.m = gpflow.models.GPR((self.xtrain, self.ytrain), kernel=kerns)

        self.m.likelihood.variance.assign(1e-2)  # Guess little noise

        # Optimize
        def objective_closure():
            return -self.m.log_marginal_likelihood()

        opt = gpflow.optimizers.Scipy()
        opt.minimize(objective_closure, self.m.trainable_variables)

        self.sigma = np.sqrt(self.m.likelihood.variance.value())
        self.trained = True
Example #7
def test_non_trainable_model_objective(model):
    """
    Checks that we can still compute the objective of a model that has no
    trainable parameters whatsoever (regression test for bug in log_prior()).
    In this case we have no priors, so log_prior should be zero to add no
    contribution to the objective.
    """
    set_trainable(model, False)

    _ = model.log_marginal_likelihood()
    assert model.log_prior() == 0.0
Example #8
def gp_model(x_train, y_train, x_test, num_classes):
    """This function instantiates the gp model and gets the predictions from the model.

    :param x_train: The training dataset.
    :param y_train: The training dataset labels.
    :param x_test: The test dataset.
    :param num_classes: The number of classes in the dataset.
    :return predictions: The predictions from the GP model.
    :return time_taken: The time taken to train the model."""

    data = (x_train, y_train)
    kernel = gpflow.kernels.SquaredExponential() + gpflow.kernels.Matern12(
    ) + gpflow.kernels.Exponential()

    invlink = gpflow.likelihoods.RobustMax(num_classes)
    likelihood = gpflow.likelihoods.MultiClass(num_classes, invlink=invlink)
    z = x_train[::5].copy()

    model = gpflow.models.SVGP(kernel=kernel,
                               likelihood=likelihood,
                               inducing_variable=z,
                               num_latent_gps=num_classes,
                               whiten=True,
                               q_diag=True)

    set_trainable(model.inducing_variable, False)

    print('\nInitial parameters:')
    print_summary(model, fmt="notebook")

    start = time.time()

    opt = gpflow.optimizers.Scipy()
    opt.minimize(model.training_loss_closure(data),
                 model.trainable_variables,
                 options=dict(maxiter=ci_niter(1000)))

    print('\nParameters after optimization:')
    print_summary(model, fmt="notebook")

    end = time.time()
    time_taken = round(end - start, 2)

    print('Optimization took {:.2f} seconds'.format(time_taken))

    predictions = model.predict_y(x_test)[0]

    return predictions, time_taken
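A hypothetical call of `gp_model` on small synthetic data (the array sizes and seed below are made up for illustration; the function's own imports, e.g. `gpflow`, `set_trainable`, `print_summary`, `ci_niter` and `time`, are assumed to be in scope):

import numpy as np

rng = np.random.default_rng(0)
x_train = rng.normal(size=(100, 2))                         # 100 two-dimensional inputs
y_train = rng.integers(0, 3, size=(100, 1)).astype(float)   # class labels in {0, 1, 2}
x_test = rng.normal(size=(20, 2))

predictions, time_taken = gp_model(x_train, y_train, x_test, num_classes=3)
print(predictions.shape, time_taken)                        # (20, 3) class probabilities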
Example #9
def test_mixed_mok_with_Id_vs_independent_mok():
    data = DataMixedKernelWithEye
    # Independent model
    k1 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), data.L)
    f1 = InducingPoints(data.X[:data.M, ...])
    model_1 = SVGP(k1,
                   Gaussian(),
                   f1,
                   q_mu=data.mu_data_full,
                   q_sqrt=data.sqrt_data_full)
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       method='BFGS')

    # Mixed Model
    kern_list = [
        SquaredExponential(variance=0.5, lengthscale=1.2)
        for _ in range(data.L)
    ]
    k2 = mk.LinearCoregionalization(kern_list, data.W)
    f2 = InducingPoints(data.X[:data.M, ...])
    model_2 = SVGP(k2,
                   Gaussian(),
                   f2,
                   q_mu=data.mu_data_full,
                   q_sqrt=data.sqrt_data_full)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       method='BFGS')

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
Example #10
def build_model(data):
    variance = tf.math.reduce_variance(data.observations)
    kernel = gpflow.kernels.Matern52(variance=variance,
                                     lengthscales=0.2 * np.ones(2, ))
    gpr = gpflow.models.GPR(astuple(data), kernel, noise_variance=1e-5)
    set_trainable(gpr.likelihood, False)

    return {
        OBJECTIVE:
        trieste.models.create_model({
            "model": gpr,
            "optimizer": gpflow.optimizers.Scipy(),
            "optimizer_args": {
                "options": dict(maxiter=100)
            },
        })
    }
Example #11
    def optimizeModel(self):
        k = gpflow.kernels.Matern52(self.kernel_variance, self.lengthscales)
        X = np.concatenate(self.X, 0)
        Y = np.concatenate(self.Y, 0)
        X = X.reshape((-1, self.input_dim))
        Y = Y.reshape((-1, 1))


        meanf = gpflow.mean_functions.Constant(self.mean_value)
        self.gp = gpflow.models.GPR(data=(X, Y), kernel=k, mean_function=meanf)
        self.gp.likelihood.variance.assign(self.noise_variance)
        #keep prior mean functions fixed
        #set_trainable(self.gp.mean_function.c, False)
        if(self.fixed_noise_variance):
            set_trainable(self.gp.likelihood.variance, False)
        opt = gpflow.optimizers.Scipy()
        opt_logs = opt.minimize(self.gp.training_loss, self.gp.trainable_variables, options=dict(maxiter=100))
        print_summary(self.gp)
Example #12
    def _init_layers(self,
                     X,
                     Y,
                     Z,
                     dims,
                     kernels,
                     mean_function=Zero(),
                     Layer=SVGPIndependentLayer,
                     white=False):
        """Initialise DGP layers to have the same number of outputs as inputs,
        apart from the final layer."""
        layers = []

        X_running, Z_running = X.copy(), Z.copy()
        for i in range(len(kernels) - 1):
            dim_in, dim_out, kern = dims[i], dims[i + 1], kernels[i]
            if dim_in == dim_out:
                mf = Identity()

            else:
                if dim_in > dim_out:
                    _, _, V = np.linalg.svd(X_running, full_matrices=False)
                    W = V[:dim_out, :].T

                else:
                    W = np.concatenate(
                        [np.eye(dim_in),
                         np.zeros((dim_in, dim_out - dim_in))], 1)

                mf = Linear(W)
                set_trainable(mf.A, False)
                set_trainable(mf.b, False)

            layers.append(Layer(kern, Z_running, dim_out, mf, white=white))

            if dim_in != dim_out:
                Z_running = Z_running.dot(W)
                X_running = X_running.dot(W)

        layers.append(
            Layer(kernels[-1], Z_running, dims[-1], mean_function,
                  white=white))
        return layers
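A small numeric sketch of the dimension-matching logic above (the dims are made up; it only shows how the SVD-based projection W used for the linear mean function changes the running shapes):

import numpy as np

# Suppose a layer maps dim_in=5 to dim_out=3: W collects the top-3 right-singular
# vectors of the running inputs, and both X_running and Z_running are projected by it.
X_running = np.random.randn(100, 5)
_, _, V = np.linalg.svd(X_running, full_matrices=False)
W = V[:3, :].T
print(W.shape, X_running.dot(W).shape)   # (5, 3) (100, 3)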
Example #13
def run_fold(X, Y, XT, YT):
    # Kernel
    kern_list = [
        gpflow.kernels.Matern32(variance=3., lengthscale=1.),
        gpflow.kernels.Matern32(variance=3., lengthscale=1.)
    ]
    kernel = gpflow.kernels.SeparateIndependent(kern_list)

    # Inducing points (we hack this through SVGP, because the SeparateIndependent support in plain
    # VGP was broken). We simply put an inducing point at every data point.
    Xu = X.copy()
    inducing_variables = gpflow.inducing_variables.mo_inducing_variables. \
        SharedIndependentInducingVariables(gpflow.inducing_variables.InducingPoints(Xu))

    # The model
    model = gpflow.models.SVGP(kernel=kernel,
                               likelihood=HeteroscedasticGaussian(),
                               inducing_variable=inducing_variables,
                               num_latent=2)

    # Set trainable (everything except the 'inducing' points, because we want the full model).
    set_trainable(model, True)
    set_trainable(model.inducing_variable.inducing_variable_shared.Z, False)

    # Optimize parameters
    o = gpflow.optimizers.Scipy()

    @tf.function(autograph=False)
    def objective():
        return -model.elbo((X, Y))

    o.minimize(objective, variables=model.trainable_variables)

    # Plot model
    plot(model)

    # Print model
    gpflow.utilities.print_summary(model, fmt='notebook')

    # Return NLPD
    return -tf.reduce_mean(model.predict_log_density((XT, YT))).numpy()
Example #14
def build_dgp_model(data):
    variance = tf.math.reduce_variance(data.observations)

    dgp = build_vanilla_deep_gp(data.query_points, num_layers=2, num_inducing=100)
    dgp.f_layers[-1].kernel.kernel.variance.assign(variance)
    dgp.f_layers[-1].mean_function = gpflow.mean_functions.Constant()
    dgp.likelihood_layer.likelihood.variance.assign(1e-5)
    set_trainable(dgp.likelihood_layer.likelihood.variance, False)

    epochs = 200
    batch_size = 100

    optimizer = tf.optimizers.Adam(0.01)
    # These are just arguments for the Keras `fit` method.
    fit_args = {
        "batch_size": batch_size,
        "epochs": epochs,
        "verbose": 0,
    }

    return DeepGaussianProcess(model=dgp, optimizer=optimizer, fit_args=fit_args)
Example #15
    def optimizeModel(self):
        output_dim = self.output_dim
        rank = self.rank
        self.k = gpflow.kernels.Matern52(self.kernel_variance, self.lengthscales, active_dims= np.arange(self.input_dim).tolist())
        self.coreg = gpflow.kernels.Coregion(output_dim=output_dim, rank=rank, active_dims=[self.input_dim])
        k = self.k * self.coreg
        X = np.concatenate(self.X, 0)
        Y = np.concatenate(self.Y, 0)
        X = X.reshape((X.shape[0], -1))
        Y = Y.reshape((Y.shape[0], -1))
        meanf = gpflow.mean_functions.Constant(self.mean_value)
        lik = gpflow.likelihoods.SwitchedLikelihood([gpflow.likelihoods.Gaussian() for i in range(self.output_dim)])

        # now build the GP model as normal
        self.gp = gpflow.models.VGP((X,Y), kernel=k, likelihood=lik, mean_function = meanf)
        #set_trainable(self.gp.mean_function.c, False)
        for i in range(self.output_dim):
            self.gp.likelihood.likelihoods[i].variance.assign(self.noise_variance[i])
        if(self.fixed_noise_variance):
            for i in range(self.output_dim):
                set_trainable(self.gp.likelihood.likelihoods[i].variance, False)
        gpflow.optimizers.Scipy().minimize(self.gp.training_loss, self.gp.trainable_variables, options=dict(maxiter=10000), method="L-BFGS-B")
        print_summary(self.gp)
Example #16
def conv_model(conv_k, conv_f, data, num_class):
    # convolutional model
    conv_m1 = gpflow.models.SVGP(
        conv_k,
        gpflow.likelihoods.MultiClass(num_class),
        conv_f,
        num_latent_gps=num_class,
    )

    # train the inducing points and the base-kernel variance and lengthscales; keep the patch weights fixed
    set_trainable(conv_m1.inducing_variable, True)
    set_trainable(conv_m1.kernel.base_kernel.variance, True)
    set_trainable(conv_m1.kernel.base_kernel.lengthscales, True)
    set_trainable(conv_m1.kernel.weights, False)

    return conv_m1
Example #17
    def _create_kernel(self):
        """Creates a kernel from list of strings stored in _kernel_split."""
        k = None
        for i, prod_kern in enumerate(self.kernel_split):
            sub_k = None
            for j, kern in enumerate(prod_kern):
                new_k = getattr(gpflow.kernels,
                                kern)(**self.kernel_params[i + j])
                if hasattr(new_k, 'lengthscales') and self.length_scale_prior:
                    new_k.lengthscales.prior = tfp.distributions.InverseGamma(
                        to_default_float(1), to_default_float(1))
                if j == 0:
                    sub_k = new_k
                    if self.variance_prior:
                        new_k.variance.prior = tfp.distributions.Gamma(
                            to_default_float(1), to_default_float(1))
                else:
                    set_trainable(new_k.variance, False)
                    sub_k *= new_k
            if i == 0:
                k = sub_k
            else:
                k += sub_k
        return k
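The `kernel_split` and `kernel_params` attributes are not shown here; a hedged sketch of the structure `_create_kernel` builds, using a made-up configuration, is a sum of products of GPflow kernels:

import gpflow

# Hypothetical configuration: kernel_split = [["SquaredExponential", "Linear"], ["Matern32"]]
# With the variance of every non-leading factor fixed (as in the loop above), the result is
# (SquaredExponential * Linear) + Matern32.
k = (gpflow.kernels.SquaredExponential() * gpflow.kernels.Linear()
     + gpflow.kernels.Matern32())
gpflow.utilities.print_summary(k)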
Example #18
def set_KL_X_trainable(model, param: bool = True):
    '''If param is False, the parameters that contribute
    to the KL divergence of X are made non-trainable.'''
    try:
        set_trainable(model.X_data_mean, param)
        set_trainable(model.X_data_var, param)
    except:
        pass
    try:
        for l in model.encoder._layers:
            l.trainable = param
            set_trainable(l, param)
    except:
        pass
Example #19
def rbf_model(base_k, data, MAXITER, n, num_class):
    # inducing points
    idx = np.random.choice(data[0].shape[0], n, replace=False)
    induce_x = data[0][idx]

    # plain squared exponential kernel, no convolution
    rbf_m1 = gpflow.models.SVGP(
        base_k,
        gpflow.likelihoods.MultiClass(num_class),
        gpflow.inducing_variables.InducingPoints(induce_x.copy()),
        num_latent_gps=num_class,
    )

    # set kernel variance, lengthscales, and inducing points as trainable
    set_trainable(rbf_m1.kernel.variance, True)
    set_trainable(rbf_m1.kernel.lengthscales, True)
    set_trainable(rbf_m1.inducing_variable, True)

    return rbf_m1
Example #20
def FitModel(
    bConsider,
    GPt,
    GPy,
    globalBranching,
    priorConfidence=0.80,
    M=10,
    likvar=1.0,
    kerlen=2.0,
    kervar=5.0,
    fDebug=False,
    maxiter=100,
    fPredict=True,
    fixHyperparameters=False,
):
    """
    Fit BGP model
    :param bConsider: list of candidate branching points
    :param GPt: pseudotime
    :param GPy: gene expression. Should be 0 mean for best performance.
    :param globalBranching: cell labels
    :param priorConfidence: prior confidence on cell labels
    :param M: number of inducing points
    :param likvar: initial value for Gaussian noise variance
    :param kerlen: initial value for kernel length scale
    :param kervar: initial value for kernel variance
    :param fDebug: Print debugging information
    :param maxiter: maximum number of iterations for optimisation
    :param fPredict: compute predictive mean and variance
    :param fixHyperparameters: should kernel hyperparameters be kept fixed or optimised?
    :return: dictionary of log likelihood, GPflow model, Phi matrix, predictive set of points,
    mean and variance, hyperparameter values, posterior on branching time
    """
    assert isinstance(bConsider, list), "Candidate B must be list"
    assert GPt.ndim == 1
    assert GPy.ndim == 2
    assert (
        GPt.size == GPy.size
    ), "pseudotime and gene expression data must be the same size"
    assert (
        globalBranching.size == GPy.size
    ), "state space must be same size as number of cells"
    assert M >= 0, "the number of inducing points must be non-negative"
    phiInitial, phiPrior = GetInitialConditionsAndPrior(
        globalBranching, priorConfidence, infPriorPhi=True
    )

    XExpanded, indices, _ = VBHelperFunctions.GetFunctionIndexListGeneral(GPt)
    ptb = np.min([np.min(GPt[globalBranching == 2]), np.min(GPt[globalBranching == 3])])
    tree = bt.BinaryBranchingTree(0, 1, fDebug=False)
    tree.add(None, 1, np.ones((1, 1)) * ptb)  # B can be anything here
    (fm, _) = tree.GetFunctionBranchTensor()

    kb = bk.BranchKernelParam(
        gpflow.kernels.Matern32(1), fm, b=np.zeros((1, 1))
    ) + gpflow.kernels.White(1)
    kb.kernels[1].variance.assign(
        1e-6
    )  # controls the discontinuity magnitude, the gap at the branching point
    set_trainable(kb.kernels[1].variance, False)  # jitter for numerics
    if M == 0:
        m = assigngp_dense.AssignGP(
            GPt,
            XExpanded,
            GPy,
            kb,
            indices,
            np.ones((1, 1)) * ptb,
            phiInitial=phiInitial,
            phiPrior=phiPrior,
        )
    else:
        ZExpanded = np.ones((M, 2))
        ZExpanded[:, 0] = np.linspace(0, 1, M, endpoint=False)
        ZExpanded[:, 1] = np.array([i for j in range(M) for i in range(1, 4)])[:M]
        m = assigngp_denseSparse.AssignGPSparse(
            GPt,
            XExpanded,
            GPy,
            kb,
            indices,
            np.ones((1, 1)) * ptb,
            ZExpanded,
            phiInitial=phiInitial,
            phiPrior=phiPrior,
        )
    # Initialise hyperparameters
    m.likelihood.variance.assign(likvar)
    m.kernel.kernels[0].kern.lengthscales.assign(kerlen)
    m.kernel.kernels[0].kern.variance.assign(kervar)
    if fixHyperparameters:
        print("Fixing hyperparameters")
        set_trainable(m.kernel.kernels[0].kern.lengthscales, False)
        set_trainable(m.likelihood.variance, False)
        set_trainable(m.kernel.kernels[0].kern.variance, False)
    else:
        if fDebug:
            print("Adding prior logistic on length scale to avoid numerical problems")
        m.kernel.kernels[0].kern.lengthscales.prior = tfp.distributions.Normal(
            to_default_float(2.0), to_default_float(1.0)
        )
        m.kernel.kernels[0].kern.variance.prior = tfp.distributions.Normal(
            to_default_float(3.0), to_default_float(1.0)
        )
        m.likelihood.variance.prior = tfp.distributions.Normal(
            to_default_float(0.1), to_default_float(0.1)
        )

    # optimization
    ll = np.zeros(len(bConsider))
    Phi_l = list()
    ttestl_l, mul_l, varl_l = list(), list(), list()
    hyps = list()
    for ib, b in enumerate(bConsider):
        m.UpdateBranchingPoint(np.ones((1, 1)) * b, phiInitial)
        try:
            opt = gpflow.optimizers.Scipy()
            opt.minimize(
                m.training_loss,
                variables=m.trainable_variables,
                options=dict(disp=True, maxiter=maxiter),
            )
            # remember winning hyperparameter
            hyps.append(
                {
                    "likvar": m.likelihood.variance.numpy(),
                    "kerlen": m.kernel.kernels[0].kern.lengthscales.numpy(),
                    "kervar": m.kernel.kernels[0].kern.variance.numpy(),
                }
            )
            ll[ib] = m.log_posterior_density()
        except Exception as ex:
            print(f"Unexpected error: {ex} {'-' * 60}\nCaused by model: {m} {'-' * 60}")
            ll[0] = np.nan
            # return model so can inspect model
            return {
                "loglik": ll,
                "model": m,
                "Phi": np.nan,
                "prediction": {"xtest": np.nan, "mu": np.nan, "var": np.nan},
                "hyperparameters": np.nan,
                "posteriorB": np.nan,
            }
        # prediction
        Phi = m.GetPhi()
        Phi_l.append(Phi)
        if fPredict:
            ttestl, mul, varl = VBHelperFunctions.predictBranchingModel(m)
            ttestl_l.append(ttestl), mul_l.append(mul), varl_l.append(varl)
        else:
            ttestl_l.append([]), mul_l.append([]), varl_l.append([])
    iw = np.argmax(ll)
    postB = GetPosteriorB(ll, bConsider)
    if fDebug:
        print(
            "BGP Maximum at b=%.2f" % bConsider[iw],
            "CI= [%.2f, %.2f]" % (postB["B_CI"][0], postB["B_CI"][1]),
        )
    assert np.allclose(bConsider[iw], postB["Bmode"]), "%s-%s" % (
        str(postB["B_CI"]),
        str(bConsider[iw]),
    )
    return {
        "loglik": ll,
        "Phi": Phi_l[iw],  # 'model': m,
        "prediction": {"xtest": ttestl_l[iw], "mu": mul_l[iw], "var": varl_l[iw]},
        "hyperparameters": hyps[iw],
        "posteriorB": postB,
    }
Example #21
# %%
np.random.seed(0)
X = np.random.rand(20,1)*10
Y = np.sin(X) + 0.9 * np.cos(X*1.6) + np.random.randn(*X.shape)* 0.4
Xtest = np.random.rand(10,1)*10
plt.plot(X, Y, 'kx', mew=2);

# %%
data = (tf.convert_to_tensor(X, dtype=default_float()), tf.convert_to_tensor(Y, dtype=default_float()))
inducing_variable = tf.convert_to_tensor(X, dtype=default_float())

m1 = gpflow.models.GPR(data, kernel=gpflow.kernels.SquaredExponential())
m2 = gpflow.models.VGP(data, kernel=gpflow.kernels.SquaredExponential(), likelihood=gpflow.likelihoods.Gaussian())
m3 = gpflow.models.SVGP(gpflow.kernels.SquaredExponential(), gpflow.likelihoods.Gaussian(), inducing_variable, q_diag=False)
set_trainable(m3.inducing_variable, False)

m4 = gpflow.models.SVGP(gpflow.kernels.SquaredExponential(), gpflow.likelihoods.Gaussian(), inducing_variable, q_diag=False, whiten=True)
set_trainable(m4.inducing_variable, False)

m5 = gpflow.models.SGPR(data, kernel=gpflow.kernels.SquaredExponential(), inducing_variable=inducing_variable)
set_trainable(m5.inducing_variable, False)

m6 = gpflow.models.GPRFITC(data, kernel=gpflow.kernels.SquaredExponential(), inducing_variable=inducing_variable)
set_trainable(m6.inducing_variable, False)

models = [m1, m2, m3, m4, m5, m6]

# %% [markdown]
# Now, we optimize the models. For `GPR`, `SGPR`, and `GPRFITC`, this simply optimizes the hyperparameters (since the inducing points are fixed). For the variational models, this jointly maximises the lower bound to the marginal likelihood (Evidence Lower Bound, ELBO) with respect to both the variational parameters and the kernel and likelihood hyperparameters.
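# %%
# (Sketch, not from the original notebook.) One way to run the optimization described
# in the cell above, assuming a GPflow version that provides `training_loss` /
# `training_loss_closure`: GPR, VGP, SGPR and GPRFITC hold the data internally, while
# the SVGP models take it externally through a closure.
opt = gpflow.optimizers.Scipy()
for model in models:
    if isinstance(model, gpflow.models.SVGP):
        loss = model.training_loss_closure(data)
    else:
        loss = model.training_loss
    opt.minimize(loss, model.trainable_variables, options=dict(maxiter=1000))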
Example #22


# %%

# datavar = np.mean(var_dTglob)

xtrain = t.reshape([-1,1]).astype(np.float64)
ytrain = dT1glob.reshape([-1,1]).astype(np.float64)

k = gpflow.kernels.SquaredExponential(variance=1e2*np.var(ytrain), lengthscales=50)
#meanf = gpflow.mean_functions.Constant()
m = gpflow.models.GPR(data=(xtrain, ytrain), kernel=k, noise_variance=np.var(ytrain))#, mean_function=meanf)
opt = gpflow.optimizers.Scipy()

set_trainable(m.kernel.variance, False)
#set_trainable(m.likelihood.variance, False)

print_summary(m)

def objective_closure():
    return - m.log_marginal_likelihood()

opt_logs = opt.minimize(objective_closure,
                        m.trainable_variables,
                        options=dict(maxiter=100))

print_summary(m)

xpl = np.linspace(-20, 1020, 1041).reshape([-1, 1]).astype('float64')
mean, var = m.predict_f(xpl, full_cov=False)
Example #23
def test_separate_independent_mof():
    """
    Same test as above, but we use different (i.e. separate) inducing variables
    for each of the output dimensions.
    """
    np.random.seed(0)

    # Model 1 (inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP

    kernel_1 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_1 = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1,
                   Gaussian(),
                   inducing_variable_1,
                   q_mu=q_mu_1,
                   q_sqrt=q_sqrt_1)
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True
    model_1.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       method='BFGS')

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_list_2 = [
        InducingPoints(Data.X[:Data.M, ...]) for _ in range(Data.P)
    ]
    inducing_variable_2 = mf.SeparateIndependentInducingVariables(
        inducing_variable_list_2)
    model_2 = SVGP(kernel_2,
                   Gaussian(),
                   inducing_variable_2,
                   q_mu=q_mu_2,
                   q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True
    model_2.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       method='BFGS')

    # Model 3 (inefficient): an identical inducing variable is used P times,
    # and treated as a separate one.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kern_list = [
        SquaredExponential(variance=0.5, lengthscale=1.2)
        for _ in range(Data.P)
    ]
    kernel_3 = mk.SeparateIndependent(kern_list)
    inducing_variable_list_3 = [
        InducingPoints(Data.X[:Data.M, ...]) for _ in range(Data.P)
    ]
    inducing_variable_3 = mf.SeparateIndependentInducingVariables(
        inducing_variable_list_3)
    model_3 = SVGP(kernel_3,
                   Gaussian(),
                   inducing_variable_3,
                   q_mu=q_mu_3,
                   q_sqrt=q_sqrt_3)
    set_trainable(model_3, False)
    model_3.q_sqrt.trainable = True
    model_3.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure3():
        return -model_3.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure3,
                                       variables=model_3.trainable_variables,
                                       method='BFGS')

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2, model_3])
Example #24
def test_separate_independent_mok():
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
        1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
        2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional,
    and after optimization return the same log likelihood.
    """
    # Model 1 (Inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP

    kern_list_1 = [
        SquaredExponential(variance=0.5, lengthscale=1.2)
        for _ in range(Data.P)
    ]
    kernel_1 = mk.SeparateIndependent(kern_list_1)
    inducing_variable_1 = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1,
                   Gaussian(),
                   inducing_variable_1,
                   num_latent=1,
                   q_mu=q_mu_1,
                   q_sqrt=q_sqrt_1)
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True
    model_1.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       method='BFGS')

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kern_list_2 = [
        SquaredExponential(variance=0.5, lengthscale=1.2)
        for _ in range(Data.P)
    ]
    kernel_2 = mk.SeparateIndependent(kern_list_2)
    inducing_variable_2 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[:Data.M, ...]))
    model_2 = SVGP(kernel_2,
                   Gaussian(),
                   inducing_variable_2,
                   num_latent=Data.P,
                   q_mu=q_mu_2,
                   q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True
    model_2.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       method='BFGS')

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
Example #25
def test_shared_independent_mok():
    """
    In this test we use the same kernel and the same inducing inducing
    for each of the outputs. The outputs are considered to be uncorrelated.
    This is how GPflow handled multiple outputs before the multioutput framework was added.
    We compare three models here:
        1) an ineffient one, where we use a SharedIndepedentMok with InducingPoints.
           This combination will uses a Kff of size N x P x N x P, Kfu if size N x P x M x P
           which is extremely inefficient as most of the elements are zero.
        2) efficient: SharedIndependentMok and SharedIndependentMof
           This combinations uses the most efficient form of matrices
        3) the old way, efficient way: using Kernel and InducingPoints
        Model 2) and 3) follow more or less the same code path.
    """
    np.random.seed(0)
    # Model 1
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)  # MP x 1
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1,
                   Gaussian(),
                   inducing_variable,
                   q_mu=q_mu_1,
                   q_sqrt=q_sqrt_1,
                   num_latent=Data.Y.shape[-1])
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    # Model 2
    q_mu_2 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = SquaredExponential(variance=0.5, lengthscale=1.2)
    inducing_variable_2 = InducingPoints(Data.X[:Data.M, ...])
    model_2 = SVGP(kernel_2,
                   Gaussian(),
                   inducing_variable_2,
                   num_latent=Data.P,
                   q_mu=q_mu_2,
                   q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    # Model 3
    q_mu_3 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_3 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_3 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[:Data.M, ...]))
    model_3 = SVGP(kernel_3,
                   Gaussian(),
                   inducing_variable_3,
                   num_latent=Data.P,
                   q_mu=q_mu_3,
                   q_sqrt=q_sqrt_3)
    set_trainable(model_3, False)
    model_3.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure3():
        return -model_3.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure3,
                                       variables=model_3.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2, model_3])
Example #26
m.kernel.kernels[1].variance.trainable = True
print_summary(m)

# %% [markdown]
# **NOTE:** If you want to recursively change the `trainable` status of an object that *contains* parameters, you **must** use the `set_trainable()` utility function.
#
# A module (e.g. a model, kernel, likelihood, ... instance) does not have a `trainable` attribute:

# %%
try:
    m.kernel.trainable
except AttributeError:
    print(f'{m.kernel.__class__.__name__} does not have a trainable attribute')

# %%
set_trainable(m.kernel, False)
print_summary(m)

# %% [markdown]
# ## Priors
#
# You can set priors in the same way as transforms and trainability, by using `tensorflow_probability` distribution objects. Let's set a Gamma prior on the variance of the Matern32 kernel.

# %%
k = gpflow.kernels.Matern32()
k.variance.prior = tfp.distributions.Gamma(to_default_float(2),
                                           to_default_float(3))

print_summary(k)

# %%
Example #27
        m = gpflow.models.GPR(data=(time, uv_band_flux),
                              mean_function=Constant(np.mean(uv_band_flux)),
                              kernel=k,
                              noise_variance=1)

        if fix_noise:

            # Fix the noise level to the average experimental error observed in the dataset
            # (0.037 for magnitudes; 2.0364e-15 for the flux values).
            # Standardisation destroys this information, so the noise is set to the mean of the
            # standardised values divided by the SNR in the original space.

            fixed_noise = np.mean(np.abs(uv_band_flux / snr))
            set_trainable(
                m.likelihood.variance, False
            )  # We don't want to optimise the noise level in this case.
            m.likelihood.variance.assign(fixed_noise)

        opt = gpflow.optimizers.Scipy()
        opt.minimize(objective_closure,
                     m.trainable_variables,
                     options=dict(maxiter=100))
        print_summary(m)

        # We specify the grid of time points on which we wish to predict the count rate
        time_test = np.arange(54236, 58630, 1, dtype=np.float64).reshape(-1, 1)
        mean, var = m.predict_y(time_test)

        log_lik = m.log_marginal_likelihood()
Example #28
invlink = gpflow.likelihoods.RobustMax(C)  # Robustmax inverse link function
likelihood = gpflow.likelihoods.MultiClass(
    3, invlink=invlink)  # Multiclass likelihood
Z = X[::5].copy()  # inducing inputs

m = gpflow.models.SVGP(
    kernel=kernel,
    likelihood=likelihood,
    inducing_variable=Z,
    num_latent_gps=C,
    whiten=True,
    q_diag=True,
)

# Fix the White-kernel variance and the inducing inputs; the remaining kernel hyperparameters and the variational parameters stay trainable
set_trainable(m.kernel.kernels[1].variance, False)
set_trainable(m.inducing_variable, False)
print_summary(m, fmt="notebook")

# %% [markdown]
# #### Running inference

# %%
opt = gpflow.optimizers.Scipy()

opt_logs = opt.minimize(m.training_loss_closure(data),
                        m.trainable_variables,
                        options=dict(maxiter=ci_niter(1000)))
print_summary(m, fmt="notebook")

# %%
Example #29
# %%
p = m.kernel.kernels[0].variance
m.kernel.kernels[0].variance = gpflow.Parameter(p.numpy(),
                                                transform=tfp.bijectors.Exp())

# %%
print_summary(m, fmt="notebook")

# %% [markdown]
# ## Changing whether a parameter will be trained in optimization
#
# Another helpful feature is the ability to fix parameters. To do this, simply set the `trainable` attribute to `False`; this is shown in the **trainable** column of the representation, and the corresponding variable is removed from the free state.

# %%
set_trainable(m.kernel.kernels[1].variance, False)
print_summary(m)

# %%
m.trainable_parameters

# %% [markdown]
# To unfix a parameter, just set the `trainable` attribute to `True` again.

# %%
set_trainable(m.kernel.kernels[1].variance, True)
print_summary(m)

# %% [markdown]
# **NOTE:** If you want to recursively change the `trainable` status of an object that *contains* parameters, you **must** use the `set_trainable()` utility function.
#
Example #30
# cannot copy this due to shape mismatch with different numbers of inducing points between models:
del init_params['.inducing_variable.Z']

for M in fMs:
    Zinit = vfe.inducing_variable.Z.numpy()[:M, :]
    Zinit = np.vstack(
        (Zinit, X[np.random.permutation(len(X))[:(M - len(Zinit))], :].copy()))

    vfe = gpflow.models.SGPR((X, Y),
                             gpflow.kernels.SquaredExponential(),
                             inducing_variable=Zinit)

    # copy hyperparameters (omitting inducing_variable.Z) from optimized model:
    gpflow.utilities.multiple_assign(vfe, init_params)

    set_trainable(vfe.kernel, False)
    set_trainable(vfe.likelihood, False)

    objective = tf.function(
        autograph=False)(lambda: -vfe.log_marginal_likelihood())
    gpflow.optimizers.Scipy().minimize(objective,
                                       vfe.trainable_variables,
                                       options=dict(disp=False,
                                                    maxiter=ci_niter(1000)))

    fvfe_lml.append(vfe.log_likelihood().numpy())
    fvupper_lml.append(vfe.upper_bound().numpy())
    print("%i" % M, end=" ")

# %%
plt.plot(fMs, fvfe_lml, label="lower")