Example #1
def construct_mean_function(X: np.ndarray, D_in: int,
                            D_out: int) -> gpflow.mean_functions.MeanFunction:
    """
    Return :class:`gpflow.mean_functions.Identity` when ``D_in`` and ``D_out`` are
    equal. Otherwise, use the principal components of the inputs matrix ``X`` to build a
    :class:`~gpflow.mean_functions.Linear` mean function.

    .. note::
        The returned mean function is set to be untrainable.
        To change this, use :meth:`gpflow.set_trainable`.

    :param X: A data array with the shape ``[N, D_in]`` used to determine the principal
        components to use to create a :class:`~gpflow.mean_functions.Linear` mean function
        when ``D_in != D_out``.
    :param D_in: The dimensionality of the input data (or features) ``X``.
        Typically, this corresponds to ``X.shape[-1]``.
    :param D_out: The dimensionality of the outputs (or targets) ``Y``.
        Typically, this corresponds to ``Y.shape[-1]`` or the number of latent GPs in the layer.
    """
    assert X.shape[-1] == D_in
    if D_in == D_out:
        mean_function = gpflow.mean_functions.Identity()
    else:
        if D_in > D_out:
            _, _, V = np.linalg.svd(X, full_matrices=False)
            W = V[:D_out, :].T
        else:
            W = np.concatenate(
                [np.eye(D_in), np.zeros((D_in, D_out - D_in))], axis=1)

        assert W.shape == (D_in, D_out)
        mean_function = gpflow.mean_functions.Linear(W)
        gpflow.set_trainable(mean_function, False)

    return mean_function
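A quick usage sketch for the helper above (the data and shapes here are invented for illustration; only the call itself comes from the snippet):

import numpy as np
import gpflow

X = np.random.randn(100, 5)

# D_in == D_out: an Identity mean function is returned.
mf_identity = construct_mean_function(X, D_in=5, D_out=5)

# D_in > D_out: a fixed (untrainable) Linear mean function is returned; its weight
# matrix contains the top-2 right singular vectors of X (a PCA-style projection).
mf_pca = construct_mean_function(X, D_in=5, D_out=2)
print(mf_pca.A.shape)  # (5, 2)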
Example #2
def build_model(data):
    variance = tf.math.reduce_variance(data.observations)
    kernel = gpflow.kernels.Matern52(variance=variance,
                                     lengthscales=[0.2, 0.2])
    prior_scale = tf.cast(1.0, dtype=tf.float64)
    kernel.variance.prior = tfp.distributions.LogNormal(
        tf.cast(-2.0, dtype=tf.float64), prior_scale)
    kernel.lengthscales.prior = tfp.distributions.LogNormal(
        tf.math.log(kernel.lengthscales), prior_scale)
    gpr = gpflow.models.GPR(data.astuple(), kernel, noise_variance=1e-5)
    gpflow.set_trainable(gpr.likelihood, False)

    return GPflowModelConfig(
        **{
            "model": gpr,
            "model_args": {
                "num_kernel_samples": 100,
            },
            "optimizer": gpflow.optimizers.Scipy(),
            "optimizer_args": {
                "minimize_args": {
                    "options": dict(maxiter=100)
                },
            },
        })
Example #3
def init_layers_linear(X, Y, Z, kernels, layer_sizes, mean_function=Zero(),
                       num_outputs=None, Layer=SVGPLayer, whiten=False):
    num_outputs = num_outputs or Y.shape[1]
    layers = []

    X_running, Z_running = X.copy(), Z.copy()
    for in_idx, kern_in in enumerate(kernels[:-1]):
        dim_in = layer_sizes[in_idx]
        dim_out = layer_sizes[in_idx+1]

        # Initialize mean function to be either Identity or PCA projection
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                # use eigenvectors corresponding to dim_out largest eigenvalues
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:                 # stepping up, use identity + padding
                W = np.concatenate([np.eye(dim_in),
                                    np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            gpflow.set_trainable(mf.A, False)
            gpflow.set_trainable(mf.b, False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=whiten))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(Layer(kernels[-1], Z_running, num_outputs, mean_function,
                        white=whiten))
    return layers
Example #4
def test_multiclass():
    num_classes = 3
    model = gpflow.models.SVGP(
        gpflow.kernels.SquaredExponential(),
        gpflow.likelihoods.MultiClass(num_classes=num_classes),
        inducing_variable=Datum.X.copy(),
        num_latent_gps=num_classes,
    )
    gpflow.set_trainable(model.inducing_variable, False)

    # test with explicitly unknown shapes:
    tensor_spec = tf.TensorSpec(shape=None, dtype=default_float())
    elbo = tf.function(
        model.elbo,
        input_signature=[(tensor_spec, tensor_spec)],
    )

    @tf.function
    def model_closure():
        return -elbo(Datum.cdata)

    opt = gpflow.optimizers.Scipy()

    # simply test whether it runs without erroring...:
    opt.minimize(
        model_closure,
        variables=model.trainable_variables,
        options=dict(maxiter=3),
        compile=True,
    )
Example #5
    def _init_layers(self,
                     X,
                     Y,
                     Z,
                     q_sqrt_initial,
                     kernels,
                     mean_function=Zero(),
                     Layer=SVGPLayer,
                     white=False):
        """
        The first layer only models between input and output_1,
        The second layer models between input and output_2, output_1 and output_2,
        The inducing point for each layer for input dimension should be shared?
        The induing point for output dimension should be calculated instead of changing?"""

        layers = []
        num_inputs = X.shape[1]
        num_outputs = Y.shape[1]
        self.inducing_inputs = inducingpoint_wrapper(Z[:, :num_inputs])
        gpflow.set_trainable(self.inducing_inputs, False)
        inducing_inputs = self.inducing_inputs.Z

        for i in range(num_outputs):
            layer = Layer(kernels[i],
                          inducing_inputs,
                          Z[:, num_inputs + i],
                          q_sqrt_initial[:, i],
                          mean_function,
                          white=white)
            layers.append(layer)
            inducing_inputs = tf.concat([inducing_inputs, layer.q_mu], axis=1)

        return layers
Example #6
def build_model(data):
    variance = tf.math.reduce_variance(data.observations)
    kernel = gpflow.kernels.RBF(variance=variance, lengthscales=[2, 2])
    gpr = gpflow.models.GPR(data.astuple(), kernel, noise_variance=1e-5)
    gpflow.set_trainable(gpr.likelihood, False)

    return GaussianProcessRegression(gpr)
Example #7
def model_and_loss(data) -> Tuple[tf.keras.models.Model, tf.keras.losses.Loss]:
    """
    Builds a two-layer deep GP model.
    """
    X, Y = data
    num_data, input_dim = X.shape

    layer1 = construct_gp_layer(
        num_data, CONFIG.num_inducing, input_dim, CONFIG.hidden_dim, name="gp0"
    )

    output_dim = Y.shape[-1]
    layer2 = construct_gp_layer(
        num_data, CONFIG.num_inducing, CONFIG.hidden_dim, output_dim, name="gp1"
    )

    likelihood = gpflow.likelihoods.Gaussian(CONFIG.likelihood_variance)
    gpflow.set_trainable(likelihood.variance, False)

    X = tf.keras.Input((input_dim,))
    f1 = layer1(X)
    f2 = layer2(f1)

    # We add a dummy layer so that the likelihood variance is discovered as trainable:
    likelihood_container = gpflux.layers.TrackableLayer()
    likelihood_container.likelihood = likelihood
    y = likelihood_container(f2)

    loss = gpflux.losses.LikelihoodLoss(likelihood)
    return tf.keras.Model(inputs=X, outputs=y), loss
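A hedged sketch of training the returned model with the standard Keras workflow (X_train, Y_train, the optimiser, and the epoch count are placeholders, not part of the original example):

model, loss = model_and_loss((X_train, Y_train))
model.compile(loss=loss, optimizer=tf.keras.optimizers.Adam(0.01))
model.fit(X_train, Y_train, epochs=100, verbose=0)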
Example #8
def create_regression_model(data):
    variance = tf.math.reduce_variance(data.observations)
    kernel = gpflow.kernels.Matern52(variance=variance,
                                     lengthscales=[0.2, 0.2])
    gpr = gpflow.models.GPR(data.astuple(), kernel, noise_variance=1e-5)
    gpflow.set_trainable(gpr.likelihood, False)
    return gpr
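A possible follow-up step, not part of the original snippet: tune the kernel hyperparameters of the returned GPR with GPflow's Scipy optimiser (the training data object is assumed to be available as data).

gpr = create_regression_model(data)
opt = gpflow.optimizers.Scipy()
opt.minimize(gpr.training_loss, gpr.trainable_variables, options=dict(maxiter=100))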
Example #9
def get_covariance_function():
    gp_dtype = gpf.config.default_float()
    # Matern 32
    m32_cov = Matern32(variance=1, lengthscales=100.)
    m32_cov.variance.prior = Normal(gp_dtype(1.), gp_dtype(0.1))
    m32_cov.lengthscales.prior = Normal(gp_dtype(100.), gp_dtype(50.))

    # Periodic base kernel
    periodic_base_cov = SquaredExponential(variance=5., lengthscales=1.)
    set_trainable(periodic_base_cov.variance, False)
    periodic_base_cov.lengthscales.prior = Normal(gp_dtype(5.), gp_dtype(1.))

    # Periodic
    periodic_cov = Periodic(periodic_base_cov, period=1., order=FLAGS.qp_order)
    set_trainable(periodic_cov.period, False)

    # Periodic damping
    periodic_damping_cov = Matern32(variance=1e-1, lengthscales=50)
    periodic_damping_cov.variance.prior = Normal(gp_dtype(1e-1),
                                                 gp_dtype(1e-3))
    periodic_damping_cov.lengthscales.prior = Normal(gp_dtype(50),
                                                     gp_dtype(10.))

    # Final covariance
    co2_cov = periodic_cov * periodic_damping_cov + m32_cov
    return co2_cov
Example #10
def train_SGPR(
    model: gpflow.models.SGPR,
    epochs: int,
    optimizer: tf.optimizers.Optimizer = tf.optimizers.Adam(learning_rate=0.1),
    logging_epoch_freq: int = 10,
    epoch_var: Optional[tf.Variable] = None,
):
    """
    Training loop for Sparse GP
    """
    set_trainable(model.mean_function, False)
    tf_optimization_step = tf.function(optimization_exact)

    loss = list()
    for epoch in range(epochs):
        tf_optimization_step(model)
        if epoch_var is not None:
            epoch_var.assign(epoch + 1)

        epoch_id = epoch + 1
        loss.append(model.training_loss())
        if epoch_id % logging_epoch_freq == 0:
            tf.print(f"Epoch {epoch_id}: LOSS (train) {model.training_loss()}")
    plt.plot(range(epochs), loss)
    plt.xlabel('Epoch', fontsize=25)
    plt.ylabel('Loss', fontsize=25)
    plt.tight_layout()
Example #11
def test_svgp(whiten, q_diag):
    model = gpflow.models.SVGP(
        gpflow.kernels.SquaredExponential(),
        gpflow.likelihoods.Gaussian(),
        inducing_variable=Datum.X.copy(),
        q_diag=q_diag,
        whiten=whiten,
        mean_function=gpflow.mean_functions.Constant(),
        num_latent_gps=Datum.Y.shape[1],
    )
    gpflow.set_trainable(model.inducing_variable, False)

    # test with explicitly unknown shapes:
    tensor_spec = tf.TensorSpec(shape=None, dtype=default_float())
    elbo = tf.function(model.elbo, input_signature=[(tensor_spec, tensor_spec)],)

    @tf.function
    def model_closure():
        return -elbo(Datum.data)

    opt = gpflow.optimizers.Scipy()

    # simply test whether it runs without erroring...:
    opt.minimize(
        model_closure, variables=model.trainable_variables, options=dict(maxiter=3), compile=True,
    )
Example #12
def checkpointing_train_SGPR(
    model: gpflow.models.SGPR,
    X: tf.Tensor,
    Y: tf.Tensor,
    epochs: int,
    manager: tf.train.CheckpointManager,
    optimizer: tf.optimizers.Optimizer = tf.optimizers.Adam(learning_rate=0.1),
    logging_epoch_freq: int = 10,
    epoch_var: Optional[tf.Variable] = None,
    exp_tag: str = 'test',
):
    """
    Training loop for Sparse GP with checkpointing
    """
    set_trainable(model.mean_function, False)
    tf_optimization_step = tf.function(optimization_exact)

    loss = list()
    for epoch in range(epochs):
        tf_optimization_step(model)
        if epoch_var is not None:
            epoch_var.assign(epoch + 1)

        epoch_id = epoch + 1
        loss.append(model.training_loss())
        if epoch_id % logging_epoch_freq == 0:
            ckpt_path = manager.save()
            tf.print(
                f"Epoch {epoch_id}: LOSS (train) {model.training_loss()}, saved at {ckpt_path}"
            )
            tf.print(f"MSE: {mean_squared_error(Y, model.predict_y(X)[0])}")
    plt.plot(range(epochs), loss)
    plt.xlabel('Epoch', fontsize=25)
    plt.ylabel('Loss', fontsize=25)
    plt.tight_layout()
Example #13
    def _gp_train(self, x, y):
        assert x.shape[0] == y.shape[0]
        assert x.ndim == 2 and y.ndim == 2

        if self.gpflow_model is None:
            # if None, init model
            self.gpflow_model = gpflow.models.VGP(
                data=(x, y),
                kernel=self.gp_kernel,
                mean_function=self.gp_meanf,
                likelihood=self.likelihood,
                num_latent_gps=1,
            )
        else:
            # just assign new data
            self.gpflow_model.data = (x, y)

        # training loop
        gpflow.set_trainable(self.gpflow_model.q_mu, False)
        gpflow.set_trainable(self.gpflow_model.q_sqrt, False)

        for i in range(self.train_iters):
            self.natgrad_optimiser.minimize(
                self.gpflow_model.training_loss,
                [(self.gpflow_model.q_mu, self.gpflow_model.q_sqrt)],
            )
            self.optimiser.minimize(
                self.gpflow_model.training_loss,
                self.gpflow_model.trainable_variables,
            )
            logging.debug(
                f"VGP iteration {i+1}. ELBO: {self.gpflow_model.elbo():.04f}")
Example #14
def build_gp_model(data, x_std=1.0, y_std=0.1):

    dim = data.query_points.shape[-1]
    empirical_variance = tf.math.reduce_variance(data.observations)

    prior_lengthscales = [0.2 * x_std * np.sqrt(dim)] * dim
    prior_scale = tf.cast(1.0, dtype=tf.float64)

    x_std = tf.cast(x_std, dtype=tf.float64)
    y_std = tf.cast(y_std, dtype=tf.float64)

    kernel = gpflow.kernels.Matern52(
        variance=empirical_variance,
        lengthscales=prior_lengthscales,
    )
    kernel.variance.prior = tfp.distributions.LogNormal(
        tf.math.log(y_std), prior_scale)
    kernel.lengthscales.prior = tfp.distributions.LogNormal(
        tf.math.log(kernel.lengthscales), prior_scale)
    gpr = gpflow.models.GPR(
        data.astuple(),
        kernel,
        mean_function=gpflow.mean_functions.Constant(),
        noise_variance=1e-5,
    )
    gpflow.set_trainable(gpr.likelihood, False)

    return GaussianProcessRegression(
        model=gpr,
        optimizer=Optimizer(gpflow.optimizers.Scipy(),
                            minimize_args={"options": dict(maxiter=100)}),
        num_kernel_samples=100,
    )
Example #15
def make_kernel_likelihood_iv():
    kernel = gpflow.kernels.SquaredExponential(variance=0.7, lengthscales=0.6)
    likelihood = gpflow.likelihoods.Gaussian(variance=0.08)
    Z = np.linspace(0, 6, 20)[:, np.newaxis]
    inducing_variable = gpflow.inducing_variables.InducingPoints(Z)
    gpflow.set_trainable(inducing_variable, False)
    return kernel, likelihood, inducing_variable
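A hedged illustration of assembling the returned pieces into a model; the SVGP construction below is an assumption for illustration, not part of the original snippet:

kernel, likelihood, inducing_variable = make_kernel_likelihood_iv()
model = gpflow.models.SVGP(kernel, likelihood, inducing_variable=inducing_variable)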
Example #16
def create_classification_model(data):
    kernel = gpflow.kernels.SquaredExponential(variance=100.0,
                                               lengthscales=[0.2, 0.2])
    likelihood = gpflow.likelihoods.Bernoulli()
    vgp = gpflow.models.VGP(data.astuple(), kernel, likelihood)
    gpflow.set_trainable(vgp.kernel.variance, False)
    return vgp
Example #17
    def optimize(self):
        set_trainable(self.model.q_mu, False)
        set_trainable(self.model.q_sqrt, False)
        variational_params = [(self.model.q_mu, self.model.q_sqrt)]
        adam_opt = tf.optimizers.Adam(1e-3)
        natgrad_opt = NaturalGradient(gamma=0.1)

        for step in range(100):
            natgrad_opt.minimize(self.model.training_loss, var_list=variational_params)
            adam_opt.minimize(self.model.training_loss, var_list=self.model.trainable_variables)
Example #18
    def optimize(self, dataset):
        gpflow.set_trainable(self.model.q_mu, False)
        gpflow.set_trainable(self.model.q_sqrt, False)
        variational_params = [(self.model.q_mu, self.model.q_sqrt)]
        adam_opt = tf.optimizers.Adam(1e-3)
        natgrad_opt = gpflow.optimizers.NaturalGradient(gamma=0.1)

        for step in range(50):
            loss = self.model.training_loss
            natgrad_opt.minimize(loss, variational_params)
            adam_opt.minimize(loss, self.model.trainable_variables)
Example #19
    def optimize_policy(self, maxiter=50, restarts=1):
        '''
        Optimize the controller's parameters.
        '''
        start = time.time()
        mgpr_trainable_params = self.mgpr.trainable_parameters
        for param in mgpr_trainable_params:
            set_trainable(param, False)

        if not self.optimizer:
            self.optimizer = gpflow.optimizers.Scipy()
            self.optimizer.minimize(self.training_loss,
                                    self.trainable_variables,
                                    options=dict(maxiter=maxiter))
            # self.optimizer = tf.optimizers.Adam()
            # self.optimizer.minimize(self.training_loss, self.trainable_variables)
        else:
            self.optimizer.minimize(self.training_loss,
                                    self.trainable_variables,
                                    options=dict(maxiter=maxiter))
            # self.optimizer.minimize(self.training_loss, self.trainable_variables)
        end = time.time()
        print(
            "Controller's optimization: done in %.1f seconds with reward=%.3f."
            % (end - start, self.compute_reward()))
        restarts -= 1

        best_parameter_values = [
            param.numpy() for param in self.trainable_parameters
        ]
        best_reward = self.compute_reward()
        for restart in range(restarts):
            self.controller.randomize()
            start = time.time()
            self.optimizer.minimize(self.training_loss,
                                    self.trainable_variables,
                                    options=dict(maxiter=maxiter))
            # self.optimizer.minimize(self.training_loss, self.trainable_variables)
            end = time.time()
            reward = self.compute_reward()
            print(
                "Controller's optimization: done in %.1f seconds with reward=%.3f."
                % (end - start, self.compute_reward()))
            if reward > best_reward:
                best_parameter_values = [
                    param.numpy() for param in self.trainable_parameters
                ]
                best_reward = reward

        for i, param in enumerate(self.trainable_parameters):
            param.assign(best_parameter_values[i])
        end = time.time()
        for param in mgpr_trainable_params:
            set_trainable(param, True)
Example #20
def build_model(data, kernel_func=None):
    """kernel_func should be a function that takes variance as a single input parameter"""
    variance = tf.math.reduce_variance(data.observations)
    if kernel_func is None:
        kernel = gpflow.kernels.Matern52(variance=variance)
    else:
        kernel = kernel_func(variance)
    gpr = gpflow.models.GPR(data.astuple(), kernel, noise_variance=1e-5)
    gpflow.set_trainable(gpr.likelihood, False)

    return GaussianProcessRegression(gpr)
Example #21
 def __init__(self,
              state_dim,
              control_dim,
              num_basis_functions,
              max_action=1.0):
     MGPR.__init__(self, [
         np.random.randn(num_basis_functions, state_dim),
         0.1 * np.random.randn(num_basis_functions, control_dim)
     ])
     for model in self.models:
         model.kernel.variance.assign(1.0)
         set_trainable(model.kernel.variance, False)
     self.max_action = max_action
Example #22
def build_model(data):
    variance = tf.math.reduce_variance(data.observations)
    kernel = gpflow.kernels.Matern52(variance=variance, lengthscales=[0.2, 0.2])
    gpr = gpflow.models.GPR(data.astuple(), kernel, noise_variance=1e-5)
    gpflow.set_trainable(gpr.likelihood, False)

    return {OBJECTIVE: {
        "model": gpr,
        "optimizer": gpflow.optimizers.Scipy(),
        "optimizer_args": {
            "minimize_args": {"options": dict(maxiter=100)},
        },
    }}
Example #23
    def fit(self, X: np.ndarray, Y: np.ndarray):
        """
        Initiate the individual experts and fit their shared hyperparameters by
        minimizing the sum of negative ELBOs.

        Inputs:
        -- X, dimension: n_train_points x dim_x : Training inputs
        -- Y, dimension: n_train_points x 1 : Training labels
        """
        self.ind = 100
        self.X = X
        self.Y = Y

        self.M = int(np.max([int(X.shape[0]) / self.points_per_experts, 1]))
        self.partition_type = partition_type
        self.N = int(X.shape[0] / self.M)

        self.partition = np.random.choice(X.shape[0], size=(self.M, self.N), replace=False)

        lengthscales = tf.convert_to_tensor([1.0] * self.X.shape[1], dtype=default_float())
        self.kern = gpflow.kernels.RBF(lengthscales=lengthscales)

        self.invlink = gpflow.likelihoods.RobustMax(self.C)
        self.likelihood = gpflow.likelihoods.MultiClass(self.C, invlink=self.invlink)

        ivs = []
        for i in range(self.M):
            init_method = ConditionalVariance()
            Z = init_method.compute_initialisation(
                np.array(X[self.partition[i]].copy()), self.ind, self.kern)[0]
            ivs.append(tf.convert_to_tensor(Z))

        self.experts = []
        for i in range(self.M):
            expert = gpflow.models.SVGP(
                kernel=self.kern,
                likelihood=self.likelihood,
                num_latent_gps=self.C,
                inducing_variable=ivs[i],
            )
            self.experts.append(expert)

        for expert in self.experts:
            gpflow.set_trainable(expert.inducing_variable, True)

        self.opt = tf.keras.optimizers.Adam(learning_rate=0.05)

        self.optimize()
Example #24
def create_bo_model(data):
    variance = tf.math.reduce_variance(data.observations)
    lengthscale = 1.0 * np.ones(2, dtype=gpflow.default_float())
    kernel = gpflow.kernels.Matern52(variance=variance, lengthscales=lengthscale)
    jitter = gpflow.kernels.White(1e-12)
    gpr = gpflow.models.GPR(data.astuple(), kernel + jitter, noise_variance=1e-5)
    gpflow.set_trainable(gpr.likelihood, False)
    return trieste.models.create_model({
        "model": gpr,
        "optimizer": gpflow.optimizers.Scipy(),
        "optimizer_args": {
            "minimize_args": {"options": dict(maxiter=100)},
        },
    })
Example #25
def test_mixed_mok_with_Id_vs_independent_mok():
    data = DataMixedKernelWithEye
    # Independent model
    k1 = mk.SharedIndependent(SquaredExponential(variance=0.5, lengthscales=1.2), data.L)
    f1 = InducingPoints(data.X[: data.M, ...])
    model_1 = SVGP(k1, Gaussian(), f1, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    set_trainable(model_1, False)
    set_trainable(model_1.q_sqrt, True)

    gpflow.optimizers.Scipy().minimize(
        model_1.training_loss_closure(Data.data),
        variables=model_1.trainable_variables,
        method="BFGS",
        compile=True,
    )

    # Mixed Model
    kern_list = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(data.L)]
    k2 = mk.LinearCoregionalization(kern_list, data.W)
    f2 = InducingPoints(data.X[: data.M, ...])
    model_2 = SVGP(k2, Gaussian(), f2, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    set_trainable(model_2, False)
    set_trainable(model_2.q_sqrt, True)

    gpflow.optimizers.Scipy().minimize(
        model_2.training_loss_closure(Data.data),
        variables=model_2.trainable_variables,
        method="BFGS",
        compile=True,
    )

    check_equality_predictions(Data.data, [model_1, model_2])
Example #26
 def __init__(self, data, kernel, X=None, likelihood_variance=1e-4):
     gpflow.Module.__init__(self)
     if X is None:
         self.X = Parameter(data[0],
                            name="DataX",
                            dtype=gpflow.default_float())
     else:
         self.X = X
     self.Y = Parameter(data[1], name="DataY", dtype=gpflow.default_float())
     self.data = [self.X, self.Y]
     self.kernel = kernel
     self.likelihood = gpflow.likelihoods.Gaussian()
     self.likelihood.variance.assign(likelihood_variance)
     set_trainable(self.likelihood.variance, False)
Example #27
def fit_natgrad(model, data, maxiter, adam_learning_rate=0.01, gamma=1.0):
    if isinstance(model, gpflow.models.SVGP):
        variational_params = [(model.q_mu, model.q_sqrt)]
    else:
        [layer] = model.f_layers
        variational_params = [(layer.q_mu, layer.q_sqrt)]

    variational_params_vars = []
    for param_list in variational_params:
        these_vars = []
        for param in param_list:
            gpflow.set_trainable(param, False)
            these_vars.append(param.unconstrained_variable)
        variational_params_vars.append(these_vars)
    hyperparam_variables = model.trainable_variables

    num_data = get_num_data(data)

    @tf.function
    def training_loss():
        return -model.elbo(data) / num_data

    natgrad = gpflow.optimizers.NaturalGradient(gamma=gamma)
    adam = tf.optimizers.Adam(adam_learning_rate)

    @tf.function
    def optimization_step():
        """
        NOTE: In GPflow, we would normally do alternating ascent:

        >>> natgrad.minimize(training_loss, var_list=variational_params)
        >>> adam.minimize(training_loss, var_list=hyperparam_variables)

        This, however, does not match up with the single pass we require for Keras's
        model.compile()/fit(). Hence we manually re-create the same optimization step.
        """
        with tf.GradientTape() as tape:
            tape.watch(variational_params_vars)
            loss = training_loss()
        variational_grads, other_grads = tape.gradient(
            loss, (variational_params_vars, hyperparam_variables))
        for (q_mu_grad, q_sqrt_grad), (q_mu,
                                       q_sqrt) in zip(variational_grads,
                                                      variational_params):
            natgrad._natgrad_apply_gradients(q_mu_grad, q_sqrt_grad, q_mu,
                                             q_sqrt)
        adam.apply_gradients(zip(other_grads, hyperparam_variables))

    for i in range(maxiter):
        optimization_step()
Example #28
 def __init__(
     self,
     data: Tuple[tf.Tensor, tf.Tensor],
     reduced_kernel: gpflow.kernels.Kernel,
     observed_kernel: gpflow.kernels.Kernel,
     mean_function: Optional[gpflow.mean_functions.MeanFunction] = None,
     noise_variance: float = 1.0,
     jitter: float = 1e-8,
 ):
     super().__init__()
     self.likelihood = gpflow.likelihoods.Gaussian(noise_variance)
     self.reduced_kernel = reduced_kernel  # this will be the kernel for absorption axis
     self.observed_kernel = observed_kernel  # this is the kernel for fluorescence axis
     self._pred_jitter_kernel = gpflow.kernels.White(
         variance=jitter)  # needed in prediction only
     gpflow.set_trainable(self._pred_jitter_kernel.variance, False)
     W, Y = data
     R = W.shape[-1]  # the reduced axis length
     Q = Y.shape[-1]  # the observed axis length
     raxis = np.linspace(-1.0, 1.0, R)[:, None]  # reduced axis
     qaxis = np.linspace(-1.0, 1.0, Q)[:, None]  # observed axis
     self.raxis = tf.convert_to_tensor(raxis, dtype=gpflow.default_float())
     self.qaxis = tf.convert_to_tensor(qaxis, dtype=gpflow.default_float())
     # full_axis = 2D axis ordered to match kron(reduced_axis, observed_axis)
     self.full_axis = cartesian_prod(self.raxis, self.qaxis)
     self.data = tuple([
         tf.convert_to_tensor(d, dtype=gpflow.config.default_float())
         for d in data
     ])
     # reassign for convenience now that we're in tensorflow mode
     W, Y = self.data
     # we need to store W for prediction, but not training
     self._W = W
     # make some cached quantities
     self._WTW = tf.matmul(W, W, transpose_a=True)
     self._YTW = tf.matmul(Y, W, transpose_a=True)
     self._yTy = tf.reduce_sum(Y * Y)  # equivalent to fvec(Y).T @ fvec(Y)
     self._N = tf.cast(tf.shape(W)[0], dtype=gpflow.default_float())
     self._R = tf.cast(tf.shape(W)[-1], dtype=gpflow.default_float())
     self._Q = tf.cast(tf.shape(Y)[-1], dtype=gpflow.default_float())
     if mean_function is None:
         self.prior_mean_func = gpflow.mean_functions.Zero()
         self.zpm = True
     else:
         self.prior_mean_func = mean_function
         self.zpm = False
     self.jitter = tf.cast(jitter, dtype=gpflow.default_float())
     self.log2pi = tf.cast(np.log(np.pi * 2), dtype=gpflow.default_float())
Example #29
def test_svgp_fixing_q_sqrt():
    """
    In response to bug #46, we need to make sure that the q_sqrt matrix can be fixed
    """
    num_latent_gps = default_datum_svgp.Y.shape[1]
    model = gpflow.models.SVGP(
        kernel=gpflow.kernels.SquaredExponential(),
        likelihood=default_datum_svgp.lik,
        q_diag=True,
        num_latent_gps=num_latent_gps,
        inducing_variable=default_datum_svgp.Z,
        whiten=False,
    )
    default_num_trainable_variables = len(model.trainable_variables)
    set_trainable(model.q_sqrt, False)
    assert len(model.trainable_variables) == default_num_trainable_variables - 1
Example #30
def test_non_trainable_model_objective():
    """
    Checks that we can still compute the objective of a model that has no
    trainable parameters whatsoever (regression test for bug in log_prior()).
    In this case we have no priors, so log_prior should be zero to add no
    contribution to the objective.
    """
    model = gpflow.models.GPR(
        (Data.X, Data.Y),
        kernel=gpflow.kernels.SquaredExponential(lengthscales=Data.ls, variance=Data.var),
    )

    set_trainable(model, False)

    _ = model.log_marginal_likelihood()
    assert model.log_prior_density() == 0.0