Example #1
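These snippets are excerpted from larger modules, so their imports are omitted throughout. A minimal sketch of the imports the examples below appear to assume (project-specific helpers such as bounded_parameter, handle_zeros_in_scale, HeteroskedasticGaussian, and data_generator are defined elsewhere and are not covered here):

import os
import time
from functools import partial
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

import gpflow
from gpflow.ci_utils import ci_niter
from gpflow.models import SVGP
from gpflow.monitor import ModelToTensorBoard, Monitor, MonitorTaskGroup
from gpflow.optimizers import NaturalGradient, Scipy
from gpflow.utilities import print_summary, set_trainable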
def analyze(f, title="Plot"):
    X, Y, groups = f()
    Y_data = np.hstack([Y, groups])
    likelihood = gpflow.likelihoods.SwitchedLikelihood([
        gpflow.likelihoods.Gaussian(variance=1.0),
        gpflow.likelihoods.Gaussian(variance=1.0)
    ])
    # model construction (notice that num_latent_gps is 1)
    natgrad = NaturalGradient(gamma=1.0)
    adam = tf.optimizers.Adam()
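    # Note: this Adam optimizer is constructed but never used below; the training loop takes only natural-gradient steps.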
    kernel = gpflow.kernels.Matern52(lengthscales=0.5)
    model = gpflow.models.VGP((X, Y_data),
                              kernel=kernel,
                              likelihood=likelihood,
                              num_latent_gps=1)
    # here's a plot of the raw data.
    fig, ax = plt.subplots(1, 1, figsize=(12, 6))
    _ = ax.plot(X, Y_data, "kx")
    plt.xlabel("Minutes")
    plt.ylabel("Value")
    plt.title(title)
    plt.savefig(title + '.png')
    for _ in range(ci_niter(1000)):
        natgrad.minimize(model.training_loss, [(model.q_mu, model.q_sqrt)])


    # let's do some plotting!
    xx = np.linspace(0, 30, 200)[:, None]

    mu, var = model.predict_f(xx)

    plt.figure(figsize=(12, 6))
    plt.plot(xx, mu, "C0")
    plt.plot(xx, mu + 2 * np.sqrt(var), "C0", lw=0.5)
    plt.plot(xx, mu - 2 * np.sqrt(var), "C0", lw=0.5)
    plt.plot(X, Y, "C1x", mew=2)
    plt.xlabel("Minutes")
    plt.ylabel("Value")
    plt.title(title)
    plt.savefig(title + ' GP model.png')

    print_summary(model)
    # print(type(summary))
    # summary.to_markdown(title+'.md')
    # plt.set_xlim(0, 30)
    # _ = ax.plot(xx, 2.5 * np.sin(6 * xx) + np.cos(3 * xx), "C2--")

    # plt.errorbar(
    #     X.squeeze(),
    #     Y.squeeze(),
    #     # yerr=2 * (np.sqrt(NoiseVar)).squeeze(),
    #     marker="x",
    #     lw=0,
    #     elinewidth=1.0,
    #     color="C1",
    # )
    # _ = plt.xlim(-5, 5)
    return
Example #2
    def optimize(self):
        set_trainable(self.model.q_mu, False)
        set_trainable(self.model.q_sqrt, False)
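        # q_mu and q_sqrt are frozen so that the Adam step below leaves them alone;
        # only the natural-gradient step updates the variational parameters.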
        variational_params = [(self.model.q_mu, self.model.q_sqrt)]
        adam_opt = tf.optimizers.Adam(1e-3)
        natgrad_opt = NaturalGradient(gamma=0.1)

        for step in range(100):
            natgrad_opt.minimize(self.model.training_loss, var_list=variational_params)
            adam_opt.minimize(self.model.training_loss, var_list=self.model.trainable_variables)
Example #3
    def __init__(self,
                 variational_model: bool = True,
                 do_monitor: bool = False):

        self.var = variational_model
        self.do_monitor = do_monitor
        if do_monitor:
            self.monitor_path = "train_log/fit"
            os.system("rm -rf train_log")

        if variational_model:
            self.opt = tf.optimizers.Adam()
            self.opt_var = NaturalGradient(gamma=0.1)
        else:
            self.opt = Scipy()
Example #4
def assert_gpr_vs_vgp(
    m1: gpflow.models.BayesianModel,
    m2: gpflow.models.BayesianModel,
    gamma: float = 1.0,
    maxiter: int = 1,
    xi_transform: Optional[gpflow.optimizers.natgrad.XiTransform] = None,
):
    assert maxiter >= 1

    m1_ll_before = m1.training_loss()
    m2_ll_before = m2.training_loss()

    assert_different(m2_ll_before, m1_ll_before)

    params = (m2.q_mu, m2.q_sqrt)
    if xi_transform is not None:
        params += (xi_transform, )

    opt = NaturalGradient(gamma)

    @tf.function
    def minimize_step():
        opt.minimize(m2.training_loss, var_list=[params])

    for _ in range(maxiter):
        minimize_step()

    m1_ll_after = m1.training_loss()
    m2_ll_after = m2.training_loss()

    np.testing.assert_allclose(m1_ll_after, m2_ll_after, atol=1e-4)
Example #5
def assert_gpr_vs_vgp(
        m1: tf.Module,
        m2: tf.Module,
        gamma: float = 1.0,
        maxiter: int = 1,
        xi_transform: Optional[gpflow.optimizers.natgrad.XiTransform] = None):
    assert maxiter >= 1

    m2_ll_before = m2.log_likelihood()
    m1_ll_before = m1.log_likelihood()

    assert m2_ll_before != m1_ll_before

    @tf.function(autograph=False)
    def loss_cb() -> tf.Tensor:
        return -m2.log_marginal_likelihood()

    params = (m2.q_mu, m2.q_sqrt)
    if xi_transform is not None:
        params += (xi_transform, )

    opt = NaturalGradient(gamma)

    @tf.function(autograph=False)
    def minimize_step():
        opt.minimize(loss_cb, var_list=[params])

    for _ in range(maxiter):
        minimize_step()

    m2_ll_after = m2.log_likelihood()
    m1_ll_after = m1.log_likelihood()

    np.testing.assert_allclose(m1_ll_after, m2_ll_after, atol=1e-4)
Example #6
def analyze(f, title="Plot", rawplot=True, modelplot=True,summary=True):
    # Obtain randomly generated data
    X, Y, groups = f()
    Y_data = np.hstack([Y, groups])
    # Model construction (notice that num_latent_gps is 1)
    likelihood = gpflow.likelihoods.SwitchedLikelihood(
        [gpflow.likelihoods.Gaussian(variance=1.0),
         gpflow.likelihoods.Gaussian(variance=1.0)]
    )
    natgrad = NaturalGradient(gamma=1.0)
    adam = tf.optimizers.Adam()
    kernel = gpflow.kernels.Matern52(lengthscales=0.5)
    model = gpflow.models.VGP((X, Y_data), kernel=kernel, likelihood=likelihood, num_latent_gps=1)
    for _ in range(ci_niter(1000)):
        natgrad.minimize(model.training_loss, [(model.q_mu, model.q_sqrt)])

    # Plot of the raw data.
    if rawplot:
        fig, ax = plt.subplots(1, 1, figsize=(12, 6))
        _ = ax.plot(X, Y_data, "kx")
        plt.xlabel("Minutes")
        plt.ylabel("Value")
        plt.title(title)
        plt.savefig(title+'.png')

    # Plot of GP model
    if modelplot:
        xx = np.linspace(0, 30, 200)[:, None]
        mu, var = model.predict_f(xx)

        plt.figure(figsize=(12, 6))
        plt.plot(xx, mu, "C0")
        plt.plot(xx, mu + 2 * np.sqrt(var), "C0", lw=0.5)
        plt.plot(xx, mu - 2 * np.sqrt(var), "C0", lw=0.5)
        plt.plot(X, Y, "C1x", mew=2)
        plt.xlabel("Minutes")
        plt.ylabel("Value")
        plt.title(title)
        plt.savefig(title+' GP model.png')

    if summary:
        print_summary(model)

    return model
Example #7
def assert_sgpr_vs_svgp(
    m1: gpflow.models.BayesianModel,
    m2: gpflow.models.BayesianModel,
):
    data = m1.data

    m1_ll_before = m1.training_loss()
    m2_ll_before = m2.training_loss(data)

    assert_different(m2_ll_before, m1_ll_before)

    params = [(m2.q_mu, m2.q_sqrt)]
    opt = NaturalGradient(1.0)
    opt.minimize(m2.training_loss_closure(data), var_list=params)

    m1_ll_after = m1.training_loss()
    m2_ll_after = m2.training_loss(data)

    np.testing.assert_allclose(m1_ll_after, m2_ll_after, atol=1e-4)
Example #8
def assert_sgpr_vs_svgp(m1: tf.Module, m2: tf.Module):
    data = m1.data

    m1_ll_before = m1.log_likelihood()
    m2_ll_before = m2.log_likelihood(data[0], data[1])

    assert m2_ll_before != m1_ll_before

    @tf.function(autograph=False)
    def loss_cb() -> tf.Tensor:
        return -m2.log_marginal_likelihood(data[0], data[1])

    params = [(m2.q_mu, m2.q_sqrt)]
    opt = NaturalGradient(1.)
    opt.minimize(loss_cb, var_list=params)

    m1_ll_after = m1.log_likelihood()
    m2_ll_after = m2.log_likelihood(data[0], data[1])

    np.testing.assert_allclose(m1_ll_after, m2_ll_after, atol=1e-4)
Example #9
def train_natgrad_adam(model, approx=False, num_iterations=2000, log_freq=10):
    
    natgrad_opt = NaturalGradient(gamma=1.0)
    adam_opt = tf.optimizers.Adam(learning_rate=0.01)
    variational_params = list(zip(model.q_mu, model.q_sqrt))
    gpflow.set_trainable(model.q_mu, False)
    gpflow.set_trainable(model.q_sqrt, False)
    if approx:
        variational_params.append((model.q_mu_s, model.q_sqrt_s))
        gpflow.set_trainable(model.q_mu_s, False)
        gpflow.set_trainable(model.q_sqrt_s, False)

    @tf.function
    def optimization_step():
        natgrad_opt.minimize(model.training_loss, var_list=variational_params)
        adam_opt.minimize(model.training_loss, var_list=model.trainable_variables)
        #return (model.elbo(), model.Fq)
        return model.elbo()

    log_elbo = []
    #log_Fq = []
    # log_predY = []
    tol = 1e-4
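    # Stop early once the ELBO changes by less than tol between successive iterations.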

    print('initial elbo {:.4f}'.format(model.elbo().numpy()))

    for step in range(num_iterations):
        start_time = time.time()
        #elbo, Fq = optimization_step()
        elbo = optimization_step().numpy()
        log_elbo.append(elbo)
        #log_Fq.append(Fq.numpy())
        # log_predY.append(pred_Y.numpy())

        if step > 0 and np.abs(elbo - log_elbo[-2]) < tol:
            print('converge at iteration {} elbo {:.4f}'.format(step+1, elbo))
            break
        if (step + 1)  % log_freq == 0:
            print('iteration {} elbo {:.4f}, took {:.4f}s'.format(step+1, elbo, time.time()-start_time))
            
    #return (log_elbo, log_Fq)
    return log_elbo
Example #10
          likelihood=gpflow.likelihoods.Gaussian())

# %% [markdown]
# The log marginal likelihood lower bound (evidence lower bound or ELBO) of the approximate GP model is:

# %%
vgp.elbo().numpy()

# %% [markdown]
# Our initial guess for the variational distribution is, of course, not optimal, so this ELBO lies below the log marginal likelihood of the exact GPR model. We can optimize the variational parameters to obtain a tighter bound.

# %% [markdown]
# In fact, we only need to take **one step** in the natural gradient direction to recover the exact posterior:

# %%
natgrad_opt = NaturalGradient(gamma=1.0)
variational_params = [(vgp.q_mu, vgp.q_sqrt)]
natgrad_opt.minimize(vgp.training_loss, var_list=variational_params)

# %% [markdown]
# The ELBO of the approximate GP model after a single NatGrad step:

# %%
vgp.elbo().numpy()

# %% [markdown]
# ### Optimize both variational parameters and kernel hyperparameters together
#
# In the Gaussian likelihood case we can iterate between an Adam update for the hyperparameters and a NatGrad update for the variational parameters. That way, we achieve optimization of hyperparameters as if the model were a GPR.
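
# %% [markdown]
# A minimal sketch of that interleaved loop, reusing `natgrad_opt` and `variational_params` from above (the Adam learning rate and the iteration count are illustrative, and `tf` is assumed to be imported):

# %%
adam_opt = tf.optimizers.Adam(0.01)

# Exclude the variational parameters from Adam; the NatGrad step updates them instead.
gpflow.set_trainable(vgp.q_mu, False)
gpflow.set_trainable(vgp.q_sqrt, False)

for _ in range(100):
    natgrad_opt.minimize(vgp.training_loss, var_list=variational_params)
    adam_opt.minimize(vgp.training_loss, var_list=vgp.trainable_variables)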

# %% [markdown]
Example #11
    def __init__(self,
                 xin,
                 yin,
                 nInput,
                 nOutput,
                 xlb,
                 xub,
                 seed=None,
                 batch_size=50,
                 inducing_fraction=0.2,
                 min_inducing=100,
                 gp_lengthscale_bounds=(1e-6, 100.0),
                 gp_likelihood_sigma=1.0e-4,
                 natgrad_gamma=0.1,
                 adam_lr=0.01,
                 n_iter=30000,
                 min_elbo_pct_change=1.0,
                 num_latent_gps=None,
                 logger=None):
        if not _has_gpflow:
            raise RuntimeError(
                'SIV_Matern requires the GPflow library to be installed.')

        self.nInput = nInput
        self.nOutput = nOutput
        self.xlb = xlb
        self.xub = xub
        self.xrng = np.where(np.isclose(xub - xlb, 0., rtol=1e-6, atol=1e-6),
                             1., xub - xlb)

        self.logger = logger

        N = xin.shape[0]
        D = xin.shape[1]
        xn = np.zeros_like(xin)
        for i in range(N):
            xn[i, :] = (xin[i, :] - self.xlb) / self.xrng
        if nOutput == 1:
            yin = yin.reshape((yin.shape[0], 1))
        if num_latent_gps is None:
            num_latent_gps = nOutput

        self.y_train_mean = np.asarray(
            [np.mean(yin[:, i]) for i in range(yin.shape[1])],
            dtype=np.float32)
        self.y_train_std = np.asarray([
            handle_zeros_in_scale(np.std(yin[:, i], axis=0), copy=False)
            for i in range(yin.shape[1])
        ],
                                      dtype=np.float32)

        # Remove mean and make unit variance
        yn = np.column_stack(
            tuple((yin[:, i] - self.y_train_mean[i]) / self.y_train_std[i]
                  for i in range(yin.shape[1])))

        adam_opt = tf.optimizers.Adam(adam_lr)
        natgrad_opt = NaturalGradient(gamma=natgrad_gamma)
        autotune = tf.data.experimental.AUTOTUNE

        if logger is not None:
            logger.info(f"SIV_Matern: creating regressor for output...")
            for i in range(nOutput):
                logger.info(
                    f"SIV_Matern: y_{i+1} range is {(np.min(yin[:,i]), np.max(yin[:,i]))}"
                )

        data = (np.asarray(xn, dtype=np.float64), yn.astype(np.float64))

        M = int(round(inducing_fraction * N))

        if M < min_inducing:
            Z = xn.copy()
        else:
            Z = xn[np.random.choice(N, size=M, replace=False), :].copy(
            )  # Initialize inducing locations to M random inputs
        iv = gpflow.inducing_variables.SharedIndependentInducingVariables(
            gpflow.inducing_variables.InducingPoints(Z))
        kernel = gpflow.kernels.Matern52()
        gp_kernel = gpflow.kernels.SharedIndependent(kernel,
                                                     output_dim=nOutput)
        gp_likelihood = gpflow.likelihoods.Gaussian(
            variance=gp_likelihood_sigma)
        gp_model = gpflow.models.SVGP(inducing_variable=iv,
                                      kernel=gp_kernel,
                                      likelihood=gp_likelihood,
                                      num_data=N,
                                      num_latent_gps=num_latent_gps)

        gp_model.kernel.kernel.lengthscales = bounded_parameter(
            np.asarray([gp_lengthscale_bounds[0]] * nInput, dtype=np.float64),
            np.asarray([gp_lengthscale_bounds[1]] * nInput, dtype=np.float64),
            np.ones(nInput, dtype=np.float64),
            trainable=True,
            name='lengthscales')

        gpflow.set_trainable(gp_model.q_mu, False)
        gpflow.set_trainable(gp_model.q_sqrt, False)
        gpflow.set_trainable(gp_model.inducing_variable, False)

        if logger is not None:
            logger.info(f"SIV_Matern: optimizing regressor...")

        variational_params = [(gp_model.q_mu, gp_model.q_sqrt)]

        data_minibatch = (tf.data.Dataset.from_tensor_slices(data).prefetch(
            autotune).repeat().shuffle(N).batch(batch_size))
        data_minibatch_it = iter(data_minibatch)
        svgp_natgrad_loss = gp_model.training_loss_closure(data_minibatch_it,
                                                           compile=True)
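        # Because the loss closure is built from an iterator, every evaluation draws a fresh minibatch.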

        @tf.function
        def optim_step():
            natgrad_opt.minimize(svgp_natgrad_loss,
                                 var_list=variational_params)
            adam_opt.minimize(svgp_natgrad_loss,
                              var_list=gp_model.trainable_variables)

        iterations = ci_niter(n_iter)
        elbo_log = []
        diff_kernel = np.array([1, -1])
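        # Convolving elbo_log with this [1, -1] kernel yields successive ELBO differences,
        # which feed the percent-change stopping test below.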
        for it in range(iterations):
            optim_step()
            if (it % 10 == 0):
                likelihood = -svgp_natgrad_loss().numpy()
                elbo_log.append(likelihood)
            if (it % 1000 == 0):
                logger.info(
                    f"SIV_Matern: iteration {it} likelihood: {likelihood:.04f}"
                )
            if it >= 2000:
                elbo_change = np.convolve(elbo_log, diff_kernel, 'same')[1:]
                elbo_pct_change = (elbo_change / np.abs(elbo_log[1:])) * 100
                mean_elbo_pct_change = np.mean(elbo_pct_change[-100:])
                if (it % 1000 == 0):
                    logger.info(
                        f"SIV_Matern: iteration {it} mean elbo pct change: {mean_elbo_pct_change:.04f}"
                    )
                if mean_elbo_pct_change < min_elbo_pct_change:
                    logger.info(
                        f"SIV_Matern: likelihood change at iteration {it+1} is less than {min_elbo_pct_change} percent"
                    )
                    break
        print_summary(gp_model)
        self.sm = gp_model
Example #12
    def __init__(self,
                 xin,
                 yin,
                 nInput,
                 nOutput,
                 xlb,
                 xub,
                 seed=None,
                 gp_lengthscale_bounds=(1e-6, 100.0),
                 gp_likelihood_sigma=1.0e-4,
                 natgrad_gamma=1.0,
                 adam_lr=0.01,
                 n_iter=3000,
                 min_elbo_pct_change=0.1,
                 logger=None):
        if not _has_gpflow:
            raise RuntimeError(
                'VGP_Matern requires the GPflow library to be installed.')

        self.nInput = nInput
        self.nOutput = nOutput
        self.xlb = xlb
        self.xub = xub
        self.xrng = np.where(np.isclose(xub - xlb, 0., rtol=1e-6, atol=1e-6),
                             1., xub - xlb)

        self.logger = logger

        N = xin.shape[0]
        xn = np.zeros_like(xin)
        for i in range(N):
            xn[i, :] = (xin[i, :] - self.xlb) / self.xrng
        if nOutput == 1:
            yin = yin.reshape((yin.shape[0], 1))

        self.y_train_mean = np.asarray(
            [np.mean(yin[:, i]) for i in range(yin.shape[1])],
            dtype=np.float32)
        self.y_train_std = np.asarray([
            handle_zeros_in_scale(np.std(yin[:, i], axis=0), copy=False)
            for i in range(yin.shape[1])
        ],
                                      dtype=np.float32)

        # Remove mean and make unit variance
        yn = np.column_stack(
            tuple((yin[:, i] - self.y_train_mean[i]) / self.y_train_std[i]
                  for i in range(yin.shape[1])))

        adam_opt = tf.optimizers.Adam(adam_lr)
        natgrad_opt = NaturalGradient(gamma=natgrad_gamma)

        smlist = []
        for i in range(nOutput):
            if logger is not None:
                logger.info(
                    f"VGP_Matern: creating regressor for output {i+1} of {nOutput}..."
                )
                logger.info(
                    f"VGP_Matern: y_{i} range is {(np.min(yin[:,i]), np.max(yin[:,i]))}..."
                )

            gp_kernel = gpflow.kernels.Matern52()
            gp_likelihood = gpflow.likelihoods.Gaussian(
                variance=gp_likelihood_sigma)
            gp_model = gpflow.models.VGP(
                data=(np.asarray(xn, dtype=np.float64), yn[:, i].reshape(
                    (-1, 1)).astype(np.float64)),
                kernel=gp_kernel,
                likelihood=gp_likelihood,
            )
            gp_model.kernel.lengthscales = bounded_parameter(
                np.asarray([gp_lengthscale_bounds[0]] * nInput,
                           dtype=np.float64),
                np.asarray([gp_lengthscale_bounds[1]] * nInput,
                           dtype=np.float64),
                np.ones(nInput, dtype=np.float64),
                trainable=True,
                name='lengthscales')

            gpflow.set_trainable(gp_model.q_mu, False)
            gpflow.set_trainable(gp_model.q_sqrt, False)

            if logger is not None:
                logger.info(
                    f"VGP_Matern: optimizing regressor for output {i+1} of {nOutput}..."
                )

            variational_params = [(gp_model.q_mu, gp_model.q_sqrt)]
            iterations = ci_niter(n_iter)
            elbo_log = []
            diff_kernel = np.array([1, -1])

            @tf.function
            def optim_step():
                natgrad_opt.minimize(gp_model.training_loss,
                                     var_list=variational_params)
                adam_opt.minimize(gp_model.training_loss,
                                  var_list=gp_model.trainable_variables)

            for it in range(iterations):
                optim_step()
                likelihood = gp_model.elbo()
                if (it % 100 == 0):
                    logger.info(
                        f"VGP_Matern: iteration {it} likelihood: {likelihood:.04f}"
                    )
                elbo_log.append(likelihood)
                if it >= 200:
                    elbo_change = np.convolve(elbo_log, diff_kernel,
                                              'same')[1:]
                    elbo_pct_change = (elbo_change /
                                       np.abs(elbo_log[1:])) * 100
                    mean_elbo_pct_change = np.mean(elbo_pct_change[-100:])
                    if mean_elbo_pct_change < min_elbo_pct_change:
                        logger.info(
                            f"VGP_Matern: likelihood change at iteration {it+1} is less than {min_elbo_pct_change} percent"
                        )
                        break
            print_summary(gp_model)
            #assert(opt_log.success)
            smlist.append(gp_model)
        self.smlist = smlist
Example #13
class Trainer():
    def __init__(self,
                 variational_model: bool = True,
                 do_monitor: bool = False):

        self.var = variational_model
        self.do_monitor = do_monitor
        if do_monitor:
            self.monitor_path = "train_log/fit"
            os.system("rm -rf train_log")

        if variational_model:
            self.opt = tf.optimizers.Adam()
            self.opt_var = NaturalGradient(gamma=0.1)
        else:
            self.opt = Scipy()

    def run(self, model, dataset, epoch: int = 10):
        num_iter = len(dataset) * epoch

        # Mark the parameters that the generic optimizer should not train
        set_trainable(model.inducing_variable, False)
        set_trainable(model.q_mu, False)
        set_trainable(model.q_sqrt, False)

        if self.do_monitor:
            self.create_monitor(model)

        if self.var:
            train_iter = iter(dataset)
            training_loss = model.training_loss_closure(train_iter,
                                                        compile=True)
            for step in tf.range(num_iter):
                self.optimization_step(model, training_loss)
                self.monitor(step)

        else:
            data = dataset.unbatch()
            self.opt.minimize(model.training_loss_closure(data),
                              variables=model.trainable_variables,
                              options={
                                  "disp": True,
                                  "maxiter": 1e3
                              })

    @tf.function
    def optimization_step(self, model, loss):
        self.opt.minimize(loss, var_list=model.trainable_variables)
        self.opt_var.minimize(loss, var_list=[(model.q_mu, model.q_sqrt)])

    def create_monitor(self, model):

        model_task = ModelToTensorBoard(self.monitor_path, model)
        self.monitor = Monitor(MonitorTaskGroup([model_task]), period=5)


# data_minibatch = (
#     tf.data.Dataset.from_tensor_slices(data)
#     .prefetch(autotune)
#     .repeat()
#     .shuffle(N)
#     .batch(batch_size)
# )

#nat grad loop
# gamma_start = 1e-2   # deliberately chosen to be too large for this example
# gamma_max = 1e-1   # same max value as before
# gamma_step = 1e-2  # this is much more aggressive increase

# gamma = tf.Variable(gamma_start, dtype=tf.float64)
# gamma_incremented = tf.where(tf.less(gamma, gamma_max), gamma + gamma_step, gamma_max)

# op_ng = NatGradOptimizer(gamma).make_optimize_tensor(model, var_list=[[model.q_mu, model.q_sqrt]])
# op_adam = AdamOptimizer(0.001).make_optimize_tensor(model)
# op_increment_gamma = tf.assign(gamma, gamma_incremented)

# gamma_fallback = 1e-1   # we'll reduce by this factor if there's a cholesky failure
# op_fallback_gamma = tf.assign(gamma, gamma * gamma_fallback)

# sess.run(tf.variables_initializer([gamma]))

# for it in range(1000):
#     try:
#         sess.run(op_ng)
#         sess.run(op_increment_gamma)
#     except tf.errors.InvalidArgumentError:
#         g = sess.run(gamma)
#         print('gamma = {} on iteration {} is too big! Falling back to {}'.format(it, g, g * gamma_fallback))
#         sess.run(op_fallback_gamma)

#     sess.run(op_adam)

#     if it % 100 == 0:
#         print('{} gamma={:.4f} ELBO={:.4f}'.format(it, *sess.run([gamma, model.likelihood_tensor])))
    NoiseVar = 2 * np.exp(-((X - 2) ** 2) / 4) + 0.3  # Noise variances
    Y = F + np.random.randn(N, 1) * np.sqrt(NoiseVar)  # Noisy data
    return X, Y, NoiseVar

#known noise

X, Y, NoiseVar = generate_data()
Y_data = np.hstack([Y, NoiseVar])


likelihood = HeteroskedasticGaussian()
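# HeteroskedasticGaussian is assumed to be a user-defined likelihood (as in the GPflow varying-noise demo)
# that reads each point's noise variance from the second column of Y_data.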
kernel = gpflow.kernels.Matern52(lengthscales=0.5)
model = gpflow.models.VGP((X, Y_data), kernel=kernel, likelihood=likelihood, num_latent_gps=1)


natgrad = NaturalGradient(gamma=1.0)
adam = tf.optimizers.Adam()

set_trainable(model.q_mu, False)
set_trainable(model.q_sqrt, False)

for _ in range(ci_niter(1000)):
    natgrad.minimize(model.training_loss, [(model.q_mu, model.q_sqrt)])
    adam.minimize(model.training_loss, model.trainable_variables)
    
    
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
_ = ax.errorbar(
Example #15
    def fit(self, X, Y, Xval, Yval):
        N = X.shape[0]

        if self.var_dist == "diag":
            q_diag = True
        elif self.var_dist == "full":
            q_diag = False
        else:
            raise NotImplementedError(
                "GPFlow cannot implement %s variational distribution" %
                (self.var_dist))

        if self.do_classif:
            if self.num_classes == 2:
                likelihood = gpflow.likelihoods.Bernoulli()
                num_latent = 1
            else:
                # Softmax better than Robustmax (apparently per the gpflow slack)
                #likelihood = gpflow.likelihoods.MultiClass(self.num_classes, invlink=invlink)  # Multiclass likelihood
                likelihood = gpflow.likelihoods.Softmax(self.num_classes)
                num_latent = self.num_classes
                # Y must be 1D for the multiclass model to actually work.
                Y = np.argmax(Y, 1).reshape((-1, 1)).astype(int)
        else:
            num_latent = 1
            likelihood = gpflow.likelihoods.Gaussian()

        self.model = SVGP(kernel=self.kernel,
                          likelihood=likelihood,
                          inducing_variable=self.Z,
                          num_data=N,
                          num_latent_gps=num_latent,
                          whiten=False,
                          q_diag=q_diag)
        # Setup training
        if not self.train_hyperparams:
            set_trainable(self.model.inducing_variable.Z, False)
            set_trainable(self.kernel.lengthscales, False)
            set_trainable(self.kernel.variance, False)
        if self.natgrad_lr > 0:
            set_trainable(self.model.q_mu, False)
            set_trainable(self.model.q_sqrt, False)
            variational_params = [(self.model.q_mu, self.model.q_sqrt)]
        # Create the optimizers
        adam_opt = tf.optimizers.Adam(self.lr)
        if self.natgrad_lr > 0:
            natgrad_opt = NaturalGradient(gamma=self.natgrad_lr)

        # Print
        gpflow.utilities.print_summary(self.model)
        print("", flush=True)

        # Giacomo: If shuffle buffer is too large it will run OOM
        if self.num_classes == 2:
            Y = (Y + 1) / 2
            Yval = (Yval + 1) / 2
        generator = partial(data_generator, X, Y)
        #train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)) \
        train_dataset = tf.data.Dataset.from_generator(generator, args=(self.batch_size, ), output_types=(tf.float32, tf.float32)) \
            .prefetch(self.batch_size * 10) \
            .repeat() \
            .shuffle(min(N // self.batch_size, 1_000_000 // self.batch_size)) \
            .batch(1)
        train_iter = iter(train_dataset)

        loss = self.model.training_loss_closure(train_iter)
        t_elapsed = 0
        for step in range(self.num_iter):
            t_s = time.time()
            if self.natgrad_lr > 0:
                natgrad_opt.minimize(loss, var_list=variational_params)
            adam_opt.minimize(loss, var_list=self.model.trainable_variables)
            t_elapsed += time.time() - t_s
            if step % 700 == 0:
                print("Step %d -- Elapsed %.2fs" % (step, t_elapsed),
                      flush=True)
            if (step + 1) % self.error_every == 0:
                preds = self.predict(Xval)
                val_err, err_name = self.err_fn(Yval, preds)
                print(
                    f"Step {step + 1} - {t_elapsed:7.2f}s Elapsed - "
                    f"Validation {err_name} {val_err:7.5f}",
                    flush=True)

        preds = self.predict(Xval)
        val_err, err_name = self.err_fn(Yval, preds)
        print(
            f"Finished optimization - {t_elapsed:7.2f}s Elapsed - "
            f"Validation {err_name} {val_err:7.5f}",
            flush=True)
        print("Final model is ")
        gpflow.utilities.print_summary(self.model)
        print("", flush=True)
        return self