def make_dgp(X, Y, Z, L):
    D = X.shape[1]
    Y_mean, Y_std = np.average(Y), np.std(Y)

    # the layer shapes are defined by the kernel dims, so here all hidden layers are D dimensional
    kernels = []
    for l in range(L):
        kernels.append(RBF(D, lengthscales=1., variance=1.))

    # between layer noise (doesn't actually make much difference but we include it anyway)
    for l in range(L - 1):
        kernels[l] += White(D, variance=1e-5)

    mb = 10000 if X.shape[0] > 10000 else None
    model = DGP(X, Y, Z, kernels, Gaussian(), num_samples=1, minibatch_size=mb)

    # same final layer inits we used for the single layer model
    model.layers[-1].kern.variance = Y_std**2
    model.likelihood.variance = Y_std * 0.1
    model.layers[-1].mean_function = Constant(Y_mean)
    model.layers[-1].mean_function.fixed = True

    # start the inner layers almost deterministically
    for layer in model.layers[:-1]:
        layer.q_sqrt = layer.q_sqrt.value * 1e-5

    return model
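A minimal usage sketch for the helper above (not part of the original snippet). It assumes GPflow 1.x together with the doubly-stochastic DGP package, so the import paths and the AdamOptimizer call are assumptions about that setup:

# hedged usage sketch: toy data, k-means inducing points, Adam optimisation
import numpy as np
import gpflow
from scipy.cluster.vq import kmeans2
from gpflow.kernels import RBF, White
from gpflow.likelihoods import Gaussian
from gpflow.mean_functions import Constant
from doubly_stochastic_dgp.dgp import DGP

X = np.random.randn(500, 4)                          # toy inputs
Y = np.sin(X[:, :1]) + 0.1 * np.random.randn(500, 1) # toy targets
Z = kmeans2(X, 50, minit='points')[0]                # inducing inputs via k-means

model = make_dgp(X, Y, Z, L=2)                       # two GP layers
gpflow.train.AdamOptimizer(0.01).minimize(model, maxiter=2000)  # assumed GPflow 1.x optimiser API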
Example #2
    def _fit(self, X, Y, lik, **kwargs):
        if len(Y.shape) == 1:
            Y = Y[:, None]

        kerns = []
        if not self.model:
            with tf.variable_scope('theta'):
                for _ in range(self.ARGS.n_layers):
                    if _ == 0:
                        kerns.append(
                            SquaredExponential(X.shape[1],
                                               ARD=True,
                                               lengthscales=float(
                                                   X.shape[1])**0.5))
                    else:
                        kerns.append(
                            SquaredExponential(self.ARGS.inter_dim,
                                               ARD=True,
                                               lengthscales=float(
                                                   self.ARGS.inter_dim)**0.5))
                lik = MultiClass(10)
            minibatch_size = self.ARGS.minibatch_size if X.shape[
                0] > self.ARGS.minibatch_size else X.shape[0]

            self.model = DGP(X=X,
                             Y=Y,
                             n_inducing=self.ARGS.n_inducing,
                             kernels=kerns,
                             likelihood=lik,
                             minibatch_size=minibatch_size,
                             inter_dim=self.ARGS.inter_dim,
                             **kwargs)

        self.model.reset(X, Y)

        try:
            for _ in range(self.ARGS.iterations):
                self.model.train_hypers()
                if _ % 50 == 1:
                    print('Iteration {}'.format(_))
                    self.model.print_sample_performance()
        except KeyboardInterrupt:  # pragma: no cover
            pass
Example #3
    def _fit(self, X, Y, Xs, Ys, Y_std, lik, **kwargs):
        if len(Y.shape) == 1:
            Y = Y[:, None]

        kerns = []
        if not self.model:
            with tf.variable_scope('theta'):
                for _ in range(self.ARGS["n_layers"]):
                    kerns.append(
                        SquaredExponential(X.shape[1],
                                           ARD=self.ARGS["ard"],
                                           lengthscales=float(
                                               X.shape[1])**0.5))
            minibatch_size = self.ARGS["minibatch_size"] if X.shape[
                0] > self.ARGS["minibatch_size"] else X.shape[0]

            self.model = DGP(X=X,
                             Y=Y,
                             n_inducing=self.ARGS["num_inducing"],
                             kernels=kerns,
                             likelihood=lik,
                             minibatch_size=minibatch_size,
                             adam_lr=self.ARGS["lr"],
                             **kwargs)
        self.model.reset(X, Y)

        try:
            for _ in range(self.ARGS["iterations"]):
                self.model.train_hypers()
                if _ % 50 == 1:
                    print('Iteration {}:'.format(_))
                    self.model.print_sample_performance()
                    m, v = self.predict(Xs)
                    print(
                        '######## Test set MLL:',
                        np.mean(
                            norm.logpdf(Y_std * Ys, Y_std * m,
                                        Y_std * np.sqrt(v))))
        except KeyboardInterrupt:  # pragma: no cover
            pass
Example #4
def make_dgp(L):

    # kernels = [ckern.WeightedColourPatchConv(RBF(25*1, lengthscales=10., variance=10.), [28, 28], [5, 5], colour_channels=1)]
    kernels = [RBF(784, lengthscales=10., variance=10.)]
    for l in range(L-1):
        kernels.append(RBF(50, lengthscales=10., variance=10.))
    model = DGP(X, Y, Z, kernels, gpflow.likelihoods.MultiClass(num_classes),
                minibatch_size=minibatch_size,
                num_outputs=num_classes, dropout=0.0)
    
    # start things deterministic 
    for layer in model.layers[:-1]:
        layer.q_sqrt = layer.q_sqrt.value * 1e-5 
    
    return model
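The helper above closes over X, Y, Z, num_classes and minibatch_size from the enclosing script. A hedged sketch of how those names might be set up for flattened 28x28 inputs (the data here is purely illustrative):

# hedged setup sketch for the globals the helper above expects (illustrative data)
import numpy as np
from scipy.cluster.vq import kmeans2

X = np.random.rand(1000, 784)                                 # e.g. flattened 28x28 images
Y = np.random.randint(0, 10, size=(1000, 1)).astype(float)    # integer class labels
num_classes = 10
minibatch_size = 256
Z = kmeans2(X, 100, minit='points')[0]                        # inducing inputs via k-means

model = make_dgp(L=3)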
Example #5
        def make_dgp_as_sgp(kernels):
            m_dgp = DGP(X, Y, Z, kernels, Gaussian())
            
            #set final layer to sgp
            m_dgp.layers[-1].kern.lengthscales = ls
            m_dgp.layers[-1].kern.variance = s
            m_dgp.likelihood.variance = noise
            m_dgp.layers[-1].q_mu = q_mu
            m_dgp.layers[-1].q_sqrt = q_sqrt
            
            # set other layers to identity
            for layer in m_dgp.layers[:-1]:
                # 1e-6 gives errors of 1e-3, so need to set right down
                layer.kern.variance.transform._lower = 1e-18
                layer.kern.variance = 1e-18
                
            return m_dgp
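The helper above is a consistency check: with the inner layers switched off, the DGP should behave like the single-layer GP whose parameters (ls, s, noise, q_mu, q_sqrt) it copies. A hedged sketch of that check, written for the same enclosing scope (m_sgp, Xs and the sample count are assumptions, not part of the original):

        # hedged sketch: the collapsed DGP should match the single-layer model it copies
        m_dgp = make_dgp_as_sgp(kernels)
        mean_samples, var_samples = m_dgp.predict_y(Xs, 100)   # S x N x D Monte Carlo estimate
        mean_sgp, var_sgp = m_sgp.predict_y(Xs)                # assumed single-layer model
        assert np.allclose(np.mean(mean_samples, 0), mean_sgp, atol=1e-3)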
Example #6
def make_dgp(X, Y, Z, L):

    Y_mean, Y_std = np.average(Y), np.std(Y)

    # the layer shapes are defined by the kernel dims, so here all hidden layers are D dimensional
    kernels = []
    for l in range(L):
        kernels.append(RBF(input_dim=17, ARD=True))

    mb = 128 if X.shape[0] > 128 else None
    model = DGP(X, Y, Z, kernels, Gaussian(), num_samples=1, minibatch_size=mb)

    # same final layer inits we used for the single layer model
    #model.layers[-1].kern.variance = Y_std**2
    model.likelihood.variance = 0.01
    #model.layers[-1].mean_function = Constant(Y_mean)
    #model.layers[-1].mean_function.fixed = True

    # start the inner layers almost deterministically
    for layer in model.layers[:-1]:
        layer.q_sqrt = layer.q_sqrt.value * 1e-5

    return model
Example #7
def main(args):
    datasets = Datasets(data_path=args.data_path)

    # Prepare output files
    outname1 = '../tmp/' + args.dataset + '_' + str(args.num_layers) + '_'\
            + str(args.num_inducing) + '.nll'
    if not os.path.exists(os.path.dirname(outname1)):
        os.makedirs(os.path.dirname(outname1))
    outfile1 = open(outname1, 'w')
    outname2 = '../tmp/' + args.dataset + '_' + str(args.num_layers) + '_'\
            + str(args.num_inducing) + '.time'
    outfile2 = open(outname2, 'w')

    running_loss = 0
    running_time = 0
    for i in range(args.splits):
        print('Split: {}'.format(i))
        print('Getting dataset...')
        data = datasets.all_datasets[args.dataset].get_data(i)
        X, Y, Xs, Ys, Y_std = [
            data[_] for _ in ['X', 'Y', 'Xs', 'Ys', 'Y_std']
        ]
        Z = kmeans2(X, args.num_inducing, minit='points')[0]

        # set up batches
        batch_size = args.M if args.M < X.shape[0] else X.shape[0]
        train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)).repeat()\
                .prefetch(X.shape[0]//2)\
                .shuffle(buffer_size=(X.shape[0]//2))\
                .batch(batch_size)

        print('Setting up DGP model...')
        kernels = []
        for l in range(args.num_layers):
            kernels.append(SquaredExponential() + White(variance=1e-5))

        dgp_model = DGP(X.shape[1],
                        kernels,
                        Gaussian(variance=0.05),
                        Z,
                        num_outputs=Y.shape[1],
                        num_samples=args.num_samples,
                        num_data=X.shape[0])

        # initialise inner layers almost deterministically
        for layer in dgp_model.layers[:-1]:
            layer.q_sqrt = Parameter(layer.q_sqrt.value() * 1e-5,
                                     transform=triangular())

        optimiser = tf.optimizers.Adam(args.learning_rate)

        def optimisation_step(model, X, Y):
            with tf.GradientTape() as tape:
                tape.watch(model.trainable_variables)
                obj = -model.elbo(X, Y, full_cov=False)
                grad = tape.gradient(obj, model.trainable_variables)
            optimiser.apply_gradients(zip(grad, model.trainable_variables))

        def monitored_training_loop(model, train_dataset, logdir, iterations,
                                    logging_iter_freq):
            # TODO: use tensorboard to log trainables and performance
            tf_optimisation_step = tf.function(optimisation_step)
            batches = iter(train_dataset)

            for i in range(iterations):
                X, Y = next(batches)
                tf_optimisation_step(model, X, Y)

                iter_id = i + 1
                if iter_id % logging_iter_freq == 0:
                    tf.print(
                        f'Epoch {iter_id}: ELBO (batch) {model.elbo(X, Y)}')

        print('Training DGP model...')
        t0 = time.time()
        monitored_training_loop(dgp_model,
                                train_dataset,
                                logdir=args.log_dir,
                                iterations=args.iterations,
                                logging_iter_freq=args.logging_iter_freq)
        t1 = time.time()
        print('Time taken to train: {}'.format(t1 - t0))
        outfile2.write('Split {}: {}\n'.format(i + 1, t1 - t0))
        outfile2.flush()
        os.fsync(outfile2.fileno())
        running_time += t1 - t0

        m, v = dgp_model.predict_y(Xs, num_samples=args.test_samples)
        test_nll = np.mean(
            logsumexp(norm.logpdf(Ys * Y_std, m * Y_std, v**0.5 * Y_std),
                      0,
                      b=1 / float(args.test_samples)))
        print('Average test log likelihood: {}'.format(test_nll))
        outfile1.write('Split {}: {}\n'.format(i + 1, test_nll))
        outfile1.flush()
        os.fsync(outfile1.fileno())
        running_loss += test_nll

    outfile1.write('Average: {}\n'.format(running_loss / args.splits))
    outfile2.write('Average: {}\n'.format(running_time / args.splits))
    outfile1.close()
    outfile2.close()
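main expects an argparse namespace carrying the fields referenced above. A hedged sketch of the corresponding parser (the field names are taken from the function body; the default values are assumptions):

# hedged argparse sketch; only the field names come from main() above,
# the defaults are assumptions
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='../data/')
    parser.add_argument('--dataset', default='boston')
    parser.add_argument('--splits', type=int, default=20)
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--num_inducing', type=int, default=100)
    parser.add_argument('--M', type=int, default=1000)               # minibatch size
    parser.add_argument('--num_samples', type=int, default=1)
    parser.add_argument('--test_samples', type=int, default=100)
    parser.add_argument('--learning_rate', type=float, default=0.01)
    parser.add_argument('--iterations', type=int, default=10000)
    parser.add_argument('--logging_iter_freq', type=int, default=500)
    parser.add_argument('--log_dir', default='./logs/')
    main(parser.parse_args())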
Example #8
kernels = []
for l in range(L):
    kernels.append(RBF(1, lengthscales=0.2, variance=1))

kernels = [
    RBF(1, lengthscales=0.2, variance=1),
    RBF(2, lengthscales=0.2, variance=1)
]

N, M = 50, 25
X = np.random.uniform(0, 1, N)[:, None]
Z = np.random.uniform(0, 1, M)[:, None]
f = lambda x: 0. if x < 0.5 else 1.
Y = np.reshape([f(x) for x in X], X.shape) + np.random.randn(*X.shape) * 1e-2

m_dgp = DGP(X, Y, Z, kernels, Gaussian(), num_samples=1)
for layer in m_dgp.layers[:-1]:
    layer.q_sqrt = layer.q_sqrt.value * 1e-5


class CB(object):
    def __init__(self, model, record_every=10):
        self.model = model
        self.i = 0
        self.res = []
        self.record_every = record_every

    def cb(self, x):
        self.i += 1
        if self.i % self.record_every == 0:
            self.model.set_state(x)
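CB is a per-iteration optimisation callback: cb receives the flattened parameter vector x, pushes it back into the model every record_every steps, and self.res is where diagnostics would be collected. A hedged sketch of instantiating it for the toy model above; how it is handed to an optimiser depends on the optimiser's callback hook, which is not shown in the original:

# hedged usage sketch; the optimiser hook name below is an assumption
cb = CB(m_dgp, record_every=10)
# e.g. optimizer.minimize(m_dgp, step_callback=cb.cb)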
Example #9
def main(args):
    datasets = Datasets(data_path=args.data_path)

    # prepare output files
    outname1 = args.results_dir + args.dataset + '_' + str(args.num_layers) + '_'\
            + str(args.num_inducing) + '.rmse'
    if not os.path.exists(os.path.dirname(outname1)):
        os.makedirs(os.path.dirname(outname1))
    outfile1 = open(outname1, 'w')

    outname2 = args.results_dir + args.dataset + '_' + str(args.num_layers) + '_'\
            + str(args.num_inducing) + '.nll'
    outfile2 = open(outname2, 'w')

    outname3 = args.results_dir + args.dataset + '_' + str(args.num_layers) + '_'\
            + str(args.num_inducing) + '.time'
    outfile3 = open(outname3, 'w')

    # =========================================================================
    # CROSS-VALIDATION LOOP
    # =========================================================================
    running_err = 0
    running_loss = 0
    running_time = 0
    test_errs = np.zeros(args.splits)
    test_nlls = np.zeros(args.splits)
    test_times = np.zeros(args.splits)
    for i in range(args.splits):
        # =====================================================================
        # MODEL CONSTRUCTION
        # =====================================================================
        print('Split: {}'.format(i))
        print('Getting dataset...')
        # get dataset
        data = datasets.all_datasets[args.dataset].get_data(
            i, normalize=args.normalize_data)
        X, Y, Xs, Ys, Y_std = [
            data[_] for _ in ['X', 'Y', 'Xs', 'Ys', 'Y_std']
        ]

        # inducing points via k-means
        Z = kmeans2(X, args.num_inducing, minit='points')[0]

        # set up batches
        batch_size = args.M if args.M < X.shape[0] else X.shape[0]
        train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)).repeat()\
            .prefetch(X.shape[0]//2)\
            .shuffle(buffer_size=(X.shape[0]//2))\
            .batch(batch_size)

        print('Setting up DGP model...')
        kernels = []
        dims = []

        # hidden_dim = min(args.max_dim, X.shape[1])
        hidden_dim = X.shape[1] if X.shape[1] < args.max_dim else args.max_dim
        for l in range(args.num_layers):
            if l == 0:
                dim = X.shape[1]
                dims.append(dim)
            else:
                dim = hidden_dim
                dims.append(dim)

            if args.ard:
                # SE kernel with lengthscale per dimension
                kernels.append(
                    SquaredExponential(lengthscale=[1.] * dim) +
                    White(variance=1e-5))
            else:
                # SE kernel with single lengthscale
                kernels.append(
                    SquaredExponential(lengthscale=1.) + White(variance=1e-5))

        # output dim
        dims.append(Y.shape[1])

        dgp_model = DGP(X,
                        Y,
                        Z,
                        dims,
                        kernels,
                        Gaussian(variance=0.05),
                        num_samples=args.num_samples,
                        num_data=X.shape[0])

        # initialise inner layers almost deterministically
        for layer in dgp_model.layers[:-1]:
            layer.q_sqrt = Parameter(layer.q_sqrt.value() * 1e-5,
                                     transform=triangular())

        # =====================================================================
        # TRAINING
        # =====================================================================
        optimiser = tf.optimizers.Adam(args.learning_rate)

        print('Training DGP model...')
        t0 = time.time()
        # training loop
        monitored_training_loop(dgp_model,
                                train_dataset,
                                optimiser=optimiser,
                                logdir=args.log_dir,
                                iterations=args.iterations,
                                logging_iter_freq=args.logging_iter_freq)
        t1 = time.time()

        # =====================================================================
        # TESTING
        # =====================================================================
        test_times[i] = t1 - t0
        print('Time taken to train: {}'.format(t1 - t0))
        outfile3.write('Split {}: {}\n'.format(i + 1, t1 - t0))
        outfile3.flush()
        os.fsync(outfile3.fileno())
        running_time += t1 - t0

        # minibatch test predictions
        means, vars = [], []
        test_batch_size = args.test_batch_size
        if len(Xs) > test_batch_size:
            for mb in range(-(-len(Xs) // test_batch_size)):
                m, v = dgp_model.predict_y(Xs[mb * test_batch_size:(mb + 1) *
                                              test_batch_size, :],
                                           num_samples=args.test_samples)
                means.append(m)
                vars.append(v)
        else:
            m, v = dgp_model.predict_y(Xs, num_samples=args.test_samples)
            means.append(m)
            vars.append(v)

        mean_SND = np.concatenate(means, 1)  # [S, N, D]
        var_SND = np.concatenate(vars, 1)  # [S, N, D]
        mean_ND = np.mean(mean_SND, 0)  # [N, D]

        # rmse
        test_err = np.mean(Y_std * np.mean((Ys - mean_ND)**2.0)**0.5)
        test_errs[i] = test_err
        print('Average RMSE: {}'.format(test_err))
        outfile1.write('Split {}: {}\n'.format(i + 1, test_err))
        outfile1.flush()
        os.fsync(outfile1.fileno())
        running_err += test_err

        # nll
        test_nll = np.mean(
            logsumexp(norm.logpdf(Ys * Y_std, mean_SND * Y_std,
                                  var_SND**0.5 * Y_std),
                      0,
                      b=1 / float(args.test_samples)))
        test_nlls[i] = test_nll
        print('Average test log likelihood: {}'.format(test_nll))
        outfile2.write('Split {}: {}\n'.format(i + 1, test_nll))
        outfile2.flush()
        os.fsync(outfile2.fileno())
        running_loss += test_nll

    outfile1.write('Average: {}\n'.format(running_err / args.splits))
    outfile1.write('Standard deviation: {}\n'.format(np.std(test_errs)))
    outfile2.write('Average: {}\n'.format(running_loss / args.splits))
    outfile2.write('Standard deviation: {}\n'.format(np.std(test_nlls)))
    outfile3.write('Average: {}\n'.format(running_time / args.splits))
    outfile3.write('Standard deviation: {}\n'.format(np.std(test_times)))
    outfile1.close()
    outfile2.close()
    outfile3.close()
Example #10
class RegressionModel(object):
    def __init__(self,
                 lr,
                 max_iterations,
                 n_layers=5,
                 num_inducing=128,
                 minibatch_size=10000,
                 n_posterior_samples=100,
                 ard=True):
        tf.reset_default_graph()
        ARGS = {
            "n_layers": n_layers,
            "num_inducing": num_inducing,
            "iterations": max_iterations,
            "minibatch_size": minibatch_size,
            "n_posterior_samples": n_posterior_samples,
            "ard": ard,
            "lr": lr
        }
        self.ARGS = ARGS
        self.model = None
        print("================ Regression Model =================")
        print("ARD is {}".format(self.ARGS["ard"]))

    def fit(self, X, Y, Xs, Ys, Y_std):
        lik = Gaussian(np.var(Y, 0))  # Initialize with variance in Y
        return self._fit(X, Y, Xs, Ys, Y_std, lik)

    def _fit(self, X, Y, Xs, Ys, Y_std, lik, **kwargs):
        if len(Y.shape) == 1:
            Y = Y[:, None]

        kerns = []
        if not self.model:
            with tf.variable_scope('theta'):
                for _ in range(self.ARGS["n_layers"]):
                    kerns.append(
                        SquaredExponential(X.shape[1],
                                           ARD=self.ARGS["ard"],
                                           lengthscales=float(
                                               X.shape[1])**0.5))
            minibatch_size = self.ARGS["minibatch_size"] if X.shape[
                0] > self.ARGS["minibatch_size"] else X.shape[0]

            self.model = DGP(X=X,
                             Y=Y,
                             n_inducing=self.ARGS["num_inducing"],
                             kernels=kerns,
                             likelihood=lik,
                             minibatch_size=minibatch_size,
                             adam_lr=self.ARGS["lr"],
                             **kwargs)
        self.model.reset(X, Y)

        try:
            for _ in range(self.ARGS["iterations"]):
                self.model.train_hypers()
                if _ % 50 == 1:
                    print('Iteration {}:'.format(_))
                    self.model.print_sample_performance()
                    m, v = self.predict(Xs)
                    print(
                        '######## Test set MLL:',
                        np.mean(
                            norm.logpdf(Y_std * Ys, Y_std * m,
                                        Y_std * np.sqrt(v))))
        except KeyboardInterrupt:  # pragma: no cover
            pass

    def _predict(self, Xs, S):
        ms, vs = [], []
        n = max(len(Xs) // 100, 1)  # predict in small batches
        for xs in np.array_split(Xs, n):
            m, v = self.model.predict_y(xs, S)
            ms.append(m)
            vs.append(v)

        return np.concatenate(ms, 1), np.concatenate(vs, 1)

    def predict(self, Xs):
        ms, vs = self._predict(Xs, self.ARGS["n_posterior_samples"])
        m = np.average(ms, 0)
        v = np.average(vs + ms**2, 0) - m**2
        return m, v
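A hedged end-to-end sketch of RegressionModel on synthetic data (the toy data and the Y_std convention below are illustrative, not from the original):

# hedged usage sketch on toy data; Y_std mirrors the benchmark convention of
# reporting metrics on the original target scale
import numpy as np

X = np.random.randn(500, 3)
Y = np.sin(X[:, :1]) + 0.1 * np.random.randn(500, 1)
Xs = np.random.randn(100, 3)
Ys = np.sin(Xs[:, :1]) + 0.1 * np.random.randn(100, 1)
Y_std = 1.0                      # targets already on unit scale in this toy example

model = RegressionModel(lr=0.01, max_iterations=1000)
model.fit(X, Y, Xs, Ys, Y_std)
m, v = model.predict(Xs)         # predictive mean and variance per test point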
Example #11
class ClassificationModel(object):
    def __init__(self, layers, inducing):
        class ARGS:
            n_layers = layers
            iterations = 1001
            minibatch_size = 256
            n_posterior_samples = 100
            n_inducing = inducing
            inter_dim = 98

        self.ARGS = ARGS
        self.model = None

    def fit(self, X, Y):
        # lik = Gaussian(np.var(Y, 0)) # initialize with variance in Y
        lik = None
        return self._fit(X, Y, lik)

    def _fit(self, X, Y, lik, **kwargs):
        if len(Y.shape) == 1:
            Y = Y[:, None]

        kerns = []
        if not self.model:
            with tf.variable_scope('theta'):
                for _ in range(self.ARGS.n_layers):
                    if _ == 0:
                        kerns.append(
                            SquaredExponential(X.shape[1],
                                               ARD=True,
                                               lengthscales=float(
                                                   X.shape[1])**0.5))
                    else:
                        kerns.append(
                            SquaredExponential(self.ARGS.inter_dim,
                                               ARD=True,
                                               lengthscales=float(
                                                   self.ARGS.inter_dim)**0.5))
                lik = MultiClass(10)
            minibatch_size = self.ARGS.minibatch_size if X.shape[
                0] > self.ARGS.minibatch_size else X.shape[0]

            self.model = DGP(X=X,
                             Y=Y,
                             n_inducing=self.ARGS.n_inducing,
                             kernels=kerns,
                             likelihood=lik,
                             minibatch_size=minibatch_size,
                             inter_dim=self.ARGS.inter_dim,
                             **kwargs)

        self.model.reset(X, Y)

        try:
            for _ in range(self.ARGS.iterations):
                self.model.train_hypers()
                if _ % 50 == 1:
                    print('Iteration {}'.format(_))
                    self.model.print_sample_performance()
        except KeyboardInterrupt:  # pragma: no cover
            pass

    def _predict(self, Xs, S):
        ms, vs = [], []
        n = max(len(Xs) // 100, 1)  # predict in small batches
        for xs in np.array_split(Xs, n):
            m, v = self.model.predict_y(xs, S)
            ms.append(m)
            vs.append(v)

        return np.concatenate(ms, 1), np.concatenate(
            vs, 1)  # n_posterior_samples, N_test, D_y

    def predict(self, Xs):
        ms, vs = self._predict(Xs, self.ARGS.n_posterior_samples)
        # the first two moments
        m = np.average(ms, 0)
        v = np.average(vs + ms**2, 0) - m**2
        return m, v
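A hedged usage sketch for the classification wrapper; note that _fit hard-wires the likelihood to MultiClass(10), so the toy labels below use ten classes:

# hedged usage sketch; labels must be integer class indices in [0, 10) because
# the likelihood is hard-wired to MultiClass(10) above
import numpy as np

X = np.random.rand(1000, 784)
Y = np.random.randint(0, 10, size=(1000, 1))
Xs = np.random.rand(200, 784)

model = ClassificationModel(layers=2, inducing=100)
model.fit(X, Y)
probs, _ = model.predict(Xs)     # averaged per-class predictive probabilities
pred = np.argmax(probs, axis=1)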