def test_bug_277_regression():
    """
    See github issue #277. This is a regression test.
    """
    model1, model2 = Linear(), Linear()
    assert model1.b.numpy() == model2.b.numpy()
    model2.b.assign([1.])
    assert not model1.b.numpy() == model2.b.numpy()
Example #2
def init_layers(X, Z, dims, final_mean_function):
    M = Z.shape[0]
    q_mus, q_sqrts, mean_functions, Zs = [], [], [], []
    X_running, Z_running = X.copy(), Z.copy()

    for dim_in, dim_out in zip(dims[:-2], dims[1:-1]):
        if dim_in == dim_out:  # identity for same dims
            W = np.eye(dim_in)
        elif dim_in > dim_out:  # use PCA mf for stepping down
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:  # identity + pad with zeros for stepping up
            I = np.eye(dim_in)
            zeros = np.zeros((dim_in, dim_out - dim_in))
            W = np.concatenate([I, zeros], 1)

        mean_functions.append(Linear(A=W))
        Zs.append(Z_running.copy())
        q_mus.append(np.zeros((M, dim_out)))
        q_sqrts.append(np.eye(M)[:, :, None] * np.ones((1, 1, dim_out)))

        Z_running = Z_running.dot(W)
        X_running = X_running.dot(W)

    # final layer (uses the supplied final_mean_function)
    mean_functions.append(final_mean_function)
    Zs.append(Z_running.copy())
    q_mus.append(np.zeros((M, dims[-1])))
    q_sqrts.append(np.eye(M)[:, :, None] * np.ones((1, 1, dims[-1])))

    return q_mus, q_sqrts, Zs, mean_functions
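
For reference, the three W choices above (identity, PCA projection, zero-padded identity) can be exercised in isolation; this is a minimal numpy-only sketch with a hypothetical helper and dimensions, not part of the original code:

import numpy as np

def projection_matrix(X_running, dim_in, dim_out):
    """Mean-function matrix W between a dim_in layer and a dim_out layer."""
    if dim_in == dim_out:
        # same width: keep the representation unchanged
        return np.eye(dim_in)
    if dim_in > dim_out:
        # stepping down: project onto the top dim_out right-singular vectors (PCA)
        _, _, V = np.linalg.svd(X_running, full_matrices=False)
        return V[:dim_out, :].T
    # stepping up: identity on the first dim_in coordinates, zeros for the rest
    return np.concatenate([np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)

X = np.random.randn(100, 5)
print(projection_matrix(X, 5, 5).shape)  # (5, 5)
print(projection_matrix(X, 5, 3).shape)  # (5, 3)
print(projection_matrix(X, 5, 8).shape)  # (5, 8)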
Example #3
    def setUp(self):
        self.rng = np.random.RandomState(42)
        input_dim = 2
        output_dim = 2
        kern_list = [RBF(2) for _ in range(output_dim)]
        self.W0 = np.zeros((input_dim, output_dim))
        mean_function = Linear(A=self.W0)
        self.Z = self.rng.randn(5, 2)
        num_inducing = 5

        self.layer = MultikernelHiddenLayer(input_dim=input_dim,
                                            output_dim=output_dim,
                                            num_inducing=num_inducing,
                                            kernel_list=kern_list,
                                            share_Z=False,
                                            mean_function=mean_function)

        self.layer_shared_Z = MultikernelHiddenLayer(
            input_dim=input_dim,
            output_dim=output_dim,
            num_inducing=num_inducing,
            kernel_list=kern_list,
            share_Z=True,
            mean_function=mean_function)

        self.X = self.rng.randn(10, 2)
Example #4
def init_layers_linear(X, Y, Z, kernels, layer_sizes, mean_function=Zero(),
                       num_outputs=None, Layer=SVGPLayer, whiten=False):
    num_outputs = num_outputs or Y.shape[1]
    layers = []

    X_running, Z_running = X.copy(), Z.copy()
    for in_idx, kern_in in enumerate(kernels[:-1]):
        dim_in = layer_sizes[in_idx]
        dim_out = layer_sizes[in_idx+1]

        # Initialize mean function to be either Identity or PCA projection
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                # use eigenvectors corresponding to dim_out largest eigenvalues
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:                 # stepping up, use identity + padding
                W = np.concatenate([np.eye(dim_in),
                                    np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            gpflow.set_trainable(mf.A, False)
            gpflow.set_trainable(mf.b, False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=whiten))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(Layer(kernels[-1], Z_running, num_outputs, mean_function,
                        white=whiten))
    return layers
def mean_function_factory(mean_function_name, D_in, D_out):
    if mean_function_name == "Zero":
        return Zero(output_dim=D_out)
    elif mean_function_name == "Constant":
        return Constant(c=rng.rand(D_out))
    elif mean_function_name == "Linear":
        return Linear(A=rng.rand(D_in, D_out), b=rng.rand(D_out))
    else:
        return None
def init_linear(X, Z, all_kernels, initialized_Zs=False):
    """
    if there are no Zs from an initialization (e.g. for warm-starting),
    all_Zs is initialized according to the Salimbeni scheme (Z should be MxD).
    otherwise the Zs obtained from the initialization are simply taken and put
    into the all_Zs array (Z should be a list of L arrays)
    """
    if initialized_Zs:
        all_Zs = Z
    else:
        all_Zs = []
    all_mean_funcs = []
    X_running = X.copy()
    if not initialized_Zs:
        Z_running = Z.copy()
    for kern_in, kern_out in zip(all_kernels[:-1], all_kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T

            else:  # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in),
                     np.zeros((dim_in, dim_out - dim_in))], 1)

            mf = Linear(W)
            mf.set_trainable(False)

        all_mean_funcs.append(mf)
        if not initialized_Zs:
            all_Zs.append(Z_running)

        if dim_in != dim_out:
            X_running = X_running.dot(W)
            if not initialized_Zs:
                Z_running = Z_running.dot(W)

    # final layer
    all_mean_funcs.append(Zero())
    if not initialized_Zs:
        all_Zs.append(Z_running)
    return all_Zs, all_mean_funcs
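
A minimal usage sketch of init_linear, assuming GPflow 1.x-style kernels (RBF(input_dim)) and that init_linear is in scope; the architecture and shapes are hypothetical:

import numpy as np
from gpflow.kernels import RBF  # GPflow 1.x signature: RBF(input_dim)

X = np.random.randn(200, 5)           # N x D training inputs
Z = np.random.randn(50, 5)            # M x D inducing inputs
kernels = [RBF(5), RBF(3), RBF(1)]    # a 5 -> 3 -> 1 architecture

all_Zs, all_mean_funcs = init_linear(X, Z, kernels)
# all_Zs[l] has the input dimension expected by kernels[l];
# hidden layers get a fixed Identity/Linear mean function, the final layer Zero().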
Example #7
    def setUp(self):
        self.rng = np.random.RandomState(42)
        kernel = RBF(2)
        input_dim = 2
        output_dim = 2
        self.W0 = np.zeros((input_dim, output_dim))
        mean_function = Linear(A=self.W0)
        self.Z = self.rng.randn(5, 2)
        num_inducing = 5

        self.layer = HiddenLayer(input_dim=input_dim,
                                 output_dim=output_dim,
                                 num_inducing=num_inducing,
                                 kernel=kernel,
                                 mean_function=mean_function)

        self.X = self.rng.randn(10, 2)
    def prepare(self):
        N = 100
        M = 10
        rng = np.random.RandomState(42)
        X = rng.randn(N, 2)
        Y = rng.randn(N, 1)
        Z = rng.randn(M, 2)

        X_ind = rng.randint(0, 2, (N, 1))
        Z_ind = rng.randint(0, 2, (M, 1))

        X = np.hstack([X, X_ind])
        Y = np.hstack([Y, X_ind])
        Z = np.hstack([Z, Z_ind])

        Xs = rng.randn(M, 2)
        Xs_ind = rng.randint(0, 2, (M, 1))
        Xs = np.hstack([Xs, Xs_ind])

        with defer_build():
            lik = SwitchedLikelihood([Gaussian(), Gaussian()])

            input_layer = InputLayer(input_dim=2,
                                     output_dim=1,
                                     num_inducing=M,
                                     kernel=RBF(2) + White(2),
                                     mean_function=Linear(A=np.ones((3, 1))),
                                     multitask=True)
            output_layer = OutputLayer(input_dim=1,
                                       output_dim=1,
                                       num_inducing=M,
                                       kernel=RBF(1) + White(1),
                                       multitask=True)

            seq = MultitaskSequential([input_layer, output_layer])

            model = MultitaskDSDGP(X=X,
                                   Y=Y,
                                   Z=Z,
                                   layers=seq,
                                   likelihood=lik,
                                   num_latent=1)
        model.compile()
        return model, Xs
Example #9
def init_layers_linear(X,
                       Y,
                       Z,
                       kernels,
                       num_outputs=None,
                       mean_function=Zero(),
                       Layer=SVGP_Layer,
                       white=False):
    num_outputs = num_outputs or Y.shape[1]

    layers = []

    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        print(dim_in, dim_out)
        if dim_in == dim_out:
            mf = Identity()

        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T

            else:  # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in),
                     np.zeros((dim_in, dim_out - dim_in))], 1)

            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=white))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(
        Layer(kernels[-1], Z_running, num_outputs, mean_function, white=white))
    return layers
Example #10
    def _init_layers(self,
                     X,
                     Y,
                     Z,
                     dims,
                     kernels,
                     mean_function=Zero(),
                     Layer=SVGPIndependentLayer,
                     white=False):
        """Initialise DGP layers to have the same number of outputs as inputs,
        apart from the final layer."""
        layers = []

        X_running, Z_running = X.copy(), Z.copy()
        for i in range(len(kernels) - 1):
            dim_in, dim_out, kern = dims[i], dims[i + 1], kernels[i]
            if dim_in == dim_out:
                mf = Identity()

            else:
                if dim_in > dim_out:
                    _, _, V = np.linalg.svd(X_running, full_matrices=False)
                    W = V[:dim_out, :].T

                else:
                    W = np.concatenate(
                        [np.eye(dim_in),
                         np.zeros((dim_in, dim_out - dim_in))], 1)

                mf = Linear(W)
                set_trainable(mf.A, False)
                set_trainable(mf.b, False)

            layers.append(Layer(kern, Z_running, dim_out, mf, white=white))

            if dim_in != dim_out:
                Z_running = Z_running.dot(W)
                X_running = X_running.dot(W)

        layers.append(
            Layer(kernels[-1], Z_running, dims[-1], mean_function,
                  white=white))
        return layers
    def __init__(self, X, Y, Z, kernels, likelihood, 
                 num_outputs=None,
                 mean_function=Zero(),  # the final layer mean function
                 **kwargs):
        Model.__init__(self)
        num_outputs = num_outputs or Y.shape[1]

        # init the layers
        layers = []

        # inner layers
        X_running, Z_running = X.copy(), Z.copy()
        for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
            dim_in = kern_in.input_dim
            dim_out = kern_out.input_dim

            if dim_in == dim_out:
                mf = Identity()

            else:
                if dim_in > dim_out:  # stepping down, use the pca projection
                    _, _, V = np.linalg.svd(X_running, full_matrices=False)
                    W = V[:dim_out, :].T

                else:  # pad with zeros
                    zeros = np.zeros((dim_in, dim_out - dim_in))
                    W = np.concatenate([np.eye(dim_in), zeros], 1)

                mf = Linear(W)
                mf.set_trainable(False)

            layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

            if dim_in != dim_out:
                Z_running = Z_running.dot(W)
                X_running = X_running.dot(W)


        # final layer
        layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

        DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
Example #12
    def _fit(self, X, F, data):

        if self.regr == 'constant':
            mf = Constant()
        elif self.regr == 'linear':
            mf = Linear(numpy.ones((X.shape[1], 1)), numpy.ones((1, 1)))

        if self.kernel == 'linear':
            kernel = gpflow.kernels.Linear(X.shape[1], ARD=self.ARD)
        elif self.kernel == 'rbf':
            kernel = gpflow.kernels.RBF(X.shape[1], ARD=self.ARD)
        elif self.kernel == 'polynomial':
            kernel = gpflow.kernels.Polynomial(X.shape[1], ARD=self.ARD)

        m = gpflow.gpr.GPR(X,
                           numpy.array([F]).T,
                           kern=kernel,
                           mean_function=mf)
        m.optimize()
        self.model = m
Example #13
    def prepare(self):
        N = 100
        M = 10
        rng = np.random.RandomState(42)
        X = rng.randn(N, 2)
        Y = rng.randn(N, 1)
        Z = rng.randn(M, 2)
        Xs = rng.randn(M, 2)
        lik = Gaussian()
        input_layer = InputLayer(input_dim=2,
                                 output_dim=1,
                                 num_inducing=M,
                                 kernel=RBF(2) + White(2),
                                 mean_function=Linear(A=np.ones((2, 1))))
        output_layer = OutputLayer(input_dim=1,
                                   output_dim=1,
                                   num_inducing=M,
                                   kernel=RBF(1) + White(1))

        seq = Sequential([input_layer, output_layer])

        model = DSDGP(X=X, Y=Y, Z=Z, layers=seq, likelihood=lik)
        model.compile()
        return model, Xs
Example #14
def init_layers(X, dims_in, dims_out, M, final_inducing_points,
                share_inducing_inputs):
    q_mus, q_sqrts, mean_functions, Zs = [], [], [], []
    X_running = X.copy()

    for dim_in, dim_out in zip(dims_in[:-1], dims_out[:-1]):
        if dim_in == dim_out:  # identity for same dims
            W = np.eye(dim_in)
        elif dim_in > dim_out:  # use PCA mf for stepping down
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:  # identity + pad with zeros for stepping up
            I = np.eye(dim_in)
            zeros = np.zeros((dim_out - dim_in, dim_in))
            W = np.concatenate([I, zeros], 0).T

        mean_functions.append(Linear(A=W))
        Zs.append(kmeans2(X_running, M, minit='points')[0])
        if share_inducing_inputs:
            q_mus.append([np.zeros((M, dim_out))])
            q_sqrts.append([np.eye(M)[:, :, None] * np.ones((1, 1, dim_out))])
        else:
            q_mus.append([np.zeros((M, 1))] * dim_out)
            q_sqrts.append([np.eye(M)[:, :, None] * np.ones(
                (1, 1, 1))] * dim_out)

        X_running = X_running.dot(W)

    # final layer (as before but no mean function)
    mean_functions.append(Zero())
    Zs.append(kmeans2(X_running, final_inducing_points, minit='points')[0])
    q_mus.append([np.zeros((final_inducing_points, 1))])
    q_sqrts.append(
        [np.eye(final_inducing_points)[:, :, None] * np.ones((1, 1, 1))])

    return q_mus, q_sqrts, Zs, mean_functions
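
The q_sqrt initialiser above broadcasts one M x M identity Cholesky factor per output dimension; a tiny shape check with hypothetical M and dim_out makes the layout explicit:

import numpy as np

M, dim_out = 10, 3
q_sqrt = np.eye(M)[:, :, None] * np.ones((1, 1, dim_out))
print(q_sqrt.shape)                              # (10, 10, 3)
print(np.allclose(q_sqrt[:, :, 0], np.eye(M)))   # True: one identity factor per output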
Example #15
    def build_model(self,
                    ARGS,
                    X,
                    Y,
                    conditioning=False,
                    apply_name=True,
                    noise_var=None,
                    mean_function=None):

        if not conditioning:
            N, D = X.shape

            # first layer inducing points
            if N > ARGS.M:
                Z = kmeans2(X, ARGS.M, minit='points')[0]
            else:
                # This is the old way of initializing Zs
                # M_pad = ARGS.M - N
                # Z = np.concatenate([X.copy(), np.random.randn(M_pad, D)], 0)

                # This is the new way of initializing Zs
                min_x, max_x = self.bounds[0]
                min_x = (min_x - self.x_mean) / self.x_std
                max_x = (max_x - self.x_mean) / self.x_std

                Z = np.linspace(min_x, max_x, num=ARGS.M)  # * X.shape[1])
                Z = Z.reshape((-1, X.shape[1]))
                #print(min_x)
                #print(max_x)
                #print(Z)

            #################################### layers
            P = np.linalg.svd(X, full_matrices=False)[2]
            # PX = P.copy()

            layers = []
            # quad_layers = []

            DX = D
            DY = 1

            D_in = D
            D_out = D

            with defer_build():

                # variance initialization
                lik = Gaussian()
                lik.variance = ARGS.likelihood_variance

                if len(ARGS.configuration) > 0:
                    for c, d in ARGS.configuration.split('_'):
                        if c == 'G':
                            num_gps = int(d)
                            A = np.zeros((D_in, D_out))
                            D_min = min(D_in, D_out)
                            A[:D_min, :D_min] = np.eye(D_min)
                            mf = Linear(A=A)
                            mf.b.set_trainable(False)

                            def make_kern():
                                k = RBF(D_in,
                                        lengthscales=float(D_in)**0.5,
                                        variance=1.,
                                        ARD=True)
                                k.variance.set_trainable(False)
                                return k

                            PP = np.zeros((D_out, num_gps))
                            PP[:, :min(num_gps, DX)] = P[:, :min(num_gps, DX)]
                            ZZ = np.random.randn(ARGS.M, D_in)
                            # print(Z.shape)
                            # print(ZZ.shape)
                            ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]

                            kern = SharedMixedMok(make_kern(), W=PP)
                            inducing = MixedKernelSharedMof(InducingPoints(ZZ))

                            l = GPLayer(kern,
                                        inducing,
                                        num_gps,
                                        mean_function=mf)
                            if ARGS.fix_linear is True:
                                kern.W.set_trainable(False)
                                mf.set_trainable(False)

                            layers.append(l)

                            D_in = D_out

                        elif c == 'L':
                            d = int(d)
                            D_in += d
                            layers.append(LatentVariableLayer(d,
                                                              XY_dim=DX + 1))

                # kernel initialization
                kern = RBF(D_in,
                           lengthscales=float(D_in)**0.5,
                           variance=1.,
                           ARD=True)
                ZZ = np.random.randn(ARGS.M, D_in)
                ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]
                layers.append(GPLayer(kern, InducingPoints(ZZ), DY))
                self.layers = layers
                self.lik = lik

            # global_step = tf.Variable(0, dtype=tf.int32)
            # self.global_step = global_step
        else:
            lik = self._gp.likelihood
            layers = self._gp.layers._list
            # val = self.session.run(self.global_step)
            # global_step = tf.Variable(val, dtype=tf.int32)
            # self.global_step = global_step
            self._gp.clear()

        with defer_build():

            #################################### model
            name = 'Model' if apply_name else None

            if ARGS.mode == 'VI':
                model = DGP_VI(X,
                               Y,
                               layers,
                               lik,
                               minibatch_size=ARGS.minibatch_size,
                               name=name)

            elif ARGS.mode == 'SGHMC':
                for layer in layers:
                    if hasattr(layer, 'q_sqrt'):
                        del layer.q_sqrt
                        layer.q_sqrt = None
                        layer.q_mu.set_trainable(False)

                model = DGP_VI(X,
                               Y,
                               layers,
                               lik,
                               minibatch_size=ARGS.minibatch_size,
                               name=name)

            elif ARGS.mode == 'IWAE':
                model = DGP_IWVI(X,
                                 Y,
                                 layers,
                                 lik,
                                 minibatch_size=ARGS.minibatch_size,
                                 num_samples=ARGS.num_IW_samples,
                                 name=name)

        global_step = tf.Variable(0, dtype=tf.int32)
        op_increment = tf.assign_add(global_step, 1)

        if not ('SGHMC' == ARGS.mode):
            for layer in model.layers[:-1]:
                if isinstance(layer, GPLayer):
                    layer.q_sqrt = layer.q_sqrt.read_value() * 1e-5

            model.compile()

            #################################### optimization

            var_list = [[model.layers[-1].q_mu, model.layers[-1].q_sqrt]]

            model.layers[-1].q_mu.set_trainable(False)
            model.layers[-1].q_sqrt.set_trainable(False)

            gamma = tf.cast(tf.train.exponential_decay(ARGS.gamma,
                                                       global_step,
                                                       1000,
                                                       ARGS.gamma_decay,
                                                       staircase=True),
                            dtype=tf.float64)
            lr = tf.cast(tf.train.exponential_decay(ARGS.lr,
                                                    global_step,
                                                    1000,
                                                    ARGS.lr_decay,
                                                    staircase=True),
                         dtype=tf.float64)

            op_ng = NatGradOptimizer(gamma=gamma).make_optimize_tensor(
                model, var_list=var_list)

            op_adam = AdamOptimizer(lr).make_optimize_tensor(model)

            def train(s):
                s.run(op_increment)
                s.run(op_ng)
                s.run(op_adam)

            model.train_op = train
            model.init_op = lambda s: s.run(
                tf.variables_initializer([global_step]))
            model.global_step = global_step

        else:
            model.compile()

            sghmc_vars = []
            for layer in layers:
                if hasattr(layer, 'q_mu'):
                    sghmc_vars.append(layer.q_mu.unconstrained_tensor)

            hyper_train_op = AdamOptimizer(ARGS.lr).make_optimize_tensor(model)

            self.sghmc_optimizer = SGHMC(model, sghmc_vars, hyper_train_op,
                                         100)

            def train_op(s):
                s.run(op_increment),
                self.sghmc_optimizer.sghmc_step(s),
                self.sghmc_optimizer.train_hypers(s)

            model.train_op = train_op
            model.sghmc_optimizer = self.sghmc_optimizer

            def init_op(s):
                epsilon = 0.01
                mdecay = 0.05
                with tf.variable_scope('sghmc'):
                    self.sghmc_optimizer.generate_update_step(epsilon, mdecay)
                v = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope='sghmc')
                s.run(tf.variables_initializer(v))
                s.run(tf.variables_initializer([global_step]))

            # Added jitter due to input matrix invertibility problems
            custom_config = gpflow.settings.get_settings()
            custom_config.numerics.jitter_level = 1e-8

            model.init_op = init_op
            model.global_step = global_step

        # build the computation graph for the gradient
        self.X_placeholder = tf.placeholder(tf.float64,
                                            shape=[None, X.shape[1]])
        self.Fs, Fmu, Fvar = model._build_predict(self.X_placeholder)
        self.mean_grad = tf.gradients(Fmu, self.X_placeholder)
        self.var_grad = tf.gradients(Fvar, self.X_placeholder)

        # calculated the gradient of the mean for the quantile-filtered distribution
        # print(Fs)
        # q = np.quantile(Fs, self.quantile, axis=0)
        # qFs = [f for f in Fs if f < q]
        # q_mean = np.mean(qFs, axis=0)
        # q_var = np.var(qFs, axis=0)
        # self.qmean_grad = tf.gradients(q_mean, self.X_placeholder)
        # self.qvar_grad = tf.gradients(q_var, self.X_placeholder)

        return model
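
As an aside, the ARGS.configuration string consumed above is a compact layer specification: each underscore-separated token is a (layer type, size) character pair. A stand-alone sketch of that parsing, with a hypothetical configuration value:

configuration = "G5_L2_G5"             # hypothetical ARGS.configuration
for c, d in configuration.split("_"):  # "G5" unpacks to c="G", d="5"
    if c == "G":
        print(f"GP layer with {int(d)} latent GPs")
    elif c == "L":
        print(f"latent-variable layer of dimension {int(d)}")

Note that the pairwise unpacking only works for single-digit sizes, which is also an implicit constraint of the loops above.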
    def __init__(
            self,
            X,
            Y,
            Z,
            kernels,
            likelihood,
            num_outputs=None,
            mean_function=Zero(),  # the final layer mean function
            **kwargs):
        Model.__init__(self)
        num_outputs = num_outputs or Y.shape[1]

        # init the layers
        layers = []

        # inner layers
        X_running, Z_running = X.copy(), Z.copy()
        for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):

            if isinstance(kern_in, Conv):
                dim_in = kern_in.basekern.input_dim
            else:
                dim_in = kern_in.input_dim
            '''  
            if isinstance(kern_out,Conv):
                dim_out = kern_out.basekern.input_dim
            else:
                dim_out = kern_out.input_dim
            '''
            dim_out = kern_out.input_dim

            if dim_in == dim_out:
                mf = Identity()

            else:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
                b = np.zeros(1, dtype=np.float32)
                mf = Linear(W, b)
                mf.set_trainable(False)

            if isinstance(kern_in, Conv):
                Z_patch = np.unique(kern_in.compute_patches(Z_running).reshape(
                    -1, kern_in.patch_len),
                                    axis=0)
                Z_patch = Z_patch[np.random.permutation(
                    (len(Z_patch)))[:Z_running.shape[0]], :]
                layers.append(svconvgp(kern_in, Z_patch, dim_out, mf))

            else:
                layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

            if dim_in != dim_out:
                Z_running = Z_running.dot(W)
                X_running = X_running.dot(W)

        # final layer
        if isinstance(kernels[-1], Conv):
            Z_patch = np.unique(kernels[-1].compute_patches(Z_running).reshape(
                -1, kernels[-1].patch_len),
                                axis=0)
            Z_patch = Z_patch[np.random.permutation(
                (len(Z_patch)))[:Z_running.shape[0]], :]
            layers.append(
                svconvgp(kernels[-1], Z_patch, num_outputs, mean_function))
        else:
            layers.append(
                SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))
        DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
Example #17
def init_layers_graph(X, Y, Z, kernels, gmat,
                      num_layers=2,
                      num_nodes=None,
                      dim_per_node=5,
                      dim_per_X=5, dim_per_Y=5,
                      share_Z=False,
                      nb_init=True):
    layers = []

    def pa_idx(nd, dim_per_in):
        res = []
        for n in range(num_nodes):
            w = gmat[nd, n]
            if w > 0:
                # print(res, range(n*self.dim_per_in, (n+1)*self.dim_per_in))
                res = res + list(range(n * dim_per_in, (n + 1) * dim_per_in))
        res = np.asarray(res)
        return res

    X_running, Z_running = X.copy(), Z.copy()
    for l in range(num_layers - 1):
        if l == 0:
            dim_in = dim_per_X
            dim_out = dim_per_node
        else:
            dim_in = dim_per_node
            dim_out = dim_per_node
        # print(dim_in, dim_out)
        X_running_tmp = np.zeros((X.shape[0], dim_out * num_nodes))
        Z_running_tmp = np.zeros((Z.shape[0], dim_out * num_nodes))
        mf_lst = ParamList([], trainable=False)
        for nd in range(num_nodes):
            if nb_init:
                pa = pa_idx(nd, dim_in)
            else:
                pa = np.asarray(range(nd * dim_in, (nd + 1) * dim_in))
            agg_dim_in = len(pa)

            if agg_dim_in == dim_out:
                mf = Identity()

            else:
                if agg_dim_in > dim_out:  # stepping down, use the pca projection
                    # _, _, V = np.linalg.svd(X_running[:, nd*dim_in : (nd+1)*dim_in], full_matrices=False)
                    _, _, V = np.linalg.svd(X_running[:, pa], full_matrices=False)
                    W = V[:dim_out, :].T

                else:  # stepping up, use identity + padding
                    W = np.concatenate([np.eye(agg_dim_in), np.zeros((agg_dim_in, dim_out - agg_dim_in))], 1)

                mf = Linear(W)
                mf.set_trainable(False)
            mf_lst.append(mf)
            if agg_dim_in != dim_out:
                # print(Z_running_tmp[:, nd*dim_out:(nd+1)*dim_out].shape, Z_running[:, nd*dim_in:(nd+1)*dim_in].shape,
                #       W.shape, Z_running[:, nd*dim_in:(nd+1)*dim_in].dot(W).shape)
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa].dot(W)
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa].dot(W)
            else:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa]
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa]

        layers.append(
            SVGPG_Layer(kernels[l], Z_running, mf_lst, num_nodes, dim_in, dim_out, gmat, share_Z=share_Z, nb_init=nb_init))
        Z_running = Z_running_tmp
        X_running = X_running_tmp

    # final layer
    if num_layers == 1:
        fin_dim_in = dim_per_X
    else:
        fin_dim_in = dim_per_node
    layers.append(
        SVGPG_Layer(kernels[-1], Z_running, None, num_nodes, fin_dim_in, dim_per_Y, gmat, share_Z=share_Z, nb_init=nb_init))
    return layers
Example #18
def init_layers(graph_adj, node_feature, kernels, n_layers, all_layers_dim, num_inducing,
                gc_kernel=True, mean_function="linear", white=False, q_diag=False):

    assert mean_function in ["linear", "zero"]  # mean function must be linear or zero

    layers = []

    # get initial Z
    sparse_adj = tuple_to_sparse_matrix(graph_adj[0], graph_adj[1], graph_adj[2])
    X_running = node_feature.copy()

    for i in range(n_layers):

        tf.logging.info("initialize {}th layer".format(i + 1))

        dim_in = all_layers_dim[i]
        dim_out = all_layers_dim[i + 1]

        conv_X = sparse_adj.dot(X_running)
        Z_running = kmeans2(conv_X, num_inducing[i], minit="points")[0]

        kernel = kernels[i]

        if gc_kernel and kernel.gc_weight:
            # Z_running = pca(Z_running, kernel.base_kernel.input_dim)  # reduce the dimensionality to match the kernel input dimension
            X_dim = X_running.shape[1]
            kernel_input_dim = kernel.base_kernel.input_dim
            if X_dim > kernel_input_dim:
                Z_running = pca(Z_running, kernel.base_kernel.input_dim)  # reduce the dimensionality to match the kernel input dimension
            elif X_dim < kernel_input_dim:
                Z_running = np.concatenate([Z_running, np.zeros((Z_running.shape[0], kernel_input_dim - X_dim))], axis=1)

        # print(type(Z_running))
        # print(Z_running)

        if dim_in > dim_out:
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:
            W = np.concatenate([np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)

        if mean_function == "zero":
            mf = Zero()
        else:

            if dim_in == dim_out:
                mf = Identity()
            else:
                mf = Linear(W)
                mf.set_trainable(False)

        # self.Ku = Kuu(GraphConvolutionInducingpoints(Z_running), kernel, jitter=settings.jitter)
        # print("successfully calculate Ku")
        if gc_kernel:
            feature = GraphConvolutionInducingpoints(Z_running)
        else:
            feature = InducingPoints(Z_running)

        layers.append(svgp_layer(kernel, Z_running, feature, dim_out, mf, gc_kernel, white=white, q_diag=q_diag))

        if dim_in != dim_out:
            # Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    return layers
Example #19
def build_model(ARGS, X, Y, apply_name=True):

    if ARGS.mode == 'CVAE':

        layers = []
        for l in ARGS.configuration.split('_'):
            try:
                layers.append(int(l))
            except:
                pass

        with defer_build():
            name = 'CVAE' if apply_name else None
            model = CVAE(X, Y, 1, layers, batch_size=ARGS.minibatch_size, name=name)

        model.compile()

        global_step = tf.Variable(0, dtype=tf.int32)
        op_increment = tf.assign_add(global_step, 1)

        lr = tf.cast(tf.train.exponential_decay(ARGS.lr, global_step, 1000, 0.98, staircase=True), dtype=tf.float64)
        op_adam = AdamOptimizer(lr).make_optimize_tensor(model)

        model.train_op = lambda s: s.run([op_adam, op_increment])
        model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
        model.global_step = global_step

        model.compile()

    else:
        N, D = X.shape

        # first layer inducing points
        if N > ARGS.M:
            Z = kmeans2(X, ARGS.M, minit='points')[0]
        else:
            M_pad = ARGS.M - N
            Z = np.concatenate([X.copy(), np.random.randn(M_pad, D)], 0)

        #################################### layers
        P = np.linalg.svd(X, full_matrices=False)[2]
        # PX = P.copy()

        layers = []
        # quad_layers = []

        DX = D
        DY = 1

        D_in = D
        D_out = D
        with defer_build():
            lik = Gaussian()
            lik.variance = ARGS.likelihood_variance

            if len(ARGS.configuration) > 0:
                for c, d in ARGS.configuration.split('_'):
                    if c == 'G':
                        num_gps = int(d)
                        A = np.zeros((D_in, D_out))
                        D_min = min(D_in, D_out)
                        A[:D_min, :D_min] = np.eye(D_min)
                        mf = Linear(A=A)
                        mf.b.set_trainable(False)

                        def make_kern():
                            k = RBF(D_in, lengthscales=float(D_in) ** 0.5, variance=1., ARD=True)
                            k.variance.set_trainable(False)
                            return k

                        PP = np.zeros((D_out, num_gps))
                        PP[:, :min(num_gps, DX)] = P[:, :min(num_gps, DX)]
                        ZZ = np.random.randn(ARGS.M, D_in)
                        ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]

                        kern = SharedMixedMok(make_kern(), W=PP)
                        inducing = MixedKernelSharedMof(InducingPoints(ZZ))

                        l = GPLayer(kern, inducing, num_gps, mean_function=mf)
                        if ARGS.fix_linear is True:
                            kern.W.set_trainable(False)
                            mf.set_trainable(False)

                        layers.append(l)

                        D_in = D_out

                    elif c == 'L':
                        d = int(d)
                        D_in += d
                        layers.append(LatentVariableLayer(d, XY_dim=DX+1))

            kern = RBF(D_in, lengthscales=float(D_in)**0.5, variance=1., ARD=True)
            ZZ = np.random.randn(ARGS.M, D_in)
            ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]
            layers.append(GPLayer(kern, InducingPoints(ZZ), DY))


            #################################### model
            name = 'Model' if apply_name else None

            if ARGS.mode == 'VI':
                model = DGP_VI(X, Y, layers, lik,
                               minibatch_size=ARGS.minibatch_size,
                               name=name)

            elif ARGS.mode == 'SGHMC':
                for layer in layers:
                    if hasattr(layer, 'q_sqrt'):
                        del layer.q_sqrt
                        layer.q_sqrt = None
                        layer.q_mu.set_trainable(False)

                model = DGP_VI(X, Y, layers, lik,
                               minibatch_size=ARGS.minibatch_size,
                               name=name)


            elif ARGS.mode == 'IWAE':
                model = DGP_IWVI(X, Y, layers, lik,
                                 minibatch_size=ARGS.minibatch_size,
                                 num_samples=ARGS.num_IW_samples,
                                 name=name)



        global_step = tf.Variable(0, dtype=tf.int32)
        op_increment = tf.assign_add(global_step, 1)

        if not ('SGHMC' == ARGS.mode):
            for layer in model.layers[:-1]:
                if isinstance(layer, GPLayer):
                    layer.q_sqrt = layer.q_sqrt.read_value() * 1e-5

            model.compile()

            #################################### optimization

            var_list = [[model.layers[-1].q_mu, model.layers[-1].q_sqrt]]

            model.layers[-1].q_mu.set_trainable(False)
            model.layers[-1].q_sqrt.set_trainable(False)

            gamma = tf.cast(tf.train.exponential_decay(ARGS.gamma, global_step, 1000, ARGS.gamma_decay, staircase=True),
                            dtype=tf.float64)
            lr = tf.cast(tf.train.exponential_decay(ARGS.lr, global_step, 1000, ARGS.lr_decay, staircase=True), dtype=tf.float64)

            op_ng = NatGradOptimizer(gamma=gamma).make_optimize_tensor(model, var_list=var_list)

            op_adam = AdamOptimizer(lr).make_optimize_tensor(model)

            def train(s):
                s.run(op_increment)
                s.run(op_ng)
                s.run(op_adam)

            model.train_op = train
            model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
            model.global_step = global_step

        else:
            model.compile()

            hmc_vars = []
            for layer in layers:
                if hasattr(layer, 'q_mu'):
                    hmc_vars.append(layer.q_mu.unconstrained_tensor)

            hyper_train_op = AdamOptimizer(ARGS.lr).make_optimize_tensor(model)

            sghmc_optimizer = SGHMC(model, hmc_vars, hyper_train_op, 100)

            def train_op(s):
                s.run(op_increment),
                sghmc_optimizer.sghmc_step(s),
                sghmc_optimizer.train_hypers(s)

            model.train_op = train_op
            model.sghmc_optimizer = sghmc_optimizer
            def init_op(s):
                epsilon = 0.01
                mdecay = 0.05
                with tf.variable_scope('hmc'):
                    sghmc_optimizer.generate_update_step(epsilon, mdecay)
                v = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='hmc')
                s.run(tf.variables_initializer(v))
                s.run(tf.variables_initializer([global_step]))

            model.init_op = init_op
            model.global_step = global_step

    return model
Example #20
def build_model(ARGS, X, Y, apply_name=True):
    N, D = X.shape

    # first layer inducing points
    if N > ARGS.M:
        Z = kmeans2(X, ARGS.M, minit="points")[0]
    else:
        M_pad = ARGS.M - N
        Z = np.concatenate([X.copy(), np.random.randn(M_pad, D)], 0)

    #################################### layers
    P = np.linalg.svd(X, full_matrices=False)[2]

    layers = []

    DX = D
    DY = 1

    D_in = D
    D_out = D
    with defer_build():
        lik = Gaussian()
        lik.variance = ARGS.likelihood_variance

        if len(ARGS.configuration) > 0:
            for c, d in ARGS.configuration.split("_"):
                if c == "G":
                    num_gps = int(d)
                    A = np.zeros((D_in, D_out))
                    D_min = min(D_in, D_out)
                    A[:D_min, :D_min] = np.eye(D_min)
                    mf = Linear(A=A)
                    mf.b.set_trainable(False)

                    def make_kern():
                        k = RBF(D_in,
                                lengthscales=float(D_in)**0.5,
                                variance=1.0,
                                ARD=True)
                        k.variance.set_trainable(False)
                        return k

                    PP = np.zeros((D_out, num_gps))
                    PP[:, :min(num_gps, DX)] = P[:, :min(num_gps, DX)]
                    ZZ = np.random.randn(ARGS.M, D_in)
                    ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]

                    kern = SharedMixedMok(make_kern(), W=PP)
                    inducing = MixedKernelSharedMof(InducingPoints(ZZ))

                    l = GPLayer(kern,
                                inducing,
                                num_gps,
                                layer_num=len(layers),
                                mean_function=mf)
                    if ARGS.fix_linear is True:
                        kern.W.set_trainable(False)
                        mf.set_trainable(False)

                    layers.append(l)

                    D_in = D_out

                elif c == "L":
                    d = int(d)
                    D_in += d
                    encoder_dims = [
                        int(dim.strip())
                        for dim in ARGS.encoder_dims.split(",")
                    ]
                    layers.append(
                        LatentVariableLayer(d,
                                            XY_dim=DX + 1,
                                            encoder_dims=encoder_dims,
                                            qz_mode=ARGS.qz_mode))

        kern = RBF(D_in, lengthscales=float(D_in)**0.5, variance=1.0, ARD=True)
        ZZ = np.random.randn(ARGS.M, D_in)
        ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]
        layers.append(GPLayer(kern, InducingPoints(ZZ), DY))

        #################################### model
        name = "Model" if apply_name else None

        if ARGS.mode == "VI":
            model = DGP_VI(X,
                           Y,
                           layers,
                           lik,
                           minibatch_size=ARGS.minibatch_size,
                           name=name)

        elif ARGS.mode == "IWAE":
            model = DGP_IWVI(
                X=X,
                Y=Y,
                layers=layers,
                likelihood=lik,
                minibatch_size=ARGS.minibatch_size,
                num_samples=ARGS.num_IW_samples,
                name=name,
                encoder_minibatch_size=ARGS.encoder_minibatch_size,
            )

        elif ARGS.mode == "CIWAE":
            model = DGP_CIWAE(
                X,
                Y,
                layers,
                lik,
                minibatch_size=ARGS.minibatch_size,
                num_samples=ARGS.num_IW_samples,
                name=name,
                beta=ARGS.beta,
            )

        else:
            raise ValueError(f"Unknown mode {ARGS.mode}.")

    global_step = tf.Variable(0, dtype=tf.int32)
    op_increment = tf.assign_add(global_step, 1)

    for layer in model.layers[:-1]:
        if isinstance(layer, GPLayer):
            layer.q_sqrt = layer.q_sqrt.read_value() * 1e-5

    model.compile()

    #################################### optimization

    # Whether to train the final layer with the other parameters, using Adam, or by itself, using natural
    # gradients.
    if ARGS.use_nat_grad_for_final_layer:
        # Turn off training so the parameters are not optimised by Adam. We pass them directly to the natgrad
        # optimiser, which bypasses this flag.
        model.layers[-1].q_mu.set_trainable(False)
        model.layers[-1].q_sqrt.set_trainable(False)

        gamma = tf.cast(
            tf.train.exponential_decay(ARGS.gamma,
                                       global_step,
                                       1000,
                                       ARGS.gamma_decay,
                                       staircase=True),
            dtype=tf.float64,
        )
        final_layer_vars = [[model.layers[-1].q_mu, model.layers[-1].q_sqrt]]
        final_layer_opt_op = NatGradOptimizer(
            gamma=gamma).make_optimize_tensor(model, var_list=final_layer_vars)
    else:
        final_layer_opt_op = NoOp()

    lr = tf.cast(
        tf.train.exponential_decay(ARGS.lr,
                                   global_step,
                                   decay_steps=1000,
                                   decay_rate=ARGS.lr_decay,
                                   staircase=True),
        dtype=tf.float64,
    )

    encoder_lr = tf.cast(
        tf.train.exponential_decay(
            ARGS.encoder_lr,
            global_step,
            decay_steps=1000,
            decay_rate=ARGS.encoder_lr_decay,
            staircase=True,
        ),
        dtype=tf.float64,
    )

    dreg_optimizer = DregOptimizer(
        enable_dreg=ARGS.use_dreg,
        optimizer=ARGS.optimizer,
        encoder_optimizer=ARGS.encoder_optimizer,
        learning_rate=lr,
        encoder_learning_rate=encoder_lr,
        assert_no_nans=ARGS.assert_no_nans,
        encoder_grad_clip_value=ARGS.clip_encoder_grads,
    )
    other_layers_opt_op = dreg_optimizer.make_optimize_tensor(model)

    model.lr = lr
    model.train_op = tf.group(op_increment, final_layer_opt_op,
                              other_layers_opt_op)
    model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
    model.global_step = global_step

    return model
Example #21
import numpy as np
import pytest

from gpflow.mean_functions import (
    Additive,
    Constant,
    Linear,
    Product,
    SwitchedMeanFunction,
    Zero,
)

rng = np.random.RandomState(99021)


class Datum:
    input_dim, output_dim = 3, 2
    N, Ntest, M = 20, 30, 10


_mean_functions = [
    Zero(),
    Linear(
        A=rng.randn(Datum.input_dim, Datum.output_dim),
        b=rng.randn(Datum.output_dim, 1).reshape(-1),
    ),
    Constant(c=rng.randn(Datum.output_dim, 1).reshape(-1)),
]


@pytest.mark.parametrize("mean_function_1", _mean_functions)
@pytest.mark.parametrize("mean_function_2", _mean_functions)
@pytest.mark.parametrize("operation", ["+", "*"])
def test_mean_functions_output_shape(mean_function_1, mean_function_2,
                                     operation):
    """
    Test the output shape for basic and compositional mean functions, also
    check that the combination of mean functions returns the correct class
    """
    X = np.random.randn(Datum.N, Datum.input_dim)
import gpflow
from gpflow.config import default_int
from gpflow.inducing_variables import InducingPoints
from gpflow.mean_functions import Additive, Constant, Linear, Product, SwitchedMeanFunction, Zero

rng = np.random.RandomState(99021)


class Datum:
    input_dim, output_dim = 3, 2
    N, Ntest, M = 20, 30, 10


_mean_functions = [
    Zero(),
    Linear(A=rng.randn(Datum.input_dim, Datum.output_dim), b=rng.randn(Datum.output_dim, 1).reshape(-1)),
    Constant(c=rng.randn(Datum.output_dim, 1).reshape(-1))
]


@pytest.mark.parametrize('mean_function_1', _mean_functions)
@pytest.mark.parametrize('mean_function_2', _mean_functions)
@pytest.mark.parametrize('operation', ['+', 'x'])
def test_mean_functions_output_shape(mean_function_1, mean_function_2, operation):
    """
    Test the output shape for basic and compositional mean functions, also
    check that the combination of mean functions returns the correct class
    """
    X = np.random.randn(Datum.N, Datum.input_dim)
    Y = mean_function_1(X)
    # basic output shape check