Example #1
def init_layers_linear(X, Y, Z, kernels, layer_sizes, mean_function=Zero(),
                       num_outputs=None, Layer=SVGPLayer, whiten=False):
    num_outputs = num_outputs or Y.shape[1]
    layers = []

    X_running, Z_running = X.copy(), Z.copy()
    for in_idx, kern_in in enumerate(kernels[:-1]):
        dim_in = layer_sizes[in_idx]
        dim_out = layer_sizes[in_idx+1]

        # Initialize the mean function: Identity when dims match, a PCA projection
        # when stepping down, or a zero-padded identity when stepping up
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                # use eigenvectors corresponding to dim_out largest eigenvalues
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:                 # stepping up, use identity + padding
                W = np.concatenate([np.eye(dim_in),
                                    np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            gpflow.set_trainable(mf.A, False)
            gpflow.set_trainable(mf.b, False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=whiten))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(Layer(kernels[-1], Z_running, num_outputs, mean_function,
                        white=whiten))
    return layers
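
The dimension-changing logic above is the key idea: when a layer steps down in dimension, the fixed Linear mean function uses the top right-singular vectors of the running data, and when it steps up, it uses an identity map padded with zeros. A minimal numpy-only sketch of that projection step (names and shapes below are illustrative, not part of the snippet):

import numpy as np

X = np.random.randn(100, 6)   # running data representation
Z = np.random.randn(20, 6)    # running inducing inputs

# Stepping down (dim_in=6 -> dim_out=3): project onto the top-3 right singular vectors.
_, _, V = np.linalg.svd(X, full_matrices=False)
W_down = V[:3, :].T                                          # (6, 3)

# Stepping up (dim_in=6 -> dim_out=8): identity followed by zero padding.
W_up = np.concatenate([np.eye(6), np.zeros((6, 8 - 6))], 1)  # (6, 8)

# The same W that defines the fixed Linear mean function also propagates
# X and Z into the next layer's input space.
X_next, Z_next = X.dot(W_down), Z.dot(W_down)
print(X_next.shape, Z_next.shape)                            # (100, 3) (20, 3)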
Example #2
    def _init_layers(self,
                     dim_in,
                     kernels,
                     inducing_variables,
                     num_outputs=None,
                     mean_function=Zero(),
                     Layer=SVGPLayer,
                     white=False):
        """Initialise DGP layers to have the same number of outputs as inputs,
        apart from the final layer."""
        layers = []

        # Add layers
        for kern in kernels[:-1]:
            # Initialise inner layers with dim_out = dim_in and use the Identity
            # mean function, since input and output dimensions are the same.
            mf = Identity()
            layers.append(
                Layer(kern, inducing_variables, dim_in, mf, white=white))

        layers.append(
            Layer(kernels[-1],
                  inducing_variables,
                  num_outputs,
                  mean_function,
                  white=white))
        return layers
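Example #3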
def init_linear(X, Z, all_kernels, initialized_Zs=False):
    """
    if there are no Zs from an initialization (e.g. for warm-starting),
    all_Zs is initialized according to the Salimbeni scheme (Z should be MxD).
    otherwise the Zs obtained from the initialization are simply taken and put
    into the all_Zs array (Z should be a list of L arrays)
    """
    if initialized_Zs:
        all_Zs = Z
    else:
        all_Zs = []
    all_mean_funcs = []
    X_running = X.copy()
    if not initialized_Zs:
        Z_running = Z.copy()
    for kern_in, kern_out in zip(all_kernels[:-1], all_kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T

            else:  # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in),
                     np.zeros((dim_in, dim_out - dim_in))], 1)

            mf = Linear(W)
            mf.set_trainable(False)

        all_mean_funcs.append(mf)
        if not initialized_Zs:
            all_Zs.append(Z_running)

        if dim_in != dim_out:
            X_running = X_running.dot(W)
            if not initialized_Zs:
                Z_running = Z_running.dot(W)

    # final layer
    all_mean_funcs.append(Zero())
    if not initialized_Zs:
        all_Zs.append(Z_running)
    return all_Zs, all_mean_funcs
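
A minimal usage sketch for the function above, assuming GPflow 1.x kernels (which expose an input_dim attribute) and that init_linear is importable from the module shown; the data and kernel choices are illustrative only:

import numpy as np
import gpflow

X = np.random.randn(100, 5)
Z = X[np.random.permutation(100)[:20]].copy()                # M x D inducing inputs
kernels = [gpflow.kernels.RBF(5), gpflow.kernels.RBF(2), gpflow.kernels.RBF(2)]

all_Zs, all_mean_funcs = init_linear(X, Z, kernels)
# Dimensions step 5 -> 2 -> 2: the first mean function is a fixed Linear
# (PCA projection), the second is Identity, and the final one is Zero.
print([Zl.shape for Zl in all_Zs])                           # [(20, 5), (20, 2), (20, 2)]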
Example #4
def init_layers_linear(X,
                       Y,
                       Z,
                       kernels,
                       num_outputs=None,
                       mean_function=Zero(),
                       Layer=SVGP_Layer,
                       white=False):
    num_outputs = num_outputs or Y.shape[1]

    layers = []

    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        print(dim_in, dim_out)
        if dim_in == dim_out:
            mf = Identity()

        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T

            else:  # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in),
                     np.zeros((dim_in, dim_out - dim_in))], 1)

            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=white))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(
        Layer(kernels[-1], Z_running, num_outputs, mean_function, white=white))
    return layers
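Example #5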
    def __init__(self, X, Y, Z, kernels, likelihood, 
                 num_outputs=None,
                 mean_function=Zero(),  # the final layer mean function
                 **kwargs):
        Model.__init__(self)
        num_outputs = num_outputs or Y.shape[1]

        # init the layers
        layers = []

        # inner layers
        X_running, Z_running = X.copy(), Z.copy()
        for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
            dim_in = kern_in.input_dim
            dim_out = kern_out.input_dim

            if dim_in == dim_out:
                mf = Identity()

            else:
                if dim_in > dim_out:  # stepping down, use the pca projection
                    _, _, V = np.linalg.svd(X_running, full_matrices=False)
                    W = V[:dim_out, :].T

                else:  # pad with zeros
                    zeros = np.zeros((dim_in, dim_out - dim_in))
                    W = np.concatenate([np.eye(dim_in), zeros], 1)

                mf = Linear(W)
                mf.set_trainable(False)

            layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

            if dim_in != dim_out:
                Z_running = Z_running.dot(W)
                X_running = X_running.dot(W)


        # final layer
        layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

        DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
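Example #6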
    def test_vs_single_layer(self):
        lik = Gaussian()
        lik_var = 0.01
        lik.variance = lik_var
        N, Ns, D_Y, D_X = self.X.shape[0], self.Xs.shape[0], self.D_Y, self.X.shape[1]
        Y = np.random.randn(N, D_Y)
        Ys = np.random.randn(Ns, D_Y)

        kern = Matern52(self.X.shape[1], lengthscales=0.5)
        # mf = Linear(A=np.random.randn(D_X, D_Y), b=np.random.randn(D_Y))
        mf = Zero()
        m_gpr = GPR(self.X, Y, kern, mean_function=mf)
        m_gpr.likelihood.variance = lik_var
        mean_gpr, var_gpr = m_gpr.predict_y(self.Xs)
        test_lik_gpr = m_gpr.predict_density(self.Xs, Ys)
        pred_m_gpr, pred_v_gpr = m_gpr.predict_f(self.Xs)
        pred_mfull_gpr, pred_vfull_gpr = m_gpr.predict_f_full_cov(self.Xs)

        kerns = []
        kerns.append(Matern52(self.X.shape[1], lengthscales=0.5, variance=1e-1))
        kerns.append(kern)

        layer0 = GPMC_Layer(kerns[0], self.X.copy(), D_X, Identity())
        layer1 = GPR_Layer(kerns[1], mf, D_Y)
        m_dgp = DGP_Heinonen(self.X, Y, lik, [layer0, layer1])

        mean_dgp, var_dgp = m_dgp.predict_y(self.Xs, 1)
        test_lik_dgp = m_dgp.predict_density(self.Xs, Ys, 1)
        pred_m_dgp, pred_v_dgp = m_dgp.predict_f(self.Xs, 1)
        pred_mfull_dgp, pred_vfull_dgp = m_dgp.predict_f_full_cov(self.Xs, 1)

        tol = 1e-4
        assert_allclose(mean_dgp[0], mean_gpr, atol=tol, rtol=tol)
        assert_allclose(test_lik_dgp, test_lik_gpr, atol=tol, rtol=tol)
        assert_allclose(pred_m_dgp[0], pred_m_gpr, atol=tol, rtol=tol)
        assert_allclose(pred_mfull_dgp[0], pred_mfull_gpr, atol=tol, rtol=tol)
        assert_allclose(pred_vfull_dgp[0], pred_vfull_gpr, atol=tol, rtol=tol)
Example #7
    def _init_layers(self,
                     X,
                     Y,
                     Z,
                     dims,
                     kernels,
                     mean_function=Zero(),
                     Layer=SVGPIndependentLayer,
                     white=False):
        """Initialise DGP layers to have the same number of outputs as inputs,
        apart from the final layer."""
        layers = []

        X_running, Z_running = X.copy(), Z.copy()
        for i in range(len(kernels) - 1):
            dim_in, dim_out, kern = dims[i], dims[i + 1], kernels[i]
            if dim_in == dim_out:
                mf = Identity()

            else:
                if dim_in > dim_out:
                    _, _, V = np.linalg.svd(X_running, full_matrices=False)
                    W = V[:dim_out, :].T

                else:
                    W = np.concatenate(
                        [np.eye(dim_in),
                         np.zeros((dim_in, dim_out - dim_in))], 1)

                mf = Linear(W)
                set_trainable(mf.A, False)
                set_trainable(mf.b, False)

            layers.append(Layer(kern, Z_running, dim_out, mf, white=white))

            if dim_in != dim_out:
                Z_running = Z_running.dot(W)
                X_running = X_running.dot(W)

        layers.append(
            Layer(kernels[-1], Z_running, dims[-1], mean_function,
                  white=white))
        return layers
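Example #8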
    def __init__(
            self,
            X,
            Y,
            Z,
            kernels,
            likelihood,
            num_outputs=None,
            mean_function=Zero(),  # the final layer mean function
            **kwargs):
        Model.__init__(self)
        num_outputs = num_outputs or Y.shape[1]

        # init the layers
        layers = []

        # inner layers
        X_running, Z_running = X.copy(), Z.copy()
        for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):

            if isinstance(kern_in, Conv):
                dim_in = kern_in.basekern.input_dim
            else:
                dim_in = kern_in.input_dim
            '''  
            if isinstance(kern_out,Conv):
                dim_out = kern_out.basekern.input_dim
            else:
                dim_out = kern_out.input_dim
            '''
            dim_out = kern_out.input_dim

            if dim_in == dim_out:
                mf = Identity()

            else:  # dimensions differ: use the PCA projection (assumes stepping down)
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
                b = np.zeros(1, dtype=np.float32)
                mf = Linear(W, b)
                mf.set_trainable(False)

            if isinstance(kern_in, Conv):
                Z_patch = np.unique(kern_in.compute_patches(Z_running).reshape(
                    -1, kern_in.patch_len),
                                    axis=0)
                Z_patch = Z_patch[np.random.permutation(
                    (len(Z_patch)))[:Z_running.shape[0]], :]
                layers.append(svconvgp(kern_in, Z_patch, dim_out, mf))

            else:
                layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

            if dim_in != dim_out:
                Z_running = Z_running.dot(W)
                X_running = X_running.dot(W)

        # final layer
        if isinstance(kernels[-1], Conv):
            Z_patch = np.unique(kernels[-1].compute_patches(Z_running).reshape(
                -1, kernels[-1].patch_len),
                                axis=0)
            Z_patch = Z_patch[np.random.permutation(
                (len(Z_patch)))[:Z_running.shape[0]], :]
            layers.append(
                svconvgp(kernels[-1], Z_patch, num_outputs, mean_function))
        else:
            layers.append(
                SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))
        DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
Example #9
def init_layers(graph_adj, node_feature, kernels, n_layers, all_layers_dim, num_inducing,
                gc_kernel=True, mean_function="linear", white=False, q_diag=False):

    assert mean_function in ["linear", "zero"]  # mean function must be linear or zero

    layers = []

    # get initial Z
    sparse_adj = tuple_to_sparse_matrix(graph_adj[0], graph_adj[1], graph_adj[2])
    X_running = node_feature.copy()

    for i in range(n_layers):

        tf.logging.info("initializing layer {}".format(i + 1))

        dim_in = all_layers_dim[i]
        dim_out = all_layers_dim[i + 1]

        conv_X = sparse_adj.dot(X_running)
        Z_running = kmeans2(conv_X, num_inducing[i], minit="points")[0]

        kernel = kernels[i]

        if gc_kernel and kernel.gc_weight:
            # Z_running = pca(Z_running, kernel.base_kernel.input_dim)  # reduce the dimensionality to match the output dimension
            X_dim = X_running.shape[1]
            kernel_input_dim = kernel.base_kernel.input_dim
            if X_dim > kernel_input_dim:
                Z_running = pca(Z_running, kernel.base_kernel.input_dim)  # reduce the dimensionality to match the output dimension
            elif X_dim < kernel_input_dim:
                Z_running = np.concatenate([Z_running, np.zeros((Z_running.shape[0], kernel_input_dim - X_dim))], axis=1)

        # print(type(Z_running))
        # print(Z_running)

        if dim_in > dim_out:
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:
            W = np.concatenate([np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)

        if mean_function == "zero":
            mf = Zero()
        else:

            if dim_in == dim_out:
                mf = Identity()
            else:
                mf = Linear(W)
                mf.set_trainable(False)

        # self.Ku = Kuu(GraphConvolutionInducingpoints(Z_running), kernel, jitter=settings.jitter)
        # print("successfully calculate Ku")
        if gc_kernel:
            feature = GraphConvolutionInducingpoints(Z_running)
        else:
            feature = InducingPoints(Z_running)

        layers.append(svgp_layer(kernel, Z_running, feature, dim_out, mf, gc_kernel, white=white, q_diag=q_diag))

        if dim_in != dim_out:
            # Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    return layers
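
Example #9 relies on a pca helper that is not shown here. A minimal sketch of what such a projection might look like, assuming it simply keeps the top principal directions via an SVD (the real helper may differ, e.g. by centring differently or using sklearn):

import numpy as np

def pca(Z, dim):
    # Project the rows of Z onto their top-`dim` principal directions.
    Z_centred = Z - Z.mean(axis=0, keepdims=True)
    _, _, V = np.linalg.svd(Z_centred, full_matrices=False)
    return Z_centred.dot(V[:dim, :].T)                       # (N, dim)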
Example #10
    def __init__(
        self,
        kernel: MultioutputKernel,
        inducing_variable: MultioutputInducingVariables,
        num_data: int,
        mean_function: Optional[MeanFunction] = None,
        *,
        num_samples: Optional[int] = None,
        full_cov: bool = False,
        full_output_cov: bool = False,
        num_latent_gps: int = None,
        whiten: bool = True,
        name: Optional[str] = None,
        verbose: bool = False,
    ):
        """
        :param kernel: The multioutput kernel for this layer.
        :param inducing_variable: The inducing features for this layer.
        :param num_data: The number of points in the training dataset (see :attr:`num_data`).
        :param mean_function: The mean function that will be applied to the
            inputs. Default: :class:`~gpflow.mean_functions.Identity`.

            .. note:: The Identity mean function requires the input and output
                dimensionality of this layer to be the same. If you want to
                change the dimensionality in a layer, you may want to provide a
                :class:`~gpflow.mean_functions.Linear` mean function instead.

        :param num_samples: The number of samples to draw when converting the
            :class:`~tfp.layers.DistributionLambda` into a `tf.Tensor`, see
            :meth:`_convert_to_tensor_fn`. Will be stored in the
            :attr:`num_samples` attribute.  If `None` (the default), draw a
            single sample without prefixing the sample shape (see
            :class:`tfp.distributions.Distribution`'s `sample()
            <https://www.tensorflow.org/probability/api_docs/python/tfp/distributions/Distribution#sample>`_
            method).
        :param full_cov: Sets default behaviour of calling this layer
            (:attr:`full_cov` attribute):
            If `False` (the default), only predict marginals (diagonal
            of covariance) with respect to inputs.
            If `True`, predict full covariance over inputs.
        :param full_output_cov: Sets default behaviour of calling this layer
            (:attr:`full_output_cov` attribute):
            If `False` (the default), only predict marginals (diagonal
            of covariance) with respect to outputs.
            If `True`, predict full covariance over outputs.
        :param num_latent_gps: The number of (latent) GPs in the layer
            (which can be different from the number of outputs, e.g. with a
            :class:`~gpflow.kernels.LinearCoregionalization` kernel).
            This is used to determine the size of the
            variational parameters :attr:`q_mu` and :attr:`q_sqrt`.
            If possible, it is inferred from the *kernel* and *inducing_variable*.
        :param whiten: If `True` (the default), uses the whitened parameterisation
            of the inducing variables; see :attr:`whiten`.
        :param name: The name of this layer.
        :param verbose: The verbosity mode. Set this parameter to `True`
            to show debug information.
        """

        super().__init__(
            make_distribution_fn=self._make_distribution_fn,
            convert_to_tensor_fn=self._convert_to_tensor_fn,
            dtype=default_float(),
            name=name,
        )

        self.kernel = kernel
        self.inducing_variable = inducing_variable

        self.num_data = num_data

        if mean_function is None:
            mean_function = Identity()
        self.mean_function = mean_function

        self.full_output_cov = full_output_cov
        self.full_cov = full_cov
        self.whiten = whiten
        self.verbose = verbose

        try:
            num_inducing, self.num_latent_gps = verify_compatibility(
                kernel, mean_function, inducing_variable)
            # TODO: if num_latent_gps is not None, verify it is equal to self.num_latent_gps
        except GPLayerIncompatibilityException as e:
            if num_latent_gps is None:
                raise e

            if self.verbose:
                warnings.warn(
                    "Could not verify the compatibility of the `kernel`, `inducing_variable` "
                    "and `mean_function`. We advise using `gpflux.helpers.construct_*` to create "
                    "compatible kernels and inducing variables. As "
                    f"`num_latent_gps={num_latent_gps}` has been specified explicitly, this will "
                    "be used to create the `q_mu` and `q_sqrt` parameters.")

            num_inducing, self.num_latent_gps = (
                len(inducing_variable),
                num_latent_gps,
            )

        self.q_mu = Parameter(
            np.zeros((num_inducing, self.num_latent_gps)),
            dtype=default_float(),
            name=f"{self.name}_q_mu" if self.name else "q_mu",
        )  # [num_inducing, num_latent_gps]

        self.q_sqrt = Parameter(
            np.stack(
                [np.eye(num_inducing) for _ in range(self.num_latent_gps)]),
            transform=triangular(),
            dtype=default_float(),
            name=f"{self.name}_q_sqrt" if self.name else "q_sqrt",
        )  # [num_latent_gps, num_inducing, num_inducing]

        self.num_samples = num_samples
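Example #11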
    def test_vs_DGP2(self):
        lik = Gaussian()
        lik_var = 0.1
        lik.variance = lik_var
        N, Ns, D_Y, D_X = self.X.shape[0], self.Xs.shape[0], self.D_Y, self.X.shape[1]

        q_mu = np.random.randn(N, D_X)

        Y = np.random.randn(N, D_Y)
        Ys = np.random.randn(Ns, D_Y)

        kern1 = Matern52(self.X.shape[1], lengthscales=0.5)
        kern2 = Matern52(self.X.shape[1], lengthscales=0.5)
        kerns = [kern1, kern2]
        # mf = Linear(A=np.random.randn(D_X, D_Y), b=np.random.randn(D_Y))

        mf = Zero()
        m_dgp = DGP(self.X, Y, self.X, kerns, lik, mean_function=mf, white=True)
        m_dgp.layers[0].q_mu = q_mu
        m_dgp.layers[0].q_sqrt = m_dgp.layers[0].q_sqrt.read_value() * 1e-24

        Fs, ms, vs = m_dgp.predict_all_layers(self.Xs, 1)
        Z = self.X.copy()
        Z[:len(self.Xs)] = ms[0][0]
        m_dgp.layers[1].feature.Z = Z  # need to put the inducing points in the right place

        var_list = [[m_dgp.layers[1].q_mu, m_dgp.layers[1].q_sqrt]]
        NatGradOptimizer(gamma=1).minimize(m_dgp, var_list=var_list, maxiter=1)

        mean_dgp, var_dgp = m_dgp.predict_y(self.Xs, 1)
        test_lik_dgp = m_dgp.predict_density(self.Xs, Ys, 1)
        pred_m_dgp, pred_v_dgp = m_dgp.predict_f(self.Xs, 1)
        pred_mfull_dgp, pred_vfull_dgp = m_dgp.predict_f_full_cov(self.Xs, 1)

        # mean_functions = [Identity(), mf]
        layer0 = GPMC_Layer(kerns[0], self.X.copy(), D_X, Identity())
        layer1 = GPR_Layer(kerns[1], mf, D_Y)

        m_heinonen = DGP_Heinonen(self.X, Y, lik, [layer0, layer1])

        m_heinonen.layers[0].q_mu = q_mu

        mean_heinonen, var_heinonen = m_heinonen.predict_y(self.Xs, 1)
        test_lik_heinonen = m_heinonen.predict_density(self.Xs, Ys, 1)
        pred_m_heinonen, pred_v_heinonen = m_heinonen.predict_f(self.Xs, 1)
        pred_mfull_heinonen, pred_vfull_heinonen = m_heinonen.predict_f_full_cov(self.Xs, 1)

        tol = 1e-4
        assert_allclose(mean_dgp, mean_heinonen, atol=tol, rtol=tol)
        assert_allclose(test_lik_dgp, test_lik_heinonen, atol=tol, rtol=tol)
        assert_allclose(pred_m_dgp, pred_m_heinonen, atol=tol, rtol=tol)
        assert_allclose(pred_mfull_dgp, pred_mfull_heinonen, atol=tol, rtol=tol)
        assert_allclose(pred_vfull_dgp, pred_vfull_heinonen, atol=tol, rtol=tol)
Example #12
def init_layers_graph(X, Y, Z, kernels, gmat,
                      num_layers=2,
                      num_nodes=None,
                      dim_per_node=5,
                      dim_per_X=5, dim_per_Y=5,
                      share_Z=False,
                      nb_init=True):
    layers = []

    def pa_idx(nd, dim_per_in):
        res = []
        for n in range(num_nodes):
            w = gmat[nd, n]
            if w > 0:
                # print(res, range(n*self.dim_per_in, (n+1)*self.dim_per_in))
                res = res + list(range(n * dim_per_in, (n + 1) * dim_per_in))
        res = np.asarray(res)
        return res

    X_running, Z_running = X.copy(), Z.copy()
    for l in range(num_layers - 1):
        if l == 0:
            dim_in = dim_per_X
            dim_out = dim_per_node
        else:
            dim_in = dim_per_node
            dim_out = dim_per_node
        # print(dim_in, dim_out)
        X_running_tmp = np.zeros((X.shape[0], dim_out * num_nodes))
        Z_running_tmp = np.zeros((Z.shape[0], dim_out * num_nodes))
        mf_lst = ParamList([], trainable=False)
        for nd in range(num_nodes):
            if nb_init:
                pa = pa_idx(nd, dim_in)
            else:
                pa = np.asarray(range(nd * dim_in, (nd + 1) * dim_in))
            agg_dim_in = len(pa)

            if agg_dim_in == dim_out:
                mf = Identity()

            else:
                if agg_dim_in > dim_out:  # stepping down, use the pca projection
                    # _, _, V = np.linalg.svd(X_running[:, nd*dim_in : (nd+1)*dim_in], full_matrices=False)
                    _, _, V = np.linalg.svd(X_running[:, pa], full_matrices=False)
                    W = V[:dim_out, :].T

                else:  # stepping up, use identity + padding
                    W = np.concatenate([np.eye(agg_dim_in), np.zeros((agg_dim_in, dim_out - agg_dim_in))], 1)

                mf = Linear(W)
                mf.set_trainable(False)
            mf_lst.append(mf)
            if agg_dim_in != dim_out:
                # print(Z_running_tmp[:, nd*dim_out:(nd+1)*dim_out].shape, Z_running[:, nd*dim_in:(nd+1)*dim_in].shape,
                #       W.shape, Z_running[:, nd*dim_in:(nd+1)*dim_in].dot(W).shape)
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa].dot(W)
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa].dot(W)
            else:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa]
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa]

        layers.append(
            SVGPG_Layer(kernels[l], Z_running, mf_lst, num_nodes, dim_in, dim_out, gmat, share_Z=share_Z, nb_init=nb_init))
        Z_running = Z_running_tmp
        X_running = X_running_tmp

    # final layer
    if num_layers == 1:
        fin_dim_in = dim_per_X
    else:
        fin_dim_in = dim_per_node
    layers.append(
        SVGPG_Layer(kernels[-1], Z_running, None, num_nodes, fin_dim_in, dim_per_Y, gmat, share_Z=share_Z, nb_init=nb_init))
    return layers
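
The pa_idx closure in Example #12 gathers, for node nd, the feature columns of every neighbouring node according to the adjacency-style matrix gmat. A standalone toy illustration of that indexing (the values below are made up):

import numpy as np

gmat = np.array([[1, 1, 0],
                 [1, 1, 1],
                 [0, 1, 1]])        # 3 nodes; row nd marks the parents of node nd
num_nodes, dim_per_in = 3, 2

def pa_idx(nd):
    # Collect the feature columns of every node n with gmat[nd, n] > 0.
    cols = []
    for n in range(num_nodes):
        if gmat[nd, n] > 0:
            cols += list(range(n * dim_per_in, (n + 1) * dim_per_in))
    return np.asarray(cols)

print(pa_idx(0))                    # [0 1 2 3] -> columns of nodes 0 and 1
print(pa_idx(2))                    # [2 3 4 5] -> columns of nodes 1 and 2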