def init_layers_linear(X, Y, Z, kernels, layer_sizes, mean_function=Zero(),
                       num_outputs=None, Layer=SVGPLayer, whiten=False):
    num_outputs = num_outputs or Y.shape[1]

    layers = []
    X_running, Z_running = X.copy(), Z.copy()
    for in_idx, kern_in in enumerate(kernels[:-1]):
        dim_in = layer_sizes[in_idx]
        dim_out = layer_sizes[in_idx + 1]

        # Initialize mean function to be either Identity or PCA projection
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                # stepping down, use the pca projection
                # use eigenvectors corresponding to dim_out largest eigenvalues
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            gpflow.set_trainable(mf.A, False)
            gpflow.set_trainable(mf.b, False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=whiten))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(
        Layer(kernels[-1], Z_running, num_outputs, mean_function, white=whiten))
    return layers
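# A self-contained NumPy sketch of the dimension-stepping weights used above:
# stepping down takes the top right-singular vectors of the running inputs
# (a PCA projection); stepping up pads the identity with zeros. Names here
# are illustrative, not part of the library code.
import numpy as np

def linear_mean_weights(X_running, dim_in, dim_out):
    """Illustrative helper: the W handed to the Linear mean function above."""
    if dim_in > dim_out:
        # stepping down: project onto the top dim_out principal directions
        _, _, V = np.linalg.svd(X_running, full_matrices=False)
        W = V[:dim_out, :].T                       # [dim_in, dim_out]
    else:
        # stepping up: copy the inputs, pad the extra dimensions with zeros
        W = np.concatenate(
            [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], axis=1)
    return W

X = np.random.randn(100, 5)
W_down = linear_mean_weights(X, 5, 2)   # (5, 2) PCA projection
W_up = linear_mean_weights(X, 5, 8)     # (5, 8) identity + zero padding
assert X.dot(W_down).shape == (100, 2)
assert np.allclose(X.dot(W_up)[:, :5], X)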
def _init_layers(self, dim_in, kernels, inducing_variables, num_outputs=None,
                 mean_function=Zero(), Layer=SVGPLayer, white=False):
    """Initialise DGP layers to have the same number of outputs as inputs,
    apart from the final layer."""
    layers = []

    # Add inner layers with dim_out = dim_in; since input and output
    # dimensions are the same, the Identity mean function can be used.
    for kern in kernels[:-1]:
        mf = Identity()
        layers.append(Layer(kern, inducing_variables, dim_in, mf, white=white))

    # Final layer with the requested number of outputs and mean function.
    layers.append(
        Layer(kernels[-1], inducing_variables, num_outputs, mean_function,
              white=white))
    return layers
def init_linear(X, Z, all_kernels, initialized_Zs=False):
    """
    If there are no Zs from an initialization (e.g. for warm-starting),
    all_Zs is initialized according to the Salimbeni scheme (Z should be
    MxD). Otherwise the Zs obtained from the initialization are simply
    taken and put into the all_Zs list (Z should be a list of L arrays).
    """
    if initialized_Zs:
        all_Zs = Z
    else:
        all_Zs = []
    all_mean_funcs = []

    X_running = X.copy()
    if not initialized_Zs:
        Z_running = Z.copy()

    for kern_in, kern_out in zip(all_kernels[:-1], all_kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            mf.set_trainable(False)

        all_mean_funcs.append(mf)
        if not initialized_Zs:
            all_Zs.append(Z_running)

        if dim_in != dim_out:
            X_running = X_running.dot(W)
            if not initialized_Zs:
                Z_running = Z_running.dot(W)

    # final layer
    all_mean_funcs.append(Zero())
    if not initialized_Zs:
        all_Zs.append(Z_running)

    return all_Zs, all_mean_funcs
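# Usage sketch for init_linear above, showing both calling modes (assumes
# GPflow 1-style kernels exposing .input_dim; shapes are illustrative).
import numpy as np
from gpflow.kernels import RBF

X = np.random.randn(200, 4)
kernels = [RBF(4), RBF(2), RBF(2)]   # layer widths step 4 -> 2 -> 2

# Cold start: Z is a single M x D array, propagated through the layers.
Z = np.random.randn(30, 4)
all_Zs, all_mfs = init_linear(X, Z, kernels)

# Warm start: Z is already a list of per-layer arrays and is used as-is.
Zs_warm = [np.random.randn(30, 4), np.random.randn(30, 2), np.random.randn(30, 2)]
all_Zs, all_mfs = init_linear(X, Zs_warm, kernels, initialized_Zs=True)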
def init_layers_linear(X, Y, Z, kernels, num_outputs=None,
                       mean_function=Zero(), Layer=SVGP_Layer, white=False):
    num_outputs = num_outputs or Y.shape[1]

    layers = []
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=white))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(
        Layer(kernels[-1], Z_running, num_outputs, mean_function, white=white))
    return layers
def __init__(self, X, Y, Z, kernels, likelihood,
             num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)

    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                # pad with zeros
                zeros = np.zeros((dim_in, dim_out - dim_in))
                W = np.concatenate([np.eye(dim_in), zeros], 1)
            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
def test_vs_single_layer(self):
    lik = Gaussian()
    lik_var = 0.01
    lik.variance = lik_var
    N, Ns = self.X.shape[0], self.Xs.shape[0]
    D_Y, D_X = self.D_Y, self.X.shape[1]
    Y = np.random.randn(N, D_Y)
    Ys = np.random.randn(Ns, D_Y)

    kern = Matern52(self.X.shape[1], lengthscales=0.5)
    # mf = Linear(A=np.random.randn(D_X, D_Y), b=np.random.randn(D_Y))
    mf = Zero()
    m_gpr = GPR(self.X, Y, kern, mean_function=mf)
    m_gpr.likelihood.variance = lik_var
    mean_gpr, var_gpr = m_gpr.predict_y(self.Xs)
    test_lik_gpr = m_gpr.predict_density(self.Xs, Ys)
    pred_m_gpr, pred_v_gpr = m_gpr.predict_f(self.Xs)
    pred_mfull_gpr, pred_vfull_gpr = m_gpr.predict_f_full_cov(self.Xs)

    kerns = []
    kerns.append(Matern52(self.X.shape[1], lengthscales=0.5, variance=1e-1))
    kerns.append(kern)

    layer0 = GPMC_Layer(kerns[0], self.X.copy(), D_X, Identity())
    layer1 = GPR_Layer(kerns[1], mf, D_Y)
    m_dgp = DGP_Heinonen(self.X, Y, lik, [layer0, layer1])

    mean_dgp, var_dgp = m_dgp.predict_y(self.Xs, 1)
    test_lik_dgp = m_dgp.predict_density(self.Xs, Ys, 1)
    pred_m_dgp, pred_v_dgp = m_dgp.predict_f(self.Xs, 1)
    pred_mfull_dgp, pred_vfull_dgp = m_dgp.predict_f_full_cov(self.Xs, 1)

    tol = 1e-4
    assert_allclose(mean_dgp[0], mean_gpr, atol=tol, rtol=tol)
    assert_allclose(test_lik_dgp, test_lik_gpr, atol=tol, rtol=tol)
    assert_allclose(pred_m_dgp[0], pred_m_gpr, atol=tol, rtol=tol)
    assert_allclose(pred_mfull_dgp[0], pred_mfull_gpr, atol=tol, rtol=tol)
    assert_allclose(pred_vfull_dgp[0], pred_vfull_gpr, atol=tol, rtol=tol)
def _init_layers(self, X, Y, Z, dims, kernels, mean_function=Zero(),
                 Layer=SVGPIndependentLayer, white=False):
    """Initialise DGP layers to have the same number of outputs as inputs,
    apart from the final layer."""
    layers = []
    X_running, Z_running = X.copy(), Z.copy()
    for i in range(len(kernels) - 1):
        dim_in, dim_out, kern = dims[i], dims[i + 1], kernels[i]
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            set_trainable(mf.A, False)
            set_trainable(mf.b, False)

        layers.append(Layer(kern, Z_running, dim_out, mf, white=white))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    layers.append(
        Layer(kernels[-1], Z_running, dims[-1], mean_function, white=white))
    return layers
def __init__(self, X, Y, Z, kernels, likelihood,
             num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)

    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        if isinstance(kern_in, Conv):
            dim_in = kern_in.basekern.input_dim
        else:
            dim_in = kern_in.input_dim
        # if isinstance(kern_out, Conv):
        #     dim_out = kern_out.basekern.input_dim
        # else:
        #     dim_out = kern_out.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            # stepping down, use the pca projection
            # (assumes dim_in > dim_out; there is no padding branch here)
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
            b = np.zeros(1, dtype=np.float32)
            mf = Linear(W, b)
            mf.set_trainable(False)

        if isinstance(kern_in, Conv):
            # initialise inducing patches from the unique patches of Z
            Z_patch = np.unique(
                kern_in.compute_patches(Z_running).reshape(-1, kern_in.patch_len),
                axis=0)
            Z_patch = Z_patch[
                np.random.permutation(len(Z_patch))[:Z_running.shape[0]], :]
            layers.append(svconvgp(kern_in, Z_patch, dim_out, mf))
        else:
            layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    if isinstance(kernels[-1], Conv):
        Z_patch = np.unique(
            kernels[-1].compute_patches(Z_running).reshape(-1, kernels[-1].patch_len),
            axis=0)
        Z_patch = Z_patch[
            np.random.permutation(len(Z_patch))[:Z_running.shape[0]], :]
        layers.append(svconvgp(kernels[-1], Z_patch, num_outputs, mean_function))
    else:
        layers.append(
            SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
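# A self-contained NumPy sketch of the inducing-patch initialisation used for
# the Conv layers above: extract all image patches, de-duplicate, subsample.
# extract_patches is a hypothetical stand-in for the kernel's compute_patches.
import numpy as np

def extract_patches(images, img_size, patch_size):
    """Hypothetical stand-in for Conv.compute_patches: all overlapping
    patch_size x patch_size patches of flattened square images."""
    N = images.shape[0]
    imgs = images.reshape(N, img_size, img_size)
    k = img_size - patch_size + 1
    patches = [imgs[:, i:i + patch_size, j:j + patch_size]
               for i in range(k) for j in range(k)]
    return np.stack(patches, 1).reshape(N * k * k, patch_size * patch_size)

Z = np.random.rand(50, 28 * 28)                 # inducing "images"
patches = extract_patches(Z, 28, 5)             # all 5x5 patches
patches = np.unique(patches, axis=0)            # de-duplicate
idx = np.random.permutation(len(patches))[:50]  # subsample back to M patches
Z_patch = patches[idx]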
def init_layers(graph_adj, node_feature, kernels, n_layers, all_layers_dim,
                num_inducing, gc_kernel=True, mean_function="linear",
                white=False, q_diag=False):
    assert mean_function in ["linear", "zero"]  # mean function must be linear or zero

    layers = []

    # get initial Z
    sparse_adj = tuple_to_sparse_matrix(graph_adj[0], graph_adj[1], graph_adj[2])
    X_running = node_feature.copy()

    for i in range(n_layers):
        tf.logging.info("initialize {}th layer".format(i + 1))
        dim_in = all_layers_dim[i]
        dim_out = all_layers_dim[i + 1]

        conv_X = sparse_adj.dot(X_running)
        Z_running = kmeans2(conv_X, num_inducing[i], minit="points")[0]

        kernel = kernels[i]
        if gc_kernel and kernel.gc_weight:
            # match the dimension of Z to the kernel's input dimension
            X_dim = X_running.shape[1]
            kernel_input_dim = kernel.base_kernel.input_dim
            if X_dim > kernel_input_dim:
                # reduce the dimension of Z to the kernel input dimension
                Z_running = pca(Z_running, kernel.base_kernel.input_dim)
            elif X_dim < kernel_input_dim:
                # pad Z with zeros up to the kernel input dimension
                Z_running = np.concatenate(
                    [Z_running,
                     np.zeros((Z_running.shape[0], kernel_input_dim - X_dim))],
                    axis=1)

        if dim_in > dim_out:
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:
            W = np.concatenate(
                [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)

        if mean_function == "zero":
            mf = Zero()
        else:
            if dim_in == dim_out:
                mf = Identity()
            else:
                mf = Linear(W)
                mf.set_trainable(False)

        if gc_kernel:
            feature = GraphConvolutionInducingpoints(Z_running)
        else:
            feature = InducingPoints(Z_running)

        layers.append(svgp_layer(kernel, Z_running, feature, dim_out, mf,
                                 gc_kernel, white=white, q_diag=q_diag))

        if dim_in != dim_out:
            X_running = X_running.dot(W)

    return layers
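# The inducing inputs above come from k-means on the graph-convolved
# features; a minimal sketch of this standard initialisation with SciPy
# (illustrative data and sizes):
import numpy as np
from scipy.cluster.vq import kmeans2

X = np.random.randn(500, 8)   # e.g. convolved node features
M = 32                        # number of inducing points
# minit="points" seeds the centroids from randomly chosen data points,
# which avoids empty clusters on small datasets
Z, _ = kmeans2(X, M, minit="points")
print(Z.shape)                # (32, 8)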
def __init__(
    self,
    kernel: MultioutputKernel,
    inducing_variable: MultioutputInducingVariables,
    num_data: int,
    mean_function: Optional[MeanFunction] = None,
    *,
    num_samples: Optional[int] = None,
    full_cov: bool = False,
    full_output_cov: bool = False,
    num_latent_gps: int = None,
    whiten: bool = True,
    name: Optional[str] = None,
    verbose: bool = False,
):
    """
    :param kernel: The multioutput kernel for this layer.
    :param inducing_variable: The inducing features for this layer.
    :param num_data: The number of points in the training dataset
        (see :attr:`num_data`).
    :param mean_function: The mean function that will be applied to the
        inputs. Default: :class:`~gpflow.mean_functions.Identity`.

        .. note:: The Identity mean function requires the input and output
            dimensionality of this layer to be the same. If you want to
            change the dimensionality in a layer, you may want to provide a
            :class:`~gpflow.mean_functions.Linear` mean function instead.

    :param num_samples: The number of samples to draw when converting the
        :class:`~tfp.layers.DistributionLambda` into a `tf.Tensor`, see
        :meth:`_convert_to_tensor_fn`. Will be stored in the
        :attr:`num_samples` attribute. If `None` (the default), draw a
        single sample without prefixing the sample shape (see
        :class:`tfp.distributions.Distribution`'s `sample()
        <https://www.tensorflow.org/probability/api_docs/python/tfp/distributions/Distribution#sample>`_
        method).
    :param full_cov: Sets default behaviour of calling this layer
        (:attr:`full_cov` attribute): If `False` (the default), only predict
        marginals (diagonal of covariance) with respect to inputs. If `True`,
        predict full covariance over inputs.
    :param full_output_cov: Sets default behaviour of calling this layer
        (:attr:`full_output_cov` attribute): If `False` (the default), only
        predict marginals (diagonal of covariance) with respect to outputs.
        If `True`, predict full covariance over outputs.
    :param num_latent_gps: The number of (latent) GPs in the layer (which can
        be different from the number of outputs, e.g. with a
        :class:`~gpflow.kernels.LinearCoregionalization` kernel). This is
        used to determine the size of the variational parameters
        :attr:`q_mu` and :attr:`q_sqrt`. If possible, it is inferred from the
        *kernel* and *inducing_variable*.
    :param whiten: If `True` (the default), uses the whitened
        parameterisation of the inducing variables; see :attr:`whiten`.
    :param name: The name of this layer.
    :param verbose: The verbosity mode. Set this parameter to `True` to show
        debug information.
    """
    super().__init__(
        make_distribution_fn=self._make_distribution_fn,
        convert_to_tensor_fn=self._convert_to_tensor_fn,
        dtype=default_float(),
        name=name,
    )

    self.kernel = kernel
    self.inducing_variable = inducing_variable
    self.num_data = num_data

    if mean_function is None:
        mean_function = Identity()
    self.mean_function = mean_function

    self.full_output_cov = full_output_cov
    self.full_cov = full_cov
    self.whiten = whiten
    self.verbose = verbose

    try:
        num_inducing, self.num_latent_gps = verify_compatibility(
            kernel, mean_function, inducing_variable)
        # TODO: if num_latent_gps is not None, verify it is equal to self.num_latent_gps
    except GPLayerIncompatibilityException as e:
        if num_latent_gps is None:
            raise e

        if self.verbose:
            warnings.warn(
                "Could not verify the compatibility of the `kernel`, `inducing_variable` "
                "and `mean_function`. We advise using `gpflux.helpers.construct_*` to create "
                "compatible kernels and inducing variables. As "
                f"`num_latent_gps={num_latent_gps}` has been specified explicitly, this will "
                "be used to create the `q_mu` and `q_sqrt` parameters."
            )

        num_inducing, self.num_latent_gps = (
            len(inducing_variable),
            num_latent_gps,
        )

    self.q_mu = Parameter(
        np.zeros((num_inducing, self.num_latent_gps)),
        dtype=default_float(),
        name=f"{self.name}_q_mu" if self.name else "q_mu",
    )  # [num_inducing, num_latent_gps]

    self.q_sqrt = Parameter(
        np.stack([np.eye(num_inducing) for _ in range(self.num_latent_gps)]),
        transform=triangular(),
        dtype=default_float(),
        name=f"{self.name}_q_sqrt" if self.name else "q_sqrt",
    )  # [num_latent_gps, num_inducing, num_inducing]

    self.num_samples = num_samples
def test_vs_DGP2(self):
    lik = Gaussian()
    lik_var = 0.1
    lik.variance = lik_var
    N, Ns = self.X.shape[0], self.Xs.shape[0]
    D_Y, D_X = self.D_Y, self.X.shape[1]
    q_mu = np.random.randn(N, D_X)
    Y = np.random.randn(N, D_Y)
    Ys = np.random.randn(Ns, D_Y)

    kern1 = Matern52(self.X.shape[1], lengthscales=0.5)
    kern2 = Matern52(self.X.shape[1], lengthscales=0.5)
    kerns = [kern1, kern2]
    # mf = Linear(A=np.random.randn(D_X, D_Y), b=np.random.randn(D_Y))
    mf = Zero()

    m_dgp = DGP(self.X, Y, self.X, kerns, lik, mean_function=mf, white=True)
    m_dgp.layers[0].q_mu = q_mu
    m_dgp.layers[0].q_sqrt = m_dgp.layers[0].q_sqrt.read_value() * 1e-24

    Fs, ms, vs = m_dgp.predict_all_layers(self.Xs, 1)
    Z = self.X.copy()
    Z[:len(self.Xs)] = ms[0][0]
    # need to put the inducing points in the right place
    m_dgp.layers[1].feature.Z = Z

    var_list = [[m_dgp.layers[1].q_mu, m_dgp.layers[1].q_sqrt]]
    NatGradOptimizer(gamma=1).minimize(m_dgp, var_list=var_list, maxiter=1)

    mean_dgp, var_dgp = m_dgp.predict_y(self.Xs, 1)
    test_lik_dgp = m_dgp.predict_density(self.Xs, Ys, 1)
    pred_m_dgp, pred_v_dgp = m_dgp.predict_f(self.Xs, 1)
    pred_mfull_dgp, pred_vfull_dgp = m_dgp.predict_f_full_cov(self.Xs, 1)

    layer0 = GPMC_Layer(kerns[0], self.X.copy(), D_X, Identity())
    layer1 = GPR_Layer(kerns[1], mf, D_Y)
    m_heinonen = DGP_Heinonen(self.X, Y, lik, [layer0, layer1])
    m_heinonen.layers[0].q_mu = q_mu

    mean_heinonen, var_heinonen = m_heinonen.predict_y(self.Xs, 1)
    test_lik_heinonen = m_heinonen.predict_density(self.Xs, Ys, 1)
    pred_m_heinonen, pred_v_heinonen = m_heinonen.predict_f(self.Xs, 1)
    pred_mfull_heinonen, pred_vfull_heinonen = m_heinonen.predict_f_full_cov(self.Xs, 1)

    tol = 1e-4
    assert_allclose(mean_dgp, mean_heinonen, atol=tol, rtol=tol)
    assert_allclose(test_lik_dgp, test_lik_heinonen, atol=tol, rtol=tol)
    assert_allclose(pred_m_dgp, pred_m_heinonen, atol=tol, rtol=tol)
    assert_allclose(pred_mfull_dgp, pred_mfull_heinonen, atol=tol, rtol=tol)
    assert_allclose(pred_vfull_dgp, pred_vfull_heinonen, atol=tol, rtol=tol)
def init_layers_graph(X, Y, Z, kernels, gmat,
                      num_layers=2, num_nodes=None, dim_per_node=5,
                      dim_per_X=5, dim_per_Y=5,
                      share_Z=False, nb_init=True):
    layers = []

    def pa_idx(nd, dim_per_in):
        # column indices of node nd's neighbourhood (nodes with positive
        # edge weight in gmat)
        res = []
        for n in range(num_nodes):
            w = gmat[nd, n]
            if w > 0:
                res = res + list(range(n * dim_per_in, (n + 1) * dim_per_in))
        return np.asarray(res)

    X_running, Z_running = X.copy(), Z.copy()
    for l in range(num_layers - 1):
        if l == 0:
            dim_in = dim_per_X
        else:
            dim_in = dim_per_node
        dim_out = dim_per_node

        X_running_tmp = np.zeros((X.shape[0], dim_out * num_nodes))
        Z_running_tmp = np.zeros((Z.shape[0], dim_out * num_nodes))
        mf_lst = ParamList([], trainable=False)

        for nd in range(num_nodes):
            if nb_init:
                pa = pa_idx(nd, dim_in)
            else:
                pa = np.asarray(range(nd * dim_in, (nd + 1) * dim_in))
            agg_dim_in = len(pa)

            if agg_dim_in == dim_out:
                mf = Identity()
            else:
                if agg_dim_in > dim_out:
                    # stepping down, use the pca projection of the
                    # aggregated (neighbourhood) inputs
                    _, _, V = np.linalg.svd(X_running[:, pa], full_matrices=False)
                    W = V[:dim_out, :].T
                else:
                    # stepping up, use identity + padding
                    W = np.concatenate(
                        [np.eye(agg_dim_in),
                         np.zeros((agg_dim_in, dim_out - agg_dim_in))], 1)
                mf = Linear(W)
                mf.set_trainable(False)
            mf_lst.append(mf)

            if agg_dim_in != dim_out:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa].dot(W)
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa].dot(W)
            else:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa]
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa]

        layers.append(
            SVGPG_Layer(kernels[l], Z_running, mf_lst, num_nodes, dim_in, dim_out,
                        gmat, share_Z=share_Z, nb_init=nb_init))
        Z_running = Z_running_tmp
        X_running = X_running_tmp

    # final layer
    if num_layers == 1:
        fin_dim_in = dim_per_X
    else:
        fin_dim_in = dim_per_node
    layers.append(
        SVGPG_Layer(kernels[-1], Z_running, None, num_nodes, fin_dim_in, dim_per_Y,
                    gmat, share_Z=share_Z, nb_init=nb_init))
    return layers
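# A small self-contained demo of the neighbourhood indexing performed by
# pa_idx above: for each node, gather the feature columns of every node with
# positive edge weight (toy adjacency matrix, illustrative sizes).
import numpy as np

gmat = np.array([[1, 1, 0],    # node 0 aggregates nodes 0 and 1
                 [0, 1, 1],    # node 1 aggregates nodes 1 and 2
                 [1, 0, 1]])   # node 2 aggregates nodes 0 and 2
dim_per_in = 2                 # features per node

def pa_idx(nd):
    res = []
    for n in range(gmat.shape[0]):
        if gmat[nd, n] > 0:
            res += list(range(n * dim_per_in, (n + 1) * dim_per_in))
    return np.asarray(res)

print(pa_idx(0))  # [0 1 2 3]: columns of nodes 0 and 1
print(pa_idx(2))  # [0 1 4 5]: columns of nodes 0 and 2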