def _create_gaussian(self, gaussian_type):
    mu = tf.random_normal([3])
    if gaussian_type == tfp.distributions.MultivariateNormalDiag:
        scale_diag = tf.random_normal([3])
        dist = tfp.distributions.MultivariateNormalDiag(mu, scale_diag)
    if gaussian_type == tfp.distributions.MultivariateNormalDiagPlusLowRank:
        scale_diag = tf.random_normal([3])
        perturb_factor = tf.random_normal([3, 2])
        scale_perturb_diag = tf.random_normal([2])
        dist = tfp.distributions.MultivariateNormalDiagPlusLowRank(
            mu,
            scale_diag,
            scale_perturb_factor=perturb_factor,
            scale_perturb_diag=scale_perturb_diag)
    if gaussian_type == tfp.distributions.MultivariateNormalTriL:
        cov = tf.random_uniform([3, 3], minval=0, maxval=1.0)
        # Create a PSD matrix.
        cov = 0.5 * (cov + tf.transpose(cov)) + 3 * tf.eye(3)
        scale = tf.cholesky(cov)
        dist = tfp.distributions.MultivariateNormalTriL(mu, scale)
    if gaussian_type == tfp.distributions.MultivariateNormalFullCovariance:
        cov = tf.random_uniform([3, 3], minval=0, maxval=1.0)
        # Create a PSD matrix.
        cov = 0.5 * (cov + tf.transpose(cov)) + 3 * tf.eye(3)
        dist = tfp.distributions.MultivariateNormalFullCovariance(mu, cov)
    return (dist, mu, dist.covariance())
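# A minimal NumPy sketch (not part of the original test helper) of the PSD trick
# used above: symmetrizing a random matrix and adding a scaled identity makes it
# diagonally dominant, so the Cholesky factorization is guaranteed to succeed.
import numpy as np

rng = np.random.default_rng(0)
cov = rng.uniform(0.0, 1.0, size=(3, 3))
cov = 0.5 * (cov + cov.T) + 3.0 * np.eye(3)    # symmetric + diagonally dominant
scale = np.linalg.cholesky(cov)                # lower-triangular factor
print(np.allclose(scale @ scale.T, cov))       # True: L L^T reproduces cov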
def sample_activations(acts, n_sample):
    """Take n_sample samples from acts.

    input:
        acts: GaussianVar [batch_size (b), hidden size (h)]
    """
    sigma_sqr = acts.var  # [b, h, h]
    sigma = tf.transpose(tf.cholesky(sigma_sqr), [0, 2, 1])  # [b, h, h]
    standard_samples = tf.random_normal(
        [tf.shape(sigma)[0], n_sample, tf.shape(sigma)[-1]])  # [b, n_sample, h]
    samples = tf.matmul(standard_samples, sigma) + tf.expand_dims(
        acts.mean, 1)  # [b, n_sample, h]
    return samples
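# Illustrative NumPy sketch (assumed shapes; the GaussianVar class is not shown
# here) of the reparameterization used above: row-vector samples z @ L^T + mean
# have covariance L L^T = Sigma, which is why the Cholesky factor is transposed.
import numpy as np

rng = np.random.default_rng(0)
mean = np.array([1.0, -2.0, 0.5])                    # [h]
A = rng.normal(size=(3, 3))
sigma_sqr = A @ A.T + 1e-6 * np.eye(3)               # [h, h] covariance
L = np.linalg.cholesky(sigma_sqr)                    # lower triangular
z = rng.normal(size=(10000, 3))                      # standard normal samples
samples = z @ L.T + mean                             # rows ~ N(mean, Sigma)
# the sample covariance approaches sigma_sqr as the number of samples grows
print(np.abs(np.cov(samples, rowvar=False) - sigma_sqr).max())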
def orthonorm_op(x, epsilon=1e-7):
    '''
    Computes a matrix that orthogonalizes the input matrix x

    x:          an n x d input matrix
    epsilon:    small value added to the diagonal of x'x so the Cholesky
                factorization does not fail on a singular matrix

    returns:    a d x d matrix, ortho_weights, which orthogonalizes x by
                right multiplication
    '''
    x_2 = K.dot(K.transpose(x), x)
    x_2 += K.eye(K.int_shape(x)[1]) * epsilon
    L = tf.cholesky(x_2)
    ortho_weights = tf.transpose(tf.matrix_inverse(L)) * tf.sqrt(
        tf.cast(tf.shape(x)[0], dtype=K.floatx()))
    return ortho_weights
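# NumPy sketch (standalone, outside Keras/TensorFlow) of the same orthogonalization:
# with W = sqrt(n) * inv(L)^T and L = chol(x'x + eps*I), the columns of x @ W
# satisfy (xW)'(xW) ~= n * I, i.e. they are orthogonal with norm sqrt(n).
import numpy as np

rng = np.random.default_rng(0)
n, d = 500, 4
x = rng.normal(size=(n, d))
x2 = x.T @ x + 1e-7 * np.eye(d)
L = np.linalg.cholesky(x2)
ortho_weights = np.linalg.inv(L).T * np.sqrt(n)
gram = (x @ ortho_weights).T @ (x @ ortho_weights)
print(np.round(gram / n, 3))   # approximately the identity matrix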
def __init__(self, posts, **kwargs):
    FactorisedPosterior.__init__(self, posts, **kwargs)

    # The full covariance matrix is formed from the Cholesky decomposition
    # to ensure that it remains positive definite.
    #
    # To achieve this, we have to create PxP tensor variables for
    # each parameter vertex, but we then extract only the lower triangular
    # elements and train only on these. The diagonal elements
    # are constructed by the FactorisedPosterior
    if kwargs.get("init", None):
        # We are initializing from an existing posterior.
        # The FactorisedPosterior will already have extracted the mean and
        # diagonal of the covariance matrix - we need the Cholesky decomposition
        # of the covariance to initialize the off-diagonal terms
        self.log.info(" - Initializing posterior covariance from input posterior")
        _mean, cov = kwargs["init"]
        covar_init = tf.cholesky(cov)
    else:
        covar_init = tf.zeros([self.nvertices, self.nparams, self.nparams],
                              dtype=tf.float32)

    self.off_diag_vars_base = self.log_tf(tf.Variable(covar_init, validate_shape=False,
                                                      name='%s_off_diag_vars' % self.name))
    if kwargs.get("suppress_nan", True):
        self.off_diag_vars = tf.where(tf.is_nan(self.off_diag_vars_base),
                                      tf.zeros_like(self.off_diag_vars_base),
                                      self.off_diag_vars_base)
    else:
        self.off_diag_vars = self.off_diag_vars_base

    self.off_diag_cov_chol = tf.matrix_set_diag(tf.matrix_band_part(self.off_diag_vars, -1, 0),
                                                tf.zeros([self.nvertices, self.nparams]),
                                                name='%s_off_diag_cov_chol' % self.name)

    # Combine diagonal and off-diagonal elements into full matrix
    self.cov_chol = tf.add(tf.matrix_diag(self.std), self.off_diag_cov_chol,
                           name='%s_cov_chol' % self.name)

    # Form the covariance matrix from the chol decomposition
    self.cov = tf.matmul(tf.transpose(self.cov_chol, perm=(0, 2, 1)), self.cov_chol,
                         name='%s_cov' % self.name)

    self.cov_chol = self.log_tf(self.cov_chol)
    self.cov = self.log_tf(self.cov)
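# Minimal NumPy sketch (one vertex only, hypothetical numbers; not the original
# class) of the parameterization above: a strictly lower-triangular free matrix
# plus a diagonal of standard deviations gives a Cholesky-style factor C, and
# C^T C is positive semi-definite by construction.
import numpy as np

nparams = 3
std = np.array([0.5, 1.0, 2.0])                    # diagonal, as built elsewhere
free = np.random.default_rng(0).normal(size=(nparams, nparams))
off_diag = np.tril(free, k=-1)                     # keep strictly lower triangle
cov_chol = np.diag(std) + off_diag                 # combined factor C
cov = cov_chol.T @ cov_chol                        # symmetric PSD covariance
print(np.all(np.linalg.eigvalsh(cov) > 0))         # True (diagonal is non-zero)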
def get_y_values(self, x_values, num_total_points):
    # Set kernel parameters
    l1 = (tf.ones(shape=[self._batch_size, self._y_size, self._x_size]) *
          self._l1_scale)
    sigma_f = tf.ones(shape=[self._batch_size, self._y_size]) * self._sigma_scale

    # Pass the x_values through the Gaussian kernel
    # [batch_size, y_size, num_total_points, num_total_points]
    kernel = self._gaussian_kernel(x_values, l1, sigma_f)

    # Calculate Cholesky, using double precision for better stability:
    cholesky = tf.cast(tf.cholesky(tf.cast(kernel, tf.float64)), tf.float32)

    # Sample a curve
    # [batch_size, y_size, num_total_points, 1]
    y_values = tf.matmul(
        cholesky,
        tf.random_normal([self._batch_size, self._y_size, num_total_points, 1]))

    # [batch_size, num_total_points, y_size]
    y_values = tf.transpose(tf.squeeze(y_values, 3), [0, 2, 1])
    return y_values
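# Standalone NumPy sketch (hypothetical kernel parameters, single output) of the
# sampling step above: a curve is drawn from a GP prior by multiplying the Cholesky
# factor of the kernel matrix with standard normal noise. The small jitter keeps the
# factorization stable, playing the same role as the float64 cast in the code above.
import numpy as np

rng = np.random.default_rng(0)
x = np.linspace(-2.0, 2.0, 100)[:, None]            # [num_points, 1]
l1, sigma_f = 0.6, 1.0                               # length scale / signal scale
sq_dist = (x - x.T) ** 2
kernel = sigma_f ** 2 * np.exp(-0.5 * sq_dist / l1 ** 2)
cholesky = np.linalg.cholesky(kernel + 1e-6 * np.eye(len(x)))
y = cholesky @ rng.normal(size=(len(x), 1))          # one sampled curve, cov ~= kernel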
def generate_curves(self, num_context=None):
    """Builds the op delivering the data.

    Generated functions are `float32` with x values between -2 and 2.

    Args:
        num_context: Number of context points. If None, chosen randomly.

    Returns:
        A `NPRegressionDescription` namedtuple.
    """
    if num_context is None:
        num_context = tf.random_uniform(
            shape=[], minval=3, maxval=self._max_num_context, dtype=tf.int32)

    # If we are testing we want to have more targets and have them evenly
    # distributed in order to plot the function.
    if self._testing:
        num_target = 400
        num_total_points = num_target
        x_values = tf.tile(
            tf.expand_dims(tf.range(-2., 2., 1. / 100, dtype=tf.float32), axis=0),
            [self._batch_size, 1])
        x_values = tf.expand_dims(x_values, axis=-1)
    # During training the number of target points and their x-positions are
    # selected at random
    else:
        num_target = tf.random_uniform(shape=(), minval=0,
                                       maxval=self._max_num_context - num_context,
                                       dtype=tf.int32)
        num_total_points = num_context + num_target
        x_values = tf.random_uniform(
            [self._batch_size, num_total_points, self._x_size], -2, 2)

    # Set kernel parameters
    # Either choose a set of random parameters for the mini-batch
    if self._random_kernel_parameters:
        l1 = tf.random_uniform([self._batch_size, self._y_size, self._x_size],
                               0.1, self._l1_scale)
        sigma_f = tf.random_uniform([self._batch_size, self._y_size],
                                    0.1, self._sigma_scale)
    # Or use the same fixed parameters for all mini-batches
    else:
        l1 = (tf.ones(shape=[self._batch_size, self._y_size, self._x_size]) *
              self._l1_scale)
        sigma_f = (tf.ones(shape=[self._batch_size, self._y_size]) *
                   self._sigma_scale)

    # Pass the x_values through the Gaussian kernel
    # [batch_size, y_size, num_total_points, num_total_points]
    kernel = self._gaussian_kernel(x_values, l1, sigma_f)

    # Calculate Cholesky, using double precision for better stability:
    cholesky = tf.cast(tf.cholesky(tf.cast(kernel, tf.float64)), tf.float32)

    # Sample a curve
    # [batch_size, y_size, num_total_points, 1]
    y_values = tf.matmul(
        cholesky,
        tf.random_normal([self._batch_size, self._y_size, num_total_points, 1]))

    # [batch_size, num_total_points, y_size]
    y_values = tf.transpose(tf.squeeze(y_values, 3), [0, 2, 1])

    if self._testing:
        # Select the targets
        target_x = x_values
        target_y = y_values

        # Select the observations
        idx = tf.random_shuffle(tf.range(num_target))
        context_x = tf.gather(x_values, idx[:num_context], axis=1)
        context_y = tf.gather(y_values, idx[:num_context], axis=1)
    else:
        # Select the targets which will consist of the context points as well as
        # some new target points
        target_x = x_values[:, :num_target + num_context, :]
        target_y = y_values[:, :num_target + num_context, :]

        # Select the observations
        context_x = x_values[:, :num_context, :]
        context_y = y_values[:, :num_context, :]

    return NPRegressionDescription(
        context_x=context_x,
        context_y=context_y,
        target_x=target_x,
        target_y=target_y)
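# Small NumPy sketch (assumed array shapes, not the original dataset class) of the
# context/target selection used in the training branch above: the targets contain
# the context points plus additional points, and the context is a leading slice of
# the same arrays.
import numpy as np

batch_size, num_context, num_target, x_size = 2, 3, 5, 1
x_values = np.random.default_rng(0).uniform(
    -2, 2, size=(batch_size, num_context + num_target, x_size))
target_x = x_values[:, :num_target + num_context, :]    # context + new targets
context_x = x_values[:, :num_context, :]                # leading context slice
print(context_x.shape, target_x.shape)                  # (2, 3, 1) (2, 8, 1)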
def create_model(self, x, y, *args):
    if self.process_y:
        self.f_mu = Regression().fit(x, y)
        self.Ymu = self.f_mu(x)
        self.Ys2 = np.std((y - self.Ymu))
        y = (y - self.Ymu) / self.Ys2

    self.t_X = tf.constant(x, dtype=self.dtype)
    self.t_Y = tf.constant(y, dtype=self.dtype)
    self.t_N = tf.shape(self.t_Y)[0]
    self.t_D = tf.shape(self.t_Y)[1]
    self.t_Q = tf.shape(self.t_X)[0]
    self.t_M = tf.shape(self.t_X)[1]
    self.M = x.shape[1]

    if self.kernel == 'Squared Exponential':
        self.kernel_function = self.sq_exp_kernel
        self.signal_var = self.init_variable(args[0][0], positive=True)
        self.lengthscale = self.init_variable([args[0][1]] * self.M,
                                              positive=True, multi=self.variable_l)
        self.noise_var = self.init_variable(args[0][2], positive=True)
        self.hparamd = ['Signal Variance', 'Lengthscale']
        self.hparams = [self.signal_var, self.lengthscale]

    if self.kernel == 'Periodic':
        self.kernel_function = self.sq_exp_kernel
        self.signal_var = self.init_variable(args[0][0], True)
        self.gamma = self.init_variable(args[0][0], True)
        self.period = self.init_variable(args[0][0], True)
        self.noise_var = self.init_variable(args[0][0], True)
        self.p_mu = self.init_variable(tf.log(self.t_Y), False)
        self.p_s2 = self.init_variable(1.0, True)
        self.hparamd = ['Signal Variance', 'Gamma', 'Period']
        self.hparams = [self.signal_var, self.gamma, self.period]

    self.create_kernel = lambda t_x1, t_x2: self.kernel_function(
        t_x1, t_x2, self.hparams)

    ### CREATING THE TRAINING MATRICES ###
    self.K_xx = self.create_kernel(self.t_X, self.t_X) + (
        self.noise_var + self.jitter) * tf.eye(self.t_N, dtype=self.dtype)
    self.L_xx = tf.cholesky(self.K_xx)
    self.logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.L_xx)))
    self.Kinv_YYt = 0.5 * tf.reduce_sum(
        tf.square(tf.matrix_triangular_solve(self.L_xx, self.t_Y, lower=True)))

    ### Initialising loose priors ###
    self.hprior = 0
    if self.variable_l:
        self.hprior += 0.5 * tf.square(tf.log(self.hparams[0]))
        self.hprior += tf.reduce_sum(0.5 * tf.square(tf.log(self.hparams[1])))
    else:
        for i in self.hparams:
            self.hprior += 0.5 * tf.square(tf.log(i))
    self.noise_prior = 0.5 * tf.square(tf.log(self.noise_var))

    ### Negative marginal log likelihood under Gaussian assumption ###
    if self.distribution == 'Gaussian':
        pi_term = tf.constant(0.5 * np.log(2.0 * np.pi), dtype=self.dtype)
        self.term1 = (pi_term * tf.cast(self.t_D, dtype=self.dtype)
                      * tf.cast(self.t_N, dtype=self.dtype)
                      + 0.5 * tf.cast(self.t_D, dtype=self.dtype) * self.logdet
                      + self.Kinv_YYt)

    if self.distribution == 'Poisson' and self.kernel == 'Periodic':
        self.Kinv = tf.cholesky_solve(self.L_xx, tf.eye(self.t_N, dtype=self.dtype))
        self.term1 = (-tf.reduce_sum(self.t_Y * self.p_mu
                                     - tf.exp(self.p_mu + self.p_s2 / 2))
                      + (1 / 2) * (tf.trace(self.Kinv @ (self.p_s2 * tf.eye(self.t_N, dtype=self.dtype)
                                                         + self.p_mu @ tf.transpose(self.p_mu)))
                                   - tf.cast(self.t_N, dtype=self.dtype)
                                   + self.logdet
                                   - tf.cast(self.t_N, dtype=self.dtype) * tf.log(self.p_s2)))

    self.objective = self.term1 + self.hprior + self.noise_prior
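# Illustrative NumPy sketch (hypothetical data, single output dimension) of the
# Gaussian negative log marginal likelihood computed through the Cholesky factor,
# mirroring logdet and Kinv_YYt above:
#   NLL = 0.5*N*log(2*pi) + 0.5*log|K| + 0.5*y^T K^{-1} y
import numpy as np

rng = np.random.default_rng(0)
x = np.linspace(0, 1, 50)[:, None]
y = np.sin(6 * x) + 0.1 * rng.normal(size=(50, 1))
signal_var, lengthscale, noise_var = 1.0, 0.2, 0.01
K = signal_var * np.exp(-0.5 * (x - x.T) ** 2 / lengthscale ** 2)
K += (noise_var + 1e-8) * np.eye(len(x))               # noise + jitter on the diagonal
L = np.linalg.cholesky(K)
logdet = 2.0 * np.sum(np.log(np.diag(L)))               # log|K| from the Cholesky diagonal
alpha = np.linalg.solve(L, y)                           # L^{-1} y
quad = 0.5 * np.sum(alpha ** 2)                         # 0.5 * y^T K^{-1} y
nll = 0.5 * len(x) * np.log(2 * np.pi) + 0.5 * logdet + quad
print(nll)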
def base_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False):
    """
    Given g1 and g2, and distributions p and q such that
        p(g2)    = N(g2; 0, Kmm)
        p(g1)    = N(g1; 0, Knn)
        p(g1|g2) = N(g1; Knm Kmm^{-1} g2, Knn - Knm Kmm^{-1} Kmn)
    and
        q(g2) = N(g2; f, q_sqrt q_sqrt^T)
    this method computes the mean and (co)variance of
        q(g1) = \int q(g2) p(g1|g2) dg2

    :param Kmn: M x N
    :param Kmm: M x M
    :param Knn: N x N or N
    :param f: M x R
    :param full_cov: bool
    :param q_sqrt: None or R x M x M (lower triangular)
    :param white: bool
    :return: N x R or R x N x N
    """
    # compute kernel stuff
    num_func = tf.shape(f)[1]  # R
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = Knn - tf.matmul(A, A, transpose_a=True)
        fvar = tf.tile(fvar[None, :, :], [num_func, 1, 1])  # R x N x N
    else:
        fvar = Knn - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(fvar[None, :], [num_func, 1])  # R x N

    # another backsubstitution in the unwhitened case
    if not white:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # R x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = q_sqrt
            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
            LTA = tf.matmul(L, A_tiled, transpose_a=True)  # R x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # R x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # R x N

    if not full_cov:
        fvar = tf.transpose(fvar)  # N x R

    return fmean, fvar  # N x R, R x N x N or N x R
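# NumPy sketch (hypothetical RBF kernel; white=False, q_sqrt=None case only) of the
# identity behind the code above: with A = Lm^{-1} Kmn, the conditional covariance
# Knn - A^T A equals Knn - Knm Kmm^{-1} Kmn, and after one more backsubstitution the
# conditional mean is A^T f = Knm Kmm^{-1} f.
import numpy as np

rng = np.random.default_rng(0)
Z = rng.uniform(-1, 1, size=(5, 1))                  # M inducing inputs
X = rng.uniform(-1, 1, size=(8, 1))                  # N test inputs
k = lambda a, b: np.exp(-0.5 * (a - b.T) ** 2 / 0.3 ** 2)
Kmm = k(Z, Z) + 1e-8 * np.eye(5)
Kmn, Knn = k(Z, X), k(X, X)
f = rng.normal(size=(5, 1))                          # mean of q(g2), R = 1

Lm = np.linalg.cholesky(Kmm)
A = np.linalg.solve(Lm, Kmn)                         # Lm^{-1} Kmn
fvar = Knn - A.T @ A                                 # conditional covariance
A = np.linalg.solve(Lm.T, A)                         # second solve: Kmm^{-1} Kmn
fmean = A.T @ f                                      # conditional mean
print(np.allclose(fvar, Knn - Kmn.T @ np.linalg.solve(Kmm, Kmn)))  # True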