def _apply(self, x1, x2, example_ndims=0):
  """Evaluates `bias**2 + slope**2 * sum((x1 - shift) * (x2 - shift))**exponent`.

  Every parameter (`shift`, `exponent`, `slope_variance`, `bias_variance`) is
  optional; a parameter that is `None` is simply left out of the formula.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    example_ndims: Number of size-1 dimensions appended to `exponent`,
      `slope_variance` and `bias_variance` before they are combined with the
      reduced product.

  Returns:
    The kernel values, with the rightmost `self.feature_ndims` dimensions of
    the elementwise product summed out.
  """
  if self.shift is None:
    centered_x1, centered_x2 = x1, x2
  else:
    shift = tf.convert_to_tensor(self.shift)
    centered_x1, centered_x2 = x1 - shift, x2 - shift
  kernel_values = util.sum_rightmost_ndims_preserving_shape(
      centered_x1 * centered_x2, ndims=self.feature_ndims)

  if self.exponent is not None:
    power = util.pad_shape_with_ones(
        tf.convert_to_tensor(self.exponent), example_ndims)
    kernel_values = kernel_values ** power

  if self.slope_variance is not None:
    slope = util.pad_shape_with_ones(
        tf.convert_to_tensor(self.slope_variance), example_ndims)
    kernel_values = kernel_values * slope**2.

  if self.bias_variance is not None:
    bias = util.pad_shape_with_ones(
        tf.convert_to_tensor(self.bias_variance), example_ndims)
    kernel_values = kernel_values + bias**2.

  return kernel_values
def _apply(self, x1, x2, example_ndims=0):
  """Evaluates `amplitude**2 * (1 + d / (2 * ls**2 * rate))**-rate`.

  Here `d` is the squared difference of `x1` and `x2` summed over the rightmost
  `self.feature_ndims` dimensions, `ls` is `length_scale` and `rate` is
  `scale_mixture_rate`. Parameters that are `None` are skipped; a missing
  `scale_mixture_rate` behaves as a rate of 1.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    example_ndims: Number of size-1 dimensions appended to each parameter
      before it is combined with the reduced squared distance.

  Returns:
    The kernel values.
  """
  half_sq_dist = util.sum_rightmost_ndims_preserving_shape(
      tf.math.squared_difference(x1, x2), ndims=self.feature_ndims) / 2

  if self.length_scale is not None:
    length_scale = util.pad_shape_with_ones(
        tf.convert_to_tensor(self.length_scale), ndims=example_ndims)
    half_sq_dist = half_sq_dist / length_scale**2

  # With no mixture rate the power defaults to 1 and no rescaling is applied.
  power = 1.
  if self.scale_mixture_rate is not None:
    power = util.pad_shape_with_ones(
        tf.convert_to_tensor(self.scale_mixture_rate), ndims=example_ndims)
    half_sq_dist = half_sq_dist / power

  kernel_values = (1. + half_sq_dist) ** -power

  if self.amplitude is not None:
    amplitude = util.pad_shape_with_ones(
        tf.convert_to_tensor(self.amplitude), ndims=example_ndims)
    kernel_values = kernel_values * amplitude**2

  return kernel_values
def _apply(self, x1, x2, param_expansion_ndims=0):
  """Evaluates `bias**2 + slope**2 * sum((x1 - shift) * (x2 - shift))**exponent`.

  Any of `shift`, `exponent`, `slope_variance` and `bias_variance` may be
  `None`, in which case the corresponding step is skipped.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    param_expansion_ndims: Number of size-1 dimensions appended to `exponent`,
      `slope_variance` and `bias_variance` before use.

  Returns:
    The kernel values, with the rightmost `self.feature_ndims` dimensions of
    the elementwise product summed out.
  """
  if self.shift is None:
    lhs, rhs = x1, x2
  else:
    lhs, rhs = x1 - self.shift, x2 - self.shift
  kernel_values = util.sum_rightmost_ndims_preserving_shape(
      lhs * rhs, ndims=self.feature_ndims)

  if self.exponent is not None:
    power = util.pad_shape_with_ones(self.exponent, param_expansion_ndims)
    kernel_values = kernel_values ** power

  if self.slope_variance is not None:
    slope = util.pad_shape_with_ones(
        self.slope_variance, param_expansion_ndims)
    kernel_values = kernel_values * slope ** 2.

  if self.bias_variance is not None:
    bias = util.pad_shape_with_ones(
        self.bias_variance, param_expansion_ndims)
    kernel_values = kernel_values + bias ** 2.

  return kernel_values
def transform_by_kumaraswamy(x, feature_ndims, example_ndims):
  """Maps features `x` through the inverse of a Kumaraswamy bijector.

  Args:
    x: Features to transform.
    feature_ndims: Number of trailing feature dimensions of `x`.
    example_ndims: Number of size-1 dimensions inserted into each
      concentration parameter, just before its last `feature_ndims`
      dimensions.

  Returns:
    `bij.inverse(x)` for the Kumaraswamy bijector built from the padded
    concentrations.
  """
  # Insert the singleton dims immediately to the left of the feature dims.
  insert_at = -(feature_ndims + 1)
  padded_concentration1 = util.pad_shape_with_ones(
      self.concentration1, example_ndims, start=insert_at)
  padded_concentration0 = util.pad_shape_with_ones(
      self.concentration0, example_ndims, start=insert_at)
  kumaraswamy = bijectors.Kumaraswamy(
      padded_concentration1, padded_concentration0,
      validate_args=validate_args)
  # The inverse of the bijector is the Kumaraswamy CDF, which is what we want.
  return kumaraswamy.inverse(x)
def _apply(self, x1, x2, example_ndims=0):
  """Evaluates `amplitude**2 * exp(-d / (2 * length_scale**2))`.

  `d` is the squared difference of `x1` and `x2` summed over the rightmost
  `self.feature_ndims` dimensions. The computation is carried out in log
  space and exponentiated once at the end. Parameters that are `None` are
  skipped.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    example_ndims: Number of size-1 dimensions appended to `length_scale` and
      `amplitude` before use.

  Returns:
    The kernel values.
  """
  sq_dist = util.sum_rightmost_ndims_preserving_shape(
      tf.math.squared_difference(x1, x2), self.feature_ndims)
  log_kernel = -0.5 * sq_dist

  if self.length_scale is not None:
    padded_length_scale = util.pad_shape_with_ones(
        self.length_scale, example_ndims)
    log_kernel = log_kernel / padded_length_scale**2

  if self.amplitude is not None:
    padded_amplitude = util.pad_shape_with_ones(self.amplitude, example_ndims)
    # Multiplying by amplitude**2 is adding 2 * log(amplitude) in log space.
    log_kernel = log_kernel + 2. * tf.math.log(padded_amplitude)

  return tf.exp(log_kernel)
def testPadShapeMiddleWithOnes(self):
  """Checks where the three singleton dims land for several `start` values."""
  x = np.ones([2, 3, 5], np.float32)
  # (extra keyword arguments, expected shape after padding with 3 ones).
  cases = (
      ({}, [2, 3, 5, 1, 1, 1]),
      ({'start': -2}, [2, 3, 1, 1, 1, 5]),
      ({'start': -3}, [2, 1, 1, 1, 3, 5]),
  )
  for kwargs, expected_shape in cases:
    padded = self.evaluate(util.pad_shape_with_ones(x, 3, **kwargs))
    self.assertAllEqual(padded.shape, expected_shape)
def _apply(self, x1, x2, example_ndims):
  """Applies the base kernel minus a correction built from `fixed_inputs`.

  Computes `k(x1, x2) - sum_z k(x1, z) * M(z, x2)`, where `k` is
  `self.base_kernel`, `z` ranges over `self.fixed_inputs`, and `M` is obtained
  by two triangular solves against
  `self._cholesky_bijector.forward(self._divisor_matrix)` (presumably a
  Cholesky factor of the divisor matrix -- confirm against the bijector).
  When `self._is_fixed_inputs_empty()` is true the base kernel value is
  returned unchanged.

  Args:
    x1: First kernel input, shape `B1 + E1 + F`.
    x2: Second kernel input, shape `B2 + E2 + F`.
    example_ndims: Number of example dimensions in `x1` and `x2`.

  Returns:
    The corrected kernel values; see the shape annotations in the body.
  """
  # In the shape annotations below,
  #
  #  - x1 has shape B1 + E1 + F (batch, example, feature),
  #  - x2 has shape B2 + E2 + F,
  #  - z refers to self.fixed_inputs, and has shape Bz + [ez] + F, ie its
  #    example ndims is exactly 1,
  #  - self.base_kernel has batch shape Bk,
  #  - bc(A, B, C) means "the result of broadcasting shapes A, B, and C".

  # Shape: bc(Bk, B1, B2) + bc(E1, E2)
  k12 = self.base_kernel.apply(x1, x2, example_ndims)
  if self._is_fixed_inputs_empty():
    return k12

  # Shape: bc(Bk, B1, Bz) + E1 + [ez]
  k1z = self.base_kernel.tensor(x1, self.fixed_inputs,
                                x1_example_ndims=example_ndims,
                                x2_example_ndims=1)

  # Shape: bc(Bk, B2, Bz) + E2 + [ez]
  k2z = self.base_kernel.tensor(x2, self.fixed_inputs,
                                x1_example_ndims=example_ndims,
                                x2_example_ndims=1)

  # Shape: bc(Bz, Bk) + [ez, ez]
  div_mat_chol = self._cholesky_bijector.forward(self._divisor_matrix)

  # Shape: bc(Bz, Bk) + [1, ..., 1] + [ez, ez]
  #                      `--------'
  #                          `-- (example_ndims - 1) ones
  # This reshape ensures that the batch shapes here align correctly with the
  # batch shape of k2z, below: `example_ndims` because E2 has rank
  # `example_ndims`, and "- 1" because one of the ez's here already "pushed"
  # the batch dims over by one.
  div_mat_chol = util.pad_shape_with_ones(div_mat_chol, example_ndims - 1, -3)
  div_mat_chol_linop = tf.linalg.LinearOperatorLowerTriangular(
      div_mat_chol)

  # Shape: bc(Bz, Bk, B2) + E2 + [ez]
  kzzinv_kz2 = tf.linalg.matrix_transpose(
      # Shape: bc(Bz, Bk, B2) + E2[:-1] + [ez] + E2[-1]
      div_mat_chol_linop.solve(
          # Shape: bc(Bz, Bk, B2) + E2[:-1] + [ez] + E2[-1]
          div_mat_chol_linop.solve(k2z, adjoint_arg=True),
          adjoint=True))

  # Shape: bc(Bz, Bk, B1, B2) + bc(E1, E2)
  k1z_kzzinv_kz2 = tf.reduce_sum(
      # Shape: bc(Bz, Bk, B1, B2) + bc(E1, E2) + [ez]
      input_tensor=k1z * kzzinv_kz2,
      axis=-1)  # we can safely always reduce just this one trailing dim,
                # since self.fixed_inputs is presumed to have example_ndims
                # exactly 1.

  # Shape: bc(Bz, Bk, B1, B2) + bc(E1, E2)
  return k12 - k1z_kzzinv_kz2
def _apply(self, x1, x2, param_expansion_ndims=0):
  """Evaluates `amplitude**2 * exp(-||x1 - x2|| / length_scale)`.

  The norm is taken over the rightmost `self.feature_ndims` dimensions.
  Parameters that are `None` are skipped.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    param_expansion_ndims: Number of size-1 dimensions appended to
      `length_scale` and `amplitude` before use.

  Returns:
    The kernel values.
  """
  sq_dist = util.sum_rightmost_ndims_preserving_shape(
      tf.math.squared_difference(x1, x2), self.feature_ndims)
  # A plain sqrt would produce NaN gradients when `x1 == x2`; this variant is
  # used to avoid them.
  distance = util.sqrt_with_finite_grads(sq_dist)

  if self.length_scale is not None:
    padded_length_scale = util.pad_shape_with_ones(
        self.length_scale, ndims=param_expansion_ndims)
    distance = distance / padded_length_scale

  log_kernel = -distance

  if self.amplitude is not None:
    padded_amplitude = util.pad_shape_with_ones(
        self.amplitude, ndims=param_expansion_ndims)
    log_kernel = log_kernel + 2. * tf.math.log(padded_amplitude)

  return tf.exp(log_kernel)
def _apply(self, x1, x2, example_ndims=0):
  """Evaluates `amplitude**2 * (1 + z + z**2 / 3) * exp(-z)`.

  Here `z = sqrt(5) * ||x1 - x2|| / length_scale`, with the norm taken over
  the rightmost `self.feature_ndims` dimensions. The product is assembled in
  log space and exponentiated once. Parameters that are `None` are skipped.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    example_ndims: Number of size-1 dimensions appended to `length_scale` and
      `amplitude` before use.

  Returns:
    The kernel values.
  """
  sq_dist = util.sum_rightmost_ndims_preserving_shape(
      tf.math.squared_difference(x1, x2), self.feature_ndims)
  # A plain sqrt would produce NaN gradients when `x1 == x2`; this variant is
  # used to avoid them.
  distance = util.sqrt_with_finite_grads(sq_dist)

  if self.length_scale is not None:
    padded_length_scale = util.pad_shape_with_ones(
        self.length_scale, ndims=example_ndims)
    distance = distance / padded_length_scale

  z = np.sqrt(5) * distance
  # log((1 + z + z**2 / 3) * exp(-z)) == log1p(z + z**2 / 3) - z.
  log_kernel = tf.math.log1p(z + z**2 / 3.) - z

  if self.amplitude is not None:
    padded_amplitude = util.pad_shape_with_ones(self.amplitude, example_ndims)
    log_kernel = log_kernel + 2. * tf.math.log(padded_amplitude)

  return tf.exp(log_kernel)
def testPadShapeRightWithOnesCanBeGraphNoop(self):
  """Padding with `ndims=0` must not add any ops to the graph."""

  def graph_defs_around_padding(ndims):
    """Returns the graph def before and after padding a constant by `ndims`."""
    # Use an explicitly created graph, to make sure no whacky test fixture
    # graph reuse is going on in the background.
    graph = tf.Graph()
    with graph.as_default():
      x = tf.constant(np.ones([3], np.float32))
      before = graph.as_graph_def()
      util.pad_shape_with_ones(x, ndims)
      after = graph.as_graph_def()
    return before, after

  # First ensure graph actually *is* changed when we use non-trivial ndims.
  before, after = graph_defs_around_padding(3)
  self.assertNotEqual(before, after)

  # Now verify that graphdef is unchanged (no extra ops) when we pass ndims=0.
  before, after = graph_defs_around_padding(0)
  self.assertEqual(before, after)
def rescale_input(x, feature_ndims, example_ndims):
  """Computes `x / scale_diag`.

  Args:
    x: Input to rescale.
    feature_ndims: Number of trailing feature dimensions.
    example_ndims: Number of size-1 dimensions inserted into `scale_diag`.

  Returns:
    `x` divided elementwise by the padded `scale_diag`.
  """
  # Insert the singleton dims just before the first feature dimension. This
  # assumes scale_diag has at least as many dimensions as feature_ndims.
  insert_at = -(feature_ndims + 1)
  padded_scale_diag = util.pad_shape_with_ones(
      self.scale_diag, example_ndims, start=insert_at)
  return x / padded_scale_diag
def testPadShapeRightWithOnesDynamicShape(self):
  """Checks static and runtime shapes when the input shape is not fully known."""
  # Placeholders with unknown shape only exist in graph mode.
  if tf.executing_eagerly():
    return

  ones = np.ones([3], np.float32)

  # Partially unknown shape: rank is known, the single dim is not.
  partially_known = tf.compat.v1.placeholder_with_default(ones, [None])
  cases = (
      ({}, [None, 1, 1, 1], [3, 1, 1, 1]),
      ({'start': -2}, [1, 1, 1, None], [1, 1, 1, 3]),
  )
  for kwargs, static_shape, runtime_shape in cases:
    padded = util.pad_shape_with_ones(partially_known, 3, **kwargs)
    self.assertAllEqual(padded.shape.as_list(), static_shape)
    self.assertAllEqual(self.evaluate(padded).shape, runtime_shape)

  # Totally unknown shape: not even the rank is known statically.
  fully_unknown = tf.compat.v1.placeholder_with_default(ones, None)
  padded = util.pad_shape_with_ones(fully_unknown, 3)
  self.assertIsNone(padded.shape.ndims)
  self.assertAllEqual(self.evaluate(padded).shape, [3, 1, 1, 1])
def _apply(self, x1, x2, param_expansion_ndims=1):
  """Evaluates `amplitude**2 * exp(-0.5 * sum(ls**2 * (x1 - x2)**2, axis=-1))`.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    param_expansion_ndims: Number of size-1 dimensions appended to
      `amplitude` before use.

  Returns:
    The kernel values, with the last dimension summed out.
  """
  # Insert two singleton dims just before the last dim of length_scale.
  length_scale = tf.expand_dims(self.length_scale, -2)
  length_scale = tf.expand_dims(length_scale, -2)

  weighted_sq_dist = tf.reduce_sum(
      length_scale**2 * tf.math.squared_difference(x1, x2), axis=-1)
  kernel_values = tf.exp(-0.5 * weighted_sq_dist)

  padded_amplitude = util.pad_shape_with_ones(
      self.amplitude, param_expansion_ndims)
  return kernel_values * padded_amplitude**2
def _apply(self, x1, x2, param_expansion_ndims=0):
  """Returns `multiplier * sum(x1 + x2, axis=-1)`.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    param_expansion_ndims: Number of size-1 dimensions appended to the
      multiplier before use.

  Returns:
    The sum of `x1 + x2` over the last axis, scaled by the padded multiplier
    when `self.multiplier` is not `None`.
  """
  lhs = tf.convert_to_tensor(value=x1)
  rhs = tf.convert_to_tensor(value=x2)
  total = tf.reduce_sum(input_tensor=lhs + rhs, axis=-1)

  if self.multiplier is None:
    return total

  padded_multiplier = kernels_util.pad_shape_with_ones(
      self._multiplier, param_expansion_ndims)
  return total * padded_multiplier
def _apply(self, x1, x2, example_ndims=0):
  """Returns `multiplier * sum(x1 + x2, axis=-1)`.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    example_ndims: Number of size-1 dimensions appended to the multiplier
      before use.

  Returns:
    The sum of `x1 + x2` over the last axis, scaled by the padded multiplier
    when `self.multiplier` is not `None`.
  """
  lhs = tf.convert_to_tensor(x1)
  rhs = tf.convert_to_tensor(x2)
  total = tf.reduce_sum(lhs + rhs, axis=-1)

  if self.multiplier is None:
    return total

  padded_multiplier = kernels_util.pad_shape_with_ones(
      self._multiplier, example_ndims)
  return total * padded_multiplier
def _apply(self, x1, x2, example_ndims=0):
  """Evaluates `amplitude**2 * exp(-2 * sum(sin(pi * |x1 - x2| / p)**2) / ls**2)`.

  `p` is `period` and `ls` is `length_scale`; the sum runs over the rightmost
  `self.feature_ndims` dimensions. Parameters that are `None` are skipped.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    example_ndims: Number of size-1 dimensions appended to `length_scale` and
      `amplitude`; `period` receives `example_ndims + self.feature_ndims`.

  Returns:
    The kernel values.
  """
  phase = np.pi * tf.abs(x1 - x2)

  if self.period is not None:
    # period acts as a batch of periods and is applied before the feature
    # dims are summed out, so it needs self.feature_ndims extra ones.
    padded_period = util.pad_shape_with_ones(
        self.period, ndims=(example_ndims + self.feature_ndims))
    phase = phase / padded_period

  log_kernel = util.sum_rightmost_ndims_preserving_shape(
      -2 * tf.sin(phase) ** 2, ndims=self.feature_ndims)

  if self.length_scale is not None:
    padded_length_scale = util.pad_shape_with_ones(
        self.length_scale, ndims=example_ndims)
    log_kernel = log_kernel / padded_length_scale ** 2

  if self.amplitude is not None:
    padded_amplitude = util.pad_shape_with_ones(
        self.amplitude, ndims=example_ndims)
    log_kernel = log_kernel + 2. * tf.math.log(padded_amplitude)

  return tf.exp(log_kernel)
def _apply(self, x1, x2, example_ndims=0):
  """Evaluates `amplitude**2 * (1 + d / (2 * ls**2 * rate))**-rate`.

  `d` is the squared difference of `x1` and `x2` summed over the rightmost
  `self.feature_ndims` dimensions, `ls` is `length_scale` and `rate` is
  `scale_mixture_rate`. Parameters that are `None` are skipped; a missing
  `scale_mixture_rate` behaves as a rate of 1.

  Args:
    x1: First kernel input.
    x2: Second kernel input.
    example_ndims: Number of size-1 dimensions appended to each parameter
      before use.

  Returns:
    The kernel values.
  """
  half_sq_dist = util.sum_rightmost_ndims_preserving_shape(
      tf.math.squared_difference(x1, x2), ndims=self.feature_ndims) / 2

  if self.length_scale is not None:
    padded_length_scale = util.pad_shape_with_ones(
        self.length_scale, ndims=example_ndims)
    half_sq_dist = half_sq_dist / padded_length_scale**2

  if self.scale_mixture_rate is None:
    rate = 1.
  else:
    rate = util.pad_shape_with_ones(
        self.scale_mixture_rate, ndims=example_ndims)
    half_sq_dist = half_sq_dist / rate

  kernel_values = (1. + half_sq_dist) ** -rate

  if self.amplitude is not None:
    padded_amplitude = util.pad_shape_with_ones(
        self.amplitude, ndims=example_ndims)
    kernel_values = kernel_values * padded_amplitude**2

  return kernel_values
def vector_transform(x, feature_ndims, param_expansion_ndims):
  """Scales `x` elementwise by `scale_diag`, padded to line up with `x`.

  Args:
    x: Input to scale.
    feature_ndims: Number of feature dimensions.
    param_expansion_ndims: Number of parameter-expansion dimensions.

  Returns:
    `scale_diag * x`, where `scale_diag` has had
    `param_expansion_ndims + feature_ndims - 1` size-1 dimensions inserted
    just before its last dimension.
  """
  num_singleton_dims = param_expansion_ndims + feature_ndims - 1
  padded_diag = util.pad_shape_with_ones(
      scale_diag, num_singleton_dims, start=-2)
  return padded_diag * x
def testPadShapeRightWithOnes(self):
  """With the default `start`, the singleton dims are appended on the right."""
  vector = np.ones([3], np.float32)
  padded = self.evaluate(util.pad_shape_with_ones(vector, 3))
  self.assertAllEqual(padded.shape, [3, 1, 1, 1])
def tensor(self, x1, x2, x1_example_ndims, x2_example_ndims):
  """Construct (batched) tensors from (batches of) collections of inputs.

  Args:
    x1: `Tensor` input to the first positional parameter of the kernel, of
      shape `B1 + E1 + F`, where `B1` and `E1` arbitrary shapes which may be
      empty (ie, no batch/example dims, resp.), and `F` (the feature shape)
      must have rank equal to the kernel's `feature_ndims` property. Batch
      shape must broadcast with the batch shape of `x2` and with the kernel's
      batch shape.
    x2: `Tensor` input to the second positional parameter of the kernel,
      shape `B2 + E2 + F`, where `B2` and `E2` arbitrary shapes which may be
      empty (ie, no batch/example dims, resp.), and `F` (the feature shape)
      must have rank equal to the kernel's `feature_ndims` property. Batch
      shape must broadcast with the batch shape of `x1` and with the kernel's
      batch shape.
    x1_example_ndims: A python integer greater than or equal to 0, the number
      of example dims in the first input. This affects both the alignment of
      batch shapes and the shape of the final output of the function.
      Everything left of the feature shape and the example dims in `x1` is
      considered "batch shape", and must broadcast as specified above.
    x2_example_ndims: A python integer greater than or equal to 0, the number
      of example dims in the second input. This affects both the alignment of
      batch shapes and the shape of the final output of the function.
      Everything left of the feature shape and the example dims in `x2` is
      considered "batch shape", and must broadcast as specified above.

  Returns:
    `Tensor` containing (possibly batched) kernel applications to pairs from
    inputs `x1` and `x2`. If the kernel parameters' batch shape is `Bk` then
    the shape of the `Tensor` resulting from this method call is
    `broadcast(Bk, B1, B2) + E1 + E2`. Note this differs from `apply`: the
    example dimensions are concatenated, whereas in `apply` the example dims
    are broadcast together. It also differs from `matrix`: the example shapes
    are arbitrary here, and the result accrues a rank equal to the sum of the
    ranks of the input example shapes.

  #### Examples

  First, consider a kernel with a single scalar parameter.

  ```python
  import tensorflow_probability as tfp

  scalar_kernel = tfp.positive_semidefinite_kernels.SomeKernel(param=.5)
  scalar_kernel.batch_shape
  # ==> []

  # Our inputs are two rank-2 collections of 3-D vectors
  x = np.ones([5, 6, 3], np.float32)
  y = np.ones([7, 8, 3], np.float32)
  scalar_kernel.tensor(x, y, x1_example_ndims=2, x2_example_ndims=2).shape
  # ==> [5, 6, 7, 8]

  # Empty example shapes work too!
  x = np.ones([3], np.float32)
  y = np.ones([5, 3], np.float32)
  scalar_kernel.tensor(x, y, x1_example_ndims=0, x2_example_ndims=1).shape
  # ==> [5]
  ```

  The result comes from applying the kernel to the entries in `x` and `y`
  pairwise, across all pairs. For instance, with `x` of shape `[5, 3]` and `y`
  of shape `[4, 3]` (one example dimension each), the result is the `[5, 4]`
  array:

    ```none
    | k(x[0], y[0])    k(x[0], y[1])  ...  k(x[0], y[3]) |
    | k(x[1], y[0])    k(x[1], y[1])  ...  k(x[1], y[3]) |
    |      ...              ...                 ...      |
    | k(x[4], y[0])    k(x[4], y[1])  ...  k(x[4], y[3]) |
    ```

  Now consider a kernel with batched parameters.

  ```python
  batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(param=[1., .5])
  batch_kernel.batch_shape
  # ==> [2]

  # Inputs are two rank-2 collections of 3-D vectors
  x = np.ones([5, 6, 3], np.float32)
  y = np.ones([7, 8, 3], np.float32)
  batch_kernel.tensor(x, y, x1_example_ndims=2, x2_example_ndims=2).shape
  # ==> [2, 5, 6, 7, 8]
  ```

  We also support batching of the inputs. First, let's look at that with
  the scalar kernel again.

  ```python
  # Batch of 10 lists of 5x6 collections of dimension 3
  x = np.ones([10, 5, 6, 3], np.float32)

  # Batch of 10 lists of 7x8 collections of dimension 3
  y = np.ones([10, 7, 8, 3], np.float32)

  scalar_kernel.tensor(x, y, x1_example_ndims=2, x2_example_ndims=2).shape
  # ==> [10, 5, 6, 7, 8]
  ```

  The result is a batch of 10 tensors built from the batch of 10 rank-2
  collections of input vectors. The batch shapes have to be broadcastable.
  The following will *not* work:

  ```python
  x = np.ones([10, 5, 3], np.float32)
  y = np.ones([20, 4, 3], np.float32)
  scalar_kernel.tensor(x, y, x1_example_ndims=1, x2_example_ndims=1).shape
  # ==> Error! [10] and [20] can't broadcast.
  ```

  Now let's consider batches of inputs in conjunction with batches of kernel
  parameters. We require that the input batch shapes be broadcastable with
  the kernel parameter batch shapes, otherwise we get an error:

  ```python
  x = np.ones([10, 5, 6, 3], np.float32)
  y = np.ones([10, 7, 8, 3], np.float32)

  batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(param=[1., .5])
  batch_kernel.batch_shape
  # ==> [2]
  batch_kernel.tensor(x, y, x1_example_ndims=2, x2_example_ndims=2).shape
  # ==> Error! [2] and [10] can't broadcast.
  ```

  The fix is to make the kernel parameter shape broadcastable with `[10]` (or
  reshape the inputs to be broadcastable!):

  ```python
  x = np.ones([10, 5, 6, 3], np.float32)
  y = np.ones([10, 7, 8, 3], np.float32)

  batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(
      param=[[1.], [.5]])
  batch_kernel.batch_shape
  # ==> [2, 1]
  batch_kernel.tensor(x, y, x1_example_ndims=2, x2_example_ndims=2).shape
  # ==> [2, 10, 5, 6, 7, 8]

  # Or, make the inputs broadcastable:
  x = np.ones([10, 1, 5, 6, 3], np.float32)
  y = np.ones([10, 1, 7, 8, 3], np.float32)

  batch_kernel = tfp.positive_semidefinite_kernels.SomeKernel(
      param=[1., .5])
  batch_kernel.batch_shape
  # ==> [2]
  batch_kernel.tensor(x, y, x1_example_ndims=2, x2_example_ndims=2).shape
  # ==> [10, 2, 5, 6, 7, 8]
  ```

  """
  with self._name_scope(self._name, values=[x1, x2]):
    x1 = tf.convert_to_tensor(value=x1, name='x1')
    x2 = tf.convert_to_tensor(value=x2, name='x2')

    # Insert `x2_example_ndims` ones just left of x1's feature dims, giving
    # x1 the shape B1 + E1 + [1, ..., 1] + F.
    x1 = util.pad_shape_with_ones(x1, ndims=x2_example_ndims,
                                  start=-(self.feature_ndims + 1))

    # Insert `x1_example_ndims` ones just left of x2's example dims, giving
    # x2 the shape B2 + [1, ..., 1] + E2 + F. Broadcasting the two padded
    # example shapes against each other then yields E1 + E2.
    x2 = util.pad_shape_with_ones(
        x2, ndims=x1_example_ndims,
        start=-(self.feature_ndims + 1 + x2_example_ndims))

    return self.apply(x1, x2,
                      example_ndims=(x1_example_ndims + x2_example_ndims))
def testPadShapeStartWithOnes(self):
  """With `start=-2` on a rank-1 input, the singleton dims go on the left."""
  vector = np.ones([3], np.float32)
  padded = self.evaluate(util.pad_shape_with_ones(vector, 3, start=-2))
  self.assertAllEqual(padded.shape, [1, 1, 1, 3])