def __init__(
    self,
    input_size,
    output_size,  # keras::Conv::filters
    # Conv specific.
    filter_shape,  # keras::Conv::kernel_size
    rank=2,  # keras::Conv::rank
    strides=1,  # keras::Conv::strides
    padding='VALID',  # keras::Conv::padding; 'CAUSAL' not implemented.
    # keras::Conv::data_format is not implemented
    dilations=1,  # keras::Conv::dilation_rate
    output_padding=None,  # keras::ConvTranspose::output_padding
    method='auto',
    # Weights
    kernel_initializer=None,  # tfp.nn.initializers.glorot_uniform()
    bias_initializer=None,  # tf.initializers.zeros()
    make_kernel_bias_fn=kernel_bias_lib.make_kernel_bias,
    dtype=tf.float32,
    index_dtype=tf.int32,
    # Misc
    activation_fn=None,
    validate_args=False,
    name=None):
  """Constructs a transposed-convolution layer.

  `data_format` is not supported because all nn layers operate on the
  rightmost column. If the channel dimension is not rightmost, apply
  `tf.transpose` before calling this layer. For example, to move a channel
  dimension that is second from the left to the rightmost position:

  ```python
  inputs = tf.transpose(inputs, tf.concat([
      [0], tf.range(2, tf.rank(inputs)), [1]], axis=0))
  ```

  Args:
    input_size: Size of the second-from-rightmost dimension of `kernel` as
      it is finally applied. In Keras, this argument is inferred from the
      rightmost input shape, i.e., `tf.shape(inputs)[-1]`. This argument is
      required.
    output_size: Rightmost dimension size of both `kernel` and `bias`.
      In Keras, this argument is called `filters`.
    filter_shape: Sizes of the leftmost `rank` dimensions of `kernel`.
      In Keras, this argument is called `kernel_size`.
    rank: An integer, the rank of the convolution, e.g. "2" for 2D
      convolution, implying `kernel.shape.rank == rank + 2`. It is used to
      normalize `filter_shape`; note that this constructor currently always
      builds the kernel-application function with `rank=2`.
      In Keras, this argument has the same name and semantics.
      Default value: `2`.
    strides: An integer or tuple/list of n integers, specifying the stride
      length of the convolution.
      In Keras, this argument has the same name and semantics.
      Default value: `1`.
    padding: One of `"VALID"` or `"SAME"` (case-insensitive).
      In Keras, this argument has the same name and semantics (except
      `"CAUSAL"` is not supported).
      Default value: `'VALID'`.
    dilations: An integer or tuple/list of `rank` integers, specifying the
      dilation rate to use for dilated convolution. Currently, specifying
      any `dilations` value != 1 is incompatible with specifying any
      `strides` value != 1.
      In Keras, this argument is called `dilation_rate`.
      Default value: `1`.
    output_padding: An `int` or length-`rank` tuple/list representing the
      amount of padding along the input spatial dimensions (e.g., depth,
      height, width). A single `int` indicates the same value for all
      spatial dimensions. The amount of output padding along a given
      dimension must be lower than the stride along that same dimension.
      NOTE: this constructor accepts the argument but does not forward it
      anywhere.
      In Keras, this argument has the same name and semantics.
      Default value: `None` (i.e., inferred).
    method: Forwarded, together with `strides`, to
      `_get_convolution_transpose_fn` to pick the implementation.
      Default value: `'auto'`.
    kernel_initializer: Forwarded to `make_kernel_bias_fn`.
      Default value: `None` (i.e.,
      `tfp.experimental.nn.initializers.glorot_uniform()`).
    bias_initializer: Forwarded to `make_kernel_bias_fn`.
      Default value: `None` (i.e., `tf.initializers.zeros()`).
    make_kernel_bias_fn: Callable creating the `(kernel, bias)` pair.
      Default value: `tfp.experimental.nn.util.make_kernel_bias`.
    dtype: Forwarded to `make_kernel_bias_fn` and the base class.
      Default value: `tf.float32`.
    index_dtype: Forwarded as `dtype` to the kernel-application function
      builder.
      Default value: `tf.int32`.
    activation_fn: Forwarded to the base class.
      Default value: `None`.
    validate_args: Forwarded to argument preparation, the
      kernel-application function builder and the base class.
      Default value: `False`.
    name: Forwarded to the base class.
      Default value: `None` (i.e., `'ConvolutionTranspose'`).
  """
  filter_shape = convolution_util.prepare_tuple_argument(
      filter_shape, rank, 'filter_shape', validate_args)

  # Note transpose: `output_size` precedes `input_size` in the stored kernel.
  transposed_kernel_shape = ps.concat(
      [filter_shape, [output_size, input_size]], axis=0)
  bias_shape = [output_size]
  no_batch_ndims = 0
  kernel, bias = make_kernel_bias_fn(
      transposed_kernel_shape, bias_shape,
      kernel_initializer, bias_initializer,
      no_batch_ndims, no_batch_ndims,
      dtype)

  build_apply_fn = _get_convolution_transpose_fn(strides, method)
  conv_transpose_fn = build_apply_fn(
      filter_shape, strides, padding, rank=2,
      dilations=dilations, dtype=index_dtype, validate_args=validate_args)

  # TODO(emilyaf): Remove after kernel shape is updated.
  def _apply_with_swapped_channels(x, k):
    # Swap the two trailing (channel) axes, then flatten everything except
    # the output channels before handing the kernel to the convolution.
    swapped = tf.transpose(k, perm=[0, 1, 3, 2])
    return conv_transpose_fn(x, tf.reshape(swapped, [-1, output_size]))

  super(ConvolutionTranspose, self).__init__(
      kernel=kernel,
      bias=bias,
      apply_kernel_fn=_apply_with_swapped_channels,
      dtype=dtype,
      activation_fn=activation_fn,
      validate_args=validate_args,
      name=name)
def generate_mc_normal_draws(num_normal_draws,
                             num_time_steps,
                             num_sample_paths,
                             random_type,
                             batch_shape=None,
                             skip=0,
                             seed=None,
                             dtype=None,
                             name=None):
  """Generates normal random samples to be consumed by a Monte Carlo algorithm.

  Many Monte Carlo (MC) algorithms can be rewritten so that all necessary
  random (or quasi-random) variables are drawn in advance as a single
  `Tensor`. Here `batch_shape` is the shape of the independent batches of the
  Monte Carlo algorithm, `num_time_steps` is the number of time steps the
  algorithm performs within each batch, `num_sample_paths` is the number of
  sample paths and `num_normal_draws` is the number of independent normal
  draws per sample path.

  For example, in order to use quasi-random numbers in a Monte Carlo
  algorithm, the samples have to be drawn in advance. This function generates
  a `Tensor`, say `x`, laid out with the time-step axis first so that for a
  quasi-`random_type` each `x[i]` corresponds to different dimensions of the
  quasi-random sequence.

  Args:
    num_normal_draws: A scalar int32 `Tensor`. The number of independent
      normal draws at each time step for each sample path. Should be a graph
      compilation constant.
    num_time_steps: A scalar int32 `Tensor`. The number of time steps at
      which to draw the independent normal samples. Should be a graph
      compilation constant.
    num_sample_paths: A scalar int32 `Tensor`. The number of trajectories
      (e.g., Monte Carlo paths) for which to draw the independent normal
      samples. Should be a graph compilation constant.
    random_type: Enum value of `tff.math.random.RandomType`. The type of
      (quasi)-random number generator to use to generate the paths.
    batch_shape: Either a `tf.TensorShape` or a 1-d `Tensor` of type
      `tf.int32` specifying the dimensions of independent batches of normal
      samples to be drawn.
      Default value: `None` which corresponds to a single batch of shape
      `tf.TensorShape([])`.
    skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol or
      Halton sequence to skip. Used only when `random_type` is 'SOBOL',
      'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
      Default value: `0`.
    seed: Seed for the random number generator. The seed is only relevant if
      `random_type` is one of `[STATELESS, PSEUDO, HALTON_RANDOMIZED,
      PSEUDO_ANTITHETIC, STATELESS_ANTITHETIC]`. For `PSEUDO`,
      `PSEUDO_ANTITHETIC` and `HALTON_RANDOMIZED` the seed should be a Python
      integer. For `STATELESS` and `STATELESS_ANTITHETIC` it must be supplied
      as an integer `Tensor` of shape `[2]`.
      Default value: `None` which means no seed is set.
    dtype: The `dtype` of the output `Tensor`.
      Default value: `None` which maps to `float32`.
    name: Python string. The name to give this op.
      Default value: `None` which maps to `generate_mc_normal_draws`.

  Returns:
    A `Tensor` of shape
    `[num_time_steps] + batch_shape + [num_sample_paths, num_normal_draws]`.
  """
  name = 'generate_mc_normal_draws' if name is None else name
  skip = 0 if skip is None else skip
  with tf.name_scope(name):
    dtype = tf.float32 if dtype is None else dtype
    if batch_shape is None:
      batch_shape = tf.TensorShape([])

    # For quasi-random draws the dimension of each draw must be
    # `num_time_steps * num_normal_draws`; a zero mean of that size fixes it.
    zero_mean = tf.zeros(
        [num_time_steps * num_normal_draws],
        dtype=dtype,
        name='total_dimension')

    is_antithetic = random_type in (random.RandomType.PSEUDO_ANTITHETIC,
                                    random.RandomType.STATELESS_ANTITHETIC)
    if is_antithetic:
      # Antithetic samplers need `num_sample_paths` as the leading axis.
      shape_parts = [[num_sample_paths], batch_shape]
    else:
      # For QMC sequences `num_sample_paths` should follow `batch_shape`.
      shape_parts = [batch_shape, [num_sample_paths]]
    sample_shape = tf.concat(shape_parts, axis=0)

    draws = random.mv_normal_sample(
        sample_shape,
        mean=zero_mean,
        random_type=random_type,
        seed=seed,
        skip=skip)
    # Split the flat draw dimension into [num_time_steps, num_normal_draws].
    draws = tf.reshape(
        draws,
        tf.concat([sample_shape, [num_time_steps, num_normal_draws]], axis=0))

    rank = draws.shape.rank
    time_axis = rank - 2
    draws_axis = rank - 1
    if is_antithetic and rank > 3:
      # A batch is present and the sample axis leads: keep the batch axes in
      # order and move the sample axis right after them.
      middle_axes = list(range(1, time_axis)) + [0]
    else:
      middle_axes = list(range(time_axis))
    # Resulting layout: [steps_num] + batch_shape + [num_samples, dim].
    return tf.transpose(draws, perm=[time_axis] + middle_axes + [draws_axis])
def preprocess(example):
  """Turns one dataset example into the dict consumed downstream.

  Args:
    example: Mapping providing an `'image'` and a `'label'` entry.

  Returns:
    A dict with the preprocessed image, transposed with permutation
    `(2, 0, 1)` (HWC -> CHW), under `'images'` and the label cast to
    `tf.int32` under `'labels'`.
  """
  hwc_image = _preprocess_image(example['image'], is_training)
  chw_image = tf.transpose(hwc_image, (2, 0, 1))  # HWC -> CHW.
  return {
      'images': chw_image,
      'labels': tf.cast(example['label'], tf.int32),
  }
def get_loss_tensors(self, f0_candidates, freqs, amps):
  """Get traces of loss to estimate fundamental frequency.

  Args:
    f0_candidates: Frequencies of candidates in hertz. [batch, time, freq].
    freqs: Frequencies of sinusoids in hertz. [batch, time, freq].
    amps: Amplitudes of sinusoids, greater than 0. [batch, time, freq].

  Returns:
    sinusoids_loss: -log p(sinusoids|harmonics), [batch, time, f0_candidate].
    harmonics_loss: -log p(harmonics|sinusoids), [batch, time, f0_candidate].
  """
  # ==========================================================================
  # P(sinusoids | candidate_harmonics).
  # ==========================================================================
  sinusoid_dist = self.get_p_sinusoids_given_harmonics()

  # Every partial is compared against every candidate through their ratio.
  # -> [batch, time, candidate, partial]
  ratios = safe_divide(freqs[:, :, tf.newaxis, :],
                       f0_candidates[:, :, :, tf.newaxis])
  sinusoid_nll = -sinusoid_dist.log_prob(ratios)

  # NOTE: an amplitude threshold (dropping sinusoids below mean + 0.5 * std)
  # used to be applied to the weights here; it is disabled.
  weights = tf.convert_to_tensor(amps[:, :, tf.newaxis, :])

  # Amplitude-weighted average over partials.
  # -> [batch, time, candidate]
  weighted_nll = tf.reduce_sum(sinusoid_nll * weights, axis=-1)
  total_weight = tf.reduce_sum(weights, axis=-1)
  sinusoids_loss = safe_divide(weighted_nll, total_weight)

  # ==========================================================================
  # P(candidate_harmonics | sinusoids)
  # ==========================================================================
  harmonic_dist = self.get_p_harmonics_given_sinusoids(freqs, amps)
  harmonics = self.get_candidate_harmonics(f0_candidates, as_midi=True)

  # tfp expects [sample_sh, batch_sh, event_sh], so swap the leading and
  # trailing axis pairs. The same permutation also undoes the swap.
  swap_pairs = [2, 3, 0, 1]
  # -> [candidate, harmonic, batch, time]
  harmonics_swapped = tf.transpose(harmonics, swap_pairs)
  nll_swapped = -harmonic_dist.log_prob(harmonics_swapped)
  # -> [batch, time, candidate, harm]
  nll_harmonics = tf.transpose(nll_swapped, swap_pairs)

  # Prior: upper harmonics matter progressively less.
  amps_prior = tf.linspace(1.0,
                           1.0 / self.n_harmonic_points,
                           self.n_harmonic_points)
  harmonics_loss = (
      nll_harmonics * amps_prior[tf.newaxis, tf.newaxis, tf.newaxis, :])

  # Ignore harmonics above nyquist, and reweight by the fraction of harmonics
  # below nyquist (so it doesn't just pick the highest frequency possible).
  nyquist_midi = hz_to_midi(self.sample_rate / 2.0)
  below_nyquist = harmonics < nyquist_midi
  nyquist_mask = tf.where(below_nyquist,
                          tf.ones_like(harmonics_loss),
                          tf.zeros_like(harmonics_loss))
  kept_fraction = tf.reduce_mean(nyquist_mask, axis=-1, keepdims=True)
  harmonics_loss = harmonics_loss * safe_divide(nyquist_mask, kept_fraction)

  # Average over harmonics.
  harmonics_loss = tf.reduce_mean(harmonics_loss, axis=-1)

  return sinusoids_loss, harmonics_loss
def testLangevin3DNormalDynamicVolatility(self):
  """Samples a 3-D Multivariate Normal with MALA and non-constant volatility."""
  dtype = np.float32
  true_mean = dtype([1, 2, 7])
  true_cov = dtype([[1, 0.25, 0.25], [0.25, 1, 0.25], [0.25, 0.25, 1]])
  num_results = 500
  num_chains = 500

  # The target distribution is parameterized by the Cholesky factor of the
  # covariance.
  target = tfd.MultivariateNormalTriL(
      loc=true_mean, scale_tril=tf.linalg.cholesky(true_cov))

  # The chain state is a list of two parts, `x` (two trailing components)
  # and `y` (one trailing component); the target sees them concatenated.
  def target_log_prob(x, y):
    return target.log_prob(tf.concat([x, y], axis=-1))

  # State-dependent (non-constant) volatility, one entry per state part.
  def volatility_fn(x, y):
    volatility_x = 1. / (0.5 + 0.1 * tf.abs(x + y))
    volatility_y = 1. / (0.5 + 0.1 * tf.abs(y))
    return [volatility_x, volatility_y]

  # Initial state of the chains, split into the two state parts.
  init_state = [
      np.ones([num_chains, part_width], dtype=dtype) for part_width in (2, 1)
  ]

  # Run the Metropolis-adjusted Langevin algorithm for `num_results`
  # iterations on `num_chains` independent chains.
  mala_kernel = tfp.mcmc.MetropolisAdjustedLangevinAlgorithm(
      target_log_prob_fn=target_log_prob,
      volatility_fn=volatility_fn,
      step_size=.1,
      seed=42)
  states, _ = tfp.mcmc.sample_chain(
      num_results=num_results,
      current_state=init_state,
      kernel=mala_kernel,
      num_burnin_steps=200,
      num_steps_between_results=1,
      parallel_iterations=1)

  # Re-join the state parts and estimate mean/covariance across both the
  # result and the chain axes.
  states = tf.concat(states, axis=-1)
  sample_mean = tf.reduce_mean(input_tensor=states, axis=[0, 1])
  centered = tf.expand_dims(states - sample_mean, -1)
  outer_products = tf.matmul(
      centered, tf.transpose(a=centered, perm=[0, 1, 3, 2]))
  sample_cov = tf.reduce_mean(input_tensor=outer_products, axis=[0, 1])

  sample_mean_, sample_cov_ = self.evaluate([sample_mean, sample_cov])

  self.assertAllClose(
      np.squeeze(sample_mean_), true_mean, atol=0.1, rtol=0.1)
  self.assertAllClose(
      np.squeeze(sample_cov_), true_cov, atol=0.1, rtol=0.1)
def _log_prob(self, x):
  """Computes the log-density of `x` under this distribution.

  Args:
    x: Input `Tensor`. When `self.input_output_cholesky` is true it is used
      directly as the square-root factor; otherwise it is factored with
      `tf.linalg.cholesky`.

  Returns:
    The `log_prob` `Tensor`. When the static ranks of both `x` and
    `self.batch_shape` are known, its static shape is set to the broadcast of
    `x.shape[:-2]` with `self.batch_shape`.
  """
  if self.input_output_cholesky:
    x_sqrt = x
  else:
    # Complexity: O(nbk**3)
    x_sqrt = tf.linalg.cholesky(x)

  batch_shape = self.batch_shape_tensor()
  event_shape = self.event_shape_tensor()
  x_ndims = tf.rank(input=x_sqrt)
  # Left-pad the shape of `x_sqrt` with ones so that it has at least
  # `batch_ndims + 2` axes (zero axes are added if it already does).
  num_singleton_axes_to_prepend = (
      tf.maximum(tf.size(input=batch_shape) + 2, x_ndims) - x_ndims)
  x_with_prepended_singletons_shape = tf.concat([
      tf.ones([num_singleton_axes_to_prepend], dtype=tf.int32),
      tf.shape(input=x_sqrt)
  ], 0)
  # NOTE: from here on `x_sqrt` is the reshaped tensor; it is also the one
  # used for the log-determinant further down.
  x_sqrt = tf.reshape(x_sqrt, x_with_prepended_singletons_shape)
  ndims = tf.rank(x_sqrt)
  # sample_ndims = ndims - batch_ndims - event_ndims
  sample_ndims = ndims - tf.size(input=batch_shape) - 2
  sample_shape = tf.shape(input=x_sqrt)[:sample_ndims]

  # We need to be able to pre-multiply each matrix by its corresponding
  # batch scale matrix. Since a Distribution Tensor supports multiple
  # samples per batch, this means we need to reshape the input matrix `x`
  # so that the first b dimensions are batch dimensions and the last two
  # are of shape [dimension, dimensions*number_of_samples]. Doing these
  # gymnastics allows us to do a batch_solve.
  #
  # After we're done with sqrt_solve (the batch operation) we need to undo
  # this reshaping so what we're left with is a Tensor partitionable by
  # sample, batch, event dimensions.

  # Complexity: O(nbk**2) since transpose must access every element.
  scale_sqrt_inv_x_sqrt = x_sqrt
  # Rotate the leading sample axes to the end: (batch, event, sample).
  perm = tf.concat(
      [tf.range(sample_ndims, ndims),
       tf.range(0, sample_ndims)], 0)
  scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt, perm=perm)
  # Fold the last event axis together with all sample axes into one trailing
  # axis of size `dimension * prod(sample_shape)`.
  last_dim_size = (
      tf.cast(self.dimension, dtype=tf.int32) * tf.reduce_prod(
          input_tensor=x_with_prepended_singletons_shape[:sample_ndims]))
  shape = tf.concat([
      x_with_prepended_singletons_shape[sample_ndims:-2],
      [tf.cast(self.dimension, dtype=tf.int32), last_dim_size]
  ], axis=0)
  scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)

  # Complexity: O(nbM*k) where M is the complexity of the operator solving a
  # vector system. For LinearOperatorLowerTriangular, each solve is O(k**2) so
  # this step has complexity O(nbk^3).
  scale_sqrt_inv_x_sqrt = self.scale_operator.solve(
      scale_sqrt_inv_x_sqrt)

  # Undo make batch-op ready.
  # Complexity: O(nbk**2)
  # The leading dims are read back from the solve result rather than from
  # the input shape.
  shape = tf.concat([
      tf.shape(input=scale_sqrt_inv_x_sqrt)[:-2], event_shape, sample_shape
  ], axis=0)
  scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)
  # Rotate the trailing sample axes back to the front: (sample, batch, event).
  perm = tf.concat([
      tf.range(ndims - sample_ndims, ndims),
      tf.range(0, ndims - sample_ndims)
  ], 0)
  scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt, perm=perm)

  # Write V = SS', X = LL'. Then:
  # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
  #              = tr[inv(S) L L' inv(S)']
  #              = tr[(inv(S) L) (inv(S) L)']
  #              = sum_{ik} (inv(S) L)_{ik}**2
  # The second equality follows from the cyclic permutation property.
  # Complexity: O(nbk**2)
  trace_scale_inv_x = tf.reduce_sum(
      input_tensor=tf.square(scale_sqrt_inv_x_sqrt), axis=[-2, -1])

  # Complexity: O(nbk)
  # Sum of the log-diagonal of the square-root factor.
  half_log_det_x = tf.reduce_sum(input_tensor=tf.math.log(
      tf.linalg.diag_part(x_sqrt)), axis=[-1])

  # Complexity: O(nbk**2)
  log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
              0.5 * trace_scale_inv_x - self.log_normalization())

  # Set shape hints.
  # Try to merge what we know from the input x with what we know from the
  # parameters of this distribution.
  if tensorshape_util.rank(
      x.shape) is not None and tensorshape_util.rank(
          self.batch_shape) is not None:
    tensorshape_util.set_shape(
        log_prob, tf.broadcast_static_shape(x.shape[:-2], self.batch_shape))

  return log_prob
def _testMVN(self,
             base_distribution_class,
             base_distribution_kwargs,
             event_shape=()):
  """Checks a shifted/scaled base distribution against a SciPy MVN reference.

  The distribution under test is built by `self._cls()` from a `tfd.Sample`
  of the base distribution and a `Shift(ScaleMatvecTriL(.))` bijector chain.
  It is built twice: once with static parameters and once with parameters
  held in shape-unknown `tf.Variable`s. Sample moments, shapes, `log_prob`,
  `prob`, `mean` and `entropy` are compared with the reference for both.

  Args:
    base_distribution_class: Class of the base distribution.
    base_distribution_kwargs: Dict of keyword arguments for the base
      distribution.
    event_shape: `sample_shape` handed to `tfd.Sample`.
      Default value: `()`.
  """
  # Base distribution shapes must be compatible w/bijector; most bijectors are
  # batch_shape agnostic and only care about event_ndims.
  # In the case of `ScaleMatvecTriL`, if we got it wrong then it would fire an
  # exception due to incompatible dimensions.

  def make_bijector():
    # A fresh chain per distribution, exactly as if it were written inline.
    return tfb.Chain([
        tfb.Shift(shift=self._shift),
        tfb.ScaleMatvecTriL(scale_tril=self._tril),
    ])

  event_shape_var = tf.Variable(
      np.int32(event_shape),
      shape=tf.TensorShape(None),
      name='dynamic_event_shape')

  base_distribution_dynamic_kwargs = {}
  for key, value in base_distribution_kwargs.items():
    base_distribution_dynamic_kwargs[key] = tf.Variable(
        value, shape=tf.TensorShape(None), name='dynamic_{}'.format(key))

  dynamic_base = base_distribution_class(
      validate_args=True, **base_distribution_dynamic_kwargs)
  fake_mvn_dynamic = self._cls()(
      distribution=tfd.Sample(dynamic_base, sample_shape=event_shape_var),
      bijector=make_bijector(),
      validate_args=True)

  static_base = base_distribution_class(
      validate_args=True, **base_distribution_kwargs)
  fake_mvn_static = self._cls()(
      distribution=tfd.Sample(static_base, sample_shape=event_shape),
      bijector=make_bijector(),
      validate_args=True)

  actual_mean = np.tile(self._shift, [2, 1])  # ScaleMatvecTriL elided tile.
  actual_cov = np.matmul(self._tril, np.transpose(self._tril, [0, 2, 1]))

  # One SciPy reference distribution per batch member.
  reference_mvns = [
      stats.multivariate_normal(actual_mean[i], actual_cov[i])
      for i in range(len(actual_cov))
  ]

  def actual_mvn_log_prob(x):
    # Rows index the sample, columns index the batch member.
    per_member = [
        mvn.logpdf(x[:, i, :]) for i, mvn in enumerate(reference_mvns)
    ]
    return np.stack(per_member, axis=0).T

  actual_mvn_entropy = np.array([mvn.entropy() for mvn in reference_mvns])

  self.assertAllEqual([3], fake_mvn_static.event_shape)
  self.assertAllEqual([2], fake_mvn_static.batch_shape)

  if not tf.executing_eagerly():
    self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.event_shape)
    self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.batch_shape)

  num_samples = 7e3
  for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
    # Ensure sample works by checking first, second moments.
    draws = fake_mvn.sample(int(num_samples), seed=test_util.test_seed())
    first_draws = draws[0:5, ...]
    sample_mean = tf.reduce_mean(draws, axis=0)
    centered = tf.transpose(a=draws - sample_mean, perm=[1, 2, 0])
    sample_cov = tf.matmul(centered, centered, transpose_b=True) / num_samples

    initializers = [
        v.initializer for v in base_distribution_dynamic_kwargs.values()
    ]
    self.evaluate(initializers + [event_shape_var.initializer])

    fetches = [
        sample_mean,
        sample_cov,
        first_draws,
        fake_mvn.event_shape_tensor(),
        fake_mvn.batch_shape_tensor(),
        fake_mvn.log_prob(first_draws),
        fake_mvn.prob(first_draws),
        fake_mvn.mean(),
        fake_mvn.entropy(),
    ]
    (sample_mean_, sample_cov_, x_, fake_event_shape_, fake_batch_shape_,
     fake_log_prob_, fake_prob_, fake_mean_,
     fake_entropy_) = self.evaluate(fetches)

    self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
    self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

    # Ensure all other functions work as intended.
    self.assertAllEqual([5, 2, 3], x_.shape)
    self.assertAllEqual([3], fake_event_shape_)
    self.assertAllEqual([2], fake_batch_shape_)
    expected_log_prob = actual_mvn_log_prob(x_)
    self.assertAllClose(
        expected_log_prob, fake_log_prob_, atol=0., rtol=1e-6)
    self.assertAllClose(
        np.exp(expected_log_prob), fake_prob_, atol=0., rtol=1e-5)
    self.assertAllClose(actual_mean, fake_mean_, atol=0., rtol=1e-6)
    self.assertAllClose(
        actual_mvn_entropy, fake_entropy_, atol=0., rtol=1e-6)
def convert(w):
  """Returns `w`, passed through `tf.transpose` when `transpose_weights` is set.

  `transpose_weights` is read from the enclosing scope; when it is falsy the
  input is returned untouched.
  """
  if not transpose_weights:
    return w
  return tf.transpose(w)
def is_cudnn_supported_inputs(mask, time_major):
  """Checks whether `mask` passes both CuDNN input checks.

  Args:
    mask: Mask tensor. When `time_major` is truthy it is first passed through
      `tf.transpose` (default permutation).
    time_major: Whether `mask` must be transposed before the checks.

  Returns:
    The logical AND of `is_sequence_right_padded(mask)` and the negation of
    `has_fully_masked_sequence(mask)`.
  """
  checked_mask = tf.transpose(mask) if time_major else mask
  right_padded = is_sequence_right_padded(checked_mask)
  fully_masked = has_fully_masked_sequence(checked_mask)
  return tf.logical_and(right_padded, tf.logical_not(fully_masked))
def _sample_n(self, n, seed):
  """Draws `n` samples.

  A lower-triangular matrix is built per sample (standard normals below the
  diagonal, `exp(0.5 * g)` on the diagonal with `g` drawn by `random_gamma`
  in log space) and left-multiplied by `self._scale`.

  Args:
    n: Number of samples to draw.
    seed: Seed, split into one seed for the normal draws and one for the
      gamma draws.

  Returns:
    A `Tensor` of shape `[n] + batch_shape + event_shape`. Unless
    `self.input_output_cholesky` is set, each factor `x` is returned as
    `matmul(x, x, adjoint_b=True)`.
  """
  df = tf.convert_to_tensor(self.df)
  batch_shape = self._batch_shape_tensor(df=df)
  event_shape = self._event_shape_tensor()
  batch_ndims = ps.shape(batch_shape)[0]

  ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
  full_shape = ps.concat([[n], batch_shape, event_shape], 0)
  normal_seed, gamma_seed = samplers.split_seed(seed, salt='Wishart')

  # Complexity: O(nbk**2)
  normals = samplers.normal(
      shape=full_shape,
      mean=0.,
      stddev=1.,
      dtype=self.dtype,
      seed=normal_seed)

  # Complexity: O(nbk)
  # This parameterization is equivalent to Chi2, i.e.,
  # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
  expanded_df = df * tf.ones(
      self._scale.batch_shape_tensor(),
      dtype=dtype_util.base_dtype(df.dtype))
  concentration = self._multi_gamma_sequence(
      0.5 * expanded_df, self._dimension())
  log_gammas = gamma_lib.random_gamma(
      shape=[n],
      concentration=concentration,
      log_rate=tf.convert_to_tensor(np.log(0.5), self.dtype),
      seed=gamma_seed,
      log_space=True)

  # Complexity: O(nbk**2)
  lower = tf.linalg.band_part(normals, -1, 0)  # Tri-lower.

  # Complexity: O(nbk)
  lower = tf.linalg.set_diag(lower, tf.math.exp(log_gammas * 0.5))

  # Make batch-op ready: move the sample axis last and fold it into the
  # trailing event axis so a single batched matmul covers all samples.
  # Complexity: O(nbk**2)
  sample_to_back = ps.concat([ps.range(1, ndims), [0]], 0)
  lower = tf.transpose(a=lower, perm=sample_to_back)
  folded_shape = ps.concat(
      [batch_shape, [event_shape[0]], [event_shape[1] * n]], 0)
  folded = tf.reshape(lower, folded_shape)

  # Complexity: O(nbM) where M is the complexity of the operator solving a
  # vector system. For LinearOperatorLowerTriangular, each matmul is O(k^3) so
  # this step has complexity O(nbk^3).
  scaled = self._scale.matmul(folded)

  # Undo make batch-op ready.
  # Complexity: O(nbk**2)
  unfolded_shape = ps.concat([batch_shape, event_shape, [n]], 0)
  scaled = tf.reshape(scaled, unfolded_shape)
  sample_to_front = ps.concat([[ndims - 1], ps.range(0, ndims - 1)], 0)
  factors = tf.transpose(a=scaled, perm=sample_to_front)

  if self.input_output_cholesky:
    return factors

  # Complexity: O(nbk**3)
  return tf.matmul(factors, factors, adjoint_b=True)
def generate_mc_normal_draws(num_normal_draws,
                             num_time_steps,
                             num_sample_paths,
                             random_type,
                             skip=0,
                             seed=None,
                             dtype=None,
                             name=None):
  """Generates normal random samples to be consumed by a Monte Carlo algorithm.

  Many Monte Carlo (MC) algorithms can be rewritten so that all necessary
  random (or quasi-random) variables are drawn in advance as a `Tensor` of
  shape `[num_time_steps, num_sample_paths, num_normal_draws]`, where
  `num_time_steps` is the number of time steps the Monte Carlo algorithm
  performs, `num_sample_paths` is the number of sample paths and
  `num_normal_draws` is the number of independent normal draws per sample
  path.

  For example, in order to use quasi-random numbers in a Monte Carlo
  algorithm, the samples have to be drawn in advance. This function generates
  a `Tensor`, say `x`, laid out so that for a quasi-`random_type` each `x[i]`
  corresponds to different dimensions of the quasi-random sequence.

  Args:
    num_normal_draws: A scalar int32 `Tensor`. The number of independent
      normal draws at each time step for each sample path. Should be a graph
      compilation constant.
    num_time_steps: A scalar int32 `Tensor`. The number of time steps at
      which to draw the independent normal samples. Should be a graph
      compilation constant.
    num_sample_paths: A scalar int32 `Tensor`. The number of trajectories
      (e.g., Monte Carlo paths) for which to draw the independent normal
      samples. Should be a graph compilation constant.
    random_type: Enum value of `tff.math.random.RandomType`. The type of
      (quasi)-random number generator to use to generate the paths.
    skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol or
      Halton sequence to skip. Used only when `random_type` is 'SOBOL',
      'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
      Default value: `0`.
    seed: Seed for the random number generator. The seed is only relevant if
      `random_type` is one of `[STATELESS, PSEUDO, HALTON_RANDOMIZED,
      PSEUDO_ANTITHETIC, STATELESS_ANTITHETIC]`. For `PSEUDO`,
      `PSEUDO_ANTITHETIC` and `HALTON_RANDOMIZED` the seed should be a Python
      integer. For `STATELESS` and `STATELESS_ANTITHETIC` it must be supplied
      as an integer `Tensor` of shape `[2]`.
      Default value: `None` which means no seed is set.
    dtype: The `dtype` of the output `Tensor`.
      Default value: `None` which maps to `float32`.
    name: Python string. The name to give this op.
      Default value: `None` which maps to `generate_mc_normal_draws`.

  Returns:
    A `Tensor` of shape `[num_time_steps, num_sample_paths, num_normal_draws]`.
  """
  name = 'generate_mc_normal_draws' if name is None else name
  skip = 0 if skip is None else skip
  with tf.name_scope(name):
    dtype = tf.float32 if dtype is None else dtype

    # For quasi-random draws the dimension of each draw must be
    # `num_time_steps * num_normal_draws`; a zero mean of that size fixes it.
    zero_mean = tf.zeros(
        [num_time_steps * num_normal_draws],
        dtype=dtype,
        name='total_dimension')
    flat_draws = random.mv_normal_sample(
        [num_sample_paths],
        mean=zero_mean,
        random_type=random_type,
        seed=seed,
        skip=skip)

    # Split the flat draw dimension, then put the time-step axis first.
    path_major = tf.reshape(
        flat_draws, [num_sample_paths, num_time_steps, num_normal_draws])
    # Shape [steps_num, num_samples, dim].
    return tf.transpose(path_major, [1, 0, 2])
def generate_wavelet_toy_image_data(image_width, num_samples,
                                    wavelet_num_levels):
  """Generates wavelet data for testFittingImageDataIsCorrect().

  Constructs a "mean" image in the YUV wavelet domain (parametrized by
  `image_width` and `wavelet_num_levels`), draws `num_samples` samples from a
  normal distribution around that mean, and returns RGB images corresponding
  to those samples and to their empirical mean (computed in the wavelet
  space).

  Args:
    image_width: The width and height in pixels of the images being produced.
    num_samples: The number of samples to generate.
    wavelet_num_levels: The number of levels in the wavelet decompositions of
      the generated images.

  Returns:
    A tuple of (samples, reference, color_space, representation), where
    samples = A set of sampled images of size
      (`num_samples`, `image_width`, `image_width`, 3)
    reference = The empirical mean of `samples` (computed in YUV Wavelet space
      but returned as an RGB image) of size (`image_width`, `image_width`, 3).
    color_space = 'YUV'
    representation = 'CDF9/7'
  """
  color_space = 'YUV'
  representation = 'CDF9/7'

  def draw_band(scaling, w):
    """Samples one 3-channel `w` x `w` band; returns (flat samples, mean)."""
    # Ground-truth band mean, then samples drawn around it.
    mu = scaling * np.random.uniform(size=(3, w, w))
    band_samples = np.random.normal(
        loc=np.tile(mu[np.newaxis], [num_samples, 1, 1, 1]))
    # The empirical mean of the samples serves as the reference.
    band_reference = np.mean(band_samples, 0)
    return np.reshape(band_samples, [-1, w, w]), band_reference

  samples = []
  reference = []
  for level in range(wavelet_num_levels):
    w = image_width // 2**(level + 1)
    scaling = 2**level
    level_samples = []
    level_reference = []
    for _ in range(3):
      flat_band, band_reference = draw_band(scaling, w)
      level_samples.append(flat_band)
      level_reference.append(band_reference)
    samples.append(level_samples)
    reference.append(level_reference)

  # Handle the residual band, which reuses the size and scaling of the last
  # level processed by the loop.
  residual_samples, residual_reference = draw_band(scaling, w)
  samples.append(residual_samples)
  reference.append(residual_reference)

  # Collapse and reshape wavelets to be ({_,} width, height, 3).
  samples = wavelet.collapse(samples, representation)
  reference = wavelet.collapse(reference, representation)
  channels_first = tf.reshape(
      samples, [num_samples, 3, image_width, image_width])
  samples = tf.transpose(channels_first, perm=[0, 2, 3, 1])
  reference = tf.transpose(reference, perm=[1, 2, 0])

  # Convert into RGB space.
  samples = util.syuv_to_rgb(samples).numpy()
  reference = util.syuv_to_rgb(reference).numpy()

  return samples, reference, color_space, representation
def swaption_price(*,
                   expiries,
                   floating_leg_start_times,
                   floating_leg_end_times,
                   fixed_leg_payment_times,
                   floating_leg_daycount_fractions,
                   fixed_leg_daycount_fractions,
                   fixed_leg_coupon,
                   reference_rate_fn,
                   dim,
                   mean_reversion,
                   volatility,
                   notional=None,
                   is_payer_swaption=None,
                   use_analytic_pricing=True,
                   num_samples=1,
                   random_type=None,
                   seed=None,
                   skip=0,
                   time_step=None,
                   dtype=None,
                   name=None):
  """Calculates the price of European Swaptions using the Hull-White model.

  A European Swaption is a contract that gives the holder an option to enter a
  swap contract at a future date at a prespecified fixed rate. A swaption that
  grants the holder to pay fixed rate and receive floating rate is called a
  payer swaption while the swaption that grants the holder to receive fixed and
  pay floating payments is called the receiver swaption. Typically the start
  date (or the inception date) of the swap coincides with the expiry of the
  swaption. Mid-curve swaptions are currently not supported (b/160061740).

  Analytic pricing of swaptions is performed using the Jamshidian decomposition
  [1].

  #### References:
    [1]: D. Brigo, F. Mercurio. Interest Rate Models-Theory and Practice.
    Second Edition. 2007.

  #### Example
  The example shows how to value a batch of 1y x 1y and 1y x 2y swaptions
  using the Hull-White model.

  ```python
  import numpy as np
  import tensorflow.compat.v2 as tf
  import tf_quant_finance as tff

  dtype = tf.float64

  expiries = [1.0, 1.0]
  float_leg_start_times = [[1.0, 1.25, 1.5, 1.75, 2.0, 2.0, 2.0, 2.0],
                            [1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75]]
  float_leg_end_times = [[1.25, 1.5, 1.75, 2.0, 2.0, 2.0, 2.0, 2.0],
                          [1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]]
  fixed_leg_payment_times = [[1.25, 1.5, 1.75, 2.0, 2.0, 2.0, 2.0, 2.0],
                              [1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]]
  float_leg_daycount_fractions = [[0.25, 0.25, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0],
                              [0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25]]
  fixed_leg_daycount_fractions = [[0.25, 0.25, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0],
                              [0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25]]
  fixed_leg_coupon = [[0.011, 0.011, 0.011, 0.011, 0.0, 0.0, 0.0, 0.0],
                      [0.011, 0.011, 0.011, 0.011, 0.011, 0.011, 0.011, 0.011]]
  zero_rate_fn = lambda x: 0.01 * tf.ones_like(x, dtype=dtype)
  price = tff.models.hull_white.swaption_price(
      expiries=expiries,
      floating_leg_start_times=float_leg_start_times,
      floating_leg_end_times=float_leg_end_times,
      fixed_leg_payment_times=fixed_leg_payment_times,
      floating_leg_daycount_fractions=float_leg_daycount_fractions,
      fixed_leg_daycount_fractions=fixed_leg_daycount_fractions,
      fixed_leg_coupon=fixed_leg_coupon,
      reference_rate_fn=zero_rate_fn,
      notional=100.,
      dim=1,
      mean_reversion=[0.03],
      volatility=[0.02],
      dtype=dtype)
  # Expected value: [[0.7163243383624043], [1.4031415262337608]] # shape = (2,1)
  ```

  Args:
    expiries: A real `Tensor` of any shape and dtype. The time to
      expiration of the swaptions. The shape of this input determines the
      number (and shape) of swaptions to be priced and the shape of the output.
    floating_leg_start_times: A real `Tensor` of the same dtype as `expiries`.
      The times when accrual begins for each payment in the floating leg. The
      shape of this input should be `expiries.shape + [m]` where `m` denotes
      the number of floating payments in each leg.
    floating_leg_end_times: A real `Tensor` of the same dtype as `expiries`.
      The times when accrual ends for each payment in the floating leg. The
      shape of this input should be `expiries.shape + [m]` where `m` denotes
      the number of floating payments in each leg.
    fixed_leg_payment_times: A real `Tensor` of the same dtype as `expiries`.
      The payment times for each payment in the fixed leg. The shape of this
      input should be `expiries.shape + [n]` where `n` denotes the number of
      fixed payments in each leg.
    floating_leg_daycount_fractions: A real `Tensor` of the same dtype and
      compatible shape as `floating_leg_start_times`. The daycount fractions
      for each payment in the floating leg. This argument is currently
      discarded by the implementation.
    fixed_leg_daycount_fractions: A real `Tensor` of the same dtype and
      compatible shape as `fixed_leg_payment_times`. The daycount fractions
      for each payment in the fixed leg.
    fixed_leg_coupon: A real `Tensor` of the same dtype and compatible shape
      as `fixed_leg_payment_times`. The fixed rate for each payment in the
      fixed leg.
    reference_rate_fn: A Python callable that accepts expiry time as a real
      `Tensor` and returns a `Tensor` of shape `input_shape + [dim]`. Returns
      the continuously compounded zero rate at the present time for the input
      expiry time.
    dim: A Python scalar which corresponds to the number of Hull-White Models
      to be used for pricing.
    mean_reversion: A real positive `Tensor` of shape `[dim]` or a Python
      callable. The callable can be one of the following:
      (a) A left-continuous piecewise constant object (e.g.,
      `tff.math.piecewise.PiecewiseConstantFunc`) that has a property
      `is_piecewise_constant` set to `True`. In this case the object should
      have a method `jump_locations(self)` that returns a `Tensor` of shape
      `[dim, num_jumps]` or `[num_jumps]`. In the first case,
      `mean_reversion(t)` should return a `Tensor` of shape `[dim] + t.shape`,
      and in the second, `t.shape + [dim]`, where `t` is a rank 1 `Tensor` of
      the same `dtype` as the output. See example in the class docstring.
      (b) A callable that accepts scalars (stands for time `t`) and returns a
      `Tensor` of shape `[dim]`.
      Corresponds to the mean reversion rate.
    volatility: A real positive `Tensor` of the same `dtype` as
      `mean_reversion` or a callable with the same specs as above.
      Corresponds to the long run price variance.
    notional: An optional `Tensor` of the same dtype as `expiries` specifying
      the notional amount for the underlying swap. It is multiplied into the
      computed prices and therefore has to broadcast against them.
       Default value: None in which case the notional is set to 1.
    is_payer_swaption: A boolean `Tensor` of a shape compatible with `expiries`.
      Indicates whether the swaption is a payer (if True) or a receiver
      (if False) swaption. If not supplied, payer swaptions are assumed.
    use_analytic_pricing: A Python boolean specifying if analytic valuation
      should be performed. Analytic valuation is only supported for constant
      `mean_reversion` and piecewise constant `volatility`. If the input is
      `False`, then valuation using Monte-Carlo simulations is performed.
      Default value: The default value is `True`.
    num_samples: Positive scalar `int32` `Tensor`. The number of simulation
      paths during Monte-Carlo valuation. This input is ignored during analytic
      valuation.
      Default value: The default value is 1.
    random_type: Enum value of `RandomType`. The type of (quasi)-random
      number generator to use to generate the simulation paths. This input is
      relevant only for Monte-Carlo valuation and ignored during analytic
      valuation.
      Default value: `None` which maps to the standard pseudo-random numbers.
    seed: Seed for the random number generator. The seed is only relevant if
      `random_type` is one of
      `[STATELESS, PSEUDO, HALTON_RANDOMIZED, PSEUDO_ANTITHETIC,
        STATELESS_ANTITHETIC]`. For `PSEUDO`, `PSEUDO_ANTITHETIC` and
      `HALTON_RANDOMIZED` the seed should be a Python integer. For
      `STATELESS` and `STATELESS_ANTITHETIC` it must be supplied as an integer
      `Tensor` of shape `[2]`. This input is relevant only for Monte-Carlo
      valuation and ignored during analytic valuation.
      Default value: `None` which means no seed is set.
    skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol or
      Halton sequence to skip. Used only when `random_type` is 'SOBOL',
      'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
      Default value: `0`.
    time_step: Scalar real `Tensor`. Maximal distance between time grid points
      in Euler scheme. Relevant when Euler scheme is used for simulation. This
      input is ignored during analytic valuation, but is required for
      Monte-Carlo valuation.
      Default value: `None`.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None` which means that default dtypes inferred by
      TensorFlow are used.
    name: Python string. The name to give to the ops created by this function.
      Default value: `None` which maps to the default name
      `hw_swaption_price`.

  Returns:
    A `Tensor` of real dtype and shape `expiries.shape + [dim]` containing the
    computed swaption prices. For swaptions that have reset in the past
    (expiries < 0), the function sets the corresponding option prices to 0.0.

  Raises:
    ValueError: If the rank of `expiries` is smaller than the rank of
      `fixed_leg_payment_times` minus one, or if Monte-Carlo valuation is
      requested without supplying `time_step`.
  """
  # TODO(b/160061740): Extend the functionality to support mid-curve swaptions.
  name = name or 'hw_swaption_price'
  del floating_leg_daycount_fractions
  with tf.name_scope(name):
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
    dtype = dtype or expiries.dtype
    float_leg_start_times = tf.convert_to_tensor(
        floating_leg_start_times, dtype=dtype, name='float_leg_start_times')
    float_leg_end_times = tf.convert_to_tensor(
        floating_leg_end_times, dtype=dtype, name='float_leg_end_times')
    fixed_leg_payment_times = tf.convert_to_tensor(
        fixed_leg_payment_times, dtype=dtype, name='fixed_leg_payment_times')
    fixed_leg_daycount_fractions = tf.convert_to_tensor(
        fixed_leg_daycount_fractions, dtype=dtype,
        name='fixed_leg_daycount_fractions')
    fixed_leg_coupon = tf.convert_to_tensor(
        fixed_leg_coupon, dtype=dtype, name='fixed_leg_coupon')
    # `tf.convert_to_tensor` rejects `None`, so apply the documented default
    # of a unit notional before converting.
    if notional is None:
      notional = 1.0
    notional = tf.convert_to_tensor(notional, dtype=dtype, name='notional')
    if is_payer_swaption is None:
      is_payer_swaption = True
    is_payer_swaption = tf.convert_to_tensor(
        is_payer_swaption, dtype=tf.bool, name='is_payer_swaption')

    # Captured before `expiries` is expanded/repeated below.
    output_shape = expiries.shape.as_list() + [dim]
    # Add a dimension corresponding to multiple cashflows in a swap
    if expiries.shape.rank == fixed_leg_payment_times.shape.rank - 1:
      expiries = tf.expand_dims(expiries, axis=-1)
    elif expiries.shape.rank < fixed_leg_payment_times.shape.rank - 1:
      raise ValueError(
          'Swaption expiries not specified for all swaptions '
          'in the batch. Expected rank {} but received {}.'.format(
              fixed_leg_payment_times.shape.rank - 1, expiries.shape.rank))

    # Expected shape: batch_shape + [m], same as fixed_leg_payment_times.shape
    # We need to explicitly use tf.repeat because we need to price
    # batch_shape + [m] bond options with different strikes along the last
    # dimension.
    expiries = tf.repeat(
        expiries, fixed_leg_payment_times.shape.as_list()[-1], axis=-1)

    if use_analytic_pricing:
      return _analytic_valuation(
          expiries, float_leg_start_times, float_leg_end_times,
          fixed_leg_payment_times, fixed_leg_daycount_fractions,
          fixed_leg_coupon, reference_rate_fn, dim, mean_reversion, volatility,
          notional, is_payer_swaption, output_shape, dtype,
          name + '_analytic_valyation')

    # Monte-Carlo pricing
    model = vector_hull_white.VectorHullWhiteModel(
        dim,
        mean_reversion,
        volatility,
        initial_discount_rate_fn=reference_rate_fn,
        dtype=dtype)

    if time_step is None:
      raise ValueError('`time_step` must be provided for simulation '
                       'based bond option valuation.')

    # Simulation grid: every distinct expiry plus a regular grid of spacing
    # `time_step` up to the longest expiry.
    sim_times, _ = tf.unique(tf.reshape(expiries, shape=[-1]))
    longest_expiry = tf.reduce_max(sim_times)
    sim_times, _ = tf.unique(
        tf.concat(
            [sim_times, tf.range(time_step, longest_expiry, time_step)],
            axis=0))
    sim_times = tf.sort(sim_times, name='sort_sim_times')

    maturities = fixed_leg_payment_times
    swaptionlet_shape = maturities.shape
    # Time from swaption expiry to each fixed-leg payment.
    tau = maturities - expiries

    curve_times_builder, _ = tf.unique(tf.reshape(tau, shape=[-1]))
    curve_times = tf.sort(curve_times_builder, name='sort_curve_times')

    p_t_tau, r_t = model.sample_discount_curve_paths(
        times=sim_times,
        curve_times=curve_times,
        num_samples=num_samples,
        random_type=random_type,
        seed=seed,
        skip=skip)

    dt = tf.concat([
        tf.convert_to_tensor([0.0], dtype=dtype),
        sim_times[1:] - sim_times[:-1]
    ], axis=0)
    dt = tf.expand_dims(tf.expand_dims(dt, axis=-1), axis=0)
    discount_factors_builder = tf.math.exp(-r_t * dt)
    # Transpose before (and after) because we want the cumprod along axis=1
    # and `matvec` operates on the last axis.
    discount_factors_builder = tf.transpose(
        _cumprod_using_matvec(
            tf.transpose(discount_factors_builder, [0, 2, 1])), [0, 2, 1])

    # Make discount factors the same shape as `p_t_tau`. This involves adding
    # an extra dimension (corresponding to `curve_times`).
    discount_factors_builder = tf.expand_dims(discount_factors_builder, axis=1)
    # tf.repeat is needed because we will use gather_nd later on this tensor.
    discount_factors_simulated = tf.repeat(
        discount_factors_builder, p_t_tau.shape.as_list()[1], axis=1)

    # `sim_times` and `curve_times` are sorted for simulation. We need to
    # select the indices corresponding to our input.
    sim_time_index = tf.searchsorted(sim_times, tf.reshape(expiries, [-1]))
    curve_time_index = tf.searchsorted(curve_times, tf.reshape(tau, [-1]))

    gather_index = _prepare_indices(
        tf.range(0, num_samples), curve_time_index, sim_time_index,
        tf.range(0, dim))

    # The shape after `gather_nd` will be `(num_samples*num_swaptionlets*dim,)`
    payoff_discount_factors_builder = tf.gather_nd(
        discount_factors_simulated, gather_index)
    # Reshape to `[num_samples] + swaptionlet.shape + [dim]`
    payoff_discount_factors = tf.reshape(
        payoff_discount_factors_builder,
        [num_samples] + swaptionlet_shape + [dim])
    payoff_bond_price_builder = tf.gather_nd(p_t_tau, gather_index)
    payoff_bond_price = tf.reshape(
        payoff_bond_price_builder,
        [num_samples] + swaptionlet_shape + [dim])

    # Add an axis corresponding to `dim`
    fixed_leg_pv = tf.expand_dims(
        fixed_leg_coupon * fixed_leg_daycount_fractions,
        axis=-1) * payoff_bond_price
    # Sum fixed coupon payments within each swap
    fixed_leg_pv = tf.math.reduce_sum(fixed_leg_pv, axis=-2)
    float_leg_pv = 1.0 - payoff_bond_price[..., -1, :]
    payoff_swap = payoff_discount_factors[..., -1, :] * (
        float_leg_pv - fixed_leg_pv)
    # Receiver swaptions see the opposite sign of the swap value.
    payoff_swap = tf.where(is_payer_swaption, payoff_swap, -1.0 * payoff_swap)
    payoff_swaption = tf.math.maximum(payoff_swap, 0.0)
    # Average over the simulated paths (axis 0).
    option_value = notional * tf.math.reduce_mean(payoff_swaption, axis=0)

    return tf.reshape(option_value, output_shape)
def _analytic_valuation(expiries, floating_leg_start_times,
                        floating_leg_end_times, fixed_leg_payment_times,
                        fixed_leg_daycount_fractions, fixed_leg_coupon,
                        reference_rate_fn, dim, mean_reversion, volatility,
                        notional, is_payer_swaption, output_shape, dtype, name):
  """Prices swaptions analytically as a weighted sum of bond options.

  Break-even strikes are obtained from `_jamshidian_decomposition`, a bond
  option is priced for every fixed-leg payment via `zcb.bond_option_price`,
  and the option prices are combined using the fixed-leg cashflow weights
  (`fixed_leg_daycount_fractions * fixed_leg_coupon`, plus the unit weight on
  the final payment).

  Returns:
    The swaption values reshaped to `output_shape`.
  """
  # The below inputs are needed for midcurve swaptions
  del floating_leg_start_times, floating_leg_end_times

  with tf.name_scope(name):
    # Payer swaptions map to put bond options, receivers to calls.
    as_put = tf.convert_to_tensor(False, dtype=tf.bool)
    as_call = tf.convert_to_tensor(True, dtype=tf.bool)
    is_call_options = tf.where(is_payer_swaption, as_put, as_call)

    model = vector_hull_white.VectorHullWhiteModel(
        dim,
        mean_reversion,
        volatility,
        initial_discount_rate_fn=reference_rate_fn,
        dtype=dtype)

    coefficients = fixed_leg_daycount_fractions * fixed_leg_coupon
    # Every cashflow enters with a negative sign; the last one additionally
    # carries the unit (-1.0) term.
    leading_terms = -coefficients[..., :-1]
    final_term = tf.expand_dims(-1.0 - coefficients[..., -1], axis=-1)
    jamshidian_coefficients = tf.concat([leading_terms, final_term], axis=-1)

    strikes = _jamshidian_decomposition(
        model,
        expiries,
        fixed_leg_payment_times,
        jamshidian_coefficients,
        dtype,
        name=name + '_jamshidian_decomposition')

    # Rotate the last axis of the strikes to the front.
    strike_rank = strikes.shape.rank
    last_axis_first = [strike_rank - 1] + list(range(strike_rank - 1))
    strikes = tf.transpose(strikes, perm=last_axis_first)

    bond_option_prices = notional * zcb.bond_option_price(
        strikes=strikes,
        expiries=expiries,
        maturities=fixed_leg_payment_times,
        discount_rate_fn=reference_rate_fn,
        dim=dim,
        mean_reversion=mean_reversion,
        volatility=volatility,
        is_call_options=is_call_options,
        use_analytic_pricing=True,
        dtype=dtype,
        name=name + '_bond_option')

    # Now compute P(T0, TN) + sum_i (c_i * tau_i * P(T0, Ti))
    # bond_option_prices.shape = [dim] + batch_shape + [m] + [dim], where `m`
    # denotes the number of fixed payments for the underlying swaps.
    weighted_prices = bond_option_prices * tf.expand_dims(
        coefficients, axis=-1)
    coupon_part = tf.reduce_sum(weighted_prices, axis=-2)
    swaption_values = coupon_part + bond_option_prices[..., -1, :]

    gather_index = _prepare_swaption_indices(
        swaption_values.shape.as_list())
    selected_values = tf.gather_nd(swaption_values, gather_index)
    return tf.reshape(selected_values, output_shape)
def _update_confusion_matrix_variables_optimized(
    variables_to_update,
    y_true,
    y_pred,
    thresholds,
    multi_label=False,
    sample_weights=None,
    label_weights=None,
    thresholds_with_epsilon=False):
  """Updates confusion matrix variables using a bucketed, low-memory scheme.

  The thresholds must be evenly spaced, i.e. the difference between
  consecutive elements is constant.

  TP/FP/TN/FN are defined through the classifier C(t) = (predictions >= t)
  evaluated at every threshold `t`:

    TP(t) = sum( C(t) * true_labels )
    FP(t) = sum( C(t) * false_labels )

  Rather than evaluating C(t) once per threshold, every prediction is assigned
  to a bucket between two consecutive thresholds, the (weighted) labels are
  summed per bucket in a single pass with `tf.math.unsorted_segment_sum`, and
  a reversed cumulative sum over the buckets yields the per-threshold counts:

    C(t_i) = sum( B(j), j >= i )

  With evenly spaced thresholds

    width = 1.0 / (num_thresholds - 1)
    thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]

  a prediction has to be strictly greater than a bucket's lower bound to fall
  into it, so the bucket of prediction `p` is
  `ceil(p * (num_thresholds - 1)) - 1`.

  Example:

    y_true = [0, 0, 1, 1]
    y_pred = [0.1, 0.5, 0.3, 0.9]
    thresholds = [0.0, 0.5, 1.0]
    num_buckets = 2   # [0.0, 1.0], (1.0, 2.0]
    bucket_index(y_pred) = ceil([0.2, 1.0, 0.6, 1.8]) - 1
                         = [0, 0, 0, 1]
    # The scaled value 1.0 lands in bucket 0 because it is not strictly
    # larger than the lower bound of bucket 1.

    tp_bucket_value = tf.math.unsorted_segment_sum(true_labels, bucket_indices,
                                                   num_segments=num_thresholds)
                    = [1, 1, 0]
    # Bucket 0 and bucket 1 each hold one true label. Larger buckets always
    # contribute to the counts of smaller thresholds, so the totals are
    # [a + b + c, b + c, c].
    true_positive = tf.math.cumsum(tp_bucket_value, reverse=True)
                  = [2, 1, 0]

  This implementation exhibits a run time and space complexity of O(T + N),
  where T is the number of thresholds and N is the size of predictions.
  Metrics that rely on the standard implementation instead exhibit a
  complexity of O(T * N).

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A floating point `Tensor` whose shape matches `y_pred`. Will be cast
      to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A sorted floating point `Tensor` with value in `[0, 1]`.
      It needs to be evenly distributed (the difference between consecutive
      elements has to be the same).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    sample_weights: Optional `Tensor` whose rank is either 0, or the same rank
      as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `y_true` dimension).
    label_weights: Optional tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).
    thresholds_with_epsilon: Optional boolean indicating whether the leading and
      trailing thresholds have an epsilon added for floating point
      imprecision. It changes how the leading and trailing buckets are handled.

  Returns:
    Update op.
  """
  num_thresholds = thresholds.shape.as_list()[0]
  flatten = not multi_label

  if sample_weights is not None:
    sample_weights = tf.__internal__.ops.broadcast_weights(
        tf.cast(sample_weights, dtype=y_pred.dtype), y_pred)
    if flatten:
      sample_weights = tf.reshape(sample_weights, [-1])
  else:
    sample_weights = 1.0

  if label_weights is not None:
    label_weights = tf.expand_dims(label_weights, 0)
    label_weights = tf.__internal__.ops.broadcast_weights(
        label_weights, y_pred)
    if flatten:
      label_weights = tf.reshape(label_weights, [-1])
  else:
    label_weights = 1.0
  weights = tf.multiply(sample_weights, label_weights)

  # We shouldn't need this, but guard against predictions outside [0.0, 1.0].
  y_pred = tf.clip_by_value(y_pred, clip_value_min=0.0, clip_value_max=1.0)

  # Round-trip through bool so the labels are exactly 0 or 1.
  y_true = tf.cast(tf.cast(y_true, tf.bool), y_true.dtype)
  if flatten:
    y_true = tf.reshape(y_true, [-1])
    y_pred = tf.reshape(y_pred, [-1])

  true_labels = tf.multiply(y_true, weights)
  false_labels = tf.multiply((1.0 - y_true), weights)

  # A prediction must be strictly greater than a threshold to pass it, e.g.
  # with buckets [0, 0.5], (0.5, 1] the value 0.5 belongs to the first bucket.
  # Hence ceil(val) - 1 rather than floor(val).
  bucket_indices = tf.math.ceil(y_pred * (num_thresholds - 1)) - 1
  if thresholds_with_epsilon:
    # Every prediction in [0.0, 1.0] is larger than the (epsilon-shifted)
    # first threshold, so index -1 is moved into bucket 0.
    bucket_indices = tf.nn.relu(bucket_indices)
  bucket_indices = tf.cast(bucket_indices, tf.int32)

  def _bucket_sum(labels, indices):
    """Sums `labels` into `num_thresholds` buckets selected by `indices`."""
    return tf.math.unsorted_segment_sum(
        data=labels, segment_ids=indices, num_segments=num_thresholds)

  if multi_label:
    # The labels have rank 2 here. Put the label dimension first so that the
    # bucket sum can be mapped over the label classes.
    true_labels = tf.transpose(true_labels)
    false_labels = tf.transpose(false_labels)
    bucket_indices = tf.transpose(bucket_indices)

    def _bucket_sum_per_label(labels_and_indices):
      return _bucket_sum(labels_and_indices[0], labels_and_indices[1])

    tp_bucket_v = tf.vectorized_map(
        _bucket_sum_per_label, (true_labels, bucket_indices))
    fp_bucket_v = tf.vectorized_map(
        _bucket_sum_per_label, (false_labels, bucket_indices))
    tp = tf.transpose(tf.cumsum(tp_bucket_v, reverse=True, axis=1))
    fp = tf.transpose(tf.cumsum(fp_bucket_v, reverse=True, axis=1))
  else:
    tp_bucket_v = _bucket_sum(true_labels, bucket_indices)
    fp_bucket_v = _bucket_sum(false_labels, bucket_indices)
    tp = tf.cumsum(tp_bucket_v, reverse=True)
    fp = tf.cumsum(fp_bucket_v, reverse=True)

  # fn = sum(true_labels) - tp
  # tn = sum(false_labels) - fp
  needs_totals = (
      ConfusionMatrix.TRUE_NEGATIVES in variables_to_update or
      ConfusionMatrix.FALSE_NEGATIVES in variables_to_update)
  if needs_totals:
    if multi_label:
      total_true_labels = tf.reduce_sum(true_labels, axis=1)
      total_false_labels = tf.reduce_sum(false_labels, axis=1)
    else:
      total_true_labels = tf.reduce_sum(true_labels)
      total_false_labels = tf.reduce_sum(false_labels)

  # The increments are wrapped in callables because the totals only exist
  # when a negatives variable was requested.
  increments = (
      (ConfusionMatrix.TRUE_POSITIVES, lambda: tp),
      (ConfusionMatrix.FALSE_POSITIVES, lambda: fp),
      (ConfusionMatrix.TRUE_NEGATIVES, lambda: total_false_labels - fp),
      (ConfusionMatrix.FALSE_NEGATIVES, lambda: total_true_labels - tp),
  )
  update_ops = []
  for key, increment in increments:
    if key in variables_to_update:
      variable = variables_to_update[key]
      update_ops.append(variable.assign_add(increment()))
  return tf.group(update_ops)
def transpose_fn(batch):
  """Returns a copy of `batch` whose 'images' entry has its first axis last.

  This is the first half of the double-transpose trick for TPU: the images
  are permuted with axis order (1, 2, 3, 0). The input mapping itself is not
  modified; only a shallow copy is.

  Args:
    batch: A mapping with string keys that contains an 'images' entry.

  Returns:
    A new `dict` with the same entries as `batch`, except for the transposed
    'images' value.
  """
  transposed = dict(**batch)
  transposed['images'] = tf.transpose(transposed['images'], (1, 2, 3, 0))
  return transposed
def _sample_n(self, n, seed):
  """Draws `n` samples.

  A lower-triangular matrix of standard normal draws is built, its diagonal
  is replaced by the square root of gamma draws, and the result is
  left-multiplied by `self.scale_operator`. Unless
  `self.input_output_cholesky` is set, the product of each such matrix with
  its adjoint is returned.

  Args:
    n: Number of samples to draw.
    seed: Seed forwarded to `SeedStream` (salted with "Wishart").

  Returns:
    A tensor whose shape is `[n]` followed by the batch and event shapes.
  """
  batch_shape = self.batch_shape_tensor()
  event_shape = self.event_shape_tensor()
  # Total rank of a sample: sample_ndims=1, batch_ndims, event_ndims=2.
  ndims = tf.shape(input=batch_shape)[0] + 3
  stream = seed_stream.SeedStream(seed, salt="Wishart")

  # Complexity: O(nbk**2)
  normal_shape = tf.concat([[n], batch_shape, event_shape], 0)
  x = tf.random.normal(
      shape=normal_shape,
      mean=0.,
      stddev=1.,
      dtype=self.dtype,
      seed=stream())

  # Complexity: O(nbk)
  # This parametrization is equivalent to Chi2, i.e.,
  # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
  df_broadcaster = tf.ones(
      self.scale_operator.batch_shape_tensor(),
      dtype=dtype_util.base_dtype(self.df.dtype))
  expanded_df = self.df * df_broadcaster
  gamma_alpha = self._multi_gamma_sequence(0.5 * expanded_df, self.dimension)
  g = tf.random.gamma(
      shape=[n],
      alpha=gamma_alpha,
      beta=0.5,
      dtype=self.dtype,
      seed=stream())

  # Complexity: O(nbk**2)
  x = tf.linalg.band_part(x, -1, 0)  # Tri-lower.

  # Complexity: O(nbk)
  x = tf.linalg.set_diag(x, tf.sqrt(g))

  # Make batch-op ready: rotate the sample axis to the end and fold it into
  # the last event axis so that a single matmul handles all samples.
  # Complexity: O(nbk**2)
  sample_axis_last = tf.concat([tf.range(1, ndims), [0]], 0)
  x = tf.transpose(a=x, perm=sample_axis_last)
  folded_shape = tf.concat(
      [batch_shape, [event_shape[0]], [event_shape[1] * n]], 0)
  x = tf.reshape(x, folded_shape)

  # Complexity: O(nbM) where M is the complexity of the operator solving a
  # vector system. For LinearOperatorLowerTriangular, each matmul is O(k^3) so
  # this step has complexity O(nbk^3).
  x = self.scale_operator.matmul(x)

  # Undo make batch-op ready: unfold the sample axis and rotate it back to
  # the front.
  # Complexity: O(nbk**2)
  unfolded_shape = tf.concat([batch_shape, event_shape, [n]], 0)
  x = tf.reshape(x, unfolded_shape)
  sample_axis_first = tf.concat([[ndims - 1], tf.range(0, ndims - 1)], 0)
  x = tf.transpose(a=x, perm=sample_axis_first)

  if self.input_output_cholesky:
    return x

  # Complexity: O(nbk**3)
  return tf.matmul(x, x, adjoint_b=True)
def roll_channels_from_batch(tensor):
  """Reorders a rank-5 tensor from [B, C, H, W, D] to [B, H, W, C, D].

  Args:
    tensor: A rank-5 tensor laid out as [B, C, H, W, D].

  Returns:
    The tensor with axis 1 (C) moved behind axes 2 and 3 (H, W).
  """
  # Output axis i is taken from input axis axis_order[i].
  axis_order = [0, 2, 3, 1, 4]
  return tf.transpose(tensor, perm=axis_order)
def _testMVN(self,
             base_distribution_class,
             base_distribution_kwargs,
             batch_shape=(),
             event_shape=(),
             not_implemented_message=None):
  """Checks an Affine-transformed distribution against a reference MVN.

  Builds the distribution under test (`self._cls()`) twice, once with
  statically known and once with placeholder ("dynamic") batch/event shapes,
  using `tfb.Affine(shift=self._shift, scale_tril=self._tril)` as bijector.
  Sample moments, `log_prob`, `prob`, `mean` and `entropy` are compared with
  `scipy.stats.multivariate_normal` built from the same shift and scale.

  Args:
    base_distribution_class: Class of the base distribution to transform.
    base_distribution_kwargs: Keyword arguments for `base_distribution_class`.
    batch_shape: Overriding batch shape passed to the distribution under test.
    event_shape: Overriding event shape passed to the distribution under test.
    not_implemented_message: Regex expected in the `NotImplementedError`
      raised by the CDF-like methods.
  """
  # Overriding shapes must be compatible w/bijector; most bijectors are
  # batch_shape agnostic and only care about event_ndims.
  # In the case of `Affine`, if we got it wrong then it would fire an
  # exception due to incompatible dimensions.
  batch_shape_pl = tf1.placeholder_with_default(
      input=np.int32(batch_shape), shape=None, name="dynamic_batch_shape")
  event_shape_pl = tf1.placeholder_with_default(
      input=np.int32(event_shape), shape=None, name="dynamic_event_shape")
  fake_mvn_dynamic = self._cls()(
      distribution=base_distribution_class(
          validate_args=True, **base_distribution_kwargs),
      bijector=tfb.Affine(shift=self._shift, scale_tril=self._tril),
      batch_shape=batch_shape_pl,
      event_shape=event_shape_pl,
      validate_args=True)

  fake_mvn_static = self._cls()(
      distribution=base_distribution_class(
          validate_args=True, **base_distribution_kwargs),
      bijector=tfb.Affine(shift=self._shift, scale_tril=self._tril),
      batch_shape=batch_shape,
      event_shape=event_shape,
      validate_args=True)

  actual_mean = np.tile(self._shift, [2, 1])  # Affine elided this tile.
  actual_cov = np.matmul(self._tril, np.transpose(self._tril, [0, 2, 1]))

  def actual_mvn_log_prob(x):
    """Reference log-density, evaluated per batch member with SciPy."""
    return np.concatenate([[
        stats.multivariate_normal(actual_mean[i],
                                  actual_cov[i]).logpdf(x[:, i, :])
    ] for i in range(len(actual_cov))]).T

  actual_mvn_entropy = np.concatenate([[
      stats.multivariate_normal(actual_mean[i], actual_cov[i]).entropy()
  ] for i in range(len(actual_cov))])

  self.assertAllEqual([3], fake_mvn_static.event_shape)
  self.assertAllEqual([2], fake_mvn_static.batch_shape)

  if not tf.executing_eagerly():
    # With placeholder shapes nothing is known statically in graph mode.
    self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.event_shape)
    self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.batch_shape)

  x = self.evaluate(
      fake_mvn_static.sample(5, seed=tfp_test_util.test_seed()))
  for unsupported_fn in (fake_mvn_static.log_cdf, fake_mvn_static.cdf,
                         fake_mvn_static.survival_function,
                         fake_mvn_static.log_survival_function):
    # `assertRaisesRegexp` is a deprecated alias that was removed from
    # `unittest.TestCase` in Python 3.12; use the supported spelling.
    with self.assertRaisesRegex(NotImplementedError,
                                not_implemented_message):
      unsupported_fn(x)

  num_samples = 7e3
  for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
    # Ensure sample works by checking first, second moments.
    y = fake_mvn.sample(int(num_samples), seed=tfp_test_util.test_seed())
    x = y[0:5, ...]
    sample_mean = tf.reduce_mean(input_tensor=y, axis=0)
    # Put the sample axis last so that matmul sums over the samples.
    centered_y = tf.transpose(a=y - sample_mean, perm=[1, 2, 0])
    sample_cov = tf.matmul(
        centered_y, centered_y, transpose_b=True) / num_samples
    [
        sample_mean_,
        sample_cov_,
        x_,
        fake_event_shape_,
        fake_batch_shape_,
        fake_log_prob_,
        fake_prob_,
        fake_mean_,
        fake_entropy_,
    ] = self.evaluate([
        sample_mean,
        sample_cov,
        x,
        fake_mvn.event_shape_tensor(),
        fake_mvn.batch_shape_tensor(),
        fake_mvn.log_prob(x),
        fake_mvn.prob(x),
        fake_mvn.mean(),
        fake_mvn.entropy(),
    ])

    self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
    self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

    # Ensure all other functions work as intended.
    self.assertAllEqual([5, 2, 3], x_.shape)
    self.assertAllEqual([3], fake_event_shape_)
    self.assertAllEqual([2], fake_batch_shape_)
    self.assertAllClose(
        actual_mvn_log_prob(x_), fake_log_prob_, atol=0., rtol=1e-6)
    self.assertAllClose(
        np.exp(actual_mvn_log_prob(x_)), fake_prob_, atol=0., rtol=1e-5)
    self.assertAllClose(actual_mean, fake_mean_, atol=0., rtol=1e-6)
    self.assertAllClose(
        actual_mvn_entropy, fake_entropy_, atol=0., rtol=1e-6)
def _transpose_around_bijector_fn(self,
                                  bijector_fn,
                                  arg,
                                  src_event_ndims,
                                  dest_event_ndims=None,
                                  fn_reduces_event=False,
                                  **kwargs):
  """Applies `bijector_fn` with the `self.sample_shape` axes moved out of the way.

  The axes corresponding to `self.sample_shape` are moved to the left of the
  batch axes, `bijector_fn` is applied, and those axes are then moved back
  into the event part of the shape.

  This helper exists because forward and inverse turned out to be nearly
  identical; supporting {F/I}LDJ was only a small further generalization.

  Args:
    bijector_fn: Callable applied to the transposed `arg`.
    arg: Tensor to transform.
    src_event_ndims: Expected event rank (omitting `self.sample_shape`) before
      applying `bijector_fn`.
    dest_event_ndims: Expected event rank (omitting `self.sample_shape`) after
      applying `bijector_fn`. Ignored (treated as 0) when `fn_reduces_event`
      is set.
    fn_reduces_event: Whether `bijector_fn` reduces over the event axes, as a
      log-det-Jacobian does.
    **kwargs: Forwarded to `bijector_fn`.

  Returns:
    The result of `bijector_fn`, with the `self.sample_shape` axes placed to
    the right of the batch axes.
  """

  def _left_pad_shape_with_ones(x, rank_surplus):
    """Prepends size-1 axes to `x` when `rank_surplus` is negative."""
    padded_shape = ps.pad(
        ps.shape(x),
        paddings=[[ps.maximum(0, -rank_surplus), 0]],
        constant_values=1)
    return tf.reshape(x, shape=padded_shape)

  batch_ndims = ps.rank_from_shape(self.distribution.batch_shape_tensor,
                                   self.distribution.batch_shape)
  extra_sample_ndims = ps.rank_from_shape(self.sample_shape)

  # (1) Expand arg's dims.
  d = ps.rank(arg) - batch_ndims - extra_sample_ndims - src_event_ndims
  arg = _left_pad_shape_with_ones(arg, d)
  arg_ndims = ps.rank(arg)
  sample_ndims = ps.maximum(0, d)

  # (2) Transpose arg's dims. Incoming layout:
  #     [sample, batch, extra_sample, event].
  batch_end = sample_ndims + batch_ndims
  extra_sample_end = sample_ndims + batch_ndims + extra_sample_ndims
  sample_dims = ps.range(0, sample_ndims)
  batch_dims = ps.range(sample_ndims, batch_end)
  extra_sample_dims = ps.range(batch_end, extra_sample_end)
  event_dims = ps.range(extra_sample_end, arg_ndims)
  to_bijector_layout = ps.concat(
      [sample_dims, extra_sample_dims, batch_dims, event_dims], axis=0)
  arg = tf.transpose(arg, perm=to_bijector_layout)

  # (3) Apply underlying bijector.
  result = bijector_fn(arg, **kwargs)

  # (4) Transpose sample_shape from the sample to the event shape.
  if fn_reduces_event:
    dest_event_ndims = 0
  d = ps.rank(result) - batch_ndims - extra_sample_ndims - dest_event_ndims
  if fn_reduces_event:
    # In some cases, fn may reduce event too far, i.e. ildj may return a
    # scalar `0.`, which won't work with the transpose we do below.
    result = _left_pad_shape_with_ones(result, d)
  result_ndims = ps.rank(result)
  sample_ndims = ps.maximum(0, d)

  # Layout of `result`: [sample, extra_sample, batch, event].
  extra_sample_end = sample_ndims + extra_sample_ndims
  batch_end = sample_ndims + extra_sample_ndims + batch_ndims
  sample_dims = ps.range(0, sample_ndims)
  extra_sample_dims = ps.range(sample_ndims, extra_sample_end)
  batch_dims = ps.range(extra_sample_end, batch_end)
  event_dims = ps.range(batch_end, result_ndims)
  to_output_layout = ps.concat(
      [sample_dims, batch_dims, extra_sample_dims, event_dims], axis=0)
  return tf.transpose(result, perm=to_output_layout)
def _sample_n(self, n, seed=None):
  """Draws `n` samples per batch member, differentiable in all parameters.

  Standardized samples are drawn from a unit truncated normal whose bounds are
  the standardized `low` / `high`, and then shifted and scaled by `loc` and
  `scale`. A custom gradient supplies the derivative with respect to the
  truncation bounds, which the raw sampler does not propagate.

  Args:
    n: Number of samples to draw.
    seed: Seed forwarded to `parameterized_truncated_normal`.

  Returns:
    `Tensor` reshaped to `[n] + batch_shape`.
  """
  loc, scale, low, high = self._loc_scale_low_high()
  batch_shape = self._batch_shape_tensor(
      loc=loc, scale=scale, low=low, high=high)
  # Shape of the returned samples.
  output_shape = tf.concat([[n], batch_shape], 0)
  # Shape requested from the raw sampler: flattened batch first, samples last.
  sampler_shape = tf.stack([tf.reduce_prod(batch_shape), n])

  # In order to be reparameterizable we sample on the truncated_normal of
  # unit variance and mean and scale (but with the standardized
  # truncation bounds).

  @tf.custom_gradient
  def _std_samples_with_gradients(lower, upper):
    """Standard truncated Normal with gradient support for low, high."""
    # Note: Unlike the convention in TFP, parameterized_truncated_normal
    # returns a tensor with the final dimension being the sample dimension.
    draws = random_ops.parameterized_truncated_normal(
        shape=sampler_shape,
        means=0.0,
        stddevs=1.0,
        minvals=lower,
        maxvals=upper,
        dtype=self.dtype,
        seed=seed)

    def grad(dy):
      """Computes the derivative wrt the truncation bounds.

      The sampler blocks these gradients, so a closed-form expression is used
      for numerical stability instead of automatic differentiation on the CDF
      for implicit gradients.

      Args:
        dy: output gradients

      Returns:
        A two-element list: the gradient wrt the lower bound followed by the
        gradient wrt the upper bound.
      """
      # `draws` has a trailing sample dimension (see the note above); add a
      # matching trailing axis to the bounds so they broadcast against it.
      lower_col = lower[..., tf.newaxis]
      upper_col = upper[..., tf.newaxis]

      numerator = special_math.ndtr(draws) - special_math.ndtr(lower_col)
      denominator = (special_math.ndtr(upper_col) -
                     special_math.ndtr(lower_col))
      cdf = numerator / denominator

      # tiny, eps are tolerance parameters to ensure we stay away from giving
      # a zero arg to the log CDF expression.
      np_dtype = dtype_util.as_numpy_dtype(self.dtype)
      tiny = np.finfo(np_dtype).tiny
      eps = np.finfo(np_dtype).eps
      cdf = tf.clip_by_value(cdf, tiny, 1 - eps)

      d_upper = tf.exp(0.5 * (draws**2 - upper_col**2) + tf.math.log(cdf))
      d_lower = tf.exp(0.5 * (draws**2 - lower_col**2) + tf.math.log1p(-cdf))

      # Reduce the gradient across the samples
      grad_upper = tf.reduce_sum(dy * d_upper, axis=-1)
      grad_lower = tf.reduce_sum(dy * d_lower, axis=-1)
      return [grad_lower, grad_upper]

    return draws, grad

  std_low, std_high = self._standardized_low_and_high(
      low=low, high=high, loc=loc, scale=scale)
  bounds_shape = tf.broadcast_dynamic_shape(
      tf.shape(std_low), tf.shape(std_high))
  flat_low = tf.reshape(tf.broadcast_to(std_low, bounds_shape), [-1])
  flat_high = tf.reshape(tf.broadcast_to(std_high, bounds_shape), [-1])

  # The returned shape is [flat_batch x n]
  samples = _std_samples_with_gradients(flat_low, flat_high)
  samples = tf.transpose(samples, perm=[1, 0])
  samples = tf.reshape(samples, output_shape)
  return samples * scale[tf.newaxis] + loc[tf.newaxis]
def calibrate(*,
              forwards,
              expiries,
              strikes,
              volatilities,
              initial_position=None,
              optimizer_fn=None,
              tolerance=1e-6,
              maximum_iterations=100,
              dtype=None,
              name=None):
  """Calibrates the SVI model parameters for a batch of volatility skews.

  This function optimizes the SVI model parameters to fit the given volatilities
  at various strikes. The loss function is the L2 norm of the differences in the
  volatility space.

  Each volatility skew in the batch corresponds to a fixed expiry for options
  on some underlying assets. Optimization is done independently for each skew.

  TODO(b/189458981): add flexibility to accept higher rank tensors as inputs.

  #### Example
  The example shows how to calibrate a single skew, loosely based on market
  prices for GOOG210820C* (GOOG calls with 2021-08-20 expiry) as of 2021-05-27.
  https://finance.yahoo.com/quote/GOOG/options?p=GOOG&date=1629417600

  ```python
  import numpy as np
  import tensorflow.compat.v2 as tf
  import tf_quant_finance as tff

  forwards = np.array([2402.])
  expiries = np.array([0.23])

  strikes = np.array([[
      1700., 1800., 1900., 2000., 2050., 2100., 2200., 2250., 2350., 2400.,
      2450., 2500., 2550., 2600., 2650., 2700., 2750., 2800., 2850., 2900.,
      2950., 3000.
  ]])

  volatilities = np.array([[
      0.5335, 0.4882, 0.4389, 0.3937, 0.3749, 0.3569, 0.3259, 0.3135, 0.29,
      0.283, 0.2717, 0.2667, 0.2592, 0.2566, 0.2564, 0.2574, 0.2595, 0.2621,
      0.2669, 0.2732, 0.2826, 0.2967
  ]])

  tolerance=1e-4
  (svi_params, converged, _) = tff.experimental.svi.calibrate(
      forwards=forwards,
      expiries=expiries,
      strikes=strikes,
      volatilities=volatilities)

  # Expected results are tensors containing (up to numerical tolerance):
  # svi_params: [[-0.2978, 0.4212, 0.0415, 0.1282, 0.7436]]
  # converged: [True]
  ```

  Args:
    forwards: A rank 1 real `Tensor` of shape [batch_size]. The forward prices
      of the underlying asset for each skew in the batch.
    expiries: A rank 1 real `Tensor` of shape [batch_size]. The option expiries
      for each skew in the batch.
    strikes: A rank 2 real `Tensor` of shape [batch_size, num_strikes]. The
      strike prices of the options.
    volatilities: A rank 2 real `Tensor` of shape [batch_size, num_strikes]. The
      market implied Black-Scholes volatilities to calibrate.
    initial_position: A rank 2 real `Tensor` of shape [batch_size, 5]. The SVI
      parameters to use as the initial values for the optimization, ordered as
      `[a, b, rho, m, sigma]`. The default value is None, in which case the
      initial values are guessed heuristically and may lead to slower
      convergence.
    optimizer_fn: Optional Python callable which implements the algorithm used
      to minimize the objective function during calibration. It should have
      the following interface:
        result = optimizer_fn(value_and_gradients_function, initial_position,
          tolerance, max_iterations)
      `value_and_gradients_function` is a Python callable that accepts a point
      as a real `Tensor` and returns a tuple of `Tensor`s of real dtype
      containing the value of the function and its gradient at that point.
      `initial_position` is a real `Tensor` containing the starting point of
      the optimization, `tolerance` is a real scalar `Tensor` for stopping
      tolerance for the procedure and `max_iterations` specifies the maximum
      number of iterations.
      `optimizer_fn` should return a namedtuple containing the items:
      `position` (a tensor containing the optimal value), `converged` (a
      boolean indicating whether the optimizer converged according to the
      specified criteria), `failed` (a boolean indicating if the optimization
      resulted in a failure), `num_iterations` (the number of iterations used),
      and `objective_value` (the value of the objective function at the optimal
      value). The default value for `optimizer_fn` is None and conjugate
      gradient algorithm is used.
    tolerance: Scalar `Tensor` of real dtype. The absolute tolerance for
      terminating the iterations.
      Default value: 1e-6.
    maximum_iterations: Scalar positive int32 `Tensor`. The maximum number of
      iterations during the optimization.
      Default value: 100.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None`, uses the default dtypes inferred by TensorFlow.
    name: Python string. The name to give to the ops created by this function.
      Default value: `None`, maps to the default name `svi_skew_calibration`.

  Returns:
    A Tuple of three elements: (parameters, status, iterations)
    - parameters: a tensor of shape [batch_size, 5] representing raw parameters
      for the SVI model calibrated with given input Black-Scholes volatilities.
    - status: boolean, whether the optimization algorithm succeeded in finding
      the optimal point based on the specified convergence criteria.
    - iterations: the number of iterations performed during the optimization.
  """
  name = name or 'svi_skew_calibration'
  with tf.name_scope(name):
    volatilities = tf.convert_to_tensor(
        volatilities, dtype=dtype, name='volatilities')
    dtype = dtype or volatilities.dtype
    forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')

    # the standard notation for log moneyness in the literature is k:=log(K/F)
    log_moneyness = tf.math.log(strikes / forwards[:, None])

    if initial_position is None:
      # Heuristic start: anchor the smile at the strike of minimal volatility.
      minvol_index = tf.argmin(volatilities, axis=1)
      a0 = tf.gather(volatilities, minvol_index, axis=1, batch_dims=1)**2
      b0 = tf.zeros_like(forwards, dtype=dtype)
      rho0 = tf.zeros_like(forwards, dtype=dtype)
      sigma0 = 0.5 * tf.ones_like(forwards, dtype=dtype)
      m0 = tf.gather(log_moneyness, minvol_index, axis=1, batch_dims=1)
      initial_position = tf.transpose([a0, b0, rho0, m0, sigma0])

    if optimizer_fn is None:
      optimizer_fn = optimizer.conjugate_gradient_minimize

    @make_val_and_grad_fn
    def loss_function(parameters):
      """Loss function for the optimization."""
      total_variance = parameterizations.total_variance_from_raw(
          parameters, log_moneyness)
      is_negative = total_variance < 0.

      # `tf.where` back-propagates through both branches, so taking the square
      # root of a negative number in the unselected branch would turn the
      # whole gradient into NaN. Feed the sqrt a harmless positive value at
      # those positions; the outer `tf.where` discards the result anyway, so
      # the forward value is unchanged.
      safe_variance = tf.where(is_negative, tf.ones_like(total_variance),
                               total_variance)
      model_vol = tf.where(is_negative, tf.zeros_like(total_variance),
                           tf.sqrt(safe_variance / expiries[:, None]))

      # Negative total variance is penalized linearly, which gives the
      # optimizer a usable gradient back towards the admissible region.
      squared_difference = tf.where(
          is_negative, volatilities**2 - total_variance,
          tf.math.squared_difference(model_vol, volatilities))

      loss = tf.math.reduce_sum(squared_difference, axis=1)
      return loss

    optimization_result = optimizer_fn(
        loss_function,
        initial_position=initial_position,
        tolerance=tolerance,
        max_iterations=maximum_iterations)

    # The optimizer may converge negative SVI sigma; to enforce the positivity
    # convention, we take sigma by absolute value, which yields the same model.
    calibrated_parameters = tf.concat([
        optimization_result.position[:, :-1],
        tf.math.abs(optimization_result.position[:, -1, None])
    ], axis=1)

    return (calibrated_parameters, optimization_result.converged,
            optimization_result.num_iterations)
def options_price_from_samples(strikes: types.RealTensor,
                               expiries: types.RealTensor,
                               maturities: types.RealTensor,
                               is_call_options: types.BoolTensor,
                               sample_discount_curve_paths_fn: Callable[
                                   ..., Tuple[types.RealTensor,
                                              types.RealTensor]],
                               num_samples: types.IntTensor,
                               time_step: types.RealTensor,
                               dtype: tf.DType = None,
                               name: str = None) -> types.RealTensor:
  """Computes the zero coupon bond options price from simulated discount curves.

  Args:
    strikes: A real `Tensor` of any shape and dtype. The strike price of the
      options. The shape of this input determines the number (and shape) of the
      options to be priced and the output.
    expiries: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The time to expiry of each bond option.
    maturities: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The time to maturity of the underlying zero coupon bonds.
    is_call_options: A boolean `Tensor` of a shape compatible with `strikes`.
      Indicates whether the option is a call (if True) or a put (if False).
    sample_discount_curve_paths_fn: Callable which takes the following args:

      1) times: Rank 1 `Tensor` of positive real values, specifying the times at
        which the path points are to be evaluated.
      2) curve_times: Rank 1 `Tensor` of positive real values, specifying the
        maturities at which the discount curve is to be computed at each
        simulation time.
      3) num_samples: Positive scalar integer specifying the number of paths to
        draw.

      and returns two `Tensor`s, the first being a Rank-4 tensor of shape
      `[num_samples, m, k, dim]` containing the simulated zero coupon bond
      curves, and the second being a `Tensor` of shape
      `[num_samples, k, dim]` containing the simulated short rate paths. Here,
      `m` is the size of `curve_times`, `k` is the size of `times`, and `dim`
      is the dimensionality of the paths.
    num_samples: Positive scalar `int32` `Tensor`. The number of simulation
      paths during Monte-Carlo valuation.
    time_step: Scalar real `Tensor`. Maximal distance between time grid points
      in Euler scheme. Relevant when Euler scheme is used for simulation.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None` which means that the dtype of the simulation times
        (i.e. of `expiries`) is used.
    name: Python string. The name to give to the ops created by this function.
      Default value: `None` which maps to the default name
        `options_price_from_samples`.

  Returns:
    A `Tensor` of real dtype and shape `strikes.shape + [dim]` containing the
    computed option prices.
  """
  name = name or 'options_price_from_samples'
  with tf.name_scope(name):
    # Simulation grid: every distinct expiry plus a regular `time_step` grid.
    sim_times, _ = tf.unique(tf.reshape(expiries, shape=[-1]))
    longest_expiry = tf.reduce_max(sim_times)
    sim_times, _ = tf.unique(
        tf.concat(
            [sim_times, tf.range(time_step, longest_expiry, time_step)],
            axis=0))
    sim_times = tf.sort(sim_times, name='sort_sim_times')

    # Distinct bond tenors (measured from option expiry), sorted.
    tau = maturities - expiries
    curve_times_builder, _ = tf.unique(tf.reshape(tau, shape=[-1]))
    curve_times = tf.sort(curve_times_builder, name='sort_curve_times')

    p_t_tau, r_t = sample_discount_curve_paths_fn(
        times=sim_times, curve_times=curve_times, num_samples=num_samples)
    dim = p_t_tau.shape[-1]

    # With `dtype=None` a literal `[0.0]` would be float32 and could not be
    # concatenated with float64 time increments, so fall back to the dtype of
    # `sim_times`.
    zero_dtype = sim_times.dtype if dtype is None else dtype
    dt_builder = tf.concat(
        axis=0,
        values=[
            tf.convert_to_tensor([0.0], dtype=zero_dtype),
            sim_times[1:] - sim_times[:-1]
        ])
    dt = tf.expand_dims(tf.expand_dims(dt_builder, axis=-1), axis=0)
    discount_factors_builder = tf.math.exp(-r_t * dt)
    # Transpose before (and after) because we want the cumprod along axis=1
    # and `matvec` operates on the last axis. The shape before and after would
    # be `(num_samples, len(times), dim)`
    discount_factors_builder = tf.transpose(
        utils.cumprod_using_matvec(
            tf.transpose(discount_factors_builder, [0, 2, 1])), [0, 2, 1])

    # make discount factors the same shape as `p_t_tau`. This involves adding
    # an extra dimension (corresponding to `curve_times`).
    discount_factors_builder = tf.expand_dims(discount_factors_builder, axis=1)
    # Use the dynamic shape: `curve_times` comes out of `tf.unique`, so its
    # static size (and hence `p_t_tau.shape[1]`) may be unknown in graph mode.
    discount_factors_simulated = tf.repeat(
        discount_factors_builder, tf.shape(p_t_tau)[1], axis=1)

    # `sim_times` and `curve_times` are sorted for simulation. We need to
    # select the indices corresponding to our input.
    sim_time_index = tf.searchsorted(sim_times, tf.reshape(expiries, [-1]))
    curve_time_index = tf.searchsorted(curve_times, tf.reshape(tau, [-1]))

    # Broadcast shapes of strikes, expiries and maturities
    curve_time_index, sim_time_index = tff_utils.broadcast_tensors(
        curve_time_index, sim_time_index)
    gather_index = _prepare_indices(
        tf.range(0, num_samples), curve_time_index, sim_time_index,
        tf.range(0, dim))

    # The shape after `gather_nd` would be (num_samples*num_strikes*dim,)
    payoff_discount_factors_builder = tf.gather_nd(
        discount_factors_simulated, gather_index)
    # Reshape to `[num_samples] + strikes.shape + [dim]`
    payoff_discount_factors = tf.reshape(
        payoff_discount_factors_builder,
        [num_samples] + strikes.shape + [dim])
    payoff_bond_price_builder = tf.gather_nd(p_t_tau, gather_index)
    payoff_bond_price = tf.reshape(
        payoff_bond_price_builder, [num_samples] + strikes.shape + [dim])

    # Add a leading sample axis and a trailing `dim` axis for broadcasting.
    is_call_options = tf.reshape(
        tf.broadcast_to(is_call_options, strikes.shape),
        [1] + strikes.shape + [1])
    strikes = tf.reshape(strikes, [1] + strikes.shape + [1])
    payoff = tf.where(is_call_options,
                      tf.math.maximum(payoff_bond_price - strikes, 0.0),
                      tf.math.maximum(strikes - payoff_bond_price, 0.0))
    # Monte-Carlo estimate: average discounted payoff over the sample axis.
    option_value = tf.math.reduce_mean(
        payoff_discount_factors * payoff, axis=0)

    return option_value
def discount_factors_and_bond_prices_from_samples(
    expiries,
    payment_times,
    sample_discount_curve_paths_fn,
    num_samples,
    time_step,
    dtype=None):
  """Utility function to compute the discount factors and the bond prices.

  Args:
    expiries: A real `Tensor` of any shape and dtype. The time to expiration of
      the swaptions. The shape of this input determines the number (and shape)
      of swaptions to be priced and the shape of the output - e.g. if there are
      two swaptions, and there are 11 payment dates for each swaption, then the
      shape of `expiries` is [2, 11], with entries repeated along the second
      axis.
    payment_times: A real `Tensor` of same dtype and compatible shape with
      `expiries` - e.g. if there are two swaptions, and there are 11 payment
      dates for each swaption, then the shape of `payment_times` should be
      [2, 11]
    sample_discount_curve_paths_fn: Callable which takes the following args:

      1) times: Rank 1 `Tensor` of positive real values, specifying the times at
        which the path points are to be evaluated.
      2) curve_times: Rank 1 `Tensor` of positive real values, specifying the
        maturities at which the discount curve is to be computed at each
        simulation time.
      3) num_samples: Positive scalar integer specifying the number of paths to
        draw.

      Returns two `Tensor`s, the first being a Rank-4 tensor of shape
      [num_samples, m, k, d] containing the simulated zero coupon bond curves,
      and the second being a `Tensor` of shape [num_samples, k, d] containing
      the simulated short rate paths. Here, m is the size of `curve_times`, k
      is the size of `times`, and d is the dimensionality of the paths.
    num_samples: Positive scalar `int32` `Tensor`. The number of simulation
      paths during Monte-Carlo valuation.
    time_step: Scalar real `Tensor`. Maximal distance between time grid points
      in Euler scheme. Relevant when Euler scheme is used for simulation.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None` which means that the dtype of the simulation times
        (i.e. of `expiries`) is used.

  Returns:
    Two real tensors, `discount_factors` and `bond_prices`, both of shape
    [num_samples] + shape(payment_times) + [dim], where `dim` is the dimension
    of each path (e.g for a Hull-White with two models, dim==2; while for HJM
    dim==1 always.)
  """
  # Simulation grid: every distinct expiry plus a regular `time_step` grid.
  sim_times, _ = tf.unique(tf.reshape(expiries, shape=[-1]))
  longest_expiry = tf.reduce_max(sim_times)
  sim_times, _ = tf.unique(
      tf.concat(
          [sim_times, tf.range(time_step, longest_expiry, time_step)], axis=0))
  sim_times = tf.sort(sim_times, name='sort_sim_times')

  swaptionlet_shape = payment_times.shape
  # Distinct tenors (payment time measured from expiry), sorted.
  tau = payment_times - expiries
  curve_times_builder, _ = tf.unique(tf.reshape(tau, shape=[-1]))
  curve_times = tf.sort(curve_times_builder, name='sort_curve_times')

  p_t_tau, r_t = sample_discount_curve_paths_fn(
      times=sim_times, curve_times=curve_times, num_samples=num_samples)
  dim = p_t_tau.shape[-1]

  # With `dtype=None` a literal `[0.0]` would be float32 and could not be
  # concatenated with float64 time increments, so fall back to the dtype of
  # `sim_times`.
  zero_dtype = sim_times.dtype if dtype is None else dtype
  dt = tf.concat(
      axis=0,
      values=[
          tf.convert_to_tensor([0.0], dtype=zero_dtype),
          sim_times[1:] - sim_times[:-1]
      ])
  dt = tf.expand_dims(tf.expand_dims(dt, axis=-1), axis=0)

  # Compute the discount factors. With dt_1 = 0 and dt_k = t_k - t_{k-1} as
  # built above, we do this by performing the following:
  #
  # 1. We compute the implied one-step discount factors exp(-r_k * dt_k).
  # 2. We compute their cumulative products to get P(t_1), P(t_2), etc.
  #
  # We perform the cumulative product by taking the cumulative sum over
  # log P's, and then exponentiating the sum. However, since each P is itself
  # an exponential, this effectively amounts to taking a cumsum over the
  # exponents themselves, and exponentiating in the end:
  #
  # P(t_k) = exp(-r_1 * dt_1 - r_2 * dt_2 ... - r_k * dt_k)
  #
  # NOTE(review): because dt_1 is 0, no discounting is accrued over
  # [0, t_1] - confirm this is intended.

  # Transpose before (and after) because we want the cumulative sum along
  # axis=1 but `cumsum_using_matvec` operates on the last axis.
  cumul_rdt = tf.transpose(
      utils.cumsum_using_matvec(tf.transpose(r_t * dt, perm=[0, 2, 1])),
      perm=[0, 2, 1])
  discount_factors = tf.math.exp(-cumul_rdt)

  # Make discount factors the same shape as `p_t_tau`. This involves adding
  # an extra dimension (corresponding to `curve_times`).
  discount_factors = tf.expand_dims(discount_factors, axis=1)

  # tf.repeat is needed because we will use gather_nd later on this tensor.
  discount_factors_simulated = tf.repeat(
      discount_factors, tf.shape(p_t_tau)[1], axis=1)

  # `sim_times` and `curve_times` are sorted for simulation. We need to
  # select the indices corresponding to our input.
  sim_time_index = tf.searchsorted(sim_times, tf.reshape(expiries, [-1]))
  curve_time_index = tf.searchsorted(curve_times, tf.reshape(tau, [-1]))

  gather_index = _prepare_indices_ijjk(
      tf.range(0, num_samples), curve_time_index, sim_time_index,
      tf.range(0, dim))

  # The shape after `gather_nd` will be `(num_samples*num_swaptionlets*dim,)`
  payoff_discount_factors_builder = tf.gather_nd(
      discount_factors_simulated, gather_index)
  # Reshape to `[num_samples] + swaptionlet.shape + [dim]`
  payoff_discount_factors = tf.reshape(
      payoff_discount_factors_builder,
      [num_samples] + swaptionlet_shape + [dim])
  payoff_bond_price_builder = tf.gather_nd(p_t_tau, gather_index)
  payoff_bond_price = tf.reshape(
      payoff_bond_price_builder, [num_samples] + swaptionlet_shape + [dim])

  return payoff_discount_factors, payoff_bond_price
def count_integers(arr,
                   weights=None,
                   minlength=None,
                   maxlength=None,
                   axis=None,
                   dtype=tf.int32,
                   name=None):
  """Counts the number of occurrences of each value in an integer array `arr`.

  Works like `tf.math.bincount`, but provides an `axis` kwarg that specifies
  dimensions to reduce over.  With
    `~axis = [i for i in range(arr.ndim) if i not in axis]`,
  this function returns a `Tensor` of shape `[K] + arr.shape[~axis]`.

  If `minlength` and `maxlength` are not given, `K = tf.reduce_max(arr) + 1`
  if `arr` is non-empty, and 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is
  `i`.

  Args:
    arr: An `int32` `Tensor` of non-negative values.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    axis: A `0-D` or `1-D` `int32` `Tensor` (with static values) designating
      dimensions in `arr` to reduce over.
      `Default value:` `None`, meaning reduce over all dimensions.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.
  """
  with tf.name_scope(name or 'count_integers'):
    # No axis requested: plain bincount over everything.
    if axis is None:
      return tf.math.bincount(
          arr,
          weights=weights,
          minlength=minlength,
          maxlength=maxlength,
          dtype=dtype)

    arr = tf.convert_to_tensor(arr, dtype=tf.int32, name='arr')
    arr_ndims = _get_static_ndims(arr, expect_static=True)
    axis = _make_static_axis_non_negative_list(axis, arr_ndims)

    # The dims of `arr` that are kept (the `~axis` of the docstring).
    kept_dims = sorted(set(range(arr_ndims)).difference(axis))

    # Every dim is reduced: again equivalent to the standard bincount.
    if not kept_dims:
      return tf.math.bincount(
          arr,
          weights=weights,
          minlength=minlength,
          maxlength=maxlength,
          dtype=dtype)

    # Bring the kept dims to the front and flatten them into one leading dim
    # so that `tf.map_fn` can run one bincount per kept index.
    flat_arr = _move_dims_to_flat_end(
        arr, kept_dims, arr_ndims, right_end=False)

    if minlength is None:
      minlength = tf.reduce_max(arr) + 1
    if maxlength is None:
      maxlength = tf.reduce_max(arr) + 1

    if weights is not None:
      weights = tf.convert_to_tensor(weights, name='weights')
      # Called for its check that `weights` has the same static rank as `arr`.
      _get_static_ndims(weights, expect_static=True, expect_ndims=arr_ndims)
      flat_weights = _move_dims_to_flat_end(
          weights, kept_dims, arr_ndims, right_end=False)

      def _weighted_bincount(slices):
        values, slice_weights = slices
        return tf.math.bincount(
            values,
            weights=slice_weights,
            minlength=minlength,
            maxlength=maxlength,
            dtype=dtype)

      flat_counts = tf.map_fn(
          _weighted_bincount,
          elems=[flat_arr, flat_weights],
          fn_output_signature=weights.dtype)
    else:

      def _unweighted_bincount(values):
        return tf.math.bincount(
            values,
            weights=None,
            minlength=minlength,
            maxlength=maxlength,
            dtype=dtype)

      flat_counts = tf.map_fn(
          _unweighted_bincount, elems=flat_arr, fn_output_signature=dtype)

    # `map_fn` stacks along axis 0, giving [prod(kept dims), K]; the bins have
    # to come first, hence the transpose.
    counts_bins_first = tf.transpose(a=flat_counts, perm=[1, 0])

    # Sanity check of the rank assumption above.
    _get_static_ndims(counts_bins_first, expect_ndims=2, expect_static=True)

    # Un-flatten the kept dims: result is [K] + arr.shape[~axis].
    kept_shape = ps.gather(ps.shape(arr), indices=kept_dims)
    result_shape = ps.concat([[-1], kept_shape], axis=0)
    return tf.reshape(counts_bins_first, result_shape)
def sample(dim,
           drift_fn,
           volatility_fn,
           times,
           time_step=None,
           num_time_steps=None,
           num_samples=1,
           initial_state=None,
           random_type=None,
           seed=None,
           swap_memory=True,
           skip=0,
           precompute_normal_draws=True,
           times_grid=None,
           normal_draws=None,
           watch_params=None,
           validate_args=False,
           dtype=None,
           name=None):
  """Returns sample paths from the process using Euler method.

  For an Ito process,

  ```
    dX = a(t, X_t) dt + b(t, X_t) dW_t
  ```
  with given drift `a` and volatility `b` functions Euler method generates a
  sequence {X_n} as

  ```
  X_{n+1} = X_n + a(t_n, X_n) dt + b(t_n, X_n) (N(0, t_{n+1}) - N(0, t_n)),
  ```
  where `dt = t_{n+1} - t_n` and `N` is a sample from the Normal distribution.
  See [1] for details.

  #### References
  [1]: Wikipedia. Euler-Maruyama method:
  https://en.wikipedia.org/wiki/Euler-Maruyama_method

  Args:
    dim: Python int greater than or equal to 1. The dimension of the Ito
      Process.
    drift_fn: A Python callable to compute the drift of the process. The
      callable should accept two real `Tensor` arguments of the same dtype.
      The first argument is the scalar time t, the second argument is the
      value of Ito process X - tensor of shape `batch_shape + [dim]`.
      The result is value of drift a(t, X). The return value of the callable
      is a real `Tensor` of the same dtype as the input arguments and of shape
      `batch_shape + [dim]`.
    volatility_fn: A Python callable to compute the volatility of the process.
      The callable should accept two real `Tensor` arguments of the same dtype
      and shape `times_shape`. The first argument is the scalar time t, the
      second argument is the value of Ito process X - tensor of shape
      `batch_shape + [dim]`. The result is value of volatility b(t, X). The
      return value of the callable is a real `Tensor` of the same dtype as the
      input arguments and of shape `batch_shape + [dim, dim]`.
    times: Rank 1 `Tensor` of increasing positive real values. The times at
      which the path points are to be evaluated.
    time_step: An optional scalar real `Tensor` - maximal distance between
      points in grid in Euler schema.
      Either this or `num_time_steps` should be supplied.
      Default value: `None`.
    num_time_steps: An optional Scalar integer `Tensor` - a total number of time
      steps performed by the algorithm. The maximal distance between points in
      grid is bounded by `times[-1] / (num_time_steps - times.shape[0])`.
      Either this or `time_step` should be supplied.
      Default value: `None`.
    num_samples: Positive scalar `int`. The number of paths to draw.
      Default value: 1.
    initial_state: `Tensor` of shape `[dim]`. The initial state of the
      process.
      Default value: None which maps to a zero initial state.
    random_type: Enum value of `RandomType`. The type of (quasi)-random
      number generator to use to generate the paths.
      Default value: None which maps to the standard pseudo-random numbers.
    seed: Seed for the random number generator. The seed is
      only relevant if `random_type` is one of
      `[STATELESS, PSEUDO, HALTON_RANDOMIZED, PSEUDO_ANTITHETIC,
        STATELESS_ANTITHETIC]`. For `PSEUDO`, `PSEUDO_ANTITHETIC` and
      `HALTON_RANDOMIZED` the seed should be a Python integer. For
      `STATELESS` and `STATELESS_ANTITHETIC` it must be supplied as an integer
      `Tensor` of shape `[2]`.
      Default value: `None` which means no seed is set.
    swap_memory: A Python bool. Whether GPU-CPU memory swap is enabled for this
      op. See an equivalent flag in `tf.while_loop` documentation for more
      details. Useful when computing a gradient of the op since `tf.while_loop`
      is used to propagate stochastic process in time.
      Default value: True.
    skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol or
      Halton sequence to skip. Used only when `random_type` is 'SOBOL',
      'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
      Default value: `0`.
    precompute_normal_draws: Python bool. Indicates whether the noise increments
      `N(0, t_{n+1}) - N(0, t_n)` are precomputed. For `HALTON` and `SOBOL`
      random types the increments are always precomputed. While the resulting
      graph consumes more memory, the performance gains might be significant.
      Default value: `True`.
    times_grid: An optional rank 1 `Tensor` representing time discretization
      grid. If `times` are not on the grid, then the nearest points from the
      grid are used. When supplied, `num_time_steps` and `time_step` are
      ignored.
      Default value: `None`, which means that times grid is computed using
      `time_step` and `num_time_steps`.
    normal_draws: A `Tensor` of shape `[num_samples, num_time_points, dim]`
      and the same `dtype` as `times`. Represents random normal draws to compute
      increments `N(0, t_{n+1}) - N(0, t_n)`. When supplied, `num_samples`
      argument is ignored and the first dimension of `normal_draws` is used
      instead.
      Default value: `None` which means that the draws are generated by the
      algorithm.
    watch_params: An optional list of zero-dimensional `Tensor`s of the same
      `dtype` as `initial_state`. If provided, specifies `Tensor`s with respect
      to which the differentiation of the sampling function will happen.
      A more efficient algorithm is used when `watch_params` are specified.
      Note that the function becomes differentiable only wrt these `Tensor`s
      and the `initial_state`. The gradient wrt any other `Tensor` is set to be
      zero.
    validate_args: Python `bool`. When `True` and `normal_draws` are supplied,
      checks that `tf.shape(normal_draws)[1]` is equal to `num_time_steps` that
      is either supplied as an argument or computed from `time_step`.
      When `False` invalid dimension may silently render incorrect outputs.
      Default value: `False`.
    dtype: `tf.Dtype`. If supplied the dtype for the input and output `Tensor`s.
      Default value: None which means that the dtype implied by `times` is
      used.
    name: Python string. The name to give this op.
      Default value: `None` which maps to `euler_sample`.

  Returns:
   A real `Tensor` of shape [num_samples, k, n] where `k` is the size of the
      `times`, `n` is the dimension of the process.

  Raises:
    ValueError:
      (a) When `times_grid` is not supplied, and neither `num_time_steps` nor
        `time_step` are supplied or if both are supplied.
      (b) If `normal_draws` is supplied and `dim` is mismatched.
    tf.errors.InvalidArgumentError: If `normal_draws` is supplied and
      `num_time_steps` is mismatched.
  """
  name = name or 'euler_sample'
  with tf.name_scope(name):
    times = tf.convert_to_tensor(times, dtype=dtype)
    if dtype is None:
      dtype = times.dtype
    if initial_state is None:
      initial_state = tf.zeros(dim, dtype=dtype)
    initial_state = tf.convert_to_tensor(initial_state, dtype=dtype,
                                         name='initial_state')
    num_requested_times = tf.shape(times)[0]

    # Create a time grid for the Euler scheme.
    if times_grid is None:
      # `time_step` / `num_time_steps` only matter when the grid has to be
      # built here, so the exclusivity check lives in this branch; an explicit
      # `times_grid` is documented to make both of them irrelevant.
      if num_time_steps is not None and time_step is not None:
        raise ValueError(
            'When `times_grid` is not supplied only one of either '
            '`num_time_steps` or `time_step` should be defined but not both.')
      if time_step is None:
        if num_time_steps is None:
          raise ValueError(
              'When `times_grid` is not supplied, either `num_time_steps` '
              'or `time_step` should be defined.')
        num_time_steps = tf.convert_to_tensor(
            num_time_steps, dtype=tf.int32, name='num_time_steps')
        time_step = times[-1] / tf.cast(num_time_steps, dtype=dtype)
      else:
        time_step = tf.convert_to_tensor(time_step, dtype=dtype,
                                         name='time_step')
    else:
      times_grid = tf.convert_to_tensor(times_grid, dtype=dtype,
                                        name='times_grid')
    times, keep_mask, time_indices = utils.prepare_grid(
        times=times,
        time_step=time_step,
        num_time_steps=num_time_steps,
        times_grid=times_grid,
        dtype=dtype)

    if normal_draws is not None:
      normal_draws = tf.convert_to_tensor(normal_draws, dtype=dtype,
                                          name='normal_draws')
      # Shape [num_time_points, num_samples, dim]
      normal_draws = tf.transpose(normal_draws, [1, 0, 2])
      # The number of paths is dictated by the supplied draws.
      num_samples = tf.shape(normal_draws)[1]
      draws_dim = normal_draws.shape[2]
      if dim != draws_dim:
        raise ValueError(
            '`dim` should be equal to `normal_draws.shape[2]` but are '
            '{0} and {1} respectively'.format(dim, draws_dim))
      if validate_args:
        # The draws must provide one increment per step of the grid.
        draws_times = tf.shape(normal_draws)[0]
        asserts = tf.assert_equal(
            draws_times, tf.shape(keep_mask)[0] - 1,
            message='`num_time_steps` should be equal to '
                    '`tf.shape(normal_draws)[1]`')
        with tf.compat.v1.control_dependencies([asserts]):
          normal_draws = tf.identity(normal_draws)

    if watch_params is not None:
      watch_params = [tf.convert_to_tensor(param, dtype=dtype)
                      for param in watch_params]

    return _sample(
        dim=dim,
        drift_fn=drift_fn,
        volatility_fn=volatility_fn,
        times=times,
        keep_mask=keep_mask,
        num_requested_times=num_requested_times,
        num_samples=num_samples,
        initial_state=initial_state,
        random_type=random_type,
        seed=seed,
        swap_memory=swap_memory,
        skip=skip,
        precompute_normal_draws=precompute_normal_draws,
        normal_draws=normal_draws,
        watch_params=watch_params,
        time_indices=time_indices,
        dtype=dtype)
def _transpose(self, x, perm):
  """Transposes `x` using a permutation resolved by `self._make_perm`.

  Args:
    x: Tensor to transpose.
    perm: Permutation specification; it is handed to `self._make_perm`
      together with the dynamic rank of `x` before being used.
      NOTE(review): the accepted forms of `perm` are defined by `_make_perm`,
      which is not visible here.

  Returns:
    The result of `tf.transpose` applied to `x` with the resolved permutation.
  """
  resolved_perm = self._make_perm(tf.rank(x), perm)
  transposed = tf.transpose(a=x, perm=resolved_perm)
  return transposed
def soft_multivariate_quantiles(x, quantiles, quantile_width=None, **kwargs):
  """Computes soft multivariate quantiles via optimal transport.

  The (mean-centered) input points in `x` are transported onto 2^d + 1 weighted
  target points: the origin plus the vertices of the hypercube {-1, 1}^d.
  Target weights are adjusted so that the values in `x` that are transported to
  the central target point are those concentrating around the quantile of
  interest.

  Args:
    x: Tensor<float> of shape [batch, N, d]. The static shape must be known,
      since `x.shape` is used to build target shapes.
    quantiles: Tensor<float> of shape [r, d], r targeted quantiles of dimension
      d.
    quantile_width: (float) mass given to the bucket supposed to attract points
      whose value concentrate around the desired quantile value. Bigger width
      means that we allow the soft quantile to be a mixture of more points
      further away from the quantile. If None, the width is set to 2 / N where
      N is the number of points per batch element (`x.shape[1]`).
    **kwargs: see sinkhorn.autodiff_sinkhorn for possible extra parameters.

  Returns:
    A Tensor<float> of shape [batch, r, d] of multivariate quantiles per batch.
  """
  quantiles = tf.constant(quantiles, tf.float32)
  batch_size = x.shape[0]
  # Keep the point count as an integer so it can be used inside shape
  # arguments; a float copy is used only for arithmetic. (Using the float
  # tensor as a dimension is invalid for `tf.ones` / `tf.reshape`.)
  num_points = x.shape[1]
  num_points_float = tf.cast(num_points, tf.float32)
  d = x.shape[2]
  if quantile_width is None:
    quantile_width = 2 / num_points_float
  num_quantiles = tf.shape(quantiles)[0]
  hypercube_vertices = tf.constant(
      list(itertools.product([-1, 1], repeat=d)), tf.float32)

  # Weights attached to the vertices for each quantile: for every coordinate a
  # vertex entry of -1 contributes `q` and an entry of +1 contributes `1 - q`.
  # After the product over coordinates this is [num_quantiles, 2^d].
  weights = quantiles[:, tf.newaxis, :]**(
      0.5 * (1 - hypercube_vertices))[tf.newaxis, Ellipsis]
  weights *= (1 - quantiles)[:, tf.newaxis, :]**(
      0.5 * (1 + hypercube_vertices))[tf.newaxis, Ellipsis]
  weights = (1 - quantile_width) * tf.reduce_prod(weights, axis=2)

  # Add the weight of the quantile bucket itself (in position 0).
  weights = tf.concat(
      (quantile_width * tf.ones((num_quantiles, 1)), weights), axis=1)

  # Replicate for every batch element: [batch_size, 2^d + 1, num_quantiles].
  weights = tf.reshape(
      tf.tile(tf.transpose(weights), [batch_size, 1]),
      [batch_size, 2**d + 1, num_quantiles])

  # Target locations: the point at 0 that will absorb the quantile, followed by
  # the hypercube vertices, replicated over the batch.
  y = tf.concat(
      (tf.zeros((1, d), dtype=tf.float32), hypercube_vertices), axis=0)
  y = tf.reshape(tf.tile(y, [batch_size, 1]), [batch_size, 2**d + 1, d])

  # Center x.
  x_mean = tf.reduce_mean(x, axis=1)
  x = x - x_mean[:, tf.newaxis, :]

  # Uniform source weights over the points, one column per quantile.
  source_weights = tf.ones(
      [batch_size, num_points, num_quantiles],
      dtype=tf.float32) / num_points_float
  transports = sinkhorn.autodiff_sinkhorn(
      x, y, source_weights, weights, **kwargs)

  # Recover the convex combinations resulting from transporting to the central
  # point (target index 0), in all batches and quantile variations.
  transports = 1 / quantile_width * tf.reshape(
      transports[:, :, 0, :], [batch_size, num_points, -1])

  # Apply these convex combinations to the data points and recenter.
  all_soft_quantiles = tf.reduce_sum(
      transports[:, :, :, tf.newaxis] * x[:, :, tf.newaxis, :],
      axis=1) + x_mean[:, tf.newaxis, :]

  # Reshape the quantiles after having applied the convex combinations.
  return tf.reshape(all_soft_quantiles, [batch_size, num_quantiles, d])
def draw_sample(num_samples, num_classes, logits, num_trials, dtype, seed):
  """Draws multinomial samples by summing one-hot categorical draws.

  The batch shape is given by broadcasting num_trials with
  remove_last_dimension(logits).

  Args:
    num_samples: Python int or singleton integer Tensor: number of multinomial
      samples to draw.
    num_classes: Python int or singleton integer Tensor: number of classes.
    logits: Floating Tensor with last dimension k, of (unnormalized) logit
      probabilities per class.
    num_trials: Tensor of number of categorical trials each multinomial consists
      of.  num_trials[..., tf.newaxis] must broadcast with logits.
    dtype: dtype at which to emit samples.
    seed: Random seed.

  Returns:
    samples: Tensor of given dtype and shape [n] + batch_shape + [k].
  """
  with tf.name_scope('draw_sample'):
    # Multiplying each operand by ones shaped like the other broadcasts the
    # trial counts and the logits to a common batch shape.
    trials_bcast = (
        tf.ones_like(logits[..., 0], dtype=num_trials.dtype) * num_trials)
    logits_bcast = (
        tf.ones_like(trials_bcast[..., tf.newaxis], dtype=logits.dtype) *
        logits)

    # Collapse all batch dimensions into one.
    # logits_2d has shape [B1B2...Bm, num_classes].
    logits_2d = tf.reshape(logits_bcast, [-1, num_classes])
    # Total categorical draws needed per batch member; shape [B1B2...Bm].
    draws_per_member = num_samples * tf.reshape(trials_bcast, [-1])

    # Each (logits, num_trials) pair is handled separately through map_fn.
    # A single batched samplers.categorical call is not usable here: it needs
    # one shared sample count for the whole batch of logits, because for it
    # the count is part of the returned shape.  The multinomial sampler has no
    # such need, since it sums that dimension away.
    # Drawing max(num_trials) categoricals for everyone and masking the excess
    # would also work, but can waste memory when num_trials varies widely.
    # TODO(b/123763054, b/112152209): Revisit the possibility of writing this
    # with a batch categorical followed by batch unsorted_segment_sum, once both
    # of those work and are memory-efficient enough.
    def _sample_one_batch_member(args):
      # member_logits: [K]; total_draws: []; member_seed: per-member seed.
      member_logits, total_draws, member_seed = args
      # Shape [1, total_draws], where total_draws = num_samples * num_trials.
      draws = samplers.categorical(
          member_logits[tf.newaxis, ...], total_draws, seed=member_seed)
      # Shape [num_samples, num_trials].
      draws = tf.reshape(draws, shape=[num_samples, -1])
      # One-hot to [num_samples, num_trials, num_classes], then sum out the
      # trials axis to get per-class counts: [num_samples, num_classes].
      one_hot_draws = tf.one_hot(draws, depth=num_classes)
      counts = tf.reduce_sum(one_hot_draws, axis=-2)
      return tf.cast(counts, dtype=dtype)

    member_seeds = samplers.split_seed(
        seed, n=tf.shape(logits_2d)[0], salt='multinomial_draw_sample')
    # Shape [B1B2...Bm, num_samples, num_classes].
    per_member = tf.map_fn(
        _sample_one_batch_member,
        [logits_2d, draws_per_member, member_seeds],
        fn_output_signature=dtype)

    # Move the sample axis to the front, then restore the batch dimensions.
    samples_first = tf.transpose(a=per_member, perm=[1, 0, 2])
    output_shape = tf.concat(
        [[num_samples], tf.shape(trials_bcast), [num_classes]], axis=0)
    return tf.reshape(samples_first, output_shape)
def f(x, y):
  """Combines a weighted cumulative sum of `x` with a squared, flipped `y`.

  `w1` and `w2` are not defined in this function; they are read from the
  enclosing scope.

  Example shapes (from the original annotation):
  [4, 2, 3], [4, 2, 1, 3] -> [4, 3, 2].

  Args:
    x: Tensor multiplied by `w1` and cumulatively summed along its last axis.
    y: Tensor multiplied by `w2`, reversed along its third-from-last axis and
      squared; index 0 of its second-to-last axis is then selected.

  Returns:
    The float32 sum of both terms with its last two axes swapped.
  """
  # Running sum along the last axis, cast so it can be added to the y term.
  running_sum = tf.cast(tf.math.cumsum(w1 * x, axis=-1), dtype=tf.float32)
  # Flip along axis -3, square, and drop the second-to-last axis via index 0.
  flipped_squared = tf.square(tf.reverse(w2 * y, axis=[-3]))[..., 0, :]
  combined = running_sum + flipped_squared
  return tf.transpose(combined, perm=[0, 2, 1])