def __init__(
            self,
            input_size,
            output_size,  # keras::Conv::filters
            # Conv specific.
            filter_shape,  # keras::Conv::kernel_size
            rank=2,  # keras::Conv::rank
            strides=1,  # keras::Conv::strides
            padding='VALID',  # keras::Conv::padding; 'CAUSAL' not implemented.
            # keras::Conv::data_format is not implemented
            dilations=1,  # keras::Conv::dilation_rate
            output_padding=None,  # keras::ConvTranspose::output_padding
            method='auto',
            # Weights
            kernel_initializer=None,  # tfp.nn.initializers.glorot_uniform()
            bias_initializer=None,  # tf.initializers.zeros()
            make_kernel_bias_fn=kernel_bias_lib.make_kernel_bias,
            dtype=tf.float32,
            index_dtype=tf.int32,
            # Misc
            activation_fn=None,
            validate_args=False,
            name=None):
        """Creates the kernel, bias and apply function of the layer.

        Note: `data_format` is not supported since all nn layers operate on
        the rightmost column. If your channel dimension is not rightmost, use
        `tf.transpose` before calling this layer. For example, if your channel
        dimension is second from the left, the following code will move it
        rightmost:

        ```python
        inputs = tf.transpose(inputs, tf.concat([
            [0], tf.range(2, tf.rank(inputs)), [1]], axis=0))
        ```

        Args:
          input_size: Number of input channels. In Keras, this argument is
            inferred from the rightmost input shape, i.e.,
            `tf.shape(inputs)[-1]`. Here it is the last entry of the kernel
            shape built by this constructor
            (`filter_shape + [output_size, input_size]`).
          output_size: Number of output channels; also the shape of `bias`
            (`[output_size]`). In Keras, this argument is called `filters`.
          filter_shape: Spatial extent of the kernel; expanded to a
            length-`rank` tuple by
            `convolution_util.prepare_tuple_argument`. In Keras, this
            argument is called `kernel_size`.
          rank: An integer, the rank of the convolution, e.g. "2" for 2D
            convolution. In Keras, this argument has the same name and
            semantics.
            NOTE(review): `rank` is only used to expand `filter_shape`; the
            apply function is built with a hard-coded `rank=2` and the
            channel swap uses a 4-axis permutation, so values other than `2`
            look unsupported - confirm.
            Default value: `2`.
          strides: An integer or tuple/list of n integers, specifying the
            stride length of the convolution.
            In Keras, this argument has the same name and semantics.
            Default value: `1`.
          padding: One of `"VALID"` or `"SAME"` (case-insensitive).
            In Keras, this argument has the same name and semantics (except
            we don't support `"CAUSAL"`).
            Default value: `'VALID'`.
          dilations: An integer or tuple/list of `rank` integers, specifying
            the dilation rate to use for dilated convolution. Currently,
            specifying any `dilations` value != 1 is incompatible with
            specifying any `strides` value != 1.
            In Keras, this argument is called `dilation_rate`.
            Default value: `1`.
          output_padding: An `int` or length-`rank` tuple/list representing
            the amount of padding along the input spatial dimensions (e.g.,
            depth, height, width). In Keras, this argument has the same name
            and semantics.
            NOTE(review): this argument is accepted but never referenced in
            this constructor - confirm whether it is intentionally ignored.
            Default value: `None` (i.e., inferred).
          method: Passed together with `strides` to
            `_get_convolution_transpose_fn` to pick the apply-function
            factory.
            Default value: `'auto'`.
          kernel_initializer: Forwarded to `make_kernel_bias_fn`.
            Default value: `None` (i.e.,
            `tfp.experimental.nn.initializers.glorot_uniform()`).
          bias_initializer: Forwarded to `make_kernel_bias_fn`.
            Default value: `None` (i.e., `tf.initializers.zeros()`).
          make_kernel_bias_fn: Callable returning the `(kernel, bias)` pair.
            Default value: `tfp.experimental.nn.util.make_kernel_bias`.
          dtype: Forwarded to `make_kernel_bias_fn` and to the base class.
            Default value: `tf.float32`.
          index_dtype: Passed as `dtype` to the apply-function factory.
            Default value: `tf.int32`.
          activation_fn: Forwarded to the base class.
            Default value: `None`.
          validate_args: Forwarded to the tuple-argument helper, the
            apply-function factory and the base class.
            Default value: `False`.
          name: Forwarded to the base class.
            Default value: `None` (i.e., `'ConvolutionTranspose'`).
        """
        spatial_shape = convolution_util.prepare_tuple_argument(
            filter_shape, rank, 'filter_shape', validate_args)

        # The two channel axes are stored swapped ([..., out, in]); they are
        # swapped back right before the kernel is applied (see below).
        stored_kernel_shape = ps.concat(
            [spatial_shape, [output_size, input_size]], axis=0)
        no_batch_ndims = 0
        kernel, bias = make_kernel_bias_fn(
            stored_kernel_shape,
            [output_size],
            kernel_initializer,
            bias_initializer,
            no_batch_ndims,
            no_batch_ndims,
            dtype)

        build_apply_fn = _get_convolution_transpose_fn(strides, method)
        conv_transpose_fn = build_apply_fn(
            spatial_shape,
            strides,
            padding,
            rank=2,
            dilations=dilations,
            dtype=index_dtype,
            validate_args=validate_args)

        # TODO(emilyaf): Remove after kernel shape is updated.
        def _apply_with_swapped_channels(x, k):
            # Swap the last two kernel axes, then flatten everything but the
            # trailing `output_size` axis before handing off.
            swapped = tf.transpose(k, perm=[0, 1, 3, 2])
            return conv_transpose_fn(
                x, tf.reshape(swapped, [-1, output_size]))

        super(ConvolutionTranspose, self).__init__(
            kernel=kernel,
            bias=bias,
            apply_kernel_fn=_apply_with_swapped_channels,
            dtype=dtype,
            activation_fn=activation_fn,
            validate_args=validate_args,
            name=name)
Exemple #2
0
def generate_mc_normal_draws(num_normal_draws,
                             num_time_steps,
                             num_sample_paths,
                             random_type,
                             batch_shape=None,
                             skip=0,
                             seed=None,
                             dtype=None,
                             name=None):
  """Pre-draws the normal samples consumed by a Monte Carlo algorithm.

  Many Monte Carlo (MC) algorithms can be rewritten so that every random (or
  quasi-random) variable is drawn up front as one `Tensor` of shape
  `[num_time_steps] + batch_shape + [num_sample_paths, num_normal_draws]`,
  where `batch_shape` is the shape of the independent batches of the
  algorithm, `num_time_steps` is the number of time steps performed within
  each batch, `num_sample_paths` is the number of sample paths and
  `num_normal_draws` is the number of independent normal draws per sample
  path. Quasi-random numbers, for example, have to be drawn in advance.

  The draws are produced as a single multivariate sample of dimension
  `num_time_steps * num_normal_draws` per path, so that for a quasi-random
  `random_type` each `x[i]` of the result `x` corresponds to different
  dimensions of the quasi-random sequence.

  Args:
    num_normal_draws: A scalar int32 `Tensor`. The number of independent normal
      draws at each time step for each sample path. Should be a graph
      compilation constant.
    num_time_steps: A scalar int32 `Tensor`. The number of time steps at which
      to draw the independent normal samples. Should be a graph compilation
      constant.
    num_sample_paths: A scalar int32 `Tensor`. The number of trajectories (e.g.,
      Monte Carlo paths) for which to draw the independent normal samples.
      Should be a graph compilation constant.
    random_type: Enum value of `tff.math.random.RandomType`. The type of
      (quasi)-random number generator to use to generate the paths.
    batch_shape: Either a `tf.TensorShape` or a 1-d `Tensor` of type
      `tf.int32` specifying the dimensions of independent batches of normal
      samples to be drawn.
      Default value: `None`, which corresponds to a single batch of shape
      `tf.TensorShape([])`.
    skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol or
      Halton sequence to skip. Used only when `random_type` is 'SOBOL',
      'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
      Default value: `0` (`None` is also treated as `0`).
    seed: Seed for the random number generator. The seed is only relevant if
      `random_type` is one of `[STATELESS, PSEUDO, HALTON_RANDOMIZED,
      PSEUDO_ANTITHETIC, STATELESS_ANTITHETIC]`. For `PSEUDO`,
      `PSEUDO_ANTITHETIC` and `HALTON_RANDOMIZED` the seed should be a Python
      integer. For `STATELESS` and `STATELESS_ANTITHETIC` it must be supplied
      as an integer `Tensor` of shape `[2]`.
      Default value: `None` which means no seed is set.
    dtype: The `dtype` of the output `Tensor`.
      Default value: `None` which maps to `float32`.
    name: Python string. The name to give this op.
      Default value: `None` which maps to `generate_mc_normal_draws`.

  Returns:
   A `Tensor` of shape
   `[num_time_steps] + batch_shape + [num_sample_paths, num_normal_draws]`.
  """
  op_name = 'generate_mc_normal_draws' if name is None else name
  initial_skip = 0 if skip is None else skip
  with tf.name_scope(op_name):
    out_dtype = tf.float32 if dtype is None else dtype
    if batch_shape is None:
      batch_shape = tf.TensorShape([])

    # One zero-mean vector of length `num_time_steps * num_normal_draws`: for
    # quasi-random draws every (time step, draw) pair needs its own dimension.
    zero_mean = tf.zeros(
        [num_time_steps * num_normal_draws],
        dtype=out_dtype,
        name='total_dimension')

    antithetic_types = (random.RandomType.PSEUDO_ANTITHETIC,
                        random.RandomType.STATELESS_ANTITHETIC)
    is_antithetic = random_type in antithetic_types
    if is_antithetic:
      # Antithetic samplers need `num_sample_paths` as the leading axis.
      leading_shape = tf.concat([[num_sample_paths], batch_shape], axis=0)
    else:
      # For QMC sequences `num_sample_paths` has to follow `batch_shape`.
      leading_shape = tf.concat([batch_shape, [num_sample_paths]], axis=0)

    flat_draws = random.mv_normal_sample(
        leading_shape,
        mean=zero_mean,
        random_type=random_type,
        seed=seed,
        skip=initial_skip)

    # Split the trailing axis into (time step, draw).
    draws = tf.reshape(
        flat_draws,
        tf.concat([leading_shape, [num_time_steps, num_normal_draws]], axis=0))

    # Move the time axis to the front; for batched antithetic draws the
    # leading sample axis is additionally moved behind the batch axes.
    rank = draws.shape.rank
    time_axis = rank - 2
    draw_axis = rank - 1
    if is_antithetic and rank > 3:
      perm = [time_axis, *range(1, time_axis), 0, draw_axis]
    else:
      perm = [time_axis, *range(time_axis), draw_axis]
    return tf.transpose(draws, perm=perm)
 def preprocess(example):
     """Builds the `{'images', 'labels'}` feature dict for one example.

     The image goes through `_preprocess_image` (with the enclosing scope's
     `is_training`) and has its axes permuted with `(2, 0, 1)`; the label is
     cast to `tf.int32`.
     """
     processed = _preprocess_image(example['image'], is_training)
     return {
         # Per the original comment this converts an HWC image to CHW.
         'images': tf.transpose(processed, (2, 0, 1)),
         'labels': tf.cast(example['label'], tf.int32),
     }
Exemple #4
0
    def get_loss_tensors(self, f0_candidates, freqs, amps):
        """Computes the two loss traces used to estimate fundamental frequency.

        Args:
          f0_candidates: Frequencies of candidates in hertz.
            [batch, time, freq].
          freqs: Frequencies of sinusoids in hertz. [batch, time, freq].
          amps: Amplitudes of sinusoids, greater than 0. [batch, time, freq].

        Returns:
          sinusoids_loss: -log p(sinusoids|harmonics),
            [batch, time, f0_candidate].
          harmonics_loss: -log p(harmonics|sinusoids),
            [batch, time, f0_candidate].
        """
        # ------------------------------------------------------------------
        # Part 1: -log p(sinusoids | candidate harmonics).
        # ------------------------------------------------------------------
        sinusoid_dist = self.get_p_sinusoids_given_harmonics()

        # Every partial is scored against every candidate via their ratio.
        # -> [batch, time, candidate, partial]
        partials = freqs[:, :, tf.newaxis, :]
        candidates = f0_candidates[:, :, :, tf.newaxis]
        nll_sinusoids = -sinusoid_dist.log_prob(
            safe_divide(partials, candidates))

        # Amplitude-weighted average over partials.
        # -> [batch, time, candidate]
        weights = tf.convert_to_tensor(amps[:, :, tf.newaxis, :])
        weighted_nll = tf.reduce_sum(nll_sinusoids * weights, axis=-1)
        sinusoids_loss = safe_divide(weighted_nll,
                                     tf.reduce_sum(weights, axis=-1))

        # ------------------------------------------------------------------
        # Part 2: -log p(candidate harmonics | sinusoids).
        # ------------------------------------------------------------------
        harmonic_dist = self.get_p_harmonics_given_sinusoids(freqs, amps)
        harmonics = self.get_candidate_harmonics(f0_candidates, as_midi=True)

        # tfp expects [sample_sh, batch_sh, event_sh], so move the
        # (candidate, harmonic) axes to the front for `log_prob` and back
        # afterwards.
        # [batch, time, candidate, harm] -> [candidate, harm, batch, time]
        front_loaded = tf.transpose(harmonics, [2, 3, 0, 1])
        nll_front_loaded = -harmonic_dist.log_prob(front_loaded)
        # -> [batch, time, candidate, harm]
        nll_harmonics = tf.transpose(nll_front_loaded, [2, 3, 0, 1])

        # Linearly decreasing prior: upper harmonics matter less.
        n_points = self.n_harmonic_points
        amps_prior = tf.linspace(1.0, 1.0 / n_points, n_points)
        harmonics_loss = (
            nll_harmonics *
            amps_prior[tf.newaxis, tf.newaxis, tf.newaxis, :])

        # Zero out harmonics at or above nyquist and rescale by the fraction
        # of harmonics kept, so the highest possible frequency is not
        # trivially favoured.
        nyquist_midi = hz_to_midi(self.sample_rate / 2.0)
        below_nyquist = tf.where(harmonics < nyquist_midi,
                                 tf.ones_like(harmonics_loss),
                                 tf.zeros_like(harmonics_loss))
        kept_fraction = tf.reduce_mean(below_nyquist, axis=-1, keepdims=True)
        harmonics_loss *= safe_divide(below_nyquist, kept_fraction)

        # Average over the harmonic axis.
        harmonics_loss = tf.reduce_mean(harmonics_loss, axis=-1)

        return sinusoids_loss, harmonics_loss
Exemple #5
0
    def testLangevin3DNormalDynamicVolatility(self):
        """Samples a 3-D multivariate normal with a state-dependent volatility.

        Runs `tfp.mcmc.MetropolisAdjustedLangevinAlgorithm` on a two-part
        state (parts of width 2 and 1) and checks that the empirical mean and
        covariance of the draws match `true_mean` / `true_cov` within an
        absolute and relative tolerance of 0.1.
        """
        dtype = np.float32
        true_mean = dtype([1, 2, 7])
        true_cov = dtype([[1, 0.25, 0.25], [0.25, 1, 0.25], [0.25, 0.25, 1]])
        num_results = 500
        num_chains = 500

        # The target distribution is defined through the Cholesky
        # decomposition of `true_cov`.
        chol = tf.linalg.cholesky(true_cov)
        target = tfd.MultivariateNormalTriL(loc=true_mean, scale_tril=chol)

        # The state is passed as two tensors `x` and `y` (see `init_state`
        # below: trailing sizes 2 and 1). The target log-density is then
        # defined on their concatenation:
        def target_log_prob(x, y):
            # Join the two state parts along the last axis into one 3-vector.
            z = tf.concat([x, y], axis=-1)
            return target.log_prob(z)

        # Here we define the volatility function to be non-constant.
        def volatility_fn(x, y):
            # One volatility per state part, each depending on the current
            # state. `x + y` relies on broadcasting `y` against `x`.
            return [
                1. / (0.5 + 0.1 * tf.abs(x + y)), 1. / (0.5 + 0.1 * tf.abs(y))
            ]

        # Initial state of the chain: all ones, one row per chain.
        init_state = [
            np.ones([num_chains, 2], dtype=dtype),
            np.ones([num_chains, 1], dtype=dtype)
        ]

        # Run the Metropolis-adjusted Langevin algorithm for `num_results`
        # iterations for `num_chains` independent chains:
        states, _ = tfp.mcmc.sample_chain(
            num_results=num_results,
            current_state=init_state,
            kernel=tfp.mcmc.MetropolisAdjustedLangevinAlgorithm(
                target_log_prob_fn=target_log_prob,
                volatility_fn=volatility_fn,
                step_size=.1,
                seed=42),
            num_burnin_steps=200,
            num_steps_between_results=1,
            parallel_iterations=1)

        # Re-assemble the two state parts and compute the empirical mean and
        # covariance over the first two axes (presumably results and chains -
        # confirm against `sample_chain`'s output layout).
        states = tf.concat(states, axis=-1)
        sample_mean = tf.reduce_mean(input_tensor=states, axis=[0, 1])
        x = tf.expand_dims(states - sample_mean, -1)
        # Outer product of each centered draw with itself, then averaged.
        sample_cov = tf.reduce_mean(input_tensor=tf.matmul(
            x, tf.transpose(a=x, perm=[0, 1, 3, 2])),
                                    axis=[0, 1])

        sample_mean_, sample_cov_ = self.evaluate([sample_mean, sample_cov])

        self.assertAllClose(np.squeeze(sample_mean_),
                            true_mean,
                            atol=0.1,
                            rtol=0.1)
        self.assertAllClose(np.squeeze(sample_cov_),
                            true_cov,
                            atol=0.1,
                            rtol=0.1)
Exemple #6
0
    def _log_prob(self, x):
        """Evaluates the log-density at `x`.

        Args:
          x: Matrix-valued input. It is used as-is as the factor `x_sqrt`
            when `self.input_output_cholesky` is truthy; otherwise
            `tf.linalg.cholesky(x)` is taken first.

        Returns:
          `(df - dimension - 1) * half_log_det_x - 0.5 * trace_scale_inv_x -
          log_normalization()`, with the individual terms computed as
          described in the comments below. When the static ranks of `x` and
          of `self.batch_shape` are both known, a static shape hint is set on
          the result.
        """
        if self.input_output_cholesky:
            x_sqrt = x
        else:
            # Complexity: O(nbk**3)
            x_sqrt = tf.linalg.cholesky(x)

        batch_shape = self.batch_shape_tensor()
        event_shape = self.event_shape_tensor()
        x_ndims = tf.rank(input=x_sqrt)
        # Left-pad `x_sqrt` with size-1 axes until it has at least
        # `batch_ndims + 2` axes (the 2 being the matrix event axes).
        num_singleton_axes_to_prepend = (
            tf.maximum(tf.size(input=batch_shape) + 2, x_ndims) - x_ndims)
        x_with_prepended_singletons_shape = tf.concat([
            tf.ones([num_singleton_axes_to_prepend], dtype=tf.int32),
            tf.shape(input=x_sqrt)
        ], 0)
        x_sqrt = tf.reshape(x_sqrt, x_with_prepended_singletons_shape)
        ndims = tf.rank(x_sqrt)
        # sample_ndims = ndims - batch_ndims - event_ndims
        sample_ndims = ndims - tf.size(input=batch_shape) - 2
        sample_shape = tf.shape(input=x_sqrt)[:sample_ndims]

        # We need to be able to pre-multiply each matrix by its corresponding
        # batch scale matrix. Since a Distribution Tensor supports multiple
        # samples per batch, this means we need to reshape the input matrix `x`
        # so that the first b dimensions are batch dimensions and the last two
        # are of shape [dimension, dimension * number_of_samples]. Doing these
        # gymnastics allows us to do a batch_solve.
        #
        # After we're done with sqrt_solve (the batch operation) we need to undo
        # this reshaping so what we're left with is a Tensor partitionable by
        # sample, batch, event dimensions.

        # Complexity: O(nbk**2) since transpose must access every element.
        scale_sqrt_inv_x_sqrt = x_sqrt
        # Rotate the leading sample axes to the end:
        # [sample, batch, event] -> [batch, event, sample].
        perm = tf.concat(
            [tf.range(sample_ndims, ndims),
             tf.range(0, sample_ndims)], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt,
                                             perm=perm)
        # Fold the last event axis and all sample axes into a single trailing
        # axis of size `dimension * prod(sample_shape)`.
        last_dim_size = (
            tf.cast(self.dimension, dtype=tf.int32) * tf.reduce_prod(
                input_tensor=x_with_prepended_singletons_shape[:sample_ndims]))
        shape = tf.concat([
            x_with_prepended_singletons_shape[sample_ndims:-2],
            [tf.cast(self.dimension, dtype=tf.int32), last_dim_size]
        ],
                          axis=0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)

        # Complexity: O(nbM*k) where M is the complexity of the operator solving a
        # vector system. For LinearOperatorLowerTriangular, each solve is O(k**2) so
        # this step has complexity O(nbk^3).
        scale_sqrt_inv_x_sqrt = self.scale_operator.solve(
            scale_sqrt_inv_x_sqrt)

        # Undo make batch-op ready: unfold the trailing axis back into
        # [event, sample] and rotate the sample axes back to the front.
        # Complexity: O(nbk**2)
        shape = tf.concat([
            tf.shape(input=scale_sqrt_inv_x_sqrt)[:-2], event_shape,
            sample_shape
        ],
                          axis=0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)
        perm = tf.concat([
            tf.range(ndims - sample_ndims, ndims),
            tf.range(0, ndims - sample_ndims)
        ], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt,
                                             perm=perm)

        # Write V = SS', X = LL'. Then:
        # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
        #              = tr[inv(S) L L' inv(S)']
        #              = tr[(inv(S) L) (inv(S) L)']
        #              = sum_{ik} (inv(S) L)_{ik}**2
        # The second equality follows from the cyclic permutation property.
        # Complexity: O(nbk**2)
        trace_scale_inv_x = tf.reduce_sum(
            input_tensor=tf.square(scale_sqrt_inv_x_sqrt), axis=[-2, -1])

        # Sum of the logs of the diagonal of the factor `x_sqrt`.
        # Complexity: O(nbk)
        half_log_det_x = tf.reduce_sum(input_tensor=tf.math.log(
            tf.linalg.diag_part(x_sqrt)),
                                       axis=[-1])

        # Complexity: O(nbk**2)
        log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                    0.5 * trace_scale_inv_x - self.log_normalization())

        # Set shape hints.
        # Try to merge what we know from the input x with what we know from the
        # parameters of this distribution.
        if tensorshape_util.rank(
                x.shape) is not None and tensorshape_util.rank(
                    self.batch_shape) is not None:
            tensorshape_util.set_shape(
                log_prob,
                tf.broadcast_static_shape(x.shape[:-2], self.batch_shape))

        return log_prob
Exemple #7
0
  def _testMVN(self,
               base_distribution_class,
               base_distribution_kwargs,
               event_shape=()):
    """Checks a shift/scale-transformed base distribution against scipy MVNs.

    Builds the distribution under test (`self._cls()`) twice: once from
    shape-unknown `tf.Variable` parameters ("dynamic") and once from the plain
    values ("static"). Each wraps `tfd.Sample(base_distribution_class(...))`
    in a `Shift` / `ScaleMatvecTriL` chain. Sample moments, `log_prob`,
    `prob`, `mean` and `entropy` of both are compared with
    `scipy.stats.multivariate_normal` built from `self._shift` and
    `self._tril`.

    Args:
      base_distribution_class: Callable constructing the base distribution;
        called with `validate_args=True` plus the keyword arguments below.
      base_distribution_kwargs: Dict of keyword arguments for
        `base_distribution_class`.
      event_shape: `sample_shape` handed to `tfd.Sample`.
    """
    # Base distribution shapes must be compatible w/bijector; most bijectors are
    # batch_shape agnostic and only care about event_ndims.
    # In the case of `ScaleMatvecTriL`, if we got it wrong then it would fire an
    # exception due to incompatible dimensions.
    event_shape_var = tf.Variable(
        np.int32(event_shape),
        shape=tf.TensorShape(None),
        name='dynamic_event_shape')

    # Same parameters as the static case, but wrapped in variables declared
    # with a fully unknown shape.
    base_distribution_dynamic_kwargs = {
        k: tf.Variable(
            v, shape=tf.TensorShape(None), name='dynamic_{}'.format(k))
        for k, v in base_distribution_kwargs.items()}
    fake_mvn_dynamic = self._cls()(
        distribution=tfd.Sample(
            base_distribution_class(
                validate_args=True, **base_distribution_dynamic_kwargs),
            sample_shape=event_shape_var),
        bijector=tfb.Chain(
            [tfb.Shift(shift=self._shift),
             tfb.ScaleMatvecTriL(scale_tril=self._tril)]),
        validate_args=True)

    fake_mvn_static = self._cls()(
        distribution=tfd.Sample(
            base_distribution_class(
                validate_args=True, **base_distribution_kwargs),
            sample_shape=event_shape),
        bijector=tfb.Chain(
            [tfb.Shift(shift=self._shift),
             tfb.ScaleMatvecTriL(scale_tril=self._tril)]),
        validate_args=True)

    # Reference moments: mean is the shift, covariance is tril @ tril^T
    # (batched over the leading axis of `self._tril`).
    actual_mean = np.tile(self._shift, [2, 1])  # ScaleMatvecTriL elided tile.
    actual_cov = np.matmul(self._tril, np.transpose(self._tril, [0, 2, 1]))

    def actual_mvn_log_prob(x):
      # One scipy MVN per batch member `i`, evaluated on `x[:, i, :]`; the
      # final transpose puts the batch axis last.
      return np.concatenate([[  # pylint: disable=g-complex-comprehension
          stats.multivariate_normal(actual_mean[i],
                                    actual_cov[i]).logpdf(x[:, i, :])
      ] for i in range(len(actual_cov))]).T

    actual_mvn_entropy = np.concatenate(
        [[stats.multivariate_normal(actual_mean[i], actual_cov[i]).entropy()]
         for i in range(len(actual_cov))])

    self.assertAllEqual([3], fake_mvn_static.event_shape)
    self.assertAllEqual([2], fake_mvn_static.batch_shape)

    # The static shapes of the dynamic variant are only checked outside of
    # eager mode, where they are expected to be fully unknown.
    if not tf.executing_eagerly():
      self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.event_shape)
      self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.batch_shape)

    # Kept as a float: it is cast with `int(...)` for sampling and used as
    # the divisor of the sample covariance.
    num_samples = 7e3
    for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
      # Ensure sample works by checking first, second moments.
      y = fake_mvn.sample(int(num_samples), seed=test_util.test_seed())
      x = y[0:5, ...]
      sample_mean = tf.reduce_mean(y, axis=0)
      # Move the sample axis last so the matmul below sums over samples.
      centered_y = tf.transpose(a=y - sample_mean, perm=[1, 2, 0])
      sample_cov = tf.matmul(
          centered_y, centered_y, transpose_b=True) / num_samples
      # Initialize the variables backing the dynamic variant before
      # evaluating any tensor.
      self.evaluate(
          [v.initializer for v in base_distribution_dynamic_kwargs.values()]
          + [event_shape_var.initializer])
      [
          sample_mean_,
          sample_cov_,
          x_,
          fake_event_shape_,
          fake_batch_shape_,
          fake_log_prob_,
          fake_prob_,
          fake_mean_,
          fake_entropy_,
      ] = self.evaluate([
          sample_mean,
          sample_cov,
          x,
          fake_mvn.event_shape_tensor(),
          fake_mvn.batch_shape_tensor(),
          fake_mvn.log_prob(x),
          fake_mvn.prob(x),
          fake_mvn.mean(),
          fake_mvn.entropy(),
      ])

      self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
      self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

      # Ensure all other functions work as intended.
      self.assertAllEqual([5, 2, 3], x_.shape)
      self.assertAllEqual([3], fake_event_shape_)
      self.assertAllEqual([2], fake_batch_shape_)
      self.assertAllClose(
          actual_mvn_log_prob(x_), fake_log_prob_, atol=0., rtol=1e-6)
      self.assertAllClose(
          np.exp(actual_mvn_log_prob(x_)), fake_prob_, atol=0., rtol=1e-5)
      self.assertAllClose(actual_mean, fake_mean_, atol=0., rtol=1e-6)
      self.assertAllClose(actual_mvn_entropy, fake_entropy_, atol=0., rtol=1e-6)
Exemple #8
0
 def convert(w):
     """Returns `tf.transpose(w)` if `transpose_weights` is truthy, else `w`."""
     if not transpose_weights:
         return w
     return tf.transpose(w)
Exemple #9
0
def is_cudnn_supported_inputs(mask, time_major):
    """Tells whether `mask` is right-padded and has no fully masked sequence.

    Args:
        mask: Mask tensor handed to `is_sequence_right_padded` and
            `has_fully_masked_sequence`.
        time_major: When truthy, `mask` is first passed through
            `tf.transpose` (default permutation) - presumably to bring it to
            the batch-major layout the helpers expect; confirm with callers.

    Returns:
        The logical AND of `is_sequence_right_padded(mask)` and the negation
        of `has_fully_masked_sequence(mask)`.
    """
    aligned_mask = tf.transpose(mask) if time_major else mask

    right_padded = is_sequence_right_padded(aligned_mask)
    any_fully_masked = has_fully_masked_sequence(aligned_mask)
    return tf.logical_and(right_padded, tf.logical_not(any_fully_masked))
Exemple #10
0
    def _sample_n(self, n, seed):
        """Draws `n` samples.

        A lower-triangular matrix is filled with standard-normal
        off-diagonal entries and `exp(0.5 * g)` diagonal entries, where `g`
        comes from a log-space gamma sampler, and is then left-multiplied by
        `self._scale`.

        Args:
          n: Number of samples; becomes the leading axis of the result.
          seed: Seed, split into one seed for the normal draws and one for
            the gamma draws.

        Returns:
          The product described above when `self.input_output_cholesky` is
          truthy; otherwise that factor multiplied by its own adjoint.
        """
        df = tf.convert_to_tensor(self.df)
        batch_shape = self._batch_shape_tensor(df=df)
        event_shape = self._event_shape_tensor()
        batch_ndims = ps.shape(batch_shape)[0]

        ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
        shape = ps.concat([[n], batch_shape, event_shape], 0)
        normal_seed, gamma_seed = samplers.split_seed(seed, salt='Wishart')

        # Complexity: O(nbk**2)
        x = samplers.normal(shape=shape,
                            mean=0.,
                            stddev=1.,
                            dtype=self.dtype,
                            seed=normal_seed)

        # Complexity: O(nbk)
        # This parameterization is equivalent to Chi2, i.e.,
        # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
        # Broadcast `df` against the batch shape of the scale operator.
        expanded_df = df * tf.ones(self._scale.batch_shape_tensor(),
                                   dtype=dtype_util.base_dtype(df.dtype))

        # `log_space=True`: `g` is exponentiated below before being written
        # to the diagonal.
        g = gamma_lib.random_gamma(
            shape=[n],
            concentration=self._multi_gamma_sequence(0.5 * expanded_df,
                                                     self._dimension()),
            log_rate=tf.convert_to_tensor(np.log(0.5), self.dtype),
            seed=gamma_seed,
            log_space=True)

        # Complexity: O(nbk**2)
        x = tf.linalg.band_part(x, -1, 0)  # Tri-lower.

        # Complexity: O(nbk)
        x = tf.linalg.set_diag(x, tf.math.exp(g * 0.5))

        # Make batch-op ready: rotate the sample axis to the end and fold it
        # into the last event axis so a single batched matmul covers all
        # samples.
        # Complexity: O(nbk**2)
        perm = ps.concat([ps.range(1, ndims), [0]], 0)
        x = tf.transpose(a=x, perm=perm)
        shape = ps.concat(
            [batch_shape, [event_shape[0]], [event_shape[1] * n]], 0)
        x = tf.reshape(x, shape)

        # Complexity: O(nbM) where M is the complexity of the operator solving a
        # vector system. For LinearOperatorLowerTriangular, each matmul is O(k^3) so
        # this step has complexity O(nbk^3).
        x = self._scale.matmul(x)

        # Undo make batch-op ready: unfold the sample axis and rotate it back
        # to the front.
        # Complexity: O(nbk**2)
        shape = ps.concat([batch_shape, event_shape, [n]], 0)
        x = tf.reshape(x, shape)
        perm = ps.concat([[ndims - 1], ps.range(0, ndims - 1)], 0)
        x = tf.transpose(a=x, perm=perm)

        if not self.input_output_cholesky:
            # Turn the factor into the full matrix: x @ x^H.
            # Complexity: O(nbk**3)
            x = tf.matmul(x, x, adjoint_b=True)

        return x
Exemple #11
0
def generate_mc_normal_draws(num_normal_draws,
                             num_time_steps,
                             num_sample_paths,
                             random_type,
                             skip=0,
                             seed=None,
                             dtype=None,
                             name=None):
    """Pre-draws the normal samples consumed by a Monte Carlo algorithm.

    Many Monte Carlo (MC) algorithms can be rewritten so that every random
    (or quasi-random) variable is drawn up front as one `Tensor` of shape
    `[num_time_steps, num_sample_paths, num_normal_draws]`, where
    `num_time_steps` is the number of time steps the algorithm performs,
    `num_sample_paths` is the number of sample paths and `num_normal_draws`
    is the number of independent normal draws per sample path. Quasi-random
    numbers, for example, have to be drawn in advance.

    The draws are produced as a single multivariate sample of dimension
    `num_time_steps * num_normal_draws` per path, so that for a quasi-random
    `random_type` each `x[i]` of the result `x` corresponds to different
    dimensions of the quasi-random sequence.

    Args:
      num_normal_draws: A scalar int32 `Tensor`. The number of independent
        normal draws at each time step for each sample path. Should be a
        graph compilation constant.
      num_time_steps: A scalar int32 `Tensor`. The number of time steps at
        which to draw the independent normal samples. Should be a graph
        compilation constant.
      num_sample_paths: A scalar int32 `Tensor`. The number of trajectories
        (e.g., Monte Carlo paths) for which to draw the independent normal
        samples. Should be a graph compilation constant.
      random_type: Enum value of `tff.math.random.RandomType`. The type of
        (quasi)-random number generator to use to generate the paths.
      skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol
        or Halton sequence to skip. Used only when `random_type` is 'SOBOL',
        'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
        Default value: `0` (`None` is also treated as `0`).
      seed: Seed for the random number generator. The seed is only relevant
        if `random_type` is one of `[STATELESS, PSEUDO, HALTON_RANDOMIZED,
        PSEUDO_ANTITHETIC, STATELESS_ANTITHETIC]`. For `PSEUDO`,
        `PSEUDO_ANTITHETIC` and `HALTON_RANDOMIZED` the seed should be a
        Python integer. For `STATELESS` and `STATELESS_ANTITHETIC` it must be
        supplied as an integer `Tensor` of shape `[2]`.
        Default value: `None` which means no seed is set.
      dtype: The `dtype` of the output `Tensor`.
        Default value: `None` which maps to `float32`.
      name: Python string. The name to give this op.
        Default value: `None` which maps to `generate_mc_normal_draws`.

    Returns:
      A `Tensor` of shape
      `[num_time_steps, num_sample_paths, num_normal_draws]`.
    """
    op_name = 'generate_mc_normal_draws' if name is None else name
    initial_skip = 0 if skip is None else skip
    with tf.name_scope(op_name):
        out_dtype = tf.float32 if dtype is None else dtype

        # One zero-mean vector of length `num_time_steps * num_normal_draws`:
        # for quasi-random draws every (time step, draw) pair needs its own
        # dimension.
        zero_mean = tf.zeros(
            [num_time_steps * num_normal_draws],
            dtype=out_dtype,
            name='total_dimension')
        flat_draws = random.mv_normal_sample(
            [num_sample_paths],
            mean=zero_mean,
            random_type=random_type,
            seed=seed,
            skip=initial_skip)

        # Split the trailing axis into (time step, draw), then swap the
        # first two axes so that time steps lead:
        # [num_samples, steps_num, dim] -> [steps_num, num_samples, dim].
        per_path_draws = tf.reshape(
            flat_draws, [num_sample_paths, num_time_steps, num_normal_draws])
        return tf.transpose(per_path_draws, [1, 0, 2])
Exemple #12
0
def generate_wavelet_toy_image_data(image_width, num_samples,
                                    wavelet_num_levels):
    """Builds random toy images for testFittingImageDataIsCorrect().

  For every wavelet level three pixel bands are drawn, followed by one final
  residual band that reuses the width and scale of the last level. Each band
  gets a random uniform "mean" (scaled by `2**level`), and `num_samples`
  normally distributed samples are drawn around that mean. The nested band
  structure is then collapsed with `wavelet.collapse` and passed through
  `util.syuv_to_rgb`.

  Args:
    image_width: The width and height in pixels of the images being produced.
    num_samples: The number of samples to generate.
    wavelet_num_levels: The number of levels in the wavelet decompositions of
      the generated images.

  Returns:
    A tuple `(samples, reference, color_space, representation)`, where
    samples = The sampled images, reshaped to
      (`num_samples`, `image_width`, `image_width`, 3) before conversion.
    reference = The per-band empirical mean of the samples, collapsed and
      converted the same way, transposed to put the 3 channels last.
    color_space = 'YUV'
    representation = 'CDF9/7'
  """
    color_space = 'YUV'
    representation = 'CDF9/7'

    def draw_band(scale, width):
        # Ground-truth band mean first, then the samples around it; the order
        # of the two random draws matters for reproducibility under a seed.
        band_mean = scale * np.random.uniform(size=(3, width, width))
        draws = np.random.normal(
            loc=np.tile(band_mean[np.newaxis], [num_samples, 1, 1, 1]))
        # The empirical mean over the sample axis serves as the reference.
        empirical_mean = np.mean(draws, 0)
        return np.reshape(draws, [-1, width, width]), empirical_mean

    samples = []
    reference = []
    for level in range(wavelet_num_levels):
        width = image_width // 2**(level + 1)
        scale = 2**level
        level_samples = []
        level_reference = []
        for _ in range(3):
            flat_draws, band_reference = draw_band(scale, width)
            level_samples.append(flat_draws)
            level_reference.append(band_reference)
        samples.append(level_samples)
        reference.append(level_reference)

    # The residual band shares the size and scale of the coarsest level.
    residual_samples, residual_reference = draw_band(scale, width)
    samples.append(residual_samples)
    reference.append(residual_reference)

    # Collapse and reshape wavelets to be ({_,} width, height, 3).
    collapsed_samples = wavelet.collapse(samples, representation)
    collapsed_reference = wavelet.collapse(reference, representation)
    stacked_samples = tf.reshape(
        collapsed_samples, [num_samples, 3, image_width, image_width])
    channels_last_samples = tf.transpose(stacked_samples, perm=[0, 2, 3, 1])
    channels_last_reference = tf.transpose(collapsed_reference,
                                           perm=[1, 2, 0])

    # Convert into RGB space.
    samples = util.syuv_to_rgb(channels_last_samples).numpy()
    reference = util.syuv_to_rgb(channels_last_reference).numpy()
    return samples, reference, color_space, representation
Exemple #13
0
def swaption_price(*,
                   expiries,
                   floating_leg_start_times,
                   floating_leg_end_times,
                   fixed_leg_payment_times,
                   floating_leg_daycount_fractions,
                   fixed_leg_daycount_fractions,
                   fixed_leg_coupon,
                   reference_rate_fn,
                   dim,
                   mean_reversion,
                   volatility,
                   notional=None,
                   is_payer_swaption=None,
                   use_analytic_pricing=True,
                   num_samples=1,
                   random_type=None,
                   seed=None,
                   skip=0,
                   time_step=None,
                   dtype=None,
                   name=None):
    """Calculates the price of European Swaptions using the Hull-White model.

  A European Swaption is a contract that gives the holder an option to enter a
  swap contract at a future date at a prespecified fixed rate. A swaption that
  grants the holder to pay fixed rate and receive floating rate is called a
  payer swaption while the swaption that grants the holder to receive fixed and
  pay floating payments is called the receiver swaption. Typically the start
  date (or the inception date) of the swap coincides with the expiry of the
  swaption. Mid-curve swaptions are currently not supported (b/160061740).

  Analytic pricing of swaptions is performed using the Jamshidian decomposition
  [1].

  #### References:
    [1]: D. Brigo, F. Mercurio. Interest Rate Models-Theory and Practice.
    Second Edition. 2007.

  #### Example
  The example shows how to value a batch of 1y x 1y and 1y x 2y swaptions using
  the Hull-White model.

  ```python
  import numpy as np
  import tensorflow.compat.v2 as tf
  import tf_quant_finance as tff

  dtype = tf.float64

  expiries = [1.0, 1.0]
  float_leg_start_times = [[1.0, 1.25, 1.5, 1.75, 2.0, 2.0, 2.0, 2.0],
                            [1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75]]
  float_leg_end_times = [[1.25, 1.5, 1.75, 2.0, 2.0, 2.0, 2.0, 2.0],
                          [1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]]
  fixed_leg_payment_times = [[1.25, 1.5, 1.75, 2.0, 2.0, 2.0, 2.0, 2.0],
                          [1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]]
  float_leg_daycount_fractions = [[0.25, 0.25, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0],
                              [0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25]]
  fixed_leg_daycount_fractions = [[0.25, 0.25, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0],
                              [0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25]]
  fixed_leg_coupon = [[0.011, 0.011, 0.011, 0.011, 0.0, 0.0, 0.0, 0.0],
                      [0.011, 0.011, 0.011, 0.011, 0.011, 0.011, 0.011, 0.011]]
  zero_rate_fn = lambda x: 0.01 * tf.ones_like(x, dtype=dtype)
  price = tff.models.hull_white.swaption_price(
      expiries=expiries,
      floating_leg_start_times=float_leg_start_times,
      floating_leg_end_times=float_leg_end_times,
      fixed_leg_payment_times=fixed_leg_payment_times,
      floating_leg_daycount_fractions=float_leg_daycount_fractions,
      fixed_leg_daycount_fractions=fixed_leg_daycount_fractions,
      fixed_leg_coupon=fixed_leg_coupon,
      reference_rate_fn=zero_rate_fn,
      notional=100.,
      dim=1,
      mean_reversion=[0.03],
      volatility=[0.02],
      dtype=dtype)
  # Expected value: [[0.7163243383624043], [1.4031415262337608]] # shape = (2,1)
  ```

  Args:
    expiries: A real `Tensor` of any shape and dtype. The time to
      expiration of the swaptions. The shape of this input determines the number
      (and shape) of swaptions to be priced and the shape of the output.
    floating_leg_start_times: A real `Tensor` of the same dtype as `expiries`.
      The times when accrual begins for each payment in the floating leg. The
      shape of this input should be `expiries.shape + [m]` where `m` denotes
      the number of floating payments in each leg.
    floating_leg_end_times: A real `Tensor` of the same dtype as `expiries`.
      The times when accrual ends for each payment in the floating leg. The
      shape of this input should be `expiries.shape + [m]` where `m` denotes
      the number of floating payments in each leg.
    fixed_leg_payment_times: A real `Tensor` of the same dtype as `expiries`.
      The payment times for each payment in the fixed leg. The shape of this
      input should be `expiries.shape + [n]` where `n` denotes the number of
      fixed payments in each leg.
    floating_leg_daycount_fractions: A real `Tensor` of the same dtype and
      compatible shape as `floating_leg_start_times`. The daycount fractions
      for each payment in the floating leg. This argument is currently
      discarded without being used.
    fixed_leg_daycount_fractions: A real `Tensor` of the same dtype and
      compatible shape as `fixed_leg_payment_times`. The daycount fractions
      for each payment in the fixed leg.
    fixed_leg_coupon: A real `Tensor` of the same dtype and compatible shape
      as `fixed_leg_payment_times`. The fixed rate for each payment in the
      fixed leg.
    reference_rate_fn: A Python callable that accepts expiry time as a real
      `Tensor` and returns a `Tensor` of shape `input_shape + [dim]`. Returns
      the continuously compounded zero rate at the present time for the input
      expiry time.
    dim: A Python scalar which corresponds to the number of Hull-White Models
      to be used for pricing.
    mean_reversion: A real positive `Tensor` of shape `[dim]` or a Python
      callable. The callable can be one of the following:
      (a) A left-continuous piecewise constant object (e.g.,
      `tff.math.piecewise.PiecewiseConstantFunc`) that has a property
      `is_piecewise_constant` set to `True`. In this case the object should
      have a method `jump_locations(self)` that returns a `Tensor` of shape
      `[dim, num_jumps]` or `[num_jumps]`. In the first case,
      `mean_reversion(t)` should return a `Tensor` of shape `[dim] + t.shape`,
      and in the second, `t.shape + [dim]`, where `t` is a rank 1 `Tensor` of
      the same `dtype` as the output. See example in the class docstring.
      (b) A callable that accepts scalars (stands for time `t`) and returns a
      `Tensor` of shape `[dim]`.
      Corresponds to the mean reversion rate.
    volatility: A real positive `Tensor` of the same `dtype` as
      `mean_reversion` or a callable with the same specs as above.
      Corresponds to the long run price variance.
    notional: An optional `Tensor` of the same dtype, broadcastable against
      the computed prices, specifying the notional amount for the underlying
      swap.
      Default value: `None` in which case the notional is set to 1.
    is_payer_swaption: A boolean `Tensor` of a shape compatible with `expiries`.
      Indicates whether the swaption is a payer (if True) or a receiver
      (if False) swaption. If not supplied, payer swaptions are assumed.
    use_analytic_pricing: A Python boolean specifying if analytic valuation
      should be performed. Analytic valuation is only supported for constant
      `mean_reversion` and piecewise constant `volatility`. If the input is
      `False`, then valuation using Monte-Carlo simulations is performed.
      Default value: The default value is `True`.
    num_samples: Positive scalar `int32` `Tensor`. The number of simulation
      paths during Monte-Carlo valuation. This input is ignored during analytic
      valuation.
      Default value: The default value is 1.
    random_type: Enum value of `RandomType`. The type of (quasi)-random
      number generator to use to generate the simulation paths. This input is
      relevant only for Monte-Carlo valuation and ignored during analytic
      valuation.
      Default value: `None` which maps to the standard pseudo-random numbers.
    seed: Seed for the random number generator. The seed is only relevant if
      `random_type` is one of
      `[STATELESS, PSEUDO, HALTON_RANDOMIZED, PSEUDO_ANTITHETIC,
        STATELESS_ANTITHETIC]`. For `PSEUDO`, `PSEUDO_ANTITHETIC` and
      `HALTON_RANDOMIZED` the seed should be a Python integer. For
      `STATELESS` and `STATELESS_ANTITHETIC` the seed must be supplied as an
      integer `Tensor` of shape `[2]`. This input is relevant only for
      Monte-Carlo valuation and ignored during analytic valuation.
      Default value: `None` which means no seed is set.
    skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol or
      Halton sequence to skip. Used only when `random_type` is 'SOBOL',
      'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
      Default value: `0`.
    time_step: Scalar real `Tensor`. Maximal distance between time grid points
      in Euler scheme. Relevant when Euler scheme is used for simulation. This
      input is ignored during analytic valuation but is required for
      Monte-Carlo valuation.
      Default value: `None`.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None` which means that default dtypes inferred by
      TensorFlow are used.
    name: Python string. The name to give to the ops created by this function.
      Default value: `None` which maps to the default name
      `hw_swaption_price`.

  Returns:
    A `Tensor` of real dtype and shape `expiries.shape + [dim]` containing the
    computed swaption prices. For swaptions that have reset in the past
    (expiries<0), the function sets the corresponding option prices to 0.0.

  Raises:
    ValueError: If the rank of `expiries` is smaller than the rank of
      `fixed_leg_payment_times` minus one, or if `use_analytic_pricing` is
      `False` and `time_step` is `None`.
  """
    # TODO(b/160061740): Extend the functionality to support mid-curve swaptions.
    name = name or 'hw_swaption_price'
    # The floating leg daycount fractions are not needed by either pricer.
    del floating_leg_daycount_fractions
    with tf.name_scope(name):
        expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
        # Fall back to the dtype TensorFlow inferred for `expiries`.
        dtype = dtype or expiries.dtype
        float_leg_start_times = tf.convert_to_tensor(
            floating_leg_start_times,
            dtype=dtype,
            name='float_leg_start_times')
        float_leg_end_times = tf.convert_to_tensor(floating_leg_end_times,
                                                   dtype=dtype,
                                                   name='float_leg_end_times')
        fixed_leg_payment_times = tf.convert_to_tensor(
            fixed_leg_payment_times,
            dtype=dtype,
            name='fixed_leg_payment_times')
        fixed_leg_daycount_fractions = tf.convert_to_tensor(
            fixed_leg_daycount_fractions,
            dtype=dtype,
            name='fixed_leg_daycount_fractions')
        fixed_leg_coupon = tf.convert_to_tensor(fixed_leg_coupon,
                                                dtype=dtype,
                                                name='fixed_leg_coupon')
        if notional is None:
            # Documented default: a unit notional. `tf.convert_to_tensor`
            # rejects `None`, so the default has to be substituted first.
            notional = 1.0
        notional = tf.convert_to_tensor(notional, dtype=dtype, name='notional')
        if is_payer_swaption is None:
            is_payer_swaption = True
        is_payer_swaption = tf.convert_to_tensor(is_payer_swaption,
                                                 dtype=tf.bool,
                                                 name='is_payer_swaption')

        # Captured before `expiries` is expanded/repeated below.
        output_shape = expiries.shape.as_list() + [dim]
        # Add a dimension corresponding to multiple cashflows in a swap
        if expiries.shape.rank == fixed_leg_payment_times.shape.rank - 1:
            expiries = tf.expand_dims(expiries, axis=-1)
        elif expiries.shape.rank < fixed_leg_payment_times.shape.rank - 1:
            raise ValueError(
                'Swaption expiries not specified for all swaptions '
                'in the batch. Expected rank {} but received {}.'.format(
                    fixed_leg_payment_times.shape.rank - 1,
                    expiries.shape.rank))

        # Expected shape: batch_shape + [m], same as fixed_leg_payment_times.shape
        # We need to explicitly use tf.repeat because we need to price
        # batch_shape + [m] bond options with different strikes along the last
        # dimension.
        expiries = tf.repeat(expiries,
                             fixed_leg_payment_times.shape.as_list()[-1],
                             axis=-1)

        if use_analytic_pricing:
            return _analytic_valuation(
                expiries, float_leg_start_times, float_leg_end_times,
                fixed_leg_payment_times, fixed_leg_daycount_fractions,
                fixed_leg_coupon, reference_rate_fn, dim, mean_reversion,
                volatility, notional, is_payer_swaption, output_shape, dtype,
                name + '_analytic_valyation')

        # Monte-Carlo pricing
        model = vector_hull_white.VectorHullWhiteModel(
            dim,
            mean_reversion,
            volatility,
            initial_discount_rate_fn=reference_rate_fn,
            dtype=dtype)

        if time_step is None:
            raise ValueError('`time_step` must be provided for simulation '
                             'based bond option valuation.')

        # Simulation grid: every distinct expiry plus a regular `time_step`
        # grid up to the longest expiry, de-duplicated and sorted.
        sim_times, _ = tf.unique(tf.reshape(expiries, shape=[-1]))
        longest_expiry = tf.reduce_max(sim_times)
        sim_times, _ = tf.unique(
            tf.concat(
                [sim_times,
                 tf.range(time_step, longest_expiry, time_step)],
                axis=0))
        sim_times = tf.sort(sim_times, name='sort_sim_times')

        maturities = fixed_leg_payment_times
        swaptionlet_shape = maturities.shape
        # Time from expiry to each fixed payment.
        tau = maturities - expiries

        curve_times_builder, _ = tf.unique(tf.reshape(tau, shape=[-1]))
        curve_times = tf.sort(curve_times_builder, name='sort_curve_times')

        p_t_tau, r_t = model.sample_discount_curve_paths(
            times=sim_times,
            curve_times=curve_times,
            num_samples=num_samples,
            random_type=random_type,
            seed=seed,
            skip=skip)

        # Step sizes of the simulation grid, with a leading zero so the
        # result has one entry per simulation time.
        dt = tf.concat([
            tf.convert_to_tensor([0.0], dtype=dtype),
            sim_times[1:] - sim_times[:-1]
        ],
                       axis=0)
        dt = tf.expand_dims(tf.expand_dims(dt, axis=-1), axis=0)
        discount_factors_builder = tf.math.exp(-r_t * dt)
        # Transpose before (and after) because we want the cumprod along axis=1
        # and `matvec` operates on the last axis.
        discount_factors_builder = tf.transpose(
            _cumprod_using_matvec(
                tf.transpose(discount_factors_builder, [0, 2, 1])), [0, 2, 1])

        # Make discount factors the same shape as `p_t_tau`. This involves
        # adding an extra dimension (corresponding to `curve_times`).
        discount_factors_builder = tf.expand_dims(discount_factors_builder,
                                                  axis=1)
        # tf.repeat is needed because we will use gather_nd later on this tensor.
        discount_factors_simulated = tf.repeat(discount_factors_builder,
                                               p_t_tau.shape.as_list()[1],
                                               axis=1)

        # `sim_times` and `curve_times` are sorted for simulation. We need to
        # select the indices corresponding to our input.
        sim_time_index = tf.searchsorted(sim_times, tf.reshape(expiries, [-1]))
        curve_time_index = tf.searchsorted(curve_times, tf.reshape(tau, [-1]))

        gather_index = _prepare_indices(tf.range(0, num_samples),
                                        curve_time_index, sim_time_index,
                                        tf.range(0, dim))

        # The shape after `gather_nd` will be `(num_samples*num_swaptionlets*dim,)`
        payoff_discount_factors_builder = tf.gather_nd(
            discount_factors_simulated, gather_index)
        # Reshape to `[num_samples] + swaptionlet.shape + [dim]`
        payoff_discount_factors = tf.reshape(payoff_discount_factors_builder,
                                             [num_samples] +
                                             swaptionlet_shape + [dim])
        payoff_bond_price_builder = tf.gather_nd(p_t_tau, gather_index)
        payoff_bond_price = tf.reshape(payoff_bond_price_builder,
                                       [num_samples] + swaptionlet_shape +
                                       [dim])

        # Add an axis corresponding to `dim`
        fixed_leg_pv = tf.expand_dims(
            fixed_leg_coupon * fixed_leg_daycount_fractions,
            axis=-1) * payoff_bond_price
        # Sum fixed coupon payments within each swap
        fixed_leg_pv = tf.math.reduce_sum(fixed_leg_pv, axis=-2)
        float_leg_pv = 1.0 - payoff_bond_price[..., -1, :]
        payoff_swap = payoff_discount_factors[..., -1, :] * (float_leg_pv -
                                                             fixed_leg_pv)
        # Receiver swaptions have the opposite sign of the payer swap payoff.
        payoff_swap = tf.where(is_payer_swaption, payoff_swap,
                               -1.0 * payoff_swap)
        payoff_swaption = tf.math.maximum(payoff_swap, 0.0)
        # Average over the sample axis to obtain the Monte-Carlo estimate.
        option_value = notional * tf.math.reduce_mean(payoff_swaption, axis=0)

        return tf.reshape(option_value, output_shape)
Exemple #14
0
def _analytic_valuation(expiries, floating_leg_start_times,
                        floating_leg_end_times, fixed_leg_payment_times,
                        fixed_leg_daycount_fractions, fixed_leg_coupon,
                        reference_rate_fn, dim, mean_reversion, volatility,
                        notional, is_payer_swaption, output_shape, dtype,
                        name):
    """Prices swaptions analytically via a portfolio of bond options.

  Break-even bond option strikes are obtained from
  `_jamshidian_decomposition`, the corresponding bond options are priced with
  `zcb.bond_option_price`, and the option prices are combined using the fixed
  leg cashflow coefficients. The result is reshaped to `output_shape`.

  The floating leg start/end times are accepted for interface parity but are
  discarded (they would only be needed for midcurve swaptions).
  """
    # The below inputs are needed for midcurve swaptions
    del floating_leg_start_times, floating_leg_end_times
    with tf.name_scope(name):
        # The bond option call flag is the negation of the payer flag.
        false_flag = tf.convert_to_tensor(False, dtype=tf.bool)
        true_flag = tf.convert_to_tensor(True, dtype=tf.bool)
        is_call_options = tf.where(is_payer_swaption, false_flag, true_flag)

        model = vector_hull_white.VectorHullWhiteModel(
            dim,
            mean_reversion,
            volatility,
            initial_discount_rate_fn=reference_rate_fn,
            dtype=dtype)

        # Per-payment cashflow weights of the fixed leg.
        coefficients = fixed_leg_daycount_fractions * fixed_leg_coupon
        # The last payment additionally carries the unit principal.
        leading_terms = -coefficients[..., :-1]
        final_term = tf.expand_dims(-1.0 - coefficients[..., -1], axis=-1)
        jamshidian_coefficients = tf.concat([leading_terms, final_term],
                                            axis=-1)

        breakeven_bond_option_strikes = _jamshidian_decomposition(
            model,
            expiries,
            fixed_leg_payment_times,
            jamshidian_coefficients,
            dtype,
            name=name + '_jamshidian_decomposition')

        # Rotate the trailing axis of the strikes to the front.
        strike_rank = breakeven_bond_option_strikes.shape.rank
        last_axis_first = [strike_rank - 1] + list(range(strike_rank - 1))
        breakeven_bond_option_strikes = tf.transpose(
            breakeven_bond_option_strikes, perm=last_axis_first)

        bond_option_prices = notional * zcb.bond_option_price(
            strikes=breakeven_bond_option_strikes,
            expiries=expiries,
            maturities=fixed_leg_payment_times,
            discount_rate_fn=reference_rate_fn,
            dim=dim,
            mean_reversion=mean_reversion,
            volatility=volatility,
            is_call_options=is_call_options,
            use_analytic_pricing=True,
            dtype=dtype,
            name=name + '_bond_option')

        # Now compute P(T0, TN) + sum_i (c_i * tau_i * P(T0, Ti))
        # bond_option_prices.shape = [dim] + batch_shape + [m] + [dim], where `m`
        # denotes the number of fixed payments for the underlying swaps.
        weighted_prices = bond_option_prices * tf.expand_dims(coefficients,
                                                              axis=-1)
        coupon_part = tf.reduce_sum(weighted_prices, axis=-2)
        principal_part = bond_option_prices[..., -1, :]
        swaption_values = coupon_part + principal_part

        gather_index = _prepare_swaption_indices(
            swaption_values.shape.as_list())
        selected_values = tf.gather_nd(swaption_values, gather_index)
        return tf.reshape(selected_values, output_shape)
Exemple #15
0
def _update_confusion_matrix_variables_optimized(
        variables_to_update,
        y_true,
        y_pred,
        thresholds,
        multi_label=False,
        sample_weights=None,
        label_weights=None,
        thresholds_with_epsilon=False):
    """Updates confusion matrix variables using a bucketed, low-memory scheme.

  The thresholds must be evenly spaced, i.e. the difference between
  consecutive elements must be constant.

  TP/FP/TN/FN are defined through the binary classifier
    C(t) = (predictions >= t)
  evaluated at every threshold `t`:
    TP(t) = sum( C(t) * true_labels )
    FP(t) = sum( C(t) * false_labels )

  Rather than evaluating C(t) separately per threshold, each prediction is
  assigned to a bucket between two consecutive thresholds, the (weighted)
  labels are summed per bucket with `tf.math.unsorted_segment_sum()`, and a
  reversed `tf.cumsum()` over the buckets yields the value for every
  threshold at once. With
    width = 1.0 / (num_thresholds - 1)
    thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
  the bucket of a prediction `p` is
    bucket_index(p) = ceil( p * (num_thresholds - 1) ) - 1
  where `ceil(.) - 1` is used so that a prediction equal to a threshold falls
  into the bucket below it.

  Worked example:
  y_true = [0, 0, 1, 1]
  y_pred = [0.1, 0.5, 0.3, 0.9]
  thresholds = [0.0, 0.5, 1.0]
  bucket_indices = ceil([0.2, 1.0, 0.6, 1.8]) - 1
                 = [0, 0, 0, 1]
  tp_bucket_value = unsorted_segment_sum(true_labels, bucket_indices,
                                         num_segments=3)
                  = [1, 1, 0]
  # A bucket contributes to its own threshold and to every smaller one, so
  # [a, b, c] aggregates to [a + b + c, b + c, c].
  true_positive = cumsum(tp_bucket_value, reverse=True)
                = [2, 1, 0]

  This implementation exhibits a run time and space complexity of O(T + N),
  where T is the number of thresholds and N is the size of predictions.
  Metrics that rely on the standard implementation instead exhibit a
  complexity of O(T * N).

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A floating point `Tensor` whose shape matches `y_pred`. Will be cast
      to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A sorted floating point `Tensor` with values in `[0, 1]`.
      It needs to be evenly distributed (the difference between consecutive
      elements needs to be the same).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    sample_weights: Optional `Tensor` whose rank is either 0, or the same rank
      as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `y_true` dimension).
    label_weights: Optional tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).
    thresholds_with_epsilon: Optional boolean indicating whether the leading and
      trailing thresholds have an epsilon added for floating point imprecision.
      It changes how the leading and trailing buckets are handled.

  Returns:
    Update op.
  """
    num_thresholds = thresholds.shape.as_list()[0]

    if sample_weights is not None:
        sample_weights = tf.__internal__.ops.broadcast_weights(
            tf.cast(sample_weights, dtype=y_pred.dtype), y_pred)
        if not multi_label:
            sample_weights = tf.reshape(sample_weights, [-1])
    else:
        sample_weights = 1.0

    if label_weights is not None:
        label_weights = tf.expand_dims(label_weights, 0)
        label_weights = tf.__internal__.ops.broadcast_weights(
            label_weights, y_pred)
        if not multi_label:
            label_weights = tf.reshape(label_weights, [-1])
    else:
        label_weights = 1.0

    weights = tf.multiply(sample_weights, label_weights)

    # This should be unnecessary, but guards against predictions outside of
    # the [0.0, 1.0] range.
    y_pred = tf.clip_by_value(y_pred, clip_value_min=0.0, clip_value_max=1.0)

    # Round-trip through bool so that labels are exactly 0 or 1.
    y_true = tf.cast(tf.cast(y_true, tf.bool), y_true.dtype)
    if not multi_label:
        y_true = tf.reshape(y_true, [-1])
        y_pred = tf.reshape(y_pred, [-1])

    true_labels = tf.multiply(y_true, weights)
    false_labels = tf.multiply((1.0 - y_true), weights)

    # A prediction has to be strictly greater than a threshold to count for
    # it, e.g. with buckets [0, 0.5], (0.5, 1] the value 0.5 belongs to the
    # first bucket. Hence ceil(val) - 1 rather than floor(val).
    bucket_indices = tf.math.ceil(y_pred * (num_thresholds - 1)) - 1

    if thresholds_with_epsilon:
        # With an epsilon on the first threshold every prediction in
        # [0.0, 1.0] exceeds it, so bucket -1 is folded into bucket 0.
        bucket_indices = tf.nn.relu(bucket_indices)

    bucket_indices = tf.cast(bucket_indices, tf.int32)

    def sum_into_buckets(values, indices):
        return tf.math.unsorted_segment_sum(data=values,
                                            segment_ids=indices,
                                            num_segments=num_thresholds)

    if not multi_label:
        tp_bucket_v = sum_into_buckets(true_labels, bucket_indices)
        fp_bucket_v = sum_into_buckets(false_labels, bucket_indices)
        tp = tf.cumsum(tp_bucket_v, reverse=True)
        fp = tf.cumsum(fp_bucket_v, reverse=True)
    else:
        # The bucket sum has to run once per label class. In the multi_label
        # case the labels have rank 2; transposing puts the label dimension
        # first so that the classes can be mapped over.
        true_labels = tf.transpose(true_labels)
        false_labels = tf.transpose(false_labels)
        bucket_indices = tf.transpose(bucket_indices)

        def gather_bucket(label_and_bucket_index):
            per_class_labels = label_and_bucket_index[0]
            per_class_buckets = label_and_bucket_index[1]
            return sum_into_buckets(per_class_labels, per_class_buckets)

        tp_bucket_v = tf.vectorized_map(gather_bucket,
                                        (true_labels, bucket_indices))
        fp_bucket_v = tf.vectorized_map(gather_bucket,
                                        (false_labels, bucket_indices))
        tp = tf.transpose(tf.cumsum(tp_bucket_v, reverse=True, axis=1))
        fp = tf.transpose(tf.cumsum(fp_bucket_v, reverse=True, axis=1))

    # fn = sum(true_labels) - tp
    # tn = sum(false_labels) - fp
    wants_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    wants_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update
    if wants_tn or wants_fn:
        # Per-class totals in the multi_label case, global totals otherwise.
        reduction_axis = 1 if multi_label else None
        total_true_labels = tf.reduce_sum(true_labels, axis=reduction_axis)
        total_false_labels = tf.reduce_sum(false_labels, axis=reduction_axis)

    update_ops = []
    if ConfusionMatrix.TRUE_POSITIVES in variables_to_update:
        tp_variable = variables_to_update[ConfusionMatrix.TRUE_POSITIVES]
        update_ops.append(tp_variable.assign_add(tp))
    if ConfusionMatrix.FALSE_POSITIVES in variables_to_update:
        fp_variable = variables_to_update[ConfusionMatrix.FALSE_POSITIVES]
        update_ops.append(fp_variable.assign_add(fp))
    if wants_tn:
        tn_variable = variables_to_update[ConfusionMatrix.TRUE_NEGATIVES]
        update_ops.append(tn_variable.assign_add(total_false_labels - fp))
    if wants_fn:
        fn_variable = variables_to_update[ConfusionMatrix.FALSE_NEGATIVES]
        update_ops.append(fn_variable.assign_add(total_true_labels - tp))
    return tf.group(update_ops)
Exemple #16
0
 def transpose_fn(batch):
     """Returns a shallow copy of `batch` with the 'images' axes permuted.

     Applies the double-transpose trick for TPU: the leading axis of
     `batch['images']` is moved to the last position (perm `(1, 2, 3, 0)`).
     The input mapping itself is left untouched.
     """
     transposed = dict(**batch)
     leading_axis_last = (1, 2, 3, 0)
     transposed['images'] = tf.transpose(transposed['images'],
                                         leading_axis_last)
     return transposed
Exemple #17
0
    def _sample_n(self, n, seed):
        """Draws `n` samples, shaped `[n] + batch_shape + event_shape`.

        A lower-triangular matrix is built whose strictly-lower entries are
        standard normal draws and whose diagonal holds the square roots of
        gamma draws. It is left-multiplied by `self.scale_operator`. Unless
        `self.input_output_cholesky` is set, the result is multiplied by its
        own adjoint before being returned.

        Args:
          n: Number of samples to draw.
          seed: Seed used to build the `SeedStream` (salted with "Wishart")
            that feeds both random ops.

        Returns:
          The sampled factors if `self.input_output_cholesky` is set,
          otherwise the product of each factor with its adjoint.
        """
        batch_shape = self.batch_shape_tensor()
        event_shape = self.event_shape_tensor()
        batch_ndims = tf.shape(input=batch_shape)[0]

        # One sample dimension plus two event dimensions.
        ndims = batch_ndims + 3
        sample_shape = tf.concat([[n], batch_shape, event_shape], 0)
        stream = seed_stream.SeedStream(seed, salt="Wishart")

        # Complexity: O(nbk**2)
        normal_draws = tf.random.normal(shape=sample_shape,
                                        mean=0.,
                                        stddev=1.,
                                        dtype=self.dtype,
                                        seed=stream())

        # Complexity: O(nbk)
        # This parametrization is equivalent to Chi2, i.e.,
        # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
        df_dtype = dtype_util.base_dtype(self.df.dtype)
        expanded_df = self.df * tf.ones(
            self.scale_operator.batch_shape_tensor(), dtype=df_dtype)
        gamma_alpha = self._multi_gamma_sequence(0.5 * expanded_df,
                                                 self.dimension)
        gamma_draws = tf.random.gamma(shape=[n],
                                      alpha=gamma_alpha,
                                      beta=0.5,
                                      dtype=self.dtype,
                                      seed=stream())

        # Keep only the lower triangle. Complexity: O(nbk**2)
        lower = tf.linalg.band_part(normal_draws, -1, 0)

        # Overwrite the diagonal. Complexity: O(nbk)
        factor = tf.linalg.set_diag(lower, tf.sqrt(gamma_draws))

        # Make batch-op ready: move the sample axis to the end and fold it
        # into the last event axis.
        # Complexity: O(nbk**2)
        sample_axis_last = tf.concat([tf.range(1, ndims), [0]], 0)
        factor = tf.transpose(a=factor, perm=sample_axis_last)
        folded_shape = tf.concat(
            [batch_shape, [event_shape[0]], [event_shape[1] * n]], 0)
        factor = tf.reshape(factor, folded_shape)

        # Complexity: O(nbM) where M is the complexity of the operator solving a
        # vector system. For LinearOperatorLowerTriangular, each matmul is O(k^3) so
        # this step has complexity O(nbk^3).
        scaled = self.scale_operator.matmul(factor)

        # Undo make batch-op ready: unfold the sample axis and move it back
        # to the front.
        # Complexity: O(nbk**2)
        unfolded_shape = tf.concat([batch_shape, event_shape, [n]], 0)
        scaled = tf.reshape(scaled, unfolded_shape)
        sample_axis_first = tf.concat([[ndims - 1], tf.range(0, ndims - 1)],
                                      0)
        samples = tf.transpose(a=scaled, perm=sample_axis_first)

        if self.input_output_cholesky:
            return samples

        # Complexity: O(nbk**3)
        return tf.matmul(samples, samples, adjoint_b=True)
def roll_channels_from_batch(tensor):
    """Moves axis 1 of a rank-5 tensor to axis 3, keeping the other axes in order.

    In the layout named by the original comment this turns `[B, C, H, W, D]`
    into `[B, H, W, C, D]`.

    Args:
      tensor: Rank-5 `Tensor` (the permutation below lists five axes).

    Returns:
      The transposed `Tensor`.
    """
    # Output axis `i` is taken from input axis `axis_order[i]`.
    axis_order = [0, 2, 3, 1, 4]
    return tf.transpose(tensor, perm=axis_order)
    def _testMVN(self,
                 base_distribution_class,
                 base_distribution_kwargs,
                 batch_shape=(),
                 event_shape=(),
                 not_implemented_message=None):
        """Checks the distribution built by `self._cls()` against reference MVNs.

        Two instances of `self._cls()` are built around the given base
        distribution and a `tfb.Affine(shift=self._shift, scale_tril=self._tril)`
        bijector: one receives the overriding shapes through placeholders whose
        static shape is unknown, the other receives them directly. Samples,
        moments, `log_prob`, `prob`, `mean` and `entropy` of both are compared
        with `stats.multivariate_normal` references built from `self._shift` and
        `self._tril`.

        Args:
          base_distribution_class: Callable constructing the base distribution;
            it is called with `validate_args=True` and
            `**base_distribution_kwargs`.
          base_distribution_kwargs: Dict of keyword arguments forwarded to
            `base_distribution_class`.
          batch_shape: Overriding batch shape passed to `self._cls()`. The
            assertions in this method expect the resulting batch shape to be
            `[2]`.
          event_shape: Overriding event shape passed to `self._cls()`. The
            assertions in this method expect the resulting event shape to be
            `[3]`.
          not_implemented_message: Regex expected to match the
            `NotImplementedError` raised by `log_cdf`, `cdf`,
            `survival_function` and `log_survival_function`.
        """
        # Overriding shapes must be compatible w/bijector; most bijectors are
        # batch_shape agnostic and only care about event_ndims.
        # In the case of `Affine`, if we got it wrong then it would fire an
        # exception due to incompatible dimensions.
        # `shape=None` hides the static shape so the dynamic-shape code path of
        # the distribution under test is exercised.
        batch_shape_pl = tf1.placeholder_with_default(
            input=np.int32(batch_shape),
            shape=None,
            name="dynamic_batch_shape")
        event_shape_pl = tf1.placeholder_with_default(
            input=np.int32(event_shape),
            shape=None,
            name="dynamic_event_shape")
        fake_mvn_dynamic = self._cls()(
            distribution=base_distribution_class(validate_args=True,
                                                 **base_distribution_kwargs),
            bijector=tfb.Affine(shift=self._shift, scale_tril=self._tril),
            batch_shape=batch_shape_pl,
            event_shape=event_shape_pl,
            validate_args=True)

        # Same construction, but with statically known overriding shapes.
        fake_mvn_static = self._cls()(
            distribution=base_distribution_class(validate_args=True,
                                                 **base_distribution_kwargs),
            bijector=tfb.Affine(shift=self._shift, scale_tril=self._tril),
            batch_shape=batch_shape,
            event_shape=event_shape,
            validate_args=True)

        # Reference parameters: one mean / covariance per batch member, with
        # covariance = tril @ tril^T.
        actual_mean = np.tile(self._shift, [2, 1])  # Affine elided this tile.
        actual_cov = np.matmul(self._tril, np.transpose(self._tril, [0, 2, 1]))

        def actual_mvn_log_prob(x):
            # Evaluates the reference log-density of `x[:, i, :]` for each batch
            # member `i`; the transpose puts the sample axis first in the result.
            return np.concatenate([[
                stats.multivariate_normal(actual_mean[i],
                                          actual_cov[i]).logpdf(x[:, i, :])
            ] for i in range(len(actual_cov))]).T

        # Reference entropy, one value per batch member.
        actual_mvn_entropy = np.concatenate([[
            stats.multivariate_normal(actual_mean[i], actual_cov[i]).entropy()
        ] for i in range(len(actual_cov))])

        self.assertAllEqual([3], fake_mvn_static.event_shape)
        self.assertAllEqual([2], fake_mvn_static.batch_shape)

        # In graph mode the placeholder-fed instance must report fully unknown
        # static shapes.
        if not tf.executing_eagerly():
            self.assertAllEqual(tf.TensorShape(None),
                                fake_mvn_dynamic.event_shape)
            self.assertAllEqual(tf.TensorShape(None),
                                fake_mvn_dynamic.batch_shape)

        # The CDF-family methods are expected to be unimplemented.
        x = self.evaluate(
            fake_mvn_static.sample(5, seed=tfp_test_util.test_seed()))
        for unsupported_fn in (fake_mvn_static.log_cdf, fake_mvn_static.cdf,
                               fake_mvn_static.survival_function,
                               fake_mvn_static.log_survival_function):
            with self.assertRaisesRegexp(NotImplementedError,
                                         not_implemented_message):
                unsupported_fn(x)

        # Kept as a float because it is also the divisor of the covariance
        # estimate; it is cast to int where a sample count is required.
        num_samples = 7e3
        for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
            # Ensure sample works by checking first, second moments.
            y = fake_mvn.sample(int(num_samples),
                                seed=tfp_test_util.test_seed())
            # The first five draws are reused as evaluation points for
            # `log_prob` / `prob`.
            x = y[0:5, ...]
            sample_mean = tf.reduce_mean(input_tensor=y, axis=0)
            # Move the sample axis last so the matmul below contracts over
            # samples, giving one covariance matrix per batch member.
            centered_y = tf.transpose(a=y - sample_mean, perm=[1, 2, 0])
            sample_cov = tf.matmul(centered_y, centered_y,
                                   transpose_b=True) / num_samples
            # Evaluate everything in a single call so all quantities come from
            # the same draw of `y`.
            [
                sample_mean_,
                sample_cov_,
                x_,
                fake_event_shape_,
                fake_batch_shape_,
                fake_log_prob_,
                fake_prob_,
                fake_mean_,
                fake_entropy_,
            ] = self.evaluate([
                sample_mean,
                sample_cov,
                x,
                fake_mvn.event_shape_tensor(),
                fake_mvn.batch_shape_tensor(),
                fake_mvn.log_prob(x),
                fake_mvn.prob(x),
                fake_mvn.mean(),
                fake_mvn.entropy(),
            ])

            # Loose tolerances: these are Monte Carlo estimates.
            self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
            self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

            # Ensure all other functions work as intended.
            self.assertAllEqual([5, 2, 3], x_.shape)
            self.assertAllEqual([3], fake_event_shape_)
            self.assertAllEqual([2], fake_batch_shape_)
            self.assertAllClose(actual_mvn_log_prob(x_),
                                fake_log_prob_,
                                atol=0.,
                                rtol=1e-6)
            self.assertAllClose(np.exp(actual_mvn_log_prob(x_)),
                                fake_prob_,
                                atol=0.,
                                rtol=1e-5)
            self.assertAllClose(actual_mean, fake_mean_, atol=0., rtol=1e-6)
            self.assertAllClose(actual_mvn_entropy,
                                fake_entropy_,
                                atol=0.,
                                rtol=1e-6)
Exemple #20
0
 def _transpose_around_bijector_fn(self,
                                   bijector_fn,
                                   arg,
                                   src_event_ndims,
                                   dest_event_ndims=None,
                                   fn_reduces_event=False,
                                   **kwargs):
     """Applies `bijector_fn` with the `sample_shape` axes moved left of batch.

     Args:
       bijector_fn: Callable invoked as `bijector_fn(arg, **kwargs)` on the
         transposed `arg`.
       arg: `Tensor` to transform. If its rank is smaller than the batch,
         `sample_shape` and event ranks combined, its shape is left-padded
         with ones first.
       src_event_ndims: Expected event rank of `arg`, omitting
         `self.sample_shape`.
       dest_event_ndims: Expected event rank of the result, omitting
         `self.sample_shape`. Replaced by 0 when `fn_reduces_event` is True;
         otherwise it is subtracted from the result rank, so it must not be
         left as `None`.
       fn_reduces_event: Python `bool`. When True, the result is treated as
         having no event dims and its shape is left-padded with ones if its
         rank is too small for the final transpose.
       **kwargs: Forwarded unchanged to `bijector_fn`.

     Returns:
       The result of `bijector_fn`, transposed so that the axes corresponding
       to `self.sample_shape` follow the batch axes again.
     """
     # This function moves the axes corresponding to `self.sample_shape` to the
     # left of the batch shape, then applies `bijector_fn`, then moves the axes
     # corresponding to `self.sample_shape` back to the event part of the shape.
     #
     # `src_event_ndims` and `dest_event_ndims` indicate the expected event rank
     # (omitting `self.sample_shape`) before and after applying `bijector_fn`.
     #
     # This function arose because forward and inverse ended up being quite
     # similar. It was then only a small generalization to also support {F/I}LDJ.
     batch_ndims = ps.rank_from_shape(self.distribution.batch_shape_tensor,
                                      self.distribution.batch_shape)
     extra_sample_ndims = ps.rank_from_shape(self.sample_shape)
     arg_ndims = ps.rank(arg)
     # (1) Expand arg's dims.
     # `d` is the number of leading dims beyond batch + sample_shape + event;
     # a negative `d` means `arg` is missing that many leading dims.
     d = arg_ndims - batch_ndims - extra_sample_ndims - src_event_ndims
     arg = tf.reshape(arg,
                      shape=ps.pad(ps.shape(arg),
                                   paddings=[[ps.maximum(0, -d), 0]],
                                   constant_values=1))
     arg_ndims = ps.rank(arg)
     sample_ndims = ps.maximum(0, d)
     # (2) Transpose arg's dims.
     # Incoming axis order: [sample, batch, extra_sample, event].
     sample_dims = ps.range(0, sample_ndims)
     batch_dims = ps.range(sample_ndims, sample_ndims + batch_ndims)
     extra_sample_dims = ps.range(
         sample_ndims + batch_ndims,
         sample_ndims + batch_ndims + extra_sample_ndims)
     event_dims = ps.range(sample_ndims + batch_ndims + extra_sample_ndims,
                           arg_ndims)
     # Outgoing axis order: [sample, extra_sample, batch, event].
     perm = ps.concat(
         [sample_dims, extra_sample_dims, batch_dims, event_dims], axis=0)
     arg = tf.transpose(arg, perm=perm)
     # (3) Apply underlying bijector.
     result = bijector_fn(arg, **kwargs)
     # (4) Transpose sample_shape from the sample to the event shape.
     result_ndims = ps.rank(result)
     if fn_reduces_event:
         dest_event_ndims = 0
     d = result_ndims - batch_ndims - extra_sample_ndims - dest_event_ndims
     if fn_reduces_event:
         # In some cases, fn may reduce event too far, i.e. ildj may return a
         # scalar `0.`, which won't work with the transpose we do below.
         result = tf.reshape(result,
                             shape=ps.pad(ps.shape(result),
                                          paddings=[[ps.maximum(0, -d), 0]],
                                          constant_values=1))
         result_ndims = ps.rank(result)
     sample_ndims = ps.maximum(0, d)
     # Axis order of `result`: [sample, extra_sample, batch, event].
     sample_dims = ps.range(0, sample_ndims)
     extra_sample_dims = ps.range(sample_ndims,
                                  sample_ndims + extra_sample_ndims)
     batch_dims = ps.range(sample_ndims + extra_sample_ndims,
                           sample_ndims + extra_sample_ndims + batch_ndims)
     event_dims = ps.range(sample_ndims + extra_sample_ndims + batch_ndims,
                           result_ndims)
     # Restore the order [sample, batch, extra_sample, event].
     perm = ps.concat(
         [sample_dims, batch_dims, extra_sample_dims, event_dims], axis=0)
     return tf.transpose(result, perm=perm)
Exemple #21
0
    def _sample_n(self, n, seed=None):
        """Draws `n` samples, with gradients flowing to all four parameters.

        Standard truncated-normal draws are generated for the standardized
        bounds and then scaled by `scale` and shifted by `loc`. A custom
        gradient supplies the derivative with respect to the truncation bounds.

        Args:
          n: Number of samples to draw.
          seed: Seed forwarded to `parameterized_truncated_normal`.

        Returns:
          A `Tensor` reshaped to `[n] + batch_shape`, then scaled and shifted.
        """
        loc, scale, low, high = self._loc_scale_low_high()
        batch_shape = self._batch_shape_tensor(loc=loc,
                                               scale=scale,
                                               low=low,
                                               high=high)
        sample_and_batch_shape = tf.concat([[n], batch_shape], 0)
        # The sampler is called on a flattened batch, with samples on the last
        # axis.
        flat_batch_and_sample_shape = tf.stack(
            [tf.reduce_prod(batch_shape), n])

        # In order to be reparameterizable we sample from a truncated normal
        # with zero mean and unit standard deviation (using the standardized
        # truncation bounds), and apply `loc` and `scale` to the draws at the
        # end of this method.

        @tf.custom_gradient
        def _std_samples_with_gradients(lower, upper):
            """Standard truncated Normal with gradient support for low, high."""
            # Note: Unlike the convention in TFP, parameterized_truncated_normal
            # returns a tensor with the final dimension being the sample dimension.
            std_samples = random_ops.parameterized_truncated_normal(
                shape=flat_batch_and_sample_shape,
                means=0.0,
                stddevs=1.0,
                minvals=lower,
                maxvals=upper,
                dtype=self.dtype,
                seed=seed)

            def grad(dy):
                """Computes a derivative for the min and max parameters.

                This function implements the derivative wrt the truncation
                bounds, which get blocked by the sampler. We use a custom
                expression for numerical stability instead of automatic
                differentiation on CDF for implicit gradients.

                Args:
                  dy: output gradients

                Returns:
                  A list `[grad_lower, grad_upper]` holding the gradients wrt
                  the lower bound and the upper bound, in that order.
                """
                # std_samples has an extra dimension (the sample dimension), expand
                # lower and upper so they broadcast along this dimension.
                # See note above regarding parameterized_truncated_normal, the sample
                # dimension is the final dimension.
                lower_broadcast = lower[..., tf.newaxis]
                upper_broadcast = upper[..., tf.newaxis]

                # Position of each sample within the truncated interval,
                # expressed through the standard normal CDF.
                cdf_samples = ((special_math.ndtr(std_samples) -
                                special_math.ndtr(lower_broadcast)) /
                               (special_math.ndtr(upper_broadcast) -
                                special_math.ndtr(lower_broadcast)))

                # tiny, eps are tolerance parameters to ensure we stay away from giving
                # a zero arg to the log CDF expression.

                tiny = np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny
                eps = np.finfo(dtype_util.as_numpy_dtype(self.dtype)).eps
                cdf_samples = tf.clip_by_value(cdf_samples, tiny, 1 - eps)

                # Per-sample derivatives wrt the upper (`du`) and lower (`dl`)
                # bounds, evaluated in log space.
                du = tf.exp(0.5 * (std_samples**2 - upper_broadcast**2) +
                            tf.math.log(cdf_samples))
                dl = tf.exp(0.5 * (std_samples**2 - lower_broadcast**2) +
                            tf.math.log1p(-cdf_samples))

                # Reduce the gradient across the samples
                grad_u = tf.reduce_sum(dy * du, axis=-1)
                grad_l = tf.reduce_sum(dy * dl, axis=-1)
                # Order matches the `(lower, upper)` argument order.
                return [grad_l, grad_u]

            return std_samples, grad

        std_low, std_high = self._standardized_low_and_high(low=low,
                                                            high=high,
                                                            loc=loc,
                                                            scale=scale)
        # Broadcast both bounds to a common shape so they flatten to vectors of
        # equal length.
        low_high_shp = tf.broadcast_dynamic_shape(tf.shape(std_low),
                                                  tf.shape(std_high))
        std_low = tf.broadcast_to(std_low, low_high_shp)
        std_high = tf.broadcast_to(std_high, low_high_shp)

        std_samples = _std_samples_with_gradients(tf.reshape(std_low, [-1]),
                                                  tf.reshape(std_high, [-1]))

        # The returned shape is [flat_batch x n]
        std_samples = tf.transpose(std_samples, perm=[1, 0])

        std_samples = tf.reshape(std_samples, sample_and_batch_shape)
        # Undo the standardization; the new leading axis lines `scale` and
        # `loc` up with the sample axis.
        return std_samples * scale[tf.newaxis] + loc[tf.newaxis]
def calibrate(*,
              forwards,
              expiries,
              strikes,
              volatilities,
              initial_position=None,
              optimizer_fn=None,
              tolerance=1e-6,
              maximum_iterations=100,
              dtype=None,
              name=None):
    """Calibrates the SVI model parameters for a batch of volatility skews.

  This function optimizes the SVI model parameters to fit the given volatilities
  at various strikes. The loss function is the L2 norm of the differences in the
  volatility space.

  Each volatility skew in the batch corresponds to a fixed expiry for options
  on some underlying assets. Optimization is done independently for each skew.

  TODO(b/189458981): add flexibility to accept higher rank tensors as inputs.

  #### Example
  The example shows how to calibrate a single skew, loosely based on market
  prices for GOOG210820C* (GOOG calls with 2021-08-20 expiry) as of 2021-05-27.
  https://finance.yahoo.com/quote/GOOG/options?p=GOOG&date=1629417600

  ````python
  import numpy as np
  import tensorflow.compat.v2 as tf
  import tf_quant_finance as tff

  forwards = np.array([2402.])
  expiries = np.array([0.23])
  strikes = np.array([[
      1700., 1800., 1900., 2000., 2050., 2100., 2200., 2250., 2350., 2400.,
      2450., 2500., 2550., 2600., 2650., 2700., 2750., 2800., 2850., 2900.,
      2950., 3000.
  ]])
  volatilities = np.array([[
      0.5335, 0.4882, 0.4389, 0.3937, 0.3749, 0.3569, 0.3259, 0.3135, 0.29,
      0.283, 0.2717, 0.2667, 0.2592, 0.2566, 0.2564, 0.2574, 0.2595, 0.2621,
      0.2669, 0.2732, 0.2826, 0.2967
  ]])

  tolerance=1e-4
  (svi_params, converged, _) = tff.experimental.svi.calibrate(
      forwards=forwards,
      expiries=expiries,
      strikes=strikes,
      volatilities=volatilities)

  # Expected results are tensors containing (up to numerical tolerance):
  # svi_params: [[-0.2978, 0.4212, 0.0415, 0.1282, 0.7436]]
  # converged: [True]
  ````

  Args:
    forwards: A rank 1 real `Tensor` of shape [batch_size]. The forward prices
      of the underlying asset for each skew in the batch.
    expiries: A rank 1 real `Tensor` of shape [batch_size]. The option expiries
      for each skew in the batch.
    strikes: A rank 2 real `Tensor` of shape [batch_size, num_strikes]. The
      strike prices of the options.
    volatilities: A rank 2 real `Tensor` of shape [batch_size, num_strikes]. The
      market implied Black-Scholes volatilities to calibrate.
    initial_position: A rank 2 real `Tensor` of shape [batch_size, 5]. The SVI
      parameters to use as the initial values for the optimization, ordered as
      `[a, b, rho, m, sigma]`. The default value is None, in which case the
      initial values are guessed heuristically (from the strike with the lowest
      volatility in each skew) and may lead to slower convergence.
    optimizer_fn: Optional Python callable which implements the algorithm used
      to minimize the objective function during calibration. It should have
      the following interface: result =
        optimizer_fn(value_and_gradients_function, initial_position, tolerance,
        max_iterations) `value_and_gradients_function` is a Python callable that
        accepts a point as a real `Tensor` and returns a tuple of `Tensor`s of
        real dtype containing the value of the function and its gradient at that
        point. 'initial_position' is a real `Tensor` containing the starting
        point of the optimization, 'tolerance' is a real scalar `Tensor` for
        stopping tolerance for the procedure and `max_iterations` specifies the
        maximum number of iterations.
      `optimizer_fn` should return a namedtuple containing the items: `position`
        (a tensor containing the optimal value), `converged` (a boolean
        indicating whether the optimizer converged according to the specified
        criteria), `failed` (a boolean indicating if the optimization resulted
        in a failure), `num_iterations` (the number of iterations used), and
        `objective_value` (the value of the objective function at the optimal
        value). The default value for `optimizer_fn` is None and conjugate
        gradient algorithm is used.
    tolerance: Scalar `Tensor` of real dtype. The absolute tolerance for
      terminating the iterations.
      Default value: 1e-6.
    maximum_iterations: Scalar positive int32 `Tensor`. The maximum number of
      iterations during the optimization.
      Default value: 100.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None`, uses the default dtypes inferred by TensorFlow.
    name: Python string. The name to give to the ops created by this function.
      Default value: `None`, maps to the default name `svi_skew_calibration`.

  Returns:
    A Tuple of three elements: (parameters, status, iterations)
    - parameters: a tensor of shape [batch_size, 5] representing raw parameters
      for the SVI model calibrated with given input Black-Scholes volatilities.
    - status: boolean, whether the optimization algorithm succeeded in finding
      the optimal point based on the specified convergence criteria.
    - iterations: the number of iterations performed during the optimization.

  """
    name = name or 'svi_skew_calibration'
    with tf.name_scope(name):
        # `volatilities` is converted first so that, when `dtype` is None, its
        # inferred dtype is reused for all remaining inputs.
        volatilities = tf.convert_to_tensor(volatilities,
                                            dtype=dtype,
                                            name='volatilities')
        dtype = dtype or volatilities.dtype
        forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
        expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
        strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')

        # the standard notation for log moneyness in the literature is k:=log(K/F)
        log_moneyness = tf.math.log(strikes / forwards[:, None])

        if initial_position is None:
            # Heuristic starting point: a flat smile (b = 0) at the level of
            # the lowest quoted volatility, centred on that strike.
            minvol_index = tf.argmin(volatilities, axis=1)
            a0 = tf.gather(volatilities, minvol_index, axis=1, batch_dims=1)**2
            b0 = tf.zeros_like(forwards, dtype=dtype)
            rho0 = tf.zeros_like(forwards, dtype=dtype)
            sigma0 = 0.5 * tf.ones_like(forwards, dtype=dtype)
            m0 = tf.gather(log_moneyness, minvol_index, axis=1, batch_dims=1)
            initial_position = tf.transpose([a0, b0, rho0, m0, sigma0])

        if optimizer_fn is None:
            optimizer_fn = optimizer.conjugate_gradient_minimize

        @make_val_and_grad_fn
        def loss_function(parameters):
            """Loss function for the optimization."""
            total_variance = parameterizations.total_variance_from_raw(
                parameters, log_moneyness)
            is_negative = total_variance < 0.

            # `tf.where` evaluates and differentiates both branches, so a
            # square root of a negative variance in the unselected branch would
            # turn the whole gradient into NaN. Feed the square root a harmless
            # positive value at those points; the outer `tf.where` discards the
            # result there, so the forward value is unchanged.
            safe_total_variance = tf.where(is_negative,
                                           tf.ones_like(total_variance),
                                           total_variance)
            model_vol = tf.where(
                is_negative, tf.zeros_like(total_variance),
                tf.sqrt(safe_total_variance / expiries[:, None]))

            # Where the model variance is negative, use a penalty that grows
            # with its magnitude instead of the volatility mismatch.
            squared_difference = tf.where(
                is_negative, volatilities**2 - total_variance,
                tf.math.squared_difference(model_vol, volatilities))

            # One loss value per skew in the batch.
            return tf.math.reduce_sum(squared_difference, axis=1)

        optimization_result = optimizer_fn(loss_function,
                                           initial_position=initial_position,
                                           tolerance=tolerance,
                                           max_iterations=maximum_iterations)

        # The optimizer may converge negative SVI sigma; to enforce the positivity
        # convention, we take sigma by absolute value, which yields the same model.
        calibrated_parameters = tf.concat([
            optimization_result.position[:, :-1],
            tf.math.abs(optimization_result.position[:, -1, None])
        ],
                                          axis=1)

        return (calibrated_parameters, optimization_result.converged,
                optimization_result.num_iterations)
Exemple #23
0
def options_price_from_samples(strikes: types.RealTensor,
                               expiries: types.RealTensor,
                               maturities: types.RealTensor,
                               is_call_options: types.BoolTensor,
                               sample_discount_curve_paths_fn: Callable[
                                   ..., Tuple[types.RealTensor,
                                              types.RealTensor]],
                               num_samples: types.IntTensor,
                               time_step: types.RealTensor,
                               dtype: tf.DType = None,
                               name: str = None) -> types.RealTensor:
    """Computes the zero coupon bond options price from simulated discount curves.

  The price is the Monte Carlo mean, over the simulated paths, of the option
  payoff at expiry multiplied by the simulated discount factor to expiry.

  Args:
    strikes: A real `Tensor` of any shape and dtype. The strike price of the
      options. The shape of this input determines the number (and shape) of the
      options to be priced and the output.
    expiries: A real `Tensor` of the same dtype and compatible shape as
      `strikes`.  The time to expiry of each bond option.
    maturities: A real `Tensor` of the same dtype and compatible shape as
      `strikes`.  The time to maturity of the underlying zero coupon bonds.
    is_call_options: A boolean `Tensor` of a shape compatible with `strikes`.
      Indicates whether the option is a call (if True) or a put (if False).
    sample_discount_curve_paths_fn: Callable which takes the following args:

      1) times: Rank 1 `Tensor` of positive real values, specifying the times at
        which the path points are to be evaluated.
      2) curve_times: Rank 1 `Tensor` of positive real values, specifying the
        maturities at which the discount curve is to be computed at each
        simulation time.
      3) num_samples: Positive scalar integer specifying the number of paths to
        draw.

      and returns two `Tensor`s, the first being a Rank-4 tensor of shape
      `[num_samples, m, k, dim]` containing the simulated zero coupon bond
      curves, and the second being a `Tensor` of shape `[num_samples, k, dim]`
      containing the simulated short rate paths. Here, `m` is the size of
      `curve_times`, `k` is the size of `times`, and `dim` is the dimensionality
      of the paths.

    num_samples: Positive scalar `int32` `Tensor`. The number of simulation
      paths during Monte-Carlo valuation.
    time_step: Scalar real `Tensor`. Maximal distance between time grid points
      in Euler scheme. Relevant when Euler scheme is used for simulation.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None` which means that default dtypes inferred by
        TensorFlow are used.
    name: Python string. The name to give to the ops created by this function.
      Default value: `None` which maps to the default name
      `options_price_from_samples`.

  Returns:
    A `Tensor` of real dtype and shape `strikes.shape + [dim]` containing the
    computed option prices.
  """
    name = name or 'options_price_from_samples'
    with tf.name_scope(name):
        # Simulation grid: every distinct expiry plus a regular grid of spacing
        # `time_step` up to (but excluding) the longest expiry, de-duplicated
        # and sorted.
        sim_times, _ = tf.unique(tf.reshape(expiries, shape=[-1]))
        longest_expiry = tf.reduce_max(sim_times)
        sim_times, _ = tf.unique(
            tf.concat(
                [sim_times,
                 tf.range(time_step, longest_expiry, time_step)],
                axis=0))
        sim_times = tf.sort(sim_times, name='sort_sim_times')
        # Remaining time to bond maturity at option expiry; its distinct values
        # are the curve tenors requested from the sampler.
        tau = maturities - expiries
        curve_times_builder, _ = tf.unique(tf.reshape(tau, shape=[-1]))
        curve_times = tf.sort(curve_times_builder, name='sort_curve_times')

        p_t_tau, r_t = sample_discount_curve_paths_fn(times=sim_times,
                                                      curve_times=curve_times,
                                                      num_samples=num_samples)
        dim = p_t_tau.shape[-1]

        # Time increments of the grid, with a leading zero for the first point.
        # NOTE(review): with `dtype=None` the leading `[0.0]` presumably becomes
        # float32, which would not concatenate with a float64 `sim_times` -
        # confirm callers always pass a matching `dtype`.
        dt_builder = tf.concat(axis=0,
                               values=[
                                   tf.convert_to_tensor([0.0], dtype=dtype),
                                   sim_times[1:] - sim_times[:-1]
                               ])
        # Add leading and trailing unit axes so `dt` broadcasts against `r_t`.
        dt = tf.expand_dims(tf.expand_dims(dt_builder, axis=-1), axis=0)
        # Per-step discount factors exp(-r * dt).
        discount_factors_builder = tf.math.exp(-r_t * dt)
        # Transpose before (and after) because we want the cumprod along axis=1
        # and `matvec` operates on the last axis. The shape before and after would
        # be `(num_samples, len(times), dim)`
        discount_factors_builder = tf.transpose(
            utils.cumprod_using_matvec(
                tf.transpose(discount_factors_builder, [0, 2, 1])), [0, 2, 1])

        # make discount factors the same shape as `p_t_tau`. This involves adding
        # an extra dimension (corresponding to `curve_times`).
        discount_factors_builder = tf.expand_dims(discount_factors_builder,
                                                  axis=1)
        # Uses the static size of the `curve_times` axis of `p_t_tau`.
        discount_factors_simulated = tf.repeat(discount_factors_builder,
                                               p_t_tau.shape.as_list()[1],
                                               axis=1)

        # `sim_times` and `curve_times` are sorted for simulation. We need to
        # select the indices corresponding to our input.
        sim_time_index = tf.searchsorted(sim_times, tf.reshape(expiries, [-1]))
        curve_time_index = tf.searchsorted(curve_times, tf.reshape(tau, [-1]))
        # Broadcast shapes of strikes, expiries and maturities
        curve_time_index, sim_time_index = tff_utils.broadcast_tensors(
            curve_time_index, sim_time_index)
        gather_index = _prepare_indices(tf.range(0, num_samples),
                                        curve_time_index, sim_time_index,
                                        tf.range(0, dim))

        # The shape after `gather_nd` would be (num_samples*num_strikes*dim,)
        payoff_discount_factors_builder = tf.gather_nd(
            discount_factors_simulated, gather_index)
        # Reshape to `[num_samples] + strikes.shape + [dim]`
        # NOTE(review): `strikes` is not converted to a `Tensor` in this
        # function, so `strikes.shape` presumably requires callers to pass a
        # `Tensor` (or array) with a static shape - confirm.
        payoff_discount_factors = tf.reshape(payoff_discount_factors_builder,
                                             [num_samples] + strikes.shape +
                                             [dim])
        payoff_bond_price_builder = tf.gather_nd(p_t_tau, gather_index)
        payoff_bond_price = tf.reshape(payoff_bond_price_builder,
                                       [num_samples] + strikes.shape + [dim])

        # Add unit sample and `dim` axes so the call/put flags and the strikes
        # broadcast against the `[num_samples] + strikes.shape + [dim]` payoffs.
        is_call_options = tf.reshape(
            tf.broadcast_to(is_call_options, strikes.shape),
            [1] + strikes.shape + [1])

        strikes = tf.reshape(strikes, [1] + strikes.shape + [1])
        # Call payoff max(P - K, 0); put payoff max(K - P, 0).
        payoff = tf.where(is_call_options,
                          tf.math.maximum(payoff_bond_price - strikes, 0.0),
                          tf.math.maximum(strikes - payoff_bond_price, 0.0))
        # Monte Carlo estimate: average the discounted payoff over the paths.
        option_value = tf.math.reduce_mean(payoff_discount_factors * payoff,
                                           axis=0)

        return option_value
def discount_factors_and_bond_prices_from_samples(
        expiries,
        payment_times,
        sample_discount_curve_paths_fn,
        num_samples,
        time_step,
        dtype=None):
    """Utility function to compute the discount factors and the bond prices.

  Args:
    expiries: A real `Tensor` of any shape and dtype. The time to expiration of
      the swaptions. The shape of this input determines the number (and shape)
      of swaptions to be priced and the shape of the output - e.g. if there are
      two swaptions, and there are 11 payment dates for each swaption, then the
      shape of `expiries` is [2, 11], with entries repeated along the second
      axis.
    payment_times: A real `Tensor` of same dtype and compatible shape with
      `expiries` - e.g. if there are two swaptions, and there are 11 payment
      dates for each swaption, then the shape of `payment_times` should be [2,
      11]
    sample_discount_curve_paths_fn: Callable which takes the following args:
      1) times: Rank 1 `Tensor` of positive real values, specifying the times at
        which the path points are to be evaluated.
      2) curve_times: Rank 1 `Tensor` of positive real values, specifying the
        maturities at which the discount curve is to be computed at each
        simulation time.
      3) num_samples: Positive scalar integer specifying the number of paths to
        draw.

      and returns two `Tensor`s, the first being a Rank-4 tensor of shape
      [num_samples, m, k, d] containing the simulated zero coupon bond curves,
      and the second being a `Tensor` of shape [num_samples, k, d] containing
      the simulated short rate paths. Here, m is the size of `curve_times`, k
      is the size of `times`, and d is the dimensionality of the paths.
    num_samples: Positive scalar `int32` `Tensor`. The number of simulation
      paths during Monte-Carlo valuation.
    time_step: Scalar real `Tensor`. Maximal distance between time grid points
      in Euler scheme. Relevant when Euler scheme is used for simulation.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None` which means that default dtypes inferred by
        TensorFlow are used.

  Returns:
    Two real tensors, `discount_factors` and `bond_prices`, both of shape
    [num_samples] + shape(payment_times) + [dim], where `dim` is the dimension
    of each path (e.g for a Hull-White with two models, dim==2; while for HJM
    dim==1 always.)
  """
    # Simulation grid: every distinct expiry plus a regular grid of spacing
    # `time_step` up to (but excluding) the longest expiry, de-duplicated and
    # sorted.
    sim_times, _ = tf.unique(tf.reshape(expiries, shape=[-1]))
    longest_expiry = tf.reduce_max(sim_times)
    sim_times, _ = tf.unique(
        tf.concat([sim_times,
                   tf.range(time_step, longest_expiry, time_step)],
                  axis=0))
    sim_times = tf.sort(sim_times, name='sort_sim_times')

    # The static shape of `payment_times` determines the output layout.
    swaptionlet_shape = payment_times.shape
    # Time from expiry to each payment; its distinct values are the curve
    # tenors requested from the sampler.
    tau = payment_times - expiries

    curve_times_builder, _ = tf.unique(tf.reshape(tau, shape=[-1]))
    curve_times = tf.sort(curve_times_builder, name='sort_curve_times')

    p_t_tau, r_t = sample_discount_curve_paths_fn(times=sim_times,
                                                  curve_times=curve_times,
                                                  num_samples=num_samples)
    dim = p_t_tau.shape[-1]

    # Time increments of the grid, with a leading zero for the first point.
    dt = tf.concat(axis=0,
                   values=[
                       tf.convert_to_tensor([0.0], dtype=dtype),
                       sim_times[1:] - sim_times[:-1]
                   ])
    # Add leading and trailing unit axes so `dt` broadcasts against `r_t`.
    dt = tf.expand_dims(tf.expand_dims(dt, axis=-1), axis=0)

    # Compute the discount factors. We do this by performing the following:
    #
    # 1. We compute the implied discount factors. These are the factors:
    #    P(t1) = exp(-r1 * t1),
    #    P(t1, t2) = exp(-r2 (t2 - t1))
    #    P(t2, t3) = exp(-r3 (t3 - t2))
    #    ...
    # 2. We compute the cumulative products to get P(t2), P(t3), etc.:
    #    P(t2) = P(t1) * P(t1, t2)
    #    P(t3) = P(t1) * P(t1, t2) * P(t2, t3)
    #    ...
    # We perform the cumulative product by taking the cumulative sum over
    # log P's, and then exponentiating the sum. However, since each P is itself
    # an exponential, this effectively amounts to taking a cumsum over the
    # exponents themselves, and exponentiating in the end:
    #
    # P(t1) = exp(-r1 * t1)
    # P(t2) = exp(-r1 * t1 - r2 * (t2 - t1))
    # P(t3) = exp(-r1 * t1 - r2 * (t2 - t1) - r3 * (t3 - t2))
    # P(tk) = exp(-r1 * t1 - r2 * (t2 - t1) ... - r_k * (t_k - t_k-1))

    # Transpose before (and after) because we want the cumulative sum along
    # axis=1 but `cumsum_using_matvec` operates on the last axis.
    cumul_rdt = tf.transpose(utils.cumsum_using_matvec(
        tf.transpose(r_t * dt, perm=[0, 2, 1])),
                             perm=[0, 2, 1])
    discount_factors = tf.math.exp(-cumul_rdt)

    # Make discount factors the same shape as `p_t_tau`. This involves adding
    # an extra dimension (corresponding to `curve_times`).
    discount_factors = tf.expand_dims(discount_factors, axis=1)

    # tf.repeat is needed because we will use gather_nd later on this tensor.
    discount_factors_simulated = tf.repeat(discount_factors,
                                           tf.shape(p_t_tau)[1],
                                           axis=1)

    # `sim_times` and `curve_times` are sorted for simulation. We need to
    # select the indices corresponding to our input.
    sim_time_index = tf.searchsorted(sim_times, tf.reshape(expiries, [-1]))
    curve_time_index = tf.searchsorted(curve_times, tf.reshape(tau, [-1]))

    # The same index tensor is used to gather from both the discount factors
    # and the bond prices below.
    gather_index = _prepare_indices_ijjk(tf.range(0, num_samples),
                                         curve_time_index, sim_time_index,
                                         tf.range(0, dim))

    # The shape after `gather_nd` will be `(num_samples*num_swaptionlets*dim,)`
    payoff_discount_factors_builder = tf.gather_nd(discount_factors_simulated,
                                                   gather_index)
    # Reshape to `[num_samples] + swaptionlet.shape + [dim]`
    payoff_discount_factors = tf.reshape(payoff_discount_factors_builder,
                                         [num_samples] + swaptionlet_shape +
                                         [dim])
    payoff_bond_price_builder = tf.gather_nd(p_t_tau, gather_index)
    payoff_bond_price = tf.reshape(payoff_bond_price_builder,
                                   [num_samples] + swaptionlet_shape + [dim])

    return payoff_discount_factors, payoff_bond_price
Exemple #25
0
def count_integers(arr,
                   weights=None,
                   minlength=None,
                   maxlength=None,
                   axis=None,
                   dtype=tf.int32,
                   name=None):
    """Counts occurrences of each integer value in `arr`, optionally per slice.

    Behaves like `tf.math.bincount`, with an extra `axis` argument naming the
    dimensions of `arr` that are reduced over. Writing
      `~axis = [i for i in range(arr.ndim) if i not in axis]`,
    the result is a `Tensor` of shape `[K] + arr.shape[~axis]`.

    When `axis` is `None`, or when it covers every dimension of `arr`, the call
    is forwarded unchanged to `tf.math.bincount`. Otherwise one bincount is run
    per index of the kept (`~axis`) dimensions; in that path a missing
    `minlength` / `maxlength` defaults to `tf.reduce_max(arr) + 1`, so every
    slice produces the same number of bins.

    If `weights` is given, bin `i` holds the sum of the weights found at the
    positions where `arr` equals `i`, instead of a plain count.

    Args:
      arr: An `int32` `Tensor` of non-negative values.
      weights: Optional. Must have the same shape as `arr`. Each value in `arr`
        increments its bin by the matching weight instead of by 1.
      minlength: Optional. The output has at least this many bins, zero-padded
        at the end if necessary.
      maxlength: Optional. Values in `arr` that are greater than or equal to
        `maxlength` are skipped, so the output has at most `maxlength` bins.
      axis: A `0-D` or `1-D` `int32` `Tensor` (with static values) listing the
        dimensions of `arr` to reduce over.
        Default value: `None`, meaning reduce over all dimensions.
      dtype: Output dtype of the bins when `weights` is `None`.
      name: Optional name scope for the created ops.

    Returns:
      A `Tensor` of bin values with the dtype of `weights`, or `dtype` when no
      weights are given. Bins are indexed along axis 0.
    """
    with tf.name_scope(name or 'count_integers'):
        # No axis requested: plain bincount over everything.
        if axis is None:
            return tf.math.bincount(
                arr, weights=weights, minlength=minlength,
                maxlength=maxlength, dtype=dtype)

        arr = tf.convert_to_tensor(arr, dtype=tf.int32, name='arr')
        arr_ndims = _get_static_ndims(arr, expect_static=True)
        axis = _make_static_axis_non_negative_list(axis, arr_ndims)

        # `~axis` from the docstring: the dims of `arr` that survive.
        kept_dims = sorted(set(range(arr_ndims)).difference(axis))

        # Every dim is reduced, so again plain bincount is enough.
        if not kept_dims:
            return tf.math.bincount(
                arr, weights=weights, minlength=minlength,
                maxlength=maxlength, dtype=dtype)

        # Bring the kept dims to the left and flatten them, so tf.map_fn can
        # run one bincount per kept index. `flat_arr` is therefore only
        # partially flattened.
        flat_arr = _move_dims_to_flat_end(
            arr, kept_dims, arr_ndims, right_end=False)

        # Pin both bounds so that all slices return equally many bins.
        if minlength is None:
            minlength = tf.reduce_max(arr) + 1
        if maxlength is None:
            maxlength = tf.reduce_max(arr) + 1

        # Map over dim 0 (the flattened kept dims).
        if weights is None:
            flat_counts = tf.map_fn(
                lambda arr_slice: tf.math.bincount(
                    arr_slice, weights=None, minlength=minlength,
                    maxlength=maxlength, dtype=dtype),
                elems=flat_arr,
                fn_output_signature=dtype)
        else:
            weights = tf.convert_to_tensor(weights, name='weights')
            # Called for its static-rank check; the return value is unused.
            _get_static_ndims(
                weights, expect_static=True, expect_ndims=arr_ndims)
            flat_weights = _move_dims_to_flat_end(
                weights, kept_dims, arr_ndims, right_end=False)

            def _weighted_bincount(slices):
                values, value_weights = slices
                return tf.math.bincount(
                    values, weights=value_weights, minlength=minlength,
                    maxlength=maxlength, dtype=dtype)

            flat_counts = tf.map_fn(
                _weighted_bincount,
                elems=[flat_arr, flat_weights],
                fn_output_signature=weights.dtype)

        # map_fn stacked its results on axis 0, giving [prod(~axis), K];
        # the bins belong on axis 0, hence the transpose.
        bins_first = tf.transpose(a=flat_counts, perm=[1, 0])

        # Sanity check on the rank assumption above.
        _get_static_ndims(bins_first, expect_ndims=2, expect_static=True)

        # arr.shape[~axis]
        kept_shape = ps.gather(ps.shape(arr), indices=kept_dims)

        # Axis 0 indexes bins 0..K-1; the remaining axes are the kept dims.
        return tf.reshape(bins_first, ps.concat([[-1], kept_shape], axis=0))
Exemple #26
0
def sample(dim,
           drift_fn,
           volatility_fn,
           times,
           time_step=None,
           num_time_steps=None,
           num_samples=1,
           initial_state=None,
           random_type=None,
           seed=None,
           swap_memory=True,
           skip=0,
           precompute_normal_draws=True,
           times_grid=None,
           normal_draws=None,
           watch_params=None,
           validate_args=False,
           dtype=None,
           name=None):
  """Draws sample paths of an Ito process with the Euler scheme.

  For an Ito process

  ```
    dX = a(t, X_t) dt + b(t, X_t) dW_t
  ```
  with drift `a` and volatility `b`, the Euler method builds the sequence

  ```
  X_{n+1} = X_n + a(t_n, X_n) dt + b(t_n, X_n) (N(0, t_{n+1}) - N(0, t_n)),
  ```
  where `dt = t_{n+1} - t_n` and `N` is a Normal sample. See [1].

  This function validates and converts its arguments, builds the time grid
  through `utils.prepare_grid`, and delegates the simulation to `_sample`.

  #### References
  [1]: Wikipedia. Euler-Maruyama method:
  https://en.wikipedia.org/wiki/Euler-Maruyama_method

  Args:
    dim: Python int, at least 1. Dimension of the Ito process.
    drift_fn: Python callable computing the drift. Takes two real `Tensor`s
      of the same dtype: the scalar time `t` and the process value `X` of
      shape `batch_shape + [dim]`. Returns the drift `a(t, X)` as a real
      `Tensor` of that dtype and shape `batch_shape + [dim]`.
    volatility_fn: Python callable computing the volatility. Takes the same
      two arguments as `drift_fn` and returns `b(t, X)` as a real `Tensor` of
      the same dtype and shape `batch_shape + [dim, dim]`.
    times: Rank 1 `Tensor` of increasing positive real values at which the
      path points are evaluated.
    time_step: Optional scalar real `Tensor`. Maximal distance between grid
      points of the Euler scheme. Supply either this or `num_time_steps`.
      Default value: `None`.
    num_time_steps: Optional scalar integer `Tensor`. Total number of time
      steps performed by the algorithm. The maximal distance between grid
      points is bounded by
      `times[-1] / (num_time_steps - times.shape[0])`. Supply either this or
      `time_step`.
      Default value: `None`.
    num_samples: Positive scalar `int`. Number of paths to draw.
      Default value: 1.
    initial_state: `Tensor` of shape `[dim]`. Initial state of the process.
      Default value: `None`, which maps to a zero initial state.
    random_type: Enum value of `RandomType`. The (quasi-)random number
      generator used to generate the paths.
      Default value: `None`, which maps to standard pseudo-random numbers.
    seed: Seed for the random number generator. Only relevant when
      `random_type` is one of
      `[STATELESS, PSEUDO, HALTON_RANDOMIZED, PSEUDO_ANTITHETIC,
        STATELESS_ANTITHETIC]`. For `PSEUDO`, `PSEUDO_ANTITHETIC` and
      `HALTON_RANDOMIZED` it should be a Python integer. For `STATELESS` and
      `STATELESS_ANTITHETIC` it must be an integer `Tensor` of shape `[2]`.
      Default value: `None`, meaning no seed is set.
    swap_memory: Python bool. Whether GPU-CPU memory swap is enabled for this
      op; see the equivalent flag of `tf.while_loop`. Useful when computing
      gradients, since `tf.while_loop` propagates the process in time.
      Default value: `True`.
    skip: `int32` 0-d `Tensor`. Number of initial points of the Sobol or
      Halton sequence to skip. Used only when `random_type` is 'SOBOL',
      'HALTON', or 'HALTON_RANDOMIZED'; ignored otherwise.
      Default value: `0`.
    precompute_normal_draws: Python bool. Whether the noise increments
      `N(0, t_{n+1}) - N(0, t_n)` are precomputed. For `HALTON` and `SOBOL`
      they are always precomputed. The graph uses more memory, but the
      performance gain may be significant.
      Default value: `True`.
    times_grid: Optional rank 1 `Tensor` with the time discretization grid.
      If `times` are not on the grid, the nearest grid points are used.
      Default value: `None`, meaning the grid is computed from `time_step`
      or `num_time_steps`.
    normal_draws: `Tensor` of shape `[num_samples, num_time_points, dim]`
      with the same `dtype` as `times`. Normal draws used to compute the
      increments `N(0, t_{n+1}) - N(0, t_n)`. When supplied, `num_samples`
      is ignored and the first dimension of `normal_draws` is used instead.
      Default value: `None`, meaning the draws are generated internally.
    watch_params: Optional list of zero-dimensional `Tensor`s of the same
      `dtype` as `initial_state`, with respect to which the sampling function
      is differentiated. A more efficient algorithm is used when they are
      specified. Note that the function then becomes differentiable only
      with respect to these `Tensor`s and `initial_state`; the gradient with
      respect to any other `Tensor` is set to zero.
    validate_args: Python `bool`. When `True` and `normal_draws` is supplied,
      asserts that `tf.shape(normal_draws)[1]` equals the number of steps of
      the prepared grid (`tf.shape(keep_mask)[0] - 1`). When `False`, a
      mismatched dimension may silently produce incorrect outputs.
      Default value: `False`.
    dtype: `tf.Dtype` for the input and output `Tensor`s.
      Default value: `None`, meaning the dtype implied by `times` is used.
    name: Python string. Name given to this op.
      Default value: `None`, which maps to `euler_sample`.

  Returns:
    A real `Tensor` of shape `[num_samples, k, n]`, where `k` is the size of
    `times` and `n` is the dimension of the process.

  Raises:
    ValueError:
      (a) If both `num_time_steps` and `time_step` are supplied. This check
        runs before `times_grid` is inspected, so it applies even when
        `times_grid` is given.
      (b) If `times_grid` is not supplied and neither `num_time_steps` nor
        `time_step` is supplied.
      (c) If `normal_draws` is supplied and its last dimension differs from
        `dim`.
    tf.errors.InvalidArgumentError: If `validate_args` is `True`,
      `normal_draws` is supplied, and its number of time points does not
      match the grid.
  """
  with tf.name_scope(name or 'euler_sample'):
    times = tf.convert_to_tensor(times, dtype=dtype)
    # Fall back to whatever dtype `times` was converted to.
    dtype = times.dtype if dtype is None else dtype
    if initial_state is None:
      initial_state = tf.zeros(dim, dtype=dtype)
    initial_state = tf.convert_to_tensor(
        initial_state, dtype=dtype, name='initial_state')
    num_requested_times = tf.shape(times)[0]

    # Work out the inputs for the Euler time grid.
    has_num_steps = num_time_steps is not None
    has_time_step = time_step is not None
    if has_num_steps and has_time_step:
      raise ValueError(
          'When `times_grid` is not supplied only one of either '
          '`num_time_steps` or `time_step` should be defined but not both.')
    if times_grid is not None:
      times_grid = tf.convert_to_tensor(
          times_grid, dtype=dtype, name='times_grid')
    elif has_time_step:
      time_step = tf.convert_to_tensor(
          time_step, dtype=dtype, name='time_step')
    elif has_num_steps:
      num_time_steps = tf.convert_to_tensor(
          num_time_steps, dtype=tf.int32, name='num_time_steps')
      # Spread the requested number of steps evenly up to the last time.
      time_step = times[-1] / tf.cast(num_time_steps, dtype=dtype)
    else:
      raise ValueError(
          'When `times_grid` is not supplied, either `num_time_steps` '
          'or `time_step` should be defined.')

    times, keep_mask, time_indices = utils.prepare_grid(
        times=times,
        time_step=time_step,
        num_time_steps=num_time_steps,
        times_grid=times_grid,
        dtype=dtype)

    if normal_draws is not None:
      user_draws = tf.convert_to_tensor(
          normal_draws, dtype=dtype, name='normal_draws')
      # Time-major layout: [num_time_points, num_samples, dim].
      normal_draws = tf.transpose(user_draws, [1, 0, 2])
      # The supplied draws override the `num_samples` argument.
      num_samples = tf.shape(normal_draws)[1]
      draws_dim = normal_draws.shape[2]
      if dim != draws_dim:
        raise ValueError(
            '`dim` should be equal to `normal_draws.shape[2]` but are '
            '{0} and {1} respectively'.format(dim, draws_dim))
      if validate_args:
        num_draw_steps = tf.shape(normal_draws)[0]
        steps_check = tf.assert_equal(
            num_draw_steps, tf.shape(keep_mask)[0] - 1,
            message='`num_time_steps` should be equal to '
                    '`tf.shape(normal_draws)[1]`')
        # Tie the assertion to the draws so that it actually executes.
        with tf.compat.v1.control_dependencies([steps_check]):
          normal_draws = tf.identity(normal_draws)

    if watch_params is not None:
      watch_params = [
          tf.convert_to_tensor(param, dtype=dtype) for param in watch_params
      ]

    return _sample(
        dim=dim,
        drift_fn=drift_fn,
        volatility_fn=volatility_fn,
        times=times,
        keep_mask=keep_mask,
        num_requested_times=num_requested_times,
        num_samples=num_samples,
        initial_state=initial_state,
        random_type=random_type,
        seed=seed,
        swap_memory=swap_memory,
        skip=skip,
        precompute_normal_draws=precompute_normal_draws,
        normal_draws=normal_draws,
        watch_params=watch_params,
        time_indices=time_indices,
        dtype=dtype)
Exemple #27
0
 def _transpose(self, x, perm):
     """Transposes `x` with the permutation produced by `self._make_perm`.

     `self._make_perm` receives the dynamic rank of `x` together with `perm`;
     its result is handed to `tf.transpose` as the permutation.
     NOTE(review): `_make_perm` is not visible here; presumably it expands
     `perm` into a full-rank permutation. Confirm against its definition.
     """
     return tf.transpose(a=x, perm=self._make_perm(tf.rank(x), perm))
Exemple #28
0
def soft_multivariate_quantiles(x, quantiles, quantile_width=None, **kwargs):
    """Computes soft multivariate quantiles via optimal transport.

    The centered points of `x` are transported onto `2^d + 1` weighted target
    points: the origin plus the vertices of the hypercube `{-1, 1}^d`. Target
    weights are set so that the inputs transported to the origin are those
    concentrating around the quantile of interest; each soft quantile is the
    resulting convex combination of the inputs, shifted back by the mean.

    Args:
      x: Tensor<float> of shape [batch, N, d]. All three sizes are read from
        the static shape `x.shape`.
      quantiles: values convertible by `tf.constant` to a float32 tensor of
        shape [r, d]: r targeted quantiles of dimension d.
      quantile_width: (float) mass given to the bucket supposed to attract
        points whose values concentrate around the desired quantile. A bigger
        width lets the soft quantile be a mixture of more points further away
        from the quantile. If None, the width is set to `2 / N`.
      **kwargs: forwarded to `sinkhorn.autodiff_sinkhorn`; see that function
        for the possible extra parameters.

    Returns:
      A Tensor<float> of shape [batch, r, d] with the soft quantiles of each
      batch element.
    """
    quantiles = tf.constant(quantiles, tf.float32)
    batch_size = x.shape[0]
    # Shape arguments must be integers, so keep the integer point count for
    # `tf.ones` / `tf.reshape` and use the float copy `n` only in arithmetic.
    num_points = x.shape[1]
    n = tf.cast(num_points, tf.float32)
    d = x.shape[2]
    if quantile_width is None:
        quantile_width = 2 / n
    num_quantiles = tf.shape(quantiles)[0]
    # All 2^d sign patterns, shape [2^d, d].
    hypercube_vertices = tf.constant(
        list(itertools.product([-1, 1], repeat=d)), tf.float32)

    # Weight of each vertex for each quantile, shape [r, 2^d, d] before the
    # product: per coordinate, exponent 1 on `q` where the vertex is -1 and
    # exponent 1 on `1 - q` where the vertex is +1.
    weights = quantiles[:, tf.newaxis, :]**(
        0.5 * (1 - hypercube_vertices))[tf.newaxis, Ellipsis]
    weights *= (1 - quantiles)[:, tf.newaxis, :]**(
        0.5 * (1 + hypercube_vertices))[tf.newaxis, Ellipsis]

    # Multiply over coordinates -> [r, 2^d]; the vertices share the mass left
    # over after the central bucket takes `quantile_width`.
    weights = (1 - quantile_width) * tf.reduce_prod(weights, axis=2)
    # Prepend the weight of the quantile bucket itself (position 0).
    weights = tf.concat(
        (quantile_width * tf.ones((num_quantiles, 1)), weights), axis=1)
    # Replicate per batch element: [batch_size, 2^d + 1, num_quantiles].
    weights = tf.reshape(
        tf.tile(tf.transpose(weights), [batch_size, 1]),
        [batch_size, 2**d + 1, num_quantiles])

    # Target locations: the origin (which absorbs the quantile) followed by
    # the hypercube vertices, replicated per batch element.
    y = tf.concat(
        (tf.zeros((1, d), dtype=tf.float32), hypercube_vertices), axis=0)
    y = tf.reshape(tf.tile(y, [batch_size, 1]), [batch_size, 2**d + 1, d])

    # Center x so that the origin target sits at the mean of the data.
    x_mean = tf.reduce_mean(x, axis=1)
    x = x - x_mean[:, tf.newaxis, :]

    # Uniform source weights of 1/N per point, one column per quantile.
    source_weights = tf.ones(
        [batch_size, num_points, num_quantiles], dtype=tf.float32) / n
    transports = sinkhorn.autodiff_sinkhorn(
        x, y, source_weights, weights, **kwargs)

    # Keep only the mass sent to the central point (target index 0) and
    # rescale it by the bucket width to obtain convex-combination weights.
    # NOTE(review): assumes `transports` is [batch, N, 2^d + 1, r]; confirm
    # against `sinkhorn.autodiff_sinkhorn`.
    transports = 1 / quantile_width * tf.reshape(
        transports[:, :, 0, :], [batch_size, num_points, -1])

    # Apply the convex combinations to the data points, then undo centering.
    all_soft_quantiles = tf.reduce_sum(
        transports[:, :, :, tf.newaxis] * x[:, :, tf.newaxis, :],
        axis=1) + x_mean[:, tf.newaxis, :]
    return tf.reshape(all_soft_quantiles, [batch_size, num_quantiles, d])
Exemple #29
0
def draw_sample(num_samples, num_classes, logits, num_trials, dtype, seed):
    """Draws multinomial samples by summing one-hot categorical draws.

    The batch shape results from broadcasting `num_trials` against `logits`
    with its last dimension removed.

    Args:
      num_samples: Python int or singleton integer Tensor: number of
        multinomial samples to draw.
      num_classes: Python int or singleton integer Tensor: number of classes.
      logits: Floating Tensor whose last dimension has size k, holding the
        (unnormalized) logit probabilities per class.
      num_trials: Tensor with the number of categorical trials making up each
        multinomial. `num_trials[..., tf.newaxis]` must broadcast with
        `logits`.
      dtype: dtype in which the samples are emitted.
      seed: Random seed.

    Returns:
      samples: Tensor of the given dtype and shape [n] + batch_shape + [k].
    """
    with tf.name_scope('draw_sample'):
        # Multiplying by ones broadcasts both inputs to the full batch shape.
        batch_trials = tf.ones_like(
            logits[..., 0], dtype=num_trials.dtype) * num_trials
        batch_logits = tf.ones_like(
            batch_trials[..., tf.newaxis], dtype=logits.dtype) * logits

        # Collapse the batch dims: logits_2d is [B1B2...Bm, num_classes] and
        # draws_per_member is [B1B2...Bm].
        logits_2d = tf.reshape(batch_logits, [-1, num_classes])
        draws_per_member = num_samples * tf.reshape(batch_trials, [-1])

        # Each (logits, num_trials) pair is handled separately via map_fn.
        #
        # A single batched samplers.categorical call does not work, because it
        # needs num_trials to be identical across the whole batch of logits.
        # That restriction is natural for samplers.categorical, where
        # num_trials is part of the returned shape; the multinomial sampler
        # sums that dimension out and so does not need it.
        #
        # An alternative is one batched categorical draw of max(num_trials)
        # samples with the surplus masked out, but that wastes memory when
        # the entries of num_trials differ a lot.

        # TODO(b/123763054, b/112152209): Revisit the possibility of writing
        # this with a batch categorical followed by batch
        # unsorted_segment_sum, once both of those work and are
        # memory-efficient enough.
        def _sample_one_batch_member(member):
            # member_logits: [K]; total_draws: [] (= num_samples * num_trials)
            member_logits, total_draws, member_seed = member
            # picks: [1, total_draws]
            picks = samplers.categorical(
                member_logits[tf.newaxis, ...], total_draws, seed=member_seed)
            # -> [num_samples, num_trials]
            picks = tf.reshape(picks, shape=[num_samples, -1])
            # One-hot to [num_samples, num_trials, num_classes], then sum the
            # trials away -> [num_samples, num_classes].
            counts = tf.reduce_sum(
                tf.one_hot(picks, depth=num_classes), axis=-2)
            return tf.cast(counts, dtype=dtype)

        member_seeds = samplers.split_seed(
            seed, n=tf.shape(logits_2d)[0], salt='multinomial_draw_sample')
        # stacked: [B1B2...Bm, num_samples, num_classes]
        stacked = tf.map_fn(
            _sample_one_batch_member,
            [logits_2d, draws_per_member, member_seeds],
            fn_output_signature=dtype)

        # Move the sample axis to the front and restore the batch dims.
        samples_first = tf.transpose(a=stacked, perm=[1, 0, 2])
        out_shape = tf.concat(
            [[num_samples], tf.shape(batch_trials), [num_classes]], axis=0)
        return tf.reshape(samples_first, out_shape)
 def f(x, y):  # [4, 2, 3], [4, 2, 1, 3] -> [4, 3, 2]
     """Combines a cumulative sum of `w1 * x` with a reversed square of `w2 * y`.

     `w1` and `w2` are taken from the enclosing scope. The two terms are
     added and the last two axes of the sum are swapped.
     """
     # Running sum of the scaled `x` along the last axis, as float32.
     running_sum = tf.cast(tf.math.cumsum(w1 * x, axis=-1), dtype=tf.float32)
     # Reverse the scaled `y` along axis -3, square it, and drop the
     # second-to-last axis by taking its index 0.
     reversed_sq = tf.square(tf.reverse(w2 * y, axis=[-3]))[..., 0, :]
     return tf.transpose(running_sum + reversed_sq, perm=[0, 2, 1])