Example #1
            def _volatility_fn(t, x):
                del t

                expand_rank = len(
                    tff_utils.get_shape(x)) - self._batch_shape_rank - 1
                # `axis` is -2 because the new dimension needs to be added before the trailing `1`
                sigma_expand = self._expand_param_on_rank(self._sigma,
                                                          expand_rank,
                                                          axis=-2)
                return tf.expand_dims(sigma_expand * tf.sqrt(x), axis=-1)
Example #2
            def _drift_fn(t, x):
                del t

                expand_rank = tff_utils.get_shape(
                    x).rank - self._batch_shape_rank - 1
                # `axis` is -2 because the new dimension needs to be added before the trailing `1`
                theta_expand = self._expand_param_on_rank(self._theta,
                                                          expand_rank,
                                                          axis=-2)
                mean_reversion_expand = self._expand_param_on_rank(
                    self._mean_reversion, expand_rank, axis=-2)
                return theta_expand - mean_reversion_expand * x
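Both snippets rely on a private `_expand_param_on_rank` helper to align a
batch-shaped model parameter with the rank of the state tensor `x`. A minimal
sketch of what such a helper plausibly does (an assumption; the actual
implementation may differ):

```python
import tensorflow as tf

def expand_param_on_rank(param, expand_rank, axis):
    # Hypothetical stand-in for the private `_expand_param_on_rank`:
    # inserts `expand_rank` size-1 dimensions at `axis` so that `param`
    # broadcasts against the state tensor.
    for _ in range(expand_rank):
        param = tf.expand_dims(param, axis=axis)
    return param

# A parameter of shape batch_shape + [1] = [2, 1] aligned against a state of
# shape batch_shape + [num_samples, 1] = [2, 5, 1]: one size-1 dimension is
# inserted before the trailing `1`.
sigma = tf.constant([[0.1], [0.2]])
sigma_expand = expand_param_on_rank(sigma, expand_rank=1, axis=-2)
# sigma_expand.shape == (2, 1, 1), which broadcasts with x of shape (2, 5, 1)
```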
def interpolate(x: types.RealTensor,
                x_data: types.RealTensor,
                y_data: types.RealTensor,
                left_slope: types.RealTensor = None,
                right_slope: types.RealTensor = None,
                validate_args: bool = False,
                optimize_for_tpu: bool = False,
                dtype: tf.DType = None,
                name: str = None):
  """Performs linear interpolation for supplied points.

  Given a set of knots whose x- and y- coordinates are in `x_data` and `y_data`,
  this function returns y-values for x-coordinates in `x` via piecewise
  linear interpolation.

  `x_data` must be non-decreasing, but `y_data` need not be, since we do not
  require the function approximated by these knots to be monotonic.

  #### Examples

  ```python
  import tf_quant_finance as tff
  x = [-10, -1, 1, 3, 6, 7, 8, 15, 18, 25, 30, 35]
  x_data = [-1, 2, 6, 8, 18, 30.0]
  y_data = [10, -1, -5, 7, 9, 20]

  tff.math.interpolation.linear.interpolate(x, x_data, y_data,
                                            dtype=tf.float64)
  # Expected: [ 10, 10, 2.66666667, -2, -5, 1, 7, 8.4, 9, 15.41666667, 20, 20]
  ```

  Args:
    x: x-coordinates for which we need to get interpolation. An N-D
      `Tensor` of real dtype. The first N-1 dimensions represent batching
      dimensions.
    x_data: x coordinates. An N-D `Tensor` of real dtype. Should be sorted
      in non-decreasing order. The first N-1 dimensions represent batching
      dimensions.
    y_data: y coordinates. An N-D `Tensor` of real dtype. Should have a
      shape compatible with `x_data`. The first N-1 dimensions represent
      batching dimensions.
    left_slope: The slope to use for extrapolation with x-coordinates smaller
      than the minimum `x_data`. It's a 0-D or N-D `Tensor`.
      Default value: `None`, which maps to `0.0` meaning constant
      extrapolation, i.e. the extrapolated value will be the leftmost
      `y_data`.
    right_slope: The slope to use for extrapolation with x-coordinates greater
      than the maximum `x_data`. It's a 0-D or N-D `Tensor`.
      Default value: `None`, which maps to `0.0` meaning constant
      extrapolation, i.e. the extrapolated value will be the rightmost
      `y_data`.
    validate_args: Python `bool` that indicates whether the function checks
      that the shapes of `x_data` and `y_data` are equal and that the
      elements in `x_data` are non-decreasing. If this value is set to
      `False` and the elements in `x_data` are not sorted, the result of
      linear interpolation may be wrong.
      Default value: `False`.
    optimize_for_tpu: A Python bool. If `True`, the algorithm uses one-hot
      encoding to lookup indices of `x` in `x_data`. This significantly
      improves performance of the algorithm on a TPU device but may slow down
      performance on the CPU.
      Default value: `False`.
    dtype: Optional tf.dtype for `x`, `x_data`, `y_data`, `left_slope` and
      `right_slope`.
      Default value: `None` which means that the `dtype` is inferred from
        `x`.
    name: Python str. The name prefixed to the ops created by this function.
      Default value: `None` which maps to 'linear_interpolate'.

  Returns:
    An N-D `Tensor` of real dtype holding the interpolated y-values
    corresponding to the x-values in `x`.
  """
  name = name or 'linear_interpolate'
  with tf.name_scope(name):
    x = tf.convert_to_tensor(x, dtype=dtype, name='x')
    dtype = dtype or x.dtype
    x_data = tf.convert_to_tensor(x_data, dtype=dtype, name='x_data')
    y_data = tf.convert_to_tensor(y_data, dtype=dtype, name='y_data')
    # Try broadcast batch_shapes
    x, x_data, y_data = tff_utils.broadcast_common_batch_shape(
        x, x_data, y_data)

    # Rank of the inputs is known
    batch_rank = x.shape.rank - 1
    if batch_rank == 0:
      x = tf.expand_dims(x, 0)
      x_data = tf.expand_dims(x_data, 0)
      y_data = tf.expand_dims(y_data, 0)

    if left_slope is None:
      left_slope = tf.constant(0.0, dtype=x.dtype, name='left_slope')
    else:
      left_slope = tf.convert_to_tensor(left_slope, dtype=dtype,
                                        name='left_slope')
    if right_slope is None:
      right_slope = tf.constant(0.0, dtype=x.dtype, name='right_slope')
    else:
      right_slope = tf.convert_to_tensor(right_slope, dtype=dtype,
                                         name='right_slope')
    control_deps = []
    if validate_args:
      # Check that `x_data` elements are non-decreasing
      diffs = x_data[..., 1:] - x_data[..., :-1]
      assertion = tf.debugging.assert_greater_equal(
          diffs,
          tf.zeros_like(diffs),
          message='x_data is not sorted in non-decreasing order.')
      control_deps.append(assertion)
      # Check that the shapes of `x_data` and `y_data` are equal
      control_deps.append(
          tf.compat.v1.assert_equal(tff_utils.get_shape(x_data),
                                    tff_utils.get_shape(y_data)))

    with tf.control_dependencies(control_deps):
      # Get upper bound indices for `x`.
      upper_indices = tf.searchsorted(x_data, x, side='left', out_type=tf.int32)
      x_data_size = tff_utils.get_shape(x_data)[-1]
      at_min = tf.equal(upper_indices, 0)
      at_max = tf.equal(upper_indices, x_data_size)
      # Create tensors in order to be used by `tf.where`.
      # `values_min` are extrapolated values for x-coordinates less than or
      # equal to `x_data[..., 0]`.
      # `values_max` are extrapolated values for x-coordinates greater than
      # `x_data[..., -1]`.

      values_min = tf.expand_dims(y_data[..., 0], -1) + left_slope * (
          x - tf.broadcast_to(
              tf.expand_dims(x_data[..., 0], -1),
              shape=tff_utils.get_shape(x)))
      values_max = tf.expand_dims(y_data[..., -1], -1) + right_slope * (
          x - tf.broadcast_to(
              tf.expand_dims(x_data[..., -1], -1),
              shape=tff_utils.get_shape(x)))

      # `tf.where` evaluates all branches, need to cap indices to ensure it
      # won't go out of bounds.
      lower_encoding = tf.math.maximum(upper_indices - 1, 0)
      upper_encoding = tf.math.minimum(upper_indices, x_data_size - 1)
      # Prepare indices for `tf.gather` or `tf.one_hot`
      # TODO(b/156720909): Extract get_slice logic into a common utilities
      # module for cubic and linear interpolation
      if optimize_for_tpu:
        lower_encoding = tf.one_hot(lower_encoding, x_data_size,
                                    dtype=dtype)
        upper_encoding = tf.one_hot(upper_encoding, x_data_size,
                                    dtype=dtype)
      def get_slice(x, encoding):
        if optimize_for_tpu:
          return tf.math.reduce_sum(tf.expand_dims(x, axis=-2) * encoding,
                                    axis=-1)
        else:
          return tf.gather(x, encoding, axis=-1, batch_dims=x.shape.rank - 1)
      x_data_lower = get_slice(x_data, lower_encoding)
      x_data_upper = get_slice(x_data, upper_encoding)
      y_data_lower = get_slice(y_data, lower_encoding)
      y_data_upper = get_slice(y_data, upper_encoding)

      # NaNs in unselected branches of `tf.where` could propagate through the
      # gradient calculation, hence we need to clip the values to ensure no
      # NaN occurs. In this case we need to ensure there is no division by
      # zero.
      x_data_diff = x_data_upper - x_data_lower
      floor_x_diff = tf.where(at_min | at_max, x_data_diff + 1, x_data_diff)
      interpolated = y_data_lower + (x - x_data_lower) * (
          tf.math.divide_no_nan(y_data_upper - y_data_lower, floor_x_diff))

      interpolated = tf.where(at_min, values_min, interpolated)
      interpolated = tf.where(at_max, values_max, interpolated)
      if batch_rank > 0:
        return interpolated
      else:
        return tf.squeeze(interpolated, 0)
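A short usage sketch of the slope-based extrapolation and of the TPU-friendly
one-hot path (which should agree with the default `tf.gather` path):

```python
import tensorflow as tf
import tf_quant_finance as tff

x = [-2.0, 0.5, 4.0]
x_data = [0.0, 1.0, 2.0]
y_data = [0.0, 1.0, 3.0]

# Linear extrapolation with user-supplied slopes outside [0, 2].
y = tff.math.interpolation.linear.interpolate(
    x, x_data, y_data, left_slope=1.0, right_slope=2.0, dtype=tf.float64)
# Expected: [-2.0, 0.5, 7.0]

# The one-hot lookup path (useful on TPUs) yields the same values.
y_tpu = tff.math.interpolation.linear.interpolate(
    x, x_data, y_data, left_slope=1.0, right_slope=2.0,
    optimize_for_tpu=True, dtype=tf.float64)
```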
def _sample(*, dim, batch_shape, drift_fn, volatility_fn, times, keep_mask,
            num_requested_times, num_samples, initial_state, random_type, seed,
            swap_memory, skip, precompute_normal_draws, watch_params,
            time_indices, normal_draws, dtype):
    """Returns a sample of paths from the process using Euler method."""
    dt = times[1:] - times[:-1]
    sqrt_dt = tf.sqrt(dt)
    # current_state.shape = batch_shape + [num_samples, dim]
    current_state = initial_state + tf.zeros([num_samples, dim], dtype=dtype)
    steps_num = tff_utils.get_shape(dt)[-1]
    wiener_mean = None
    if normal_draws is None:
        # In order to use low-discrepancy random_type we need to generate the
        # sequence of independent random normals upfront. We also precompute random
        # numbers for stateless random type in order to ensure independent samples
        # for multiple function calls with different seeds.
        if precompute_normal_draws or random_type in (
                random.RandomType.SOBOL, random.RandomType.HALTON,
                random.RandomType.HALTON_RANDOMIZED,
                random.RandomType.STATELESS,
                random.RandomType.STATELESS_ANTITHETIC):
            normal_draws = utils.generate_mc_normal_draws(
                num_normal_draws=dim,
                num_time_steps=steps_num,
                num_sample_paths=num_samples,
                batch_shape=batch_shape,
                random_type=random_type,
                dtype=dtype,
                seed=seed,
                skip=skip)
            wiener_mean = None
        else:
            # If pseudo or antithetic sampling is used, proceed with random sampling
            # at each step.
            wiener_mean = tf.zeros((dim, ), dtype=dtype, name='wiener_mean')
            normal_draws = None
    if watch_params is None:
        # Use while_loop if `watch_params` is not passed
        return _while_loop(steps_num=steps_num,
                           current_state=current_state,
                           drift_fn=drift_fn,
                           volatility_fn=volatility_fn,
                           wiener_mean=wiener_mean,
                           num_samples=num_samples,
                           times=times,
                           dt=dt,
                           sqrt_dt=sqrt_dt,
                           keep_mask=keep_mask,
                           num_requested_times=num_requested_times,
                           swap_memory=swap_memory,
                           random_type=random_type,
                           seed=seed,
                           normal_draws=normal_draws,
                           dtype=dtype)
    else:
        # Use custom for_loop if `watch_params` is specified
        return _for_loop(batch_shape=batch_shape,
                         steps_num=steps_num,
                         current_state=current_state,
                         drift_fn=drift_fn,
                         volatility_fn=volatility_fn,
                         wiener_mean=wiener_mean,
                         num_samples=num_samples,
                         times=times,
                         dt=dt,
                         sqrt_dt=sqrt_dt,
                         time_indices=time_indices,
                         keep_mask=keep_mask,
                         watch_params=watch_params,
                         random_type=random_type,
                         seed=seed,
                         normal_draws=normal_draws)
def sample(dim: int,
           drift_fn: Callable[..., types.RealTensor],
           volatility_fn: Callable[..., types.RealTensor],
           times: types.RealTensor,
           time_step: Optional[types.RealTensor] = None,
           num_time_steps: Optional[types.IntTensor] = None,
           num_samples: types.IntTensor = 1,
           initial_state: Optional[types.RealTensor] = None,
           random_type: Optional[random.RandomType] = None,
           seed: Optional[types.IntTensor] = None,
           swap_memory: bool = True,
           skip: types.IntTensor = 0,
           precompute_normal_draws: bool = True,
           times_grid: Optional[types.RealTensor] = None,
           normal_draws: Optional[types.RealTensor] = None,
           watch_params: Optional[List[types.RealTensor]] = None,
           validate_args: bool = False,
           tolerance: Optional[types.RealTensor] = None,
           dtype: Optional[tf.DType] = None,
           name: Optional[str] = None) -> types.RealTensor:
    """Returns a sample paths from the process using Euler method.

  For an Ito process,

  ```
    dX = a(t, X_t) dt + b(t, X_t) dW_t
    X(t=0) = x0
  ```
  with given drift `a` and volatility `b` functions, the Euler method
  generates a sequence `{X_n}` as

  ```
  X_{n+1} = X_n + a(t_n, X_n) dt + b(t_n, X_n) (N(0, t_{n+1}) - N(0, t_n)),
  X_0 = x0
  ```
  where `dt = t_{n+1} - t_n` and `N` is a sample from the Normal distribution.
  See [1] for details.
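
  In code, a single step of this update rule is simply (a minimal NumPy
  illustration, not the library's implementation):

  ```python
  import numpy as np

  def euler_step(x, t, dt, a, b, rng):
    # dW ~ N(0, dt) is the Brownian increment over the step.
    dw = rng.normal(scale=np.sqrt(dt))
    return x + a(t, x) * dt + b(t, x) * dw

  rng = np.random.default_rng(42)
  x1 = euler_step(x=1.0, t=0.0, dt=0.01,
                  a=lambda t, x: 0.2 * x, b=lambda t, x: 0.3 * x, rng=rng)
  ```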

  #### Example
  Sampling from 2-dimensional Ito process of the form:

  ```none
  dX_1 = mu_1 * sqrt(t) dt + s11 * dW_1 + s12 * dW_2
  dX_2 = mu_2 * sqrt(t) dt + s21 * dW_1 + s22 * dW_2
  ```

  ```python
  import tensorflow as tf
  import tf_quant_finance as tff

  import numpy as np

  mu = np.array([0.2, 0.7])
  s = np.array([[0.3, 0.1], [0.1, 0.3]])
  num_samples = 10000
  dim = 2
  dtype = tf.float64

  # Define drift and volatility functions
  def drift_fn(t, x):
    return mu * tf.sqrt(t) * tf.ones([num_samples, dim], dtype=dtype)

  def vol_fn(t, x):
    return s * tf.ones([num_samples, dim, dim], dtype=dtype)

  # Set starting location
  x0 = np.array([0.1, -1.1])
  # Sample `num_samples` paths at specified `times` using Euler scheme.
  times = [0.1, 1.0, 2.0]
  paths = tff.models.euler_sampling.sample(
            dim=dim,
            drift_fn=drift_fn,
            volatility_fn=vol_fn,
            times=times,
            num_samples=num_samples,
            initial_state=x0,
            time_step=0.01,
            seed=42,
            dtype=dtype)
  # Expected: paths.shape = [10000, 3, 2]
  ```

  #### References
  [1]: Wikipedia. Euler-Maruyama method:
  https://en.wikipedia.org/wiki/Euler-Maruyama_method

  Args:
    dim: Python int greater than or equal to 1. The dimension of the Ito
      Process.
    drift_fn: A Python callable to compute the drift of the process. The
      callable should accept two real `Tensor` arguments of the same dtype.
      The first argument is the scalar time t, the second argument is the
      value of Ito process X - tensor of shape
      `batch_shape + [num_samples, dim]`. `batch_shape` is the shape of the
      independent stochastic processes being modelled and is inferred from the
      initial state `x0`.
      The result is the value of the drift a(t, X). The return value of the
      callable is a real `Tensor` of the same dtype as the input arguments
      and of shape `batch_shape + [num_samples, dim]`.
    volatility_fn: A Python callable to compute the volatility of the process.
      The callable should accept two real `Tensor` arguments of the same
      dtype as `times`. The first argument is the scalar time t, the second
      argument is the value of Ito process X - tensor of shape
      `batch_shape + [num_samples, dim]`. The result is the value of the
      volatility b(t, X). The return value of the callable is a real `Tensor`
      of the same dtype as the input arguments and of shape
      `batch_shape + [num_samples, dim, dim]`.
    times: Rank 1 `Tensor` of increasing positive real values. The times at
      which the path points are to be evaluated.
    time_step: An optional scalar real `Tensor` - the maximal distance between
      grid points in the Euler scheme.
      Either this or `num_time_steps` should be supplied.
      Default value: `None`.
    num_time_steps: An optional scalar integer `Tensor` - the total number of
      time steps performed by the algorithm. The maximal distance between
      points in the grid is bounded by
      `times[-1] / (num_time_steps - times.shape[0])`.
      Either this or `time_step` should be supplied.
      Default value: `None`.
    num_samples: Positive scalar `int`. The number of paths to draw.
      Default value: 1.
    initial_state: `Tensor` of shape broadcastable with
      `batch_shape + [num_samples, dim]`. The initial state of the process.
      `batch_shape` represents the shape of the independent batches of the
      stochastic process. Note that `batch_shape` is inferred from
      the `initial_state` and hence when sampling is requested for a batch of
      stochastic processes, the shape of `initial_state` should be at least
      `batch_shape + [1, 1]`.
      Default value: None which maps to a zero initial state.
    random_type: Enum value of `RandomType`. The type of (quasi)-random
      number generator to use to generate the paths.
      Default value: None which maps to the standard pseudo-random numbers.
    seed: Seed for the random number generator. The seed is
      only relevant if `random_type` is one of
      `[STATELESS, PSEUDO, HALTON_RANDOMIZED, PSEUDO_ANTITHETIC,
        STATELESS_ANTITHETIC]`. For `PSEUDO`, `PSEUDO_ANTITHETIC` and
      `HALTON_RANDOMIZED` the seed should be a Python integer. For
      `STATELESS` and `STATELESS_ANTITHETIC` the seed must be supplied as an
      integer `Tensor` of shape `[2]`.
      Default value: `None` which means no seed is set.
    swap_memory: A Python bool. Whether GPU-CPU memory swap is enabled for this
      op. See an equivalent flag in `tf.while_loop` documentation for more
      details. Useful when computing a gradient of the op since `tf.while_loop`
      is used to propagate stochastic process in time.
      Default value: True.
    skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol or
      Halton sequence to skip. Used only when `random_type` is 'SOBOL',
      'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
      Default value: `0`.
    precompute_normal_draws: Python bool. Indicates whether the noise increments
      `N(0, t_{n+1}) - N(0, t_n)` are precomputed. For `HALTON` and `SOBOL`
      random types the increments are always precomputed. While the resulting
      graph consumes more memory, the performance gains might be significant.
      Default value: `True`.
    times_grid: An optional rank 1 `Tensor` representing time discretization
      grid. If `times` are not on the grid, then the nearest points from the
      grid are used. When supplied, `num_time_steps` and `time_step` are
      ignored.
      Default value: `None`, which means that times grid is computed using
      `time_step` and `num_time_steps`.
    normal_draws: A `Tensor` of shape broadcastable with
      `batch_shape + [num_samples, num_time_points, dim]` and the same
      `dtype` as `times`. Represents random normal draws to compute increments
      `N(0, t_{n+1}) - N(0, t_n)`. When supplied, the `num_samples` argument
      is ignored and the sample dimension of `normal_draws` is used instead.
      Default value: `None` which means that the draws are generated by the
      algorithm. By default normal_draws for each model in the batch are
      independent.
    watch_params: An optional list of zero-dimensional `Tensor`s of the same
      `dtype` as `initial_state`. If provided, specifies `Tensor`s with respect
      to which the differentiation of the sampling function will happen.
      A more efficient algorithm is used when `watch_params` are specified.
      Note that the function becomes differentiable only w.r.t. these
      `Tensor`s and the `initial_state`. The gradient w.r.t. any other
      `Tensor` is set to zero.
    validate_args: Python `bool`. When `True` performs multiple checks:
      * That `times` are increasing with the minimum increments of the
        specified tolerance.
      * If `normal_draws` are supplied, checks that `normal_draws.shape[1]`
        is equal to `num_time_steps` that is either supplied as an argument
        or computed from `time_step`.
      When `False`, invalid dimensions may silently render incorrect
      outputs.
      Default value: `False`.
    tolerance: A non-negative scalar `Tensor` specifying the minimum tolerance
      for discernible times on the time grid. Times that are closer than the
      tolerance are perceived to be the same.
      Default value: `None` which maps to `1e-6` for single precision `dtype`
        and `1e-10` for double precision `dtype`.
    dtype: `tf.Dtype`. If supplied the dtype for the input and output `Tensor`s.
      Default value: None which means that the dtype implied by `times` is
      used.
    name: Python string. The name to give this op.
      Default value: `None` which maps to `euler_sample`.

  Returns:
   A real `Tensor` of shape `batch_shape + [num_samples, k, n]` where `k` is
     the size of `times` and `n` is the dimension of the process.

  Raises:
    ValueError:
      (a) When `times_grid` is not supplied, and neither `num_time_steps` nor
        `time_step` are supplied or if both are supplied.
      (b) If `normal_draws` is supplied and `dim` is mismatched.
    tf.errors.InvalidArgumentError: If `normal_draws` is supplied and
      `num_time_steps` is mismatched.
  """
    name = name or 'euler_sample'
    with tf.name_scope(name):
        times = tf.convert_to_tensor(times, dtype=dtype)
        if dtype is None:
            dtype = times.dtype
        asserts = []
        if tolerance is None:
            tolerance = 1e-10 if dtype == tf.float64 else 1e-6
        tolerance = tf.convert_to_tensor(tolerance, dtype=dtype)
        if validate_args:
            asserts.append(
                tf.assert_greater(
                    times[1:],
                    times[:-1] + tolerance,
                    message='`times` increments should be greater '
                    'than tolerance {0}'.format(tolerance)))
        if initial_state is None:
            initial_state = tf.zeros(dim, dtype=dtype)
        initial_state = tf.convert_to_tensor(initial_state,
                                             dtype=dtype,
                                             name='initial_state')
        batch_shape = tff_utils.get_shape(initial_state)[:-2]
        num_requested_times = tff_utils.get_shape(times)[0]
        # Create a time grid for the Euler scheme.
        if num_time_steps is not None and time_step is not None:
            raise ValueError(
                'When `times_grid` is not supplied only one of either '
                '`num_time_steps` or `time_step` should be defined but not both.'
            )
        if times_grid is None:
            if time_step is None:
                if num_time_steps is None:
                    raise ValueError(
                        'When `times_grid` is not supplied, either `num_time_steps` '
                        'or `time_step` should be defined.')
                num_time_steps = tf.convert_to_tensor(num_time_steps,
                                                      dtype=tf.int32,
                                                      name='num_time_steps')
                time_step = times[-1] / tf.cast(num_time_steps, dtype=dtype)
            else:
                time_step = tf.convert_to_tensor(time_step,
                                                 dtype=dtype,
                                                 name='time_step')
        else:
            times_grid = tf.convert_to_tensor(times_grid,
                                              dtype=dtype,
                                              name='times_grid')
            if validate_args:
                asserts.append(
                    tf.assert_greater(
                        times_grid[1:],
                        times_grid[:-1] + tolerance,
                        message='`times_grid` increments should be greater '
                        'than tolerance {0}'.format(tolerance)))
        times, keep_mask, time_indices = utils.prepare_grid(
            times=times,
            time_step=time_step,
            num_time_steps=num_time_steps,
            times_grid=times_grid,
            tolerance=tolerance,
            dtype=dtype)

        if normal_draws is not None:
            normal_draws = tf.convert_to_tensor(normal_draws,
                                                dtype=dtype,
                                                name='normal_draws')
            # Move the time axis to the front. The resulting shape is
            # [num_time_points] + batch_shape + [num_samples, dim]
            normal_draws_rank = normal_draws.shape.rank
            perm = tf.concat(
                [[normal_draws_rank - 2],
                 tf.range(normal_draws_rank - 2), [normal_draws_rank - 1]],
                axis=0)
            normal_draws = tf.transpose(normal_draws, perm=perm)
            num_samples = tf.shape(normal_draws)[-2]
            draws_dim = normal_draws.shape[-1]
            if dim != draws_dim:
                raise ValueError(
                    '`dim` should be equal to `normal_draws.shape[-1]` but '
                    'they are {0} and {1} respectively'.format(dim, draws_dim))
            if validate_args:
                draws_times = tff_utils.get_shape(normal_draws)[0]
                asserts.append(
                    tf.assert_equal(
                        draws_times,
                        tf.shape(keep_mask)[0] - 1,
                        message='`num_time_steps` should be equal to '
                        '`tf.shape(normal_draws)[1]`'))
        if validate_args:
            with tf.control_dependencies(asserts):
                times = tf.identity(times)
        if watch_params is not None:
            watch_params = [
                tf.convert_to_tensor(param, dtype=dtype)
                for param in watch_params
            ]
        return _sample(dim=dim,
                       batch_shape=batch_shape,
                       drift_fn=drift_fn,
                       volatility_fn=volatility_fn,
                       times=times,
                       keep_mask=keep_mask,
                       num_requested_times=num_requested_times,
                       num_samples=num_samples,
                       initial_state=initial_state,
                       random_type=random_type,
                       seed=seed,
                       swap_memory=swap_memory,
                       skip=skip,
                       precompute_normal_draws=precompute_normal_draws,
                       normal_draws=normal_draws,
                       watch_params=watch_params,
                       time_indices=time_indices,
                       dtype=dtype)
    def _sabr_sample_paths(self, initial_forward, initial_volatility, times,
                           time_step, num_samples, random_type, seed,
                           precompute_normal_draws, skip):
        """Returns a sample of paths from the process."""
        num_requested_times = tff_utils.get_shape(times)[0]
        # Prepare results format
        forward = tf.zeros(shape=(num_samples, ),
                           dtype=self._dtype) + initial_forward
        vol = tf.zeros(shape=(num_samples, ),
                       dtype=self._dtype) + initial_volatility
        if isinstance(num_requested_times, int) and num_requested_times == 1:
            record_samples = False
            forward_paths = forward
            vol_paths = vol
        else:
            # If more than one time point has to be recorded, create a TensorArray
            record_samples = True
            element_shape = forward.shape
            forward_paths = tf.TensorArray(dtype=times.dtype,
                                           size=num_requested_times,
                                           element_shape=element_shape,
                                           clear_after_read=False)
            vol_paths = tf.TensorArray(dtype=times.dtype,
                                       size=num_requested_times,
                                       element_shape=element_shape,
                                       clear_after_read=False)
        # Define sampling while_loop body function
        cond_fn = lambda index, *args: index < tf.size(times)

        # In order to use low-discrepancy random_type we need to generate the
        # sequence of independent random normals upfront. We also precompute random
        # numbers for stateless random type in order to ensure independent samples
        # for multiple function calls with different seeds.
        if precompute_normal_draws or random_type in (
                random.RandomType.SOBOL, random.RandomType.HALTON,
                random.RandomType.HALTON_RANDOMIZED,
                random.RandomType.STATELESS,
                random.RandomType.STATELESS_ANTITHETIC):
            num_time_steps = tf.cast(
                tf.math.ceil(tf.math.divide(times[-1], time_step)),
                dtype=tf.int32) + times.shape[0]
            # We need a [3] + initial_forward.shape tensor of random draws.
            # This will be accessed by normal_draws_index.
            num_normal_draws = 3 * tf.size(initial_forward)
            normal_draws = utils.generate_mc_normal_draws(
                num_normal_draws=num_normal_draws,
                num_time_steps=num_time_steps,
                num_sample_paths=num_samples,
                random_type=random_type,
                seed=seed,
                skip=skip,
                dtype=self._dtype)
        else:
            normal_draws = None

        def body_fn(index, current_time, forward, vol, forward_paths,
                    vol_paths, normal_draws_index):
            """Simulate Sabr process to the next time point."""
            forward, vol, normal_draws_index = self._propagate_to_time(
                forward, vol, current_time, times[index], time_step,
                random_type, seed, normal_draws, normal_draws_index,
                num_time_steps)
            # Always update paths in outer loop.
            if record_samples:
                # Update volatility paths
                vol_paths = vol_paths.write(index, vol)
                # Update forward paths
                forward_paths = forward_paths.write(index, forward)
            else:
                vol_paths = vol
                forward_paths = forward
            return (index + 1, times[index], forward, vol, forward_paths,
                    vol_paths, normal_draws_index)

        start_time = tf.constant(0, dtype=self._dtype)
        # Sample paths
        _, _, _, _, forward_paths, vol_paths, _ = tf.while_loop(
            cond_fn,
            body_fn,
            (0, start_time, forward, vol, forward_paths, vol_paths, 0),
            maximum_iterations=tf.size(times))
        if not record_samples:
            # shape [num_samples, 1]
            vol_paths = tf.expand_dims(vol_paths, axis=-1)
            forward_paths = tf.expand_dims(forward_paths, axis=-1)
            # shape [num_samples, 1, 1]
            return tf.stack([forward_paths, vol_paths], -1)
        # Shape [num_time_points] + [num_samples]
        vol_paths = vol_paths.stack()
        forward_paths = forward_paths.stack()
        # transpose to shape [num_samples, num_time_points]
        vol_paths = tf.transpose(vol_paths)
        forward_paths = tf.transpose(forward_paths)
        # Shape [num_samples, num_time_points, 2]
        return tf.stack([forward_paths, vol_paths], -1)
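The record/stack/transpose pattern above generalizes beyond SABR. A minimal
sketch of it in isolation (shapes here are illustrative only):

```python
import tensorflow as tf

num_times, num_samples = 4, 3
paths = tf.TensorArray(dtype=tf.float64, size=num_times,
                       element_shape=[num_samples])
state = tf.zeros([num_samples], dtype=tf.float64)
for i in tf.range(num_times):
    state = state + 1.0  # stand-in for one propagation step
    # `write` returns a new TensorArray which must be carried forward.
    paths = paths.write(i, state)
# `stack` yields shape [num_times, num_samples]; transpose so that samples
# index the leading axis, as in `_sabr_sample_paths` above.
result = tf.transpose(paths.stack())  # shape [num_samples, num_times]
```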
Example #7
def _leverage_function_using_pde(*, risk_free_rate, dividend_yield, lv_model,
                                 variance_model, rho, initial_spot,
                                 initial_variance, max_time, time_step,
                                 num_grid_points, grid_minimums, grid_maximums,
                                 dtype):
    """Computes Leverage function using Fokker-Planck PDE for joint density.

  This function computes the leverage function for the LSV model by first
  computing the joint probability density function `p(t, X(t), v(t))` where
  `X(t)` is the log of the spot price and `v(t)` is the variance at time `t`.
  The joint probability density is computed using the Fokker-Planck equation
  of the LSV model (see 6.8.2 in Ref [1]):
  ```None
  dp/dt = 1/2 d^2 [v L(t,X)^2 p]/dX^2 + 1/2 d^2 [b(v)^2 p]/dv^2 +
          rho d^2 [sqrt(v)L(t,X)b(v) p]/dXdv - d[(r - d - 1/2 v L(t,X)^2)p]/dX -
          d[a(v) p]/dv
  ```

  where `a(v)` and `b(v)` are the drift and diffusion functions for the
  variance process. Defining

  ```None
  I_n(k,t) = int v^n p(t, k, v) dv
  ```

  we can calculate the leverage function as follows:
  ```None
  L(k, t) = sigma(exp(k), t) sqrt(I_0(k, t)/I_1(k, t)).
  ```
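
  Numerically, `I_0` and `I_1` are integrals of the density over the variance
  grid. A minimal NumPy sketch of this step (an illustration only, assuming
  the density `p` is tabulated on a rectangular `(k, v)` grid and `sigma`
  holds the local volatility evaluated on the `k` grid):

  ```python
  import numpy as np

  def leverage_from_density(p, v_grid, sigma):
    # p: density values of shape [num_k, num_v]; v_grid: shape [num_v].
    i0 = np.trapz(p, v_grid, axis=-1)           # I_0(k) = int p dv
    i1 = np.trapz(p * v_grid, v_grid, axis=-1)  # I_1(k) = int v p dv
    # L(k) = sigma(exp(k)) * sqrt(I_0 / I_1); guard against vanishing I_1.
    return sigma * np.sqrt(i0 / np.maximum(i1, 1e-12))
  ```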

  Args:
    risk_free_rate: A scalar real `Tensor` specifying the (continuously
      compounded) risk free interest rate. If the underlying is an FX rate,
      then use this input to specify the domestic interest rate.
    dividend_yield: A real scalar `Tensor` specifying the (continuously
      compounded) dividend yield. If the underlying is an FX rate, then use
      this input to specify the foreign interest rate.
    lv_model: An instance of `LocalVolatilityModel` specifying the local
      volatility for the spot price.
    variance_model: An instance of `LSVVarianceModel` specifying the dynamics of
      the variance process of the LSV model.
    rho: A real scalar `Tensor` specifying the correlation between spot price
      and the stochastic variance.
    initial_spot: A real scalar `Tensor` specifying the underlying spot price on
      the valuation date.
    initial_variance: A real scalar `Tensor` specifying the initial variance on
      the valuation date.
    max_time: A real scalar `Tensor` specifying the maximum time to which the
      Fokker-Planck PDE is evolved.
    time_step: A real scalar `Tensor` specifying the time step during the
      numerical solution of the Fokker-Planck PDE.
    num_grid_points: A scalar integer `Tensor` specifying the number of
      discretization points for each spatial dimension.
    grid_minimums: An optional `Tensor` of size 2 containing the minimum grid
      points for PDE spatial discretization. `grid_minimums[0]` corresponds
      to the minimum spot price in the spatial grid and `grid_minimums[1]`
      corresponds to the minimum variance value.
    grid_maximums: An optional `Tensor` of size 2 containing the maximum grid
      points for PDE spatial discretization. `grid_maximums[0]` corresponds
      to the maximum spot price in the spatial grid and `grid_maximums[1]`
      corresponds to the maximum variance value.
    dtype: The default dtype to use when converting values to `Tensor`s.

  Returns:
    A Python callable which computes the leverage function `L(t, S(t))`. The
    function accepts a scalar `Tensor` corresponding to time 't' and a real
    `Tensor` of shape `[num_samples, 1]` corresponding to the spot price (S)
    as inputs and returns a real `Tensor` corresponding to the leverage
    function computed at (S, t).

  """
    if variance_model.dim() > 1:
        raise ValueError(
            "The default model of the leverage function doesn't support "
            "a variance process with more than 1 factor.")

    pde_grid_tol = _machine_eps(dtype)
    rho = tf.convert_to_tensor(rho, dtype=dtype)
    initial_spot = tf.convert_to_tensor(initial_spot, dtype=dtype)
    initial_log_spot = tf.math.log(initial_spot)
    initial_variance = tf.convert_to_tensor(initial_variance, dtype=dtype)
    risk_free_rate = tf.convert_to_tensor(risk_free_rate, dtype=dtype)
    dividend_yield = tf.convert_to_tensor(dividend_yield, dtype=dtype)

    x_scale = initial_log_spot
    y_scale = initial_variance
    # scaled log spot = log(spot/initial_spot)
    # scaled variance = variance / initial_variance
    scaled_initial_point = tf.convert_to_tensor([0.0, 1.0], dtype=dtype)

    # These are minimums and maximums for scaled log spot and scaled variance
    if grid_minimums is None:
        grid_minimums = [0.01, 0.0001]
    else:
        grid_minimums = tf.convert_to_tensor(grid_minimums, dtype=dtype)
        grid_minimums = [
            grid_minimums[0] / initial_spot,
            grid_minimums[1] / initial_variance
        ]
    if grid_maximums is None:
        grid_maximums = [10.0, 5.0]
    else:
        grid_maximums = tf.convert_to_tensor(grid_maximums, dtype=dtype)
        grid_maximums = [
            grid_maximums[0] / initial_spot,
            grid_maximums[1] / initial_variance
        ]

    log_spot_min = tf.math.log(
        tf.convert_to_tensor([grid_minimums[0]], dtype=dtype))
    log_spot_max = tf.math.log(
        tf.convert_to_tensor([grid_maximums[0]], dtype=dtype))
    variance_min = tf.convert_to_tensor([grid_minimums[1]], dtype=dtype)
    variance_max = tf.convert_to_tensor([grid_maximums[1]], dtype=dtype)

    grid_minimums = tf.concat([log_spot_min, variance_min], axis=0)
    grid_maximums = tf.concat([log_spot_max, variance_max], axis=0)

    grid = _tavella_randell_nonuniform_grid(grid_minimums, grid_maximums,
                                            scaled_initial_point,
                                            num_grid_points, 0.3, dtype)
    grid = [tf.expand_dims(grid[0], axis=0), tf.expand_dims(grid[1], axis=0)]

    delta_x = tf.math.reduce_min(grid[0][0, 1:] - grid[0][0, :-1])
    delta_y = tf.math.reduce_min(grid[1][0, 1:] - grid[1][0, :-1])
    # Initialize leverage function L(t=0, S) = 1
    leverage_fn = functools.partial(linear.interpolate,
                                    x_data=[[0.0, 1.0]],
                                    y_data=[[1.0, 1.0]],
                                    dtype=dtype)

    def _initial_value():
        """Computes initial value as a delta function delta(log_spot(t), var(0))."""
        log_spot, variance = tf.meshgrid(*grid)

        init_value = tf.where(
            tf.math.logical_and(
                tf.math.abs(log_spot - scaled_initial_point[0]) <
                delta_x + pde_grid_tol,
                tf.math.abs(variance - scaled_initial_point[1]) <
                delta_y + pde_grid_tol), 1.0 / (delta_x * delta_y * 4), 0.0)
        # initial_value.shape = (1, num_grid_x, num_grid_y)
        return tf.expand_dims(init_value, axis=0)

    def _second_order_coeff_fn(t, grid):
        log_spot = grid[0] + x_scale
        variance = grid[1] * y_scale
        leverage_fn_t_x = leverage_fn(log_spot)
        val_xx = 0.5 * variance * leverage_fn_t_x**2
        val_xy = 0.5 * (rho * tf.math.sqrt(variance) * leverage_fn_t_x *
                        variance_model.volatility_fn()(t, variance)) / y_scale
        val_yx = val_xy
        val_yy = 0.5 * variance_model.volatility_fn()(t,
                                                      variance)**2 / y_scale**2
        # return list of shape = (2,2). Each element has shape = grid.shape
        return [[-val_yy, -val_yx], [-val_xy, -val_xx]]

    def _first_order_coeff_fn(t, grid):
        log_spot = grid[0] + x_scale
        variance = grid[1] * y_scale
        leverage_fn_t_x = leverage_fn(log_spot)
        val_x = (risk_free_rate - dividend_yield -
                 0.5 * variance * leverage_fn_t_x**2)
        val_y = variance_model.drift_fn()(t, variance)
        # return list of shape = (2,). Each element has shape = grid.shape
        return [val_y / y_scale, val_x]

    def _compute_leverage_fn(t, coord_grid, value_grid):
        log_spot = tf.expand_dims(coord_grid[0], axis=-1) + x_scale
        local_volatility_values = lv_model.local_volatility_fn()(
            t, tf.math.exp(log_spot))
        # TODO(b/176826650): Large values represent instability. Eventually this
        # should be addressed inside local vol model.
        local_volatility_values = tf.where(
            tf.math.abs(local_volatility_values) > 1e4,
            tf.constant(0.0, dtype=dtype), local_volatility_values)
        # variance_given_logspot.shape = (num_grid_x, 1)
        variance_given_logspot = _conditional_expected_variance_from_pde_solution(
            [coord_grid[0] + x_scale, coord_grid[1] * y_scale], value_grid,
            dtype)(log_spot)

        leverage_fn_values = tf.math.divide_no_nan(
            local_volatility_values, tf.math.sqrt(variance_given_logspot))

        leverage_fn = functools.partial(
            linear.interpolate,
            x_data=grid[0] + x_scale,
            y_data=tf.transpose(leverage_fn_values),
            dtype=dtype)

        return leverage_fn

    @pde.boundary_conditions.neumann
    def _trivial_neumann_boundary(t, location_grid):
        del t, location_grid
        return 0.0

    # joint_density.shape = (1, num_grid_x, num_grid_y)
    joint_density = _initial_value()

    def loop_body(i, tstart, joint_density, leverage_fn_values):
        joint_density, coord_grid, _, _ = pde.fd_solvers.solve_forward(
            tstart,
            tstart + time_step,
            coord_grid=[grid[0][0], grid[1][0]],
            values_grid=joint_density,
            time_step=time_step / 10.0,
            values_transform_fn=None,
            inner_second_order_coeff_fn=_second_order_coeff_fn,
            inner_first_order_coeff_fn=_first_order_coeff_fn,
            zeroth_order_coeff_fn=None,
            boundary_conditions=[[
                _trivial_neumann_boundary, _trivial_neumann_boundary
            ], [_trivial_neumann_boundary, _trivial_neumann_boundary]],
            dtype=dtype)
        joint_density = tf.math.maximum(joint_density, 0.0)
        area_under_joint_density = _two_d_integration(
            [grid[0][0, :], grid[1][0, :]], joint_density)
        joint_density = joint_density / area_under_joint_density

        # TODO(b/176826743): Perform fixed point iteration instead of one step
        # update
        leverage_fn = _compute_leverage_fn(tstart + time_step, coord_grid,
                                           joint_density)
        leverage_v = leverage_fn(grid[0][0, :] + x_scale)[0, :]
        leverage_fn_values = leverage_fn_values.write(i, leverage_v)

        return i + 1, tstart + time_step, joint_density, leverage_fn_values

    times = tf.range(0.0, max_time + time_step, time_step, dtype=dtype)
    tstart = times[0]
    first_leverage_value = leverage_fn(grid[0][0])[0]
    leverage_fn_values = tf.TensorArray(
        dtype=dtype,
        size=tff_utils.get_shape(times)[0],
        element_shape=tff_utils.get_shape(first_leverage_value),
        clear_after_read=False)
    # `TensorArray.write` returns a new TensorArray that must be kept.
    leverage_fn_values = leverage_fn_values.write(0, first_leverage_value)

    loop_cond = lambda i, tstart, *args: tf.less(tstart, max_time)
    initial_args = (1, tstart, joint_density, leverage_fn_values)
    _, _, _, leverage_fn_values = tf.while_loop(loop_cond, loop_body,
                                                initial_args)
    leverage_fn_values = leverage_fn_values.stack()

    # leverage_fn_values.shape = (num_pde_timesteps, num_grid_x,)
    leverage_fn_values = tf.convert_to_tensor(leverage_fn_values, dtype=dtype)

    def _return_fn(t, spot):
        leverage_fn_interpolator = (
            math.interpolation.interpolation_2d.Interpolation2D(
                x_data=[times],
                y_data=tf.expand_dims(tf.repeat(grid[0] + x_scale,
                                                times.shape[0],
                                                axis=0),
                                      axis=0),
                z_data=tf.expand_dims(leverage_fn_values, axis=0),
                dtype=dtype))
        return leverage_fn_interpolator.interpolate(t, tf.math.log(spot))

    return _return_fn
def sample(*,
           dim: int,
           drift_fn: Callable[..., types.RealTensor],
           volatility_fn: Callable[..., types.RealTensor],
           times: types.RealTensor,
           time_step: Optional[types.RealTensor] = None,
           num_time_steps: Optional[types.IntTensor] = None,
           num_samples: types.IntTensor = 1,
           initial_state: Optional[types.RealTensor] = None,
           grad_volatility_fn: Optional[Callable[
               ..., List[types.RealTensor]]] = None,
           random_type: Optional[random.RandomType] = None,
           seed: Optional[types.IntTensor] = None,
           swap_memory: bool = True,
           skip: types.IntTensor = 0,
           precompute_normal_draws: bool = True,
           watch_params: Optional[List[types.RealTensor]] = None,
           stratonovich_order: int = 5,
           dtype: Optional[tf.DType] = None,
           name: Optional[str] = None) -> types.RealTensor:
    r"""Returns a sample paths from the process using the Milstein method.

  For an Ito process,

  ```
    dX = a(t, X_t) dt + b(t, X_t) dW_t
  ```
  given drift `a`, volatility `b` and the derivative of volatility `b'`, the
  Milstein method generates a sequence `{Y_n}` approximating `X`

  ```
  Y_{n+1} = Y_n + a(t_n, Y_n) dt + b(t_n, Y_n) dW_n + \frac{1}{2} b(t_n, Y_n)
  b'(t_n, Y_n) ((dW_n)^2 - dt)
  ```
  where `dt = t_{n+1} - t_n`, `dW_n = (N(0, t_{n+1}) - N(0, t_n))` and `N` is a
  sample from the Normal distribution.
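
  For the scalar case, a single step of this scheme reads (a minimal NumPy
  illustration; `db` denotes the state derivative `b'`):

  ```python
  import numpy as np

  def milstein_step(y, t, dt, a, b, db, rng):
    dw = rng.normal(scale=np.sqrt(dt))
    return (y + a(t, y) * dt + b(t, y) * dw
            + 0.5 * b(t, y) * db(t, y) * (dw ** 2 - dt))
  ```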

  In higher dimensions, when `a(t, X_t)` is a d-dimensional vector valued
  function and `W_t` is a d-dimensional Wiener process, we have for the kth
  element of the expansion:

  ```
  Y_{n+1}[k] = Y_n[k] + a(t_n, Y_n)[k] dt + \sum_{j=1}^d b(t_n, Y_n)[k, j]
  dW_n[j] + \sum_{j_1=1}^d \sum_{j_2=1}^d L_{j_1} b(t_n, Y_n)[k, j_2] I(j_1,
  j_2)
  ```
  where `L_{j} = \sum_{i=1}^d b(t_n, Y_n)[i, j] \frac{\partial}{\partial x^i}`
  is an operator and `I(j_1, j_2) = \int_{t_n}^{t_{n+1}} \int_{t_n}^{s_1}
  dW_{s_2}[j_1] dW_{s_1}[j_2]` is a multiple Ito integral.


  See [1] and [2] for details.

  #### References
  [1]: Wikipedia. Milstein method:
  https://en.wikipedia.org/wiki/Milstein_method
  [2]: Peter E. Kloeden, Eckhard Platen. Numerical Solution of Stochastic
    Differential Equations. Springer. 1992
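
  #### Example
  A usage sketch patterned on the Euler sampler above (the module path
  `tff.models.milstein_sampling` is assumed here):

  ```python
  import tensorflow as tf
  import tf_quant_finance as tff

  num_samples = 10000
  dtype = tf.float64

  # Geometric Brownian motion dX = mu * X dt + sigma * X dW.
  mu, sigma = 0.05, 0.3
  drift_fn = lambda t, x: mu * x
  # The volatility callable must return shape `batch_shape + [dim, dim]`.
  vol_fn = lambda t, x: sigma * tf.expand_dims(x, axis=-1)

  paths = tff.models.milstein_sampling.sample(
      dim=1,
      drift_fn=drift_fn,
      volatility_fn=vol_fn,
      times=[0.5, 1.0],
      time_step=0.01,
      num_samples=num_samples,
      initial_state=[1.0],
      seed=42,
      dtype=dtype)
  # Expected: paths.shape = [10000, 2, 1]
  ```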

  Args:
    dim: Python int greater than or equal to 1. The dimension of the Ito
      Process.
    drift_fn: A Python callable to compute the drift of the process. The
      callable should accept two real `Tensor` arguments of the same dtype. The
      first argument is the scalar time t, the second argument is the value of
      Ito process X - tensor of shape `batch_shape + [dim]`. The result is
      value of drift a(t, X). The return value of the callable is a real
      `Tensor` of the same dtype as the input arguments and of shape
      `batch_shape + [dim]`.
    volatility_fn: A Python callable to compute the volatility of the process.
      The callable should accept two real `Tensor` arguments of the same dtype
      as `times`. The first argument is the scalar time t, the second argument
      is the value of Ito process X - tensor of shape `batch_shape + [dim]`. The
      result is value of volatility b(t, X). The return value of the callable is
      a real `Tensor` of the same dtype as the input arguments and of shape
      `batch_shape + [dim, dim]`.
    times: Rank 1 `Tensor` of increasing positive real values. The times at
      which the path points are to be evaluated.
    time_step: An optional scalar real `Tensor` - the maximal distance between
      grid points in the Milstein scheme.
      Either this or `num_time_steps` should be supplied.
      Default value: `None`.
    num_time_steps: An optional scalar integer `Tensor` - the total number of
      time steps performed by the algorithm. The maximal distance between
      points in the grid is bounded by
      `times[-1] / (num_time_steps - times.shape[0])`.
      Either this or `time_step` should be supplied.
      Default value: `None`.
    num_samples: Positive scalar `int`. The number of paths to draw.
      Default value: 1.
    initial_state: `Tensor` of shape `[dim]`. The initial state of the
      process.
      Default value: None which maps to a zero initial state.
    grad_volatility_fn: An optional python callable to compute the gradient of
      `volatility_fn`. The callable should accept three real `Tensor` arguments
      of the same dtype as `times`. The first argument is the scalar time t. The
      second argument is the value of Ito process X - tensor of shape
      `batch_shape + [dim]`. The third argument is a tensor of input gradients
      of shape `batch_shape + [dim]` to pass to `gradient.fwd_gradient`. The
      result is a list of values corresponding to the forward gradient of
      volatility b(t, X) with respect to X. The return value of the callable is
      a list of size `dim` containing real `Tensor`s of the same dtype as the
      input arguments and of shape `batch_shape + [dim, dim]`. Each index of the
      list corresponds to a dimension of the state. If `None`, the gradient is
      computed from `volatility_fn` using forward differentiation.
    random_type: Enum value of `RandomType`. The type of (quasi)-random number
      generator to use to generate the paths.
      Default value: None which maps to the standard pseudo-random numbers.
    seed: Seed for the random number generator. The seed is only relevant if
      `random_type` is one of `[STATELESS, PSEUDO, HALTON_RANDOMIZED,
      PSEUDO_ANTITHETIC, STATELESS_ANTITHETIC]`. For `PSEUDO`,
      `PSEUDO_ANTITHETIC` and `HALTON_RANDOMIZED` the seed should be a Python
      integer. For `STATELESS` and `STATELESS_ANTITHETIC` the seed must be
      supplied as an integer `Tensor` of shape `[2]`.
      Default value: `None` which means no seed is set.
    swap_memory: A Python bool. Whether GPU-CPU memory swap is enabled for this
      op. See an equivalent flag in `tf.while_loop` documentation for more
      details. Useful when computing a gradient of the op since `tf.while_loop`
      is used to propagate stochastic process in time.
      Default value: True.
    skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol or
      Halton sequence to skip. Used only when `random_type` is 'SOBOL',
      'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
      Default value: `0`.
    precompute_normal_draws: Python bool. Indicates whether the noise increments
      `N(0, t_{n+1}) - N(0, t_n)` are precomputed. For `HALTON` and `SOBOL`
      random types the increments are always precomputed. While the resulting
      graph consumes more memory, the performance gains might be significant.
      Default value: `True`.
    watch_params: An optional list of zero-dimensional `Tensor`s of the same
      `dtype` as `initial_state`. If provided, specifies `Tensor`s with respect
      to which the differentiation of the sampling function will happen. A more
      efficient algorithm is used when `watch_params` are specified. Note that
      the function becomes differentiable only w.r.t. these `Tensor`s and the
      `initial_state`. The gradient w.r.t. any other `Tensor` is set to zero.
    stratonovich_order: A positive integer. The number of terms to use when
      calculating the approximate Stratonovich integrals in the multidimensional
      scheme. Stratonovich integrals are an alternative to Ito integrals, and
      can be used interchangeably when defining the higher order terms in the
      update equation. We use Stratonovich integrals here because they have a
      convenient approximation scheme for calculating cross terms involving
      different components of the Wiener process. See Eq. 8.10 in Section 5.8 of
      [2]. Default value: `5`.
    dtype: `tf.Dtype`. If supplied the dtype for the input and output `Tensor`s.
      Default value: None which means that the dtype implied by `times` is used.
    name: Python string. The name to give this op.
      Default value: `None` which maps to `milstein_sample`.
  """
    name = name or 'milstein_sample'
    with tf.name_scope(name):
        if stratonovich_order <= 0:
            raise ValueError(
                '`stratonovich_order` must be a positive integer.')
        times = tf.convert_to_tensor(times, dtype=dtype)
        if dtype is None:
            dtype = times.dtype
        if initial_state is None:
            initial_state = tf.zeros(dim, dtype=dtype)
        initial_state = tf.convert_to_tensor(initial_state,
                                             dtype=dtype,
                                             name='initial_state')
        num_requested_times = tff_utils.get_shape(times)[0]
        # Create a time grid for the Milstein scheme.
        if num_time_steps is not None and time_step is not None:
            raise ValueError(
                'Only one of either `num_time_steps` or `time_step` '
                'should be defined but not both')
        if time_step is None:
            if num_time_steps is None:
                raise ValueError(
                    'Either `num_time_steps` or `time_step` should be '
                    'defined.')
            num_time_steps = tf.convert_to_tensor(num_time_steps,
                                                  dtype=tf.int32,
                                                  name='num_time_steps')
            time_step = times[-1] / tf.cast(num_time_steps, dtype=dtype)
        else:
            time_step = tf.convert_to_tensor(time_step,
                                             dtype=dtype,
                                             name='time_step')
        times, keep_mask, time_indices = utils.prepare_grid(
            times=times,
            time_step=time_step,
            num_time_steps=num_time_steps,
            dtype=dtype)
        if watch_params is not None:
            watch_params = [
                tf.convert_to_tensor(param, dtype=dtype)
                for param in watch_params
            ]
        if grad_volatility_fn is None:

            def _grad_volatility_fn(current_time, current_state,
                                    input_gradients):
                return gradient.fwd_gradient(
                    functools.partial(volatility_fn, current_time),
                    current_state,
                    input_gradients=input_gradients,
                    unconnected_gradients=tf.UnconnectedGradients.ZERO)

            grad_volatility_fn = _grad_volatility_fn

        input_gradients = None
        if dim > 1:
            input_gradients = tf.unstack(tf.eye(dim, dtype=dtype))
            input_gradients = [
                tf.broadcast_to(start, [num_samples, dim])
                for start in input_gradients
            ]

        return _sample(dim=dim,
                       drift_fn=drift_fn,
                       volatility_fn=volatility_fn,
                       grad_volatility_fn=grad_volatility_fn,
                       times=times,
                       time_step=time_step,
                       keep_mask=keep_mask,
                       num_requested_times=num_requested_times,
                       num_samples=num_samples,
                       initial_state=initial_state,
                       random_type=random_type,
                       seed=seed,
                       swap_memory=swap_memory,
                       skip=skip,
                       precompute_normal_draws=precompute_normal_draws,
                       watch_params=watch_params,
                       time_indices=time_indices,
                       input_gradients=input_gradients,
                       stratonovich_order=stratonovich_order,
                       dtype=dtype)
Example #9
def interpolate(x: types.RealTensor,
                spline_data: SplineParameters,
                optimize_for_tpu: bool = False,
                dtype: tf.DType = None,
                name: str = None) -> types.RealTensor:
    """Interpolates spline values for the given `x` and the `spline_data`.

  Constant extrapolation is performed for the values outside the domain
  `spline_data.x_data`. This means that for `x > max(spline_data.x_data)`,
  `interpolate(x, spline_data) = spline_data.y_data[-1]`
  and for `x < min(spline_data.x_data)`,
  `interpolate(x, spline_data) = spline_data.y_data[0]`.

  For the interpolation formula refer to p. 548 of [1].

  #### References:
  [1]: R. Sedgewick, Algorithms in C, 1990, p. 545-550.
    Link: http://index-of.co.uk/Algorithms/Algorithms%20in%20C.pdf

  Args:
    x: A real `Tensor` of shape `batch_shape + [num_points]`.
    spline_data: An instance of `SplineParameters`. `spline_data.x_data` should
      have the same batch shape as `x`.
    optimize_for_tpu: A Python bool. If `True`, the algorithm uses one-hot
      encoding to look up indices of `x` in `spline_data.x_data`. This
      significantly improves performance of the algorithm on a TPU device but
      may slow down performance on the CPU.
      Default value: `False`.
    dtype: Optional dtype for `x`.
      Default value: `None` which maps to the default dtype inferred by
        TensorFlow.
    name: Python `str` name prefixed to ops created by this function.
      Default value: `None` which is mapped to the default name
        `cubic_spline_interpolate`.

  Returns:
      A `Tensor` of the same shape and `dtype` as `x`. Represents
      the interpolated values.

  Raises:
    ValueError:
      If `x` batch shape is different from `spline_data.x_data` batch
      shape.
  """
    name = name or 'cubic_spline_interpolate'
    with tf.name_scope(name):
        x = tf.convert_to_tensor(x, dtype=dtype, name='x')
        dtype = x.dtype
        # Unpack the spline data
        x_data = spline_data.x_data
        y_data = spline_data.y_data
        spline_coeffs = spline_data.spline_coeffs
        # Try to broadcast the batch shapes
        x, x_data, y_data, spline_coeffs = tff_utils.broadcast_common_batch_shape(
            x, x_data, y_data, spline_coeffs)
        # Determine the splines to use.
        indices = tf.searchsorted(x_data, x, side='right') - 1
        # This selects all elements for the start of the spline interval.
        # Make sure indices lie in the permissible range
        lower_encoding = tf.maximum(indices, 0)
        # This selects all elements for the end of the spline interval.
        # Make sure indices lie in the permissible range
        upper_encoding = tf.minimum(indices + 1,
                                    tff_utils.get_shape(x_data)[-1] - 1)
        # Prepare indices for `tf.gather` or `tf.one_hot`
        # TODO(b/156720909): Extract get_slice logic into a common utilities module
        # for cubic and linear interpolation
        if optimize_for_tpu:
            x_data_size = tff_utils.get_shape(x_data)[-1]
            lower_encoding = tf.one_hot(lower_encoding,
                                        x_data_size,
                                        dtype=dtype)
            upper_encoding = tf.one_hot(upper_encoding,
                                        x_data_size,
                                        dtype=dtype)
        # Calculate dx and dy.
        # Simplified logic:
        # dx = x_data[indices + 1] - x_data[indices]
        # dy = y_data[indices + 1] - y_data[indices]
        # indices is a tensor with different values per row/spline
        def get_slice(x, encoding):
            if optimize_for_tpu:
                return tf.math.reduce_sum(tf.expand_dims(x, axis=-2) *
                                          encoding,
                                          axis=-1)
            else:
                return tf.gather(x,
                                 encoding,
                                 axis=-1,
                                 batch_dims=x.shape.rank - 1)

        x0 = get_slice(x_data, lower_encoding)
        x1 = get_slice(x_data, upper_encoding)
        dx = x1 - x0

        y0 = get_slice(y_data, lower_encoding)
        y1 = get_slice(y_data, upper_encoding)
        dy = y1 - y0

        spline_coeffs0 = get_slice(spline_coeffs, lower_encoding)
        spline_coeffs1 = get_slice(spline_coeffs, upper_encoding)

        t = (x - x0) / dx
        t = tf.where(dx > 0, t, tf.zeros_like(t))
        df = ((t + 1.0) * spline_coeffs1 * 2.0) - (
            (t - 2.0) * spline_coeffs0 * 2.0)
        df1 = df * t * (t - 1) / 6.0
        result = y0 + (t * dy) + (dx * dx * df1)
        # Use constant extrapolation outside the domain
        upper_bound = tf.expand_dims(tf.reduce_max(x_data, -1),
                                     -1) + tf.zeros_like(result)
        lower_bound = tf.expand_dims(tf.reduce_min(x_data, -1),
                                     -1) + tf.zeros_like(result)
        result = tf.where(tf.logical_and(x <= upper_bound, x >= lower_bound),
                          result, tf.where(x > upper_bound, y0, y1))
        return result
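A minimal usage sketch for the interpolation above, assuming the spline coefficients come from `tff.math.interpolation.cubic.build_spline` (all values illustrative):

```python
import tensorflow as tf
import tf_quant_finance as tff

x_data = tf.constant([1.0, 2.0, 3.0, 4.0], dtype=tf.float64)
y_data = tf.constant([1.0, 4.0, 9.0, 16.0], dtype=tf.float64)
spline = tff.math.interpolation.cubic.build_spline(x_data, y_data)

# One point inside the domain and two outside of it.
x = tf.constant([0.5, 2.5, 5.0], dtype=tf.float64)
y = tff.math.interpolation.cubic.interpolate(x, spline, dtype=tf.float64)
# Constant extrapolation: y[0] == y_data[0] and y[2] == y_data[-1].
```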
Example #10
    def sample_paths(self,
                     times: types.RealTensor,
                     initial_state: types.RealTensor,
                     num_samples: types.IntTensor = 1,
                     random_type: Optional[random.RandomType] = None,
                     seed: Optional[types.RealTensor] = None,
                     time_step: Optional[types.RealTensor] = None,
                     skip: types.IntTensor = 0,
                     tolerance: types.RealTensor = 1e-6,
                     num_time_steps: Optional[types.IntTensor] = None,
                     precompute_normal_draws: types.BoolTensor = True,
                     times_grid: Optional[types.RealTensor] = None,
                     normal_draws: Optional[types.RealTensor] = None,
                     name: Optional[str] = None) -> types.RealTensor:
        """Returns a sample of paths from the process.

    Uses the Quadratic-Exponential (QE) method described in [1] to generate
    sample paths starting at time zero and returns the path values at the
    specified time points.

    Args:
      times: Rank 1 `Tensor` of positive real values. The times at which the
        path points are to be evaluated.
      initial_state: A rank 1 `Tensor` with two elements where the first element
        corresponds to the initial value of the log spot `X(0)` and the second
        to the starting variance value `V(0)`.
      num_samples: Positive scalar `int`. The number of paths to draw.
      random_type: Enum value of `RandomType`. The type of (quasi)-random
        number generator to use to generate the paths.
        Default value: None which maps to the standard pseudo-random numbers.
      seed: Seed for the random number generator. The seed is
        only relevant if `random_type` is one of
        `[STATELESS, PSEUDO, HALTON_RANDOMIZED, PSEUDO_ANTITHETIC,
          STATELESS_ANTITHETIC]`. For `PSEUDO`, `PSEUDO_ANTITHETIC` and
        `HALTON_RANDOMIZED` the seed should be a Python integer. For
        `STATELESS` and `STATELESS_ANTITHETIC` it must be supplied as an
        integer `Tensor` of shape `[2]`.
        Default value: `None` which means no seed is set.
      time_step: A positive Python float. The time discretization parameter.
      skip: `int32` 0-d `Tensor`. The number of initial points of the Sobol or
        Halton sequence to skip. Used only when `random_type` is 'SOBOL',
        'HALTON', or 'HALTON_RANDOMIZED', otherwise ignored.
      tolerance: Scalar positive real `Tensor`. The minimum time separation at
        which the process is assumed to change, i.e.,
        `X(t) != X(t + tolerance)`.
        Default value: 1e-6.
      num_time_steps: An optional scalar integer `Tensor`. The total number of
        time steps performed by the algorithm. The maximal distance between
        grid points is bounded by
        `times[-1] / (num_time_steps - times.shape[0])`.
        Either this or `time_step` should be supplied.
        Default value: `None`.
      precompute_normal_draws: Python bool. Indicates whether the noise
        increments `N(0, t_{n+1}) - N(0, t_n)` are precomputed. For `HALTON`
        and `SOBOL` random types the increments are always precomputed. While
        the resulting graph consumes more memory, the performance gains might
        be significant.
        Default value: `True`.
      times_grid: An optional rank 1 `Tensor` representing time discretization
        grid. If `times` are not on the grid, then the nearest points from the
        grid are used. When supplied, `num_time_steps` and `time_step` are
        ignored.
        Default value: `None`, which means that times grid is computed using
        `time_step` and `num_time_steps`.
      normal_draws: A `Tensor` of shape broadcastable with
        `[num_samples, num_time_points, 2]` and the same
        `dtype` as `times`. Represents random normal draws to compute increments
        `N(0, t_{n+1}) - N(0, t_n)`. When supplied, the `num_samples` argument
        is ignored and the first dimension of `normal_draws` is used instead.
        Default value: `None` which means that the draws are generated by the
        algorithm. By default normal_draws for each model in the batch are
        independent.
      name: Str. The name to give this op.
        Default value: `sample_paths`.

    Returns:
      A `Tensor` of shape `[num_samples, k, 2]` where `k` is the size
      of `times`. For each sample and time, the first element of the last
      dimension is the simulated log-spot `X(t)` and the second is the
      simulated variance `V(t)`.

    Raises:
      ValueError: If neither `time_step` nor `num_time_steps` is supplied when
        `times_grid` is `None`, or if both are supplied.

    #### References:
      [1]: Leif Andersen. Efficient Simulation of the Heston Stochastic
        Volatility Model. 2006.
    """
        if random_type is None:
            random_type = random.RandomType.PSEUDO
        # Note: all the notations below are the same as in [1].
        name = name or (self._name + '_sample_path')
        with tf.name_scope(name):
            times = tf.convert_to_tensor(times, self._dtype)
            if normal_draws is not None:
                # shape: [num_samples, num_time_points, dim]
                normal_draws = tf.convert_to_tensor(normal_draws,
                                                    dtype=self._dtype,
                                                    name='normal_draws')
                # shape: [num_time_points, num_samples, dim]
                perm = [1, 0, 2]
                normal_draws = tf.transpose(normal_draws, perm=perm)
                num_samples = tff_utils.get_shape(normal_draws)[-2]
            current_log_spot = (tf.convert_to_tensor(initial_state[..., 0],
                                                     dtype=self._dtype) +
                                tf.zeros([num_samples], dtype=self._dtype))
            current_vol = (tf.convert_to_tensor(initial_state[..., 1],
                                                dtype=self._dtype) +
                           tf.zeros([num_samples], dtype=self._dtype))
            num_requested_times = tff_utils.get_shape(times)[0]
            if times_grid is None:
                if time_step is None:
                    if num_time_steps is None:
                        raise ValueError(
                            'When `times_grid` is not supplied, either `num_time_steps` '
                            'or `time_step` should be defined.')
                    else:
                        num_time_steps = tf.convert_to_tensor(
                            num_time_steps,
                            dtype=tf.int32,
                            name='num_time_steps')
                        time_step = times[-1] / tf.cast(num_time_steps,
                                                        dtype=self._dtype)
                else:
                    if num_time_steps is not None:
                        raise ValueError(
                            'Only one of `time_step` and `num_time_steps` '
                            'should be supplied when calling sample_paths '
                            'of HestonModel.')
                    time_step = tf.convert_to_tensor(time_step,
                                                     dtype=self._dtype,
                                                     name='time_step')
            else:
                times_grid = tf.convert_to_tensor(times_grid,
                                                  dtype=self._dtype,
                                                  name='times_grid')
            times, keep_mask = _prepare_grid(times,
                                             time_step,
                                             times.dtype,
                                             self._mean_reversion,
                                             self._theta,
                                             self._volvol,
                                             self._rho,
                                             num_time_steps=num_time_steps,
                                             times_grid=times_grid)
            return self._sample_paths(
                times=times,
                num_requested_times=num_requested_times,
                current_log_spot=current_log_spot,
                current_vol=current_vol,
                num_samples=num_samples,
                random_type=random_type,
                keep_mask=keep_mask,
                seed=seed,
                skip=skip,
                tolerance=tolerance,
                precompute_normal_draws=precompute_normal_draws,
                normal_draws=normal_draws)
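A minimal usage sketch for this method, assuming the `tff.models.HestonModel` constructor takes `mean_reversion`, `theta`, `volvol` and `rho` (the parameter values are illustrative):

```python
import tensorflow as tf
import tf_quant_finance as tff

process = tff.models.HestonModel(
    mean_reversion=1.5, theta=0.04, volvol=0.5, rho=-0.7, dtype=tf.float64)
paths = process.sample_paths(
    times=[0.5, 1.0],
    initial_state=[0.0, 0.04],  # [log spot X(0), variance V(0)].
    num_samples=1000,
    time_step=0.01,
    seed=42)
# `paths` has shape [1000, 2, 2]: samples x times x (log spot, variance).
```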
Example #11
 def _get_batch_shape(param):
     """`param` must has shape `batch_shape + [1]`."""
     param_shape = tff_utils.get_shape(param)
     # Last rank is `1`
     return param_shape[:-1]
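For illustration, a parameter satisfying the shape contract above has shape `batch_shape + [1]`; stripping the trailing singleton recovers the batch shape (static shapes used here for brevity):

```python
import tensorflow as tf

# Two independent processes, each parameter carrying a trailing `1`.
param = tf.zeros([2, 1], dtype=tf.float64)
batch_shape = param.shape[:-1]  # TensorShape([2])
```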
Example #12
    def _sample_paths(
        self,
        theta,
        mean_reversion,
        sigma,
        element_shape,
        times,
        num_requested_times,
        initial_state,
        num_samples,
        random_type,
        seed,
    ):
        """Returns a sample of paths from the process."""
        times = tf.concat([[0], times], -1)
        # Time increments
        # Shape [num_requested_times, 1, 1]
        dts = tf.expand_dims(tf.expand_dims(times[1:] - times[:-1], axis=-1),
                             axis=-1)
        (poisson_fn, gamma_fn, poisson_seed_fn,
         gamma_seed_fn) = self._get_distributions(random_type)

        def _sample_at_time(i, update_idx, current_x, samples):
            dt = dts[i]
            # Shape batch_shape + [num_samples, dim]
            zeta = tf.where(
                tf.math.equal(mean_reversion, tf.zeros_like(mean_reversion)),
                dt, (1 - tf.math.exp(-mean_reversion * dt)) / mean_reversion)
            c = tf.math.divide_no_nan(tf.constant(4, dtype=self._dtype),
                                      sigma**2 * zeta)
            d = c * tf.math.exp(-mean_reversion * dt)

            poisson_rv = poisson_fn(shape=element_shape,
                                    lam=d * current_x / 2,
                                    seed=poisson_seed_fn(seed, i),
                                    dtype=self._dtype)

            gamma_param_alpha = poisson_rv + 2 * theta / (sigma**2)
            gamma_param_beta = c / 2

            new_x = gamma_fn(shape=element_shape,
                             alpha=gamma_param_alpha,
                             beta=gamma_param_beta,
                             seed=gamma_seed_fn(seed, i),
                             dtype=self._dtype)
            # `gamma_fn` outputs infinity when `c==0`
            new_x = tf.where(c > 0, new_x, current_x)

            samples = samples.write(i, new_x)
            return (i + 1, update_idx, new_x, samples)

        cond_fn = lambda i, *args: i < num_requested_times
        samples = tf.TensorArray(dtype=self._dtype,
                                 size=num_requested_times,
                                 element_shape=element_shape,
                                 clear_after_read=False)
        _, _, _, samples = tf.while_loop(
            cond_fn,
            _sample_at_time, (0, 0, initial_state, samples),
            maximum_iterations=num_requested_times)

        # Shape [num_requested_times, batch_shape..., num_samples, 1]
        samples = samples.stack()
        samples_rank = len(tff_utils.get_shape(samples))
        perm = (list(range(1, samples_rank - 2)) +
                [samples_rank - 2, 0, samples_rank - 1])
        # Shape batch_shape + [num_samples, num_requested_times, 1]
        return tf.transpose(samples, perm=perm)
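The loop body above draws the exact CIR transition as a Poisson-mixed Gamma variable (the noncentral chi-squared transition density). A scalar NumPy sketch of a single step, assuming the parameterization `dX = (theta - mean_reversion * X) dt + sigma * sqrt(X) dW`:

```python
import numpy as np

def cir_exact_step(x, dt, theta, kappa, sigma, rng):
  """One exact draw of X(t + dt) given X(t) = x (kappa = mean reversion)."""
  zeta = dt if kappa == 0 else (1 - np.exp(-kappa * dt)) / kappa
  c = 4.0 / (sigma**2 * zeta)
  d = c * np.exp(-kappa * dt)
  # Poisson-mixed Gamma: shape N + 2 * theta / sigma**2, rate c / 2.
  n = rng.poisson(d * x / 2.0)
  return rng.gamma(shape=n + 2.0 * theta / sigma**2, scale=2.0 / c)

rng = np.random.default_rng(seed=42)
x_next = cir_exact_step(x=0.05, dt=0.1, theta=0.02, kappa=0.5, sigma=0.1,
                        rng=rng)
```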
Example #13
    def sample_paths(self,
                     times: types.RealTensor,
                     initial_state: Optional[types.RealTensor] = None,
                     num_samples: int = 1,
                     random_type: Optional[random.RandomType] = None,
                     seed: Optional[int] = None,
                     name: Optional[str] = None) -> types.RealTensor:
        """Returns a sample of paths from the process.

    Using exact simulation method from [1].

    Args:
      times: Rank 1 `Tensor` of positive real values. The times at which the
        path points are to be evaluated.
      initial_state: A `Tensor` of the same `dtype` as `times` and of shape
        broadcastable with `batch_shape + [num_samples, 1]`. Represents the
        initial state of the Ito process. `batch_shape` is the shape of the
        independent stochastic processes being modelled and is inferred from the
        initial state `x0`.
        Default value: `None` which maps to an initial state of ones.
      num_samples: Positive scalar `int`. The number of paths to draw.
      random_type: `STATELESS` or `PSEUDO` type from `RandomType` Enum. The type
        of (quasi)-random number generator to use to generate the paths.
      seed: The seed for the random number generator.
        For `PSEUDO` random type: it is a Python integer.
        For `STATELESS` random type: it is an integer `Tensor` of shape `[2]`.
          In this case the algorithm samples random numbers with seeds `[seed[0]
          + i, seed[1] + j], i in {0, 1}, j in {0, 1, ..., num_times}`, where
          `num_times` is the size of `times`.
        Default value: `None` which means no seed is set, but it works only with
          `PSEUDO` random type. For `STATELESS` it has to be provided.
      name: Str. The name to give this op.
        Default value: `sample_paths`.

    Returns:
      A `Tensor` of shape `batch_shape + [num_samples, num_times, 1]` where
      `num_times` is the size of `times`.

    Raises:
      ValueError: If `random_type` or `seed` is not supported.

    #### Example

    ```python
    import tensorflow as tf
    import tf_quant_finance as tff

    # In this example `batch_shape` is `[2]`, so each parameter has shape `[2, 1]`
    process = tff.models.CirModel(
        theta=[[0.02], [0.03]],
        mean_reversion=[[0.5], [0.4]],
        sigma=[[0.1], [0.5]],
        dtype=tf.float64)

    num_samples = 5
    # `initial_state` has shape [num_samples, 1]
    initial_state=[[0.1], [0.2], [0.3], [0.4], [0.5]]
    times = [0.1, 0.2, 1.0]
    samples = process.sample_paths(
        times=times,
        num_samples=num_samples,
        initial_state=initial_state)
    # `samples` has shape [2, 5, 3, 1]
    ```

    #### References:
    [1]: A. Alfonsi. Affine Diffusions and Related Processes: Simulation,
      Theory and Applications.
    """
        name = name or (self._name + "_sample_path")
        with tf.name_scope(name):
            element_shape = self._batch_shape + [num_samples, self._dim]

            # batch_shape + [1] -> batch_shape + [1 (for num_samples), 1]
            theta = self._expand_param_on_rank(self._theta, 1, axis=-2)
            mean_reversion = self._expand_param_on_rank(self._mean_reversion,
                                                        1,
                                                        axis=-2)
            sigma = self._expand_param_on_rank(self._sigma, 1, axis=-2)

            if initial_state is None:
                initial_state = tf.ones(element_shape,
                                        dtype=self._dtype,
                                        name="initial_state")
            else:
                initial_state = (tf.convert_to_tensor(
                    initial_state, dtype=self._dtype, name="initial_state") +
                                 tf.zeros(element_shape, dtype=self._dtype))

            times = tf.convert_to_tensor(times,
                                         dtype=self._dtype,
                                         name="times")
            num_requested_times = tff_utils.get_shape(times)[0]
            if random_type is None:
                random_type = random.RandomType.PSEUDO
            if random_type == random.RandomType.STATELESS and seed is None:
                raise ValueError(
                    "`seed` equal to None is not supported with STATELESS random type."
                )

            return self._sample_paths(
                theta=theta,
                mean_reversion=mean_reversion,
                sigma=sigma,
                element_shape=element_shape,
                times=times,
                num_requested_times=num_requested_times,
                initial_state=initial_state,
                num_samples=num_samples,
                random_type=random_type,
                seed=seed,
            )