Code Example #1
File: utils_test.py  Project: zkkxu/tf-quant-finance
 def test_broadcast_common_batch_shape(self):
     x = tf.zeros([3, 4])
     y = tf.zeros([2, 1, 3, 4])
     x, y = utils.broadcast_common_batch_shape(x, y)
     with self.subTest("ShapeX"):
         x_eval = self.evaluate(x)
         self.assertAllEqual(x_eval, np.zeros([2, 1, 3, 4]))
     with self.subTest("ShapeY"):
         y_eval = self.evaluate(y)
         self.assertAllEqual(y_eval, np.zeros([2, 1, 3, 4]))
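The test above pins down the contract of `utils.broadcast_common_batch_shape`: every dimension except the innermost one is treated as a batch dimension, and the batch shapes are broadcast NumPy-style to a common shape. Below is a minimal illustrative sketch under that reading; the helper name is hypothetical, it assumes fully defined static shapes, and it is not the library's implementation.

```python
import tensorflow as tf

def broadcast_batch_shapes_sketch(x, y):
    # Treat all but the last dimension as batch dims and broadcast them.
    batch = tf.broadcast_static_shape(x.shape[:-1], y.shape[:-1])
    x = tf.broadcast_to(x, batch.as_list() + x.shape[-1:].as_list())
    y = tf.broadcast_to(y, batch.as_list() + y.shape[-1:].as_list())
    return x, y

x, y = broadcast_batch_shapes_sketch(tf.zeros([3, 4]), tf.zeros([2, 1, 3, 4]))
print(x.shape, y.shape)  # (2, 1, 3, 4) (2, 1, 3, 4)
```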
Code Example #2
def interpolate(x,
                x_data,
                y_data,
                left_slope=None,
                right_slope=None,
                validate_args=False,
                optimize_for_tpu=False,
                dtype=None,
                name=None):
    """Performs linear interpolation for supplied points.

  Given a set of knots whose x- and y- coordinates are in `x_data` and `y_data`,
  this function returns y-values for x-coordinates in `x` via piecewise
  linear interpolation.

  `x_data` must be non-decreasing, but `y_data` does not need to be, because we
  do not require the function approximated by these knots to be monotonic.

  #### Examples

  ```python
  x = [-10, -1, 1, 3, 6, 7, 8, 15, 18, 25, 30, 35]
  x_data = [-1, 2, 6, 8, 18, 30.0]
  y_data = [10, -1, -5, 7, 9, 20]

  result = interpolate(x, x_data, y_data)
  # [ 10, 10, 2.66666667, -2, -5, 1, 7, 8.4, 9, 15.41666667, 20, 20]
  ```

  Args:
    x: x-coordinates for which we need to get interpolation. An N-D `Tensor` of
      real dtype. The first N-1 dimensions represent batching dimensions.
    x_data: x coordinates. An N-D `Tensor` of real dtype. Should be sorted
      in non-decreasing order. The first N-1 dimensions represent batching
      dimensions.
    y_data: y coordinates. An N-D `Tensor` of real dtype. Should have a shape
      compatible with `x_data`. The first N-1 dimensions represent batching
      dimensions.
    left_slope: The slope to use for extrapolation with x-coordinates smaller
      than the min `x_data`. It's a 0-D or N-D `Tensor`.
      Default value: `None`, which maps to `0.0`, meaning constant
      extrapolation, i.e. the extrapolated value will be the leftmost `y_data`.
    right_slope: The slope to use for extrapolation with x-coordinates greater
      than the max `x_data`. It's a 0-D or N-D `Tensor`.
      Default value: `None`, which maps to `0.0`, meaning constant
      extrapolation, i.e. the extrapolated value will be the rightmost `y_data`.
    validate_args: Python `bool` that indicates whether the function checks
      that the shapes of `x_data` and `y_data` are equal and that the elements
      in `x_data` are non-decreasing. If this value is set to `False` and the
      elements in `x_data` are not non-decreasing, the result of linear
      interpolation may be wrong.
      Default value: `False`.
    optimize_for_tpu: A Python bool. If `True`, the algorithm uses one-hot
      encoding to look up indices of `x` in `x_data`. This significantly
      improves performance of the algorithm on a TPU device but may slow down
      performance on the CPU.
      Default value: `False`.
    dtype: Optional tf.dtype for `x`, `x_data`, `y_data`, `left_slope` and
      `right_slope`.
      Default value: `None` which means that the `dtype` inferred by TensorFlow
      is used.
    name: Python str. The name prefixed to the ops created by this function.
      Default value: `None` which maps to 'linear_interpolation'.

  Returns:
    An N-D `Tensor` of real dtype corresponding to the x-values in `x`.
  """
    name = name or "linear_interpolation"
    with tf.name_scope(name):
        x = tf.convert_to_tensor(x, dtype=dtype, name="x")
        dtype = dtype or x.dtype
        x_data = tf.convert_to_tensor(x_data, dtype=dtype, name="x_data")
        y_data = tf.convert_to_tensor(y_data, dtype=dtype, name="y_data")
        # Try broadcast batch_shapes
        x, x_data = utils.broadcast_common_batch_shape(x, x_data)
        x, y_data = utils.broadcast_common_batch_shape(x, y_data)
        x_data, y_data = utils.broadcast_common_batch_shape(x_data, y_data)

        batch_shape = x.shape.as_list()[:-1]
        if not batch_shape:
            x = tf.expand_dims(x, 0)
            x_data = tf.expand_dims(x_data, 0)
            y_data = tf.expand_dims(y_data, 0)

        if left_slope is None:
            left_slope = tf.constant(0.0, dtype=x.dtype, name="left_slope")
        else:
            left_slope = tf.convert_to_tensor(left_slope,
                                              dtype=dtype,
                                              name="left_slope")
        if right_slope is None:
            right_slope = tf.constant(0.0, dtype=x.dtype, name="right_slope")
        else:
            right_slope = tf.convert_to_tensor(right_slope,
                                               dtype=dtype,
                                               name="right_slope")
        control_deps = []
        if validate_args:
            # Check that the elements of `x_data` are non-decreasing
            diffs = x_data[..., 1:] - x_data[..., :-1]
            assertion = tf.compat.v1.debugging.assert_greater_equal(
                diffs,
                tf.zeros_like(diffs),
                message="x_data is not sorted in non-decreasing order.")
            control_deps.append(assertion)
            # Check that the shapes of `x_data` and `y_data` are equal
            control_deps.append(
                tf.compat.v1.assert_equal(tf.shape(x_data), tf.shape(y_data)))

        with tf.control_dependencies(control_deps):
            # Get upper bound indices for `x`.
            upper_indices = tf.searchsorted(x_data,
                                            x,
                                            side="left",
                                            out_type=tf.int32)
            x_data_size = x_data.shape.as_list()[-1]
            at_min = tf.equal(upper_indices, 0)
            at_max = tf.equal(upper_indices, x_data_size)
            # Create tensors in order to be used by `tf.where`.
            # `values_min` are extrapolated values for x-coordinates less than or
            # equal to `x_data[..., 0]`.
            # `values_max` are extrapolated values for x-coordinates greater than
            # `x_data[..., -1]`.

            values_min = tf.expand_dims(
                y_data[..., 0], -1) + left_slope * (x - tf.broadcast_to(
                    tf.expand_dims(x_data[..., 0], -1), shape=tf.shape(x)))
            values_max = tf.expand_dims(
                y_data[..., -1], -1) + right_slope * (x - tf.broadcast_to(
                    tf.expand_dims(x_data[..., -1], -1), shape=tf.shape(x)))

            # `tf.where` evaluates all branches, need to cap indices to ensure it
            # won't go out of bounds.
            lower_encoding = tf.math.maximum(upper_indices - 1, 0)
            upper_encoding = tf.math.minimum(upper_indices, x_data_size - 1)
            # Prepare indices for `tf.gather` or `tf.one_hot`
            # TODO(b/156720909): Extract get_slice logic into a common utilities
            # module for cubic and linear interpolation
            if optimize_for_tpu:
                lower_encoding = tf.one_hot(lower_encoding,
                                            x_data_size,
                                            dtype=dtype)
                upper_encoding = tf.one_hot(upper_encoding,
                                            x_data_size,
                                            dtype=dtype)

            def get_slice(x, encoding):
                if optimize_for_tpu:
                    return tf.math.reduce_sum(tf.expand_dims(x, axis=-2) *
                                              encoding,
                                              axis=-1)
                else:
                    return tf.gather(x,
                                     encoding,
                                     axis=-1,
                                     batch_dims=x.shape.rank - 1)

            x_data_lower = get_slice(x_data, lower_encoding)
            x_data_upper = get_slice(x_data, upper_encoding)
            y_data_lower = get_slice(y_data, lower_encoding)
            y_data_upper = get_slice(y_data, upper_encoding)

            # NaNs in unselected branches could propagate through the gradient
            # calculation, hence we need to clip the values to ensure no NaN
            # occurs. In this case we need to ensure there is no division by
            # zero.
            x_data_diff = x_data_upper - x_data_lower
            floor_x_diff = tf.where(at_min | at_max, x_data_diff + 1,
                                    x_data_diff)
            interpolated = y_data_lower + (x - x_data_lower) * (
                y_data_upper - y_data_lower) / floor_x_diff

            interpolated = tf.where(at_min, values_min, interpolated)
            interpolated = tf.where(at_max, values_max, interpolated)
            if batch_shape:
                return interpolated
            else:
                return tf.squeeze(interpolated, 0)
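A quick usage sketch of the function above, assuming it is exposed as `tff.math.interpolation.linear.interpolate`; the numbers in the comments are worked by hand from the docstring's rules.

```python
import tf_quant_finance as tff

x = [0.5, 2.5, 3.0, 12.0]
x_data = [1.0, 2.0, 3.0, 4.0]
y_data = [1.0, 4.0, 9.0, 16.0]

y = tff.math.interpolation.linear.interpolate(x, x_data, y_data)
# x=0.5 lies left of x_data, so the default left_slope=0.0 keeps y_data[0]=1.0;
# x=2.5 lies between the knots (2., 4.) and (3., 9.), giving 4 + 0.5 * 5 = 6.5;
# x=3.0 hits a knot exactly (9.0);
# x=12.0 lies right of x_data, so the default right_slope=0.0 keeps y_data[-1]=16.0.
# Expected: approximately [1.0, 6.5, 9.0, 16.0]
```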
Code Example #3
def build(x_data, y_data, validate_args=False, dtype=None, name=None):
    """Builds a SplineParameters interpolation object.

  Given a `Tensor` of state points `x_data` and corresponding values `y_data`
  creates an object that contains interpolation coefficients. The object can be
  used by the `interpolate` function to get interpolated values for a set of
  state points `x` using the cubic spline interpolation algorithm.
  It assumes that the second derivatives at the first and last spline points
  are zero. The basic logic is explained in [1] (see also, e.g., [2]).

  Repeated entries in `x_data` are allowed for the boundary values of `x_data`.
  For example, `x_data` can be `[1., 1., 2., 3., 4., 4., 4.]` but not
  `[1., 2., 2., 3.]`. The repeated values play no role in interpolation and are
  useful only for interpolating multiple splines with different numbers of data
  points. It is the user's responsibility to verify that the corresponding
  values of `y_data` are the same for the repeated values of `x_data`.

  Typical Usage Example:

  ```python
  import tensorflow.compat.v2 as tf
  import numpy as np

  x_data = np.linspace(-5.0, 5.0,  num=11)
  y_data = 1.0/(1.0 + x_data**2)
  spline = cubic_interpolation.build(x_data, y_data)
  x_args = [3.3, 3.4, 3.9]

  y = cubic_interpolation.interpolate(x_args, spline)
  ```

  #### References:
  [1]: R. Sedgewick, Algorithms in C, 1990, p. 545-550.
    Link: http://index-of.co.uk/Algorithms/Algorithms%20in%20C.pdf
  [2]: R. Pienaar, M Choudhry. Fitting the term structure of interest rates:
    the practical implementation of cubic spline methodology.
    Link:
    http://yieldcurve.com/mktresearch/files/PienaarChoudhry_CubicSpline2.pdf

  Args:
    x_data: A real `Tensor` of shape `[..., num_points]` containing
      X-coordinates of points to fit the splines to. The values have to
      be monotonically non-decreasing along the last dimension.
    y_data: A `Tensor` of the same shape and `dtype` as `x_data` containing
      Y-coordinates of points to fit the splines to.
    validate_args: Python `bool`. When `True`, verifies that the elements of
      `x_data` are sorted in non-decreasing order along the last dimension, at
      the cost of possibly degrading runtime performance.
      Default value: `False`.
    dtype: Optional dtype for both `x_data` and `y_data`.
      Default value: `None` which maps to the default dtype inferred by
      TensorFlow.
    name: Python `str` name prefixed to ops created by this function.
      Default value: `None` which is mapped to the default name
      `cubic_spline_build`.

  Returns:
    An instance of `SplineParameters`.
  """
    # Main body of build
    with tf.compat.v1.name_scope(name,
                                 default_name="cubic_spline_build",
                                 values=[x_data, y_data]):
        x_data = tf.convert_to_tensor(x_data, dtype=dtype, name="x_data")
        y_data = tf.convert_to_tensor(y_data, dtype=dtype, name="y_data")
        # Sanity check inputs
        if validate_args:
            assert_sanity_check = [_validate_arguments(x_data)]
        else:
            assert_sanity_check = []
        x_data, y_data = utils.broadcast_common_batch_shape(x_data, y_data)
        with tf.compat.v1.control_dependencies(assert_sanity_check):
            spline_coeffs = _calculate_spline_coeffs(x_data, y_data)

        return SplineParameters(x_data=x_data,
                                y_data=y_data,
                                spline_coeffs=spline_coeffs)
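The docstring states that the second derivatives at the first and last spline points are taken to be zero, i.e. the natural cubic spline of [1]. For reference, here is a NumPy sketch of the standard tridiagonal system that a routine such as `_calculate_spline_coeffs` has to solve for the interior second derivatives; the library may store a scaled version of these coefficients, so this illustrates the underlying math rather than the library code.

```python
import numpy as np

def natural_spline_second_derivatives(x, y):
    # Solve for the second derivatives M_i of a natural cubic spline,
    # with boundary conditions M_0 = M_{n-1} = 0.
    n = len(x)
    h = np.diff(x)                  # knot spacings h_i = x_{i+1} - x_i
    a = np.zeros((n - 2, n - 2))    # dense for clarity; tridiagonal in practice
    rhs = np.zeros(n - 2)
    for i in range(1, n - 1):
        k = i - 1
        a[k, k] = 2.0 * (h[i - 1] + h[i])
        if k > 0:
            a[k, k - 1] = h[i - 1]
        if k < n - 3:
            a[k, k + 1] = h[i]
        rhs[k] = 6.0 * ((y[i + 1] - y[i]) / h[i] - (y[i] - y[i - 1]) / h[i - 1])
    m = np.zeros(n)
    m[1:-1] = np.linalg.solve(a, rhs)
    return m
```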
Code Example #4
def interpolate(x_values,
                spline_data,
                optimize_for_tpu=False,
                dtype=None,
                name=None):
    """Interpolates spline values for the given `x_values` and the `spline_data`.

  Constant extrapolation is performed for the values outside the domain
  `spline_data.x_data`. This means that for `x > max(spline_data.x_data)`,
  `interpolate(x, spline_data) = spline_data.y_data[-1]`
  and for  `x < min(spline_data.x_data)`,
  `interpolate(x, spline_data) = spline_data.y_data[0]`.

  For the interpolation formula refer to p.548 of [1].

  #### References:
  [1]: R. Sedgewick, Algorithms in C, 1990, p. 545-550.
    Link: http://index-of.co.uk/Algorithms/Algorithms%20in%20C.pdf

  Args:
    x_values: A real `Tensor` of shape `batch_shape + [num_points]`.
    spline_data: An instance of `SplineParameters`. `spline_data.x_data` should
      have the same batch shape as `x_values`.
    optimize_for_tpu: A Python bool. If `True`, the algorithm uses one-hot
      encoding to look up indices of `x_values` in `spline_data.x_data`. This
      significantly improves performance of the algorithm on a TPU device but
      may slow down performance on the CPU.
      Default value: `False`.
    dtype: Optional dtype for `x_values`.
      Default value: `None` which maps to the default dtype inferred by
      TensorFlow.
    name: Python `str` name prefixed to ops created by this function.
      Default value: `None` which is mapped to the default name
      `cubic_spline_interpolate`.

  Returns:
      A `Tensor` of the same shape and `dtype` as `x_values`. Represents
      the interpolated values.

  Raises:
    ValueError:
      If `x_values` batch shape is different from `spline_data.x_data` batch
      shape.
  """
    name = name or "cubic_spline_interpolate"
    with tf.name_scope(name):
        x_values = tf.convert_to_tensor(x_values, dtype=dtype, name="x_values")
        dtype = x_values.dtype
        # Unpack the spline data
        x_data = spline_data.x_data
        y_data = spline_data.y_data
        spline_coeffs = spline_data.spline_coeffs
        # Try broadcast batch_shapes
        x_values, x_data = utils.broadcast_common_batch_shape(x_values, x_data)
        x_values, y_data = utils.broadcast_common_batch_shape(x_values, y_data)
        x_values, spline_coeffs = utils.broadcast_common_batch_shape(
            x_values, spline_coeffs)
        # Determine the splines to use.
        indices = tf.searchsorted(x_data, x_values, side="right") - 1
        # This selects all elements for the start of the spline interval.
        # Make sure indices lie in the permissible range
        lower_encoding = tf.maximum(indices, 0)
        # This selects all elements for the end of the spline interval.
        # Make sure indices lie in the permissible range
        upper_encoding = tf.minimum(indices + 1,
                                    x_data.shape.as_list()[-1] - 1)
        # Prepare indices for `tf.gather` or `tf.one_hot`
        # TODO(b/156720909): Extract get_slice logic into a common utilities module
        # for cubic and linear interpolation
        if optimize_for_tpu:
            x_data_size = x_data.shape.as_list()[-1]
            lower_encoding = tf.one_hot(lower_encoding,
                                        x_data_size,
                                        dtype=dtype)
            upper_encoding = tf.one_hot(upper_encoding,
                                        x_data_size,
                                        dtype=dtype)
        # Calculate dx and dy.
        # Simplified logic:
        # dx = x_data[indices + 1] - x_data[indices]
        # dy = y_data[indices + 1] - y_data[indices]
        # `indices` is a tensor with different values per row/spline, hence we
        # select the knots either with a batched `tf.gather` or with a one-hot
        # selection matrix.
        def get_slice(x, encoding):
            if optimize_for_tpu:
                return tf.math.reduce_sum(tf.expand_dims(x, axis=-2) *
                                          encoding,
                                          axis=-1)
            else:
                return tf.gather(x,
                                 encoding,
                                 axis=-1,
                                 batch_dims=x.shape.rank - 1)

        x0 = get_slice(x_data, lower_encoding)
        x1 = get_slice(x_data, upper_encoding)
        dx = x1 - x0

        y0 = get_slice(y_data, lower_encoding)
        y1 = get_slice(y_data, upper_encoding)
        dy = y1 - y0

        spline_coeffs0 = get_slice(spline_coeffs, lower_encoding)
        spline_coeffs1 = get_slice(spline_coeffs, upper_encoding)

        t = (x_values - x0) / dx
        t = tf.where(dx > 0, t, tf.zeros_like(t))
        df = ((t + 1.0) * spline_coeffs1 * 2.0) - (
            (t - 2.0) * spline_coeffs0 * 2.0)
        df1 = df * t * (t - 1) / 6.0
        result = y0 + (t * dy) + (dx * dx * df1)
        # Use constant extrapolation outside the domain
        upper_bound = tf.expand_dims(tf.reduce_max(x_data, -1),
                                     -1) + tf.zeros_like(result)
        lower_bound = tf.expand_dims(tf.reduce_min(x_data, -1),
                                     -1) + tf.zeros_like(result)
        result = tf.where(
            tf.logical_and(x_values <= upper_bound, x_values >= lower_bound),
            result, tf.where(x_values > upper_bound, y0, y1))
        return result
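A minimal end-to-end sketch combining the cubic `build`/`interpolate` pair, assuming the public module path `tff.math.interpolation.cubic` and the `build_spline` alias mentioned in the docstring of Code Example #6 below.

```python
import tensorflow as tf
import tf_quant_finance as tff

cubic = tff.math.interpolation.cubic  # assumed public module path

x_data = tf.linspace(-5.0, 5.0, 11)
y_data = 1.0 / (1.0 + x_data**2)

spline = cubic.build_spline(x_data, y_data)
# -7.0 and 7.0 fall outside [-5, 5] and are constant-extrapolated to
# y_data[0] and y_data[-1]; the points inside the domain are interpolated.
y = cubic.interpolate([-7.0, 0.0, 3.3, 7.0], spline)
```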
Code Example #5
 def fn(x, y):
   return utils.broadcast_common_batch_shape(x, y)
Code Example #6
def build(x_data: types.RealTensor,
          y_data: types.RealTensor,
          boundary_condition_type: BoundaryConditionType = None,
          left_boundary_value: types.RealTensor = None,
          right_boundary_value: types.RealTensor = None,
          validate_args: bool = False,
          dtype: tf.DType = None,
          name=None) -> SplineParameters:
    """Builds a SplineParameters interpolation object.

  Given a `Tensor` of state points `x_data` and corresponding values `y_data`
  creates an object that contains interpolation coefficients. The object can be
  used by the `interpolate` function to get interpolated values for a set of
  state points `x` using the cubic spline interpolation algorithm.
  With the default `NATURAL` boundary condition, it assumes that the second
  derivatives at the first and last spline points are zero. The basic logic is
  explained in [1] (see also, e.g., [2]).

  Repeated entries in `x_data` are only allowed for the *right* boundary values
  of `x_data`.
  For example, `x_data` can be `[1., 2., 3., 4., 4., 4.]` but not
  `[1., 1., 2., 3.]`. The repeated values play no role in interpolation and are
  useful only for interpolating multiple splines with different numbers of data
  points. It is the user's responsibility to verify that the corresponding
  values of `y_data` are the same for the repeated values of `x_data`.

  Typical Usage Example:

  ```python
  import tensorflow as tf
  import tf_quant_finance as tff
  import numpy as np

  x_data = tf.linspace(-5.0, 5.0,  num=11)
  y_data = 1.0/(1.0 + x_data**2)
  spline = tff.math.interpolation.cubic.build_spline(x_data, y_data)
  x_args = [3.3, 3.4, 3.9]

  tff.math.interpolation.cubic.interpolate(x_args, spline)
  # Expected: [0.0833737, 0.07881707, 0.06149562]
  ```

  #### References:
  [1]: R. Sedgewick, Algorithms in C, 1990, p. 545-550.
    Link: https://api.semanticscholar.org/CorpusID:10976311
  [2]: R. Pienaar, M Choudhry. Fitting the term structure of interest rates:
    the practical implementation of cubic spline methodology.
    Link:
    http://yieldcurve.com/mktresearch/files/PienaarChoudhry_CubicSpline2.pdf

  Args:
    x_data: A real `Tensor` of shape `[..., num_points]` containing
      X-coordinates of points to fit the splines to. The values have to be
      monotonically non-decreasing along the last dimension.
    y_data: A `Tensor` of the same shape and `dtype` as `x_data` containing
      Y-coordinates of points to fit the splines to.
    boundary_condition_type: Boundary condition type for current cubic
      interpolation. Instance of BoundaryConditionType enum.
      Default value: `None` which maps to `BoundaryConditionType.NATURAL`.
    left_boundary_value: Must be supplied if and only if
      `boundary_condition_type` is `FIXED_FIRST_DERIVATIVE`, in which case it
      is the cubic spline's first derivative at `x_data[..., 0]`.
    right_boundary_value: Must be supplied if and only if
      `boundary_condition_type` is `FIXED_FIRST_DERIVATIVE`, in which case it
      is the cubic spline's first derivative at `x_data[..., num_points - 1]`.
    validate_args: Python `bool`. When `True`, verifies that the elements of
      `x_data` are sorted in non-decreasing order along the last dimension, at
      the cost of possibly degrading runtime performance.
      Default value: `False`.
    dtype: Optional dtype for both `x_data` and `y_data`.
      Default value: `None` which maps to the default dtype inferred by
        TensorFlow.
    name: Python `str` name prefixed to ops created by this function.
      Default value: `None` which is mapped to the default name
        `cubic_spline_build`.

  Returns:
    An instance of `SplineParameters`.
  """
    if boundary_condition_type is None:
        boundary_condition_type = BoundaryConditionType.NATURAL
    if name is None:
        name = 'cubic_spline_build'
    with tf.name_scope(name):
        x_data = tf.convert_to_tensor(x_data, dtype=dtype, name='x_data')
        y_data = tf.convert_to_tensor(y_data, dtype=dtype, name='y_data')
        # Sanity check inputs
        if validate_args:
            assert_sanity_check = [_validate_arguments(x_data)]
        else:
            assert_sanity_check = []
        x_data, y_data = utils.broadcast_common_batch_shape(x_data, y_data)

        if boundary_condition_type == BoundaryConditionType.FIXED_FIRST_DERIVATIVE:
            if left_boundary_value is None or right_boundary_value is None:
                raise ValueError(
                    'Expected non-empty left_boundary_value/right_boundary_value when '
                    'boundary_condition_type is FIXED_FIRST_DERIVATIVE, actual '
                    'left_boundary_value {0}, actual right_boundary_value {1}'.
                    format(left_boundary_value, right_boundary_value))
        with tf.compat.v1.control_dependencies(assert_sanity_check):
            spline_coeffs = _calculate_spline_coeffs(x_data, y_data,
                                                     boundary_condition_type,
                                                     left_boundary_value,
                                                     right_boundary_value)

        return SplineParameters(x_data=x_data,
                                y_data=y_data,
                                spline_coeffs=spline_coeffs)
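The `ValueError` check above means both boundary derivatives must be supplied whenever `FIXED_FIRST_DERIVATIVE` is selected. Below is a minimal sketch of such a call, using the `build` and `BoundaryConditionType` names defined in this example; pinning both end slopes to 0.0 is purely for illustration.

```python
import tensorflow as tf

x_data = tf.linspace(-5.0, 5.0, 11)
y_data = 1.0 / (1.0 + x_data**2)

# Fix the spline's first derivative to 0.0 at both ends instead of using the
# default NATURAL (zero second derivative) boundary condition.
spline = build(
    x_data,
    y_data,
    boundary_condition_type=BoundaryConditionType.FIXED_FIRST_DERIVATIVE,
    left_boundary_value=0.0,
    right_boundary_value=0.0)

# Omitting either boundary value while requesting FIXED_FIRST_DERIVATIVE
# raises the ValueError above.
```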