Example #1
def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
    """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an exponential decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step /
                                              decay_steps)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * floor(global_step /
                                              decay_steps))
  ```

  Example: natural exponential decay with a rate of 0.5:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.compat.v1.train.natural_exp_decay(learning_rate,
                                                       global_step,
                                                       decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number. Global step to use for the decay computation.
      Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number.  The decay rate.
    staircase: Whether to apply decay in a discrete staircase fashion, as
      opposed to continuously.
    name: String.  Optional name of the operation.  Defaults to
      'ExponentialTimeDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for changing
  the learning rate value across different invocations of optimizer functions.
  @end_compatibility
  """
    natural_exp_rate = tf.exp(tf.negative(decay_rate))
    decayed_lr = learning_rate_schedule.ExponentialDecay(learning_rate,
                                                         decay_steps,
                                                         natural_exp_rate,
                                                         staircase=staircase,
                                                         name=name)

    if not tf.executing_eagerly():
        decayed_lr = decayed_lr(global_step)
    else:
        decayed_lr = functools.partial(decayed_lr, global_step)
    return decayed_lr
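A minimal NumPy sketch (illustrative only, not the TensorFlow API) of the decay formula documented above, evaluated for a few steps with the same `decay_steps=5` and `k=0.5` used in the docstring example:

import numpy as np

initial_lr = 0.1
decay_steps = 5
k = 0.5  # decay_rate

for step in (0, 5, 10):
    continuous = initial_lr * np.exp(-k * step / decay_steps)
    staircase = initial_lr * np.exp(-k * np.floor(step / decay_steps))
    print(step, continuous, staircase)
# The two schedules agree whenever `step` is an exact multiple of `decay_steps`.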
Example #2
 def _inverse(self, y):
     y = self._maybe_assert_valid(y)
     return tf.exp(
         tf.math.log1p(-tf.exp(tf.math.log1p(-y) / self.concentration0)) /
         self.concentration1)
Example #3
 def _forward_log_det_jacobian(self, x):
     scale = tf.convert_to_tensor(self.scale)
     z = (x - self.loc) / scale
     return -z - tf.exp(-z) - tf.math.log(scale)
Example #4
 def testComposeFromTensor(self):
     x = tf.constant([-5., 0., 5.])
     self.assertAllClose(*self.evaluate([tf.exp(x), tfb.Exp()(x)]),
                         atol=0,
                         rtol=1e-3)
Example #5
 def _rate_parameter_no_checks(self):
     if self._rate is None:
         return tf.exp(self._log_rate)
     return tf.identity(self._rate)
Example #6
 def _testCumulativeLogSumExp(self, x, axis=0):
     result_naive = tf.cumsum(tf.exp(x), axis=axis)
     result_fused = tf.exp(tfp.math.log_cumsum_exp(x, axis=axis))
     self.assertAllClose(result_naive, result_fused)
Example #7
 def _forward(self, x):
   return tf.exp(x)
Example #8
 def _sample_next_volatilities(self, vol, dt, dwv):
     return vol * tf.exp(self._volvol * dwv * tf.sqrt(dt) -
                         self._volvol**2 * dt * 0.5)
Example #9
  def run_test_sample_consistent_log_prob(
      self, sess_run_fn, dist,
      num_samples=int(1e5), num_threshold=int(1e3), seed=None,
      batch_size=None,
      rtol=1e-2, atol=0.):
    """Tests that sample/log_prob are consistent with each other.

    "Consistency" means that `sample` and `log_prob` correspond to the same
    distribution.

    Note: this test only verifies a necessary condition for consistency--it
    does not verify sufficiency, and hence does not prove that `sample` and
    `log_prob` are truly consistent.

    Args:
      sess_run_fn: Python `callable` taking `list`-like of `Tensor`s and
        returning a list of results after running one "step" of TensorFlow
        computation, typically set to `sess.run`.
      dist: Distribution instance or object which implements `sample`,
        `log_prob`, `event_shape_tensor` and `batch_shape_tensor`.
      num_samples: Python `int` scalar indicating the number of Monte-Carlo
        samples to draw from `dist`.
      num_threshold: Python `int` scalar indicating the number of samples a
        bucket must contain before being compared to the probability.
        Default value: 1e3; must be at least 1.
        Warning: setting this too high will cause the test to falsely pass,
        while setting it too low will cause it to falsely fail.
      seed: Python `int` indicating the seed to use when sampling from `dist`.
        In general it is not recommended to use `None` during a test as this
        increases the likelihood of spurious test failure.
      batch_size: Hint for unpacking result of samples. Default: `None` means
        batch_size is inferred.
      rtol: Python `float`-type indicating the admissible relative error between
        analytical and sample statistics.
      atol: Python `float`-type indicating the admissible absolute error between
        analytical and sample statistics.

    Raises:
      ValueError: if `num_threshold < 1`.
    """
    if num_threshold < 1:
      raise ValueError('num_threshold({}) must be at least 1.'.format(
          num_threshold))
    # Histogram only supports vectors so we call it once per batch coordinate.
    y = dist.sample(num_samples, seed=test_seed_stream(hardcoded_seed=seed))
    y = tf.reshape(y, shape=[num_samples, -1])
    if batch_size is None:
      batch_size = tf.reduce_prod(dist.batch_shape_tensor())
    batch_dims = tf.shape(dist.batch_shape_tensor())[0]
    edges_expanded_shape = 1 + tf.pad(tensor=[-2], paddings=[[0, batch_dims]])
    for b, x in enumerate(tf.unstack(y, num=batch_size, axis=1)):
      counts, edges = self.histogram(x)
      edges = tf.reshape(edges, edges_expanded_shape)
      probs = tf.exp(dist.log_prob(edges))
      probs = tf.reshape(probs, shape=[-1, batch_size])[:, b]

      [counts_, probs_] = sess_run_fn([counts, probs])
      valid = counts_ > num_threshold
      probs_ = probs_[valid]
      counts_ = counts_[valid]
      self.assertAllClose(probs_, counts_ / num_samples,
                          rtol=rtol, atol=atol)
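A rough standalone sketch (NumPy/SciPy, not the TFP test harness above) of the same consistency check: bucket counts obtained from sampling should roughly match the probability mass implied by the density at the bucket centers, once low-count buckets are filtered out as with `num_threshold`:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
num_samples = 100_000
samples = rng.normal(size=num_samples)

counts, edges = np.histogram(samples, bins=50)
centers = 0.5 * (edges[:-1] + edges[1:])

empirical = counts / num_samples                      # empirical bucket probability
analytic = stats.norm.pdf(centers) * np.diff(edges)   # density * bucket width

valid = counts > 1_000                                # mirrors the num_threshold filter
assert np.allclose(empirical[valid], analytic[valid], rtol=0.1)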
Example #10
 def _mean(self):
   one = tf.constant(1., dtype=self.dtype)
   return self.scale * tf.exp(
       tf.math.lgamma(one + tf.math.reciprocal(self.concentration)))
Example #11
def option_price(*,
                 volatilities,
                 strikes,
                 expiries,
                 spots=None,
                 forwards=None,
                 discount_rates=None,
                 continuous_dividends=None,
                 cost_of_carries=None,
                 discount_factors=None,
                 is_call_options=None,
                 dtype=None,
                 name=None):
  """Computes the Black Scholes price for a batch of call or put options.

  #### Example

  ```python
    # Price a batch of 5 vanilla call options.
    volatilities = np.array([0.0001, 102.0, 2.0, 0.1, 0.4])
    forwards = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    # Strikes will automatically be broadcasted to shape [5].
    strikes = np.array([3.0])
    # Expiries will be broadcast to shape [5], i.e. each option has strike=3
    # and expiry = 1.
    expiries = 1.0
    computed_prices = tff.black_scholes.option_price(
        volatilities=volatilities,
        strikes=strikes,
        expiries=expiries,
        forwards=forwards)
  # Expected print output of computed prices:
  # [ 0.          2.          2.04806848  1.00020297  2.07303131]
  ```

  #### References:
  [1] Hull, John C., Options, Futures and Other Derivatives. Pearson, 2018.
  [2] Wikipedia contributors. Black-Scholes model. Available at:
    https://en.wikipedia.org/w/index.php?title=Black%E2%80%93Scholes_model

  Args:
    volatilities: Real `Tensor` of any shape and dtype. The volatilities to
      expiry of the options to price.
    strikes: A real `Tensor` of the same dtype and compatible shape as
      `volatilities`. The strikes of the options to be priced.
    expiries: A real `Tensor` of same dtype and compatible shape as
      `volatilities`. The expiry of each option. The units should be such that
      `expiry * volatility**2` is dimensionless.
    spots: A real `Tensor` of any shape that broadcasts to the shape of the
      `volatilities`. The current spot price of the underlying. Either this
      argument or the `forwards` (but not both) must be supplied.
    forwards: A real `Tensor` of any shape that broadcasts to the shape of
      `volatilities`. The forwards to maturity. Either this argument or
      `spots` (but not both) must be supplied.
    discount_rates: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      If not `None`, discount factors are calculated as e^(-rT),
      where r are the discount rates, or risk free rates. At most one of
      discount_rates and discount_factors can be supplied.
      Default value: `None`, equivalent to r = 0 and discount factors = 1 when
      discount_factors also not given.
    continuous_dividends: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      If not `None`, `cost_of_carries` is calculated as r - q,
      where r are the `discount_rates` and q is `continuous_dividends`. At most
      one of `continuous_dividends` and `cost_of_carries` may be supplied.
      Default value: `None`, equivalent to q = 0.
    cost_of_carries: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      Cost of storing a physical commodity, the cost of interest paid when
      long, or the opportunity cost, or the cost of paying dividends when short.
      If not `None`, and `spots` is supplied, used to calculate forwards from
      `spots`: F = e^(bT) * S, where F is the forwards price, b is the cost of
      carries, T is expiries and S is the spot price. If `None`, the value is
      assumed to be equal to `discount_rates` - `continuous_dividends`.
      Default value: `None`, equivalent to b = r - q.
    discount_factors: An optional real `Tensor` of same dtype as the
      `volatilities`. If not `None`, these are the discount factors to expiry
      (i.e. e^(-rT)). Mutually exclusive with `discount_rates`. If neither is
      given, no discounting is applied (i.e. the undiscounted option price is
      returned). If `spots` is supplied and `discount_factors` is not `None`
      then this is also used to compute the forwards to expiry. At most one of
      `discount_rates` and `discount_factors` can be supplied.
      Default value: `None`, which maps to e^(-rT) computed from
      `discount_rates`.
    is_call_options: A boolean `Tensor` of a shape compatible with
      `volatilities`. Indicates whether the option is a call (if True) or a put
      (if False). If not supplied, call options are assumed.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for conversion
      of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: `None` which maps to the default dtype inferred by
        TensorFlow.
    name: str. The name for the ops created by this function.
      Default value: `None` which is mapped to the default name `option_price`.

  Returns:
    option_prices: A `Tensor` of the same shape as `forwards`. The Black
    Scholes price of the options.

  Raises:
    ValueError: If both `forwards` and `spots` are supplied or if neither is
      supplied.
    ValueError: If both `discount_rates` and `discount_factors` are supplied.
    ValueError: If both `continuous_dividends` and `cost_of_carries` are
      supplied.
  """
  if (spots is None) == (forwards is None):
    raise ValueError('Either spots or forwards must be supplied but not both.')
  if (discount_rates is not None) and (discount_factors is not None):
    raise ValueError('At most one of discount_rates and discount_factors may '
                     'be supplied')
  if (continuous_dividends is not None) and (cost_of_carries is not None):
    raise ValueError('At most one of continuous_dividends and cost_of_carries '
                     'may be supplied')

  with tf.name_scope(name or 'option_price'):
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    dtype = strikes.dtype
    volatilities = tf.convert_to_tensor(
        volatilities, dtype=dtype, name='volatilities')
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')

    if discount_rates is not None:
      discount_rates = tf.convert_to_tensor(
          discount_rates, dtype=dtype, name='discount_rates')
    elif discount_factors is not None:
      discount_rates = -tf.math.log(discount_factors) / expiries
    else:
      discount_rates = tf.convert_to_tensor(
          0.0, dtype=dtype, name='discount_rates')

    if continuous_dividends is None:
      continuous_dividends = tf.convert_to_tensor(
          0.0, dtype=dtype, name='continuous_dividends')

    if cost_of_carries is not None:
      cost_of_carries = tf.convert_to_tensor(
          cost_of_carries, dtype=dtype, name='cost_of_carries')
    else:
      cost_of_carries = discount_rates - continuous_dividends

    if discount_factors is not None:
      discount_factors = tf.convert_to_tensor(
          discount_factors, dtype=dtype, name='discount_factors')
    else:
      discount_factors = tf.exp(-discount_rates * expiries)

    if forwards is not None:
      forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
    else:
      spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
      forwards = spots * tf.exp(cost_of_carries * expiries)

    sqrt_var = volatilities * tf.math.sqrt(expiries)
    d1 = (tf.math.log(forwards / strikes) + sqrt_var * sqrt_var / 2) / sqrt_var
    d2 = d1 - sqrt_var
    undiscounted_calls = forwards * _ncdf(d1) - strikes * _ncdf(d2)
    if is_call_options is None:
      return discount_factors * undiscounted_calls
    undiscounted_forward = forwards - strikes
    undiscounted_puts = undiscounted_calls - undiscounted_forward
    predicate = tf.broadcast_to(is_call_options, tf.shape(undiscounted_calls))
    return discount_factors * tf.where(predicate, undiscounted_calls,
                                       undiscounted_puts)
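A short usage sketch of `option_price` above (illustrative only; it assumes `numpy as np` and the `tff` alias used in the docstring example), exercising the `is_call_options` and `discount_rates` arguments to price the same batch as puts with a flat 5% discount rate:

import numpy as np
import tf_quant_finance as tff

put_prices = tff.black_scholes.option_price(
    volatilities=np.array([0.0001, 102.0, 2.0, 0.1, 0.4]),
    strikes=np.array([3.0]),
    expiries=1.0,
    forwards=np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
    discount_rates=0.05,
    is_call_options=False)
# By put-call parity, each put equals the matching call minus
# discount_factor * (forward - strike).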
Example #12
 def to_loc_scale(log_precision, mean_times_precision):
   variance = 1./tf.exp(log_precision)
   mean = mean_times_precision * variance
   return {'loc': mean, 'scale': tf.sqrt(variance)}
Example #13
def _gamma_from_loc_scale_named(loc, log_scale):
  return {'concentration': (loc / tf.exp(log_scale))**2,
          'rate': loc / (tf.exp(log_scale))**2}
Example #14
def _gamma_from_loc_scale_positional(loc, log_scale):
  return (loc / tf.exp(log_scale))**2, loc / (tf.exp(log_scale))**2
Example #15
 def _prob(self, x):
     return tf.exp(self._log_prob(x))
Example #16
def reduce_weighted_logsumexp(logx,
                              w=None,
                              axis=None,
                              keep_dims=False,
                              return_sign=False,
                              name=None):
  """Computes `log(abs(sum(weight * exp(elements across tensor dimensions))))`.

  If all weights `w` are known to be positive, it is more efficient to directly
  use `reduce_logsumexp`, i.e., `tf.reduce_logsumexp(logx + tf.log(w))` is more
  efficient than `du.reduce_weighted_logsumexp(logx, w)`.

  Reduces `logx` along the dimensions given in `axis`.
  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
  entry in `axis`. If `keep_dims` is true, the reduced dimensions
  are retained with length 1.

  If `axis` has no entries, all dimensions are reduced, and a
  tensor with a single element is returned.

  This function is more numerically stable than log(sum(w * exp(input))). It
  avoids overflows caused by taking the exp of large inputs and underflows
  caused by taking the log of small inputs.

  For example:

  ```python
  x = tf.constant([[0., 0, 0],
                   [0, 0, 0]])

  w = tf.constant([[-1., 1, 1],
                   [1, 1, 1]])

  du.reduce_weighted_logsumexp(x, w)
  # ==> log(-1*1 + 1*1 + 1*1 + 1*1 + 1*1 + 1*1) = log(4)

  du.reduce_weighted_logsumexp(x, w, axis=0)
  # ==> [log(-1+1), log(1+1), log(1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1)
  # ==> [log(-1+1+1), log(1+1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1, keep_dims=True)
  # ==> [[log(-1+1+1)], [log(1+1+1)]]

  du.reduce_weighted_logsumexp(x, w, axis=[0, 1])
  # ==> log(-1+5)
  ```

  Args:
    logx: The tensor to reduce. Should have numeric type.
    w: The weight tensor. Should have numeric type identical to `logx`.
    axis: The dimensions to reduce. If `None` (the default), reduces all
      dimensions. Must be in the range `[-rank(input_tensor),
      rank(input_tensor))`.
    keep_dims: If true, retains reduced dimensions with length 1.
    return_sign: If `True`, returns the sign of the result.
    name: A name for the operation (optional).

  Returns:
    lswe: The `log(abs(sum(weight * exp(x))))` reduced tensor.
    sign: (Optional) The sign of `sum(weight * exp(x))`.
  """
  with tf.name_scope(name or 'reduce_weighted_logsumexp'):
    logx = tf.convert_to_tensor(logx, name='logx')
    if w is None:
      lswe = tf.reduce_logsumexp(logx, axis=axis, keepdims=keep_dims)
      if return_sign:
        sgn = tf.ones_like(lswe)
        return lswe, sgn
      return lswe
    w = tf.convert_to_tensor(w, dtype=logx.dtype, name='w')
    log_absw_x = logx + tf.math.log(tf.abs(w))
    max_log_absw_x = tf.reduce_max(log_absw_x, axis=axis, keepdims=True)
    # If the largest element is `-inf` or `inf` then we don't bother subtracting
    # off the max. We do this because otherwise we'd get `inf - inf = NaN`. That
    # this is ok follows from the fact that we're actually free to subtract any
    # value we like, so long as we add it back after taking the `log(sum(...))`.
    max_log_absw_x = tf.where(
        tf.math.is_inf(max_log_absw_x),
        tf.zeros([], max_log_absw_x.dtype),
        max_log_absw_x)
    wx_over_max_absw_x = (tf.sign(w) * tf.exp(log_absw_x - max_log_absw_x))
    sum_wx_over_max_absw_x = tf.reduce_sum(
        wx_over_max_absw_x, axis=axis, keepdims=keep_dims)
    if not keep_dims:
      max_log_absw_x = tf.squeeze(max_log_absw_x, axis)
    sgn = tf.sign(sum_wx_over_max_absw_x)
    lswe = max_log_absw_x + tf.math.log(sgn * sum_wx_over_max_absw_x)
    if return_sign:
      return lswe, sgn
    return lswe
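A quick NumPy check (illustrative only) of the first value in the docstring example above: with `x` all zeros, `exp(x)` is 1 everywhere, so the weighted log-sum-exp reduces to `log(|sum(w)|) = log(4)`:

import numpy as np

x = np.zeros((2, 3))
w = np.array([[-1., 1., 1.],
              [1., 1., 1.]])

lswe = np.log(np.abs(np.sum(w * np.exp(x))))
print(lswe, np.log(4.))  # both ~1.386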
Example #17
def european_option_price(
    *,
    strikes: types.RealTensor,
    expiries: types.RealTensor,
    spots: types.RealTensor = None,
    forwards: types.RealTensor = None,
    is_call_options: types.BoolTensor = None,
    discount_rates: types.RealTensor = None,
    dividend_rates: types.RealTensor = None,
    discount_factors: types.RealTensor = None,
    variances: types.RealTensor,
    mean_reversion: types.RealTensor,
    theta: types.RealTensor,
    volvol: types.RealTensor,
    rho: types.RealTensor = None,
    integration_method: integration.IntegrationMethod = None,
    dtype: tf.DType = None,
    name: str = None,
    **kwargs) -> types.RealTensor:
  """Calculates European option prices under the Heston model.

  Heston published his eponymous model in 1993 [3]. He provided a
  semi-analytical formula for pricing European options via Fourier transform
  under his model. However, as noted by Albrecher [1], the characteristic
  function used in Heston's paper can suffer from numerical issues because of
  the discontinuous nature of the square root function in the complex plane,
  and a second version of the characteristic function which doesn't suffer
  from this shortcoming should be used instead. Attari [2] further refined the
  numerical method by reducing the number of numerical integrations (only one
  Fourier transform instead of two) and by using an integrand that decays
  quadratically instead of linearly. Attari's numerical method is implemented
  here.

  Heston model:
  ```
    dF/F = sqrt(V) * dW_1
    dV = mean_reversion * (theta - V) * dt + volvol * sqrt(V) * dW_2
    <dW_1, dW_2> = rho * dt
  ```
  The variance V follows a square root process.

  #### Example
  ```python
  import tf_quant_finance as tff
  import numpy as np
  prices = tff.models.heston.approximations.european_option_price(
      variances=0.11,
      strikes=102.0,
      expiries=1.2,
      forwards=100.0,
      is_call_options=True,
      mean_reversion=2.0,
      theta=0.5,
      volvol=0.15,
      rho=0.3,
      discount_factors=1.0,
      dtype=np.float64)
  # Expected print output of prices:
  # 24.82219619
  ```
  #### References
  [1] Hansjorg Albrecher, The Little Heston Trap
  https://perswww.kuleuven.be/~u0009713/HestonTrap.pdf
  [2] Mukarram Attari, Option Pricing Using Fourier Transforms: A Numerically
  Efficient Simplification
  https://papers.ssrn.com/sol3/papers.cfm?abstract_id=520042
  [3] Steven L. Heston, A Closed-Form Solution for Options with Stochastic
  Volatility with Applications to Bond and Currency Options
  http://faculty.baruch.cuny.edu/lwu/890/Heston93.pdf
  Args:
    strikes: A real `Tensor` of any shape and dtype. The strikes of the options
      to be priced.
    expiries: A real `Tensor` of the same dtype and compatible shape as
      `strikes`.  The expiry of each option.
    spots: A real `Tensor` of any shape that broadcasts to the shape of the
      `strikes`. The current spot price of the underlying. Either this
      argument or the `forwards` (but not both) must be supplied.
    forwards: A real `Tensor` of any shape that broadcasts to the shape of
      `strikes`. The forwards to maturity. Either this argument or the
      `spots` must be supplied but both must not be supplied.
    is_call_options: A boolean `Tensor` of a shape compatible with
      `strikes`. Indicates whether the option is a call (if True) or a put
      (if False). If not supplied, call options are assumed.
    discount_rates: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      If not `None`, discount factors are calculated as e^(-rT),
      where r are the discount rates, or risk free rates. At most one of
      discount_rates and discount_factors can be supplied.
      Default value: `None`, equivalent to r = 0 and discount factors = 1 when
      discount_factors also not given.
    dividend_rates: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      Default value: `None`, equivalent to q = 0.
    discount_factors: An optional real `Tensor` of same dtype as the
      `strikes`. If not `None`, these are the discount factors to expiry
      (i.e. e^(-rT)). Mutually exclusive with `discount_rates`. If neither is
      given, no discounting is applied (i.e. the undiscounted option price is
      returned). If `spots` is supplied and `discount_factors` is not `None`
      then this is also used to compute the forwards to expiry. At most one of
      `discount_rates` and `discount_factors` can be supplied.
      Default value: `None`, which maps to e^(-rT) calculated from
      discount_rates.
    variances: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The initial value of the variance.
    mean_reversion: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The mean reversion strength of the variance square root
      process.
    theta: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The mean reversion level of the variance square root process.
    volvol: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The volatility of the variance square root process (the
      volatility of volatility).
    rho: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The correlation between spot and variance.
    integration_method: An instance of `math.integration.IntegrationMethod`.
      Default value: `None` which maps to the Simpsons integration rule.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for conversion
      of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: None which maps to the default dtype inferred by
      TensorFlow.
    name: str. The name for the ops created by this function.
      Default value: None which is mapped to the default name
      `heston_price`.
    **kwargs: Additional parameters for the underlying integration method.
      If not supplied and `integration_method` is Simpson, then uses
      `IntegrationMethod.COMPOSITE_SIMPSONS_RULE` with `num_points=1001`, and
      bounds `lower=1e-9`, `upper=100`.
  Returns:
    A `Tensor` of the same shape as the input data which is the price of
    European options under the Heston model.
  """
  if (spots is None) == (forwards is None):
    raise ValueError('Either spots or forwards must be supplied but not both.')
  if (discount_rates is not None) and (discount_factors is not None):
    raise ValueError('At most one of discount_rates and discount_factors may '
                     'be supplied')

  with tf.compat.v1.name_scope(name, default_name='eu_option_price'):
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    dtype = strikes.dtype
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
    mean_reversion = tf.convert_to_tensor(mean_reversion, dtype=dtype,
                                          name='mean_reversion')
    theta = tf.convert_to_tensor(theta, dtype=dtype, name='theta')
    volvol = tf.convert_to_tensor(volvol, dtype=dtype, name='volvol')
    rho = tf.convert_to_tensor(rho, dtype=dtype, name='rho')
    variances = tf.convert_to_tensor(variances, dtype=dtype, name='variances')

    if discount_factors is not None:
      discount_factors = tf.convert_to_tensor(
          discount_factors, dtype=dtype, name='discount_factors')

    if discount_rates is not None:
      discount_rates = tf.convert_to_tensor(
          discount_rates, dtype=dtype, name='discount_rates')
    elif discount_factors is not None:
      discount_rates = -tf.math.log(discount_factors) / expiries
    else:
      discount_rates = tf.convert_to_tensor(
          0.0, dtype=dtype, name='discount_rates')

    if dividend_rates is None:
      dividend_rates = 0.0
    dividend_rates = tf.convert_to_tensor(
        dividend_rates, dtype=dtype, name='dividend_rates')

    if discount_factors is None:
      discount_factors = tf.exp(-discount_rates * expiries)  # pylint: disable=invalid-unary-operand-type

    if forwards is not None:
      forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
    else:
      spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
      cost_of_carries = discount_rates - dividend_rates
      forwards = spots * tf.exp(cost_of_carries * expiries)

    # Cast as complex for the characteristic function calculation
    expiries_real = tf.complex(expiries, tf.zeros_like(expiries))
    mean_reversion_real = tf.complex(mean_reversion,
                                     tf.zeros_like(mean_reversion))
    theta_real = tf.complex(theta, tf.zeros_like(theta))
    volvol_real = tf.complex(volvol, tf.zeros_like(volvol))
    rho_real = tf.complex(rho, tf.zeros_like(rho))
    variances_real = tf.complex(variances, tf.zeros_like(variances))

    # Prepare inputs to build an integrand_function
    expiries_real = tf.expand_dims(expiries_real, -1)
    mean_reversion_real = tf.expand_dims(mean_reversion_real, -1)
    theta_real = tf.expand_dims(theta_real, -1)
    volvol_real = tf.expand_dims(volvol_real, -1)
    rho_real = tf.expand_dims(rho_real, -1)
    variances_real = tf.expand_dims(variances_real, -1)
    if integration_method is None:
      integration_method = _COMPOSITE_SIMPSONS_RULE
    if integration_method == _COMPOSITE_SIMPSONS_RULE:
      if 'num_points' not in kwargs:
        kwargs['num_points'] = 1001
      if 'lower' not in kwargs:
        kwargs['lower'] = 1e-9
      if 'upper' not in kwargs:
        kwargs['upper'] = 100
    def char_fun(u):
      # Using 'second formula' for the (first) characteristic function of
      # log( spot_T / forwards )
      # (noted 'phi_2' in 'The Little Heston Trap', (Albrecher))
      u_real = tf.complex(u, tf.zeros_like(u))
      u_imag = tf.complex(tf.zeros_like(u), u)
      s = rho_real * volvol_real * u_imag
      # TODO(b/156221007): investigate why
      # s_mean_reversion = (s - mean_reversion_real)**2 leads to a wrong result
      # in graph mode.
      s_mean_reversion = ((s - mean_reversion_real) * s
                          - (s - mean_reversion_real) * mean_reversion_real)
      d = s_mean_reversion - volvol_real ** 2 * (-u_imag - u_real ** 2)
      d = tf.math.sqrt(d)
      g = (mean_reversion_real - s - d) / (mean_reversion_real - s + d)
      a = mean_reversion_real * theta_real
      h = g * tf.math.exp(-d * expiries_real)
      m = 2 * tf.math.log((1 - h) / (1 - g))
      c = (a / volvol_real ** 2) * ((mean_reversion_real - s - d)
                                    * expiries_real - m)
      e = (1 - tf.math.exp(-d * expiries_real))
      d_new = (mean_reversion_real - s - d) / volvol_real ** 2 * (e / (1 - h))
      return tf.math.exp(c + d_new * variances_real)

    def integrand_function(u, k):
      # Note that with [2], integrand is in 1 / u**2,
      # which converges faster than Heston 1993 (which is in 1 /u)
      char_fun_complex = char_fun(u)
      char_fun_real_part = tf.math.real(char_fun_complex)
      char_fun_imag_part = tf.math.imag(char_fun_complex)

      a = (char_fun_real_part + char_fun_imag_part / u) * tf.math.cos(u * k)
      b = (char_fun_imag_part - char_fun_real_part / u) * tf.math.sin(u * k)

      return (a + b) / (1.0 + u * u)

    k = tf.expand_dims(tf.math.log(strikes / forwards), axis=-1)

    integral = integration.integrate(
        lambda u: integrand_function(u, k),
        method=integration_method,
        dtype=dtype,
        **kwargs)
    undiscounted_call_prices = forwards - strikes * (0.5 + integral / _PI_)

    if is_call_options is None:
      return undiscounted_call_prices * discount_factors
    else:
      is_call_options = tf.convert_to_tensor(is_call_options, dtype=tf.bool,
                                             name='is_call_options')
      # Use call-put parity for Put
      undiscounted_put_prices = undiscounted_call_prices - forwards + strikes

      undiscount_prices = tf.where(
          is_call_options,
          undiscounted_call_prices,
          undiscounted_put_prices)
      return undiscount_prices * discount_factors
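A small usage sketch (illustrative only; it assumes the `tff` import shown in the docstring example) pricing the matching put with `european_option_price` above and checking put-call parity, `C - P = DF * (F - K)`, which with `discount_factors=1.0` gives `forwards - strikes = -2.0`:

import numpy as np
import tf_quant_finance as tff

common = dict(
    variances=0.11, strikes=102.0, expiries=1.2, forwards=100.0,
    mean_reversion=2.0, theta=0.5, volvol=0.15, rho=0.3,
    discount_factors=1.0, dtype=np.float64)

call = tff.models.heston.approximations.european_option_price(
    is_call_options=True, **common)
put = tff.models.heston.approximations.european_option_price(
    is_call_options=False, **common)
# call - put should equal forwards - strikes = -2.0, up to numerical error.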
Example #18
 def js1(logu):
   return (-logu
           - (1. + tf.exp(logu)) * (
               tf.nn.softplus(logu)))
Example #19
 def _prob(self, counts):
   return tf.exp(self._log_prob(counts))
Example #20
 def js2(logu):
   return 2. * (tf.exp(logu) * (
       logu - tf.nn.softplus(logu)))
Example #21
 def _sample_n(self, n, seed=None):
   shape = tf.concat([[n], self.batch_shape_tensor()], 0)
   sampled = tf.random.uniform(shape, maxval=1., seed=seed, dtype=self.dtype)
   log_sample = tf.math.log(
       self.scale) - tf.math.log1p(-sampled) / self.concentration
   return tf.exp(log_sample)
Example #22
def expectation(f, samples, log_prob=None, use_reparametrization=True,
                axis=0, keepdims=False, name=None, keep_dims=False):
  """Computes the Monte-Carlo approximation of `E_p[f(X)]`.

  This function computes the Monte-Carlo approximation of an expectation, i.e.,

  ```none
  E_p[f(X)] approx= m**-1 sum_j^m f(x_j),  x_j ~iid p(X)
  ```

  where:

  - `x_j = samples[j, ...]`,
  - `log(p(samples)) = log_prob(samples)` and
  - `m = prod(shape(samples)[axis])`.

  Tricks: Reparameterization and Score-Gradient

  When p is "reparameterized", i.e., a diffeomorphic transformation of a
  parameterless distribution (e.g.,
  `Normal(Y; m, s) <=> Y = sX + m, X ~ Normal(0,1)`), we can swap gradient and
  expectation, i.e.,
  `grad[ Avg{ s_i : i=1...n } ] = Avg{ grad[s_i] : i=1...n }` where
  `S_n = Avg{s_i}` and `s_i = f(x_i), x_i ~ p`.

  However, if p is not reparameterized, TensorFlow's gradient will be incorrect
  since the chain-rule stops at samples of non-reparameterized distributions.
  (The non-differentiated result, `approx_expectation`, is the same regardless
  of `use_reparametrization`.) In this circumstance using the Score-Gradient
  trick results in an unbiased gradient, i.e.,

  ```none
  grad[ E_p[f(X)] ]
  = grad[ int dx p(x) f(x) ]
  = int dx grad[ p(x) f(x) ]
  = int dx [ p'(x) f(x) + p(x) f'(x) ]
  = int dx p(x) [p'(x) / p(x) f(x) + f'(x) ]
  = int dx p(x) grad[ f(x) p(x) / stop_grad[p(x)] ]
  = E_p[ grad[ f(x) p(x) / stop_grad[p(x)] ] ]
  ```

  When p is reparameterized, it is usually preferable to set
  `use_reparametrization=True`.

  Warning: users are responsible for verifying `p` is a "reparameterized"
  distribution.

  Example Use:

  ```python
  # Monte-Carlo approximation of a reparameterized distribution, e.g., Normal.

  num_draws = int(1e5)
  p = tfp.distributions.Normal(loc=0., scale=1.)
  q = tfp.distributions.Normal(loc=1., scale=2.)
  exact_kl_normal_normal = tfp.distributions.kl_divergence(p, q)
  # ==> 0.44314718
  approx_kl_normal_normal = tfp.monte_carlo.expectation(
      f=lambda x: p.log_prob(x) - q.log_prob(x),
      samples=p.sample(num_draws, seed=42),
      log_prob=p.log_prob,
      use_reparametrization=(p.reparameterization_type
                             == tfp.distributions.FULLY_REPARAMETERIZED))
  # ==> 0.44632751
  # Relative Error: <1%

  # Monte-Carlo approximation of non-reparameterized distribution,
  # e.g., Bernoulli.

  num_draws = int(1e5)
  p = tfp.distributions.Bernoulli(probs=0.4)
  q = tfp.distributions.Bernoulli(probs=0.8)
  exact_kl_bernoulli_bernoulli = tfp.distributions.kl_divergence(p, q)
  # ==> 0.38190854
  approx_kl_bernoulli_bernoulli = tfp.monte_carlo.expectation(
      f=lambda x: p.log_prob(x) - q.log_prob(x),
      samples=p.sample(num_draws, seed=42),
      log_prob=p.log_prob,
      use_reparametrization=(p.reparameterization_type
                             == tfp.distributions.FULLY_REPARAMETERIZED))
  # ==> 0.38336259
  # Relative Error: <1%

  # For comparing the gradients, see `expectation_test.py`.
  ```

  Note: The above example is for illustration only. To compute approximate
  KL-divergence, the following is preferred:

  ```python
  approx_kl_p_q = bf.monte_carlo_csiszar_f_divergence(
      f=bf.kl_reverse,
      p_log_prob=q.log_prob,
      q=p,
      num_draws=num_draws)
  ```

  Args:
    f: Python callable which can return `f(samples)`.
    samples: `Tensor` or nested structure (list, dict, etc.) of `Tensor`s,
      representing samples used to form the Monte-Carlo approximation of
      `E_p[f(X)]`.  A batch of samples should be indexed by `axis` dimensions.
    log_prob: Python callable which can return `log_prob(samples)`. Must
      correspond to the natural-logarithm of the pdf/pmf of each sample. Only
      required/used if `use_reparametrization=False`.
      Default value: `None`.
    use_reparametrization: Python `bool` indicating that the approximation
      should use the fact that the gradient of samples is unbiased. Whether
      `True` or `False`, this arg only affects the gradient of the resulting
      `approx_expectation`.
      Default value: `True`.
    axis: The dimensions to average. If `None`, averages all
      dimensions.
      Default value: `0` (the left-most dimension).
    keepdims: If True, retains averaged dimensions using size `1`.
      Default value: `False`.
    name: A `name_scope` for operations created by this function.
      Default value: `None` (which implies "expectation").
    keep_dims: (Deprecated) If True, retains averaged dimensions using size `1`.
      Default value: `False`.

  Returns:
    approx_expectation: `Tensor` corresponding to the Monte-Carlo approximation
      of `E_p[f(X)]`.

  Raises:
    ValueError: if `f` is not a Python `callable`.
    ValueError: if `use_reparametrization=False` and `log_prob` is not a Python
      `callable`.
  """
  keepdims = keepdims or keep_dims
  del keep_dims
  with tf.name_scope(name or 'expectation'):
    if not callable(f):
      raise ValueError('`f` must be a callable function.')
    if use_reparametrization:
      return tf.reduce_mean(f(samples), axis=axis, keepdims=keepdims)
    else:
      if not callable(log_prob):
        raise ValueError('`log_prob` must be a callable function.')
      stop = tf.stop_gradient  # For readability.
      x = tf.nest.map_structure(stop, samples)
      logpx = log_prob(x)
      fx = f(x)  # Call `f` once in case it has side-effects.
      # To achieve this, we use the fact that:
      #   `h(x) - stop(h(x)) == zeros_like(h(x))`
      # but its gradient is grad[h(x)].
      #
      # This technique was published as:
      # Jakob Foerster, Greg Farquhar, Maruan Al-Shedivat, Tim Rocktaeschel,
      # Eric P. Xing, Shimon Whiteson (ICML 2018)
      # "DiCE: The Infinitely Differentiable Monte-Carlo Estimator"
      # https://arxiv.org/abs/1802.05098
      #
      # Unlike using:
      #   fx = fx + stop(fx) * (logpx - stop(logpx)),
      # DiCE ensures that any order gradients of the objective
      # are unbiased gradient estimators.
      #
      # Note that IEEE754 specifies that `x - x == 0.` and `x + 0. == x`, hence
      # this trick loses no precision. For more discussion regarding the
      # relevant portions of the IEEE754 standard, see the StackOverflow
      # question,
      # "Is there a floating point value of x, for which x-x == 0 is false?"
      # http://stackoverflow.com/q/2686644
      dice = fx * tf.exp(logpx - stop(logpx))
      return tf.reduce_mean(dice, axis=axis, keepdims=keepdims)
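A minimal sketch (plain TensorFlow with made-up stand-in values) of the DiCE identity used above: `fx * exp(logpx - stop(logpx))` equals `fx` in value, but its gradient picks up the extra score-function term `fx * d(logpx)/d(theta)`:

import tensorflow as tf

theta = tf.Variable(0.5)
with tf.GradientTape() as tape:
  logpx = -0.5 * (1.3 - theta) ** 2   # stand-in log-density of a fixed sample
  fx = (1.3 - 2.0) ** 2               # stand-in objective f(x); no theta dependence
  dice = fx * tf.exp(logpx - tf.stop_gradient(logpx))

print(dice.numpy())                        # 0.49, the same value as fx
print(tape.gradient(dice, theta).numpy())  # 0.49 * (1.3 - 0.5) = 0.392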
Example #23
 def _log_normalization(self, log_rate):
     return tf.exp(log_rate)
Example #24
  def _testSampleConsistentLogProbInterval(self,
                                           concentrations,
                                           det_bounds,
                                           dim,
                                           num_samples=int(1e5),
                                           dtype=np.float32,
                                           input_output_cholesky=False,
                                           false_fail_rate=1e-6,
                                           target_discrepancy=0.1,
                                           seed=42):
    # Consider the set M of dim x dim correlation matrices whose
    # determinant exceeds some bound (rationale for bound forthwith).
    # - This is a (convex!) shape in dim * (dim - 1) / 2 dimensions
    #   (because a correlation matrix is determined by its lower
    #   triangle, and the main diagonal is all 1s).
    # - Further, M is contained entirely in the [-1,1] cube,
    #   because no correlation can fall outside that interval.
    #
    # We have two different ways to estimate the volume of M:
    # - Importance sampling from the LKJ distribution
    # - Importance sampling from the uniform distribution on the cube
    #
    # This test checks that these two methods agree.  However, because
    # the uniform proposal leads to many rejections (thus slowness),
    # those volumes are computed offline and the confidence intervals
    # are presented to this test procedure in the "volume_bounds"
    # table.
    #
    # Why place a lower bound on the determinant?  Because for eta > 1,
    # the density of LKJ approaches 0 as the determinant approaches 0.
    # However, the test methodology requires an upper bound on the
    # importance weights produced.  Rejecting matrices with too-small
    # determinant (from both methods) allows me to supply that bound.
    #
    # I considered several alternative regions whose volume I might
    # know analytically (without having to do rejection).
    # - Option a: Some hypersphere guaranteed to be contained inside M.
    #   - Con: I don't know a priori how to find a radius for it.
    #   - Con: I still need a lower bound on the determinants that appear
    #     in this sphere, and I don't know how to compute it.
    # - Option b: Some trapezoid given as the convex hull of the
    #   nearly-extreme correlation matrices (i.e., those that partition
    #   the variables into two strongly anti-correlated groups).
    #   - Con: Would have to dig up n-d convex hull code to implement this.
    #   - Con: Need to compute the volume of that convex hull.
    #   - Con: Need a bound on the determinants of the matrices in that hull.
    # - Option c: Same thing, but with the matrices that make a single pair
    #   of variables strongly correlated (or anti-correlated), and leaves
    #   the others uncorrelated.
    #   - Same cons, except that there is a determinant bound (which
    #     felt pretty loose).
    lows = [dtype(volume_bounds[dim][db][0]) for db in det_bounds]
    highs = [dtype(volume_bounds[dim][db][1]) for db in det_bounds]
    concentration = np.array(concentrations, dtype=dtype)
    det_bounds = np.array(det_bounds, dtype=dtype)
    # Due to possible numerical inaccuracies while lower bounding the
    # determinant, the maximum of the importance weights may exceed the
    # theoretical maximum (importance_maxima). We add a tolerance to guard
    # against this. An alternative would have been to add a threshold while
    # filtering in _det_ok_mask, but that would affect the mean as well.
    high_tolerance = 1e-6

    testee_lkj = tfd.LKJ(
        dimension=dim,
        concentration=concentration,
        input_output_cholesky=input_output_cholesky,
        validate_args=True)
    x = testee_lkj.sample(num_samples, seed=seed)
    importance_weights = (
        tf.exp(-testee_lkj.log_prob(x)) * _det_ok_mask(x, det_bounds,
                                                       input_output_cholesky))
    importance_maxima = (1. / det_bounds) ** (concentration - 1) * tf.exp(
        testee_lkj._log_normalization())
    check1 = st.assert_true_mean_in_interval_by_dkwm(
        samples=importance_weights,
        low=0.,
        high=importance_maxima + high_tolerance,
        expected_low=lows,
        expected_high=highs,
        false_fail_rate=false_fail_rate)
    check2 = assert_util.assert_less(
        st.min_discrepancy_of_true_means_detectable_by_dkwm(
            num_samples,
            low=0.,
            high=importance_maxima + high_tolerance,
            false_fail_rate=false_fail_rate,
            false_pass_rate=false_fail_rate), dtype(target_discrepancy))
    self.evaluate([check1, check2])
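A much-simplified NumPy/SciPy analog (not the LKJ machinery above) of the importance-sampling volume estimate described in the comment: sampling from a proposal distribution and averaging `indicator / proposal_density` estimates the volume of the region of interest, here the unit disk:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
n = 200_000
xy = rng.normal(size=(n, 2))                        # proposal: standard normal
in_disk = (xy ** 2).sum(axis=1) <= 1.0              # region whose volume we want
weights = in_disk / stats.multivariate_normal(mean=[0., 0.]).pdf(xy)

print(weights.mean(), np.pi)   # Monte-Carlo estimate of the disk area vs. pi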
Example #25
    def build(self, input_shape):
        dtype = self.dtype
        if len(input_shape) == 2:
            batch_image_shape, batch_conditional_shape = input_shape
            conditional_input = tf.keras.layers.Input(
                shape=batch_conditional_shape[1:], dtype=dtype)
        else:
            batch_image_shape = input_shape
            conditional_input = None

        image_shape = batch_image_shape[1:]
        image_input = tf.keras.layers.Input(shape=image_shape, dtype=dtype)

        if self._resnet_activation == 'concat_elu':
            activation = tf.keras.layers.Lambda(
                lambda x: tf.nn.elu(tf.concat([x, -x], axis=-1)), dtype=dtype)
        else:
            activation = tf.keras.activations.get(self._resnet_activation)

        # Define layers with default inputs and layer wrapper applied
        Conv2D = functools.partial(  # pylint:disable=invalid-name
            self._layer_wrapper(tf.keras.layers.Convolution2D),
            filters=self._num_filters,
            padding='same',
            kernel_regularizer=tf.keras.regularizers.l2(self._l2_weight),
            dtype=dtype)

        Dense = functools.partial(  # pylint:disable=invalid-name
            self._layer_wrapper(tf.keras.layers.Dense),
            kernel_regularizer=tf.keras.regularizers.l2(self._l2_weight),
            dtype=dtype)

        Conv2DTranspose = functools.partial(  # pylint:disable=invalid-name
            self._layer_wrapper(tf.keras.layers.Conv2DTranspose),
            filters=self._num_filters,
            padding='same',
            strides=(2, 2),
            kernel_regularizer=tf.keras.regularizers.l2(self._l2_weight),
            dtype=dtype)

        rows, cols = self._receptive_field_dims

        # Define the dimensions of the valid (unmasked) areas of the layer kernels
        # for stride 1 convolutions in the internal layers.
        kernel_valid_dims = {
            'vertical': (rows - 1, cols),        # vertical stack
            'horizontal': (2, cols // 2 + 1),    # horizontal stack
        }

        # Define the size of the kernel necessary to center the current pixel
        # correctly for stride 1 convolutions in the internal layers.
        kernel_sizes = {
            'vertical': (2 * rows - 3, cols),
            'horizontal': (3, cols)
        }

        # Make the kernel constraint functions for stride 1 convolutions in internal
        # layers.
        kernel_constraints = {
            k: _make_kernel_constraint(kernel_sizes[k], (0, v[0]), (0, v[1]))
            for k, v in kernel_valid_dims.items()
        }

        # Build the initial vertical stack/horizontal stack convolutional layers,
        # as shown in Figure 1 of [2]. The receptive field of the initial vertical
        # stack layer is a rectangular area centered above the current pixel.
        vertical_stack_init = Conv2D(kernel_size=(2 * rows - 1, cols),
                                     kernel_constraint=_make_kernel_constraint(
                                         (2 * rows - 1, cols), (0, rows - 1),
                                         (0, cols)))(image_input)

        # In Figure 1 [2], the receptive field of the horizontal stack is
        # illustrated as the pixels in the same row and to the left of the current
        # pixel. [1] increases the height of this receptive field from one pixel to
        # two (`horizontal_stack_left`) and additionally includes a subset of the
        # row of pixels centered above the current pixel (`horizontal_stack_up`).
        horizontal_stack_up = Conv2D(kernel_size=(3, cols),
                                     kernel_constraint=_make_kernel_constraint(
                                         (3, cols), (0, 1),
                                         (0, cols)))(image_input)

        horizontal_stack_left = Conv2D(
            kernel_size=(3, cols),
            kernel_constraint=_make_kernel_constraint(
                (3, cols), (0, 2), (0, cols // 2)))(image_input)

        horizontal_stack_init = tf.keras.layers.add(
            [horizontal_stack_up, horizontal_stack_left], dtype=dtype)

        layer_stacks = {
            'vertical': [vertical_stack_init],
            'horizontal': [horizontal_stack_init]
        }

        # Build the downward pass of the U-net (left-hand half of Figure 2 of [1]).
        # Each `i` iteration builds one of the highest-level blocks (identified as
        # 'Sequence of 6 layers' in the figure, consisting of `num_resnet=5` stride-
        # 1 layers, and one stride-2 layer that contracts the height/width
        # dimensions). The `_` iterations build the stride 1 layers. The layers of
        # the downward pass are stored in lists, since we'll later need them to make
        # skip-connections to layers in the upward pass of the U-net (the skip-
        # connections are represented by curved lines in Figure 2 [1]).
        for i in range(self._num_hierarchies):
            for _ in range(self._num_resnet):
                # Build a layer shown in Figure 2 of [2]. The 'vertical' iteration
                # builds the layers in the left half of the figure, and the 'horizontal'
                # iteration builds the layers in the right half.
                for stack in ['vertical', 'horizontal']:
                    input_x = layer_stacks[stack][-1]
                    x = activation(input_x)
                    x = Conv2D(kernel_size=kernel_sizes[stack],
                               kernel_constraint=kernel_constraints[stack])(x)

                    # Add the vertical-stack layer to the horizontal-stack layer
                    if stack == 'horizontal':
                        h = activation(layer_stacks['vertical'][-1])
                        h = Dense(self._num_filters)(h)
                        x = tf.keras.layers.add([h, x], dtype=dtype)

                    x = activation(x)
                    x = tf.keras.layers.Dropout(self._dropout_p,
                                                dtype=dtype)(x)
                    x = Conv2D(filters=2 * self._num_filters,
                               kernel_size=kernel_sizes[stack],
                               kernel_constraint=kernel_constraints[stack])(x)

                    if conditional_input is not None:
                        h_projection = _build_and_apply_h_projection(
                            conditional_input, self._num_filters, dtype=dtype)
                        x = tf.keras.layers.add([x, h_projection], dtype=dtype)

                    x = _apply_sigmoid_gating(x)

                    # Add a residual connection from the layer's input.
                    out = tf.keras.layers.add([input_x, x], dtype=dtype)
                    layer_stacks[stack].append(out)

            if i < self._num_hierarchies - 1:
                # Build convolutional layers that contract the height/width dimensions
                # on the downward pass between each set of layers (e.g. contracting from
                # 32x32 to 16x16 in Figure 2 of [1]).
                for stack in ['vertical', 'horizontal']:
                    # Define kernel dimensions/masking to maintain the autoregressive property.
                    x = layer_stacks[stack][-1]
                    h, w = kernel_valid_dims[stack]
                    kernel_height = 2 * h
                    if stack == 'vertical':
                        kernel_width = w + 1
                    else:
                        kernel_width = 2 * w

                    kernel_size = (kernel_height, kernel_width)
                    kernel_constraint = _make_kernel_constraint(
                        kernel_size, (0, h), (0, w))
                    x = Conv2D(strides=(2, 2),
                               kernel_size=kernel_size,
                               kernel_constraint=kernel_constraint)(x)
                    layer_stacks[stack].append(x)

        # Upward pass of the U-net (right-hand half of Figure 2 of [1]). We stored
        # the layers of the downward pass in a list, in order to access them to make
        # skip-connections to the upward pass. For the upward pass, we need to keep
        # track of only the current layer, so we maintain a reference to the
        # current layer of the horizontal/vertical stack in the `upward_pass` dict.
        # The upward pass begins with the last layer of the downward pass.
        upward_pass = {key: stack.pop() for key, stack in layer_stacks.items()}

        # As with the downward pass, each `i` iteration builds a highest level block
        # in Figure 2 [1], and the `_` iterations build individual layers within the
        # block.
        for i in range(self._num_hierarchies):
            num_resnet = self._num_resnet if i == 0 else self._num_resnet + 1

            for _ in range(num_resnet):
                # Build a layer as shown in Figure 2 of [2], with a skip-connection
                # from the symmetric layer in the downward pass.
                for stack in ['vertical', 'horizontal']:
                    input_x = upward_pass[stack]
                    x_symmetric = layer_stacks[stack].pop()

                    x = activation(input_x)
                    x = Conv2D(kernel_size=kernel_sizes[stack],
                               kernel_constraint=kernel_constraints[stack])(x)

                    # Include the vertical-stack layer of the upward pass in the layers
                    # to be added to the horizontal layer.
                    if stack == 'horizontal':
                        x_symmetric = tf.keras.layers.Concatenate(
                            axis=-1, dtype=dtype)(
                                [upward_pass['vertical'], x_symmetric])

                    # Add a skip-connection from the symmetric layer in the downward
                    # pass to the layer `x` in the upward pass.
                    h = activation(x_symmetric)
                    h = Dense(self._num_filters)(h)
                    x = tf.keras.layers.add([h, x], dtype=dtype)

                    x = activation(x)
                    x = tf.keras.layers.Dropout(self._dropout_p,
                                                dtype=dtype)(x)
                    x = Conv2D(filters=2 * self._num_filters,
                               kernel_size=kernel_sizes[stack],
                               kernel_constraint=kernel_constraints[stack])(x)

                    if conditional_input is not None:
                        h_projection = _build_and_apply_h_projection(
                            conditional_input, self._num_filters, dtype=dtype)
                        x = tf.keras.layers.add([x, h_projection], dtype=dtype)

                    x = _apply_sigmoid_gating(x)
                    upward_pass[stack] = tf.keras.layers.add([input_x, x],
                                                             dtype=dtype)

            # Define deconvolutional layers that expand height/width dimensions on the
            # upward pass (e.g. expanding from 8x8 to 16x16 in Figure 2 of [1]), with
            # the correct kernel dimensions/masking to maintain the autoregressive
            # property.
            if i < self._num_hierarchies - 1:
                for stack in ['vertical', 'horizontal']:
                    h, w = kernel_valid_dims[stack]
                    kernel_height = 2 * h - 2
                    if stack == 'vertical':
                        kernel_width = w + 1
                        kernel_constraint = _make_kernel_constraint(
                            (kernel_height, kernel_width),
                            (h - 2, kernel_height), (0, w))
                    else:
                        kernel_width = 2 * w - 2
                        kernel_constraint = _make_kernel_constraint(
                            (kernel_height, kernel_width),
                            (h - 2, kernel_height), (w - 2, kernel_width))

                    x = upward_pass[stack]
                    x = Conv2DTranspose(kernel_size=(kernel_height,
                                                     kernel_width),
                                        kernel_constraint=kernel_constraint)(x)
                    upward_pass[stack] = x

        x_out = tf.keras.layers.ELU(dtype=dtype)(upward_pass['horizontal'])

        # Build final Dense/Reshape layers to output the correct number of
        # parameters per pixel.
        num_channels = tensorshape_util.as_list(image_shape)[-1]
        num_coeffs = num_channels * (
            num_channels - 1) // 2  # alpha, beta, gamma in eq.3 of paper
        num_out = num_channels * 2 + num_coeffs + 1  # mu, s + alpha, beta, gamma + 1 (mixture weight)
        num_out_total = num_out * self._num_logistic_mix
        params = Dense(num_out_total)(x_out)
        params = tf.reshape(
            params,
            prefer_static.concat(  # [-1,H,W,nb mixtures, params per mixture]
                [[-1], image_shape[:-1], [self._num_logistic_mix, num_out]],
                axis=0))

        # If there is one color channel, split the parameters into a list of three
        # output `Tensor`s: (1) component logits for the Quantized Logistic mixture
        # distribution, (2) location parameters for each component, and (3) scale
        # parameters for each component. If there is more than one color channel,
        # return a fourth `Tensor` for the coefficients for the linear dependence
        # among color channels (e.g. alpha, beta, gamma).
        # [logits, mu, s, linear dependence]
        splits = 3 if num_channels == 1 else [
            1, num_channels, num_channels, num_coeffs
        ]
        outputs = tf.split(params, splits, axis=-1)

        # Squeeze singleton dimension from component logits
        outputs[0] = tf.squeeze(outputs[0], axis=-1)

        # Ensure scales are positive and do not collapse to near-zero
        outputs[2] = tf.nn.softplus(outputs[2]) + tf.cast(
            tf.exp(-7.), self.dtype)

        inputs = image_input if conditional_input is None else [
            image_input, conditional_input
        ]
        self._network = tf.keras.Model(inputs=inputs, outputs=outputs)
        super(_PixelCNNNetwork, self).build(input_shape)
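An illustrative NumPy sketch (not the library's `_make_kernel_constraint` helper) of the masking idea used throughout `build` above: only a rectangular "valid" block of the kernel is left non-zero. For the initial vertical-stack convolution with a receptive field of `rows=3, cols=5`, only the `(rows - 1) x cols` block above the current pixel is kept:

import numpy as np

rows, cols = 3, 5
kernel_size = (2 * rows - 1, cols)                 # as in the vertical_stack_init call
valid_rows, valid_cols = (0, rows - 1), (0, cols)  # rows strictly above the current pixel

mask = np.zeros(kernel_size)
mask[valid_rows[0]:valid_rows[1], valid_cols[0]:valid_cols[1]] = 1.0
print(mask)   # 1s only in the top (rows - 1) x cols block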
Example #26
  def test_batch_of_filters(self):

    batch_shape = [3, 2]
    num_particles = 1000
    num_timesteps = 40

    # Batch of priors on object 1D positions and velocities.
    initial_state_prior = tfd.JointDistributionNamed({
        'position': tfd.Normal(loc=0., scale=tf.ones(batch_shape)),
        'velocity': tfd.Normal(loc=0., scale=tf.ones(batch_shape) * 0.1)})

    def transition_fn(_, previous_state):
      return tfd.JointDistributionNamed({
          'position': tfd.Normal(
              loc=previous_state['position'] + previous_state['velocity'],
              scale=0.1),
          'velocity': tfd.Normal(loc=previous_state['velocity'], scale=0.01)})

    def observation_fn(_, state):
      return tfd.Normal(loc=state['position'], scale=0.1)

    # Batch of synthetic observations.
    true_initial_positions = np.random.randn(*batch_shape).astype(self.dtype)
    true_velocities = 0.1 * np.random.randn(
        *batch_shape).astype(self.dtype)
    observed_positions = (
        true_velocities *
        np.arange(num_timesteps).astype(self.dtype)[..., None, None] +
        true_initial_positions)

    (particles,
     log_weights,
     parent_indices,
     step_log_marginal_likelihoods) = self.evaluate(
         tfp.experimental.mcmc.particle_filter(
             observations=observed_positions,
             initial_state_prior=initial_state_prior,
             transition_fn=transition_fn,
             observation_fn=observation_fn,
             num_particles=num_particles,
             seed=test_util.test_seed()))

    self.assertAllEqual(particles['position'].shape,
                        [num_timesteps] + batch_shape + [num_particles])
    self.assertAllEqual(particles['velocity'].shape,
                        [num_timesteps] + batch_shape + [num_particles])
    self.assertAllEqual(parent_indices.shape,
                        [num_timesteps] + batch_shape + [num_particles])
    self.assertAllEqual(step_log_marginal_likelihoods.shape,
                        [num_timesteps] + batch_shape)

    self.assertAllClose(
        self.evaluate(
            tf.reduce_sum(tf.exp(log_weights) *
                          particles['position'], axis=-1)),
        observed_positions,
        atol=0.1)

    velocity_means = tf.reduce_sum(tf.exp(log_weights) *
                                   particles['velocity'], axis=-1)
    self.assertAllClose(
        self.evaluate(tf.reduce_mean(velocity_means, axis=0)),
        true_velocities, atol=0.05)

    # Uncertainty in velocity should decrease over time.
    velocity_stddev = self.evaluate(
        tf.math.reduce_std(particles['velocity'], axis=-1))
    self.assertAllLess((velocity_stddev[-1] - velocity_stddev[0]), 0.)

    trajectories = self.evaluate(
        tfp.experimental.mcmc.reconstruct_trajectories(particles,
                                                       parent_indices))
    self.assertAllEqual([num_timesteps] + batch_shape + [num_particles],
                        trajectories['position'].shape)
    self.assertAllEqual([num_timesteps] + batch_shape + [num_particles],
                        trajectories['velocity'].shape)
Example #27
 def _inverse(self, y):
     x0 = y[..., 0, tf.newaxis]
     xk = tf.exp(y[..., 1:])
     x = tf.concat([x0, xk], axis=-1)
     return tf.cumsum(x, axis=-1)
Example #28
 def grad(dy):
     prob = tf.exp(concentration * (tf.cos(x) - 1.)) / (
         (2. * np.pi) * tf.math.bessel_i0e(concentration))
     return dy * prob, dy * dcdf_dconcentration
Example #29
 def _forward(self, x):
     z = (x - self.loc) / self.scale
     return tf.exp(-tf.exp(-z))
Example #30
 def _entropy(self):
     log_probs0, log_probs1 = self._outcome_log_probs()
     probs1 = tf.exp(log_probs1)
     return -(1. - probs1) * log_probs0 - probs1 * log_probs1