def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
  """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an exponential decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step /
  decay_steps)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * floor(global_step /
  decay_steps))
  ```

  Example: decay exponentially with a decay rate of 0.5 over every 5 steps:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.compat.v1.train.natural_exp_decay(learning_rate,
                                                       global_step,
                                                       decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number (the example above passes a `tf.Variable`).
      Global step to use for the decay computation.  Must not be negative and
      must not be `None`.
    decay_steps: How often to apply decay.
    decay_rate: A Python number.  The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String.  Optional name of the operation.  Defaults to
      'ExponentialTimeDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied (i.e. is `None`).

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for changing
  the learning rate value across different invocations of optimizer functions.
  @end_compatibility
  """
  # Enforce the documented contract up front; otherwise a missing step only
  # surfaces later (and, in eager mode, not until the returned callable runs).
  if global_step is None:
    raise ValueError('global_step is required for natural_exp_decay.')

  # exp(-decay_rate) is the per-`decay_steps` multiplicative factor, which lets
  # the generic ExponentialDecay schedule implement the natural-exp formula.
  natural_exp_rate = tf.exp(tf.negative(decay_rate))
  decayed_lr = learning_rate_schedule.ExponentialDecay(
      learning_rate,
      decay_steps,
      natural_exp_rate,
      staircase=staircase,
      name=name)

  if not tf.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    # In eager mode return a callable so the rate is re-evaluated on each use.
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr
def _inverse(self, y):
  """Evaluates the inverse transform at `y`.

  After passing `y` through `self._maybe_assert_valid`, computes
  `exp(log1p(-exp(log1p(-y) / concentration0)) / concentration1)`.

  Args:
    y: Value(s) at which to evaluate the inverse.

  Returns:
    The result of the expression above.
  """
  checked_y = self._maybe_assert_valid(y)
  # Inner stage: log1p(-y) scaled by 1 / concentration0.
  inner_log = tf.math.log1p(-checked_y) / self.concentration0
  # Outer stage: log1p(-exp(inner)) scaled by 1 / concentration1.
  outer_log = tf.math.log1p(-tf.exp(inner_log)) / self.concentration1
  return tf.exp(outer_log)
def _forward_log_det_jacobian(self, x):
  """Computes the forward log-det-Jacobian at `x`.

  With `z = (x - loc) / scale`, returns `-z - exp(-z) - log(scale)`.

  Args:
    x: Value(s) at which to evaluate the log-det-Jacobian.

  Returns:
    The elementwise value of the expression above.
  """
  # Convert once so the same tensor is reused for the division and the log.
  scale_t = tf.convert_to_tensor(self.scale)
  standardized = (x - self.loc) / scale_t
  partial = -standardized - tf.exp(-standardized)
  return partial - tf.math.log(scale_t)
def testComposeFromTensor(self):
  """Checks that calling `tfb.Exp()` on a tensor agrees with `tf.exp`."""
  inputs = tf.constant([-5., 0., 5.])
  # Evaluate both tensors in one call and compare the two results.
  expected, actual = self.evaluate([tf.exp(inputs), tfb.Exp()(inputs)])
  self.assertAllClose(expected, actual, atol=0, rtol=1e-3)
def _rate_parameter_no_checks(self):
  """Returns the rate, deriving it from `_log_rate` when `_rate` is unset.

  Returns:
    `tf.identity(self._rate)` if `self._rate` is not `None`, otherwise
    `tf.exp(self._log_rate)`.
  """
  if self._rate is not None:
    return tf.identity(self._rate)
  return tf.exp(self._log_rate)
def _testCumulativeLogSumExp(self, x, axis=0):
  """Asserts `exp(log_cumsum_exp(x))` matches `cumsum(exp(x))` along `axis`.

  Args:
    x: Input values passed to both computations.
    axis: Axis along which the cumulative operations run.
  """
  # Reference: exponentiate first, then accumulate.
  expected = tf.cumsum(tf.exp(x), axis=axis)
  # Under test: accumulate in log space, then exponentiate.
  log_cumulative = tfp.math.log_cumsum_exp(x, axis=axis)
  actual = tf.exp(log_cumulative)
  self.assertAllClose(expected, actual)
def _forward(self, x):
  """Applies the forward transform: the elementwise exponential of `x`."""
  y = tf.exp(x)
  return y
def _sample_next_volatilities(self, vol, dt, dwv):
  """Advances `vol` by one step.

  Returns
  `vol * exp(volvol * dwv * sqrt(dt) - volvol**2 * dt / 2)` where `volvol` is
  `self._volvol`.

  Args:
    vol: Current volatility value(s).
    dt: Step size used in both exponent terms.
    dwv: Increment multiplying `volvol * sqrt(dt)` in the exponent.

  Returns:
    The updated volatility value(s).
  """
  volvol = self._volvol
  stochastic_term = volvol * dwv * tf.sqrt(dt)
  correction_term = volvol**2 * dt * 0.5
  return vol * tf.exp(stochastic_term - correction_term)
def run_test_sample_consistent_log_prob(
    self,
    sess_run_fn,
    dist,
    num_samples=int(1e5),
    num_threshold=int(1e3),
    seed=None,
    batch_size=None,
    rtol=1e-2,
    atol=0.):
  """Checks that `dist.sample` and `dist.log_prob` agree with each other.

  Samples are drawn from `dist`, histogrammed one batch coordinate at a time,
  and each sufficiently populated bucket's empirical frequency
  (`count / num_samples`) is compared with `exp(dist.log_prob(edge))`.

  Note: this is only a necessary condition for consistency; passing does not
  prove that `sample` and `log_prob` describe the same distribution.

  Args:
    sess_run_fn: Python `callable` taking a `list`-like of `Tensor`s and
      returning a list of results after running one "step" of TensorFlow
      computation, typically set to `sess.run`.
    dist: Distribution instance or object which implements `sample`,
      `log_prob`, `event_shape_tensor` and `batch_shape_tensor`.
    num_samples: Python `int` scalar; number of Monte-Carlo samples to draw
      from `dist`.
    num_threshold: Python `int` scalar; a bucket is only compared against the
      probability when its count exceeds this value. Default value: 1e3; must
      be at least 1. Warning: setting it too high can make the test pass
      falsely, setting it too low can make it fail falsely.
    seed: Python `int` seed used when sampling from `dist`. Using `None` in a
      test is generally not recommended since it increases the likelihood of
      spurious failures.
    batch_size: Hint for unpacking the sampled result. Default: `None`, in
      which case it is computed as the product of `dist.batch_shape_tensor()`.
    rtol: Python `float`-type; admissible relative error between analytical
      and sample statistics.
    atol: Python `float`-type; admissible absolute error between analytical
      and sample statistics.

  Raises:
    ValueError: if `num_threshold < 1`.
  """
  if num_threshold < 1:
    raise ValueError('num_threshold({}) must be at least 1.'.format(
        num_threshold))

  # Histogram only supports vectors, so samples are flattened to
  # [num_samples, -1] and processed one column (batch coordinate) at a time.
  draws = dist.sample(num_samples, seed=test_seed_stream(hardcoded_seed=seed))
  draws = tf.reshape(draws, shape=[num_samples, -1])
  if batch_size is None:
    batch_size = tf.reduce_prod(dist.batch_shape_tensor())
  batch_rank = tf.shape(dist.batch_shape_tensor())[0]
  # 1 + pad([-2], batch_rank zeros) == [-1, 1, ..., 1]: edges lead, followed by
  # one singleton axis per batch dimension.
  edges_shape = 1 + tf.pad(tensor=[-2], paddings=[[0, batch_rank]])

  columns = tf.unstack(draws, num=batch_size, axis=1)
  for batch_index, column in enumerate(columns):
    counts, edges = self.histogram(column)
    edges = tf.reshape(edges, edges_shape)
    probs = tf.exp(dist.log_prob(edges))
    # Keep only the probabilities belonging to the current batch coordinate.
    probs = tf.reshape(probs, shape=[-1, batch_size])[:, batch_index]

    counts_value, probs_value = sess_run_fn([counts, probs])
    populated = counts_value > num_threshold
    self.assertAllClose(
        probs_value[populated],
        counts_value[populated] / num_samples,
        rtol=rtol,
        atol=atol)
def _mean(self):
  """Returns `scale * exp(lgamma(1 + 1 / concentration))`."""
  scale = self.scale
  unit = tf.constant(1., dtype=self.dtype)
  inverse_concentration = tf.math.reciprocal(self.concentration)
  # exp(lgamma(.)) evaluates the gamma function via its logarithm.
  log_gamma_term = tf.math.lgamma(unit + inverse_concentration)
  return scale * tf.exp(log_gamma_term)
def option_price(*,
                 volatilities,
                 strikes,
                 expiries,
                 spots=None,
                 forwards=None,
                 discount_rates=None,
                 continuous_dividends=None,
                 cost_of_carries=None,
                 discount_factors=None,
                 is_call_options=None,
                 dtype=None,
                 name=None):
  """Computes the Black Scholes price for a batch of call or put options.

  #### Example

  ```python
    # Price a batch of 5 vanilla call options.
    volatilities = np.array([0.0001, 102.0, 2.0, 0.1, 0.4])
    forwards = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    # Strikes will automatically be broadcasted to shape [5].
    strikes = np.array([3.0])
    # Expiries will be broadcast to shape [5], i.e. each option has strike=3
    # and expiry = 1.
    expiries = 1.0
    computed_prices = tff.black_scholes.option_price(
        volatilities=volatilities,
        strikes=strikes,
        expiries=expiries,
        forwards=forwards)
  # Expected print output of computed prices:
  # [ 0.          2.          2.04806848  1.00020297  2.07303131]
  ```

  #### References:
  [1] Hull, John C., Options, Futures and Other Derivatives. Pearson, 2018.
  [2] Wikipedia contributors. Black-Scholes model. Available at:
    https://en.wikipedia.org/w/index.php?title=Black%E2%80%93Scholes_model

  Args:
    volatilities: Real `Tensor` of any shape and dtype. The volatilities to
      expiry of the options to price.
    strikes: A real `Tensor` of the same dtype and compatible shape as
      `volatilities`. The strikes of the options to be priced. Its dtype
      (after conversion with `dtype`) is used for all other inputs.
    expiries: A real `Tensor` of same dtype and compatible shape as
      `volatilities`. The expiry of each option. The units should be such that
      `expiry * volatility**2` is dimensionless.
    spots: A real `Tensor` of any shape that broadcasts to the shape of the
      `volatilities`. The current spot price of the underlying. Either this
      argument or the `forwards` (but not both) must be supplied.
    forwards: A real `Tensor` of any shape that broadcasts to the shape of
      `volatilities`. The forwards to maturity. Either this argument or the
      `spots` must be supplied but both must not be supplied.
    discount_rates: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      If not `None`, discount factors are calculated as e^(-rT),
      where r are the discount rates, or risk free rates. At most one of
      discount_rates and discount_factors can be supplied.
      Default value: `None`, equivalent to r = 0 and discount factors = 1 when
      discount_factors also not given.
    continuous_dividends: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      If not `None`, `cost_of_carries` is calculated as r - q,
      where r are the `discount_rates` and q is `continuous_dividends`. Either
      this or `cost_of_carries` can be given.
      Default value: `None`, equivalent to q = 0.
    cost_of_carries: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      Cost of storing a physical commodity, the cost of interest paid when
      long, or the opportunity cost, or the cost of paying dividends when short.
      If not `None`, and `spots` is supplied, used to calculate forwards from
      `spots`: F = e^(bT) * S, where F is the forwards price, b is the cost of
      carries, T is expiries and S is the spot price. If `None`, value assumed
      to be equal to the `discount_rate` - `continuous_dividends`
      Default value: `None`, equivalent to b = r - q.
    discount_factors: An optional real `Tensor` of same dtype as the
      `volatilities`. If not `None`, these are the discount factors to expiry
      (i.e. e^(-rT)). If neither this nor `discount_rates` is given, no
      discounting is applied (i.e. the undiscounted option price is returned).
      If `spots` is supplied and `discount_factors` is not `None` then this is
      also used to compute the forwards to expiry (via the implied rates
      -log(discount_factors) / expiries). At most one of discount_rates and
      discount_factors can be supplied.
      Default value: `None`, in which case the discount factors are computed
      as e^(-rT) from `discount_rates`.
    is_call_options: A boolean `Tensor` of a shape compatible with
      `volatilities`. Indicates whether the option is a call (if True) or a put
      (if False). If not supplied, call options are assumed.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for conversion
      of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: `None` which maps to the default dtype inferred by
      TensorFlow.
    name: str. The name for the ops created by this function.
      Default value: `None` which is mapped to the default name `option_price`.

  Returns:
    option_prices: A `Tensor` of the same shape as `forwards`. The Black
    Scholes price of the options.

  Raises:
    ValueError: If both `forwards` and `spots` are supplied or if neither is
      supplied.
    ValueError: If both `discount_rates` and `discount_factors` is supplied.
    ValueError: If both `continuous_dividends` and `cost_of_carries` is
      supplied.
  """
  if (spots is None) == (forwards is None):
    raise ValueError('Either spots or forwards must be supplied but not both.')
  if (discount_rates is not None) and (discount_factors is not None):
    raise ValueError('At most one of discount_rates and discount_factors may '
                     'be supplied')
  if (continuous_dividends is not None) and (cost_of_carries is not None):
    raise ValueError('At most one of continuous_dividends and cost_of_carries '
                     'may be supplied')

  with tf.name_scope(name or 'option_price'):
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    # All remaining inputs are converted to the dtype of `strikes`.
    dtype = strikes.dtype
    volatilities = tf.convert_to_tensor(
        volatilities, dtype=dtype, name='volatilities')
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')

    # Convert the discount factors *before* deriving rates from them, so that
    # a Python float / NumPy input adopts the working dtype instead of
    # TensorFlow's inferred default (which could mismatch `expiries`).
    if discount_factors is not None:
      discount_factors = tf.convert_to_tensor(
          discount_factors, dtype=dtype, name='discount_factors')

    if discount_rates is not None:
      discount_rates = tf.convert_to_tensor(
          discount_rates, dtype=dtype, name='discount_rates')
    elif discount_factors is not None:
      # Implied continuously-compounded rate: r = -log(DF) / T.
      discount_rates = -tf.math.log(discount_factors) / expiries
    else:
      discount_rates = tf.convert_to_tensor(
          0.0, dtype=dtype, name='discount_rates')

    if continuous_dividends is None:
      continuous_dividends = 0.0
    continuous_dividends = tf.convert_to_tensor(
        continuous_dividends, dtype=dtype, name='continuous_dividends')

    if cost_of_carries is not None:
      cost_of_carries = tf.convert_to_tensor(
          cost_of_carries, dtype=dtype, name='cost_of_carries')
    else:
      cost_of_carries = discount_rates - continuous_dividends

    if discount_factors is None:
      discount_factors = tf.exp(-discount_rates * expiries)

    if forwards is not None:
      forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
    else:
      spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
      # F = S * e^(bT).
      forwards = spots * tf.exp(cost_of_carries * expiries)

    sqrt_var = volatilities * tf.math.sqrt(expiries)
    d1 = (tf.math.log(forwards / strikes) + sqrt_var * sqrt_var / 2) / sqrt_var
    d2 = d1 - sqrt_var
    undiscounted_calls = forwards * _ncdf(d1) - strikes * _ncdf(d2)
    if is_call_options is None:
      return discount_factors * undiscounted_calls
    # Put prices follow from the calls via put-call parity.
    undiscounted_forward = forwards - strikes
    undiscounted_puts = undiscounted_calls - undiscounted_forward
    predicate = tf.broadcast_to(is_call_options, tf.shape(undiscounted_calls))
    return discount_factors * tf.where(predicate, undiscounted_calls,
                                       undiscounted_puts)
def to_loc_scale(log_precision, mean_times_precision):
  """Converts a (log-precision, mean * precision) pair to `loc` / `scale`.

  Args:
    log_precision: Logarithm of the precision (inverse variance).
    mean_times_precision: The mean multiplied by the precision.

  Returns:
    A dict with `'loc'` (the mean) and `'scale'` (square root of the variance).
  """
  precision = tf.exp(log_precision)
  variance = 1. / precision
  loc = mean_times_precision * variance
  scale = tf.sqrt(variance)
  return {'loc': loc, 'scale': scale}
def _gamma_from_loc_scale_named(loc, log_scale):
  """Maps `(loc, log_scale)` to a dict of `concentration` and `rate`.

  With `scale = exp(log_scale)`: `concentration = (loc / scale)**2` and
  `rate = loc / scale**2`.

  Args:
    loc: Location value(s).
    log_scale: Logarithm of the scale value(s).

  Returns:
    A dict with keys `'concentration'` and `'rate'`.
  """
  concentration = (loc / tf.exp(log_scale))**2
  rate = loc / (tf.exp(log_scale))**2
  return {'concentration': concentration, 'rate': rate}
def _gamma_from_loc_scale_positional(loc, log_scale):
  """Maps `(loc, log_scale)` to a `(concentration, rate)` tuple.

  With `scale = exp(log_scale)`: `concentration = (loc / scale)**2` and
  `rate = loc / scale**2`.

  Args:
    loc: Location value(s).
    log_scale: Logarithm of the scale value(s).

  Returns:
    A 2-tuple `(concentration, rate)`.
  """
  concentration = (loc / tf.exp(log_scale))**2
  rate = loc / (tf.exp(log_scale))**2
  return concentration, rate
def _prob(self, x):
  """Returns the probability at `x` by exponentiating `self._log_prob(x)`."""
  log_probability = self._log_prob(x)
  return tf.exp(log_probability)
def reduce_weighted_logsumexp(logx,
                              w=None,
                              axis=None,
                              keep_dims=False,
                              return_sign=False,
                              name=None):
  """Computes `log(abs(sum(weight * exp(elements across tensor dimensions))))`.

  When every weight in `w` is known to be positive, calling
  `tf.reduce_logsumexp(logx + tf.math.log(w))` directly is more efficient than
  this function.

  `logx` is reduced along the dimensions given in `axis`. Unless `keep_dims`
  is true, the rank of the result drops by 1 for each entry in `axis`; when
  `keep_dims` is true the reduced dimensions are kept with length 1. With
  `axis=None` every dimension is reduced and a single-element tensor is
  returned.

  The computation subtracts the largest `logx + log|w|` term before
  exponentiating, which makes it more numerically stable than
  `log(sum(w * exp(input)))`: it avoids overflow from exponentiating large
  inputs and underflow from taking the log of small ones.

  For example:

  ```python
  x = tf.constant([[0., 0, 0],
                   [0, 0, 0]])

  w = tf.constant([[-1., 1, 1],
                   [1, 1, 1]])

  du.reduce_weighted_logsumexp(x, w)
  # ==> log(-1*1 + 1*1 + 1*1 + 1*1 + 1*1 + 1*1) = log(4)

  du.reduce_weighted_logsumexp(x, w, axis=0)
  # ==> [log(-1+1), log(1+1), log(1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1)
  # ==> [log(-1+1+1), log(1+1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1, keep_dims=True)
  # ==> [[log(-1+1+1)], [log(1+1+1)]]

  du.reduce_weighted_logsumexp(x, w, axis=[0, 1])
  # ==> log(-1+5)
  ```

  Args:
    logx: The tensor to reduce. Should have numeric type.
    w: The weight tensor. Should have numeric type identical to `logx`. If
      `None`, a plain `tf.reduce_logsumexp` is performed.
    axis: The dimensions to reduce. If `None` (the default), reduces all
      dimensions. Must be in the range `[-rank(logx), rank(logx))`.
    keep_dims: If true, retains reduced dimensions with length 1.
    return_sign: If `True`, returns the sign of the result.
    name: A name for the operation (optional).

  Returns:
    lswe: The `log(abs(sum(weight * exp(x))))` reduced tensor.
    sign: (Optional) The sign of `sum(weight * exp(x))`. Only returned when
      `return_sign` is `True`.
  """
  with tf.name_scope(name or 'reduce_weighted_logsumexp'):
    logx = tf.convert_to_tensor(logx, name='logx')

    if w is None:
      # Unweighted case: every term is positive, so the sign is all ones.
      unweighted = tf.reduce_logsumexp(logx, axis=axis, keepdims=keep_dims)
      if not return_sign:
        return unweighted
      return unweighted, tf.ones_like(unweighted)

    w = tf.convert_to_tensor(w, dtype=logx.dtype, name='w')
    log_abs_terms = logx + tf.math.log(tf.abs(w))
    shift = tf.reduce_max(log_abs_terms, axis=axis, keepdims=True)
    # When the maximum is `-inf` or `inf`, subtracting it would produce
    # `inf - inf = NaN`, so a shift of zero is used instead. This is valid
    # because any value may be subtracted as long as it is added back after
    # the `log(sum(...))`.
    shift = tf.where(
        tf.math.is_inf(shift),
        tf.zeros([], shift.dtype),
        shift)
    signed_scaled_terms = tf.sign(w) * tf.exp(log_abs_terms - shift)
    signed_scaled_sum = tf.reduce_sum(
        signed_scaled_terms, axis=axis, keepdims=keep_dims)
    if not keep_dims:
      # The max was computed with keepdims=True; drop those axes to match.
      shift = tf.squeeze(shift, axis)
    sgn = tf.sign(signed_scaled_sum)
    # sgn * sum == |sum|, so the log argument is non-negative.
    lswe = shift + tf.math.log(sgn * signed_scaled_sum)
    return (lswe, sgn) if return_sign else lswe
def european_option_price(
    *,
    strikes: types.RealTensor,
    expiries: types.RealTensor,
    spots: types.RealTensor = None,
    forwards: types.RealTensor = None,
    is_call_options: types.BoolTensor = None,
    discount_rates: types.RealTensor = None,
    dividend_rates: types.RealTensor = None,
    discount_factors: types.RealTensor = None,
    variances: types.RealTensor,
    mean_reversion: types.RealTensor,
    theta: types.RealTensor,
    volvol: types.RealTensor,
    rho: types.RealTensor = None,
    integration_method: integration.IntegrationMethod = None,
    dtype: tf.DType = None,
    name: str = None,
    **kwargs) -> types.RealTensor:
  """Calculates European option prices under the Heston model.

  Heston originally published in 1993 his eponymous model [3]. He provided
  a semi-analytical formula for pricing European option via Fourier transform
  under his model. However, as noted by Albrecher [1], the characteristic
  function used in Heston paper can suffer numerical issues because of the
  discontinuous nature of the square root function in the complex plane, and a
  second version of the characteristic function which doesn't suffer this
  shortcoming should be used instead. Attari [2] further refined the numerical
  method by reducing the number of numerical integrations (only one Fourier
  transform instead of two) and with an integrand function decaying
  quadratically instead of linearly. Attari's numerical method is implemented
  here.

  Heston model:
  ```
    dF/F = sqrt(V) * dW_1
    dV = mean_reversion * (theta - V) * dt + volvol * sqrt(V) * dW_2
    <dW_1,dW_2> = rho * dt
  ```
  The variance V follows a square root process.

  #### Example
  ```python
  import tf_quant_finance as tff
  import numpy as np
  prices = tff.models.heston.approximations.european_option_price(
      variances=0.11,
      strikes=102.0,
      expiries=1.2,
      forwards=100.0,
      is_call_options=True,
      mean_reversion=2.0,
      theta=0.5,
      volvol=0.15,
      rho=0.3,
      discount_factors=1.0,
      dtype=np.float64)
  # Expected print output of prices:
  # 24.82219619
  ```
  #### References
  [1] Hansjorg Albrecher, The Little Heston Trap
  https://perswww.kuleuven.be/~u0009713/HestonTrap.pdf
  [2] Mukarram Attari, Option Pricing Using Fourier Transforms: A Numerically
  Efficient Simplification
  https://papers.ssrn.com/sol3/papers.cfm?abstract_id=520042
  [3] Steven L. Heston, A Closed-Form Solution for Options with Stochastic
  Volatility with Applications to Bond and Currency Options
  http://faculty.baruch.cuny.edu/lwu/890/Heston93.pdf

  Args:
    strikes: A real `Tensor` of any shape and dtype. The strikes of the options
      to be priced. Its dtype (after conversion with `dtype`) is used for all
      other inputs.
    expiries: A real `Tensor` of the same dtype and compatible shape as
      `strikes`.  The expiry of each option.
    spots: A real `Tensor` of any shape that broadcasts to the shape of the
      `strikes`. The current spot price of the underlying. Either this
      argument or the `forwards` (but not both) must be supplied.
    forwards: A real `Tensor` of any shape that broadcasts to the shape of
      `strikes`. The forwards to maturity. Either this argument or the
      `spots` must be supplied but both must not be supplied.
    is_call_options: A boolean `Tensor` of a shape compatible with
      `strikes`. Indicates whether the option is a call (if True) or a put
      (if False). If not supplied, call options are assumed.
    discount_rates: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      If not `None`, discount factors are calculated as e^(-rT),
      where r are the discount rates, or risk free rates. At most one of
      discount_rates and discount_factors can be supplied.
      Default value: `None`, equivalent to r = 0 and discount factors = 1 when
      discount_factors also not given.
    dividend_rates: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      Default value: `None`, equivalent to q = 0.
    discount_factors: An optional real `Tensor` of same dtype as the
      `strikes`. If not `None`, these are the discount factors to expiry
      (i.e. e^(-rT)). Mutually exclusive with `discount_rates`. If neither is
      given, no discounting is applied (i.e. the undiscounted option price is
      returned). If `spots` is supplied and `discount_factors` is not `None`
      then this is also used to compute the forwards to expiry. At most one of
      `discount_rates` and `discount_factors` can be supplied.
      Default value: `None`, which maps to e^(-rT) calculated from
      discount_rates.
    variances: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The initial value of the variance.
    mean_reversion: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The mean reversion strength of the variance square root
      process.
    theta: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The mean reversion level of the variance square root process.
    volvol: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The volatility of the variance square root process (volatility
      of volatility)
    rho: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The correlation between spot and variance. Although the
      signature defaults to `None`, the value is passed straight to
      `tf.convert_to_tensor`, so it should always be supplied.
    integration_method: An instance of `math.integration.IntegrationMethod`.
      Default value: `None` which maps to the Simpsons integration rule.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for conversion
      of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: None which maps to the default dtype inferred by
      TensorFlow.
    name: str. The name for the ops created by this function.
      Default value: None which is mapped to the default name
      `eu_option_price`.
    **kwargs: Additional parameters for the underlying integration method.
      If not supplied and `integration_method` is Simpson, then uses
      `IntegrationMethod.COMPOSITE_SIMPSONS_RULE` with `num_points=1001`, and
      bounds `lower=1e-9`, `upper=100`.

  Returns:
    A `Tensor` of the same shape as the input data which is the price of
    European options under the Heston model.

  Raises:
    ValueError: If both `forwards` and `spots` are supplied or if neither is
      supplied.
    ValueError: If both `discount_rates` and `discount_factors` are supplied.
  """
  if (spots is None) == (forwards is None):
    raise ValueError('Either spots or forwards must be supplied but not both.')
  if (discount_rates is not None) and (discount_factors is not None):
    raise ValueError('At most one of discount_rates and discount_factors may '
                     'be supplied')

  with tf.compat.v1.name_scope(name, default_name='eu_option_price'):
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    # Every other input is converted to the dtype of `strikes`.
    dtype = strikes.dtype
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
    mean_reversion = tf.convert_to_tensor(mean_reversion, dtype=dtype,
                                          name='mean_reversion')
    theta = tf.convert_to_tensor(theta, dtype=dtype, name='theta')
    volvol = tf.convert_to_tensor(volvol, dtype=dtype, name='volvol')
    rho = tf.convert_to_tensor(rho, dtype=dtype, name='rho')
    variances = tf.convert_to_tensor(variances, dtype=dtype, name='variances')

    # Convert the discount factors first so that the implied rates derived
    # from them below already have the working dtype.
    if discount_factors is not None:
      discount_factors = tf.convert_to_tensor(
          discount_factors, dtype=dtype, name='discount_factors')

    if discount_rates is not None:
      discount_rates = tf.convert_to_tensor(
          discount_rates, dtype=dtype, name='discount_rates')
    elif discount_factors is not None:
      # Implied continuously-compounded rate: r = -log(DF) / T.
      discount_rates = -tf.math.log(discount_factors) / expiries
    else:
      discount_rates = tf.convert_to_tensor(
          0.0, dtype=dtype, name='discount_rates')

    if dividend_rates is None:
      dividend_rates = 0.0
    dividend_rates = tf.convert_to_tensor(
        dividend_rates, dtype=dtype, name='dividend_rates')

    if discount_factors is None:
      discount_factors = tf.exp(-discount_rates * expiries)  # pylint: disable=invalid-unary-operand-type

    if forwards is not None:
      forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
    else:
      spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
      # F = S * e^((r - q) T).
      cost_of_carries = discount_rates - dividend_rates
      forwards = spots * tf.exp(cost_of_carries * expiries)

    # Cast as complex for the characteristic function calculation.
    # NOTE: despite the `_real` suffix these tensors are complex-valued, with
    # the original real value as the real part and a zero imaginary part.
    expiries_real = tf.complex(expiries, tf.zeros_like(expiries))
    mean_reversion_real = tf.complex(mean_reversion,
                                     tf.zeros_like(mean_reversion))
    theta_real = tf.complex(theta, tf.zeros_like(theta))
    volvol_real = tf.complex(volvol, tf.zeros_like(volvol))
    rho_real = tf.complex(rho, tf.zeros_like(rho))
    variances_real = tf.complex(variances, tf.zeros_like(variances))

    # Prepare inputs to build an integrand_function: a trailing axis is added
    # to each parameter (presumably so it broadcasts against the integration
    # grid `u` -- TODO confirm against `integration.integrate`).
    expiries_real = tf.expand_dims(expiries_real, -1)
    mean_reversion_real = tf.expand_dims(mean_reversion_real, -1)
    theta_real = tf.expand_dims(theta_real, -1)
    volvol_real = tf.expand_dims(volvol_real, -1)
    rho_real = tf.expand_dims(rho_real, -1)
    variances_real = tf.expand_dims(variances_real, -1)

    if integration_method is None:
      integration_method = _COMPOSITE_SIMPSONS_RULE
    if integration_method == _COMPOSITE_SIMPSONS_RULE:
      # Fill in defaults only for the settings the caller did not provide.
      if 'num_points' not in kwargs:
        kwargs['num_points'] = 1001
      if 'lower' not in kwargs:
        kwargs['lower'] = 1e-9
      if 'upper' not in kwargs:
        kwargs['upper'] = 100

    def char_fun(u):
      # Using 'second formula' for the (first) characteristic function of
      # log( spot_T / forwards )
      # (noted 'phi_2' in 'The Little Heston Trap', (Albrecher))
      # `u_real` is u + 0i and `u_imag` is 0 + u*i.
      u_real = tf.complex(u, tf.zeros_like(u))
      u_imag = tf.complex(tf.zeros_like(u), u)
      s = rho_real * volvol_real * u_imag
      # TODO(b/156221007): investigate why
      # s_mean_reversion = (s - mean_reversion_real)**2 leads to a wrong result
      # in graph mode.
      # Until then the square is deliberately written in expanded form; do not
      # "simplify" the next expression.
      s_mean_reversion = ((s - mean_reversion_real) * s
                          - (s - mean_reversion_real) * mean_reversion_real)
      d = s_mean_reversion - volvol_real ** 2 * (-u_imag - u_real ** 2)
      d = tf.math.sqrt(d)
      g = (mean_reversion_real - s - d) / (mean_reversion_real - s + d)
      a = mean_reversion_real * theta_real
      h = g * tf.math.exp(-d * expiries_real)
      m = 2 * tf.math.log((1 - h) / (1 - g))
      c = (a / volvol_real ** 2) * ((mean_reversion_real - s - d)
                                    * expiries_real - m)
      e = (1 - tf.math.exp(-d * expiries_real))
      d_new = (mean_reversion_real - s - d) / volvol_real ** 2 * (e / (1 - h))
      return tf.math.exp(c + d_new * variances_real)

    def integrand_function(u, k):
      # Note that with [2], integrand is in 1 / u**2,
      # which converges faster than Heston 1993 (which is in 1 /u)
      char_fun_complex = char_fun(u)
      char_fun_real_part = tf.math.real(char_fun_complex)
      char_fun_imag_part = tf.math.imag(char_fun_complex)

      a = (char_fun_real_part + char_fun_imag_part / u) * tf.math.cos(u * k)
      b = (char_fun_imag_part - char_fun_real_part / u) * tf.math.sin(u * k)

      return (a + b) / (1.0 + u * u)

    # Log-moneyness log(K / F), with a trailing axis matching the parameters.
    k = tf.expand_dims(tf.math.log(strikes / forwards), axis=-1)

    integral = integration.integrate(
        lambda u: integrand_function(u, k),
        method=integration_method,
        dtype=dtype,
        **kwargs)
    undiscounted_call_prices = forwards - strikes * (0.5 + integral / _PI_)

    if is_call_options is None:
      return undiscounted_call_prices * discount_factors
    else:
      is_call_options = tf.convert_to_tensor(is_call_options, dtype=tf.bool,
                                             name='is_call_options')
      # Use call-put parity for Put
      undiscounted_put_prices = undiscounted_call_prices - forwards + strikes

      undiscount_prices = tf.where(
          is_call_options,
          undiscounted_call_prices, undiscounted_put_prices)
      return undiscount_prices * discount_factors
def js1(logu):
  """Returns `-logu - (1 + exp(logu)) * softplus(logu)`."""
  one_plus_u = 1. + tf.exp(logu)
  softplus_logu = tf.nn.softplus(logu)
  return -logu - one_plus_u * softplus_logu
def _prob(self, counts):
  """Returns the probability of `counts` as `exp(self._log_prob(counts))`."""
  log_probability = self._log_prob(counts)
  return tf.exp(log_probability)
def js2(logu):
  """Returns `2 * exp(logu) * (logu - softplus(logu))`."""
  u = tf.exp(logu)
  log_minus_softplus = logu - tf.nn.softplus(logu)
  return 2. * (u * log_minus_softplus)
def _sample_n(self, n, seed=None):
  """Draws `n` samples by transforming uniform random values.

  Uniform values `u` are mapped to
  `exp(log(scale) - log1p(-u) / concentration)`.

  Args:
    n: Number of samples; becomes the leading dimension of the result.
    seed: Optional seed forwarded to `tf.random.uniform`.

  Returns:
    Samples with shape `[n] + batch_shape`.
  """
  sample_shape = tf.concat([[n], self.batch_shape_tensor()], 0)
  uniforms = tf.random.uniform(
      sample_shape, maxval=1., seed=seed, dtype=self.dtype)
  # Work in log space and exponentiate once at the end.
  log_scale = tf.math.log(self.scale)
  log_draws = log_scale - tf.math.log1p(-uniforms) / self.concentration
  return tf.exp(log_draws)
def expectation(f, samples, log_prob=None, use_reparametrization=True,
                axis=0, keepdims=False, name=None, keep_dims=False):
  """Estimates `E_p[f(X)]` by averaging `f` over samples drawn from `p`.

  The Monte-Carlo estimate is

  ```none
  E_p[f(X)] approx= m**-1 sum_j^m f(x_j),  x_j ~iid p(X)
  ```

  with:

    - `x_j = samples[j, ...]`,
    - `log(p(samples)) = log_prob(samples)`, and
    - `m = prod(shape(samples)[axis])`.

  Gradients: Reparameterization versus Score-Gradient

  If `p` is "reparameterized", meaning its samples are a diffeomorphic
  transformation of draws from a parameterless distribution (e.g.,
  `Normal(Y; m, s) <=> Y = sX + m, X ~ Normal(0,1)`), gradient and
  expectation can be exchanged:
  `grad[ Avg{ s_i : i=1...n } ] = Avg{ grad[s_i] : i=1...n }`, where
  `S_n = Avg{s_i}` and `s_i = f(x_i), x_i ~ p`.

  If `p` is not reparameterized, TensorFlow's gradient is incorrect because
  the chain rule stops at the samples of a non-reparameterized distribution.
  (The value of the estimate itself, `approx_expectation`, does not depend on
  `use_reparametrization`; only its gradient does.) In that situation the
  Score-Gradient trick yields an unbiased gradient:

  ```none
  grad[ E_p[f(X)] ]
  = grad[ int dx p(x) f(x) ]
  = int dx grad[ p(x) f(x) ]
  = int dx [ p'(x) f(x) + p(x) f'(x) ]
  = int dx p(x) [p'(x) / p(x) f(x) + f'(x) ]
  = int dx p(x) grad[ f(x) p(x) / stop_grad[p(x)] ]
  = E_p[ grad[ f(x) p(x) / stop_grad[p(x)] ] ]
  ```

  Whenever `p` is reparameterized, `use_reparametrization=True` is usually the
  preferable setting.

  Warning: callers are responsible for verifying that `p` really is a
  "reparameterized" distribution.

  Example Use:

  ```python
  # Monte-Carlo approximation of a reparameterized distribution, e.g., Normal.

  num_draws = int(1e5)
  p = tfp.distributions.Normal(loc=0., scale=1.)
  q = tfp.distributions.Normal(loc=1., scale=2.)
  exact_kl_normal_normal = tfp.distributions.kl_divergence(p, q)
  # ==> 0.44314718
  approx_kl_normal_normal = tfp.monte_carlo.expectation(
      f=lambda x: p.log_prob(x) - q.log_prob(x),
      samples=p.sample(num_draws, seed=42),
      log_prob=p.log_prob,
      use_reparametrization=(p.reparameterization_type
                             == tfp.distributions.FULLY_REPARAMETERIZED))
  # ==> 0.44632751
  # Relative Error: <1%

  # Monte-Carlo approximation of non-reparameterized distribution,
  # e.g., Bernoulli.

  num_draws = int(1e5)
  p = tfp.distributions.Bernoulli(probs=0.4)
  q = tfp.distributions.Bernoulli(probs=0.8)
  exact_kl_bernoulli_bernoulli = tfp.distributions.kl_divergence(p, q)
  # ==> 0.38190854
  approx_kl_bernoulli_bernoulli = tfp.monte_carlo.expectation(
      f=lambda x: p.log_prob(x) - q.log_prob(x),
      samples=p.sample(num_draws, seed=42),
      log_prob=p.log_prob,
      use_reparametrization=(p.reparameterization_type
                             == tfp.distributions.FULLY_REPARAMETERIZED))
  # ==> 0.38336259
  # Relative Error: <1%

  # For comparing the gradients, see `expectation_test.py`.
  ```

  Note: the example above is illustrative only. To approximate a
  KL-divergence, the following is preferred:

  ```python
  approx_kl_p_q = bf.monte_carlo_csiszar_f_divergence(
      f=bf.kl_reverse,
      p_log_prob=q.log_prob,
      q=p,
      num_draws=num_draws)
  ```

  Args:
    f: Python callable which can return `f(samples)`.
    samples: `Tensor` or nested structure (list, dict, etc.) of `Tensor`s
      holding the samples that form the Monte-Carlo approximation of
      `E_p[f(X)]`. A batch of samples should be indexed by `axis` dimensions.
    log_prob: Python callable which can return `log_prob(samples)`. Must be
      the natural logarithm of the pdf/pmf of each sample. Only required/used
      when `use_reparametrization=False`.
      Default value: `None`.
    use_reparametrization: Python `bool` indicating that the approximation
      should rely on the gradient of the samples being unbiased. Whether
      `True` or `False`, this argument only affects the gradient of the
      resulting `approx_expectation`.
      Default value: `True`.
    axis: The dimensions to average. If `None`, averages all dimensions.
      Default value: `0` (the left-most dimension).
    keepdims: If True, retains averaged dimensions using size `1`.
      Default value: `False`.
    name: A `name_scope` for operations created by this function.
      Default value: `None` (which implies "expectation").
    keep_dims: (Deprecated) If True, retains averaged dimensions using size
      `1`. It is OR-ed with `keepdims`.
      Default value: `False`.

  Returns:
    approx_expectation: `Tensor` corresponding to the Monte-Carlo
      approximation of `E_p[f(X)]`.

  Raises:
    ValueError: if `f` is not a Python `callable`.
    ValueError: if `use_reparametrization=False` and `log_prob` is not a
      Python `callable`.
  """
  # Either spelling of the flag turns dimension retention on.
  retain_dims = keepdims or keep_dims

  with tf.name_scope(name or 'expectation'):
    if not callable(f):
      raise ValueError('`f` must be a callable function.')

    # Reparameterized case: a plain sample mean already differentiates
    # correctly through the samples.
    if use_reparametrization:
      return tf.reduce_mean(f(samples), axis=axis, keepdims=retain_dims)

    if not callable(log_prob):
      raise ValueError('`log_prob` must be a callable function.')

    # Block gradients through the samples themselves; the dependence on the
    # distribution's parameters is reintroduced through `log_prob` below.
    detached_samples = tf.nest.map_structure(tf.stop_gradient, samples)
    log_px = log_prob(detached_samples)
    # `f` is invoked exactly once in case it has side-effects.
    f_values = f(detached_samples)

    # The factor below relies on the identity
    #   `h(x) - stop(h(x)) == zeros_like(h(x))`
    # whose gradient is nevertheless grad[h(x)]. Its exponential therefore
    # evaluates to one while still carrying the gradient of `log_px`.
    #
    # This technique was published as:
    # Jakob Foerster, Greg Farquhar, Maruan Al-Shedivat, Tim Rocktaeschel,
    # Eric P. Xing, Shimon Whiteson (ICML 2018)
    # "DiCE: The Infinitely Differentiable Monte-Carlo Estimator"
    # https://arxiv.org/abs/1802.05098
    #
    # In contrast to
    #   fx = fx + stop(fx) * (logpx - stop(logpx)),
    # DiCE keeps gradients of every order unbiased estimators.
    #
    # IEEE754 specifies that `x - x == 0.` and `x + 0. == x`, so no precision
    # is lost. For the relevant portions of the standard see the
    # StackOverflow question
    # "Is there a floating point value of x, for which x-x == 0 is false?"
    # http://stackoverflow.com/q/2686644
    unit_weight = tf.exp(log_px - tf.stop_gradient(log_px))
    return tf.reduce_mean(
        f_values * unit_weight, axis=axis, keepdims=retain_dims)
def _log_normalization(self, log_rate):
  """Returns the log-normalization term, computed as `exp(log_rate)`."""
  rate = tf.exp(log_rate)
  return rate
def _testSampleConsistentLogProbInterval(
    self,
    concentrations,
    det_bounds,
    dim,
    num_samples=int(1e5),
    dtype=np.float32,
    input_output_cholesky=False,
    false_fail_rate=1e-6,
    target_discrepancy=0.1,
    seed=42):
  """Checks LKJ samples and log-probs agree via an importance-sampled volume.

  Let M be the set of `dim x dim` correlation matrices whose determinant
  exceeds some bound (the reason for the bound follows).
    - M is a (convex!) shape in `dim * (dim - 1) / 2` dimensions, because a
      correlation matrix is determined by its lower triangle and its main
      diagonal is all 1s.
    - M lies entirely inside the [-1,1] cube, because no correlation can
      fall outside that interval.

  The volume of M can be estimated in two ways:
    - importance sampling from the LKJ distribution;
    - importance sampling from the uniform distribution on the cube.

  This test checks that the two estimates agree. Because the uniform proposal
  rejects most draws (and is therefore slow), those volumes are computed
  offline and their confidence intervals are supplied through the
  `volume_bounds` table.

  Why bound the determinant from below? For eta > 1 the LKJ density
  approaches 0 as the determinant approaches 0, yet the test methodology needs
  an upper bound on the importance weights. Rejecting matrices whose
  determinant is too small (in both methods) provides that bound.

  Alternative regions with an analytically known volume (no rejection
  needed) were considered:
    - Option a: a hypersphere guaranteed to lie inside M.
      - Con: no a priori way to find its radius.
      - Con: a lower bound on the determinants inside the sphere is still
        needed, and it is unclear how to compute it.
    - Option b: a trapezoid given as the convex hull of the nearly-extreme
      correlation matrices (i.e., those that partition the variables into two
      strongly anti-correlated groups).
      - Con: requires n-d convex hull code.
      - Con: requires the volume of that convex hull.
      - Con: requires a bound on the determinants of matrices in the hull.
    - Option c: the same, but using matrices that make a single pair of
      variables strongly correlated (or anti-correlated) and leave the others
      uncorrelated.
      - Same cons, except that a determinant bound exists (which felt pretty
        loose).

  Args:
    concentrations: LKJ concentration values; converted to an array of
      `dtype`.
    det_bounds: Determinant lower bounds; each must be a key of
      `volume_bounds[dim]`.
    dim: Dimension of the correlation matrices.
    num_samples: Number of LKJ draws.
    dtype: NumPy scalar type used for all numeric inputs.
    input_output_cholesky: Forwarded to `tfd.LKJ` and `_det_ok_mask`.
    false_fail_rate: Forwarded to the DKWM assertions (also used as the
      false-pass rate).
    target_discrepancy: Largest acceptable detectable discrepancy.
    seed: Seed for sampling.
  """
  expected_lows = [dtype(volume_bounds[dim][bound][0]) for bound in det_bounds]
  expected_highs = [
      dtype(volume_bounds[dim][bound][1]) for bound in det_bounds]
  concentration = np.array(concentrations, dtype=dtype)
  det_lower_bounds = np.array(det_bounds, dtype=dtype)

  # Numerical inaccuracy while lower bounding the determinant can push the
  # largest importance weight above its theoretical maximum
  # (`importance_maxima`), so a small tolerance is added to the upper limit.
  # Thresholding inside `_det_ok_mask` instead would have guarded against this
  # too, but it would also shift the mean.
  high_tolerance = 1e-6

  testee_lkj = tfd.LKJ(
      dimension=dim,
      concentration=concentration,
      input_output_cholesky=input_output_cholesky,
      validate_args=True)
  draws = testee_lkj.sample(num_samples, seed=seed)

  inverse_density = tf.exp(-testee_lkj.log_prob(draws))
  determinant_ok = _det_ok_mask(
      draws, det_lower_bounds, input_output_cholesky)
  importance_weights = inverse_density * determinant_ok

  importance_maxima = (
      (1. / det_lower_bounds) ** (concentration - 1)
      * tf.exp(testee_lkj._log_normalization()))
  weight_ceiling = importance_maxima + high_tolerance

  mean_in_interval = st.assert_true_mean_in_interval_by_dkwm(
      samples=importance_weights,
      low=0.,
      high=weight_ceiling,
      expected_low=expected_lows,
      expected_high=expected_highs,
      false_fail_rate=false_fail_rate)
  detectable_discrepancy = (
      st.min_discrepancy_of_true_means_detectable_by_dkwm(
          num_samples,
          low=0.,
          high=weight_ceiling,
          false_fail_rate=false_fail_rate,
          false_pass_rate=false_fail_rate))
  enough_power = assert_util.assert_less(
      detectable_discrepancy, dtype(target_discrepancy))
  self.evaluate([mean_in_interval, enough_power])
def build(self, input_shape):
  """Builds the Keras functional model and stores it in `self._network`.

  The model consists of an initial pair of masked convolutions (vertical and
  horizontal stacks), a downward pass of gated residual layers with stride-2
  contractions, an upward pass with skip-connections and stride-2 transposed
  convolutions, and a final `Dense` layer whose output is reshaped and split
  into per-mixture parameters.

  NOTE(review): the bracketed citations `[1]` and `[2]` in the comments below
  refer to a bibliography that is defined outside this method.

  Args:
    input_shape: Either the batched image shape, or a length-2 sequence
      `(batch_image_shape, batch_conditional_shape)`. In both cases the
      leading entry of each shape is dropped before creating the Keras
      `Input` layers. Note that the two cases are told apart by
      `len(input_shape) == 2` only.

  The resulting model outputs a list of tensors: three when the image has a
  single channel (component logits, locations, scales), otherwise four (the
  extra one holding the linear-dependence coefficients among channels).
  """
  dtype = self.dtype
  if len(input_shape) == 2:
    batch_image_shape, batch_conditional_shape = input_shape
    conditional_input = tf.keras.layers.Input(
        shape=batch_conditional_shape[1:], dtype=dtype)
  else:
    batch_image_shape = input_shape
    conditional_input = None

  image_shape = batch_image_shape[1:]
  image_input = tf.keras.layers.Input(shape=image_shape, dtype=dtype)

  if self._resnet_activation == 'concat_elu':
    activation = tf.keras.layers.Lambda(
        lambda x: tf.nn.elu(tf.concat([x, -x], axis=-1)), dtype=dtype)
  else:
    activation = tf.keras.activations.get(self._resnet_activation)

  # Define layers with default inputs and layer wrapper applied
  Conv2D = functools.partial(  # pylint:disable=invalid-name
      self._layer_wrapper(tf.keras.layers.Convolution2D),
      filters=self._num_filters,
      padding='same',
      kernel_regularizer=tf.keras.regularizers.l2(self._l2_weight),
      dtype=dtype)

  Dense = functools.partial(  # pylint:disable=invalid-name
      self._layer_wrapper(tf.keras.layers.Dense),
      kernel_regularizer=tf.keras.regularizers.l2(self._l2_weight),
      dtype=dtype)

  Conv2DTranspose = functools.partial(  # pylint:disable=invalid-name
      self._layer_wrapper(tf.keras.layers.Conv2DTranspose),
      filters=self._num_filters,
      padding='same',
      strides=(2, 2),
      kernel_regularizer=tf.keras.regularizers.l2(self._l2_weight),
      dtype=dtype)

  rows, cols = self._receptive_field_dims

  # Define the dimensions of the valid (unmasked) areas of the layer kernels
  # for stride 1 convolutions in the internal layers.
  kernel_valid_dims = {
      'vertical': (rows - 1, cols),  # vertical stack
      'horizontal': (2, cols // 2 + 1)
  }  # horizontal stack

  # Define the size of the kernel necessary to center the current pixel
  # correctly for stride 1 convolutions in the internal layers.
  kernel_sizes = {
      'vertical': (2 * rows - 3, cols),
      'horizontal': (3, cols)
  }

  # Make the kernel constraint functions for stride 1 convolutions in internal
  # layers.
  kernel_constraints = {
      k: _make_kernel_constraint(kernel_sizes[k], (0, v[0]), (0, v[1]))
      for k, v in kernel_valid_dims.items()
  }

  # Build the initial vertical stack/horizontal stack convolutional layers,
  # as shown in Figure 1 of [2]. The receptive field of the initial vertical
  # stack layer is a rectangular area centered above the current pixel.
  vertical_stack_init = Conv2D(kernel_size=(2 * rows - 1, cols),
                               kernel_constraint=_make_kernel_constraint(
                                   (2 * rows - 1, cols),
                                   (0, rows - 1),
                                   (0, cols)))(image_input)

  # In Figure 1 [2], the receptive field of the horizontal stack is
  # illustrated as the pixels in the same row and to the left of the current
  # pixel. [1] increases the height of this receptive field from one pixel to
  # two (`horizontal_stack_left`) and additionally includes a subset of the
  # row of pixels centered above the current pixel (`horizontal_stack_up`).
  horizontal_stack_up = Conv2D(kernel_size=(3, cols),
                               kernel_constraint=_make_kernel_constraint(
                                   (3, cols), (0, 1), (0, cols)))(image_input)

  horizontal_stack_left = Conv2D(
      kernel_size=(3, cols),
      kernel_constraint=_make_kernel_constraint(
          (3, cols), (0, 2), (0, cols // 2)))(image_input)

  horizontal_stack_init = tf.keras.layers.add(
      [horizontal_stack_up, horizontal_stack_left], dtype=dtype)

  layer_stacks = {
      'vertical': [vertical_stack_init],
      'horizontal': [horizontal_stack_init]
  }

  # Build the downward pass of the U-net (left-hand half of Figure 2 of [1]).
  # Each `i` iteration builds one of the highest-level blocks (identified as
  # 'Sequence of 6 layers' in the figure, consisting of `num_resnet=5` stride-
  # 1 layers, and one stride-2 layer that contracts the height/width
  # dimensions). The `_` iterations build the stride 1 layers. The layers of
  # the downward pass are stored in lists, since we'll later need them to make
  # skip-connections to layers in the upward pass of the U-net (the skip-
  # connections are represented by curved lines in Figure 2 [1]).
  for i in range(self._num_hierarchies):
    for _ in range(self._num_resnet):
      # Build a layer shown in Figure 2 of [2]. The 'vertical' iteration
      # builds the layers in the left half of the figure, and the 'horizontal'
      # iteration builds the layers in the right half.
      for stack in ['vertical', 'horizontal']:
        input_x = layer_stacks[stack][-1]
        x = activation(input_x)
        x = Conv2D(kernel_size=kernel_sizes[stack],
                   kernel_constraint=kernel_constraints[stack])(x)

        # Add the vertical-stack layer to the horizontal-stack layer
        if stack == 'horizontal':
          h = activation(layer_stacks['vertical'][-1])
          h = Dense(self._num_filters)(h)
          x = tf.keras.layers.add([h, x], dtype=dtype)

        x = activation(x)
        x = tf.keras.layers.Dropout(self._dropout_p, dtype=dtype)(x)
        x = Conv2D(filters=2 * self._num_filters,
                   kernel_size=kernel_sizes[stack],
                   kernel_constraint=kernel_constraints[stack])(x)

        if conditional_input is not None:
          h_projection = _build_and_apply_h_projection(
              conditional_input, self._num_filters, dtype=dtype)
          x = tf.keras.layers.add([x, h_projection], dtype=dtype)

        x = _apply_sigmoid_gating(x)

        # Add a residual connection from the layer's input.
        out = tf.keras.layers.add([input_x, x], dtype=dtype)
        layer_stacks[stack].append(out)

    if i < self._num_hierarchies - 1:
      # Build convolutional layers that contract the height/width dimensions
      # on the downward pass between each set of layers (e.g. contracting from
      # 32x32 to 16x16 in Figure 2 of [1]).
      for stack in ['vertical', 'horizontal']:
        # Define kernel dimensions/masking to maintain the autoregressive
        # property.
        x = layer_stacks[stack][-1]
        h, w = kernel_valid_dims[stack]
        kernel_height = 2 * h
        if stack == 'vertical':
          kernel_width = w + 1
        else:
          kernel_width = 2 * w

        kernel_size = (kernel_height, kernel_width)
        kernel_constraint = _make_kernel_constraint(
            kernel_size, (0, h), (0, w))
        x = Conv2D(strides=(2, 2), kernel_size=kernel_size,
                   kernel_constraint=kernel_constraint)(x)
        layer_stacks[stack].append(x)

  # Upward pass of the U-net (right-hand half of Figure 2 of [1]). We stored
  # the layers of the downward pass in a list, in order to access them to make
  # skip-connections to the upward pass. For the upward pass, we need to keep
  # track of only the current layer, so we maintain a reference to the
  # current layer of the horizontal/vertical stack in the `upward_pass` dict.
  # The upward pass begins with the last layer of the downward pass.
  upward_pass = {key: stack.pop() for key, stack in layer_stacks.items()}

  # As with the downward pass, each `i` iteration builds a highest level block
  # in Figure 2 [1], and the `_` iterations build individual layers within the
  # block.
  for i in range(self._num_hierarchies):
    num_resnet = self._num_resnet if i == 0 else self._num_resnet + 1

    for _ in range(num_resnet):
      # Build a layer as shown in Figure 2 of [2], with a skip-connection
      # from the symmetric layer in the downward pass.
      for stack in ['vertical', 'horizontal']:
        input_x = upward_pass[stack]
        x_symmetric = layer_stacks[stack].pop()

        x = activation(input_x)
        x = Conv2D(kernel_size=kernel_sizes[stack],
                   kernel_constraint=kernel_constraints[stack])(x)

        # Include the vertical-stack layer of the upward pass in the layers
        # to be added to the horizontal layer.
        if stack == 'horizontal':
          x_symmetric = tf.keras.layers.Concatenate(
              axis=-1, dtype=dtype)(
                  [upward_pass['vertical'], x_symmetric])

        # Add a skip-connection from the symmetric layer in the downward
        # pass to the layer `x` in the upward pass.
        h = activation(x_symmetric)
        h = Dense(self._num_filters)(h)
        x = tf.keras.layers.add([h, x], dtype=dtype)

        x = activation(x)
        x = tf.keras.layers.Dropout(self._dropout_p, dtype=dtype)(x)
        x = Conv2D(filters=2 * self._num_filters,
                   kernel_size=kernel_sizes[stack],
                   kernel_constraint=kernel_constraints[stack])(x)

        if conditional_input is not None:
          h_projection = _build_and_apply_h_projection(
              conditional_input, self._num_filters, dtype=dtype)
          x = tf.keras.layers.add([x, h_projection], dtype=dtype)

        x = _apply_sigmoid_gating(x)
        upward_pass[stack] = tf.keras.layers.add([input_x, x], dtype=dtype)

    # Define deconvolutional layers that expand height/width dimensions on the
    # upward pass (e.g. expanding from 8x8 to 16x16 in Figure 2 of [1]), with
    # the correct kernel dimensions/masking to maintain the autoregressive
    # property.
    if i < self._num_hierarchies - 1:
      for stack in ['vertical', 'horizontal']:
        h, w = kernel_valid_dims[stack]
        kernel_height = 2 * h - 2
        if stack == 'vertical':
          kernel_width = w + 1
          kernel_constraint = _make_kernel_constraint(
              (kernel_height, kernel_width),
              (h - 2, kernel_height),
              (0, w))
        else:
          kernel_width = 2 * w - 2
          kernel_constraint = _make_kernel_constraint(
              (kernel_height, kernel_width),
              (h - 2, kernel_height),
              (w - 2, kernel_width))

        x = upward_pass[stack]
        x = Conv2DTranspose(kernel_size=(kernel_height, kernel_width),
                            kernel_constraint=kernel_constraint)(x)
        upward_pass[stack] = x

  # Only the horizontal stack feeds the output head.
  x_out = tf.keras.layers.ELU(dtype=dtype)(upward_pass['horizontal'])

  # Build final Dense/Reshape layers to output the correct number of
  # parameters per pixel.
  num_channels = tensorshape_util.as_list(image_shape)[-1]
  num_coeffs = num_channels * (
      num_channels - 1) // 2  # alpha, beta, gamma in eq.3 of paper
  num_out = num_channels * 2 + num_coeffs + 1  # mu, s + alpha, beta, gamma + 1 (mixture weight)
  num_out_total = num_out * self._num_logistic_mix
  params = Dense(num_out_total)(x_out)
  params = tf.reshape(
      params,
      prefer_static.concat(  # [-1,H,W,nb mixtures, params per mixture]
          [[-1], image_shape[:-1], [self._num_logistic_mix, num_out]],
          axis=0))

  # If there is one color channel, split the parameters into a list of three
  # output `Tensor`s: (1) component logits for the Quantized Logistic mixture
  # distribution, (2) location parameters for each component, and (3) scale
  # parameters for each component. If there is more than one color channel,
  # return a fourth `Tensor` for the coefficients for the linear dependence
  # among color channels (e.g. alpha, beta, gamma).
  # [logits, mu, s, linear dependence]
  splits = 3 if num_channels == 1 else [
      1, num_channels, num_channels, num_coeffs
  ]
  outputs = tf.split(params, splits, axis=-1)

  # Squeeze singleton dimension from component logits
  outputs[0] = tf.squeeze(outputs[0], axis=-1)

  # Ensure scales are positive and do not collapse to near-zero
  outputs[2] = tf.nn.softplus(outputs[2]) + tf.cast(
      tf.exp(-7.), self.dtype)

  inputs = image_input if conditional_input is None else [
      image_input, conditional_input
  ]
  self._network = tf.keras.Model(inputs=inputs, outputs=outputs)
  super(_PixelCNNNetwork, self).build(input_shape)
def test_batch_of_filters(self):
  """Runs a batch of particle filters on synthetic constant-velocity data.

  Verifies the shapes of every filter output, that the weighted particle
  means track the observed positions and the true velocities, that the
  spread of the velocity particles shrinks between the first and the last
  step, and that reconstructed trajectories have the expected shapes.
  """
  batch_shape = [3, 2]
  num_particles = 1000
  num_timesteps = 40
  particles_shape = [num_timesteps] + batch_shape + [num_particles]

  # Batch of priors on object 1D positions and velocities.
  initial_state_prior = tfd.JointDistributionNamed({
      'position': tfd.Normal(loc=0., scale=tf.ones(batch_shape)),
      'velocity': tfd.Normal(loc=0., scale=tf.ones(batch_shape) * 0.1)})

  def transition_fn(_, previous_state):
    # Position advances by the velocity; velocity follows a small random walk.
    last_position = previous_state['position']
    last_velocity = previous_state['velocity']
    return tfd.JointDistributionNamed({
        'position': tfd.Normal(loc=last_position + last_velocity, scale=0.1),
        'velocity': tfd.Normal(loc=last_velocity, scale=0.01)})

  def observation_fn(_, state):
    # Only the position is observed, with noise.
    return tfd.Normal(loc=state['position'], scale=0.1)

  # Batch of synthetic observations. The two draws below must stay in this
  # order because they share NumPy's global random state.
  true_initial_positions = np.random.randn(*batch_shape).astype(self.dtype)
  true_velocities = 0.1 * np.random.randn(*batch_shape).astype(self.dtype)
  elapsed_steps = np.arange(num_timesteps).astype(self.dtype)[..., None, None]
  observed_positions = true_velocities * elapsed_steps + true_initial_positions

  filter_outputs = self.evaluate(
      tfp.experimental.mcmc.particle_filter(
          observations=observed_positions,
          initial_state_prior=initial_state_prior,
          transition_fn=transition_fn,
          observation_fn=observation_fn,
          num_particles=num_particles,
          seed=test_util.test_seed()))
  (particles,
   log_weights,
   parent_indices,
   step_log_marginal_likelihoods) = filter_outputs

  # Output shapes.
  self.assertAllEqual(particles['position'].shape, particles_shape)
  self.assertAllEqual(particles['velocity'].shape, particles_shape)
  self.assertAllEqual(parent_indices.shape, particles_shape)
  self.assertAllEqual(step_log_marginal_likelihoods.shape,
                      [num_timesteps] + batch_shape)

  # Weighted particle means should track the data.
  position_means = tf.reduce_sum(
      tf.exp(log_weights) * particles['position'], axis=-1)
  self.assertAllClose(
      self.evaluate(position_means), observed_positions, atol=0.1)

  velocity_means = tf.reduce_sum(
      tf.exp(log_weights) * particles['velocity'], axis=-1)
  time_averaged_velocity = tf.reduce_mean(velocity_means, axis=0)
  self.assertAllClose(
      self.evaluate(time_averaged_velocity), true_velocities, atol=0.05)

  # Uncertainty in velocity should decrease over time.
  velocity_stddev = self.evaluate(
      tf.math.reduce_std(particles['velocity'], axis=-1))
  stddev_change = velocity_stddev[-1] - velocity_stddev[0]
  self.assertAllLess(stddev_change, 0.)

  trajectories = self.evaluate(
      tfp.experimental.mcmc.reconstruct_trajectories(
          particles, parent_indices))
  self.assertAllEqual(particles_shape, trajectories['position'].shape)
  self.assertAllEqual(particles_shape, trajectories['velocity'].shape)
def _inverse(self, y):
  """Maps `y` back by exponentiating its tail and cumulatively summing.

  The first entry along the last axis is kept as is, every later entry is
  exponentiated, and the running sum along the last axis is returned.

  Args:
    y: `Tensor` indexed along its last axis.

  Returns:
    `Tensor` with the same last-axis length as `y`.
  """
  leading_entry = y[..., 0, tf.newaxis]
  increments = tf.exp(y[..., 1:])
  pieces = tf.concat([leading_entry, increments], axis=-1)
  return tf.cumsum(pieces, axis=-1)
def grad(dy):
  """Scales the upstream gradient `dy` for each of the two inputs.

  Uses `x`, `concentration` and `dcdf_dconcentration` from the enclosing
  scope. The first factor is
  `exp(concentration * (cos(x) - 1)) / (2 * pi * i0e(concentration))`.

  Args:
    dy: Upstream gradient.

  Returns:
    Tuple `(dy * density, dy * dcdf_dconcentration)`.
  """
  numerator = tf.exp(concentration * (tf.cos(x) - 1.))
  denominator = (2. * np.pi) * tf.math.bessel_i0e(concentration)
  density = numerator / denominator
  return dy * density, dy * dcdf_dconcentration
def _forward(self, x):
  """Returns `exp(-exp(-(x - loc) / scale))`."""
  standardized = (x - self.loc) / self.scale
  inner = tf.exp(-standardized)
  return tf.exp(-inner)
def _entropy(self):
  """Returns `-(1 - p1) * log_p0 - p1 * log_p1`.

  `log_p0` and `log_p1` are the pair returned by `self._outcome_log_probs()`
  and `p1 = exp(log_p1)`.
  """
  log_p0, log_p1 = self._outcome_log_probs()
  p1 = tf.exp(log_p1)
  p0 = 1. - p1
  return -p0 * log_p0 - p1 * log_p1