def testBijector(self, bijector_name, data):
  """Checks gradients, monotonicity reporting and dtypes of a drawn bijector.

  For each of `forward`, `forward_log_det_jacobian`, `inverse` and
  `inverse_log_det_jacobian`, the method is differentiated with respect to
  its input and to every floating-point trainable variable of the bijector;
  the gradients are handed to `assert_no_none_grad`, and the call itself runs
  under `tfp_hps.assert_no_excessive_var_usage`.

  Args:
    bijector_name: Name of the bijector to draw.
    data: Hypothesis data object used for all draws. The order of the draws
      below is significant and must be kept stable.
  """
  tfp_hps.guitar_skip_if_matches('Tanh', bijector_name, 'b/144163991')
  bijector, event_dim = self._draw_bijector(bijector_name, data)

  def floating_trainable_variables():
    return [
        var for var in bijector.trainable_variables if var.dtype.is_floating
    ]

  def is_exempt_from_monotonicity_check(candidate):
    # TODO(b/148459057): Softfloor (possibly wrapped in any number of
    # `Invert`s) is not verified on Guitar because of a numerical problem.
    if not tfp_hps.running_under_guitar():
      return False
    while True:
      if isinstance(candidate, tfb.Softfloor):
        return True
      if not isinstance(candidate, tfb.Invert):
        return False
      candidate = candidate.bijector

  # ---- Forward mapping -----------------------------------------------------
  # Differentiate through `forward` with respect to the input and the
  # parameter variables, checking that variables are not referenced overmuch.
  xs = self._draw_domain_tensor(bijector, data, event_dim)
  domain_wrt = [xs] + floating_trainable_variables()
  with tf.GradientTape() as forward_tape:
    with tfp_hps.assert_no_excessive_var_usage(
        'method `forward` of {}'.format(bijector)):
      forward_tape.watch(domain_wrt)
      # TODO(b/73073515): Fix graph mode gradients with bijector caching.
      ys = bijector.forward(xs + 0)
  forward_grads = forward_tape.gradient(ys, domain_wrt)
  assert_no_none_grad(bijector, 'forward', domain_wrt, forward_grads)

  # For scalar bijectors, verify correctness of the _is_increasing method.
  is_scalar_bijector = (bijector.forward_min_event_ndims == 0 and
                        bijector.inverse_min_event_ndims == 0)
  if is_scalar_bijector and not is_exempt_from_monotonicity_check(bijector):
    dydx = forward_grads[0]
    hp.note('dydx: {}'.format(dydx))
    finite = tf.math.is_finite(dydx)
    # pylint: disable=protected-access
    increasing_or_flat = bijector._internal_is_increasing() | tf.equal(
        dydx, 0)
    # pylint: enable=protected-access
    # OR-ing with zeros broadcasts the right-hand side to the left-hand shape.
    expected = (finite & (dydx >= 0)) | tf.zeros_like(increasing_or_flat)
    self.assertAllEqual(finite & increasing_or_flat, expected)

  # ---- Forward log det jacobian --------------------------------------------
  fldj_event_ndims = data.draw(
      hps.integers(min_value=bijector.forward_min_event_ndims,
                   max_value=xs.shape.ndims))
  with tf.GradientTape() as fldj_tape:
    fldj_max_permitted = _ldj_tensor_conversions_allowed(
        bijector, is_forward=True)
    with tfp_hps.assert_no_excessive_var_usage(
        'method `forward_log_det_jacobian` of {}'.format(bijector),
        max_permissible=fldj_max_permitted):
      fldj_tape.watch(domain_wrt)
      # TODO(b/73073515): Fix graph mode gradients with bijector caching.
      fldj = bijector.forward_log_det_jacobian(
          xs + 0, event_ndims=fldj_event_ndims)
  fldj_grads = fldj_tape.gradient(fldj, domain_wrt)
  assert_no_none_grad(
      bijector, 'forward_log_det_jacobian', domain_wrt, fldj_grads)

  # ---- Inverse mapping -----------------------------------------------------
  # Same checks, now with respect to the codomain "input" and the variables.
  ys = self._draw_codomain_tensor(bijector, data, event_dim)
  codomain_wrt = [ys] + floating_trainable_variables()
  with tf.GradientTape() as inverse_tape:
    with tfp_hps.assert_no_excessive_var_usage(
        'method `inverse` of {}'.format(bijector)):
      inverse_tape.watch(codomain_wrt)
      # TODO(b/73073515): Fix graph mode gradients with bijector caching.
      xs = bijector.inverse(ys + 0)
  inverse_grads = inverse_tape.gradient(xs, codomain_wrt)
  assert_no_none_grad(bijector, 'inverse', codomain_wrt, inverse_grads)

  # ---- Inverse log det jacobian --------------------------------------------
  ildj_event_ndims = data.draw(
      hps.integers(min_value=bijector.inverse_min_event_ndims,
                   max_value=ys.shape.ndims))
  with tf.GradientTape() as ildj_tape:
    ildj_max_permitted = _ldj_tensor_conversions_allowed(
        bijector, is_forward=False)
    with tfp_hps.assert_no_excessive_var_usage(
        'method `inverse_log_det_jacobian` of {}'.format(bijector),
        max_permissible=ildj_max_permitted):
      ildj_tape.watch(codomain_wrt)
      # TODO(b/73073515): Fix graph mode gradients with bijector caching.
      ildj = bijector.inverse_log_det_jacobian(
          ys + 0, event_ndims=ildj_event_ndims)
  ildj_grads = ildj_tape.gradient(ildj, codomain_wrt)
  assert_no_none_grad(
      bijector, 'inverse_log_det_jacobian', codomain_wrt, ildj_grads)

  # Check that the outputs of forward_dtype and inverse_dtype match the dtypes
  # of the outputs of forward and inverse. At this point `ys` is the drawn
  # codomain tensor and `xs` is its image under `inverse`.
  self.assertAllEqualNested(ys.dtype, bijector.forward_dtype(xs.dtype))
  self.assertAllEqualNested(xs.dtype, bijector.inverse_dtype(ys.dtype))
def _mode(self):
  """Returns `floor(max(total_count - 1, 0) * exp(logits))`.

  Wherever `total_count` is not greater than 1 the factor
  `total_count - 1` is replaced by zero, so the result there is zero.
  """
  count = self.total_count
  count_minus_one = tf.where(count > 1., count - 1., tf.zeros_like(count))
  return tf.floor(count_minus_one * tf.exp(self.logits))
def _mode(self):
  """Returns `floor(max(total_count - 1, 0) * exp(logits))`.

  `total_count` is converted to a tensor once and reused. Wherever it is not
  greater than 1 the factor `total_count - 1` is replaced by zero.
  """
  count = tf.convert_to_tensor(self.total_count)
  zeros = tf.zeros_like(count)
  count_minus_one = tf.where(count > 1., count - 1., zeros)
  exp_logits = tf.exp(self._logits_parameter_no_checks())
  return tf.floor(count_minus_one * exp_logits)
def testDenseFlipout(self):
  """Compares `DenseFlipout` outputs and KL terms with a manual computation.

  The expected output is rebuilt by hand: the posterior-mean matmul plus a
  sign-perturbed matmul against a zero-mean sample of the kernel posterior,
  plus the bias sample. The test also checks that the mocked divergence
  functions were called with the very same objects (identity, not equality).
  """
  batch_size, in_size, out_size = 2, 3, 4
  with self.cached_session() as sess:
    tf1.set_random_seed(9069)
    (kernel_posterior, kernel_prior, kernel_divergence,
     bias_posterior, bias_prior, bias_divergence, layer, inputs,
     outputs, kl_penalty) = self._testDenseSetUp(
         tfp.layers.DenseFlipout,
         batch_size, in_size, out_size, seed=44)

    tf1.set_random_seed(9069)
    zero_mean_kernel_posterior = tfd.Normal(
        loc=tf.zeros_like(kernel_posterior.result_loc),
        scale=kernel_posterior.result_scale)
    kernel_perturbation = zero_mean_kernel_posterior.sample(seed=42)

    stream = tfd.SeedStream(layer.seed, salt='DenseFlipout')

    def draw_signs(width):
      # Uniform draws from {0, 1} mapped to {-1, +1}. Each call consumes one
      # seed from `stream`, so the call order below must not change.
      bits = tf.random.uniform(
          [batch_size, width],
          minval=0,
          maxval=2,
          dtype=tf.int64,
          seed=stream())
      return tf.cast(2 * bits - 1, inputs.dtype)

    sign_input = draw_signs(in_size)
    sign_output = draw_signs(out_size)

    perturbed_inputs = tf.matmul(
        inputs * sign_input, kernel_perturbation) * sign_output
    expected_outputs = (
        tf.matmul(inputs, kernel_posterior.result_loc)
        + perturbed_inputs
        + bias_posterior.result_sample)

    # (expected, actual) tensor pairs, listed in the order they are asserted.
    tensor_pairs = [
        (bias_posterior.result_sample, layer.bias_posterior_tensor),
        (expected_outputs, outputs),
        (kernel_divergence.result, kl_penalty[0]),
        (bias_divergence.result, kl_penalty[1]),
    ]
    # A single `run` so that both sides see the same random draws.
    for expected_, actual_ in sess.run(tensor_pairs):
      self.assertAllClose(expected_, actual_, rtol=1e-6, atol=0.)

    # MockKLDivergence appends the args of each call to a list; we expect a
    # single call per divergence, so the args of interest sit at index 0.
    # Identity is tested with `assertIs` because
    # TensorFlowTestCase.assertAllEqual coerces its inputs to numpy arrays,
    # which does not work for a mixture of Distributions and Tensors.
    expected_calls = (
        (kernel_divergence, [kernel_posterior, kernel_prior, None]),
        (bias_divergence,
         [bias_posterior, bias_prior, bias_posterior.result_sample]),
    )
    for divergence, wanted_args in expected_calls:
      for wanted, received in zip(wanted_args, divergence.args[0]):
        self.assertIs(wanted, received)
def update(value_and_gradients_function,
           val_left,
           val_right,
           val_trial,
           f_lim,
           active=None):
  """Squeezes a bracketing interval containing the minimum.

  Given an interval which brackets a minimum and a trial point, finds a
  smaller nested interval which also brackets the minimum. If the trial point
  does not lie strictly inside the interval, the current interval is kept.

  The description below is phrased for a single line function, but the
  implementation also accepts batches of points so that several line
  functions can be minimized at once (see `value_and_gradients_function`).

  Bracketing is expressed through the opposite slope conditions. With left
  end point 'a', right end point 'b', function 'f' and derivative 'df', the
  update procedure relies on:

  ```
    f(a) <= f(0) + epsilon   (1)
    df(a) < 0                (2)
    df(b) > 0                (3)
  ```

  In (1), epsilon is a small positive constant: the function at the left end
  point must not be much bigger than at the starting point (i.e. 0). This is
  easy to satisfy because, by assumption, we are moving in a direction in
  which the function decreases. Conditions (2) and (3) together demand at
  least one zero of the derivative between a and b.

  When the trial point lies inside the interval, the function value and
  derivative there are used to shrink the interval in a way that preserves
  the opposite slope conditions.

  For further details of this component, see the procedure U0-U3 on page 123
  of the [Hager and Zhang (2006)][2] article.

  Note that this function does not verify that the opposite slope conditions
  hold for the supplied interval; this is assumed.

  Args:
    value_and_gradients_function: A Python callable that accepts a real scalar
      tensor and returns an object that can be converted to a namedtuple.
      The namedtuple should have fields 'f' and 'df' that correspond to scalar
      tensors of real dtype containing the value of the function and its
      derivative at that point. The other namedtuple fields, if present,
      should be tensors or sequences (possibly nested) of tensors.
      In usual optimization application, this function would be generated by
      projecting the multivariate objective function along some specific
      direction. The direction is determined by some other procedure but
      should be a descent direction (i.e. the derivative of the projected
      univariate function must be negative at 0.).
      Alternatively, the function may represent the batching of `n` such line
      functions (e.g. projecting a single multivariate objective function
      along `n` distinct directions at once) accepting n points as input,
      i.e. a tensor of shape [n], and the fields 'f' and 'df' in the returned
      namedtuple should each be a tensor of shape [n], with the corresponding
      function values and derivatives at the input points.
    val_left: Return value of value_and_gradients_function at the left end
      point of the bracketing interval (labelled 'a' above).
    val_right: Return value of value_and_gradients_function at the right end
      point of the bracketing interval (labelled 'b' above).
    val_trial: Return value of value_and_gradients_function at the trial
      point to be used to shrink the interval.
    f_lim: real `Tensor` of shape [n]. The function value threshold for the
      approximate Wolfe conditions to be checked for each batch member.
    active: optional boolean `Tensor` of shape [n]. Relevant in batching mode
      only, indicates batch members on which the update procedure should be
      applied. On non-active members the current left/right interval is
      returned unmodified.

  Returns:
    A namedtuple containing the following fields:
      iteration: An int32 scalar `Tensor`. The number of iterations performed
        by the bisect algorithm.
      stopped: A boolean `Tensor` of shape [n]. True for those batch members
        where the bisection algorithm terminated.
      failed: A boolean `Tensor` of shape [n]. True for those batch members
        where an error was encountered.
      num_evals: An int32 scalar `Tensor`. The number of times the objective
        function was evaluated.
      left: Return value of value_and_gradients_function at the updated left
        end point of the interval found.
      right: Return value of value_and_gradients_function at the updated right
        end point of the interval found.
  """
  # Only batch members whose trial point is strictly inside the interval
  # (and which are active, when a mask is given) may be updated.
  trial_inside = (val_left.x < val_trial.x) & (val_trial.x < val_right.x)
  if active is not None:
    trial_inside = trial_inside & active

  slope_is_negative = val_trial.df < 0

  # A negative slope with a value that is not too large makes the trial point
  # a valid new left end point.
  valid_left = slope_is_negative & (val_trial.f <= f_lim)

  # A negative slope with too high a value means bisection can narrow down an
  # interval between the current left end point and the trial point.
  needs_bisect = trial_inside & slope_is_negative & (val_trial.f > f_lim)

  # `~valid_left` means either the slope at the trial point is non-negative
  # (so it can serve as a right end point) or `needs_bisect` holds. Either
  # way the current left is kept and the trial point replaces right.
  squeezed_left = val_where(trial_inside & valid_left, val_trial, val_left)
  squeezed_right = val_where(
      trial_inside & ~valid_left, val_trial, val_right)

  initial_bisect_state = _IntermediateResult(
      iteration=tf.convert_to_tensor(0),
      stopped=~needs_bisect,
      failed=tf.zeros_like(trial_inside),  # i.e. all false.
      num_evals=tf.convert_to_tensor(0),
      left=squeezed_left,
      right=squeezed_right)
  return _bisect(value_and_gradients_function, initial_bisect_state, f_lim)
def _entropy(self):
  """Returns the entropy, broadcast against the shape of `loc`.

  Computed from the exponentially scaled Bessel functions as
  `concentration * (1 - i1e / i0e) + log(i0e) + log(2 * pi)`.
  """
  concentration = self.concentration
  i0e = tf.math.bessel_i0e(concentration)
  i1e = tf.math.bessel_i1e(concentration)
  bessel_ratio = i1e / i0e
  entropy = (concentration * (1 - bessel_ratio) +
             tf.math.log(i0e) +
             np.log(2 * np.pi))
  # Adding zeros shaped like `loc` broadcasts the result to the batch shape.
  return entropy + tf.zeros_like(self.loc)
def _variance(self):
  """Returns `1 - i1e(concentration) / i0e(concentration)`.

  `concentration` is first broadcast against `loc` by adding zeros shaped
  like `loc`.
  """
  broadcast_concentration = self.concentration + tf.zeros_like(self.loc)
  i1e = tf.math.bessel_i1e(broadcast_concentration)
  i0e = tf.math.bessel_i0e(broadcast_concentration)
  return 1. - i1e / i0e
def collater_fn(batch: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
  """Collater function for relation classification task. See BaseTask."""

  def per_sample_vector(tensor):
    return tf.reshape(tensor, [bsz])

  def gather_flattened(key):
    # Flattens a per-sample mention feature and picks the sampled mentions.
    flat = tf.reshape(batch[key], [n_candidate_mentions])
    return tf.gather(flat, sampled_indices)

  new_batch = {
      'text_ids': batch['text_ids'],
      'text_mask': batch['text_mask'],
      'classifier_target': per_sample_vector(batch['target']),
  }

  # Sample mentions across batch.
  # The subject / object mentions must always have priority when
  # `max_batch_mentions` mentions are picked out of all available ones, and
  # they must come in the order of their samples: the first picked mention is
  # the subject mention of the first sample, the second one is the object
  # mention of the first sample, the third one is the subject mention of the
  # second sample, etc.
  subj_index = per_sample_vector(batch['subject_mention_indices'])
  obj_index = per_sample_vector(batch['object_mention_indices'])

  # Convert per-sample mention positions into positions within the flattened
  # mentions.
  sample_offsets = (
      tf.range(bsz, dtype=obj_index.dtype) * config.max_mentions_per_sample)
  interleaved_targets = tf.stack(
      [subj_index + sample_offsets, obj_index + sample_offsets], axis=1)
  mention_target_indices = tf.reshape(interleaved_targets, [-1])

  # Sample the rest of the mentions uniformly across batch. Masked-out
  # mentions get a score of zero.
  scores = tf.random.uniform(shape=tf.shape(batch['mention_mask']))
  scores = scores * tf.cast(batch['mention_mask'], tf.float32)

  # Target mentions must not be sampled a second time, so their scores are
  # set to -1. The scores are still 2D here, hence the unadjusted
  # `subj_index` / `obj_index` are used.
  for target_index in (subj_index, obj_index):
    rows = tf.range(bsz, dtype=target_index.dtype)
    row_col = tf.stack([rows, target_index], axis=1)
    scores = tf.tensor_scatter_nd_update(
        scores, row_col, tf.fill(tf.shape(target_index), -1.0))

  # `2 * bsz` target mentions are already chosen.
  num_to_sample = tf.maximum(max_batch_mentions - 2 * bsz, 0)
  sampled_scores, sampled_indices = tf.math.top_k(
      tf.reshape(scores, [-1]), num_to_sample, sorted=True)

  # A score of -1 among the top-k means a target mention was picked again.
  # Scores are sorted, so those entries are at the tail and are cut off.
  is_fresh = tf.not_equal(sampled_scores, -1)
  num_not_double_sampled = tf.reduce_sum(tf.cast(is_fresh, tf.int32))
  sampled_indices = sampled_indices[:num_not_double_sampled]

  # Target mentions (subject / object) first, then the sampled mentions.
  mention_target_indices = tf.cast(
      mention_target_indices, sampled_indices.dtype)
  sampled_indices = tf.concat(
      [mention_target_indices, sampled_indices], axis=0)
  sampled_indices = mention_preprocess_utils.dynamic_padding_1d(
      sampled_indices, max_batch_mentions)

  dtype = batch['mention_start_positions'].dtype
  sample_ids = tf.range(bsz, dtype=dtype)

  new_batch['mention_mask'] = gather_flattened('mention_mask')
  new_batch['mention_start_positions'] = gather_flattened(
      'mention_start_positions')
  new_batch['mention_end_positions'] = gather_flattened(
      'mention_end_positions')
  new_batch['mention_batch_positions'] = tf.gather(
      tf.repeat(sample_ids, config.max_mentions_per_sample),
      sampled_indices)

  # Targets occupy the first `2 * bsz` slots: subjects at even positions,
  # objects at odd positions.
  new_batch['mention_target_indices'] = tf.range(2 * bsz, dtype=dtype)
  new_batch['mention_subject_indices'] = sample_ids * 2
  new_batch['mention_object_indices'] = sample_ids * 2 + 1

  if config.get('max_length_with_entity_tokens') is None:
    max_length = encoder_config.max_length
  else:
    batch_with_entity_tokens = mention_preprocess_utils.add_entity_tokens(
        text_ids=new_batch['text_ids'],
        text_mask=new_batch['text_mask'],
        mention_mask=new_batch['mention_mask'],
        mention_batch_positions=new_batch['mention_batch_positions'],
        mention_start_positions=new_batch['mention_start_positions'],
        mention_end_positions=new_batch['mention_end_positions'],
        new_length=config.max_length_with_entity_tokens,
    )
    # Update `text_ids`, `text_mask`, `mention_mask`, `mention_*_positions`
    new_batch.update(batch_with_entity_tokens)
    # Update `max_length`
    max_length = config.max_length_with_entity_tokens

  # These must be read from `new_batch` so that any positions replaced by the
  # entity-token step above are the ones gathered.
  target_slots = new_batch['mention_target_indices']
  new_batch['mention_target_batch_positions'] = tf.gather(
      new_batch['mention_batch_positions'], target_slots)
  new_batch['mention_target_start_positions'] = tf.gather(
      new_batch['mention_start_positions'], target_slots)
  new_batch['mention_target_end_positions'] = tf.gather(
      new_batch['mention_end_positions'], target_slots)
  new_batch['mention_target_weights'] = tf.ones(2 * bsz)
  # Fake IDs -- some encoders (ReadTwice) need them
  new_batch['mention_target_ids'] = tf.zeros(2 * bsz)

  new_batch['segment_ids'] = tf.zeros_like(new_batch['text_ids'])
  single_row_positions = tf.expand_dims(tf.range(max_length), axis=0)
  new_batch['position_ids'] = tf.tile(single_row_positions, (bsz, 1))

  return new_batch
def bootstrap_results(self, init_state):
  """Returns an object with the same type as returned by `one_step`.

  Args:
    init_state: `Tensor` or Python `list` of `Tensor`s representing the
      initial state(s) of the Markov chain(s).

  Returns:
    kernel_results: A (possibly nested) `tuple`, `namedtuple` or `list` of
      `Tensor`s representing internal calculations made within this function.
      This includes replica states.

  Raises:
    ValueError: if the state already includes replicas and the statically
      known number of replicas in `init_state` differs from the statically
      known number of `inverse_temperatures`.
    TypeError: if `make_kernel_fn` fails with a `TypeError` whose message
      mentions 'argument' (e.g. it still expects a second `seed` argument).
  """
  with tf.name_scope(
      mcmc_util.make_name(self.name, 'remc', 'bootstrap_results')):
    init_state, unused_is_multipart_state = mcmc_util.prepare_state_parts(
        init_state)

    inverse_temperatures = tf.convert_to_tensor(
        self.inverse_temperatures,
        name='inverse_temperatures')

    if self._state_includes_replicas:
      it_n_replica = inverse_temperatures.shape[0]
      state_n_replica = init_state[0].shape[0]
      if ((it_n_replica is not None) and (state_n_replica is not None) and
          (it_n_replica != state_n_replica)):
        # The placeholders are, in order: the count implied by the initial
        # state, then the count implied by inverse_temperatures.
        raise ValueError(
            'Number of replicas implied by initial state ({}) must equal '
            'number of replicas implied by inverse_temperatures ({}), but '
            'did not'.format(state_n_replica, it_n_replica))

    # We will now replicate each of a possible batch of initial states, one
    # for each inverse_temperature. So if init_state=[x, y] of shapes [Sx, Sy]
    # then the new shape is [(T, Sx), (T, Sy)] where (a, b) means
    # concatenation and T=shape(inverse_temperature).
    num_replica = ps.size0(inverse_temperatures)
    replica_shape = ps.convert_to_shape_tensor([num_replica])

    if self._state_includes_replicas:
      replica_states = init_state
    else:
      replica_states = [
          tf.broadcast_to(  # pylint: disable=g-complex-comprehension
              x,
              ps.concat([replica_shape, ps.shape(x)], axis=0),
              name='replica_states')
          for x in init_state
      ]

    target_log_prob_for_inner_kernel = _make_replica_target_log_prob_fn(
        target_log_prob_fn=self.target_log_prob_fn,
        inverse_temperatures=inverse_temperatures,
        untempered_log_prob_fn=self.untempered_log_prob_fn,
        tempered_log_prob_fn=self.tempered_log_prob_fn,
    )
    # TODO(b/159636942): Clean up the helpful error msg after 2020-11-10.
    try:
      inner_kernel = self.make_kernel_fn(  # pylint: disable=not-callable
          target_log_prob_for_inner_kernel)
    except TypeError as e:
      # Only `TypeError`s that look like a call-signature mismatch are
      # replaced by the more helpful message; anything else propagates as is.
      if 'argument' not in str(e):
        raise
      raise TypeError(
          '`ReplicaExchangeMC`s `make_kernel_fn` no longer receives a second '
          '(`seed`) argument. `TransitionKernel` instances now receive seeds '
          'via `one_step`.')
    replica_results = inner_kernel.bootstrap_results(replica_states)

    pre_swap_replica_target_log_prob = _get_field(
        replica_results, 'target_log_prob')

    replica_and_batch_shape = ps.shape(pre_swap_replica_target_log_prob)
    batch_shape = replica_and_batch_shape[1:]

    # NOTE(review): this broadcast value is not used in the returned results,
    # which carry `self.inverse_temperatures` instead. It is kept because
    # removing it or substituting it may change behavior -- confirm intent.
    inverse_temperatures = bu.left_justified_broadcast_to(
        inverse_temperatures, replica_and_batch_shape)

    # Pretend we did a "null swap", which will always be accepted.
    swaps = bu.left_justified_broadcast_to(
        tf.range(num_replica), replica_and_batch_shape)
    # is_swap_accepted.shape = [n_replica, n_replica] + batch_shape.
    is_swap_accepted = distribution_util.rotate_transpose(
        tf.eye(num_replica, batch_shape=batch_shape, dtype=tf.bool),
        shift=2)

    return ReplicaExchangeMCKernelResults(
        post_swap_replica_states=replica_states,
        pre_swap_replica_results=replica_results,
        post_swap_replica_results=_set_swapped_fields_to_nan(
            replica_results),
        is_swap_proposed=is_swap_accepted,
        is_swap_accepted=is_swap_accepted,
        is_swap_proposed_adjacent=_sub_diag(is_swap_accepted),
        is_swap_accepted_adjacent=_sub_diag(is_swap_accepted),
        inverse_temperatures=self.inverse_temperatures,
        swaps=swaps,
        step_count=tf.zeros(shape=(), dtype=tf.int32),
        seed=samplers.zeros_seed(),
        potential_energy=tf.zeros_like(pre_swap_replica_target_log_prob),
    )
def posterior_marginals(self, observations, mask=None,
                        name='posterior_marginals'):
  """Compute marginal posterior distribution for each state.

  For each time step, computes the marginal conditional probability that the
  hidden Markov model was in each possible state, given the observations made
  at every time step. If the hidden states are `z[0],...,z[num_steps - 1]`
  and the observations are `x[0], ..., x[num_steps - 1]`, this computes
  `P(z[i] | x[0], ..., x[num_steps - 1])` for all `i` from `0` to
  `num_steps - 1`.

  This operation is sometimes called smoothing. It uses a form of the
  forward-backward algorithm.

  Note: the behavior of this function is undefined if the `observations`
  argument represents impossible observations from the model.

  Args:
    observations: A tensor representing a batch of observations made on the
      hidden Markov model. The rightmost dimension of this tensor gives the
      steps in a sequence of observations from a single sample from the
      hidden Markov model. The size of this dimension should match the
      `num_steps` parameter of the hidden Markov model object. The other
      dimensions are the dimensions of the batch and these are broadcast with
      the hidden Markov model's parameters.
    mask: optional bool-type `tensor` with rightmost dimension matching
      `num_steps` indicating which observations the result of this function
      should be conditioned on. When the mask has value `True` the
      corresponding observations aren't used. If `mask` is `None` then all of
      the observations are used. The `mask` dimensions left of the last are
      broadcast with the hmm batch as well as with the observations.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: "posterior_marginals".

  Returns:
    posterior_marginal: A `Categorical` distribution object representing the
      marginal probability of the hidden Markov model being in each state at
      each step. The rightmost dimension of the `Categorical` distributions
      batch will equal the `num_steps` parameter providing one marginal
      distribution for each step. The other dimensions are the dimensions
      corresponding to the batch of observations.

  Raises:
    ValueError: if rightmost dimension of `observations` does not
    have size `num_steps`.
  """
  with self._name_and_control_scope(name):
    observation_tensor_shape = tf.shape(observations)
    mask_tensor_shape = None if mask is None else tf.shape(mask)
    underlying_event_rank = tf.size(
        self.observation_distribution.event_shape_tensor())
    num_states = self.transition_distribution.batch_shape_tensor()[-1]

    with self._observation_mask_shape_preconditions(
        observation_tensor_shape, mask_tensor_shape,
        underlying_event_rank):
      observation_log_probs = self._observation_log_probs(
          observations, mask)
      log_init = _extract_log_probs(num_states, self.initial_distribution)
      log_transition = _extract_log_probs(
          num_states, self.transition_distribution)

      # Forward message at step 0, and the (all-zero) backward message at the
      # final step.
      first_forward = log_init + observation_log_probs[0]
      last_backward = tf.zeros_like(first_forward)

      has_multiple_steps = self._num_steps > 1
      later_observation_log_probs = observation_log_probs[1:]

      def advance_forward(previous, step_observation_log_prob):
        return _log_vector_matrix(
            previous, log_transition) + step_observation_log_prob

      def advance_backward(previous, step_observation_log_prob):
        return _log_matrix_vector(
            log_transition, step_observation_log_prob + previous)

      def forward_multi_step():
        scanned = tf.scan(advance_forward,
                          later_observation_log_probs,
                          initializer=first_forward,
                          name='forward_log_probs')
        return tf.concat([[first_forward], scanned], axis=0)

      def forward_single_step():
        return tf.convert_to_tensor([first_forward])

      def backward_multi_step():
        """Perform `scan` operation when `num_steps` > 1."""
        scanned = tf.scan(advance_backward,
                          later_observation_log_probs,
                          initializer=last_backward,
                          reverse=True,
                          name='backward_log_adjoint_probs')
        return tf.concat([scanned, [last_backward]], axis=0)

      def backward_single_step():
        return tf.convert_to_tensor([last_backward])

      forward_log_probs = prefer_static.cond(
          has_multiple_steps, forward_multi_step, forward_single_step)

      # Log-likelihood of the full observation sequence, used to normalize.
      total_log_prob = tf.reduce_logsumexp(forward_log_probs[-1], axis=-1)

      backward_log_adjoint_probs = prefer_static.cond(
          has_multiple_steps, backward_multi_step, backward_single_step)

      log_likelihoods = forward_log_probs + backward_log_adjoint_probs
      normalized = log_likelihoods - total_log_prob[..., tf.newaxis]

      # Move the leading step dimension to sit just before the state
      # dimension.
      marginal_log_probs = distribution_util.move_dimension(normalized, 0, -2)

      return categorical.Categorical(logits=marginal_log_probs)
def _inverse_log_det_jacobian(self, y):
  """Returns zeros shaped like `y` with its last dimension dropped."""
  without_last_dim = y[..., 0]
  return tf.zeros_like(without_last_dim)
def _observation_log_probs(self, observations, mask):
  """Compute and shape tensor of log probs associated with observations."""

  # Notation:
  #   E  the underlying event shape
  #   M  the number of steps in the HMM
  #   N  the number of states of the HMM
  #
  # Incoming shapes:
  #
  #   observations : batch_o [M] E
  #   mask         : batch_m [M]      (if present)
  #
  # With batch_d the batch shape of this HMM distribution, all three batch
  # shapes are broadcast together into a common shape `batch`.
  #
  # The step dimension is moved to the front so the result can be folded or
  # scanned over. `log_prob` must be evaluated for every state an observation
  # could correspond to, so a size-1 dimension is inserted:
  #
  #   observations : [M] batch [1] [E]
  #
  # and `log_prob` then yields
  #
  #   observation_log_probs : [M] batch [N]
  #
  # `mask` is reshaped and broadcast to select from that:
  #
  #   mask : [M] batch [N]

  has_mask = mask is not None
  observation_distribution = self.observation_distribution
  underlying_event_rank = tf.size(
      observation_distribution.event_shape_tensor())

  # Split the observation shape into its batch part and its [M] + E part.
  observation_tensor_shape = tf.shape(observations)
  split_at = -1 - underlying_event_rank
  observation_batch_shape = observation_tensor_shape[:split_at]
  observation_event_shape = observation_tensor_shape[split_at:]

  batch_shape = tf.broadcast_dynamic_shape(observation_batch_shape,
                                           self.batch_shape_tensor())
  if has_mask:
    mask_batch_shape = tf.shape(mask)[:-1]
    batch_shape = tf.broadcast_dynamic_shape(batch_shape, mask_batch_shape)

  observations = tf.broadcast_to(
      observations,
      tf.concat([batch_shape, observation_event_shape], axis=0))
  observation_rank = tf.rank(observations)
  step_axis = observation_rank - underlying_event_rank - 1
  observations = distribution_util.move_dimension(observations, step_axis, 0)
  # Insert the size-1 axis that broadcasts against the state dimension.
  observations = tf.expand_dims(
      observations, observation_rank - underlying_event_rank)
  observation_log_probs = observation_distribution.log_prob(observations)

  if not has_mask:
    return observation_log_probs

  mask = tf.broadcast_to(
      mask, tf.concat([batch_shape, [self._num_steps]], axis=0))
  mask = distribution_util.move_dimension(mask, -1, 0)
  # Masked steps contribute a log prob of zero for every state.
  return tf.where(
      mask[..., tf.newaxis],
      tf.zeros_like(observation_log_probs),
      observation_log_probs)
def impute_missing_values(model,
                          observed_time_series,
                          parameter_samples,
                          include_observation_noise=False):
  """Runs posterior inference to impute the missing values in a time series.

  This method computes the posterior marginals `p(latent state | observations)`,
  given the time series at observed timesteps (a missingness mask should
  be specified using `tfp.sts.MaskedTimeSeries`). It pushes this posterior back
  through the observation model to impute a predictive distribution on the
  observed time series. At unobserved steps, this is an imputed value; at other
  steps it is interpreted as the model's estimate of the underlying noise-free
  series.

  Args:
    model: `tfp.sts.Sum` instance defining an additive STS model.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
    include_observation_noise: If `False`, the imputed uncertainties
      represent the model's estimate of the noise-free time series at each
      timestep. If `True`, they represent the model's estimate of the range of
      values that could be *observed* at each timestep, including any i.i.d.
      observation noise.
      Default value: `False`.

  Returns:
    imputed_series_dist: a `tfd.MixtureSameFamily` instance with event shape
      [num_timesteps] and batch shape `concat([sample_shape,
      model.batch_shape])`, with `num_posterior_draws` mixture components.

  #### Example

  To specify a time series with missing values, use `tfp.sts.MaskedTimeSeries`:

  ```python
  time_series_with_nans = [-1., 1., np.nan, 2.4, np.nan, 5]
  observed_time_series = tfp.sts.MaskedTimeSeries(
    time_series=time_series_with_nans,
    is_missing=tf.math.is_nan(time_series_with_nans))
  ```

  Masked time series can be passed to `tfp.sts` methods in place of a
  `observed_time_series` `Tensor`:

  ```python
  # Build model using observed time series to set heuristic priors.
  linear_trend_model = tfp.sts.LocalLinearTrend(
    observed_time_series=observed_time_series)
  model = tfp.sts.Sum([linear_trend_model],
                      observed_time_series=observed_time_series)

  # Fit model to data
  parameter_samples, _ = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  After fitting a model, `impute_missing_values` will return a distribution

  ```python
  # Impute missing values
  imputed_series_distribution = tfp.sts.impute_missing_values(
    model, observed_time_series, parameter_samples=parameter_samples)
  print('imputed means and stddevs: ',
        imputed_series_distribution.mean(),
        imputed_series_distribution.stddev())
  ```

  """
  with tf.name_scope('impute_missing_values'):
    series, is_missing = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # Run smoothing over the training timesteps to extract the
    # predictive means and variances.
    series_shape = dist_util.prefer_static_value(tf.shape(series))
    num_timesteps = series_shape[-2]
    lgssm = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    latent_means, latent_covs = lgssm.posterior_marginals(
        series, mask=is_missing)

    observation_means, observation_covs = lgssm.latents_to_observations(
        latent_means=latent_means,
        latent_covs=latent_covs)

    if not include_observation_noise:
      # Pushing zero-variance latents forward leaves only the variance that
      # the observation noise contributes.
      _, noise_only_covs = lgssm.latents_to_observations(
          latent_means=latent_means,
          latent_covs=tf.zeros_like(latent_covs))
      # Subtract out the observation noise that was added in the original
      # pushforward. Note that this could cause numerical issues if the
      # observation noise is very large. If this becomes an issue we could
      # avoid the subtraction by plumbing `include_observation_noise` through
      # `lgssm.latents_to_observations`.
      observation_covs = observation_covs - noise_only_covs

    # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
    # to a scalar time series.
    scalar_means = observation_means[..., 0]
    scalar_variances = observation_covs[..., 0, 0]
    return sts_util.mix_over_posterior_draws(
        means=scalar_means, variances=scalar_variances)
def forecast(model,
             observed_time_series,
             parameter_samples,
             num_steps_forecast,
             include_observation_noise=True):
  """Build the predictive distribution over future observations.

  Each posterior parameter draw defines a state space model. The observed
  series is filtered under every draw, the resulting one-step-ahead latent
  state distribution becomes the initial state prior of a fresh state space
  model spanning the next `num_steps_forecast` steps, and those per-draw models
  are combined into a uniformly weighted mixture.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
    num_steps_forecast: scalar `int` `Tensor` number of steps to forecast.
    include_observation_noise: Python `bool`. When `True` the forecast is over
      future observations (observation noise included); when `False` the
      `observation_noise_scale` parameter is zeroed, so the forecast is over
      future values of the latent noise-free series.
      Default value: `True`.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      [num_steps_forecast, 1] and batch shape
      `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws`
      mixture components.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `forecast` yields a forecast
  distribution:

  ```python
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)

    forecast_mean = forecast_dist.mean()[..., 0]  # shape: [50]
    forecast_scale = forecast_dist.stddev()[..., 0]  # shape: [50]
    forecast_samples = forecast_dist.sample(10)[..., 0]  # shape: [10, 50]
  ```

  With variational inference instead of HMC, the forecast is built from
  samples of the variational posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)
  ```

  The forecast can be visualized by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_forecast(observed_time_series,
                      forecast_mean,
                      forecast_scale,
                      forecast_samples):
      plt.figure(figsize=(12, 6))

      num_steps = observed_time_series.shape[-1]
      num_steps_forecast = forecast_mean.shape[-1]
      num_steps_train = num_steps - num_steps_forecast

      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(np.arange(num_steps), observed_time_series,
               lw=2, color=c1, label='ground truth')

      forecast_steps = np.arange(num_steps_train,
                       num_steps_train+num_steps_forecast)
      plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1)
      plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2,
               label='forecast')
      plt.fill_between(forecast_steps,
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale, color=c2, alpha=0.2)

      plt.xlim([0, num_steps])
      plt.legend()

    plot_forecast(observed_time_series,
                  forecast_mean=forecast_mean,
                  forecast_scale=forecast_scale,
                  forecast_samples=forecast_samples)
  ```

  """

  with tf.name_scope('forecast'):
    series, is_missing = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))

    # Filter over the observed steps. The last one-step-ahead predictive
    # distribution (the final filtering distribution pushed through the
    # transition model) is the latent state prior for the forecast period.
    num_observed_steps = dist_util.prefer_static_value(tf.shape(series))[-2]
    filtering_ssm = model.make_state_space_model(
        num_timesteps=num_observed_steps, param_vals=parameter_samples)
    (_, _, _, one_step_means, one_step_covs, _, _) = (
        filtering_ssm.forward_filter(series, mask=is_missing))

    # `MixtureSameFamily` mixes over the rightmost batch dimension, so the
    # `[num_posterior_draws]` dimension has to travel from the leftmost to the
    # rightmost position of every parameter's batch shape.
    # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
    # arbitrary axis, and eliminate `move_dimension` calls here.
    param_map = model._canonicalize_param_vals_as_map(parameter_samples)  # pylint: disable=protected-access
    reordered_params = {}
    for param in model.parameters:
      draws_axis = -(1 + _prefer_static_event_ndims(param.prior))
      reordered_params[param.name] = dist_util.move_dimension(
          param_map[param.name], 0, draws_axis)

    prior_loc = dist_util.move_dimension(one_step_means[..., -1, :], 0, -2)
    prior_cov = dist_util.move_dimension(one_step_covs[..., -1, :, :], 0, -3)
    forecast_prior = tfd.MultivariateNormalFullCovariance(
        loc=prior_loc, covariance_matrix=prior_cov)

    # Ugly hack: with `num_posterior_draws` now trailing (rather than leading)
    # the parameter batch shapes, they no longer broadcast against the
    # `constant_offset` attribute used in `sts.Sum` models, so an extra
    # broadcasting dim is appended to `constant_offset` when it exists. The
    # root cause is that the reordered params are 'invalid' in the sense that
    # they no longer match the shapes of the model's param priors.
    # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
    # arbitrary axis, and eliminate this hack.
    extra_kwargs = {}
    if hasattr(model, 'constant_offset'):
      offset = tf.convert_to_tensor(
          value=model.constant_offset, dtype=forecast_prior.dtype)
      extra_kwargs['constant_offset'] = offset[..., tf.newaxis]

    if not include_observation_noise:
      noise_scale = reordered_params['observation_noise_scale']
      reordered_params['observation_noise_scale'] = tf.zeros_like(noise_scale)

    # Any STS model exposing a `constant_offset` attribute is assumed to accept
    # it as an overriding kwarg. This is currently just `sts.Sum`.
    # TODO(b/120245392): when kwargs hack is removed, switch back to calling
    # the public version of `_make_state_space_model`.
    forecast_ssm = model._make_state_space_model(  # pylint: disable=protected-access
        num_timesteps=num_steps_forecast,
        param_map=reordered_params,
        initial_state_prior=forecast_prior,
        initial_step=num_observed_steps,
        **extra_kwargs)

    # Equal weight for every posterior draw.
    num_posterior_draws = dist_util.prefer_static_value(
        forecast_ssm.batch_shape_tensor())[-1]
    uniform_mixture = tfd.Categorical(
        logits=tf.zeros([num_posterior_draws], dtype=forecast_ssm.dtype))
    return tfd.MixtureSameFamily(
        mixture_distribution=uniform_mixture,
        components_distribution=forecast_ssm)
def drift_from_total_drift(t):
  """Differentiates the total drift, accumulated from time zero, at `t`.

  Applies `gradient.fwd_gradient` to the map `x -> total_drift_fn(0, x)`,
  evaluated at `t`, where the zero start time has the shape and dtype of `t`.
  """
  origin = tf.zeros_like(t)

  def total_drift_from_origin(end_time):
    return total_drift_fn(origin, end_time)

  return gradient.fwd_gradient(total_drift_from_origin, t)
def sample_lkj(
    num_samples,
    dimension,
    concentration,
    cholesky_space=False,
    seed=None,
    name=None):
  """Returns a Tensor of samples from an LKJ distribution.

  Args:
    num_samples: Python `int`. The number of samples to draw.
    dimension: Python `int`. The dimension of correlation matrices.
    concentration: `Tensor` representing the concentration of the LKJ
      distribution.
    cholesky_space: Python `bool`. Whether to take samples from LKJ or
      Chol(LKJ).
    seed: Python integer seed for RNG
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., `'sample_lkj'`).

  Returns:
    samples: A Tensor of correlation matrices (or Cholesky factors of
      correlation matrices if `cholesky_space = True`) with shape
      `[n] + B + [D, D]`, where `B` is the shape of the `concentration`
      parameter, and `D` is the `dimension`.

  Raises:
    ValueError: If `dimension` is negative.
    TypeError: If `concentration` does not have a floating dtype.
  """
  if dimension < 0:
    raise ValueError(
        'Cannot sample negative-dimension correlation matrices.')
  # Notation below: B is the batch shape, i.e., tf.shape(concentration)

  # We need 1 seed for beta corr12, and 2 per loop iter.
  num_seeds = 1 + 2 * max(0, dimension - 2)
  seeds = list(samplers.split_seed(seed, n=num_seeds, salt='sample_lkj'))
  # Honor a caller-supplied `name`; fall back to the default scope otherwise.
  # (`'sample_lkj' or name` would always evaluate to the literal.)
  with tf.name_scope(name or 'sample_lkj'):
    concentration = tf.convert_to_tensor(concentration)
    if not dtype_util.is_floating(concentration.dtype):
      raise TypeError(
          'The concentration argument should have floating type, not '
          '{}'.format(dtype_util.name(concentration.dtype)))

    concentration = _replicate(num_samples, concentration)
    concentration_shape = tf.shape(concentration)
    if dimension <= 1:
      # For any dimension <= 1, there is only one possible correlation matrix.
      shape = tf.concat([
          concentration_shape, [dimension, dimension]], axis=0)
      return tf.ones(shape=shape, dtype=concentration.dtype)
    beta_conc = concentration + (dimension - 2.) / 2.
    beta_dist = beta.Beta(concentration1=beta_conc, concentration0=beta_conc)

    # Note that the sampler below deviates from [1], by doing the sampling in
    # cholesky space. This does not change the fundamental logic of the
    # sampler, but does speed up the sampling.

    # This is the correlation coefficient between the first two dimensions.
    # This is also `r` in reference [1].
    corr12 = 2. * beta_dist.sample(seed=seeds.pop()) - 1.

    # Below we construct the Cholesky of the initial 2x2 correlation matrix,
    # which is of the form:
    # [[1, 0], [r, sqrt(1 - r**2)]], where r is the correlation between the
    # first two dimensions.
    # This is the top-left corner of the cholesky of the final sample.
    first_row = tf.concat([
        tf.ones_like(corr12)[..., tf.newaxis],
        tf.zeros_like(corr12)[..., tf.newaxis]], axis=-1)
    second_row = tf.concat([
        corr12[..., tf.newaxis],
        tf.sqrt(1 - corr12**2)[..., tf.newaxis]], axis=-1)

    chol_result = tf.concat([
        first_row[..., tf.newaxis, :],
        second_row[..., tf.newaxis, :]], axis=-2)

    for n in range(2, dimension):
      # Loop invariant: on entry, result has shape B + [n, n]
      beta_conc = beta_conc - 0.5
      # norm is y in reference [1].
      norm = beta.Beta(
          concentration1=n/2.,
          concentration0=beta_conc
      ).sample(seed=seeds.pop())
      # distance shape: B + [1] for broadcast
      distance = tf.sqrt(norm)[..., tf.newaxis]
      # direction is u in reference [1].
      # direction shape: B + [n]
      direction = _uniform_unit_norm(
          n, concentration_shape, concentration.dtype, seed=seeds.pop())
      # raw_correlation is w in reference [1].
      raw_correlation = distance * direction  # shape: B + [n]

      # This is the next row in the cholesky of the result,
      # which differs from the construction in reference [1].
      # In the reference, the new row `z` = chol_result @ raw_correlation^T
      # = C @ raw_correlation^T (where as short hand we use C = chol_result).
      # We prove that the below equation is the right row to add to the
      # cholesky, by showing equality with reference [1].
      # Let S be the sample constructed so far, and let `z` be as in
      # reference [1]. Then at this iteration, the new sample S' will be
      # [[S z^T]
      #  [z 1]]
      # In our case we have the cholesky decomposition factor C, so
      # we want our new row x (same size as z) to satisfy:
      #  [[S z^T]  [[C 0]    [[C^T  x^T]         [[CC^T  Cx^T]
      #   [z 1]] =  [x k]]    [0     k]]  =       [xC^t   xx^T + k**2]]
      # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
      # we have that x = raw_correlation. Also 1 = xx^T + k**2, so k
      # = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2) = sqrt(1 -
      # distance**2).
      new_row = tf.concat(
          [raw_correlation, tf.sqrt(1. - norm[..., tf.newaxis])], axis=-1)

      # Finally add this new row, by growing the cholesky of the result.
      chol_result = tf.concat([
          chol_result,
          tf.zeros_like(chol_result[..., 0][..., tf.newaxis])], axis=-1)

      chol_result = tf.concat(
          [chol_result, new_row[..., tf.newaxis, :]], axis=-2)

    # Internal invariant: every pre-split seed must have been consumed. The
    # count is formatted (not concatenated) so that a violation reports the
    # intended message rather than a `str + int` TypeError.
    assert not seeds, 'Did not use all seeds: {}'.format(len(seeds))

    if cholesky_space:
      return chol_result

    result = tf.matmul(chol_result, chol_result, transpose_b=True)
    # The diagonal for a correlation matrix should always be ones. Due to
    # numerical instability the matmul might not achieve that, so manually set
    # these to ones.
    result = tf.linalg.set_diag(
        result, tf.ones(shape=tf.shape(result)[:-1], dtype=result.dtype))
    # This sampling algorithm can produce near-PSD matrices on which standard
    # algorithms such as `tf.cholesky` or `tf.linalg.self_adjoint_eigvals`
    # fail. Specifically, as documented in b/116828694, around 2% of trials
    # of 900,000 5x5 matrices (distributed according to 9 different
    # concentration parameter values) contained at least one matrix on which
    # the Cholesky decomposition failed.
    return result
def _cdf(self, x):
  """Evaluates `von_mises_cdf` at `self._z(x)` with this `concentration`."""
  # Adding `zeros_like` of each operand to the other broadcasts both arguments
  # to a common shape before they are handed to `von_mises_cdf`.
  z_bcast = self._z(x) + tf.zeros_like(self.concentration)
  concentration_bcast = self.concentration + tf.zeros_like(z_bcast)
  return von_mises_cdf(z_bcast, concentration_bcast)
def european_option_price(*,
                          strikes=None,
                          expiries=None,
                          is_call_options=None,
                          variances=None,
                          kappas=None,
                          thetas=None,
                          sigmas=None,
                          rhos=None,
                          spots=None,
                          forwards=None,
                          discount_rates=None,
                          continuous_dividends=None,
                          cost_of_carries=None,
                          discount_factors=None,
                          integration_method=None,
                          dtype=None,
                          name=None,
                          **kwargs):
  """Calculates European option prices under the Heston model.

  Heston originally published in 1993 his eponymous model [3]. He provided
  a semi-analytical formula for pricing European option via Fourier transform
  under his model. However, as noted by Albrecher [1], the characteristic
  function used in Heston paper can suffer numerical issues because of the
  discontinuous nature of the square root function in the complex plane, and a
  second version of the characteristic function which doesn't suffer this
  shortcoming should be used instead. Attari [2] further refined the numerical
  method by reducing the number of numerical integrations (only one Fourier
  transform instead of two) and with an integrand function decaying
  quadratically instead of linearly. Attari's numerical method is implemented
  here.

  Heston model:
  ```
    dF/F = sqrt(V) * dW_1
    dV = kappa * (theta - V) * dt + sigma * sqrt(V) * dW_2
    <dW_1,dW_2> = rho *dt
  ```
  The variance V follows a square root process.

  #### Example
  ```python
  import tf_quant_finance as tff
  import numpy as np
  prices = tff.models.heston.approximations.european_option_price(
      variances=0.11,
      strikes=102.0,
      expiries=1.2,
      forwards=100.0,
      is_call_options=True,
      kappas=2.0,
      thetas=0.5,
      sigmas=0.15,
      rhos=0.3,
      discount_factors=1.0,
      dtype=np.float64)
  # Expected print output of prices:
  # 24.82219619
  ```
  #### References
  [1] Hansjorg Albrecher, The Little Heston Trap
  https://perswww.kuleuven.be/~u0009713/HestonTrap.pdf
  [2] Mukarram Attari, Option Pricing Using Fourier Transforms: A Numerically
  Efficient Simplification
  https://papers.ssrn.com/sol3/papers.cfm?abstract_id=520042
  [3] Steven L. Heston, A Closed-Form Solution for Options with Stochastic
  Volatility with Applications to Bond and Currency Options
  http://faculty.baruch.cuny.edu/lwu/890/Heston93.pdf

  Args:
    strikes: A real `Tensor` of any shape and dtype. The strikes of the options
      to be priced.
    expiries: A real `Tensor` of the same dtype and compatible shape as
      `strikes`.  The expiry of each option.
    is_call_options: A boolean `Tensor` of a shape compatible with
      `strikes`. Indicates whether the option is a call (if True) or a put
      (if False). If not supplied, call options are assumed.
    variances: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The initial value of the variance.
    kappas: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The mean reversion strength of the variance square root
      process.
    thetas: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The mean reversion level of the variance square root process.
    sigmas: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The volatility of the variance square root process (volatility
      of volatility)
    rhos: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The correlation between spot and variance.
    spots: A real `Tensor` of any shape that broadcasts to the shape of
      `strikes`. The current spot price of the underlying. Either this
      argument or the `forwards` (but not both) must be supplied.
    forwards: A real `Tensor` of any shape that broadcasts to the shape of
      `strikes`. The forwards to maturity. Either this argument or the
      `spots` must be supplied but both must not be supplied.
    discount_rates: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      If not `None`, discount factors are calculated as e^(-rT),
      where r are the discount rates, or risk free rates. At most one of
      discount_rates and discount_factors can be supplied.
      Default value: `None`, equivalent to r = 0 and discount factors = 1 when
      discount_factors also not given.
    continuous_dividends: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      If not `None`, `cost_of_carries` is calculated as r - q,
      where r are the `discount_rates` and q is `continuous_dividends`. At most
      one of this and `cost_of_carries` can be given.
      Default value: `None`, equivalent to q = 0.
    cost_of_carries: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      Cost of storing a physical commodity, the cost of interest paid when
      long, or the opportunity cost, or the cost of paying dividends when short.
      If not `None`, and `spots` is supplied, used to calculate forwards from
      `spots`: F = e^(bT) * S, where F is the forwards price, b is the cost of
      carries, T is expiries and S is the spot price. If `None`, value assumed
      to be equal to the `discount_rates` - `continuous_dividends`.
      Default value: `None`, equivalent to b = r - q.
    discount_factors: An optional real `Tensor` of same dtype as the
      `strikes`. If not `None`, these are the discount factors to expiry
      (i.e. e^(-rT)), and the discount rates are recovered from them as
      -log(discount_factors) / expiries. At most one of `discount_rates` and
      `discount_factors` can be supplied. If neither is given, no discounting
      is applied (i.e. the undiscounted option price is returned). If `spots`
      is supplied, `cost_of_carries` is not, and `discount_factors` is not
      `None`, then the implied discount rates are also used to compute the
      forwards to expiry.
      Default value: `None`, in which case the discount factors are computed
      as e^(-rT) from `discount_rates`.
    integration_method: An instance of `math.integration.IntegrationMethod`.
      Default value: `None` which maps to the Simpsons integration rule.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for conversion
      of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: None which maps to the default dtype inferred by
      TensorFlow.
    name: str. The name for the ops created by this function.
      Default value: None which is mapped to the default name
      `eu_option_price`.
    **kwargs: Additional parameters for the underlying integration method.
      If not supplied and `integration_method` is Simpson, then uses
      `IntegrationMethod.COMPOSITE_SIMPSONS_RULE` with `num_points=1001`, and
      bounds `lower=1e-9`, `upper=100`.

  Returns:
    A `Tensor` of the same shape as the input data which is the price of
    European options under the Heston model.

  Raises:
    ValueError: If both or neither of `spots` and `forwards` are supplied, if
      both `discount_rates` and `discount_factors` are supplied, or if both
      `continuous_dividends` and `cost_of_carries` are supplied.
  """
  # Validate the mutually exclusive argument groups before building any ops.
  if (spots is None) == (forwards is None):
    raise ValueError(
        'Either spots or forwards must be supplied but not both.')
  if (discount_rates is not None) and (discount_factors is not None):
    raise ValueError(
        'At most one of discount_rates and discount_factors may '
        'be supplied')
  if (continuous_dividends is not None) and (cost_of_carries is not None):
    raise ValueError(
        'At most one of continuous_dividends and cost_of_carries '
        'may be supplied')

  with tf.compat.v1.name_scope(name, default_name='eu_option_price'):
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    # From here on every other input is converted to the dtype of `strikes`.
    dtype = strikes.dtype
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
    kappas = tf.convert_to_tensor(kappas, dtype=dtype, name='kappas')
    thetas = tf.convert_to_tensor(thetas, dtype=dtype, name='thetas')
    sigmas = tf.convert_to_tensor(sigmas, dtype=dtype, name='sigmas')
    rhos = tf.convert_to_tensor(rhos, dtype=dtype, name='rhos')
    variances = tf.convert_to_tensor(variances, dtype=dtype, name='variances')

    if discount_factors is not None:
      discount_factors = tf.convert_to_tensor(discount_factors, dtype=dtype,
                                              name='discount_factors')

    # Resolve the discount rates: explicit rates win, otherwise they are
    # implied by the discount factors, otherwise they default to zero.
    if discount_rates is not None:
      discount_rates = tf.convert_to_tensor(discount_rates, dtype=dtype,
                                            name='discount_rates')
    elif discount_factors is not None:
      discount_rates = -tf.math.log(discount_factors) / expiries
    else:
      discount_rates = tf.convert_to_tensor(0.0, dtype=dtype,
                                            name='discount_rates')

    if continuous_dividends is None:
      continuous_dividends = tf.convert_to_tensor(
          0.0, dtype=dtype, name='continuous_dividends')

    if cost_of_carries is not None:
      cost_of_carries = tf.convert_to_tensor(cost_of_carries, dtype=dtype,
                                             name='cost_of_carries')
    else:
      cost_of_carries = discount_rates - continuous_dividends

    if discount_factors is None:
      discount_factors = tf.exp(-discount_rates * expiries)  # pylint: disable=invalid-unary-operand-type

    # Forwards are either given directly or derived from spots: F = e^(bT) * S.
    if forwards is not None:
      forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
    else:
      spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
      forwards = spots * tf.exp(cost_of_carries * expiries)

    # Cast as complex for the characteristic function calculation.
    # Note: despite the `_real` suffix these are complex tensors whose
    # imaginary part is zero.
    expiries_real = tf.complex(expiries, tf.zeros_like(expiries))
    kappas_real = tf.complex(kappas, tf.zeros_like(kappas))
    thetas_real = tf.complex(thetas, tf.zeros_like(thetas))
    sigmas_real = tf.complex(sigmas, tf.zeros_like(sigmas))
    rhos_real = tf.complex(rhos, tf.zeros_like(rhos))
    variances_real = tf.complex(variances, tf.zeros_like(variances))

    # Prepare inputs to build an integrand_function: a trailing axis is added
    # to each parameter (presumably so it broadcasts against the integration
    # grid `u` - TODO confirm against `integration.integrate`).
    expiries_real = tf.expand_dims(expiries_real, -1)
    kappas_real = tf.expand_dims(kappas_real, -1)
    thetas_real = tf.expand_dims(thetas_real, -1)
    sigmas_real = tf.expand_dims(sigmas_real, -1)
    rhos_real = tf.expand_dims(rhos_real, -1)
    variances_real = tf.expand_dims(variances_real, -1)

    # Fill in the Simpson defaults only for options the caller did not pass.
    if integration_method is None:
      integration_method = _COMPOSITE_SIMPSONS_RULE
    if integration_method == _COMPOSITE_SIMPSONS_RULE:
      if 'num_points' not in kwargs:
        kwargs['num_points'] = 1001
      if 'lower' not in kwargs:
        kwargs['lower'] = 1e-9
      if 'upper' not in kwargs:
        kwargs['upper'] = 100

    def char_fun(u):
      """Returns the complex characteristic function evaluated at real `u`."""
      # Using 'second formula' for the (first) characteristic function of
      # log( spot_T / forwards )
      # (noted 'phi_2' in 'The Little Heston Trap', (Albrecher))
      u_real = tf.complex(u, tf.zeros_like(u))  # u + 0i
      u_imag = tf.complex(tf.zeros_like(u), u)  # 0 + ui
      s = rhos_real * sigmas_real * u_imag
      # TODO(b/156221007): investigate why s_kappa = (s - kappas_real)**2 leads
      # to a wrong result in graph mode.
      s_kappa = (s - kappas_real) * s - (s - kappas_real) * kappas_real
      d = s_kappa - sigmas_real**2 * (-u_imag - u_real**2)
      d = tf.math.sqrt(d)
      g = (kappas_real - s - d) / (kappas_real - s + d)
      a = kappas_real * thetas_real
      h = g * tf.math.exp(-d * expiries_real)
      m = 2 * tf.math.log((1 - h) / (1 - g))
      c = (a / sigmas_real**2) * ((kappas_real - s - d) * expiries_real - m)
      e = (1 - tf.math.exp(-d * expiries_real))
      d_new = (kappas_real - s - d) / sigmas_real**2 * (e / (1 - h))
      return tf.math.exp(c + d_new * variances_real)

    def integrand_function(u, k):
      """Returns the real integrand at `u` for log-moneyness `k`."""
      # Note that with [2], integrand is in 1 / u**2,
      # which converges faster than Heston 1993 (which is in 1 /u)
      char_fun_complex = char_fun(u)
      char_fun_real_part = tf.math.real(char_fun_complex)
      char_fun_imag_part = tf.math.imag(char_fun_complex)

      a = (char_fun_real_part + char_fun_imag_part / u) * tf.math.cos(u * k)
      b = (char_fun_imag_part - char_fun_real_part / u) * tf.math.sin(u * k)

      return (a + b) / (1.0 + u * u)

    # k = log(strike / forward), with a trailing axis matching the parameters.
    k = tf.expand_dims(tf.math.log(strikes / forwards), axis=-1)

    integral = integration.integrate(
        lambda u: integrand_function(u, k),
        method=integration_method,
        dtype=dtype,
        **kwargs)
    undiscounted_call_prices = forwards - strikes * (0.5 + integral / _PI_)

    if is_call_options is None:
      # No call/put flags supplied: every option is priced as a call.
      return undiscounted_call_prices * discount_factors
    else:
      is_call_options = tf.convert_to_tensor(is_call_options, dtype=tf.bool,
                                             name='is_call_options')
      # Use call-put parity for Put
      undiscounted_put_prices = undiscounted_call_prices - forwards + strikes

      undiscount_prices = tf.where(
          is_call_options,
          undiscounted_call_prices,
          undiscounted_put_prices)
      return undiscount_prices * discount_factors
def _mean(self):
  """Returns `loc` broadcast against the shape of `concentration`."""
  loc = self.loc
  # Zeros shaped like `concentration`; adding them only affects the shape.
  concentration_zeros = tf.zeros_like(self.concentration)
  return loc + concentration_zeros
def minimize(value_and_gradients_function, initial_position, tolerance=1e-8, x_tolerance=0, f_relative_tolerance=0, max_iterations=50, parallel_iterations=1, stopping_condition=None, params=None, name=None): """Minimizes a differentiable function. Implementation of algorithm described in [HZ2006]. Updated formula for next search direction were taken from [HZ2013]. Supports batches with 1-dimensional batch shape. ### References: [HZ2006] Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent." http://users.clas.ufl.edu/hager/papers/CG/cg_compare.pdf [HZ2013] W. W. Hager and H. Zhang (2013) The limited memory conjugate gradient method. https://pdfs.semanticscholar.org/8769/69f3911777e0ff0663f21b67dff30518726b.pdf ### Usage: The following example demonstrates this optimizer attempting to find the minimum for a simple two dimensional quadratic objective function. ```python minimum = np.array([1.0, 1.0]) # The center of the quadratic bowl. scales = np.array([2.0, 3.0]) # The scales along the two axes. # The objective function and the gradient. def quadratic(x): value = tf.reduce_sum(scales * (x - minimum) ** 2) return value, tf.gradients(value, x)[0] start = tf.constant([0.6, 0.8]) # Starting point for the search. optim_results = conjugate_gradient.minimize( quadratic, initial_position=start, tolerance=1e-8) with tf.Session() as session: results = session.run(optim_results) # Check that the search converged assert(results.converged) # Check that the argmin is close to the actual value. np.testing.assert_allclose(results.position, minimum) ``` Args: value_and_gradients_function: A Python callable that accepts a point as a real `Tensor` and returns a tuple of `Tensor`s of real dtype containing the value of the function and its gradient at that point. The function to be minimized. 
The input should be of shape `[..., n]`, where `n` is the size of the domain of input points, and all others are batching dimensions. The first component of the return value should be a real `Tensor` of matching shape `[...]`. The second component (the gradient) should also be of shape `[..., n]` like the input value to the function. initial_position: Real `Tensor` of shape `[..., n]`. The starting point, or points when using batching dimensions, of the search procedure. At these points the function value and the gradient norm should be finite. tolerance: Scalar `Tensor` of real dtype. Specifies the gradient tolerance for the procedure. If the supremum norm of the gradient vector is below this number, the algorithm is stopped. x_tolerance: Scalar `Tensor` of real dtype. If the absolute change in the position between one iteration and the next is smaller than this number, the algorithm is stopped. f_relative_tolerance: Scalar `Tensor` of real dtype. If the relative change in the objective value between one iteration and the next is smaller than this value, the algorithm is stopped. max_iterations: Scalar positive int32 `Tensor`. The maximum number of iterations. parallel_iterations: Positive integer. The number of iterations allowed to run in parallel. stopping_condition: (Optional) A Python function that takes as input two Boolean tensors of shape `[...]`, and returns a Boolean scalar tensor. The input tensors are `converged` and `failed`, indicating the current status of each respective batch member; the return value states whether the algorithm should stop. The default is tfp.optimizer.converged_all which only stops when all batch members have either converged or failed. An alternative is tfp.optimizer.converged_any which stops as soon as one batch member has converged, or when all have failed. params: ConjugateGradientParams object with adjustable parameters of the algorithm. If not supplied, default parameters will be used. name: (Optional) Python str. 
The name prefixed to the ops created by this function. If not supplied, the default name 'minimize' is used. Returns: optimizer_results: A namedtuple containing the following items: converged: boolean tensor of shape `[...]` indicating for each batch member whether the minimum was found within tolerance. failed: boolean tensor of shape `[...]` indicating for each batch member whether a line search step failed to find a suitable step size satisfying Wolfe conditions. In the absence of any constraints on the number of objective evaluations permitted, this value will be the complement of `converged`. However, if there is a constraint and the search stopped due to available evaluations being exhausted, both `failed` and `converged` will be simultaneously False. num_objective_evaluations: The total number of objective evaluations performed. position: A tensor of shape `[..., n]` containing the last argument value found during the search from each starting point. If the search converged, then this value is the argmin of the objective function. objective_value: A tensor of shape `[...]` with the value of the objective function at the `position`. If the search converged, then this is the (local) minimum of the objective function. objective_gradient: A tensor of shape `[..., n]` containing the gradient of the objective function at the `position`. If the search converged the max-norm of this tensor should be below the tolerance. 
""" with tf.compat.v1.name_scope(name, 'minimize', [initial_position, tolerance]): if params is None: params = ConjugateGradientParams() initial_position = tf.convert_to_tensor( value=initial_position, name='initial_position') dtype = initial_position.dtype tolerance = tf.convert_to_tensor( value=tolerance, dtype=dtype, name='grad_tolerance') f_relative_tolerance = tf.convert_to_tensor( value=f_relative_tolerance, dtype=dtype, name='f_relative_tolerance') x_tolerance = tf.convert_to_tensor( value=x_tolerance, dtype=dtype, name='x_tolerance') max_iterations = tf.convert_to_tensor( value=max_iterations, name='max_iterations') stopping_condition = stopping_condition or converged_all delta = tf.convert_to_tensor( params.sufficient_decrease_param, dtype=dtype, name='delta') sigma = tf.convert_to_tensor( params.curvature_param, dtype=dtype, name='sigma') eps = tf.convert_to_tensor( params.threshold_use_approximate_wolfe_condition, dtype=dtype, name='sigma') eta = tf.convert_to_tensor( params.direction_update_param, dtype=dtype, name='eta') psi_1 = tf.convert_to_tensor( params.initial_guess_small_factor, dtype=dtype, name='psi_1') psi_2 = tf.convert_to_tensor( params.initial_guess_step_multiplier, dtype=dtype, name='psi_2') f0, df0 = value_and_gradients_function(initial_position) converged = tf.norm(df0, axis=-1) < tolerance initial_state = _OptimizerState( converged=converged, failed=tf.zeros_like(converged), # All false. num_iterations=tf.convert_to_tensor(value=0), num_objective_evaluations=tf.convert_to_tensor(value=1), position=initial_position, objective_value=f0, objective_gradient=df0, direction=-df0, prev_step=tf.ones_like(f0), ) def _cond(state): """Continue if iterations remain and stopping condition is not met.""" return ( (state.num_iterations < max_iterations) & tf.logical_not(stopping_condition(state.converged, state.failed))) def _body(state): """Main optimization loop.""" # We use notation of [HZ2006] for brevity. 
x_k = state.position d_k = state.direction f_k = state.objective_value g_k = state.objective_gradient a_km1 = state.prev_step # Means a_{k-1}. # Define scalar function, which is objective restricted to direction. def ls_func(alpha): pt = x_k + tf.expand_dims(alpha, axis=-1) * d_k objective_value, gradient = value_and_gradients_function(pt) return ValueAndGradient( x=alpha, f=objective_value, df=_dot(gradient, d_k), full_gradient=gradient) # Generate initial guess for line search. # [HZ2006] suggests to generate first initial guess separately, but # [JuliaLineSearches] generates it as if previous step length was 1, and # we do the same. phi_0 = f_k dphi_0 = _dot(g_k, d_k) ls_val_0 = ValueAndGradient( x=tf.zeros_like(phi_0), f=phi_0, df=dphi_0, full_gradient=g_k) step_guess_result = _init_step(ls_val_0, a_km1, ls_func, psi_1, psi_2, params.quad_step) init_step = step_guess_result.step # Check if initial step size already satisfies Wolfe condition, and in # that case don't perform line search. c = init_step.x phi_lim = phi_0 + eps * tf.abs(phi_0) phi_c = init_step.f dphi_c = init_step.df # Original Wolfe conditions, T1 in [HZ2006]. suff_decrease_1 = delta * dphi_0 >= (phi_c - phi_0) / c curvature = dphi_c >= sigma * dphi_0 wolfe1 = suff_decrease_1 & curvature # Approximate Wolfe conditions, T2 in [HZ2006]. suff_decrease_2 = (2 * delta - 1) * dphi_0 >= dphi_c curvature = dphi_c >= sigma * dphi_0 wolfe2 = suff_decrease_2 & curvature & (phi_c <= phi_lim) wolfe = wolfe1 | wolfe2 skip_line_search = (step_guess_result.may_terminate & wolfe) | state.failed | state.converged # Call Hager-Zhang line search (L0-L3 in [HZ2006]). # Parameter theta from [HZ2006] is not adjustable, it's always 0.5. 
ls_result = linesearch.hager_zhang( ls_func, value_at_zero=ls_val_0, converged=skip_line_search, initial_step_size=init_step.x, value_at_initial_step=init_step, shrinkage_param=params.shrinkage_param, expansion_param=params.expansion_param, sufficient_decrease_param=delta, curvature_param=sigma, threshold_use_approximate_wolfe_condition=eps) # Moving to the next point, using step length from line search. # If line search was skipped, take step length from initial guess. # To save objective evaluation, use objective value and gradient returned # by line search or initial guess. a_k = tf.compat.v1.where( skip_line_search, init_step.x, ls_result.left.x) x_kp1 = state.position + tf.expand_dims(a_k, -1) * d_k f_kp1 = tf.compat.v1.where( skip_line_search, init_step.f, ls_result.left.f) g_kp1 = tf.compat.v1.where(skip_line_search, init_step.full_gradient, ls_result.left.full_gradient) # Evaluate next direction. # Use formulas (2.7)-(2.11) from [HZ2013] with P_k=I. y_k = g_kp1 - g_k d_dot_y = _dot(d_k, y_k) b_k = (_dot(y_k, g_kp1) - _norm_sq(y_k) * _dot(g_kp1, d_k) / d_dot_y) / d_dot_y eta_k = eta * _dot(d_k, g_k) / _norm_sq(d_k) b_k = tf.maximum(b_k, eta_k) d_kp1 = -g_kp1 + tf.expand_dims(b_k, -1) * d_k # Check convergence criteria. grad_converged = _norm_inf(g_kp1) <= tolerance x_converged = (_norm_inf(x_kp1 - x_k) <= x_tolerance) f_converged = ( tf.math.abs(f_kp1 - f_k) <= f_relative_tolerance * tf.math.abs(f_k)) converged = grad_converged | x_converged | f_converged # Construct new state for next iteration. 
new_state = _OptimizerState( converged=converged, failed=state.failed, num_iterations=state.num_iterations + 1, num_objective_evaluations=state.num_objective_evaluations + step_guess_result.func_evals + ls_result.func_evals, position=tf.compat.v1.where(state.converged, x_k, x_kp1), objective_value=tf.compat.v1.where(state.converged, f_k, f_kp1), objective_gradient=tf.compat.v1.where(state.converged, g_k, g_kp1), direction=d_kp1, prev_step=a_k) return (new_state,) final_state = tf.while_loop( _cond, _body, (initial_state,), parallel_iterations=parallel_iterations)[0] return OptimizerResult( converged=final_state.converged, failed=final_state.failed, num_iterations=final_state.num_iterations, num_objective_evaluations=final_state.num_objective_evaluations, position=final_state.position, objective_value=final_state.objective_value, objective_gradient=final_state.objective_gradient)
def prepare_args(model_matrix,
                 response,
                 model_coefficients,
                 predicted_linear_response,
                 offset,
                 name=None):
  """Helper to `fit` which converts its inputs to `Tensor`s and fills defaults.

  All supplied arguments are converted to a single common dtype (falling back
  to `np.float32` when none of them carries a dtype).

  Args:
    model_matrix: (Batch of) `float`-like, matrix-shaped `Tensor` where each
      row represents a sample's features.
    response: (Batch of) vector-shaped `Tensor` where each element represents
      a sample's observed response (to the corresponding row of features).
      Must have same `dtype` as `model_matrix`.
    model_coefficients: Optional (batch of) vector-shaped `Tensor`
      representing the model coefficients, one for each column in
      `model_matrix`. Must have same `dtype` as `model_matrix`.
      Default value: zeros of shape
      `concat([batch_shape(model_matrix), [num_columns]])`.
    predicted_linear_response: Optional `Tensor` with `shape`, `dtype`
      matching `response`; represents `offset` shifted initial linear
      predictions based on current `model_coefficients`.
      Default value: when `model_coefficients is None`, zeros (no `offset`)
      or `offset` broadcast to the shape of `response`; otherwise the result
      of `compute_predicted_linear_response(model_matrix, model_coefficients,
      offset)`.
    offset: Optional `Tensor` with `shape`, `dtype` matching `response`;
      represents constant shift applied to `predicted_linear_response`.
      Default value: `None` (i.e., no shift).
    name: Python `str` used as name prefix to ops created by this function.
      Default value: `"prepare_args"`.

  Returns:
    A Python `list` with, in order:
    model_matrix: `model_matrix` as a `Tensor` of the common dtype.
    response: `response` as a `Tensor` of the common dtype.
    model_coefficients: The supplied `model_coefficients` as a `Tensor`, or
      the all-zeros default.
    predicted_linear_response: The supplied `predicted_linear_response` as a
      `Tensor`, or the default described above.
    offset: `offset` as a `Tensor` if it was specified, `None` otherwise.
  """
  with tf.name_scope(name or 'prepare_args'):
    dtype = dtype_util.common_dtype(
        [model_matrix, response, model_coefficients,
         predicted_linear_response, offset],
        np.float32)

    model_matrix = tf.convert_to_tensor(
        model_matrix, dtype=dtype, name='model_matrix')
    if offset is not None:
      offset = tf.convert_to_tensor(offset, dtype=dtype, name='offset')
    response = tf.convert_to_tensor(response, dtype=dtype, name='response')

    coefficients_defaulted = model_coefficients is None
    if coefficients_defaulted:
      # Nothing supplied: start from all-zero coefficients, one per column of
      # `model_matrix`, replicated over its batch dimensions.
      matrix_shape = tf.shape(model_matrix)
      coefficients_shape = tf.concat(
          [matrix_shape[:-2], matrix_shape[-1:]], axis=0)
      model_coefficients = tf.zeros(
          shape=coefficients_shape, dtype=dtype, name='model_coefficients')
    else:
      # Caller-provided coefficients may be numpy arrays or Python literals.
      model_coefficients = tf.convert_to_tensor(
          model_coefficients, dtype=dtype, name='model_coefficients')

    if predicted_linear_response is not None:
      predicted_linear_response = tf.convert_to_tensor(
          predicted_linear_response,
          dtype=dtype,
          name='predicted_linear_response')
    elif not coefficients_defaulted:
      # Coefficients were given without a matching linear response, so it has
      # to be computed from them.
      predicted_linear_response = compute_predicted_linear_response(
          model_matrix, model_coefficients, offset)
    elif offset is None:
      # Zero coefficients and no offset: the linear response is all zeros.
      predicted_linear_response = tf.zeros_like(
          response, dtype, name='predicted_linear_response')
    else:
      # Zero coefficients: the linear response is just the offset.
      predicted_linear_response = tf.broadcast_to(
          offset, tf.shape(response), name='predicted_linear_response')

  return [
      model_matrix,
      response,
      model_coefficients,
      predicted_linear_response,
      offset,
  ]
def _body(state): """Main optimization loop.""" # We use notation of [HZ2006] for brevity. x_k = state.position d_k = state.direction f_k = state.objective_value g_k = state.objective_gradient a_km1 = state.prev_step # Means a_{k-1}. # Define scalar function, which is objective restricted to direction. def ls_func(alpha): pt = x_k + tf.expand_dims(alpha, axis=-1) * d_k objective_value, gradient = value_and_gradients_function(pt) return ValueAndGradient( x=alpha, f=objective_value, df=_dot(gradient, d_k), full_gradient=gradient) # Generate initial guess for line search. # [HZ2006] suggests to generate first initial guess separately, but # [JuliaLineSearches] generates it as if previous step length was 1, and # we do the same. phi_0 = f_k dphi_0 = _dot(g_k, d_k) ls_val_0 = ValueAndGradient( x=tf.zeros_like(phi_0), f=phi_0, df=dphi_0, full_gradient=g_k) step_guess_result = _init_step(ls_val_0, a_km1, ls_func, psi_1, psi_2, params.quad_step) init_step = step_guess_result.step # Check if initial step size already satisfies Wolfe condition, and in # that case don't perform line search. c = init_step.x phi_lim = phi_0 + eps * tf.abs(phi_0) phi_c = init_step.f dphi_c = init_step.df # Original Wolfe conditions, T1 in [HZ2006]. suff_decrease_1 = delta * dphi_0 >= (phi_c - phi_0) / c curvature = dphi_c >= sigma * dphi_0 wolfe1 = suff_decrease_1 & curvature # Approximate Wolfe conditions, T2 in [HZ2006]. suff_decrease_2 = (2 * delta - 1) * dphi_0 >= dphi_c curvature = dphi_c >= sigma * dphi_0 wolfe2 = suff_decrease_2 & curvature & (phi_c <= phi_lim) wolfe = wolfe1 | wolfe2 skip_line_search = (step_guess_result.may_terminate & wolfe) | state.failed | state.converged # Call Hager-Zhang line search (L0-L3 in [HZ2006]). # Parameter theta from [HZ2006] is not adjustable, it's always 0.5. 
ls_result = linesearch.hager_zhang( ls_func, value_at_zero=ls_val_0, converged=skip_line_search, initial_step_size=init_step.x, value_at_initial_step=init_step, shrinkage_param=params.shrinkage_param, expansion_param=params.expansion_param, sufficient_decrease_param=delta, curvature_param=sigma, threshold_use_approximate_wolfe_condition=eps) # Moving to the next point, using step length from line search. # If line search was skipped, take step length from initial guess. # To save objective evaluation, use objective value and gradient returned # by line search or initial guess. a_k = tf.compat.v1.where( skip_line_search, init_step.x, ls_result.left.x) x_kp1 = state.position + tf.expand_dims(a_k, -1) * d_k f_kp1 = tf.compat.v1.where( skip_line_search, init_step.f, ls_result.left.f) g_kp1 = tf.compat.v1.where(skip_line_search, init_step.full_gradient, ls_result.left.full_gradient) # Evaluate next direction. # Use formulas (2.7)-(2.11) from [HZ2013] with P_k=I. y_k = g_kp1 - g_k d_dot_y = _dot(d_k, y_k) b_k = (_dot(y_k, g_kp1) - _norm_sq(y_k) * _dot(g_kp1, d_k) / d_dot_y) / d_dot_y eta_k = eta * _dot(d_k, g_k) / _norm_sq(d_k) b_k = tf.maximum(b_k, eta_k) d_kp1 = -g_kp1 + tf.expand_dims(b_k, -1) * d_k # Check convergence criteria. grad_converged = _norm_inf(g_kp1) <= tolerance x_converged = (_norm_inf(x_kp1 - x_k) <= x_tolerance) f_converged = ( tf.math.abs(f_kp1 - f_k) <= f_relative_tolerance * tf.math.abs(f_k)) converged = grad_converged | x_converged | f_converged # Construct new state for next iteration. new_state = _OptimizerState( converged=converged, failed=state.failed, num_iterations=state.num_iterations + 1, num_objective_evaluations=state.num_objective_evaluations + step_guess_result.func_evals + ls_result.func_evals, position=tf.compat.v1.where(state.converged, x_k, x_kp1), objective_value=tf.compat.v1.where(state.converged, f_k, f_kp1), objective_gradient=tf.compat.v1.where(state.converged, g_k, g_kp1), direction=d_kp1, prev_step=a_k) return (new_state,)
def windowed_mean(x, low_indices=None, high_indices=None, axis=0, name=None):
  """Windowed estimates of mean.

  Computes means among data in the Tensor `x` along the given windows:

    result[i] = mean(x[low_indices[i]:high_indices[i]])

  efficiently. Each window sum is obtained as the difference of two entries
  of a single zero-prepended cumulative sum. To wit, if K is the size of
  `low_indices` and `high_indices`, and `N` is the size of `x` along the
  given `axis`, the computation takes O(K + N) work, O(log(N)) depth (the
  length of the longest series of operations that are performed
  sequentially), and only uses O(1) TensorFlow kernel invocations.

  This function can be useful for assessing the behavior over time of
  trailing-window estimators from some iterative process, such as the last
  half of an MCMC chain.

  Suppose `x` has shape `Bx + [N] + E`, where the `Bx` component has rank
  `axis`, and `low_indices` and `high_indices` broadcast to shape `[M]`. Then
  each element of `low_indices` and `high_indices` must index into the
  zero-prepended cumulative sum, which has `N + 1` entries along `axis`, and
  the shape of the output will be `Bx + [M] + E`. Batch shape in the indices
  is not currently supported.

  The default windows are
  `[0, 1), [1, 2), [1, 3), [2, 4), [2, 5), ...`
  This corresponds to analyzing `x` as though it were streaming, for example
  successive states of an MCMC sampler, and we were interested in the mean of
  the last half of the data at each point.

  Args:
    x: A numeric `Tensor` holding `N` samples along the given `axis`, whose
      windowed means are desired.
    low_indices: An integer `Tensor` defining the lower boundary (inclusive)
      of each window. Default: elementwise half of `high_indices`.
    high_indices: An integer `Tensor` defining the upper boundary (exclusive)
      of each window. Must be broadcast-compatible with `low_indices`.
      Default: `tf.range(1, N+1)`, i.e., N windows that each end in the
      corresponding datum from `x` (inclusive).
    axis: Scalar `Tensor` designating the axis holding samples. This is the
      axis of `x` along which we take windows, and therefore the axis that
      `low_indices` and `high_indices` index into. Other axes are treated in
      batch. Default value: `0` (leftmost dimension).
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., `'windowed_mean'`).

  Returns:
    means: A numeric `Tensor` holding the windowed means of `x` along the
      `axis` dimension.
  """
  with tf.name_scope(name or 'windowed_mean'):
    x = tf.convert_to_tensor(x)
    low_indices, high_indices, low_counts, high_counts = _prepare_window_args(
        x, low_indices, high_indices, axis)

    # Prepend one slice of zeros so that `prefix_sums[k]` holds the sum of the
    # first `k` samples; a window sum is then a difference of two entries.
    running_total = tf.cumsum(x, axis=axis)
    leading_zero = tf.zeros_like(tf.gather(running_total, [0], axis=axis))
    prefix_sums = tf.concat([leading_zero, running_total], axis=axis)

    sums_before_window = tf.gather(prefix_sums, low_indices, axis=axis)
    sums_through_window = tf.gather(prefix_sums, high_indices, axis=axis)
    window_sizes = high_counts - low_counts
    return _safe_average(
        sums_through_window - sums_before_window, window_sizes)
def _init_step(pos, prev_step, func, psi_1, psi_2, quad_step):
  """Finds initial step size for line search at given point.

  Corresponds to I1-I2 in [HZ2006].

  The first candidate is `psi_1 * prev_step`. If `quad_step` is set and the
  parabola through the current point and that candidate is convex, the
  minimum of the parabola is used instead. Batch members for which neither
  guess is acceptable fall back to `psi_2 * prev_step`.

  Args:
    pos: ValueAndGradient for current point.
    prev_step: Step size at previous iteration.
    func: Callable taking real `Tensor` and returning ValueAndGradient,
      describes scalar function for line search.
    psi_1: Real scalar `Tensor`. Factor to multiply previous step to get right
      point for quadratic interpolation.
    psi_2: Real scalar `Tensor`. Factor to multiply previous step if quadratic
      interpolation failed.
    quad_step: Boolean. Whether to try quadratic interpolation.

  Returns:
    _StepGuessResult namedtuple containing initial guess and additional data:
    `step` is `func` evaluated at the chosen step size, `func_evals` counts
    the calls made to `func`, and `may_terminate` is True for batch members
    whose step comes from a successful quadratic interpolation.
  """
  phi_0 = pos.f
  derphi_0 = pos.df
  step = func(psi_1 * prev_step)
  can_take = step.f > phi_0
  result = _StepGuessResult(
      step=step,
      func_evals=1,
      can_take=can_take,
      may_terminate=tf.zeros_like(can_take))

  # Try to approximate function with a parabola and take its minimum as
  # initial guess.
  if quad_step:
    first_guess = result
    # Quadratic coefficient of parabola. If it's positive, parabola is convex
    # and has minimum.
    q_koef = step.f - phi_0 - step.x * derphi_0
    quad_step_success = tf.logical_and(step.f <= phi_0, q_koef > 0.0)

    def update_result_1():
      # Members without a usable parabola keep their current step size.
      new_x = tf.compat.v1.where(
          quad_step_success,
          -0.5 * (derphi_0 * step.x**2) / q_koef,
          first_guess.step.x)
      return _StepGuessResult(
          step=func(new_x),
          func_evals=first_guess.func_evals + 1,
          can_take=tf.logical_or(first_guess.can_take, quad_step_success),
          may_terminate=tf.logical_or(first_guess.may_terminate,
                                      quad_step_success))

    result = tf.cond(
        tf.reduce_any(quad_step_success), update_result_1,
        lambda: first_guess)

  guess = result

  def update_result_2():
    # Select on the *current* acceptance mask. Using the mask from before the
    # quadratic step would overwrite a successful interpolated step with
    # `psi_2 * prev_step` whenever another batch member needs the fallback,
    # while `may_terminate` would still claim the step was interpolated.
    new_x = tf.compat.v1.where(
        guess.can_take, guess.step.x, psi_2 * prev_step)
    return _StepGuessResult(
        step=func(new_x),
        func_evals=guess.func_evals + 1,
        can_take=tf.ones_like(can_take),
        may_terminate=guess.may_terminate)

  # According to [HZ2006] we should fall back to psi_2*prev_step when quadratic
  # interpolation failed. However, [JuliaLineSearches] retains guess
  # psi_1*prev_step if func(psi_1 * prev_step) > func(0), because then local
  # minimum is within (0, psi_1*prev_step).
  result = tf.cond(
      tf.reduce_all(guess.can_take), lambda: guess, update_result_2)

  return result
def one_step(self, current_state, previous_kernel_results):
  """Runs one iteration of the No U-Turn Sampler.

  Args:
    current_state: `Tensor` or Python `list` of `Tensor`s representing the
      current state(s) of the Markov chain(s). The first `r` dimensions index
      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
    previous_kernel_results: `collections.namedtuple` containing `Tensor`s
      representing values from previous calls to this function (or from the
      `bootstrap_results` function.)

  Returns:
    next_state: `Tensor` or Python list of `Tensor`s representing the state(s)
      of the Markov chain(s) after taking `self.num_trajectories_per_step`
      steps. Has same type and shape as `current_state`.
    kernel_results: `collections.namedtuple` of internal calculations used to
      advance the chain.

  Raises:
    ValueError: If `self.stackless` is set while not executing eagerly, or if
      the number of step sizes is neither one nor the number of state parts.
  """
  if self.stackless and not tf.executing_eagerly():
    raise ValueError("Cannot use stackless auto-batching in graph mode.")

  prev_log_prob = previous_kernel_results.target_log_prob
  prev_grads = previous_kernel_results.grads_target_log_prob
  leapfrogs_taken = previous_kernel_results.leapfrogs_taken
  leapfrogs_computed = previous_kernel_results.leapfrogs_computed

  with tf1.name_scope(
      self.name,
      values=[current_state, self.step_size, prev_log_prob, prev_grads]):
    # Remember whether the caller passed a bare `Tensor`, so that the result
    # can be handed back in the same form.
    state_was_bare = False
    with tf1.name_scope("initialize"):
      if not tf.nest.is_nested(current_state):
        state_was_bare = True
        current_state = [current_state]
      state_parts = [tf.convert_to_tensor(value=s) for s in current_state]

      step_sizes = self.step_size
      if not tf.nest.is_nested(step_sizes):
        step_sizes = [step_sizes]
      step_sizes = [tf.convert_to_tensor(value=s) for s in step_sizes]
      # A single step size is shared by every state part.
      if len(step_sizes) == 1:
        step_sizes = step_sizes * len(state_parts)
      if len(step_sizes) != len(state_parts):
        raise ValueError("Expected either one step size or {} (size of "
                         "`current_state`), but found {}".format(
                             len(state_parts), len(step_sizes)))

    num_steps = tf.constant([self.num_trajectories_per_step], dtype=tf.int64)

    backend = self.backend
    if backend is None:
      if self._seed_stream() is not None:
        # The user wanted reproducible results; limit the parallel iterations
        backend = ab.TensorFlowBackend(while_parallel_iterations=1)
      else:
        backend = ab.TensorFlowBackend()

    # The `dry_run` and `max_stack_depth` arguments are added by the
    # @ctx.batch decorator, confusing pylint.
    # pylint: disable=unexpected-keyword-arg
    chain_results, new_leapfrogs = self.many_steps(
        num_steps,
        state_parts,
        prev_log_prob,
        prev_grads,
        step_sizes,
        tf.zeros_like(leapfrogs_taken),  # leapfrogs
        dry_run=not self.use_auto_batching,
        stackless=self.stackless,
        backend=backend,
        max_stack_depth=self.max_tree_depth + 4,
        block_code_cache=self._block_code_cache)
    next_state, next_target_log_prob, next_grads_target_log_prob = (
        chain_results)

    if state_was_bare:
      next_state = next_state[0]

    kernel_results = NUTSKernelResults(
        next_target_log_prob,
        next_grads_target_log_prob,
        leapfrogs_taken + new_leapfrogs,
        leapfrogs_computed + tf.math.reduce_max(input_tensor=new_leapfrogs))
    return next_state, kernel_results
def barrier_price(*,
                  volatilities: types.RealTensor,
                  strikes: types.RealTensor,
                  expiries: types.RealTensor,
                  spots: types.RealTensor,
                  barriers: types.RealTensor,
                  rebates: types.RealTensor = None,
                  discount_rates: types.RealTensor = None,
                  dividend_rates: types.RealTensor = None,
                  is_barrier_down: types.BoolTensor = None,
                  is_knock_out: types.BoolTensor = None,
                  is_call_options: types.BoolTensor = None,
                  dtype: tf.DType = None,
                  name: str = None) -> types.RealTensor:
  """Prices barrier options in a Black-Scholes Model.

  Computes the prices of options with a single barrier in Black-Scholes world
  as described in Ref. [1]. Note that the barrier is applied continuously.

  #### Example

  This example is taken from Ref. [2], Page 154: a down-and-out call and a
  down-and-in call on the same underlying.

  ```python
  import numpy as np
  import tf_quant_finance as tff

  dtype = np.float64
  discount_rates = np.array([.08, .08], dtype=dtype)
  dividend_rates = np.array([.04, .04], dtype=dtype)
  spots = np.array([100., 100.], dtype=dtype)
  strikes = np.array([90., 90.], dtype=dtype)
  barriers = np.array([95., 95.], dtype=dtype)
  rebates = np.array([3., 3.], dtype=dtype)
  volatilities = np.array([.25, .25], dtype=dtype)
  expiries = np.array([.5, .5], dtype=dtype)
  is_barrier_down = np.array([True, True])
  is_knock_out = np.array([True, False])
  is_call_options = np.array([True, True])

  price = tff.black_scholes.barrier_price(
      volatilities=volatilities,
      strikes=strikes,
      expiries=expiries,
      spots=spots,
      barriers=barriers,
      rebates=rebates,
      discount_rates=discount_rates,
      dividend_rates=dividend_rates,
      is_barrier_down=is_barrier_down,
      is_knock_out=is_knock_out,
      is_call_options=is_call_options)

  # Expected output
  #  `Tensor` with values approximately [9.024, 7.7627]
  ```

  #### References

  [1]: Lee Clewlow, Javier Llanos, Chris Strickland, Caracas Venezuela
  Pricing Exotic Options in a Black-Scholes World, 1994
  https://warwick.ac.uk/fac/soc/wbs/subjects/finance/research/wpaperseries/1994/94-54.pdf
  [2]: Espen Gaarder Haug, The Complete Guide to Option Pricing Formulas,
  2nd Edition, 1997

  Args:
    volatilities: Real `Tensor` of any shape and dtype. The volatilities to
      expiry of the options to price.
    strikes: A real `Tensor` of the same dtype and compatible shape as
      `volatilities`. The strikes of the options to be priced.
    expiries: A real `Tensor` of same dtype and compatible shape as
      `volatilities`. The expiry of each option. The units should be such that
      `expiry * volatility**2` is dimensionless.
    spots: A real `Tensor` of any shape that broadcasts to the shape of the
      `volatilities`. The current spot price of the underlying.
    barriers: A real `Tensor` of same dtype as the `volatilities` and of the
      shape that broadcasts with `volatilities`. The barriers of each option.
    rebates: A real `Tensor` of same dtype as the `volatilities` and of the
      shape that broadcasts with `volatilities`. For knockouts, this is a
      fixed cash payout in case the barrier is breached. For knockins, this is
      a fixed cash payout in case the barrier level is not breached. In the
      former case, the rebate is paid immediately on breach whereas in the
      latter, the rebate is paid at the expiry of the option.
      Default value: `None` which maps to no rebates.
    discount_rates: A real `Tensor` of same dtype as the `volatilities` and of
      the shape that broadcasts with `volatilities`. Discount rates, or risk
      free rates.
      Default value: `None`, equivalent to discount_rate = 0.
    dividend_rates: A real `Tensor` of same dtype as the `volatilities` and of
      the shape that broadcasts with `volatilities`. A continuous dividend
      rate paid by the underlier.
      Default value: `None`, equivalent to zero dividends.
    is_barrier_down: A boolean `Tensor` of the shape that broadcasts with
      `volatilities`. True if barrier is below asset price at expiration.
      Default value: `True`.
    is_knock_out: A boolean `Tensor` of the shape that broadcasts with
      `volatilities`. True if option is knock out else false.
      Default value: `True`.
    is_call_options: A boolean `Tensor` of the shape that broadcasts with
      `volatilities`. True if option is call else false.
      Default value: `True`.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for
      conversion of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: `None` which maps to the default dtype inferred by
      TensorFlow.
    name: str. The name for the ops created by this function.
      Default value: `None` which is mapped to the default name
      `barrier_price`.

  Returns:
    option_prices: A `Tensor` of same shape as `spots`. The approximate price
    of the barriers option under black scholes.
  """
  # The computation is done as in Ref [2] where each integral is split into
  # two matrices. The first matrix contains the algebraic terms and the second
  # matrix contains the probability distribution terms. Masks are used to
  # filter appropriate terms for calculating the integral. Then a dot product
  # of each row in the matrices coupled with the masks work to calculate the
  # prices of the barriers option.

  def _flag_as_int(flag, flag_name):
    """Maps a boolean flag (`None` meaning `True`) to an integer 0/1 `Tensor`."""
    if flag is None:
      return tf.constant(1, name=flag_name)
    flag = tf.convert_to_tensor(flag, dtype=tf.bool, name=flag_name)
    return tf.where(flag, 1, 0)

  with tf.name_scope(name or 'barrier_price'):
    spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
    dtype = spots.dtype
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    volatilities = tf.convert_to_tensor(
        volatilities, dtype=dtype, name='volatilities')
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
    barriers = tf.convert_to_tensor(barriers, dtype=dtype, name='barriers')
    if rebates is not None:
      rebates = tf.convert_to_tensor(rebates, dtype=dtype, name='rebates')
    else:
      rebates = tf.zeros_like(spots, dtype=dtype, name='rebates')

    # Convert all to tensor and enforce float dtype where required
    if discount_rates is not None:
      discount_rates = tf.convert_to_tensor(
          discount_rates, dtype=dtype, name='discount_rates')
    else:
      discount_rates = tf.convert_to_tensor(
          0.0, dtype=dtype, name='discount_rates')

    if dividend_rates is not None:
      dividend_rates = tf.convert_to_tensor(
          dividend_rates, dtype=dtype, name='dividend_rates')
    else:
      dividend_rates = tf.convert_to_tensor(
          0.0, dtype=dtype, name='dividend_rates')

    is_barrier_down = _flag_as_int(is_barrier_down, 'is_barrier_down')
    is_knock_out = _flag_as_int(is_knock_out, 'is_knock_out')
    is_call_options = _flag_as_int(is_call_options, 'is_call_options')

    # Indices which range from 0-7 are used to select the appropriate
    # mask for each barrier
    indices = tf.bitwise.left_shift(
        is_barrier_down, 2) + tf.bitwise.left_shift(
            is_knock_out, 1) + is_call_options

    # Masks select the appropriate terms for integral approximations
    # Integrals are separated by algebraic terms and probability
    # distribution terms. This give 12 different terms per matrix
    # (6 integrals, 2 terms each). Column pairs are, in order, the terms
    # A, B, C, D, E (rebate paid at expiry) and F (rebate paid at hit) of
    # Ref [2]; knock-in rows use E and knock-out rows use F.
    # shape = [8, 12]
    mask_matrix_greater_strike = tf.constant([
        [1, 1, -1, -1, 0, 0, 1, 1, 1, 1, 0, 0],  # up and in put
        [1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],  # up and in call
        [0, 0, 1, 1, 0, 0, -1, -1, 0, 0, 1, 1],  # up and out put
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],  # up and out call
        # A knock-in pays its rebate at expiry (E), not at hit (F).
        [0, 0, 1, 1, -1, -1, 1, 1, 1, 1, 0, 0],  # down and in put
        [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0],  # down and in call
        [1, 1, -1, -1, 1, 1, -1, -1, 0, 0, 1, 1],  # down and out put
        [1, 1, 0, 0, -1, -1, 0, 0, 0, 0, 1, 1]])  # down and out call

    mask_matrix_lower_strike = tf.constant([
        [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0],  # up and in put
        [0, 0, 1, 1, -1, -1, 1, 1, 1, 1, 0, 0],  # up and in call
        [1, 1, 0, 0, -1, -1, 0, 0, 0, 0, 1, 1],  # up and out put
        [1, 1, -1, -1, 1, 1, -1, -1, 0, 0, 1, 1],  # up and out call
        [1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],  # down and in put
        [1, 1, -1, -1, 0, 0, 1, 1, 1, 1, 0, 0],  # down and in call
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],  # down and out put
        [0, 0, 1, 1, 0, 0, -1, -1, 0, 0, 1, 1]])  # down and out call

    # Create masks
    # Masks are shape [strikes.shape, 12]
    masks_lower = tf.gather(mask_matrix_lower_strike, indices, axis=0)
    masks_greater = tf.gather(mask_matrix_greater_strike, indices, axis=0)
    strikes_greater = tf.expand_dims(strikes > barriers, axis=-1)
    masks = tf.where(strikes_greater, masks_greater, masks_lower)
    masks = tf.cast(masks, dtype=dtype)
    one = tf.constant(1, dtype=dtype)
    # +1 for calls / down barriers, -1 for puts / up barriers.
    call_or_put = tf.cast(tf.where(tf.equal(is_call_options, 0), -one, one),
                          dtype=dtype)
    below_or_above = tf.cast(tf.where(tf.equal(is_barrier_down, 0), -one, one),
                             dtype=dtype)

    # Calculate params for integrals
    sqrt_var = volatilities * tf.math.sqrt(expiries)
    mu = (discount_rates - dividend_rates) - ((volatilities**2) / 2)
    lamda = 1 + (mu / (volatilities**2))
    x = (tf.math.log(spots / strikes) / (sqrt_var)) + (lamda * sqrt_var)
    x1 = (tf.math.log(spots / barriers) / (sqrt_var)) + (lamda * sqrt_var)
    y = (tf.math.log((barriers**2) / (spots * strikes)) / (
        sqrt_var)) + (lamda * sqrt_var)
    y1 = (tf.math.log(barriers / spots) / (sqrt_var)) + (lamda * sqrt_var)
    # Exponent of the rebate-at-hit term: sqrt(mu^2 + 2 sigma^2 r) / sigma^2.
    # The square root is required; with zero rates this must reduce to
    # |mu| / sigma^2.
    b = tf.math.sqrt(
        (mu**2) + (2 * (volatilities**2) * discount_rates)) / (volatilities**2)
    z = (tf.math.log(barriers / spots) / (sqrt_var)) + (b * sqrt_var)
    a = mu / (volatilities**2)

    # Other params used for integrals
    discount_factors = tf.math.exp(
        -discount_rates * expiries, name='discount_factors')
    barriers_ratio = tf.math.divide(barriers, spots, name='barriers_ratio')
    spots_term = call_or_put * spots * tf.math.exp(-dividend_rates * expiries)
    strikes_term = call_or_put * strikes * discount_factors

    # rank is used to stack elements and reduce_sum
    strike_rank = strikes.shape.rank

    # Constructing Matrix with first and second algebraic terms for each
    # integral [strike.shape, 12]
    terms_mat = tf.stack(
        (spots_term, -strikes_term,
         spots_term, -strikes_term,
         spots_term * (barriers_ratio**(2 * lamda)),
         -strikes_term * (barriers_ratio**((2 * lamda) - 2)),
         spots_term * (barriers_ratio**(2 * lamda)),
         -strikes_term * (barriers_ratio**((2 * lamda) - 2)),
         rebates * discount_factors,
         -rebates * discount_factors * (  # pylint: disable=invalid-unary-operand-type
             barriers_ratio**((2 * lamda) - 2)),
         rebates * (barriers_ratio**(a + b)),
         rebates * (barriers_ratio**(a - b))),
        name='term_matrix',
        axis=strike_rank)

    # Constructing Matrix with first and second norm for each integral
    # [strikes.shape, 12]
    cdf_mat = tf.stack(
        (call_or_put * x,
         call_or_put * (x - sqrt_var),
         call_or_put * x1,
         call_or_put * (x1 - sqrt_var),
         below_or_above * y,
         below_or_above * (y - sqrt_var),
         below_or_above * y1,
         below_or_above * (y1 - sqrt_var),
         below_or_above * (x1 - sqrt_var),
         below_or_above * (y1 - sqrt_var),
         below_or_above * z,
         below_or_above * (z - (2 * b * sqrt_var))),
        name='cdf_matrix',
        axis=strike_rank)
    cdf_mat = _ncdf(cdf_mat)
    # Calculating and returning price for each option
    return tf.reduce_sum(masks * terms_mat * cdf_mat, axis=strike_rank)
def _mean(self):
  """Returns `loc` broadcast against the shape of `concentration`."""
  # Adding zeros shaped like `concentration` broadcasts `loc` without
  # changing its values.
  broadcast_zeros = tf.zeros_like(self.concentration)
  return self.loc + broadcast_zeros
def binary_price(*,
                 volatilities: types.RealTensor,
                 strikes: types.RealTensor,
                 expiries: types.RealTensor,
                 spots: types.RealTensor = None,
                 forwards: types.RealTensor = None,
                 discount_rates: types.RealTensor = None,
                 dividend_rates: types.RealTensor = None,
                 discount_factors: types.RealTensor = None,
                 is_call_options: types.BoolTensor = None,
                 is_normal_volatility: bool = False,
                 dtype: tf.DType = None,
                 name: str = None) -> types.RealTensor:
  """Computes the Black Scholes price for a batch of binary call or put options.

  The binary call (resp. put) option priced here is that which pays off a unit
  of cash if the underlying asset has a value greater (resp. smaller) than the
  strike price at expiry. Hence the binary option price is the discounted
  probability that the asset will end up higher (resp. lower) than the
  strike price at expiry.

  #### Example

  ```python
    # Price a batch of 5 binary call options.
    volatilities = np.array([0.0001, 102.0, 2.0, 0.1, 0.4])
    forwards = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    # Strikes will automatically be broadcasted to shape [5].
    strikes = np.array([3.0])
    # Expiries will be broadcast to shape [5], i.e. each option has strike=3
    # and expiry = 1.
    expiries = 1.0
    computed_prices = tff.black_scholes.binary_price(
        volatilities=volatilities,
        strikes=strikes,
        expiries=expiries,
        forwards=forwards)
  # Expected print output of prices:
  # [0.         0.         0.15865525 0.99764937 0.85927418]
  ```

  #### References:

  [1] Hull, John C., Options, Futures and Other Derivatives. Pearson, 2018.
  [2] Wikipedia contributors. Binary option. Available at:
    https://en.wikipedia.org/w/index.php?title=Binary_option

  Args:
    volatilities: Real `Tensor` of any shape and dtype. The volatilities to
      expiry of the options to price.
    strikes: A real `Tensor` of the same dtype and compatible shape as
      `volatilities`. The strikes of the options to be priced.
    expiries: A real `Tensor` of same dtype and compatible shape as
      `volatilities`. The expiry of each option. The units should be such that
      `expiry * volatility**2` is dimensionless.
    spots: A real `Tensor` of any shape that broadcasts to the shape of the
      `volatilities`. The current spot price of the underlying. Either this
      argument or the `forwards` (but not both) must be supplied.
    forwards: A real `Tensor` of any shape that broadcasts to the shape of
      `volatilities`. The forwards to maturity. Either this argument or the
      `spots` must be supplied but both must not be supplied.
    discount_rates: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      If not `None`, discount factors are calculated as e^(-rT),
      where r are the discount rates, or risk free rates. At most one of
      discount_rates and discount_factors can be supplied.
      Default value: `None`, equivalent to r = 0 and discount factors = 1 when
      discount_factors also not given.
    dividend_rates: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      Only used when `spots` is supplied, in which case the forwards are
      computed as `spots * exp(-dividend_rates * expiries) / discount_factors`.
      Ignored when `forwards` is supplied, since no forward needs to be
      derived in that case.
      Default value: `None`, equivalent to q = 0.
    discount_factors: An optional real `Tensor` of same dtype as the
      `volatilities`. If not None, these are the discount factors to expiry
      (i.e. e^(-rT)). If None, no discounting is applied (i.e. the undiscounted
      option price is returned). If `spots` is supplied and `discount_factors`
      is not None then this is also used to compute the forwards to expiry.
      Default value: None, equivalent to discount factors = 1.
    is_call_options: A boolean `Tensor` of a shape compatible with
      `volatilities`. Indicates whether the option is a call (if True) or a put
      (if False). If not supplied, call options are assumed.
    is_normal_volatility: An optional Python boolean specifying whether the
      `volatilities` correspond to lognormal Black volatility (if False) or
      normal Black volatility (if True).
      Default value: False, which corresponds to lognormal volatility.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for conversion
      of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: None which maps to the default dtype inferred by
      TensorFlow (float32).
    name: str. The name for the ops created by this function.
      Default value: None which is mapped to the default name `binary_price`.

  Returns:
    binary_prices: A `Tensor` of the same shape as `forwards`. The Black
    Scholes price of the binary options.

  Raises:
    ValueError: If both `forwards` and `spots` are supplied or if neither is
      supplied.
    ValueError: If both `discount_rates` and `discount_factors` are supplied.
  """
  if (spots is None) == (forwards is None):
    raise ValueError('Either spots or forwards must be supplied but not both.')
  if (discount_rates is not None) and (discount_factors is not None):
    raise ValueError('At most one of discount_rates and discount_factors may '
                     'be supplied')

  with tf.name_scope(name or 'binary_price'):
    # `strikes` fixes the dtype used for every other conversion below.
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    dtype = strikes.dtype
    volatilities = tf.convert_to_tensor(
        volatilities, dtype=dtype, name='volatilities')
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')

    # Only the discount factors are needed from here on; rates are converted
    # to factors and never recovered from them (that would divide by
    # `expiries`).
    if discount_rates is not None:
      discount_rates = tf.convert_to_tensor(
          discount_rates, dtype=dtype, name='discount_rates')
      discount_factors = tf.exp(-discount_rates * expiries)
    elif discount_factors is not None:
      discount_factors = tf.convert_to_tensor(
          discount_factors, dtype=dtype, name='discount_factors')
    else:
      discount_factors = tf.convert_to_tensor(
          1.0, dtype=dtype, name='discount_factors')

    if forwards is not None:
      forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
    else:
      spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
      if dividend_rates is None:
        # q = 0: F = S / discount_factor.
        forwards = spots / discount_factors
      else:
        # F = S * exp((r - q) * T) = S * exp(-q * T) / discount_factor.
        # Previously the dividend rates were accepted but never applied.
        dividend_rates = tf.convert_to_tensor(
            dividend_rates, dtype=dtype, name='dividend_rates')
        forwards = (spots * tf.exp(-dividend_rates * expiries)
                    / discount_factors)

    sqrt_var = volatilities * tf.math.sqrt(expiries)

    if is_normal_volatility:  # normal model
      d2 = (forwards - strikes) / sqrt_var
    else:  # lognormal model
      d2 = tf.math.log(forwards / strikes) / sqrt_var - sqrt_var / 2

    # With zero total volatility the payoff is deterministic: the call pays 1
    # exactly when the forward is above the strike.
    zero_volatility_call_payoff = tf.where(forwards > strikes,
                                           tf.ones_like(strikes, dtype=dtype),
                                           tf.zeros_like(strikes, dtype=dtype))
    undiscounted_calls = tf.where(sqrt_var > 0, _ncdf(d2),
                                  zero_volatility_call_payoff)

    if is_call_options is None:
      return discount_factors * undiscounted_calls

    # Undiscounted binary call and put prices sum to one.
    undiscounted_puts = 1 - undiscounted_calls
    predicate = tf.broadcast_to(is_call_options, tf.shape(undiscounted_calls))
    return discount_factors * tf.where(predicate, undiscounted_calls,
                                       undiscounted_puts)
def _variance(self):
  """Returns an all-zeros tensor built with `tf.zeros_like` from `loc`."""
  location = self.loc
  return tf.zeros_like(location)
def _update_trajectory_grad(previous_kernel_results, previous_state,
                            proposed_state, proposed_velocity,
                            trajectory_jitter, accept_prob, step_size,
                            criterion_fn, max_leapfrog_steps,
                            experimental_shard_axis_names=None,
                            experimental_chain_axis_names=None):
  """Updates the trajectory length from the gradient of the criterion.

  The criterion is differentiated with respect to an extra time offset applied
  to the proposed state along the proposed velocity. The resulting gradient is
  scaled by the jitter, averaged with acceptance-probability weights,
  normalized by a bias-corrected running average of its square, and applied as
  a clipped multiplicative update to the maximum trajectory length.

  Args:
    previous_kernel_results: Results structure supporting `_replace`; its
      `adaptation_rate`, `step`, `averaged_sq_grad`, `max_trajectory_length`
      and `averaged_max_trajectory_length` fields are read here.
    previous_state: State passed as the first argument of `criterion_fn`.
    proposed_state: Proposed state (possibly nested) that gets shifted along
      `proposed_velocity`.
    proposed_velocity: Velocity with the same structure as `proposed_state`.
    trajectory_jitter: Factor multiplied into the raw trajectory gradient.
    accept_prob: Acceptance probabilities, used both as the third argument of
      `criterion_fn` and as averaging weights.
    step_size: Forwarded to `_clip_max_trajectory_length`.
    criterion_fn: Callable `(previous_state, adjusted_state, accept_prob)`
      returning the criterion value to differentiate.
    max_leapfrog_steps: Forwarded to `_clip_max_trajectory_length`.
    experimental_shard_axis_names: Optional shard axis names forwarded to the
      structure-mapping helper.
    experimental_chain_axis_names: Optional chain axis names over which the
      weighted sums are reduced.

  Returns:
    `previous_kernel_results` with `criterion`, `max_trajectory_length`,
    `averaged_sq_grad` and `averaged_max_trajectory_length` replaced.
  """
  prev = previous_kernel_results

  # Compute criterion grads.
  def criterion_after_offset(dt):
    # This represents the effect on the criterion value as the state follows the
    # proposed velocity. This implicitly assumes an identity mass matrix.
    def shift_part(x, v, shard_axes=None):
      dt_like_v = bu.left_justified_expand_dims_like(dt, v)
      shared_dt = distribute_lib.pbroadcast(dt_like_v, shard_axes)
      return x + shared_dt * v

    shifted_state = _map_structure_up_to_with_axes(
        proposed_state,
        shift_part,
        proposed_state,
        proposed_velocity,
        experimental_shard_axis_names=experimental_shard_axis_names)
    return criterion_fn(previous_state, shifted_state, accept_prob)

  zero_offset = tf.zeros_like(accept_prob)
  criterion, trajectory_grad = gradient.value_and_gradient(
      criterion_after_offset, zero_offset)
  trajectory_grad *= trajectory_jitter

  # Weight by acceptance probability. Entries with a negligible acceptance
  # probability or a non-finite gradient contribute zero.
  chain_axes = distribute_lib.canonicalize_named_axis(
      experimental_chain_axis_names)
  is_accepted_enough = accept_prob > 1e-4
  trajectory_grad = tf.where(is_accepted_enough, trajectory_grad, 0.)
  is_finite_grad = tf.math.is_finite(trajectory_grad)
  trajectory_grad = tf.where(is_finite_grad, trajectory_grad, 0.)
  weighted_grad_sum = _reduce_sum_with_axes(
      trajectory_grad * accept_prob, None, chain_axes)
  weight_sum = _reduce_sum_with_axes(accept_prob + 1e-20, None, chain_axes)
  trajectory_grad = weighted_grad_sum / weight_sum

  # Compute Adam/RMSProp step size.
  rate_dtype = prev.adaptation_rate.dtype
  iteration_f = tf.cast(prev.step, rate_dtype) + 1.
  msg_adaptation_rate = 0.05
  new_averaged_sq_grad = (
      (1 - msg_adaptation_rate) * prev.averaged_sq_grad +
      msg_adaptation_rate * trajectory_grad**2)
  # Bias correction for the running average.
  bias_correction = 1. - (1 - msg_adaptation_rate)**iteration_f
  corrected_sq_grad = new_averaged_sq_grad / bias_correction
  trajectory_step_size = (
      prev.adaptation_rate / tf.sqrt(corrected_sq_grad + 1e-20))

  # Apply the gradient. Clip absolute value to ~log(2)/2.
  log_update = tf.clip_by_value(
      trajectory_step_size * trajectory_grad, -0.35, 0.35)
  new_max_trajectory_length = prev.max_trajectory_length * tf.exp(log_update)

  # Iterate averaging, done in log space with a weight of iteration**(-0.5).
  average_weight = iteration_f**(-0.5)
  log_new_length = tf.math.log(new_max_trajectory_length)
  log_prev_average = tf.math.log(
      1e-10 + prev.averaged_max_trajectory_length)
  new_averaged_max_trajectory_length = tf.exp(
      average_weight * log_new_length +
      (1 - average_weight) * log_prev_average)

  # Clip the maximum trajectory length.
  new_max_trajectory_length = _clip_max_trajectory_length(
      new_max_trajectory_length, step_size, prev.adaptation_rate,
      max_leapfrog_steps)

  return prev._replace(
      criterion=criterion,
      max_trajectory_length=new_max_trajectory_length,
      averaged_sq_grad=new_averaged_sq_grad,
      averaged_max_trajectory_length=new_averaged_max_trajectory_length)