Example #1
0
    def testBijector(self, bijector_name, data):
        # Property test over a drawn bijector.  For each of `forward`,
        # `forward_log_det_jacobian`, `inverse` and `inverse_log_det_jacobian`
        # it differentiates the result with respect to the input tensor and
        # every floating-point trainable variable, passes the gradients to
        # `assert_no_none_grad`, and runs the call under
        # `tfp_hps.assert_no_excessive_var_usage`.  It finishes by comparing
        # `forward_dtype` / `inverse_dtype` against the dtypes actually seen.
        #
        # `bijector_name` selects which bijector `_draw_bijector` builds; `data`
        # supplies the random choices (it is passed to the `_draw_*` helpers and
        # its `draw` method is called directly below).
        tfp_hps.guitar_skip_if_matches('Tanh', bijector_name, 'b/144163991')

        bijector, event_dim = self._draw_bijector(bijector_name, data)

        # Forward mapping: Check differentiation through forward mapping with
        # respect to the input and parameter variables.  Also check that any
        # variables are not referenced overmuch.
        xs = self._draw_domain_tensor(bijector, data, event_dim)
        # Gradients are requested for the input first, then for each
        # floating-point trainable variable; `grads[0]` is therefore d(ys)/d(xs).
        wrt_vars = [xs] + [
            v for v in bijector.trainable_variables if v.dtype.is_floating
        ]
        with tf.GradientTape() as tape:
            with tfp_hps.assert_no_excessive_var_usage(
                    'method `forward` of {}'.format(bijector)):
                tape.watch(wrt_vars)
                # TODO(b/73073515): Fix graph mode gradients with bijector caching.
                ys = bijector.forward(xs + 0)
        grads = tape.gradient(ys, wrt_vars)
        assert_no_none_grad(bijector, 'forward', wrt_vars, grads)

        # For scalar bijectors, verify correctness of the _is_increasing method.
        # TODO(b/148459057): Except, don't verify Softfloor on Guitar because
        # of numerical problem.
        def exception(bijector):
            # True only when running under Guitar and the bijector is a
            # `Softfloor`, possibly wrapped in one or more `Invert` layers.
            if not tfp_hps.running_under_guitar():
                return False
            if isinstance(bijector, tfb.Softfloor):
                return True
            if isinstance(bijector, tfb.Invert):
                return exception(bijector.bijector)
            return False

        if (bijector.forward_min_event_ndims == 0
                and bijector.inverse_min_event_ndims == 0
                and not exception(bijector)):
            dydx = grads[0]
            hp.note('dydx: {}'.format(dydx))
            isfinite = tf.math.is_finite(dydx)
            incr_or_slope_eq0 = bijector._internal_is_increasing() | tf.equal(
                dydx, 0)  # pylint: disable=protected-access
            # `&` binds tighter than `|`, so the expected value below parses as
            # `(isfinite & (dydx >= 0)) | tf.zeros_like(incr_or_slope_eq0)`.
            # NOTE(review): the `zeros_like` term presumably only broadcasts the
            # expected value to the shape of `incr_or_slope_eq0` -- confirm.
            self.assertAllEqual(
                isfinite & incr_or_slope_eq0,
                isfinite & (dydx >= 0) | tf.zeros_like(incr_or_slope_eq0))

        # FLDJ: Check differentiation through forward log det jacobian with
        # respect to the input and parameter variables.  Also check that any
        # variables are not referenced overmuch.
        event_ndims = data.draw(
            hps.integers(min_value=bijector.forward_min_event_ndims,
                         max_value=xs.shape.ndims))
        with tf.GradientTape() as tape:
            max_permitted = _ldj_tensor_conversions_allowed(bijector,
                                                            is_forward=True)
            with tfp_hps.assert_no_excessive_var_usage(
                    'method `forward_log_det_jacobian` of {}'.format(bijector),
                    max_permissible=max_permitted):
                tape.watch(wrt_vars)
                # TODO(b/73073515): Fix graph mode gradients with bijector caching.
                ldj = bijector.forward_log_det_jacobian(
                    xs + 0, event_ndims=event_ndims)
        grads = tape.gradient(ldj, wrt_vars)
        assert_no_none_grad(bijector, 'forward_log_det_jacobian', wrt_vars,
                            grads)

        # Inverse mapping: Check differentiation through inverse mapping with
        # respect to the codomain "input" and parameter variables.  Also check that
        # any variables are not referenced overmuch.
        ys = self._draw_codomain_tensor(bijector, data, event_dim)
        wrt_vars = [ys] + [
            v for v in bijector.trainable_variables if v.dtype.is_floating
        ]
        with tf.GradientTape() as tape:
            with tfp_hps.assert_no_excessive_var_usage(
                    'method `inverse` of {}'.format(bijector)):
                tape.watch(wrt_vars)
                # TODO(b/73073515): Fix graph mode gradients with bijector caching.
                # Note: this rebinds `xs` to the inverse image of the drawn `ys`;
                # the dtype checks at the end of the test use this value.
                xs = bijector.inverse(ys + 0)
        grads = tape.gradient(xs, wrt_vars)
        assert_no_none_grad(bijector, 'inverse', wrt_vars, grads)

        # ILDJ: Check differentiation through inverse log det jacobian with respect
        # to the codomain "input" and parameter variables.  Also check that any
        # variables are not referenced overmuch.
        event_ndims = data.draw(
            hps.integers(min_value=bijector.inverse_min_event_ndims,
                         max_value=ys.shape.ndims))
        with tf.GradientTape() as tape:
            max_permitted = _ldj_tensor_conversions_allowed(bijector,
                                                            is_forward=False)
            with tfp_hps.assert_no_excessive_var_usage(
                    'method `inverse_log_det_jacobian` of {}'.format(bijector),
                    max_permissible=max_permitted):
                tape.watch(wrt_vars)
                # TODO(b/73073515): Fix graph mode gradients with bijector caching.
                ldj = bijector.inverse_log_det_jacobian(
                    ys + 0, event_ndims=event_ndims)
        grads = tape.gradient(ldj, wrt_vars)
        assert_no_none_grad(bijector, 'inverse_log_det_jacobian', wrt_vars,
                            grads)

        # Check that the outputs of forward_dtype and inverse_dtype match the dtypes
        # of the outputs of forward and inverse.
        self.assertAllEqualNested(ys.dtype, bijector.forward_dtype(xs.dtype))
        self.assertAllEqualNested(xs.dtype, bijector.inverse_dtype(ys.dtype))
Example #2
0
 def _mode(self):
     """Returns `floor(c * exp(logits))` for a shifted count `c`.

     `c` is `total_count - 1` wherever `total_count` exceeds one and zero
     everywhere else.
     """
     total_count = self.total_count
     shifted_count = tf.where(total_count > 1.,
                              total_count - 1.,
                              tf.zeros_like(total_count))
     exp_logits = tf.exp(self.logits)
     return tf.floor(shifted_count * exp_logits)
Example #3
0
 def _mode(self):
     """Returns `floor(c * exp(logits))` for a shifted count `c`.

     `total_count` is converted to a tensor once up front. `c` is
     `total_count - 1` wherever `total_count` exceeds one and zero elsewhere.
     The logits come from `_logits_parameter_no_checks()`.
     """
     count = tf.convert_to_tensor(self.total_count)
     shifted_count = tf.where(count > 1., count - 1., tf.zeros_like(count))
     exp_logits = tf.exp(self._logits_parameter_no_checks())
     return tf.floor(shifted_count * exp_logits)
Example #4
0
  def testDenseFlipout(self):
    # Rebuilds the output of a `DenseFlipout` layer by hand (posterior mean
    # term plus a sign-perturbed sample term plus bias) and compares it, the
    # bias sample and both KL penalties against what the layer produced.  It
    # then checks which arguments the mocked divergences were called with.
    batch_size, in_size, out_size = 2, 3, 4
    with self.cached_session() as sess:
      tf1.set_random_seed(9069)
      setup = self._testDenseSetUp(
          tfp.layers.DenseFlipout, batch_size, in_size, out_size, seed=44)
      (kernel_posterior, kernel_prior, kernel_divergence,
       bias_posterior, bias_prior, bias_divergence,
       layer, inputs, outputs, kl_penalty) = setup

      tf1.set_random_seed(9069)
      # Zero-mean copy of the kernel posterior; its sample is the perturbation
      # that gets multiplied by the random signs below.
      zero_mean_kernel_posterior = tfd.Normal(
          loc=tf.zeros_like(kernel_posterior.result_loc),
          scale=kernel_posterior.result_scale)
      kernel_perturbation = zero_mean_kernel_posterior.sample(seed=42)

      seed_stream = tfd.SeedStream(layer.seed, salt='DenseFlipout')

      def draw_signs(width):
        # Draws integers in {0, 1} and maps them to {-1, +1}.  Each call
        # consumes the next seed from `seed_stream`, so call order matters.
        bits = tf.random.uniform([batch_size, width],
                                 minval=0,
                                 maxval=2,
                                 dtype=tf.int64,
                                 seed=seed_stream())
        return tf.cast(2 * bits - 1, inputs.dtype)

      input_signs = draw_signs(in_size)
      output_signs = draw_signs(out_size)

      perturbation = tf.matmul(inputs * input_signs, kernel_perturbation)
      perturbation = perturbation * output_signs

      expected_outputs = (
          tf.matmul(inputs, kernel_posterior.result_loc)
          + perturbation
          + bias_posterior.result_sample)

      # (expected, actual) pairs.  They are fetched in one `sess.run` call so
      # that all values come from the same evaluation of the graph.
      comparisons = [
          (bias_posterior.result_sample, layer.bias_posterior_tensor),
          (expected_outputs, outputs),
          (kernel_divergence.result, kl_penalty[0]),
          (bias_divergence.result, kl_penalty[1]),
      ]
      for expected_value, actual_value in sess.run(comparisons):
        self.assertAllClose(expected_value, actual_value, rtol=1e-6, atol=0.)

      # Each mocked divergence is expected to have been called once;
      # MockKLDivergence records the arguments of every call in a list, so the
      # arguments of that call sit at position 0.  Identity (`is`) is used
      # because TensorFlowTestCase.assertAllEqual coerces its inputs to numpy
      # arrays, which does not work for a mix of Distributions and Tensors.
      expected_calls = (
          (kernel_divergence, [kernel_posterior, kernel_prior, None]),
          (bias_divergence,
           [bias_posterior, bias_prior, bias_posterior.result_sample]),
      )
      for divergence, expected_args in expected_calls:
        recorded_args = divergence.args[0]
        for expected_arg, recorded_arg in zip(expected_args, recorded_args):
          self.assertIs(expected_arg, recorded_arg)
Example #5
0
def update(value_and_gradients_function, val_left, val_right, val_trial, f_lim,
           active=None):
  """Shrinks a bracketing interval around a minimum using a trial point.

  Starting from an interval [a, b] that brackets a minimum of a line function
  `f`, and a trial point c, this produces a smaller nested interval that still
  brackets the minimum. If c does not lie strictly inside (a, b), the interval
  is returned as it was supplied.

  The explanation below is phrased for a single line function. The
  implementation also accepts batches of points, so several line functions can
  be processed at once; see `value_and_gradients_function` under Args.

  An interval brackets a minimum when the opposite slope conditions hold. With
  `df` denoting the derivative of `f`, the procedure relies on:

  ```
    f(a) <= f(0) + epsilon   (1)
    df(a) < 0                (2)
    df(b) > 0                (3)
  ```

  Here epsilon is a small positive constant. Condition (1) asks that the value
  at the left end point is not much larger than at the starting point 0, which
  is easy to satisfy because the search direction is assumed to be a descent
  direction. Conditions (2) and (3) together imply that the derivative has at
  least one zero between a and b.

  For a trial point c inside the interval the rules applied are:

    * `df(c) < 0` and `f(c) <= f_lim`: c replaces the left end point.
    * otherwise: c replaces the right end point.
    * `df(c) < 0` and `f(c) > f_lim`: in addition, bisection is run on the
      resulting interval (the current left end point and c).

  These rules keep the opposite slope conditions intact. See procedure U0-U3
  on page 123 of the [Hager and Zhang (2006)][2] article for details.

  This function does not check that the supplied interval satisfies the
  opposite slope conditions; that is assumed.

  Args:
    value_and_gradients_function: A Python callable that accepts a real scalar
      tensor and returns an object that can be converted to a namedtuple.
      The namedtuple should have fields 'f' and 'df' that correspond to scalar
      tensors of real dtype containing the value of the function and its
      derivative at that point. The other namedtuple fields, if present,
      should be tensors or sequences (possibly nested) of tensors.
      In usual optimization application, this function would be generated by
      projecting the multivariate objective function along some specific
      direction. The direction is determined by some other procedure but should
      be a descent direction (i.e. the derivative of the projected univariate
      function must be negative at 0.).
      Alternatively, the function may represent the batching of `n` such line
      functions (e.g. projecting a single multivariate objective function along
      `n` distinct directions at once) accepting n points as input, i.e. a
      tensor of shape [n], and the fields 'f' and 'df' in the returned
      namedtuple should each be a tensor of shape [n], with the corresponding
      function values and derivatives at the input points.
    val_left: Return value of value_and_gradients_function at the left
      end point of the bracketing interval (labelled 'a' above).
    val_right: Return value of value_and_gradients_function at the right
      end point of the bracketing interval (labelled 'b' above).
    val_trial: Return value of value_and_gradients_function at the trial point
      to be used to shrink the interval (labelled 'c' above).
    f_lim: real `Tensor` of shape [n]. The function value threshold for
      the approximate Wolfe conditions to be checked for each batch member.
    active: optional boolean `Tensor` of shape [n]. Relevant in batching mode
      only, indicates batch members on which the update procedure should be
      applied. On non-active members the current left/right interval is returned
      unmodified.

  Returns:
    A namedtuple containing the following fields:
      iteration: An int32 scalar `Tensor`. The number of iterations performed
        by the bisect algorithm.
      stopped: A boolean `Tensor` of shape [n]. True for those batch members
        where the bisection algorithm terminated.
      failed: A boolean `Tensor` of shape [n]. True for those batch members
        where an error was encountered.
      num_evals: An int32 scalar `Tensor`. The number of times the objective
        function was evaluated.
      left: Return value of value_and_gradients_function at the updated left
        end point of the interval found.
      right: Return value of value_and_gradients_function at the updated right
        end point of the interval found.
  """
  trial_x = val_trial.x

  # Only a trial point strictly between the end points (and, in batch mode,
  # belonging to an active member) may replace one of them.
  inside = (val_left.x < trial_x) & (trial_x < val_right.x)
  if active is not None:
    inside = inside & active

  descending = val_trial.df < 0

  # A descending trial point whose value does not exceed the threshold can
  # serve as the new left end point.
  valid_left = descending & (val_trial.f <= f_lim)

  # A descending trial point whose value exceeds the threshold cannot; bisect
  # then narrows down the interval between the current left end point and the
  # trial point.
  needs_bisect = inside & descending & (val_trial.f > f_lim)

  # Whenever the trial point is not a valid left end point, either its slope is
  # not negative (so it is a valid right end point) or bisection is needed.
  # Either way the left end point is kept and the trial point becomes the new
  # right end point.
  new_left = val_where(inside & valid_left, val_trial, val_left)
  new_right = val_where(inside & ~valid_left, val_trial, val_right)

  initial_bisect_state = _IntermediateResult(
      iteration=tf.convert_to_tensor(0),
      stopped=~needs_bisect,  # Members that need no bisection start stopped.
      failed=tf.zeros_like(inside),  # All False.
      num_evals=tf.convert_to_tensor(0),
      left=new_left,
      right=new_right)
  return _bisect(value_and_gradients_function, initial_bisect_state, f_lim)
Example #6
0
 def _entropy(self):
     """Returns `k * (1 - i1e(k) / i0e(k)) + log(i0e(k)) + log(2 * pi)`.

     `k` is `self.concentration`. The result is broadcast against `self.loc`.
     """
     concentration = self.concentration
     scaled_i0 = tf.math.bessel_i0e(concentration)
     scaled_i1 = tf.math.bessel_i1e(concentration)
     bessel_ratio = scaled_i1 / scaled_i0
     unbroadcast = (concentration * (1 - bessel_ratio)
                    + tf.math.log(scaled_i0)
                    + np.log(2 * np.pi))
     # Adding zeros shaped like `loc` broadcasts the result against `loc`.
     return unbroadcast + tf.zeros_like(self.loc)
Example #7
0
 def _variance(self):
     """Returns `1 - i1e(k) / i0e(k)`, with `k` broadcast against `loc`.

     `k` is `self.concentration`.
     """
     # Adding zeros shaped like `loc` broadcasts the concentration up front.
     broadcast_concentration = self.concentration + tf.zeros_like(self.loc)
     scaled_i1 = tf.math.bessel_i1e(broadcast_concentration)
     scaled_i0 = tf.math.bessel_i0e(broadcast_concentration)
     return 1. - scaled_i1 / scaled_i0
Example #8
0
        def collater_fn(batch: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
            """Collater function for relation classification task. See BaseTask.

            Picks `max_batch_mentions` mentions out of all candidate mentions in
            the batch. The subject and object mentions of every sample are
            always placed first; the remaining slots are filled by random
            sampling among the other mentions.
            """
            def per_sample_vector(tensor):
                return tf.reshape(tensor, [bsz])

            new_batch = {
                'text_ids': batch['text_ids'],
                'text_mask': batch['text_mask'],
                'classifier_target': per_sample_vector(batch['target']),
            }

            # Subject / object mentions take priority over all other mentions
            # and are laid out in sample order: subject of the first sample,
            # object of the first sample, subject of the second sample, and so
            # on. Subjects therefore end up at even positions and objects at
            # odd positions of the sampled mentions.
            subject_positions = per_sample_vector(
                batch['subject_mention_indices'])
            object_positions = per_sample_vector(
                batch['object_mention_indices'])

            # Convert per-sample mention positions into positions within the
            # flattened (batch-wide) list of mentions.
            sample_offsets = tf.range(
                bsz,
                dtype=object_positions.dtype) * config.max_mentions_per_sample
            flat_subject_positions = subject_positions + sample_offsets
            flat_object_positions = object_positions + sample_offsets
            flat_target_positions = tf.reshape(
                tf.stack([flat_subject_positions, flat_object_positions],
                         axis=1), [-1])

            # Random score per mention; masked-out mentions get score zero.
            candidate_mask = batch['mention_mask']
            scores = tf.random.uniform(shape=tf.shape(candidate_mask))
            scores = scores * tf.cast(candidate_mask, tf.float32)

            def mark_already_chosen(score_matrix, positions_in_sample):
                # Overwrite the score of one mention per sample with -1 so that
                # it can be recognised (and dropped) after top-k selection.
                rows = tf.range(bsz, dtype=positions_in_sample.dtype)
                coordinates = tf.stack([rows, positions_in_sample], axis=1)
                negative_ones = tf.fill(tf.shape(positions_in_sample), -1.0)
                return tf.tensor_scatter_nd_update(score_matrix, coordinates,
                                                   negative_ones)

            # The scores are still 2D here, so the per-sample (unshifted)
            # positions are the right indices to use.
            for positions_in_sample in (subject_positions, object_positions):
                scores = mark_already_chosen(scores, positions_in_sample)

            # `2 * bsz` slots are already taken by the target mentions.
            num_to_sample = tf.maximum(max_batch_mentions - 2 * bsz, 0)
            flat_scores = tf.reshape(scores, [-1])
            top_scores, top_positions = tf.math.top_k(flat_scores,
                                                      num_to_sample,
                                                      sorted=True)

            # A score of -1 among the top-k means a target mention was picked a
            # second time. Keep only as many leading entries as there are
            # scores different from -1.
            is_new_mention = tf.not_equal(top_scores, -1)
            num_new_mentions = tf.reduce_sum(tf.cast(is_new_mention, tf.int32))
            top_positions = top_positions[:num_new_mentions]

            # Target mentions first, then the randomly sampled ones.
            flat_target_positions = tf.cast(flat_target_positions,
                                            top_positions.dtype)
            sampled_indices = tf.concat(
                [flat_target_positions, top_positions], axis=0)
            sampled_indices = mention_preprocess_utils.dynamic_padding_1d(
                sampled_indices, max_batch_mentions)

            index_dtype = batch['mention_start_positions'].dtype

            def gather_sampled(key):
                flat_values = tf.reshape(batch[key], [n_candidate_mentions])
                return tf.gather(flat_values, sampled_indices)

            for key in ('mention_mask', 'mention_start_positions',
                        'mention_end_positions'):
                new_batch[key] = gather_sampled(key)

            # Index of the sample each flattened mention belongs to.
            sample_of_mention = tf.repeat(tf.range(bsz, dtype=index_dtype),
                                          config.max_mentions_per_sample)
            new_batch['mention_batch_positions'] = tf.gather(
                sample_of_mention, sampled_indices)

            new_batch['mention_target_indices'] = tf.range(2 * bsz,
                                                           dtype=index_dtype)
            new_batch['mention_subject_indices'] = tf.range(
                bsz, dtype=index_dtype) * 2
            new_batch['mention_object_indices'] = tf.range(
                bsz, dtype=index_dtype) * 2 + 1

            if config.get('max_length_with_entity_tokens') is None:
                max_length = encoder_config.max_length
            else:
                # Replaces `text_ids`, `text_mask`, `mention_mask` and
                # `mention_*_positions` with whatever `add_entity_tokens`
                # returns for them.
                new_batch.update(
                    mention_preprocess_utils.add_entity_tokens(
                        text_ids=new_batch['text_ids'],
                        text_mask=new_batch['text_mask'],
                        mention_mask=new_batch['mention_mask'],
                        mention_batch_positions=new_batch[
                            'mention_batch_positions'],
                        mention_start_positions=new_batch[
                            'mention_start_positions'],
                        mention_end_positions=new_batch[
                            'mention_end_positions'],
                        new_length=config.max_length_with_entity_tokens,
                    ))
                max_length = config.max_length_with_entity_tokens

            target_to_source_key = (
                ('mention_target_batch_positions', 'mention_batch_positions'),
                ('mention_target_start_positions', 'mention_start_positions'),
                ('mention_target_end_positions', 'mention_end_positions'),
            )
            for target_key, source_key in target_to_source_key:
                new_batch[target_key] = tf.gather(
                    new_batch[source_key],
                    new_batch['mention_target_indices'])
            new_batch['mention_target_weights'] = tf.ones(2 * bsz)

            # Fake IDs -- some encoders (ReadTwice) need them
            new_batch['mention_target_ids'] = tf.zeros(2 * bsz)

            new_batch['segment_ids'] = tf.zeros_like(new_batch['text_ids'])

            single_row_positions = tf.expand_dims(tf.range(max_length), axis=0)
            new_batch['position_ids'] = tf.tile(single_row_positions, (bsz, 1))

            return new_batch
    def bootstrap_results(self, init_state):
        """Returns an object with the same type as returned by `one_step`.

        Args:
          init_state: `Tensor` or Python `list` of `Tensor`s representing the
            initial state(s) of the Markov chain(s).

        Returns:
          kernel_results: A (possibly nested) `tuple`, `namedtuple` or `list` of
            `Tensor`s representing internal calculations made within this
            function. This includes replica states.

        Raises:
          ValueError: if the state already includes replicas and the statically
            known leading dimension of the first state part differs from the
            statically known leading dimension of `inverse_temperatures`.
          TypeError: if calling `make_kernel_fn` raises a `TypeError` whose
            message mentions 'argument'; it is re-raised with a message about
            the removed `seed` argument.
        """
        with tf.name_scope(
                mcmc_util.make_name(self.name, 'remc', 'bootstrap_results')):
            init_state, unused_is_multipart_state = mcmc_util.prepare_state_parts(
                init_state)

            inverse_temperatures = tf.convert_to_tensor(
                self.inverse_temperatures, name='inverse_temperatures')

            if self._state_includes_replicas:
                it_n_replica = inverse_temperatures.shape[0]
                state_n_replica = init_state[0].shape[0]
                if ((it_n_replica is not None)
                        and (state_n_replica is not None)
                        and (it_n_replica != state_n_replica)):
                    # The placeholders are, in order: the count implied by the
                    # state, then the count implied by `inverse_temperatures`.
                    raise ValueError(
                        'Number of replicas implied by initial state ({}) must equal '
                        'number of replicas implied by inverse_temperatures ({}), but '
                        'did not'.format(state_n_replica, it_n_replica))

            # We will now replicate each of a possible batch of initial states, one
            # for each inverse_temperature. So if init_state=[x, y] of shapes
            # [Sx, Sy] then the new shape is [(T, Sx), (T, Sy)] where (a, b) means
            # concatenation and T=shape(inverse_temperature).
            num_replica = ps.size0(inverse_temperatures)
            replica_shape = ps.convert_to_shape_tensor([num_replica])

            if self._state_includes_replicas:
                replica_states = init_state
            else:
                replica_states = [
                    tf.broadcast_to(  # pylint: disable=g-complex-comprehension
                        x,
                        ps.concat([replica_shape, ps.shape(x)], axis=0),
                        name='replica_states') for x in init_state
                ]

            target_log_prob_for_inner_kernel = _make_replica_target_log_prob_fn(
                target_log_prob_fn=self.target_log_prob_fn,
                inverse_temperatures=inverse_temperatures,
                untempered_log_prob_fn=self.untempered_log_prob_fn,
                tempered_log_prob_fn=self.tempered_log_prob_fn,
            )
            # TODO(b/159636942): Clean up the helpful error msg after 2020-11-10.
            try:
                inner_kernel = self.make_kernel_fn(  # pylint: disable=not-callable
                    target_log_prob_for_inner_kernel)
            except TypeError as e:
                if 'argument' not in str(e):
                    raise
                # Chain explicitly so the original signature error stays
                # attached as the cause.
                raise TypeError(
                    '`ReplicaExchangeMC`s `make_kernel_fn` no longer receives a second '
                    '(`seed`) argument. `TransitionKernel` instances now receive seeds '
                    'via `one_step`.') from e

            replica_results = inner_kernel.bootstrap_results(replica_states)

            pre_swap_replica_target_log_prob = _get_field(
                replica_results, 'target_log_prob')

            replica_and_batch_shape = ps.shape(
                pre_swap_replica_target_log_prob)
            batch_shape = replica_and_batch_shape[1:]

            # NOTE(review): the broadcast value is not read again below; the
            # returned results carry `self.inverse_temperatures` instead.
            # Confirm whether this call is kept on purpose before removing it.
            inverse_temperatures = bu.left_justified_broadcast_to(
                inverse_temperatures, replica_and_batch_shape)

            # Pretend we did a "null swap", which will always be accepted.
            swaps = bu.left_justified_broadcast_to(tf.range(num_replica),
                                                   replica_and_batch_shape)
            # is_swap_accepted.shape = [n_replica, n_replica] + batch_shape.
            is_swap_accepted = distribution_util.rotate_transpose(tf.eye(
                num_replica, batch_shape=batch_shape, dtype=tf.bool),
                                                                  shift=2)

            return ReplicaExchangeMCKernelResults(
                post_swap_replica_states=replica_states,
                pre_swap_replica_results=replica_results,
                post_swap_replica_results=_set_swapped_fields_to_nan(
                    replica_results),
                is_swap_proposed=is_swap_accepted,
                is_swap_accepted=is_swap_accepted,
                is_swap_proposed_adjacent=_sub_diag(is_swap_accepted),
                is_swap_accepted_adjacent=_sub_diag(is_swap_accepted),
                inverse_temperatures=self.inverse_temperatures,
                swaps=swaps,
                step_count=tf.zeros(shape=(), dtype=tf.int32),
                seed=samplers.zeros_seed(),
                potential_energy=tf.zeros_like(
                    pre_swap_replica_target_log_prob),
            )
Example #10
0
    def posterior_marginals(self,
                            observations,
                            mask=None,
                            name='posterior_marginals'):
        """Compute marginal posterior distribution for each state.

    This function computes, for each time step, the marginal
    conditional probability that the hidden Markov model was in
    each possible state given the observations that were made
    at each time step.
    So if the hidden states are `z[0],...,z[num_steps - 1]` and
    the observations are `x[0], ..., x[num_steps - 1]`, then
    this function computes `P(z[i] | x[0], ..., x[num_steps - 1])`
    for all `i` from `0` to `num_steps - 1`.

    This operation is sometimes called smoothing. It uses a form
    of the forward-backward algorithm.

    Note: the behavior of this function is undefined if the
    `observations` argument represents impossible observations
    from the model.

    Args:
      observations: A tensor representing a batch of observations
        made on the hidden Markov model.  The rightmost dimension of this tensor
        gives the steps in a sequence of observations from a single sample from
        the hidden Markov model. The size of this dimension should match the
        `num_steps` parameter of the hidden Markov model object. The other
        dimensions are the dimensions of the batch and these are broadcast with
        the hidden Markov model's parameters.
      mask: optional bool-type `tensor` with rightmost dimension matching
        `num_steps` indicating which observations the result of this
        function should be conditioned on. When the mask has value
        `True` the corresponding observations aren't used.
        if `mask` is `None` then all of the observations are used.
        the `mask` dimensions left of the last are broadcast with the
        hmm batch as well as with the observations.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: "HiddenMarkovModel".

    Returns:
      posterior_marginal: A `Categorical` distribution object representing the
        marginal probability of the hidden Markov model being in each state at
        each step. The rightmost dimension of the `Categorical` distributions
        batch will equal the `num_steps` parameter providing one marginal
        distribution for each step. The other dimensions are the dimensions
        corresponding to the batch of observations.

    Raises:
      ValueError: if rightmost dimension of `observations` does not
      have size `num_steps`.
    """

        with self._name_and_control_scope(name):
            observation_tensor_shape = tf.shape(observations)
            observation_distribution = self.observation_distribution
            underlying_event_rank = tf.size(
                observation_distribution.event_shape_tensor())
            mask_tensor_shape = tf.shape(mask) if mask is not None else None
            num_states = self.transition_distribution.batch_shape_tensor()[-1]

            with self._observation_mask_shape_preconditions(
                    observation_tensor_shape, mask_tensor_shape,
                    underlying_event_rank):
                observation_log_probs = self._observation_log_probs(
                    observations, mask)
                log_init = _extract_log_probs(num_states,
                                              self.initial_distribution)
                log_prob = log_init + observation_log_probs[0]
                log_transition = _extract_log_probs(
                    num_states, self.transition_distribution)
                log_adjoint_prob = tf.zeros_like(log_prob)

                def _scan_multiple_steps_forwards():
                    def forward_step(log_previous_step, log_prob_observation):
                        return _log_vector_matrix(
                            log_previous_step,
                            log_transition) + log_prob_observation

                    forward_log_probs = tf.scan(forward_step,
                                                observation_log_probs[1:],
                                                initializer=log_prob,
                                                name='forward_log_probs')
                    return tf.concat([[log_prob], forward_log_probs], axis=0)

                forward_log_probs = prefer_static.cond(
                    self._num_steps > 1, _scan_multiple_steps_forwards,
                    lambda: tf.convert_to_tensor([log_prob]))

                total_log_prob = tf.reduce_logsumexp(forward_log_probs[-1],
                                                     axis=-1)

                def _scan_multiple_steps_backwards():
                    """Perform `scan` operation when `num_steps` > 1."""
                    def backward_step(log_previous_step, log_prob_observation):
                        return _log_matrix_vector(
                            log_transition,
                            log_prob_observation + log_previous_step)

                    backward_log_adjoint_probs = tf.scan(
                        backward_step,
                        observation_log_probs[1:],
                        initializer=log_adjoint_prob,
                        reverse=True,
                        name='backward_log_adjoint_probs')

                    return tf.concat(
                        [backward_log_adjoint_probs, [log_adjoint_prob]],
                        axis=0)

                backward_log_adjoint_probs = prefer_static.cond(
                    self._num_steps > 1, _scan_multiple_steps_backwards,
                    lambda: tf.convert_to_tensor([log_adjoint_prob]))

                log_likelihoods = forward_log_probs + backward_log_adjoint_probs

                marginal_log_probs = distribution_util.move_dimension(
                    log_likelihoods - total_log_prob[..., tf.newaxis], 0, -2)

                return categorical.Categorical(logits=marginal_log_probs)
Example #11
0
 def _inverse_log_det_jacobian(self, y):
     """Return a zero inverse log-det-Jacobian for `y`.

     Args:
       y: tensor whose last axis is indexed away to obtain the result shape.

     Returns:
       `tf.zeros_like(y[..., 0])`, i.e. zeros with the shape and dtype of
       `y` minus its last axis.
     """
     first_component = y[..., 0]
     return tf.zeros_like(first_component)
Example #12
0
    def _observation_log_probs(self, observations, mask):
        """Evaluate per-state log probs of `observations`, step axis first.

        Args:
          observations: tensor laid out as `batch_o [M] E` (notation below).
          mask: optional bool tensor laid out as `batch_m [M]`, or `None`.
            Wherever the mask is `True` the log prob is replaced by zero.

        Returns:
          Tensor of log probs laid out as `[M] batch [N]`.
        """
        # Notation:
        #     E  the underlying event shape
        #     M  the number of steps in the HMM
        #     N  the number of states of the HMM
        #
        # Incoming layouts:
        #
        #     observations : batch_o [M] E
        #     mask         : batch_m [M]      (when present)
        #
        # This HMM distribution itself has batch shape batch_d. The three
        # batch shapes (batch_o, batch_m, batch_d) are broadcast together
        # into a single shape, `batch`.
        #
        # The step dimension is moved to the front so the result is
        # suitable for folding or scanning over.
        #
        # `log_prob` has to be evaluated for every state an observation
        # could correspond to, so a size-1 axis is inserted to broadcast
        # against the states:
        #
        #     observations          : [M] batch [1] [E]
        #     observation_log_probs : [M] batch [N]     (after `log_prob`)
        #
        # The mask is used to select from the log probs, so it is
        # broadcast and reshaped up to the matching layout
        #
        #     mask : [M] batch [N]

        emission_dist = self.observation_distribution
        event_rank = tf.size(emission_dist.event_shape_tensor())
        obs_shape = tf.shape(observations)

        # Index separating the batch dims from the trailing `[M] E` dims.
        split_at = -1 - event_rank
        obs_batch_shape = obs_shape[:split_at]
        steps_and_event_shape = obs_shape[split_at:]

        has_mask = mask is not None
        mask_batch_shape = tf.shape(mask)[:-1] if has_mask else None

        full_batch_shape = tf.broadcast_dynamic_shape(
            obs_batch_shape, self.batch_shape_tensor())
        if has_mask:
            full_batch_shape = tf.broadcast_dynamic_shape(
                full_batch_shape, mask_batch_shape)

        observations = tf.broadcast_to(
            observations,
            tf.concat([full_batch_shape, steps_and_event_shape], axis=0))

        # Position of the step axis within the broadcast observations.
        step_axis = tf.rank(observations) - event_rank - 1
        observations = distribution_util.move_dimension(
            observations, step_axis, 0)
        # Size-1 axis just before the event dims, to broadcast over states.
        observations = tf.expand_dims(observations, step_axis + 1)
        log_probs = emission_dist.log_prob(observations)

        if not has_mask:
            return log_probs

        step_last_mask = tf.broadcast_to(
            mask, tf.concat([full_batch_shape, [self._num_steps]], axis=0))
        step_first_mask = distribution_util.move_dimension(
            step_last_mask, -1, 0)
        # Masked-out observations contribute a log prob of zero.
        return tf.where(step_first_mask[..., tf.newaxis],
                        tf.zeros_like(log_probs),
                        log_probs)
Example #13
0
def impute_missing_values(model,
                          observed_time_series,
                          parameter_samples,
                          include_observation_noise=False):
    """Imputes the missing values of a time series via posterior inference.

    Computes the posterior marginals `p(latent state | observations)` from
    the series at its observed timesteps (a missingness mask should be
    supplied by wrapping the series in `tfp.sts.MaskedTimeSeries`), then
    pushes that posterior back through the observation model to obtain a
    predictive distribution on the observed time series. At unobserved
    steps this is an imputed value; at the other steps it is interpreted as
    the model's estimate of the underlying noise-free series.

    Args:
      model: `tfp.sts.Sum` instance defining an additive STS model.
      observed_time_series: `float` `Tensor` of shape
        `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])`,
        where `sample_shape` corresponds to i.i.d. observations and the
        trailing `[1]` dimension may (optionally) be omitted if
        `num_timesteps > 1`. May optionally be an instance of
        `tfp.sts.MaskedTimeSeries` including a mask `Tensor` to encode the
        locations of missing observations.
      parameter_samples: Python `list` of `Tensors` representing posterior
        samples of model parameters, with shapes `[concat([
        [num_posterior_draws], param.prior.batch_shape,
        param.prior.event_shape]) for param in model.parameters]`. This may
        optionally also be a map (Python `dict`) of parameter names to
        `Tensor` values.
      include_observation_noise: If `False`, the imputed uncertainties
        represent the model's estimate of the noise-free time series at
        each timestep. If `True`, they represent the model's estimate of
        the range of values that could be *observed* at each timestep,
        including any i.i.d. observation noise.
        Default value: `False`.

    Returns:
      imputed_series_dist: a `tfd.MixtureSameFamily` instance with event
        shape `[num_timesteps]` and batch shape
        `concat([sample_shape, model.batch_shape])`, with
        `num_posterior_draws` mixture components.

    #### Example

    To specify a time series with missing values, use
    `tfp.sts.MaskedTimeSeries`:

    ```python
    time_series_with_nans = [-1., 1., np.nan, 2.4, np.nan, 5]
    observed_time_series = tfp.sts.MaskedTimeSeries(
      time_series=time_series_with_nans,
      is_missing=tf.math.is_nan(time_series_with_nans))
    ```

    Masked time series can be passed to `tfp.sts` methods in place of an
    `observed_time_series` `Tensor`:

    ```python
    # Build model using observed time series to set heuristic priors.
    linear_trend_model = tfp.sts.LocalLinearTrend(
      observed_time_series=observed_time_series)
    model = tfp.sts.Sum([linear_trend_model],
                        observed_time_series=observed_time_series)

    # Fit model to data
    parameter_samples, _ = tfp.sts.fit_with_hmc(model, observed_time_series)
    ```

    After fitting a model, `impute_missing_values` will return a
    distribution over the imputed series:

    ```python
    # Impute missing values
    imputed_series_distribution = tfp.sts.impute_missing_values(
      model, observed_time_series, parameter_samples=parameter_samples)
    print('imputed means and stddevs: ',
          imputed_series_distribution.mean(),
          imputed_series_distribution.stddev())
    ```

    """
    with tf.name_scope('impute_missing_values'):
        series, is_missing = (
            sts_util.canonicalize_observed_time_series_with_mask(
                observed_time_series))

        # Smooth over the training timesteps to obtain the predictive means
        # and variances.
        series_shape = dist_util.prefer_static_value(tf.shape(series))
        ssm = model.make_state_space_model(num_timesteps=series_shape[-2],
                                           param_vals=parameter_samples)
        latent_means, latent_covs = ssm.posterior_marginals(
            series, mask=is_missing)

        observation_means, observation_covs = ssm.latents_to_observations(
            latent_means=latent_means, latent_covs=latent_covs)

        if not include_observation_noise:
            # Pushing zero-variance latents forward isolates the variance
            # contributed by the observation noise alone.
            _, noise_only_covs = ssm.latents_to_observations(
                latent_means=latent_means,
                latent_covs=tf.zeros_like(latent_covs))
            # Remove the observation noise that the original pushforward
            # added. Note that this could cause numerical issues if the
            # observation noise is very large. If that becomes an issue the
            # subtraction could be avoided by plumbing
            # `include_observation_noise` through
            # `lgssm.latents_to_observations`.
            observation_covs -= noise_only_covs

        # Squeeze dims to go from the LGSSM's event shape
        # `[num_timesteps, 1]` to a scalar time series.
        scalar_means = observation_means[..., 0]
        scalar_variances = observation_covs[..., 0, 0]
        return sts_util.mix_over_posterior_draws(means=scalar_means,
                                                 variances=scalar_variances)
Example #14
0
def forecast(model,
             observed_time_series,
             parameter_samples,
             num_steps_forecast,
             include_observation_noise=True):
    """Builds the predictive distribution over future observations.

    Given samples from the posterior over parameters, returns the predictive
    distribution over future observations for `num_steps_forecast`
    timesteps.

    Args:
      model: An instance of `StructuralTimeSeries` representing a
        time-series model. This represents a joint distribution over
        time-series and their parameters with batch shape `[b1, ..., bN]`.
      observed_time_series: `float` `Tensor` of shape
        `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])`,
        where `sample_shape` corresponds to i.i.d. observations and the
        trailing `[1]` dimension may (optionally) be omitted if
        `num_timesteps > 1`. May optionally be an instance of
        `tfp.sts.MaskedTimeSeries` including a mask `Tensor` to encode the
        locations of missing observations.
      parameter_samples: Python `list` of `Tensors` representing posterior
        samples of model parameters, with shapes
        `[concat([[num_posterior_draws], param.prior.batch_shape,
        param.prior.event_shape]) for param in model.parameters]`. This may
        optionally also be a map (Python `dict`) of parameter names to
        `Tensor` values.
      num_steps_forecast: scalar `int` `Tensor` number of steps to forecast.
      include_observation_noise: Python `bool` indicating whether the
        forecast distribution should include uncertainty from observation
        noise. If `True`, the forecast is over future observations; if
        `False`, the forecast is over future values of the latent
        noise-free time series.
        Default value: `True`.

    Returns:
      forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
        `[num_steps_forecast, 1]` and batch shape
        `concat([sample_shape, model.batch_shape])`, with
        `num_posterior_draws` mixture components.

    #### Examples

    Suppose we've built a model and fit it to data using HMC:

    ```python
      day_of_week = tfp.sts.Seasonal(
          num_seasons=7,
          observed_time_series=observed_time_series,
          name='day_of_week')
      local_linear_trend = tfp.sts.LocalLinearTrend(
          observed_time_series=observed_time_series,
          name='local_linear_trend')
      model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                          observed_time_series=observed_time_series)

      samples, kernel_results = tfp.sts.fit_with_hmc(
          model, observed_time_series)
    ```

    Passing the posterior samples into `forecast`, we construct a forecast
    distribution:

    ```python
      forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                       parameter_samples=samples,
                                       num_steps_forecast=50)

      forecast_mean = forecast_dist.mean()[..., 0]  # shape: [50]
      forecast_scale = forecast_dist.stddev()[..., 0]  # shape: [50]
      forecast_samples = forecast_dist.sample(10)[..., 0]  # shape: [10, 50]
    ```

    If using variational inference instead of HMC, we'd construct a forecast
    using samples from the variational posterior:

    ```python
      (variational_loss,
       variational_distributions) = tfp.sts.build_factored_variational_loss(
         model=model, observed_time_series=observed_time_series)

      # OMITTED: take steps to optimize variational loss

      samples = {k: q.sample(30)
                 for (k, q) in variational_distributions.items()}
      forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                       parameter_samples=samples,
                                       num_steps_forecast=50)
    ```

    We can visualize the forecast by plotting:

    ```python
      from matplotlib import pylab as plt
      def plot_forecast(observed_time_series,
                        forecast_mean,
                        forecast_scale,
                        forecast_samples):
        plt.figure(figsize=(12, 6))

        num_steps = observed_time_series.shape[-1]
        num_steps_forecast = forecast_mean.shape[-1]
        num_steps_train = num_steps - num_steps_forecast

        c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
        plt.plot(np.arange(num_steps), observed_time_series,
                 lw=2, color=c1, label='ground truth')

        forecast_steps = np.arange(num_steps_train,
                         num_steps_train+num_steps_forecast)
        plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2,
                 alpha=0.1)
        plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2,
                 label='forecast')
        plt.fill_between(forecast_steps,
                         forecast_mean - 2 * forecast_scale,
                         forecast_mean + 2 * forecast_scale, color=c2,
                         alpha=0.2)

        plt.xlim([0, num_steps])
        plt.legend()

      plot_forecast(observed_time_series,
                    forecast_mean=forecast_mean,
                    forecast_scale=forecast_scale,
                    forecast_samples=forecast_samples)
    ```

    """

    with tf.name_scope('forecast'):
        series, is_missing = (
            sts_util.canonicalize_observed_time_series_with_mask(
                observed_time_series))

        # Filter over the observed timesteps to obtain the latent state
        # posterior at timestep T+1 (i.e., the final filtering distribution,
        # pushed through the transition model). That distribution is the
        # prior for the forecast model ("today's prior is yesterday's
        # posterior").
        num_observed_steps = dist_util.prefer_static_value(
            tf.shape(series))[-2]
        filtering_ssm = model.make_state_space_model(
            num_timesteps=num_observed_steps, param_vals=parameter_samples)
        (_, _, _, predictive_means, predictive_covs, _,
         _) = filtering_ssm.forward_filter(series, mask=is_missing)

        # Build a batch of state-space models over the forecast period.
        # Because MixtureSameFamily is used to mix over the posterior draws,
        # the `[num_posterior_draws]` batch dimension has to be moved from
        # the leftmost to the rightmost side of the model's batch shape.
        # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
        # arbitrary axis, and eliminate `move_dimension` calls here.
        param_map = model._canonicalize_param_vals_as_map(
            parameter_samples)  # pylint: disable=protected-access
        reordered_params = {}
        for param in model.parameters:
            draws_axis = -(1 + _prefer_static_event_ndims(param.prior))
            reordered_params[param.name] = dist_util.move_dimension(
                param_map[param.name], 0, draws_axis)

        final_step_mean = predictive_means[..., -1, :]
        final_step_cov = predictive_covs[..., -1, :, :]
        forecast_prior = tfd.MultivariateNormalFullCovariance(
            loc=dist_util.move_dimension(final_step_mean, 0, -2),
            covariance_matrix=dist_util.move_dimension(
                final_step_cov, 0, -3))

        # Ugly hack: because `num_posterior_draws` now sits in the trailing
        # (rather than leading) dimension of the parameters, the parameter
        # batch shapes no longer broadcast against the `constant_offset`
        # attribute used in `sts.Sum` models. This is fixed by manually
        # adding an extra broadcasting dim to `constant_offset` if present.
        # The root cause of this hack is that the param dimensions were
        # changed above, so the params being passed are 'invalid' in the
        # sense that they don't match the shapes of the model's param
        # priors. The fix (as above) will be to update MixtureSameFamily so
        # changing param dimensions can be avoided altogether.
        # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
        # arbitrary axis, and eliminate this hack.
        offset_kwargs = {}
        if hasattr(model, 'constant_offset'):
            offset = tf.convert_to_tensor(value=model.constant_offset,
                                          dtype=forecast_prior.dtype)
            offset_kwargs['constant_offset'] = offset[..., tf.newaxis]

        if not include_observation_noise:
            # Forecast the latent noise-free series by zeroing the noise
            # scale.
            noise_key = 'observation_noise_scale'
            reordered_params[noise_key] = tf.zeros_like(
                reordered_params[noise_key])

        # It is assumed that any STS model with a `constant_offset`
        # attribute allows it to be overridden as a kwarg. This is currently
        # just `sts.Sum`.
        # TODO(b/120245392): when kwargs hack is removed, switch back to
        # calling the public version of `_make_state_space_model`.
        forecast_ssm = model._make_state_space_model(  # pylint: disable=protected-access
            num_timesteps=num_steps_forecast,
            param_map=reordered_params,
            initial_state_prior=forecast_prior,
            initial_step=num_observed_steps,
            **offset_kwargs)

        num_posterior_draws = dist_util.prefer_static_value(
            forecast_ssm.batch_shape_tensor())[-1]
        # Zero logits give every posterior draw equal mixture weight.
        uniform_over_draws = tfd.Categorical(
            logits=tf.zeros([num_posterior_draws],
                            dtype=forecast_ssm.dtype))
        return tfd.MixtureSameFamily(
            mixture_distribution=uniform_over_draws,
            components_distribution=forecast_ssm)
 def drift_from_total_drift(t):
     """Apply `gradient.fwd_gradient` to the total drift accumulated to `t`.

     The total drift is measured from a start time of zero (a zero tensor
     shaped like `t`) using the enclosing scope's `total_drift_fn`.
     NOTE(review): presumably `fwd_gradient` yields the forward-mode
     derivative with respect to the end time -- confirm against the helper.
     """
     zero_time = tf.zeros_like(t)

     def total_drift_up_to(end_time):
         return total_drift_fn(zero_time, end_time)

     return gradient.fwd_gradient(total_drift_up_to, t)
Example #16
0
def sample_lkj(
    num_samples,
    dimension,
    concentration,
    cholesky_space=False,
    seed=None,
    name=None):
  """Returns a Tensor of samples from an LKJ distribution.

  Args:
    num_samples: Python `int`. The number of samples to draw.
    dimension: Python `int`. The dimension of correlation matrices.
    concentration: `Tensor` representing the concentration of the LKJ
      distribution. Must have a floating dtype.
    cholesky_space: Python `bool`. Whether to take samples from LKJ or
      Chol(LKJ).
    seed: Python integer seed for RNG
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., 'sample_lkj').

  Returns:
    samples: A Tensor of correlation matrices (or Cholesky factors of
      correlation matrices if `cholesky_space = True`) with shape
      `[n] + B + [D, D]`, where `B` is the shape of the `concentration`
      parameter, and `D` is the `dimension`.

  Raises:
    ValueError: If `dimension` is negative.
    TypeError: If `concentration` does not have a floating dtype.
  """
  if dimension < 0:
    raise ValueError(
        'Cannot sample negative-dimension correlation matrices.')
  # Notation below: B is the batch shape, i.e., tf.shape(concentration)

  # We need 1 seed for beta corr12, and 2 per loop iter.
  num_seeds = 1 + 2 * max(0, dimension - 2)
  seeds = list(samplers.split_seed(seed, n=num_seeds, salt='sample_lkj'))
  # Honor a caller-supplied `name`; fall back to the default scope name only
  # when none is given. (`'sample_lkj' or name` would always pick the
  # default because a non-empty string literal is truthy.)
  with tf.name_scope(name or 'sample_lkj'):
    concentration = tf.convert_to_tensor(concentration)
    if not dtype_util.is_floating(concentration.dtype):
      raise TypeError(
          'The concentration argument should have floating type, not '
          '{}'.format(dtype_util.name(concentration.dtype)))

    concentration = _replicate(num_samples, concentration)
    concentration_shape = tf.shape(concentration)
    if dimension <= 1:
      # For any dimension <= 1, there is only one possible correlation matrix.
      shape = tf.concat([
          concentration_shape, [dimension, dimension]], axis=0)
      return tf.ones(shape=shape, dtype=concentration.dtype)
    beta_conc = concentration + (dimension - 2.) / 2.
    beta_dist = beta.Beta(concentration1=beta_conc, concentration0=beta_conc)

    # Note that the sampler below deviates from [1], by doing the sampling in
    # cholesky space. This does not change the fundamental logic of the
    # sampler, but does speed up the sampling.

    # This is the correlation coefficient between the first two dimensions.
    # This is also `r` in reference [1].
    corr12 = 2. * beta_dist.sample(seed=seeds.pop()) - 1.

    # Below we construct the Cholesky of the initial 2x2 correlation matrix,
    # which is of the form:
    # [[1, 0], [r, sqrt(1 - r**2)]], where r is the correlation between the
    # first two dimensions.
    # This is the top-left corner of the cholesky of the final sample.
    first_row = tf.concat([
        tf.ones_like(corr12)[..., tf.newaxis],
        tf.zeros_like(corr12)[..., tf.newaxis]], axis=-1)
    second_row = tf.concat([
        corr12[..., tf.newaxis],
        tf.sqrt(1 - corr12**2)[..., tf.newaxis]], axis=-1)

    chol_result = tf.concat([
        first_row[..., tf.newaxis, :],
        second_row[..., tf.newaxis, :]], axis=-2)

    for n in range(2, dimension):
      # Loop invariant: on entry, result has shape B + [n, n]
      beta_conc = beta_conc - 0.5
      # norm is y in reference [1].
      norm = beta.Beta(
          concentration1=n/2.,
          concentration0=beta_conc
      ).sample(seed=seeds.pop())
      # distance shape: B + [1] for broadcast
      distance = tf.sqrt(norm)[..., tf.newaxis]
      # direction is u in reference [1].
      # direction shape: B + [n]
      direction = _uniform_unit_norm(
          n, concentration_shape, concentration.dtype,
          seed=seeds.pop())
      # raw_correlation is w in reference [1].
      raw_correlation = distance * direction  # shape: B + [n]

      # This is the next row in the cholesky of the result,
      # which differs from the construction in reference [1].
      # In the reference, the new row `z` = chol_result @ raw_correlation^T
      # = C @ raw_correlation^T (where as short hand we use C = chol_result).
      # We prove that the below equation is the right row to add to the
      # cholesky, by showing equality with reference [1].
      # Let S be the sample constructed so far, and let `z` be as in
      # reference [1]. Then at this iteration, the new sample S' will be
      # [[S z^T]
      #  [z 1]]
      # In our case we have the cholesky decomposition factor C, so
      # we want our new row x (same size as z) to satisfy:
      #  [[S z^T]  [[C 0]    [[C^T  x^T]         [[CC^T  Cx^T]
      #   [z 1]] =  [x k]]    [0     k]]  =       [xC^t   xx^T + k**2]]
      # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
      # we have that x = raw_correlation. Also 1 = xx^T + k**2, so k
      # = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2) = sqrt(1 -
      # distance**2).
      new_row = tf.concat(
          [raw_correlation, tf.sqrt(1. - norm[..., tf.newaxis])], axis=-1)

      # Finally add this new row, by growing the cholesky of the result.
      chol_result = tf.concat([
          chol_result,
          tf.zeros_like(chol_result[..., 0][..., tf.newaxis])], axis=-1)

      chol_result = tf.concat(
          [chol_result, new_row[..., tf.newaxis, :]], axis=-2)

    # Internal sanity check that the seed budget computed above matches the
    # number of draws made. The count is formatted explicitly: concatenating
    # a `str` with `len(seeds)` would raise `TypeError` and mask the failure.
    assert not seeds, 'Did not use all seeds: {}'.format(len(seeds))
    if cholesky_space:
      return chol_result

    result = tf.matmul(chol_result, chol_result, transpose_b=True)
    # The diagonal for a correlation matrix should always be ones. Due to
    # numerical instability the matmul might not achieve that, so manually set
    # these to ones.
    result = tf.linalg.set_diag(
        result, tf.ones(shape=tf.shape(result)[:-1], dtype=result.dtype))
    # This sampling algorithm can produce near-PSD matrices on which standard
    # algorithms such as `tf.cholesky` or `tf.linalg.self_adjoint_eigvals`
    # fail. Specifically, as documented in b/116828694, around 2% of trials
    # of 900,000 5x5 matrices (distributed according to 9 different
    # concentration parameter values) contained at least one matrix on which
    # the Cholesky decomposition failed.
    return result
Example #17
0
 def _cdf(self, x):
     """Evaluate `von_mises_cdf` at the standardized value of `x`.

     `self._z(x)` and `self.concentration` are each added to a zero tensor
     shaped like the other, so both arguments handed to `von_mises_cdf`
     carry the mutually broadcast shape.
     """
     standardized = self._z(x)
     standardized = standardized + tf.zeros_like(self.concentration)
     widened_concentration = self.concentration + tf.zeros_like(standardized)
     return von_mises_cdf(standardized, widened_concentration)
def european_option_price(*,
                          strikes=None,
                          expiries=None,
                          is_call_options=None,
                          variances=None,
                          kappas=None,
                          thetas=None,
                          sigmas=None,
                          rhos=None,
                          spots=None,
                          forwards=None,
                          discount_rates=None,
                          continuous_dividends=None,
                          cost_of_carries=None,
                          discount_factors=None,
                          integration_method=None,
                          dtype=None,
                          name=None,
                          **kwargs):
    """Calculates European option prices under the Heston model.

  Heston originally published in 1993 his eponymous model [3]. He provided
  a semi-analytical formula for pricing European option via Fourier transform
  under his model. However, as noted by Albrecher [1], the characteristic
  function used in Heston paper can suffer numerical issues because of the
  discontinuous nature of the square root function in the complex plane, and a
  second version of the characteristic function which doesn't suffer this
  shortcoming should be used instead. Attari [2] further refined the numerical
  method by reducing the number of numerical integrations (only one Fourier
  transform instead of two) and with an integrand function decaying
  quadratically instead of linearly. Attari's numerical method is implemented
  here.

  Heston model:
  ```
    dF/F = sqrt(V) * dW_1
    dV = kappa * (theta - V) * dt + sigma * sqrt(V) * dW_2
    <dW_1,dW_2> = rho * dt
  ```
  The variance V follows a square root process.

  #### Example
  ```python
  import tf_quant_finance as tff
  import numpy as np
  prices = tff.models.heston.approximations.european_option_price(
      variances=0.11,
      strikes=102.0,
      expiries=1.2,
      forwards=100.0,
      is_call_options=True,
      kappas=2.0,
      thetas=0.5,
      sigmas=0.15,
      rhos=0.3,
      discount_factors=1.0,
      dtype=np.float64)
  # Expected print output of prices:
  # 24.82219619
  ```
  #### References
  [1] Hansjorg Albrecher, The Little Heston Trap
  https://perswww.kuleuven.be/~u0009713/HestonTrap.pdf
  [2] Mukarram Attari, Option Pricing Using Fourier Transforms: A Numerically
  Efficient Simplification
  https://papers.ssrn.com/sol3/papers.cfm?abstract_id=520042
  [3] Steven L. Heston, A Closed-Form Solution for Options with Stochastic
  Volatility with Applications to Bond and Currency Options
  http://faculty.baruch.cuny.edu/lwu/890/Heston93.pdf

  Args:
    strikes: A real `Tensor` of any shape and dtype. The strikes of the options
      to be priced.
    expiries: A real `Tensor` of the same dtype and compatible shape as
      `strikes`.  The expiry of each option.
    is_call_options: A boolean `Tensor` of a shape compatible with
      `strikes`. Indicates whether the option is a call (if True) or a put
      (if False). If not supplied, call options are assumed.
    variances: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The initial value of the variance.
    kappas: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The mean reversion strength of the variance square root
      process.
    thetas: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The mean reversion level of the variance square root process.
    sigmas: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The volatility of the variance square root process (volatility
      of volatility).
    rhos: A real `Tensor` of the same dtype and compatible shape as
      `strikes`. The correlation between spot and variance.
    spots: A real `Tensor` of any shape that broadcasts to the shape of
      `strikes`. The current spot price of the underlying. Either this
      argument or the `forwards` (but not both) must be supplied.
    forwards: A real `Tensor` of any shape that broadcasts to the shape of
      `strikes`. The forwards to maturity. Either this argument or the
      `spots` must be supplied but both must not be supplied.
    discount_rates: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      If not `None`, discount factors are calculated as e^(-rT),
      where r are the discount rates, or risk free rates. At most one of
      discount_rates and discount_factors can be supplied.
      Default value: `None`, equivalent to r = 0 and discount factors = 1 when
      discount_factors also not given.
    continuous_dividends: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      If not `None`, `cost_of_carries` is calculated as r - q,
      where r are the `discount_rates` and q is `continuous_dividends`. At
      most one of this and `cost_of_carries` can be given.
      Default value: `None`, equivalent to q = 0.
    cost_of_carries: An optional real `Tensor` of same dtype as the
      `strikes` and of the shape that broadcasts with `strikes`.
      Cost of storing a physical commodity, the cost of interest paid when
      long, or the opportunity cost, or the cost of paying dividends when short.
      If not `None`, and `spots` is supplied, used to calculate forwards from
      `spots`: F = e^(bT) * S, where F is the forwards price, b is the cost of
      carries, T is expiries and S is the spot price. If `None`, value assumed
      to be equal to the `discount_rates` - `continuous_dividends`.
      Default value: `None`, equivalent to b = r - q.
    discount_factors: An optional real `Tensor` of same dtype as the
      `strikes`. If not `None`, these are the discount factors to expiry
      (i.e. e^(-rT)). At most one of discount_rates and discount_factors can
      be supplied. If neither is given, no discounting is applied (i.e. the
      undiscounted option price is returned). If `spots` is supplied,
      `discount_factors` is not `None` and `cost_of_carries` is `None`, the
      rates implied by the discount factors (-log(discount_factors) /
      expiries) are also used to compute the forwards to expiry.
      Default value: `None`, in which case the discount factors are computed
      as e^(-rT) from the discount rates.
    integration_method: An instance of `math.integration.IntegrationMethod`.
      Default value: `None` which maps to the Simpsons integration rule.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for conversion
      of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: None which maps to the default dtype inferred by
      TensorFlow.
    name: str. The name for the ops created by this function.
      Default value: None which is mapped to the default name
      `eu_option_price`.
    **kwargs: Additional parameters for the underlying integration method.
      If not supplied and `integration_method` is Simpson, then uses
      `IntegrationMethod.COMPOSITE_SIMPSONS_RULE` with `num_points=1001`, and
      bounds `lower=1e-9`, `upper=100`.

  Returns:
    A `Tensor` of the same shape as the input data which is the price of
    European options under the Heston model.

  Raises:
    ValueError: If both or neither of `spots` and `forwards` are supplied, if
      both `discount_rates` and `discount_factors` are supplied, or if both
      `continuous_dividends` and `cost_of_carries` are supplied.
  """
    # Exactly one of `spots` / `forwards` is required.
    if (spots is None) == (forwards is None):
        raise ValueError(
            'Either spots or forwards must be supplied but not both.')
    if (discount_rates is not None) and (discount_factors is not None):
        raise ValueError(
            'At most one of discount_rates and discount_factors may '
            'be supplied')
    if (continuous_dividends is not None) and (cost_of_carries is not None):
        raise ValueError(
            'At most one of continuous_dividends and cost_of_carries '
            'may be supplied')

    with tf.compat.v1.name_scope(name, default_name='eu_option_price'):
        strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
        # From here on every other input is converted to the dtype of
        # `strikes`.
        dtype = strikes.dtype
        expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
        kappas = tf.convert_to_tensor(kappas, dtype=dtype, name='kappas')
        thetas = tf.convert_to_tensor(thetas, dtype=dtype, name='thetas')
        sigmas = tf.convert_to_tensor(sigmas, dtype=dtype, name='sigmas')
        rhos = tf.convert_to_tensor(rhos, dtype=dtype, name='rhos')
        variances = tf.convert_to_tensor(variances,
                                         dtype=dtype,
                                         name='variances')

        if discount_factors is not None:
            discount_factors = tf.convert_to_tensor(discount_factors,
                                                    dtype=dtype,
                                                    name='discount_factors')

        # Resolve the discount rates: explicit rates win, otherwise they are
        # implied by the discount factors, otherwise they are zero.
        if discount_rates is not None:
            discount_rates = tf.convert_to_tensor(discount_rates,
                                                  dtype=dtype,
                                                  name='discount_rates')
        elif discount_factors is not None:
            discount_rates = -tf.math.log(discount_factors) / expiries
        else:
            discount_rates = tf.convert_to_tensor(0.0,
                                                  dtype=dtype,
                                                  name='discount_rates')

        if continuous_dividends is None:
            continuous_dividends = tf.convert_to_tensor(
                0.0, dtype=dtype, name='continuous_dividends')

        # Cost of carry defaults to b = r - q.
        if cost_of_carries is not None:
            cost_of_carries = tf.convert_to_tensor(cost_of_carries,
                                                   dtype=dtype,
                                                   name='cost_of_carries')
        else:
            cost_of_carries = discount_rates - continuous_dividends

        if discount_factors is None:
            discount_factors = tf.exp(-discount_rates * expiries)  # pylint: disable=invalid-unary-operand-type

        # Forwards are either given or derived from spots: F = S * e^(bT).
        if forwards is not None:
            forwards = tf.convert_to_tensor(forwards,
                                            dtype=dtype,
                                            name='forwards')
        else:
            spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
            forwards = spots * tf.exp(cost_of_carries * expiries)

        # Cast as complex for the characteristic function calculation
        # NOTE: despite the `_real` suffix, these are complex tensors whose
        # imaginary part is zero.
        expiries_real = tf.complex(expiries, tf.zeros_like(expiries))
        kappas_real = tf.complex(kappas, tf.zeros_like(kappas))
        thetas_real = tf.complex(thetas, tf.zeros_like(thetas))
        sigmas_real = tf.complex(sigmas, tf.zeros_like(sigmas))
        rhos_real = tf.complex(rhos, tf.zeros_like(rhos))
        variances_real = tf.complex(variances, tf.zeros_like(variances))

        # Prepare inputs to build an integrand_function
        # A trailing axis is appended to each parameter; presumably the
        # integration points `u` occupy that last axis -- TODO confirm
        # against `integration.integrate`.
        expiries_real = tf.expand_dims(expiries_real, -1)
        kappas_real = tf.expand_dims(kappas_real, -1)
        thetas_real = tf.expand_dims(thetas_real, -1)
        sigmas_real = tf.expand_dims(sigmas_real, -1)
        rhos_real = tf.expand_dims(rhos_real, -1)
        variances_real = tf.expand_dims(variances_real, -1)
        if integration_method is None:
            integration_method = _COMPOSITE_SIMPSONS_RULE
        # Fill in Simpson defaults only for the settings the caller did not
        # pass through `kwargs`.
        if integration_method == _COMPOSITE_SIMPSONS_RULE:
            if 'num_points' not in kwargs:
                kwargs['num_points'] = 1001
            if 'lower' not in kwargs:
                kwargs['lower'] = 1e-9
            if 'upper' not in kwargs:
                kwargs['upper'] = 100

        def char_fun(u):
            # Using 'second formula' for the (first) characteristic function of
            # log( spot_T / forwards )
            # (noted 'phi_2' in 'The Little Heston Trap', (Albrecher))
            # `u_real` is u + 0i and `u_imag` is 0 + ui.
            u_real = tf.complex(u, tf.zeros_like(u))
            u_imag = tf.complex(tf.zeros_like(u), u)
            s = rhos_real * sigmas_real * u_imag
            # TODO(b/156221007): investigate why s_kappa = (s - kappas_real)**2 leads
            # to a wrong result in graph mode.
            s_kappa = (s - kappas_real) * s - (s - kappas_real) * kappas_real
            d = s_kappa - sigmas_real**2 * (-u_imag - u_real**2)
            d = tf.math.sqrt(d)
            g = (kappas_real - s - d) / (kappas_real - s + d)
            a = kappas_real * thetas_real
            h = g * tf.math.exp(-d * expiries_real)
            m = 2 * tf.math.log((1 - h) / (1 - g))
            c = (a / sigmas_real**2) * (
                (kappas_real - s - d) * expiries_real - m)
            e = (1 - tf.math.exp(-d * expiries_real))
            d_new = (kappas_real - s - d) / sigmas_real**2 * (e / (1 - h))
            return tf.math.exp(c + d_new * variances_real)

        def integrand_function(u, k):
            # Note that with [2], integrand is in 1 / u**2,
            # which converges faster than Heston 1993 (which is in 1 /u)
            char_fun_complex = char_fun(u)
            char_fun_real_part = tf.math.real(char_fun_complex)
            char_fun_imag_part = tf.math.imag(char_fun_complex)

            a = (char_fun_real_part + char_fun_imag_part / u) * tf.math.cos(
                u * k)
            b = (char_fun_imag_part - char_fun_real_part / u) * tf.math.sin(
                u * k)

            return (a + b) / (1.0 + u * u)

        # Log-moneyness log(K / F), with a trailing axis matching the one
        # added to the model parameters above.
        k = tf.expand_dims(tf.math.log(strikes / forwards), axis=-1)

        integral = integration.integrate(lambda u: integrand_function(u, k),
                                         method=integration_method,
                                         dtype=dtype,
                                         **kwargs)
        undiscounted_call_prices = forwards - strikes * (0.5 + integral / _PI_)

        # With no call/put flags supplied, every option is priced as a call.
        if is_call_options is None:
            return undiscounted_call_prices * discount_factors
        else:
            is_call_options = tf.convert_to_tensor(is_call_options,
                                                   dtype=tf.bool,
                                                   name='is_call_options')
            # Use call-put parity for Put
            undiscounted_put_prices = undiscounted_call_prices - forwards + strikes

            undiscount_prices = tf.where(is_call_options,
                                         undiscounted_call_prices,
                                         undiscounted_put_prices)
            return undiscount_prices * discount_factors
Example #19
0
 def _mean(self):
     """Return `loc` expanded (via broadcasting) against `concentration`."""
     loc = self.loc
     # Adding zeros shaped like the concentration keeps the values of `loc`
     # while broadcasting it against the concentration.
     concentration_shaped_zeros = tf.zeros_like(self.concentration)
     return loc + concentration_shaped_zeros
Example #20
0
def minimize(value_and_gradients_function,
             initial_position,
             tolerance=1e-8,
             x_tolerance=0,
             f_relative_tolerance=0,
             max_iterations=50,
             parallel_iterations=1,
             stopping_condition=None,
             params=None,
             name=None):
  """Minimizes a differentiable function.

  Implementation of algorithm described in [HZ2006]. Updated formula for next
  search direction were taken from [HZ2013].

  Supports batches with 1-dimensional batch shape.

  ### References:
  [HZ2006] Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT,
    a conjugate gradient method with guaranteed descent."
    http://users.clas.ufl.edu/hager/papers/CG/cg_compare.pdf
  [HZ2013] W. W. Hager and H. Zhang (2013) The limited memory conjugate gradient
    method.
    https://pdfs.semanticscholar.org/8769/69f3911777e0ff0663f21b67dff30518726b.pdf

  ### Usage:
  The following example demonstrates this optimizer attempting to find the
  minimum for a simple two dimensional quadratic objective function.

  ```python
    minimum = np.array([1.0, 1.0])  # The center of the quadratic bowl.
    scales = np.array([2.0, 3.0])  # The scales along the two axes.

    # The objective function and the gradient.
    def quadratic(x):
      value = tf.reduce_sum(scales * (x - minimum) ** 2)
      return value, tf.gradients(value, x)[0]

    start = tf.constant([0.6, 0.8])  # Starting point for the search.
    optim_results = conjugate_gradient.minimize(
        quadratic, initial_position=start, tolerance=1e-8)

    with tf.Session() as session:
      results = session.run(optim_results)
      # Check that the search converged
      assert(results.converged)
      # Check that the argmin is close to the actual value.
      np.testing.assert_allclose(results.position, minimum)
  ```

  Args:
    value_and_gradients_function:  A Python callable that accepts a point as a
      real `Tensor` and returns a tuple of `Tensor`s of real dtype containing
      the value of the function and its gradient at that point. The function to
      be minimized. The input should be of shape `[..., n]`, where `n` is the
      size of the domain of input points, and all others are batching
      dimensions. The first component of the return value should be a real
      `Tensor` of matching shape `[...]`. The second component (the gradient)
      should also be of shape `[..., n]` like the input value to the function.
    initial_position: Real `Tensor` of shape `[..., n]`. The starting point, or
      points when using batching dimensions, of the search procedure. At these
      points the function value and the gradient norm should be finite.
    tolerance: Scalar `Tensor` of real dtype. Specifies the gradient tolerance
      for the procedure. If the supremum norm of the gradient vector is below
      this number, the algorithm is stopped.
    x_tolerance: Scalar `Tensor` of real dtype. If the absolute change in the
      position between one iteration and the next is smaller than this number,
      the algorithm is stopped.
    f_relative_tolerance: Scalar `Tensor` of real dtype. If the relative change
      in the objective value between one iteration and the next is smaller than
      this value, the algorithm is stopped.
    max_iterations: Scalar positive int32 `Tensor`. The maximum number of
      iterations.
    parallel_iterations: Positive integer. The number of iterations allowed to
      run in parallel.
    stopping_condition: (Optional) A Python function that takes as input two
      Boolean tensors of shape `[...]`, and returns a Boolean scalar tensor. The
      input tensors are `converged` and `failed`, indicating the current status
      of each respective batch member; the return value states whether the
      algorithm should stop. The default is tfp.optimizer.converged_all which
      only stops when all batch members have either converged or failed. An
      alternative is tfp.optimizer.converged_any which stops as soon as one
      batch member has converged, or when all have failed.
    params: ConjugateGradientParams object with adjustable parameters of the
      algorithm. If not supplied, default parameters will be used.
    name: (Optional) Python str. The name prefixed to the ops created by this
      function. If not supplied, the default name 'minimize' is used.

  Returns:
    optimizer_results: A namedtuple containing the following items:
      converged: boolean tensor of shape `[...]` indicating for each batch
        member whether the minimum was found within tolerance.
      failed:  boolean tensor of shape `[...]` indicating for each batch
        member whether a line search step failed to find a suitable step size
        satisfying Wolfe conditions. In the absence of any constraints on the
        number of objective evaluations permitted, this value will
        be the complement of `converged`. However, if there is
        a constraint and the search stopped due to available
        evaluations being exhausted, both `failed` and `converged`
        will be simultaneously False.
      num_iterations: The number of iterations of the main optimization loop
        that were performed.
      num_objective_evaluations: The total number of objective
        evaluations performed.
      position: A tensor of shape `[..., n]` containing the last argument value
        found during the search from each starting point. If the search
        converged, then this value is the argmin of the objective function.
      objective_value: A tensor of shape `[...]` with the value of the
        objective function at the `position`. If the search converged, then
        this is the (local) minimum of the objective function.
      objective_gradient: A tensor of shape `[..., n]` containing the gradient
        of the objective function at the `position`. If the search converged
        the max-norm of this tensor should be below the tolerance.

  """
  with tf.compat.v1.name_scope(name, 'minimize', [initial_position, tolerance]):
    if params is None:
      params = ConjugateGradientParams()

    # All floating point settings are converted to the dtype of the initial
    # position.
    initial_position = tf.convert_to_tensor(
        value=initial_position, name='initial_position')
    dtype = initial_position.dtype
    tolerance = tf.convert_to_tensor(
        value=tolerance, dtype=dtype, name='grad_tolerance')
    f_relative_tolerance = tf.convert_to_tensor(
        value=f_relative_tolerance, dtype=dtype, name='f_relative_tolerance')
    x_tolerance = tf.convert_to_tensor(
        value=x_tolerance, dtype=dtype, name='x_tolerance')
    max_iterations = tf.convert_to_tensor(
        value=max_iterations, name='max_iterations')
    stopping_condition = stopping_condition or converged_all
    delta = tf.convert_to_tensor(
        params.sufficient_decrease_param, dtype=dtype, name='delta')
    sigma = tf.convert_to_tensor(
        params.curvature_param, dtype=dtype, name='sigma')
    # Previously this tensor was created with the copy-pasted name 'sigma'.
    eps = tf.convert_to_tensor(
        params.threshold_use_approximate_wolfe_condition,
        dtype=dtype,
        name='eps')
    eta = tf.convert_to_tensor(
        params.direction_update_param, dtype=dtype, name='eta')
    psi_1 = tf.convert_to_tensor(
        params.initial_guess_small_factor, dtype=dtype, name='psi_1')
    psi_2 = tf.convert_to_tensor(
        params.initial_guess_step_multiplier, dtype=dtype, name='psi_2')

    f0, df0 = value_and_gradients_function(initial_position)
    # Use the supremum norm here, as documented for `tolerance` and as done by
    # the in-loop gradient check; the Euclidean norm would apply a different
    # (stricter) criterion to the starting point only.
    converged = _norm_inf(df0) < tolerance

    initial_state = _OptimizerState(
        converged=converged,
        failed=tf.zeros_like(converged),  # All false.
        num_iterations=tf.convert_to_tensor(value=0),
        num_objective_evaluations=tf.convert_to_tensor(value=1),
        position=initial_position,
        objective_value=f0,
        objective_gradient=df0,
        direction=-df0,
        prev_step=tf.ones_like(f0),
    )

    def _cond(state):
      """Continue if iterations remain and stopping condition is not met."""
      return (
          (state.num_iterations < max_iterations)
          & tf.logical_not(stopping_condition(state.converged, state.failed)))

    def _body(state):
      """Main optimization loop."""
      # We use notation of [HZ2006] for brevity.
      x_k = state.position
      d_k = state.direction
      f_k = state.objective_value
      g_k = state.objective_gradient
      a_km1 = state.prev_step  # Means a_{k-1}.

      # Define scalar function, which is objective restricted to direction.
      def ls_func(alpha):
        pt = x_k + tf.expand_dims(alpha, axis=-1) * d_k
        objective_value, gradient = value_and_gradients_function(pt)
        return ValueAndGradient(
            x=alpha,
            f=objective_value,
            df=_dot(gradient, d_k),
            full_gradient=gradient)

      # Generate initial guess for line search.
      # [HZ2006] suggests to generate first initial guess separately, but
      # [JuliaLineSearches] generates it as if previous step length was 1, and
      # we do the same.
      phi_0 = f_k
      dphi_0 = _dot(g_k, d_k)
      ls_val_0 = ValueAndGradient(
          x=tf.zeros_like(phi_0), f=phi_0, df=dphi_0, full_gradient=g_k)
      step_guess_result = _init_step(ls_val_0, a_km1, ls_func, psi_1, psi_2,
                                     params.quad_step)
      init_step = step_guess_result.step

      # Check if initial step size already satisfies Wolfe condition, and in
      # that case don't perform line search.
      c = init_step.x
      phi_lim = phi_0 + eps * tf.abs(phi_0)
      phi_c = init_step.f
      dphi_c = init_step.df
      # The curvature condition is shared by both variants below.
      curvature = dphi_c >= sigma * dphi_0
      # Original Wolfe conditions, T1 in [HZ2006].
      suff_decrease_1 = delta * dphi_0 >= (phi_c - phi_0) / c
      wolfe1 = suff_decrease_1 & curvature
      # Approximate Wolfe conditions, T2 in [HZ2006].
      suff_decrease_2 = (2 * delta - 1) * dphi_0 >= dphi_c
      wolfe2 = suff_decrease_2 & curvature & (phi_c <= phi_lim)
      wolfe = wolfe1 | wolfe2
      skip_line_search = (step_guess_result.may_terminate
                          & wolfe) | state.failed | state.converged

      # Call Hager-Zhang line search (L0-L3 in [HZ2006]).
      # Parameter theta from [HZ2006] is not adjustable, it's always 0.5.
      ls_result = linesearch.hager_zhang(
          ls_func,
          value_at_zero=ls_val_0,
          converged=skip_line_search,
          initial_step_size=init_step.x,
          value_at_initial_step=init_step,
          shrinkage_param=params.shrinkage_param,
          expansion_param=params.expansion_param,
          sufficient_decrease_param=delta,
          curvature_param=sigma,
          threshold_use_approximate_wolfe_condition=eps)

      # Moving to the next point, using step length from line search.
      # If line search was skipped, take step length from initial guess.
      # To save objective evaluation, use objective value and gradient returned
      # by line search or initial guess.
      a_k = tf.compat.v1.where(
          skip_line_search, init_step.x, ls_result.left.x)
      x_kp1 = state.position + tf.expand_dims(a_k, -1) * d_k
      f_kp1 = tf.compat.v1.where(
          skip_line_search, init_step.f, ls_result.left.f)
      g_kp1 = tf.compat.v1.where(skip_line_search, init_step.full_gradient,
                                 ls_result.left.full_gradient)

      # Evaluate next direction.
      # Use formulas (2.7)-(2.11) from [HZ2013] with P_k=I.
      y_k = g_kp1 - g_k
      d_dot_y = _dot(d_k, y_k)
      b_k = (_dot(y_k, g_kp1) -
             _norm_sq(y_k) * _dot(g_kp1, d_k) / d_dot_y) / d_dot_y
      eta_k = eta * _dot(d_k, g_k) / _norm_sq(d_k)
      b_k = tf.maximum(b_k, eta_k)
      d_kp1 = -g_kp1 + tf.expand_dims(b_k, -1) * d_k

      # Check convergence criteria.
      grad_converged = _norm_inf(g_kp1) <= tolerance
      x_converged = (_norm_inf(x_kp1 - x_k) <= x_tolerance)
      f_converged = (
          tf.math.abs(f_kp1 - f_k) <= f_relative_tolerance * tf.math.abs(f_k))
      converged = grad_converged | x_converged | f_converged

      # Construct new state for next iteration.
      # Batch members that had already converged keep their previous position,
      # objective value and gradient.
      # NOTE(review): `failed` is carried over unchanged; nothing in this loop
      # body sets it. Confirm whether the line search status should be folded
      # in here.
      new_state = _OptimizerState(
          converged=converged,
          failed=state.failed,
          num_iterations=state.num_iterations + 1,
          num_objective_evaluations=state.num_objective_evaluations +
          step_guess_result.func_evals + ls_result.func_evals,
          position=tf.compat.v1.where(state.converged, x_k, x_kp1),
          objective_value=tf.compat.v1.where(state.converged, f_k, f_kp1),
          objective_gradient=tf.compat.v1.where(state.converged, g_k, g_kp1),
          direction=d_kp1,
          prev_step=a_k)
      return (new_state,)

    final_state = tf.while_loop(
        _cond, _body, (initial_state,),
        parallel_iterations=parallel_iterations)[0]
    return OptimizerResult(
        converged=final_state.converged,
        failed=final_state.failed,
        num_iterations=final_state.num_iterations,
        num_objective_evaluations=final_state.num_objective_evaluations,
        position=final_state.position,
        objective_value=final_state.objective_value,
        objective_gradient=final_state.objective_gradient)
def prepare_args(model_matrix,
                 response,
                 model_coefficients,
                 predicted_linear_response,
                 offset,
                 name=None):
    """Sanitizes the input arguments of `fit`.

    Every supplied argument is converted to a `Tensor` of one common dtype
    (picked by `dtype_util.common_dtype`, which is handed `np.float32` as its
    second argument), and defaults are built for `model_coefficients` and
    `predicted_linear_response` when they are `None`.

    Args:
      model_matrix: (Batch of) `float`-like, matrix-shaped `Tensor` where each
        row represents a sample's features.
      response: (Batch of) vector-shaped `Tensor` where each element is the
        observed response for the corresponding row of `model_matrix`.
      model_coefficients: Optional (batch of) vector-shaped `Tensor` with one
        coefficient per column of `model_matrix`.
        Default value: zeros whose shape is the batch shape of `model_matrix`
        followed by its number of columns.
      predicted_linear_response: Optional `Tensor` holding the `offset`-shifted
        initial linear predictions for the current `model_coefficients`.
        Default value: when `model_coefficients is None`, zeros shaped like
        `response` (or `offset` broadcast to the shape of `response` if an
        offset was given); otherwise the result of
        `compute_predicted_linear_response(model_matrix, model_coefficients,
        offset)`.
      offset: Optional `Tensor`; constant shift applied to
        `predicted_linear_response`. Stays `None` when not supplied.
      name: Python `str` used as name prefix to ops created by this function.
        Default value: `'prepare_args'`.

    Returns:
      A Python `list` with, in order:
      model_matrix: the `model_matrix` argument as a `Tensor`.
      response: the `response` argument as a `Tensor`.
      model_coefficients: the supplied coefficients as a `Tensor`, or the
        all-zeros default.
      predicted_linear_response: the supplied value as a `Tensor`, or the
        default described above.
      offset: the `offset` argument as a `Tensor`, or `None` if it was `None`.
    """
    candidates = [
        model_matrix, response, model_coefficients, predicted_linear_response,
        offset
    ]
    with tf.name_scope(name or 'prepare_args'):
        common = dtype_util.common_dtype(candidates, np.float32)

        def _as_tensor(value, label):
            # Single place where inputs are coerced to the common dtype.
            return tf.convert_to_tensor(value, dtype=common, name=label)

        model_matrix = _as_tensor(model_matrix, 'model_matrix')
        if offset is not None:
            offset = _as_tensor(offset, 'offset')
        response = _as_tensor(response, 'response')

        coefficients_defaulted = model_coefficients is None
        if coefficients_defaulted:
            # No coefficients were supplied; start from all zeros, one per
            # column of the model matrix (per batch member).
            leading_dims = tf.shape(model_matrix)[:-2]
            feature_count = tf.shape(model_matrix)[-1]
            zeros_shape = tf.concat([leading_dims, [feature_count]], axis=0)
            model_coefficients = tf.zeros(
                shape=zeros_shape, dtype=common, name='model_coefficients')
        else:
            # Coefficients were supplied; they may be numpy or a literal.
            model_coefficients = _as_tensor(
                model_coefficients, 'model_coefficients')

        if predicted_linear_response is not None:
            predicted_linear_response = _as_tensor(
                predicted_linear_response, 'predicted_linear_response')
        elif not coefficients_defaulted:
            # Coefficients are known but the linear response is not.
            predicted_linear_response = compute_predicted_linear_response(
                model_matrix, model_coefficients, offset)
        elif offset is None:
            # Zero coefficients and no offset: the linear response is zero.
            predicted_linear_response = tf.zeros_like(
                response, common, name='predicted_linear_response')
        else:
            # Zero coefficients: the linear response is just the offset.
            predicted_linear_response = tf.broadcast_to(
                offset, tf.shape(response), name='predicted_linear_response')

    return [
        model_matrix,
        response,
        model_coefficients,
        predicted_linear_response,
        offset,
    ]
Example #22
0
    def _body(state):
      """Main optimization loop.

      Runs one iteration: guesses an initial step, runs (or skips) the
      Hager-Zhang line search, moves to the new point, builds the next search
      direction and evaluates the convergence criteria.

      Besides `state`, this closure reads names from the enclosing scope that
      are not defined here (`value_and_gradients_function`, `psi_1`, `psi_2`,
      `params`, `delta`, `sigma`, `eps`, `eta`, `tolerance`, `x_tolerance`,
      `f_relative_tolerance`).

      Args:
        state: `_OptimizerState`; the fields read here are `position`,
          `direction`, `objective_value`, `objective_gradient`, `prev_step`,
          `failed`, `converged`, `num_iterations` and
          `num_objective_evaluations`.

      Returns:
        One-element tuple holding the `_OptimizerState` for the next iteration.
      """
      # We use notation of [HZ2006] for brevity.
      x_k = state.position
      d_k = state.direction
      f_k = state.objective_value
      g_k = state.objective_gradient
      a_km1 = state.prev_step  # Means a_{k-1}.

      # Define scalar function, which is objective restricted to direction.
      # `df` is the directional derivative along `d_k`; the full gradient is
      # kept as well so it can be reused without another objective evaluation.
      def ls_func(alpha):
        pt = x_k + tf.expand_dims(alpha, axis=-1) * d_k
        objective_value, gradient = value_and_gradients_function(pt)
        return ValueAndGradient(
            x=alpha,
            f=objective_value,
            df=_dot(gradient, d_k),
            full_gradient=gradient)

      # Generate initial guess for line search.
      # [HZ2006] suggests to generate first initial guess separately, but
      # [JuliaLineSearches] generates it as if previous step length was 1, and
      # we do the same.
      phi_0 = f_k
      dphi_0 = _dot(g_k, d_k)
      ls_val_0 = ValueAndGradient(
          x=tf.zeros_like(phi_0), f=phi_0, df=dphi_0, full_gradient=g_k)
      step_guess_result = _init_step(ls_val_0, a_km1, ls_func, psi_1, psi_2,
                                     params.quad_step)
      init_step = step_guess_result.step

      # Check if initial step size already satisfies Wolfe condition, and in
      # that case don't perform line search.
      c = init_step.x
      phi_lim = phi_0 + eps * tf.abs(phi_0)
      phi_c = init_step.f
      dphi_c = init_step.df
      # Original Wolfe conditions, T1 in [HZ2006].
      # NOTE(review): divides by the step size `c`; no guard against `c == 0`
      # is visible in this block.
      suff_decrease_1 = delta * dphi_0 >= (phi_c - phi_0) / c
      curvature = dphi_c >= sigma * dphi_0
      wolfe1 = suff_decrease_1 & curvature
      # Approximate Wolfe conditions, T2 in [HZ2006].
      suff_decrease_2 = (2 * delta - 1) * dphi_0 >= dphi_c
      # NOTE(review): `curvature` is recomputed here with the same expression
      # as a few lines above.
      curvature = dphi_c >= sigma * dphi_0
      wolfe2 = suff_decrease_2 & curvature & (phi_c <= phi_lim)
      wolfe = wolfe1 | wolfe2
      # Members that already failed or converged also skip the line search.
      skip_line_search = (step_guess_result.may_terminate
                          & wolfe) | state.failed | state.converged

      # Call Hager-Zhang line search (L0-L3 in [HZ2006]).
      # Parameter theta from [HZ2006] is not adjustable, it's always 0.5.
      # `skip_line_search` is passed as `converged` so the line search treats
      # those members as already done.
      ls_result = linesearch.hager_zhang(
          ls_func,
          value_at_zero=ls_val_0,
          converged=skip_line_search,
          initial_step_size=init_step.x,
          value_at_initial_step=init_step,
          shrinkage_param=params.shrinkage_param,
          expansion_param=params.expansion_param,
          sufficient_decrease_param=delta,
          curvature_param=sigma,
          threshold_use_approximate_wolfe_condition=eps)

      # Moving to the next point, using step length from line search.
      # If line search was skipped, take step length from initial guess.
      # To save objective evaluation, use objective value and gradient returned
      # by line search or initial guess.
      a_k = tf.compat.v1.where(
          skip_line_search, init_step.x, ls_result.left.x)
      x_kp1 = state.position + tf.expand_dims(a_k, -1) * d_k
      f_kp1 = tf.compat.v1.where(
          skip_line_search, init_step.f, ls_result.left.f)
      g_kp1 = tf.compat.v1.where(skip_line_search, init_step.full_gradient,
                                 ls_result.left.full_gradient)

      # Evaluate next direction.
      # Use formulas (2.7)-(2.11) from [HZ2013] with P_k=I.
      y_k = g_kp1 - g_k
      d_dot_y = _dot(d_k, y_k)
      # NOTE(review): `d_dot_y` appears twice as a divisor and `_norm_sq(d_k)`
      # once; no guard against zero denominators is visible in this block.
      b_k = (_dot(y_k, g_kp1) -
             _norm_sq(y_k) * _dot(g_kp1, d_k) / d_dot_y) / d_dot_y
      eta_k = eta * _dot(d_k, g_k) / _norm_sq(d_k)
      # Lower-bound b_k by eta_k.
      b_k = tf.maximum(b_k, eta_k)
      d_kp1 = -g_kp1 + tf.expand_dims(b_k, -1) * d_k

      # Check convergence criteria.
      # Any one of the three criteria (gradient, position change, relative
      # objective change) marks a member as converged.
      grad_converged = _norm_inf(g_kp1) <= tolerance
      x_converged = (_norm_inf(x_kp1 - x_k) <= x_tolerance)
      f_converged = (
          tf.math.abs(f_kp1 - f_k) <= f_relative_tolerance * tf.math.abs(f_k))
      converged = grad_converged | x_converged | f_converged

      # Construct new state for next iteration.
      # `failed` is carried over unchanged. Position, objective value and
      # gradient are frozen for members that were already converged on entry;
      # `direction` and `prev_step` are overwritten for every member.
      new_state = _OptimizerState(
          converged=converged,
          failed=state.failed,
          num_iterations=state.num_iterations + 1,
          num_objective_evaluations=state.num_objective_evaluations +
          step_guess_result.func_evals + ls_result.func_evals,
          position=tf.compat.v1.where(state.converged, x_k, x_kp1),
          objective_value=tf.compat.v1.where(state.converged, f_k, f_kp1),
          objective_gradient=tf.compat.v1.where(state.converged, g_k, g_kp1),
          direction=d_kp1,
          prev_step=a_k)
      return (new_state,)
def windowed_mean(x, low_indices=None, high_indices=None, axis=0, name=None):
    """Windowed estimates of mean.

    Computes means of the data in the Tensor `x` over the given windows along
    `axis`. The window sums are obtained as differences of a zero-padded
    cumulative sum gathered at `high_indices` and `low_indices`, so window `i`
    covers

      x[low_indices[i]:high_indices[i]]

    i.e. the lower boundary is inclusive and the upper boundary is exclusive
    (this assumes `_prepare_window_args` returns the indices unshifted -- TODO
    confirm).

    If K is the size of `low_indices` and `high_indices`, and `N` is the size
    of `x` along the given `axis`, the computation takes O(K + N) work,
    O(log(N)) depth (the length of the longest series of operations that are
    performed sequentially), and only uses O(1) TensorFlow kernel invocations.

    This function can be useful for assessing the behavior over time of
    trailing-window estimators from some iterative process, such as the
    last half of an MCMC chain.

    Suppose `x` has shape `Bx + [N] + E`, where the `Bx` component has
    rank `axis`, and `low_indices` and `high_indices` broadcast to shape
    `[M]`.  Then each element of `low_indices` and `high_indices`
    must be between 0 and N+1, and the shape of the output will be
    `Bx + [M] + E`.  Batch shape in the indices is not currently supported.

    The default windows are
    `[0, 1), [1, 2), [1, 3), [2, 4), [2, 5), ...`
    This corresponds to analyzing `x` as though it were streaming, for
    example successive states of an MCMC sampler, and we were interested
    in the mean of the last half of the data at each point.

    Args:
      x: A numeric `Tensor` holding `N` samples along the given `axis`,
        whose windowed means are desired.
      low_indices: An integer `Tensor` defining the lower boundary
        (inclusive) of each window.  Default: elementwise half of
        `high_indices`.
      high_indices: An integer `Tensor` defining the upper boundary
        (exclusive) of each window.  Must be broadcast-compatible with
        `low_indices`.  Default: `tf.range(1, N+1)`, i.e., N windows
        that each end in the corresponding datum from `x` (inclusive).
      axis: Scalar `Tensor` designating the axis holding samples.  This
        is the axis of `x` along which we take windows, and therefore
        the axis that `low_indices` and `high_indices` index into.
        Other axes are treated in batch.  Default value: `0` (leftmost
        dimension).
      name: Python `str` name prefixed to Ops created by this function.
        Default value: `None` (i.e., `'windowed_mean'`).

    Returns:
      means: A numeric `Tensor` holding the windowed means of `x` along
        the `axis` dimension.
    """
    with tf.name_scope(name or 'windowed_mean'):
        x = tf.convert_to_tensor(x)
        (low_indices, high_indices,
         low_counts, high_counts) = _prepare_window_args(
             x, low_indices, high_indices, axis)

        # Prefix sums with a leading zero slice, so that entry `j` along
        # `axis` holds the sum of the first `j` samples.
        running_total = tf.cumsum(x, axis=axis)
        zero_slice = tf.zeros_like(tf.gather(running_total, [0], axis=axis))
        prefix_sums = tf.concat([zero_slice, running_total], axis=axis)

        sums_below = tf.gather(prefix_sums, low_indices, axis=axis)
        sums_through = tf.gather(prefix_sums, high_indices, axis=axis)

        window_sizes = high_counts - low_counts
        window_sums = sums_through - sums_below
        return _safe_average(window_sums, window_sizes)
Example #24
0
def _init_step(pos, prev_step, func, psi_1, psi_2, quad_step):
  """Finds initial step size for line search at given point.

  Corresponds to I1-I2 in [HZ2006].

  The trial point `psi_1 * prev_step` is evaluated first. If `quad_step` is set
  and the quadratic interpolant through that point is convex with a value not
  above the starting value, the minimizer of the interpolant replaces the trial
  point. Members that have no acceptable guess after that fall back to
  `psi_2 * prev_step`.

  Args:
    pos: ValueAndGradient for current point.
    prev_step: Step size at previous iteration.
    func: Callable taking real `Tensor` and returning ValueAndGradient,
      describes scalar function for line search.
    psi_1: Real scalar `Tensor`. Factor to multiply previous step to get right
      point for quadratic interpolation.
    psi_2: Real scalar `Tensor`. Factor to multiply previous step if quadratic
      interpolation failed.
    quad_step: Boolean. Whether to try quadratic interpolation.

  Returns:
    _StepGuessResult namedtuple containing initial guess and additional data.
  """
  phi_0 = pos.f
  derphi_0 = pos.df
  step = func(psi_1 * prev_step)
  can_take = step.f > phi_0
  result = _StepGuessResult(
      step=step,
      func_evals=1,
      can_take=can_take,
      may_terminate=tf.zeros_like(can_take))

  # Try to approximate function with a parabola and take its minimum as initial
  # guess.
  if quad_step:
    # Quadratic coefficient of parabola. If it's positive, parabola is convex
    # and has minimum.
    q_koef = step.f - phi_0 - step.x * derphi_0
    quad_step_success = tf.logical_and(step.f <= phi_0, q_koef > 0.0)

    def update_result_1():
      # Members without a successful interpolation keep their current guess.
      new_x = tf.compat.v1.where(
          quad_step_success,
          -0.5 * (derphi_0 * step.x**2) / q_koef, result.step.x)
      return _StepGuessResult(
          step=func(new_x),
          func_evals=result.func_evals + 1,
          can_take=tf.logical_or(result.can_take, quad_step_success),
          may_terminate=tf.logical_or(result.may_terminate, quad_step_success))

    result = tf.cond(
        tf.reduce_any(quad_step_success), update_result_1, lambda: result)

  def update_result_2():
    # Select on the accumulated `result.can_take`, not on the initial
    # `can_take`: members whose quadratic interpolation succeeded must keep the
    # interpolated step (they also keep `may_terminate`), and only members with
    # no acceptable guess at all fall back to `psi_2 * prev_step`.
    new_x = tf.compat.v1.where(
        result.can_take, result.step.x, psi_2 * prev_step)
    return _StepGuessResult(
        step=func(new_x),
        func_evals=result.func_evals + 1,
        can_take=tf.ones_like(can_take),
        may_terminate=result.may_terminate)

  # According to [HZ2006] we should fall back to psi_2*prev_step when quadratic
  # interpolation failed. However, [JuliaLineSearches] retains guess
  # psi_1*prev_step if func(psi_1 * prev_step) > func(0), because then local
  # minimum is within (0, psi_1*prev_step).
  result = tf.cond(
      tf.reduce_all(result.can_take), lambda: result, update_result_2)

  return result
Example #25
0
    def one_step(self, current_state, previous_kernel_results):
        """Runs one iteration of the No U-Turn Sampler.

        Args:
          current_state: `Tensor` or Python `list` of `Tensor`s representing
            the current state(s) of the Markov chain(s). The first `r`
            dimensions index independent chains,
            `r = tf.rank(target_log_prob_fn(*current_state))`.
          previous_kernel_results: `collections.namedtuple` containing
            `Tensor`s representing values from previous calls to this function
            (or from the `bootstrap_results` function.)

        Returns:
          next_state: `Tensor` or Python list of `Tensor`s representing the
            state(s) of the Markov chain(s) after taking
            `self.num_trajectories_per_step` steps. Has same type and shape as
            `current_state`.
          kernel_results: `collections.namedtuple` of internal calculations
            used to advance the chain.

        Raises:
          ValueError: if `self.stackless` is set while not executing eagerly,
            or if the number of step sizes is neither one nor the number of
            state parts.
        """
        if self.stackless and not tf.executing_eagerly():
            raise ValueError(
                "Cannot use stackless auto-batching in graph mode.")

        prior_log_prob = previous_kernel_results.target_log_prob
        prior_grads = previous_kernel_results.grads_target_log_prob
        taken_before = previous_kernel_results.leapfrogs_taken
        computed_before = previous_kernel_results.leapfrogs_computed

        with tf1.name_scope(self.name,
                            values=[
                                current_state, self.step_size,
                                prior_log_prob, prior_grads
                            ]):
            with tf1.name_scope("initialize"):
                # Remember whether the caller passed a bare state so the
                # result can be returned in the same form.
                state_is_single = not tf.nest.is_nested(current_state)
                if state_is_single:
                    current_state = [current_state]
                current_state = [
                    tf.convert_to_tensor(value=part) for part in current_state
                ]

                step_sizes = self.step_size
                if not tf.nest.is_nested(step_sizes):
                    step_sizes = [step_sizes]
                step_sizes = [
                    tf.convert_to_tensor(value=size) for size in step_sizes
                ]
                # A single step size is shared by every state part.
                if len(step_sizes) == 1:
                    step_sizes = step_sizes * len(current_state)
                if len(step_sizes) != len(current_state):
                    raise ValueError(
                        "Expected either one step size or {} (size of "
                        "`current_state`), but found {}".format(
                            len(current_state), len(step_sizes)))

            trajectory_count = tf.constant([self.num_trajectories_per_step],
                                           dtype=tf.int64)

            if self.backend is not None:
                backend = self.backend
            elif self._seed_stream() is not None:
                # The user wanted reproducible results; limit the parallel
                # iterations.
                backend = ab.TensorFlowBackend(while_parallel_iterations=1)
            else:
                backend = ab.TensorFlowBackend()

            # The `dry_run` and `max_stack_depth` arguments are added by the
            # @ctx.batch decorator, confusing pylint.
            # pylint: disable=unexpected-keyword-arg
            chain_outputs, new_leapfrogs = self.many_steps(
                trajectory_count,
                current_state,
                prior_log_prob,
                prior_grads,
                step_sizes,
                tf.zeros_like(taken_before),  # leapfrogs
                dry_run=not self.use_auto_batching,
                stackless=self.stackless,
                backend=backend,
                max_stack_depth=self.max_tree_depth + 4,
                block_code_cache=self._block_code_cache)
            next_state, next_log_prob, next_grads = chain_outputs

            if state_is_single:
                next_state = next_state[0]

            total_taken = taken_before + new_leapfrogs
            total_computed = computed_before + tf.math.reduce_max(
                input_tensor=new_leapfrogs)
            return next_state, NUTSKernelResults(
                next_log_prob, next_grads, total_taken, total_computed)
Example #26
0
def barrier_price(*,
                  volatilities: types.RealTensor,
                  strikes: types.RealTensor,
                  expiries: types.RealTensor,
                  spots: types.RealTensor,
                  barriers: types.RealTensor,
                  rebates: types.RealTensor = None,
                  discount_rates: types.RealTensor = None,
                  dividend_rates: types.RealTensor = None,
                  is_barrier_down: types.BoolTensor = None,
                  is_knock_out: types.BoolTensor = None,
                  is_call_options: types.BoolTensor = None,
                  dtype: tf.DType = None,
                  name: str = None) -> types.RealTensor:
  """Prices barrier options in a Black-Scholes Model.

  Computes the prices of options with a single barrier in Black-Scholes world as
  described in Ref. [1]. Note that the barrier is applied continuously.

  #### Example

  This example is taken from Ref. [2], Page 154.

  ```python
  import numpy as np
  import tf_quant_finance as tff

  discount_rates = np.array([.08, .08])
  dividend_rates = np.array([.04, .04])
  spots = np.array([100., 100.])
  strikes = np.array([90., 90.])
  barriers = np.array([95., 95.])
  rebates = np.array([3., 3.])
  volatilities = np.array([.25, .25])
  expiries = np.array([.5, .5])
  is_barrier_down = np.array([True, False])
  is_knock_out = np.array([False, False])
  is_call_options = np.array([True, True])

  # All arguments are keyword-only.
  price = tff.black_scholes.barrier_price(
      volatilities=volatilities,
      strikes=strikes,
      expiries=expiries,
      spots=spots,
      barriers=barriers,
      rebates=rebates,
      discount_rates=discount_rates,
      dividend_rates=dividend_rates,
      is_barrier_down=is_barrier_down,
      is_knock_out=is_knock_out,
      is_call_options=is_call_options)

  # Prices quoted by the original example: [9.024, 7.7627].
  # NOTE(review): re-check these values against the flag arrays above.
  ```

  #### References

  [1]: Lee Clewlow, Javier Llanos, Chris Strickland, Caracas Venezuela
    Pricing Exotic Options in a Black-Scholes World, 1994
    https://warwick.ac.uk/fac/soc/wbs/subjects/finance/research/wpaperseries/1994/94-54.pdf
  [2]: Espen Gaarder Haug, The Complete Guide to Option Pricing Formulas,
    2nd Edition, 1997

  Args:
    volatilities: Real `Tensor` of any shape and dtype. The volatilities to
      expiry of the options to price.
    strikes: A real `Tensor` of the same dtype and compatible shape as
      `volatilities`. The strikes of the options to be priced.
    expiries: A real `Tensor` of same dtype and compatible shape as
      `volatilities`. The expiry of each option. The units should be such that
      `expiry * volatility**2` is dimensionless.
    spots: A real `Tensor` of any shape that broadcasts to the shape of the
      `volatilities`. The current spot price of the underlying. When `dtype`
      is `None`, the dtype of `spots` is used for all other real inputs.
    barriers: A real `Tensor` of same dtype as the `volatilities` and of the
      shape that broadcasts with `volatilities`. The barriers of each option.
    rebates: A real `Tensor` of same dtype as the `volatilities` and of the
      shape that broadcasts with `volatilities`. For knockouts, this is a
      fixed cash payout in case the barrier is breached. For knockins, this is a
      fixed cash payout in case the barrier level is not breached. In the former
      case, the rebate is paid immediately on breach whereas in the latter, the
      rebate is paid at the expiry of the option.
      Default value: `None` which maps to no rebates.
    discount_rates: A real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      Discount rates, or risk free rates.
      Default value: `None`, equivalent to discount_rate = 0.
    dividend_rates: A real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`. A
      continuous dividend rate paid by the underlier. If `None`, then
      defaults to zero dividends.
      Default value: `None`, equivalent to zero dividends.
    is_barrier_down: A boolean `Tensor` of the shape that broadcasts with
      `volatilities`. True if barrier is below asset price at expiration.
      Default value: `True`.
    is_knock_out: A boolean `Tensor` of the shape that broadcasts with
      `volatilities`. True if option is knock out else false.
      Default value: `True`.
    is_call_options: A boolean `Tensor` of the shape that broadcasts with
      `volatilities`. True if option is call else false.
      Default value: `True`.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for conversion
      of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: `None` which maps to the default dtype inferred by
      TensorFlow.
    name: str. The name for the ops created by this function.
      Default value: `None` which is mapped to the default name `barrier_price`.
  Returns:
    option_prices: A `Tensor` with one price per option. The approximate price
    of the barriers option under black scholes.
  """
  # The computation is done as in Ref [2] where each integral is split into
  # two matrices. The first matrix contains the algebraic terms and the second
  # matrix contains the probability distribution terms. Masks are used to filter
  # appropriate terms for calculating the integral. Then a dot product of each
  # row in the matrices coupled with the masks work to calculate the prices of
  # the barriers option.
  with tf.name_scope(name or 'barrier_price'):
    spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
    # From here on every real input is converted to the dtype of `spots`.
    dtype = spots.dtype
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    volatilities = tf.convert_to_tensor(
        volatilities, dtype=dtype, name='volatilities')
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
    barriers = tf.convert_to_tensor(barriers, dtype=dtype, name='barriers')
    if rebates is not None:
      rebates = tf.convert_to_tensor(rebates, dtype=dtype, name='rebates')
    else:
      rebates = tf.zeros_like(spots, dtype=dtype, name='rebates')

    # Convert all to tensor and enforce float dtype where required
    if discount_rates is not None:
      discount_rates = tf.convert_to_tensor(
          discount_rates, dtype=dtype, name='discount_rates')
    else:
      discount_rates = tf.convert_to_tensor(
          0.0, dtype=dtype, name='discount_rates')

    if dividend_rates is not None:
      dividend_rates = tf.convert_to_tensor(
          dividend_rates, dtype=dtype, name='dividend_rates')
    else:
      dividend_rates = tf.convert_to_tensor(
          0.0, dtype=dtype, name='dividend_rates')

    # The three boolean flags are turned into 0/1 integers so they can be
    # combined into a single row index below.
    if is_barrier_down is None:
      is_barrier_down = tf.constant(1, name='is_barrier_down')
    else:
      is_barrier_down = tf.convert_to_tensor(is_barrier_down, dtype=tf.bool,
                                             name='is_barrier_down')
      is_barrier_down = tf.where(is_barrier_down, 1, 0)
    if is_knock_out is None:
      is_knock_out = tf.constant(1, name='is_knock_out')
    else:
      is_knock_out = tf.convert_to_tensor(is_knock_out, dtype=tf.bool,
                                          name='is_knock_out')
      is_knock_out = tf.where(is_knock_out, 1, 0)
    if is_call_options is None:
      is_call_options = tf.constant(1, name='is_call_options')
    else:
      is_call_options = tf.convert_to_tensor(is_call_options, dtype=tf.bool,
                                             name='is_call_options')
      is_call_options = tf.where(is_call_options, 1, 0)

    # Indices which range from 0-7 are used to select the appropriate
    # mask for each barrier
    # (bit 2: barrier down, bit 1: knock out, bit 0: call).
    indices = tf.bitwise.left_shift(
        is_barrier_down, 2) + tf.bitwise.left_shift(
            is_knock_out, 1) + is_call_options

    # Masks select the appropriate terms for integral approximations
    # Integrals are separated by algebraic terms and probability
    # distribution terms. This give 12 different terms per matrix
    # (6 integrals, 2 terms each)
    # shape = [8, 12]
    mask_matrix_greater_strike = tf.constant([
        [1, 1, -1, -1, 0, 0, 1, 1, 1, 1, 0, 0],  # up and in put
        [1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],  # up and in call
        [0, 0, 1, 1, 0, 0, -1, -1, 0, 0, 1, 1],  # up and out put
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],  # up and out call
        [0, 0, 1, 1, -1, -1, 1, 1, 0, 0, 1, 1],  # down and in put
        [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0],  # down and in call
        [1, 1, -1, -1, 1, 1, -1, -1, 0, 0, 1, 1],  # down and out put
        [1, 1, 0, 0, -1, -1, 0, 0, 0, 0, 1, 1]])  # down and out call

    mask_matrix_lower_strike = tf.constant([
        [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0],  # up and in put
        [0, 0, 1, 1, -1, -1, 1, 1, 1, 1, 0, 0],  # up and in call
        [1, 1, 0, 0, -1, -1, 0, 0, 0, 0, 1, 1],  # up and out put
        [1, 1, -1, -1, 1, 1, -1, -1, 0, 0, 1, 1],  # up and out call
        [1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],  # down and in put
        [1, 1, -1, -1, 0, 0, 1, 1, 1, 1, 0, 0],  # down and in call
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],  # down and out put
        [0, 0, 1, 1, 0, 0, -1, -1, 0, 0, 1, 1]])  # down and out call

    # Create masks
    # Masks are shape [strikes.shape, 12]
    masks_lower = tf.gather(mask_matrix_lower_strike, indices, axis=0)
    masks_greater = tf.gather(mask_matrix_greater_strike, indices, axis=0)
    strikes_greater = tf.expand_dims(strikes > barriers, axis=-1)
    masks = tf.where(strikes_greater, masks_greater, masks_lower)
    masks = tf.cast(masks, dtype=dtype)
    one = tf.constant(1, dtype=dtype)
    # +1 for calls / down barriers, -1 for puts / up barriers.
    call_or_put = tf.cast(tf.where(tf.equal(is_call_options, 0), -one, one),
                          dtype=dtype)
    below_or_above = tf.cast(tf.where(tf.equal(is_barrier_down, 0), -one, one),
                             dtype=dtype)

    # Calculate params for integrals
    sqrt_var = volatilities * tf.math.sqrt(expiries)
    mu = (discount_rates - dividend_rates) - ((volatilities**2) / 2)
    lamda = 1 + (mu / (volatilities**2))
    x = (tf.math.log(spots / strikes) / (sqrt_var)) + (lamda * sqrt_var)
    x1 = (tf.math.log(spots / barriers) / (sqrt_var)) + (lamda * sqrt_var)
    y = (tf.math.log((barriers**2) / (spots * strikes)) / (
        sqrt_var)) + (lamda * sqrt_var)
    y1 = (tf.math.log(barriers / spots) / (sqrt_var)) + (lamda * sqrt_var)
    # Exponent parameter of the rebate-paid-at-hit term:
    #   b = sqrt(mu^2 + 2 * sigma^2 * r) / sigma^2.
    # The square root is required: `a + b` must be a root of the quadratic
    # that characterises the discounted first-hitting-time payoff. Without it
    # knock-out prices with a non-zero rebate are wrong.
    b = tf.math.sqrt(
        (mu**2) + (2 * (volatilities**2) * discount_rates)) / (volatilities**2)
    z = (tf.math.log(barriers / spots) / (sqrt_var)) + (b * sqrt_var)
    a = mu / (volatilities**2)

    # Other params used for integrals
    discount_factors = tf.math.exp(
        -discount_rates * expiries, name='discount_factors')
    barriers_ratio = tf.math.divide(barriers, spots, name='barriers_ratio')
    spots_term = call_or_put * spots * tf.math.exp(-dividend_rates * expiries)
    strikes_term = call_or_put * strikes * discount_factors

    # Constructing Matrix with first and second algebraic terms for each
    # integral [strike.shape, 12]
    # The twelve terms are stacked along a new trailing axis (`axis=-1`) so
    # that the result lines up with `masks` without relying on the static
    # rank of `strikes`.
    terms_mat = tf.stack(
        (spots_term, -strikes_term,
         spots_term, -strikes_term,
         spots_term * (barriers_ratio**(2 * lamda)),
         -strikes_term * (barriers_ratio**((2 * lamda) - 2)),
         spots_term * (barriers_ratio**(2 * lamda)),
         -strikes_term * (barriers_ratio**((2 * lamda) - 2)),
         rebates * discount_factors,
         -rebates * discount_factors * (  # pylint: disable=invalid-unary-operand-type
             barriers_ratio**((2 * lamda) - 2)),
         rebates * (barriers_ratio**(a + b)),
         rebates * (barriers_ratio**(a - b))),
        name='term_matrix', axis=-1)

    # Constructing Matrix with first and second norm for each integral
    # [strikes.shape, 12]
    cdf_mat = tf.stack(
        (call_or_put * x,
         call_or_put * (x - sqrt_var),
         call_or_put * x1,
         call_or_put * (x1 - sqrt_var),
         below_or_above * y,
         below_or_above * (y - sqrt_var),
         below_or_above * y1,
         below_or_above * (y1 - sqrt_var),
         below_or_above * (x1 - sqrt_var),
         below_or_above * (y1 - sqrt_var),
         below_or_above * z,
         below_or_above * (z - (2 * b * sqrt_var))),
        name='cdf_matrix', axis=-1)
    cdf_mat = _ncdf(cdf_mat)
    # Calculating and returning price for each option
    return tf.reduce_sum(masks * terms_mat * cdf_mat, axis=-1)
Example #27
0
 def _mean(self):
     """Returns `loc` broadcast against the shape of `concentration`."""
     location = self.loc
     # Adding zeros shaped like `concentration` only serves to broadcast.
     shape_carrier = tf.zeros_like(self.concentration)
     return location + shape_carrier
Example #28
0
def binary_price(*,
                 volatilities: types.RealTensor,
                 strikes: types.RealTensor,
                 expiries: types.RealTensor,
                 spots: types.RealTensor = None,
                 forwards: types.RealTensor = None,
                 discount_rates: types.RealTensor = None,
                 dividend_rates: types.RealTensor = None,
                 discount_factors: types.RealTensor = None,
                 is_call_options: types.BoolTensor = None,
                 is_normal_volatility: bool = False,
                 dtype: tf.DType = None,
                 name: str = None) -> types.RealTensor:
  """Computes the Black Scholes price for a batch of binary call or put options.

  The binary call (resp. put) option priced here is that which pays off a unit
  of cash if the underlying asset has a value greater (resp. smaller) than the
  strike price at expiry. Hence the binary option price is the discounted
  probability that the asset will end up higher (resp. lower) than the
  strike price at expiry.

  #### Example

  ```python
    # Price a batch of 5 binary call options.
    volatilities = np.array([0.0001, 102.0, 2.0, 0.1, 0.4])
    forwards = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    # Strikes will automatically be broadcasted to shape [5].
    strikes = np.array([3.0])
    # Expiries will be broadcast to shape [5], i.e. each option has strike=3
    # and expiry = 1.
    expiries = 1.0
    computed_prices = tff.black_scholes.binary_price(
        volatilities=volatilities,
        strikes=strikes,
        expiries=expiries,
        forwards=forwards)
  # Expected print output of prices:
  # [0.         0.         0.15865525 0.99764937 0.85927418]
  ```

  #### References:

  [1] Hull, John C., Options, Futures and Other Derivatives. Pearson, 2018.
  [2] Wikipedia contributors. Binary option. Available at:
  https://en.wikipedia.org/w/index.php?title=Binary_option

  Args:
    volatilities: Real `Tensor` of any shape and dtype. The volatilities to
      expiry of the options to price.
    strikes: A real `Tensor` of the same dtype and compatible shape as
      `volatilities`. The strikes of the options to be priced.
    expiries: A real `Tensor` of same dtype and compatible shape as
      `volatilities`. The expiry of each option. The units should be such that
      `expiry * volatility**2` is dimensionless.
    spots: A real `Tensor` of any shape that broadcasts to the shape of the
      `volatilities`. The current spot price of the underlying. Either this
      argument or the `forwards` (but not both) must be supplied.
    forwards: A real `Tensor` of any shape that broadcasts to the shape of
      `volatilities`. The forwards to maturity. Either this argument or the
      `spots` must be supplied but both must not be supplied.
    discount_rates: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      If not `None`, discount factors are calculated as e^(-rT),
      where r are the discount rates, or risk free rates. At most one of
      discount_rates and discount_factors can be supplied.
      Default value: `None`, equivalent to r = 0 and discount factors = 1 when
      discount_factors also not given.
    dividend_rates: An optional real `Tensor` of same dtype as the
      `volatilities` and of the shape that broadcasts with `volatilities`.
      Continuously compounded dividend rates q. Only used when `spots` is
      supplied, in which case the forwards are computed as
      `spots * e^(-qT) / discount_factors`; ignored when `forwards` is given.
      Default value: `None`, equivalent to q = 0.
    discount_factors: An optional real `Tensor` of same dtype as the
      `volatilities`. If not None, these are the discount factors to expiry
      (i.e. e^(-rT)). If None, no discounting is applied (i.e. the undiscounted
      option price is returned). If `spots` is supplied and `discount_factors`
      is not None then this is also used to compute the forwards to expiry.
      Default value: None, equivalent to discount factors = 1.
    is_call_options: A boolean `Tensor` of a shape compatible with
      `volatilities`. Indicates whether the option is a call (if True) or a put
      (if False). If not supplied, call options are assumed.
    is_normal_volatility: An optional Python boolean specifying whether the
      `volatilities` correspond to lognormal Black volatility (if False) or
      normal Black volatility (if True).
      Default value: False, which corresponds to lognormal volatility.
    dtype: Optional `tf.DType`. If supplied, the dtype to be used for conversion
      of any supplied non-`Tensor` arguments to `Tensor`.
      Default value: None which maps to the default dtype inferred by TensorFlow
        (float32).
    name: str. The name for the ops created by this function.
      Default value: None which is mapped to the default name `binary_price`.

  Returns:
    binary_prices: A `Tensor` of the same shape as `forwards`. The Black
    Scholes price of the binary options.

  Raises:
    ValueError: If both `forwards` and `spots` are supplied or if neither is
      supplied.
    ValueError: If both `discount_rates` and `discount_factors` are supplied.
  """
  if (spots is None) == (forwards is None):
    raise ValueError('Either spots or forwards must be supplied but not both.')
  if (discount_rates is not None) and (discount_factors is not None):
    raise ValueError('At most one of discount_rates and discount_factors may '
                     'be supplied')

  with tf.name_scope(name or 'binary_price'):
    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    # All remaining inputs are converted to the dtype of `strikes`.
    dtype = strikes.dtype
    volatilities = tf.convert_to_tensor(
        volatilities, dtype=dtype, name='volatilities')
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')

    # Only the discount factors are needed downstream; rates are converted to
    # factors and never recovered from them (which would divide by `expiries`
    # and produce NaNs for options expiring immediately).
    if discount_rates is not None:
      discount_rates = tf.convert_to_tensor(
          discount_rates, dtype=dtype, name='discount_rates')
      discount_factors = tf.exp(-discount_rates * expiries)
    elif discount_factors is not None:
      discount_factors = tf.convert_to_tensor(
          discount_factors, dtype=dtype, name='discount_factors')
    else:
      discount_factors = tf.convert_to_tensor(
          1.0, dtype=dtype, name='discount_factors')

    if forwards is not None:
      forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
    else:
      spots = tf.convert_to_tensor(spots, dtype=dtype, name='spots')
      # F = S * e^((r - q) T) = S * e^(-q T) / discount_factor.
      forwards = spots / discount_factors
      if dividend_rates is not None:
        # Previously the dividend yield was silently dropped here.
        dividend_rates = tf.convert_to_tensor(
            dividend_rates, dtype=dtype, name='dividend_rates')
        forwards = forwards * tf.exp(-dividend_rates * expiries)

    sqrt_var = volatilities * tf.math.sqrt(expiries)

    if is_normal_volatility:  # normal model
      d2 = (forwards - strikes) / sqrt_var
    else:  # lognormal model
      d2 = tf.math.log(forwards / strikes) / sqrt_var - sqrt_var / 2

    # With zero total volatility the payoff is deterministic: the call pays
    # one unit exactly when the forward is above the strike.
    zero_volatility_call_payoff = tf.where(forwards > strikes,
                                           tf.ones_like(strikes, dtype=dtype),
                                           tf.zeros_like(strikes, dtype=dtype))
    undiscounted_calls = tf.where(sqrt_var > 0, _ncdf(d2),
                                  zero_volatility_call_payoff)

    if is_call_options is None:
      return discount_factors * undiscounted_calls

    # A binary put is the complement of the binary call.
    undiscounted_puts = 1 - undiscounted_calls
    predicate = tf.broadcast_to(is_call_options, tf.shape(undiscounted_calls))
    return discount_factors * tf.where(predicate, undiscounted_calls,
                                       undiscounted_puts)
Example #29
0
 def _variance(self):
     """Returns a tensor of zeros with the same shape and dtype as `loc`."""
     location = self.loc
     return tf.zeros_like(location)
Example #30
0
def _update_trajectory_grad(previous_kernel_results,
                            previous_state,
                            proposed_state,
                            proposed_velocity,
                            trajectory_jitter,
                            accept_prob,
                            step_size,
                            criterion_fn,
                            max_leapfrog_steps,
                            experimental_shard_axis_names=None,
                            experimental_chain_axis_names=None):
    """Updates the trajectory length.

    Differentiates the criterion with respect to a time offset `dt` applied
    along the proposed velocity (evaluated at `dt = 0`), averages that gradient
    weighted by the acceptance probability, and uses it for a clipped
    multiplicative (log-space) update of the maximum trajectory length with an
    Adam/RMSProp-style step size.

    Args:
      previous_kernel_results: Kernel results supporting `_replace`, from which
        `adaptation_rate`, `step`, `averaged_sq_grad`, `max_trajectory_length`
        and `averaged_max_trajectory_length` are read.
      previous_state: Passed unchanged as the first argument of `criterion_fn`.
      proposed_state: Structure of state parts; each part is shifted by
        `dt * velocity` before being passed to `criterion_fn`.
      proposed_velocity: Structure matching `proposed_state`, giving the
        direction in which each state part is shifted.
      trajectory_jitter: Factor multiplied into the raw trajectory gradient.
      accept_prob: Acceptance probabilities. Used as the third argument of
        `criterion_fn`, as the shape/dtype template for `dt`, and as the
        weights of the gradient average.
      step_size: Forwarded to `_clip_max_trajectory_length`.
      criterion_fn: Callable invoked as
        `criterion_fn(previous_state, adjusted_state, accept_prob)`.
      max_leapfrog_steps: Forwarded to `_clip_max_trajectory_length`.
      experimental_shard_axis_names: Forwarded to
        `_map_structure_up_to_with_axes` when shifting the state parts.
      experimental_chain_axis_names: Canonicalized and then passed to
        `_reduce_sum_with_axes` for both sums of the weighted average.

    Returns:
      `previous_kernel_results` with `criterion`, `max_trajectory_length`,
      `averaged_sq_grad` and `averaged_max_trajectory_length` replaced.
    """

    # Compute criterion grads.
    def leapfrog_action(dt):
        # This represents the effect on the criterion value as the state follows the
        # proposed velocity. This implicitly assumes an identity mass matrix.
        def adjust_state(x, v, shard_axes=None):
            # Expand `dt` on the right so it broadcasts against the state part.
            broadcasted_dt = distribute_lib.pbroadcast(
                bu.left_justified_expand_dims_like(dt, v), shard_axes)
            return x + broadcasted_dt * v

        adjusted_state = _map_structure_up_to_with_axes(
            proposed_state,
            adjust_state,
            proposed_state,
            proposed_velocity,
            experimental_shard_axis_names=experimental_shard_axis_names)
        return criterion_fn(previous_state, adjusted_state, accept_prob)

    # The derivative is taken at dt = 0, i.e. at the proposed state itself.
    criterion, trajectory_grad = gradient.value_and_gradient(
        leapfrog_action, tf.zeros_like(accept_prob))
    trajectory_grad *= trajectory_jitter

    # Weight by acceptance probability.
    experimental_chain_axis_names = distribute_lib.canonicalize_named_axis(
        experimental_chain_axis_names)
    # Zero out gradients from entries with negligible acceptance probability
    # and any non-finite gradients so they cannot contaminate the average.
    trajectory_grad = tf.where(accept_prob > 1e-4, trajectory_grad, 0.)
    trajectory_grad = tf.where(tf.math.is_finite(trajectory_grad),
                               trajectory_grad, 0.)
    # Ratio of sums; the 1e-20 keeps the denominator away from zero.
    trajectory_grad = (_reduce_sum_with_axes(
        trajectory_grad * accept_prob, None, experimental_chain_axis_names) /
                       _reduce_sum_with_axes(accept_prob + 1e-20, None,
                                             experimental_chain_axis_names))

    # Compute Adam/RMSProp step size.
    dtype = previous_kernel_results.adaptation_rate.dtype
    # One-based iteration count, as a float of the adaptation rate's dtype.
    iteration_f = tf.cast(previous_kernel_results.step, dtype) + 1.
    msg_adaptation_rate = 0.05
    # Exponential moving average of the squared gradient.
    new_averaged_sq_grad = (
        (1 - msg_adaptation_rate) * previous_kernel_results.averaged_sq_grad +
        msg_adaptation_rate * trajectory_grad**2)
    # Rescale by 1 - (1 - rate)**iteration (Adam-style correction of the
    # moving average); only the uncorrected average is stored in the results.
    adjusted_averaged_sq_grad = new_averaged_sq_grad / (
        1. - (1 - msg_adaptation_rate)**iteration_f)
    trajectory_step_size = (previous_kernel_results.adaptation_rate /
                            tf.sqrt(adjusted_averaged_sq_grad + 1e-20))

    # Apply the gradient. Clip absolute value to ~log(2)/2.
    # The update is multiplicative: the length changes by a factor of at most
    # exp(0.35) per call.
    log_update = tf.clip_by_value(trajectory_step_size * trajectory_grad,
                                  -0.35, 0.35)
    new_max_trajectory_length = previous_kernel_results.max_trajectory_length * tf.exp(
        log_update)

    # Iterate averaging.
    # Weighted average in log space with weight iteration**-0.5 on the new
    # value; the 1e-10 guards against log(0). Note this uses the length
    # *before* it is clipped below.
    average_weight = iteration_f**(-0.5)
    new_averaged_max_trajectory_length = tf.exp(
        average_weight * tf.math.log(new_max_trajectory_length) +
        (1 - average_weight) *
        tf.math.log(1e-10 +
                    previous_kernel_results.averaged_max_trajectory_length))

    # Clip the maximum trajectory length.
    new_max_trajectory_length = _clip_max_trajectory_length(
        new_max_trajectory_length, step_size,
        previous_kernel_results.adaptation_rate, max_leapfrog_steps)

    return previous_kernel_results._replace(
        criterion=criterion,
        max_trajectory_length=new_max_trajectory_length,
        averaged_sq_grad=new_averaged_sq_grad,
        averaged_max_trajectory_length=new_averaged_max_trajectory_length)