Example #1
 def _variance(self):
     variance = (tf.square(self.concentration * self.mixing_rate /
                           (self.mixing_concentration - 1.)) /
                 (self.mixing_concentration - 2.))
     if self.allow_nan_stats:
         nan = tf.fill(self.batch_shape_tensor(),
                       dtype_util.as_numpy_dtype(self.dtype)(np.nan),
                       name="nan")
         return tf.where(self.mixing_concentration > 2., variance, nan)
     else:
         return distribution_util.with_dependencies([
             assert_util.assert_less(
                 tf.ones([], self.dtype) * 2.,
                 self.mixing_concentration,
                 message=
                 "variance undefined when `mixing_concentration` <= 2"),
         ], variance)
Example #2
def compress(condition, a, axis=None):
    """Compresses `a` by selecting values along `axis` with `condition` true.

  Uses `tf.boolean_mask`.

  Args:
    condition: 1-d array of bools. If `condition` is shorter than the array
      axis (or the flattened array if axis is None), it is padded with False.
    a: array_like. Could be an ndarray, a Tensor or any object that can
      be converted to a Tensor using `tf.convert_to_tensor`.
    axis: Optional. Axis along which to select elements. If None, `condition` is
      applied to the flattened array.

  Returns:
    An ndarray.

  Raises:
    ValueError: if `condition` is not of rank 1.
  """
    condition = asarray(condition, dtype=bool)
    a = asarray(a)

    if condition.ndim != 1:
        raise ValueError('condition must be a 1-d array.')
    # `np.compress` treats scalars as 1-d arrays.
    if a.ndim == 0:
        a = ravel(a)

    if axis is None:
        a = ravel(a)
        axis = 0

    if axis < 0:
        axis += a.ndim

    assert axis >= 0 and axis < a.ndim

    # `tf.boolean_mask` requires the first dimensions of array and condition to
    # match. `np.compress` pads condition with False when it is shorter.
    condition_t = condition.data
    a_t = a.data
    if condition.shape[0] < a.shape[axis]:
        padding = tf.fill([a.shape[axis] - condition.shape[0]], False)
        condition_t = tf.concat([condition_t, padding], axis=0)
    return utils.tensor_to_ndarray(
        tf.boolean_mask(tensor=a_t, mask=condition_t, axis=axis))
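
A minimal usage sketch of the padding behaviour described in the docstring, written directly against `tf.boolean_mask` (the `asarray`/`ndarray` wrappers used above are assumed to behave like their NumPy counterparts):

import tensorflow as tf

a = tf.constant([[1, 2], [3, 4], [5, 6]])
condition = tf.constant([True])  # shorter than a.shape[0] == 3
# Pad the condition with False, as `compress` does, then mask along axis 0.
padding = tf.fill([tf.shape(a)[0] - tf.shape(condition)[0]], False)
mask = tf.concat([condition, padding], axis=0)  # [True, False, False]
print(tf.boolean_mask(tensor=a, mask=mask, axis=0))  # [[1, 2]]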
Example #3
    def _mean(self):
        mean = _broadcast_to_shape(self.loc, self._sample_shape())
        df = _broadcast_to_shape(self.df[..., tf.newaxis],
                                 tf.shape(input=mean))

        if self.allow_nan_stats:
            nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
            return tf.where(df > 1., mean,
                            tf.fill(tf.shape(input=mean), nan, name="nan"))
        else:
            with tf.control_dependencies([
                    assert_util.assert_less(
                        tf.cast(1., self.dtype),
                        df,
                        message="mean not defined for components of df <= 1"),
            ]):
                return tf.identity(mean)
Example #4
 def _mode(self):
     k = tf.cast(self.event_shape_tensor()[0], self.dtype)
     mode = (self.concentration -
             1.) / (self.total_concentration[..., tf.newaxis] - k)
     if self.allow_nan_stats:
         nan = tf.fill(tf.shape(input=mode),
                       np.array(np.nan, dtype=self.dtype.as_numpy_dtype()),
                       name="nan")
         return tf.where(
             tf.reduce_all(input_tensor=self.concentration > 1., axis=-1),
             mode, nan)
     return distribution_util.with_dependencies([
         assert_util.assert_less(
             tf.ones([], self.dtype),
             self.concentration,
             message="Mode undefined when any concentration <= 1"),
     ], mode)
Example #5
def _initial_discount_rates(bond_cashflows,
                            bond_cashflow_times,
                            present_values,
                            name='initial_discount_rates'):
    """Constructs a guess for the initial rates as the yields to maturity."""
    n = len(bond_cashflows)
    groups = []
    for i in range(n):
        groups.append(tf.fill(tf.shape(bond_cashflows[i]), i))
    bond_cashflows = tf.concat(bond_cashflows, axis=0)
    bond_cashflow_times = tf.concat(bond_cashflow_times, axis=0)
    groups = tf.concat(groups, axis=0)
    return cashflows.yields_from_pv(bond_cashflows,
                                    bond_cashflow_times,
                                    present_values,
                                    groups=groups,
                                    name=name)
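
A small sketch of the group-tagging pattern used above: `tf.fill` labels each bond's cashflows with its bond index before concatenation (the cashflow values below are made up for illustration).

import tensorflow as tf

bond_cashflows = [tf.constant([10., 10., 110.]), tf.constant([5., 105.])]
groups = tf.concat(
    [tf.fill(tf.shape(cf), i) for i, cf in enumerate(bond_cashflows)], axis=0)
# groups == [0, 0, 0, 1, 1]: which bond each concatenated cashflow belongs to.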
Example #6
  def fill(self, value, size, dtype, shape, name=None):
    """Fill a fresh batched Tensor of the given shape and dtype with `value`.

    Args:
      value: Scalar to fill with.
      size: Scalar `int` `Tensor` specifying the number of VM threads.
      dtype: `tf.DType` of the fill values to be returned.
      shape: Rank 1 `int` `Tensor`, the per-thread value shape.
      name: Optional name for the op.

    Returns:
      result: `Tensor` of `dtype` `value`s with shape `[size, *shape]`
    """
    with tf.name_scope(name or 'VM.fill'):
      size = tf.convert_to_tensor(value=size, name='size')
      shape = tf.convert_to_tensor(value=shape, name='shape', dtype=size.dtype)
      return tf.fill(tf.concat([[size], shape], axis=0),
                     value=tf.cast(value, dtype=dtype))
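
An illustrative sketch (not the VM class itself) of how the batched shape `[size, *shape]` is assembled and filled, with made-up values:

import tensorflow as tf

size = tf.convert_to_tensor(4)
shape = tf.convert_to_tensor([2, 3])
batched = tf.fill(tf.concat([[size], shape], axis=0),
                  value=tf.cast(7, dtype=tf.float32))
# batched has shape [4, 2, 3] and every entry equals 7.0.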
Example #7
def estimate_tail(func, target, shape, dtype):
    """Estimates approximate tail quantiles."""
    dtype = tf.as_dtype(dtype)
    shape = tf.convert_to_tensor(shape, tf.int32)
    target = tf.convert_to_tensor(target, dtype)
    opt = tf.keras.optimizers.Adam(learning_rate=.1)
    tails = tf.Variable(tf.zeros(shape, dtype=dtype),
                        trainable=False,
                        name="tails")
    loss = best_loss = tf.fill(shape, tf.constant(float("inf"), dtype=dtype))
    while tf.reduce_any(loss == best_loss):
        with tf.GradientTape(watch_accessed_variables=False) as tape:
            tape.watch(tails)
            loss = abs(func(tails) - target)
        best_loss = tf.minimum(best_loss, loss)
        gradient = tape.gradient(loss, tails)
        opt.apply_gradients([(gradient, tails)])
    return tails.value()
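
A hypothetical usage sketch (not from the original source): searching for the point where the standard normal CDF reaches roughly 0.999, starting from zeros.

import tensorflow as tf
import tensorflow_probability as tfp

tails = estimate_tail(tfp.distributions.Normal(0., 1.).cdf,
                      target=0.999, shape=[1], dtype=tf.float32)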
Example #8
    def _setup(self):
        """Setup relevant tensors for efficient computations."""

        reset_dates = []
        contract_idx = []
        daycount_fractions = []
        for i in range(self._batch_size):
            instr_reset_dates = dates.PeriodicSchedule(
                start_date=self._start_date[i] + self._rate_tenor,
                end_date=self._end_date[i],
                tenor=self._rate_tenor,
                holiday_calendar=self._holiday_calendar,
                roll_convention=dates.BusinessDayConvention.FOLLOWING).dates()

            # Prepend the start_date of the contract.
            instr_reset_dates = dates.DateTensor.concat(
                [self._start_date[i].expand_dims(axis=0), instr_reset_dates],
                axis=0)

            # Add one day beyond the end of the delivery period to compute the
            # accrual on the last day of the delivery.
            one_period_past_enddate = self._end_date[i] + self._rate_tenor
            instr_reset_dates = dates.DateTensor.concat([
                instr_reset_dates,
                one_period_past_enddate.expand_dims(axis=0)
            ],
                                                        axis=0)

            instr_daycount_fractions = rc.get_daycount_fraction(
                instr_reset_dates[:-1], instr_reset_dates[1:],
                self._daycount_convention, self._dtype)

            reset_dates.append(instr_reset_dates[:-1])
            daycount_fractions.append(instr_daycount_fractions)
            contract_idx.append(tf.fill(tf.shape(instr_daycount_fractions), i))

        self._reset_dates = dates.DateTensor.concat(reset_dates, axis=0)
        self._accrual_start_dates = self._reset_dates
        self._accrual_end_dates = self._reset_dates + self._rate_tenor
        self._accrual_daycount = rc.get_daycount_fraction(
            self._accrual_start_dates, self._accrual_end_dates,
            self._daycount_convention, self._dtype)
        self._daycount_fractions = tf.concat(daycount_fractions, axis=0)
        self._contract_idx = tf.concat(contract_idx, axis=0)
Example #9
        def mutate(current_state, log_scalings, num_steps,
                   inverse_temperature):
            """Mutate the state using a Transition kernel."""
            with tf.name_scope('mutate_states'):
                scalings = tf.exp(log_scalings)
                kernel = make_kernel_fn(make_tempered_target_log_prob_fn(
                    prior_log_prob_fn, likelihood_log_prob_fn,
                    inverse_temperature),
                                        current_state,
                                        scalings,
                                        seed=seed_stream)
                pkr = kernel.bootstrap_results(current_state)
                kernel_log_accept_ratio, _ = gather_mh_like_result(pkr)

                def mutate_onestep(i, state, pkr, log_accept_prob_sum):
                    next_state, next_kernel_results = kernel.one_step(
                        state, pkr)
                    kernel_log_accept_ratio, _ = gather_mh_like_result(pkr)
                    log_accept_prob = tf.minimum(kernel_log_accept_ratio, 0.)
                    log_accept_prob_sum = log_add_exp(log_accept_prob_sum,
                                                      log_accept_prob)
                    return i + 1, next_state, next_kernel_results, log_accept_prob_sum

                (
                    _, next_state, next_kernel_results, log_accept_prob_sum
                ) = tf.while_loop(
                    cond=lambda i, *args: i < num_steps,
                    body=mutate_onestep,
                    loop_vars=(
                        tf.zeros([], dtype=tf.int32),
                        current_state,
                        pkr,
                        # we accumulate the acceptance probability in log space.
                        tf.fill(
                            ps.shape(kernel_log_accept_ratio),
                            tf.constant(-np.inf,
                                        kernel_log_accept_ratio.dtype))),
                    parallel_iterations=parallel_iterations)
                _, kernel_target_log_prob = gather_mh_like_result(
                    next_kernel_results)
                avg_log_accept_prob_per_particle = log_accept_prob_sum - tf.math.log(
                    tf.cast(num_steps + 1, log_accept_prob_sum.dtype))
                return (next_state, avg_log_accept_prob_per_particle,
                        kernel_target_log_prob)
Example #10
 def _mode(self):
     mode = (self.concentration1 - 1.) / (self.total_concentration - 2.)
     if self.allow_nan_stats:
         nan = tf.fill(self.batch_shape_tensor(),
                       dtype_util.as_numpy_dtype(self.dtype)(np.nan),
                       name="nan")
         is_defined = tf.logical_and(self.concentration1 > 1.,
                                     self.concentration0 > 1.)
         return tf.where(is_defined, mode, nan)
     return distribution_util.with_dependencies([
         assert_util.assert_less(
             tf.ones([], dtype=self.dtype),
             self.concentration1,
             message="Mode undefined for concentration1 <= 1."),
         assert_util.assert_less(
             tf.ones([], dtype=self.dtype),
             self.concentration0,
             message="Mode undefined for concentration0 <= 1.")
     ], mode)
Example #11
def _validate_args_control_deps(bond_cashflows, bond_cashflow_times,
                                pv_settle_times):
    """Returns assertions for the validity of the arguments."""
    cashflows_are_strictly_increasing = []
    cashflow_after_settlement = []
    final_cashflow_is_the_largest = []
    for bond_index, bond_cashflow in enumerate(bond_cashflows):
        times = bond_cashflow_times[bond_index]
        time_difference = times[1:] - times[:-1]
        cashflows_are_strictly_increasing.append(
            tf.debugging.assert_positive(time_difference))
        cashflow_after_settlement.append(
            tf.debugging.assert_greater(times[0], pv_settle_times[bond_index]))
        final_cashflow_is_the_largest.append(
            tf.debugging.assert_greater(
                tf.fill(tf.shape(bond_cashflow[:-1]), bond_cashflow[-1]),
                bond_cashflow[:-1]))
    return (cashflow_after_settlement + cashflows_are_strictly_increasing +
            final_cashflow_is_the_largest)
Example #12
 def _mode(self):
     concentration = tf.convert_to_tensor(self.concentration)
     rate = tf.convert_to_tensor(self.rate)
     mode = (concentration - 1.) / rate
     if self.allow_nan_stats:
         assertions = []
     else:
         assertions = [
             assert_util.assert_less(
                 tf.ones([], self.dtype),
                 concentration,
                 message="Mode not defined when any concentration <= 1.")
         ]
     with tf.control_dependencies(assertions):
         nan = tf.fill(self._batch_shape_tensor(concentration=concentration,
                                                rate=rate),
                       dtype_util.as_numpy_dtype(self.dtype)(np.nan),
                       name="nan")
         return tf.where(concentration > 1., mode, nan)
Example #13
def make_prior(num_topics, initial_value):
    """Create the prior distribution.

  Args:
    num_topics: Number of topics.
    initial_value: The starting value for the prior parameters.

  Returns:
    prior: A `callable` that returns a `tfp.distributions.Distribution`
        instance, the prior distribution.
  """
    concentration = tfp.util.TransformedVariable(tf.fill([1, num_topics],
                                                         initial_value),
                                                 tfb.Softplus(),
                                                 name="concentration")

    return tfd.Dirichlet(concentration=tfp.util.DeferredTensor(
        concentration, _clip_dirichlet_parameters),
                         name="topics_prior")
Example #14
    def test_tril(self, use_default):
        if tf.executing_eagerly():
            self.skipTest(
                'b/169882656 Too many warnings are issued in eager logs')
        cov = 0.9 * tf.ones([3, 3]) + 0.1 * tf.eye(3)
        scale = tf.linalg.cholesky(cov)
        mv_tril = tfd.MultivariateNormalTriL(loc=[1., 2., 3.],
                                             scale_tril=scale)

        if use_default:
            momentum_distribution = None
            step_size = 0.3
        else:
            momentum_distribution = _CompositeMultivariateNormalPrecisionFactorLinearOperator(
                # TODO(b/170015229) Don't use the covariance as inverse scale,
                # it is the wrong preconditioner.
                precision_factor=tf.linalg.LinearOperatorFullMatrix(cov), )
            step_size = 1.1
        nuts_kernel = tfp.experimental.mcmc.PreconditionedNoUTurnSampler(
            target_log_prob_fn=mv_tril.log_prob,
            momentum_distribution=momentum_distribution,
            step_size=step_size,
            max_tree_depth=4)
        draws = tfp.mcmc.sample_chain(120,
                                      tf.zeros(3),
                                      kernel=nuts_kernel,
                                      seed=test_util.test_seed(),
                                      trace_fn=None)
        ess = tfp.mcmc.effective_sample_size(
            draws[-100:],
            filter_threshold=0,
            filter_beyond_positive_pairs=False)

        # TODO(b/170015229): These and other tests like it, which assert ess is
        # greater than some number, were all passing, even though the preconditioner
        # was the wrong one. Why is that? A guess is that since there are *many*
        # ways to have larger ess, these tests don't really test correctness.
        # Perhaps remove all tests like these.
        if not use_default:
            self.assertAllClose(ess, tf.fill([3], 100.))
        else:
            self.assertLess(self.evaluate(tf.reduce_min(ess)), 100.)
Example #15
  def _get_rpn_samples(self, match_results):
    """Computes anchor labels.

    This function performs subsampling for foreground (fg) and background (bg)
    anchors.
    Args:
      match_results: An integer tensor with shape [N] representing the
        matching results of anchors. (1) match_results[i]>=0,
        meaning that column i is matched with row match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.
    Returns:
      score_targets: an integer tensor with shape [N].
        (1) score_targets[i]=1, the anchor is a positive sample.
        (2) score_targets[i]=0, the anchor is a negative sample.
        (3) score_targets[i]=-1, the anchor is ignored (don't care).
    """
    sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=self._rpn_fg_fraction, is_static=False))
    # indicator includes both positive and negative labels.
    # labels includes only positive labels.
    # positives = indicator & labels.
    # negatives = indicator & !labels.
    # ignore = !indicator.
    indicator = tf.greater(match_results, -2)
    labels = tf.greater(match_results, -1)

    samples = sampler.subsample(
        indicator, self._rpn_batch_size_per_im, labels)
    positive_labels = tf.where(
        tf.logical_and(samples, labels),
        tf.constant(2, dtype=tf.int32, shape=match_results.shape),
        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
    negative_labels = tf.where(
        tf.logical_and(samples, tf.logical_not(labels)),
        tf.constant(1, dtype=tf.int32, shape=match_results.shape),
        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
    ignore_labels = tf.fill(match_results.shape, -1)

    return (ignore_labels + positive_labels + negative_labels,
            positive_labels, negative_labels)
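
A toy walk-through of the label arithmetic above, with made-up match results, showing how the -1 fill combines with the positive (2) and negative (1) markers to give the documented {1, 0, -1} encoding:

import tensorflow as tf

match_results = tf.constant([3, -1, -2, 0])
samples = tf.constant([True, True, False, True])
labels = tf.greater(match_results, -1)        # [True, False, False, True]
positive = tf.where(samples & labels, 2, 0)   # [2, 0, 0, 2]
negative = tf.where(samples & ~labels, 1, 0)  # [0, 1, 0, 0]
ignore = tf.fill(match_results.shape, -1)     # [-1, -1, -1, -1]
score_targets = ignore + positive + negative  # [1, 0, -1, 1]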
Example #16
    def test_diag(self, use_default):
        """Test that a diagonal multivariate normal can be effectively sampled from.

    Note that the effective sample size is expected to be exactly 100: this is
    because the step size is tuned well enough that a single HMC step takes
    a point to nearly the antipodal point, which causes a negative lag 1
    autocorrelation, and the effective sample size calculation cuts off when
    the autocorrelation drops below zero.

    Args:
      use_default: bool, whether to use a custom momentum distribution, or
        the default.
    """
        mvn = tfd.MultivariateNormalDiag(loc=[1., 2., 3.],
                                         scale_diag=[0.1, 1., 10.])

        if use_default:
            momentum_distribution = None
            step_size = 0.1
        else:
            momentum_distribution = tfde.MultivariateNormalPrecisionFactorLinearOperator(
                precision_factor=mvn.scale, )
            step_size = 0.3
        hmc_kernel = tfp.experimental.mcmc.PreconditionedHamiltonianMonteCarlo(
            target_log_prob_fn=mvn.log_prob,
            momentum_distribution=momentum_distribution,
            step_size=step_size,
            num_leapfrog_steps=10)
        draws = tfp.mcmc.sample_chain(110,
                                      tf.zeros(3),
                                      kernel=hmc_kernel,
                                      seed=test_util.test_seed(),
                                      trace_fn=None)
        ess = tfp.mcmc.effective_sample_size(
            draws[-100:],
            filter_threshold=0,
            filter_beyond_positive_pairs=False)

        if not use_default:
            self.assertAllClose(ess, tf.fill([3], 100.))
        else:
            self.assertLess(self.evaluate(tf.reduce_min(ess)), 100.)
Example #17
 def test_kahan_precision(self, jit=False):
   maybe_jit = lambda f: f
   if jit:
     self.skip_if_no_xla()
     maybe_jit = tf.function(jit_compile=True)
   stream = test_util.test_seed_stream()
   n = 20_000
   samps = tfd.Poisson(rate=1.).sample(n, seed=stream())
   log_rate = tf.fill([n], tfd.Normal(0, .2).sample(seed=stream()))
   pois = tfd.Poisson(log_rate=log_rate)
   lp_fn = maybe_jit(tfd.Independent(pois, reinterpreted_batch_ndims=1,
                                     experimental_use_kahan_sum=True).log_prob)
   lp = lp_fn(samps)
   pois64 = tfd.Poisson(log_rate=tf.cast(log_rate, tf.float64))
   lp64 = tfd.Independent(pois64, reinterpreted_batch_ndims=1).log_prob(
       tf.cast(samps, tf.float64))
   # Evaluate together to ensure we use the same samples.
   lp, lp64 = self.evaluate((tf.cast(lp, tf.float64), lp64))
   # Fails ~75% CPU, 1-75% GPU --vary_seed runs w/o experimental_use_kahan_sum.
   self.assertAllClose(lp64, lp, rtol=0., atol=.01)
Example #18
    def testMarginalLikelihoodGradientIsDefined(self):
        num_particles = 16
        seeds = samplers.split_seed(test_util.test_seed(), n=3)
        initial_state = self.evaluate(
            WeightedParticles(
                particles=samplers.normal([num_particles], seed=seeds[0]),
                log_weights=tf.fill([num_particles],
                                    -tf.math.log(float(num_particles)))))

        def propose_and_update_log_weights_fn(_,
                                              weighted_particles,
                                              transition_scale,
                                              seed=None):
            proposal_dist = tfd.Normal(loc=weighted_particles.particles,
                                       scale=1.)
            transition_dist = tfd.Normal(loc=weighted_particles.particles,
                                         scale=transition_scale)
            proposed_particles = proposal_dist.sample(seed=seed)
            return WeightedParticles(
                particles=proposed_particles,
                log_weights=(weighted_particles.log_weights +
                             transition_dist.log_prob(proposed_particles) -
                             proposal_dist.log_prob(proposed_particles)))

        def marginal_logprob(transition_scale):
            kernel = SequentialMonteCarlo(
                propose_and_update_log_weights_fn=functools.partial(
                    propose_and_update_log_weights_fn,
                    transition_scale=transition_scale))
            state, results = kernel.one_step(
                state=initial_state,
                kernel_results=kernel.bootstrap_results(initial_state),
                seed=seeds[1])
            state, results = kernel.one_step(state=state,
                                             kernel_results=results,
                                             seed=seeds[2])
            return results.accumulated_log_marginal_likelihood

        _, grad_lp = tfp.math.value_and_gradient(marginal_logprob, 1.5)
        self.assertIsNotNone(grad_lp)
        self.assertNotAllZero(grad_lp)
Example #19
def _prepare_grid(*, times, time_step, dtype):
    """Prepares grid of times for path generation.

  Args:
    times:  Rank 1 `Tensor` of increasing positive real values. The times at
      which the path points are to be evaluated.
    time_step: Rank 0 real `Tensor`. Maximal distance between points in
      resulting grid.
    dtype: `tf.Dtype` of the input and output `Tensor`s.

  Returns:
    Tuple `(all_times, mask, time_indices)`.
    `all_times` is a 1-D real `Tensor` containing all points from `times` and
    the uniform grid of points between `[0, times[-1]]` with grid size equal to
    `time_step`. The `Tensor` is sorted in ascending order and may contain
    duplicates.
    `mask` is a boolean 1-D `Tensor` of the same shape as `all_times`, showing
    which elements of `all_times` correspond to the values from `times`.
    Guarantees that times[0]=0 and mask[0]=False.
    `time_indices`. An integer `Tensor` of the same shape as `times` indicating
    `times` indices in `all_times`.
  """
    grid = tf.range(0.0, times[-1], time_step, dtype=dtype)
    all_times = tf.concat([times, grid], axis=0)
    # Remove duplicate points
    all_times = tf.unique(all_times).y
    # Sort sequence. Identify the time indices of interest
    # TODO(b/169400743): use tf.sort instead of argsort and casting when XLA
    # float64 support is extended for tf.sort
    args = tf.argsort(tf.cast(all_times, dtype=tf.float32))
    all_times = tf.gather(all_times, args)
    time_indices = tf.searchsorted(all_times, times, out_type=tf.int32)
    # Create a boolean mask to identify the iterations that have to be recorded.
    mask_sparse = tf.sparse.SparseTensor(indices=tf.expand_dims(tf.cast(
        time_indices, dtype=tf.int64),
                                                                axis=1),
                                         values=tf.fill(times.shape, True),
                                         dense_shape=all_times.shape)
    mask = tf.sparse.to_dense(mask_sparse)
    return all_times, mask, time_indices
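
A minimal sketch of the sparse mask construction above: mark the positions of `times` inside the larger sorted grid with True, leaving every other entry False (values below are illustrative).

import tensorflow as tf

all_times = tf.constant([0.0, 0.25, 0.5, 0.75, 1.0])
time_indices = tf.constant([2, 4], dtype=tf.int64)
mask = tf.sparse.to_dense(
    tf.sparse.SparseTensor(indices=time_indices[:, tf.newaxis],
                           values=tf.fill(tf.shape(time_indices), True),
                           dense_shape=tf.shape(all_times, out_type=tf.int64)))
# mask == [False, False, True, False, True]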
Example #20
    def _mode(self):
        a = self.concentration1
        b = self.concentration0
        mode = ((a - 1) / (a * b - 1))**(1. / a)
        if self.allow_nan_stats:
            nan = tf.fill(self.batch_shape_tensor(),
                          dtype_util.as_numpy_dtype(self.dtype)(np.nan),
                          name="nan")
            is_defined = (self.concentration1 > 1.) & (self.concentration0 >
                                                       1.)
            return tf.where(is_defined, mode, nan)

        return distribution_util.with_dependencies([
            assert_util.assert_less(
                tf.ones([], dtype=self.concentration1.dtype),
                self.concentration1,
                message="Mode undefined for concentration1 <= 1."),
            assert_util.assert_less(
                tf.ones([], dtype=self.concentration0.dtype),
                self.concentration0,
                message="Mode undefined for concentration0 <= 1.")
        ], mode)
Example #21
  def test_tril(self):
    cov = 0.9 * tf.ones([3, 3]) + 0.1 * tf.eye(3)
    scale = tf.linalg.cholesky(cov)
    mv_tril = tfd.MultivariateNormalTriL(loc=[1., 2., 3.],
                                         scale_tril=scale)

    if self.use_default_momentum_distribution:
      momentum_distribution = None
    else:
      momentum_distribution = tfde.MultivariateNormalPrecisionFactorLinearOperator(
          # TODO(b/170015229) Don't use the covariance as inverse scale,
          # it is the wrong preconditioner.
          precision_factor=tf.linalg.LinearOperatorFullMatrix(cov),
      )
    hmc_kernel = tfp.experimental.mcmc.PreconditionedHamiltonianMonteCarlo(
        target_log_prob_fn=mv_tril.log_prob,
        momentum_distribution=momentum_distribution,
        step_size=0.2,
        num_leapfrog_steps=10)
    draws = tfp.mcmc.sample_chain(
        120,
        tf.zeros(3),
        kernel=hmc_kernel,
        seed=test_util.test_seed(),
        trace_fn=None)
    ess = tfp.mcmc.effective_sample_size(draws[-100:],
                                         filter_threshold=0,
                                         filter_beyond_positive_pairs=False)

    # TODO(b/170015229): These and other tests like it, which assert ess is
    # greater than some number, were all passing, even though the preconditioner
    # was the wrong one. Why is that? A guess is that since there are *many*
    # ways to have larger ess, these tests don't really test correctness.
    # Perhaps remove all tests like these.
    if not self.use_default_momentum_distribution:
      self.assertAllClose(ess, tf.fill([3], 100.))
    else:
      self.assertLess(self.evaluate(tf.reduce_min(ess)), 100.)
Example #22
  def build(self, var_list):
    """Initialize optimizer variables.

    Args:
      var_list: list of model variables to build Ftrl variables on.
    """
    super().build(var_list)
    if hasattr(self, '_built') and self._built:
      return
    self._accumulators = []
    self._linears = []
    for var in var_list:
      self._accumulators.append(
          self.add_variable_from_reference(
              model_variable=var,
              variable_name='accumulator',
              initial_value=tf.cast(
                  tf.fill(dims=var.shape, value=self.initial_accumulator_value),
                  dtype=var.dtype)))
      self._linears.append(
          self.add_variable_from_reference(
              model_variable=var, variable_name='linear'))
    self._built = True
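
A minimal sketch of the accumulator initialization above: `tf.fill` builds a constant-filled tensor matching the variable's shape, which is then cast to the variable's dtype (variable and value below are illustrative).

import tensorflow as tf

var = tf.Variable(tf.zeros([2, 3], dtype=tf.float16))
initial_accumulator_value = 0.1
accumulator_init = tf.cast(
    tf.fill(dims=var.shape, value=initial_accumulator_value), dtype=var.dtype)
# accumulator_init: a [2, 3] float16 tensor filled with 0.1.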
Example #23
    def testOffsetWorksCorrectly(self):
        n = int(1e5)
        offset = tf.fill([n], 1.0)
        [
            model_matrix,
            response,
            model_coefficients_true,
            linear_response_true,
        ] = self.make_dataset(n=n, d=3, link='probit', offset=offset)
        model_coefficients, linear_response, is_converged, _ = tfp.glm.fit(
            model_matrix,
            response,
            tfp.glm.BernoulliNormalCDF(),
            offset=offset,
            fast_unsafe_numerics=self.fast,
            maximum_iterations=20)
        [
            model_coefficients_,
            linear_response_,
            is_converged_,
            model_coefficients_true_,
            linear_response_true_,
        ] = self.evaluate([
            model_coefficients,
            linear_response,
            is_converged,
            model_coefficients_true,
            linear_response_true,
        ])

        self.assertTrue(is_converged_)
        avg_response_diff = np.mean(linear_response_ - linear_response_true_)
        self.assertNear(0., avg_response_diff, err=3e-3)
        self.assertAllClose(model_coefficients_true_,
                            model_coefficients_,
                            atol=0.03,
                            rtol=0.15)
Example #24
    def test_diag(self, use_default):
        """Test that a diagonal multivariate normal can be effectively sampled from.

    Args:
      use_default: bool, whether to use a custom momentum distribution, or
        the default.
    """
        mvn = tfd.MultivariateNormalDiag(loc=[1., 2., 3.],
                                         scale_diag=[0.1, 1., 10.])

        if use_default:
            momentum_distribution = None
            step_size = 0.1
        else:
            momentum_distribution = _CompositeMultivariateNormalPrecisionFactorLinearOperator(
                precision_factor=mvn.scale, )
            step_size = 1.1
        nuts_kernel = tfp.experimental.mcmc.PreconditionedNoUTurnSampler(
            target_log_prob_fn=mvn.log_prob,
            momentum_distribution=momentum_distribution,
            step_size=step_size,
            max_tree_depth=4)
        draws = tfp.mcmc.sample_chain(110,
                                      tf.zeros(3),
                                      kernel=nuts_kernel,
                                      seed=test_util.test_seed(),
                                      trace_fn=None)
        ess = tfp.mcmc.effective_sample_size(
            draws[-100:],
            filter_threshold=0,
            filter_beyond_positive_pairs=False)

        if not use_default:
            self.assertAllClose(ess, tf.fill([3], 100.))
        else:
            self.assertLess(self.evaluate(tf.reduce_min(ess)), 100.)
Example #25
 def _log_prob(self, x):
     x = tf.convert_to_tensor(x, name='x')
     right_indices = tf.minimum(
         tf.size(self.outcomes) - 1,
         tf.reshape(
             tf.searchsorted(self.outcomes,
                             values=tf.reshape(x, shape=[-1]),
                             side='right'),
             dist_util.prefer_static_shape(x)))
     use_right_indices = self._is_equal_or_close(
         x, tf.gather(self.outcomes, indices=right_indices))
     left_indices = tf.maximum(0, right_indices - 1)
     use_left_indices = self._is_equal_or_close(
         x, tf.gather(self.outcomes, indices=left_indices))
     log_probs = self._categorical.log_prob(
         tf1.where(use_left_indices, left_indices, right_indices))
     should_be_neg_inf = tf.broadcast_to(
         tf.logical_not(use_left_indices | use_right_indices),
         shape=dist_util.prefer_static_shape(log_probs))
     return tf1.where(
         should_be_neg_inf,
         tf.fill(dist_util.prefer_static_shape(should_be_neg_inf),
                 dtype_util.as_numpy_dtype(log_probs.dtype)(-np.inf)),
         log_probs)
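
A small sketch of the -inf fill used above: entries flagged as impossible outcomes get log-probability -inf, while the rest keep their computed values.

import numpy as np
import tensorflow as tf

log_probs = tf.constant([-0.5, -1.2, -2.3])
should_be_neg_inf = tf.constant([False, True, False])
masked = tf.where(should_be_neg_inf,
                  tf.fill(tf.shape(log_probs), np.float32(-np.inf)),
                  log_probs)
# masked == [-0.5, -inf, -2.3]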
Example #26
  def call(self, lm_output, sentence_output, lm_label_ids, lm_label_weights,
           sentence_labels):
    """Implements call() for the layer."""
    lm_label_weights = tf.cast(lm_label_weights, tf.float32)
    lm_output = tf.cast(lm_output, tf.float32)
    sentence_output = tf.cast(sentence_output, tf.float32)

    mask_label_loss = losses.weighted_sparse_categorical_crossentropy_loss(
        labels=lm_label_ids, predictions=lm_output, weights=lm_label_weights)
    sentence_loss = losses.weighted_sparse_categorical_crossentropy_loss(
        labels=sentence_labels, predictions=sentence_output)
    loss = mask_label_loss + sentence_loss
    batch_shape = tf.slice(tf.shape(sentence_labels), [0], [1])
    # TODO(hongkuny): Avoids the hack and switches add_loss.
    final_loss = tf.fill(batch_shape, loss)

    # TODO(b/122840926): metrics use distribution strategy merge_call() and do
    # not work with tf.function(compile=True). Either fix this issue or move
    # metric aggregation outside the model.
    metric_outputs = self._add_metrics(lm_output, lm_label_ids,
                                       lm_label_weights, mask_label_loss,
                                       sentence_output, sentence_labels,
                                       sentence_loss)
    return final_loss, metric_outputs
Example #27
    def one_step(self, state, kernel_results, seed=None):
        """Takes one Sequential Monte Carlo inference step.

    Args:
      state: instance of `tfp.experimental.mcmc.WeightedParticles` representing
        the current particles with (log) weights. The `log_weights` must be
        a float `Tensor` of shape `[num_particles, b1, ..., bN]`. The
        `particles` may be any structure of `Tensor`s, each of which
        must have shape `concat([log_weights.shape, event_shape])` for some
        `event_shape`, which may vary across components.
      kernel_results: instance of
        `tfp.experimental.mcmc.SequentialMonteCarloResults` representing results
        from a previous step.
      seed: Optional seed for reproducible sampling.

    Returns:
      state: instance of `tfp.experimental.mcmc.WeightedParticles` representing
        new particles with (log) weights.
      kernel_results: instance of
        `tfp.experimental.mcmc.SequentialMonteCarloResults`.
    """
        with tf.name_scope(self.name):
            with tf.name_scope('one_step'):
                seed = samplers.sanitize_seed(seed)
                proposal_seed, resample_seed = samplers.split_seed(seed)

                state = WeightedParticles(*state)  # Canonicalize.
                num_particles = ps.size0(state.log_weights)

                # Propose new particles and update weights for this step, unless it's
                # the initial step, in which case, use the user-provided initial
                # particles and weights.
                proposed_state = self.propose_and_update_log_weights_fn(
                    # Propose state[t] from state[t - 1].
                    ps.maximum(0, kernel_results.steps - 1),
                    state,
                    seed=proposal_seed)
                is_initial_step = ps.equal(kernel_results.steps, 0)
                # TODO(davmre): this `where` assumes the state size didn't change.
                state = tf.nest.map_structure(
                    lambda a, b: tf.where(is_initial_step, a, b), state,
                    proposed_state)

                normalized_log_weights = tf.nn.log_softmax(state.log_weights,
                                                           axis=0)
                # Every entry of `log_weights` differs from `normalized_log_weights`
                # by the same normalizing constant. We extract that constant by
                # examining an arbitrary entry.
                incremental_log_marginal_likelihood = (
                    state.log_weights[0] - normalized_log_weights[0])

                do_resample = self.resample_criterion_fn(state)

                # Some batch elements may require resampling and others not, so
                # we first do the resampling for all elements, then select whether to
                # use the resampled values for each batch element according to
                # `do_resample`. If there were no batching, we might prefer to use
                # `tf.cond` to avoid the resampling computation on steps where it's not
                # needed---but we're ultimately interested in adaptive resampling
                # for statistical (not computational) purposes, so this isn't a
                # dealbreaker.
                resampled_particles, resample_indices = weighted_resampling.resample(
                    state.particles,
                    state.log_weights,
                    self.resample_fn,
                    seed=resample_seed)
                uniform_weights = tf.fill(
                    ps.shape(state.log_weights),
                    value=-tf.math.log(
                        tf.cast(num_particles, state.log_weights.dtype)))
                (resampled_particles, resample_indices,
                 log_weights) = tf.nest.map_structure(
                     lambda r, p: ps.where(do_resample, r, p),
                     (resampled_particles, resample_indices, uniform_weights),
                     (state.particles, _dummy_indices_like(resample_indices),
                      normalized_log_weights))

            return (
                WeightedParticles(particles=resampled_particles,
                                  log_weights=log_weights),
                SequentialMonteCarloResults(
                    steps=kernel_results.steps + 1,
                    parent_indices=resample_indices,
                    incremental_log_marginal_likelihood=(
                        incremental_log_marginal_likelihood),
                    accumulated_log_marginal_likelihood=(
                        kernel_results.accumulated_log_marginal_likelihood +
                        incremental_log_marginal_likelihood),
                    seed=seed))
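
A sketch of the uniform-weight reset above: after resampling, each particle's log-weight becomes -log(num_particles), so the normalized weights are all equal.

import tensorflow as tf

num_particles = 8
uniform_weights = tf.fill(
    [num_particles],
    value=-tf.math.log(tf.cast(num_particles, tf.float32)))
# tf.reduce_logsumexp(uniform_weights) == 0.0, i.e. the weights sum to one.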
Example #28
    def _build_sub_tree(self, directions, integrator, current_step_meta_info,
                        nsteps, initial_state, continue_tree, not_divergence,
                        momentum_state_memory):
        with tf.name_scope('build_sub_tree'):
            batch_shape = prefer_static.shape(
                current_step_meta_info.init_energy)
            # We never want to select the initial state.
            if MULTINOMIAL_SAMPLE:
                init_weight = tf.fill(
                    batch_shape,
                    tf.constant(
                        -np.inf,
                        dtype=current_step_meta_info.init_energy.dtype))
            else:
                init_weight = tf.zeros(batch_shape, dtype=TREE_COUNT_DTYPE)

            init_momentum_cumsum = [
                tf.zeros_like(x) for x in initial_state.momentum
            ]
            initial_state_candidate = TreeDoublingStateCandidate(
                state=initial_state.state,
                target=initial_state.target,
                target_grad_parts=initial_state.target_grad_parts,
                energy=initial_state.target,
                weight=init_weight)
            energy_diff_sum = tf.zeros_like(current_step_meta_info.init_energy,
                                            name='energy_diff_sum')
            [
                _,
                energy_diff_tree_sum,
                momentum_tree_cumsum,
                leapfrogs_taken,
                final_state,
                candidate_tree_state,
                final_continue_tree,
                final_not_divergence,
                momentum_state_memory,
            ] = tf.while_loop(
                cond=lambda iter_, energy_diff_sum, init_momentum_cumsum,  # pylint: disable=g-long-lambda
                leapfrogs_taken, state, state_c, continue_tree, not_divergence,
                momentum_state_memory: (
                    (iter_ < nsteps) & tf.reduce_any(continue_tree)),
                body=lambda iter_, energy_diff_sum, init_momentum_cumsum,  # pylint: disable=g-long-lambda
                leapfrogs_taken, state, state_c, continue_tree, not_divergence,
                momentum_state_memory: (self._loop_build_sub_tree(
                    directions, integrator, current_step_meta_info, iter_,
                    energy_diff_sum, init_momentum_cumsum, leapfrogs_taken,
                    state, state_c, continue_tree, not_divergence,
                    momentum_state_memory)),
                loop_vars=(
                    tf.zeros([], dtype=tf.int32, name='iter'),
                    energy_diff_sum,
                    init_momentum_cumsum,
                    tf.zeros(batch_shape, dtype=TREE_COUNT_DTYPE),
                    initial_state,
                    initial_state_candidate,
                    continue_tree,
                    not_divergence,
                    momentum_state_memory,
                ),
                parallel_iterations=self.parallel_iterations)

        return (
            candidate_tree_state,
            final_state,
            final_not_divergence,
            final_continue_tree,
            energy_diff_tree_sum,
            momentum_tree_cumsum,
            leapfrogs_taken,
        )
Example #29
def calibration(*,
                prices: types.RealTensor,
                strikes: types.RealTensor,
                expiries: types.RealTensor,
                forwards: types.RealTensor,
                is_call_options: types.BoolTensor,
                beta: types.RealTensor,
                nu: types.RealTensor,
                rho: types.RealTensor,
                volatility_type: SabrImpliedVolatilityType = None,
                approximation_type: SabrApproximationType = None,
                volatility_based_calibration: bool = True,
                alpha: types.RealTensor = None,
                alpha_lower_bound: types.RealTensor = None,
                alpha_upper_bound: types.RealTensor = None,
                calibrate_beta: bool = False,
                beta_lower_bound: types.RealTensor = 0.0,
                beta_upper_bound: types.RealTensor = 1.0,
                nu_lower_bound: types.RealTensor = 0.0,
                nu_upper_bound: types.RealTensor = 1.0,
                rho_lower_bound: types.RealTensor = -1.0,
                rho_upper_bound: types.RealTensor = 1.0,
                optimizer_fn: Callable[..., types.RealTensor] = None,
                tolerance: types.RealTensor = 1e-6,
                maximum_iterations: types.RealTensor = 100,
                validate_args: bool = False,
                dtype: tf.DType = None,
                name: str = None) -> CalibrationResult:
    """Calibrates the SABR model using European option prices.

  The SABR model specifies the risk neutral dynamics of the underlying as the
  following set of stochastic differential equations:

  ```
    dF = sigma F^beta dW_1
    dsigma = nu sigma dW_2
    dW1 dW2 = rho dt

    F(0) = f
    sigma(0) = alpha
  ```
  where F(t) represents the value of the forward price as a function of time,
  and sigma(t) is the volatility.

  Given a set of European option prices, this function estimates the SABR model
  parameters which best describe the input data. Calibration is done using the
  closed-form approximations for European option pricing.

  #### Example

  ```python
  import tf_quant_finance as tff
  import tensorflow.compat.v2 as tf

  dtype = np.float64

  # Set some market conditions.
  observed_prices = np.array(
      [[20.09689284, 10.91953054, 4.25012702, 1.11561839, 0.20815853],
       [3.34813209, 6.03578711, 10.2874194, 16.26824328, 23.73850935]],
      dtype=dtype)
  strikes = np.array(
      [[80.0, 90.0, 100.0, 110.0, 120.0], [80.0, 90.0, 100.0, 110.0, 120.0]],
      dtype=dtype)
  expiries = np.array([[0.5], [1.0]], dtype=dtype)
  forwards = 100.0
  is_call_options = np.array([[True], [False]])

  # Calibrate the model.
  # In this example, we are calibrating a SABR model using the lognormal
  # volatility approximation for implied volatility, and we explicitly fix the
  # betas ourselves.
  beta = np.array([0.5, 0.5], dtype=dtype)
  models, is_converged, _ = tff.models.sabr.approximations.calibration(
      prices=observed_prices,
      strikes=strikes,
      expiries=expiries,
      forwards=forwards,
      is_call_options=is_call_options,
      beta=beta,
      calibrate_beta=False,
      nu=np.array([1.0, 1.0], dtype=dtype),
      nu_lower_bound=0.0,
      nu_upper_bound=10.0,
      rho=np.array([0.0, 0.0], dtype=dtype),
      rho_lower_bound=-0.75,
      rho_upper_bound=0.75,
      maximum_iterations=1000)

  # This will return two `SabrModel`s, where:
  # Model 1 has alpha = 1.5, beta = 0.5, volvol = 0.33, and rho = 0.1
  # Model 2 has alpha = 2.5, beta = 0.5, volvol = 0.66, and rho = -0.1

  ```

  Args:
    prices: Real `Tensor` of shape [batch_size, num_strikes] specifying the
      observed options prices. Here, `batch_size` refers to the number of SABR
      models calibrated in this invocation.
    strikes: Real `Tensor` of shape [batch_size, num_strikes] specifying the
      strike prices of the options.
    expiries: Real `Tensor` of shape compatible with [batch_size, num_strikes]
      specifying the options expiries.
    forwards: Real `Tensor` of shape compatible with [batch_size, num_strikes]
      specifying the observed forward prices/rates.
    is_call_options: Boolean `Tensor` of shape compatible with [batch_size,
      num_strikes] specifying whether or not the prices correspond to a call
      option (=True) or a put option (=False).
    beta: Real `Tensor` of shape [batch_size], specifying the initial estimate
      of the model `beta`. Values must satisfy 0 <= `beta` <= 1.
    nu: Real `Tensor` of shape [batch_size], specifying the initial estimate of
      the vol-vol parameter. Values must satisfy 0 <= `nu`.
    rho: Real `Tensor` of shape [batch_size], specifying the initial estimate of
      the correlation between the forward price and the volatility. Values must
      satisfy -1 < `rho` < 1.
    volatility_type: Either `SabrImpliedVolatilityType.NORMAL` or `LOGNORMAL`.
      Default value: `None` which maps to `LOGNORMAL`.
    approximation_type: Instance of `SabrApproximationType`.
      Default value: `None` which maps to `HAGAN`.
    volatility_based_calibration: Boolean. If `True`, then the options prices
      are first converted to implied volatilities, and the calibration is then
      performed by minimizing the difference between input implied volatilities
      and the model implied volatilities. Otherwise, the calibration is
      performed by minimizing the mean-squared-loss of the *log1p* of the input
      and estimated European options prices.
      Default value: True
    alpha: Real `Tensor` of shape [batch_size], specifying the initial estimate
      of initial level of the volatility. Values must be strictly positive. If
      this is not provided, then an initial value will be estimated, along with
      lower and upper bounds.
      Default value: `None`, indicating that the routine should try to find a
        reasonable initial estimate.
    alpha_lower_bound: Real `Tensor` compatible with that of `alpha`, specifying
      the lower bound for the calibrated value. This is ignored if `alpha` is
      `None`.
      Default value: `None`.
    alpha_upper_bound: Real `Tensor` compatible with that of `alpha`, specifying
      the upper bound for the calibrated value. This is ignored if `alpha` is
      `None`.
      Default value: `None`.
    calibrate_beta: Boolean value indicating whether or not the `beta`
      parameters should be calibrated. If `True`, then the `beta_lower_bound`
      and `beta_upper_bound` must be specified. If `False`, then the model will
      use the values specified in `beta`.
      Default value: `False`.
    beta_lower_bound: Only used if `calibrate_beta` is True. Real `Tensor`
      compatible with that of `beta`, specifying the lower bound for the
      calibrated value.
      Default value: 0.0.
    beta_upper_bound: Only used if `calibrate_beta` is True. Real `Tensor`
      compatible with that of `beta`, specifying the upper bound for the
      calibrated value.
      Default value: 1.0.
    nu_lower_bound: Real `Tensor` compatible with that of `nu`, specifying the
      lower bound for the calibrated value.
      Default value: 0.0.
    nu_upper_bound: Real `Tensor` compatible with that of `nu`, specifying the
      upper bound for the calibrated value.
      Default value: 1.0.
    rho_lower_bound: Real `Tensor` compatible with that of `rho`, specifying the
      lower bound for the calibrated value.
      Default value: -1.0.
    rho_upper_bound: Real `Tensor` compatible with that of `rho`, specifying the
      upper bound for the calibrated value.
      Default value: 1.0.
    optimizer_fn: Optional Python callable which implements the algorithm used
      to minimize the objective function during calibration. It should have
      the following interface: result =
        optimizer_fn(value_and_gradients_function, initial_position, tolerance,
        max_iterations) `value_and_gradients_function` is a Python callable that
        accepts a point as a real `Tensor` and returns a tuple of `Tensor`s of
        real dtype containing the value of the function and its gradient at that
        point. 'initial_position' is a real `Tensor` containing the starting
        point of the optimization, 'tolerance' is a real scalar `Tensor` for
        stopping tolerance for the procedure and `max_iterations` specifies the
        maximum number of iterations.
      `optimizer_fn` should return a namedtuple containing the items: `position`
        (a tensor containing the optimal value), `converged` (a boolean
          indicating whether the optimizer converged according to the specified
        criteria), `failed` (a boolean indicating if the optimization resulted
        in a failure), `num_iterations` (the number of iterations used), and
        `objective_value` (the value of the objective function at the optimal
        value). The default value for `optimizer_fn` is `None`, in which case the
        conjugate gradient algorithm is used.
      Default value: `None` - indicating conjugate gradient minimizer.
    tolerance: Scalar `Tensor` of real dtype. The absolute tolerance for
      terminating the iterations.
      Default value: 1e-6.
    maximum_iterations: Scalar positive integer `Tensor`. The maximum number of
      iterations during the optimization.
      Default value: 100.
    validate_args: Boolean value indicating whether or not to validate the shape
      and values of the input arguments, at the potential expense of performance
      degradation.
      Default value: False.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None`, which means that default dtypes inferred by
        TensorFlow are used.
    name: String. The name to give to the ops created by this function.
      Default value: `None`, which maps to the default name 'sabr_calibration'.

  Returns:
    A Tuple of three elements. The first is a `CalibrationResult` holding the
    calibrated alpha, beta, volvol, and rho, where alpha[i] corresponds to the
    calibrated `alpha` of the i-th batch, etc.
    The second and third elements contains the optimization status
    (whether the optimization algorithm succeeded in finding the optimal point
    based on the specified convergence criteria) and the number of iterations
    performed.
  """
    if approximation_type is None:
        approximation_type = SabrApproximationType.HAGAN
    if volatility_type is None:
        volatility_type = SabrImpliedVolatilityType.LOGNORMAL
    name = name or 'sabr_calibration'
    with tf.name_scope(name):
        prices = tf.convert_to_tensor(prices, dtype=dtype, name='prices')
        dtype = dtype or prices.dtype
        batch_size = tf.shape(prices)[0]

        strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
        expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
        forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
        is_call_options = tf.convert_to_tensor(is_call_options,
                                               name='is_call',
                                               dtype=tf.bool)

        if optimizer_fn is None:
            optimizer_fn = optimizer.conjugate_gradient_minimize

        if alpha is None:
            # We set the initial value of alpha to be s.t. alpha * F^(beta - 1) is
            # on the order of 10%.
            initial_alpha_guess = tf.math.reduce_mean(forwards)
            alpha = tf.fill(dims=[batch_size], value=initial_alpha_guess)
            alpha = tf.pow(alpha, 1.0 - beta) * 0.1
            alpha_lower_bound = alpha * 0.1
            alpha_upper_bound = alpha * 10.0
        else:
            alpha_lower_bound = tf.convert_to_tensor(alpha_lower_bound,
                                                     dtype=dtype)
            alpha_upper_bound = tf.convert_to_tensor(alpha_upper_bound,
                                                     dtype=dtype)

        alpha = _assert_parameter_valid(validate_args,
                                        alpha,
                                        shape=[batch_size],
                                        lower_bound=alpha_lower_bound,
                                        upper_bound=alpha_upper_bound,
                                        message='`alpha` is invalid!')
        initial_alpha = _to_unconstrained(alpha, alpha_lower_bound,
                                          alpha_upper_bound)

        nu_lower_bound = tf.convert_to_tensor(nu_lower_bound, dtype=dtype)
        nu_upper_bound = tf.convert_to_tensor(nu_upper_bound, dtype=dtype)
        nu = _assert_parameter_valid(validate_args,
                                     nu,
                                     shape=[batch_size],
                                     lower_bound=nu_lower_bound,
                                     upper_bound=nu_upper_bound,
                                     message='`nu` is invalid!')
        initial_nu = _to_unconstrained(nu, nu_lower_bound, nu_upper_bound)

        rho_lower_bound = tf.convert_to_tensor(rho_lower_bound, dtype=dtype)
        rho_upper_bound = tf.convert_to_tensor(rho_upper_bound, dtype=dtype)
        rho = _assert_parameter_valid(validate_args,
                                      rho,
                                      shape=[batch_size],
                                      lower_bound=rho_lower_bound,
                                      upper_bound=rho_upper_bound,
                                      message='`rho` is invalid!')
        initial_rho = _to_unconstrained(rho, rho_lower_bound, rho_upper_bound)

        beta = tf.convert_to_tensor(beta, dtype=dtype)
        beta_lower_bound = tf.convert_to_tensor(beta_lower_bound, dtype=dtype)
        beta_upper_bound = tf.convert_to_tensor(beta_upper_bound, dtype=dtype)
        beta = _assert_parameter_valid(validate_args,
                                       beta,
                                       shape=[batch_size],
                                       lower_bound=beta_lower_bound,
                                       upper_bound=beta_upper_bound,
                                       message='`beta` is invalid!')
        if calibrate_beta:
            initial_beta = _to_unconstrained(beta, beta_lower_bound,
                                             beta_upper_bound)
            initial_x = tf.concat(
                [initial_alpha, initial_nu, initial_rho, initial_beta], axis=0)
        else:
            initial_x = tf.concat([initial_alpha, initial_nu, initial_rho],
                                  axis=0)

        optimizer_arg_handler = _OptimizerArgHandler(
            batch_size=batch_size,
            alpha_lower_bound=alpha_lower_bound,
            alpha_upper_bound=alpha_upper_bound,
            nu_lower_bound=nu_lower_bound,
            nu_upper_bound=nu_upper_bound,
            rho_lower_bound=rho_lower_bound,
            rho_upper_bound=rho_upper_bound,
            calibrate_beta=calibrate_beta,
            beta=beta,
            beta_lower_bound=beta_lower_bound,
            beta_upper_bound=beta_upper_bound)

        if volatility_based_calibration:
            loss_function = _get_loss_for_volatility_based_calibration(
                prices=prices,
                strikes=strikes,
                expiries=expiries,
                forwards=forwards,
                is_call_options=is_call_options,
                volatility_type=volatility_type,
                approximation_type=approximation_type,
                dtype=dtype,
                optimizer_arg_handler=optimizer_arg_handler)

        else:  # Price based calibration.
            loss_function = _get_loss_for_price_based_calibration(
                prices=prices,
                strikes=strikes,
                expiries=expiries,
                forwards=forwards,
                is_call_options=is_call_options,
                volatility_type=volatility_type,
                approximation_type=approximation_type,
                dtype=dtype,
                optimizer_arg_handler=optimizer_arg_handler)

        optimization_result = optimizer_fn(loss_function,
                                           initial_position=initial_x,
                                           tolerance=tolerance,
                                           max_iterations=maximum_iterations)

        calibration_parameters = optimization_result.position
        calibrated_alpha = optimizer_arg_handler.get_alpha(
            calibration_parameters)
        calibrated_nu = optimizer_arg_handler.get_nu(calibration_parameters)
        calibrated_rho = optimizer_arg_handler.get_rho(calibration_parameters)
        calibrated_beta = optimizer_arg_handler.get_beta(
            calibration_parameters)

        return (CalibrationResult(alpha=calibrated_alpha,
                                  beta=calibrated_beta,
                                  volvol=calibrated_nu,
                                  rho=calibrated_rho),
                optimization_result.converged,
                optimization_result.num_iterations)
 def fn(value):
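     # `new_shape` is assumed to be defined in the enclosing (elided) scope.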
     return tf.cast(tf.fill(dims=new_shape, value=value), tf.float32)