Example No. 1
 def body(lop):
   return AutoDiag(lop.matvec(tf.ones([3]) * 2.)),
 def instant_forward_rate_2d_fn(t):
     return 0.01 * tf.ones(t.shape.as_list() + [2], dtype=t.dtype)
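
A minimal usage sketch of the flat-forward-rate helper above (assuming TensorFlow is imported as `tf` and the function is in scope):

import tensorflow as tf

t = tf.constant([0.5, 1.0, 2.0], dtype=tf.float64)
rates = instant_forward_rate_2d_fn(t)  # shape [3, 2], every entry equal to 0.01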
Example No. 3
    def __init__(self,
                 loc,
                 scale,
                 skewness=None,
                 tailweight=None,
                 distribution=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='SinhArcsinh'):
        """Construct SinhArcsinh distribution on `(-inf, inf)`.

    Arguments `(loc, scale, skewness, tailweight)` must have broadcastable shape
    (indexing batch dimensions).  They must all have the same `dtype`.

    Args:
      loc: Floating-point `Tensor`.
      scale:  `Tensor` of same `dtype` as `loc`.
      skewness:  Skewness parameter.  Default is `0.0` (no skew).
      tailweight:  Tailweight parameter. Default is `1.0` (unchanged tailweight).
      distribution: `tf.Distribution`-like instance. Distribution that is
        transformed to produce this distribution.
        Default is `tfd.Normal(0., 1.)`.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a `SinhArcsinh` sample and `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
        parameters = dict(locals())

        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([loc, scale, skewness, tailweight],
                                            tf.float32)
            self._loc = tensor_util.convert_nonref_to_tensor(loc,
                                                             name='loc',
                                                             dtype=dtype)
            self._scale = tensor_util.convert_nonref_to_tensor(scale,
                                                               name='scale',
                                                               dtype=dtype)
            tailweight = 1. if tailweight is None else tailweight
            has_default_skewness = skewness is None
            skewness = 0. if has_default_skewness else skewness
            self._tailweight = tensor_util.convert_nonref_to_tensor(
                tailweight, name='tailweight', dtype=dtype)
            self._skewness = tensor_util.convert_nonref_to_tensor(
                skewness, name='skewness', dtype=dtype)

            batch_shape = distribution_util.get_broadcast_shape(
                self._loc, self._scale, self._tailweight, self._skewness)

            # Recall, with Z a random variable,
            #   Y := loc + scale * F(Z),
            #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight ) * C
            #   C := 2 / F_0(2)
            #   F_0(Z) := Sinh( Arcsinh(Z) * tailweight )
            if distribution is None:
                # TODO(b/151180729): When `batch_shape` arg to `TransformedDistribution`
                # is deprecated, broadcast `loc` or `scale` parameter to `batch_shape`
                # and remove `else` condition.
                distribution = normal.Normal(loc=tf.zeros([], dtype=dtype),
                                             scale=tf.ones([], dtype=dtype),
                                             allow_nan_stats=allow_nan_stats,
                                             validate_args=validate_args)
            else:
                asserts = distribution_util.maybe_check_scalar_distribution(
                    distribution, dtype, validate_args)
                if asserts:
                    self._loc = distribution_util.with_dependencies(
                        asserts, self._loc)

            # Make the SAS bijector, 'F'.
            f = sinh_arcsinh_bijector.SinhArcsinh(skewness=self._skewness,
                                                  tailweight=self._tailweight,
                                                  validate_args=validate_args)

            # Make the AffineScalar bijector, Z --> loc + scale * Z (2 / F_0(2))
            affine = affine_scalar_bijector.AffineScalar(
                shift=self._loc,
                scale=self._scale,
                validate_args=validate_args)

            bijector = chain_bijector.Chain([affine, f])

            super(SinhArcsinh, self).__init__(distribution=distribution,
                                              bijector=bijector,
                                              batch_shape=batch_shape,
                                              validate_args=validate_args,
                                              name=name)
            self._parameters = parameters
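
A minimal usage sketch of the constructor above (assuming TensorFlow Probability is available; the parameter values are illustrative only):

import tensorflow_probability as tfp
tfd = tfp.distributions

dist = tfd.SinhArcsinh(loc=0., scale=1., skewness=0.5, tailweight=2.)
x = dist.sample(1000, seed=42)  # right-skewed, heavy-tailed draws
lp = dist.log_prob(0.)          # evaluated through the transformed base Normal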
Example No. 4
    def test_inhomogeneous_poisson_process_example(self):
        # Toy 1D data.
        index_points = np.array([-10., -7.2, -4., -0.1, 0.1, 4., 6.2,
                                 9.]).reshape([-1, 1]).astype(np.float32)
        observed_counts = np.array([100, 90, 60, 13, 18, 37, 55,
                                    42]).astype(np.float32)

        # Trainable GP hyperparameters.
        kernel_log_amplitude = tf.Variable(0., name='kernel_log_amplitude')
        kernel_log_lengthscale = tf.Variable(0., name='kernel_log_lengthscale')
        observation_noise_log_scale = tf.Variable(
            0., name='observation_noise_log_scale')

        # Generative model.
        def model_fn():
            kernel = tfp.math.psd_kernels.ExponentiatedQuadratic(
                amplitude=tf.exp(kernel_log_amplitude),
                length_scale=tf.exp(kernel_log_lengthscale))
            latent_log_rates = yield tfd.JointDistributionCoroutine.Root(
                tfd.GaussianProcess(kernel,
                                    index_points=index_points,
                                    observation_noise_variance=tf.exp(
                                        observation_noise_log_scale),
                                    name='latent_log_rates'))
            yield tfd.Independent(tfd.Poisson(log_rate=latent_log_rates),
                                  reinterpreted_batch_ndims=1,
                                  name='y')

        model = tfd.JointDistributionCoroutine(model_fn, name='model')

        # Variational model.
        logit_locs = tf.Variable(tf.zeros(observed_counts.shape))
        logit_softplus_scales = tf.Variable(
            tf.ones(observed_counts.shape) * -1)

        def variational_model_fn():
            _ = yield tfd.JointDistributionCoroutine.Root(
                tfd.Independent(tfd.Normal(
                    loc=logit_locs,
                    scale=tf.nn.softplus(logit_softplus_scales)),
                                reinterpreted_batch_ndims=1))
            _ = yield tfd.VectorDeterministic(observed_counts)

        q = tfd.JointDistributionCoroutine(variational_model_fn,
                                           name='variational_model')

        losses, sample_path = tfp.vi.fit_surrogate_posterior(
            target_log_prob_fn=lambda *args: model.log_prob(args),
            surrogate_posterior=q,
            optimizer=tf.optimizers.Adam(learning_rate=0.1),
            num_steps=100,
            seed=test_util.test_seed(),
            sample_size=1,
            trace_fn=lambda t: (t.loss, q.sample(seed=42)[0]))

        self.evaluate(tf1.global_variables_initializer())
        losses_, sample_path_ = self.evaluate((losses, sample_path))
        self.assertLess(losses_[-1], 80.)  # Optimal loss is roughly 40.
        # Optimal latent logits are approximately the log observed counts.
        self.assertAllClose(sample_path_[-1],
                            np.log(observed_counts),
                            atol=1.0)
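
After fitting, the variational means approximate the latent log rates, so a rough post-fit check (a hedged sketch; `logit_locs` is the variable defined above) is:

approx_rates = tf.exp(logit_locs)  # should land near `observed_counts`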
Example No. 5
 def run(key):
     return tfp_dist.Sharded(
         tfd.Independent(tfd.Normal(tf.zeros(1), tf.ones(1)), 1),
         shard_axis_name=self.axis_name).sample(seed=key)
Example No. 6
 def testLossesForwarded(self):
     model = HasList()
     model_input = tf.ones([32, 2])
     model(model_input)
     self.assertEqual(2, len(model.losses))
Example No. 7
def least_square_mc(sample_paths,
                    exercise_times,
                    payoff_fn,
                    basis_fn,
                    discount_factors=None,
                    dtype=None,
                    name=None):
    """Values Amercian style options using the LSM algorithm.

  The Least-Squares Monte-Carlo (LSM) algorithm is a Monte-Carlo approach to
  valuation of American style options. Using the sample paths of underlying
  assets, and a user supplied payoff function it attempts to find the optimal
  exercise point along each sample path. With optimal exercise points known,
  the option is valued as the average payoff assuming optimal exercise
  discounted to present value.

  ## Example. American put option price through Monte Carlo
  ```python
  # Let the underlying model be a Black-Scholes process
  # dS_t / S_t = rate dt + sigma dW_t, S_0 = 1.0
  # with `rate = 0.1`, and volatility `sigma = 1.0`.
  # Define drift and volatility functions for log(S_t)
  rate = 0.1
  def drift_fn(_, x):
    return rate - tf.ones_like(x) / 2.
  def vol_fn(_, x):
    return tf.expand_dims(tf.ones_like(x), -1)
  # Use Euler scheme to propagate 100000 paths for 1 year into the future
  times = np.linspace(0., 1, num=50)
  num_samples = 100000
  log_paths = tf.function(tff.models.euler_sampling.sample)(
          dim=1,
          drift_fn=drift_fn, volatility_fn=vol_fn,
          random_type=tff.math.random.RandomType.PSEUDO_ANTITHETIC,
          times=times, num_samples=num_samples, seed=42, time_step=0.01)
  # Compute exponent to get samples of `S_t`
  paths = tf.math.exp(log_paths)
  # American put option price for strike 1.1 and expiry 1 (assuming actual day
  # count convention and no settlement adjustment)
  strike = [1.1]
  exercise_times = tf.range(times.shape[-1])
  discount_factors = tf.exp(-rate * times)
  payoff_fn = make_basket_put_payoff(strike)
  basis_fn = make_polynomial_basis(10)
  least_square_mc(paths, exercise_times, payoff_fn, basis_fn,
                  discount_factors=discount_factors)
  # Expected value: [0.397]
  # European put option price
  tff.black_scholes.option_price(volatilities=[1], strikes=strike,
                                 expiries=[1], spots=[1.],
                                 discount_factors=discount_factors[-1],
                                 is_call_options=False,
                                 dtype=tf.float64)
  # Expected value: [0.379]
  ```
  ## References

  [1] Longstaff, F.A. and Schwartz, E.S., 2001. Valuing American options by
  simulation: a simple least-squares approach. The review of financial studies,
  14(1), pp.113-147.

  Args:
    sample_paths: A `Tensor` of shape `[num_samples, num_times, dim]`, the
      sample paths of the underlying Ito process of dimension `dim` at
      `num_times` different points.
    exercise_times: An `int32` `Tensor` of shape `[num_exercise_times]`.
      Contents must be a subset of the integers `[0,...,num_times - 1]`,
      representing the ticks at which the option may be exercised.
    payoff_fn: Callable from a `Tensor` of shape `[num_samples, num_times, dim]`
      and an integer scalar positive `Tensor` (representing the current time
      index) to a `Tensor` of shape `[num_samples, payoff_dim]`
      of the same dtype as `samples`. The output represents the payout resulting
      from exercising the option at time `S`. The `payoff_dim` allows multiple
      options on the same underlying asset (i.e., `samples`) to be valued in
      parallel.
    basis_fn: Callable from a `Tensor` of shape `[num_samples, dim]` to a
      `Tensor` of shape `[basis_size, num_samples]` of the same dtype as
      `samples`. The result being the design matrix used in regression of the
      continuation value of options.
    discount_factors: A `Tensor` of shape `[num_exercise_times]` and the same
      `dtype` as `samples`, the k-th element of which represents the discount
      factor at time tick `k`.
      Default value: `None` which maps to a one-`Tensor` of the same `dtype`
        as `samples` and shape `[num_exercise_times]`.
    dtype: Optional `dtype`. Either `tf.float32` or `tf.float64`. If supplied,
      represents the `dtype` for the input and output `Tensor`s.
      Default value: `None`, which means that the `dtype` inferred by TensorFlow
      is used.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` which is mapped to the default name
      'least_square_mc'.
  Returns:
    A `Tensor` of shape `[num_samples, payoff_dim]` of the same dtype as
    `samples`.
  """
    with tf.compat.v1.name_scope(name,
                                 default_name='least_square_mc',
                                 values=[sample_paths, exercise_times]):
        # Conversion of the inputs to tensors
        sample_paths = tf.convert_to_tensor(sample_paths,
                                            dtype=dtype,
                                            name='sample_paths')
        exercise_times = tf.convert_to_tensor(exercise_times,
                                              name='exercise_times')
        num_times = exercise_times.shape.as_list()[-1]
        if discount_factors is None:
            discount_factors = tf.ones(shape=exercise_times.shape,
                                       dtype=sample_paths.dtype,
                                       name='discount_factors')
        else:
            discount_factors = tf.convert_to_tensor(discount_factors,
                                                    dtype=dtype,
                                                    name='discount_factors')
        discount_factors = tf.concat([[1], discount_factors], -1)
        # Initialise cashflow as the payoff at final sample.
        tick = exercise_times[num_times - 1]
        # Calculate the payoff of each path if exercised now. Shape
        # [num_samples, payoff_dim]
        exercise_value = payoff_fn(sample_paths, tick)
        zeros = tf.zeros(exercise_value.shape + [num_times - 1],
                         dtype=exercise_value.dtype)
        exercise_value = tf.expand_dims(exercise_value, -1)

        # Shape [num_samples, payoff_dim, num_exercise]
        cashflow = tf.concat([zeros, exercise_value], -1)
        # Starting state for loop iteration.
        lsm_loop_vars = LsmLoopVars(exercise_index=num_times - 1,
                                    cashflow=cashflow)

        def loop_body(exercise_index, cashflow):
            return _lsm_loop_body(sample_paths, exercise_times,
                                  discount_factors, payoff_fn, basis_fn,
                                  num_times, exercise_index, cashflow)

        loop_value = tf.while_loop(lsm_loop_cond, loop_body, lsm_loop_vars)
        present_values = continuation_value_fn(loop_value.cashflow,
                                               discount_factors, 0)
        return tf.math.reduce_mean(present_values, axis=0)
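
As a hedged illustration of the callable signatures documented in the Args above (the `make_basket_put_payoff` and `make_polynomial_basis` helpers used in the docstring example are assumed, not defined here), a minimal single-asset put payoff and polynomial basis could look like:

import tensorflow as tf

def put_payoff_fn(sample_paths, time_index):
  # sample_paths: [num_samples, num_times, dim]; take the spot at `time_index`.
  spot = sample_paths[:, time_index, 0]
  return tf.expand_dims(tf.nn.relu(1.1 - spot), axis=-1)  # [num_samples, 1]

def polynomial_basis_fn(state):
  # state: [num_samples, dim] -> design matrix of shape [basis_size, num_samples].
  s = state[:, 0]
  return tf.stack([tf.ones_like(s), s, s * s], axis=0)  # basis_size = 3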
Example No. 8
    def _sample_n(self, n, seed):
        batch_shape = self.batch_shape_tensor()
        event_shape = self.event_shape_tensor()
        batch_ndims = tf.shape(input=batch_shape)[0]

        ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
        shape = tf.concat([[n], batch_shape, event_shape], 0)
        stream = seed_stream.SeedStream(seed, salt="Wishart")

        # Complexity: O(nbk**2)
        x = tf.random.normal(shape=shape,
                             mean=0.,
                             stddev=1.,
                             dtype=self.dtype,
                             seed=stream())

        # Complexity: O(nbk)
        # This parametrization is equivalent to Chi2, i.e.,
        # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
        expanded_df = self.df * tf.ones(
            self.scale_operator.batch_shape_tensor(),
            dtype=dtype_util.base_dtype(self.df.dtype))

        g = tf.random.gamma(shape=[n],
                            alpha=self._multi_gamma_sequence(
                                0.5 * expanded_df, self.dimension),
                            beta=0.5,
                            dtype=self.dtype,
                            seed=stream())

        # Complexity: O(nbk**2)
        x = tf.linalg.band_part(x, -1, 0)  # Tri-lower.

        # Complexity: O(nbk)
        x = tf.linalg.set_diag(x, tf.sqrt(g))

        # Make batch-op ready.
        # Complexity: O(nbk**2)
        perm = tf.concat([tf.range(1, ndims), [0]], 0)
        x = tf.transpose(a=x, perm=perm)
        shape = tf.concat(
            [batch_shape, [event_shape[0]], [event_shape[1] * n]], 0)
        x = tf.reshape(x, shape)

        # Complexity: O(nbM) where M is the complexity of the operator solving a
        # vector system. For LinearOperatorLowerTriangular, each matmul is O(k^3) so
        # this step has complexity O(nbk^3).
        x = self.scale_operator.matmul(x)

        # Undo make batch-op ready.
        # Complexity: O(nbk**2)
        shape = tf.concat([batch_shape, event_shape, [n]], 0)
        x = tf.reshape(x, shape)
        perm = tf.concat([[ndims - 1], tf.range(0, ndims - 1)], 0)
        x = tf.transpose(a=x, perm=perm)

        if not self.input_output_cholesky:
            # Complexity: O(nbk**3)
            x = tf.matmul(x, x, adjoint_b=True)

        return x
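
For context, a hedged sketch of drawing Wishart samples through the public API, which ultimately exercises a `_sample_n` like the one above (`tfd.WishartTriL` is the entry point assumed here):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

w = tfd.WishartTriL(df=5., scale_tril=tf.eye(3))
x = w.sample(4, seed=42)  # shape [4, 3, 3]; symmetric positive-definite draws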
Example No. 9
    def _log_prob(self, x):
        if self.input_output_cholesky:
            x_sqrt = x
        else:
            # Complexity: O(nbk**3)
            x_sqrt = tf.linalg.cholesky(x)

        batch_shape = self.batch_shape_tensor()
        event_shape = self.event_shape_tensor()
        x_ndims = tf.rank(input=x_sqrt)
        num_singleton_axes_to_prepend = (
            tf.maximum(tf.size(input=batch_shape) + 2, x_ndims) - x_ndims)
        x_with_prepended_singletons_shape = tf.concat([
            tf.ones([num_singleton_axes_to_prepend], dtype=tf.int32),
            tf.shape(input=x_sqrt)
        ], 0)
        x_sqrt = tf.reshape(x_sqrt, x_with_prepended_singletons_shape)
        ndims = tf.rank(x_sqrt)
        # sample_ndims = ndims - batch_ndims - event_ndims
        sample_ndims = ndims - tf.size(input=batch_shape) - 2
        sample_shape = tf.shape(input=x_sqrt)[:sample_ndims]

        # We need to be able to pre-multiply each matrix by its corresponding
        # batch scale matrix. Since a Distribution Tensor supports multiple
        # samples per batch, this means we need to reshape the input matrix `x`
        # so that the first b dimensions are batch dimensions and the last two
        # are of shape [dimension, dimension * number_of_samples]. Doing these
        # gymnastics allows us to do a batch_solve.
        #
        # After we're done with sqrt_solve (the batch operation) we need to undo
        # this reshaping so what we're left with is a Tensor partitionable by
        # sample, batch, event dimensions.

        # Complexity: O(nbk**2) since transpose must access every element.
        scale_sqrt_inv_x_sqrt = x_sqrt
        perm = tf.concat(
            [tf.range(sample_ndims, ndims),
             tf.range(0, sample_ndims)], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt,
                                             perm=perm)
        last_dim_size = (
            tf.cast(self.dimension, dtype=tf.int32) * tf.reduce_prod(
                input_tensor=x_with_prepended_singletons_shape[:sample_ndims]))
        shape = tf.concat([
            x_with_prepended_singletons_shape[sample_ndims:-2],
            [tf.cast(self.dimension, dtype=tf.int32), last_dim_size]
        ],
                          axis=0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)

        # Complexity: O(nbM*k) where M is the complexity of the operator solving a
        # vector system. For LinearOperatorLowerTriangular, each solve is O(k**2) so
        # this step has complexity O(nbk^3).
        scale_sqrt_inv_x_sqrt = self.scale_operator.solve(
            scale_sqrt_inv_x_sqrt)

        # Undo make batch-op ready.
        # Complexity: O(nbk**2)
        shape = tf.concat([
            tf.shape(input=scale_sqrt_inv_x_sqrt)[:-2], event_shape,
            sample_shape
        ],
                          axis=0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)
        perm = tf.concat([
            tf.range(ndims - sample_ndims, ndims),
            tf.range(0, ndims - sample_ndims)
        ], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt,
                                             perm=perm)

        # Write V = SS', X = LL'. Then:
        # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
        #              = tr[inv(S) L L' inv(S)']
        #              = tr[(inv(S) L) (inv(S) L)']
        #              = sum_{ik} (inv(S) L)_{ik}**2
        # The second equality follows from the cyclic permutation property.
        # Complexity: O(nbk**2)
        trace_scale_inv_x = tf.reduce_sum(
            input_tensor=tf.square(scale_sqrt_inv_x_sqrt), axis=[-2, -1])

        # Complexity: O(nbk)
        half_log_det_x = tf.reduce_sum(input_tensor=tf.math.log(
            tf.linalg.diag_part(x_sqrt)),
                                       axis=[-1])

        # Complexity: O(nbk**2)
        log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                    0.5 * trace_scale_inv_x - self.log_normalization())

        # Set shape hints.
        # Try to merge what we know from the input x with what we know from the
        # parameters of this distribution.
        if tensorshape_util.rank(
                x.shape) is not None and tensorshape_util.rank(
                    self.batch_shape) is not None:
            tensorshape_util.set_shape(
                log_prob,
                tf.broadcast_static_shape(x.shape[:-2], self.batch_shape))

        return log_prob
Example No. 10
def soft_multivariate_quantiles(x, quantiles, quantile_width=None, **kwargs):
    """Computes soft multivariate quantiles via optimal transport.

  Transports the multivariate input values in x onto 2^d + 1 weighted target
  points: the origin plus the 2^d vertices of the hypercube {-1, 1}^d. Target
  weights are adjusted so that the values in x that are transported to the
  central point (the origin) are those concentrating around the quantile of
  interest.

  Args:
   x: Tensor<float> of shape [batch, N, d]
   quantiles: Tensor<float> of shape [r, d], r targeted quantiles of dimension d
   quantile_width: (float) mass given to the bucket supposed to attract points
     whose value concentrate around the desired quantile value. Bigger width
     means that we allow the soft quantile to be a mixture of more points
     further away from the quantile. If None, the width is set at 1/n where n is
     the number of values considered (the size along the 'axis').
   **kwargs: see sinkhorn.autodiff_sinkhorn for possible extra parameters.

  Returns:
    A Tensor<float> of shape [batch, r, d]: r multivariate quantiles per batch.

  """
    quantiles = tf.constant(quantiles, tf.float32)
    batch_size = x.shape[0]
    n = tf.cast(x.shape[1], tf.float32)
    d = x.shape[2]
    if quantile_width is None:
        quantile_width = 2 / n
    num_quantiles = tf.shape(quantiles)[0]
    hypercube_vertices = tf.constant(
        list(itertools.product([-1, 1], repeat=d)), tf.float32)
    # weights attached to vertices for each quantile. this is num_quantiles x 2^d
    weights = quantiles[:,
                        tf.newaxis, :]**(0.5 *
                                         (1 - hypercube_vertices))[tf.newaxis,
                                                                   Ellipsis]
    weights *= (1 - quantiles)[:, tf.newaxis, :]**(
        0.5 * (1 + hypercube_vertices))[tf.newaxis, Ellipsis]

    weights = (1 - quantile_width) * tf.reduce_prod(weights, axis=2)
    # adding weights for quantile itself (in position 0).
    weights = tf.concat((quantile_width * tf.ones(
        (num_quantiles, 1)), weights),
                        axis=1)
    # augmenting and formatting as batch_size x (2^d + 1) x num_quantiles
    weights = tf.reshape(tf.tile(tf.transpose(weights), [batch_size, 1]),
                         [batch_size, 2**d + 1, num_quantiles])
    # set target locations, by adding the point at 0 that will absorb the quantile
    # augment it with batch_size
    y = tf.concat((tf.zeros((1, d), dtype=tf.float32), hypercube_vertices),
                  axis=0)
    y = tf.reshape(tf.tile(y, [batch_size, 1]), [batch_size, 2**d + 1, d])
    # center x
    x_mean = tf.reduce_mean(x, axis=1)
    x = x - x_mean[:, tf.newaxis, :]
    transports = sinkhorn.autodiff_sinkhorn(
        x, y,
        tf.ones([batch_size, n, num_quantiles], dtype=tf.float32) / n, weights,
        **kwargs)

    # recover convex combinations resulting from transporting to the central
    # point in all batches and quantile variations.
    transports = 1 / quantile_width * tf.reshape(transports[:, :, 0, :],
                                                 [batch_size, n, -1])
    # apply these convex combinations to data points + recenter.
    all_soft_quantiles = tf.reduce_sum(
        transports[:, :, :, tf.newaxis] * x[:, :, tf.newaxis, :],
        axis=1) + x_mean[:, tf.newaxis, :]
    # reshape those quantiles after having applied convex combinations.
    return tf.reshape(all_soft_quantiles, [batch_size, num_quantiles, d])
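
A hedged usage sketch (shapes follow the Args above; assumes the function and its `sinkhorn` dependency are importable):

import tensorflow as tf

x = tf.random.normal([4, 100, 2])           # batch=4, N=100, d=2
q = [[0.5, 0.5], [0.9, 0.1]]                # r=2 target quantiles in 2-D
soft_q = soft_multivariate_quantiles(x, q)  # shape [4, 2, 2]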
Example No. 11
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None,
                                      multi_label=False,
                                      label_weights=None,
                                      thresholds_distributed_evenly=False):
    """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positives: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positives: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value, float tensor, python list, or tuple of float
      thresholds in `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    label_weights: (optional) tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).
    thresholds_distributed_evenly: Boolean, whether the thresholds are evenly
      distributed within the list. An optimized method will be used if this is
      the case. See _update_confusion_matrix_variables_optimized() for more
      details.

  Returns:
    Update op.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
    if multi_label and label_weights is not None:
        raise ValueError(
            '`label_weights` for multilabel data should be handled '
            'outside of `update_confusion_matrix_variables` when '
            '`multi_label` is True.')
    if variables_to_update is None:
        return
    if not any(key
               for key in variables_to_update if key in list(ConfusionMatrix)):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: '
            f'"{list(ConfusionMatrix)}". Received: "{variables_to_update.keys()}"'
        )

    variable_dtype = list(variables_to_update.values())[0].dtype

    y_true = tf.cast(y_true, dtype=variable_dtype)
    y_pred = tf.cast(y_pred, dtype=variable_dtype)

    if thresholds_distributed_evenly:
        # Check whether the thresholds have any leading or trailing epsilon added
        # for floating point imprecision. The leading and trailing thresholds will
        # be handled a bit differently as corner cases.
        # At this point, thresholds should be a list/array with more than 2 items,
        # whose values lie in [0, 1]. See is_evenly_distributed_thresholds() for more
        # details.
        thresholds_with_epsilon = thresholds[0] < 0.0 or thresholds[-1] > 1.0

    thresholds = tf.convert_to_tensor(thresholds, dtype=variable_dtype)
    num_thresholds = thresholds.shape.as_list()[0]

    if multi_label:
        one_thresh = tf.equal(tf.cast(1, dtype=tf.int32),
                              tf.rank(thresholds),
                              name='one_set_of_thresholds_cond')
    else:
        [y_pred, y_true
         ], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
                                                             sample_weight)
        one_thresh = tf.cast(True, dtype=tf.bool)

    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            f'Invalid keys: "{invalid_keys}". '
            f'Valid variable key options are: "{list(ConfusionMatrix)}"')

    if sample_weight is None:
        y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
            y_pred, y_true)
    else:
        sample_weight = tf.cast(sample_weight, dtype=variable_dtype)
        y_pred, y_true, sample_weight = (
            losses_utils.squeeze_or_expand_dimensions(
                y_pred, y_true, sample_weight=sample_weight))
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    if top_k is not None:
        y_pred = _filter_top_k(y_pred, top_k)
    if class_id is not None:
        y_true = y_true[..., class_id]
        y_pred = y_pred[..., class_id]

    if thresholds_distributed_evenly:
        return _update_confusion_matrix_variables_optimized(
            variables_to_update,
            y_true,
            y_pred,
            thresholds,
            multi_label=multi_label,
            sample_weights=sample_weight,
            label_weights=label_weights,
            thresholds_with_epsilon=thresholds_with_epsilon)

    pred_shape = tf.shape(y_pred)
    num_predictions = pred_shape[0]
    if y_pred.shape.ndims == 1:
        num_labels = 1
    else:
        num_labels = tf.math.reduce_prod(pred_shape[1:], axis=0)
    thresh_label_tile = tf.where(one_thresh, num_labels,
                                 tf.ones([], dtype=tf.int32))

    # Reshape predictions and labels, adding a dim for thresholding.
    if multi_label:
        predictions_extra_dim = tf.expand_dims(y_pred, 0)
        labels_extra_dim = tf.expand_dims(tf.cast(y_true, dtype=tf.bool), 0)
    else:
        # Flatten predictions and labels when not multilabel.
        predictions_extra_dim = tf.reshape(y_pred, [1, -1])
        labels_extra_dim = tf.reshape(tf.cast(y_true, dtype=tf.bool), [1, -1])

    # Tile the thresholds for every prediction.
    if multi_label:
        thresh_pretile_shape = [num_thresholds, 1, -1]
        thresh_tiles = [1, num_predictions, thresh_label_tile]
        data_tiles = [num_thresholds, 1, 1]
    else:
        thresh_pretile_shape = [num_thresholds, -1]
        thresh_tiles = [1, num_predictions * num_labels]
        data_tiles = [num_thresholds, 1]

    thresh_tiled = tf.tile(tf.reshape(thresholds, thresh_pretile_shape),
                           tf.stack(thresh_tiles))

    # Tile the predictions for every threshold.
    preds_tiled = tf.tile(predictions_extra_dim, data_tiles)

    # Compare predictions and threshold.
    pred_is_pos = tf.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = tf.tile(labels_extra_dim, data_tiles)

    if sample_weight is not None:
        sample_weight = tf.__internal__.ops.broadcast_weights(
            tf.cast(sample_weight, dtype=variable_dtype), y_pred)
        weights_tiled = tf.tile(tf.reshape(sample_weight, thresh_tiles),
                                data_tiles)
    else:
        weights_tiled = None

    if label_weights is not None and not multi_label:
        label_weights = tf.expand_dims(label_weights, 0)
        label_weights = tf.__internal__.ops.broadcast_weights(
            label_weights, y_pred)
        label_weights_tiled = tf.tile(tf.reshape(label_weights, thresh_tiles),
                                      data_tiles)
        if weights_tiled is None:
            weights_tiled = label_weights_tiled
        else:
            weights_tiled = tf.multiply(weights_tiled, label_weights_tiled)

    update_ops = []

    def weighted_assign_add(label, pred, weights, var):
        label_and_pred = tf.cast(tf.logical_and(label, pred), dtype=var.dtype)
        if weights is not None:
            label_and_pred *= tf.cast(weights, dtype=var.dtype)
        return var.assign_add(tf.reduce_sum(label_and_pred, 1))

    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    if update_fn or update_tn:
        pred_is_neg = tf.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos,
                                                      pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = tf.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg,
                                                      pred_is_pos)
        if update_tn:
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg,
                                                         pred_is_neg)

    for matrix_cond, (label, pred) in loop_vars.items():

        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))

    return tf.group(update_ops)
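
A minimal sketch of driving this update op (hedged; it assumes the single-label case and that the `ConfusionMatrix` enum referenced above is in scope):

import tensorflow as tf

thresholds = [0.25, 0.5, 0.75]
tp = tf.Variable(tf.zeros(len(thresholds)))
fp = tf.Variable(tf.zeros(len(thresholds)))
update_confusion_matrix_variables(
    {ConfusionMatrix.TRUE_POSITIVES: tp,
     ConfusionMatrix.FALSE_POSITIVES: fp},
    y_true=tf.constant([0., 1., 1., 0.]),
    y_pred=tf.constant([0.1, 0.9, 0.6, 0.4]),
    thresholds=thresholds)
# tp -> [2., 2., 1.],  fp -> [1., 0., 0.]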
Example No. 12
def softquantiles(x,
                  quantiles,
                  quantile_width=None,
                  axis=-1,
                  may_squeeze=True,
                  **kwargs):
    """Computes soft quantiles via optimal transport.

  This operator takes advantage of the fact that an exhaustive softsort is not
  required to recover a single quantile. Instead, one can transport all
  input values in x onto only 3 weighted values. Target weights are adjusted so
  that those values in x that are transported to the middle value in the target
  vector y correspond to those concentrating around the quantile of interest.

  This idea generalizes to more quantiles, interleaving small weights on the
  quantile indices and bigger weights in between, corresponding to the gap from
  one desired quantile to the next one.

  Args:
   x: Tensor<float> of any shape.
   quantiles: list<float> the quantiles to be returned. It can also be a single
     float.
   quantile_width: (float) mass given to the bucket supposed to attract points
     whose value concentrate around the desired quantile value. Bigger width
     means that we allow the soft quantile to be a mixture of more points
     further away from the quantile. If None, the width is set at 1/n where n is
     the number of values considered (the size along the 'axis').
   axis: (int) the axis along which to compute the quantile.
   may_squeeze: (bool) should we squeeze the output tensor in case of a single
     quantile.
   **kwargs: see SoftQuantilizer for possible extra parameters.

  Returns:
    A Tensor<float> similar to the input tensor, but the axis dimension is
    replaced by the number of quantiles specified in the quantiles list.
    Hence, if only one quantile is requested (quantiles is a float), only one value
    in that axis is returned. When several quantiles are requested, the tensor
    will have that many values in that axis.

  Raises:
    tf.errors.InvalidArgumentError when the quantiles and quantile width are not
    correct, namely quantiles are either not in sorted order or the
    quantile_width is too large.
  """
    if isinstance(quantiles, float):
        quantiles = [quantiles]
    quantiles = tf.constant(quantiles, tf.float32)

    # Preprocesses submitted quantiles to check that they satisfy elementary
    # constraints.
    valid_quantiles = tf.boolean_mask(
        quantiles, tf.logical_and(quantiles > 0.0, quantiles < 1.0))
    num_quantiles = tf.shape(valid_quantiles)[0]

    # Includes values on both ends of [0,1].
    extended_quantiles = tf.concat([[0.0], valid_quantiles, [1.0]], axis=0)

    # Builds filler_weights in between the target quantiles.
    filler_weights = extended_quantiles[1:] - extended_quantiles[:-1]
    if quantile_width is None:
        quantile_width = tf.reduce_min(
            tf.concat([
                filler_weights,
                [1.0 / tf.cast(tf.shape(x)[axis], dtype=x.dtype)]
            ],
                      axis=0))

    # Takes into account quantile_width in the definition of weights
    shift = -tf.ones(tf.shape(filler_weights), dtype=x.dtype)
    shift = shift + 0.5 * (tf.one_hot(0, num_quantiles + 1) +
                           tf.one_hot(num_quantiles, num_quantiles + 1))
    filler_weights = filler_weights + quantile_width * shift

    assert_op = tf.Assert(tf.reduce_all(filler_weights >= 0.0),
                          [filler_weights])
    with tf.control_dependencies([assert_op]):
        # Adds one more value to have tensors of the same shape to interleave them.
        quantile_weights = tf.ones(num_quantiles + 1) * quantile_width

        # Interleaves the filler_weights with the quantile weights.
        weights = tf.reshape(
            tf.stack([filler_weights, quantile_weights], axis=1), (-1, ))[:-1]

        # Sends only the positive weights to the softsort operator.
        positive_weights = tf.boolean_mask(weights, weights > 0.0)
        all_quantiles = softsort(x,
                                 direction='ASCENDING',
                                 axis=axis,
                                 target_weights=positive_weights,
                                 **kwargs)

        # Recovers the indices corresponding to the desired quantiles.
        odds = tf.math.floormod(tf.range(weights.shape[0], dtype=tf.float32),
                                2)
        positives = tf.cast(weights > 0.0, tf.float32)
        indices = tf.cast(tf.math.cumsum(positives) * odds, dtype=tf.int32)
        indices = tf.boolean_mask(indices, indices > 0) - 1
        result = tf.gather(all_quantiles, indices, axis=axis)

        # In the specific case where we want a single quantile, squeezes the
        # quantile dimension.
        can_squeeze = tf.equal(tf.shape(result)[axis], 1)
        if tf.math.logical_and(can_squeeze, may_squeeze):
            result = tf.squeeze(result, axis=axis)
        return result
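
A hedged usage sketch (assuming `softsort` and the other helpers this function relies on are importable):

import tensorflow as tf

x = tf.random.normal([3, 1000])
med = softquantiles(x, 0.5, axis=-1)         # soft median per row, shape [3]
deciles = softquantiles(x, [0.1, 0.5, 0.9])  # shape [3, 3]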
Example No. 13
def spherical_uniform(shape,
                      dimension,
                      dtype=tf.float32,
                      seed=None,
                      name=None):
    """Generates `Tensor` drawn from a uniform distribution on the sphere.

  Args:
    shape: Vector-shaped, `int` `Tensor` representing shape of output.
    dimension: Scalar `int` `Tensor`, representing the dimensionality of the
      space where the sphere is embedded.
    dtype: (Optional) TF `dtype` representing `dtype` of output.
      Default value: `tf.float32`.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
      Default value: `None` (i.e., no seed).
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., 'spherical_uniform').

  Returns:
    spherical_uniform: `Tensor` of shape `shape + [dimension]` and specified
      `dtype`, whose vectors along the final axis are drawn uniformly from the
      unit sphere embedded in `dimension`-dimensional space.
  """
    with tf.name_scope(name or 'spherical_uniform'):
        seed = samplers.sanitize_seed(seed)
        dimension = ps.convert_to_shape_tensor(
            ps.cast(dimension, dtype=tf.int32))
        shape = ps.convert_to_shape_tensor(shape, dtype=tf.int32)
        dimension_static = tf.get_static_value(dimension)
        sample_shape = ps.concat([shape, [dimension]], axis=0)
        sample_shape = ps.convert_to_shape_tensor(sample_shape)
        # Special case one and two dimensions. This is to guard against the case
        # where the normal samples are zero. This can happen in dimensions 1 and 2.
        if dimension_static is not None:
            # This is equivalent to sampling Rademacher random variables.
            if dimension_static == 1:
                return rademacher(sample_shape, dtype=dtype, seed=seed)
            elif dimension_static == 2:
                u = samplers.uniform(shape,
                                     minval=0,
                                     maxval=2 * np.pi,
                                     dtype=dtype,
                                     seed=seed)
                return tf.stack([tf.math.cos(u), tf.math.sin(u)], axis=-1)
            else:
                normal_samples = samplers.normal(shape=ps.concat(
                    [shape, [dimension_static]], axis=0),
                                                 seed=seed,
                                                 dtype=dtype)
                unit_norm = normal_samples / tf.norm(
                    normal_samples, ord=2, axis=-1)[..., tf.newaxis]
                return unit_norm

        # If we can't determine the dimension statically, tf.where between the
        # different options.
        r_seed, u_seed, n_seed = samplers.split_seed(
            seed, n=3, salt='spherical_uniform_dynamic_shape')
        rademacher_samples = rademacher(sample_shape, dtype=dtype, seed=r_seed)
        u = samplers.uniform(shape,
                             minval=0,
                             maxval=2 * np.pi,
                             dtype=dtype,
                             seed=u_seed)
        twod_samples = tf.concat([
            tf.math.cos(u)[..., tf.newaxis],
            tf.math.sin(u)[..., tf.newaxis] *
            tf.ones([dimension - 1], dtype=dtype)
        ],
                                 axis=-1)

        normal_samples = samplers.normal(shape=ps.concat([shape, [dimension]],
                                                         axis=0),
                                         seed=n_seed,
                                         dtype=dtype)
        nd_samples = normal_samples / tf.norm(normal_samples, ord=2,
                                              axis=-1)[..., tf.newaxis]

        return tf.where(
            tf.math.equal(dimension, 1), rademacher_samples,
            tf.where(tf.math.equal(dimension, 2), twod_samples, nd_samples))
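
A quick hedged check of the sampler above (seed handling follows `tfp.random.sanitize_seed`, which also accepts a two-integer stateless seed):

samples = spherical_uniform(shape=[5], dimension=3, seed=(0, 42))
norms = tf.norm(samples, axis=-1)  # samples has shape [5, 3]; norms are ~1.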
Example No. 14
 def test_nested(self):
   lop = AutoBlockDiag([AutoDiag(tf.ones([2]) * 2), AutoIdentity(1)])
   self.assertAllClose(
       tf.constant([6., 6, 3]),
       tf.function(lambda lop: lop.matvec(3. * tf.ones([3])))(lop))
Example No. 15
def _ones_like(input, dtype=None, name=None):  # pylint: disable=redefined-builtin
    s = _shape(input)
    if isinstance(s, (np.ndarray, np.generic)):
        return np.ones(s, _numpy_dtype(dtype or input.dtype))
    return tf.ones(s, dtype or input.dtype, name)
Example No. 16
  def _sample_n(self, num_samples, seed=None, name=None):
    """Returns a Tensor of samples from an LKJ distribution.

    Args:
      num_samples: Python `int`. The number of samples to draw.
      seed: Python integer seed for RNG
      name: Python `str` name prefixed to Ops created by this function.

    Returns:
      samples: A Tensor of correlation matrices with shape `[n, B, D, D]`,
        where `B` is the shape of the `concentration` parameter, and `D`
        is the `dimension`.

    Raises:
      ValueError: If `dimension` is negative.
    """
    if self.dimension < 0:
      raise ValueError(
          'Cannot sample negative-dimension correlation matrices.')
    # Notation below: B is the batch shape, i.e., tf.shape(concentration)
    seed = SeedStream(seed, 'sample_lkj')
    with tf.name_scope(name or 'sample_lkj'):
      concentration = tf.convert_to_tensor(self.concentration)
      if not dtype_util.is_floating(concentration.dtype):
        raise TypeError(
            'The concentration argument should have floating type, not '
            '{}'.format(dtype_util.name(concentration.dtype)))

      concentration = _replicate(num_samples, concentration)
      concentration_shape = tf.shape(concentration)
      if self.dimension <= 1:
        # For any dimension <= 1, there is only one possible correlation matrix.
        shape = tf.concat([
            concentration_shape, [self.dimension, self.dimension]], axis=0)
        return tf.ones(shape=shape, dtype=concentration.dtype)
      beta_conc = concentration + (self.dimension - 2.) / 2.
      beta_dist = beta.Beta(concentration1=beta_conc, concentration0=beta_conc)

      # Note that the sampler below deviates from [1], by doing the sampling in
      # cholesky space. This does not change the fundamental logic of the
      # sampler, but does speed up the sampling.

      # This is the correlation coefficient between the first two dimensions.
      # This is also `r` in reference [1].
      corr12 = 2. * beta_dist.sample(seed=seed()) - 1.

      # Below we construct the Cholesky of the initial 2x2 correlation matrix,
      # which is of the form:
      # [[1, 0], [r, sqrt(1 - r**2)]], where r is the correlation between the
      # first two dimensions.
      # This is the top-left corner of the cholesky of the final sample.
      first_row = tf.concat([
          tf.ones_like(corr12)[..., tf.newaxis],
          tf.zeros_like(corr12)[..., tf.newaxis]], axis=-1)
      second_row = tf.concat([
          corr12[..., tf.newaxis],
          tf.sqrt(1 - corr12**2)[..., tf.newaxis]], axis=-1)

      chol_result = tf.concat([
          first_row[..., tf.newaxis, :],
          second_row[..., tf.newaxis, :]], axis=-2)

      for n in range(2, self.dimension):
        # Loop invariant: on entry, result has shape B + [n, n]
        beta_conc = beta_conc - 0.5
        # norm is y in reference [1].
        norm = beta.Beta(
            concentration1=n/2.,
            concentration0=beta_conc
        ).sample(seed=seed())
        # distance shape: B + [1] for broadcast
        distance = tf.sqrt(norm)[..., tf.newaxis]
        # direction is u in reference [1].
        # direction shape: B + [n]
        direction = _uniform_unit_norm(
            n, concentration_shape, concentration.dtype, seed)
        # raw_correlation is w in reference [1].
        raw_correlation = distance * direction  # shape: B + [n]

        # This is the next row in the cholesky of the result,
        # which differs from the construction in reference [1].
        # In the reference, the new row `z` = chol_result @ raw_correlation^T
        # = C @ raw_correlation^T (where as short hand we use C = chol_result).
        # We prove that the below equation is the right row to add to the
        # cholesky, by showing equality with reference [1].
        # Let S be the sample constructed so far, and let `z` be as in
        # reference [1]. Then at this iteration, the new sample S' will be
        # [[S z^T]
        #  [z 1]]
        # In our case we have the cholesky decomposition factor C, so
        # we want our new row x (same size as z) to satisfy:
        #  [[S z^T]    [[C 0]   [[C^T  x^T]     [[CC^T   Cx^T]
        #   [z 1]]  =   [x k]]   [0     k]]  =   [xC^T   xx^T + k**2]]
        # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
        # we have that x = raw_correlation. Also 1 = xx^T + k**2, so k
        # = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2) = sqrt(1 -
        # distance**2).
        new_row = tf.concat(
            [raw_correlation, tf.sqrt(1. - norm[..., tf.newaxis])], axis=-1)

        # Finally add this new row, by growing the cholesky of the result.
        chol_result = tf.concat([
            chol_result,
            tf.zeros_like(chol_result[..., 0][..., tf.newaxis])], axis=-1)

        chol_result = tf.concat(
            [chol_result, new_row[..., tf.newaxis, :]], axis=-2)

      if self.input_output_cholesky:
        return chol_result

      result = tf.matmul(chol_result, chol_result, transpose_b=True)
      # The diagonal for a correlation matrix should always be ones. Due to
      # numerical instability the matmul might not achieve that, so manually set
      # these to ones.
      result = tf.linalg.set_diag(
          result, tf.ones(shape=tf.shape(result)[:-1], dtype=result.dtype))
      # This sampling algorithm can produce near-PSD matrices on which standard
      # algorithms such as `tf.cholesky` or `tf.linalg.self_adjoint_eigvals`
      # fail. Specifically, as documented in b/116828694, around 2% of trials
      # of 900,000 5x5 matrices (distributed according to 9 different
      # concentration parameter values) contained at least one matrix on which
      # the Cholesky decomposition failed.
      return result
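
For context, a hedged sketch of sampling through the public LKJ API that wraps a `_sample_n` like the one above:

import tensorflow_probability as tfp
tfd = tfp.distributions

lkj = tfd.LKJ(dimension=3, concentration=1.5)
corr = lkj.sample(2, seed=42)  # two 3x3 correlation matrices with unit diagonals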
Example No. 17
    def _parameter_control_dependencies(self, is_init):
        assertions = []

        logits = self._logits
        probs = self._probs
        param, name = (probs, 'probs') if logits is None else (logits,
                                                               'logits')

        # In init, we can always build shape and dtype checks because
        # we assume shape doesn't change for Variable backed args.
        if is_init:
            if not dtype_util.is_floating(param.dtype):
                raise TypeError(
                    'Argument `{}` must have floating type.'.format(name))

            msg = 'Argument `{}` must have rank at least 1.'.format(name)
            shape_static = tensorshape_util.dims(param.shape)
            if shape_static is not None:
                if len(shape_static) < 1:
                    raise ValueError(msg)
            elif self.validate_args:
                param = tf.convert_to_tensor(param)
                assertions.append(
                    assert_util.assert_rank_at_least(param, 1, message=msg))
                with tf.control_dependencies(assertions):
                    param = tf.identity(param)

            msg1 = 'Argument `{}` must have final dimension >= 1.'.format(name)
            msg2 = 'Argument `{}` must have final dimension <= {}.'.format(
                name, dtype_util.max(tf.int32))
            event_size = shape_static[-1] if shape_static is not None else None
            if event_size is not None:
                if event_size < 1:
                    raise ValueError(msg1)
                if event_size > dtype_util.max(tf.int32):
                    raise ValueError(msg2)
            elif self.validate_args:
                param = tf.convert_to_tensor(param)
                assertions.append(
                    assert_util.assert_greater_equal(tf.shape(param)[-1],
                                                     1,
                                                     message=msg1))
                # NOTE: For now, we leave out a runtime assertion that
                # `tf.shape(param)[-1] <= tf.int32.max`.  An earlier `tf.shape` call
                # will fail before we get to this point.

        if not self.validate_args:
            assert not assertions  # Should never happen.
            return []

        if probs is not None:
            probs = param  # reuse tensor conversion from above
            if is_init != tensor_util.is_ref(probs):
                probs = tf.convert_to_tensor(probs)
                one = tf.ones([], dtype=probs.dtype)
                assertions.extend([
                    assert_util.assert_non_negative(probs),
                    assert_util.assert_less_equal(probs, one),
                    assert_util.assert_near(
                        tf.reduce_sum(probs, axis=-1),
                        one,
                        message='Argument `probs` must sum to 1.'),
                ])

        return assertions
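
For context, a hedged illustration of the failure these checks surface, assuming the method belongs to a categorical-family distribution such as `tfd.Categorical` (an assumption, not confirmed by the snippet):

import tensorflow_probability as tfp
tfd = tfp.distributions

# Hypothetical: with validate_args=True, probs that do not sum to 1 trip the
# assert_near added above, raising 'Argument `probs` must sum to 1.'
dist = tfd.Categorical(probs=[0.2, 0.9], validate_args=True)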
Example No. 18
def _replicate(n, tensor):
  """Replicate the input tensor n times along a new (major) dimension."""
  # TODO(axch) Does this already exist somewhere?  Should it get contributed?
  multiples = tf.concat([[n], tf.ones([tf.rank(tensor)], dtype=n.dtype)],
                        axis=0)
  return tf.tile(tensor[tf.newaxis], multiples)
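
A quick hedged illustration of the helper (`n` is expected to arrive as an integer `Tensor`, as it does when `_sample_n` calls it):

t = tf.constant([[1., 2.], [3., 4.]])
stacked = _replicate(tf.constant(2), t)  # shape [2, 2, 2]: `t` repeated along a new leading axis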
Example No. 19
 def testLossesForwarded(self):
     model = HasTuple()
     model_input = tf.ones([32, 2])
     model(model_input)
     self.assertLen(model.losses, 1)
Example No. 20
    def _sample_n(self, n, seed=None):
        dim0_seed, otherdims_seed = samplers.split_seed(
            seed, salt='von_mises_fisher')
        # The sampling strategy relies on the fact that vMF variates are symmetric
        # about the mean direction. Accordingly, if we have a sampling strategy for
        # the away-from-mean angle, then we can uniformly sample the remaining
        # dimensions on the S^{dim-2} sphere, and rotate these samples from a
        # (1, 0, 0, ..., 0)-mode distribution into the target orientation.
        #
        # This is easy to imagine on the 1-sphere (S^1; in 2-D space): sample a
        # von-Mises distributed `x` value in [-1, 1], then uniformly select what
        # amounts to a "up" or "down" additional degree of freedom after unit
        # normalizing, followed by a final rotation to the desired mean direction
        # from a basis of (1, 0).
        #
        # On S^2 (in 3-D), selecting a vMF `x` identifies a circle in `yz` on the
        # unit sphere over which the distribution is uniform, in particular the
        # circle where x = \hat{x} intersects the unit sphere. We pick a point on
        # that circle, then rotate to the desired mean direction from a basis of
        # (1, 0, 0).
        mean_direction = tf.convert_to_tensor(self.mean_direction)
        concentration = tf.convert_to_tensor(self.concentration)
        event_dim = (
            tf.compat.dimension_value(self.event_shape[0])
            or self._event_shape_tensor(mean_direction=mean_direction)[0])

        sample_batch_shape = ps.concat(
            [[n],
             self._batch_shape_tensor(mean_direction=mean_direction,
                                      concentration=concentration)],
            axis=0)
        dim = tf.cast(event_dim - 1, self.dtype)
        if event_dim == 3:
            samples_dim0 = self._sample_3d(n,
                                           mean_direction=mean_direction,
                                           concentration=concentration,
                                           seed=dim0_seed)
        else:
            # Wood'94 provides a rejection algorithm to sample the x coordinate.
            # Wood'94 definition of b:
            # b = (-2 * kappa + tf.sqrt(4 * kappa**2 + dim**2)) / dim
            # https://stats.stackexchange.com/questions/156729 suggests:
            b = dim / (2 * concentration +
                       tf.sqrt(4 * concentration**2 + dim**2))
            # TODO(bjp): Integrate any useful numerical tricks from hyperspherical VAE
            #     https://github.com/nicola-decao/s-vae-tf/
            x = (1 - b) / (1 + b)
            c = concentration * x + dim * tf.math.log1p(-x**2)
            beta = beta_lib.Beta(dim / 2, dim / 2)

            def cond_fn(w, should_continue, seed):
                del w, seed
                return tf.reduce_any(should_continue)

            def body_fn(w, should_continue, seed):
                """While loop body for sampling the angle `w`."""
                beta_seed, unif_seed, next_seed = samplers.split_seed(seed,
                                                                      n=3)
                z = beta.sample(sample_shape=sample_batch_shape,
                                seed=beta_seed)
                # set_shape needed here because of b/139013403
                tensorshape_util.set_shape(z, w.shape)
                w = tf.where(should_continue,
                             (1. - (1. + b) * z) / (1. - (1. - b) * z), w)
                if not self.allow_nan_stats:
                    w = tf.debugging.check_numerics(w, 'w')
                unif = samplers.uniform(sample_batch_shape,
                                        seed=unif_seed,
                                        dtype=self.dtype)
                # set_shape needed here because of b/139013403
                tensorshape_util.set_shape(unif, w.shape)
                should_continue = should_continue & (
                    concentration * w + dim * tf.math.log1p(-x * w) - c <
                    # Use log1p(-unif) to prevent log(0) and ensure that log(1) is
                    # possible.
                    tf.math.log1p(-unif))
                return w, should_continue, next_seed

            w = tf.zeros(sample_batch_shape, dtype=self.dtype)
            should_continue = tf.ones(sample_batch_shape, dtype=tf.bool)
            samples_dim0, _, _ = tf.while_loop(cond=cond_fn,
                                               body=body_fn,
                                               loop_vars=(w, should_continue,
                                                          dim0_seed))
            samples_dim0 = samples_dim0[..., tf.newaxis]
        if not self.allow_nan_stats:
            # Verify samples are w/in -1, 1, with useful error output tensors (top
            # value rather than all values).
            with tf.control_dependencies([
                    assert_util.assert_less_equal(
                        samples_dim0,
                        dtype_util.as_numpy_dtype(self.dtype)(1.01)),
                    assert_util.assert_greater_equal(
                        samples_dim0,
                        dtype_util.as_numpy_dtype(self.dtype)(-1.01)),
            ]):
                samples_dim0 = tf.identity(samples_dim0)
        samples_otherdims_shape = ps.concat(
            [sample_batch_shape, [event_dim - 1]], axis=0)
        unit_otherdims = tf.math.l2_normalize(samplers.normal(
            samples_otherdims_shape, seed=otherdims_seed, dtype=self.dtype),
                                              axis=-1)
        samples = tf.concat(
            [
                samples_dim0,  # we must avoid sqrt(1 - (>1)**2)
                tf.sqrt(tf.maximum(1 - samples_dim0**2, 0.)) * unit_otherdims
            ],
            axis=-1)
        samples = tf.math.l2_normalize(samples, axis=-1)
        if not self.allow_nan_stats:
            samples = tf.debugging.check_numerics(samples, 'samples')

        # Runtime assert that samples are unit length.
        if not self.allow_nan_stats:
            worst, _ = tf.math.top_k(
                tf.reshape(tf.abs(1 - tf.linalg.norm(samples, axis=-1)), [-1]))
            with tf.control_dependencies([
                    assert_util.assert_near(dtype_util.as_numpy_dtype(
                        self.dtype)(0),
                                            worst,
                                            atol=1e-4,
                                            summarize=100)
            ]):
                samples = tf.identity(samples)
        # The samples generated are symmetric around a mode at (1, 0, 0, ...., 0).
        # Now, we move the mode to `self.mean_direction` using a rotation matrix.
        if not self.allow_nan_stats:
            # Assert that the basis vector rotates to the mean direction, as expected.
            basis = tf.cast(
                tf.concat([[1.], tf.zeros([event_dim - 1])], axis=0),
                self.dtype)
            with tf.control_dependencies([
                    assert_util.assert_less(
                        tf.linalg.norm(self._rotate(
                            basis, mean_direction=mean_direction) -
                                       mean_direction,
                                       axis=-1),
                        dtype_util.as_numpy_dtype(self.dtype)(1e-5))
            ]):
                return self._rotate(samples, mean_direction=mean_direction)
        return self._rotate(samples, mean_direction=mean_direction)
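In practice this sampler is reached through the public distribution class. A minimal sketch, assuming the method above is `VonMisesFisher._sample_n` from `tfp.distributions`:

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# `mean_direction` must be unit-norm; `concentration` controls how tightly
# samples cluster around it.
vmf = tfd.VonMisesFisher(
    mean_direction=tf.math.l2_normalize([1., 1., 0.]),
    concentration=10.)
samples = vmf.sample(5, seed=42)          # shape [5, 3], each row unit-norm
print(tf.linalg.norm(samples, axis=-1))   # all values close to 1.0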
Example No. 21
def _ones_like(input, dtype=None, name=None):  # pylint: disable=redefined-builtin
    s = _shape(input)
    s_ = tf.get_static_value(s)
    if s_ is not None:
        return np.ones(s_, dtype_util.as_numpy_dtype(dtype or input.dtype))
    return tf.ones(s, dtype or input.dtype, name)
Example No. 22
def logistic_regression(
    dataset_fn,
    name='logistic_regression',
):
    """Bayesian logistic regression with a Gaussian prior.

  Args:
    dataset_fn: A function to create a classification data set. The dataset must
      have binary labels.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity`.
  """
    with tf.name_scope(name) as name:
        dataset = dataset_fn()

        num_train_points = dataset.train_features.shape[0]
        num_test_points = dataset.test_features.shape[0]
        have_test = num_test_points > 0

        # Add bias.
        train_features = tf.concat(
            [dataset.train_features,
             tf.ones([num_train_points, 1])], axis=-1)
        train_labels = tf.convert_to_tensor(dataset.train_labels)
        test_features = tf.concat(
            [dataset.test_features,
             tf.ones([num_test_points, 1])], axis=-1)
        test_labels = tf.convert_to_tensor(dataset.test_labels)
        num_features = int(train_features.shape[1])

        root = tfd.JointDistributionCoroutine.Root
        zero = tf.zeros(num_features)
        one = tf.ones(num_features)

        def model_fn(features):
            weights = yield root(tfd.Independent(tfd.Normal(zero, one), 1))
            logits = tf.einsum('nd,...d->...n', features, weights)
            yield tfd.Independent(tfd.Bernoulli(logits=logits), 1)

        train_joint_dist = tfd.JointDistributionCoroutine(
            functools.partial(model_fn, features=train_features))
        test_joint_dist = tfd.JointDistributionCoroutine(
            functools.partial(model_fn, features=test_features))
        dist = joint_distribution_posterior.JointDistributionPosterior(
            train_joint_dist, (None, train_labels))

        expectations = {
            'params':
            target_spec.expectation(
                fn=lambda params: params[0],
                human_name='Parameters',
            )
        }
        if have_test:
            expectations['test_nll'] = target_spec.expectation(
                fn=lambda params: (  # pylint: disable=g-long-lambda
                    -test_joint_dist.sample_distributions(value=params)[0][-1].
                    log_prob(test_labels)),
                human_name='Test NLL',
            )
            expectations['per_example_test_nll'] = target_spec.expectation(
                fn=lambda params: (  # pylint: disable=g-long-lambda
                    -test_joint_dist.sample_distributions(value=params)[0][-1].
                    distribution.log_prob(test_labels)),
                human_name='Per-example Test NLL',
            )

        return target_spec.TargetDensity.from_distribution(
            distribution=dist,
            constraining_bijectors=(tfb.Identity(), ),
            expectations=expectations,
            code_name='{}_{}'.format(dataset.code_name, name),
            human_name='{} Logistic Regression'.format(dataset.human_name),
        )
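A sketch of calling `logistic_regression` with a toy dataset. The `ToyDataset` tuple below is hypothetical and only mirrors the attributes the function actually reads (`train_features`, `train_labels`, `test_features`, `test_labels`, `code_name`, `human_name`); the module-level imports used inside `logistic_regression` are assumed to be available:

import collections
import numpy as np

ToyDataset = collections.namedtuple(
    'ToyDataset',
    ['train_features', 'train_labels', 'test_features', 'test_labels',
     'code_name', 'human_name'])

def toy_dataset_fn():
    rng = np.random.RandomState(0)
    features = rng.randn(60, 3).astype(np.float32)
    labels = (features.sum(axis=-1) > 0.).astype(np.int32)  # binary labels
    return ToyDataset(train_features=features[:50], train_labels=labels[:50],
                      test_features=features[50:], test_labels=labels[50:],
                      code_name='toy', human_name='Toy')

# Returns a TargetDensity whose distribution is the posterior over the
# (num_features + 1) regression weights, including the bias column added above.
target = logistic_regression(dataset_fn=toy_dataset_fn)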
Example No. 23
    def test_increment_log_prob(self):

        root = tfd.JointDistributionCoroutine.Root
        prior_mean = 3.
        x_size = 100

        def custom_ll(w, x):
            return tf.reduce_sum(tfd.Normal(w, 1.).log_prob(x))

        def ulp_grad(w, x):
            @tfp.experimental.distribute.JointDistributionCoroutine
            def sharded_model():
                w = yield root(tfd.Normal(prior_mean, 1.))
                yield root(
                    sharded.Sharded(increment_log_prob.IncrementLogProb(
                        custom_ll(w, x)),
                                    shard_axis_name=self.axis_name))

            def ulp_fn(w):
                zeros = tf.zeros([x_size, 0])
                return sharded_model.unnormalized_log_prob(w, zeros)

            ulp, g = tfp.math.value_and_gradient(ulp_fn, (w, ))
            return ulp, g

        def true_ulp_grad(w, x):
            @tfd.JointDistributionCoroutine
            def model():
                w = yield root(tfd.Normal(prior_mean, 1.))
                yield root(increment_log_prob.IncrementLogProb(custom_ll(w,
                                                                         x)))

            def ulp_fn(w):
                zeros = tf.zeros([x_size, 0])
                return model.unnormalized_log_prob(w, zeros)

            ulp, g = tfp.math.value_and_gradient(ulp_fn, (w, ))
            return ulp, g

        def test_w_x(w, x):
            sharded_x = self.shard_values(
                tf.reshape(x, [test_lib.NUM_DEVICES, -1]))

            lp, g = self.evaluate(
                self.per_replica_to_tensor(
                    self.strategy_run(ulp_grad, (
                        w,
                        sharded_x,
                    ),
                                      in_axes=(None, 0))))
            true_lp, true_g = self.evaluate(true_ulp_grad(w, x))

            self.assertAllClose(true_lp, lp[0])
            self.assertAllClose(true_g[0], g[0][0])

        w = tf.constant(4.)
        zeros = tf.zeros([x_size])
        test_w_x(w, zeros)
        random_x = self.evaluate(
            tfd.Normal(loc=tf.zeros([x_size]),
                       scale=tf.ones([x_size])).sample(seed=self.key))
        test_w_x(w, random_x)
Example No. 24
 def test_where_fallback(self):
   self.assertAllEqual([1., 0.],
                       ps.smart_where(
                           tf.constant([True, False]),
                           lambda: tf.ones([]),
                           lambda: tf.zeros([])))
Example No. 25
 def compute_mask(self, inputs, mask=None):
     if mask is not None:
         return tf.ones(())
     else:
         return tf.zeros(())
Example No. 26
 def test_ones_like(self):
   x = tf1.placeholder_with_default(tf.ones([2], dtype=tf.float32),
                                    shape=None)
   self.assertEqual(dtype_util.convert_to_dtype(ps.ones_like(x)),
                    tf.float32)
Example No. 27
def percentile(x,
               q,
               axis=None,
               interpolation=None,
               keepdims=False,
               validate_args=False,
               preserve_gradients=True,
               keep_dims=None,
               name=None):
    """Compute the `q`-th percentile(s) of `x`.

  Given a vector `x`, the `q`-th percentile of `x` is the value `q / 100` of the
  way from the minimum to the maximum in a sorted copy of `x`.

  The values and distances of the two nearest neighbors as well as the
  `interpolation` parameter will determine the percentile if the normalized
  ranking does not match the location of `q` exactly.

  This function is the same as the median if `q = 50`, the same as the minimum
  if `q = 0` and the same as the maximum if `q = 100`.

  Multiple percentiles can be computed at once by using a `1-D` vector `q`.
  Dimension zero of the returned `Tensor` will index the different percentiles.

  Compare to `numpy.percentile`.

  Args:
    x:  Numeric `N-D` `Tensor` with `N > 0`.  If `axis` is not `None`,
      `x` must have statically known number of dimensions.
    q:  Scalar or vector `Tensor` with values in `[0, 100]`. The percentile(s).
    axis:  Optional `0-D` or `1-D` integer `Tensor` with constant values. The
      axis that index independent samples over which to return the desired
      percentile.  If `None` (the default), treat every dimension as a sample
      dimension, returning a scalar.
    interpolation : {'nearest', 'linear', 'lower', 'higher', 'midpoint'}.
      Default value: 'nearest'.  This specifies the interpolation method to
      use when the desired quantile lies between two data points `i < j`:
        * linear: i + (j - i) * fraction, where fraction is the fractional part
          of the index surrounded by i and j.
        * lower: `i`.
        * higher: `j`.
        * nearest: `i` or `j`, whichever is nearest.
        * midpoint: (i + j) / 2.
      `linear` and `midpoint` interpolation do not work with integer dtypes.
    keepdims:  Python `bool`. If `True`, the last dimension is kept with size 1.
      If `False`, the last dimension is removed from the output shape.
    validate_args:  Whether to add runtime checks of argument validity. If
      False, and arguments are incorrect, correct behavior is not guaranteed.
    preserve_gradients:  Python `bool`.  If `True`, ensure that gradient w.r.t
      the percentile `q` is preserved in the case of linear interpolation.
      If `False`, the gradient will be (incorrectly) zero when `q` corresponds
      to a point in `x`.
    keep_dims: deprecated, use keepdims instead.
    name:  A Python string name to give this `Op`.  Default is 'percentile'.

  Returns:
    A `(rank(q) + N - len(axis))` dimensional `Tensor` of same dtype as `x`, or,
      if `axis` is `None`, a `rank(q)` `Tensor`.  The first `rank(q)` dimensions
      index quantiles for different values of `q`.

  Raises:
    ValueError:  If argument 'interpolation' is not an allowed type.
    ValueError:  If interpolation type not compatible with `dtype`.

  #### Examples

  ```python
  # Get 30th percentile with default ('nearest') interpolation.
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=30.)
  ==> 2.0

  # Get 30th percentile with 'linear' interpolation.
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=30., interpolation='linear')
  ==> 1.9

  # Get 30th and 70th percentiles with 'lower' interpolation
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=[30., 70.], interpolation='lower')
  ==> [1., 3.]

  # Get 100th percentile (maximum).  By default, this is computed over every dim
  x = [[1., 2.],
       [3., 4.]]
  tfp.stats.percentile(x, q=100.)
  ==> 4.

  # Treat the leading dim as indexing samples, and find the 100th quantile (max)
  # over all such samples.
  x = [[1., 2.],
       [3., 4.]]
  tfp.stats.percentile(x, q=100., axis=[0])
  ==> [3., 4.]
  ```

  """
    keepdims = keepdims if keep_dims is None else keep_dims
    del keep_dims
    name = name or 'percentile'
    allowed_interpolations = {
        'linear', 'lower', 'higher', 'nearest', 'midpoint'
    }

    if interpolation is None:
        interpolation = 'nearest'
    else:
        if interpolation not in allowed_interpolations:
            raise ValueError(
                'Argument `interpolation` must be in {}. Found {}.'.format(
                    allowed_interpolations, interpolation))

    with tf.name_scope(name):
        x = tf.convert_to_tensor(x, name='x')

        if (interpolation in {'linear', 'midpoint'}
                and dtype_util.is_integer(x.dtype)):
            raise TypeError(
                '{} interpolation not allowed with dtype {}'.format(
                    interpolation, x.dtype))

        # Double is needed here and below, else we get the wrong index if the array
        # is huge along axis.
        q = tf.cast(q, tf.float64)
        _get_static_ndims(q, expect_ndims_no_more_than=1)

        if validate_args:
            q = distribution_util.with_dependencies([
                assert_util.assert_rank_in(q, [0, 1]),
                assert_util.assert_greater_equal(q, tf.cast(0., tf.float64)),
                assert_util.assert_less_equal(q, tf.cast(100., tf.float64))
            ], q)

        # Move `axis` dims of `x` to the rightmost, call it `y`.
        if axis is None:
            y = tf.reshape(x, [-1])
        else:
            x_ndims = _get_static_ndims(x,
                                        expect_static=True,
                                        expect_ndims_at_least=1)
            axis = _make_static_axis_non_negative_list(axis, x_ndims)
            y = _move_dims_to_flat_end(x, axis, x_ndims, right_end=True)

        frac_at_q_or_below = q / 100.

        # Sort everything in ascending order once; the multiple gathers below
        # reuse this single sorted copy (via common subexpression elimination).
        sorted_y = tf.sort(y, axis=-1, direction='ASCENDING')

        d = ps.cast(ps.shape(y)[-1], tf.float64)

        def _get_indices(interp_type):
            """Get values of y at the indices implied by interp_type."""
            if interp_type == 'lower':
                indices = tf.math.floor((d - 1) * frac_at_q_or_below)
            elif interp_type == 'higher':
                indices = tf.math.ceil((d - 1) * frac_at_q_or_below)
            elif interp_type == 'nearest':
                indices = tf.round((d - 1) * frac_at_q_or_below)
            # d - 1 will be distinct from d in int32, but not necessarily double.
            # So clip to avoid out of bounds errors.
            return tf.clip_by_value(tf.cast(indices, tf.int32), 0,
                                    ps.shape(y)[-1] - 1)

        if interpolation in ['nearest', 'lower', 'higher']:
            gathered_y = tf.gather(sorted_y,
                                   _get_indices(interpolation),
                                   axis=-1)
        elif interpolation == 'midpoint':
            gathered_y = 0.5 * (
                tf.gather(sorted_y, _get_indices('lower'), axis=-1) +
                tf.gather(sorted_y, _get_indices('higher'), axis=-1))
        elif interpolation == 'linear':
            # Copy-paste of docstring on interpolation:
            # linear: i + (j - i) * fraction, where fraction is the fractional part
            # of the index surrounded by i and j.
            larger_y_idx = _get_indices('higher')
            exact_idx = (d - 1) * frac_at_q_or_below
            if preserve_gradients:
                # If q corresponds to a point in x, we will initially have
                # larger_y_idx == smaller_y_idx.
                # This results in the gradient w.r.t. fraction being zero (recall `q`
                # enters only through `fraction`...and see that things cancel).
                # The fix is to ensure that smaller_y_idx and larger_y_idx are always
                # separated by exactly 1.
                smaller_y_idx = tf.maximum(larger_y_idx - 1, 0)
                larger_y_idx = tf.minimum(smaller_y_idx + 1,
                                          tf.shape(y)[-1] - 1)
                fraction = tf.cast(larger_y_idx, tf.float64) - exact_idx
            else:
                smaller_y_idx = _get_indices('lower')
                fraction = tf.math.ceil(
                    (d - 1) * frac_at_q_or_below) - exact_idx

            fraction = tf.cast(fraction, y.dtype)
            gathered_y = (
                tf.gather(sorted_y, larger_y_idx, axis=-1) * (1 - fraction) +
                tf.gather(sorted_y, smaller_y_idx, axis=-1) * fraction)

        # Propagate NaNs
        if x.dtype in (tf.bfloat16, tf.float16, tf.float32, tf.float64):
            # Apparently tf.is_nan doesn't like other dtypes
            nan_batch_members = tf.reduce_any(tf.math.is_nan(x), axis=axis)
            right_rank_matched_shape = ps.pad(ps.shape(nan_batch_members),
                                              paddings=[[0, ps.rank(q)]],
                                              constant_values=1)
            nan_batch_members = tf.reshape(nan_batch_members,
                                           shape=right_rank_matched_shape)
            nan = np.array(np.nan, dtype_util.as_numpy_dtype(gathered_y.dtype))
            gathered_y = tf.where(nan_batch_members, nan, gathered_y)

        # Expand dimensions if requested
        if keepdims:
            if axis is None:
                ones_vec = tf.ones(shape=[
                    _get_best_effort_ndims(x) + _get_best_effort_ndims(q)
                ],
                                   dtype=tf.int32)
                gathered_y *= tf.ones(ones_vec, dtype=x.dtype)
            else:
                gathered_y = _insert_back_keepdims(gathered_y, axis)

        # If q is a scalar, then result has the right shape.
        # If q is a vector, then result has trailing dim of shape q.shape, which
        # needs to be rotated to dim 0.
        return distribution_util.rotate_transpose(gathered_y, ps.rank(q))
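As a quick sanity check (a sketch), `'linear'` interpolation should agree with `numpy.percentile`'s default behavior on the docstring example:

import numpy as np
import tensorflow_probability as tfp

x = np.array([1., 2., 3., 4.], dtype=np.float32)
tfp_val = tfp.stats.percentile(x, q=30., interpolation='linear')
np_val = np.percentile(x, 30.)
print(float(tfp_val), np_val)   # both approximately 1.9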
Example No. 28
    def __init__(self,
                 loc=None,
                 scale=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 experimental_use_kahan_sum=False,
                 name='MultivariateNormalLinearOperator'):
        """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      experimental_use_kahan_sum: Python `bool`. When `True`, we use Kahan
        summation to aggregate independent underlying log_prob values. For best
        results, Kahan summation should also be applied when computing the
        log-determinant of the `LinearOperator` representing the scale matrix.
        Kahan summation improves on the precision of a naive float32 sum.
        This can be noticeable in particular for large dimensions in float32.
        See CPU caveat on `tfp.math.reduce_kahan_sum`.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if not `scale.dtype.is_floating`
    """
        parameters = dict(locals())
        self._experimental_use_kahan_sum = experimental_use_kahan_sum
        if scale is None:
            raise ValueError('Missing required `scale` parameter.')
        if not dtype_util.is_floating(scale.dtype):
            raise TypeError(
                '`scale` parameter must have floating-point dtype.')

        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([loc, scale],
                                            dtype_hint=tf.float32)
            # Since expand_dims doesn't preserve constant-ness, we obtain the
            # non-dynamic value if possible.
            loc = tensor_util.convert_nonref_to_tensor(loc,
                                                       dtype=dtype,
                                                       name='loc')
            batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
                loc, scale)
        self._loc = loc
        self._scale = scale

        bijector = scale_matvec_linear_operator.ScaleMatvecLinearOperator(
            scale, validate_args=validate_args)
        if loc is not None:
            bijector = shift_bijector.Shift(
                shift=loc, validate_args=validate_args)(bijector)
        super(MultivariateNormalLinearOperator, self).__init__(
            # TODO(b/137665504): Use batch-adding meta-distribution to set the batch
            # shape instead of tf.zeros.
            # We use `Sample` instead of `Independent` because `Independent`
            # requires concatenating `batch_shape` and `event_shape`, which loses
            # static `batch_shape` information when `event_shape` is not statically
            # known.
            distribution=sample.Sample(
                normal.Normal(loc=tf.zeros(batch_shape, dtype=dtype),
                              scale=tf.ones([], dtype=dtype)),
                event_shape,
                experimental_use_kahan_sum=experimental_use_kahan_sum),
            bijector=bijector,
            validate_args=validate_args,
            name=name)
        self._parameters = parameters
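A minimal construction sketch; `LinearOperatorLowerTriangular` is one valid choice of `scale` (any floating-point `tf.linalg.LinearOperator` works), giving `covariance = scale @ scale.T` as described in the docstring:

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

scale = tf.linalg.LinearOperatorLowerTriangular([[1., 0.], [0.5, 2.]])
mvn = tfd.MultivariateNormalLinearOperator(loc=[1., -1.], scale=scale)
print(mvn.event_shape)                 # (2,)
print(mvn.sample(3, seed=42).shape)    # (3, 2)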
Example No. 29
    def test_updates_and_losses_for_nested_models_in_subclassed_model(self):

        # Case 1: deferred-build sequential nested in subclass.
        class TestModel1(keras.Model):
            def __init__(self):
                super(TestModel1, self).__init__()
                self.fc = keras.layers.Dense(10,
                                             input_shape=(784, ),
                                             activity_regularizer='l1')
                self.bn = keras.Sequential(
                    [keras.layers.BatchNormalization(axis=1)])

            def call(self, x):
                return self.bn(self.fc(x))

        with tf.compat.v1.get_default_graph().as_default(
        ), self.cached_session():
            model = TestModel1()

            x = tf.ones(shape=[100, 784], dtype='float32')
            model(x)
            self.assertLen(model.updates, 2)
            self.assertLen(model.losses, 1)

        # Case 2: placeholder-sequential nested in subclass.
        class TestModel2(keras.Model):
            def __init__(self):
                super(TestModel2, self).__init__()
                self.fc = keras.layers.Dense(10,
                                             input_shape=(784, ),
                                             activity_regularizer='l1')
                self.bn = keras.Sequential([
                    keras.layers.BatchNormalization(axis=1, input_shape=(10, ))
                ])

            def call(self, x):
                return self.bn(self.fc(x))

        with tf.compat.v1.get_default_graph().as_default(
        ), self.cached_session():
            model = TestModel2()

            x = tf.ones(shape=[100, 784], dtype='float32')
            model(x)
            self.assertEqual(len(model.get_updates_for(x)), 2)
            self.assertEqual(len(model.get_losses_for(x)), 1)

        # Case 3: functional-API model nested in subclass.
        with tf.compat.v1.get_default_graph().as_default():
            inputs = keras.Input((10, ))
            outputs = keras.layers.BatchNormalization(axis=1)(inputs)
            bn = keras.Model(inputs, outputs)

            class TestModel3(keras.Model):
                def __init__(self):
                    super(TestModel3, self).__init__()
                    self.fc = keras.layers.Dense(10,
                                                 input_shape=(784, ),
                                                 activity_regularizer='l1')
                    self.bn = bn

                def call(self, x):
                    return self.bn(self.fc(x))

            with self.cached_session():
                model = TestModel3()

                x = tf.ones(shape=[100, 784], dtype='float32')
                model(x)
                self.assertEqual(len(model.get_updates_for(x)), 2)
                self.assertEqual(len(model.get_losses_for(x)), 1)
Example No. 30
 def test_function(self):
   lop = AutoDiag(2. * tf.ones([3]))
   self.assertAllClose(
       6. * tf.ones([3]),
       tf.function(lambda lop: lop.matvec(3. * tf.ones([3])))(lop))