Code Example #1
 def state_space_model_likelihood(**param_vals):
     ssm = self.make_state_space_model(
         param_vals=param_vals,
         num_timesteps=num_timesteps,
         initial_step=initial_step,
         mask=mask,
         experimental_parallelize=experimental_parallelize)
     # Looping LGSSM methods are really expensive in eager mode; wrap them
     # to keep this from slowing things down in interactive use.
     ssm = tfe_util.JitPublicMethods(ssm, trace_only=True)
     if distribution_util.shape_may_be_nontrivial(trajectories_shape):
         return sample.Sample(ssm, sample_shape=trajectories_shape)
     return ssm
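
A minimal sketch (with hypothetical model parameters) of the final wrapping step above: when the requested `trajectories_shape` is nontrivial, the state-space model is wrapped in `tfd.Sample` so that each draw yields that many independent trajectories.

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# Stand-in for the model returned by `self.make_state_space_model(...)`.
ssm = tfd.LinearGaussianStateSpaceModel(
    num_timesteps=5,
    transition_matrix=tf.linalg.LinearOperatorIdentity(num_rows=1),
    transition_noise=tfd.MultivariateNormalDiag(scale_diag=[1.]),
    observation_matrix=tf.linalg.LinearOperatorIdentity(num_rows=1),
    observation_noise=tfd.MultivariateNormalDiag(scale_diag=[0.5]),
    initial_state_prior=tfd.MultivariateNormalDiag(scale_diag=[1.]))

# Wrapping in `Sample` makes each draw a batch of independent trajectories.
trajectories_shape = [3]
likelihood = tfd.Sample(ssm, sample_shape=trajectories_shape)
draws = likelihood.sample()  # shape [3, 5, 1]
```
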
Code Example #2
def build_split_flow_surrogate_posterior(event_shape,
                                         trainable_bijector,
                                         constraining_bijector=None,
                                         base_distribution=normal.Normal,
                                         batch_shape=(),
                                         dtype=tf.float32,
                                         validate_args=False,
                                         name=None):
    """Builds a joint variational posterior by splitting a normalizing flow.

  Args:
    event_shape: (Nested) event shape of the surrogate posterior.
    trainable_bijector: A trainable `tfb.Bijector` instance that operates on
      `Tensor`s (not structures), e.g. `tfb.MaskedAutoregressiveFlow` or
      `tfb.RealNVP`. This bijector transforms the base distribution before it is
      split.
    constraining_bijector: `tfb.Bijector` instance, or nested structure of
      `tfb.Bijector` instances, that maps (nested) values in R^n to the support
      of the posterior. (This can be the
      `experimental_default_event_space_bijector` of the distribution over the
      prior latent variables.)
      Default value: `None` (i.e., the posterior is over R^n).
    base_distribution: A `tfd.Distribution` subclass parameterized by `loc` and
      `scale`. The base distribution for the transformed surrogate has `loc=0.`
      and `scale=1.`.
      Default value: `tfd.Normal`.
    batch_shape: The `batch_shape` of the output distribution.
      Default value: `()`.
    dtype: The `dtype` of the surrogate posterior.
      Default value: `tf.float32`.
    validate_args: Python `bool`. Whether to validate input with asserts. This
      imposes a runtime cost. If `validate_args` is `False`, and the inputs are
      invalid, correct behavior is not guaranteed.
      Default value: `False`.
    name: Python `str` name prefixed to ops created by this function.
      Default value: `None` (i.e., 'build_split_flow_surrogate_posterior').

  Returns:
    surrogate_distribution: Trainable `tfd.TransformedDistribution` with event
      shape equal to `event_shape`.

  ### Examples
  ```python

  # Train a normalizing flow on the Eight Schools model [1].

  treatment_effects = [28., 8., -3., 7., -1., 1., 18., 12.]
  treatment_stddevs = [15., 10., 16., 11., 9., 11., 10., 18.]
  model = tfd.JointDistributionNamed({
      'avg_effect':
          tfd.Normal(loc=0., scale=10., name='avg_effect'),
      'log_stddev':
          tfd.Normal(loc=5., scale=1., name='log_stddev'),
      'school_effects':
          lambda log_stddev, avg_effect: (
              tfd.Independent(
                  tfd.Normal(
                      loc=avg_effect[..., None] * tf.ones(8),
                      scale=tf.exp(log_stddev[..., None]) * tf.ones(8),
                      name='school_effects'),
                  reinterpreted_batch_ndims=1)),
      'treatment_effects': lambda school_effects: tfd.Independent(
          tfd.Normal(loc=school_effects, scale=treatment_stddevs),
          reinterpreted_batch_ndims=1)
  })

  # Pin the observed values in the model.
  target_model = model.experimental_pin(treatment_effects=treatment_effects)

  # Create a Masked Autoregressive Flow bijector.
  net = tfb.AutoregressiveNetwork(2, hidden_units=[16, 16], dtype=tf.float32)
  maf = tfb.MaskedAutoregressiveFlow(shift_and_log_scale_fn=net)

  # Build and fit the surrogate posterior.
  surrogate_posterior = (
      tfp.experimental.vi.build_split_flow_surrogate_posterior(
          event_shape=target_model.event_shape_tensor(),
          trainable_bijector=maf,
          constraining_bijector=(
              target_model.experimental_default_event_space_bijector())))

  losses = tfp.vi.fit_surrogate_posterior(
      target_model.unnormalized_log_prob,
      surrogate_posterior,
      num_steps=100,
      optimizer=tf.optimizers.Adam(0.1),
      sample_size=10)
  ```

  #### References

  [1] Andrew Gelman, John Carlin, Hal Stern, David Dunson, Aki Vehtari, and
      Donald Rubin. Bayesian Data Analysis, Third Edition.
      Chapman and Hall/CRC, 2013.

  """
    with tf.name_scope(name or 'build_split_flow_surrogate_posterior'):

        shallow_structure = _get_event_shape_shallow_structure(event_shape)
        event_shape = nest.map_structure_up_to(shallow_structure,
                                               ps.convert_to_shape_tensor,
                                               event_shape)

        if nest.is_nested(constraining_bijector):
            constraining_bijector = joint_map.JointMap(
                nest.map_structure(
                    lambda b: identity.Identity()
                    if b is None else b, constraining_bijector),
                validate_args=validate_args)

        if constraining_bijector is None:
            unconstrained_event_shape = event_shape
        else:
            unconstrained_event_shape = (
                constraining_bijector.inverse_event_shape_tensor(event_shape))

        flat_base_event_shape = nest.flatten(unconstrained_event_shape)
        flat_base_event_size = nest.map_structure(tf.reduce_prod,
                                                  flat_base_event_shape)
        event_size = tf.reduce_sum(flat_base_event_size)

        base_distribution = sample.Sample(
            base_distribution(tf.zeros(batch_shape, dtype=dtype), scale=1.),
            [event_size])

        # After transforming base distribution samples with `trainable_bijector`,
        # split them into vector-valued components.
        split_bijector = split.Split(flat_base_event_size,
                                     validate_args=validate_args)

        # Reshape the vectors to the correct posterior event shape.
        event_reshape = joint_map.JointMap(nest.map_structure(
            reshape.Reshape, unconstrained_event_shape),
                                           validate_args=validate_args)

        # Restructure the flat list of components to the correct posterior
        # structure.
        event_unflatten = restructure.Restructure(
            nest.pack_sequence_as(unconstrained_event_shape,
                                  range(len(flat_base_event_shape))))

        bijectors = [] if constraining_bijector is None else [
            constraining_bijector
        ]
        bijectors.extend([
            event_reshape, event_unflatten, split_bijector, trainable_bijector
        ])
        bijector = chain.Chain(bijectors, validate_args=validate_args)

        return transformed_distribution.TransformedDistribution(
            base_distribution, bijector=bijector, validate_args=validate_args)
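
As a standalone illustration of the chain assembled above (a sketch with made-up event shapes, using the public `tfp.bijectors` aliases of the modules used here), the following shows how `Split`, `Restructure`, a `JointMap` of `Reshape`s, and `Chain` turn one flat vector into a structured event:

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors

# Hypothetical unconstrained event structure: 'a' has shape [2], 'b' has [3, 4].
split = tfb.Split([2, 12])                      # flat vector -> list of vectors
unflatten = tfb.Restructure({'a': 0, 'b': 1})   # list -> dict structure
reshape = tfb.JointMap({'a': tfb.Reshape([2]),  # vectors -> per-part event shapes
                        'b': tfb.Reshape([3, 4])})

# `Chain` applies bijectors right to left, matching the ordering used in
# `bijectors.extend([event_reshape, event_unflatten, split_bijector, ...])`.
bijector = tfb.Chain([reshape, unflatten, split])

flat_draw = tf.random.normal([14])
structured = bijector.forward(flat_draw)  # {'a': shape [2], 'b': shape [3, 4]}
```
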
Code Example #3
def _affine_surrogate_posterior(event_shape,
                                operators='diag',
                                bijector=None,
                                base_distribution=normal.Normal,
                                dtype=tf.float32,
                                batch_shape=(),
                                validate_args=False,
                                name=None):
    """Builds a joint variational posterior with a given `event_shape`.

  This function builds a surrogate posterior by applying a trainable
  transformation to a standard base distribution and constraining the samples
  with `bijector`. The surrogate posterior has event shape equal to
  the input `event_shape`.

  This function is a convenience wrapper around
  `build_affine_surrogate_posterior_from_base_distribution` that allows the
  user to pass in the desired posterior `event_shape` instead of
  pre-constructed base distributions (at the expense of full control over the
  base distribution types and parameterizations).

  Args:
    event_shape: (Nested) event shape of the posterior.
    operators: Either a string or a list/tuple containing `LinearOperator`
      subclasses, `LinearOperator` instances, or callables returning
      `LinearOperator` instances. Supported string values are "diag" (to create
      a mean-field surrogate posterior) and "tril" (to create a full-covariance
      surrogate posterior). A list/tuple may be passed to induce other
      posterior covariance structures. If the list is flat, a
      `tf.linalg.LinearOperatorBlockDiag` instance will be created and applied
      to the base distribution. Otherwise the list must be singly-nested and
      have a first element of length 1, second element of length 2, etc.; the
      elements of the outer list are interpreted as rows of a lower-triangular
      block structure, and a `tf.linalg.LinearOperatorBlockLowerTriangular`
      instance is created. For complete documentation and examples, see
      `tfp.experimental.vi.util.build_trainable_linear_operator_block`, which
      receives the `operators` arg if it is list-like.
      Default value: `"diag"`.
    bijector: `tfb.Bijector` instance, or nested structure of `tfb.Bijector`
      instances, that maps (nested) values in R^n to the support of the
      posterior. (This can be the `experimental_default_event_space_bijector` of
      the distribution over the prior latent variables.)
      Default value: `None` (i.e., the posterior is over R^n).
    base_distribution: A `tfd.Distribution` subclass parameterized by `loc` and
      `scale`. The base distribution of the transformed surrogate has `loc=0.`
      and `scale=1.`.
      Default value: `tfd.Normal`.
    dtype: The `dtype` of the surrogate posterior.
      Default value: `tf.float32`.
    batch_shape: Batch shape (Python tuple, list, or int) of the surrogate
      posterior, to enable parallel optimization from multiple initializations.
      Default value: `()`.
    validate_args: Python `bool`. Whether to validate input with asserts. This
      imposes a runtime cost. If `validate_args` is `False`, and the inputs are
      invalid, correct behavior is not guaranteed.
      Default value: `False`.
    name: Python `str` name prefixed to ops created by this function.
      Default value: `None` (i.e., 'build_affine_surrogate_posterior').
  Yields:
    *parameters: sequence of `trainable_state_util.Parameter` namedtuples.
      These are intended to be consumed by
      `trainable_state_util.as_stateful_builder` and
      `trainable_state_util.as_stateless_builder` to define stateful and
      stateless variants respectively.

  #### Examples

  ```python
  tfd = tfp.distributions
  tfb = tfp.bijectors

  # Define a joint probabilistic model.
  Root = tfd.JointDistributionCoroutine.Root
  def model_fn():
    concentration = yield Root(tfd.Exponential(1.))
    rate = yield Root(tfd.Exponential(1.))
    y = yield tfd.Sample(
        tfd.Gamma(concentration=concentration, rate=rate),
        sample_shape=4)
  model = tfd.JointDistributionCoroutine(model_fn)

  # Assume the `y` are observed, such that the posterior is a joint distribution
  # over `concentration` and `rate`. The posterior event shape is then equal to
  # the first two components of the model's event shape.
  posterior_event_shape = model.event_shape_tensor()[:-1]

  # Constrain the posterior values to be positive using the `Exp` bijector.
  bijector = [tfb.Exp(), tfb.Exp()]

  # Build a full-covariance surrogate posterior.
  surrogate_posterior = (
    tfp.experimental.vi.build_affine_surrogate_posterior(
        event_shape=posterior_event_shape,
        operators='tril',
        bijector=bijector))

  # For an example defining `'operators'` as a list to express an alternative
  # covariance structure, see
  # `build_affine_surrogate_posterior_from_base_distribution`.

  # Fit the model.
  y = [0.2, 0.5, 0.3, 0.7]
  target_model = model.experimental_pin(y=y)
  losses = tfp.vi.fit_surrogate_posterior(
      target_model.unnormalized_log_prob,
      surrogate_posterior,
      num_steps=100,
      optimizer=tf.optimizers.Adam(0.1),
      sample_size=10)
  ```
  """
    with tf.name_scope(name or 'build_affine_surrogate_posterior'):

        event_shape = nest.map_structure_up_to(
            _get_event_shape_shallow_structure(event_shape),
            lambda s: tf.convert_to_tensor(s, dtype=tf.int32), event_shape)

        if nest.is_nested(bijector):
            bijector = joint_map.JointMap(nest.map_structure(
                lambda b: identity.Identity() if b is None else b, bijector),
                                          validate_args=validate_args)

        if bijector is None:
            unconstrained_event_shape = event_shape
        else:
            unconstrained_event_shape = (
                bijector.inverse_event_shape_tensor(event_shape))

        standard_base_distribution = nest.map_structure(
            lambda s: base_distribution(loc=tf.zeros([], dtype=dtype),
                                        scale=1.), unconstrained_event_shape)
        standard_base_distribution = nest.map_structure(
            lambda d, s: (  # pylint: disable=g-long-lambda
                sample.Sample(d, sample_shape=s, validate_args=validate_args)
                if distribution_util.shape_may_be_nontrivial(s) else d),
            standard_base_distribution,
            unconstrained_event_shape)
        if distribution_util.shape_may_be_nontrivial(batch_shape):
            standard_base_distribution = nest.map_structure(
                lambda d: batch_broadcast.BatchBroadcast(  # pylint: disable=g-long-lambda
                    d,
                    to_shape=batch_shape,
                    validate_args=validate_args),
                standard_base_distribution)

        surrogate_posterior = yield from _affine_surrogate_posterior_from_base_distribution(
            standard_base_distribution,
            operators=operators,
            bijector=bijector,
            validate_args=validate_args)
        return surrogate_posterior
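
A sketch (with hypothetical two-latent shapes) of the two list forms the `operators` argument documents above: a flat list yields a block-diagonal scale, while a singly-nested list with row lengths 1, 2, ... yields a block lower-triangular scale that couples later latents to earlier ones.

```python
import tensorflow as tf

# Flat list: one operator per latent -> block-diagonal scale (no cross-latent
# covariance).
operators_block_diag = [
    tf.linalg.LinearOperatorDiag,
    tf.linalg.LinearOperatorLowerTriangular,
]

# Singly-nested list with rows of length 1, 2, ...: rows of a lower-triangular
# block structure -> block lower-triangular scale.
operators_block_tril = [
    [tf.linalg.LinearOperatorLowerTriangular],
    [tf.linalg.LinearOperatorFullMatrix,
     tf.linalg.LinearOperatorLowerTriangular],
]

# Either list could be passed as `operators=` in place of 'diag' or 'tril'.
```
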
Code Example #4
  def __init__(self,
               loc=None,
               scale=None,
               validate_args=False,
               allow_nan_stats=True,
               experimental_use_kahan_sum=False,
               name='MultivariateNormalLinearOperator'):
    """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      experimental_use_kahan_sum: Python `bool`. When `True`, we use Kahan
        summation to aggregate independent underlying log_prob values. For best
        results, Kahan summation should also be applied when computing the
        log-determinant of the `LinearOperator` representing the scale matrix.
        Kahan summation improves on the precision of a naive float32 sum.
        This can be noticeable in particular for large dimensions in float32.
        See CPU caveat on `tfp.math.reduce_kahan_sum`.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if `scale.dtype` is not a floating-point type.
    """
    parameters = dict(locals())
    self._experimental_use_kahan_sum = experimental_use_kahan_sum
    if scale is None:
      raise ValueError('Missing required `scale` parameter.')
    if not dtype_util.is_floating(scale.dtype):
      raise TypeError('`scale` parameter must have floating-point dtype.')

    with tf.name_scope(name) as name:
      dtype = dtype_util.common_dtype([loc, scale], dtype_hint=tf.float32)
      # Since expand_dims doesn't preserve constant-ness, we obtain the
      # non-dynamic value if possible.
      loc = tensor_util.convert_nonref_to_tensor(
          loc, dtype=dtype, name='loc')
      batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
          loc, scale)
    self._loc = loc
    self._scale = scale

    bijector = scale_matvec_linear_operator.ScaleMatvecLinearOperator(
        scale, validate_args=validate_args)
    if loc is not None:
      bijector = shift_bijector.Shift(
          shift=loc, validate_args=validate_args)(bijector)
    super(MultivariateNormalLinearOperator, self).__init__(
        # TODO(b/137665504): Use batch-adding meta-distribution to set the batch
        # shape instead of tf.zeros.
        # We use `Sample` instead of `Independent` because `Independent`
        # requires concatenating `batch_shape` and `event_shape`, which loses
        # static `batch_shape` information when `event_shape` is not statically
        # known.
        distribution=sample.Sample(
            normal.Normal(
                loc=tf.zeros(batch_shape, dtype=dtype),
                scale=tf.ones([], dtype=dtype)),
            event_shape,
            experimental_use_kahan_sum=experimental_use_kahan_sum),
        bijector=bijector,
        validate_args=validate_args,
        name=name)
    self._parameters = parameters
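
A minimal usage sketch (with made-up numbers) of the public constructor documented above; it also illustrates the `covariance = scale @ scale.T` relation stated in the docstring.

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

scale = tf.linalg.LinearOperatorLowerTriangular([[1.0, 0.0], [0.5, 2.0]])
mvn = tfd.MultivariateNormalLinearOperator(loc=[1., -1.], scale=scale)

x = mvn.sample(3)      # shape [3, 2]
lp = mvn.log_prob(x)   # shape [3]
# covariance = scale @ scale.T
cov = scale.matmul(scale.to_dense(), adjoint_arg=True)
```
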
Code Example #5
        def posterior_generator():

            prior_gen = prior._model_coroutine()  # pylint: disable=protected-access
            dist = next(prior_gen)

            i = 0
            try:
                while True:
                    original_dist = dist.distribution if isinstance(
                        dist, Root) else dist

                    if isinstance(original_dist,
                                  joint_distribution.JointDistribution):
                        # TODO(kateslin): Build inner JD surrogate in
                        # _make_asvi_trainable_variables to avoid rebuilding variables.
                        raise TypeError(
                            'Argument `prior` cannot be a nested `JointDistribution`.'
                        )

                    else:

                        original_dist = _as_trainable_family(original_dist)

                        try:
                            actual_dist = original_dist.distribution
                        except AttributeError:
                            actual_dist = original_dist

                        dist_params = actual_dist.parameters
                        temp_params_dict = {}

                        for param, value in dist_params.items():
                            if param in (
                                    _NON_STATISTICAL_PARAMS +
                                    _NON_TRAINABLE_PARAMS) or value is None:
                                temp_params_dict[param] = value
                            else:
                                prior_weight = param_dicts[i][
                                    param].prior_weight
                                mean_field_parameter = param_dicts[i][
                                    param].mean_field_parameter
                                if mean_field:
                                    temp_params_dict[
                                        param] = mean_field_parameter
                                else:
                                    temp_params_dict[
                                        param] = prior_weight * value + (
                                            1. - prior_weight
                                        ) * mean_field_parameter

                        if isinstance(original_dist, sample.Sample):
                            surrogate_dist = sample.Sample(
                                type(actual_dist)(**temp_params_dict))
                        else:
                            surrogate_dist = type(actual_dist)(
                                **temp_params_dict)

                        if isinstance(
                                original_dist, transformed_distribution.
                                TransformedDistribution):
                            surrogate_dist = transformed_distribution.TransformedDistribution(
                                surrogate_dist,
                                bijector=original_dist.bijector)

                        if isinstance(original_dist, independent.Independent):
                            surrogate_dist = independent.Independent(
                                surrogate_dist,
                                reinterpreted_batch_ndims=original_dist.
                                reinterpreted_batch_ndims)

                        if isinstance(dist, Root):
                            value_out = yield Root(surrogate_dist)
                        else:
                            value_out = yield surrogate_dist

                    dist = prior_gen.send(value_out)
                    i += 1
            except StopIteration:
                pass
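
A minimal sketch (hypothetical scalar values) of the parameter rule applied in the `else` branch above: each surrogate parameter is a convex combination of the prior's parameter value and a freely trainable mean-field parameter.

```python
import tensorflow as tf

prior_value = tf.constant(10.0)           # e.g. the prior's `scale` parameter
prior_weight = tf.Variable(0.5)           # trainable weight, intended to lie in [0, 1]
mean_field_parameter = tf.Variable(1.0)   # trainable mean-field parameter

# The surrogate distribution is built with this interpolated parameter value
# (or with `mean_field_parameter` alone when `mean_field=True`).
surrogate_value = (prior_weight * prior_value
                   + (1. - prior_weight) * mean_field_parameter)
```
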
Code Example #6
    def __init__(self,
                 loc=None,
                 scale=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='VectorExponentialLinearOperator'):
        """Construct Vector Exponential distribution supported on a subset of `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if `scale.dtype` is not a floating-point type.
    """
        parameters = dict(locals())
        if loc is None:
            loc = 0.0  # Implicit value for backwards compatibility.
        if scale is None:
            raise ValueError('Missing required `scale` parameter.')
        if not dtype_util.is_floating(scale.dtype):
            raise TypeError(
                '`scale` parameter must have floating-point dtype.')

        with tf.name_scope(name) as name:
            # Since expand_dims doesn't preserve constant-ness, we obtain the
            # non-dynamic value if possible.
            loc = loc if loc is None else tf.convert_to_tensor(
                loc, name='loc', dtype=scale.dtype)
            batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
                loc, scale)
            self._loc = loc
            self._scale = scale
            super(VectorExponentialLinearOperator, self).__init__(
                # TODO(b/137665504): Use batch-adding meta-distribution to set the
                # batch shape instead of tf.ones.
                # We use `Sample` instead of `Independent` because `Independent`
                # requires concatenating `batch_shape` and `event_shape`, which loses
                # static `batch_shape` information when `event_shape` is not
                # statically known.
                distribution=sample.Sample(
                    exponential.Exponential(rate=tf.ones(batch_shape,
                                                         dtype=scale.dtype),
                                            allow_nan_stats=allow_nan_stats),
                    event_shape),
                bijector=shift_bijector.Shift(shift=loc)(
                    scale_matvec_linear_operator.ScaleMatvecLinearOperator(
                        scale=scale, validate_args=validate_args)),
                validate_args=validate_args,
                name=name)
            self._parameters = parameters
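
A standalone sketch (hypothetical `loc` and `scale`) of the construction this initializer performs: an i.i.d. `Exponential` base distribution, pushed through a `ScaleMatvecLinearOperator` and a `Shift` bijector.

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

scale = tf.linalg.LinearOperatorLowerTriangular([[1.0, 0.0], [0.5, 2.0]])
loc = tf.constant([1.0, -1.0])

vector_exp = tfd.TransformedDistribution(
    distribution=tfd.Sample(tfd.Exponential(rate=1.), sample_shape=[2]),
    bijector=tfb.Shift(loc)(tfb.ScaleMatvecLinearOperator(scale)))

y = vector_exp.sample(5)  # shape [5, 2], supported on a shifted, skewed orthant
```
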
Code Example #7
    def __init__(self,
                 design_matrix,
                 nonzero_prior_prob=0.5,
                 weights_prior_precision=None,
                 default_pseudo_observations=1.,
                 observation_noise_variance_prior_concentration=0.005,
                 observation_noise_variance_prior_scale=0.0025,
                 observation_noise_variance_upper_bound=None,
                 num_missing=0.):
        """Initializes priors for the spike and slab sampler.

    Args:
      design_matrix: (batch of) float `Tensor`(s) regression design matrix (`X`
        in [1]) having shape `[num_outputs, num_features]`.
      nonzero_prior_prob: scalar float `Tensor` prior probability of the 'slab',
        i.e., prior probability that any given feature has nonzero weight (`pi`
        in [1]). Default value: `0.5`.
      weights_prior_precision: (batch of) float `Tensor` complete prior
        precision matrix(s) over the weights, of shape `[num_features,
        num_features]`. If not specified, defaults to the Zellner g-prior
        specified in `[1]` as `Omega^{-1} = kappa * (X'X + diag(X'X)) / (2 *
        num_outputs)`, in which we've plugged in the suggested default of `w =
        0.5`. The parameter `kappa` is controlled by the
        `default_pseudo_observations` argument. Default value: `None`.
      default_pseudo_observations: scalar float `Tensor` Controls the number of
        pseudo-observations for the prior precision matrix over the weights.
        Corresponds to `kappa` in [1]. See also `weights_prior_precision`.
      observation_noise_variance_prior_concentration: scalar float `Tensor`
        concentration parameter of the inverse gamma prior on the noise
        variance. Corresponds to `nu / 2` in [1]. Default value: 0.005.
      observation_noise_variance_prior_scale: scalar float `Tensor` scale
        parameter of the inverse gamma prior on the noise variance. Corresponds
        to `ss / 2` in [1]. Default value: 0.0025.
      observation_noise_variance_upper_bound: optional scalar float `Tensor`
        maximum value of sampled observation noise variance. Specifying a bound
        can help avoid divergence when the sampler is initialized far from the
        posterior. Default value: `None`.
      num_missing: Optional scalar float `Tensor`. Corrects for how many missing
        values are coded as zero in the design matrix.
    """
        with tf.name_scope('spike_slab_sampler'):
            dtype = dtype_util.common_dtype([
                design_matrix, nonzero_prior_prob, weights_prior_precision,
                observation_noise_variance_prior_concentration,
                observation_noise_variance_prior_scale,
                observation_noise_variance_upper_bound, num_missing
            ],
                                            dtype_hint=tf.float32)
            design_matrix = tf.convert_to_tensor(design_matrix, dtype=dtype)
            nonzero_prior_prob = tf.convert_to_tensor(nonzero_prior_prob,
                                                      dtype=dtype)
            observation_noise_variance_prior_concentration = tf.convert_to_tensor(
                observation_noise_variance_prior_concentration, dtype=dtype)
            observation_noise_variance_prior_scale = tf.convert_to_tensor(
                observation_noise_variance_prior_scale, dtype=dtype)
            num_missing = tf.convert_to_tensor(num_missing, dtype=dtype)
            if observation_noise_variance_upper_bound is not None:
                observation_noise_variance_upper_bound = tf.convert_to_tensor(
                    observation_noise_variance_upper_bound, dtype=dtype)

            design_shape = ps.shape(design_matrix)
            num_outputs = tf.cast(design_shape[-2], dtype=dtype) - num_missing
            num_features = design_shape[-1]

            x_transpose_x = tf.matmul(design_matrix,
                                      design_matrix,
                                      adjoint_a=True)
            if weights_prior_precision is None:
                # Default prior: 'Zellner’s g−prior' from section 3.2.1 of [1]:
                #   `omega^{-1} = kappa * (w X'X + (1 − w) diag(X'X))/n`
                # with default `w = 0.5`.
                padded_inputs = broadcast_util.left_justified_expand_dims_like(
                    num_outputs, x_transpose_x)
                weights_prior_precision = default_pseudo_observations * tf.linalg.set_diag(
                    0.5 * x_transpose_x,
                    tf.linalg.diag_part(x_transpose_x)) / padded_inputs

            observation_noise_variance_posterior_concentration = (
                observation_noise_variance_prior_concentration +
                tf.convert_to_tensor(num_outputs / 2., dtype=dtype))

            self.num_outputs = num_outputs
            self.num_features = num_features
            self.design_matrix = design_matrix
            self.x_transpose_x = x_transpose_x
            self.dtype = dtype
            self.nonzeros_prior = sample_dist.Sample(
                bernoulli.Bernoulli(probs=nonzero_prior_prob),
                sample_shape=[num_features])
            self.weights_prior_precision = weights_prior_precision
            self.observation_noise_variance_prior_concentration = (
                observation_noise_variance_prior_concentration)
            self.observation_noise_variance_prior_scale = (
                observation_noise_variance_prior_scale)
            self.observation_noise_variance_upper_bound = (
                observation_noise_variance_upper_bound)
            self.observation_noise_variance_posterior_concentration = (
                observation_noise_variance_posterior_concentration)
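
A small worked sketch (made-up design matrix) of the default Zellner g-prior precision constructed above: off-diagonal entries of `X'X` get weight `w = 0.5`, diagonal entries keep full weight, and the result is scaled by `kappa / num_outputs`.

```python
import tensorflow as tf

design_matrix = tf.constant([[1., 2.],
                             [0., 1.],
                             [1., 1.]])  # [num_outputs=3, num_features=2]
kappa = 1.                               # `default_pseudo_observations`
num_outputs = 3.

xtx = tf.matmul(design_matrix, design_matrix, adjoint_a=True)
# Off-diagonals: 0.5 * X'X; diagonals: full diag(X'X); then scale by kappa / n.
weights_prior_precision = kappa * tf.linalg.set_diag(
    0.5 * xtx, tf.linalg.diag_part(xtx)) / num_outputs
```
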
Code Example #8
 def _batched_isotropic_normal_like(state_part):
   return sample.Sample(
       normal.Normal(ps.zeros([], dtype=state_part.dtype), 1.),
       ps.shape(state_part)[batch_rank:])
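
A sketch (hypothetical `batch_rank` and state shape) of what this helper produces: a standard-normal `Sample` distribution whose event shape matches the non-batch dimensions of the state part.

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

batch_rank = 1
state_part = tf.zeros([4, 3], dtype=tf.float32)  # 4 chains, event size 3

isotropic_normal = tfd.Sample(
    tfd.Normal(tf.zeros([], dtype=state_part.dtype), 1.),
    sample_shape=tf.shape(state_part)[batch_rank:])
# isotropic_normal.sample(4) has shape [4, 3]: i.i.d. N(0, 1) over the state dims.
```
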