Example #1
def make_kernel_bias_posterior_mvn_diag(kernel_shape,
                                        bias_shape,
                                        dtype=tf.float32,
                                        kernel_initializer=None,
                                        bias_initializer=None):
    """Create learnable posterior for Variational layers with kernel and bias."""
    if kernel_initializer is None:
        kernel_initializer = tf.initializers.glorot_normal()
    if bias_initializer is None:
        bias_initializer = tf.initializers.glorot_normal()
    make_loc = lambda shape, init, name: tf.Variable(  # pylint: disable=g-long-lambda
        init(shape, dtype=dtype),
        name=name + '_loc')
    make_scale = lambda shape, name: TransformedVariable(  # pylint: disable=g-long-lambda
        tf.ones(shape, dtype=dtype),
        Chain([Shift(1e-5), Softplus()]),
        name=name + '_scale')
    return JointDistributionSequential([
        Independent(Normal(loc=make_loc(kernel_shape, kernel_initializer,
                                        'posterior_kernel'),
                           scale=make_scale(kernel_shape, 'posterior_kernel')),
                    reinterpreted_batch_ndims=prefer_static.size(kernel_shape),
                    name='posterior_kernel'),
        Independent(Normal(loc=make_loc(bias_shape, bias_initializer,
                                        'posterior_bias'),
                           scale=make_scale(bias_shape, 'posterior_bias')),
                    reinterpreted_batch_ndims=prefer_static.size(bias_shape),
                    name='posterior_bias'),
    ])
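The snippets on this page refer to bare TensorFlow Probability names (`Normal`, `Independent`, `Chain`, `TransformedVariable`, ...) without showing their imports. A minimal import preamble along the following lines makes Example #1 (and the later examples) runnable; the aliases are an assumption about how the original module imported these symbols, but each one exists in the `tfp` namespaces shown (`prefer_static` and `dtype_util` live in TFP's internal package).

```python
import tensorflow as tf
import tensorflow_probability as tfp

# Internal TFP utilities used by the snippets.
from tensorflow_probability.python.internal import dtype_util
from tensorflow_probability.python.internal import prefer_static

# Bare names used across the examples, pulled from the public tfp namespaces.
TransformedVariable = tfp.util.TransformedVariable
Chain = tfp.bijectors.Chain
Shift = tfp.bijectors.Shift
Softplus = tfp.bijectors.Softplus
Categorical = tfp.distributions.Categorical
Independent = tfp.distributions.Independent
JointDistributionSequential = tfp.distributions.JointDistributionSequential
MixtureSameFamily = tfp.distributions.MixtureSameFamily
MultivariateNormalTriL = tfp.distributions.MultivariateNormalTriL
Normal = tfp.distributions.Normal
Sample = tfp.distributions.Sample
```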
Example #2
def make_kernel_bias_posterior_mvn_diag(
        kernel_shape,
        bias_shape,
        kernel_initializer=None,
        bias_initializer=None,
        kernel_batch_ndims=0,  # pylint: disable=unused-argument
        bias_batch_ndims=0,  # pylint: disable=unused-argument
        dtype=tf.float32,
        kernel_name='posterior_kernel',
        bias_name='posterior_bias'):
    """Create learnable posterior for Variational layers with kernel and bias.

  Args:
    kernel_shape: ...
    bias_shape: ...
    kernel_initializer: ...
      Default value: `None` (i.e., `tf.initializers.glorot_uniform()`).
    bias_initializer: ...
      Default value: `None` (i.e., `tf.zeros`).
    kernel_batch_ndims: ...
      Default value: `0`.
    bias_batch_ndims: ...
      Default value: `0`.
    dtype: ...
      Default value: `tf.float32`.
    kernel_name: ...
      Default value: `"posterior_kernel"`.
    bias_name: ...
      Default value: `"posterior_bias"`.

  Returns:
    kernel_and_bias_distribution: ...
  """
    if kernel_initializer is None:
        kernel_initializer = nn_init_lib.glorot_uniform()
    if bias_initializer is None:
        bias_initializer = tf.zeros
    make_loc = lambda init_fn, shape, batch_ndims, name: tf.Variable(  # pylint: disable=g-long-lambda
        _try_call_init_fn(init_fn, shape, dtype, batch_ndims),
        name=name + '_loc')
    # Setting the initial scale to a relatively small value causes the `loc` to
    # quickly move toward a lower loss value.
    make_scale = lambda shape, name: TransformedVariable(  # pylint: disable=g-long-lambda
        tf.fill(shape, value=tf.constant(1e-3, dtype=dtype)),
        Chain([Shift(1e-5), Softplus()]),
        name=name + '_scale')
    return JointDistributionSequential([
        Independent(Normal(loc=make_loc(kernel_initializer, kernel_shape,
                                        kernel_batch_ndims, kernel_name),
                           scale=make_scale(kernel_shape, kernel_name)),
                    reinterpreted_batch_ndims=prefer_static.size(kernel_shape),
                    name=kernel_name),
        Independent(Normal(loc=make_loc(bias_initializer, bias_shape,
                                        bias_batch_ndims, bias_name),
                           scale=make_scale(bias_shape, bias_name)),
                    reinterpreted_batch_ndims=prefer_static.size(bias_shape),
                    name=bias_name),
    ])
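Example #2 depends on two helpers that are not shown here: `nn_init_lib` (presumably the `tfp.experimental.nn.initializers` module) and the private `_try_call_init_fn`. The following is only a plausible sketch of that helper, assuming its job is to call an initializer with the trailing `batch_ndims` argument when the initializer accepts it and to fall back to a plain `(shape, dtype)` call otherwise:

```python
def _try_call_init_fn(fn, *args):
    """Call `fn` with all args; if it rejects the trailing arg, retry without it.

    Sketch only: lets initializers that understand a trailing `batch_ndims`
    argument coexist with plain `(shape, dtype)` initializers such as
    `tf.initializers.glorot_uniform()`.
    """
    try:
        return fn(*args)
    except TypeError:
        return fn(*args[:-1])
```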
Example #3
def make_kernel_bias_posterior_mvn_diag(kernel_shape,
                                        bias_shape,
                                        dtype=tf.float32,
                                        kernel_initializer=None,
                                        bias_initializer=None,
                                        kernel_name='posterior_kernel',
                                        bias_name='posterior_bias'):
    """Create learnable posterior for Variational layers with kernel and bias.

  Args:
    kernel_shape: ...
    bias_shape: ...
    dtype: ...
      Default value: `tf.float32`.
    kernel_initializer: ...
      Default value: `None` (i.e., `tf.initializers.glorot_uniform()`).
    bias_initializer: ...
      Default value: `None` (i.e., `tf.zeros`).
    kernel_name: ...
      Default value: `"posterior_kernel"`.
    bias_name: ...
      Default value: `"posterior_bias"`.

  Returns:
    kernel_and_bias_distribution: ...
  """
    if kernel_initializer is None:
        kernel_initializer = tf.initializers.glorot_uniform()
    if bias_initializer is None:
        bias_initializer = tf.zeros
    make_loc = lambda shape, init, name: tf.Variable(  # pylint: disable=g-long-lambda
        init(shape, dtype=dtype),
        name=name + '_loc')
    make_scale = lambda shape, name: TransformedVariable(  # pylint: disable=g-long-lambda
        tf.ones(shape, dtype=dtype),
        Chain([Shift(1e-5), Softplus()]),
        name=name + '_scale')
    return JointDistributionSequential([
        Independent(Normal(loc=make_loc(kernel_shape, kernel_initializer,
                                        kernel_name),
                           scale=make_scale(kernel_shape, kernel_name)),
                    reinterpreted_batch_ndims=prefer_static.size(kernel_shape),
                    name=kernel_name),
        Independent(Normal(loc=make_loc(bias_shape, bias_initializer,
                                        bias_name),
                           scale=make_scale(bias_shape, bias_name)),
                    reinterpreted_batch_ndims=prefer_static.size(bias_shape),
                    name=bias_name),
    ])
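A short usage sketch for the posterior factory above, assuming the import preamble after Example #1 and purely illustrative shapes (a dense layer mapping 3 features to 2 units):

```python
posterior = make_kernel_bias_posterior_mvn_diag(
    kernel_shape=[3, 2], bias_shape=[2])

# JointDistributionSequential samples are a list: one draw per component.
kernel_sample, bias_sample = posterior.sample()   # shapes [3, 2] and [2]
log_prob = posterior.log_prob([kernel_sample, bias_sample])

# The `_loc` variables and the transformed `_scale` variables are trainable,
# so the posterior can be fit with gradient-based variational inference.
print([v.name for v in posterior.trainable_variables])
```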
Example #4
def make_kernel_bias_prior_spike_and_slab(
        kernel_shape,
        bias_shape,
        kernel_initializer=None,  # pylint: disable=unused-argument
        bias_initializer=None,  # pylint: disable=unused-argument
        kernel_batch_ndims=0,  # pylint: disable=unused-argument
        bias_batch_ndims=0,  # pylint: disable=unused-argument
        dtype=tf.float32,
        kernel_name='prior_kernel',
        bias_name='prior_bias'):
    """Create prior for Variational layers with kernel and bias.

  Note: Distribution scale is inversely related to regularization strength.
  Consider a "Normal" prior; bigger scale corresponds to less L2 regularization.
  I.e., a "Normal" prior with scale `scale` and an L2 penalty with weight
  `l2weight` related by
  ```python
  scale    = (2. * l2weight)**-0.5
  l2weight = scale**-2. / 2.
  ```
  have a similar regularizing effect.

  The std. deviation of each of the component distributions returned by this
  function is approximately `1415` (or approximately `l2weight = 25e-8`). In
  other words, this prior is extremely "weak".

  Args:
    kernel_shape: ...
    bias_shape: ...
    kernel_initializer: Ignored.
      Default value: `None` (i.e., `tf.initializers.glorot_uniform()`).
    bias_initializer: Ignored.
      Default value: `None` (i.e., `tf.zeros`).
    kernel_batch_ndims: ...
      Default value: `0`.
    bias_batch_ndims: ...
      Default value: `0`.
    dtype: ...
      Default value: `tf.float32`.
    kernel_name: ...
      Default value: `"prior_kernel"`.
    bias_name: ...
      Default value: `"prior_bias"`.

  Returns:
    kernel_and_bias_distribution: ...
  """
    w = MixtureSameFamily(mixture_distribution=Categorical(probs=[0.5, 0.5]),
                          components_distribution=Normal(loc=0.,
                                                         scale=tf.constant(
                                                             [1., 2000.],
                                                             dtype=dtype)))
    return JointDistributionSequential([
        Sample(w, kernel_shape, name=kernel_name),
        Sample(w, bias_shape, name=bias_name),
    ])
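The numbers quoted in the docstring follow directly from the mixture defined above: with equal weights and zero means, the mixture variance is the average of the component variances, and the `l2weight = scale**-2. / 2.` correspondence turns that scale into an extremely small L2 weight. A quick arithmetic check in plain NumPy:

```python
import numpy as np

# Equal-weight mixture of Normal(0, 1) and Normal(0, 2000), both zero-mean:
# the mixture variance is the average of the component variances.
mixture_std = np.sqrt(0.5 * 1.0**2 + 0.5 * 2000.0**2)
print(mixture_std)   # ~1414.2, i.e. the "approximately 1415" above

# Docstring correspondence between a Normal prior scale and an L2 weight.
l2weight = mixture_std**-2.0 / 2.0
print(l2weight)      # ~2.5e-7, i.e. approximately 25e-8
```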
Example #5
def make_kernel_bias_prior_spike_and_slab(kernel_shape,
                                          bias_shape,
                                          dtype=tf.float32,
                                          kernel_initializer=None,
                                          bias_initializer=None):
    """Create prior for Variational layers with kernel and bias."""
    del kernel_initializer, bias_initializer
    w = MixtureSameFamily(mixture_distribution=Categorical(probs=[0.5, 0.5]),
                          components_distribution=Normal(loc=0.,
                                                         scale=tf.constant(
                                                             [1., 2000.],
                                                             dtype=dtype)))
    return JointDistributionSequential([
        Sample(w, kernel_shape, name='prior_kernel'),
        Sample(w, bias_shape, name='prior_bias'),
    ])
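Like the posterior factories, the prior returns a `JointDistributionSequential` over (kernel, bias). A minimal sketch with the same illustrative shapes as before:

```python
prior = make_kernel_bias_prior_spike_and_slab(
    kernel_shape=[3, 2], bias_shape=[2])

kernel_draw, bias_draw = prior.sample()
# The prior holds no trainable variables; in a variational layer it only
# contributes the prior log-probability (KL) term of the objective.
prior_log_prob = prior.log_prob([kernel_draw, bias_draw])
```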
Example #6
def random_walk_mvnorm_fn(covariance,
                          pu=0.95,
                          fixed_variance=0.01,
                          is_adaptive=1,
                          name=None):
    """Returns callable that adds Multivariate Normal (MVN) noise to the input.
  
  Args:
    covariance: Python `list` of `Tensor`s representing each covariance 
      matrix, size d x d, of the Multivariate Normal proposal. The number
      of parameters is d.
    pu: Python floating point number representing the bounded convergence
      parameter. If equal to 1, all proposals are drawn from the
      MVN(0, `covariance`) distribution; if less than 1, proposals are drawn
      from MVN(0, `covariance`) with probability `pu` and from
      MVN(0, `fixed_variance`/d) otherwise.
      Default value: 0.95.
    fixed_variance: Python floating point number representing the variance of
      the fixed proposal distribution of the form MVN(0, `fixed_variance`/d).
      Default value: 0.01.
    is_adaptive: `Tensor` (or nested Python list) indicating the type of
      proposal for each batch, where 0 selects the fixed proposal and 1 the
      adaptive proposal.
      Default value: 1.
    name: Python `str` name.
      Given the default value of `None`, the name is set to
      `random_walk_mvnorm_fn`.
 
  Returns:
      random_walk_mvnorm_fn: A callable accepting a Python `list` of `Tensor`s
      representing the state parts of the `current_state` and an `int`
      representing the random seed used to generate the proposal. The callable
      returns a `list` of `Tensor`s, with the same type as the input state
      parts, representing the proposal for the Metropolis-Hastings algorithm.
  """

    dtype = dtype_util.base_dtype(covariance[0].dtype)
    shape = tf.stack(covariance, axis=0).shape
    # Add jitter so the covariance matrix is positive definite (required for
    # the Cholesky factorization below).
    covariance = covariance + 1.0e-9 * tf.eye(
        shape[1], batch_shape=[shape[0]], dtype=dtype)
    scale_tril = tf.linalg.cholesky(covariance)
    rv_adaptive = MultivariateNormalTriL(loc=tf.zeros([shape[0], shape[1]],
                                                      dtype=dtype),
                                         scale_tril=scale_tril)
    rv_fixed = Normal(
        loc=tf.zeros([shape[0], shape[1]], dtype=dtype),
        scale=tf.constant(fixed_variance, dtype=dtype) / shape[2],
    )

    def _fn(state_parts, seed):
        with tf.name_scope(name or "random_walk_mvnorm_fn"):

            def proposal():
                # For parallel computation it is quicker to sample from both
                # distributions and then select the required result.
                rv = tf.stack(
                    [
                        rv_fixed.sample(seed=seed),
                        rv_adaptive.sample(seed=seed),
                    ],
                    axis=1,
                )
                return tf.squeeze(tf.gather(rv,
                                            is_adaptive,
                                            axis=1,
                                            batch_dims=1),
                                  axis=1)

            proposal_parts = tf.unstack(proposal())
            new_state_parts = [
                proposal_part + state_part for proposal_part, state_part in
                zip(proposal_parts, state_parts)
            ]
            return new_state_parts

    return _fn
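The returned callable has the `(state_parts, seed) -> new_state_parts` signature that `tfp.mcmc.RandomWalkMetropolis` expects for its `new_state_fn` argument. A minimal sketch, assuming the import preamble after Example #1; the single 2 x 2 covariance matrix, the `is_adaptive` indicator, and the state shapes are illustrative only:

```python
# One batch of two parameters: a single 2 x 2 proposal covariance.
covariance = [tf.constant([[0.1, 0.0],
                           [0.0, 0.1]], dtype=tf.float64)]

# Always use the adaptive MVN proposal for this (only) batch.
proposal_fn = random_walk_mvnorm_fn(covariance, is_adaptive=[[1]])

# The callable can be invoked directly on a current state of shape [1, 2] ...
current_state = [tf.zeros([1, 2], dtype=tf.float64)]
new_state = proposal_fn(current_state, seed=[4, 2])

# ... or handed to a random-walk Metropolis kernel as its proposal.
target = tfp.distributions.MultivariateNormalDiag(
    loc=tf.zeros([1, 2], dtype=tf.float64))
kernel = tfp.mcmc.RandomWalkMetropolis(
    target_log_prob_fn=target.log_prob,
    new_state_fn=proposal_fn)
```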