# NOTE: The imports below are assumptions, reconstructed to match the
# TensorFlow Probability source layout, so that the unqualified names used in
# this module (`Chain`, `Normal`, `TransformedVariable`, etc.) resolve.
import tensorflow.compat.v2 as tf

from tensorflow_probability.python.bijectors.chain import Chain
from tensorflow_probability.python.bijectors.shift import Shift
from tensorflow_probability.python.bijectors.softplus import Softplus
from tensorflow_probability.python.distributions.categorical import Categorical
from tensorflow_probability.python.distributions.independent import Independent
from tensorflow_probability.python.distributions.joint_distribution_sequential import JointDistributionSequential
from tensorflow_probability.python.distributions.mixture_same_family import MixtureSameFamily
from tensorflow_probability.python.distributions.normal import Normal
from tensorflow_probability.python.distributions.sample import Sample
from tensorflow_probability.python.experimental.nn import initializers as nn_init_lib
from tensorflow_probability.python.internal import prefer_static
from tensorflow_probability.python.util.deferred_tensor import TransformedVariable


def make_kernel_bias_posterior_mvn_diag(kernel_shape,
                                        bias_shape,
                                        dtype=tf.float32,
                                        kernel_initializer=None,
                                        bias_initializer=None):
  """Create learnable posterior for Variational layers with kernel and bias."""
  if kernel_initializer is None:
    kernel_initializer = tf.initializers.glorot_normal()
  if bias_initializer is None:
    bias_initializer = tf.initializers.glorot_normal()
  make_loc = lambda shape, init, name: tf.Variable(  # pylint: disable=g-long-lambda
      init(shape, dtype=dtype),
      name=name + '_loc')
  # `Chain([Shift(1e-5), Softplus()])` maps the unconstrained variable through
  # softplus then a shift, keeping the scale strictly greater than 1e-5.
  make_scale = lambda shape, name: TransformedVariable(  # pylint: disable=g-long-lambda
      tf.ones(shape, dtype=dtype),
      Chain([Shift(1e-5), Softplus()]),
      name=name + '_scale')
  return JointDistributionSequential([
      Independent(Normal(loc=make_loc(kernel_shape,
                                      kernel_initializer,
                                      'posterior_kernel'),
                         scale=make_scale(kernel_shape, 'posterior_kernel')),
                  reinterpreted_batch_ndims=prefer_static.size(kernel_shape),
                  name='posterior_kernel'),
      Independent(Normal(loc=make_loc(bias_shape,
                                      bias_initializer,
                                      'posterior_bias'),
                         scale=make_scale(bias_shape, 'posterior_bias')),
                  reinterpreted_batch_ndims=prefer_static.size(bias_shape),
                  name='posterior_bias'),
  ])
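

# Illustrative sketch, not part of the original module: the returned
# `JointDistributionSequential` is a `tf.Module`, so the `*_loc` variables and
# the pre-transformed `*_scale` variables it owns are collected automatically
# for gradient-based training. The shapes below are hypothetical, chosen only
# for demonstration.
def _demo_posterior_trainable_variables():
  posterior = make_kernel_bias_posterior_mvn_diag(
      kernel_shape=[4, 2], bias_shape=[2])
  # Expect four variables: kernel loc/scale and bias loc/scale.
  return posterior.trainable_variables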


def _try_call_init_fn(fn, *args):
  """Call `fn` with all args, falling back to dropping the trailing arg.

  NOTE: This helper is referenced but not defined in this excerpt; the body
  here is a sketch that lets initializers optionally accept `batch_ndims`.
  """
  try:
    return fn(*args)
  except TypeError:
    return fn(*args[:-1])


def make_kernel_bias_posterior_mvn_diag(
    kernel_shape,
    bias_shape,
    kernel_initializer=None,
    bias_initializer=None,
    kernel_batch_ndims=0,
    bias_batch_ndims=0,
    dtype=tf.float32,
    kernel_name='posterior_kernel',
    bias_name='posterior_bias'):
  """Create learnable posterior for Variational layers with kernel and bias.

  Args:
    kernel_shape: ...
    bias_shape: ...
    kernel_initializer: ...
      Default value: `None` (i.e., `tf.initializers.glorot_uniform()`).
    bias_initializer: ...
      Default value: `None` (i.e., `tf.zeros`).
    kernel_batch_ndims: ...
      Default value: `0`.
    bias_batch_ndims: ...
      Default value: `0`.
    dtype: ...
      Default value: `tf.float32`.
    kernel_name: ...
      Default value: `"posterior_kernel"`.
    bias_name: ...
      Default value: `"posterior_bias"`.

  Returns:
    kernel_and_bias_distribution: ...
  """
  if kernel_initializer is None:
    kernel_initializer = nn_init_lib.glorot_uniform()
  if bias_initializer is None:
    bias_initializer = tf.zeros
  make_loc = lambda init_fn, shape, batch_ndims, name: tf.Variable(  # pylint: disable=g-long-lambda
      _try_call_init_fn(init_fn, shape, dtype, batch_ndims),
      name=name + '_loc')
  # Setting the initial scale to a relatively small value causes the `loc` to
  # quickly move toward a lower loss value.
  make_scale = lambda shape, name: TransformedVariable(  # pylint: disable=g-long-lambda
      tf.fill(shape, value=tf.constant(1e-3, dtype=dtype)),
      Chain([Shift(1e-5), Softplus()]),
      name=name + '_scale')
  return JointDistributionSequential([
      Independent(Normal(loc=make_loc(kernel_initializer,
                                      kernel_shape,
                                      kernel_batch_ndims,
                                      kernel_name),
                         scale=make_scale(kernel_shape, kernel_name)),
                  reinterpreted_batch_ndims=prefer_static.size(kernel_shape),
                  name=kernel_name),
      Independent(Normal(loc=make_loc(bias_initializer,
                                      bias_shape,
                                      bias_batch_ndims,
                                      bias_name),
                         scale=make_scale(bias_shape, bias_name)),
                  reinterpreted_batch_ndims=prefer_static.size(bias_shape),
                  name=bias_name),
  ])
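

# Illustrative sketch, not part of the original module: build the posterior
# for a hypothetical dense layer with 4 inputs and 2 outputs, draw one set of
# weights, and score that draw under the distribution. The shapes and seed
# are assumptions made for this demo.
def _demo_posterior_sample_and_score():
  posterior = make_kernel_bias_posterior_mvn_diag(
      kernel_shape=[4, 2], bias_shape=[2])
  kernel, bias = posterior.sample(seed=42)  # Shapes [4, 2] and [2].
  return posterior.log_prob([kernel, bias])  # Scalar joint log-density.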


def make_kernel_bias_prior_spike_and_slab(
    kernel_shape,
    bias_shape,
    kernel_initializer=None,  # pylint: disable=unused-argument
    bias_initializer=None,  # pylint: disable=unused-argument
    kernel_batch_ndims=0,  # pylint: disable=unused-argument
    bias_batch_ndims=0,  # pylint: disable=unused-argument
    dtype=tf.float32,
    kernel_name='prior_kernel',
    bias_name='prior_bias'):
  """Create prior for Variational layers with kernel and bias.

  Note: Distribution scale is inversely related to regularization strength.
  Consider a "Normal" prior; bigger scale corresponds to less L2
  regularization. I.e.,

  ```python
  scale    = (2. * l2weight)**-0.5
  l2weight = scale**-2. / 2.
  ```

  have a similar regularizing effect. The std. deviation of each of the
  component distributions returned by this function is approximately `1415`
  (or approximately `l2weight = 25e-8`). In other words this prior is
  extremely "weak".

  Args:
    kernel_shape: ...
    bias_shape: ...
    kernel_initializer: Ignored.
      Default value: `None` (i.e., `tf.initializers.glorot_uniform()`).
    bias_initializer: Ignored.
      Default value: `None` (i.e., `tf.zeros`).
    kernel_batch_ndims: ...
      Default value: `0`.
    bias_batch_ndims: ...
      Default value: `0`.
    dtype: ...
      Default value: `tf.float32`.
    kernel_name: ...
      Default value: `"prior_kernel"`.
    bias_name: ...
      Default value: `"prior_bias"`.

  Returns:
    kernel_and_bias_distribution: ...
  """
  w = MixtureSameFamily(
      mixture_distribution=Categorical(probs=[0.5, 0.5]),
      components_distribution=Normal(
          loc=0.,
          scale=tf.constant([1., 2000.], dtype=dtype)))
  return JointDistributionSequential([
      Sample(w, kernel_shape, name=kernel_name),
      Sample(w, bias_shape, name=bias_name),
  ])
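

# Illustrative check, not part of the original module, of the docstring claim
# above: by the law of total variance the spike-and-slab marginal has variance
# 0.5 * 1.**2 + 0.5 * 2000.**2 = 2000000.5, i.e. a stddev of ~1414.2 (~1415).
def _demo_prior_stddev():
  w = MixtureSameFamily(
      mixture_distribution=Categorical(probs=[0.5, 0.5]),
      components_distribution=Normal(loc=0., scale=[1., 2000.]))
  return tf.sqrt(w.variance())  # ~= 1414.2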


def make_kernel_bias_posterior_mvn_diag(kernel_shape,
                                        bias_shape,
                                        dtype=tf.float32,
                                        kernel_initializer=None,
                                        bias_initializer=None,
                                        kernel_name='posterior_kernel',
                                        bias_name='posterior_bias'):
  """Create learnable posterior for Variational layers with kernel and bias.

  Args:
    kernel_shape: ...
    bias_shape: ...
    dtype: ...
      Default value: `tf.float32`.
    kernel_initializer: ...
      Default value: `None` (i.e., `tf.initializers.glorot_uniform()`).
    bias_initializer: ...
      Default value: `None` (i.e., `tf.zeros`).
    kernel_name: ...
      Default value: `"posterior_kernel"`.
    bias_name: ...
      Default value: `"posterior_bias"`.

  Returns:
    kernel_and_bias_distribution: ...
  """
  if kernel_initializer is None:
    kernel_initializer = tf.initializers.glorot_uniform()
  if bias_initializer is None:
    bias_initializer = tf.zeros
  make_loc = lambda shape, init, name: tf.Variable(  # pylint: disable=g-long-lambda
      init(shape, dtype=dtype),
      name=name + '_loc')
  make_scale = lambda shape, name: TransformedVariable(  # pylint: disable=g-long-lambda
      tf.ones(shape, dtype=dtype),
      Chain([Shift(1e-5), Softplus()]),
      name=name + '_scale')
  return JointDistributionSequential([
      Independent(Normal(loc=make_loc(kernel_shape,
                                      kernel_initializer,
                                      kernel_name),
                         scale=make_scale(kernel_shape, kernel_name)),
                  reinterpreted_batch_ndims=prefer_static.size(kernel_shape),
                  name=kernel_name),
      Independent(Normal(loc=make_loc(bias_shape,
                                      bias_initializer,
                                      bias_name),
                         scale=make_scale(bias_shape, bias_name)),
                  reinterpreted_batch_ndims=prefer_static.size(bias_shape),
                  name=bias_name),
  ])
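

# Illustrative sketch, not part of the original module: a single stochastic
# forward pass of a dense layer using one weight draw from the posterior.
# Input size 4, output size 2, and batch size 3 are demo assumptions.
def _demo_dense_forward_pass():
  posterior = make_kernel_bias_posterior_mvn_diag(
      kernel_shape=[4, 2], bias_shape=[2])
  kernel, bias = posterior.sample(seed=42)
  x = tf.ones([3, 4])  # A batch of three dummy inputs.
  return tf.matmul(x, kernel) + bias  # Shape [3, 2].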


def make_kernel_bias_prior_spike_and_slab(kernel_shape,
                                          bias_shape,
                                          dtype=tf.float32,
                                          kernel_initializer=None,
                                          bias_initializer=None):
  """Create prior for Variational layers with kernel and bias."""
  del kernel_initializer, bias_initializer
  w = MixtureSameFamily(
      mixture_distribution=Categorical(probs=[0.5, 0.5]),
      components_distribution=Normal(
          loc=0.,
          scale=tf.constant([1., 2000.], dtype=dtype)))
  return JointDistributionSequential([
      Sample(w, kernel_shape, name='prior_kernel'),
      Sample(w, bias_shape, name='prior_bias'),
  ])
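

# Illustrative sketch, not part of the original module: pair the prior with
# the matching posterior to form a single-sample Monte Carlo estimate of
# KL(posterior || prior), the weight-regularization term of a variational
# (ELBO) loss. Shapes and seed are demo assumptions.
def _demo_kl_penalty_estimate():
  prior = make_kernel_bias_prior_spike_and_slab(
      kernel_shape=[4, 2], bias_shape=[2])
  posterior = make_kernel_bias_posterior_mvn_diag(
      kernel_shape=[4, 2], bias_shape=[2])
  weights = posterior.sample(seed=42)
  return posterior.log_prob(weights) - prior.log_prob(weights)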