Example #1
 def _dummy_dist(self, remove_independent=True):
   # deterministic case
   if self.is_deterministic:
     # return a (distribution, params_size) pair to match the stochastic
     # branch; a scalar VectorDeterministic needs a single parameter
     return obd.VectorDeterministic(loc=(0.,)), 1
   # stochastic
   layer, _ = parse_distribution(self.posterior)
   # extra kwargs for params_size
   args, defaults = _args_and_defaults(layer.params_size)
   _, init_defaults = _args_and_defaults(layer.__init__)
   kw = {}
   if len(args) > 1:
     args = args[1:]
     for a in args:
       if a in self.kwargs:
         kw[a] = self.kwargs[a]
       elif a in defaults:
         kw[a] = defaults[a]
       elif a in init_defaults:
         kw[a] = init_defaults[a]
   # get the params_size; some layers expect an integer `event_size`,
   # others expect a tuple `event_shape`
   size = layer.params_size(1, **kw)
   if inspect.getfullargspec(layer.params_size).args[0] == 'event_size':
     event_shape = 1
   else:
     event_shape = (1,)
   param_shape = (1, size)
   # create a dummy dist
   params = array_ops.empty(shape=param_shape, dtype=tf.float32)
   dist = layer(event_shape, **self.kwargs)(params)
   # get original distribution
   if remove_independent:
     while isinstance(dist, obd.Independent):
       dist = dist.distribution
   return dist, size
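
The dummy-distribution trick above is not specific to this class: any tensorflow_probability DistributionLambda layer can be fed a zero parameter tensor purely to introspect the distribution it produces. A minimal standalone sketch using public TFP API only (the layer and shapes here are illustrative, not from the code above):

import tensorflow as tf
import tensorflow_probability as tfp

layer_cls = tfp.layers.IndependentNormal
size = layer_cls.params_size(1)       # flat parameters needed for event_shape=1
params = tf.zeros(shape=(1, size))    # dummy parameter tensor
dist = layer_cls(1)(params)           # throwaway distribution instance
# unwrap Independent wrappers, as remove_independent=True does above
while isinstance(dist, tfp.distributions.Independent):
  dist = dist.distribution
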
Example #2
 def create_posterior(self,
                      input_shape=None,
                      name=None) -> obl.DenseDistribution:
     r""" Initiate a Distribution for the random variable """
     prior = _default_prior(self.event_shape, self.posterior, self.prior,
                            self.kwargs)
     event_shape = self.event_shape
     posterior = self.posterior
     posterior_kwargs = dict(self.kwargs)
     name = self.name if name is None else str(name)
     # ====== deterministic distribution with loss function from tensorflow ====== #
     if posterior in dir(tf.losses) or posterior in dir(keras.activations):
         distribution_layer = obl.VectorDeterministicLayer
         if posterior in dir(tf.losses):
             activation = posterior_kwargs.pop('activation', 'relu')
             fn = tf.losses.get(str(posterior))
         else:  # just activation function, loss default MSE
             activation = keras.activations.get(self.posterior)
             fn = tf.losses.get(posterior_kwargs.pop('loss', 'mse'))
         posterior_kwargs['log_prob'] = \
           lambda self, y_true: -fn(y_true, self.mean())
     # ====== probabilistic loss ====== #
     else:
         distribution_layer = parse_distribution(self.posterior)[0]
         activation = 'linear'
     # ====== create distribution layers ====== #
     activation = posterior_kwargs.pop('activation', activation)
     kw = dict(disable_projection=not self.projection)
     if input_shape is not None:
         kw['input_shape'] = input_shape
     ### create the layer
     ## mixture distributions
     if posterior in ('mdn', 'mixdiag', 'mixfull', 'mixtril'):
         posterior_kwargs.pop('covariance', None)
         posterior_kwargs.update(kw)
         # dense network for projection
         layer = obl.MixtureDensityNetwork(event_shape,
                                           loc_activation=activation,
                                           scale_activation='softplus1',
                                           covariance=dict(
                                               mdn='none',
                                               mixdiag='diag',
                                               mixfull='tril',
                                               mixtril='tril')[posterior],
                                           name=name,
                                           prior=prior,
                                           **posterior_kwargs)
     ## non-mixture distribution
     else:
         layer = obl.DenseDistribution(event_shape,
                                       posterior=distribution_layer,
                                       prior=prior,
                                       activation=activation,
                                       posterior_kwargs=posterior_kwargs,
                                       name=name,
                                       **kw)
     ### set attributes
     if not hasattr(layer, 'event_shape'):
         layer.event_shape = event_shape
     return layer
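
The deterministic branch above turns a TF/Keras loss into a pseudo log-probability so the resulting layer still exposes a log_prob interface. Its core is just two lines (taken from the code above; tf.losses.get resolves a loss by its string alias):

import tensorflow as tf

fn = tf.losses.get('mse')
log_prob = lambda dist, y_true: -fn(y_true, dist.mean())
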
Example #3
 def __post_init__(self):
   self.posterior = str(self.posterior).lower().strip()
   shape = self.event_shape
   if not (tf.is_tensor(shape) or isinstance(shape, tf.TensorShape) or
           isinstance(shape, np.ndarray)):
     try:
       shape = [int(i) for i in tf.nest.flatten(self.event_shape)]
      except Exception as e:
        raise ValueError(f"No support for event_shape={shape}, error: {e}") from e
   self.event_shape = shape
   if self.name is None:
     _, cls = parse_distribution(self.posterior)
     self.name = f"{cls.__name__}Variable"
   else:
     self.name = str(self.name)
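
The shape normalization above leans on tf.nest.flatten, which turns an arbitrarily nested structure into a flat list, so any nesting of ints collapses to a plain list of ints. A quick illustration (the input shape is made up):

import tensorflow as tf

shape = [int(i) for i in tf.nest.flatten((8, (4, 2)))]   # -> [8, 4, 2]
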
Example #4
 def __init__(self,
              event_shape=(),
              posterior='normal',
              posterior_kwargs={},
              prior=None,
              convert_to_tensor_fn=Distribution.sample,
              dropout=0.0,
              activation='linear',
              use_bias=True,
              kernel_initializer='glorot_uniform',
              bias_initializer='zeros',
              kernel_regularizer=None,
              bias_regularizer=None,
              activity_regularizer=None,
              kernel_constraint=None,
              bias_constraint=None,
              disable_projection=False,
              **kwargs):
     assert prior is None or isinstance(prior, Distribution), \
       "prior can be None or instance of tensorflow_probability.Distribution"
     # duplicated event_shape or event_size in posterior_kwargs
     posterior_kwargs = dict(posterior_kwargs)
     if 'event_shape' in posterior_kwargs:
         event_shape = posterior_kwargs.pop('event_shape')
     if 'event_size' in posterior_kwargs:
         event_shape = posterior_kwargs.pop('event_size')
      # a 'convert_to_tensor_fn' key in posterior_kwargs overrides the
      # argument; fall back to the explicit argument instead of discarding it
      convert_to_tensor_fn = posterior_kwargs.pop('convert_to_tensor_fn',
                                                  convert_to_tensor_fn)
     # process the posterior
      # TODO: support giving an instance of DistributionLambda directly
     if inspect.isclass(posterior) and issubclass(posterior,
                                                  DistributionLambda):
         post_layer_cls = posterior
     else:
         post_layer_cls, _ = parse_distribution(posterior)
     # create layers
     self._convert_to_tensor_fn = convert_to_tensor_fn
     self._posterior = posterior
     self._prior = prior
     self._event_shape = event_shape
     self._posterior_class = post_layer_cls
     self._posterior_kwargs = posterior_kwargs
     self._dropout = dropout
     # set more descriptive name
     name = kwargs.pop('name', None)
     if name is None:
         name = 'dense_%s' % (posterior if isinstance(
             posterior, string_types) else posterior.__class__.__name__)
     kwargs['name'] = name
     # params_size could be static function or method
     params_size = _params_size(self.posterior_layer(), event_shape)
     self._disable_projection = bool(disable_projection)
     super(DenseDistribution,
           self).__init__(units=params_size,
                          activation=activation,
                          use_bias=use_bias,
                          kernel_initializer=kernel_initializer,
                          bias_initializer=bias_initializer,
                          kernel_regularizer=kernel_regularizer,
                          bias_regularizer=bias_regularizer,
                          activity_regularizer=activity_regularizer,
                          kernel_constraint=kernel_constraint,
                          bias_constraint=bias_constraint,
                          **kwargs)
     # store the distribution from last call
     self._last_distribution = None
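
A hypothetical construction sketch for the layer defined above; the import path is an assumption inferred from the odin.bay imports visible in Examples #10-#12 (obl in these snippets appears to alias the same package):

from odin.bay.layers import DenseDistribution   # assumed import path

layer = DenseDistribution(event_shape=(10,),
                          posterior='normal',   # alias resolved via parse_distribution
                          prior=None)
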
Example #5
def _default_prior(event_shape, posterior, prior, posterior_kwargs):
  if not isinstance(event_shape, (Sequence, tf.TensorShape)):
    raise ValueError("event_shape must be a list of integers but given: "
                     f"{event_shape} type: {type(event_shape)}")
  if isinstance(prior, (Distribution, DistributionLambda, Callable)):
    return prior
  elif not isinstance(prior, (string_types, dict, type(None))):
    raise ValueError("prior must be a string, dict, None, or an instance of "
                     f"Distribution or DistributionLambda, but given: {prior}")
  # no prior given
  layer, dist = parse_distribution(posterior)
  if isinstance(prior, dict):
    kw = dict(prior)
    prior = None
  else:
    kw = {}
  event_size = int(np.prod(event_shape))

  ## helper function
  def _kwargs(**args):
    for k, v in args.items():
      if k not in kw:
        kw[k] = v
    return kw

  ## Normal
  if layer == obl.GaussianLayer:
    prior = obd.Independent(
        obd.Normal(**_kwargs(loc=tf.zeros(shape=event_shape),
                             scale=tf.ones(shape=event_shape))),
        reinterpreted_batch_ndims=1,
    )
  ## Multivariate Normal
  elif issubclass(layer, obl.MultivariateNormalLayer):
    cov = layer._partial_kwargs['covariance']
    if cov == 'diag':  # diagonal covariance
      loc = tf.zeros(shape=event_shape)
      if tf.rank(loc) == 0:
        loc = tf.expand_dims(loc, axis=-1)
      prior = obd.MultivariateNormalDiag(
          **_kwargs(loc=loc, scale_identity_multiplier=1.))
    else:  # low-triangle covariance
      bijector = tfp.bijectors.FillScaleTriL(
          diag_bijector=tfp.bijectors.Identity(), diag_shift=1e-5)
      size = tf.reduce_prod(event_shape)
      loc = tf.zeros(shape=[size])
      scale_tril = bijector.forward(tf.ones(shape=[size * (size + 1) // 2]))
      prior = obd.MultivariateNormalTriL(
          **_kwargs(loc=loc, scale_tril=scale_tril))
  ## Log Normal
  elif layer == obl.LogNormalLayer:
    prior = obd.Independent(
        obd.LogNormal(**_kwargs(loc=tf.zeros(shape=event_shape),
                                scale=tf.ones(shape=event_shape))),
        reinterpreted_batch_ndims=1,
    )
  ## mixture
  elif issubclass(layer, obl.MixtureGaussianLayer):
    if hasattr(layer, '_partial_kwargs'):
      cov = layer._partial_kwargs['covariance']
    else:
      cov = 'none'
    n_components = int(posterior_kwargs.get('n_components', 2))
    if cov == 'diag':
      scale_shape = [n_components, event_size]
      fn = lambda l, s: obd.MultivariateNormalDiag(loc=l,
                                                   scale_diag=tf.nn.softplus(s))
    elif cov == 'none':
      scale_shape = [n_components, event_size]
      fn = lambda l, s: obd.Independent(
          obd.Normal(loc=l, scale=tf.math.softplus(s)),
          reinterpreted_batch_ndims=1,
      )
    elif cov in ('full', 'tril'):
      scale_shape = [n_components, event_size * (event_size + 1) // 2]
      fn = lambda l, s: obd.MultivariateNormalTriL(
          loc=l,
          scale_tril=tfp.bijectors.FillScaleTriL(diag_shift=1e-5)
          (tf.math.softplus(s)))
    else:
      raise ValueError(f"No support for covariance: '{cov}'")
    loc = tf.cast(tf.fill([n_components, event_size], 0.), dtype=tf.float32)
    log_scale = tf.cast(tf.fill(scale_shape, np.log(np.expm1(1.))),
                        dtype=tf.float32)
    p = 1. / n_components
    mixture_logits = tf.cast(tf.fill([n_components], np.log(p / (1 - p))),
                             dtype=tf.float32)
    prior = obd.MixtureSameFamily(
        components_distribution=fn(loc, log_scale),
        mixture_distribution=obd.Categorical(logits=mixture_logits))
  ## discrete
  elif dist in (obd.OneHotCategorical, obd.Categorical) or \
    layer == obl.RelaxedOneHotCategoricalLayer:
    p = 1. / event_size
    prior = dist(**_kwargs(logits=[np.log(p / (1 - p))] * event_size),
                 dtype=tf.float32)
  elif dist == obd.Dirichlet:
    prior = dist(**_kwargs(concentration=[1.] * event_size))
  elif dist == obd.Bernoulli:
    prior = obd.Independent(
        obd.Bernoulli(**_kwargs(logits=np.zeros(event_shape)),
                      dtype=tf.float32),
        reinterpreted_batch_ndims=len(event_shape),
    )
  ## other
  return prior
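
The Gaussian default built above is the usual standard-normal prior over a flat event shape, and can be reproduced with TFP alone (event_shape is illustrative):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

event_shape = (8,)
prior = tfd.Independent(
    tfd.Normal(loc=tf.zeros(event_shape), scale=tf.ones(event_shape)),
    reinterpreted_batch_ndims=1)   # treat the last axis as the event
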
Example #6
 def distribution_layer(self):
   return parse_distribution(self.posterior)[0]
Example #7
 def distribution(self):
   return parse_distribution(self.posterior)[1]
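
Both one-line properties above are thin wrappers over parse_distribution, which maps a string alias to a (DistributionLambda subclass, Distribution class) pair. A usage sketch, assuming the import path shown verbatim in Examples #10 and #11:

from odin.bay.distribution_alias import parse_distribution

layer_cls, dist_cls = parse_distribution('normal')
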
Example #8
 def create_posterior(self,
                      input_shape: Optional[List[int]] = None,
                      name: Optional[str] = None) -> obl.DenseDistribution:
     r""" Initiate a Distribution for the random variable """
     # use Gaussian noise as prior distribution for  deterministic case
     if self.is_deterministic:
         prior = obd.Independent(
             obd.Normal(loc=tf.zeros(shape=self.event_shape),
                        scale=tf.ones(shape=self.event_shape)),
             reinterpreted_batch_ndims=1,
         )
     else:
         prior = _default_prior(self.event_shape, self.posterior,
                                self.prior, self.kwargs)
     event_shape = self.event_shape
     posterior = self.posterior
     posterior_kwargs = dict(self.kwargs)
     name = self.name if name is None else str(name)
     # ====== deterministic distribution with loss function from tensorflow ====== #
     if posterior in dir(tf.losses) or posterior in dir(keras.activations):
         distribution_layer = obl.VectorDeterministicLayer
         if posterior in dir(tf.losses):
             activation = 'linear'
             fn = tf.losses.get(str(posterior))
         else:  # just activation function, loss default MSE
             activation = keras.activations.get(self.posterior)
             fn = tf.losses.get(posterior_kwargs.pop('loss', 'mse'))
         posterior_kwargs['log_prob'] = \
           lambda self, y_true: -fn(y_true, self.mean())
     # ====== probabilistic loss ====== #
     else:
         distribution_layer = parse_distribution(self.posterior)[0]
         activation = self.preactivation
     # ====== create distribution layers ====== #
     kw = dict(projection=self.projection)
     if input_shape is not None:
         kw['input_shape'] = input_shape
     ### create the layer
     ## mixture distributions
     if posterior in ('mdn', 'mixdiag', 'mixfull', 'mixtril'):
         posterior_kwargs.pop('covariance', None)
         posterior_kwargs.update(kw)
         # dense network for projection
         layer = obl.MixtureDensityNetwork(event_shape,
                                           loc_activation=activation,
                                           scale_activation='softplus1',
                                            covariance=dict(
                                                mdn='none',
                                                mixdiag='diag',
                                                mixfull='tril',
                                                mixtril='tril')[posterior],
                                           name=name,
                                           prior=prior,
                                           dropout=self.dropout,
                                           **posterior_kwargs)
     ## non-mixture distribution
     else:
         layer = obl.DenseDistribution(event_shape,
                                       posterior=distribution_layer,
                                       prior=prior,
                                       activation=activation,
                                       posterior_kwargs=posterior_kwargs,
                                       dropout=self.dropout,
                                       name=name,
                                       **kw)
     ### set attributes
     if not hasattr(layer, 'event_shape'):
         layer.event_shape = event_shape
     # build the layer in advance
     if input_shape is not None and layer.projection:
         inputs = keras.Input(shape=input_shape, batch_size=None)
         layer(inputs)
     return layer
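
The final pre-build step above ("build the layer in advance") is a standard Keras idiom: pushing a symbolic keras.Input through a layer forces weight creation before any real data arrives. The same idiom with a plain Dense layer (shapes are illustrative):

from tensorflow import keras

inputs = keras.Input(shape=(16,), batch_size=None)
dense = keras.layers.Dense(4)
dense(inputs)          # creates kernel and bias immediately
assert dense.built
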
Example #9
def _default_prior(event_shape, posterior, prior, posterior_kwargs):
    if isinstance(prior, obd.Distribution):
        return prior
    layer, dist = parse_distribution(posterior)
    if isinstance(prior, dict):
        kw = dict(prior)
        prior = None
    else:
        kw = {}
    event_size = int(np.prod(event_shape))

    ## helper function
    def _kwargs(**args):
        for k, v in args.items():
            if k not in kw:
                kw[k] = v
        return kw

    ## Normal
    if layer == obl.GaussianLayer:
        prior = obd.Independent(
            obd.Normal(**_kwargs(loc=tf.zeros(shape=event_shape),
                                 scale=tf.ones(shape=event_shape))), 1)
    ## Multivariate Normal
    elif issubclass(layer, obl.MultivariateNormalLayer):
        cov = layer._partial_kwargs['covariance']
        if cov == 'diag':  # diagonal covariance
            loc = tf.zeros(shape=event_shape)
            if tf.rank(loc) == 0:
                loc = tf.expand_dims(loc, axis=-1)
            prior = obd.MultivariateNormalDiag(
                **_kwargs(loc=loc, scale_identity_multiplier=1.))
        else:  # low-triangle covariance
            bijector = tfp.bijectors.FillScaleTriL(
                diag_bijector=tfp.bijectors.Identity(), diag_shift=1e-5)
            size = tf.reduce_prod(event_shape)
            loc = tf.zeros(shape=[size])
            scale_tril = bijector.forward(
                tf.ones(shape=[size * (size + 1) // 2]))
            prior = obd.MultivariateNormalTriL(
                **_kwargs(loc=loc, scale_tril=scale_tril))
    ## Log Normal
    elif layer == obl.LogNormalLayer:
        prior = obd.Independent(
            obd.LogNormal(**_kwargs(loc=tf.zeros(shape=event_shape),
                                    scale=tf.ones(shape=event_shape))), 1)
    ## mixture
    elif issubclass(layer, obl.MixtureGaussianLayer):
        if hasattr(layer, '_partial_kwargs'):
            cov = layer._partial_kwargs['covariance']
        else:
            cov = 'none'
        n_components = int(posterior_kwargs.get('n_components', 2))
        if cov == 'diag':
            scale_shape = [n_components, event_size]
            fn = lambda l, s: obd.MultivariateNormalDiag(
                loc=l, scale_diag=tf.nn.softplus(s))
        elif cov == 'none':
            scale_shape = [n_components, event_size]
            fn = lambda l, s: obd.Independent(
                obd.Normal(loc=l, scale=tf.math.softplus(s)), 1)
        elif cov in ('full', 'tril'):
            scale_shape = [n_components, event_size * (event_size + 1) // 2]
            fn = lambda l, s: obd.MultivariateNormalTriL(
                loc=l,
                scale_tril=tfp.bijectors.FillScaleTriL(diag_shift=1e-5)
                (tf.math.softplus(s)))
        else:
            raise ValueError(f"No support for covariance: '{cov}'")
        loc = tf.cast(tf.fill([n_components, event_size], 0.),
                      dtype=tf.float32)
        log_scale = tf.cast(tf.fill(scale_shape, np.log(np.expm1(1.))),
                            dtype=tf.float32)
        mixture_logits = tf.cast(tf.fill([n_components], 1.), dtype=tf.float32)
        prior = obd.MixtureSameFamily(
            components_distribution=fn(loc, log_scale),
            mixture_distribution=obd.Categorical(logits=mixture_logits))
    ## discrete
    elif dist in (obd.OneHotCategorical, obd.Categorical) or \
      layer == obl.RelaxedOneHotCategoricalLayer:
        prior = dist(**_kwargs(logits=np.log([1. / event_size] * event_size),
                               dtype=tf.float32))
    elif dist == obd.Dirichlet:
        prior = dist(**_kwargs(concentration=[1.] * event_size))
    elif dist == obd.Bernoulli:
        # logits of zero give p = 0.5; np.log(0.5) as a logit would give p ~ 0.33
        prior = obd.Independent(
            obd.Bernoulli(**_kwargs(logits=np.zeros(event_shape),
                                    dtype=tf.float32)), len(event_shape))
    ## other
    return prior
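
The discrete branch above makes the prior uniform by assigning every class the same logit. The equivalent with TFP directly (event_size is illustrative):

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

event_size = 4
prior = tfd.OneHotCategorical(logits=np.log([1. / event_size] * event_size),
                              dtype=tf.float32)   # uniform over 4 classes
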
Example #10
 def __init__(
     self,
     event_shape: Union[int, Sequence[int]] = (),
     units: Optional[int] = None,
     posterior: Union[str, DistributionLambda,
                      Callable[[Tensor], Distribution]] = 'normal',
     posterior_kwargs: Optional[Dict[str, Any]] = None,
     prior: Optional[Union[Distribution, Callable[[], Distribution]]] = None,
     convert_to_tensor_fn: Callable[
       [Distribution], Tensor] = Distribution.sample,
     activation: Union[str, Callable[[Tensor], Tensor]] = 'linear',
     autoregressive: bool = False,
     use_bias: bool = True,
     kernel_initializer: Union[str, Initializer] = 'glorot_normal',
     bias_initializer: Union[str, Initializer] = 'zeros',
     kernel_regularizer: Union[None, str, Regularizer] = None,
     bias_regularizer: Union[None, str, Regularizer] = None,
     activity_regularizer: Union[None, str, Regularizer] = None,
     kernel_constraint: Union[None, str, Constraint] = None,
     bias_constraint: Union[None, str, Constraint] = None,
     dropout: float = 0.0,
     projection: bool = True,
     flatten_inputs: bool = False,
     **kwargs,
 ):
   if posterior_kwargs is None:
     posterior_kwargs = {}
   ## store init arguments (this is not intended for serialization but
   # for cloning)
   init_args = dict(locals())
   del init_args['self']
   del init_args['__class__']
   del init_args['kwargs']
   init_args.update(kwargs)
   self._init_args = init_args
   ## check prior type
    assert isinstance(prior, (Distribution, type(None))) or callable(prior), \
      ("prior can only be None, a Distribution instance, or a callable, "
       f"but given: {prior} ({type(prior)})")
   self._projection = bool(projection)
   self.flatten_inputs = bool(flatten_inputs)
   ## duplicated event_shape or event_size in posterior_kwargs
   posterior_kwargs = dict(posterior_kwargs)
   if 'event_shape' in posterior_kwargs:
     event_shape = posterior_kwargs.pop('event_shape')
   if 'event_size' in posterior_kwargs:
     event_shape = posterior_kwargs.pop('event_size')
    # a 'convert_to_tensor_fn' key in posterior_kwargs overrides the argument
    convert_to_tensor_fn = posterior_kwargs.pop('convert_to_tensor_fn',
                                                convert_to_tensor_fn)
   ## process the posterior
   if isinstance(posterior, DistributionLambda):  # instance
     self._posterior_layer = posterior
     self._posterior_class = type(posterior)
   elif (inspect.isclass(posterior) and
         issubclass(posterior, DistributionLambda)):  # subclass
     self._posterior_layer = None
     self._posterior_class = posterior
   elif isinstance(posterior, string_types):  # alias
     from odin.bay.distribution_alias import parse_distribution
     self._posterior_layer = None
     self._posterior_class, _ = parse_distribution(posterior)
   elif callable(posterior):  # callable
     if isinstance(posterior, LambdaType):
       posterior = tf.autograph.experimental.do_not_convert(posterior)
     self._posterior_layer = DistributionLambda(
       make_distribution_fn=posterior,
       convert_to_tensor_fn=convert_to_tensor_fn)
     self._posterior_class = type(posterior)
   else:
      raise ValueError('posterior could be: string, DistributionLambda, '
                       f'callable or type; but given: {posterior}')
   self._posterior = posterior
   self._posterior_kwargs = posterior_kwargs
   self._posterior_sample_shape = ()
   ## create layers
   self._convert_to_tensor_fn = convert_to_tensor_fn
   self._prior = prior
   self._event_shape = event_shape
   self._dropout = dropout
   ## set more descriptive name
   name = kwargs.pop('name', None)
   if name is None:
     posterior_name = (posterior if isinstance(posterior, string_types) else
                       posterior.__class__.__name__)
     name = f'dense_{posterior_name}'
   kwargs['name'] = name
   ## params_size could be static function or method
   if not projection:
     self._params_size = 0
   else:
      if not hasattr(self.posterior_layer, 'params_size'):
        if units is None:
          raise ValueError(
            f'posterior layer of type {type(self.posterior_layer)} '
            "doesn't have a params_size method, so the number of parameters "
            'must be provided via the `units` argument, but given: None')
       self._params_size = int(units)
     else:
       self._params_size = int(
         _params_size(self.posterior_layer, event_shape,
                      **self._posterior_kwargs))
   super().__init__(**kwargs)
   self.autoregressive = autoregressive
   if autoregressive:
     from odin.bay.layers.autoregressive_layers import AutoregressiveDense
     self._dense = AutoregressiveDense(
        params=self._params_size // self.event_size,  # integer count of params per event dim
       event_shape=(self.event_size,),
       activation=activation,
       use_bias=use_bias,
       kernel_initializer=kernel_initializer,
       bias_initializer=bias_initializer,
       kernel_regularizer=kernel_regularizer,
       bias_regularizer=bias_regularizer,
       activity_regularizer=activity_regularizer,
       kernel_constraint=kernel_constraint,
       bias_constraint=bias_constraint)
   else:
     self._dense = Dense(units=self._params_size,
                         activation=activation,
                         use_bias=use_bias,
                         kernel_initializer=kernel_initializer,
                         bias_initializer=bias_initializer,
                         kernel_regularizer=kernel_regularizer,
                         bias_regularizer=bias_regularizer,
                         activity_regularizer=activity_regularizer,
                         kernel_constraint=kernel_constraint,
                         bias_constraint=bias_constraint)
    # store the distribution from the last call
   self._most_recently_built_distribution = None
   spec = inspect.getfullargspec(self.posterior_layer)
   self._posterior_call_kw = set(spec.args + spec.kwonlyargs)
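
The locals() snapshot at the top of this constructor is a general argument-recording pattern for cloning. A self-contained sketch of the same idea (the class and field names here are hypothetical):

class Cloneable:

  def __init__(self, a=1, b=2, **kwargs):
    # snapshot constructor arguments before anything else mutates them
    init_args = dict(locals())
    del init_args['self']
    del init_args['kwargs']
    init_args.update(kwargs)
    self._init_args = init_args
    self.a, self.b = a, b

  def clone(self, **overrides):
    kw = dict(self._init_args)
    kw.update(overrides)
    return type(self)(**kw)

obj = Cloneable(a=5).clone(b=9)   # -> a=5, b=9
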
Example #11
def _nparams(distribution, distribution_kw):
    from odin.bay.distribution_alias import parse_distribution
    distribution, _ = parse_distribution(distribution)
    return int(
        tf.reduce_prod(distribution.params_size(1, **distribution_kw)).numpy())
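
params_size is the standard TFP hook this helper relies on: every DistributionLambda layer reports how many flat parameters it needs for a given event size. For instance:

import tensorflow_probability as tfp

n = int(tfp.layers.IndependentNormal.params_size(1))   # -> 2 (loc and scale)
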
Example #12
 def __init__(
     self,
     event_shape: List[int] = (),
     posterior: Union[str, DistributionLambda] = 'normal',
     posterior_kwargs: dict = {},
     prior: Optional[Union[Distribution, Callable[[], Distribution]]] = None,
     convert_to_tensor_fn: Callable[..., Tensor] = Distribution.sample,
     dropout: float = 0.0,
     activation: Union[str, Callable[..., Tensor]] = 'linear',
     use_bias: bool = True,
     kernel_initializer: Union[str, Initializer] = 'glorot_normal',
     bias_initializer: Union[str, Initializer] = 'zeros',
      kernel_regularizer: Union[None, str, Regularizer] = None,
      bias_regularizer: Union[None, str, Regularizer] = None,
      activity_regularizer: Union[None, str, Regularizer] = None,
      kernel_constraint: Union[None, str, Constraint] = None,
      bias_constraint: Union[None, str, Constraint] = None,
     projection: bool = True,
     **kwargs,
 ):
    assert isinstance(prior, (Distribution, Callable, type(None))), \
      ("prior can only be None, a Distribution instance, or a callable, "
       f"but given: {prior} ({type(prior)})")
   # duplicated event_shape or event_size in posterior_kwargs
   posterior_kwargs = dict(posterior_kwargs)
   if 'event_shape' in posterior_kwargs:
     event_shape = posterior_kwargs.pop('event_shape')
   if 'event_size' in posterior_kwargs:
     event_shape = posterior_kwargs.pop('event_size')
    # a 'convert_to_tensor_fn' key in posterior_kwargs overrides the argument
    convert_to_tensor_fn = posterior_kwargs.pop('convert_to_tensor_fn',
                                                convert_to_tensor_fn)
   # process the posterior
   if inspect.isclass(posterior) and issubclass(posterior, DistributionLambda):
     post_layer_cls = posterior
   else:
     from odin.bay.distribution_alias import parse_distribution
     post_layer_cls, _ = parse_distribution(posterior)
   # create layers
   self._convert_to_tensor_fn = convert_to_tensor_fn
   self._posterior = posterior
   self._prior = prior
   self._event_shape = event_shape
   self._dropout = dropout
   # for initializing the posterior
   self._posterior_class = post_layer_cls
   self._posterior_kwargs = posterior_kwargs
   self._posterior_sample_shape = ()
   self._posterior_layer = None
   # set more descriptive name
   name = kwargs.pop('name', None)
   if name is None:
     name = 'dense_%s' % (posterior if isinstance(posterior, string_types) else
                          posterior.__class__.__name__)
   kwargs['name'] = name
   # params_size could be static function or method
   if not projection:
     self._params_size = 0
   else:
     self._params_size = int(
         _params_size(self.posterior_layer, event_shape,
                      **self._posterior_kwargs))
   self._projection = bool(projection)
   super(DenseDistribution,
         self).__init__(units=self._params_size,
                        activation=activation,
                        use_bias=use_bias,
                        kernel_initializer=kernel_initializer,
                        bias_initializer=bias_initializer,
                        kernel_regularizer=kernel_regularizer,
                        bias_regularizer=bias_regularizer,
                        activity_regularizer=activity_regularizer,
                        kernel_constraint=kernel_constraint,
                        bias_constraint=bias_constraint,
                        **kwargs)
   # store the distribution from last call
   self._most_recent_distribution = None
    if 'input_shape' in kwargs and not self.built:
      pass  # no-op: the layer is built lazily on the first call
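
The kwargs-deduplication at the top of these constructors follows one rule: a key inside posterior_kwargs silently overrides the matching constructor argument, and copying the dict first leaves the caller's dict untouched. In miniature:

posterior_kwargs = dict({'event_shape': (4,)})            # defensive copy
event_shape = posterior_kwargs.pop('event_shape', (2,))   # -> (4,)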