def _dummy_dist(self, remove_independent=True):
  # deterministic case
  if self.is_deterministic:
    return obd.VectorDeterministic(loc=(0.,))
  # stochastic case
  layer, _ = parse_distribution(self.posterior)
  # collect extra kwargs for params_size
  args, defaults = _args_and_defaults(layer.params_size)
  _, init_defaults = _args_and_defaults(layer.__init__)
  kw = {}
  if len(args) > 1:
    args = args[1:]
    for a in args:
      if a in self.kwargs:
        kw[a] = self.kwargs[a]
      elif a in defaults:
        kw[a] = defaults[a]
      elif a in init_defaults:
        kw[a] = init_defaults[a]
  # get the params_size; the first positional argument is either
  # `event_size` or `event_shape`, so the call is identical and only the
  # dummy event-shape representation differs
  if inspect.getfullargspec(layer.params_size).args[0] == 'event_size':
    size = layer.params_size(1, **kw)
    event_shape = 1
  else:
    size = layer.params_size(1, **kw)
    event_shape = (1,)
  param_shape = (1, size)
  # create a dummy distribution
  params = array_ops.empty(shape=param_shape, dtype=tf.float32)
  dist = layer(event_shape, **self.kwargs)(params)
  # unwrap to the original distribution
  if remove_independent:
    while isinstance(dist, obd.Independent):
      dist = dist.distribution
  return dist, size
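# Usage sketch for `_dummy_dist` (illustrative only): it builds a throwaway
# distribution so the underlying distribution type and per-event parameter
# count can be inspected without constructing the real layer. `rv` stands for
# a configured random-variable descriptor owning this method; the exact class
# name is an assumption, not part of the code above.
def _example_dummy_dist(rv):
  dist, size = rv._dummy_dist(remove_independent=True)
  print(type(dist).__name__)  # e.g. 'Normal' once Independent is unwrapped
  print(size)                 # params_size for a single event, e.g. 2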
def create_posterior(self, input_shape=None, name=None) -> obl.DenseDistribution:
  r"""Initialize a Distribution for the random variable."""
  prior = _default_prior(self.event_shape, self.posterior, self.prior,
                         self.kwargs)
  event_shape = self.event_shape
  posterior = self.posterior
  posterior_kwargs = dict(self.kwargs)
  name = self.name if name is None else str(name)
  # ====== deterministic distribution with loss function from tensorflow ====== #
  if posterior in dir(tf.losses) or posterior in dir(keras.activations):
    distribution_layer = obl.VectorDeterministicLayer
    if posterior in dir(tf.losses):
      activation = posterior_kwargs.pop('activation', 'relu')
      fn = tf.losses.get(str(posterior))
    else:  # just an activation function; the loss defaults to MSE
      activation = keras.activations.get(self.posterior)
      fn = tf.losses.get(posterior_kwargs.pop('loss', 'mse'))
    posterior_kwargs['log_prob'] = \
        lambda self, y_true: -fn(y_true, self.mean())
  # ====== probabilistic loss ====== #
  else:
    distribution_layer = parse_distribution(self.posterior)[0]
    activation = 'linear'
  # ====== create distribution layers ====== #
  activation = posterior_kwargs.pop('activation', activation)
  kw = dict(disable_projection=not self.projection)
  if input_shape is not None:
    kw['input_shape'] = input_shape
  ### create the layer
  ## mixture distributions
  if posterior in ('mdn', 'mixdiag', 'mixfull', 'mixtril'):
    posterior_kwargs.pop('covariance', None)
    posterior_kwargs.update(kw)
    # dense network for the projection
    layer = obl.MixtureDensityNetwork(event_shape,
                                      loc_activation=activation,
                                      scale_activation='softplus1',
                                      covariance=dict(mdn='none',
                                                      mixdiag='diag',
                                                      mixfull='tril',
                                                      mixtril='tril')[posterior],
                                      name=name,
                                      prior=prior,
                                      **posterior_kwargs)
  ## non-mixture distribution
  else:
    layer = obl.DenseDistribution(event_shape,
                                  posterior=distribution_layer,
                                  prior=prior,
                                  activation=activation,
                                  posterior_kwargs=posterior_kwargs,
                                  name=name,
                                  **kw)
  ### set attributes
  if not hasattr(layer, 'event_shape'):
    layer.event_shape = event_shape
  return layer
def __post_init__(self):
  self.posterior = str(self.posterior).lower().strip()
  shape = self.event_shape
  if not (tf.is_tensor(shape) or
          isinstance(shape, (tf.TensorShape, np.ndarray))):
    try:
      shape = [int(i) for i in tf.nest.flatten(self.event_shape)]
    except Exception as e:
      raise ValueError(f"No support for event_shape={shape}, error: {e}")
  self.event_shape = shape
  if self.name is None:
    _, cls = parse_distribution(self.posterior)
    self.name = f"{cls.__name__}Variable"
  else:
    self.name = str(self.name)
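# Illustrative sketch of what `__post_init__` normalizes. `rv_cls` stands for
# the dataclass that owns this method (its actual name is an assumption here),
# with fields `event_shape`, `posterior`, and `name`:
def _example_post_init(rv_cls):
  rv = rv_cls(event_shape=[(4, 2)], posterior='Normal ')
  assert rv.posterior == 'normal'      # lower-cased and stripped
  assert rv.event_shape == [4, 2]      # nested shape flattened to plain ints
  assert rv.name.endswith('Variable')  # default name, e.g. 'NormalVariable'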
def __init__(self,
             event_shape=(),
             posterior='normal',
             posterior_kwargs={},
             prior=None,
             convert_to_tensor_fn=Distribution.sample,
             dropout=0.0,
             activation='linear',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             bias_initializer='zeros',
             kernel_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             bias_constraint=None,
             disable_projection=False,
             **kwargs):
  assert prior is None or isinstance(prior, Distribution), \
      "prior can be None or an instance of tensorflow_probability.Distribution"
  # resolve duplicated event_shape or event_size in posterior_kwargs
  posterior_kwargs = dict(posterior_kwargs)
  if 'event_shape' in posterior_kwargs:
    event_shape = posterior_kwargs.pop('event_shape')
  if 'event_size' in posterior_kwargs:
    event_shape = posterior_kwargs.pop('event_size')
  convert_to_tensor_fn = posterior_kwargs.pop('convert_to_tensor_fn',
                                              Distribution.sample)
  # process the posterior
  # TODO: support giving an instance of DistributionLambda directly
  if inspect.isclass(posterior) and issubclass(posterior, DistributionLambda):
    post_layer_cls = posterior
  else:
    post_layer_cls, _ = parse_distribution(posterior)
  # create layers
  self._convert_to_tensor_fn = convert_to_tensor_fn
  self._posterior = posterior
  self._prior = prior
  self._event_shape = event_shape
  self._posterior_class = post_layer_cls
  self._posterior_kwargs = posterior_kwargs
  self._dropout = dropout
  # set a more descriptive name
  name = kwargs.pop('name', None)
  if name is None:
    name = 'dense_%s' % (posterior if isinstance(posterior, string_types) else
                         posterior.__class__.__name__)
  kwargs['name'] = name
  # params_size could be a static function or a method
  params_size = _params_size(self.posterior_layer(), event_shape)
  self._disable_projection = bool(disable_projection)
  super(DenseDistribution, self).__init__(
      units=params_size,
      activation=activation,
      use_bias=use_bias,
      kernel_initializer=kernel_initializer,
      bias_initializer=bias_initializer,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
      activity_regularizer=activity_regularizer,
      kernel_constraint=kernel_constraint,
      bias_constraint=bias_constraint,
      **kwargs)
  # store the distribution from the last call
  self._last_distribution = None
def _default_prior(event_shape, posterior, prior, posterior_kwargs):
  if not isinstance(event_shape, (Sequence, MutableSequence, tf.TensorShape)):
    raise ValueError("event_shape must be a list of integers but given: "
                     f"{event_shape} type: {type(event_shape)}")
  if isinstance(prior, (Distribution, DistributionLambda, Callable)):
    return prior
  # NOTE: dict must be accepted here, otherwise the keyword-arguments branch
  # below would be unreachable
  elif not isinstance(prior, (string_types, dict, type(None))):
    raise ValueError("prior must be None, a string, a dict of keyword "
                     "arguments, or an instance of Distribution or "
                     f"DistributionLambda, but given: {prior}")
  # no prior given
  layer, dist = parse_distribution(posterior)
  if isinstance(prior, dict):
    kw = dict(prior)
    prior = None
  else:
    kw = {}
  event_size = int(np.prod(event_shape))

  ## helper: fill in defaults without overriding user-given kwargs
  def _kwargs(**args):
    for k, v in args.items():
      if k not in kw:
        kw[k] = v
    return kw

  ## Normal
  if layer == obl.GaussianLayer:
    prior = obd.Independent(
        obd.Normal(**_kwargs(loc=tf.zeros(shape=event_shape),
                             scale=tf.ones(shape=event_shape))),
        reinterpreted_batch_ndims=1,
    )
  ## Multivariate Normal
  elif issubclass(layer, obl.MultivariateNormalLayer):
    cov = layer._partial_kwargs['covariance']
    if cov == 'diag':  # diagonal covariance
      loc = tf.zeros(shape=event_shape)
      if tf.rank(loc) == 0:
        loc = tf.expand_dims(loc, axis=-1)
      prior = obd.MultivariateNormalDiag(
          **_kwargs(loc=loc, scale_identity_multiplier=1.))
    else:  # lower-triangular covariance
      bijector = tfp.bijectors.FillScaleTriL(
          diag_bijector=tfp.bijectors.Identity(), diag_shift=1e-5)
      size = tf.reduce_prod(event_shape)
      loc = tf.zeros(shape=[size])
      scale_tril = bijector.forward(tf.ones(shape=[size * (size + 1) // 2]))
      prior = obd.MultivariateNormalTriL(
          **_kwargs(loc=loc, scale_tril=scale_tril))
  ## Log-Normal
  elif layer == obl.LogNormalLayer:
    prior = obd.Independent(
        obd.LogNormal(**_kwargs(loc=tf.zeros(shape=event_shape),
                                scale=tf.ones(shape=event_shape))),
        reinterpreted_batch_ndims=1,
    )
  ## mixture
  elif issubclass(layer, obl.MixtureGaussianLayer):
    if hasattr(layer, '_partial_kwargs'):
      cov = layer._partial_kwargs['covariance']
    else:
      cov = 'none'
    n_components = int(posterior_kwargs.get('n_components', 2))
    if cov == 'diag':
      scale_shape = [n_components, event_size]
      fn = lambda l, s: obd.MultivariateNormalDiag(
          loc=l, scale_diag=tf.nn.softplus(s))
    elif cov == 'none':
      scale_shape = [n_components, event_size]
      fn = lambda l, s: obd.Independent(
          obd.Normal(loc=l, scale=tf.math.softplus(s)),
          reinterpreted_batch_ndims=1,
      )
    elif cov in ('full', 'tril'):
      scale_shape = [n_components, event_size * (event_size + 1) // 2]
      fn = lambda l, s: obd.MultivariateNormalTriL(
          loc=l,
          scale_tril=tfp.bijectors.FillScaleTriL(diag_shift=1e-5)
          (tf.math.softplus(s)))
    loc = tf.cast(tf.fill([n_components, event_size], 0.), dtype=tf.float32)
    log_scale = tf.cast(tf.fill(scale_shape, np.log(np.expm1(1.))),
                        dtype=tf.float32)
    p = 1. / n_components
    mixture_logits = tf.cast(tf.fill([n_components], np.log(p / (1 - p))),
                             dtype=tf.float32)
    prior = obd.MixtureSameFamily(
        components_distribution=fn(loc, log_scale),
        mixture_distribution=obd.Categorical(logits=mixture_logits))
  ## discrete
  elif dist in (obd.OneHotCategorical, obd.Categorical) or \
      layer == obl.RelaxedOneHotCategoricalLayer:
    p = 1. / event_size
    prior = dist(**_kwargs(logits=[np.log(p / (1 - p))] * event_size),
                 dtype=tf.float32)
  elif dist == obd.Dirichlet:
    prior = dist(**_kwargs(concentration=[1.] * event_size))
  elif dist == obd.Bernoulli:
    prior = obd.Independent(
        obd.Bernoulli(**_kwargs(logits=np.zeros(event_shape)),
                      dtype=tf.float32),
        reinterpreted_batch_ndims=len(event_shape),
    )
  ## no matching default prior: return None
  return prior
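# Sketch of the default priors produced above (illustrative; assumes the
# module-level imports tf/obd used throughout this file, and that the alias
# 'normal' resolves to obl.GaussianLayer as the first branch expects):
def _example_default_prior():
  # Gaussian posterior -> standard Normal prior over one event dimension
  p = _default_prior(event_shape=[10], posterior='normal',
                     prior=None, posterior_kwargs={})
  # p is Independent(Normal(loc=zeros(10), scale=ones(10)),
  #                  reinterpreted_batch_ndims=1)
  assert p.event_shape == [10]
  # a dict customizes individual parameters instead of replacing the prior
  p2 = _default_prior([10], 'normal', {'scale': tf.fill([10], 2.)}, {})
  return p, p2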
def distribution_layer(self):
  # the DistributionLambda (layer) class for this posterior
  return parse_distribution(self.posterior)[0]
def distribution(self):
  # the tensorflow_probability Distribution class for this posterior
  return parse_distribution(self.posterior)[1]
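# Illustrative: the two accessors return the two halves of the
# `parse_distribution` pair. For example, for posterior='bernoulli' one would
# expect (an assumption based on the aliases used elsewhere in this file):
#   rv.distribution_layer  # -> obl.BernoulliLayer, a DistributionLambda class
#   rv.distribution        # -> obd.Bernoulli, a tfp Distribution class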
def create_posterior(self,
                     input_shape: Optional[List[int]] = None,
                     name: Optional[str] = None) -> obl.DenseDistribution:
  r"""Initialize a Distribution for the random variable."""
  # use an isotropic Gaussian as prior distribution for the deterministic case
  if self.is_deterministic:
    prior = obd.Independent(
        obd.Normal(loc=tf.zeros(shape=self.event_shape),
                   scale=tf.ones(shape=self.event_shape)),
        reinterpreted_batch_ndims=1,
    )
  else:
    prior = _default_prior(self.event_shape, self.posterior, self.prior,
                           self.kwargs)
  event_shape = self.event_shape
  posterior = self.posterior
  posterior_kwargs = dict(self.kwargs)
  name = self.name if name is None else str(name)
  # ====== deterministic distribution with loss function from tensorflow ====== #
  if posterior in dir(tf.losses) or posterior in dir(keras.activations):
    distribution_layer = obl.VectorDeterministicLayer
    if posterior in dir(tf.losses):
      activation = 'linear'
      fn = tf.losses.get(str(posterior))
    else:  # just an activation function; the loss defaults to MSE
      activation = keras.activations.get(self.posterior)
      fn = tf.losses.get(posterior_kwargs.pop('loss', 'mse'))
    posterior_kwargs['log_prob'] = \
        lambda self, y_true: -fn(y_true, self.mean())
  # ====== probabilistic loss ====== #
  else:
    distribution_layer = parse_distribution(self.posterior)[0]
    activation = self.preactivation
  # ====== create distribution layers ====== #
  kw = dict(projection=self.projection)
  if input_shape is not None:
    kw['input_shape'] = input_shape
  ### create the layer
  ## mixture distributions
  if posterior in ('mdn', 'mixdiag', 'mixfull', 'mixtril'):
    posterior_kwargs.pop('covariance', None)
    posterior_kwargs.update(kw)
    # dense network for the projection; the covariance keys must match the
    # aliases tested above, otherwise the lookup raises KeyError
    layer = obl.MixtureDensityNetwork(event_shape,
                                      loc_activation=activation,
                                      scale_activation='softplus1',
                                      covariance=dict(mdn='none',
                                                      mixdiag='diag',
                                                      mixfull='tril',
                                                      mixtril='tril')[posterior],
                                      name=name,
                                      prior=prior,
                                      dropout=self.dropout,
                                      **posterior_kwargs)
  ## non-mixture distribution
  else:
    layer = obl.DenseDistribution(event_shape,
                                  posterior=distribution_layer,
                                  prior=prior,
                                  activation=activation,
                                  posterior_kwargs=posterior_kwargs,
                                  dropout=self.dropout,
                                  name=name,
                                  **kw)
  ### set attributes
  if not hasattr(layer, 'event_shape'):
    layer.event_shape = event_shape
  # build the layer in advance
  if input_shape is not None and layer.projection:
    inputs = keras.Input(shape=input_shape, batch_size=None)
    layer(inputs)
  return layer
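# Usage sketch for `create_posterior` (illustrative; `rv_cls` stands for the
# descriptor class owning this method, whose actual name is an assumption):
def _example_create_posterior(rv_cls):
  import tensorflow as tf
  rv = rv_cls(event_shape=(10,), posterior='normal')
  layer = rv.create_posterior(input_shape=(32,))  # built in advance
  qz = layer(tf.random.normal([4, 32]))  # -> a tfp Distribution
  z = qz.sample()                        # shape [4, 10]
  return qz, z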
def _default_prior(event_shape, posterior, prior, posterior_kwargs):
  if isinstance(prior, obd.Distribution):
    return prior
  layer, dist = parse_distribution(posterior)
  if isinstance(prior, dict):
    kw = dict(prior)
    prior = None
  else:
    kw = {}
  event_size = int(np.prod(event_shape))

  ## helper: fill in defaults without overriding user-given kwargs
  def _kwargs(**args):
    for k, v in args.items():
      if k not in kw:
        kw[k] = v
    return kw

  ## Normal
  if layer == obl.GaussianLayer:
    prior = obd.Independent(
        obd.Normal(**_kwargs(loc=tf.zeros(shape=event_shape),
                             scale=tf.ones(shape=event_shape))), 1)
  ## Multivariate Normal
  elif issubclass(layer, obl.MultivariateNormalLayer):
    cov = layer._partial_kwargs['covariance']
    if cov == 'diag':  # diagonal covariance
      loc = tf.zeros(shape=event_shape)
      if tf.rank(loc) == 0:
        loc = tf.expand_dims(loc, axis=-1)
      prior = obd.MultivariateNormalDiag(
          **_kwargs(loc=loc, scale_identity_multiplier=1.))
    else:  # lower-triangular covariance
      bijector = tfp.bijectors.FillScaleTriL(
          diag_bijector=tfp.bijectors.Identity(), diag_shift=1e-5)
      size = tf.reduce_prod(event_shape)
      loc = tf.zeros(shape=[size])
      scale_tril = bijector.forward(tf.ones(shape=[size * (size + 1) // 2]))
      prior = obd.MultivariateNormalTriL(
          **_kwargs(loc=loc, scale_tril=scale_tril))
  ## Log-Normal
  elif layer == obl.LogNormalLayer:
    prior = obd.Independent(
        obd.LogNormal(**_kwargs(loc=tf.zeros(shape=event_shape),
                                scale=tf.ones(shape=event_shape))), 1)
  ## mixture
  elif issubclass(layer, obl.MixtureGaussianLayer):
    if hasattr(layer, '_partial_kwargs'):
      cov = layer._partial_kwargs['covariance']
    else:
      cov = 'none'
    n_components = int(posterior_kwargs.get('n_components', 2))
    if cov == 'diag':
      scale_shape = [n_components, event_size]
      fn = lambda l, s: obd.MultivariateNormalDiag(
          loc=l, scale_diag=tf.nn.softplus(s))
    elif cov == 'none':
      scale_shape = [n_components, event_size]
      fn = lambda l, s: obd.Independent(
          obd.Normal(loc=l, scale=tf.math.softplus(s)), 1)
    elif cov in ('full', 'tril'):
      scale_shape = [n_components, event_size * (event_size + 1) // 2]
      fn = lambda l, s: obd.MultivariateNormalTriL(
          loc=l,
          scale_tril=tfp.bijectors.FillScaleTriL(diag_shift=1e-5)
          (tf.math.softplus(s)))
    loc = tf.cast(tf.fill([n_components, event_size], 0.), dtype=tf.float32)
    log_scale = tf.cast(tf.fill(scale_shape, np.log(np.expm1(1.))),
                        dtype=tf.float32)
    mixture_logits = tf.cast(tf.fill([n_components], 1.), dtype=tf.float32)
    prior = obd.MixtureSameFamily(
        components_distribution=fn(loc, log_scale),
        mixture_distribution=obd.Categorical(logits=mixture_logits))
  ## discrete
  elif dist in (obd.OneHotCategorical, obd.Categorical) or \
      layer == obl.RelaxedOneHotCategoricalLayer:
    prior = dist(**_kwargs(logits=np.log([1. / event_size] * event_size),
                           dtype=tf.float32))
  elif dist == obd.Dirichlet:
    prior = dist(**_kwargs(concentration=[1.] * event_size))
  elif dist == obd.Bernoulli:
    # a logit of 0 corresponds to p = 0.5 (np.log(0.5) would give p ~ 0.33)
    prior = obd.Independent(
        obd.Bernoulli(**_kwargs(logits=np.zeros(event_shape),
                                dtype=tf.float32)), len(event_shape))
  ## no matching default prior: return None
  return prior
def __init__(
    self,
    event_shape: Union[int, Sequence[int]] = (),
    units: Optional[int] = None,
    posterior: Union[str, DistributionLambda,
                     Callable[[Tensor], Distribution]] = 'normal',
    posterior_kwargs: Optional[Dict[str, Any]] = None,
    prior: Optional[Union[Distribution, Callable[[], Distribution]]] = None,
    convert_to_tensor_fn: Callable[[Distribution],
                                   Tensor] = Distribution.sample,
    activation: Union[str, Callable[[Tensor], Tensor]] = 'linear',
    autoregressive: bool = False,
    use_bias: bool = True,
    kernel_initializer: Union[str, Initializer] = 'glorot_normal',
    bias_initializer: Union[str, Initializer] = 'zeros',
    kernel_regularizer: Union[None, str, Regularizer] = None,
    bias_regularizer: Union[None, str, Regularizer] = None,
    activity_regularizer: Union[None, str, Regularizer] = None,
    kernel_constraint: Union[None, str, Constraint] = None,
    bias_constraint: Union[None, str, Constraint] = None,
    dropout: float = 0.0,
    projection: bool = True,
    flatten_inputs: bool = False,
    **kwargs,
):
  if posterior_kwargs is None:
    posterior_kwargs = {}
  ## store init arguments (this is not intended for serialization but
  ## for cloning)
  init_args = dict(locals())
  del init_args['self']
  del init_args['__class__']
  del init_args['kwargs']
  init_args.update(kwargs)
  self._init_args = init_args
  ## check prior type
  assert isinstance(prior, (Distribution, type(None))) or callable(prior), \
      ("prior can only be None, a callable, or an instance of Distribution or "
       f"DistributionLambda, but given: {prior}-{type(prior)}")
  self._projection = bool(projection)
  self.flatten_inputs = bool(flatten_inputs)
  ## resolve duplicated event_shape or event_size in posterior_kwargs
  posterior_kwargs = dict(posterior_kwargs)
  if 'event_shape' in posterior_kwargs:
    event_shape = posterior_kwargs.pop('event_shape')
  if 'event_size' in posterior_kwargs:
    event_shape = posterior_kwargs.pop('event_size')
  convert_to_tensor_fn = posterior_kwargs.pop('convert_to_tensor_fn',
                                              Distribution.sample)
  ## process the posterior
  if isinstance(posterior, DistributionLambda):  # instance
    self._posterior_layer = posterior
    self._posterior_class = type(posterior)
  elif (inspect.isclass(posterior) and
        issubclass(posterior, DistributionLambda)):  # subclass
    self._posterior_layer = None
    self._posterior_class = posterior
  elif isinstance(posterior, string_types):  # alias
    from odin.bay.distribution_alias import parse_distribution
    self._posterior_layer = None
    self._posterior_class, _ = parse_distribution(posterior)
  elif callable(posterior):  # callable
    if isinstance(posterior, LambdaType):
      posterior = tf.autograph.experimental.do_not_convert(posterior)
    self._posterior_layer = DistributionLambda(
        make_distribution_fn=posterior,
        convert_to_tensor_fn=convert_to_tensor_fn)
    self._posterior_class = type(posterior)
  else:
    raise ValueError('posterior could be: string, DistributionLambda, '
                     f'callable or type; but given: {posterior}')
  self._posterior = posterior
  self._posterior_kwargs = posterior_kwargs
  self._posterior_sample_shape = ()
  ## create layers
  self._convert_to_tensor_fn = convert_to_tensor_fn
  self._prior = prior
  self._event_shape = event_shape
  self._dropout = dropout
  ## set a more descriptive name
  name = kwargs.pop('name', None)
  if name is None:
    posterior_name = (posterior if isinstance(posterior, string_types) else
                      posterior.__class__.__name__)
    name = f'dense_{posterior_name}'
  kwargs['name'] = name
  ## params_size could be a static function or a method
  if not projection:
    self._params_size = 0
  else:
    if not hasattr(self.posterior_layer, 'params_size'):
      if units is None:
        raise ValueError(
            f'posterior layer of type {type(self.posterior_layer)} '
            "doesn't have a params_size method; the number of parameters "
            'must be provided via the `units` argument, but given: None')
      self._params_size = int(units)
    else:
      self._params_size = int(
          _params_size(self.posterior_layer, event_shape,
                       **self._posterior_kwargs))
  super().__init__(**kwargs)
  self.autoregressive = autoregressive
  if autoregressive:
    from odin.bay.layers.autoregressive_layers import AutoregressiveDense
    # integer division: each event dimension gets an equal share of the params
    self._dense = AutoregressiveDense(
        params=self._params_size // self.event_size,
        event_shape=(self.event_size,),
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint)
  else:
    self._dense = Dense(units=self._params_size,
                        activation=activation,
                        use_bias=use_bias,
                        kernel_initializer=kernel_initializer,
                        bias_initializer=bias_initializer,
                        kernel_regularizer=kernel_regularizer,
                        bias_regularizer=bias_regularizer,
                        activity_regularizer=activity_regularizer,
                        kernel_constraint=kernel_constraint,
                        bias_constraint=bias_constraint)
  # store the distribution from the last call
  self._most_recently_built_distribution = None
  spec = inspect.getfullargspec(self.posterior_layer)
  self._posterior_call_kw = set(spec.args + spec.kwonlyargs)
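# Sketch of the callable-posterior path in the constructor above
# (illustrative; `make_dist` is a hypothetical user-defined posterior). A
# plain callable is wrapped in a DistributionLambda, which has no
# `params_size`, so `units` must be supplied explicitly:
def _example_custom_posterior():
  import tensorflow as tf
  import tensorflow_probability as tfp

  def make_dist(params):
    # split the projected parameters into location and (softplus) scale
    loc, scale = tf.split(params, 2, axis=-1)
    return tfp.distributions.Normal(loc=loc, scale=tf.nn.softplus(scale))

  # event_size=10 and two parameters per dimension -> units=20
  layer = DenseDistribution(event_shape=(10,), units=20, posterior=make_dist)
  return layer(tf.random.normal([4, 8]))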
def _nparams(distribution, distribution_kw):
  from odin.bay.distribution_alias import parse_distribution
  distribution, _ = parse_distribution(distribution)
  return int(
      tf.reduce_prod(distribution.params_size(1, **distribution_kw)).numpy())
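# Quick sanity sketch for `_nparams` (illustrative; assumes the alias 'normal'
# resolves to a Gaussian layer): a Normal posterior over a single event needs
# two parameters, loc and scale, so one would expect:
#   _nparams('normal', {})  # -> 2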
def __init__(
    self,
    event_shape: List[int] = (),
    posterior: Union[str, DistributionLambda] = 'normal',
    posterior_kwargs: dict = {},
    prior: Optional[Union[Distribution, Callable[[], Distribution]]] = None,
    convert_to_tensor_fn: Callable[..., Tensor] = Distribution.sample,
    dropout: float = 0.0,
    activation: Union[str, Callable[..., Tensor]] = 'linear',
    use_bias: bool = True,
    kernel_initializer: Union[str, Initializer] = 'glorot_normal',
    bias_initializer: Union[str, Initializer] = 'zeros',
    kernel_regularizer: Optional[Union[str, Regularizer]] = None,
    bias_regularizer: Optional[Union[str, Regularizer]] = None,
    activity_regularizer: Optional[Union[str, Regularizer]] = None,
    kernel_constraint: Optional[Union[str, Constraint]] = None,
    bias_constraint: Optional[Union[str, Constraint]] = None,
    projection: bool = True,
    **kwargs,
):
  assert isinstance(prior, (Distribution, Callable, type(None))), \
      ("prior can only be None or an instance of Distribution or "
       f"DistributionLambda, but given: {prior}-{type(prior)}")
  # resolve duplicated event_shape or event_size in posterior_kwargs
  posterior_kwargs = dict(posterior_kwargs)
  if 'event_shape' in posterior_kwargs:
    event_shape = posterior_kwargs.pop('event_shape')
  if 'event_size' in posterior_kwargs:
    event_shape = posterior_kwargs.pop('event_size')
  convert_to_tensor_fn = posterior_kwargs.pop('convert_to_tensor_fn',
                                              Distribution.sample)
  # process the posterior
  if inspect.isclass(posterior) and issubclass(posterior, DistributionLambda):
    post_layer_cls = posterior
  else:
    from odin.bay.distribution_alias import parse_distribution
    post_layer_cls, _ = parse_distribution(posterior)
  # create layers
  self._convert_to_tensor_fn = convert_to_tensor_fn
  self._posterior = posterior
  self._prior = prior
  self._event_shape = event_shape
  self._dropout = dropout
  # for initializing the posterior
  self._posterior_class = post_layer_cls
  self._posterior_kwargs = posterior_kwargs
  self._posterior_sample_shape = ()
  self._posterior_layer = None
  # set a more descriptive name
  name = kwargs.pop('name', None)
  if name is None:
    name = 'dense_%s' % (posterior if isinstance(posterior, string_types) else
                         posterior.__class__.__name__)
  kwargs['name'] = name
  # params_size could be a static function or a method
  if not projection:
    self._params_size = 0
  else:
    self._params_size = int(
        _params_size(self.posterior_layer, event_shape,
                     **self._posterior_kwargs))
  self._projection = bool(projection)
  super(DenseDistribution, self).__init__(
      units=self._params_size,
      activation=activation,
      use_bias=use_bias,
      kernel_initializer=kernel_initializer,
      bias_initializer=bias_initializer,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
      activity_regularizer=activity_regularizer,
      kernel_constraint=kernel_constraint,
      bias_constraint=bias_constraint,
      **kwargs)
  # store the distribution from the last call
  self._most_recent_distribution = None
  if 'input_shape' in kwargs and not self.built:
    pass  # no-op: the layer is built lazily on the first call
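# Basic usage sketch for this constructor (illustrative; assumes the
# module-level imports used throughout this file). Calling the layer projects
# the inputs to the posterior's parameters and returns a distribution:
def _example_dense_distribution():
  import tensorflow as tf
  layer = DenseDistribution(event_shape=(10,), posterior='normal',
                            activation='linear')
  x = tf.random.normal([32, 16])
  qz = layer(x)                 # a tfp Distribution over event_shape (10,)
  z = tf.convert_to_tensor(qz)  # uses convert_to_tensor_fn (default: sample)
  return qz, z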