Example #1
 def _inverse(self, y):
   y = y - self.low if self.low is not None else y
   if self.hinge_softness is None:
     return tfp_math.softplus_inverse(y)
   hinge_softness = tf.cast(self.hinge_softness, y.dtype)
   return hinge_softness * tfp_math.softplus_inverse(
       y / hinge_softness)
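A minimal round-trip sketch (assuming TensorFlow and TensorFlow Probability are installed, and ignoring the optional `low` shift): with hinge softness c, the forward map is y = c * softplus(x / c), so the inverse above recovers x as c * softplus_inverse(y / c).

import tensorflow as tf
import tensorflow_probability as tfp

c = tf.constant(1.5)
x = tf.constant([-1.0, 0.0, 2.0])
y = c * tf.math.softplus(x / c)                     # forward map
x_recovered = c * tfp.math.softplus_inverse(y / c)  # inverse, as in _inverse above
print(x_recovered)  # approximately [-1.0, 0.0, 2.0]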
Example #2
    def _forward_log_det_jacobian(self, x):
        x = x - 0.5
        fractional_part = x - tf.math.floor(x)
        inner_part = (fractional_part - 0.5) / self.temperature

        offset = (tf.math.log(self.temperature) -
                  tf.math.softplus(0.5 / self.temperature) +
                  tfp_math.softplus_inverse(0.5 / self.temperature))

        return (-tf.math.softplus(-inner_part) - tf.math.softplus(inner_part) -
                offset)
Example #3
def build_trainable_location_scale_distribution(initial_loc,
                                                initial_scale,
                                                event_ndims,
                                                distribution_fn=tfd.Normal,
                                                validate_args=False,
                                                name=None):
    """Builds a variational distribution from a location-scale family.

    Args:
      initial_loc: Float `Tensor` initial location.
      initial_scale: Float `Tensor` initial scale.
      event_ndims: Integer `Tensor` number of event dimensions in `initial_loc`.
      distribution_fn: Optional constructor for a `tfd.Distribution` instance
        in a location-scale family. This should have signature `dist =
        distribution_fn(loc, scale, validate_args)`.
        Default value: `tfd.Normal`.
      validate_args: Python `bool`. Whether to validate input with asserts. This
        imposes a runtime cost. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
        Default value: `False`.
      name: Python `str` name prefixed to ops created by this function.
        Default value: `None` (i.e.,
          'build_trainable_location_scale_distribution').

    Returns:
      posterior_dist: A `tfd.Distribution` instance.
    """
    with tf.name_scope(name or 'build_trainable_location_scale_distribution'):
        dtype = dtype_util.common_dtype([initial_loc, initial_scale],
                                        dtype_hint=tf.float32)
        initial_loc = tf.convert_to_tensor(initial_loc, dtype=dtype)
        initial_scale = tf.convert_to_tensor(initial_scale, dtype=dtype)

        loc = tf.Variable(initial_value=initial_loc, name='loc')
        scale = tfp_util.DeferredTensor(
            tf.nn.softplus,
            tf.Variable(initial_value=tf.broadcast_to(
                tfp_math.softplus_inverse(initial_scale),
                shape=prefer_static.shape(initial_loc)),
                        name='inverse_softplus_scale'))
        posterior_dist = distribution_fn(loc=loc,
                                         scale=scale,
                                         validate_args=validate_args)

        # Ensure the distribution has the desired number of event dimensions.
        static_event_ndims = tf.get_static_value(event_ndims)
        if static_event_ndims is None or static_event_ndims > 0:
            posterior_dist = tfd.Independent(
                posterior_dist,
                reinterpreted_batch_ndims=event_ndims,
                validate_args=validate_args)

    return posterior_dist
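A hypothetical usage of the helper above (assuming its TFP imports such as tfd, tfp_util, and prefer_static are in scope): build a trainable Normal surrogate over a length-3 event, e.g. as a variational posterior.

import tensorflow as tf

surrogate = build_trainable_location_scale_distribution(
    initial_loc=tf.zeros([3]),
    initial_scale=0.1 * tf.ones([3]),
    event_ndims=1)
print(surrogate.event_shape)               # [3]
print(len(surrogate.trainable_variables))  # 2: loc and inverse_softplus_scale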
Example #4
    def _forward_log_det_jacobian(self, x):
        t = tf.convert_to_tensor(self.temperature)
        fractional_part = x - tf.math.floor(x)
        # Because our function is defined on [0.5, 1.5], we transform
        # fractional_part into that domain, as in the forward transformation.
        fractional_part = tf.where(fractional_part < 0.5,
                                   fractional_part + 0.5,
                                   fractional_part - 0.5)
        inner_part = (fractional_part - 0.5) / t

        offset = (tf.math.log(t) - tf.math.softplus(0.5 / t) +
                  tfp_math.softplus_inverse(0.5 / t))

        return (-tf.math.softplus(-inner_part) - tf.math.softplus(inner_part) -
                offset)
Example #5
    def __init__(self,
                 n_components=10,
                 components_prior=0.7,
                 encoder_layers=[64, 64],
                 activation='relu',
                 n_mcmc_samples=1,
                 analytic=True,
                 random_state=None):
        super(LatentDirichletAllocation, self).__init__()
        self._random_state = (
            random_state if isinstance(random_state, np.random.RandomState)
            else np.random.RandomState(seed=random_state))
        self._initializer = tf.initializers.GlorotNormal(
            seed=self._random_state.randint(1e8))

        self.n_components = int(n_components)
        self.components_prior = np.array(softplus_inverse(components_prior))

        self.n_mcmc_samples = n_mcmc_samples
        self.analytic = analytic
        # ====== encoder ====== #
        encoder = Sequential(name="Encoder")
        for num_hidden_units in encoder_layers:
            encoder.add(
                Dense(num_hidden_units,
                      activation=activation,
                      kernel_initializer=self._initializer))
        encoder.add(
            Dense(n_components,
                  activation=tf.nn.softplus,
                  kernel_initializer=self._initializer,
                  name="DenseConcentration"))
        encoder.add(
            DirichletLayer(clip_for_stable=True,
                           pre_softplus=False,
                           name="topics_posterior"))
        self.encoder = encoder
        # ====== decoder ====== #
        # The observations are bag of words and therefore not one-hot. However,
        # log_prob of OneHotCategorical computes the probability correctly in
        # this case.
        self.decoder = OneHotCategoricalLayer(probs_input=True,
                                              name="bag_of_words")
Example #6
    def _create_loc_scale_vars(self, shape: Tuple[Optional[int], ...]) -> None:
        self.loc = self.add_weight(
            name="location",
            shape=shape,
            initializer=self.location_initializer,
            trainable=self.location_trainable,
        )

        if self.scale_trainable:
            self.scale = self.add_weight(
                name="scale",
                shape=shape,
                initializer=self.scale_initializer,
                trainable=self.scale_trainable,
                constraint=GreaterEqualEpsilon(-2.0),
            )
            self.scale.assign(softplus_inverse(self.scale))
        else:
            self.scale = self.add_weight(
                name="scale",
                shape=shape,
                initializer=self.scale_initializer,
                trainable=self.scale_trainable,
            )
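What the assign above accomplishes, as a minimal sketch (assuming the layer later applies softplus to the raw scale weight): overwriting the weight with its softplus_inverse means softplus(raw_scale) starts at the value produced by scale_initializer, while the raw weight remains unconstrained during training.

import tensorflow as tf
import tensorflow_probability as tfp

initial_scale = tf.constant([0.5, 1.0])
raw_scale = tf.Variable(initial_scale)
raw_scale.assign(tfp.math.softplus_inverse(raw_scale))
print(tf.math.softplus(raw_scale))  # approximately [0.5, 1.0]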
Example #7
 def _inverse(self, y):
   if self.hinge_softness is None:
     return tfp_math.softplus_inverse(y)
   hinge_softness = tf.cast(self.hinge_softness, y.dtype)
   return hinge_softness * tfp_math.softplus_inverse(
       y / hinge_softness)
Example #8
 def __init__(
     self,
     n_words: int,
     n_topics: int = 20,
     posterior: Literal['gaussian', 'dirichlet'] = 'dirichlet',
     posterior_activation: Union[str, Callable[[], Tensor]] = 'softplus',
     concentration_clip: bool = True,
     distribution: Literal['onehot', 'negativebinomial', 'binomial',
                           'poisson', 'zinb'] = 'onehot',
     dropout: float = 0.0,
     dropout_strategy: Literal['all', 'warmup', 'finetune'] = 'warmup',
     batch_norm: bool = False,
     trainable_prior: bool = True,
     warmup: int = 10000,
     step: Union[int, Variable] = 0,
     input_shape: Optional[List[int]] = None,
     name: str = "Topics",
 ):
     super().__init__(name=name)
     self.n_words = int(n_words)
     self.n_topics = int(n_topics)
     self.batch_norm = bool(batch_norm)
     self.warmup = int(warmup)
     self.posterior = str(posterior).lower()
     self.distribution = str(distribution).lower()
     self.dropout = float(dropout)
      assert dropout_strategy in ('all', 'warmup', 'finetune'), \
        ("Supported dropout strategies: 'all', 'warmup', 'finetune'; "
         f"but given: {dropout_strategy}")
     self.dropout_strategy = str(dropout_strategy)
     if isinstance(step, Variable):
         self.step = step
     else:
         self.step = Variable(int(step),
                              dtype=tf.float32,
                              trainable=False,
                              name="Step")
     ### batch norm
     if self.batch_norm:
         self._batch_norm_layer = BatchNormalization(trainable=True)
     ### posterior
     kw = dict(event_shape=(n_topics, ), name="TopicsPosterior")
     if posterior == 'dirichlet':
         kw['posterior'] = DirichletLayer
         init_value = softplus_inverse(0.7).numpy()
         post_kw = dict(concentration_activation=posterior_activation,
                        concentration_clip=concentration_clip)
     elif posterior == "gaussian":
         kw['posterior'] = MultivariateNormalLayer
         init_value = 0.
         post_kw = dict(covariance='diag',
                        loc_activation='identity',
                        scale_activation=posterior_activation)
     else:
          raise NotImplementedError(
              "Only the following latent distributions are supported: "
              "'gaussian', 'dirichlet'")
     self.topics_prior_logits = self.add_weight(
         initializer=tf.initializers.constant(value=init_value),
         shape=[1, n_topics],
         trainable=bool(trainable_prior),
         name="topics_prior_logits")
     self.posterior_layer = DenseDistribution(
         posterior_kwargs=post_kw,
         prior=self.topics_prior_distribution,
         projection=True,
         **kw)
     ### output distribution
     kw = dict(event_shape=(self.n_words, ), name="WordsDistribution")
     count_activation = 'softplus'
     if self.distribution in ('onehot', ):
         self.distribution_layer = OneHotCategoricalLayer(probs_input=True,
                                                          **kw)
         self.n_parameterization = 1
     elif self.distribution in ('poisson', ):
         self.distribution_layer = PoissonLayer(**kw)
         self.n_parameterization = 1
     elif self.distribution in ('negativebinomial', 'nb'):
         self.distribution_layer = NegativeBinomialLayer(
             count_activation=count_activation, **kw)
         self.n_parameterization = 2
     elif self.distribution in ('zinb', ):
         self.distribution_layer = ZINegativeBinomialLayer(
             count_activation=count_activation, **kw)
         self.n_parameterization = 3
     elif self.distribution in ('binomial', ):
         self.distribution_layer = BinomialLayer(
             count_activation=count_activation, **kw)
         self.n_parameterization = 2
     else:
         raise ValueError(
             f"No support for word distribution: {self.distribution}")
     # topics words parameterization
     self.topics_words_params = self.add_weight(
         'topics_words_params',
         shape=[self.n_topics, self.n_words * self.n_parameterization],
         initializer=tf.initializers.glorot_normal(),
         trainable=True)
     # initialize the Model if input_shape given
     if input_shape is not None:
         self(Input(shape=input_shape, dtype=self.dtype))
Example #9
    def __init__(self,
                 n_questions,
                 n_answers,
                 n_components=10,
                 components_prior=0.7,
                 encoder_layers=[16, 16],
                 activation='relu',
                 n_mcmc_samples=1,
                 random_state=None):
        super(GradeMembershipModel, self).__init__()
        self._random_state = (
            random_state if isinstance(random_state, np.random.RandomState)
            else np.random.RandomState(seed=random_state))
        self._initializer = tf.initializers.GlorotNormal(
            seed=self._random_state.randint(1e8))

        self.n_questions = int(n_questions)
        # This assumes the same number of answers is given to every question.
        self.n_answers = int(n_answers)
        self.n_components = int(n_components)

        self.components_prior = np.array(softplus_inverse(components_prior))

        self.n_mcmc_samples = n_mcmc_samples

        self.encoder = []
        self.decoder = []
        # Each question gets one encoder and one decoder.
        for question_idx in range(self.n_questions):
            # ====== encoder ====== #
            encoder = Sequential(name="EncoderQ%d" % question_idx)
            for num_hidden_units in encoder_layers:
                encoder.add(
                    Dense(num_hidden_units,
                          activation=activation,
                          kernel_initializer=self._initializer))
            encoder.add(
                Dense(n_components,
                      activation=tf.nn.softplus,
                      kernel_initializer=self._initializer,
                      name="DenseConcentration"))
            encoder.add(
                DirichletLayer(clip_for_stable=True,
                               pre_softplus=False,
                               name="topics_posteriorQ%d" % question_idx))
            # This is required so that the Model stores the Layer parameters.
            setattr(self, 'encoder%d' % question_idx, encoder)
            self.encoder.append(encoder)
            # ====== decoder ====== #
            # decoder
            group_answer_logits = self.add_weight(
                name="topics_words_logits%d" % question_idx,
                shape=[self.n_components, n_answers],
                initializer=self._initializer)
            # The observations are bag of words and therefore not one-hot. However,
            # log_prob of OneHotCategorical computes the probability correctly in
            # this case.
            decoder = OneHotCategoricalLayer(probs_input=True,
                                             name="AnswerSheetQ%d" %
                                             question_idx)
            # This is required so that the Model stores the Layer parameters.
            setattr(self, 'decoder%d' % question_idx, decoder)
            self.decoder.append([group_answer_logits, decoder])

        # same prior for all questions
        self.prior_logit = self.add_weight(
            name="prior_logit",
            shape=[1, self.n_components],
            trainable=False,
            initializer=tf.initializers.Constant(self.components_prior))