def new(params, event_shape=(), softplus_scale=True,
        validate_args=False, name=None):
  """Create the distribution instance from a `params` vector."""
  with tf.compat.v1.name_scope(name, 'LogNormal', [params, event_shape]):
    params = tf.convert_to_tensor(value=params, name='params')
    event_shape = dist_util.expand_to_vector(
        tf.convert_to_tensor(
            value=event_shape, name='event_shape', dtype=tf.int32),
        tensor_name='event_shape')
    output_shape = tf.concat([
        tf.shape(input=params)[:-1],
        event_shape,
    ], axis=0)
    loc_params, scale_params = tf.split(params, 2, axis=-1)
    if softplus_scale:
      scale_params = tf.math.softplus(scale_params) + tfd.softplus_inverse(1.0)
    return tfd.Independent(
        tfd.LogNormal(
            loc=tf.reshape(loc_params, output_shape),
            scale=tf.reshape(scale_params, output_shape),
            validate_args=validate_args),
        reinterpreted_batch_ndims=tf.size(input=event_shape),
        validate_args=validate_args)
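# Usage sketch (not from the original source): shows how a raw parameter
# vector flows through `new` above. Assumes the module-level aliases used by
# `new` (`tf`, `tfd`, `dist_util`) are already in place; the shapes below are
# illustrative only.
import tensorflow as tf

raw_params = tf.random.normal([32, 8])     # batch of 32; 2 * prod(event_shape) = 8
dist = new(raw_params, event_shape=(4,))   # Independent(LogNormal) with event_shape [4]
samples = dist.sample()                    # shape [32, 4]
log_prob = dist.log_prob(samples)          # shape [32]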
def new(params, event_size, covariance_type, softplus_scale,
        validate_args=False, name=None):
  """Create the distribution instance from a `params` vector."""
  covariance_type = str(covariance_type).lower().strip()
  assert covariance_type in ('full', 'tril', 'diag'), \
      "No support for given covariance_type: '%s'" % covariance_type
  scale_fn = lambda x: (tf.math.softplus(x) + tfd.softplus_inverse(1.0)
                        if bool(softplus_scale) else x)
  with tf.compat.v1.name_scope(name, 'MultivariateNormal',
                               [params, event_size]):
    params = tf.convert_to_tensor(value=params, name='params')
    if covariance_type == 'tril':
      scale_tril = tfb.ScaleTriL(
          diag_shift=np.array(1e-5, params.dtype.as_numpy_dtype()),
          validate_args=validate_args)
      return tfd.MultivariateNormalTriL(
          loc=params[..., :event_size],
          scale_tril=scale_tril(scale_fn(params[..., event_size:])),
          validate_args=validate_args)
    elif covariance_type == 'diag':
      return tfd.MultivariateNormalDiag(
          loc=params[..., :event_size],
          scale_diag=scale_fn(params[..., event_size:]))
    elif covariance_type == 'full':
      # reshape the flat covariance parameters back into a matrix, preserving
      # any leading batch dimensions of `params`
      return tfd.MultivariateNormalFullCovariance(
          loc=params[..., :event_size],
          covariance_matrix=tf.reshape(
              scale_fn(params[..., event_size:]),
              tf.concat([tf.shape(input=params)[:-1],
                         [event_size, event_size]], axis=0)))
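# Usage sketch (not from the original source): the expected size of the
# trailing axis of `params` depends on `covariance_type` -- 'diag' needs
# `2 * event_size` values, 'tril' needs
# `event_size + event_size * (event_size + 1) // 2`, and 'full' needs
# `event_size + event_size ** 2`. Assumes `tfd`/`tfb` alias
# tensorflow_probability's distributions and bijectors as in `new` above.
import tensorflow as tf

event_size = 3
raw_params = tf.random.normal(
    [16, event_size + event_size * (event_size + 1) // 2])
dist = new(raw_params, event_size=event_size, covariance_type='tril',
           softplus_scale=True)            # MultivariateNormalTriL
print(dist.event_shape)                    # [3]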
def new(params, event_shape=(), softplus_scale=True,
        validate_args=False, name=None):
  """Create the distribution instance from a `params` vector."""
  with tf.compat.v1.name_scope(name, 'Normal', [params, event_shape]):
    params = tf.convert_to_tensor(value=params, name='params')
    event_shape = dist_util.expand_to_vector(
        tf.convert_to_tensor(
            value=event_shape, name='event_shape', dtype=tf.int32),
        tensor_name='event_shape')
    output_shape = tf.concat([
        tf.shape(input=params)[:-1],
        event_shape,
    ], axis=0)
    loc_params, scale_params = tf.split(params, 2, axis=-1)
    if softplus_scale:
      scale_params = tf.math.softplus(scale_params) + tfd.softplus_inverse(1.0)
    return tfd.Independent(
        tfd.Normal(
            loc=tf.reshape(loc_params, output_shape),
            scale=tf.reshape(scale_params, output_shape),
            validate_args=validate_args),
        reinterpreted_batch_ndims=tf.size(input=event_shape),
        validate_args=validate_args)
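# Hypothetical helper (an assumption, not part of the original API): computes
# how large the trailing axis of `params` must be for the `Normal`/`LogNormal`
# constructors above, which split the last axis in half for loc and scale.
import numpy as np

def params_size(event_shape=()):
  """Trailing dimension of `params` expected by `new`: 2 * prod(event_shape)."""
  return 2 * int(np.prod(event_shape))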
def __init__(self, n_questions, n_answers, n_components=10,
             components_prior=0.7, encoder_layers=[16, 16],
             activation='relu', n_mcmc_samples=1, random_state=None):
  super(GradeMembershipModel, self).__init__()
  self._random_state = np.random.RandomState(seed=random_state) \
      if not isinstance(random_state, np.random.RandomState) \
      else random_state
  self._initializer = tf.initializers.GlorotNormal(
      seed=self._random_state.randint(1e8))

  self.n_questions = int(n_questions)
  # this assumes every question has the same number of possible answers
  self.n_answers = int(n_answers)
  self.n_components = int(n_components)
  self.components_prior = np.array(softplus_inverse(components_prior))
  self.n_mcmc_samples = n_mcmc_samples

  self.encoder = []
  self.decoder = []
  # each question gets its own encoder and decoder
  for question_idx in range(self.n_questions):
    # ====== encoder ====== #
    encoder = Sequential(name="EncoderQ%d" % question_idx)
    for num_hidden_units in encoder_layers:
      encoder.add(
          Dense(num_hidden_units,
                activation=activation,
                kernel_initializer=self._initializer))
    encoder.add(
        Dense(n_components,
              activation=tf.nn.softplus,
              kernel_initializer=self._initializer,
              name="DenseConcentration"))
    encoder.add(
        DirichletLayer(clip_for_stable=True,
                       pre_softplus=False,
                       name="topics_posteriorQ%d" % question_idx))
    # setattr is required so the Model tracks this Layer's parameters
    setattr(self, 'encoder%d' % question_idx, encoder)
    self.encoder.append(encoder)
    # ====== decoder ====== #
    group_answer_logits = self.add_weight(
        name="topics_words_logits%d" % question_idx,
        shape=[self.n_components, n_answers],
        initializer=self._initializer)
    # The observations are bag of words and therefore not one-hot. However,
    # log_prob of OneHotCategorical computes the probability correctly in
    # this case.
    decoder = OneHotCategorical(probs_input=True,
                                name="AnswerSheetQ%d" % question_idx)
    # setattr is required so the Model tracks this Layer's parameters
    setattr(self, 'decoder%d' % question_idx, decoder)
    self.decoder.append([group_answer_logits, decoder])
  # same prior for all questions
  self.prior_logit = self.add_weight(
      name="prior_logit",
      shape=[1, self.n_components],
      trainable=False,
      initializer=tf.initializers.Constant(self.components_prior))
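# Construction sketch (illustrative values, not from the original source):
# builds one encoder/decoder pair per question. Assumes `Sequential`, `Dense`,
# `DirichletLayer` and `OneHotCategorical` are the layers imported by this
# module (keras layers plus the project's probabilistic layers).
model = GradeMembershipModel(n_questions=20,
                             n_answers=4,       # every question has 4 choices
                             n_components=10,   # latent "grade" components
                             components_prior=0.7,
                             encoder_layers=[16, 16],
                             random_state=8)
# 20 per-question encoders and the shared component prior are now tracked as
# `model.encoder0` ... `model.encoder19` and `model.prior_logit`.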
def csiszar_vimco_helper(logu, name=None):
  """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`.

  `axis = 0` of `logu` is presumed to correspond to iid samples from `q`,
  i.e.,

  ```none
  logu[j] = log(u[j])
  u[j] = p(x, h[j]) / q(h[j] | x)
  h[j] iid~ q(H | x)
  ```

  Args:
    logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the
      average of `u`. The sum of the gradient of `log_avg_u` is `1`.
    log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of
      the average of `u` except that the average swaps-out `u[i]` for the
      leave-`i`-out Geometric-average. The mean of the gradient of
      `log_sooavg_u` is `1`. Mathematically `log_sooavg_u` is,

      ```none
      log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1})
      h[j ; i] = { u[j]                              j != i
                 { GeometricAverage{u[k] : k != i}   j == i
      ```
  """
  with tf.compat.v1.name_scope(name, "csiszar_vimco_helper", [logu]):
    logu = tf.convert_to_tensor(value=logu, name="logu")

    n = tf.compat.dimension_value(logu.shape.with_rank_at_least(1)[0])
    if n is None:
      n = tf.shape(input=logu)[0]
      log_n = tf.math.log(tf.cast(n, dtype=logu.dtype))
      nm1 = tf.cast(n - 1, dtype=logu.dtype)
    else:
      log_n = np.log(n).astype(logu.dtype.as_numpy_dtype)
      nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype)

    # Throughout we reduce across axis=0 since this is presumed to be iid
    # samples.
    log_max_u = tf.reduce_max(input_tensor=logu, axis=0)
    log_sum_u_minus_log_max_u = tf.reduce_logsumexp(
        input_tensor=logu - log_max_u, axis=0)

    # log_loosum_u[i] =
    # = logsumexp(logu[j] : j != i)
    # = log( exp(logsumexp(logu)) - exp(logu[i]) )
    # = log( exp(logsumexp(logu - logu[i])) exp(logu[i]) - exp(logu[i]))
    # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1)
    # = logu[i] + log(exp(logsumexp(logu) - logu[i]) - 1)
    # = logu[i] + softplus_inverse(logsumexp(logu) - logu[i])
    d = log_sum_u_minus_log_max_u + (log_max_u - logu)
    # We use `d != 0` rather than `d > 0.` because `d < 0.` should never
    # happen; if it does we want to complain loudly (which `softplus_inverse`
    # will).
    d_ok = tf.not_equal(d, 0.)
    safe_d = tf.compat.v1.where(d_ok, d, tf.ones_like(d))
    d_ok_result = logu + tfd.softplus_inverse(safe_d)

    inf = np.array(np.inf, dtype=logu.dtype.as_numpy_dtype)

    # When not(d_ok) and is_positive_and_largest then we manually compute the
    # log_loosum_u. (We can efficiently do this for any one point but not all,
    # hence we still need the above calculation.) This is good because when
    # this condition is met, we cannot use the above calculation; it's -inf.
    # We now compute the log-leave-out-max-sum, replicate it to every
    # point and make sure to select it only when we need to.
    is_positive_and_largest = tf.logical_and(
        logu > 0.,
        tf.equal(logu, log_max_u[tf.newaxis, ...]))
    log_lomsum_u = tf.reduce_logsumexp(
        input_tensor=tf.compat.v1.where(
            is_positive_and_largest,
            tf.fill(tf.shape(input=logu), -inf),
            logu),
        axis=0,
        keepdims=True)
    log_lomsum_u = tf.tile(
        log_lomsum_u,
        multiples=1 + tf.pad(tensor=[n - 1],
                             paddings=[[0, tf.rank(logu) - 1]]))

    d_not_ok_result = tf.compat.v1.where(
        is_positive_and_largest,
        log_lomsum_u,
        tf.fill(tf.shape(input=d), -inf))

    log_loosum_u = tf.compat.v1.where(d_ok, d_ok_result, d_not_ok_result)

    # The swap-one-out-sum ("soosum") is n different sums, each of which
    # replaces the i-th item with the i-th-left-out average, i.e.,
    # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i]))
    #              =  exp(log_loosum_u[i])      + exp(looavg_logu[i])
    looavg_logu = (tf.reduce_sum(input_tensor=logu, axis=0) - logu) / nm1
    log_soosum_u = tf.reduce_logsumexp(
        input_tensor=tf.stack([log_loosum_u, looavg_logu]),
        axis=0)

    log_avg_u = log_sum_u_minus_log_max_u + log_max_u - log_n
    log_sooavg_u = log_soosum_u - log_n

    log_avg_u.set_shape(logu.shape.with_rank_at_least(1)[1:])
    log_sooavg_u.set_shape(logu.shape)

    return log_avg_u, log_sooavg_u
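# Verification sketch (not from the original source): checks both outputs of
# `csiszar_vimco_helper` against a naive NumPy computation for a 1-D `logu`.
# `log_avg_u` should equal the log of the plain average of `u`, and
# `log_sooavg_u[i]` the log of the average where `u[i]` is swapped for the
# leave-i-out geometric mean. Assumes the module imports (`tf`, `np`, `tfd`)
# used by the helper are in place.
import numpy as np
import tensorflow as tf

logu = np.random.randn(5).astype(np.float32)
log_avg_u, log_sooavg_u = csiszar_vimco_helper(tf.constant(logu))

np.testing.assert_allclose(log_avg_u, np.log(np.exp(logu).mean()), rtol=1e-5)

naive = np.empty_like(logu)
for i in range(5):
  u = np.exp(logu).copy()
  u[i] = np.exp(np.mean(np.delete(logu, i)))   # leave-i-out geometric average
  naive[i] = np.log(u.mean())
np.testing.assert_allclose(log_sooavg_u, naive, rtol=1e-5)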
def csiszar_vimco_helper(logu, name=None):
  """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`.

  `axis = 0` of `logu` is presumed to correspond to iid samples from `q`,
  i.e.,

  ```none
  logu[j] = log(u[j])
  u[j] = p(x, h[j]) / q(h[j] | x)
  h[j] iid~ q(H | x)
  ```

  Args:
    logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the
      average of `u`. The sum of the gradient of `log_avg_u` is `1`.
    log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of
      the average of `u` except that the average swaps-out `u[i]` for the
      leave-`i`-out Geometric-average. The mean of the gradient of
      `log_sooavg_u` is `1`. Mathematically `log_sooavg_u` is,

      ```none
      log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1})
      h[j ; i] = { u[j]                              j != i
                 { GeometricAverage{u[k] : k != i}   j == i
      ```
  """
  with tf.name_scope(name, "csiszar_vimco_helper", [logu]):
    logu = tf.convert_to_tensor(logu, name="logu")

    n = logu.shape.with_rank_at_least(1)[0].value
    if n is None:
      n = tf.shape(logu)[0]
      log_n = tf.log(tf.cast(n, dtype=logu.dtype))
      nm1 = tf.cast(n - 1, dtype=logu.dtype)
    else:
      log_n = np.log(n).astype(logu.dtype.as_numpy_dtype)
      nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype)

    # Throughout we reduce across axis=0 since this is presumed to be iid
    # samples.
    log_max_u = tf.reduce_max(logu, axis=0)
    log_sum_u_minus_log_max_u = tf.reduce_logsumexp(
        logu - log_max_u, axis=0)

    # log_loosum_u[i] =
    # = logsumexp(logu[j] : j != i)
    # = log( exp(logsumexp(logu)) - exp(logu[i]) )
    # = log( exp(logsumexp(logu - logu[i])) exp(logu[i]) - exp(logu[i]))
    # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1)
    # = logu[i] + log(exp(logsumexp(logu) - logu[i]) - 1)
    # = logu[i] + softplus_inverse(logsumexp(logu) - logu[i])
    d = log_sum_u_minus_log_max_u + (log_max_u - logu)
    # We use `d != 0` rather than `d > 0.` because `d < 0.` should never
    # happen; if it does we want to complain loudly (which `softplus_inverse`
    # will).
    d_ok = tf.not_equal(d, 0.)
    safe_d = tf.where(d_ok, d, tf.ones_like(d))
    d_ok_result = logu + tfd.softplus_inverse(safe_d)

    inf = np.array(np.inf, dtype=logu.dtype.as_numpy_dtype)

    # When not(d_ok) and is_positive_and_largest then we manually compute the
    # log_loosum_u. (We can efficiently do this for any one point but not all,
    # hence we still need the above calculation.) This is good because when
    # this condition is met, we cannot use the above calculation; it's -inf.
    # We now compute the log-leave-out-max-sum, replicate it to every
    # point and make sure to select it only when we need to.
    is_positive_and_largest = tf.logical_and(
        logu > 0.,
        tf.equal(logu, log_max_u[tf.newaxis, ...]))
    log_lomsum_u = tf.reduce_logsumexp(
        tf.where(is_positive_and_largest,
                 tf.fill(tf.shape(logu), -inf),
                 logu),
        axis=0,
        keep_dims=True)
    log_lomsum_u = tf.tile(
        log_lomsum_u,
        multiples=1 + tf.pad([n - 1], [[0, tf.rank(logu) - 1]]))

    d_not_ok_result = tf.where(
        is_positive_and_largest,
        log_lomsum_u,
        tf.fill(tf.shape(d), -inf))

    log_loosum_u = tf.where(d_ok, d_ok_result, d_not_ok_result)

    # The swap-one-out-sum ("soosum") is n different sums, each of which
    # replaces the i-th item with the i-th-left-out average, i.e.,
    # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i]))
    #              =  exp(log_loosum_u[i])      + exp(looavg_logu[i])
    looavg_logu = (tf.reduce_sum(logu, axis=0) - logu) / nm1
    log_soosum_u = tf.reduce_logsumexp(
        tf.stack([log_loosum_u, looavg_logu]),
        axis=0)

    log_avg_u = log_sum_u_minus_log_max_u + log_max_u - log_n
    log_sooavg_u = log_soosum_u - log_n

    log_avg_u.set_shape(logu.shape.with_rank_at_least(1)[1:])
    log_sooavg_u.set_shape(logu.shape)

    return log_avg_u, log_sooavg_u