예제 #1
0
def create_prior(K,
                 a_p=1,
                 b_p=1,
                 a_gamma=1,
                 b_gamma=1,
                 m_loc=0,
                 g_loc=0.1,
                 m_sigma=3,
                 s_sigma=2,
                 m_nu=0,
                 s_nu=1,
                 m_skew=0,
                 g_skew=0.1,
                 dtype=np.float64):
    """Build the joint prior over the model parameters.

    Args:
        K: Length of the per-component vectors (`nu`, `sigma_sq`, `loc`,
            `skew`) and dimension of the two Dirichlet draws.
        a_p, b_p: Parameters of the Beta prior on `p`.
        a_gamma, b_gamma: Parameters shared by the Gamma priors on
            `gamma_C` and `gamma_T`.
        m_loc, g_loc: Mean of `loc`, and the multiplier applied to
            `sqrt(sigma_sq)` to obtain its standard deviation.
        m_sigma, s_sigma: First and second positional parameters of the
            InverseGamma prior on `sigma_sq`.
        m_nu, s_nu: Parameters of the LogNormal prior on `nu`.
        m_skew, g_skew: Same roles as `m_loc` / `g_loc`, for `skew`.
        dtype: Callable numeric type (e.g. `np.float64`) used to cast the
            hyper-parameters; also passed to `tf.ones`.

    Returns:
        A `tfd.JointDistributionNamed` with entries `p`, `gamma_C`,
        `gamma_T`, `eta_C`, `eta_T`, `nu`, `sigma_sq`, `loc` and `skew`;
        `loc` and `skew` are conditioned on `sigma_sq`.
    """

    def _gamma_prior():
        return tfd.Gamma(dtype(a_gamma), dtype(b_gamma))

    def _symmetric_dirichlet():
        # Every concentration equals 1 / K.
        return tfd.Dirichlet(tf.ones(K, dtype=dtype) / K)

    def _normal_scaled_by_sigma(mean, g):
        # The inner function's argument name is what ties the entry to the
        # `sigma_sq` draw, so it must stay `sigma_sq`.
        def _given_sigma_sq(sigma_sq):
            return tfd.Independent(
                tfd.Normal(dtype(mean), g * tf.sqrt(sigma_sq)),
                reinterpreted_batch_ndims=1)

        return _given_sigma_sq

    prior_spec = {
        'p': tfd.Beta(dtype(a_p), dtype(b_p)),
        'gamma_C': _gamma_prior(),
        'gamma_T': _gamma_prior(),
        'eta_C': _symmetric_dirichlet(),
        'eta_T': _symmetric_dirichlet(),
        'nu': tfd.Sample(tfd.LogNormal(dtype(m_nu), s_nu), sample_shape=K),
        'sigma_sq': tfd.Sample(
            tfd.InverseGamma(dtype(m_sigma), dtype(s_sigma)),
            sample_shape=K),
        'loc': _normal_scaled_by_sigma(m_loc, g_loc),
        'skew': _normal_scaled_by_sigma(m_skew, g_skew),
    }
    return tfd.JointDistributionNamed(prior_spec)
예제 #2
0
def create_dp_sb_gmm(nobs, K, dtype=np.float64):
    """Build a Gaussian mixture model with stick-breaking mixture weights.

    Args:
        nobs: Number of observations; used as the `sample_shape` of `obs`.
        K: Number of mixture components (`v` has `K - 1` entries).
        dtype: NumPy dtype used for every parameter array of the model.

    Returns:
        A `tfd.JointDistributionNamed` with entries `mu`, `sigma`, `alpha`,
        `v` and `obs`.
    """
    return tfd.JointDistributionNamed(
        dict(
            # Mixture means
            mu=tfd.Independent(tfd.Normal(np.zeros(K, dtype), 3),
                               reinterpreted_batch_ndims=1),
            # Mixture scales
            sigma=tfd.Independent(tfd.LogNormal(loc=np.full(K, -2, dtype),
                                                scale=0.5),
                                  reinterpreted_batch_ndims=1),
            # Mixture weights (stick-breaking construction).
            # The concentration follows `dtype` (rather than a hard-coded
            # float64) so that `alpha` matches the dtype of the Beta
            # parameters below.
            alpha=tfd.Gamma(concentration=np.asarray(1.0, dtype=dtype),
                            rate=10.0),
            v=lambda alpha: tfd.Independent(
                # NOTE: Dave Moore suggests doing this instead, to ensure
                # that a batch dimension in alpha doesn't conflict with
                # the other parameters.
                tfd.Beta(np.ones(K - 1, dtype), alpha[..., tf.newaxis]),
                reinterpreted_batch_ndims=1),
            # Observations (likelihood)
            obs=lambda mu, sigma, v: tfd.Sample(
                tfd.MixtureSameFamily(
                    # This will be marginalized over.
                    mixture_distribution=tfd.Categorical(probs=stickbreak(v)),
                    components_distribution=tfd.Normal(mu, sigma)),
                sample_shape=nobs)))
예제 #3
0
def create_model(n_C, n_T, K, neg_inf=-10, dtype=np.float64):
    """Build the joint model over parameters and the two observation sets.

    Args:
        n_C: Forwarded to `mix` as its last argument (presumably the number
            of control observations -- confirm against `mix`).
        n_T: Forwarded to `mix_T` as its last argument (presumably the
            number of treatment observations -- confirm against `mix_T`).
        K: Length of `loc` / `sigma_sq` and dimension of the Dirichlet draws.
        neg_inf: Value cast with `dtype` and forwarded to `mix` / `mix_T`.
        dtype: Callable numeric type (e.g. `np.float64`) used for casting.

    Returns:
        A `tfd.JointDistributionNamed` with entries `p`, `gamma_C`,
        `gamma_T`, `eta_C`, `eta_T`, `loc`, `sigma_sq`, `y_C` and `y_T`.
    """

    # The argument names of these two functions select which other entries
    # of the joint distribution they are conditioned on.
    def _y_C(gamma_C, eta_C, loc, sigma_sq):
        return mix(gamma_C, eta_C, loc, tf.sqrt(sigma_sq), dtype(neg_inf),
                   n_C)

    def _y_T(gamma_C, gamma_T, eta_C, eta_T, p, loc, sigma_sq):
        return mix_T(gamma_C, gamma_T, eta_C, eta_T, p, loc,
                     tf.sqrt(sigma_sq), dtype(neg_inf), n_T)

    model_spec = {
        'p': tfd.Beta(dtype(1), dtype(1)),
        'gamma_C': tfd.Gamma(dtype(3), dtype(3)),
        'gamma_T': tfd.Gamma(dtype(3), dtype(3)),
        'eta_C': tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
        'eta_T': tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
        'loc': tfd.Sample(tfd.Normal(dtype(0), dtype(1)), sample_shape=K),
        'sigma_sq': tfd.Sample(tfd.InverseGamma(dtype(3), dtype(2)),
                               sample_shape=K),
        'y_C': _y_C,
        'y_T': _y_T,
    }
    return tfd.JointDistributionNamed(model_spec)
예제 #4
0
    def create_distributions(self):
        """Create the prior, the trainable surrogate and their bijectors.

        Sets the following attributes on ``self``:

        * ``bijectors``: one ``tfb.Softplus`` per variable name.
        * ``prior_distribution``: a ``tfd.JointDistributionNamed`` over
          ``u``, ``v``, ``s`` and ``w``; when ``self.horseshoe_plus`` is
          true it also contains the scale variables ``u_eta``, ``u_tau``,
          ``s_eta``, ``s_tau`` and their auxiliaries ``*_a``.
        * ``surrogate_distribution``: a ``tfd.JointDistributionNamed`` with
          the same variable names, built from the trainable distributions
          returned by ``build_trainable_normal_dist`` /
          ``build_trainable_InverseGamma_dist`` pushed through the matching
          bijector.
        * ``surrogate_vars``: the surrogate's ``variables``.
        * ``var_list``: the surrogate's variable names, in insertion order.

        Finally calls ``self.set_calibration_expectations()``.
        """
        self.bijectors = {
            name: tfb.Softplus()
            for name in ('u', 'v', 'u_eta', 'u_tau', 's', 's_eta', 's_tau',
                         'w')
        }
        # Shape (1, latent_dim): decay ** [0, 1, ..., latent_dim - 1].
        symmetry_breaking_decay = self.symmetry_breaking_decay**tf.cast(
            tf.range(self.latent_dim), self.dtype)[tf.newaxis, ...]

        def ones(*shape):
            # All-ones tensor of the given shape in the model dtype.
            return tf.ones(shape, dtype=self.dtype)

        def matrix_event(distribution):
            # Every variable here is a rank-2 tensor treated as one event.
            return tfd.Independent(distribution, reinterpreted_batch_ndims=2)

        # ------------------------------------------------------------------
        # Prior
        # ------------------------------------------------------------------
        distribution_dict = {
            'v':
            matrix_event(
                tfd.HalfNormal(
                    scale=0.1 * ones(self.latent_dim, self.feature_dim))),
            'w':
            matrix_event(tfd.HalfNormal(scale=ones(1, self.feature_dim))),
        }
        if self.horseshoe_plus:
            for name in ('u_eta_a', 'u_tau_a', 's_eta_a', 's_tau_a'):
                self.bijectors[name] = tfb.Softplus()

            def sqrt_inverse_gamma(shape, auxiliary):
                # Scale variable conditioned on its auxiliary `*_a` variable.
                # NOTE(review): together with the InverseGamma(0.5, .) prior
                # on the auxiliary this is presumably the usual scale-mixture
                # form of a half-Cauchy -- confirm against SqrtInverseGamma.
                return matrix_event(
                    SqrtInverseGamma(concentration=0.5 * ones(*shape),
                                     scale=1.0 / auxiliary))

            def inverse_gamma(shape, scale):
                return matrix_event(
                    tfd.InverseGamma(concentration=0.5 * ones(*shape),
                                     scale=scale))

            # The lambda argument names select the parent variables, and the
            # insertion order of the keys is kept stable on purpose.
            distribution_dict.update({
                'u':
                lambda u_eta, u_tau: matrix_event(
                    tfd.HalfNormal(scale=u_eta * u_tau *
                                   symmetry_breaking_decay)),
                'u_eta':
                lambda u_eta_a: sqrt_inverse_gamma(
                    (self.feature_dim, self.latent_dim), u_eta_a),
                'u_tau':
                lambda u_tau_a: sqrt_inverse_gamma(
                    (1, self.latent_dim), u_tau_a),
                's':
                lambda s_eta, s_tau: matrix_event(
                    tfd.HalfNormal(scale=s_eta * s_tau)),
                's_eta':
                lambda s_eta_a: sqrt_inverse_gamma(
                    (2, self.feature_dim), s_eta_a),
                's_tau':
                lambda s_tau_a: sqrt_inverse_gamma(
                    (1, self.feature_dim), s_tau_a),
                'u_eta_a':
                inverse_gamma((self.feature_dim, self.latent_dim),
                              ones(self.feature_dim, self.latent_dim)),
                'u_tau_a':
                inverse_gamma((1, self.latent_dim),
                              ones(1, self.latent_dim) /
                              self.u_tau_scale**2),
                's_eta_a':
                inverse_gamma((2, self.feature_dim),
                              ones(2, self.feature_dim)),
                's_tau_a':
                inverse_gamma((1, self.feature_dim),
                              ones(1, self.feature_dim) /
                              self.s_tau_scale**2),
            })
        else:
            # `reinterpreted_batch_ndims` is an argument of
            # `tfd.Independent` (applied by `matrix_event`), not of
            # `AbsHorseshoe`.
            distribution_dict.update({
                'u':
                matrix_event(
                    AbsHorseshoe(
                        scale=(self.u_tau_scale * symmetry_breaking_decay *
                               ones(self.feature_dim, self.latent_dim)))),
                's':
                matrix_event(
                    AbsHorseshoe(scale=self.s_tau_scale *
                                 ones(1, self.feature_dim))),
            })

        self.prior_distribution = tfd.JointDistributionNamed(distribution_dict)

        # ------------------------------------------------------------------
        # Surrogate posterior
        # ------------------------------------------------------------------
        def normal_surrogate(name, first, second):
            # Both tensors are forwarded positionally, followed by the
            # literal 2 and the distribution strategy.
            return self.bijectors[name](build_trainable_normal_dist(
                first, second, 2, strategy=self.strategy))

        def inverse_gamma_surrogate(name, first, second):
            return self.bijectors[name](build_trainable_InverseGamma_dist(
                first, second, 2, strategy=self.strategy))

        def s_surrogate():
            # The two rows of `s` start at -2 and -1 respectively.
            return normal_surrogate(
                's',
                ones(2, self.feature_dim) *
                tf.cast([[-2.], [-1.]], dtype=self.dtype),
                1e-3 * ones(2, self.feature_dim))

        surrogate_dict = {
            'v':
            normal_surrogate('v',
                             -6. * ones(self.latent_dim, self.feature_dim),
                             5e-4 * ones(self.latent_dim, self.feature_dim)),
            'w':
            normal_surrogate('w', -6 * ones(1, self.feature_dim),
                             5e-4 * ones(1, self.feature_dim)),
        }
        if self.horseshoe_plus:
            # Key order matters: it becomes `self.var_list`.
            surrogate_dict.update({
                'u':
                normal_surrogate(
                    'u', -6. * ones(self.feature_dim, self.latent_dim),
                    5e-4 * ones(self.feature_dim, self.latent_dim)),
                'u_eta':
                inverse_gamma_surrogate(
                    'u_eta', 3 * ones(self.feature_dim, self.latent_dim),
                    ones(self.feature_dim, self.latent_dim)),
                'u_tau':
                inverse_gamma_surrogate('u_tau',
                                        3 * ones(1, self.latent_dim),
                                        ones(1, self.latent_dim)),
                's_eta':
                inverse_gamma_surrogate('s_eta', ones(2, self.feature_dim),
                                        ones(2, self.feature_dim)),
                's_tau':
                inverse_gamma_surrogate('s_tau',
                                        1 * ones(1, self.feature_dim),
                                        ones(1, self.feature_dim)),
                's':
                s_surrogate(),
                'u_eta_a':
                inverse_gamma_surrogate(
                    'u_eta_a', 2. * ones(self.feature_dim, self.latent_dim),
                    ones(self.feature_dim, self.latent_dim)),
                'u_tau_a':
                inverse_gamma_surrogate(
                    'u_tau_a', 2. * ones(1, self.latent_dim),
                    ones(1, self.latent_dim) / self.u_tau_scale**2),
                's_eta_a':
                inverse_gamma_surrogate('s_eta_a',
                                        2. * ones(2, self.feature_dim),
                                        ones(2, self.feature_dim)),
                's_tau_a':
                inverse_gamma_surrogate(
                    's_tau_a', 2. * ones(1, self.feature_dim),
                    (ones(1, self.feature_dim) / self.s_tau_scale**2)),
            })
        else:
            surrogate_dict.update({
                's':
                s_surrogate(),
                'u':
                normal_surrogate(
                    'u', -9. * ones(self.feature_dim, self.latent_dim),
                    5e-4 * ones(self.feature_dim, self.latent_dim)),
            })

        self.surrogate_distribution = tfd.JointDistributionNamed(
            surrogate_dict)

        self.surrogate_vars = self.surrogate_distribution.variables
        self.var_list = list(surrogate_dict.keys())
        self.set_calibration_expectations()
예제 #5
0
    return tf.linalg.cholesky(K)


def compute_f(alpha, rho, beta, eta):
    """Return `compute_LK(alpha, rho, X) @ eta` shifted by `beta`.

    `X` is read from module scope. `beta` gains a trailing axis so that it
    broadcasts across the last dimension of the matrix-vector product.
    """
    # NOTE(review): `compute_LK` presumably returns the Cholesky factor of
    # the kernel matrix -- confirm against its definition.
    factor = compute_LK(alpha, rho, X)
    intercept = beta[..., tf.newaxis]
    return tf.linalg.matvec(factor, eta) + intercept


# GP binary classification model: priors on the kernel parameters
# (`alpha`, `rho`), the intercept (`beta`) and one standard-normal `eta`
# per row of `X`, with Bernoulli observations whose logits come from
# `compute_f`.
gpc_model = tfd.JointDistributionNamed({
    'alpha': tfd.LogNormal(dtype(0), dtype(1)),
    'rho': tfd.LogNormal(dtype(0), dtype(1)),
    'beta': tfd.Normal(dtype(0), dtype(1)),
    'eta': tfd.Sample(tfd.Normal(dtype(0), dtype(1)),
                      sample_shape=X.shape[0]),
    # NOTE: `Sample` and `Independent` resemble, respectively,
    # `filldist` and `arraydist` in Turing.
    'obs': lambda alpha, rho, beta, eta: tfd.Independent(
        tfd.Bernoulli(logits=compute_f(alpha, rho, beta, eta)),
        reinterpreted_batch_ndims=1),
})

### MODEL SET UP ###

# Draw once from the model. For some reason this is needed for the
# compiler to know the correct model parameter dimensions.
_ = gpc_model.sample()

# Parameter names in the order they appear in the model definition.
# NOTE: initial values should be supplied in this same order.
ordered_params = ['alpha', 'rho', 'beta', 'eta']
예제 #6
0
# Here we will use the squared exponential covariance function:
#
# $$
# \alpha^2 \cdot \exp\left\{-\frac{d^2}{2\rho^2}\right\}
# $$
#
# where $\alpha$ is the amplitude of the covariance, $\rho$ is the length scale which controls how slowly information decays with distance (larger $\rho$ means information about a point can be used for data far away); and $d$ is the distance.

# In[4]:

# GP model: log-normal priors on the kernel amplitude, the kernel length
# scale and `v`, which is passed to the GP as its observation noise
# variance.
gp_model = tfd.JointDistributionNamed({
    'amplitude': tfd.LogNormal(dtype(0), dtype(0.1)),
    'length_scale': tfd.LogNormal(dtype(0), dtype(1)),
    'v': tfd.LogNormal(dtype(0), dtype(1)),
    'obs': lambda length_scale, amplitude, v: tfd.GaussianProcess(
        kernel=tfp.math.psd_kernels.ExponentiatedQuadratic(
            amplitude=amplitude, length_scale=length_scale),
        index_points=X[..., np.newaxis],
        observation_noise_variance=v),
})

# Draw once to make sure the model can be sampled.
_ = gp_model.sample()

# Initial values: a float64 scalar 0.1 for each parameter, in the order
# amplitude, length_scale, v.
initial_state = [
    1e-1 * tf.ones([], dtype=np.float64, name=param_name)
    for param_name in ('amplitude', 'length_scale', 'v')
]
예제 #7
0
# Unconstrained scale parameter for q(v); shifted down by 1 so that the
# softplus-transformed scale starts smaller.
qv_rho = tf.Variable(
    initial_value=tf.random.normal(shape=[ncomponents - 1],
                                   dtype=np.float64) - 1,
    name='qv_rho')

# Location and unconstrained scale parameters for q(alpha).
qalpha_loc = tf.Variable(
    initial_value=tf.random.normal(shape=[], dtype=np.float64),
    name='qalpha_loc')
qalpha_rho = tf.Variable(
    initial_value=tf.random.normal(shape=[], dtype=np.float64),
    name='qalpha_rho')

# Variational distribution: one independent factor per model parameter.
# Every `*_rho` variable is mapped through softplus to get a positive scale.
surrogate_posterior = tfd.JointDistributionNamed({
    # q(mu)
    'mu': tfd.Independent(
        tfd.Normal(qmu_loc, tf.nn.softplus(qmu_rho)),
        reinterpreted_batch_ndims=1),
    # q(sigma)
    'sigma': tfd.Independent(
        tfd.LogNormal(qsigma_loc, tf.nn.softplus(qsigma_rho)),
        reinterpreted_batch_ndims=1),
    # q(v)
    'v': tfd.Independent(
        tfd.LogitNormal(qv_loc, tf.nn.softplus(qv_rho)),
        reinterpreted_batch_ndims=1),
    # q(alpha)
    'alpha': tfd.LogNormal(qalpha_loc, tf.nn.softplus(qalpha_rho)),
})

# In[12]:


# Run optimizer
# @tf.function(autograph=False) , experimental_compile=True)  # Makes slower?
def run_advi(optimizer, sample_size=1, num_steps=2000, seed=1):
    return tfp.vi.fit_surrogate_posterior(
        target_log_prob_fn=target_log_prob_fn,
예제 #8
0
파일: test_joint.py 프로젝트: bjfar/jmctf
from tensorflow_probability import distributions as tfd

# Number of Poisson components per group, and also the number of joint
# samples drawn below.
N = 1000

# Two groups ("A" and "B"), each holding N Poisson distributions keyed by
# their integer index.
dists = {
    group: {i: tfd.Poisson(rate=1e-6) for i in range(N)}
    for group in ("A", "B")
}

# Build the joint distribution over the nested dictionary, draw N samples
# and evaluate their joint log-probability.
joint = tfd.JointDistributionNamed(dists)
samples = joint.sample(N)
print("joint.log_prob =", joint.log_prob(samples))