Example #1
def create_prior(K,
                 a_p=1,
                 b_p=1,
                 a_gamma=1,
                 b_gamma=1,
                 m_loc=0,
                 g_loc=0.1,
                 m_sigma=3,
                 s_sigma=2,
                 m_nu=0,
                 s_nu=1,
                 m_skew=0,
                 g_skew=0.1,
                 dtype=np.float64):
    return tfd.JointDistributionNamed(
        dict(
            p=tfd.Beta(dtype(a_p), dtype(b_p)),
            gamma_C=tfd.Gamma(dtype(a_gamma), dtype(b_gamma)),
            gamma_T=tfd.Gamma(dtype(a_gamma), dtype(b_gamma)),
            eta_C=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            eta_T=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            nu=tfd.Sample(tfd.LogNormal(dtype(m_nu), s_nu), sample_shape=K),
            sigma_sq=tfd.Sample(tfd.InverseGamma(dtype(m_sigma),
                                                 dtype(s_sigma)),
                                sample_shape=K),
            loc=lambda sigma_sq: tfd.Independent(tfd.Normal(
                dtype(m_loc), g_loc * tf.sqrt(sigma_sq)),
                                                 reinterpreted_batch_ndims=1),
            skew=lambda sigma_sq: tfd.Independent(tfd.Normal(
                dtype(m_skew), g_skew * tf.sqrt(sigma_sq)),
                                                  reinterpreted_batch_ndims=1),
        ))
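A minimal usage sketch for the prior above (not part of the original example; it assumes numpy, tensorflow, and tensorflow_probability are imported as np, tf, and tfd as in the snippet):

prior = create_prior(K=3)
draw = prior.sample()            # dict keyed by p, gamma_C, gamma_T, eta_C, eta_T, nu, sigma_sq, loc, skew
joint_lp = prior.log_prob(draw)  # joint log density of the sampled dict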
Example #2
 def create_models(self, data):
     self.models = []
     for i in range(self.num_outputs):
         kern = gpflow.kernels.SquaredExponential(lengthscales=tf.ones([data[0].shape[1],], dtype=gpflow.config.default_float()))
         kern.lengthscales.prior = tfd.Gamma(to_default_float(1.1), to_default_float(1/10.0))  # priors have to be set before
         kern.variance.prior = tfd.Gamma(to_default_float(1.5), to_default_float(1/2.0))       # the model gets compiled
         self.models.append(gpflow.models.GPR((data[0], data[1][:, i:i+1]), kernel=kern))
         self.models[-1].likelihood.variance.prior = tfd.Gamma(to_default_float(1.2), to_default_float(1/0.05))  # prior on the Gaussian noise variance
Example #3
 def logp(par):
     p = param
     p['beta1'] = par[0]
     p['gamma'] = par[1]
     beta_logp = tfd.Gamma(concentration=tf.constant(1., tf.float64),
                           rate=tf.constant(1., tf.float64)).log_prob(
                               p['beta1'])
     gamma_logp = tfd.Gamma(concentration=tf.constant(100., tf.float64),
                            rate=tf.constant(400., tf.float64)).log_prob(
                                p['gamma'])
     t, sim, solve = simulator.simulate(p, state_init)
     y_logp = covid19uk_logp(y_incr, sim, 0.1)
     logp = beta_logp + gamma_logp + tf.reduce_sum(y_logp)
     return logp
Example #4
def empirical_Ey_and_Ey2_tf(a=3,
                            ap=3,
                            bp=1.0,
                            c=3,
                            cp=3,
                            dp=1.0,
                            nsamples_latent=100,
                            nsamples_latent1=1,
                            nsamples_output=10,
                            K=25,
                            N=1,
                            M=1):
    """
        Returns E_prior[Y] and E_prior[Y^2] for a given set of hyperparameters.
        Parametrization as in: http://jakehofman.com/inprint/poisson_recs.pdf
    """
    if N != 1: warnings.warn("N!=1 will be ignored!")
    if M != 1: warnings.warn("M!=1 will be ignored!")

    #a, ap, bp, c, cp, dp = _ttf(a), _ttf(ap), _ttf(bp), _ttf(c), _ttf(cp), _ttf(dp) # cast to tf

    ksi = tfd.Gamma(ap, ap / bp).sample(nsamples_latent)  # NL0
    theta = tfd.Gamma(a, ksi).sample((K, nsamples_latent1))  # K x NL1 x NL0

    eta = tfd.Gamma(cp, cp / dp).sample(nsamples_latent)
    beta = tfd.Gamma(c, eta).sample((K, nsamples_latent1))

    latent = tf.reduce_sum(theta * beta, 0)  # NL1 x NL0
    latent = tf.reshape(latent, [-1])  # NL1*NL0

    poisson = tfd.Poisson(rate=latent)
    #y_samples = np.random.poisson(latent, size=[nsamples_output, nsamples_latent*nsamples_latent1]) # NO x NL1*NL0
    y_samples = tf.stop_gradient(poisson.sample([nsamples_output]))

    y_probs = tf.exp(poisson.log_prob(y_samples))
    #y_probs1 = np.array([[tf.exp(tfd.Poisson(rate=latent[i]).log_prob(y_samples[j,i])).numpy()
    #                                                        for j in range(nsamples_output)]
    #                                                        for i in range(nsamples_latent * nsamples_latent1)]).T
    #assert (y_probs - y_probs1).numpy().max()<1e-12
    total_prob = tf.reduce_sum(y_probs, 0)
    conditional_expectation = tf.reduce_sum(y_probs * y_samples,
                                            0) / total_prob
    conditional_expectation_squared = tf.reduce_sum(y_probs * (y_samples**2),
                                                    0) / total_prob

    expectation = tf.reduce_mean(conditional_expectation)
    expectation_squared = tf.reduce_mean(conditional_expectation_squared)

    return expectation, expectation_squared
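For reference, the estimator above forms probability-weighted averages of the stop-gradient Poisson draws: with weights w_j = P(Y = y_j | rate), it computes E_hat[Y | rate] = sum_j w_j * y_j / sum_j w_j (and likewise for Y^2), then averages these over the latent-rate samples to produce the returned E_prior[Y] and E_prior[Y^2]. Because the samples are wrapped in tf.stop_gradient, gradients with respect to the hyperparameters flow only through the weights.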
Example #5
def create_model(n_C, n_T, K, neg_inf=-10, dtype=np.float64):
    return tfd.JointDistributionNamed(
        dict(p=tfd.Beta(dtype(1), dtype(1)),
             gamma_C=tfd.Gamma(dtype(3), dtype(3)),
             gamma_T=tfd.Gamma(dtype(3), dtype(3)),
             eta_C=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
             eta_T=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
             loc=tfd.Sample(tfd.Normal(dtype(0), dtype(1)), sample_shape=K),
             sigma_sq=tfd.Sample(tfd.InverseGamma(dtype(3), dtype(2)),
                                 sample_shape=K),
             y_C=lambda gamma_C, eta_C, loc, sigma_sq: mix(
                 gamma_C, eta_C, loc, tf.sqrt(sigma_sq), dtype(neg_inf), n_C),
             y_T=lambda gamma_C, gamma_T, eta_C, eta_T, p, loc, sigma_sq:
             mix_T(gamma_C, gamma_T, eta_C, eta_T, p, loc, tf.sqrt(sigma_sq),
                   dtype(neg_inf), n_T)))
Example #6
def empirical_Ey_and_Ey2_tf_logscore(a=3,
                                     ap=3,
                                     bp=1.0,
                                     c=3,
                                     cp=3,
                                     dp=1.0,
                                     nsamples_latent=100,
                                     nsamples_latent1=1,
                                     nsamples_output=10,
                                     K=25,
                                     N=1,
                                     M=1):
    """
        Returns E_prior[Y] and E_prior[Y^2] for a given set of hyperparameters.
        Parametrization as in: http://jakehofman.com/inprint/poisson_recs.pdf
        Gradients obtained with the log-score derivative trick.
    """
    if N != 1: warnings.warn("N!=1 will be ignored!")
    if M != 1: warnings.warn("M!=1 will be ignored!")

    ksi = tfd.Gamma(ap, ap / bp).sample(nsamples_latent)  # NL0
    theta = tfd.Gamma(a, ksi).sample((K, nsamples_latent1))  # K x NL1 x NL0

    eta = tfd.Gamma(cp, cp / dp).sample(nsamples_latent)
    beta = tfd.Gamma(c, eta).sample((K, nsamples_latent1))

    latent = tf.reduce_sum(theta * beta, 0)  # NL1 x NL0
    latent = tf.reshape(latent, [-1])  # NL1*NL0

    poisson = tfd.Poisson(rate=latent)
    y_samples = poisson.sample([nsamples_output])

    conditional_expectation = tfp.monte_carlo.expectation(
        f=lambda x: x,
        samples=y_samples,
        log_prob=poisson.log_prob,
        use_reparameterization=False)

    conditional_expectation_squared = tfp.monte_carlo.expectation(
        f=lambda x: x * x,
        samples=y_samples,
        log_prob=poisson.log_prob,
        use_reparameterization=False)

    expectation = tf.reduce_mean(conditional_expectation)
    expectation_squared = tf.reduce_mean(conditional_expectation_squared)

    return expectation, expectation_squared
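A hedged sketch of differentiating these estimates with respect to the hyperparameters (not from the original source; the hyperparameters must be passed as tf.Variable for gradients to be tracked):

a = tf.Variable(3.0)
bp = tf.Variable(1.0)
with tf.GradientTape() as tape:
    ey, ey2 = empirical_Ey_and_Ey2_tf_logscore(a=a, bp=bp)
grads = tape.gradient(ey, [a, bp])  # d E[Y] / d a and d E[Y] / d bp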
Example #7
    def __init__(self,
                 a,
                 theta,
                 alpha,
                 beta,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='Amoroso'):

        parameters = dict(locals())
        with tf.name_scope(name) as name:
            self._a = tensor_util.convert_nonref_to_tensor(a)
            self._theta = tensor_util.convert_nonref_to_tensor(theta)
            self._alpha = tensor_util.convert_nonref_to_tensor(alpha)
            self._beta = tensor_util.convert_nonref_to_tensor(beta)
            gamma = tfd.Gamma(alpha, 1.)

            chain = tfb.Invert(
                tfb.Chain([
                    tfb.Exp(),
                    tfb.Scale(beta),
                    tfb.Shift(-tf.math.log(theta)),
                    tfb.Log(),
                    tfb.Shift(-a),
                ]))

            super().__init__(distribution=gamma,
                             bijector=chain,
                             validate_args=validate_args,
                             parameters=parameters,
                             name=name)
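A minimal usage sketch for the Amoroso class above (hypothetical parameter values; it assumes the surrounding tfd/tfb imports):

amoroso = Amoroso(a=0., theta=1., alpha=2., beta=1.)
x = amoroso.sample(5)      # forward-transforms Gamma(alpha, 1) draws
lp = amoroso.log_prob(x)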
Example #8
def create_dp_sb_gmm(nobs, K, dtype=np.float64):
    return tfd.JointDistributionNamed(
        dict(
            # Mixture means
            mu=tfd.Independent(tfd.Normal(np.zeros(K, dtype), 3),
                               reinterpreted_batch_ndims=1),
            # Mixture scales
            sigma=tfd.Independent(tfd.LogNormal(loc=np.full(K, -2, dtype),
                                                scale=0.5),
                                  reinterpreted_batch_ndims=1),
            # Mixture weights (stick-breaking construction)
            alpha=tfd.Gamma(concentration=np.float64(1.0), rate=10.0),
            v=lambda alpha: tfd.Independent(
                # NOTE: Dave Moore suggests doing this instead, to ensure
                # that a batch dimension in alpha doesn't conflict with
                # the other parameters.
                tfd.Beta(np.ones(K - 1, dtype), alpha[..., tf.newaxis]),
                reinterpreted_batch_ndims=1),
            # Observations (likelihood)
            obs=lambda mu, sigma, v: tfd.Sample(
                tfd.MixtureSameFamily(
                    # This will be marginalized over.
                    mixture_distribution=tfd.Categorical(probs=stickbreak(v)),
                    components_distribution=tfd.Normal(mu, sigma)),
                sample_shape=nobs)))
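The stickbreak helper used above is not shown in this snippet; a common implementation (an assumption, not necessarily the one used by this project) maps the K-1 Beta draws to K mixture weights:

def stickbreak(v):
    # v: (..., K-1) stick-breaking fractions -> (..., K) mixture weights
    ones = tf.ones_like(v[..., :1])
    v_ext = tf.concat([v, ones], axis=-1)                                     # last stick takes the remainder
    remaining = tf.concat([ones, tf.math.cumprod(1 - v, axis=-1)], axis=-1)   # unbroken stick length so far
    return v_ext * remaining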
Example #9
def test_meta_distributions():
    N = 100
    sigma_tf = tfd.Gamma(np.asarray(1.), np.asarray(1.)).sample()
    epsilon_tf = tfd.Normal(np.zeros((N, 1)), sigma_tf).sample()
    beta_tf = tfd.Normal(np.zeros((2, 1)), 1).sample()
    X = np.vstack([np.random.randn(N), np.ones(N)]).T
    X_tf = tf.convert_to_tensor(X)

    Y_tf = tf.linalg.matmul(X_tf, beta_tf) + epsilon_tf

    Y_mt = mt(Y_tf)

    # Confirm that all `Operation`s are the same.
    assert_ops_equal(Y_mt, Y_tf)

    # Now, let's see if we can reconstruct it entirely from the
    # meta objects.
    def _remove_obj(meta_obj):
        if (hasattr(meta_obj, '_obj')
                and not isinstance(meta_obj, TFlowMetaOpDef)):
            meta_obj._obj = None

        if hasattr(meta_obj, 'ancestors'):
            for a in meta_obj.ancestors or []:
                _remove_obj(a)

    _remove_obj(Y_mt)

    Y_mt_tf = Y_mt.reify()

    assert_ops_equal(Y_mt, Y_mt_tf)
Example #10
 def __call__(self):
     """Get the distribution object from the backend"""
     if get_backend() == 'pytorch':
         import torch.distributions as tod
         return tod.gamma.Gamma(self.concentration, self.rate)
     else:
         from tensorflow_probability import distributions as tfd
         return tfd.Gamma(self.concentration, self.rate)
Example #11
 def logp(par):
     p = param
     p['beta1'] = par[0]
     p['beta3'] = par[1]
     p['gamma'] = par[2]
     p['I0'] = par[3]
     p['r'] = par[4]
     beta_logp = tfd.Gamma(concentration=tf.constant(1., dtype=DTYPE), rate=tf.constant(1., dtype=DTYPE)).log_prob(p['beta1'])
     beta3_logp = tfd.Gamma(concentration=tf.constant(200., dtype=DTYPE),
                            rate=tf.constant(200., dtype=DTYPE)).log_prob(p['beta3'])
     gamma_logp = tfd.Gamma(concentration=tf.constant(100., dtype=DTYPE), rate=tf.constant(400., dtype=DTYPE)).log_prob(p['gamma'])
     I0_logp = tfd.Gamma(concentration=tf.constant(1.5, dtype=DTYPE), rate=tf.constant(0.05, dtype=DTYPE)).log_prob(p['I0'])
     r_logp = tfd.Gamma(concentration=tf.constant(0.1, dtype=DTYPE), rate=tf.constant(0.1, dtype=DTYPE)).log_prob(p['r'])
     state_init = simulator.create_initial_state(init_matrix=seeding * p['I0'])
     t, sim, solve = simulator.simulate(p, state_init)
     y_logp = covid19uk_logp(y_incr, sim, 0.1, p['r'])
     logp = beta_logp + beta3_logp + gamma_logp + I0_logp + r_logp + tf.reduce_sum(y_logp)
     return logp
Example #12
    def sample_f(self):
        """
        Runs MCMC to sample posterior functions.
        """
        # add priors to the hyperparameters.
        self.model.kernel.lengthscales.prior = tfd.Gamma(f64(1.0), f64(1.0))
        self.model.kernel.variance.prior = tfd.Gamma(f64(1.0), f64(1.0))
        self.model.likelihood.variance.prior = tfd.Gamma(f64(1.0), f64(1.0))
        if self.mean_function is not None:
            self.model.mean_function.A.prior = tfd.Normal(f64(0.0), f64(10.0))
            self.model.mean_function.b.prior = tfd.Normal(f64(0.0), f64(10.0))

        # sample from the posterior using HMC (required to estimate epistemic uncertainty)
        num_burnin_steps = ci_niter(300)
        num_samples = ci_niter(self.num_samples)

        # Note that here we need model.trainable_parameters, not trainable_variables - only parameters can have priors!
        self.hmc_helper = gpflow.optimizers.SamplingHelper(
            self.model.log_posterior_density, self.model.trainable_parameters)

        hmc = tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=self.hmc_helper.target_log_prob_fn,
            num_leapfrog_steps=10,
            step_size=0.01)
        adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
            hmc,
            num_adaptation_steps=10,
            target_accept_prob=f64(0.75),
            adaptation_rate=0.1)

        @tf.function
        def run_chain_fn():
            return tfp.mcmc.sample_chain(
                num_results=num_samples,
                num_burnin_steps=num_burnin_steps,
                current_state=self.hmc_helper.current_state,
                kernel=adaptive_hmc,
                trace_fn=lambda _, pkr: pkr.inner_results.is_accepted,
            )

        self.samples, traces = run_chain_fn()
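        # Follow-up note (not in the original): the unconstrained draws in self.samples
        # can be mapped back to constrained hyperparameter values via
        # self.hmc_helper.convert_to_constrained_values(self.samples).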
Example #13
    def create_models(self, X, Y):
        """
        Construct a separate GP model for every output/target dimension, i.e. for every Delta_{t, i}.

        :param X: Data points, state-action pairs. (num_steps, state_dim + control_dim)
        :param Y: Data points, state differences. (num_steps, state_dim)
        :return:
        """
        for i in range(self.num_outputs):
            kern = gpflow.kernels.RBF()
            kern.lengthscales.prior = tfd.Gamma(1.0, 10.0)
            kern.variance.prior = tfd.Gamma(1.5, 2.0)

            model = gpflow.models.GPR(
                data=(X, Y[:, i : i + 1]), kernel=kern, mean_function=None
            )
            model.likelihood.variance.assign(2e-6)
            gpflow.set_trainable(model.likelihood, False)

            self.models.append(model)
            self.optimizers.append(gpflow.optimizers.Scipy())
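A hedged sketch of a matching training step for these models (not part of the original example; it uses the same GPflow Scipy optimizer call that appears in Example #30 below):

    def optimize_models(self):
        for model, optimizer in zip(self.models, self.optimizers):
            optimizer.minimize(model.training_loss, model.trainable_variables)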
Example #14
    def __init__(self,
                 Nc,
                 Ng,
                 Kc=0,
                 Kg=0,
                 effLen=None,
                 intercept=None,
                 intercept_mode='gene',
                 sigma=None,
                 tau_prior=[3, 27],
                 name=None):
        self.Nc = Nc
        self.Ng = Ng
        self.Kc = Kc
        self.Kg = Kg
        self.effLen = effLen  # (Ng, 3 * 2)
        self.intercept_mode = intercept_mode

        self.Z_loc = tf.Variable(
            tf.random.normal([Nc, Ng]),
            name='Z_loc',
            constraint=lambda t: tf.clip_by_value(t, -9, 9))
        self.Z_std_log = tf.Variable(tf.random.normal([Nc, Ng]), name='Z_var')

        self.Wc_loc = tf.Variable(tf.random.normal([Kc, Ng]), name='Wc_loc')
        self.Wg_loc = tf.Variable(tf.random.normal([Nc, Kg]), name='Wg_loc')

        if intercept_mode.upper() == 'GENE':
            _intercept_shape = (1, Ng)
            _sigma_shape = (1, Ng)
        elif intercept_mode.upper() == 'CELL':
            _intercept_shape = (Nc, 1)
            _sigma_shape = (Nc, 1)
        else:
            # print("[BIRE2] Error: intercept_mode only supports gene or cell")
            _intercept_shape = (1, Ng)
            _sigma_shape = (1, Ng)

        if intercept is None:
            self.intercept = tf.Variable(
                tf.random.normal(_intercept_shape),
                name='bias',
                constraint=lambda t: tf.clip_by_value(t, -9, 9))
        else:
            _intercept = tf.ones(_intercept_shape) * intercept
            self.intercept = tf.constant(_intercept, name='bias')

        self.tau_a_log = tf.Variable(tf.ones(_sigma_shape), name='tau_a_log')
        self.tau_b_log = tf.Variable(tf.ones(_sigma_shape), name='tau_b_log')

        print(tau_prior)
        self.tauPrior = tfd.Gamma(tau_prior[0], tau_prior[1])
Example #15
def empirical_Ey_and_Ey2_tf_logscore(ct=1.0,
                                     rt=1.0,
                                     cb=0.1,
                                     rb=0.1,
                                     nsamples_latent=100,
                                     nsamples_output=3,
                                     N=1,
                                     M=1,
                                     K=25):
    """ Returns E_prior[Y] and E_prior[Y^2] for given set of hyperparameters.
        The outputs are (tf) differentiable w.r.t. hyperparameters. 
        Gradients are obtained using log-score derivative trick.
    """
    if N != 1: warnings.warn("N!=1 will be ignored!")
    if N != 1: warnings.warn("M!=1 will be ignored!")

    theta = tfd.Gamma(ct, rt).sample((K, nsamples_latent))
    beta = tfd.Gamma(cb, rb).sample((K, nsamples_latent))

    latent = tf.reduce_sum(theta * beta, 0)
    poisson = tfd.Poisson(rate=latent)
    y_samples = poisson.sample([nsamples_output])

    conditional_expectation = tfp.monte_carlo.expectation(
        f=lambda x: x,
        samples=y_samples,
        log_prob=poisson.log_prob,
        use_reparameterization=False)

    conditional_expectation_squared = tfp.monte_carlo.expectation(
        f=lambda x: x * x,
        samples=y_samples,
        log_prob=poisson.log_prob,
        use_reparameterization=False)

    expectation = tf.reduce_mean(conditional_expectation)
    expectation_squared = tf.reduce_mean(conditional_expectation_squared)

    return expectation, expectation_squared
Example #16
def empirical_Ey_and_Ey2_tf(ct=1.0,
                            rt=1.0,
                            cb=0.1,
                            rb=0.1,
                            nsamples_latent=100,
                            nsamples_output=3,
                            N=1,
                            M=1,
                            K=25):
    """ Returns E_prior[Y] and E_prior[Y^2] for given set of hyperparameters.
        The outputs are (tf) differentiable w.r.t. hyperparameters. 
    """
    if N != 1: warnings.warn("N!=1 will be ignored!")
    if N != 1: warnings.warn("M!=1 will be ignored!")
    #ct, rt, cb, rb = _make_tf(ct), _make_tf(rt), _make_tf(cb), _make_tf(rb)

    theta = tfd.Gamma(ct, rt).sample((K, nsamples_latent))
    beta = tfd.Gamma(cb, rb).sample((K, nsamples_latent))

    latent = tf.reduce_sum(theta * beta, 0)
    poisson = tfd.Poisson(rate=latent)
    #y_samples = np.random.poisson(latent, size=[nsamples_output, nsamples_latent]) # NO x NL
    y_samples = tf.stop_gradient(poisson.sample([nsamples_output]))

    y_probs = tf.exp(poisson.log_prob(y_samples))
    total_prob = tf.reduce_sum(y_probs, 0)

    conditional_expectation = tf.reduce_sum(y_probs * y_samples,
                                            0) / total_prob
    conditional_expectation_squared = tf.reduce_sum(y_probs * (y_samples**2),
                                                    0) / total_prob

    expectation = tf.reduce_mean(conditional_expectation)
    expectation_squared = tf.reduce_mean(conditional_expectation_squared)

    return expectation, expectation_squared
Example #17
 def create_models(self, data):
     self.models = []
     for i in range(self.num_outputs):
         kernel = gpflow.kernels.SquaredExponential(
             lengthscales=tf.ones([
                 data[0].shape[1],
             ], dtype=float_type))
         transformed_lengthscales = Parameter(
             kernel.lengthscales, transform=positive(lower=1e-3))
         kernel.lengthscales = transformed_lengthscales
         kernel.lengthscales.prior = tfd.Gamma(f64(1.1), f64(1 / 10.0))
         if i == 0:
             self.models.append(
                 FakeGPR((data[0], data[1][:, i:i + 1]), kernel))
         else:
             self.models.append(
                 FakeGPR((data[0], data[1][:, i:i + 1]), kernel,
                         self.models[-1].X))
Example #18
    def german_credit_model():
        x_numeric = tf.constant(numericals.astype(np.float32))
        x_categorical = [tf.one_hot(c, c.max() + 1) for c in categoricals]
        all_x = tf.concat([x_numeric] + x_categorical, 1)
        num_features = int(all_x.shape[1])

        overall_log_scale = ed.Normal(loc=0.,
                                      scale=10.,
                                      name='overall_log_scale')
        beta_log_scales = ed.TransformedDistribution(
            tfd.Gamma(0.5 * tf.ones([num_features]), 0.5),
            bijector=tfb.Invert(tfb.Exp()),
            name='beta_log_scales')
        beta = ed.Normal(loc=tf.zeros([num_features]),
                         scale=tf.exp(overall_log_scale + beta_log_scales),
                         name='beta')
        logits = tf.einsum('nd,md->mn', all_x, beta[tf.newaxis, :])
        return ed.Bernoulli(logits=logits, name='y')
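A hedged sketch of how such an Edward2 model is typically scored (not from the original source; it assumes ed is Edward2 and that y_obs is a hypothetical array of observed 0/1 labels): make_log_joint_fn turns the generative function into a joint log density over its named random variables.

    log_joint = ed.make_log_joint_fn(german_credit_model)

    def target_log_prob_fn(overall_log_scale, beta_log_scales, beta):
        return log_joint(overall_log_scale=overall_log_scale,
                         beta_log_scales=beta_log_scales,
                         beta=beta,
                         y=y_obs)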
Example #19
    def set_prior(self, mu_prior=None, sigma_prior=None, theta_prior=None):
        """Set prior ditributions
        """
        # Prior distributions for the means
        if mu_prior is None:
            self.mu_prior = tfd.Normal(tf.zeros((self.Nc, self.Nd)),
                                       tf.ones((self.Nc, self.Nd)))
        else:
            self.mu_prior = mu_prior

        # Prior distributions for the standard deviations
        if sigma_prior is None:
            self.sigma_prior = tfd.Gamma(2 * tf.ones((self.Nc, self.Nd)),
                                         2 * tf.ones((self.Nc, self.Nd)))
        else:
            self.sigma_prior = sigma_prior

        # Prior distributions for the component weights
        if theta_prior is None:
            self.theta_prior = tfd.Dirichlet(5 * tf.ones((self.Nc, )))
        else:
            self.theta_prior = theta_prior
Example #20
    def set_prior(self, mu_prior=None, sigma_prior=None, ident_prior=None):
        """Set prior ditributions
        """
        # Prior distributions for the means
        if mu_prior is None:
            self.mu_prior = tfd.Normal(tf.zeros((self.Nc, self.Nd)),
                                       tf.ones((self.Nc, self.Nd)))
        else:
            self.mu_prior = mu_prior

        # Prior distributions for the standard deviations
        if sigma_prior is None:
            self.sigma_prior = tfd.Gamma(2 * tf.ones((self.Nc, self.Nd)),
                                         2 * tf.ones((self.Nc, self.Nd)))
        else:
            self.sigma_prior = sigma_prior

        # Prior distributions for sample assignment
        if ident_prior is None:
            self.ident_prior = tfd.Multinomial(
                total_count=1, probs=tf.ones((self.Ns, self.Nc)) / self.Nc)
        else:
            self.ident_prior = ident_prior
Example #21
 def __repr__(self):
     component_mean = reprlib.repr(
         tfd.Gamma(concentration=.1,
                   rate=.001).sample([self.num_components, self.var_dim]))
     return str(component_mean)
Example #22
 def _base_dist(self, alpha: TensorLike, beta: TensorLike, *args, **kwargs):
     return tfd.Gamma(concentration=alpha, rate=beta, *args, **kwargs)
Example #23
 def sigma(self):
     """Variational posterior for distribution variance"""
     return tfd.Gamma(self.alpha, self.beta)
Example #24
	def construct_model(self):

		with self.graph.as_default():

			self.sess.close()
			self.sess = tf.compat.v1.InteractiveSession()
			self.sess.as_default()

			self.x = tf.convert_to_tensor(self.rescaled_features,  dtype = tf.float32)
			self.y = tf.convert_to_tensor(self.targets,   dtype = tf.float32)

			# construct precisness
			self.tau_rescaling = np.zeros((self.num_obs, self.bnn_output_size))
			kernel_ranges      = self.config.kernel_ranges
			for obs_index in range(self.num_obs):
				self.tau_rescaling[obs_index] += kernel_ranges
			self.tau_rescaling = self.tau_rescaling**2
	
			# construct weight and bias shapes
			activations = [tf.nn.tanh]
			weight_shapes, bias_shapes = [[self.feature_size, self._hidden_shape]], [[self._hidden_shape]]
			for _ in range(1, self._num_layers - 1):
				activations.append(tf.nn.tanh)
				weight_shapes.append([self._hidden_shape, self._hidden_shape])
				bias_shapes.append([self._hidden_shape])
			activations.append(lambda x: x)
			weight_shapes.append([self._hidden_shape, self.bnn_output_size])
			bias_shapes.append([self.bnn_output_size])

			# construct prior
			self.prior_layer_outputs = [self.x]
			self.priors = {}
			for layer_index in range(self._num_layers):
				weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
				activation = activations[layer_index]
	
				weight = tfd.Normal(loc = tf.zeros(weight_shape) + self._weight_loc, scale = tf.zeros(weight_shape) + self._weight_scale)
				bias   = tfd.Normal(loc = tf.zeros(bias_shape)   + self._bias_loc,   scale = tf.zeros(bias_shape)   + self._bias_scale)
				self.priors['weight_%d' % layer_index] = weight
				self.priors['bias_%d'   % layer_index] = bias
	
				prior_layer_output = activation(tf.matmul(self.prior_layer_outputs[-1], weight.sample()) + bias.sample())
				self.prior_layer_outputs.append(prior_layer_output)
					
			self.prior_bnn_output = self.prior_layer_outputs[-1]
			self.prior_tau_normed = tfd.Gamma( self.num_obs**2 + tf.zeros((self.num_obs, self.bnn_output_size)), tf.ones((self.num_obs, self.bnn_output_size)))
			self.prior_tau        = self.prior_tau_normed.sample() / self.tau_rescaling
			self.prior_scale	  = tfd.Deterministic(1. / tf.sqrt(self.prior_tau))
	
			# construct posterior
			self.post_layer_outputs = [self.x]
			self.posteriors = {}
			for layer_index in range(self._num_layers):
				weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
				activation = activations[layer_index]
	
				weight = tfd.Normal(loc = tf.Variable(tf.random.normal(weight_shape)), scale = tf.nn.softplus(tf.Variable(tf.zeros(weight_shape))))
				bias   = tfd.Normal(loc = tf.Variable(tf.random.normal(bias_shape)),   scale = tf.nn.softplus(tf.Variable(tf.zeros(bias_shape))))

				self.posteriors['weight_%d' % layer_index] = weight
				self.posteriors['bias_%d'   % layer_index] = bias
	
				post_layer_output = activation(tf.matmul(self.post_layer_outputs[-1], weight.sample()) + bias.sample())
				self.post_layer_outputs.append(post_layer_output)
	
			self.post_bnn_output = self.post_layer_outputs[-1]				
			self.post_tau_normed = tfd.Gamma( self.num_obs**2 + tf.Variable(tf.zeros((self.num_obs, self.bnn_output_size))), tf.nn.softplus(tf.Variable(tf.ones((self.num_obs, self.bnn_output_size)))))
			self.post_tau        = self.post_tau_normed.sample() / self.tau_rescaling
			self.post_sqrt_tau   = tf.sqrt(self.post_tau)
			self.post_scale	     = tfd.Deterministic(1. / self.post_sqrt_tau)

			# map bnn output to prediction
			post_kernels = {}
			targets_dict = {}
			inferences   = []
	
			target_element_index = 0
			kernel_element_index = 0
	
			while kernel_element_index < len(self.config.kernel_names):
						
				kernel_type = self.config.kernel_types[kernel_element_index]
				kernel_size = self.config.kernel_sizes[kernel_element_index]
	
				feature_begin, feature_end = target_element_index, target_element_index + 1 
				kernel_begin, kernel_end   = kernel_element_index, kernel_element_index + kernel_size
	
				prior_relevant = self.prior_bnn_output[:, kernel_begin : kernel_end]
				post_relevant  = self.post_bnn_output[:,  kernel_begin : kernel_end]
							
				target  = self.y[:, kernel_begin : kernel_end]
				lowers, uppers = self.config.kernel_lowers[kernel_begin : kernel_end], self.config.kernel_uppers[kernel_begin : kernel_end]

				prior_support = (uppers - lowers) * (1.2 * tf.nn.sigmoid(prior_relevant) - 0.1) + lowers
				post_support  = (uppers - lowers) * (1.2 * tf.nn.sigmoid(post_relevant) - 0.1)  + lowers
	
				prior_predict = tfd.Normal(prior_support, self.prior_scale[:, kernel_begin : kernel_end].sample())				
				post_predict  = tfd.Normal(post_support,  self.post_scale[:,  kernel_begin : kernel_end].sample())
	
				targets_dict[prior_predict] = target
				post_kernels['param_%d' % target_element_index] = {
					'loc':       tfd.Deterministic(post_support),
					'sqrt_prec': tfd.Deterministic(self.post_sqrt_tau[:, kernel_begin : kernel_end]),
					'scale':     tfd.Deterministic(self.post_scale[:, kernel_begin : kernel_end].sample())}

				inference = {'pred': post_predict, 'target': target}
				inferences.append(inference)

				target_element_index += 1
				kernel_element_index += kernel_size

			self.post_kernels = post_kernels
			self.targets_dict = targets_dict
				
			loss = 0.
			for inference in inferences:
				loss += - tf.reduce_sum( inference['pred'].log_prob(inference['target']) )

			self.optimizer = tf.compat.v1.train.AdamOptimizer(self._learning_rate)
			self.train_op  = self.optimizer.minimize(loss)

			tf.compat.v1.global_variables_initializer().run()
Example #25
 def tauDist(self):
     return tfd.Gamma(tf.exp(self.tau_a_log), tf.exp(self.tau_b_log))
Example #26
    def construct_model(self, learning_rate=None):

        if learning_rate is None:
            learning_rate = self.learning_rate

        with self.graph.as_default():

            self.sess.close()
            self.sess = tf.compat.v1.InteractiveSession()
            self.sess.as_default()

            self.x = tf.convert_to_tensor(self.rescaled_features, dtype=tf.float32)
            self.y = tf.convert_to_tensor(self.targets, dtype=tf.float32)

            # construct precisness
            self.tau_rescaling = np.zeros((self.num_obs, self.bnn_output_size))
            kernel_ranges      = self.config.kernel_ranges
            for obs_index in range(self.num_obs):
                self.tau_rescaling[obs_index] += kernel_ranges
            self.tau_rescaling = self.tau_rescaling**2

            # construct weight and bias shapes
            activations = [tf.nn.tanh]
            weight_shapes, bias_shapes = [[self.feature_size, self.hidden_shape]], [[self.hidden_shape]]
            for _ in range(1, self.num_layers - 1):
                activations.append(tf.nn.tanh)
                weight_shapes.append([self.hidden_shape, self.hidden_shape])
                bias_shapes.append([self.hidden_shape])
            activations.append(lambda x: x)
            weight_shapes.append([self.hidden_shape, self.bnn_output_size])
            bias_shapes.append([self.bnn_output_size])

            # ---------------
            # construct prior
            # ---------------
            self.prior_layer_outputs = [self.x]
            self.priors = {}
            for layer_index in range(self.num_layers):
                weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
                activation = activations[layer_index]

                weight = tfd.Normal(loc=tf.zeros(weight_shape) + self.weight_loc, scale=tf.zeros(weight_shape) + self.weight_scale)
                bias = tfd.Normal(loc=tf.zeros(bias_shape) + self.bias_loc, scale=tf.zeros(bias_shape) + self.bias_scale)
                self.priors['weight_%d' % layer_index] = weight
                self.priors['bias_%d' % layer_index] = bias

                prior_layer_output = activation(tf.matmul(self.prior_layer_outputs[-1], weight.sample()) + bias.sample())
                self.prior_layer_outputs.append(prior_layer_output)

            self.prior_bnn_output = self.prior_layer_outputs[-1]
            # draw precisions from gamma distribution
            self.prior_tau_normed = tfd.Gamma(
                            12*(self.num_obs/self.frac_feas)**2 + tf.zeros((self.num_obs, self.bnn_output_size)),
                            tf.ones((self.num_obs, self.bnn_output_size)),
                        )
            self.prior_tau        = self.prior_tau_normed.sample() / self.tau_rescaling
            self.prior_scale      = tfd.Deterministic(1. / tf.sqrt(self.prior_tau))

            # -------------------
            # construct posterior
            # -------------------
            self.post_layer_outputs = [self.x]
            self.posteriors = {}
            for layer_index in range(self.num_layers):
                weight_shape, bias_shape = weight_shapes[layer_index], bias_shapes[layer_index]
                activation = activations[layer_index]

                weight = tfd.Normal(loc=tf.Variable(tf.random.normal(weight_shape)), scale=tf.nn.softplus(tf.Variable(tf.zeros(weight_shape))))
                bias = tfd.Normal(loc=tf.Variable(tf.random.normal(bias_shape)), scale=tf.nn.softplus(tf.Variable(tf.zeros(bias_shape))))

                self.posteriors['weight_%d' % layer_index] = weight
                self.posteriors['bias_%d' % layer_index] = bias

                post_layer_output = activation(tf.matmul(self.post_layer_outputs[-1], weight.sample()) + bias.sample())
                self.post_layer_outputs.append(post_layer_output)

            self.post_bnn_output = self.post_layer_outputs[-1]
            self.post_tau_normed = tfd.Gamma(
                                12*(self.num_obs/self.frac_feas)**2 + tf.Variable(tf.zeros((self.num_obs, self.bnn_output_size))),
                                tf.nn.softplus(tf.Variable(tf.ones((self.num_obs, self.bnn_output_size)))),
                            )
            self.post_tau        = self.post_tau_normed.sample() / self.tau_rescaling
            self.post_sqrt_tau   = tf.sqrt(self.post_tau)
            self.post_scale	     = tfd.Deterministic(1. / self.post_sqrt_tau)

            # map bnn output to prediction
            post_kernels = {}
            targets_dict = {}
            inferences = []

            target_element_index = 0
            kernel_element_index = 0

            while kernel_element_index < len(self.config.kernel_names):

                kernel_type = self.config.kernel_types[kernel_element_index]
                kernel_size = self.config.kernel_sizes[kernel_element_index]

                feature_begin, feature_end = target_element_index, target_element_index + 1
                kernel_begin, kernel_end   = kernel_element_index, kernel_element_index + kernel_size

                prior_relevant = self.prior_bnn_output[:, kernel_begin: kernel_end]
                post_relevant  = self.post_bnn_output[:,  kernel_begin: kernel_end]

                if kernel_type == 'continuous':

                    target = self.y[:, kernel_begin: kernel_end]
                    lowers, uppers = self.config.kernel_lowers[kernel_begin: kernel_end], self.config.kernel_uppers[kernel_begin : kernel_end]

                    prior_support = (uppers - lowers) * (1.2 * tf.nn.sigmoid(prior_relevant) - 0.1) + lowers
                    post_support = (uppers - lowers) * (1.2 * tf.nn.sigmoid(post_relevant) - 0.1) + lowers

                    prior_predict = tfd.Normal(prior_support, self.prior_scale[:, kernel_begin: kernel_end].sample())
                    post_predict = tfd.Normal(post_support,  self.post_scale[:,  kernel_begin: kernel_end].sample())

                    targets_dict[prior_predict] = target
                    post_kernels['param_%d' % target_element_index] = {
                        'loc':       tfd.Deterministic(post_support),
                        'sqrt_prec': tfd.Deterministic(self.post_sqrt_tau[:, kernel_begin: kernel_end]),
                        'scale':     tfd.Deterministic(self.post_scale[:, kernel_begin: kernel_end].sample())}

                    inference = {'pred': post_predict, 'target': target}
                    inferences.append(inference)

                elif kernel_type in ['categorical', 'discrete']:
                    target = tf.cast(self.y[:, kernel_begin: kernel_end], tf.int32)

                    prior_temperature = 0.5 + 10.0 / (self.num_obs / self.frac_feas)
                    #prior_temperature = 1.0
                    post_temperature = prior_temperature

                    prior_support = prior_relevant
                    post_support = post_relevant

                    prior_predict_relaxed = tfd.RelaxedOneHotCategorical(prior_temperature, prior_support)
                    prior_predict = tfd.OneHotCategorical(probs=prior_predict_relaxed.sample())

                    post_predict_relaxed = tfd.RelaxedOneHotCategorical(post_temperature, post_support)
                    post_predict = tfd.OneHotCategorical(probs=post_predict_relaxed.sample())

                    targets_dict[prior_predict] = target
                    post_kernels['param_%d' % target_element_index] = {'probs': post_predict_relaxed}

                    inference = {'pred': post_predict, 'target': target}
                    inferences.append(inference)

                    '''
                        Temperature annealing schedule:
                            - temperature of 100   yields 1e-2 deviation from uniform
                            - temperature of  10   yields 1e-1 deviation from uniform
                            - temperature of   1   yields *almost* perfect agreement with expectation
                            - temperature of   0.1 yields perfect agreement with expectation
                    '''

                else:
                    raise GryffinUnknownSettingsError(f'did not understand kernel type: {kernel_type}')

                target_element_index += 1
                kernel_element_index += kernel_size

            self.post_kernels = post_kernels
            self.targets_dict = targets_dict

            self.loss = 0.
            for inference in inferences:
                self.loss += - tf.reduce_sum(inference['pred'].log_prob(inference['target']))

            self.optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)
            self.train_op = self.optimizer.minimize(self.loss)

            tf.compat.v1.global_variables_initializer().run()
Example #27
                                               validate_args=True,
                                               allow_nan_stats=False)
 },
 "gamma": {
     "parameters": {
         "concentration": {
             "support": [0, inf],
             "activation function": softplus
         },
         "rate": {
             "support": [0, inf],
             "activation function": softplus
         }
     },
     "class":
     lambda theta: tensorflow_distributions.Gamma(
         concentration=theta["concentration"], rate=theta["rate"])
 },
 "categorical": {
     "parameters": {
         "logits": {
             "support": [-inf, inf],
             "activation function": identity
         }
     },
     "class":
     lambda theta: tensorflow_distributions.Categorical(logits=theta[
         "logits"]),
 },
 "bernoulli": {
     "parameters": {
         "logits": {
Example #28
# Data used by the control model (pre-intervention)
xc = x[x <= ip]
yc = y[x <= ip]

xd = x[x > ip]
yd = y[x > ip]

# Data used by the (post-)intervention model
xi = xd[xd <= ip2]
yi = yd[xd <= ip2]

xe = xd[xd > ip2]
ye = yd[xd > ip2]

ks = [RBF(), RBF()]

ks[1].variance.prior = dist.Gamma(np.float64(20), np.float64(4.35))
m1 = None
for k in ks:
    m1 = GPMContainer(gf.utilities.deepcopy(k), [(x, y)], [])
    m2 = GPMContainer(gf.utilities.deepcopy(k), [(xc, yc), (xd, yd)], [ip])
    for name, m in zip(['c', 'd'], [m1, m2]):
        m.train()
        m.plot_regression()
        plt.show()
        print(f"{name} l: {m.log_posterior_density()}")

print(f"trainable parameters: {m1.trainable_parameters}")
print(f"log prior density: {m1.kernel.variance.log_prior_density()}")
Example #29
 def _init_distribution(conditions, **kwargs):
     concentration, rate = conditions["concentration"], conditions["rate"]
     return tfd.Gamma(concentration=concentration, rate=rate, **kwargs)
Example #30
# %% [markdown]
# Secondly, we initialize the model to the maximum likelihood solution.

# %%
optimizer = gpflow.optimizers.Scipy()
optimizer.minimize(model.training_loss, model.trainable_variables)

print(f"log posterior density at optimum: {model.log_posterior_density()}")

# %% [markdown]
# Thirdly, we add priors to the hyperparameters.

# %%
# tfp.distributions dtype is inferred from parameters - so convert to 64-bit
model.kernel.lengthscales.prior = tfd.Gamma(f64(1.0), f64(1.0))
model.kernel.variance.prior = tfd.Gamma(f64(1.0), f64(1.0))
model.likelihood.variance.prior = tfd.Gamma(f64(1.0), f64(1.0))
model.mean_function.A.prior = tfd.Normal(f64(0.0), f64(10.0))
model.mean_function.b.prior = tfd.Normal(f64(0.0), f64(10.0))

gpflow.utilities.print_summary(model)

# %% [markdown]
# We now sample from the posterior using HMC.

# %%
num_burnin_steps = ci_niter(300)
num_samples = ci_niter(500)

# Note that here we need model.trainable_parameters, not trainable_variables - only parameters can have priors!
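# What follows is a sketch of the sampling step itself, mirroring Example #12 above;
# ci_niter, f64, and the optimized model are assumed to be defined earlier in the notebook.
hmc_helper = gpflow.optimizers.SamplingHelper(
    model.log_posterior_density, model.trainable_parameters
)

hmc = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=hmc_helper.target_log_prob_fn, num_leapfrog_steps=10, step_size=0.01
)
adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
    hmc, num_adaptation_steps=10, target_accept_prob=f64(0.75), adaptation_rate=0.1
)


@tf.function
def run_chain_fn():
    return tfp.mcmc.sample_chain(
        num_results=num_samples,
        num_burnin_steps=num_burnin_steps,
        current_state=hmc_helper.current_state,
        kernel=adaptive_hmc,
        trace_fn=lambda _, pkr: pkr.inner_results.is_accepted,
    )


samples, _ = run_chain_fn()
# Map the unconstrained draws back to constrained hyperparameter values.
parameter_samples = hmc_helper.convert_to_constrained_values(samples)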