def __init__(
        self,
        pvector: tf.Tensor,
        distribution: tfd.Distribution = tfd.Normal(loc=0., scale=1.)
    ) -> None:
        """
        Generate the flow for the given parameter vector. This would typically
        be the output of a neural network.

        To use it as a loss function see
        `bernstein_flow.losses.BernsteinFlowLoss`.

        :param      pvector:       The parameter vector.
        :type       pvector:       Tensor
        :param      distribution:  The base distribution to use.
        :type       distribution:  Distribution

        :returns:   The transformed distribution (normalizing flow)
        :rtype:     Distribution
        """
        num_dist = pvector.shape[1]

        flows = []
        for d in range(num_dist):
            pv = pvector[:, d]
            flow = BernsteinFlow(pv)
            flows.append(flow)

        joint = tfd.JointDistributionSequential(flows, name='joint_bs_flows')
        super().__init__(joint, name='MultivariateBernsteinFlow')
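A hedged usage sketch (not part of the original source), assuming the enclosing class is named MultivariateBernsteinFlow and that the parameter vector comes from a network head with one column per target dimension:

# Hypothetical names: `dense_head` and `features` stand in for whatever network produces the parameters.
pvector = dense_head(features)             # one column of Bernstein parameters per target dimension
dist = MultivariateBernsteinFlow(pvector)  # the distribution whose __init__ is shown above
nll = -dist.log_prob(y)                    # usable as a negative log-likelihood, cf. BernsteinFlowLoss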
Example #2
def lossf(pars, data):
    thetasb, thetab = pars
    nuis_sb = [tfd.Normal(loc=thetasb[i], scale=1) for i in range(Npars)]
    poises_sb = [tfd.Poisson(rate=s + b + thetasb[i]) for i in range(Npars)]
    joint_sb = tfd.JointDistributionSequential(poises_sb + nuis_sb)

    nuis_b = [tfd.Normal(loc=thetab[i], scale=1) for i in range(Npars)]
    poises_b = [tfd.Poisson(rate=b + thetab[i]) for i in range(Npars)]
    joint_b = tfd.JointDistributionSequential(poises_b + nuis_b)

    # Tensor shape matching debugging
    #print("[sample_shape, batch_shape, event_shape]")
    #print("joint_sb.batch_shape:",joint_sb.batch_shape[0])
    #print("joint_sb.event_shape:",joint_sb.event_shape[0])
    #print("samples shapes:", [k.shape for k in samples0][0])

    # The broadcasting works like this:

    # 1. Define n = len(batch_shape) + len(event_shape). (For scalar distributions, len(event_shape)=0.)
    # 2. If the input tensor t has fewer than n dimensions, pad its shape by adding dimensions of size 1 on the left until it has exactly n dimensions. Call the resulting tensor t'.
    # 3. Broadcast the n rightmost dimensions of t' against the [batch_shape, event_shape] of the distribution you're computing a log_prob for. In more detail: for the dimensions where t' already matches the distribution, do nothing, and for the dimensions where t' has a singleton, replicate that singleton the appropriate number of times. Any other situation is an error. (For scalar distributions, we only broadcast against batch_shape, since event_shape = [].)
    # 4. Now we're finally able to compute the log_prob. The resulting tensor will have shape [sample_shape, batch_shape], where sample_shape is defined to be any dimensions of t or t' to the left of the n-rightmost dimensions: sample_shape = shape(t)[:-n].

    # We have, e.g.
    # joint_sb.batch_shape: (10000, 5)
    # joint_sb.event_shape: ()
    # and we want to compute (10000, 5) log probabilities, broadcasting
    # 10000 samples over the "5" dimension.
    # So for that, according to the above rules, the input sample tensor shape
    # should be: (10000, 1)
    # And the resulting log-probability tensor should have shape
    # (10000, 5)
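    # A hedged illustration (not part of the original): a per-component sample
    # tensor of shape (10000,) can be given an explicit trailing singleton,
    #   v = tf.expand_dims(v, axis=-1)    # (10000,) -> (10000, 1)
    # so that it broadcasts across the "5" batch dimension and log_prob
    # returns a tensor of shape (10000, 5).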

    qsb = -2 * (joint_sb.log_prob(data))
    qb = -2 * (joint_b.log_prob(data))

    #print("qsb.shape:", qsb.shape)
    #print("qsb_s.shape:", qsb_s.shape)

    total_loss = tf.math.reduce_sum(qsb) + tf.math.reduce_sum(qb)

    # First return: total loss function value
    # Second return: 'true' parameter values (for convergence calculations)
    # Third return: extra variables whose final values you want to know at the end of the optimisation
    return total_loss, (thetasb, thetab), (qsb, qb)
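A hedged usage sketch (not part of the original script), assuming `Npars`, `s`, `b` and the pseudodata `data` are defined as elsewhere in this file; the parameter shapes are illustrative only:

# Hypothetical free parameters, one nuisance parameter per bin.
thetasb = tf.Variable(tf.zeros([Npars]))
thetab = tf.Variable(tf.zeros([Npars]))

with tf.GradientTape() as tape:
    total_loss, true_pars, (qsb, qb) = lossf((thetasb, thetab), data)

grads = tape.gradient(total_loss, [thetasb, thetab])  # gradients for an optimiser step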
Example #3
def logp(x, y, z):
    X = tf.constant(z)
    u = 0
    jds = tfd.JointDistributionSequential([
        tfd.Normal(loc=x, scale=1.),  # m
        tfd.Normal(loc=y, scale=1.),  # b
        lambda b, m: tfd.Normal(loc=m * X + b, scale=1.)  # Y
    ])

    return jds.log_prob(x, y, z)
Example #4
    def __call__(self, x):
        n_sample = len(x)
        joint_prob = tfd.JointDistributionSequential([
            tfd.Independent(
                tfd.Normal(
                    loc=tf.zeros((n_sample, n_factor), dtype=self.dtype),
                    scale=1.0),
                reinterpreted_batch_ndims=1),
            lambda eta: tfd.Independent(
                tfd.Bernoulli(
                    logits=self.intercept + eta @ tf.transpose(self.loading),
                    dtype=self.dtype),
                reinterpreted_batch_ndims=1)])
        joint_prob._to_track = self
        return joint_prob
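A hedged usage sketch (not in the original), assuming `model` is an instance of the enclosing class and `x` is a batch of binary responses:

joint = model(x)                          # prior over latent factors plus Bernoulli likelihood
eta_sample, y_sample = joint.sample()     # simulate latent factors and responses
log_p = joint.log_prob([eta_sample, x])   # joint log-density at the observed responses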
Example #5
    def __call__(self, x):
        n_sample = len(x)
        c, d = create_cd(self.n_category, self.dtype)
        joint_prob = tfd.JointDistributionSequential([
            tfd.Independent(
                tfd.Normal(
                    loc=tf.zeros((n_sample, n_factor), dtype=self.dtype),
                    scale=1.0),
                reinterpreted_batch_ndims=1),
            lambda eta: tfd.Independent(
                tfd.Categorical(
                    probs=grm_irf(eta, self.intercept, self.loading, c, d),
                    dtype=self.dtype),
                reinterpreted_batch_ndims=1)])
        joint_prob._to_track = self
        return joint_prob
Example #6
    def gen_mixture(self, out):
        pvs = self.slice_parameter_vectors(out)
        mixtures = []

        for pv in pvs:
            logits, locs, log_scales = pv
            scales = tf.math.exp(log_scales)  # map log-scales to positive scale parameters
            mixtures.append(
                tfd.MixtureSameFamily(
                    mixture_distribution=tfd.Categorical(logits=logits),
                    components_distribution=tfd.Normal(loc=locs,
                                                       scale=scales)))

        joint = tfd.JointDistributionSequential(mixtures,
                                                name='joint_mixtures')
        blkws = tfd.Blockwise(joint)
        return blkws
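A hedged usage sketch (not in the original), assuming `model` exposes gen_mixture and `net_out` is the network output that slice_parameter_vectors expects:

dist = model.gen_mixture(net_out)   # Blockwise distribution over all target dimensions
nll = -dist.log_prob(y_true)        # per-example negative log-likelihood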
Example #7
#s_in = tf.constant([5],dtype=float)
s_in2 = tf.expand_dims(s_in, 0)
s = tf.broadcast_to(s_in2, shape=(N, len(s_in)))

b = tf.expand_dims(tf.constant(50, dtype=float), 0)

# Nuisance parameters (independent Gaussians)
zero = tf.expand_dims(tf.constant(0, dtype=float), 0)
nuis0 = [tfd.Normal(loc=zero, scale=1) for i in range(Npars)]

# Bunch of independent Poisson distributions that we want to combine
poises0 = [tfd.Poisson(rate=b) for i in range(Npars)]
poises0s = [tfd.Poisson(rate=s_in + b) for i in range(Npars)]

# Construct joint distributions
joint0 = tfd.JointDistributionSequential(poises0 + nuis0)
joint0s = tfd.JointDistributionSequential(poises0s + nuis0)

# Generate background-only pseudodata to be fitted
samples0 = joint0.sample(N)

# Generate signal+background pseudodata to be fitted
samples0s = joint0s.sample(N)

# We want the sample shapes to dimensionally match the versions of
# the distributions that have free parameters:
# [sample_shape, batch_shape, event_shape]

print("[sample_shape, batch_shape, event_shape]")
print("joint0.batch_shape:", joint0.batch_shape[0])
print("joint0.event_shape:", joint0.event_shape[0])
Example #8
def nig(mean, scale1, alpha, beta):
    pd = tfd.JointDistributionSequential([
        tfd.Independent(tfd.InverseGaussian(scale1, (alpha**2 - beta**2)**.5)),
        lambda mix: tfd.Normal(loc=mean + beta * mix, scale=mix)
    ])
    return pd.log_prob([pd.sample()[0], x])
Example #9
import functools

import tensorflow.compat.v2 as tf
import tensorflow_probability as tfp
from tensorflow_probability import bijectors as tfb
from tensorflow_probability import distributions as tfd
from tensorflow_probability.python.internal import dtype_util
from tensorflow_probability.python.internal import prefer_static as ps
from tensorflow_probability.python.internal import tensorshape_util

tf.enable_v2_behavior()
X = tf.constant(1.0)
scale1 = 1.2
alpha = 12.3
beta = 0.2
mean = 0.0
x = tf.Variable(2.2)
pd = tfd.JointDistributionSequential([
    tfd.Independent(tfd.InverseGaussian(scale1, (alpha**2 - beta**2)**.5),
                    reinterpreted_batch_ndims=0),
    lambda mix: tfd.Normal(loc=mean + beta * mix, scale=mix)
])


def _make_val_and_grad_fn(value_fn):
    @functools.wraps(value_fn)
    def val_and_grad(x):
        return tfp.math.value_and_gradient(value_fn, x)

    return val_and_grad


def nig(mean, scale1, alpha, beta):
    pd = tfd.JointDistributionSequential([
        tfd.Independent(tfd.InverseGaussian(scale1, (alpha**2 - beta**2)**.5)),
Example #10
def setup_and_run_hmc(threadid):
    np.random.seed(threadid)
    tf.random.set_seed(threadid)

    def sp(x):
        # softplus transform with shift
        return tf.nn.softplus(x) + 1e-4

    def local_periodic_kernel(x1):
        # locally periodic kernel with single variable parameter. Other parameters are set
        # to encode annual activity pattern (period=365), RBF kernel is set to allow for
        # slow varying mean locations (2-year lengthscale).

        k1 = tfp.math.psd_kernels.ExpSinSquared(x1, np.float64(1.0),
                                                np.float64(365.0))
        k2 = tfp.math.psd_kernels.ExponentiatedQuadratic(
            np.float64(1.0), np.float64(1 * 365.0))
        #k1 = tfp.math.psd_kernels.ExpSinSquared(x1,np.float64(0.5),np.float64(365.0))
        #k2 = tfp.math.psd_kernels.ExponentiatedQuadratic(np.float64(1.0),x2*np.float64(365.0))
        #k2 = tfp.math.psd_kernels.ExponentiatedQuadratic(x2,np.float64(2*365.0))
        return k1 * k2

    # initial value of kernel parameters
    mparams_init = [6.0]
    #mparams_init=[5.0,4.0]
    lparams_init = [5.0]
    aparams_init = [-1.0]

    # prior distribution on parameters - changed to 20
    lpriors = [tfd.Normal(loc=np.float64(5.), scale=np.float64(1))]
    apriors = [tfd.Normal(loc=np.float64(-1.), scale=np.float64(1))]

    # transform for parameter to ensure positive
    mtransforms = [sp]

    # prior distribution on parameter
    mpriors = [tfd.Normal(loc=np.float64(6.), scale=np.float64(0.1))
               ]  #, tfd.Normal(loc=np.float64(0.), scale=np.float64(0.1))]

    # create the model
    mover = moveNS(T,
                   X,
                   Z,
                   BATCH_SIZE=1000,
                   MIN_REMAIN=910,
                   mkernel=local_periodic_kernel,
                   mparams_init=mparams_init,
                   mpriors=mpriors,
                   mtransforms=mtransforms,
                   aparams_init=aparams_init,
                   apriors=apriors,
                   lparams_init=lparams_init,
                   lpriors=lpriors,
                   mean_obs_noise=0,
                   std_obs_noise=5.0)

    def build_trainable_location_scale_distribution(initial_loc,
                                                    initial_scale):

        with tf.name_scope('build_trainable_location_scale_distribution'):
            dtype = tf.float32
            initial_loc = initial_loc * tf.ones(tf.shape(initial_scale),
                                                dtype=dtype)
            initial_scale = tf.nn.softplus(initial_scale *
                                           tf.ones_like(initial_loc))
            loc = tf.Variable(initial_value=initial_loc, name='loc')
            scale = tfp.util.TransformedVariable(
                tf.Variable(initial_scale, name='scale'),
                tfp.bijectors.Softplus())
            posterior_dist = tfd.Normal(loc=loc, scale=scale)
            posterior_dist = tfd.Independent(posterior_dist)

        return posterior_dist

    flat_component_dists = []

    for kparam in mover.kernel_params:
        init_loc = kparam
        init_scale = tf.random.uniform(shape=kparam.shape,
                                       minval=-2,
                                       maxval=2,
                                       dtype=tf.dtypes.float32)
        flat_component_dists.append(
            build_trainable_location_scale_distribution(init_loc, init_scale))

    surrogate_posterior = tfd.JointDistributionSequential(flat_component_dists)

    def target_log_prob_fn(*inputs):
        params = [tf.squeeze(a) for a in inputs]
        loss = mover.log_posterior(*params)
        return loss

    start = time.time()
    losses = tfp.vi.fit_surrogate_posterior(target_log_prob_fn,
                                            surrogate_posterior,
                                            optimizer=tf.optimizers.Adam(
                                                learning_rate=0.1, beta_2=0.9),
                                            num_steps=1)  #4000)#0000)

    steps = []
    max_step = 0.0
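    # Use the fitted mean-field surrogate to initialise HMC: assign each kernel
    # parameter its variational mean and collect the variational standard
    # deviations as candidate per-parameter step sizes.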

    for i in range(len(mover.kernel_params)):
        stdstep = surrogate_posterior.stddev()[i].numpy()
        #print(threadid,i,stdstep)
        meanp = surrogate_posterior.mean()[i].numpy()
        mover.kernel_params[i].assign(meanp)
        if stdstep.max() > max_step:
            max_step = stdstep.max()
        steps.append(stdstep)

    steps = [(1e-2 / max_step) * s for s in steps]
    steps = [1e-2 for s in steps]

    start = time.time()

    # sample from the posterior
    num_samples = 2000  #000##000#0
    burn_in = 500  #000#4#000#5000
    kr = mover.hmc_sample(num_samples=num_samples,
                          skip=4,
                          num_leapfrog_steps=8,
                          burn_in=burn_in,
                          init_step=steps)
    print(np.sum(kr.inner_results.is_accepted.numpy() / num_samples))

    end = time.time()

    means_z = mover.get_mean_samples() + mean_x
    np.save('data/mean_shift_z_' + str(threadid) + '.npy', means_z)
    means = mover.get_mean_samples(X=T[::1]) + mean_x
    np.save('data/mean_shift_' + str(threadid) + '.npy', means)
    lengths = mover.get_lengthscale_samples()
    np.save('data/length_shift_' + str(threadid) + '.npy', lengths)
    amps = mover.get_amplitude_samples()
    np.save('data/amp_shift_' + str(threadid) + '.npy', amps)
    obs_noise_samples = tf.nn.softplus(mover.samples_[0]).numpy()
    np.save('data/obs_shift_' + str(threadid) + '.npy', obs_noise_samples)
    for i in range(len(mover.kernel_params)):
        output = mover.samples_[i].numpy()
        np.save('data/all_shift_' + str(i) + '_' + str(threadid) + '.npy',
                output)
    print(threadid, end - start)
Example #11
cmin = -10.  # lower range of uniform distribution on c
cmax = 10.  # upper range of uniform distribution on c

mmu = 0.  # mean of Gaussian distribution on m
msigma = 10.  # standard deviation of Gaussian distribution on m

# convert x values and data to 32 bit float
x = x.astype(np.float32)  # x is being used globally here
data = data.astype(np.float32)

# set model - contains priors and the expected linear model
model = tfd.JointDistributionSequential([
    tfd.Normal(loc=mmu, scale=msigma, name="m"),  # m prior
    tfd.Uniform(cmin, cmax, name="c"),  # c prior
    lambda c, m: (tfd.Independent(
        tfd.Normal(loc=(m[..., tf.newaxis] * x + c[..., tf.newaxis]),
                   scale=sigma),
        name="data",
        reinterpreted_batch_ndims=1,
    ))
])


def target_log_prob_fn(mvalue, cvalue):
    """Unnormalized target density as a function of states."""
    return model.log_prob((mvalue, cvalue, data))


Nsamples = 2000  # final number of samples
Nburn = 2000  # number of tuning samples
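
The snippet ends before the sampler is invoked; a minimal hedged sketch (not from the original) of how target_log_prob_fn, Nsamples and Nburn might be passed to a TFP HMC chain, assuming tfp is imported as in the other examples:

# Hedged sketch, not the author's sampler: plain HMC over (m, c).
hmc_kernel = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=target_log_prob_fn,
    step_size=0.01,
    num_leapfrog_steps=5)

states, is_accepted = tfp.mcmc.sample_chain(
    num_results=Nsamples,
    num_burnin_steps=Nburn,
    current_state=[tf.constant(0., tf.float32),   # initial m
                   tf.constant(0., tf.float32)],  # initial c
    kernel=hmc_kernel,
    trace_fn=lambda _, kr: kr.is_accepted)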

Example #12
def setup_and_run_hmc(threadid):
    np.random.seed(threadid)
    tf.random.set_seed(threadid)

    def sp(x):
        # softplus transform with shift 
        return tf.nn.softplus(x)+1e-4

    def rbf_kernel(x1):
        # RBF kernel with single variable parameter. Other parameters are set 
        # to encode lengthscale of 20 days
        return tfp.math.psd_kernels.ExponentiatedQuadratic(x1, np.float64(2.0))
        



    # initial value of kernel amplitude
    lparams_init=[0.0, 3.0] 

    aparams_init=[0.0] 

    # transform for parameter to ensure positive
    transforms=[sp] 

    # prior distribution on parameter 
    lpriors = [tfd.Normal(loc = np.float64(0.),scale=np.float64(5.)), 
               tfd.Normal(loc=np.float64(3.), scale=np.float64(1))]
    #              tfd.Normal(loc=np.float64(0.), scale=np.float64(10.0))]

    apriors = [tfd.Normal(loc = np.float64(0.),scale=np.float64(5.))]


    # create the model 
    mover = moveNS(T,X,Z, ID, BATCH_SIZE=1460, velocity=True,
                           #akernel=rbf_kernel, 
                           aparams_init=aparams_init, 
                           apriors=apriors, 
                           #atransforms=transforms,
                           lkernel=rbf_kernel, 
                           lparams_init=lparams_init, 
                           lpriors=lpriors, 
                           ltransforms=transforms, 
                           mean_obs_noise=-5, std_obs_noise=1.0)


    def build_trainable_location_scale_distribution(initial_loc, initial_scale):
        
        with tf.name_scope('build_trainable_location_scale_distribution'):
            dtype = tf.float32
            initial_loc = initial_loc * tf.ones(tf.shape(initial_scale), dtype=dtype)
            initial_scale = tf.nn.softplus(initial_scale * tf.ones_like(initial_loc))
            loc = tf.Variable(initial_value=initial_loc, name='loc')
            scale=tfp.util.TransformedVariable(tf.Variable(initial_scale, name='scale'), tfp.bijectors.Softplus())
            posterior_dist = tfd.Normal(loc=loc, scale=scale)
            posterior_dist = tfd.Independent(posterior_dist)
            
        return posterior_dist


    flat_component_dists = []

    for kparam in mover.kernel_params:
        init_loc = kparam
        init_scale = tf.random.uniform(shape=kparam.shape, minval=-2, maxval=2, dtype=tf.dtypes.float32)
        flat_component_dists.append(build_trainable_location_scale_distribution(init_loc,init_scale))

    surrogate_posterior = tfd.JointDistributionSequential(flat_component_dists)



    def target_log_prob_fn(*inputs):
        params = [tf.squeeze(a) for a in inputs]
        loss = mover.log_posterior(*params)
        return loss




    start = time.time()
    losses = tfp.vi.fit_surrogate_posterior(target_log_prob_fn, surrogate_posterior,optimizer=tf.optimizers.Adam(learning_rate=0.1, beta_2=0.9), num_steps=5000)


    steps = []
    max_step = 0.0
    
    for i in range(len(mover.kernel_params)):
        stdstep = surrogate_posterior.stddev()[i].numpy()
        meanp = surrogate_posterior.mean()[i].numpy()
        mover.kernel_params[i].assign(meanp)
        if stdstep.max()>max_step:
            max_step = stdstep.max()
        steps.append(stdstep)

    steps = [(1e-2/max_step)*s for s in steps] 

    start = time.time()

    # sample from the posterior
    num_samples=200#4000
    burn_in=500
    kr = mover.hmc_sample(num_samples=num_samples, skip=8, burn_in=burn_in, init_step=steps)
    print(np.sum(kr.inner_results.is_accepted.numpy()/num_samples))



    # sample from the posterior
    #mover.hmc_sample(num_samples=2000, skip=0, burn_in=1000)
    end = time.time()
    lengths = mover.get_lengthscale_samples(X=pZ)
    np.save('data/length_switch_' + str(threadid) + '.npy',lengths)
    amps = mover.get_amplitude_samples()
    np.save('data/amp_switch_' + str(threadid) + '.npy',amps)

    for i in range(len(mover.kernel_params)):
        output = mover.samples_[i].numpy()
        np.save('data/all_switch_' + str(i) + '_' + str(threadid) + '.npy',output)


    print(threadid,end - start)