Example 1
    def from_loc_and_scale(cls,
                           loc,
                           scale,
                           low=0.,
                           high=1e10,
                           scale_shift=1e-7):
        """
        Instantiate a learnable distribution with good default bijectors.

        Parameters
        ----------
        loc : array
            The initial location of the distribution
        scale : array
            The initial scale parameter of the distribution
        low : float or array (optional)
            The lower limit of the support for the distribution.
        high : float or array (optional)
            The upper limit of the support for the distribution.
        scale_shift : float (optional)
            A small constant added to the scale to increase numerical stability.
        """
        loc = tfp.util.TransformedVariable(
            loc,
            tfb.Softplus(),
        )
        scale = tfp.util.TransformedVariable(
            scale,
            tfb.Chain([
                tfb.Shift(scale_shift),
                tfb.Softplus(),
            ]),
        )
        return cls(loc, scale, low, high)
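A minimal usage sketch of the pieces above, assuming the classmethod parameterizes a tfd.TruncatedNormal (an assumption; the host class is not shown in this snippet, so the direct construction below is an illustrative stand-in rather than the original API):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors

# Assumed equivalent of what from_loc_and_scale assembles.
loc = tfp.util.TransformedVariable(1., tfb.Softplus())
scale = tfp.util.TransformedVariable(
    0.1, tfb.Chain([tfb.Shift(1e-7), tfb.Softplus()]))
dist = tfd.TruncatedNormal(loc, scale, low=0., high=1e10)

# Both parameters wrap unconstrained variables, so the distribution can be
# fit by gradient descent.
with tf.GradientTape() as tape:
    loss = -tf.reduce_mean(dist.log_prob([0.5, 1.0, 2.0]))
grads = tape.gradient(loss, dist.trainable_variables)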
Example 2
    def test_stochastic_optimization(self):
        seed = test_util.test_seed_stream()
        tf.random.set_seed(seed())

        # Example of fitting one normal to another using a
        # Monte Carlo variational loss.
        locs = tf.Variable(tf.random.normal([10], seed=seed()))
        scales = tfp.util.TransformedVariable(
            tf.nn.softplus(tf.random.normal([10], seed=seed())),
            tfb.Softplus())
        trained_dist = tfd.Normal(locs, scales)
        target_dist = tfd.Normal(loc=-0.4, scale=1.2)

        optimizer = tf.optimizers.Adam(learning_rate=0.1)

        @tf.function(autograph=False)
        def optimization_step():
            with tf.GradientTape() as tape:
                loss = tfp.vi.monte_carlo_variational_loss(
                    target_log_prob_fn=target_dist.log_prob,
                    surrogate_posterior=trained_dist,
                    sample_size=20,
                    seed=seed())
            grads = tape.gradient(loss, trained_dist.trainable_variables)
            optimizer.apply_gradients(
                zip(grads, trained_dist.trainable_variables))
            return loss, grads

        criterion = (tfp.optimizer.convergence_criteria.
                     SuccessiveGradientsAreUncorrelated(window_size=10,
                                                        min_num_steps=20))

        loss, grads = optimization_step()
        self.evaluate(tf1.global_variables_initializer())
        auxiliary_state = criterion.bootstrap(loss, grads,
                                              trained_dist.trainable_variables)
        for step in range(1, 100):
            loss, grads = optimization_step()
            has_converged, auxiliary_state = criterion.one_step(
                step, loss, grads, trained_dist.trainable_variables,
                auxiliary_state)

            has_converged_ = self.evaluate(has_converged)
            if has_converged_:
                break

        # Check that the criterion successfully stopped the optimization
        # (at step 32 with the test seed as of this writing).
        self.assertLess(step, 99)  # pylint: disable=undefined-loop-variable

        # Because this is a stochastic optimization with no learning rate decay,
        # we will not converge to the true values, just to a stationary distribution
        # that (hopefully) includes them.
        self.assertLess(self.evaluate(tf.reduce_sum(loss)), 1.5)
        self.assertAllClose(*self.evaluate(
            (tf.reduce_mean(locs), target_dist.mean())),
                            atol=0.5)
        self.assertAllClose(*self.evaluate(
            (tf.reduce_mean(scales), target_dist.stddev())),
                            atol=0.5)
Example 3
    def __init__(self, scale=None, **kwargs):
        if scale is None:
            scaling = tfb.Identity()
        else:
            scaling = tfb.Scale(scale)
        transform = tfb.Chain([tfb.Softplus(), scaling])
        super().__init__(transform, **kwargs)
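Note that tfb.Chain applies its bijectors from right to left, so the transform above computes softplus(scale * x) rather than scale * softplus(x). A small sketch with an arbitrary scale of 2. illustrates the ordering:

import tensorflow_probability as tfp
tfb = tfp.bijectors

transform = tfb.Chain([tfb.Softplus(), tfb.Scale(2.)])
# The rightmost bijector runs first: forward(x) = softplus(2. * x).
transform.forward(1.)  # ≈ 2.1269, i.e. softplus(2.0)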
Example 4
def test_laue(likelihood_model, prior_model, scaling_model, laue_inputs,
              mc_samples):
    nrefls = np.max(BaseModel.get_refl_id(laue_inputs)) + 1
    n_images = np.max(BaseModel.get_image_id(laue_inputs)) + 1

    # Degrees of freedom for the Student's t likelihood and prior.
    dof = 4.
    if likelihood_model == StudentTLikelihood:
        likelihood = likelihood_model(dof)
    else:
        likelihood = likelihood_model()

    if prior_model == WilsonPrior:
        prior = prior_model(
            np.random.choice([True, False], nrefls),
            np.ones(nrefls).astype('float32'),
        )
    elif prior_model == StudentTReferencePrior:
        prior = prior_model(
            np.ones(nrefls).astype('float32'),
            np.ones(nrefls).astype('float32'), dof)
    else:
        prior = prior_model(
            np.ones(nrefls).astype('float32'),
            np.ones(nrefls).astype('float32'),
        )

    mlp_scaler = MLPScaler(2, 3)
    if scaling_model == HybridImageScaler:
        image_scaler = ImageScaler(n_images)
        scaler = HybridImageScaler(mlp_scaler, image_scaler)
    elif scaling_model == MLPScaler:
        scaler = mlp_scaler

    surrogate_posterior = tfd.TruncatedNormal(
        tf.Variable(prior.mean()),
        tfp.util.TransformedVariable(
            prior.stddev() / 10.,
            tfb.Softplus(),
        ),
        low=1e-5,
        high=1e10,
    )

    merger = VariationalMergingModel(surrogate_posterior, prior, likelihood,
                                     scaler, mc_samples)
    ipred = merger(laue_inputs)

    isfinite = np.all(np.isfinite(ipred.numpy()))
    assert isfinite

    merger = VariationalMergingModel(surrogate_posterior, prior, likelihood,
                                     scaler)
    merger.compile('Adam')
Example 5
    def __init__(self, scale=None, skewness=None, tailweight=None, **kwargs):
        if scale is None:
            scaling = tfb.Identity()
        else:
            scaling = tfb.Scale(scale)
        transform = tfb.Chain([
            scaling,
            tfb.Softplus(),
            tfb.SinhArcsinh(
                skewness,
                tailweight,
            ),
        ])
        super().__init__(transform, **kwargs)
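Read right to left, this chain first applies SinhArcsinh (controlling skewness and tail weight), then Softplus to enforce positivity, and finally the optional Scale, so the scale factor acts on the already-positive output.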
Example 6
# X represents the full set of points for which we have 
# measured values
X = np.vstack((grid_x,grid_y)).T

# y is the array of values at each x
y = np.sin(grid_x)*np.sin(grid_y/10.)

if dtype == tf.float32:
    X = X.astype(np.float32)
    y = y.astype(np.float32)


# Kernel parameters. We will use an automatic relevance determination (ARD) RBF kernel.
amplitude = tfp.util.TransformedVariable(
    1., tfb.Softplus(), dtype=dtype, name='amplitude')
length_scale = tfp.util.TransformedVariable(
    tf.ones(2, dtype=dtype), tfb.Softplus(), dtype=dtype, name='length_scale')

# The FeatureScaled wrapper is how you create ARD kernels in TFP.
kernel = tfp.math.psd_kernels.FeatureScaled(
    tfp.math.psd_kernels.ExponentiatedQuadratic(amplitude=amplitude),
    length_scale)


# This is the overall amount of noise associated with each y-value.
# This is called β^{-1} in the Hensman paper. In practice, this could
# be tuned or learned.
observation_noise_variance = tfp.util.TransformedVariable(
    1., tfb.Softplus(), dtype=dtype, name='observation_noise_variance')
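As a sketch of how these pieces fit together (not part of the original snippet; it assumes the sparse variational GP of the Hensman paper via tfd.VariationalGaussianProcess, with inducing points chosen arbitrarily from X):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

# Hypothetical continuation: X is [N, 2], y is [N]; take the first 50 grid
# points as inducing points.
num_inducing = 50
inducing_index_points = tf.Variable(X[:num_inducing], dtype=dtype,
                                    name='inducing_index_points')
vgp = tfd.VariationalGaussianProcess(
    kernel=kernel,
    index_points=X,
    inducing_index_points=inducing_index_points,
    variational_inducing_observations_loc=tf.Variable(
        tf.zeros([num_inducing], dtype=dtype)),
    variational_inducing_observations_scale=tf.Variable(
        tf.eye(num_inducing, dtype=dtype)),
    observation_noise_variance=observation_noise_variance)

# The training objective is the negative ELBO.
loss = vgp.variational_loss(observations=y, observation_index_points=X)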
Example 7
    )).reshape((2, gridpoints**2))

# y is the array of values at each x
y = np.sin(grid_x) * np.sin(grid_y / 10.)

# X represents the full set of points for which we have
# measured values
X = np.vstack((grid_x, grid_y)).T

if dtype == tf.float32:
    X = X.astype(np.float32)
    y = y.astype(np.float32)

# Kernel parameters. We will use an automatic relevance determination (ARD) RBF kernel.
amplitude = tfp.util.TransformedVariable(1.,
                                         tfb.Softplus(),
                                         dtype=dtype,
                                         name='amplitude')
length_scale = tfp.util.TransformedVariable(tf.ones(2, dtype=dtype),
                                            tfb.Softplus(),
                                            dtype=dtype,
                                            name='length_scale')

# The FeatureScaled wrapper is how you create ARD kernels in TFP.
kernel = tfp.math.psd_kernels.FeatureScaled(
    tfp.math.psd_kernels.ExponentiatedQuadratic(amplitude=amplitude),
    length_scale)

# This is the overall amount of noise associated with each y-value.
# This is called β^{-1} in the Hensman paper. In practice, this could