  def test_noise_variance_posterior_matches_expected(self):
    # Generate a synthetic regression task.
    num_features = 5
    num_outputs = 20
    design_matrix, _, targets = self.evaluate(
        self._random_regression_task(num_features=num_features,
                                     num_outputs=num_outputs,
                                     batch_shape=[2],
                                     seed=test_util.test_seed()))

    observation_noise_variance_prior_concentration = 0.03
    observation_noise_variance_prior_scale = 0.015
    # Posterior on noise variance if all weights are zero.
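    # Derivation: with all weights zero, each target is y_i ~ Normal(0,
    # sigma**2) i.i.d., so conjugacy of the InverseGamma(concentration=a,
    # scale=b) prior gives the posterior
    #   sigma**2 | y ~ InverseGamma(a + n/2, b + sum_i(y_i**2) / 2),
    # which is constructed directly below.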
    naive_posterior = tfd.InverseGamma(
        concentration=(observation_noise_variance_prior_concentration +
                       num_outputs / 2.),
        scale=(observation_noise_variance_prior_scale +
               tf.reduce_sum(tf.square(targets), axis=-1) / 2.))

    # Compare to a sampler with weights constrained to be near zero. We can do
    # this by reducing the width of the slab (here), or by reducing the prior
    # probability of the slab (below); both should give equivalent noise
    # posteriors.
    tight_slab_sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix,
        weights_prior_precision=tf.eye(num_features) * 1e6,
        observation_noise_variance_prior_concentration=(
            observation_noise_variance_prior_concentration),
        observation_noise_variance_prior_scale=(
            observation_noise_variance_prior_scale))
    self.assertAllClose(
        tight_slab_sampler.observation_noise_variance_posterior_concentration,
        naive_posterior.concentration)
    self.assertAllClose(
        tight_slab_sampler._initialize_sampler_state(
            targets=targets,
            nonzeros=tf.ones([num_features], dtype=tf.bool)
        ).observation_noise_variance_posterior_scale,
        naive_posterior.scale,
        atol=1e-2)

    downweighted_slab_sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix,
        observation_noise_variance_prior_concentration=(
            observation_noise_variance_prior_concentration),
        observation_noise_variance_prior_scale=(
            observation_noise_variance_prior_scale))
    self.assertAllClose(
        (downweighted_slab_sampler.
         observation_noise_variance_posterior_concentration),
        naive_posterior.concentration)
    self.assertAllClose(
        downweighted_slab_sampler._initialize_sampler_state(
            targets=targets,
            nonzeros=tf.zeros([num_features], dtype=tf.bool)
        ).observation_noise_variance_posterior_scale,
        naive_posterior.scale)

  def test_updated_state_matches_initial_computation(self):
    design_matrix, _, targets = self._random_regression_task(
        num_outputs=2,
        num_features=3,
        batch_shape=[],
        seed=test_util.test_seed())

    sampler = spike_and_slab.SpikeSlabSampler(design_matrix=design_matrix,
                                              nonzero_prior_prob=0.3)

    all_nonzero_sampler_state = sampler._initialize_sampler_state(
        targets=targets, nonzeros=tf.convert_to_tensor([True, True, True]))

    # Flipping a weight from nonzero to zero (slab to spike) should result in
    # the same state as if we'd initialized with that sparsity pattern.
    flipped_state_from_update = sampler._flip_feature(
        all_nonzero_sampler_state, idx=0)
    flipped_state_from_scratch = sampler._initialize_sampler_state(
        targets=targets,
        nonzeros=tf.convert_to_tensor([False, True, True]))
    self.assertAllCloseNested(flipped_state_from_update,
                              flipped_state_from_scratch)

    # Reverse direction (spike to slab).
    double_flipped_state_from_update = sampler._flip_feature(
        flipped_state_from_update, idx=0)
    self.assertAllCloseNested(double_flipped_state_from_update,
                              all_nonzero_sampler_state,
                              atol=1e-4)
  def test_sampler_respects_pseudo_observations(self):
    design_matrix = self.evaluate(
        samplers.uniform([2, 20, 5], seed=test_util.test_seed()))
    first_obs = 2.
    second_obs = 10.
    first_sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix,
        default_pseudo_observations=first_obs)
    second_sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix,
        default_pseudo_observations=second_obs)

    self.assertNotAllClose(
        first_sampler.weights_prior_precision,
        second_sampler.weights_prior_precision)
    self.assertAllClose(
        first_sampler.weights_prior_precision / first_obs,
        second_sampler.weights_prior_precision / second_obs)
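    # Note that only linear scaling of the prior precision in
    # `default_pseudo_observations` is asserted here; the precise default form
    # (a Zellner-style prior with precision proportional to
    # `default_pseudo_observations * X^T X`) is an assumption about the
    # implementation, not something this test verifies.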

  def test_posterior_on_nonzero_subset_matches_bayesian_regression(self):
    # Generate a synthetic regression task.
    design_matrix, _, targets = self.evaluate(
        self._random_regression_task(num_features=5,
                                     num_outputs=20,
                                     batch_shape=[2],
                                     seed=test_util.test_seed()))

    # Utilities to extract values for nonzero-weight features.
    nonzeros = tf.convert_to_tensor([True, False, True, False, True])
    nonzero_subvector = (
        lambda x: tf.boolean_mask(x, nonzeros, axis=ps.rank(x) - 1))
    nonzero_submatrix = lambda x: tf.boolean_mask(  # pylint: disable=g-long-lambda
        tf.boolean_mask(x, nonzeros, axis=ps.rank(x) - 2),
        nonzeros,
        axis=ps.rank(x) - 1)

    # Compute the weight posterior mean and precision for these nonzeros.
    sampler = spike_and_slab.SpikeSlabSampler(design_matrix)
    initial_state = sampler._initialize_sampler_state(targets=targets,
                                                      nonzeros=nonzeros)

    # Compute the analytic posterior for the regression problem restricted to
    # only the selected features. Note that by slicing a submatrix of the
    # prior precision we are implicitly *conditioning* on having observed the
    # other weights to be zero (which is sensible in this case), versus
    # slicing into the covariance, which would give the marginal
    # (unconditional) prior on the selected weights.
    (restricted_weights_posterior_mean,
     restricted_weights_posterior_prec) = tfd.mvn_conjugate_linear_update(
         prior_scale=tf.linalg.cholesky(
             tf.linalg.inv(
                 nonzero_submatrix(sampler.weights_prior_precision))),
         linear_transformation=nonzero_subvector(design_matrix),
         likelihood_scale=tf.eye(20),
         observation=targets)

    # The sampler's posterior should match the posterior from the restricted
    # problem.
    self.assertAllClose(
        nonzero_subvector(initial_state.conditional_weights_mean),
        restricted_weights_posterior_mean)
    self.assertAllClose(
        nonzero_submatrix(
            initial_state.conditional_posterior_precision_chol),
        tf.linalg.cholesky(restricted_weights_posterior_prec.to_dense()))
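
    # As an illustrative cross-check (not part of the sampler API): with unit
    # observation noise, matching `likelihood_scale=tf.eye(20)` above, the
    # standard Bayesian linear-regression identities give
    #   precision_post = precision_prior + X^T X
    #   mean_post = precision_post^{-1} X^T y.
    x = nonzero_subvector(design_matrix)
    direct_posterior_prec = (
        nonzero_submatrix(sampler.weights_prior_precision) +
        tf.linalg.matmul(x, x, adjoint_a=True))
    direct_posterior_mean = tf.linalg.solve(
        direct_posterior_prec,
        tf.linalg.matvec(x, targets, adjoint_a=True)[..., tf.newaxis])[..., 0]
    self.assertAllClose(direct_posterior_mean,
                        restricted_weights_posterior_mean, atol=1e-4)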

  def test_samples_from_weights_prior(self):
    nonzero_prior_prob = 0.7
    num_outputs, num_features = 200, 4

    # With the design matrix set to zero, the targets provide no information
    # about the weights, so the sampler should sample from the prior.
    design_matrix = tf.zeros([num_outputs, num_features])
    targets = 0.42 * samplers.normal([num_outputs],
                                     seed=test_util.test_seed())
    sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix=design_matrix,
        weights_prior_precision=tf.eye(num_features),
        nonzero_prior_prob=nonzero_prior_prob)

    # Draw 100 posterior samples. Since all state needed for the
    # internal feature sweep is a function of the sparsity pattern, it's
    # sufficient to pass the sparsity pattern (by way of the weights) as
    # the outer-loop state.
    @tf.function(autograph=False)
    def loop_body(var_weights_seed, _):
      _, weights, seed = var_weights_seed
      seed, next_seed = samplers.split_seed(seed, n=2)
      variance, weights = sampler.sample_noise_variance_and_weights(
          initial_nonzeros=tf.not_equal(weights, 0.),
          targets=targets,
          seed=seed)
      return variance, weights, next_seed

    init_seed = test_util.test_seed(sampler_type='stateless')
    variance_samples, weight_samples, _ = tf.scan(
        fn=loop_body,
        initializer=(1., tf.ones([num_features]), init_seed),
        elems=tf.range(100))

    # With the default (relatively uninformative) prior, the noise variance
    # posterior mean should be close to its maximum-likelihood value (the
    # empirical variance of the targets).
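    # (For a weak InverseGamma(a, b) prior, the posterior mean of the noise
    # variance is approximately (b + sum(y**2)/2) / (a + n/2 - 1), which for
    # large n is close to sum(y**2)/n, i.e. the empirical variance of the
    # zero-mean targets.)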
    self.assertAllClose(tf.reduce_mean(variance_samples),
                        tf.math.reduce_std(targets)**2,
                        atol=0.03)
    # Since there is no evidence for the weights, the sparsity of our samples
    # should match the prior.
    nonzero_weight_samples = tf.cast(tf.not_equal(weight_samples, 0.),
                                     tf.float32)
    self.assertAllClose(nonzero_prior_prob,
                        tf.reduce_mean(nonzero_weight_samples),
                        atol=0.03)
  def test_sanity_check_sweep_over_features(self):
    num_outputs = 100
    num_features = 3
    batch_shape = [2]
    design_matrix, true_weights, targets = self.evaluate(
        self._random_regression_task(
            num_outputs=num_outputs,
            num_features=num_features,
            batch_shape=batch_shape,
            # Specify weights with a clear sparsity pattern.
            weights=tf.convert_to_tensor([[10., 0., -10.],
                                          [0., 0., 0.5]]),
            seed=test_util.test_seed()))

    sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix,
        # Ensure the probability of keeping an irrelevant feature is tiny.
        nonzero_prior_prob=1e-6)
    initial_state = sampler._initialize_sampler_state(
        targets=targets,
        nonzeros=tf.convert_to_tensor([True, True, True]),
        observation_noise_variance=1.)
    final_state = self.evaluate(
        sampler._resample_all_features(
            initial_state, seed=test_util.test_seed()))

    # Check that we recovered the true sparsity pattern and approximate weights.
    self.assertAllEqual(final_state.nonzeros, [[True, False, True],
                                               [False, False, True]])
    self.assertAllClose(final_state.conditional_weights_mean,
                        true_weights, rtol=0.05, atol=0.15)
    # Check shapes of other components.
    self.assertAllEqual(final_state.conditional_prior_precision_chol.shape,
                        batch_shape + [num_features, num_features])
    self.assertAllEqual(final_state.conditional_posterior_precision_chol.shape,
                        batch_shape + [num_features, num_features])
    self.assertAllEqual(
        final_state.observation_noise_variance_posterior_scale.shape,
        batch_shape)

    posterior = sampler._get_conditional_posterior(final_state)
    posterior_variances, posterior_weights = self.evaluate(
        posterior.sample(10, seed=test_util.test_seed()))
    self.assertAllFinite(posterior_variances)
    self.assertAllFinite(posterior_weights)
  def test_deterministic_given_seed(self, use_xla):
    design_matrix, _, targets = self.evaluate(
        self._random_regression_task(
            num_outputs=3, num_features=4, batch_shape=[],
            seed=test_util.test_seed()))

    sampler = spike_and_slab.SpikeSlabSampler(design_matrix)

    initial_nonzeros = tf.convert_to_tensor([True, False, False, True])
    seed = test_util.test_seed(sampler_type='stateless')

    @tf.function(jit_compile=use_xla)
    def do_sample(seed):
      return sampler.sample_noise_variance_and_weights(
          targets, initial_nonzeros, seed=seed)
    variance1, weights1 = self.evaluate(do_sample(seed))
    variance2, weights2 = self.evaluate(do_sample(seed))
    self.assertAllFinite(variance1)
    self.assertAllClose(variance1, variance2)
    self.assertAllFinite(weights1)
    self.assertAllClose(weights1, weights2)
  def test_updated_state_matches_initial_computation(
      self, num_outputs, num_features, num_flips, batch_shape, use_xla):

    rng = test_util.test_np_rng()
    initial_nonzeros = rng.randint(
        low=0, high=2, size=batch_shape + [num_features]).astype(bool)
    flip_idxs = rng.choice(
        num_features, size=num_flips, replace=False).astype(np.int32)
    if batch_shape:
      should_flip = rng.randint(
          low=0, high=2, size=[num_flips] + batch_shape).astype(bool)
    else:
      should_flip = np.array([True] * num_flips)

    nonzeros = initial_nonzeros.copy()
    for i in range(num_flips):
      nonzeros[..., flip_idxs[i]] = (
          nonzeros[..., flip_idxs[i]] != should_flip[i])

    design_matrix, _, targets = self._random_regression_task(
        num_outputs=num_outputs, num_features=num_features,
        batch_shape=batch_shape, seed=test_util.test_seed())
    sampler = spike_and_slab.SpikeSlabSampler(design_matrix=design_matrix,
                                              nonzero_prior_prob=0.3)

    @tf.function(autograph=False, jit_compile=use_xla)
    def _do_flips():
      state = sampler._initialize_sampler_state(
          targets=targets,
          nonzeros=initial_nonzeros,
          observation_noise_variance=1.)
      def _do_flip(state, i):
        new_state = sampler._flip_feature(state, tf.gather(flip_idxs, i))
        return mcmc_util.choose(tf.gather(should_flip, i), new_state, state)
      return tf.foldl(_do_flip, elems=tf.range(num_flips), initializer=state)

    self.assertAllCloseNested(
        sampler._initialize_sampler_state(targets, nonzeros, 1.),
        _do_flips(),
        atol=num_outputs * 2e-4, rtol=num_outputs * 2e-4)
def _build_sampler_loop_body(model,
                             observed_time_series,
                             is_missing=None):
  """Builds a Gibbs sampler for the given model and observed data.

  Args:
    model: A `tf.sts.StructuralTimeSeries` model instance. This must be of the
      form constructed by `build_model_for_gibbs_sampling`.
    observed_time_series: Float `Tensor` time series of shape
      `[..., num_timesteps]`.
    is_missing: Optional `bool` `Tensor` of shape `[..., num_timesteps]`. A
      `True` value indicates that the observation for that timestep is missing.
  Returns:
    sampler_loop_body: Python callable that performs a single cycle of Gibbs
      sampling. Its first argument is a `GibbsSamplerState`, and it returns a
      new `GibbsSamplerState`. The second argument (passed by `tf.scan`) is
      ignored.
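
  Example: the returned callable is typically driven with `tf.scan`, as in
  the minimal sketch below (construction of the initial `GibbsSamplerState`
  is omitted, and `num_results` and `initial_state` are placeholders;
  `fit_with_gibbs_sampling` in this module is the supported entry point):

    loop_body = _build_sampler_loop_body(model, observed_time_series)
    samples = tf.scan(loop_body,
                      elems=tf.range(num_results),
                      initializer=initial_state)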
  """
  level_component = model.components[0]
  if not (isinstance(level_component, sts.LocalLevel) or
          isinstance(level_component, sts.LocalLinearTrend)):
    raise ValueError('Expected the first model component to be an instance of '
                     '`tfp.sts.LocalLevel` or `tfp.sts.LocalLinearTrend`; '
                     'instead saw {}'.format(level_component))
  model_has_slope = isinstance(level_component, sts.LocalLinearTrend)

  regression_component = model.components[1]
  if not (isinstance(regression_component, sts.LinearRegression) or
          isinstance(regression_component, SpikeAndSlabSparseLinearRegression)):
    raise ValueError('Expected the second model component to be an instance of '
                     '`tfp.sts.LinearRegression` or '
                     '`SpikeAndSlabSparseLinearRegression`; '
                     'instead saw {}'.format(regression_component))
  model_has_spike_slab_regression = isinstance(
      regression_component, SpikeAndSlabSparseLinearRegression)

  if is_missing is not None:  # Ensure series does not contain NaNs.
    observed_time_series = tf.where(is_missing,
                                    tf.zeros_like(observed_time_series),
                                    observed_time_series)

  num_observed_steps = prefer_static.shape(observed_time_series)[-1]
  design_matrix = _get_design_matrix(model).to_dense()[:num_observed_steps]
  if is_missing is not None:
    # Replace design matrix with zeros at unobserved timesteps. This ensures
    # they will not affect the posterior on weights.
    design_matrix = tf.where(is_missing[..., tf.newaxis],
                             tf.zeros_like(design_matrix),
                             design_matrix)

  # Untransform scale priors -> variance priors by reaching through the Sqrt
  # bijector.
  observation_noise_param = model.parameters[0]
  if 'observation_noise' not in observation_noise_param.name:
    raise ValueError('Model parameters {} do not match the expected sampler '
                     'state.'.format(model.parameters))
  observation_noise_variance_prior = observation_noise_param.prior.distribution
  if model_has_slope:
    level_scale_variance_prior, slope_scale_variance_prior = [
        p.prior.distribution for p in level_component.parameters]
  else:
    level_scale_variance_prior = (
        level_component.parameters[0].prior.distribution)

  if model_has_spike_slab_regression:
    spike_and_slab_sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix,
        weights_prior_precision=regression_component._weights_prior_precision,  # pylint: disable=protected-access
        nonzero_prior_prob=regression_component._sparse_weights_nonzero_prob,  # pylint: disable=protected-access
        observation_noise_variance_prior_concentration=(
            observation_noise_variance_prior.concentration),
        observation_noise_variance_prior_scale=(
            observation_noise_variance_prior.scale),
        observation_noise_variance_upper_bound=(
            observation_noise_variance_prior.upper_bound
            if hasattr(observation_noise_variance_prior, 'upper_bound')
            else None))
  else:
    weights_prior_scale = (
        regression_component.parameters[0].prior.scale)

  def sampler_loop_body(previous_sample, _):
    """Runs one sampler iteration, resampling all model variables."""

    (weights_seed,
     level_seed,
     observation_noise_scale_seed,
     level_scale_seed,
     loop_seed) = samplers.split_seed(
         previous_sample.seed, n=5, salt='sampler_loop_body')
    # Preserve backward-compatible seed behavior by splitting slope separately.
    slope_scale_seed, = samplers.split_seed(
        previous_sample.seed, n=1, salt='sampler_loop_body_slope')

    # We encourage a reasonable initialization by sampling the weights first,
    # so at the first step they are regressed directly against the observed
    # time series. If we instead sampled the level first it might 'explain away'
    # some observed variation that we would ultimately prefer to explain through
    # the regression weights, because the level can represent arbitrary
    # variation, while the weights are limited to representing variation in the
    # subspace given by the design matrix.
    if model_has_spike_slab_regression:
      (observation_noise_variance,
       weights) = spike_and_slab_sampler.sample_noise_variance_and_weights(
           initial_nonzeros=tf.not_equal(previous_sample.weights, 0.),
           targets=observed_time_series - previous_sample.level,
           seed=weights_seed)
      observation_noise_scale = tf.sqrt(observation_noise_variance)
    else:
      weights = _resample_weights(
          design_matrix=design_matrix,
          target_residuals=observed_time_series - previous_sample.level,
          observation_noise_scale=previous_sample.observation_noise_scale,
          weights_prior_scale=weights_prior_scale,
          seed=weights_seed)
      # Noise scale will be resampled below.
      observation_noise_scale = previous_sample.observation_noise_scale

    regression_residuals = observed_time_series - tf.linalg.matvec(
        design_matrix, weights)
    latents = _resample_latents(
        observed_residuals=regression_residuals,
        level_scale=previous_sample.level_scale,
        slope_scale=previous_sample.slope_scale if model_has_slope else None,
        observation_noise_scale=observation_noise_scale,
        initial_state_prior=level_component.initial_state_prior,
        is_missing=is_missing,
        seed=level_seed)
    level = latents[..., 0]
    level_residuals = level[..., 1:] - level[..., :-1]
    if model_has_slope:
      slope = latents[..., 1]
      level_residuals -= slope[..., :-1]
      slope_residuals = slope[..., 1:] - slope[..., :-1]

    # Estimate level scale from the empirical changes in level.
    level_scale = _resample_scale(
        prior=level_scale_variance_prior,
        observed_residuals=level_residuals,
        is_missing=None,
        seed=level_scale_seed)
    if model_has_slope:
      slope_scale = _resample_scale(
          prior=slope_scale_variance_prior,
          observed_residuals=slope_residuals,
          is_missing=None,
          seed=slope_scale_seed)
    if not model_has_spike_slab_regression:
      # Estimate noise scale from the residuals.
      observation_noise_scale = _resample_scale(
          prior=observation_noise_variance_prior,
          observed_residuals=regression_residuals - level,
          is_missing=is_missing,
          seed=observation_noise_scale_seed)

    return GibbsSamplerState(
        observation_noise_scale=observation_noise_scale,
        level_scale=level_scale,
        slope_scale=(slope_scale if model_has_slope
                     else previous_sample.slope_scale),
        weights=weights,
        level=level,
        slope=(slope if model_has_slope
               else previous_sample.slope),
        seed=loop_seed)
  return sampler_loop_body