Example #1
  def test_mix_over_posterior_draws(self):
    num_posterior_draws = 3
    batch_shape = [2, 1]
    means = np.random.randn(*np.concatenate([[num_posterior_draws],
                                             batch_shape]))
    variances = np.exp(np.random.randn(*np.concatenate(
        [[num_posterior_draws], batch_shape])))

    posterior_mixture_dist = sts_util.mix_over_posterior_draws(means, variances)

    # Compute the true statistics of the mixture distribution.
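    # With uniform weights over draws k, the law of total variance gives
    # E[X] = mean_k(means[k]) and
    # Var[X] = mean_k(variances[k] + means[k]**2) - E[X]**2.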
    mixture_mean = np.mean(means, axis=0)
    mixture_variance = np.mean(variances + means**2, axis=0) - mixture_mean**2

    self.assertAllClose(mixture_mean,
                        self.evaluate(posterior_mixture_dist.mean()))
    self.assertAllClose(mixture_variance,
                        self.evaluate(posterior_mixture_dist.variance()))
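
This is not necessarily the library's exact implementation, but functionally it is what the test exercises: a uniformly weighted Gaussian mixture over the leading posterior-draws axis. A minimal sketch (the helper name here is illustrative):

```python
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

def mix_over_posterior_draws_sketch(means, variances):
  """Uniform mixture of Normals over the leading (posterior-draws) axis."""
  means = tf.convert_to_tensor(means, dtype_hint=tf.float32)
  variances = tf.convert_to_tensor(variances, dtype_hint=tf.float32)
  # `MixtureSameFamily` mixes over the rightmost batch dimension, so move
  # the posterior-draws axis from the front to the back.
  ndims = len(means.shape)
  perm = list(range(1, ndims)) + [0]
  means = tf.transpose(means, perm)
  variances = tf.transpose(variances, perm)
  num_draws = means.shape[-1]
  return tfd.MixtureSameFamily(
      mixture_distribution=tfd.Categorical(logits=tf.zeros([num_draws])),
      components_distribution=tfd.Normal(loc=means, scale=tf.sqrt(variances)))
```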
Example #2
def _decompose_from_posterior_marginals(
    model, posterior_means, posterior_covs, parameter_samples):
  """Utility method to decompose a joint posterior into components.

  Args:
    model: `tfp.sts.Sum` instance defining an additive STS model.
    posterior_means: float `Tensor` of shape `concat(
      [[num_posterior_draws], batch_shape, num_timesteps, latent_size])`
      representing the posterior mean over latents in an
      `AdditiveStateSpaceModel`.
    posterior_covs: float `Tensor` of shape `concat(
      [[num_posterior_draws], batch_shape, num_timesteps,
      latent_size, latent_size])`
      representing the posterior marginal covariances over latents in an
      `AdditiveStateSpaceModel`.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.

  Returns:
    component_dists: A `collections.OrderedDict` instance mapping
      component `StructuralTimeSeries` instances (elements of `model.components`)
      to `tfd.Distribution` instances representing the posterior marginal
      distributions on the process modeled by each component. Each distribution
      has batch shape matching that of `posterior_means`/`posterior_covs`, and
      event shape of `[num_timesteps]`.
  """

  try:
    model.components
  except AttributeError:
    raise ValueError('Model decomposed into components must be an instance of'
                     ' `tfp.sts.Sum` (passed model {})'.format(model))

  with tf.compat.v1.name_scope('decompose_from_posterior_marginals'):

    # Extract the component means/covs from the joint latent posterior.
    latent_sizes = [component.latent_size for component in model.components]
    component_means = tf.split(posterior_means, latent_sizes, axis=-1)
    component_covs = _split_covariance_into_marginals(
        posterior_covs, latent_sizes)
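    # Note: taking per-component marginals discards the cross-component
    # blocks of the joint covariance, so correlations *between* components
    # are not represented in the returned distributions.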

    # Instantiate per-component state space models, and use them to push the
    # posterior means/covs through the observation model for each component.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(input=posterior_means))[-2]
    component_ssms = model.make_component_state_space_models(
        num_timesteps=num_timesteps,
        param_vals=parameter_samples)
    component_predictive_dists = collections.OrderedDict()
    for (component, component_ssm,
         component_mean, component_cov) in zip(model.components, component_ssms,
                                               component_means, component_covs):
      component_obs_mean, component_obs_cov = (
          component_ssm.latents_to_observations(
              latent_means=component_mean,
              latent_covs=component_cov))

      # Using the observation means and covs, build a mixture distribution
      # that integrates over the posterior draws.
      component_predictive_dists[component] = sts_util.mix_over_posterior_draws(
          means=component_obs_mean[..., 0],
          variances=component_obs_cov[..., 0, 0])
  return component_predictive_dists
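
For reference, this private helper backs the public `tfp.sts.decompose_by_component` entry point. A typical call, assuming a fitted `tfp.sts.Sum` model with posterior `parameter_samples`, might look like:

```python
# Posterior marginals on the process modeled by each additive component.
component_dists = tfp.sts.decompose_by_component(
    model,
    observed_time_series=observed_time_series,
    parameter_samples=parameter_samples)
# `component_dists` is an OrderedDict keyed by the component objects.
for component, dist in component_dists.items():
  print(component.name, dist.mean())  # event shape `[num_timesteps]`
```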
Example #3
def impute_missing_values(model,
                          observed_time_series,
                          parameter_samples,
                          include_observation_noise=False):
    """Runs posterior inference to impute the missing values in a time series.

  This method computes the posterior marginals `p(latent state | observations)`,
  given the time series at observed timesteps (a missingness mask should
  be specified using `tfp.sts.MaskedTimeSeries`). It pushes this posterior back
  through the observation model to impute a predictive distribution on the
  observed time series. At unobserved steps, this is an imputed value; at other
  steps it is interpreted as the model's estimate of the underlying noise-free
  series.

  Args:
    model: `tfp.sts.Sum` instance defining an additive STS model.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
    include_observation_noise: If `False`, the imputed uncertainties
      represent the model's estimate of the noise-free time series at each
      timestep. If `True`, they represent the model's estimate of the range of
      values that could be *observed* at each timestep, including any i.i.d.
      observation noise.
      Default value: `False`.

  Returns:
    imputed_series_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps]` and batch shape `concat([sample_shape,
      model.batch_shape])`, with `num_posterior_draws` mixture components.

  #### Example

  To specify a time series with missing values, use `tfp.sts.MaskedTimeSeries`:

  ```python
  time_series_with_nans = [-1., 1., np.nan, 2.4, np.nan, 5]
  observed_time_series = tfp.sts.MaskedTimeSeries(
    time_series=time_series_with_nans,
    is_missing=tf.math.is_nan(time_series_with_nans))
  ```

  Masked time series can be passed to `tfp.sts` methods in place of an
  `observed_time_series` `Tensor`:

  ```python
  # Build model using observed time series to set heuristic priors.
  linear_trend_model = tfp.sts.LocalLinearTrend(
    observed_time_series=observed_time_series)
  model = tfp.sts.Sum([linear_trend_model],
                      observed_time_series=observed_time_series)

  # Fit model to data
  parameter_samples, _ = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  After fitting the model, `impute_missing_values` returns a distribution over
  the imputed series:
  ```python
  # Impute missing values
  imputed_series_distribution = tfp.sts.impute_missing_values(
    model, observed_time_series, parameter_samples=parameter_samples)
  print('imputed means and stddevs: ',
        imputed_series_distribution.mean(),
        imputed_series_distribution.stddev())
  ```

  """
    with tf.name_scope('impute_missing_values'):

        [observed_time_series,
         mask] = sts_util.canonicalize_observed_time_series_with_mask(
             observed_time_series)

        # Run smoothing over the training timesteps to extract the
        # predictive means and variances.
        num_timesteps = dist_util.prefer_static_value(
            tf.shape(input=observed_time_series))[-2]
        lgssm = model.make_state_space_model(num_timesteps=num_timesteps,
                                             param_vals=parameter_samples)
        posterior_means, posterior_covs = lgssm.posterior_marginals(
            observed_time_series, mask=mask)

        observation_means, observation_covs = lgssm.latents_to_observations(
            latent_means=posterior_means, latent_covs=posterior_covs)

        if not include_observation_noise:
            # Extract just the variance of observation noise by pushing forward
            # zero-variance latents.
            _, observation_noise_covs = lgssm.latents_to_observations(
                latent_means=posterior_means,
                latent_covs=tf.zeros_like(posterior_covs))
            # Subtract out the observation noise that was added in the original
            # pushforward. Note that this could cause numerical issues if the
            # observation noise is very large. If this becomes an issue we could
            # avoid the subtraction by plumbing `include_observation_noise` through
            # `lgssm.latents_to_observations`.
            observation_covs -= observation_noise_covs

        # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
        # to a scalar time series.
        return sts_util.mix_over_posterior_draws(
            means=observation_means[..., 0],
            variances=observation_covs[..., 0, 0])
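
A common follow-up is to splice the imputed means back into the observed series at the missing positions. A short sketch, assuming missingness was encoded with NaNs as in the docstring example (`time_series_with_nans` and `imputed_series_distribution` are from the snippets above):

```python
# Replace missing (NaN) entries with the posterior-mean imputation.
series = tf.convert_to_tensor(time_series_with_nans, dtype=tf.float32)
filled_series = tf.where(tf.math.is_nan(series),
                         imputed_series_distribution.mean(),
                         series)
```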
Example #4
def one_step_predictive(model, observed_time_series, parameter_samples):
    """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps]` and
      batch shape `concat([sample_shape, model.batch_shape])`, with
      `num_posterior_draws` mixture components. The `t`th step represents the
      forecast distribution `p(observed_time_series[t] |
      observed_time_series[0:t-1], parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

    predictive_means = one_step_predictive_dist.mean()
    predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct a forecast using
  samples from the variational posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_one_step_predictive(observed_time_series,
                                 forecast_mean,
                                 forecast_scale):
      plt.figure(figsize=(12, 6))
      num_timesteps = forecast_mean.shape[-1]
      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(observed_time_series, label="observed time series", color=c1)
      plt.plot(forecast_mean, label="one-step prediction", color=c2)
      plt.fill_between(np.arange(num_timesteps),
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale,
                       alpha=0.1, color=c2)
      plt.legend()

    plot_one_step_predictive(observed_time_series,
                             forecast_mean=predictive_means,
                             forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations from
  the mean:

  ```python
    z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
                 / predictive_scales[..., :-1])
    anomalous_timesteps = tf.boolean_mask(
        tf.range(1, num_timesteps),
        tf.abs(z_scores) > 2.0)
  ```

  """

    with tf.name_scope('one_step_predictive'):

        [observed_time_series,
         is_missing] = sts_util.canonicalize_observed_time_series_with_mask(
             observed_time_series)

        # Run filtering over the training timesteps to extract the
        # predictive means and variances.
        num_timesteps = dist_util.prefer_static_value(
            tf.shape(input=observed_time_series))[-2]
        lgssm = model.make_state_space_model(num_timesteps=num_timesteps,
                                             param_vals=parameter_samples)
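        # `forward_filter` returns (log_likelihoods, filtered_means,
        # filtered_covs, predicted_means, predicted_covs, observation_means,
        # observation_covs); only the one-step-ahead observation moments
        # are needed here.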
        (_, _, _, _, _, observation_means,
         observation_covs) = lgssm.forward_filter(observed_time_series,
                                                  mask=is_missing)

        # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
        # to a scalar time series.
        return sts_util.mix_over_posterior_draws(
            means=observation_means[..., 0],
            variances=observation_covs[..., 0, 0])
Example #5
def one_step_predictive(model, observed_time_series, parameter_samples,
                        timesteps_are_event_shape=True):
  """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. Any `NaN`s
      are interpreted as missing observations; missingness may also be
      explicitly specified by passing a `tfp.sts.MaskedTimeSeries` instance.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.
    timesteps_are_event_shape: Deprecated, for backwards compatibility only.
      If `False`, the predictive distribution reports per-timestep
      probabilities, i.e., timesteps are treated as part of the batch shape
      rather than the event shape.
      Default value: `True`.

  Returns:
    predictive_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps] if timesteps_are_event_shape else []` and
      batch shape `concat([sample_shape, model.batch_shape,
      [] if timesteps_are_event_shape else [num_timesteps]])`, with
      `num_posterior_draws` mixture components. The `t`th step represents the
      forecast distribution `p(observed_time_series[t] |
      observed_time_series[0:t-1], parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

    predictive_means = one_step_predictive_dist.mean()
    predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct a forecast using
  samples from the variational posterior:

  ```python
    surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(
      model=model)
    loss_curve = tfp.vi.fit_surrogate_posterior(
      target_log_prob_fn=model.joint_distribution(observed_time_series).log_prob,
      surrogate_posterior=surrogate_posterior,
      optimizer=tf.optimizers.Adam(learning_rate=0.1),
      num_steps=200)
    samples = surrogate_posterior.sample(30)

    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_one_step_predictive(observed_time_series,
                                 forecast_mean,
                                 forecast_scale):
      plt.figure(figsize=(12, 6))
      num_timesteps = forecast_mean.shape[-1]
      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(observed_time_series, label="observed time series", color=c1)
      plt.plot(forecast_mean, label="one-step prediction", color=c2)
      plt.fill_between(np.arange(num_timesteps),
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale,
                       alpha=0.1, color=c2)
      plt.legend()

    plot_one_step_predictive(observed_time_series,
                             forecast_mean=predictive_means,
                             forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations from
  the mean:

  ```python
    z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
                 / predictive_scales[..., :-1])
    anomalous_timesteps = tf.boolean_mask(
        tf.range(1, num_timesteps),
        tf.abs(z_scores) > 2.0)
  ```

  """

  with tf.name_scope('one_step_predictive'):

    [observed_time_series,
     is_missing] = sts_util.canonicalize_observed_time_series_with_mask(
         observed_time_series)

    # Run filtering over the training timesteps to extract the
    # predictive means and variances.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(observed_time_series))[-2]
    lgssm = tfe_util.JitPublicMethods(
        model.make_state_space_model(num_timesteps=num_timesteps,
                                     param_vals=parameter_samples),
        trace_only=True)  # Avoid eager overhead w/o introducing XLA dependence.
    (_, _, _, _, _, observation_means, observation_covs
    ) = lgssm.forward_filter(observed_time_series, mask=is_missing)

    # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
    # to a scalar time series.
    predictive_dist = sts_util.mix_over_posterior_draws(
        means=observation_means[..., 0],
        variances=observation_covs[..., 0, 0])
    if timesteps_are_event_shape:
      predictive_dist = tfd.Independent(
          predictive_dist, reinterpreted_batch_ndims=1)
    return predictive_dist
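
When per-timestep probabilities are wanted, e.g. for the anomaly-scoring pattern in the docstring, passing `timesteps_are_event_shape=False` moves timesteps into the batch shape. A short sketch, assuming `model`, `observed_time_series` (with its trailing `[1]` dimension), and `samples` from the examples above:

```python
predictive_dist = tfp.sts.one_step_predictive(
    model, observed_time_series, parameter_samples=samples,
    timesteps_are_event_shape=False)
# With a scalar event shape, `log_prob` is evaluated independently at each
# timestep rather than summed over the whole series.
per_timestep_log_probs = predictive_dist.log_prob(
    observed_time_series[..., 0])
```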