Example #1
 def test_series_with_no_fixed_frequency_raises_error(self):
     with self.assertRaisesRegex(ValueError, 'no set frequency'):
         observed_time_series = pd.Series([1., 2., 4.],
                                          index=pd.to_datetime([
                                              '2014-01-01', '2014-01-02',
                                              '2014-01-04'
                                          ]))
         sts_util.canonicalize_observed_time_series_with_mask(
             observed_time_series)
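
For contrast, a minimal sketch of the non-error path (assumptions: `tfp.sts.regularize_series` is available, and the installed TFP version converts `NaN` values into a missingness mask; treat the exact return types as illustrative):

import pandas as pd
import tensorflow_probability as tfp
from tensorflow_probability.python.sts.internal import util as sts_util

series = pd.Series([1., 2., 4.],
                   index=pd.to_datetime(['2014-01-01', '2014-01-02',
                                         '2014-01-04']))

# Infer a daily frequency and insert a NaN placeholder for the missing
# 2014-01-03 step, so the index gains a set frequency.
regular_series = tfp.sts.regularize_series(series)

# Canonicalization now succeeds: the series gains a trailing feature
# dimension, and the NaN step is reported in the boolean mask.
observed_time_series, is_missing = (
    sts_util.canonicalize_observed_time_series_with_mask(regular_series))
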
    def _build_sts(self, observed_time_series=None):
        max_timesteps = 100
        num_features = 3

        prior = tfd.Sample(tfd.Laplace(0., 1.), sample_shape=[num_features])

        # LinearRegression components don't currently take an `observed_time_series`
        # argument, so they can't infer a prior batch shape. This means we have to
        # manually set the batch shape expected by the tests.
        dtype = np.float32
        if observed_time_series is not None:
            observed_time_series_tensor, _ = (
                sts_util.canonicalize_observed_time_series_with_mask(
                    observed_time_series))
            batch_shape = tf.shape(observed_time_series_tensor)[:-2]
            dtype = dtype_util.as_numpy_dtype(
                observed_time_series_tensor.dtype)
            prior = tfd.Sample(tfd.Laplace(tf.zeros(batch_shape, dtype=dtype),
                                           1.),
                               sample_shape=[num_features])

        regression = LinearRegression(design_matrix=np.random.randn(
            max_timesteps, num_features).astype(dtype),
                                      weights_prior=prior)
        return Sum(components=[regression],
                   observed_time_series=observed_time_series)
Example #3
def fit_with_gibbs_sampling(model,
                            observed_time_series,
                            num_results=2000,
                            num_warmup_steps=200,
                            initial_state=None,
                            seed=None):
    """Fits parameters for an STS model using Gibbs sampling."""
    if not hasattr(model, 'supports_gibbs_sampling'):
        raise ValueError(
            'This STS model does not support Gibbs sampling. Models '
            'for Gibbs sampling must be created using the '
            'method `build_model_for_gibbs_fitting`.')

    [observed_time_series,
     is_missing] = sts_util.canonicalize_observed_time_series_with_mask(
         observed_time_series)
    dtype = observed_time_series.dtype

    # The canonicalized time series always has trailing dimension `1`,
    # because although LinearGaussianSSMs support vector observations, STS models
    # describe scalar time series only. For our purposes it'll be cleaner to
    # remove this dimension.
    observed_time_series = observed_time_series[..., 0]
    batch_shape = prefer_static.shape(observed_time_series)[:-1]

    # Treat a LocalLevel model as the special case of LocalLinearTrend where
    # the slope_scale is always zero.
    initial_slope_scale = 0.
    initial_slope = 0.
    if isinstance(model.components[0], sts.LocalLinearTrend):
        initial_slope_scale = 1. * tf.ones(batch_shape, dtype=dtype)
        initial_slope = tf.zeros_like(observed_time_series)

    if initial_state is None:
        initial_state = GibbsSamplerState(
            observation_noise_scale=tf.ones(batch_shape, dtype=dtype),
            level_scale=tf.ones(batch_shape, dtype=dtype),
            slope_scale=initial_slope_scale,
            weights=tf.zeros(prefer_static.concat(
                [batch_shape,
                 _get_design_matrix(model).shape[-1:]], axis=0),
                             dtype=dtype),
            level=tf.zeros_like(observed_time_series),
            slope=initial_slope,
            seed=None)  # Set below.

    if isinstance(seed, six.integer_types):
        tf.random.set_seed(seed)

    # Always use the passed-in `seed` arg, ignoring any seed in the initial state.
    initial_state = initial_state._replace(
        seed=samplers.sanitize_seed(seed, salt='initial_GibbsSamplerState'))

    sampler_loop_body = _build_sampler_loop_body(model, observed_time_series,
                                                 is_missing)

    samples = tf.scan(sampler_loop_body,
                      np.arange(num_warmup_steps + num_results), initial_state)
    return tf.nest.map_structure(lambda x: x[num_warmup_steps:], samples)
def fit_with_gibbs_sampling(model,
                            observed_time_series,
                            num_results=2000,
                            num_warmup_steps=200,
                            compile_steps_with_xla=False,
                            initial_state=None,
                            seed=None):
  """Fits parameters for an STS model using Gibbs sampling."""
  if not hasattr(model, 'supports_gibbs_sampling'):
    raise ValueError('This STS model does not support Gibbs sampling. Models '
                     'for Gibbs sampling must be created using the '
                     'method `build_model_for_gibbs_fitting`.')

  [
      observed_time_series,
      is_missing
  ] = sts_util.canonicalize_observed_time_series_with_mask(
      observed_time_series)
  dtype = observed_time_series.dtype

  # The canonicalized time series always has trailing dimension `1`,
  # because although LinearGaussianSSMs support vector observations, STS models
  # describe scalar time series only. For our purposes it'll be cleaner to
  # remove this dimension.
  observed_time_series = observed_time_series[..., 0]

  batch_shape = prefer_static.shape(observed_time_series)[:-1]
  if initial_state is None:
    initial_state = GibbsSamplerState(
        observation_noise_scale=tf.ones(batch_shape, dtype=dtype),
        level_scale=tf.ones(batch_shape, dtype=dtype),
        weights=tf.zeros(prefer_static.concat([
            batch_shape,
            _get_design_matrix(model).shape[-1:]], axis=0), dtype=dtype),
        level=tf.zeros_like(observed_time_series),
        seed=None)  # Set below.

  if seed and isinstance(seed, six.integer_types):
    tf.random.set_seed(seed)

  # Always use the passed-in `seed` arg, ignoring any seed in the initial state.
  seeded_state = initial_state._asdict()
  seeded_state['seed'] = samplers.sanitize_seed(
      seed, salt='initial_GibbsSamplerState')
  initial_state = GibbsSamplerState(**seeded_state)

  sampler_loop_body = _build_sampler_loop_body(
      model, observed_time_series, is_missing,
      compile_steps_with_xla=compile_steps_with_xla,
      seed=seed)  # This is still an `int` seed, because the InverseGamma
                  # sampler currently requires stateful semantics.

  samples = tf.scan(sampler_loop_body,
                    np.arange(num_warmup_steps + num_results),
                    initial_state)
  return tf.nest.map_structure(lambda x: x[num_warmup_steps:], samples)
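
A minimal end-to-end sketch of calling `fit_with_gibbs_sampling` (assumptions: the module above is importable as `tfp.experimental.sts_gibbs.gibbs_sampler`, and the priors shown are illustrative choices rather than the library defaults):

import numpy as np
import tensorflow_probability as tfp
from tensorflow_probability.python.experimental.sts_gibbs import gibbs_sampler

tfd = tfp.distributions

num_timesteps, num_features = 100, 2
observed_time_series = np.random.randn(num_timesteps, 1).astype(np.float32)
design_matrix = np.random.randn(num_timesteps, num_features).astype(np.float32)

# Gibbs sampling only supports models created by `build_model_for_gibbs_fitting`.
model = gibbs_sampler.build_model_for_gibbs_fitting(
    observed_time_series=observed_time_series,
    design_matrix=design_matrix,
    weights_prior=tfd.Normal(loc=0., scale=1.),
    level_variance_prior=tfd.InverseGamma(concentration=0.01, scale=0.01),
    observation_noise_variance_prior=tfd.InverseGamma(concentration=0.01,
                                                      scale=0.01))

samples = gibbs_sampler.fit_with_gibbs_sampling(
    model, observed_time_series, num_results=500, num_warmup_steps=100, seed=42)
# `samples` is a `GibbsSamplerState` whose fields stack the retained draws,
# e.g. `samples.observation_noise_scale` has shape [500] here.
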
 def _build_sts(self, observed_time_series=None):
     one = 1.
     if observed_time_series is not None:
         observed_time_series = (
             sts_util.canonicalize_observed_time_series_with_mask(
                 observed_time_series))
         one = tf.ones_like(observed_time_series.time_series[..., 0, 0])
     return AutoregressiveIntegratedMovingAverage(
         ar_order=3,
         ma_order=1,
         integration_degree=0,
         level_drift_prior=tfd.Normal(loc=one, scale=one),
         observed_time_series=observed_time_series)
  def _build_sts(self, observed_time_series=None):
    max_timesteps = 100
    num_features = 3

    dtype = np.float32
    if observed_time_series is not None:
      observed_time_series_tensor, _ = (
          sts_util.canonicalize_observed_time_series_with_mask(
              observed_time_series))
      dtype = dtype_util.as_numpy_dtype(observed_time_series_tensor.dtype)

    return DynamicLinearRegression(
        design_matrix=np.random.randn(
            max_timesteps, num_features).astype(dtype),
        observed_time_series=observed_time_series)
Example #7
 def test_canonicalizes_observed_time_series(self, observed_time_series,
                                             expected_shape,
                                             expected_is_missing):
     observed_time_series, is_missing = (
         sts_util.canonicalize_observed_time_series_with_mask(
             observed_time_series))
     # Evaluate with explicit identity ops to avoid TF1 error
     # `RuntimeError: The Session graph is empty.`
     observed_time_series, is_missing = self.evaluate(
         (observed_time_series, is_missing))
     self.assertAllEqual(observed_time_series.shape, expected_shape)
     if is_missing is None:
         self.assertIsNone(expected_is_missing)
     elif expected_is_missing is None:
          expected_is_missing = np.zeros(is_missing.shape, dtype=bool)
     self.assertAllEqual(expected_is_missing, is_missing)
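
A concrete illustration of the masked-input case this test parameterizes over (a sketch; `tfp.sts.MaskedTimeSeries` is the public container pairing a series with a missingness mask):

import numpy as np
import tensorflow_probability as tfp
from tensorflow_probability.python.sts.internal import util as sts_util

masked_series = tfp.sts.MaskedTimeSeries(
    time_series=np.array([1., 2., 3., 4.], dtype=np.float32),
    is_missing=np.array([False, True, False, False]))

# The canonicalized series gains a trailing feature dimension (shape [4, 1]),
# and the provided boolean mask is passed through as `is_missing`.
observed_time_series, is_missing = (
    sts_util.canonicalize_observed_time_series_with_mask(masked_series))
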
def _fit_seasonal_model_with_gibbs_sampling(observed_time_series,
                                            seasonal_structure,
                                            num_warmup_steps=50,
                                            num_results=100,
                                            seed=None):
    """Builds a seasonality-as-regression model and fits it by Gibbs sampling."""
    with tf.name_scope('fit_seasonal_model_with_gibbs_sampling'):
        observed_time_series = sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series)
        dtype = observed_time_series.time_series.dtype
        design_matrix = seasonality_util.build_fixed_effects(
            num_steps=ps.shape(observed_time_series.time_series)[-2],
            seasonal_structure=seasonal_structure,
            dtype=dtype)

        # Default priors.
        # pylint: disable=protected-access
        one = tf.ones([], dtype=dtype)
        level_variance_prior = tfd.InverseGamma(concentration=16,
                                                scale=16. * 0.001**2 * one)
        level_variance_prior._upper_bound = one
        slope_variance_prior = tfd.InverseGamma(concentration=16,
                                                scale=16. * 0.05**2 * one)
        slope_variance_prior._upper_bound = 0.01 * one
        observation_noise_variance_prior = tfd.InverseGamma(concentration=0.05,
                                                            scale=0.05 * one)
        observation_noise_variance_prior._upper_bound = 1.2 * one
        # pylint: enable=protected-access

    model = gibbs_sampler.build_model_for_gibbs_fitting(
        observed_time_series=observed_time_series,
        design_matrix=design_matrix,
        weights_prior=tfd.Normal(loc=0., scale=one),
        level_variance_prior=level_variance_prior,
        slope_variance_prior=slope_variance_prior,
        observation_noise_variance_prior=observation_noise_variance_prior)
    return [
        model,
        gibbs_sampler.fit_with_gibbs_sampling(
            model,
            observed_time_series,
            num_results=num_results,
            num_warmup_steps=num_warmup_steps,
            seed=seed)
    ]
    def _build_sts(self, observed_time_series=None):
        max_timesteps = 100
        num_features = 3

        # LinearRegression components don't currently take an `observed_time_series`
        # argument, so they can't infer a prior batch shape. This means we have to
        # manually set the batch shape expected by the tests.
        batch_shape = None
        if observed_time_series is not None:
            observed_time_series_tensor, _ = (
                sts_util.canonicalize_observed_time_series_with_mask(
                    observed_time_series))
            batch_shape = tf.shape(observed_time_series_tensor)[:-2]

        regression = SparseLinearRegression(design_matrix=np.random.randn(
            max_timesteps, num_features).astype(np.float32),
                                            weights_batch_shape=batch_shape)
        return Sum(components=[regression],
                   observed_time_series=observed_time_series)
  def _build_sts(self, observed_time_series=None):
    max_timesteps = 100
    num_features = 3

    prior = tfd.Laplace(0., 1.)

    # LinearRegression components don't currently take an `observed_time_series`
    # argument, so they can't infer a prior batch shape. This means we have to
    # manually set the batch shape expected by the tests.
    if observed_time_series is not None:
      observed_time_series_tensor, _ = (
          sts_util.canonicalize_observed_time_series_with_mask(
              observed_time_series))
      batch_shape = tf.shape(input=observed_time_series_tensor)[:-2]
      prior = tfd.TransformedDistribution(prior, tfb.Identity(),
                                          event_shape=[num_features],
                                          batch_shape=batch_shape)

    regression = LinearRegression(
        design_matrix=tf.random.normal([max_timesteps, num_features]),
        weights_prior=prior)
    return Sum(components=[regression],
               observed_time_series=observed_time_series)
Example #11
def forecast(model,
             observed_time_series,
             parameter_samples,
             num_steps_forecast,
             include_observation_noise=True):
    """Construct predictive distribution over future observations.

  Given samples from the posterior over parameters, return the predictive
  distribution over future observations for num_steps_forecast timesteps.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.
    num_steps_forecast: scalar `int` `Tensor` number of steps to forecast.
    include_observation_noise: Python `bool` indicating whether the forecast
      distribution should include uncertainty from observation noise. If `True`,
      the forecast is over future observations, if `False`, the forecast is over
      future values of the latent noise-free time series.
      Default value: `True`.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      [num_steps_forecast, 1] and batch shape
      `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws`
      mixture components.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `forecast`, we construct a forecast
  distribution:

  ```python
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)

    forecast_mean = forecast_dist.mean()[..., 0]  # shape: [50]
    forecast_scale = forecast_dist.stddev()[..., 0]  # shape: [50]
    forecast_samples = forecast_dist.sample(10)[..., 0]  # shape: [10, 50]
  ```

  If using variational inference instead of HMC, we'd construct a forecast using
  samples from the variational posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                         parameter_samples=samples,
                                         num_steps_forecast=50)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_forecast(observed_time_series,
                      forecast_mean,
                      forecast_scale,
                      forecast_samples):
      plt.figure(figsize=(12, 6))

      num_steps = observed_time_series.shape[-1]
      num_steps_forecast = forecast_mean.shape[-1]
      num_steps_train = num_steps - num_steps_forecast

      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(np.arange(num_steps), observed_time_series,
               lw=2, color=c1, label='ground truth')

      forecast_steps = np.arange(num_steps_train,
                       num_steps_train+num_steps_forecast)
      plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1)
      plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2,
               label='forecast')
      plt.fill_between(forecast_steps,
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale, color=c2, alpha=0.2)

      plt.xlim([0, num_steps])
      plt.legend()

    plot_forecast(observed_time_series,
                  forecast_mean=forecast_mean,
                  forecast_scale=forecast_scale,
                  forecast_samples=forecast_samples)
  ```

  """

    with tf.name_scope('forecast'):
        [observed_time_series,
         mask] = sts_util.canonicalize_observed_time_series_with_mask(
             observed_time_series)

        # Run filtering over the observed timesteps to extract the
        # latent state posterior at timestep T+1 (i.e., the final
        # filtering distribution, pushed through the transition model).
        # This is the prior for the forecast model ("today's prior
        # is yesterday's posterior").
        num_observed_steps = dist_util.prefer_static_value(
            tf.shape(input=observed_time_series))[-2]
        observed_data_ssm = model.make_state_space_model(
            num_timesteps=num_observed_steps, param_vals=parameter_samples)
        (_, _, _, predictive_means, predictive_covs, _,
         _) = observed_data_ssm.forward_filter(observed_time_series, mask=mask)

        # Build a batch of state-space models over the forecast period. Because
        # we'll use MixtureSameFamily to mix over the posterior draws, we need to
        # do some shenanigans to move the `[num_posterior_draws]` batch dimension
        # from the leftmost to the rightmost side of the model's batch shape.
        # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
        # arbitrary axis, and eliminate `move_dimension` calls here.
        parameter_samples = model._canonicalize_param_vals_as_map(
            parameter_samples)  # pylint: disable=protected-access
        parameter_samples_with_reordered_batch_dimension = {
            param.name: dist_util.move_dimension(
                parameter_samples[param.name], 0,
                -(1 + _prefer_static_event_ndims(param.prior)))
            for param in model.parameters
        }
        forecast_prior = tfd.MultivariateNormalFullCovariance(
            loc=dist_util.move_dimension(predictive_means[..., -1, :], 0, -2),
            covariance_matrix=dist_util.move_dimension(
                predictive_covs[..., -1, :, :], 0, -3))

        # Ugly hack: because we moved `num_posterior_draws` to the trailing (rather
        # than leading) dimension of parameters, the parameter batch shapes no
        # longer broadcast against the `constant_offset` attribute used in `sts.Sum`
        # models. We fix this by manually adding an extra broadcasting dim to
        # `constant_offset` if present.
        # The root cause of this hack is that we mucked with param dimensions above
        # and are now passing params that are 'invalid' in the sense that they don't
        # match the shapes of the model's param priors. The fix (as above) will be
        # to update MixtureSameFamily so we can avoid changing param dimensions
        # altogether.
        # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
        # arbitrary axis, and eliminate this hack.
        kwargs = {}
        if hasattr(model, 'constant_offset'):
            kwargs['constant_offset'] = tf.convert_to_tensor(
                value=model.constant_offset,
                dtype=forecast_prior.dtype)[..., tf.newaxis]

        if not include_observation_noise:
            parameter_samples_with_reordered_batch_dimension[
                'observation_noise_scale'] = tf.zeros_like(
                    parameter_samples_with_reordered_batch_dimension[
                        'observation_noise_scale'])

        # We assume that any STS model that has a `constant_offset` attribute
        # will allow it to be overridden as a kwarg. This is currently just
        # `sts.Sum`.
        # TODO(b/120245392): when kwargs hack is removed, switch back to calling
        # the public version of `_make_state_space_model`.
        forecast_ssm = model._make_state_space_model(  # pylint: disable=protected-access
            num_timesteps=num_steps_forecast,
            param_map=parameter_samples_with_reordered_batch_dimension,
            initial_state_prior=forecast_prior,
            initial_step=num_observed_steps,
            **kwargs)

        num_posterior_draws = dist_util.prefer_static_value(
            forecast_ssm.batch_shape_tensor())[-1]
        return tfd.MixtureSameFamily(mixture_distribution=tfd.Categorical(
            logits=tf.zeros([num_posterior_draws], dtype=forecast_ssm.dtype)),
                                     components_distribution=forecast_ssm)
def detect_anomalies(series,
                     anomaly_threshold=0.01,
                     use_gibbs_predictive_dist=False,
                     num_warmup_steps=50,
                     num_samples=100,
                     jit_compile=False,
                     seed=None):
    """Detects anomalies in a Pandas time series using a default seasonal model.

  This function fits a `LocalLinearTrend` model with automatically determined
  seasonal effects, and returns a predictive credible interval at each step
  of the series. The fitting is done via Gibbs sampling, implemented
  specifically for this model class, which sometimes gives useful results more
  quickly than other fitting methods such as VI or HMC.

  Args:
    series: a Pandas `pd.Series` or `pd.DataFrame` instance indexed by a
      `pd.DateTimeIndex`. This may be irregular (missing timesteps) and/or
      contain unobserved steps indicated by `NaN` values (`NaN` values may also
      be provided to indicate future steps at which a forecast is desired).
      Multiple columns in a `pd.DataFrame` will generate results with a batch
      dimension.
    anomaly_threshold: float, confidence level for anomaly detection. An
        anomaly will be detected if the observed series falls outside the
        equal-tailed credible interval containing `(1 - anomaly_threshold)` of
        the posterior predictive probability mass.
    use_gibbs_predictive_dist: Python `bool`. If `True`, the predictive
      distribution is derived from Gibbs samples of the latent level, which
      incorporate information from the entire series *including future
      timesteps*. Otherwise, the predictive distribution is the 'filtering'
      distribution in which (conditioned on sampled parameters) the prediction
      at each step depends only on values observed at previous steps.
      Default value: `False`.
    num_warmup_steps: `int` number of steps to take before starting to collect
      samples.
      Default value: `50`.
    num_samples: `int` number of steps to take while sampling parameter
      values.
      Default value: `100`.
    jit_compile: Python `bool`. If `True`, compile the sampler with XLA. This
      adds overhead to the first call, but may speed up subsequent calls with
      series of the same shape and frequency.
      Default value: `False`.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
  Returns:
    prediction_output: instance of `PredictionOutput` named tuple containing
      the predicted credible intervals for each point (omitting the first) in
      the series.
  """
    regularized_series = regularization.regularize_series(series)
    observed_time_series = sts_util.canonicalize_observed_time_series_with_mask(
        regularized_series)
    anomaly_threshold = tf.convert_to_tensor(
        anomaly_threshold,
        dtype=observed_time_series.time_series.dtype,
        name='anomaly_threshold')

    seasonal_structure = seasonality_util.create_seasonal_structure(
        frequency=regularized_series.index.freq,
        num_steps=len(regularized_series))
    # Convert SeasonType keys into strings, because `tf.function` doesn't like
    # enum-valued arguments.
    seasonal_structure = {str(k): v for (k, v) in seasonal_structure.items()}
    inner_fn = (_detect_anomalies_inner_compiled
                if jit_compile else _detect_anomalies_inner)
    lower_limit, upper_limit, mean, tail_probabilities = inner_fn(
        observed_time_series,
        seasonal_structure=seasonal_structure,
        use_gibbs_predictive_dist=use_gibbs_predictive_dist,
        num_warmup_steps=num_warmup_steps,
        num_samples=num_samples,
        seed=seed)
    return PredictionOutput(
        times=regularized_series.index,
        observed_time_series=observed_time_series.time_series[..., 0],
        mean=mean,
        lower_limit=lower_limit,
        upper_limit=upper_limit,
        tail_probabilities=tail_probabilities,
        is_anomaly=tail_probabilities < anomaly_threshold)
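
A hedged calling sketch for `detect_anomalies` (assumptions: the function defined above is in scope or imported from its module — the public import path varies across TFP versions — and the data are synthetic):

import numpy as np
import pandas as pd

# Four weeks of daily data with one injected spike.
values = np.sin(np.linspace(0., 4. * np.pi, 28)) + 0.1 * np.random.randn(28)
values[20] += 5.
series = pd.Series(values,
                   index=pd.date_range('2020-01-01', periods=28, freq='D'))

prediction_output = detect_anomalies(series,
                                     anomaly_threshold=0.01,
                                     num_warmup_steps=50,
                                     num_samples=100,
                                     seed=(0, 1))
# Steps whose observed value falls outside the credible interval, i.e. whose
# tail probability is below `anomaly_threshold` (eager execution assumed).
is_anomaly = prediction_output.is_anomaly.numpy()
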
Example #13
def one_step_predictive(model, observed_time_series, parameter_samples,
                        timesteps_are_event_shape=True):
  """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. Any `NaN`s
      are interpreted as missing observations; missingness may also be
      explicitly specified by passing a `tfp.sts.MaskedTimeSeries` instance.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.
    timesteps_are_event_shape: Deprecated, for backwards compatibility only.
      If `False`, the predictive distribution will return per-timestep
      probabilities.
      Default value: `True`.

  Returns:
    predictive_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps] if timesteps_are_event_shape else []` and
      batch shape `concat([sample_shape, model.batch_shape,
      [] if timesteps_are_event_shape else [num_timesteps]])`, with
      `num_posterior_draws` mixture components. The `t`th step represents the
      forecast distribution `p(observed_time_series[t] |
      observed_time_series[0:t-1], parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

    predictive_means = one_step_predictive_dist.mean()
    predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct a forecast using
  samples from the variational posterior:

  ```python
    surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(
      model=model)
    loss_curve = tfp.vi.fit_surrogate_posterior(
      target_log_prob_fn=model.joint_distribution(observed_time_series).log_prob,
      surrogate_posterior=surrogate_posterior,
      optimizer=tf.optimizers.Adam(learning_rate=0.1),
      num_steps=200)
    samples = surrogate_posterior.sample(30)

    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_one_step_predictive(observed_time_series,
                                 forecast_mean,
                                 forecast_scale):
      plt.figure(figsize=(12, 6))
      num_timesteps = forecast_mean.shape[-1]
      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(observed_time_series, label="observed time series", color=c1)
      plt.plot(forecast_mean, label="one-step prediction", color=c2)
      plt.fill_between(np.arange(num_timesteps),
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale,
                       alpha=0.1, color=c2)
      plt.legend()

    plot_one_step_predictive(observed_time_series,
                             forecast_mean=predictive_means,
                             forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations from
  the mean:

  ```python
    z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
                 / predictive_scales[..., :-1])
    anomalous_timesteps = tf.boolean_mask(
        tf.range(1, num_timesteps),
        tf.abs(z_scores) > 2.0)
  ```

  """

  with tf.name_scope('one_step_predictive'):

    [
        observed_time_series,
        is_missing
    ] = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # Run filtering over the training timesteps to extract the
    # predictive means and variances.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(observed_time_series))[-2]
    lgssm = tfe_util.JitPublicMethods(
        model.make_state_space_model(num_timesteps=num_timesteps,
                                     param_vals=parameter_samples),
        trace_only=True)  # Avoid eager overhead w/o introducing XLA dependence.
    (_, _, _, _, _, observation_means, observation_covs
    ) = lgssm.forward_filter(observed_time_series, mask=is_missing)

    # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
    # to a scalar time series.
    predictive_dist = sts_util.mix_over_posterior_draws(
        means=observation_means[..., 0],
        variances=observation_covs[..., 0, 0])
    if timesteps_are_event_shape:
      predictive_dist = tfd.Independent(
          predictive_dist, reinterpreted_batch_ndims=1)
    return predictive_dist
Example #14
    def joint_log_prob(self, observed_time_series):
        """Build the joint density `log p(params) + log p(y|params)` as a callable.

    Args:
      observed_time_series: Observed `Tensor` trajectories of shape
        `sample_shape + batch_shape + [num_timesteps, 1]` (the trailing
        `1` dimension is optional if `num_timesteps > 1`), where
        `batch_shape` should match `self.batch_shape` (the broadcast batch
        shape of all priors on parameters for this structural time series
        model). Any `NaN`s are interpreted as missing observations; missingness
        may also be explicitly specified by passing a
        `tfp.sts.MaskedTimeSeries` instance.

    Returns:
     log_joint_fn: A function taking a `Tensor` argument for each model
       parameter, in canonical order, and returning a `Tensor` log probability
       of shape `batch_shape`. Note that, *unlike* `tfp.Distributions`
       `log_prob` methods, the `log_joint` sums over the `sample_shape` from y,
       so that `sample_shape` does not appear in the output log_prob. This
       corresponds to viewing multiple samples in `y` as iid observations from a
       single model, which is typically the desired behavior for parameter
       inference.
    """
        with tf.name_scope('joint_log_prob'):
            [observed_time_series,
             mask] = sts_util.canonicalize_observed_time_series_with_mask(
                 observed_time_series)

            num_timesteps = ps.shape(observed_time_series)[-2]
            parameter_prior = self._joint_prior_distribution()

            def log_joint_fn(*param_vals, **param_kwargs):
                """Generated log-density function."""

                if param_kwargs:
                    if param_vals:
                        raise ValueError(
                            'log_joint_fn saw both positional args ({}) and named args ({}). '
                            'This is not supported: you have to choose!'.
                            format(param_vals, param_kwargs))
                    param_vals = [
                        param_kwargs[p.name] for p in self.parameters
                    ]

                param_lp = parameter_prior.log_prob(*param_vals)

                # Build a linear Gaussian state space model and evaluate the marginal
                # log_prob on observations.
                lgssm = self.make_state_space_model(
                    param_vals=param_vals, num_timesteps=num_timesteps)
                observation_lp = lgssm.log_prob(observed_time_series,
                                                mask=mask)

                # Sum over likelihoods from iid observations. Without this sum,
                # adding `param_lp + observation_lp` would broadcast the param priors
                # over the sample shape, which incorrectly multi-counts the param
                # priors.
                sample_ndims = tf.maximum(
                    0,
                    tf.rank(observation_lp) - tf.rank(param_lp))
                observation_lp = tf.reduce_sum(observation_lp,
                                               axis=tf.range(sample_ndims))

                return param_lp + observation_lp

        return log_joint_fn
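
A short usage sketch for `joint_log_prob` (assumed setup: any `tfp.sts` model, here a `LocalLinearTrend` with heuristic priors from a synthetic series; drawing from the priors is only a way to produce valid parameter values):

import numpy as np
import tensorflow_probability as tfp

observed_time_series = np.random.randn(100).astype(np.float32)
model = tfp.sts.LocalLinearTrend(observed_time_series=observed_time_series)

# `joint_log_prob` returns a callable over parameter values in canonical order.
log_joint_fn = model.joint_log_prob(observed_time_series)

# Evaluate the joint density at a draw from the parameter priors.
param_draws = [p.prior.sample() for p in model.parameters]
log_prob = log_joint_fn(*param_draws)
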
Example #15
    def joint_distribution(self,
                           observed_time_series=None,
                           num_timesteps=None,
                           trajectories_shape=None,
                           initial_step=0,
                           mask=None,
                           experimental_parallelize=False):
        """Constructs the joint distribution over parameters and observed values.

    Args:
      observed_time_series: Optional observed time series to model, as a
        `Tensor` or `tfp.sts.MaskedTimeSeries` instance having shape
        `concat([batch_shape, trajectories_shape, num_timesteps, 1])`. If
        an observed time series is provided, the `num_timesteps`,
        `trajectories_shape`, and `mask` arguments are ignored, and
        an unnormalized (pinned) distribution over parameter values is returned.
        Default value: `None`.
      num_timesteps: scalar `int` `Tensor` number of timesteps to model. This
        must be specified either directly or by passing an
        `observed_time_series`.
        Default value: `None`.
      trajectories_shape: `int` `Tensor` shape of sampled trajectories
        for each set of parameter values. If not specified (either directly
        or by passing an `observed_time_series`), defaults to a
        one-to-one correspondence between trajectories and parameter settings
        (implicitly `trajectories_shape=()`).
        Default value: `None`.
      initial_step: Optional scalar `int` `Tensor` specifying the starting
        timestep.
        Default value: `0`.
      mask: Optional `bool` `Tensor` having shape
        `concat([batch_shape, trajectories_shape, num_timesteps])`, in which
        `True` entries indicate that the series value at the corresponding step
        is missing and should be ignored. This argument should be passed only
        if `observed_time_series` is not specified or does not already contain
        a missingness mask; it is an error to pass both this
        argument and an `observed_time_series` value containing a missingness
        mask.
        Default value: `None`.
      experimental_parallelize: If `True`, use parallel message passing
        algorithms from `tfp.experimental.parallel_filter` to perform time
        series operations in `O(log num_timesteps)` sequential steps. The
        overall FLOP and memory cost may be larger than for the sequential
        implementations by a constant factor.
        Default value: `False`.
    Returns:
      joint_distribution: joint distribution of model parameters and
        observed trajectories. If no `observed_time_series` was specified, this
        is an instance of `tfd.JointDistributionNamedAutoBatched` with a
        random variable for each model parameter (with names and order matching
        `self.parameters`), plus a final random variable `observed_time_series`
        representing a trajectory(ies) conditioned on the parameters. If
        `observed_time_series` was specified, the return value is given by
        `joint_distribution.experimental_pin(
        observed_time_series=observed_time_series)` where `joint_distribution`
        is as just described, so it defines an unnormalized posterior
        distribution over the parameters.

    #### Example:

    The joint distribution can generate prior samples of parameters and
    trajectories:

    ```python
    from matplotlib import pylab as plt
    import tensorflow_probability as tfp

    # Sample and plot 100 trajectories from the prior.
    model = tfp.sts.LocalLinearTrend()
    prior_samples = model.joint_distribution().sample([100])
    plt.plot(
      tf.linalg.matrix_transpose(prior_samples['observed_time_series'][..., 0]))
    ```

    It also integrates with TFP inference APIs, providing a more flexible
    alternative to the STS-specific fitting utilities.

    ```python
    jd = model.joint_distribution(observed_time_series)

    # Variational inference.
    surrogate_posterior = (
      tfp.experimental.vi.build_factored_surrogate_posterior(
        event_shape=jd.event_shape,
        bijector=jd.experimental_default_event_space_bijector()))
    losses = tfp.vi.fit_surrogate_posterior(
      target_log_prob_fn=jd.unnormalized_log_prob,
      surrogate_posterior=surrogate_posterior,
      optimizer=tf.optimizers.Adam(0.1),
      num_steps=200)
    parameter_samples = surrogate_posterior.sample(50)

    # No U-Turn Sampler.
    samples, kernel_results = tfp.experimental.mcmc.windowed_adaptive_nuts(
      n_draws=500, joint_dist=jd)
    ```

    """
        def state_space_model_likelihood(**param_vals):
            ssm = self.make_state_space_model(
                param_vals=param_vals,
                num_timesteps=num_timesteps,
                initial_step=initial_step,
                mask=mask,
                experimental_parallelize=experimental_parallelize)
            # Looping LGSSM methods are really expensive in eager mode; wrap them
            # to keep this from slowing things down in interactive use.
            ssm = tfe_util.JitPublicMethods(ssm, trace_only=True)
            if distribution_util.shape_may_be_nontrivial(trajectories_shape):
                return sample.Sample(ssm, sample_shape=trajectories_shape)
            return ssm

        batch_ndims = ps.rank_from_shape(self.batch_shape_tensor,
                                         self.batch_shape)
        if observed_time_series is not None:
            [observed_time_series, is_missing
             ] = sts_util.canonicalize_observed_time_series_with_mask(
                 observed_time_series)
            if is_missing is not None:
                if mask is not None:
                    raise ValueError(
                        'Passed non-None value for `mask`, but the observed '
                        'time series already contains an `is_missing` mask.')
                mask = is_missing
            num_timesteps = ps.shape(observed_time_series)[-2]
            trajectories_shape = ps.shape(observed_time_series)[batch_ndims:-2]

        joint_distribution = (
            joint_distribution_auto_batched.JointDistributionNamedAutoBatched(
                model=collections.OrderedDict(
                    # Prior.
                    list(self._joint_prior_distribution().model.items()) +
                    # Likelihood.
                    [('observed_time_series', state_space_model_likelihood)]),
                use_vectorized_map=False,
                batch_ndims=batch_ndims))

        if observed_time_series is not None:
            return joint_distribution.experimental_pin(
                observed_time_series=observed_time_series)

        return joint_distribution
Example #16
def fit_with_gibbs_sampling(model,
                            observed_time_series,
                            num_chains=(),
                            num_results=2000,
                            num_warmup_steps=200,
                            initial_state=None,
                            seed=None,
                            default_pseudo_observations=None,
                            experimental_use_dynamic_cholesky=False,
                            experimental_use_weight_adjustment=False):
    """Fits parameters for an STS model using Gibbs sampling.

  Args:
    model: A `tfp.sts.StructuralTimeSeries` model instance returned by
      `build_model_for_gibbs_fitting`.
    observed_time_series: `float` `Tensor` of shape `[..., T, 1]` (omitting the
      trailing unit dimension is also supported when `T > 1`), specifying an
      observed time series. May optionally be an instance of
      `tfp.sts.MaskedTimeSeries`, which includes a mask `Tensor` to specify
      timesteps with missing observations.
    num_chains: Optional int to indicate the number of parallel MCMC chains.
      Defaults to an empty tuple, which samples a single chain.
    num_results: Optional int to indicate the number of MCMC samples to draw.
    num_warmup_steps: Optional int to indicate the number of warmup steps to
      take before starting to collect samples.
    initial_state: A `GibbsSamplerState` structure of the initial states of the
      MCMC chains.
    seed: Optional `Python` `int` seed controlling the sampled values.
    default_pseudo_observations: Optional scalar float `Tensor` controlling the
      number of pseudo-observations for the prior precision matrix over the
      weights.
    experimental_use_dynamic_cholesky: Optional bool - in the case of
      spike-and-slab sampling, dynamically select the subset of the design
      matrix with active features to perform the Cholesky decomposition. This
      may provide a speedup when the number of true features is small compared
      to the size of the design matrix. *Note*: If this is true, neither batch
      shape nor `jit_compile` is supported.
    experimental_use_weight_adjustment: Optional bool - use a nonstandard
      update for the posterior precision of the weights in the case of a
      spike-and-slab sampler.

  Returns:
    samples: A `GibbsSamplerState` structure of posterior samples.
  """
    if not hasattr(model, 'supports_gibbs_sampling'):
        raise ValueError(
            'This STS model does not support Gibbs sampling. Models '
            'for Gibbs sampling must be created using the '
            'method `build_model_for_gibbs_fitting`.')
    if not tf.nest.is_nested(num_chains):
        num_chains = [num_chains]

    [observed_time_series,
     is_missing] = sts_util.canonicalize_observed_time_series_with_mask(
         observed_time_series)
    dtype = observed_time_series.dtype

    # The canonicalized time series always has trailing dimension `1`,
    # because although LinearGaussianSSMs support vector observations, STS models
    # describe scalar time series only. For our purposes it'll be cleaner to
    # remove this dimension.
    observed_time_series = observed_time_series[..., 0]
    batch_shape = prefer_static.concat(
        [num_chains,
         prefer_static.shape(observed_time_series)[:-1]], axis=-1)
    level_slope_shape = prefer_static.concat(
        [num_chains, prefer_static.shape(observed_time_series)], axis=-1)

    # Treat a LocalLevel model as the special case of LocalLinearTrend where
    # the slope_scale is always zero.
    initial_slope_scale = 0.
    initial_slope = 0.
    if isinstance(model.components[0], sts.LocalLinearTrend):
        initial_slope_scale = 1. * tf.ones(batch_shape, dtype=dtype)
        initial_slope = tf.zeros(level_slope_shape, dtype=dtype)

    if initial_state is None:
        design_matrix = _get_design_matrix(model)
        weights = tf.zeros(
            0, dtype=dtype) if design_matrix is None else tf.zeros(  # pylint:disable=g-long-ternary
                prefer_static.concat([batch_shape, design_matrix.shape[-1:]],
                                     axis=0),
                dtype=dtype)
        initial_state = GibbsSamplerState(observation_noise_scale=tf.ones(
            batch_shape, dtype=dtype),
                                          level_scale=tf.ones(batch_shape,
                                                              dtype=dtype),
                                          slope_scale=initial_slope_scale,
                                          weights=weights,
                                          level=tf.zeros(level_slope_shape,
                                                         dtype=dtype),
                                          slope=initial_slope,
                                          seed=None)  # Set below.

    if isinstance(seed, six.integer_types):
        tf.random.set_seed(seed)

    # Always use the passed-in `seed` arg, ignoring any seed in the initial state.
    initial_state = initial_state._replace(
        seed=samplers.sanitize_seed(seed, salt='initial_GibbsSamplerState'))

    sampler_loop_body = _build_sampler_loop_body(
        model=model,
        observed_time_series=observed_time_series,
        is_missing=is_missing,
        default_pseudo_observations=default_pseudo_observations,
        experimental_use_dynamic_cholesky=experimental_use_dynamic_cholesky,
        experimental_use_weight_adjustment=experimental_use_weight_adjustment)

    samples = tf.scan(sampler_loop_body,
                      np.arange(num_warmup_steps + num_results), initial_state)
    return tf.nest.map_structure(lambda x: x[num_warmup_steps:], samples)
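
This variant adds a `num_chains` argument; a brief sketch of drawing several chains in parallel, reusing the hypothetical `model` and `observed_time_series` from the earlier sketch under Example #3:

# With four parallel chains, every field of the returned GibbsSamplerState
# carries a chain dimension in addition to the draws dimension, e.g.
# `samples.observation_noise_scale` has shape [500, 4] here.
samples = gibbs_sampler.fit_with_gibbs_sampling(
    model,
    observed_time_series,
    num_chains=4,
    num_results=500,
    num_warmup_steps=100,
    seed=(0, 2))
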
Example #17
def build_default_model(observed_time_series,
                        base_component=sts_components.LocalLinearTrend,
                        observation_noise_scale_prior=None,
                        drift_scale_prior=None,
                        allow_seasonal_effect_drift=True,
                        name=None):
  """Builds a model with seasonality from a Pandas Series or DataFrame.

  Returns a model of the form
  `tfp.sts.Sum([base_component] + seasonal_components)`, where
  `seasonal_components` are automatically selected using the frequency from the
  `DatetimeIndex` of the provided `pd.Series` or `pd.DataFrame`. If the index
  does not have a set frequency, an error is raised; use
  `tfp.sts.regularize_series` to infer a frequency and build a regularly
  spaced series before calling this function.

  Args:
    observed_time_series: Instance of `pd.Series` or `pd.DataFrame` containing
      one or more time series indexed by a `pd.DatetimeIndex`.
    base_component: Optional subclass of `tfp.sts.StructuralTimeSeries`
      specifying the model used for residual variation in the series not
      explained by seasonal or other effects. May also be an *instance* of such
      a class with specific priors set; if not provided, such an instance will
      be constructed with heuristic default priors.
      Default value: `tfp.sts.LocalLinearTrend`.
    observation_noise_scale_prior: Optional `tfd.Distribution` instance
      specifying a prior on `observation_noise_scale`. If `None`, a heuristic
      default prior is constructed based on the provided `observed_time_series`.
      Default value: `None`.
    drift_scale_prior: Optional `tfd.Distribution` instance
      specifying a prior on the `drift_scale` parameter of Seasonal components.
      If `None`, a heuristic default prior is constructed based on the provided
      `observed_time_series`.
      Default value: `None`.
    allow_seasonal_effect_drift: optional Python `bool` specifying whether the
      seasonal effects can drift over time.  Setting this to `False`
      removes the `drift_scale` parameter from the model. This is
      mathematically equivalent to `drift_scale_prior = tfd.Deterministic(0.)`,
      but removing drift directly is preferred because it avoids the use of a
      degenerate prior.
      Default value: `True`.
    name: Python `str` name for ops created by this function.
      Default value: `None` (i.e., 'build_default_model').
  Returns:
    model: instance of `tfp.sts.Sum` representing a model for the given data.

  #### Example

  Consider a series of eleven data points, covering a period of two weeks
  with three missing days.

  ```python
  import pandas as pd
  import tensorflow as tf
  import tensorflow_probability as tfp

  series = pd.Series(
    [100., 27., 92., 66., 51., 126., 113., 95., 48., 20., 59.,],
    index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-04',
                          '2020-01-05', '2020-01-06', '2020-01-07',
                          '2020-01-10', '2020-01-11', '2020-01-12',
                          '2020-01-13', '2020-01-14']))
  ```

  Before calling `build_default_model`, we must regularize the series to follow
  a fixed frequency (here, daily observations):

  ```python
  series = tfp.sts.regularize_series(series)
  # len(series) ==> 14
  ```

  The default model will combine a LocalLinearTrend baseline with a Seasonal
  component to capture day-of-week effects. We can then fit this model to our
  observed data. Here we'll use variational inference:

  ```python
  model = tfp.sts.build_default_model(series)
  # len(model.components) == 2

  # Fit the model using variational inference.
  surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(model)
  losses = tfp.vi.fit_surrogate_posterior(
    target_log_prob_fn=model.joint_log_prob(series),
    surrogate_posterior=surrogate_posterior,
    optimizer=tf.optimizers.Adam(0.1),
    num_steps=1000,
    convergence_criterion=(
      tfp.optimizer.convergence_criteria.SuccessiveGradientsAreUncorrelated(
        window_size=20, min_num_steps=50)),
    jit_compile=True)
  parameter_samples = surrogate_posterior.sample(50)
  ```

  Finally, use the fitted parameters to forecast the next week of data:

  ```python
  forecast_dist = tfp.sts.forecast(model,
                                   observed_time_series=series,
                                   parameter_samples=parameter_samples,
                                   num_steps_forecast=7)
  # Strip trailing unit dimension from LinearGaussianStateSpaceModel events.
  forecast_mean = forecast_dist.mean()[..., 0]
  forecast_stddev = forecast_dist.stddev()[..., 0]

  forecast = pd.DataFrame(
      {'mean': forecast_mean,
       'lower_bound': forecast_mean - 2. * forecast_stddev,
       'upper_bound': forecast_mean + 2. * forecast_stddev},
      index=pd.date_range(start=series.index[-1] + series.index.freq,
                          periods=7,
                          freq=series.index.freq))
  ```

  """
  with tf.name_scope(name or 'build_default_model'):
    frequency = getattr(observed_time_series.index, 'freq', None)
    if frequency is None:
      raise ValueError('Provided series has no set frequency. Consider '
                       'using `tfp.sts.regularize_series` to infer a frequency '
                       'and build a regularly spaced series.')
    observed_time_series = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    if not isinstance(base_component,
                      structural_time_series.StructuralTimeSeries):
      # Build a component of the given type using default priors.
      base_component = base_component(observed_time_series=observed_time_series)

    components = [base_component]
    seasonal_structure = seasonality_util.create_seasonal_structure(
        frequency=frequency,
        num_steps=int(observed_time_series.time_series.shape[-2]))
    for season_type, season in seasonal_structure.items():
      components.append(
          sts_components.Seasonal(num_seasons=season.num,
                                  num_steps_per_season=season.duration,
                                  drift_scale_prior=drift_scale_prior,
                                  allow_drift=allow_seasonal_effect_drift,
                                  observed_time_series=observed_time_series,
                                  name=str(season_type)))
    return sts_components.Sum(
        components,
        observed_time_series=observed_time_series,
        observation_noise_scale_prior=observation_noise_scale_prior)
Example #18
def decompose_by_component(model, observed_time_series, parameter_samples):
  """Decompose an observed time series into contributions from each component.

  This method decomposes a time series according to the posterior representation
  of a structural time series model. In particular, it:
    - Computes the posterior marginal mean and covariances over the additive
      model's latent space.
    - Decomposes the latent posterior into the marginal blocks for each
      model component.
    - Maps the per-component latent posteriors back through each component's
      observation model, to generate the time series modeled by that component.

  Args:
    model: An instance of `tfp.sts.Sum` representing a structural time series
      model.
    observed_time_series: `float` `Tensor` of shape
      `batch_shape + [num_timesteps, 1]` (omitting the trailing unit dimension
      is also supported when `num_timesteps > 1`), specifying an observed time
      series. May optionally be an instance of `tfp.sts.MaskedTimeSeries`, which
      includes a mask `Tensor` to specify timesteps with missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
  Returns:
    component_dists: A `collections.OrderedDict` instance mapping
      component StructuralTimeSeries instances (elements of `model.components`)
      to `tfd.Distribution` instances representing the posterior marginal
      distributions on the process modeled by each component. Each distribution
      has batch shape matching that of the latent posterior marginals (including
      the `num_posterior_draws` dimension), and event shape of `[num_timesteps]`.

  #### Examples

  Suppose we've built a model and fit it to data:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    num_steps_forecast = 50
    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  To extract the contributions of individual components, pass the time series
  and sampled parameters into `decompose_by_component`:

  ```python
    component_dists = decompose_by_component(
      model,
      observed_time_series=observed_time_series,
      parameter_samples=samples)

    # Component mean and stddev have shape `[len(observed_time_series)]`.
    day_of_week_effect_mean = component_dists[day_of_week].mean()
    day_of_week_effect_stddev = component_dists[day_of_week].stddev()
  ```

  Using the component distributions, we can visualize the uncertainty for
  each component:

  ```python
  from matplotlib import pylab as plt
  import numpy as np
  num_components = len(component_dists)
  xs = np.arange(len(observed_time_series))
  fig = plt.figure(figsize=(12, 3 * num_components))
  for i, (component, component_dist) in enumerate(component_dists.items()):

    # If in graph mode, replace `.numpy()` with `.eval()` or `sess.run()`.
    component_mean = component_dist.mean().numpy()
    component_stddev = component_dist.stddev().numpy()

    ax = fig.add_subplot(num_components, 1, 1 + i)
    ax.plot(xs, component_mean, lw=2)
    ax.fill_between(xs,
                    component_mean - 2 * component_stddev,
                    component_mean + 2 * component_stddev,
                    alpha=0.5)
    ax.set_title(component.name)
  ```

  """

  with tf.compat.v1.name_scope('decompose_by_component',
                               values=[observed_time_series]):
    [
        observed_time_series,
        is_missing
    ] = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # Run smoothing over the training timesteps to extract the
    # posterior on latents.
    num_timesteps = dist_util.prefer_static_value(
        tf.shape(input=observed_time_series))[-2]
    ssm = model.make_state_space_model(num_timesteps=num_timesteps,
                                       param_vals=parameter_samples)
    posterior_means, posterior_covs = ssm.posterior_marginals(
        observed_time_series, mask=is_missing)

    return _decompose_from_posterior_marginals(
        model, posterior_means, posterior_covs, parameter_samples)
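
As a hedged follow-up sketch (reusing `component_dists` from the docstring example above, and assuming eager execution): for an additive model, the per-component posterior means sum to the model's overall noise-free fit, which makes a convenient sanity check on the decomposition.

```python
import numpy as np

# Sum the per-component posterior means; for an additive STS model this
# reconstructs the overall smoothed (noise-free) fit to the series.
component_means = [dist.mean().numpy() for dist in component_dists.values()]
overall_fit = np.sum(component_means, axis=0)  # shape [num_timesteps]
```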
Example #19
0
def one_step_predictive(model, observed_time_series, parameter_samples):
    """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior samples
      of model parameters, with shapes `[concat([[num_posterior_draws],
      param.prior.batch_shape, param.prior.event_shape]) for param in
      model.parameters]`. This may optionally also be a map (Python `dict`) of
      parameter names to `Tensor` values.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps]` and batch shape `concat([sample_shape,
      model.batch_shape])`, with
      `num_posterior_draws` mixture components. The `t`th step represents the
      forecast distribution `p(observed_time_series[t] |
      observed_time_series[0:t-1], parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

    predictive_means = one_step_predictive_dist.mean()
    predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct a forecast using
  samples from the variational posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    import numpy as np
    def plot_one_step_predictive(observed_time_series,
                                 forecast_mean,
                                 forecast_scale):
      plt.figure(figsize=(12, 6))
      num_timesteps = forecast_mean.shape[-1]
      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(observed_time_series, label="observed time series", color=c1)
      plt.plot(forecast_mean, label="one-step prediction", color=c2)
      plt.fill_between(np.arange(num_timesteps),
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale,
                       alpha=0.1, color=c2)
      plt.legend()

    plot_one_step_predictive(observed_time_series,
                             forecast_mean=predictive_means,
                             forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations from
  the mean:

  ```python
    num_timesteps = predictive_means.shape[-1]
    z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
                 / predictive_scales[..., :-1])
    anomalous_timesteps = tf.boolean_mask(
        tf.range(1, num_timesteps),
        tf.abs(z_scores) > 2.0)
  ```

  """

    with tf.name_scope('one_step_predictive'):

        [observed_time_series,
         is_missing] = sts_util.canonicalize_observed_time_series_with_mask(
             observed_time_series)

        # Run filtering over the training timesteps to extract the
        # predictive means and variances.
        num_timesteps = dist_util.prefer_static_value(
            tf.shape(input=observed_time_series))[-2]
        lgssm = model.make_state_space_model(num_timesteps=num_timesteps,
                                             param_vals=parameter_samples)
        (_, _, _, _, _, observation_means,
         observation_covs) = lgssm.forward_filter(observed_time_series,
                                                  mask=is_missing)

        # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
        # to a scalar time series.
        return sts_util.mix_over_posterior_draws(
            means=observation_means[..., 0],
            variances=observation_covs[..., 0, 0])
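
A hedged follow-up sketch, reusing `model`, `observed_time_series`, and `samples` from the docstring example above: the returned mixture can also be sampled directly to obtain one-step-ahead predictive draws.

```python
# Draw sample trajectories from the one-step-ahead predictive distribution.
pred_dist = one_step_predictive(model, observed_time_series,
                                parameter_samples=samples)
predictive_draws = pred_dist.sample(10)  # shape [10, ..., num_timesteps]
```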
Example #20
0
def fit_with_gibbs_sampling(model,
                            observed_time_series,
                            num_chains=(),
                            num_results=2000,
                            num_warmup_steps=200,
                            initial_state=None,
                            seed=None):
    """Fits parameters for an STS model using Gibbs sampling.

  Args:
    model: A `tfp.sts.StructuralTimeSeries` model instance returned by
      `build_model_for_gibbs_fitting`.
    observed_time_series: `float` `Tensor` of shape `[..., T, 1]`
      (omitting the trailing unit dimension is also supported when `T > 1`),
      specifying an observed time series. May optionally be an instance of
      `tfp.sts.MaskedTimeSeries`, which includes a mask `Tensor` to specify
      timesteps with missing observations.
    num_chains: Optional int to indicate the number of parallel MCMC chains.
      Defaults to an empty tuple, which samples a single chain.
    num_results: Optional int to indicate the number of MCMC samples.
    num_warmup_steps: Optional int to indicate the number of warmup MCMC
      samples to discard before collecting results.
    initial_state: A `GibbsSamplerState` structure of the initial states of the
      MCMC chains.
    seed: Optional Python `int` seed controlling the sampled values.
  Returns:
    samples: A `GibbsSamplerState` structure of posterior samples.
  """
    if not hasattr(model, 'supports_gibbs_sampling'):
        raise ValueError(
            'This STS model does not support Gibbs sampling. Models '
            'for Gibbs sampling must be created using the '
            'method `build_model_for_gibbs_fitting`.')
    if not tf.nest.is_nested(num_chains):
        num_chains = [num_chains]

    [observed_time_series,
     is_missing] = sts_util.canonicalize_observed_time_series_with_mask(
         observed_time_series)
    dtype = observed_time_series.dtype

    # The canonicalized time series always has trailing dimension `1`,
    # because although LinearGaussianSSMs support vector observations, STS models
    # describe scalar time series only. For our purposes it'll be cleaner to
    # remove this dimension.
    observed_time_series = observed_time_series[..., 0]
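    # Prepend the requested chain dimension(s) so that every element of the
    # sampler state carries a separate value per chain.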
    batch_shape = prefer_static.concat(
        [num_chains,
         prefer_static.shape(observed_time_series)[:-1]], axis=-1)
    level_slope_shape = prefer_static.concat(
        [num_chains, prefer_static.shape(observed_time_series)], axis=-1)

    # Treat a LocalLevel model as the special case of LocalLinearTrend where
    # the slope_scale is always zero.
    initial_slope_scale = 0.
    initial_slope = 0.
    if isinstance(model.components[0], sts.LocalLinearTrend):
        initial_slope_scale = 1. * tf.ones(batch_shape, dtype=dtype)
        initial_slope = tf.zeros(level_slope_shape, dtype=dtype)

    if initial_state is None:
        initial_state = GibbsSamplerState(
            observation_noise_scale=tf.ones(batch_shape, dtype=dtype),
            level_scale=tf.ones(batch_shape, dtype=dtype),
            slope_scale=initial_slope_scale,
            weights=tf.zeros(prefer_static.concat(
                [batch_shape,
                 _get_design_matrix(model).shape[-1:]], axis=0),
                             dtype=dtype),
            level=tf.zeros(level_slope_shape, dtype=dtype),
            slope=initial_slope,
            seed=None)  # Set below.

    if isinstance(seed, six.integer_types):
        tf.random.set_seed(seed)

    # Always use the passed-in `seed` arg, ignoring any seed in the initial state.
    initial_state = initial_state._replace(
        seed=samplers.sanitize_seed(seed, salt='initial_GibbsSamplerState'))

    sampler_loop_body = _build_sampler_loop_body(model, observed_time_series,
                                                 is_missing)

    samples = tf.scan(sampler_loop_body,
                      np.arange(num_warmup_steps + num_results), initial_state)
    return tf.nest.map_structure(lambda x: x[num_warmup_steps:], samples)
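
A hedged usage sketch: it assumes `model` was built with `build_model_for_gibbs_fitting` (as the check above requires) and a hypothetical `observed_time_series`; the field names follow the `GibbsSamplerState` structure initialized above.

```python
# Run a short Gibbs chain and inspect the posterior scale samples.
samples = fit_with_gibbs_sampling(model,
                                  observed_time_series,
                                  num_results=500,
                                  num_warmup_steps=100,
                                  seed=42)
# Each field carries a leading dimension of `num_results` posterior draws
# (after discarding warmup), plus any `num_chains` and batch dimensions.
print(samples.observation_noise_scale.shape)
print(samples.level_scale.shape)
```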
Example #21
0
def impute_missing_values(model,
                          observed_time_series,
                          parameter_samples,
                          include_observation_noise=False):
    """Runs posterior inference to impute the missing values in a time series.

  This method computes the posterior marginals `p(latent state | observations)`,
  given the time series at observed timesteps (a missingness mask should
  be specified using `tfp.sts.MaskedTimeSeries`). It pushes this posterior back
  through the observation model to impute a predictive distribution on the
  observed time series. At unobserved steps, this is an imputed value; at other
  steps it is interpreted as the model's estimate of the underlying noise-free
  series.

  Args:
    model: `tfp.sts.Sum` instance defining an additive STS model.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes `[concat([
      [num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
    include_observation_noise: If `False`, the imputed uncertainties
      represent the model's estimate of the noise-free time series at each
      timestep. If `True`, they represent the model's estimate of the range of
      values that could be *observed* at each timestep, including any i.i.d.
      observation noise.
      Default value: `False`.

  Returns:
    imputed_series_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps]` and batch shape `concat([sample_shape,
      model.batch_shape])`, with `num_posterior_draws` mixture components.

  #### Example

  To specify a time series with missing values, use `tfp.sts.MaskedTimeSeries`:

  ```python
  time_series_with_nans = [-1., 1., np.nan, 2.4, np.nan, 5]
  observed_time_series = tfp.sts.MaskedTimeSeries(
    time_series=time_series_with_nans,
    is_missing=tf.math.is_nan(time_series_with_nans))
  ```

  Masked time series can be passed to `tfp.sts` methods in place of an
  `observed_time_series` `Tensor`:

  ```python
  # Build model using observed time series to set heuristic priors.
  linear_trend_model = tfp.sts.LocalLinearTrend(
    observed_time_series=observed_time_series)
  model = tfp.sts.Sum([linear_trend_model],
                      observed_time_series=observed_time_series)

  # Fit model to data
  parameter_samples, _ = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  After fitting a model, `impute_missing_values` will return a distribution over
  the value of the series at each timestep, imputing the missing steps:

  ```python
  # Impute missing values
  imputed_series_distribution = tfp.sts.impute_missing_values(
    model, observed_time_series, parameter_samples=parameter_samples)
  print('imputed means and stddevs: ',
        imputed_series_distribution.mean(),
        imputed_series_distribution.stddev())
  ```
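
  As a hedged extension of this example, the imputed distribution can be
  restricted to the timesteps that were originally missing (names reused from
  the snippets above):

  ```python
  # Keep only the imputed means at the masked (missing) timesteps.
  imputed_means_at_missing = tf.boolean_mask(
      imputed_series_distribution.mean(),
      observed_time_series.is_missing)
  ```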

  """
    with tf.name_scope('impute_missing_values'):

        [observed_time_series,
         mask] = sts_util.canonicalize_observed_time_series_with_mask(
             observed_time_series)

        # Run smoothing over the training timesteps to extract the
        # predictive means and variances.
        num_timesteps = dist_util.prefer_static_value(
            tf.shape(input=observed_time_series))[-2]
        lgssm = model.make_state_space_model(num_timesteps=num_timesteps,
                                             param_vals=parameter_samples)
        posterior_means, posterior_covs = lgssm.posterior_marginals(
            observed_time_series, mask=mask)

        observation_means, observation_covs = lgssm.latents_to_observations(
            latent_means=posterior_means, latent_covs=posterior_covs)

        if not include_observation_noise:
            # Extract just the variance of observation noise by pushing forward
            # zero-variance latents.
            _, observation_noise_covs = lgssm.latents_to_observations(
                latent_means=posterior_means,
                latent_covs=tf.zeros_like(posterior_covs))
            # Subtract out the observation noise that was added in the original
            # pushforward. Note that this could cause numerical issues if the
            # observation noise is very large. If this becomes an issue we could
            # avoid the subtraction by plumbing `include_observation_noise` through
            # `lgssm.latents_to_observations`.
            observation_covs -= observation_noise_covs

        # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
        # to a scalar time series.
        return sts_util.mix_over_posterior_draws(
            means=observation_means[..., 0],
            variances=observation_covs[..., 0, 0])
Example #22
0
    def joint_log_prob(self, observed_time_series):
        """Build the joint density `log p(params) + log p(y|params)` as a callable.

    Args:
      observed_time_series: Observed `Tensor` trajectories of shape
        `sample_shape + batch_shape + [num_timesteps, 1]` (the trailing
        `1` dimension is optional if `num_timesteps > 1`), where
        `batch_shape` should match `self.batch_shape` (the broadcast batch
        shape of all priors on parameters for this structural time series
        model). May optionally be an instance of `tfp.sts.MaskedTimeSeries`,
        which includes a mask `Tensor` to specify timesteps with missing
        observations.

    Returns:
     log_joint_fn: A function taking a `Tensor` argument for each model
       parameter, in canonical order, and returning a `Tensor` log probability
       of shape `batch_shape`. Note that, *unlike* the `log_prob` methods of
       `tfp.Distribution`s, `log_joint_fn` sums over the `sample_shape` of `y`,
       so that `sample_shape` does not appear in the output log-probability. This
       corresponds to viewing multiple samples of `y` as iid observations from a
       single model, which is typically the desired behavior for parameter
       inference.
    """

        with tf.compat.v1.name_scope('joint_log_prob',
                                     values=[observed_time_series]):
            [observed_time_series,
             mask] = sts_util.canonicalize_observed_time_series_with_mask(
                 observed_time_series)

            num_timesteps = distribution_util.prefer_static_value(
                tf.shape(input=observed_time_series))[-2]

            def log_joint_fn(*param_vals):
                """Generated log-density function."""

                # Sum the log_prob values from parameter priors.
                param_lp = sum([
                    param.prior.log_prob(param_val)
                    for (param, param_val) in zip(self.parameters, param_vals)
                ])

                # Build a linear Gaussian state space model and evaluate the marginal
                # log_prob on observations.
                lgssm = self.make_state_space_model(
                    param_vals=param_vals, num_timesteps=num_timesteps)
                observation_lp = lgssm.log_prob(observed_time_series,
                                                mask=mask)

                # Sum over likelihoods from iid observations. Without this sum,
                # adding `param_lp + observation_lp` would broadcast the param priors
                # over the sample shape, which incorrectly multi-counts the param
                # priors.
                sample_ndims = tf.maximum(
                    0,
                    tf.rank(observation_lp) - tf.rank(param_lp))
                observation_lp = tf.reduce_sum(input_tensor=observation_lp,
                                               axis=tf.range(sample_ndims))

                return param_lp + observation_lp

            return log_joint_fn
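
A hedged usage sketch with a hypothetical `model` and `observed_time_series`: the returned callable takes one value per parameter, in the order of `model.parameters`.

```python
# Evaluate the joint density at values drawn from the parameter priors.
log_joint_fn = model.joint_log_prob(observed_time_series)
initial_values = [param.prior.sample() for param in model.parameters]
lp = log_joint_fn(*initial_values)  # shape: model.batch_shape
```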
Example #23
0
  def __init__(self,
               order,
               coefficients_prior=None,
               level_scale_prior=None,
               initial_state_prior=None,
               coefficient_constraining_bijector=None,
               observed_time_series=None,
               name=None):
    """Specify an autoregressive model.

    Args:
      order: scalar Python positive `int` specifying the number of past
        timesteps to regress on.
      coefficients_prior: optional `tfd.Distribution` instance specifying a
        prior on the `coefficients` parameter. If `None`, a default standard
        normal (`tfd.MultivariateNormalDiag(scale_diag=tf.ones([order]))`) prior
        is used.
        Default value: `None`.
      level_scale_prior: optional `tfd.Distribution` instance specifying a prior
        on the `level_scale` parameter. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      initial_state_prior: optional `tfd.Distribution` instance specifying a
        prior on the initial state, corresponding to the values of the process
        at a set of size `order` of imagined timesteps before the initial step.
        If `None`, a heuristic default prior is constructed based on the
        provided `observed_time_series`.
        Default value: `None`.
      coefficient_constraining_bijector: optional `tfb.Bijector` instance
        representing a constraining mapping for the autoregressive coefficients.
        For example, `tfb.Tanh()` constrains the coefficients to lie in
        `(-1, 1)`, while `tfb.Softplus()` constrains them to be positive, and
        `tfb.Identity()` implies no constraint. If `None`, the default behavior
        constrains the coefficients to lie in `(-1, 1)` using a `Tanh` bijector.
        Default value: `None`.
      observed_time_series: optional `float` `Tensor` of shape
        `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
        supported when `T > 1`), specifying an observed time series.
        Any priors not explicitly set will be given default values according to
        the scale of the observed time series (or batch of time series). May
        optionally be an instance of `tfp.sts.MaskedTimeSeries`, which includes
        a mask `Tensor` to specify timesteps with missing observations.
        Default value: `None`.
      name: the name of this model component.
        Default value: 'Autoregressive'.
    """
    with tf.name_scope(name or 'Autoregressive') as name:
      masked_time_series = None
      if observed_time_series is not None:
        masked_time_series = (
            sts_util.canonicalize_observed_time_series_with_mask(
                observed_time_series))

      dtype = dtype_util.common_dtype(
          [(masked_time_series.time_series
            if masked_time_series is not None else None),
           coefficients_prior,
           level_scale_prior,
           initial_state_prior], dtype_hint=tf.float32)

      if observed_time_series is not None:
        _, observed_stddev, observed_initial = sts_util.empirical_statistics(
            masked_time_series)
      else:
        observed_stddev, observed_initial = (
            tf.convert_to_tensor(value=1., dtype=dtype),
            tf.convert_to_tensor(value=0., dtype=dtype))
      batch_ones = tf.ones(tf.concat([
          tf.shape(observed_initial),  # Batch shape
          [order]], axis=0), dtype=dtype)

      # Heuristic default priors. Overriding these may dramatically
      # change inference performance and results.
      if coefficients_prior is None:
        coefficients_prior = tfd.MultivariateNormalDiag(
            scale_diag=batch_ones)
      if level_scale_prior is None:
        level_scale_prior = tfd.LogNormal(
            loc=tf.math.log(0.05 * observed_stddev), scale=3.)

      if (coefficients_prior.event_shape.is_fully_defined() and
          order != coefficients_prior.event_shape[0]):
        raise ValueError("Prior dimension {} doesn't match order {}.".format(
            coefficients_prior.event_shape[0], order))

      if initial_state_prior is None:
        initial_state_prior = tfd.MultivariateNormalDiag(
            loc=observed_initial[..., tf.newaxis] * batch_ones,
            scale_diag=(tf.abs(observed_initial) +
                        observed_stddev)[..., tf.newaxis] * batch_ones)

      self._order = order
      self._coefficients_prior = coefficients_prior
      self._level_scale_prior = level_scale_prior
      self._initial_state_prior = initial_state_prior

      if coefficient_constraining_bijector is None:
        coefficient_constraining_bijector = tfb.Tanh()
      super(Autoregressive, self).__init__(
          parameters=[
              Parameter('coefficients',
                        coefficients_prior,
                        coefficient_constraining_bijector),
              Parameter('level_scale', level_scale_prior,
                        tfb.Chain([tfb.AffineScalar(scale=observed_stddev),
                                   tfb.Softplus()]))
          ],
          latent_size=order,
          name=name)
  def __init__(self,
               ar_order,
               ma_order,
               integration_degree=0,
               ar_coefficients_prior=None,
               ma_coefficients_prior=None,
               level_drift_prior=None,
               level_scale_prior=None,
               initial_state_prior=None,
               ar_coefficient_constraining_bijector=None,
               ma_coefficient_constraining_bijector=None,
               observed_time_series=None,
               name=None):
    """Specifies an ARIMA(p=ar_order, d=integration_degree, q=ma_order) model.

    Args:
      ar_order: scalar Python positive `int` specifying the order of the
        autoregressive process (`p` in `ARIMA(p, d, q)`).
      ma_order: scalar Python positive `int` specifying the order of the
        moving-average process (`q` in `ARIMA(p, d, q)`).
      integration_degree: scalar Python positive `int` specifying the number
        of times to integrate an ARMA process. (`d` in `ARIMA(p, d, q)`).
        Default value: `0`.
      ar_coefficients_prior: optional `tfd.Distribution` instance specifying a
        prior on the `ar_coefficients` parameter. If `None`, a default standard
        normal (`tfd.MultivariateNormalDiag(scale_diag=tf.ones([ar_order]))`)
        prior is used.
        Default value: `None`.
      ma_coefficients_prior: optional `tfd.Distribution` instance specifying a
        prior on the `ma_coefficients` parameter. If `None`, a default standard
        normal (`tfd.MultivariateNormalDiag(scale_diag=tf.ones([ma_order]))`)
        prior is used.
        Default value: `None`.
      level_drift_prior: optional `tfd.Distribution` instance specifying a prior
        on the `level_drift` parameter. If `None`, the parameter is not inferred
        and is instead fixed to zero.
        Default value: `None`.
      level_scale_prior: optional `tfd.Distribution` instance specifying a prior
        on the `level_scale` parameter. If `None`, a heuristic default prior is
        constructed based on the provided `observed_time_series`.
        Default value: `None`.
      initial_state_prior: optional `tfd.Distribution` instance specifying a
        prior on the initial state, corresponding to the values of the process
        at a set of size `order` of imagined timesteps before the initial step.
        If `None`, a heuristic default prior is constructed based on the
        provided `observed_time_series`.
        Default value: `None`.
      ar_coefficient_constraining_bijector: optional `tfb.Bijector` instance
        representing a constraining mapping for the autoregressive coefficients.
        For example, `tfb.Tanh()` constrains the coefficients to lie in
        `(-1, 1)`, while `tfb.Softplus()` constrains them to be positive, and
        `tfb.Identity()` implies no constraint. If `None`, the default behavior
        constrains the coefficients to lie in `(-1, 1)` using a `Tanh` bijector.
        Default value: `None`.
      ma_coefficient_constraining_bijector: optional `tfb.Bijector` instance
        representing a constraining mapping for the moving average coefficients.
        For example, `tfb.Tanh()` constrains the coefficients to lie in
        `(-1, 1)`, while `tfb.Softplus()` constrains them to be positive, and
        `tfb.Identity()` implies no constraint. If `None`, the default behavior
        is to apply no constraint.
        Default value: `None`.
      observed_time_series: optional `float` `Tensor` of shape
        `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
        supported when `T > 1`), specifying an observed time series. Any `NaN`s
        are interpreted as missing observations; missingness may be also be
        explicitly specified by passing a `tfp.sts.MaskedTimeSeries` instance.
        Any priors not explicitly set will be given default values according to
        the scale of the observed time series (or batch of time series).
        Default value: `None`.
      name: the name of this model component.
        Default value: 'ARIMA'.
    """
    init_parameters = dict(locals())
    with tf.name_scope(name or 'ARIMA') as name:
      masked_time_series = None
      if observed_time_series is not None:
        masked_time_series = (
            sts_util.canonicalize_observed_time_series_with_mask(
                observed_time_series))
      dtype = dtype_util.common_dtype(
          [(masked_time_series.time_series
            if masked_time_series is not None else None),
           ar_coefficients_prior,
           ma_coefficients_prior,
           level_scale_prior,
           initial_state_prior], dtype_hint=tf.float32)

      if observed_time_series is not None:
        for _ in range(integration_degree):
          # Compute statistics using `integration_degree`-order differences.
          masked_time_series = (
              missing_values_util.differentiate_masked_time_series(
                  masked_time_series))
        _, observed_stddev, observed_initial = sts_util.empirical_statistics(
            masked_time_series)
      else:
        observed_stddev, observed_initial = (
            tf.convert_to_tensor(value=1., dtype=dtype),
            tf.convert_to_tensor(value=0., dtype=dtype))
      batch_ones = ps.ones(ps.concat([
          ps.shape(observed_initial),  # Batch shape
          [1]], axis=0), dtype=dtype)

      # Heuristic default priors. Overriding these may dramatically
      # change inference performance and results.
      if ar_coefficients_prior is None:
        ar_coefficients_prior = tfd.MultivariateNormalDiag(
            scale_diag=batch_ones * ps.ones([ar_order]))
      if ma_coefficients_prior is None:
        ma_coefficients_prior = tfd.MultivariateNormalDiag(
            scale_diag=batch_ones * ps.ones([ma_order]))
      if level_scale_prior is None:
        level_scale_prior = tfd.LogNormal(
            loc=tf.math.log(0.05 * observed_stddev), scale=3.)

      if (ar_coefficients_prior.event_shape.is_fully_defined() and
          ar_order != ar_coefficients_prior.event_shape[0]):
        raise ValueError(
            "Autoregressive prior dimension {} doesn't match order {}.".format(
                ar_coefficients_prior.event_shape[0], ar_order))
      if (ma_coefficients_prior.event_shape.is_fully_defined() and
          ma_order != ma_coefficients_prior.event_shape[0]):
        raise ValueError(
            "Moving average prior dimension {} doesn't match order {}.".format(
                ma_coefficients_prior.event_shape[0], ma_order))

      latent_size = ps.maximum(ar_order, ma_order + 1) + integration_degree
      if initial_state_prior is None:
        initial_state_prior = tfd.MultivariateNormalDiag(
            loc=sts_util.pad_tensor_with_trailing_zeros(
                observed_initial[..., tf.newaxis] * batch_ones,
                num_zeros=latent_size - 1),
            scale_diag=sts_util.pad_tensor_with_trailing_zeros(
                (tf.abs(observed_initial) +
                 observed_stddev)[..., tf.newaxis] * batch_ones,
                num_zeros=latent_size - 1))

      self._ar_order = ar_order
      self._ma_order = ma_order
      self._integration_degree = integration_degree
      self._ar_coefficients_prior = ar_coefficients_prior
      self._ma_coefficients_prior = ma_coefficients_prior
      self._level_scale_prior = level_scale_prior
      self._initial_state_prior = initial_state_prior

      parameters = []
      if ar_order > 0:
        parameters.append(
            Parameter('ar_coefficients',
                      ar_coefficients_prior,
                      (ar_coefficient_constraining_bijector
                       if ar_coefficient_constraining_bijector
                       else tfb.Tanh())))
      if ma_order > 0:
        parameters.append(
            Parameter('ma_coefficients',
                      ma_coefficients_prior,
                      (ma_coefficient_constraining_bijector
                       if ma_coefficient_constraining_bijector
                       else tfb.Identity())))
      if level_drift_prior is not None:
        parameters.append(
            Parameter(
                'level_drift',
                level_drift_prior,
                tfb.Chain([
                    tfb.Scale(scale=observed_stddev),
                    (level_drift_prior.
                     experimental_default_event_space_bijector())])))
      super(AutoregressiveIntegratedMovingAverage, self).__init__(
          parameters=parameters + [
              Parameter('level_scale', level_scale_prior,
                        tfb.Chain([tfb.Scale(scale=observed_stddev),
                                   tfb.Softplus(low=dtype_util.eps(dtype))]))
          ],
          latent_size=latent_size,
          init_parameters=init_parameters,
          name=name)
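
A minimal hedged sketch of constructing these components (assuming they are exposed as `tfp.sts.Autoregressive` and `tfp.sts.AutoregressiveIntegratedMovingAverage`, with a hypothetical `observed_time_series`):

```python
# An order-2 AR component and an ARIMA(p=2, d=1, q=1) component, both with
# heuristic default priors derived from the observed series.
ar_component = tfp.sts.Autoregressive(
    order=2,
    observed_time_series=observed_time_series,
    name='autoregressive')
arima_component = tfp.sts.AutoregressiveIntegratedMovingAverage(
    ar_order=2, ma_order=1, integration_degree=1,
    observed_time_series=observed_time_series,
    name='arima')
```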