def test_create_seasonal_structure(self, start, end, freq, expected):
    """Checks `create_seasonal_structure` against a set of expected entries.

    A date range from `start` to `end` at frequency `freq` determines the
    number of steps; the seasonal structure built for that frequency and
    step count must then agree with `expected` on every key that `expected`
    contains.
    """
    num_steps = len(pd.date_range(start, end, freq=freq))
    structure = seasonality_util.create_seasonal_structure(
        frequency=freq, num_steps=num_steps)
    for season_key in expected:
        self.assertEqual(expected[season_key], structure[season_key])
# Example #2
0
def build_default_model(observed_time_series,
                        base_component=sts_components.LocalLinearTrend,
                        observation_noise_scale_prior=None,
                        drift_scale_prior=None,
                        allow_seasonal_effect_drift=True,
                        name=None):
  """Builds a model with seasonality from a Pandas Series or DataFrame.

  Returns a model of the form
  `tfp.sts.Sum([base_component] + seasonal_components)`, where
  `seasonal_components` are automatically selected using the frequency from the
  `DatetimeIndex` of the provided `pd.Series` or `pd.DataFrame`. The index must
  have a set frequency; if it does not, a `ValueError` is raised. In that case,
  consider using `tfp.sts.regularize_series` to infer a frequency and build a
  regularly spaced series before calling this function.

  Args:
    observed_time_series: Instance of `pd.Series` or `pd.DataFrame` containing
      one or more time series indexed by a `pd.DatetimeIndex`.
    base_component: Optional subclass of `tfp.sts.StructuralTimeSeries`
      specifying the model used for residual variation in the series not
      explained by seasonal or other effects. May also be an *instance* of such
      a class with specific priors set; if a class is provided, an instance
      will be constructed from it with heuristic default priors.
      Default value: `tfp.sts.LocalLinearTrend`.
    observation_noise_scale_prior: Optional `tfd.Distribution` instance
      specifying a prior on `observation_noise_scale`. If `None`, a heuristic
      default prior is constructed based on the provided `observed_time_series`.
      Default value: `None`.
    drift_scale_prior: Optional `tfd.Distribution` instance
      specifying a prior on the `drift_scale` parameter of Seasonal components.
      If `None`, a heuristic default prior is constructed based on the provided
      `observed_time_series`.
      Default value: `None`.
    allow_seasonal_effect_drift: optional Python `bool` specifying whether the
      seasonal effects can drift over time.  Setting this to `False`
      removes the `drift_scale` parameter from the model. This is
      mathematically equivalent to `drift_scale_prior = tfd.Deterministic(0.)`,
      but removing drift directly is preferred because it avoids the use of a
      degenerate prior.
      Default value: `True`.
    name: Python `str` name for ops created by this function.
      Default value: `None` (i.e., 'build_default_model').
  Returns:
    model: instance of `tfp.sts.Sum` representing a model for the given data.
  Raises:
    ValueError: if the index of `observed_time_series` has no set frequency.

  #### Example

  Consider a series of eleven data points, covering a period of two weeks
  with three missing days.

  ```python
  import pandas as pd
  import tensorflow as tf
  import tensorflow_probability as tfp

  series = pd.Series(
    [100., 27., 92., 66., 51., 126., 113., 95., 48., 20., 59.,],
    index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-04',
                          '2020-01-05', '2020-01-06', '2020-01-07',
                          '2020-01-10', '2020-01-11', '2020-01-12',
                          '2020-01-13', '2020-01-14']))
  ```

  Before calling `build_default_model`, we must regularize the series to follow
  a fixed frequency (here, daily observations):

  ```python
  series = tfp.sts.regularize_series(series)
  # len(series) ==> 14
  ```

  The default model will combine a LocalLinearTrend baseline with a Seasonal
  component to capture day-of-week effects. We can then fit this model to our
  observed data. Here we'll use variational inference:

  ```python
  model = tfp.sts.build_default_model(series)
  # len(model.components) == 2

  # Fit the model using variational inference.
  surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(model)
  losses = tfp.vi.fit_surrogate_posterior(
    target_log_prob_fn=model.joint_log_prob(series),
    surrogate_posterior=surrogate_posterior,
    optimizer=tf.optimizers.Adam(0.1),
    num_steps=1000,
    convergence_criterion=(
      tfp.optimizer.convergence_criteria.SuccessiveGradientsAreUncorrelated(
        window_size=20, min_num_steps=50)),
    jit_compile=True)
  parameter_samples = surrogate_posterior.sample(50)
  ```

  Finally, use the fitted parameters to forecast the next week of data:

  ```python
  forecast_dist = tfp.sts.forecast(model,
                                   observed_time_series=series,
                                   parameter_samples=parameter_samples,
                                   num_steps_forecast=7)
  # Strip trailing unit dimension from LinearGaussianStateSpaceModel events.
  forecast_mean = forecast_dist.mean()[..., 0]
  forecast_stddev = forecast_dist.stddev()[..., 0]

  forecast = pd.DataFrame(
      {'mean': forecast_mean,
       'lower_bound': forecast_mean - 2. * forecast_stddev,
       'upper_bound': forecast_mean + 2. * forecast_stddev},
      index=pd.date_range(start=series.index[-1] + series.index.freq,
                          periods=7,
                          freq=series.index.freq))
  ```

  """
  with tf.name_scope(name or 'build_default_model'):
    # The frequency must be read from the Pandas index *before* the series is
    # canonicalized below, since `observed_time_series` is rebound there.
    frequency = getattr(observed_time_series.index, 'freq', None)
    if frequency is None:
      raise ValueError('Provided series has no set frequency. Consider '
                       'using `tfp.sts.regularize_series` to infer a frequency '
                       'and build a regularly spaced series.')
    observed_time_series = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    if not isinstance(base_component,
                      structural_time_series.StructuralTimeSeries):
      # Build a component of the given type using default priors.
      base_component = base_component(observed_time_series=observed_time_series)

    seasonal_structure = seasonality_util.create_seasonal_structure(
        frequency=frequency,
        num_steps=int(observed_time_series.time_series.shape[-2]))
    # One Seasonal component per detected season type, named after that type.
    seasonal_components = [
        sts_components.Seasonal(num_seasons=season.num,
                                num_steps_per_season=season.duration,
                                drift_scale_prior=drift_scale_prior,
                                allow_drift=allow_seasonal_effect_drift,
                                observed_time_series=observed_time_series,
                                name=str(season_type))
        for season_type, season in seasonal_structure.items()]
    return sts_components.Sum(
        [base_component] + seasonal_components,
        observed_time_series=observed_time_series,
        observation_noise_scale_prior=observation_noise_scale_prior)
def detect_anomalies(series,
                     anomaly_threshold=0.01,
                     use_gibbs_predictive_dist=False,
                     num_warmup_steps=50,
                     num_samples=100,
                     jit_compile=False,
                     seed=None):
    """Detects anomalies in a Pandas time series using a default seasonal model.

    This function fits a `LocalLinearTrend` model with automatically determined
    seasonal effects, and returns a predictive credible interval at each step
    of the series. The fitting is done via Gibbs sampling, implemented
    specifically for this model class, which sometimes gives useful results
    more quickly than other fitting methods such as VI or HMC.

    Args:
      series: a Pandas `pd.Series` or `pd.DataFrame` instance indexed by a
        `pd.DatetimeIndex`. This may be irregular (missing timesteps) and/or
        contain unobserved steps indicated by `NaN` values (`NaN` values may
        also be provided to indicate future steps at which a forecast is
        desired). Multiple columns in a `pd.DataFrame` will generate results
        with a batch dimension. The series is regularized internally before
        the model is fit.
      anomaly_threshold: float, confidence level for anomaly detection. An
        anomaly will be detected if the observed series falls outside the
        equal-tailed credible interval containing `(1 - anomaly_threshold)` of
        the posterior predictive probability mass.
        Default value: `0.01`.
      use_gibbs_predictive_dist: Python `bool`. If `True`, the predictive
        distribution is derived from Gibbs samples of the latent level, which
        incorporate information from the entire series *including future
        timesteps*. Otherwise, the predictive distribution is the 'filtering'
        distribution in which (conditioned on sampled parameters) the
        prediction at each step depends only on values observed at previous
        steps.
        Default value: `False`.
      num_warmup_steps: `int` number of steps to take before starting to
        collect samples.
        Default value: `50`.
      num_samples: `int` number of steps to take while sampling parameter
        values.
        Default value: `100`.
      jit_compile: Python `bool`. If `True`, compile the sampler with XLA. This
        adds overhead to the first call, but may speed up subsequent calls with
        series of the same shape and frequency.
        Default value: `False`.
      seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
        Default value: `None`.
    Returns:
      prediction_output: instance of `PredictionOutput` named tuple containing
        the predicted credible intervals for each point (omitting the first) in
        the series. Its fields are `times` (the index of the regularized
        series), `observed_time_series`, `mean`, `lower_limit`, `upper_limit`,
        `tail_probabilities`, and `is_anomaly` (`True` wherever
        `tail_probabilities < anomaly_threshold`).
    """
    regularized_series = regularization.regularize_series(series)
    observed_time_series = sts_util.canonicalize_observed_time_series_with_mask(
        regularized_series)
    # Match the threshold's dtype to the series so the comparison against
    # `tail_probabilities` below is dtype-consistent.
    anomaly_threshold = tf.convert_to_tensor(
        anomaly_threshold,
        dtype=observed_time_series.time_series.dtype,
        name='anomaly_threshold')

    seasonal_structure = seasonality_util.create_seasonal_structure(
        frequency=regularized_series.index.freq,
        num_steps=len(regularized_series))
    # Convert SeasonType keys into strings, because `tf.function` doesn't like
    # enum-valued arguments.
    seasonal_structure = {str(k): v for (k, v) in seasonal_structure.items()}
    inner_fn = (_detect_anomalies_inner_compiled
                if jit_compile else _detect_anomalies_inner)
    lower_limit, upper_limit, mean, tail_probabilities = inner_fn(
        observed_time_series,
        seasonal_structure=seasonal_structure,
        use_gibbs_predictive_dist=use_gibbs_predictive_dist,
        num_warmup_steps=num_warmup_steps,
        num_samples=num_samples,
        seed=seed)
    return PredictionOutput(
        times=regularized_series.index,
        # Drop the trailing unit dimension of the canonicalized series.
        observed_time_series=observed_time_series.time_series[..., 0],
        mean=mean,
        lower_limit=lower_limit,
        upper_limit=upper_limit,
        tail_probabilities=tail_probabilities,
        is_anomaly=tail_probabilities < anomaly_threshold)