def test_create_seasonal_structure(self, start, end, freq, expected):
  """Checks the seasonal structure built for a date range against `expected`.

  The number of steps is taken from the length of
  `pd.date_range(start, end, freq=freq)`; every key listed in `expected` must
  map to an equal value in the structure returned by
  `seasonality_util.create_seasonal_structure`.
  """
  num_steps = len(pd.date_range(start, end, freq=freq))
  structure = seasonality_util.create_seasonal_structure(
      frequency=freq, num_steps=num_steps)
  # Only the keys named in `expected` are checked; extra keys in `structure`
  # are ignored.
  for season_type in expected:
    self.assertEqual(expected[season_type], structure[season_type])
def build_default_model(observed_time_series,
                        base_component=sts_components.LocalLinearTrend,
                        observation_noise_scale_prior=None,
                        drift_scale_prior=None,
                        allow_seasonal_effect_drift=True,
                        name=None):
  """Builds a model with seasonality from a Pandas Series or DataFrame.

  Returns a model of the form
  `tfp.sts.Sum([base_component] + seasonal_components)`, where
  `seasonal_components` are automatically selected using the frequency from the
  `DatetimeIndex` of the provided `pd.Series` or `pd.DataFrame`.

  The index must have a set frequency (`observed_time_series.index.freq`). If it
  does not, a `ValueError` is raised; use `tfp.sts.regularize_series` to infer
  a frequency and build a regularly spaced series before calling this function.

  Args:
    observed_time_series: Instance of `pd.Series` or `pd.DataFrame` containing
      one or more time series indexed by a `pd.DatetimeIndex`.
    base_component: Optional subclass of `tfp.sts.StructuralTimeSeries`
      specifying the model used for residual variation in the series not
      explained by seasonal or other effects. May also be an *instance* of such
      a class with specific priors set; if a class is given instead, it is
      instantiated as `base_component(observed_time_series=...)` so that it
      uses its default priors.
      Default value: `tfp.sts.LocalLinearTrend`.
    observation_noise_scale_prior: Optional `tfd.Distribution` instance
      specifying a prior on `observation_noise_scale`. If `None`, a heuristic
      default prior is constructed based on the provided
      `observed_time_series`.
      Default value: `None`.
    drift_scale_prior: Optional `tfd.Distribution` instance
      specifying a prior on the `drift_scale` parameter of Seasonal components.
      If `None`, a heuristic default prior is constructed based on the provided
      `observed_time_series`.
      Default value: `None`.
    allow_seasonal_effect_drift: optional Python `bool` specifying whether the
      seasonal effects can drift over time. Setting this to `False`
      removes the `drift_scale` parameter from the model. This is
      mathematically equivalent to
      `drift_scale_prior = tfd.Deterministic(0.)`, but removing drift
      directly is preferred because it avoids the use of a degenerate prior.
      Default value: `True`.
    name: Python `str` name for ops created by this function.
      Default value: `None` (i.e., 'build_default_model').

  Returns:
    model: instance of `tfp.sts.Sum` representing a model for the given data.

  Raises:
    ValueError: if the index of `observed_time_series` has no set frequency.

  #### Example

  Consider a series of eleven data points, covering a period of two weeks
  with three missing days.

  ```python
  import pandas as pd
  import tensorflow as tf
  import tensorflow_probability as tfp

  series = pd.Series(
    [100., 27., 92., 66., 51., 126., 113., 95., 48., 20., 59.],
    index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-04',
                          '2020-01-05', '2020-01-06', '2020-01-07',
                          '2020-01-10', '2020-01-11', '2020-01-12',
                          '2020-01-13', '2020-01-14']))
  ```

  Before calling `build_default_model`, we must regularize the series to follow
  a fixed frequency (here, daily observations):

  ```python
  series = tfp.sts.regularize_series(series)
  # len(series) ==> 14
  ```

  The default model will combine a LocalLinearTrend baseline with a Seasonal
  component to capture day-of-week effects. We can then fit this model to our
  observed data. Here we'll use variational inference:

  ```python
  model = tfp.sts.build_default_model(series)
  # len(model.components) == 2

  # Fit the model using variational inference.
  surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(model)
  losses = tfp.vi.fit_surrogate_posterior(
    target_log_prob_fn=model.joint_log_prob(series),
    surrogate_posterior=surrogate_posterior,
    optimizer=tf.optimizers.Adam(0.1),
    num_steps=1000,
    convergence_criterion=(
      tfp.optimizer.convergence_criteria.SuccessiveGradientsAreUncorrelated(
        window_size=20, min_num_steps=50)),
    jit_compile=True)
  parameter_samples = surrogate_posterior.sample(50)
  ```

  Finally, use the fitted parameters to forecast the next week of data:

  ```python
  forecast_dist = tfp.sts.forecast(model,
                                   observed_time_series=series,
                                   parameter_samples=parameter_samples,
                                   num_steps_forecast=7)
  # Strip trailing unit dimension from LinearGaussianStateSpaceModel events.
  forecast_mean = forecast_dist.mean()[..., 0]
  forecast_stddev = forecast_dist.stddev()[..., 0]

  forecast = pd.DataFrame(
      {'mean': forecast_mean,
       'lower_bound': forecast_mean - 2. * forecast_stddev,
       'upper_bound': forecast_mean + 2. * forecast_stddev},
      index=pd.date_range(start=series.index[-1] + series.index.freq,
                          periods=7,
                          freq=series.index.freq))
  ```

  """
  with tf.name_scope(name or 'build_default_model'):
    # The frequency must be read from the Pandas index *before* the series is
    # canonicalized below, since that call rebinds `observed_time_series` to a
    # different object.
    frequency = getattr(observed_time_series.index, 'freq', None)
    if frequency is None:
      raise ValueError('Provided series has no set frequency. Consider '
                       'using `tfp.sts.regularize_series` to infer a frequency '
                       'and build a regularly spaced series.')
    observed_time_series = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    if not isinstance(base_component,
                      structural_time_series.StructuralTimeSeries):
      # A class (rather than an instance) was given: build a component of that
      # type using default priors.
      base_component = base_component(observed_time_series=observed_time_series)

    seasonal_structure = seasonality_util.create_seasonal_structure(
        frequency=frequency,
        num_steps=int(observed_time_series.time_series.shape[-2]))

    # One Seasonal component per entry of the inferred seasonal structure.
    seasonal_components = [
        sts_components.Seasonal(
            num_seasons=season.num,
            num_steps_per_season=season.duration,
            drift_scale_prior=drift_scale_prior,
            allow_drift=allow_seasonal_effect_drift,
            observed_time_series=observed_time_series,
            name=str(season_type))
        for season_type, season in seasonal_structure.items()
    ]

    return sts_components.Sum(
        [base_component] + seasonal_components,
        observed_time_series=observed_time_series,
        observation_noise_scale_prior=observation_noise_scale_prior)
def detect_anomalies(series,
                     anomaly_threshold=0.01,
                     use_gibbs_predictive_dist=False,
                     num_warmup_steps=50,
                     num_samples=100,
                     jit_compile=False,
                     seed=None):
  """Detects anomalies in a Pandas time series using a default seasonal model.

  This function fits a `LocalLinearTrend` model with automatically determined
  seasonal effects, and returns a predictive credible interval at each step
  of the series. The fitting is done via Gibbs sampling, implemented
  specifically for this model class, which sometimes gives useful results more
  quickly than other fitting methods such as VI or HMC.

  The input is first passed through `regularization.regularize_series`, so the
  returned `times` are those of the regularized series rather than of the
  original index.

  Args:
    series: a Pandas `pd.Series` or `pd.DataFrame` instance indexed by a
      `pd.DatetimeIndex`. This may be irregular (missing timesteps) and/or
      contain unobserved steps indicated by `NaN` values (`NaN` values may also
      be provided to indicate future steps at which a forecast is desired).
      Multiple columns in a `pd.DataFrame` will generate results with a batch
      dimension.
    anomaly_threshold: float, confidence level for anomaly detection. An
      anomaly will be detected if the observed series falls outside the
      equal-tailed credible interval containing `(1 - anomaly_threshold)` of
      the posterior predictive probability mass. Concretely, a step is flagged
      when its tail probability is strictly less than `anomaly_threshold`.
      Default value: `0.01`.
    use_gibbs_predictive_dist: Python `bool`. If `True`, the predictive
      distribution is derived from Gibbs samples of the latent level, which
      incorporate information from the entire series *including future
      timesteps*. Otherwise, the predictive distribution is the 'filtering'
      distribution in which (conditioned on sampled parameters) the prediction
      at each step depends only on values observed at previous steps.
      Default value: `False`.
    num_warmup_steps: `int` number of steps to take before starting to collect
      samples.
      Default value: `50`.
    num_samples: `int` number of steps to take while sampling parameter
      values.
      Default value: `100`.
    jit_compile: Python `bool`. If `True`, compile the sampler with XLA. This
      adds overhead to the first call, but may speed up subsequent calls with
      series of the same shape and frequency.
      Default value: `False`.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
      Default value: `None`.

  Returns:
    prediction_output: instance of `PredictionOutput` named tuple containing
      the predicted credible intervals for each point (omitting the first) in
      the series. Its fields are `times`, `observed_time_series`, `mean`,
      `lower_limit`, `upper_limit`, `tail_probabilities`, and `is_anomaly`.
  """
  regularized_series = regularization.regularize_series(series)
  observed_time_series = sts_util.canonicalize_observed_time_series_with_mask(
      regularized_series)
  # Match the threshold's dtype to the series so the comparison against
  # `tail_probabilities` below does not mix dtypes.
  anomaly_threshold = tf.convert_to_tensor(
      anomaly_threshold,
      dtype=observed_time_series.time_series.dtype,
      name='anomaly_threshold')

  seasonal_structure = seasonality_util.create_seasonal_structure(
      frequency=regularized_series.index.freq,
      num_steps=len(regularized_series))
  # Convert SeasonType keys into strings, because `tf.function` doesn't like
  # enum-valued arguments.
  seasonal_structure = {str(k): v for (k, v) in seasonal_structure.items()}

  # Pick the XLA-compiled entry point only when explicitly requested.
  inner_fn = (_detect_anomalies_inner_compiled if jit_compile
              else _detect_anomalies_inner)
  lower_limit, upper_limit, mean, tail_probabilities = inner_fn(
      observed_time_series,
      seasonal_structure=seasonal_structure,
      use_gibbs_predictive_dist=use_gibbs_predictive_dist,
      num_warmup_steps=num_warmup_steps,
      num_samples=num_samples,
      seed=seed)

  return PredictionOutput(
      times=regularized_series.index,
      # Drop the trailing unit dimension of the canonicalized series.
      observed_time_series=observed_time_series.time_series[..., 0],
      mean=mean,
      lower_limit=lower_limit,
      upper_limit=upper_limit,
      tail_probabilities=tail_probabilities,
      is_anomaly=tail_probabilities < anomaly_threshold)