def test_series_with_no_fixed_frequency_raises_error(self):
  """Canonicalizing a date-indexed series with uneven spacing must fail."""
  with self.assertRaisesRegex(ValueError, 'no set frequency'):
    # 2014-01-03 is absent, so the three dates are not evenly spaced.
    irregular_dates = pd.to_datetime(
        ['2014-01-01', '2014-01-02', '2014-01-04'])
    irregular_series = pd.Series([1., 2., 4.], index=irregular_dates)
    sts_util.canonicalize_observed_time_series_with_mask(irregular_series)
def _build_sts(self, observed_time_series=None):
  """Returns a `Sum` model wrapping a single `LinearRegression` component.

  Args:
    observed_time_series: optional series. When given, it is canonicalized
      and its leading (batch) dimensions and dtype are used to shape the
      weights prior and to cast the random design matrix.

  Returns:
    A `Sum` instance whose only component is the regression.
  """
  max_timesteps = 100
  num_features = 3
  dtype = np.float32
  prior = tfd.Sample(tfd.Laplace(0., 1.), sample_shape=[num_features])

  # LinearRegression components don't currently take an
  # `observed_time_series` argument, so they can't infer a prior batch shape.
  # The batch shape expected by the tests is therefore set by hand here.
  if observed_time_series is not None:
    series_tensor, _ = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))
    batch_shape = tf.shape(series_tensor)[:-2]
    dtype = dtype_util.as_numpy_dtype(series_tensor.dtype)
    batched_laplace = tfd.Laplace(tf.zeros(batch_shape, dtype=dtype), 1.)
    prior = tfd.Sample(batched_laplace, sample_shape=[num_features])

  design_matrix = np.random.randn(max_timesteps, num_features).astype(dtype)
  regression = LinearRegression(design_matrix=design_matrix,
                                weights_prior=prior)
  return Sum(components=[regression],
             observed_time_series=observed_time_series)
def fit_with_gibbs_sampling(model,
                            observed_time_series,
                            num_results=2000,
                            num_warmup_steps=200,
                            initial_state=None,
                            seed=None):
  """Fits parameters for an STS model using Gibbs sampling.

  Args:
    model: STS model exposing a `supports_gibbs_sampling` attribute (the
      error message below points to `build_model_for_gibbs_fitting` as the
      way to create one).
    observed_time_series: series to fit; canonicalized (together with its
      missingness mask) before use.
    num_results: number of sampler steps whose states are returned.
    num_warmup_steps: number of initial sampler steps that are discarded.
    initial_state: optional `GibbsSamplerState`. When `None`, a default
      state is built with unit scales and zero weights/level. Its `seed`
      field is always overwritten from the `seed` argument.
    seed: seed for the sampler. Integer seeds are additionally installed as
      the global seed via `tf.random.set_seed`.

  Returns:
    The structure produced by scanning the sampler over
    `num_warmup_steps + num_results` steps, with the first
    `num_warmup_steps` entries sliced off every leaf.

  Raises:
    ValueError: if `model` has no `supports_gibbs_sampling` attribute.
  """
  if not hasattr(model, 'supports_gibbs_sampling'):
    raise ValueError(
        'This STS model does not support Gibbs sampling. Models '
        'for Gibbs sampling must be created using the '
        'method `build_model_for_gibbs_fitting`.')

  canonical_series, is_missing = (
      sts_util.canonicalize_observed_time_series_with_mask(
          observed_time_series))
  dtype = canonical_series.dtype

  # The canonicalized time series always has trailing dimension `1`,
  # because although LinearGaussianSSMs support vector observations, STS
  # models describe scalar time series only. Dropping that dimension keeps
  # the rest of this function simpler.
  series = canonical_series[..., 0]
  batch_shape = prefer_static.shape(series)[:-1]

  # A LocalLevel model is treated as the special case of LocalLinearTrend
  # in which the slope scale is always zero.
  if isinstance(model.components[0], sts.LocalLinearTrend):
    initial_slope_scale = 1. * tf.ones(batch_shape, dtype=dtype)
    initial_slope = tf.zeros_like(series)
  else:
    initial_slope_scale = 0.
    initial_slope = 0.

  if initial_state is None:
    num_weights = _get_design_matrix(model).shape[-1:]
    weights_shape = prefer_static.concat([batch_shape, num_weights], axis=0)
    initial_state = GibbsSamplerState(
        observation_noise_scale=tf.ones(batch_shape, dtype=dtype),
        level_scale=tf.ones(batch_shape, dtype=dtype),
        slope_scale=initial_slope_scale,
        weights=tf.zeros(weights_shape, dtype=dtype),
        level=tf.zeros_like(series),
        slope=initial_slope,
        seed=None)  # Filled in just below.

  if isinstance(seed, six.integer_types):
    tf.random.set_seed(seed)

  # The `seed` argument always wins over any seed stored in the state.
  sanitized_seed = samplers.sanitize_seed(
      seed, salt='initial_GibbsSamplerState')
  initial_state = initial_state._replace(seed=sanitized_seed)

  sampler_loop_body = _build_sampler_loop_body(model, series, is_missing)
  total_steps = num_warmup_steps + num_results
  samples = tf.scan(sampler_loop_body, np.arange(total_steps), initial_state)

  def _drop_warmup(x):
    return x[num_warmup_steps:]

  return tf.nest.map_structure(_drop_warmup, samples)
def fit_with_gibbs_sampling(model,
                            observed_time_series,
                            num_results=2000,
                            num_warmup_steps=200,
                            compile_steps_with_xla=False,
                            initial_state=None,
                            seed=None):
  """Fits parameters for an STS model using Gibbs sampling.

  Args:
    model: STS model exposing a `supports_gibbs_sampling` attribute (the
      error message below points to `build_model_for_gibbs_fitting` as the
      way to create one).
    observed_time_series: series to fit; canonicalized (together with its
      missingness mask) before use.
    num_results: number of sampler steps whose states are returned.
    num_warmup_steps: number of initial sampler steps that are discarded.
    compile_steps_with_xla: forwarded unchanged to
      `_build_sampler_loop_body`.
    initial_state: optional `GibbsSamplerState`. When `None`, a default
      state is built with unit scales and zero weights/level. Its `seed`
      field is always overwritten from the `seed` argument.
    seed: seed for the sampler. Any integer seed, including `0`, is also
      installed as the global seed via `tf.random.set_seed`.

  Returns:
    The structure produced by scanning the sampler over
    `num_warmup_steps + num_results` steps, with the first
    `num_warmup_steps` entries sliced off every leaf.

  Raises:
    ValueError: if `model` has no `supports_gibbs_sampling` attribute.
  """
  if not hasattr(model, 'supports_gibbs_sampling'):
    raise ValueError('This STS model does not support Gibbs sampling. Models '
                     'for Gibbs sampling must be created using the '
                     'method `build_model_for_gibbs_fitting`.')

  [
      observed_time_series,
      is_missing
  ] = sts_util.canonicalize_observed_time_series_with_mask(
      observed_time_series)
  dtype = observed_time_series.dtype

  # The canonicalized time series always has trailing dimension `1`,
  # because although LinearGaussianSSMs support vector observations, STS
  # models describe scalar time series only. For our purposes it'll be
  # cleaner to remove this dimension.
  observed_time_series = observed_time_series[..., 0]
  batch_shape = prefer_static.shape(observed_time_series)[:-1]

  if initial_state is None:
    initial_state = GibbsSamplerState(
        observation_noise_scale=tf.ones(batch_shape, dtype=dtype),
        level_scale=tf.ones(batch_shape, dtype=dtype),
        weights=tf.zeros(prefer_static.concat([
            batch_shape,
            _get_design_matrix(model).shape[-1:]], axis=0), dtype=dtype),
        level=tf.zeros_like(observed_time_series),
        seed=None)  # Set below.

  # `0` is a legitimate integer seed, so test only the type. The previous
  # `seed and isinstance(...)` check silently skipped `seed=0`.
  if isinstance(seed, six.integer_types):
    tf.random.set_seed(seed)

  # Always use the passed-in `seed` arg, ignoring any seed in the initial
  # state.
  seeded_state = initial_state._asdict()
  seeded_state['seed'] = samplers.sanitize_seed(
      seed, salt='initial_GibbsSamplerState')
  initial_state = GibbsSamplerState(**seeded_state)

  sampler_loop_body = _build_sampler_loop_body(
      model, observed_time_series, is_missing,
      compile_steps_with_xla=compile_steps_with_xla,
      seed=seed)  # This is still an `int` seed, because the InverseGamma
                  # sampler currently requires stateful semantics.

  samples = tf.scan(sampler_loop_body,
                    np.arange(num_warmup_steps + num_results),
                    initial_state)
  return tf.nest.map_structure(lambda x: x[num_warmup_steps:], samples)
def _build_sts(self, observed_time_series=None):
  """Returns an ARIMA component with `ar_order=3` and `ma_order=1`.

  Args:
    observed_time_series: optional series. When given, it is canonicalized
      and the drift prior's loc/scale become ones shaped like
      `time_series[..., 0, 0]`; otherwise they are the Python scalar `1.`.

  Returns:
    An `AutoregressiveIntegratedMovingAverage` instance.
  """
  if observed_time_series is None:
    one = 1.
  else:
    observed_time_series = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))
    first_values = observed_time_series.time_series[..., 0, 0]
    one = tf.ones_like(first_values)

  level_drift_prior = tfd.Normal(loc=one, scale=one)
  return AutoregressiveIntegratedMovingAverage(
      ar_order=3,
      ma_order=1,
      integration_degree=0,
      level_drift_prior=level_drift_prior,
      observed_time_series=observed_time_series)
def _build_sts(self, observed_time_series=None):
  """Returns a `DynamicLinearRegression` over a random design matrix.

  Args:
    observed_time_series: optional series. When given, its canonicalized
      dtype is used for the design matrix; otherwise `np.float32` is used.

  Returns:
    A `DynamicLinearRegression` instance.
  """
  max_timesteps = 100
  num_features = 3

  if observed_time_series is None:
    dtype = np.float32
  else:
    series_tensor, _ = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))
    dtype = dtype_util.as_numpy_dtype(series_tensor.dtype)

  design_matrix = np.random.randn(max_timesteps, num_features).astype(dtype)
  return DynamicLinearRegression(
      design_matrix=design_matrix,
      observed_time_series=observed_time_series)
def test_canonicalizes_observed_time_series(self, observed_time_series,
                                            expected_shape,
                                            expected_is_missing):
  """Checks the shape and missingness mask produced by canonicalization.

  Args:
    observed_time_series: input passed to
      `sts_util.canonicalize_observed_time_series_with_mask`.
    expected_shape: shape the canonicalized series must have.
    expected_is_missing: expected mask values, or `None`. If the
      canonicalized mask is `None`, this must be `None` as well. If a mask
      is returned and this is `None`, an all-`False` mask is expected.
  """
  observed_time_series, is_missing = (
      sts_util.canonicalize_observed_time_series_with_mask(
          observed_time_series))

  # NOTE(review): the original comment said this evaluation uses explicit
  # identity ops to avoid the TF1 error `RuntimeError: The Session graph is
  # empty.`, but no identity ops are applied here -- confirm whether they
  # are still needed.
  observed_time_series, is_missing = self.evaluate(
      (observed_time_series, is_missing))
  self.assertAllEqual(observed_time_series.shape, expected_shape)

  if is_missing is None:
    # No mask came back, so none may be expected either.
    self.assertIsNone(expected_is_missing)
    return

  if expected_is_missing is None:
    # `np.bool` was removed from NumPy; `np.bool_` is the supported
    # spelling of the boolean dtype.
    expected_is_missing = np.zeros(is_missing.shape, dtype=np.bool_)
  self.assertAllEqual(expected_is_missing, is_missing)
def _fit_seasonal_model_with_gibbs_sampling(observed_time_series,
                                            seasonal_structure,
                                            num_warmup_steps=50,
                                            num_results=100,
                                            seed=None):
  """Builds a seasonality-as-regression model and fits it by Gibbs sampling.

  Args:
    observed_time_series: series to model; canonicalized before use.
    seasonal_structure: passed to `seasonality_util.build_fixed_effects` to
      build the regression design matrix.
    num_warmup_steps: forwarded to `gibbs_sampler.fit_with_gibbs_sampling`.
    num_results: forwarded to `gibbs_sampler.fit_with_gibbs_sampling`.
    seed: forwarded to `gibbs_sampler.fit_with_gibbs_sampling`.

  Returns:
    A two-element list `[model, fit_result]`.
  """
  with tf.name_scope('fit_seasonal_model_with_gibbs_sampling'):
    observed_time_series = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))
    dtype = observed_time_series.time_series.dtype
    num_steps = ps.shape(observed_time_series.time_series)[-2]
    design_matrix = seasonality_util.build_fixed_effects(
        num_steps=num_steps,
        seasonal_structure=seasonal_structure,
        dtype=dtype)

    # Default priors. Each one gets an `_upper_bound` attribute attached
    # after construction.
    # pylint: disable=protected-access
    one = tf.ones([], dtype=dtype)

    level_variance_prior = tfd.InverseGamma(
        concentration=16, scale=16. * 0.001**2 * one)
    level_variance_prior._upper_bound = one

    slope_variance_prior = tfd.InverseGamma(
        concentration=16, scale=16. * 0.05**2 * one)
    slope_variance_prior._upper_bound = 0.01 * one

    observation_noise_variance_prior = tfd.InverseGamma(
        concentration=0.05, scale=0.05 * one)
    observation_noise_variance_prior._upper_bound = 1.2 * one
    # pylint: enable=protected-access

    model = gibbs_sampler.build_model_for_gibbs_fitting(
        observed_time_series=observed_time_series,
        design_matrix=design_matrix,
        weights_prior=tfd.Normal(loc=0., scale=one),
        level_variance_prior=level_variance_prior,
        slope_variance_prior=slope_variance_prior,
        observation_noise_variance_prior=observation_noise_variance_prior)

    fit_result = gibbs_sampler.fit_with_gibbs_sampling(
        model,
        observed_time_series,
        num_results=num_results,
        num_warmup_steps=num_warmup_steps,
        seed=seed)
    return [model, fit_result]
def _build_sts(self, observed_time_series=None):
  """Returns a `Sum` model wrapping one `SparseLinearRegression` component.

  Args:
    observed_time_series: optional series. When given, the leading (batch)
      dimensions of its canonicalized form are passed to the regression as
      `weights_batch_shape`; otherwise `None` is passed.

  Returns:
    A `Sum` instance whose only component is the regression.
  """
  max_timesteps = 100
  num_features = 3

  # LinearRegression components don't currently take an
  # `observed_time_series` argument, so they can't infer a prior batch shape.
  # The batch shape expected by the tests is therefore set by hand here.
  if observed_time_series is None:
    batch_shape = None
  else:
    series_tensor, _ = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))
    batch_shape = tf.shape(series_tensor)[:-2]

  design_matrix = np.random.randn(
      max_timesteps, num_features).astype(np.float32)
  regression = SparseLinearRegression(design_matrix=design_matrix,
                                      weights_batch_shape=batch_shape)
  return Sum(components=[regression],
             observed_time_series=observed_time_series)
def _build_sts(self, observed_time_series=None):
  """Returns a `Sum` model wrapping a single `LinearRegression` component.

  Args:
    observed_time_series: optional series. When given, the scalar Laplace
      prior is wrapped in an identity `TransformedDistribution` carrying
      the series' leading (batch) dimensions and an event shape of
      `[num_features]`.

  Returns:
    A `Sum` instance whose only component is the regression.
  """
  max_timesteps = 100
  num_features = 3
  prior = tfd.Laplace(0., 1.)

  # LinearRegression components don't currently take an
  # `observed_time_series` argument, so they can't infer a prior batch shape.
  # The batch shape expected by the tests is therefore set by hand here.
  if observed_time_series is not None:
    series_tensor, _ = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))
    batch_shape = tf.shape(input=series_tensor)[:-2]
    prior = tfd.TransformedDistribution(
        prior,
        tfb.Identity(),
        event_shape=[num_features],
        batch_shape=batch_shape)

  design_matrix = tf.random.normal([max_timesteps, num_features])
  regression = LinearRegression(design_matrix=design_matrix,
                                weights_prior=prior)
  return Sum(components=[regression],
             observed_time_series=observed_time_series)
def forecast(model,
             observed_time_series,
             parameter_samples,
             num_steps_forecast,
             include_observation_noise=True):
  """Construct predictive distribution over future observations.

  Given samples from the posterior over parameters, return the predictive
  distribution over future observations for num_steps_forecast timesteps.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing
      `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`.
      May optionally be an instance of `tfp.sts.MaskedTimeSeries` including
      a mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes
      `[concat([[num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
    num_steps_forecast: scalar `int` `Tensor` number of steps to forecast.
    include_observation_noise: Python `bool` indicating whether the forecast
      distribution should include uncertainty from observation noise. If
      `True`, the forecast is over future observations, if `False`, the
      forecast is over future values of the latent noise-free time series.
      Default value: `True`.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_steps_forecast, 1]` and batch shape
      `concat([sample_shape, model.batch_shape])`, with
      `num_posterior_draws` mixture components.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `forecast`, we construct a forecast
  distribution:

  ```python
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)

    forecast_mean = forecast_dist.mean()[..., 0]  # shape: [50]
    forecast_scale = forecast_dist.stddev()[..., 0]  # shape: [50]
    forecast_samples = forecast_dist.sample(10)[..., 0]  # shape: [10, 50]
  ```

  If using variational inference instead of HMC, we'd construct a forecast
  using samples from the variational posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    forecast_dist = tfp.sts.forecast(model, observed_time_series,
                                     parameter_samples=samples,
                                     num_steps_forecast=50)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_forecast(observed_time_series,
                      forecast_mean,
                      forecast_scale,
                      forecast_samples):
      plt.figure(figsize=(12, 6))

      num_steps = observed_time_series.shape[-1]
      num_steps_forecast = forecast_mean.shape[-1]
      num_steps_train = num_steps - num_steps_forecast

      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(np.arange(num_steps), observed_time_series,
               lw=2, color=c1, label='ground truth')

      forecast_steps = np.arange(num_steps_train,
                       num_steps_train+num_steps_forecast)
      plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1)
      plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2,
               label='forecast')
      plt.fill_between(forecast_steps,
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale, color=c2, alpha=0.2)

      plt.xlim([0, num_steps])
      plt.legend()

    plot_forecast(observed_time_series,
                  forecast_mean=forecast_mean,
                  forecast_scale=forecast_scale,
                  forecast_samples=forecast_samples)
  ```

  """
  with tf.name_scope('forecast'):
    observed_time_series, mask = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))

    # Filter over the observed timesteps to obtain the latent state
    # posterior at timestep T+1 (i.e., the final filtering distribution,
    # pushed through the transition model). It serves as the prior for the
    # forecast model ("today's prior is yesterday's posterior").
    observed_shape = tf.shape(input=observed_time_series)
    num_observed_steps = dist_util.prefer_static_value(observed_shape)[-2]
    observed_data_ssm = model.make_state_space_model(
        num_timesteps=num_observed_steps, param_vals=parameter_samples)
    (_, _, _, predictive_means, predictive_covs, _, _
    ) = observed_data_ssm.forward_filter(observed_time_series, mask=mask)

    # Build a batch of state-space models over the forecast period. Because
    # MixtureSameFamily is used to mix over the posterior draws, the
    # `[num_posterior_draws]` batch dimension has to be moved from the
    # leftmost to the rightmost side of the model's batch shape.
    # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
    # arbitrary axis, and eliminate `move_dimension` calls here.
    parameter_samples = model._canonicalize_param_vals_as_map(  # pylint: disable=protected-access
        parameter_samples)
    reordered_params = {}
    for param in model.parameters:
      target_axis = -(1 + _prefer_static_event_ndims(param.prior))
      reordered_params[param.name] = dist_util.move_dimension(
          parameter_samples[param.name], 0, target_axis)

    final_mean = dist_util.move_dimension(
        predictive_means[..., -1, :], 0, -2)
    final_cov = dist_util.move_dimension(
        predictive_covs[..., -1, :, :], 0, -3)
    forecast_prior = tfd.MultivariateNormalFullCovariance(
        loc=final_mean, covariance_matrix=final_cov)

    # Ugly hack: because `num_posterior_draws` was moved to the trailing
    # (rather than leading) dimension of parameters, the parameter batch
    # shapes no longer broadcast against the `constant_offset` attribute
    # used in `sts.Sum` models. This is fixed by manually adding an extra
    # broadcasting dim to `constant_offset` if present.
    # The root cause is that the params passed on are 'invalid' in the sense
    # that they don't match the shapes of the model's param priors. The fix
    # (as above) will be to update MixtureSameFamily so param dimensions
    # need not change at all.
    # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an
    # arbitrary axis, and eliminate this hack.
    kwargs = {}
    if hasattr(model, 'constant_offset'):
      offset = tf.convert_to_tensor(
          value=model.constant_offset, dtype=forecast_prior.dtype)
      kwargs['constant_offset'] = offset[..., tf.newaxis]

    if not include_observation_noise:
      # Zero out the noise scale so the forecast covers the latent series.
      noise_scale = reordered_params['observation_noise_scale']
      reordered_params['observation_noise_scale'] = tf.zeros_like(noise_scale)

    # We assume that any STS model that has a `constant_offset` attribute
    # will allow it to be overridden as a kwarg. This is currently just
    # `sts.Sum`.
    # TODO(b/120245392): when kwargs hack is removed, switch back to calling
    # the public version of `_make_state_space_model`.
    forecast_ssm = model._make_state_space_model(  # pylint: disable=protected-access
        num_timesteps=num_steps_forecast,
        param_map=reordered_params,
        initial_state_prior=forecast_prior,
        initial_step=num_observed_steps,
        **kwargs)

    num_posterior_draws = dist_util.prefer_static_value(
        forecast_ssm.batch_shape_tensor())[-1]
    uniform_logits = tf.zeros([num_posterior_draws],
                              dtype=forecast_ssm.dtype)
    return tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(logits=uniform_logits),
        components_distribution=forecast_ssm)
def detect_anomalies(series,
                     anomaly_threshold=0.01,
                     use_gibbs_predictive_dist=False,
                     num_warmup_steps=50,
                     num_samples=100,
                     jit_compile=False,
                     seed=None):
  """Detects anomalies in a Pandas time series using a default seasonal model.

  This function fits a `LocalLinearTrend` model with automatically determined
  seasonal effects, and returns a predictive credible interval at each step
  of the series. The fitting is done via Gibbs sampling, implemented
  specifically for this model class, which sometimes gives useful results
  more quickly than other fitting methods such as VI or HMC.

  Args:
    series: a Pandas `pd.Series` or `pd.DataFrame` instance indexed by a
      `pd.DatetimeIndex`. This may be irregular (missing timesteps) and/or
      contain unobserved steps indicated by `NaN` values (`NaN` values may
      also be provided to indicate future steps at which a forecast is
      desired). Multiple columns in a `pd.DataFrame` will generate results
      with a batch dimension.
    anomaly_threshold: float, confidence level for anomaly detection. An
      anomaly will be detected if the observed series falls outside the
      equal-tailed credible interval containing `(1 - anomaly_threshold)` of
      the posterior predictive probability mass.
    use_gibbs_predictive_dist: Python `bool`. If `True`, the predictive
      distribution is derived from Gibbs samples of the latent level, which
      incorporate information from the entire series *including future
      timesteps*. Otherwise, the predictive distribution is the 'filtering'
      distribution in which (conditioned on sampled parameters) the
      prediction at each step depends only on values observed at previous
      steps.
      Default value: `False`.
    num_warmup_steps: `int` number of steps to take before starting to
      collect samples.
      Default value: `50`.
    num_samples: `int` number of steps to take while sampling parameter
      values.
      Default value: `100`.
    jit_compile: Python `bool`. If `True`, compile the sampler with XLA.
      This adds overhead to the first call, but may speed up subsequent calls
      with series of the same shape and frequency.
      Default value: `False`.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.

  Returns:
    prediction_output: instance of `PredictionOutput` named tuple containing
      the predicted credible intervals for each point (omitting the first)
      in the series.
  """
  regularized_series = regularization.regularize_series(series)
  observed_time_series = sts_util.canonicalize_observed_time_series_with_mask(
      regularized_series)
  anomaly_threshold = tf.convert_to_tensor(
      anomaly_threshold,
      dtype=observed_time_series.time_series.dtype,
      name='anomaly_threshold')

  seasonal_structure = seasonality_util.create_seasonal_structure(
      frequency=regularized_series.index.freq,
      num_steps=len(regularized_series))
  # Convert SeasonType keys into strings, because `tf.function` doesn't like
  # enum-valued arguments.
  seasonal_structure = {str(k): v for (k, v) in seasonal_structure.items()}

  # Pick the XLA-compiled implementation only when explicitly requested.
  inner_fn = (_detect_anomalies_inner_compiled
              if jit_compile else _detect_anomalies_inner)
  lower_limit, upper_limit, mean, tail_probabilities = inner_fn(
      observed_time_series,
      seasonal_structure=seasonal_structure,
      use_gibbs_predictive_dist=use_gibbs_predictive_dist,
      num_warmup_steps=num_warmup_steps,
      num_samples=num_samples,
      seed=seed)

  return PredictionOutput(
      times=regularized_series.index,
      # Drop the trailing unit dimension of the canonicalized series.
      observed_time_series=observed_time_series.time_series[..., 0],
      mean=mean,
      lower_limit=lower_limit,
      upper_limit=upper_limit,
      tail_probabilities=tail_probabilities,
      is_anomaly=tail_probabilities < anomaly_threshold)
def one_step_predictive(model,
                        observed_time_series,
                        parameter_samples,
                        timesteps_are_event_shape=True):
  """Compute one-step-ahead predictive distributions for all timesteps.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing
      `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`.
      Any `NaN`s are interpreted as missing observations; missingness may
      also be explicitly specified by passing a `tfp.sts.MaskedTimeSeries`
      instance.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes
      `[concat([[num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to
      `Tensor` values.
    timesteps_are_event_shape: Deprecated, for backwards compatibility only.
      If `False`, the predictive distribution will return per-timestep
      probabilities.
      Default value: `True`.

  Returns:
    predictive_dist: a `tfd.MixtureSameFamily` instance with event shape
      `[num_timesteps] if timesteps_are_event_shape else []` and batch shape
      `concat([sample_shape, model.batch_shape,
      [] if timesteps_are_event_shape else [num_timesteps])`, with
      `num_posterior_draws` mixture components. The `t`th step represents
      the forecast distribution `p(observed_time_series[t] |
      observed_time_series[0:t-1], parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

    predictive_means = one_step_predictive_dist.mean()
    predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct a forecast
  using samples from the variational posterior:

  ```python
    surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(
      model=model)
    loss_curve = tfp.vi.fit_surrogate_posterior(
      target_log_prob_fn=model.joint_distribution(observed_time_series).log_prob,
      surrogate_posterior=surrogate_posterior,
      optimizer=tf.optimizers.Adam(learning_rate=0.1),
      num_steps=200)
    samples = surrogate_posterior.sample(30)

    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_one_step_predictive(observed_time_series,
                                 forecast_mean,
                                 forecast_scale):
      plt.figure(figsize=(12, 6))
      num_timesteps = forecast_mean.shape[-1]
      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(observed_time_series, label="observed time series", color=c1)
      plt.plot(forecast_mean, label="one-step prediction", color=c2)
      plt.fill_between(np.arange(num_timesteps),
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale,
                       alpha=0.1, color=c2)
      plt.legend()

    plot_one_step_predictive(observed_time_series,
                             forecast_mean=predictive_means,
                             forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations
  from the mean:

  ```python
    z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
                 / predictive_scales[..., :-1])
    anomalous_timesteps = tf.boolean_mask(
        tf.range(1, num_timesteps),
        tf.abs(z_scores) > 2.0)
  ```

  """
  with tf.name_scope('one_step_predictive'):
    observed_time_series, is_missing = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))

    # Filter over the training timesteps to obtain the predictive means and
    # variances.
    series_shape = tf.shape(observed_time_series)
    num_timesteps = dist_util.prefer_static_value(series_shape)[-2]
    state_space_model = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    # Avoid eager overhead w/o introducing XLA dependence.
    lgssm = tfe_util.JitPublicMethods(state_space_model, trace_only=True)
    (_, _, _, _, _, observation_means, observation_covs
    ) = lgssm.forward_filter(observed_time_series, mask=is_missing)

    # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]`
    # to a scalar time series.
    per_step_dist = sts_util.mix_over_posterior_draws(
        means=observation_means[..., 0],
        variances=observation_covs[..., 0, 0])

    if not timesteps_are_event_shape:
      return per_step_dist
    return tfd.Independent(per_step_dist, reinterpreted_batch_ndims=1)
def joint_log_prob(self, observed_time_series):
  """Build the joint density `log p(params) + log p(y|params)` as a callable.

  Args:
    observed_time_series: Observed `Tensor` trajectories of shape
      `sample_shape + batch_shape + [num_timesteps, 1]` (the trailing
      `1` dimension is optional if `num_timesteps > 1`), where
      `batch_shape` should match `self.batch_shape` (the broadcast batch
      shape of all priors on parameters for this structural time series
      model). Any `NaN`s are interpreted as missing observations;
      missingness may also be explicitly specified by passing a
      `tfp.sts.MaskedTimeSeries` instance.

  Returns:
    log_joint_fn: A function taking a `Tensor` argument for each model
      parameter, in canonical order, and returning a `Tensor` log
      probability of shape `batch_shape`. Note that, *unlike*
      `tfp.Distributions` `log_prob` methods, the `log_joint` sums over the
      `sample_shape` from y, so that `sample_shape` does not appear in the
      output log_prob. This corresponds to viewing multiple samples in `y`
      as iid observations from a single model, which is typically the
      desired behavior for parameter inference.
  """
  with tf.name_scope('joint_log_prob'):
    observed_time_series, mask = (
        sts_util.canonicalize_observed_time_series_with_mask(
            observed_time_series))
    num_timesteps = ps.shape(observed_time_series)[-2]
    parameter_prior = self._joint_prior_distribution()

    def log_joint_fn(*param_vals, **param_kwargs):
      """Generated log-density function."""
      # Parameters arrive either positionally or by name, never both.
      if param_kwargs and param_vals:
        raise ValueError(
            'log_joint_fn saw both positional args ({}) and named args ({}). '
            'This is not supported: you have to choose!'.format(
                param_vals, param_kwargs))
      if param_kwargs:
        param_vals = [param_kwargs[p.name] for p in self.parameters]

      param_lp = parameter_prior.log_prob(*param_vals)

      # Build a linear Gaussian state space model and evaluate the marginal
      # log_prob on observations.
      lgssm = self.make_state_space_model(
          param_vals=param_vals, num_timesteps=num_timesteps)
      observation_lp = lgssm.log_prob(observed_time_series, mask=mask)

      # Sum over likelihoods from iid observations. Without this sum,
      # adding `param_lp + observation_lp` would broadcast the param priors
      # over the sample shape, which incorrectly multi-counts the param
      # priors.
      rank_difference = tf.rank(observation_lp) - tf.rank(param_lp)
      sample_ndims = tf.maximum(0, rank_difference)
      summed_observation_lp = tf.reduce_sum(
          observation_lp, axis=tf.range(sample_ndims))

      return param_lp + summed_observation_lp

  return log_joint_fn
def joint_distribution(self,
                       observed_time_series=None,
                       num_timesteps=None,
                       trajectories_shape=None,
                       initial_step=0,
                       mask=None,
                       experimental_parallelize=False):
  """Constructs the joint distribution over parameters and observed values.

  Args:
    observed_time_series: Optional observed time series to model, as a
      `Tensor` or `tfp.sts.MaskedTimeSeries` instance having shape
      `concat([batch_shape, trajectories_shape, num_timesteps, 1])`. If
      an observed time series is provided, the `num_timesteps` and
      `trajectories_shape` arguments are ignored (both are derived from the
      series), and an unnormalized (pinned) distribution over parameter
      values is returned. See `mask` for how an explicit mask interacts with
      the series.
      Default value: `None`.
    num_timesteps: scalar `int` `Tensor` number of timesteps to model. This
      must be specified either directly or by passing an
      `observed_time_series`.
      Default value: `None`.
    trajectories_shape: `int` `Tensor` shape of sampled trajectories
      for each set of parameter values. If not specified (either directly
      or by passing an `observed_time_series`), defaults to a one-to-one
      correspondence between trajectories and parameter settings
      (implicitly `trajectories_shape=()`).
      Default value: `None`.
    initial_step: Optional scalar `int` `Tensor` specifying the starting
      timestep.
      Default value: `0`.
    mask: Optional `bool` `Tensor` having shape
      `concat([batch_shape, trajectories_shape, num_timesteps])`, in which
      `True` entries indicate that the series value at the corresponding
      step is missing and should be ignored. This argument should be passed
      only if `observed_time_series` is not specified or does not already
      contain a missingness mask; it is an error to pass both this
      argument and an `observed_time_series` value containing a missingness
      mask.
      Default value: `None`.
    experimental_parallelize: If `True`, use parallel message passing
      algorithms from `tfp.experimental.parallel_filter` to perform time
      series operations in `O(log num_timesteps)` sequential steps. The
      overall FLOP and memory cost may be larger than for the sequential
      implementations by a constant factor.
      Default value: `False`.

  Returns:
    joint_distribution: joint distribution of model parameters and
      observed trajectories. If no `observed_time_series` was specified, this
      is an instance of `tfd.JointDistributionNamedAutoBatched` with a
      random variable for each model parameter (with names and order matching
      `self.parameters`), plus a final random variable `observed_time_series`
      representing a trajectory(ies) conditioned on the parameters. If
      `observed_time_series` was specified, the return value is given by
      `joint_distribution.experimental_pin(
      observed_time_series=observed_time_series)` where `joint_distribution`
      is as just described, so it defines an unnormalized posterior
      distribution over the parameters.

  Raises:
    ValueError: if `mask` is passed and `observed_time_series` already
      carries its own missingness mask.

  #### Example:

  The joint distribution can generate prior samples of parameters and
  trajectories:

  ```python
  from matplotlib import pylab as plt
  import tensorflow_probability as tfp

  # Sample and plot 100 trajectories from the prior.
  model = tfp.sts.LocalLinearTrend()
  prior_samples = model.joint_distribution(num_timesteps=200).sample([100])
  plt.plot(
    tf.linalg.matrix_transpose(prior_samples['observed_time_series'][..., 0]))
  ```

  It also integrates with TFP inference APIs, providing a more flexible
  alternative to the STS-specific fitting utilities.

  ```python
  jd = model.joint_distribution(observed_time_series)

  # Variational inference.
  surrogate_posterior = (
    tfp.experimental.vi.build_factored_surrogate_posterior(
      event_shape=jd.event_shape,
      bijector=jd.experimental_default_event_space_bijector()))
  losses = tfp.vi.fit_surrogate_posterior(
    target_log_prob_fn=jd.unnormalized_log_prob,
    surrogate_posterior=surrogate_posterior,
    optimizer=tf.optimizers.Adam(0.1),
    num_steps=200)
  parameter_samples = surrogate_posterior.sample(50)

  # No U-Turn Sampler.
  samples, kernel_results = tfp.experimental.mcmc.windowed_adaptive_nuts(
    n_draws=500, joint_dist=jd)
  ```

  """

  def state_space_model_likelihood(**param_vals):
    # NOTE: `num_timesteps`, `mask` and `trajectories_shape` are read from
    # the enclosing scope when this runs, so the values derived from
    # `observed_time_series` further down are the ones that take effect.
    ssm = self.make_state_space_model(
        param_vals=param_vals,
        num_timesteps=num_timesteps,
        initial_step=initial_step,
        mask=mask,
        experimental_parallelize=experimental_parallelize)
    # Looping LGSSM methods are really expensive in eager mode; wrap them
    # to keep this from slowing things down in interactive use.
    ssm = tfe_util.JitPublicMethods(ssm, trace_only=True)
    if distribution_util.shape_may_be_nontrivial(trajectories_shape):
      return sample.Sample(ssm, sample_shape=trajectories_shape)
    return ssm

  batch_ndims = ps.rank_from_shape(self.batch_shape_tensor, self.batch_shape)
  if observed_time_series is not None:
    [
        observed_time_series,
        is_missing
    ] = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)
    if is_missing is not None:
      if mask is not None:
        raise ValueError(
            'Passed non-None value for `mask`, but the observed '
            'time series already contains an `is_missing` mask.')
      mask = is_missing
    # Derive the modelled length and trajectory shape from the data itself;
    # the dimensions between the model batch dims and the trailing
    # `[num_timesteps, 1]` are treated as the trajectories shape.
    num_timesteps = ps.shape(observed_time_series)[-2]
    trajectories_shape = ps.shape(observed_time_series)[batch_ndims:-2]

  joint_distribution = (
      joint_distribution_auto_batched.JointDistributionNamedAutoBatched(
          model=collections.OrderedDict(
              # Prior.
              list(self._joint_prior_distribution().model.items()) +
              # Likelihood.
              [('observed_time_series', state_space_model_likelihood)]),
          use_vectorized_map=False,
          batch_ndims=batch_ndims))

  if observed_time_series is not None:
    return joint_distribution.experimental_pin(
        observed_time_series=observed_time_series)

  return joint_distribution
def fit_with_gibbs_sampling(model,
                            observed_time_series,
                            num_chains=(),
                            num_results=2000,
                            num_warmup_steps=200,
                            initial_state=None,
                            seed=None,
                            default_pseudo_observations=None,
                            experimental_use_dynamic_cholesky=False,
                            experimental_use_weight_adjustment=False):
  """Draws posterior samples for an STS model using a Gibbs sampler.

  Args:
    model: A `tfp.sts.StructuralTimeSeries` model instance returned by
      `build_model_for_gibbs_fitting`.
    observed_time_series: `float` `Tensor` of shape `[..., T, 1]` (omitting the
      trailing unit dimension is also supported when `T > 1`), specifying an
      observed time series. May optionally be an instance of
      `tfp.sts.MaskedTimeSeries`, which includes a mask `Tensor` to specify
      timesteps with missing observations.
    num_chains: Optional int number of parallel MCMC chains. The default, an
      empty tuple, samples a single chain.
    num_results: Optional int number of MCMC samples to return.
    num_warmup_steps: Optional int number of initial sampler steps that are
      run and then dropped from the returned samples.
    initial_state: Optional `GibbsSamplerState` structure holding the initial
      states of the MCMC chains. If `None`, a default state is constructed
      (unit observation-noise and level scales; zero weights and level). Any
      seed stored in this structure is replaced by one derived from `seed`.
    seed: Optional Python `int` seed controlling the sampled values. An integer
      seed is also installed as the global TensorFlow seed.
    default_pseudo_observations: Optional scalar float `Tensor` controlling
      the number of pseudo-observations for the prior precision matrix over
      the weights.
    experimental_use_dynamic_cholesky: Optional bool - in case of spike and
      slab sampling, will dynamically select the subset of the design matrix
      with active features to perform the Cholesky decomposition. This may
      provide a speedup when the number of true features is small compared to
      the size of the design matrix. *Note*: If this is true, neither batch
      shape nor `jit_compile` is supported.
    experimental_use_weight_adjustment: Optional bool - use a nonstandard
      update for the posterior precision of the weight in case of a spike and
      slab sampler.

  Returns:
    samples: A `GibbsSamplerState` structure of posterior samples, with the
      first `num_warmup_steps` steps removed.

  Raises:
    ValueError: if `model` has no `supports_gibbs_sampling` attribute.
  """
  if not hasattr(model, 'supports_gibbs_sampling'):
    raise ValueError(
        'This STS model does not support Gibbs sampling. Models '
        'for Gibbs sampling must be created using the '
        'method `build_model_for_gibbs_fitting`.')
  if not tf.nest.is_nested(num_chains):
    num_chains = [num_chains]

  series, is_missing = sts_util.canonicalize_observed_time_series_with_mask(
      observed_time_series)
  dtype = series.dtype

  # Canonicalization always leaves a trailing unit dimension: LinearGaussianSSMs
  # allow vector observations, but STS models are scalar-valued. Dropping the
  # dimension keeps the shape arithmetic below simple.
  series = series[..., 0]
  batch_shape = prefer_static.concat(
      [num_chains, prefer_static.shape(series)[:-1]], axis=-1)
  level_slope_shape = prefer_static.concat(
      [num_chains, prefer_static.shape(series)], axis=-1)

  # A LocalLevel model is handled as a LocalLinearTrend whose slope (and slope
  # scale) is pinned at zero.
  if isinstance(model.components[0], sts.LocalLinearTrend):
    initial_slope_scale = 1. * tf.ones(batch_shape, dtype=dtype)
    initial_slope = tf.zeros(level_slope_shape, dtype=dtype)
  else:
    initial_slope_scale = 0.
    initial_slope = 0.

  if initial_state is None:
    design_matrix = _get_design_matrix(model)
    if design_matrix is None:
      # No regression component: use an empty weights vector.
      weights = tf.zeros(0, dtype=dtype)
    else:
      weights_shape = prefer_static.concat(
          [batch_shape, design_matrix.shape[-1:]], axis=0)
      weights = tf.zeros(weights_shape, dtype=dtype)
    initial_state = GibbsSamplerState(
        observation_noise_scale=tf.ones(batch_shape, dtype=dtype),
        level_scale=tf.ones(batch_shape, dtype=dtype),
        slope_scale=initial_slope_scale,
        weights=weights,
        level=tf.zeros(level_slope_shape, dtype=dtype),
        slope=initial_slope,
        seed=None)  # Filled in below.

  if isinstance(seed, six.integer_types):
    tf.random.set_seed(seed)

  # The `seed` argument always wins over whatever seed the state carried.
  sanitized_seed = samplers.sanitize_seed(
      seed, salt='initial_GibbsSamplerState')
  initial_state = initial_state._replace(seed=sanitized_seed)

  sampler_loop_body = _build_sampler_loop_body(
      model=model,
      observed_time_series=series,
      is_missing=is_missing,
      default_pseudo_observations=default_pseudo_observations,
      experimental_use_dynamic_cholesky=experimental_use_dynamic_cholesky,
      experimental_use_weight_adjustment=experimental_use_weight_adjustment)

  total_steps = num_warmup_steps + num_results
  samples = tf.scan(sampler_loop_body, np.arange(total_steps), initial_state)

  def _drop_warmup(x):
    return x[num_warmup_steps:]

  return tf.nest.map_structure(_drop_warmup, samples)
def build_default_model(observed_time_series,
                        base_component=sts_components.LocalLinearTrend,
                        observation_noise_scale_prior=None,
                        drift_scale_prior=None,
                        allow_seasonal_effect_drift=True,
                        name=None):
  """Builds a model with seasonality from a Pandas Series or DataFrame.

  Returns a model of the form
  `tfp.sts.Sum([base_component] + seasonal_components)`, where
  `seasonal_components` are automatically selected using the frequency from the
  `DatetimeIndex` of the provided `pd.Series` or `pd.DataFrame`. The index must
  have a set frequency; if it does not, a `ValueError` is raised (see
  `tfp.sts.regularize_series` for a way to build a regularly spaced series).

  Args:
    observed_time_series: Instance of `pd.Series` or `pd.DataFrame` containing
      one or more time series indexed by a `pd.DatetimeIndex`.
    base_component: Optional subclass of `tfp.sts.StructuralTimeSeries`
      specifying the model used for residual variation in the series not
      explained by seasonal or other effects. May also be an *instance* of such
      a class with specific priors set; if a class is given, it is
      instantiated with `observed_time_series` so that heuristic default
      priors are used.
      Default value: `tfp.sts.LocalLinearTrend`.
    observation_noise_scale_prior: Optional `tfd.Distribution` instance
      specifying a prior on `observation_noise_scale`. If `None`, a heuristic
      default prior is constructed based on the provided
      `observed_time_series`.
      Default value: `None`.
    drift_scale_prior: Optional `tfd.Distribution` instance specifying a prior
      on the `drift_scale` parameter of Seasonal components. If `None`, a
      heuristic default prior is constructed based on the provided
      `observed_time_series`.
      Default value: `None`.
    allow_seasonal_effect_drift: optional Python `bool` specifying whether the
      seasonal effects can drift over time. Setting this to `False` removes
      the `drift_scale` parameter from the model. This is mathematically
      equivalent to `drift_scale_prior = tfd.Deterministic(0.)`, but removing
      drift directly is preferred because it avoids the use of a degenerate
      prior.
      Default value: `True`.
    name: Python `str` name for ops created by this function.
      Default value: `None` (i.e., 'build_default_model').

  Returns:
    model: instance of `tfp.sts.Sum` representing a model for the given data.

  Raises:
    ValueError: if the index of `observed_time_series` has no set frequency.

  #### Example

  Consider a series of eleven data points, covering a period of two weeks
  with three missing days.

  ```python
  import pandas as pd
  import tensorflow as tf
  import tensorflow_probability as tfp

  series = pd.Series(
    [100., 27., 92., 66., 51., 126., 113., 95., 48., 20., 59.,],
    index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-04',
                          '2020-01-05', '2020-01-06', '2020-01-07',
                          '2020-01-10', '2020-01-11', '2020-01-12',
                          '2020-01-13', '2020-01-14']))
  ```

  Before calling `build_default_model`, we must regularize the series to follow
  a fixed frequency (here, daily observations):

  ```python
  series = tfp.sts.regularize_series(series)
  # len(series) ==> 14
  ```

  The default model will combine a LocalLinearTrend baseline with a Seasonal
  component to capture day-of-week effects. We can then fit this model to our
  observed data. Here we'll use variational inference:

  ```python
  model = tfp.sts.build_default_model(series)
  # len(model.components) == 2

  # Fit the model using variational inference.
  surrogate_posterior = tfp.sts.build_factored_surrogate_posterior(model)
  losses = tfp.vi.fit_surrogate_posterior(
    target_log_prob_fn=model.joint_log_prob(series),
    surrogate_posterior=surrogate_posterior,
    optimizer=tf.optimizers.Adam(0.1),
    num_steps=1000,
    convergence_criterion=(
      tfp.optimizer.convergence_criteria.SuccessiveGradientsAreUncorrelated(
        window_size=20, min_num_steps=50)),
    jit_compile=True)
  parameter_samples = surrogate_posterior.sample(50)
  ```

  Finally, use the fitted parameters to forecast the next week of data:

  ```python
  forecast_dist = tfp.sts.forecast(model,
                                   observed_time_series=series,
                                   parameter_samples=parameter_samples,
                                   num_steps_forecast=7)
  # Strip trailing unit dimension from LinearGaussianStateSpaceModel events.
  forecast_mean = forecast_dist.mean()[..., 0]
  forecast_stddev = forecast_dist.stddev()[..., 0]

  forecast = pd.DataFrame(
      {'mean': forecast_mean,
       'lower_bound': forecast_mean - 2. * forecast_stddev,
       'upper_bound': forecast_mean + 2. * forecast_stddev},
      index=pd.date_range(start=series.index[-1] + series.index.freq,
                          periods=7,
                          freq=series.index.freq))
  ```

  """
  with tf.name_scope(name or 'build_default_model'):
    # The seasonal structure is derived from the index frequency, so a series
    # without one cannot be handled.
    frequency = getattr(observed_time_series.index, 'freq', None)
    if frequency is None:
      raise ValueError('Provided series has no set frequency. Consider '
                       'using `tfp.sts.regularize_series` to infer a frequency '
                       'and build a regularly spaced series.')

    masked_series = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    is_component_instance = isinstance(
        base_component, structural_time_series.StructuralTimeSeries)
    if not is_component_instance:
      # A component *class* was given: instantiate it with default priors.
      base_component = base_component(observed_time_series=masked_series)

    num_steps = int(masked_series.time_series.shape[-2])
    seasonal_structure = seasonality_util.create_seasonal_structure(
        frequency=frequency, num_steps=num_steps)

    # One Seasonal component per entry of the detected seasonal structure.
    seasonal_components = [
        sts_components.Seasonal(
            num_seasons=season.num,
            num_steps_per_season=season.duration,
            drift_scale_prior=drift_scale_prior,
            allow_drift=allow_seasonal_effect_drift,
            observed_time_series=masked_series,
            name=str(season_type))
        for season_type, season in seasonal_structure.items()
    ]

    return sts_components.Sum(
        [base_component] + seasonal_components,
        observed_time_series=masked_series,
        observation_noise_scale_prior=observation_noise_scale_prior)
def decompose_by_component(model, observed_time_series, parameter_samples):
  """Splits an observed time series into per-component contributions.

  The decomposition follows the posterior representation of a structural time
  series model. In particular, it:
    - Computes the posterior marginal mean and covariances over the additive
      model's latent space.
    - Decomposes the latent posterior into the marginal blocks for each
      model component.
    - Maps the per-component latent posteriors back through each component's
      observation model, to generate the time series modeled by that
      component.

  Args:
    model: An instance of `tfp.sts.Sum` representing a structural time series
      model.
    observed_time_series: `float` `Tensor` of shape
      `batch_shape + [num_timesteps, 1]` (omitting the trailing unit dimension
      is also supported when `num_timesteps > 1`), specifying an observed time
      series. May optionally be an instance of `tfp.sts.MaskedTimeSeries`,
      which includes a mask `Tensor` to specify timesteps with missing
      observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes
      `[concat([[num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to `Tensor`
      values.

  Returns:
    component_dists: A `collections.OrderedDict` instance mapping
      component StructuralTimeSeries instances (elements of `model.components`)
      to `tfd.Distribution` instances representing the posterior marginal
      distributions on the process modeled by each component. Each distribution
      has batch shape matching that of the latent posterior means/covariances,
      and event shape of `[num_timesteps]`.

  #### Examples

  Suppose we've built a model and fit it to data:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  To extract the contributions of individual components, pass the time series
  and sampled parameters into `decompose_by_component`:

  ```python
    component_dists = decompose_by_component(
      model,
      observed_time_series=observed_time_series,
      parameter_samples=samples)

    # Component mean and stddev have shape `[len(observed_time_series)]`.
    day_of_week_effect_mean = component_dists[day_of_week].mean()
    day_of_week_effect_stddev = component_dists[day_of_week].stddev()
  ```

  Using the component distributions, we can visualize the uncertainty for
  each component:

  ```python
  from matplotlib import pylab as plt
  num_components = len(component_dists)
  xs = np.arange(len(observed_time_series))
  fig = plt.figure(figsize=(12, 3 * num_components))
  for i, (component, component_dist) in enumerate(component_dists.items()):

    # If in graph mode, replace `.numpy()` with `.eval()` or `sess.run()`.
    component_mean = component_dist.mean().numpy()
    component_stddev = component_dist.stddev().numpy()

    ax = fig.add_subplot(num_components, 1, 1 + i)
    ax.plot(xs, component_mean, lw=2)
    ax.fill_between(xs,
                    component_mean - 2 * component_stddev,
                    component_mean + 2 * component_stddev,
                    alpha=0.5)
    ax.set_title(component.name)
  ```

  """
  with tf.compat.v1.name_scope('decompose_by_component',
                               values=[observed_time_series]):
    series, is_missing = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # The canonical series ends in `[num_timesteps, 1]`; read the timestep
    # count statically when it is known.
    series_shape = dist_util.prefer_static_value(tf.shape(input=series))
    num_timesteps = series_shape[-2]

    # Smooth over the training timesteps to obtain the posterior on latents.
    ssm = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    latent_means, latent_covs = ssm.posterior_marginals(
        series, mask=is_missing)

    return _decompose_from_posterior_marginals(
        model, latent_means, latent_covs, parameter_samples)
def one_step_predictive(model, observed_time_series, parameter_samples):
  """Builds one-step-ahead predictive distributions for every timestep.

  Given samples from the posterior over parameters, return the predictive
  distribution over observations at each time `T`, given observations up
  through time `T-1`.

  Args:
    model: An instance of `StructuralTimeSeries` representing a
      time-series model. This represents a joint distribution over
      time-series and their parameters with batch shape `[b1, ..., bN]`.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes
      `[concat([[num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to `Tensor`
      values.

  Returns:
    forecast_dist: a `tfd.MixtureSameFamily` instance with event shape
      [num_timesteps] and batch shape
      `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws`
      mixture components. The `t`th step represents the forecast distribution
      `p(observed_time_series[t] | observed_time_series[0:t-1],
      parameter_samples)`.

  #### Examples

  Suppose we've built a model and fit it to data using HMC:

  ```python
    day_of_week = tfp.sts.Seasonal(
        num_seasons=7,
        observed_time_series=observed_time_series,
        name='day_of_week')
    local_linear_trend = tfp.sts.LocalLinearTrend(
        observed_time_series=observed_time_series,
        name='local_linear_trend')
    model = tfp.sts.Sum(components=[day_of_week, local_linear_trend],
                        observed_time_series=observed_time_series)

    samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  Passing the posterior samples into `one_step_predictive`, we construct a
  one-step-ahead predictive distribution:

  ```python
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)

    predictive_means = one_step_predictive_dist.mean()
    predictive_scales = one_step_predictive_dist.stddev()
  ```

  If using variational inference instead of HMC, we'd construct a forecast
  using samples from the variational posterior:

  ```python
    (variational_loss,
     variational_distributions) = tfp.sts.build_factored_variational_loss(
       model=model, observed_time_series=observed_time_series)

    # OMITTED: take steps to optimize variational loss

    samples = {k: q.sample(30) for (k, q) in variational_distributions.items()}
    one_step_predictive_dist = tfp.sts.one_step_predictive(
      model, observed_time_series, parameter_samples=samples)
  ```

  We can visualize the forecast by plotting:

  ```python
    from matplotlib import pylab as plt
    def plot_one_step_predictive(observed_time_series,
                                 forecast_mean,
                                 forecast_scale):
      plt.figure(figsize=(12, 6))
      num_timesteps = forecast_mean.shape[-1]
      c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05)
      plt.plot(observed_time_series, label="observed time series", color=c1)
      plt.plot(forecast_mean, label="one-step prediction", color=c2)
      plt.fill_between(np.arange(num_timesteps),
                       forecast_mean - 2 * forecast_scale,
                       forecast_mean + 2 * forecast_scale,
                       alpha=0.1, color=c2)
      plt.legend()

    plot_one_step_predictive(observed_time_series,
                             forecast_mean=predictive_means,
                             forecast_scale=predictive_scales)
  ```

  To detect anomalous timesteps, we check whether the observed value at each
  step is within a 95% predictive interval, i.e., two standard deviations from
  the mean:

  ```python
    z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1])
                 / predictive_scales[..., :-1])
    anomalous_timesteps = tf.boolean_mask(
        tf.range(1, num_timesteps),
        tf.abs(z_scores) > 2.0)
  ```

  """
  with tf.name_scope('one_step_predictive'):
    series, is_missing = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # The canonical series ends in `[num_timesteps, 1]`.
    series_shape = dist_util.prefer_static_value(tf.shape(input=series))
    num_timesteps = series_shape[-2]

    # Filter over the training timesteps; only the trailing two outputs
    # (predictive observation means and covariances) are needed here.
    lgssm = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    (_, _, _, _, _, predictive_means, predictive_covs) = lgssm.forward_filter(
        series, mask=is_missing)

    # The LGSSM event shape is `[num_timesteps, 1]`; index away the unit
    # dimensions to get a scalar time series before mixing over draws.
    scalar_means = predictive_means[..., 0]
    scalar_variances = predictive_covs[..., 0, 0]
    return sts_util.mix_over_posterior_draws(
        means=scalar_means, variances=scalar_variances)
def fit_with_gibbs_sampling(model,
                            observed_time_series,
                            num_chains=(),
                            num_results=2000,
                            num_warmup_steps=200,
                            initial_state=None,
                            seed=None):
  """Draws posterior samples for an STS model using a Gibbs sampler.

  Args:
    model: A `tfp.sts.StructuralTimeSeries` model instance returned by
      `build_model_for_gibbs_fitting`.
    observed_time_series: `float` `Tensor` of shape `[..., T, 1]` (omitting the
      trailing unit dimension is also supported when `T > 1`), specifying an
      observed time series. May optionally be an instance of
      `tfp.sts.MaskedTimeSeries`, which includes a mask `Tensor` to specify
      timesteps with missing observations.
    num_chains: Optional int number of parallel MCMC chains. The default, an
      empty tuple, samples a single chain.
    num_results: Optional int number of MCMC samples to return.
    num_warmup_steps: Optional int number of initial sampler steps that are
      run and then dropped from the returned samples.
    initial_state: Optional `GibbsSamplerState` structure holding the initial
      states of the MCMC chains. If `None`, a default state is constructed
      (unit observation-noise and level scales; zero weights and level). Any
      seed stored in this structure is replaced by one derived from `seed`.
    seed: Optional Python `int` seed controlling the sampled values. An integer
      seed is also installed as the global TensorFlow seed.

  Returns:
    samples: A `GibbsSamplerState` structure of posterior samples, with the
      first `num_warmup_steps` steps removed.

  Raises:
    ValueError: if `model` has no `supports_gibbs_sampling` attribute.
  """
  if not hasattr(model, 'supports_gibbs_sampling'):
    raise ValueError(
        'This STS model does not support Gibbs sampling. Models '
        'for Gibbs sampling must be created using the '
        'method `build_model_for_gibbs_fitting`.')
  if not tf.nest.is_nested(num_chains):
    num_chains = [num_chains]

  series, is_missing = sts_util.canonicalize_observed_time_series_with_mask(
      observed_time_series)
  dtype = series.dtype

  # Canonicalization always leaves a trailing unit dimension: LinearGaussianSSMs
  # allow vector observations, but STS models are scalar-valued. Dropping the
  # dimension keeps the shape arithmetic below simple.
  series = series[..., 0]
  batch_shape = prefer_static.concat(
      [num_chains, prefer_static.shape(series)[:-1]], axis=-1)
  level_slope_shape = prefer_static.concat(
      [num_chains, prefer_static.shape(series)], axis=-1)

  # A LocalLevel model is handled as a LocalLinearTrend whose slope (and slope
  # scale) is pinned at zero.
  if isinstance(model.components[0], sts.LocalLinearTrend):
    initial_slope_scale = 1. * tf.ones(batch_shape, dtype=dtype)
    initial_slope = tf.zeros(level_slope_shape, dtype=dtype)
  else:
    initial_slope_scale = 0.
    initial_slope = 0.

  if initial_state is None:
    # Build the default starting point field by field.
    unit_noise_scale = tf.ones(batch_shape, dtype=dtype)
    unit_level_scale = tf.ones(batch_shape, dtype=dtype)
    num_features = _get_design_matrix(model).shape[-1:]
    zero_weights = tf.zeros(
        prefer_static.concat([batch_shape, num_features], axis=0),
        dtype=dtype)
    zero_level = tf.zeros(level_slope_shape, dtype=dtype)
    initial_state = GibbsSamplerState(
        observation_noise_scale=unit_noise_scale,
        level_scale=unit_level_scale,
        slope_scale=initial_slope_scale,
        weights=zero_weights,
        level=zero_level,
        slope=initial_slope,
        seed=None)  # Filled in below.

  if isinstance(seed, six.integer_types):
    tf.random.set_seed(seed)

  # The `seed` argument always wins over whatever seed the state carried.
  sanitized_seed = samplers.sanitize_seed(
      seed, salt='initial_GibbsSamplerState')
  initial_state = initial_state._replace(seed=sanitized_seed)

  sampler_loop_body = _build_sampler_loop_body(model, series, is_missing)

  total_steps = num_warmup_steps + num_results
  samples = tf.scan(sampler_loop_body, np.arange(total_steps), initial_state)

  def _drop_warmup(x):
    return x[num_warmup_steps:]

  return tf.nest.map_structure(_drop_warmup, samples)
def impute_missing_values(model,
                          observed_time_series,
                          parameter_samples,
                          include_observation_noise=False):
  """Imputes missing values in a time series via posterior inference.

  This method computes the posterior marginals `p(latent state | observations)`,
  given the time series at observed timesteps (a missingness mask should
  be specified using `tfp.sts.MaskedTimeSeries`). It pushes this posterior back
  through the observation model to impute a predictive distribution on the
  observed time series. At unobserved steps, this is an imputed value; at other
  steps it is interpreted as the model's estimate of the underlying noise-free
  series.

  Args:
    model: `tfp.sts.Sum` instance defining an additive STS model.
    observed_time_series: `float` `Tensor` of shape
      `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where
      `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]`
      dimension may (optionally) be omitted if `num_timesteps > 1`. May
      optionally be an instance of `tfp.sts.MaskedTimeSeries` including a
      mask `Tensor` to encode the locations of missing observations.
    parameter_samples: Python `list` of `Tensors` representing posterior
      samples of model parameters, with shapes
      `[concat([[num_posterior_draws], param.prior.batch_shape,
      param.prior.event_shape]) for param in model.parameters]`. This may
      optionally also be a map (Python `dict`) of parameter names to `Tensor`
      values.
    include_observation_noise: If `False`, the imputed uncertainties
      represent the model's estimate of the noise-free time series at each
      timestep. If `True`, they represent the model's estimate of the range of
      values that could be *observed* at each timestep, including any i.i.d.
      observation noise.
      Default value: `False`.

  Returns:
    imputed_series_dist: a `tfd.MixtureSameFamily` instance with event shape
      [num_timesteps] and batch shape
      `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws`
      mixture components.

  #### Example

  To specify a time series with missing values, use `tfp.sts.MaskedTimeSeries`:

  ```python
  time_series_with_nans = [-1., 1., np.nan, 2.4, np.nan, 5]
  observed_time_series = tfp.sts.MaskedTimeSeries(
    time_series=time_series_with_nans,
    is_missing=tf.math.is_nan(time_series_with_nans))
  ```

  Masked time series can be passed to `tfp.sts` methods in place of a
  `observed_time_series` `Tensor`:

  ```python
  # Build model using observed time series to set heuristic priors.
  linear_trend_model = tfp.sts.LocalLinearTrend(
    observed_time_series=observed_time_series)
  model = tfp.sts.Sum([linear_trend_model],
                      observed_time_series=observed_time_series)

  # Fit model to data
  parameter_samples, _ = tfp.sts.fit_with_hmc(model, observed_time_series)
  ```

  After fitting a model, `impute_missing_values` will return a distribution
  over the imputed series:

  ```python
  # Impute missing values
  imputed_series_distribution = tfp.sts.impute_missing_values(
    model, observed_time_series, parameter_samples=parameter_samples)
  print('imputed means and stddevs: ',
        imputed_series_distribution.mean(),
        imputed_series_distribution.stddev())
  ```

  """
  with tf.name_scope('impute_missing_values'):
    series, mask = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # The canonical series ends in `[num_timesteps, 1]`.
    series_shape = dist_util.prefer_static_value(tf.shape(input=series))
    num_timesteps = series_shape[-2]

    # Smooth over the training timesteps, then push the latent posterior
    # through the observation model to get predictive means and variances.
    lgssm = model.make_state_space_model(
        num_timesteps=num_timesteps, param_vals=parameter_samples)
    posterior_means, posterior_covs = lgssm.posterior_marginals(
        series, mask=mask)
    observation_means, observation_covs = lgssm.latents_to_observations(
        latent_means=posterior_means, latent_covs=posterior_covs)

    if not include_observation_noise:
      # Pushing zero-variance latents forward leaves only the observation
      # noise in the resulting covariance.
      zero_latent_covs = tf.zeros_like(posterior_covs)
      _, observation_noise_covs = lgssm.latents_to_observations(
          latent_means=posterior_means, latent_covs=zero_latent_covs)

      # Remove the noise term that the first pushforward added. With very
      # large observation noise this subtraction could be numerically
      # fragile; plumbing `include_observation_noise` through
      # `lgssm.latents_to_observations` would avoid it if that ever matters.
      observation_covs -= observation_noise_covs

    # The LGSSM event shape is `[num_timesteps, 1]`; index away the unit
    # dimensions to get a scalar time series before mixing over draws.
    scalar_means = observation_means[..., 0]
    scalar_variances = observation_covs[..., 0, 0]
    return sts_util.mix_over_posterior_draws(
        means=scalar_means, variances=scalar_variances)
def joint_log_prob(self, observed_time_series):
  """Returns a callable computing `log p(params) + log p(y|params)`.

  Args:
    observed_time_series: Observed `Tensor` trajectories of shape
      `sample_shape + batch_shape + [num_timesteps, 1]` (the trailing
      `1` dimension is optional if `num_timesteps > 1`), where
      `batch_shape` should match `self.batch_shape` (the broadcast batch
      shape of all priors on parameters for this structural time series
      model). May optionally be an instance of `tfp.sts.MaskedTimeSeries`,
      which includes a mask `Tensor` to specify timesteps with missing
      observations.

  Returns:
    log_joint_fn: A function taking a `Tensor` argument for each model
      parameter, in canonical order, and returning a `Tensor` log probability
      of shape `batch_shape`. Note that, *unlike* `tfp.Distributions`
      `log_prob` methods, the `log_joint` sums over the `sample_shape` from y,
      so that `sample_shape` does not appear in the output log_prob. This
      corresponds to viewing multiple samples in `y` as iid observations from a
      single model, which is typically the desired behavior for parameter
      inference.
  """
  with tf.compat.v1.name_scope('joint_log_prob',
                               values=[observed_time_series]):
    series, mask = sts_util.canonicalize_observed_time_series_with_mask(
        observed_time_series)

    # The canonical series ends in `[num_timesteps, 1]`.
    series_shape = distribution_util.prefer_static_value(
        tf.shape(input=series))
    num_timesteps = series_shape[-2]

    def log_joint_fn(*param_vals):
      """Generated log-density function."""
      # Prior term: add up the log-density of every parameter under its prior.
      prior_lp = sum(
          param.prior.log_prob(param_val)
          for param, param_val in zip(self.parameters, param_vals))

      # Likelihood term: marginal log_prob of the observations under the
      # linear Gaussian state space model built from these parameter values.
      lgssm = self.make_state_space_model(
          param_vals=param_vals, num_timesteps=num_timesteps)
      likelihood_lp = lgssm.log_prob(series, mask=mask)

      # Collapse the leading sample dimensions of the likelihood. If they were
      # kept, `prior_lp + likelihood_lp` would broadcast the priors across the
      # sample shape and count them once per iid observation.
      rank_gap = tf.rank(likelihood_lp) - tf.rank(prior_lp)
      sample_ndims = tf.maximum(0, rank_gap)
      likelihood_lp = tf.reduce_sum(
          input_tensor=likelihood_lp, axis=tf.range(sample_ndims))

      return prior_lp + likelihood_lp

  return log_joint_fn
def __init__(self,
             order,
             coefficients_prior=None,
             level_scale_prior=None,
             initial_state_prior=None,
             coefficient_constraining_bijector=None,
             observed_time_series=None,
             name=None):
  """Specify an autoregressive model.

  Args:
    order: scalar Python positive `int` specifying the number of past
      timesteps to regress on.
    coefficients_prior: optional `tfd.Distribution` instance specifying a
      prior on the `coefficients` parameter. If `None`, a default standard
      normal (`tfd.MultivariateNormalDiag(scale_diag=tf.ones([order]))`) prior
      is used.
      Default value: `None`.
    level_scale_prior: optional `tfd.Distribution` instance specifying a prior
      on the `level_scale` parameter. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    initial_state_prior: optional `tfd.Distribution` instance specifying a
      prior on the initial state, corresponding to the values of the process
      at a set of size `order` of imagined timesteps before the initial step.
      If `None`, a heuristic default prior is constructed based on the
      provided `observed_time_series`.
      Default value: `None`.
    coefficient_constraining_bijector: optional `tfb.Bijector` instance
      representing a constraining mapping for the autoregressive coefficients.
      For example, `tfb.Tanh()` constrains the coefficients to lie in
      `(-1, 1)`, while `tfb.Softplus()` constrains them to be positive, and
      `tfb.Identity()` implies no constraint. If `None`, the default behavior
      constrains the coefficients to lie in `(-1, 1)` using a `Tanh` bijector.
      Default value: `None`.
    observed_time_series: optional `float` `Tensor` of shape
      `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
      supported when `T > 1`), specifying an observed time series.
      Any priors not explicitly set will be given default values according to
      the scale of the observed time series (or batch of time series). May
      optionally be an instance of `tfp.sts.MaskedTimeSeries`, which includes
      a mask `Tensor` to specify timesteps with missing observations.
      Default value: `None`.
    name: the name of this model component.
      Default value: 'Autoregressive'.

  Raises:
    ValueError: if the event shape of `coefficients_prior` is fully defined
      and its leading dimension differs from `order`.
  """
  with tf.name_scope(name or 'Autoregressive') as name:
    if observed_time_series is None:
      masked_series = None
      series_values = None
    else:
      masked_series = sts_util.canonicalize_observed_time_series_with_mask(
          observed_time_series)
      series_values = masked_series.time_series

    # Choose one dtype shared by the data (if any) and all provided priors.
    dtype = dtype_util.common_dtype(
        [series_values,
         coefficients_prior,
         level_scale_prior,
         initial_state_prior],
        dtype_hint=tf.float32)

    if observed_time_series is None:
      # Without data, fall back to unit scale and a zero initial value.
      observed_stddev = tf.convert_to_tensor(value=1., dtype=dtype)
      observed_initial = tf.convert_to_tensor(value=0., dtype=dtype)
    else:
      _, observed_stddev, observed_initial = sts_util.empirical_statistics(
          masked_series)

    # Ones of shape `batch shape + [order]`, used to broadcast the defaults.
    ones_shape = tf.concat([tf.shape(observed_initial), [order]], axis=0)
    batch_ones = tf.ones(ones_shape, dtype=dtype)

    # Heuristic default priors. Overriding these may dramatically
    # change inference performance and results.
    if coefficients_prior is None:
      coefficients_prior = tfd.MultivariateNormalDiag(scale_diag=batch_ones)
    if level_scale_prior is None:
      level_scale_prior = tfd.LogNormal(
          loc=tf.math.log(0.05 * observed_stddev), scale=3.)

    # The coefficients prior must have one dimension per lag whenever its
    # event shape is statically known.
    coefficients_event_shape = coefficients_prior.event_shape
    if (coefficients_event_shape.is_fully_defined() and
        order != coefficients_event_shape[0]):
      raise ValueError("Prior dimension {} doesn't match order {}.".format(
          coefficients_prior.event_shape[0], order))

    if initial_state_prior is None:
      initial_loc = observed_initial[..., tf.newaxis] * batch_ones
      initial_scale = (
          (tf.abs(observed_initial) + observed_stddev)[..., tf.newaxis] *
          batch_ones)
      initial_state_prior = tfd.MultivariateNormalDiag(
          loc=initial_loc, scale_diag=initial_scale)

    self._order = order
    self._coefficients_prior = coefficients_prior
    self._level_scale_prior = level_scale_prior
    self._initial_state_prior = initial_state_prior

    if coefficient_constraining_bijector is None:
      coefficient_constraining_bijector = tfb.Tanh()

    coefficients_parameter = Parameter(
        'coefficients', coefficients_prior, coefficient_constraining_bijector)
    # `level_scale` is kept positive (Softplus) and rescaled by the observed
    # standard deviation.
    level_scale_parameter = Parameter(
        'level_scale',
        level_scale_prior,
        tfb.Chain([tfb.AffineScalar(scale=observed_stddev), tfb.Softplus()]))

    super(Autoregressive, self).__init__(
        parameters=[coefficients_parameter, level_scale_parameter],
        latent_size=order,
        name=name)
def __init__(self,
             ar_order,
             ma_order,
             integration_degree=0,
             ar_coefficients_prior=None,
             ma_coefficients_prior=None,
             level_drift_prior=None,
             level_scale_prior=None,
             initial_state_prior=None,
             ar_coefficient_constraining_bijector=None,
             ma_coefficient_constraining_bijector=None,
             observed_time_series=None,
             name=None):
    """Specifies an ARIMA(p=ar_order, d=integration_degree, q=ma_order) model.

    Args:
      ar_order: scalar Python non-negative `int` specifying the order of the
        autoregressive process (`p` in `ARIMA(p, d, q)`). If `0`, no
        `ar_coefficients` parameter is created.
      ma_order: scalar Python non-negative `int` specifying the order of the
        moving-average process (`q` in `ARIMA(p, d, q)`). If `0`, no
        `ma_coefficients` parameter is created.
      integration_degree: scalar Python non-negative `int` specifying the
        number of times to integrate an ARMA process (`d` in
        `ARIMA(p, d, q)`).
        Default value: `0`.
      ar_coefficients_prior: optional `tfd.Distribution` instance specifying
        a prior on the `ar_coefficients` parameter. If `None`, a default
        standard normal
        (`tfd.MultivariateNormalDiag(scale_diag=tf.ones([ar_order]))`) prior
        is used.
        Default value: `None`.
      ma_coefficients_prior: optional `tfd.Distribution` instance specifying
        a prior on the `ma_coefficients` parameter. If `None`, a default
        standard normal
        (`tfd.MultivariateNormalDiag(scale_diag=tf.ones([ma_order]))`) prior
        is used.
        Default value: `None`.
      level_drift_prior: optional `tfd.Distribution` instance specifying a
        prior on the `level_drift` parameter. If `None`, the parameter is not
        inferred and is instead fixed to zero.
        Default value: `None`.
      level_scale_prior: optional `tfd.Distribution` instance specifying a
        prior on the `level_scale` parameter. If `None`, a heuristic default
        prior is constructed based on the provided `observed_time_series`.
        Default value: `None`.
      initial_state_prior: optional `tfd.Distribution` instance specifying a
        prior on the initial latent state. If `None`, a heuristic default
        prior is constructed from the empirical initial value and standard
        deviation of the (differenced) `observed_time_series`, padded with
        trailing zeros to the latent size
        `max(ar_order, ma_order + 1) + integration_degree`.
        Default value: `None`.
      ar_coefficient_constraining_bijector: optional `tfb.Bijector` instance
        representing a constraining mapping for the autoregressive
        coefficients. For example, `tfb.Tanh()` constrains the coefficients
        to lie in `(-1, 1)`, while `tfb.Softplus()` constrains them to be
        positive, and `tfb.Identity()` implies no constraint. If `None`, the
        default behavior constrains the coefficients to lie in `(-1, 1)`
        using a `Tanh` bijector.
        Default value: `None`.
      ma_coefficient_constraining_bijector: optional `tfb.Bijector` instance
        representing a constraining mapping for the moving average
        coefficients. For example, `tfb.Tanh()` constrains the coefficients
        to lie in `(-1, 1)`, while `tfb.Softplus()` constrains them to be
        positive, and `tfb.Identity()` implies no constraint. If `None`, the
        default behavior is to apply no constraint.
        Default value: `None`.
      observed_time_series: optional `float` `Tensor` of shape
        `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
        supported when `T > 1`), specifying an observed time series. Any
        `NaN`s are interpreted as missing observations; missingness may also
        be explicitly specified by passing a `tfp.sts.MaskedTimeSeries`
        instance. Any priors not explicitly set will be given default values
        according to the scale of the observed time series (or batch of time
        series).
        Default value: `None`.
      name: the name of this model component.
        Default value: 'ARIMA'.

    Raises:
      ValueError: if the event shape of `ar_coefficients_prior` (or
        `ma_coefficients_prior`) is fully defined and its first dimension
        does not match `ar_order` (respectively `ma_order`).
    """
    # Must remain the first statement: it snapshots exactly the constructor
    # arguments (plus `self`) before any other local name is bound.
    init_parameters = dict(locals())
    with tf.name_scope(name or 'ARIMA') as name:
        masked_time_series = None
        if observed_time_series is not None:
            masked_time_series = (
                sts_util.canonicalize_observed_time_series_with_mask(
                    observed_time_series))
        dtype = dtype_util.common_dtype(
            [(masked_time_series.time_series
              if masked_time_series is not None else None),
             ar_coefficients_prior,
             ma_coefficients_prior,
             level_scale_prior,
             initial_state_prior],
            dtype_hint=tf.float32)

        if observed_time_series is not None:
            for _ in range(integration_degree):
                # Compute statistics using `integration_degree`-order
                # differences.
                masked_time_series = (
                    missing_values_util.differentiate_masked_time_series(
                        masked_time_series))
            _, observed_stddev, observed_initial = (
                sts_util.empirical_statistics(masked_time_series))
        else:
            # No data: fall back to unit scale and zero initial value.
            observed_stddev, observed_initial = (
                tf.convert_to_tensor(value=1., dtype=dtype),
                tf.convert_to_tensor(value=0., dtype=dtype))
        batch_ones = ps.ones(ps.concat([
            ps.shape(observed_initial),  # Batch shape
            [1]], axis=0), dtype=dtype)

        # Heuristic default priors. Overriding these may dramatically
        # change inference performance and results.
        # The per-order `ones` are created with the inferred `dtype` so the
        # default priors match the dtype of the rest of the model instead of
        # silently defaulting to float32.
        if ar_coefficients_prior is None:
            ar_coefficients_prior = tfd.MultivariateNormalDiag(
                scale_diag=batch_ones * ps.ones([ar_order], dtype=dtype))
        if ma_coefficients_prior is None:
            ma_coefficients_prior = tfd.MultivariateNormalDiag(
                scale_diag=batch_ones * ps.ones([ma_order], dtype=dtype))
        if level_scale_prior is None:
            level_scale_prior = tfd.LogNormal(
                loc=tf.math.log(0.05 * observed_stddev), scale=3.)

        # Validate user-supplied coefficient priors against the requested
        # orders whenever their event shape is statically known.
        if (ar_coefficients_prior.event_shape.is_fully_defined() and
                ar_order != ar_coefficients_prior.event_shape[0]):
            raise ValueError(
                "Autoregressive prior dimension {} doesn't match order {}."
                .format(ar_coefficients_prior.event_shape[0], ar_order))
        if (ma_coefficients_prior.event_shape.is_fully_defined() and
                ma_order != ma_coefficients_prior.event_shape[0]):
            raise ValueError(
                "Moving average prior dimension {} doesn't match order {}."
                .format(ma_coefficients_prior.event_shape[0], ma_order))

        latent_size = (
            ps.maximum(ar_order, ma_order + 1) + integration_degree)
        if initial_state_prior is None:
            # `batch_ones` has a trailing dimension of size 1, so padding
            # with `latent_size - 1` zeros yields an event of `latent_size`.
            initial_state_prior = tfd.MultivariateNormalDiag(
                loc=sts_util.pad_tensor_with_trailing_zeros(
                    observed_initial[..., tf.newaxis] * batch_ones,
                    num_zeros=latent_size - 1),
                scale_diag=sts_util.pad_tensor_with_trailing_zeros(
                    (tf.abs(observed_initial) +
                     observed_stddev)[..., tf.newaxis] * batch_ones,
                    num_zeros=latent_size - 1))

        self._ar_order = ar_order
        self._ma_order = ma_order
        self._integration_degree = integration_degree
        self._ar_coefficients_prior = ar_coefficients_prior
        self._ma_coefficients_prior = ma_coefficients_prior
        self._level_scale_prior = level_scale_prior
        self._initial_state_prior = initial_state_prior

        # Only register parameters that the requested model actually uses.
        parameters = []
        if ar_order > 0:
            if ar_coefficient_constraining_bijector is None:
                ar_coefficient_constraining_bijector = tfb.Tanh()
            parameters.append(
                Parameter('ar_coefficients',
                          ar_coefficients_prior,
                          ar_coefficient_constraining_bijector))
        if ma_order > 0:
            if ma_coefficient_constraining_bijector is None:
                ma_coefficient_constraining_bijector = tfb.Identity()
            parameters.append(
                Parameter('ma_coefficients',
                          ma_coefficients_prior,
                          ma_coefficient_constraining_bijector))
        if level_drift_prior is not None:
            # Rescale the prior's default unconstraining bijector by the
            # empirical standard deviation of the series.
            parameters.append(
                Parameter(
                    'level_drift',
                    level_drift_prior,
                    tfb.Chain([
                        tfb.Scale(scale=observed_stddev),
                        (level_drift_prior.
                         experimental_default_event_space_bijector())])))
        # NOTE: the explicit two-argument `super` is kept deliberately; the
        # zero-argument form would add `__class__` to the `locals()`
        # snapshot captured in `init_parameters` above.
        super(AutoregressiveIntegratedMovingAverage, self).__init__(
            parameters=parameters + [
                Parameter('level_scale', level_scale_prior,
                          tfb.Chain([
                              tfb.Scale(scale=observed_stddev),
                              tfb.Softplus(low=dtype_util.eps(dtype))]))
            ],
            latent_size=latent_size,
            init_parameters=init_parameters,
            name=name)