def state_space_model_likelihood(**param_vals): ssm = self.make_state_space_model( param_vals=param_vals, num_timesteps=num_timesteps, initial_step=initial_step, mask=mask, experimental_parallelize=experimental_parallelize) # Looping LGSSM methods are really expensive in eager mode; wrap them # to keep this from slowing things down in interactive use. ssm = tfe_util.JitPublicMethods(ssm, trace_only=True) if distribution_util.shape_may_be_nontrivial(trajectories_shape): return sample.Sample(ssm, sample_shape=trajectories_shape) return ssm
def decompose_by_component(model, observed_time_series, parameter_samples): """Decompose an observed time series into contributions from each component. This method decomposes a time series according to the posterior represention of a structural time series model. In particular, it: - Computes the posterior marginal mean and covariances over the additive model's latent space. - Decomposes the latent posterior into the marginal blocks for each model component. - Maps the per-component latent posteriors back through each component's observation model, to generate the time series modeled by that component. Args: model: An instance of `tfp.sts.Sum` representing a structural time series model. observed_time_series: optional `float` `Tensor` of shape `batch_shape + [T, 1]` (omitting the trailing unit dimension is also supported when `T > 1`), specifying an observed time series. Any `NaN`s are interpreted as missing observations; missingness may be also be explicitly specified by passing a `tfp.sts.MaskedTimeSeries` instance. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([ [num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. Returns: component_dists: A `collections.OrderedDict` instance mapping component StructuralTimeSeries instances (elements of `model.components`) to `tfd.Distribution` instances representing the posterior marginal distributions on the process modeled by each component. Each distribution has batch shape matching that of `posterior_means`/`posterior_covs`, and event shape of `[num_timesteps]`. #### Examples Suppose we've built a model and fit it to data: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) num_steps_forecast = 50 samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) ``` To extract the contributions of individual components, pass the time series and sampled parameters into `decompose_by_component`: ```python component_dists = decompose_by_component( model, observed_time_series=observed_time_series, parameter_samples=samples) # Component mean and stddev have shape `[len(observed_time_series)]`. day_of_week_effect_mean = component_dists[day_of_week].mean() day_of_week_effect_stddev = component_dists[day_of_week].stddev() ``` Using the component distributions, we can visualize the uncertainty for each component: ``` from matplotlib import pylab as plt num_components = len(component_dists) xs = np.arange(len(observed_time_series)) fig = plt.figure(figsize=(12, 3 * num_components)) for i, (component, component_dist) in enumerate(component_dists.items()): # If in graph mode, replace `.numpy()` with `.eval()` or `sess.run()`. component_mean = component_dist.mean().numpy() component_stddev = component_dist.stddev().numpy() ax = fig.add_subplot(num_components, 1, 1 + i) ax.plot(xs, component_mean, lw=2) ax.fill_between(xs, component_mean - 2 * component_stddev, component_mean + 2 * component_stddev, alpha=0.5) ax.set_title(component.name) ``` """ with tf.name_scope('decompose_by_component'): [observed_time_series, is_missing] = sts_util.canonicalize_observed_time_series_with_mask( observed_time_series) # Run smoothing over the training timesteps to extract the # posterior on latents. num_timesteps = dist_util.prefer_static_value( tf.shape(observed_time_series))[-2] ssm = tfe_util.JitPublicMethods( model.make_state_space_model(num_timesteps=num_timesteps, param_vals=parameter_samples), trace_only=True ) # Avoid eager overhead w/o introducing XLA dependence. posterior_means, posterior_covs = ssm.posterior_marginals( observed_time_series, mask=is_missing) return _decompose_from_posterior_marginals(model, posterior_means, posterior_covs, parameter_samples)
def one_step_predictive(model, observed_time_series, parameter_samples, timesteps_are_event_shape=True): """Compute one-step-ahead predictive distributions for all timesteps. Given samples from the posterior over parameters, return the predictive distribution over observations at each time `T`, given observations up through time `T-1`. Args: model: An instance of `StructuralTimeSeries` representing a time-series model. This represents a joint distribution over time-series and their parameters with batch shape `[b1, ..., bN]`. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. Any `NaN`s are interpreted as missing observations; missingness may be also be explicitly specified by passing a `tfp.sts.MaskedTimeSeries` instance. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([[num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. timesteps_are_event_shape: Deprecated, for backwards compatibility only. If `False`, the predictive distribution will return per-timestep probabilities Default value: `True`. Returns: predictive_dist: a `tfd.MixtureSameFamily` instance with event shape `[num_timesteps] if timesteps_are_event_shape else []` and batch shape `concat([sample_shape, model.batch_shape, [] if timesteps_are_event_shape else [num_timesteps])`, with `num_posterior_draws` mixture components. The `t`th step represents the forecast distribution `p(observed_time_series[t] | observed_time_series[0:t-1], parameter_samples)`. #### Examples Suppose we've built a model and fit it to data using HMC: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) ``` Passing the posterior samples into `one_step_predictive`, we construct a one-step-ahead predictive distribution: ```python one_step_predictive_dist = tfp.sts.one_step_predictive( model, observed_time_series, parameter_samples=samples) predictive_means = one_step_predictive_dist.mean() predictive_scales = one_step_predictive_dist.stddev() ``` If using variational inference instead of HMC, we'd construct a forecast using samples from the variational posterior: ```python surrogate_posterior = tfp.sts.build_factored_surrogate_posterior( model=model) loss_curve = tfp.vi.fit_surrogate_posterior( target_log_prob_fn=model.joint_distribution(observed_time_series).log_prob, surrogate_posterior=surrogate_posterior, optimizer=tf.optimizers.Adam(learning_rate=0.1), num_steps=200) samples = surrogate_posterior.sample(30) one_step_predictive_dist = tfp.sts.one_step_predictive( model, observed_time_series, parameter_samples=samples) ``` We can visualize the forecast by plotting: ```python from matplotlib import pylab as plt def plot_one_step_predictive(observed_time_series, forecast_mean, forecast_scale): plt.figure(figsize=(12, 6)) num_timesteps = forecast_mean.shape[-1] c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05) plt.plot(observed_time_series, label="observed time series", color=c1) plt.plot(forecast_mean, label="one-step prediction", color=c2) plt.fill_between(np.arange(num_timesteps), forecast_mean - 2 * forecast_scale, forecast_mean + 2 * forecast_scale, alpha=0.1, color=c2) plt.legend() plot_one_step_predictive(observed_time_series, forecast_mean=predictive_means, forecast_scale=predictive_scales) ``` To detect anomalous timesteps, we check whether the observed value at each step is within a 95% predictive interval, i.e., two standard deviations from the mean: ```python z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1]) / predictive_scales[..., :-1]) anomalous_timesteps = tf.boolean_mask( tf.range(1, num_timesteps), tf.abs(z_scores) > 2.0) ``` """ with tf.name_scope('one_step_predictive'): [ observed_time_series, is_missing ] = sts_util.canonicalize_observed_time_series_with_mask( observed_time_series) # Run filtering over the training timesteps to extract the # predictive means and variances. num_timesteps = dist_util.prefer_static_value( tf.shape(observed_time_series))[-2] lgssm = tfe_util.JitPublicMethods( model.make_state_space_model(num_timesteps=num_timesteps, param_vals=parameter_samples), trace_only=True) # Avoid eager overhead w/o introducing XLA dependence. (_, _, _, _, _, observation_means, observation_covs ) = lgssm.forward_filter(observed_time_series, mask=is_missing) # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]` # to a scalar time series. predictive_dist = sts_util.mix_over_posterior_draws( means=observation_means[..., 0], variances=observation_covs[..., 0, 0]) if timesteps_are_event_shape: predictive_dist = tfd.Independent( predictive_dist, reinterpreted_batch_ndims=1) return predictive_dist
def impute_missing_values(model, observed_time_series, parameter_samples, include_observation_noise=False, timesteps_are_event_shape=True): """Runs posterior inference to impute the missing values in a time series. This method computes the posterior marginals `p(latent state | observations)`, given the time series at observed timesteps (a missingness mask should be specified using `tfp.sts.MaskedTimeSeries`). It pushes this posterior back through the observation model to impute a predictive distribution on the observed time series. At unobserved steps, this is an imputed value; at other steps it is interpreted as the model's estimate of the underlying noise-free series. Args: model: `tfp.sts.Sum` instance defining an additive STS model. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. Any `NaN`s are interpreted as missing observations; missingness may be also be explicitly specified by passing a `tfp.sts.MaskedTimeSeries` instance. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([ [num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. include_observation_noise: If `False`, the imputed uncertainties represent the model's estimate of the noise-free time series at each timestep. If `True`, they represent the model's estimate of the range of values that could be *observed* at each timestep, including any i.i.d. observation noise. Default value: `False`. timesteps_are_event_shape: Deprecated, for backwards compatibility only. If `False`, the predictive distribution will return per-timestep probabilities Default value: `True`. Returns: imputed_series_dist: a `tfd.MixtureSameFamily` instance with event shape `[num_timesteps] if timesteps_are_event_shape else []` and batch shape `concat([sample_shape, model.batch_shape, [] if timesteps_are_event_shape else [num_timesteps])`, with `num_posterior_draws` mixture components. #### Example To specify a time series with missing values, use `tfp.sts.MaskedTimeSeries`: ```python time_series_with_nans = [-1., 1., np.nan, 2.4, np.nan, 5] observed_time_series = tfp.sts.MaskedTimeSeries( time_series=time_series_with_nans, is_missing=tf.math.is_nan(time_series_with_nans)) ``` Masked time series can be passed to `tfp.sts` methods in place of a `observed_time_series` `Tensor`: ```python # Build model using observed time series to set heuristic priors. linear_trend_model = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series) model = tfp.sts.Sum([linear_trend_model], observed_time_series=observed_time_series) # Fit model to data parameter_samples, _ = tfp.sts.fit_with_hmc(model, observed_time_series) ``` After fitting a model, `impute_missing_values` will return a distribution ```python # Impute missing values imputed_series_distribution = tfp.sts.impute_missing_values( model, observed_time_series, parameter_samples=parameter_samples) print('imputed means and stddevs: ', imputed_series_distribution.mean(), imputed_series_distribution.stddev()) ``` """ with tf.name_scope('impute_missing_values'): [ observed_time_series, mask ] = sts_util.canonicalize_observed_time_series_with_mask( observed_time_series) # Run smoothing over the training timesteps to extract the # predictive means and variances. num_timesteps = dist_util.prefer_static_value( tf.shape(observed_time_series))[-2] lgssm = tfe_util.JitPublicMethods( model.make_state_space_model(num_timesteps=num_timesteps, param_vals=parameter_samples), trace_only=True) # Avoid eager overhead w/o introducing XLA dependence. posterior_means, posterior_covs = lgssm.posterior_marginals( observed_time_series, mask=mask) observation_means, observation_covs = lgssm.latents_to_observations( latent_means=posterior_means, latent_covs=posterior_covs) if not include_observation_noise: # Extract just the variance of observation noise by pushing forward # zero-variance latents. _, observation_noise_covs = lgssm.latents_to_observations( latent_means=posterior_means, latent_covs=tf.zeros_like(posterior_covs)) # Subtract out the observation noise that was added in the original # pushforward. Note that this could cause numerical issues if the # observation noise is very large. If this becomes an issue we could # avoid the subtraction by plumbing `include_observation_noise` through # `lgssm.latents_to_observations`. observation_covs -= observation_noise_covs # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]` # to a scalar time series. imputed_values_dist = sts_util.mix_over_posterior_draws( means=observation_means[..., 0], variances=observation_covs[..., 0, 0]) if timesteps_are_event_shape: imputed_values_dist = tfd.Independent( imputed_values_dist, reinterpreted_batch_ndims=1) return imputed_values_dist
def forecast(model, observed_time_series, parameter_samples, num_steps_forecast, include_observation_noise=True): """Construct predictive distribution over future observations. Given samples from the posterior over parameters, return the predictive distribution over future observations for num_steps_forecast timesteps. Args: model: An instance of `StructuralTimeSeries` representing a time-series model. This represents a joint distribution over time-series and their parameters with batch shape `[b1, ..., bN]`. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. Any `NaN`s are interpreted as missing observations; missingness may be also be explicitly specified by passing a `tfp.sts.MaskedTimeSeries` instance. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([[num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. num_steps_forecast: scalar `int` `Tensor` number of steps to forecast. include_observation_noise: Python `bool` indicating whether the forecast distribution should include uncertainty from observation noise. If `True`, the forecast is over future observations, if `False`, the forecast is over future values of the latent noise-free time series. Default value: `True`. Returns: forecast_dist: a `tfd.MixtureSameFamily` instance with event shape [num_steps_forecast, 1] and batch shape `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws` mixture components. #### Examples Suppose we've built a model and fit it to data using HMC: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) ``` Passing the posterior samples into `forecast`, we construct a forecast distribution: ```python forecast_dist = tfp.sts.forecast(model, observed_time_series, parameter_samples=samples, num_steps_forecast=50) forecast_mean = forecast_dist.mean()[..., 0] # shape: [50] forecast_scale = forecast_dist.stddev()[..., 0] # shape: [50] forecast_samples = forecast_dist.sample(10)[..., 0] # shape: [10, 50] ``` If using variational inference instead of HMC, we'd construct a forecast using samples from the variational posterior: ```python surrogate_posterior = tfp.sts.build_factored_surrogate_posterior( model=model) loss_curve = tfp.vi.fit_surrogate_posterior( target_log_prob_fn=model.joint_distribution(observed_time_series).log_prob, surrogate_posterior=surrogate_posterior, optimizer=tf.optimizers.Adam(learning_rate=0.1), num_steps=200) samples = surrogate_posterior.sample(30) forecast_dist = tfp.sts.forecast(model, observed_time_series, parameter_samples=samples, num_steps_forecast=50) ``` We can visualize the forecast by plotting: ```python from matplotlib import pylab as plt def plot_forecast(observed_time_series, forecast_mean, forecast_scale, forecast_samples): plt.figure(figsize=(12, 6)) num_steps = observed_time_series.shape[-1] num_steps_forecast = forecast_mean.shape[-1] num_steps_train = num_steps - num_steps_forecast c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05) plt.plot(np.arange(num_steps), observed_time_series, lw=2, color=c1, label='ground truth') forecast_steps = np.arange(num_steps_train, num_steps_train+num_steps_forecast) plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1) plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2, label='forecast') plt.fill_between(forecast_steps, forecast_mean - 2 * forecast_scale, forecast_mean + 2 * forecast_scale, color=c2, alpha=0.2) plt.xlim([0, num_steps]) plt.legend() plot_forecast(observed_time_series, forecast_mean=forecast_mean, forecast_scale=forecast_scale, forecast_samples=forecast_samples) ``` """ with tf.name_scope('forecast'): [ observed_time_series, mask ] = sts_util.canonicalize_observed_time_series_with_mask( observed_time_series) # Run filtering over the observed timesteps to extract the # latent state posterior at timestep T+1 (i.e., the final # filtering distribution, pushed through the transition model). # This is the prior for the forecast model ("today's prior # is yesterday's posterior"). num_observed_steps = dist_util.prefer_static_value( tf.shape(observed_time_series))[-2] observed_data_ssm = tfe_util.JitPublicMethods( model.make_state_space_model(num_timesteps=num_observed_steps, param_vals=parameter_samples), trace_only=True) # Avoid eager overhead w/o introducing XLA dependence. (_, _, _, predictive_mean, predictive_cov, _, _ ) = observed_data_ssm.forward_filter(observed_time_series, mask=mask, final_step_only=True) # Build a batch of state-space models over the forecast period. Because # we'll use MixtureSameFamily to mix over the posterior draws, we need to # do some shenanigans to move the `[num_posterior_draws]` batch dimension # from the leftmost to the rightmost side of the model's batch shape. # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an # arbitrary axis, and eliminate `move_dimension` calls here. parameter_samples = model._canonicalize_param_vals_as_map(parameter_samples) # pylint: disable=protected-access parameter_samples_with_reordered_batch_dimension = { param.name: dist_util.move_dimension( parameter_samples[param.name], 0, -(1 + _prefer_static_event_ndims(param.prior))) for param in model.parameters} forecast_prior = tfd.MultivariateNormalTriL( loc=dist_util.move_dimension(predictive_mean, 0, -2), scale_tril=tf.linalg.cholesky( dist_util.move_dimension(predictive_cov, 0, -3))) # Ugly hack: because we moved `num_posterior_draws` to the trailing (rather # than leading) dimension of parameters, the parameter batch shapes no # longer broadcast against the `constant_offset` attribute used in `sts.Sum` # models. We fix this by manually adding an extra broadcasting dim to # `constant_offset` if present. # The root cause of this hack is that we mucked with param dimensions above # and are now passing params that are 'invalid' in the sense that they don't # match the shapes of the model's param priors. The fix (as above) will be # to update MixtureSameFamily so we can avoid changing param dimensions # altogether. # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an # arbitrary axis, and eliminate this hack. kwargs = {} if hasattr(model, 'constant_offset'): kwargs['constant_offset'] = tf.convert_to_tensor( value=model.constant_offset, dtype=forecast_prior.dtype)[..., tf.newaxis, :] if not include_observation_noise: parameter_samples_with_reordered_batch_dimension[ 'observation_noise_scale'] = tf.zeros_like( parameter_samples_with_reordered_batch_dimension[ 'observation_noise_scale']) # We assume that any STS model that has a `constant_offset` attribute # will allow it to be overridden as a kwarg. This is currently just # `sts.Sum`. # TODO(b/120245392): when kwargs hack is removed, switch back to calling # the public version of `_make_state_space_model`. forecast_ssm = model._make_state_space_model( # pylint: disable=protected-access num_timesteps=num_steps_forecast, param_map=parameter_samples_with_reordered_batch_dimension, initial_state_prior=forecast_prior, initial_step=num_observed_steps, **kwargs) # Avoid eager-mode loops when querying the forecast. forecast_ssm = tfe_util.JitPublicMethods(forecast_ssm, trace_only=True) num_posterior_draws = dist_util.prefer_static_value( forecast_ssm.batch_shape_tensor())[-1] return tfd.MixtureSameFamily( mixture_distribution=tfd.Categorical( logits=tf.zeros([num_posterior_draws], dtype=forecast_ssm.dtype)), components_distribution=forecast_ssm)