def joint_log_prob(self, observed_time_series): """Build the joint density `log p(params) + log p(y|params)` as a callable. Args: observed_time_series: Observed `Tensor` trajectories of shape `sample_shape + batch_shape + [num_timesteps, 1]` (the trailing `1` dimension is optional if `num_timesteps > 1`), where `batch_shape` should match `self.batch_shape` (the broadcast batch shape of all priors on parameters for this structural time series model). Returns: log_joint_fn: A function taking a `Tensor` argument for each model parameter, in canonical order, and returning a `Tensor` log probability of shape `batch_shape`. Note that, *unlike* `tfp.Distributions` `log_prob` methods, the `log_joint` sums over the `sample_shape` from y, so that `sample_shape` does not appear in the output log_prob. This corresponds to viewing multiple samples in `y` as iid observations from a single model, which is typically the desired behavior for parameter inference. """ with tf.name_scope('joint_log_prob', values=[observed_time_series]): observed_time_series = tf.convert_to_tensor(observed_time_series) observed_time_series = sts_util.maybe_expand_trailing_dim( observed_time_series) num_timesteps = distribution_util.prefer_static_value( tf.shape(observed_time_series))[-2] def log_joint_fn(*param_vals): """Generated log-density function.""" # Sum the log_prob values from parameter priors. param_lp = sum([ param.prior.log_prob(param_val) for (param, param_val) in zip(self.parameters, param_vals) ]) # Build a linear Gaussian state space model and evaluate the marginal # log_prob on observations. lgssm = self.make_state_space_model( param_vals=param_vals, num_timesteps=num_timesteps) observation_lp = lgssm.log_prob(observed_time_series) # Sum over likelihoods from iid observations. Without this sum, # adding `param_lp + observation_lp` would broadcast the param priors # over the sample shape, which incorrectly multi-counts the param # priors. sample_ndims = tf.maximum(0, tf.rank(observation_lp) - tf.rank(param_lp)) observation_lp = tf.reduce_sum( observation_lp, axis=tf.range(sample_ndims)) return param_lp + observation_lp return log_joint_fn
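For context, a minimal sketch of how the returned callable is typically consumed (the model and data below are illustrative placeholders, not part of the surrounding code):

```python
import tensorflow as tf
import tensorflow_probability as tfp

# Hypothetical toy setup; any StructuralTimeSeries model works the same way.
observed_time_series = tf.random.normal([100, 1])
model = tfp.sts.Sum(
    [tfp.sts.LocalLinearTrend(observed_time_series=observed_time_series)],
    observed_time_series=observed_time_series)

target_log_prob_fn = model.joint_log_prob(observed_time_series)
# One Tensor per parameter, in canonical order (here: the prior means).
initial_state = [param.prior.mean() for param in model.parameters]
lp = target_log_prob_fn(*initial_state)  # scalar: this model has empty batch shape
```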
def testScalarTensor(self): x = tf.constant(1.) value = distribution_util.prefer_static_value(x) if not tf.executing_eagerly(): self.assertIsInstance(value, np.ndarray) self.assertAllEqual(np.array(1.), value)
def testDynamicValueEndsUpBeingEmpty(self): if tf.executing_eagerly(): return x = tf1.placeholder_with_default( np.array([], dtype=np.int32), shape=None) value = distribution_util.prefer_static_value(x) self.assertAllEqual(np.array([]), self.evaluate(value))
def testNonEmptyConstantTensor(self): x = tf.zeros((2, 3, 4)) value = distribution_util.prefer_static_value(x) self.assertIsInstance(value, np.ndarray) self.assertAllEqual(np.zeros((2, 3, 4)), value)
def testEmptyConstantTensor(self): x = tf.constant([]) value = distribution_util.prefer_static_value(x) self.assertIsInstance(value, np.ndarray) self.assertAllEqual(np.array([]), value)
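Taken together, these tests pin down the contract of `prefer_static_value`: return a NumPy value whenever the tensor's value is statically inferable, otherwise fall back to the `Tensor` itself. A small illustration (assuming the same import path as the tests above):

```python
import numpy as np
import tensorflow.compat.v2 as tf
from tensorflow_probability.python.internal import distribution_util

# A constant's value is statically inferable, so a NumPy array comes back
# with no session or eager evaluation needed.
value = distribution_util.prefer_static_value(tf.zeros((2, 3)))
np.testing.assert_array_equal(np.zeros((2, 3)), value)
```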
def forecast(model, observed_time_series, parameter_samples, num_steps_forecast): """Construct predictive distribution over future observations. Given samples from the posterior over parameters, return the predictive distribution over future observations for num_steps_forecast timesteps. Args: model: An instance of `StructuralTimeSeries` representing a time-series model. This represents a joint distribution over time-series and their parameters with batch shape `[b1, ..., bN]`. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([[num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. num_steps_forecast: scalar `int` `Tensor` number of steps to forecast. Returns: forecast_dist: a `tfd.MixtureSameFamily` instance with event shape [num_steps_forecast, 1] and batch shape `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws` mixture components. #### Examples Suppose we've built a model and fit it to data using HMC: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) ``` Passing the posterior samples into `forecast`, we construct a forecast distribution: ```python forecast_dist = tfp.sts.forecast(model, observed_time_series, parameter_samples=samples, num_steps_forecast=50) forecast_mean = forecast_dist.mean()[..., 0] # shape: [50] forecast_scale = forecast_dist.stddev()[..., 0] # shape: [50] forecast_samples = forecast_dist.sample(10)[..., 0] # shape: [10, 50] ``` If using variational inference instead of HMC, we'd construct a forecast using samples from the variational posterior: ```python (variational_loss, variational_distributions) = tfp.sts.build_factored_variational_loss( model=model, observed_time_series=observed_time_series) # OMITTED: take steps to optimize variational loss samples = {k: q.sample(30) for (k, q) in variational_distributions.items()} forecast_dist = tfp.sts.forecast(model, observed_time_series, parameter_samples=samples, num_steps_forecast=50) ``` We can visualize the forecast by plotting: ```python from matplotlib import pylab as plt def plot_forecast(observed_time_series, forecast_mean, forecast_scale, forecast_samples): plt.figure(figsize=(12, 6)) num_steps = observed_time_series.shape[-1] num_steps_forecast = forecast_mean.shape[-1] num_steps_train = num_steps - num_steps_forecast c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05) plt.plot(np.arange(num_steps), observed_time_series, lw=2, color=c1, label='ground truth') forecast_steps = np.arange(num_steps_train, num_steps_train+num_steps_forecast) plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1) plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2, label='forecast') plt.fill_between(forecast_steps, forecast_mean - 2 * forecast_scale, forecast_mean + 2 * 
forecast_scale, color=c2, alpha=0.2) plt.xlim([0, num_steps]) plt.legend() plot_forecast(observed_time_series, forecast_mean=forecast_mean, forecast_scale=forecast_scale, forecast_samples=forecast_samples) ``` """ with tf.compat.v1.name_scope( 'forecast', values=[observed_time_series, parameter_samples, num_steps_forecast]): observed_time_series = tf.convert_to_tensor( value=observed_time_series, name='observed_time_series') observed_time_series = sts_util.maybe_expand_trailing_dim( observed_time_series) # Run filtering over the observed timesteps to extract the # latent state posterior at timestep T+1 (i.e., the final # filtering distribution, pushed through the transition model). # This is the prior for the forecast model ("today's prior # is yesterday's posterior"). num_observed_steps = dist_util.prefer_static_value( tf.shape(input=observed_time_series))[-2] observed_data_ssm = model.make_state_space_model( num_timesteps=num_observed_steps, param_vals=parameter_samples) (_, _, _, predictive_means, predictive_covs, _, _ ) = observed_data_ssm.forward_filter(observed_time_series) # Build a batch of state-space models over the forecast period. Because # we'll use MixtureSameFamily to mix over the posterior draws, we need to # do some shenanigans to move the `[num_posterior_draws]` batch dimension # from the leftmost to the rightmost side of the model's batch shape. # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an # arbitrary axis, and eliminate `move_dimension` calls here. parameter_samples = model._canonicalize_param_vals_as_map(parameter_samples) # pylint: disable=protected-access parameter_samples_with_reordered_batch_dimension = { param.name: dist_util.move_dimension( parameter_samples[param.name], 0, -(1 + _prefer_static_event_ndims(param.prior))) for param in model.parameters} forecast_prior = tfd.MultivariateNormalFullCovariance( loc=dist_util.move_dimension(predictive_means[..., -1, :], 0, -2), covariance_matrix=dist_util.move_dimension( predictive_covs[..., -1, :, :], 0, -3)) forecast_ssm = model.make_state_space_model( num_timesteps=num_steps_forecast, param_vals=parameter_samples_with_reordered_batch_dimension, initial_state_prior=forecast_prior, initial_step=num_observed_steps) num_posterior_draws = dist_util.prefer_static_value( forecast_ssm.batch_shape_tensor())[-1] return tfd.MixtureSameFamily( mixture_distribution=tfd.Categorical( logits=tf.zeros([num_posterior_draws], dtype=forecast_ssm.dtype)), components_distribution=forecast_ssm)
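The equal-weight `MixtureSameFamily` construction at the end is worth seeing in isolation. A toy sketch (shapes invented for illustration) of mixing over a rightmost `num_posterior_draws` batch dimension:

```python
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

num_posterior_draws, num_steps = 4, 50
# Per-draw forecast distributions: batch shape [4], event shape [50, 1].
components = tfd.Independent(
    tfd.Normal(loc=tf.random.normal([num_posterior_draws, num_steps, 1]),
               scale=tf.ones([num_posterior_draws, num_steps, 1])),
    reinterpreted_batch_ndims=2)
# An equal-weight Categorical mixes (integrates) over the posterior draws.
forecast_dist = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(
        logits=tf.zeros([num_posterior_draws])),
    components_distribution=components)
print(forecast_dist.mean().shape)  # [50, 1]: the draw dimension is mixed away
```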
def one_step_predictive(model, observed_time_series, parameter_samples): """Compute one-step-ahead predictive distributions for all timesteps. Given samples from the posterior over parameters, return the predictive distribution over observations at each time `T`, given observations up through time `T-1`. Args: model: An instance of `StructuralTimeSeries` representing a time-series model. This represents a joint distribution over time-series and their parameters with batch shape `[b1, ..., bN]`. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([[num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. Returns: forecast_dist: a `tfd.MixtureSameFamily` instance with event shape [num_timesteps] and batch shape `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws` mixture components. The `t`th step represents the forecast distribution `p(observed_time_series[t] | observed_time_series[0:t-1], parameter_samples)`. #### Examples Suppose we've built a model and fit it to data using HMC: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) ``` Passing the posterior samples into `one_step_predictive`, we construct a one-step-ahead predictive distribution: ```python one_step_predictive_dist = tfp.sts.one_step_predictive( model, observed_time_series, parameter_samples=samples) predictive_means = one_step_predictive_dist.mean() predictive_scales = one_step_predictive_dist.stddev() ``` If using variational inference instead of HMC, we'd construct a forecast using samples from the variational posterior: ```python (variational_loss, variational_distributions) = tfp.sts.build_factored_variational_loss( model=model, observed_time_series=observed_time_series) # OMITTED: take steps to optimize variational loss samples = {k: q.sample(30) for (k, q) in variational_distributions.items()} one_step_predictive_dist = tfp.sts.one_step_predictive( model, observed_time_series, parameter_samples=samples) ``` We can visualize the forecast by plotting: ```python from matplotlib import pylab as plt def plot_one_step_predictive(observed_time_series, forecast_mean, forecast_scale): plt.figure(figsize=(12, 6)) num_timesteps = forecast_mean.shape[-1] c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05) plt.plot(observed_time_series, label="observed time series", color=c1) plt.plot(forecast_mean, label="one-step prediction", color=c2) plt.fill_between(np.arange(num_timesteps), forecast_mean - 2 * forecast_scale, forecast_mean + 2 * forecast_scale, alpha=0.1, color=c2) plt.legend() plot_one_step_predictive(observed_time_series, forecast_mean=predictive_means, forecast_scale=predictive_scales) ``` To detect anomalous timesteps, we check whether the observed value at each step is within a 95% predictive 
interval, i.e., two standard deviations from the mean: ```python z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1]) / predictive_scales[..., :-1]) anomalous_timesteps = tf.boolean_mask( tf.range(1, num_timesteps), tf.abs(z_scores) > 2.0) ``` """ with tf.compat.v1.name_scope( 'one_step_predictive', values=[observed_time_series, parameter_samples]): observed_time_series = tf.convert_to_tensor( value=observed_time_series, name='observed_time_series') observed_time_series = sts_util.maybe_expand_trailing_dim( observed_time_series) # Run filtering over the training timesteps to extract the # predictive means and variances. num_timesteps = dist_util.prefer_static_value( tf.shape(input=observed_time_series))[-2] lgssm = model.make_state_space_model( num_timesteps=num_timesteps, param_vals=parameter_samples) (_, _, _, _, _, observation_means, observation_covs ) = lgssm.forward_filter(observed_time_series) # Construct the predictive distribution by mixing over posterior draws. # Unfortunately this requires some shenanigans with shapes. The predictive # parameters have shape # `concat([ # [num_posterior_draws], # observed_time_series.sample_shape, # lgssm.batch_shape, # lgssm.event_shape # => [num_timesteps, 1] # ]` # Because MixtureSameFamily mixes over the rightmost batch dimension, # we need to move the `num_posterior_draws` dimension to be rightmost # in the batch shape. This requires use of `Independent` (to preserve # `num_timesteps` as part of the event shape) and `move_dimension`. # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an # arbitrary axis, and eliminate `move_dimension` calls here. predictions = tfd.Independent( distribution=tfd.Normal( loc=dist_util.move_dimension(observation_means[..., 0], 0, -2), scale=tf.sqrt(dist_util.move_dimension( observation_covs[..., 0, 0], 0, -2))), reinterpreted_batch_ndims=1) num_posterior_draws = dist_util.prefer_static_value( tf.shape(input=observation_means))[0] return tfd.MixtureSameFamily( mixture_distribution=tfd.Categorical( logits=tf.zeros([num_posterior_draws], dtype=predictions.dtype)), components_distribution=predictions)
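The `move_dimension` shuffle that the comments describe can be seen with plain shapes. A sketch (shapes invented for illustration):

```python
import tensorflow as tf
from tensorflow_probability.python.internal import distribution_util as dist_util

# [num_posterior_draws, batch, num_timesteps] ->
# [batch, num_posterior_draws, num_timesteps]: the draws axis becomes the
# rightmost *batch* dimension, the one MixtureSameFamily reduces over
# (num_timesteps stays rightmost because it is part of the event shape).
x = tf.zeros([30, 7, 100])
y = dist_util.move_dimension(x, 0, -2)
print(y.shape)  # (7, 30, 100)
```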
def decompose_by_component(model, observed_time_series, parameter_samples): """Decompose an observed time series into contributions from each component. This method decomposes a time series according to the posterior representation of a structural time series model. In particular, it: - Computes the posterior marginal means and covariances over the additive model's latent space. - Decomposes the latent posterior into the marginal blocks for each model component. - Maps the per-component latent posteriors back through each component's observation model, to generate the time series modeled by that component. Args: model: An instance of `tfp.sts.Sum` representing a structural time series model. observed_time_series: `float` `Tensor` of shape `batch_shape + [num_timesteps, 1]` (omitting the trailing unit dimension is also supported when `num_timesteps > 1`), specifying an observed time series. May optionally be an instance of `tfp.sts.MaskedTimeSeries`, which includes a mask `Tensor` to specify timesteps with missing observations. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([ [num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. Returns: component_dists: A `collections.OrderedDict` instance mapping component StructuralTimeSeries instances (elements of `model.components`) to `tfd.Distribution` instances representing the posterior marginal distributions on the process modeled by each component. Each distribution has batch shape matching that of `posterior_means`/`posterior_covs`, and event shape of `[num_timesteps]`. #### Examples Suppose we've built a model and fit it to data: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) num_steps_forecast = 50 samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) ``` To extract the contributions of individual components, pass the time series and sampled parameters into `decompose_by_component`: ```python component_dists = decompose_by_component( model, observed_time_series=observed_time_series, parameter_samples=samples) # Component mean and stddev have shape `[len(observed_time_series)]`. day_of_week_effect_mean = component_dists[day_of_week].mean() day_of_week_effect_stddev = component_dists[day_of_week].stddev() ``` Using the component distributions, we can visualize the uncertainty for each component: ```python from matplotlib import pylab as plt num_components = len(component_dists) xs = np.arange(len(observed_time_series)) fig = plt.figure(figsize=(12, 3 * num_components)) for i, (component, component_dist) in enumerate(component_dists.items()): # If in graph mode, replace `.numpy()` with `.eval()` or `sess.run()`. 
component_mean = component_dist.mean().numpy() component_stddev = component_dist.stddev().numpy() ax = fig.add_subplot(num_components, 1, 1 + i) ax.plot(xs, component_mean, lw=2) ax.fill_between(xs, component_mean - 2 * component_stddev, component_mean + 2 * component_stddev, alpha=0.5) ax.set_title(component.name) ``` """ with tf1.name_scope('decompose_by_component', values=[observed_time_series]): [observed_time_series, is_missing] = sts_util.canonicalize_observed_time_series_with_mask( observed_time_series) # Run smoothing over the training timesteps to extract the # posterior on latents. num_timesteps = dist_util.prefer_static_value( tf.shape(input=observed_time_series))[-2] ssm = model.make_state_space_model(num_timesteps=num_timesteps, param_vals=parameter_samples) posterior_means, posterior_covs = ssm.posterior_marginals( observed_time_series, mask=is_missing) return _decompose_from_posterior_marginals(model, posterior_means, posterior_covs, parameter_samples)
def _decompose_from_posterior_marginals(model, posterior_means, posterior_covs, parameter_samples): """Utility method to decompose a joint posterior into components. Args: model: `tfp.sts.Sum` instance defining an additive STS model. posterior_means: float `Tensor` of shape `concat( [[num_posterior_draws], batch_shape, num_timesteps, latent_size])` representing the posterior mean over latents in an `AdditiveStateSpaceModel`. posterior_covs: float `Tensor` of shape `concat( [[num_posterior_draws], batch_shape, num_timesteps, latent_size, latent_size])` representing the posterior marginal covariances over latents in an `AdditiveStateSpaceModel`. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([ [num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. Returns: component_dists: A `collections.OrderedDict` instance mapping component StructuralTimeSeries instances (elements of `model.components`) to `tfd.Distribution` instances representing the posterior marginal distributions on the process modeled by each component. Each distribution has batch shape matching that of `posterior_means`/`posterior_covs`, and event shape of `[num_timesteps]`. """ try: model.components except AttributeError: raise ValueError( 'Model decomposed into components must be an instance of ' '`tfp.sts.Sum` (passed model {})'.format(model)) with tf1.name_scope('decompose_from_posterior_marginals'): # Extract the component means/covs from the joint latent posterior. latent_sizes = [ component.latent_size for component in model.components ] component_means = tf.split(posterior_means, latent_sizes, axis=-1) component_covs = _split_covariance_into_marginals( posterior_covs, latent_sizes) # Instantiate per-component state space models, and use them to push the # posterior means/covs through the observation model for each component. num_timesteps = dist_util.prefer_static_value( tf.shape(input=posterior_means))[-2] component_ssms = model.make_component_state_space_models( num_timesteps=num_timesteps, param_vals=parameter_samples) component_predictive_dists = collections.OrderedDict() for (component, component_ssm, component_mean, component_cov) in zip(model.components, component_ssms, component_means, component_covs): component_obs_mean, component_obs_cov = ( component_ssm.latents_to_observations( latent_means=component_mean, latent_covs=component_cov)) # Using the observation means and covs, build a mixture distribution # that integrates over the posterior draws. component_predictive_dists[ component] = sts_util.mix_over_posterior_draws( means=component_obs_mean[..., 0], variances=component_obs_cov[..., 0, 0])  return component_predictive_dists
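The covariance split relies on the additive model's latents being ordered by component, so the joint covariance is block-structured. A hypothetical sketch of what `_split_covariance_into_marginals` (defined elsewhere in this module) plausibly does; the name and signature here are assumptions for illustration:

```python
import tensorflow as tf

def split_covariance_into_marginals(covs, block_sizes):
  """Slices the diagonal blocks of a joint latent covariance.

  Args:
    covs: `Tensor` of shape `batch_shape + [latent_size, latent_size]`.
    block_sizes: list of Python ints summing to `latent_size`.
  Returns:
    List of `Tensor`s, one of shape `batch_shape + [size, size]` per block.
  """
  marginals = []
  i = 0
  for size in block_sizes:
    # Keep only the block on the diagonal; cross-component covariances
    # are discarded, which is exactly what "marginal" means here.
    marginals.append(covs[..., i:i + size, i:i + size])
    i += size
  return marginals
```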
def _get_perm(self): if self.perm is None: perm_start = (distribution_util.prefer_static_value( self.rightmost_transposed_ndims) - 1) return tf.range(start=perm_start, limit=-1, delta=-1, name='perm') return self.perm
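In other words, the default `perm` is a pure reversal of the rightmost dims. For example:

```python
import tensorflow as tf

# With rightmost_transposed_ndims = 3, perm_start = 2 and the generated
# permutation counts down: [2, 1, 0].
print(tf.range(start=3 - 1, limit=-1, delta=-1))  # tf.Tensor([2 1 0], ...)
```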
def __init__(self, perm=None, rightmost_transposed_ndims=None, validate_args=False, name='transpose'): """Instantiates the `Transpose` bijector. Args: perm: Positive `int32` vector-shaped `Tensor` representing permutation of rightmost dims (for forward transformation). Note that the `0`th index represents the first of the rightmost dims and the largest value must be `rightmost_transposed_ndims - 1` and corresponds to `tf.rank(x) - 1`. Only one of `perm` and `rightmost_transposed_ndims` can (and must) be specified. Default value: `tf.range(start=rightmost_transposed_ndims - 1, limit=-1, delta=-1)`. rightmost_transposed_ndims: Positive `int32` scalar-shaped `Tensor` representing the number of rightmost dimensions to permute. Only one of `perm` and `rightmost_transposed_ndims` can (and must) be specified. Default value: `tf.size(perm)`. validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. Raises: ValueError: if both or neither `perm` and `rightmost_transposed_ndims` are specified. NotImplementedError: if `rightmost_transposed_ndims` is not known prior to graph execution. """ with tf.compat.v1.name_scope(name, values=[perm, rightmost_transposed_ndims]): if (rightmost_transposed_ndims is None) == (perm is None): raise ValueError('Must specify exactly one of ' '`rightmost_transposed_ndims` and `perm`.') if rightmost_transposed_ndims is not None: rightmost_transposed_ndims = tf.convert_to_tensor( value=rightmost_transposed_ndims, dtype=np.int32, name='rightmost_transposed_ndims') rightmost_transposed_ndims_ = tf.get_static_value( rightmost_transposed_ndims) assertions = _maybe_validate_rightmost_transposed_ndims( rightmost_transposed_ndims, validate_args) if assertions: with tf.control_dependencies(assertions): rightmost_transposed_ndims = tf.identity( rightmost_transposed_ndims) perm = tf.range( start=util.prefer_static_value(rightmost_transposed_ndims) - 1, limit=-1, delta=-1, name='perm') else: # perm is not None: perm = tf.convert_to_tensor(value=perm, dtype=np.int32, name='perm') rightmost_transposed_ndims = tf.size( input=perm, name='rightmost_transposed_ndims') rightmost_transposed_ndims_ = tf.get_static_value( rightmost_transposed_ndims) assertions = _maybe_validate_perm(perm, validate_args) if assertions: with tf.control_dependencies(assertions): perm = tf.identity(perm) # TODO(b/110828604): If bijector base class ever supports dynamic # `min_event_ndims`, then this class already works dynamically and the # following five lines can be removed. if rightmost_transposed_ndims_ is None: raise NotImplementedError( '`rightmost_transposed_ndims` must be ' 'known prior to graph execution.') else: rightmost_transposed_ndims_ = int(rightmost_transposed_ndims_) self._perm = perm self._rightmost_transposed_ndims = rightmost_transposed_ndims super(Transpose, self).__init__( forward_min_event_ndims=rightmost_transposed_ndims_, graph_parents=[perm, rightmost_transposed_ndims], is_constant_jacobian=True, validate_args=validate_args, name=name)
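A brief usage sketch of the bijector (toy shapes):

```python
import tensorflow as tf
import tensorflow_probability as tfp
tfb = tfp.bijectors

x = tf.reshape(tf.range(6.), [2, 3])
# Default perm with rightmost_transposed_ndims=2 is [1, 0]: an ordinary
# matrix transpose of the two rightmost dimensions.
bij = tfb.Transpose(rightmost_transposed_ndims=2)
print(bij.forward(x).shape)                             # (3, 2)
print(tf.reduce_all(bij.inverse(bij.forward(x)) == x))  # True: a bijection
```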
def impute_missing_values(model, observed_time_series, parameter_samples, include_observation_noise=False): """Runs posterior inference to impute the missing values in a time series. This method computes the posterior marginals `p(latent state | observations)`, given the time series at observed timesteps (a missingness mask should be specified using `tfp.sts.MaskedTimeSeries`). It pushes this posterior back through the observation model to impute a predictive distribution on the observed time series. At unobserved steps, this is an imputed value; at other steps it is interpreted as the model's estimate of the underlying noise-free series. Args: model: `tfp.sts.Sum` instance defining an additive STS model. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. May optionally be an instance of `tfp.sts.MaskedTimeSeries` including a mask `Tensor` to encode the locations of missing observations. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([ [num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. include_observation_noise: If `False`, the imputed uncertainties represent the model's estimate of the noise-free time series at each timestep. If `True`, they represent the model's estimate of the range of values that could be *observed* at each timestep, including any i.i.d. observation noise. Default value: `False`. Returns: imputed_series_dist: a `tfd.MixtureSameFamily` instance with event shape [num_timesteps] and batch shape `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws` mixture components. #### Example To specify a time series with missing values, use `tfp.sts.MaskedTimeSeries`: ```python time_series_with_nans = [-1., 1., np.nan, 2.4, np.nan, 5] observed_time_series = tfp.sts.MaskedTimeSeries( time_series=time_series_with_nans, is_missing=tf.math.is_nan(time_series_with_nans)) ``` Masked time series can be passed to `tfp.sts` methods in place of an `observed_time_series` `Tensor`: ```python # Build model using observed time series to set heuristic priors. linear_trend_model = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series) model = tfp.sts.Sum([linear_trend_model], observed_time_series=observed_time_series) # Fit model to data parameter_samples, _ = tfp.sts.fit_with_hmc(model, observed_time_series) ``` After fitting a model, `impute_missing_values` will return a distribution over the value of the series at each timestep: ```python # Impute missing values imputed_series_distribution = tfp.sts.impute_missing_values( model, observed_time_series, parameter_samples=parameter_samples) print('imputed means and stddevs: ', imputed_series_distribution.mean(), imputed_series_distribution.stddev()) ``` """ with tf.name_scope('impute_missing_values'): [ observed_time_series, mask ] = sts_util.canonicalize_observed_time_series_with_mask( observed_time_series) # Run smoothing over the training timesteps to extract the # predictive means and variances. 
num_timesteps = dist_util.prefer_static_value( tf.shape(observed_time_series))[-2] lgssm = model.make_state_space_model( num_timesteps=num_timesteps, param_vals=parameter_samples) posterior_means, posterior_covs = lgssm.posterior_marginals( observed_time_series, mask=mask) observation_means, observation_covs = lgssm.latents_to_observations( latent_means=posterior_means, latent_covs=posterior_covs) if not include_observation_noise: # Extract just the variance of observation noise by pushing forward # zero-variance latents. _, observation_noise_covs = lgssm.latents_to_observations( latent_means=posterior_means, latent_covs=tf.zeros_like(posterior_covs)) # Subtract out the observation noise that was added in the original # pushforward. Note that this could cause numerical issues if the # observation noise is very large. If this becomes an issue we could # avoid the subtraction by plumbing `include_observation_noise` through # `lgssm.latents_to_observations`. observation_covs -= observation_noise_covs # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]` # to a scalar time series. return sts_util.mix_over_posterior_draws( means=observation_means[..., 0], variances=observation_covs[..., 0, 0])
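The subtraction in the `include_observation_noise=False` branch rests on the linear-Gaussian pushforward `cov(obs) = H P Hᵀ + R`: pushing zero-variance latents (`P = 0`) through the same map returns `R` alone. A scalar sanity check (plain Python floats, purely illustrative):

```python
# H: observation matrix, P: latent covariance, R: observation noise variance.
H, P, R = 2.0, 0.5, 0.1
full_pushforward = H * P * H + R  # latents_to_observations on the posterior
noise_only = H * 0.0 * H + R      # same pushforward with zero-variance latents
noise_free = full_pushforward - noise_only
assert abs(noise_free - H * P * H) < 1e-12  # exactly the noise-free term
```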
def one_step_predictive(model, observed_time_series, parameter_samples): """Compute one-step-ahead predictive distributions for all timesteps. Given samples from the posterior over parameters, return the predictive distribution over observations at each time `T`, given observations up through time `T-1`. Args: model: An instance of `StructuralTimeSeries` representing a time-series model. This represents a joint distribution over time-series and their parameters with batch shape `[b1, ..., bN]`. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. May optionally be an instance of `tfp.sts.MaskedTimeSeries` including a mask `Tensor` to encode the locations of missing observations. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([[num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. Returns: forecast_dist: a `tfd.MixtureSameFamily` instance with event shape [num_timesteps] and batch shape `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws` mixture components. The `t`th step represents the forecast distribution `p(observed_time_series[t] | observed_time_series[0:t-1], parameter_samples)`. #### Examples Suppose we've built a model and fit it to data using HMC: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) ``` Passing the posterior samples into `one_step_predictive`, we construct a one-step-ahead predictive distribution: ```python one_step_predictive_dist = tfp.sts.one_step_predictive( model, observed_time_series, parameter_samples=samples) predictive_means = one_step_predictive_dist.mean() predictive_scales = one_step_predictive_dist.stddev() ``` If using variational inference instead of HMC, we'd construct a forecast using samples from the variational posterior: ```python surrogate_posterior = tfp.sts.build_factored_surrogate_posterior( model=model) loss_curve = tfp.vi.fit_surrogate_posterior( target_log_prob_fn=model.joint_log_prob(observed_time_series), surrogate_posterior=surrogate_posterior, optimizer=tf.optimizers.Adam(learning_rate=0.1), num_steps=200) samples = surrogate_posterior.sample(30) one_step_predictive_dist = tfp.sts.one_step_predictive( model, observed_time_series, parameter_samples=samples) ``` We can visualize the forecast by plotting: ```python from matplotlib import pylab as plt def plot_one_step_predictive(observed_time_series, forecast_mean, forecast_scale): plt.figure(figsize=(12, 6)) num_timesteps = forecast_mean.shape[-1] c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05) plt.plot(observed_time_series, label="observed time series", color=c1) plt.plot(forecast_mean, label="one-step prediction", color=c2) plt.fill_between(np.arange(num_timesteps), forecast_mean - 2 * forecast_scale, forecast_mean + 2 * forecast_scale, alpha=0.1, color=c2) plt.legend() 
plot_one_step_predictive(observed_time_series, forecast_mean=predictive_means, forecast_scale=predictive_scales) ``` To detect anomalous timesteps, we check whether the observed value at each step is within a 95% predictive interval, i.e., two standard deviations from the mean: ```python z_scores = ((observed_time_series[..., 1:] - predictive_means[..., :-1]) / predictive_scales[..., :-1]) anomalous_timesteps = tf.boolean_mask( tf.range(1, num_timesteps), tf.abs(z_scores) > 2.0) ``` """ with tf.name_scope('one_step_predictive'): [ observed_time_series, is_missing ] = sts_util.canonicalize_observed_time_series_with_mask( observed_time_series) # Run filtering over the training timesteps to extract the # predictive means and variances. num_timesteps = dist_util.prefer_static_value( tf.shape(observed_time_series))[-2] lgssm = model.make_state_space_model( num_timesteps=num_timesteps, param_vals=parameter_samples) (_, _, _, _, _, observation_means, observation_covs ) = lgssm.forward_filter(observed_time_series, mask=is_missing) # Squeeze dims to convert from LGSSM's event shape `[num_timesteps, 1]` # to a scalar time series. return sts_util.mix_over_posterior_draws( means=observation_means[..., 0], variances=observation_covs[..., 0, 0])
def forecast(model, observed_time_series, parameter_samples, num_steps_forecast, include_observation_noise=True): """Construct predictive distribution over future observations. Given samples from the posterior over parameters, return the predictive distribution over future observations for `num_steps_forecast` timesteps. Args: model: An instance of `StructuralTimeSeries` representing a time-series model. This represents a joint distribution over time-series and their parameters with batch shape `[b1, ..., bN]`. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]])` where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. May optionally be an instance of `tfp.sts.MaskedTimeSeries` including a mask `Tensor` to encode the locations of missing observations. parameter_samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([[num_posterior_draws], param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. This may optionally also be a map (Python `dict`) of parameter names to `Tensor` values. num_steps_forecast: scalar `int` `Tensor` number of steps to forecast. include_observation_noise: Python `bool` indicating whether the forecast distribution should include uncertainty from observation noise. If `True`, the forecast is over future observations; if `False`, the forecast is over future values of the latent noise-free time series. Default value: `True`. Returns: forecast_dist: a `tfd.MixtureSameFamily` instance with event shape [num_steps_forecast, 1] and batch shape `concat([sample_shape, model.batch_shape])`, with `num_posterior_draws` mixture components. 
#### Examples Suppose we've built a model and fit it to data using HMC: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) ``` Passing the posterior samples into `forecast`, we construct a forecast distribution: ```python forecast_dist = tfp.sts.forecast(model, observed_time_series, parameter_samples=samples, num_steps_forecast=50) forecast_mean = forecast_dist.mean()[..., 0] # shape: [50] forecast_scale = forecast_dist.stddev()[..., 0] # shape: [50] forecast_samples = forecast_dist.sample(10)[..., 0] # shape: [10, 50] ``` If using variational inference instead of HMC, we'd construct a forecast using samples from the variational posterior: ```python surrogate_posterior = tfp.sts.build_factored_surrogate_posterior( model=model) loss_curve = tfp.vi.fit_surrogate_posterior( target_log_prob_fn=model.joint_log_prob(observed_time_series), surrogate_posterior=surrogate_posterior, optimizer=tf.optimizers.Adam(learning_rate=0.1), num_steps=200) samples = surrogate_posterior.sample(30) forecast_dist = tfp.sts.forecast(model, observed_time_series, parameter_samples=samples, num_steps_forecast=50) ``` We can visualize the forecast by plotting: ```python from matplotlib import pylab as plt def plot_forecast(observed_time_series, forecast_mean, forecast_scale, forecast_samples): plt.figure(figsize=(12, 6)) num_steps = observed_time_series.shape[-1] num_steps_forecast = forecast_mean.shape[-1] num_steps_train = num_steps - num_steps_forecast c1, c2 = (0.12, 0.47, 0.71), (1.0, 0.5, 0.05) plt.plot(np.arange(num_steps), observed_time_series, lw=2, color=c1, label='ground truth') forecast_steps = np.arange(num_steps_train, num_steps_train+num_steps_forecast) plt.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1) plt.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2, label='forecast') plt.fill_between(forecast_steps, forecast_mean - 2 * forecast_scale, forecast_mean + 2 * forecast_scale, color=c2, alpha=0.2) plt.xlim([0, num_steps]) plt.legend() plot_forecast(observed_time_series, forecast_mean=forecast_mean, forecast_scale=forecast_scale, forecast_samples=forecast_samples) ``` """ with tf.name_scope('forecast'): [ observed_time_series, mask ] = sts_util.canonicalize_observed_time_series_with_mask( observed_time_series) # Run filtering over the observed timesteps to extract the # latent state posterior at timestep T+1 (i.e., the final # filtering distribution, pushed through the transition model). # This is the prior for the forecast model ("today's prior # is yesterday's posterior"). num_observed_steps = dist_util.prefer_static_value( tf.shape(observed_time_series))[-2] observed_data_ssm = model.make_state_space_model( num_timesteps=num_observed_steps, param_vals=parameter_samples) (_, _, _, predictive_means, predictive_covs, _, _ ) = observed_data_ssm.forward_filter(observed_time_series, mask=mask) # Build a batch of state-space models over the forecast period. Because # we'll use MixtureSameFamily to mix over the posterior draws, we need to # do some shenanigans to move the `[num_posterior_draws]` batch dimension # from the leftmost to the rightmost side of the model's batch shape. 
# TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an # arbitrary axis, and eliminate `move_dimension` calls here. parameter_samples = model._canonicalize_param_vals_as_map(parameter_samples) # pylint: disable=protected-access parameter_samples_with_reordered_batch_dimension = { param.name: dist_util.move_dimension( parameter_samples[param.name], 0, -(1 + _prefer_static_event_ndims(param.prior))) for param in model.parameters} forecast_prior = tfd.MultivariateNormalFullCovariance( loc=dist_util.move_dimension(predictive_means[..., -1, :], 0, -2), covariance_matrix=dist_util.move_dimension( predictive_covs[..., -1, :, :], 0, -3)) # Ugly hack: because we moved `num_posterior_draws` to the trailing (rather # than leading) dimension of parameters, the parameter batch shapes no # longer broadcast against the `constant_offset` attribute used in `sts.Sum` # models. We fix this by manually adding an extra broadcasting dim to # `constant_offset` if present. # The root cause of this hack is that we mucked with param dimensions above # and are now passing params that are 'invalid' in the sense that they don't # match the shapes of the model's param priors. The fix (as above) will be # to update MixtureSameFamily so we can avoid changing param dimensions # altogether. # TODO(b/120245392): enhance `MixtureSameFamily` to reduce along an # arbitrary axis, and eliminate this hack. kwargs = {} if hasattr(model, 'constant_offset'): kwargs['constant_offset'] = tf.convert_to_tensor( value=model.constant_offset, dtype=forecast_prior.dtype)[..., tf.newaxis, :] if not include_observation_noise: parameter_samples_with_reordered_batch_dimension[ 'observation_noise_scale'] = tf.zeros_like( parameter_samples_with_reordered_batch_dimension[ 'observation_noise_scale']) # We assume that any STS model that has a `constant_offset` attribute # will allow it to be overridden as a kwarg. This is currently just # `sts.Sum`. # TODO(b/120245392): when kwargs hack is removed, switch back to calling # the public version of `_make_state_space_model`. forecast_ssm = model._make_state_space_model( # pylint: disable=protected-access num_timesteps=num_steps_forecast, param_map=parameter_samples_with_reordered_batch_dimension, initial_state_prior=forecast_prior, initial_step=num_observed_steps, **kwargs) num_posterior_draws = dist_util.prefer_static_value( forecast_ssm.batch_shape_tensor())[-1] return tfd.MixtureSameFamily( mixture_distribution=tfd.Categorical( logits=tf.zeros([num_posterior_draws], dtype=forecast_ssm.dtype)), components_distribution=forecast_ssm)