Ejemplo n.º 1
0
    def test_empirical_statistics_accepts_masked_values(self):
        """Masked timesteps must not contribute to the empirical statistics.

        The statistics computed from a `MaskedTimeSeries` are compared with
        those computed from the same values after the masked timesteps have
        been removed, first for a batch of series sharing one mask and then
        for a single unbatched series.
        """

        def compute_statistics(series):
            # Evaluate the (mean, stddev, initial value) triple for `series`.
            return self.evaluate(sts_util.empirical_statistics(series))

        # A rank-1 mask paired with a batch of series checks that the mask
        # broadcasts over the batch shape.
        batched_series = np.random.randn(3, 2, 5)
        missing = np.array([True, False, False, True, False])

        masked_mean, masked_stddev, masked_initial = compute_statistics(
            missing_values_util.MaskedTimeSeries(
                time_series=batched_series, is_missing=missing))

        # Reference values: drop the two masked steps from every series,
        # leaving three observed steps each.
        observed = ~np.broadcast_to(missing, batched_series.shape)
        expected_mean, expected_stddev, expected_initial = compute_statistics(
            batched_series[observed].reshape([3, 2, 3]))
        self.assertAllClose(masked_mean, expected_mean)
        self.assertAllClose(masked_stddev, expected_stddev)
        self.assertAllClose(masked_initial, expected_initial)

        # Repeat the comparison for a single series with no batch shape.
        single_series = batched_series[0, 0, :]
        masked_mean, masked_stddev, masked_initial = compute_statistics(
            missing_values_util.MaskedTimeSeries(
                time_series=single_series, is_missing=missing))
        expected_mean, expected_stddev, expected_initial = compute_statistics(
            single_series[~missing])
        self.assertAllClose(masked_mean, expected_mean)
        self.assertAllClose(masked_stddev, expected_stddev)
        self.assertAllClose(masked_initial, expected_initial)
Ejemplo n.º 2
0
def pad_batch_dimension_for_multiple_chains(
    observed_time_series, model, chain_batch_shape):
  """Expand the observed time series with extra batch dimension(s).

  Running with multiple chains introduces an extra batch dimension, so in
  general the observed time series must be padded with matching dimension(s).

  For example, suppose the model has batch shape `[3, 4]` and the observed
  time series has shape `concat([[5], [3, 4], [100]])`, corresponding to
  `sample_shape`, `batch_shape`, and `num_timesteps` respectively. The model
  will produce distributions with batch shape
  `concat([chain_batch_shape, [3, 4]])`, so the series is padded to shape
  `[5, 1, 3, 4, 100]`; the added `1` dimension between the sample and batch
  shapes will broadcast to `chain_batch_shape`.

  Args:
    observed_time_series: any input accepted by
      `canonicalize_observed_time_series_with_mask`.
    model: object exposing `batch_shape` and `batch_shape_tensor()`.
    chain_batch_shape: integer scalar or vector; it is converted to an
      `int32` `Tensor`, and a scalar `k` is treated as `[k]`.

  Returns:
    A `missing_values_util.MaskedTimeSeries` holding the (possibly padded)
    series and, when a mask is present, the (possibly padded) mask.

  Raises:
    ValueError: if the static shape of `chain_batch_shape` is not fully
      defined.
  """
  # Extract mask and guarantee `event_ndims=2`.
  observed_time_series, is_missing = (
      canonicalize_observed_time_series_with_mask(observed_time_series))

  event_ndims = 2  # event_shape = [num_timesteps, observation_size=1]

  # Prefer the static batch rank; fall back to a dynamic one otherwise.
  if model.batch_shape.ndims is not None:
    model_batch_ndims = model.batch_shape.ndims
  else:
    model_batch_ndims = tf.shape(model.batch_shape_tensor())[0]

  # Work out how many chain dimensions have to be inserted.
  chain_batch_shape = tf.convert_to_tensor(
      value=chain_batch_shape, name='chain_batch_shape', dtype=tf.int32)
  chain_static_shape = chain_batch_shape.shape
  if not chain_static_shape.is_fully_defined():
    raise ValueError(
        'Batch shape must have static rank. (given: {})'.format(
            chain_batch_shape))
  if chain_static_shape.ndims == 0:
    # Promote an integer `k` to the vector `[k]`.
    chain_batch_shape = chain_batch_shape[tf.newaxis]
  chain_batch_ndims = tf.compat.dimension_value(chain_batch_shape.shape[0])

  def insert_chain_dims(tensor):
    """Reshapes `tensor` with size-1 dims between sample and batch dims."""
    sample_part = tf.shape(tensor)[:-(model_batch_ndims + event_ndims)]
    batch_and_event_part = tf.shape(
        tensor)[-(model_batch_ndims + event_ndims):]
    padded_shape = tf.concat(
        [sample_part,
         tf.ones([chain_batch_ndims], dtype=tf.int32),
         batch_and_event_part],
        axis=0)
    return tf.reshape(tensor=tensor, shape=padded_shape)

  def pad_if_has_sample_shape(tensor):
    """Pads `tensor` only when it has dims beyond batch and event dims."""
    return ps.cond(
        ps.rank(tensor) > model_batch_ndims + event_ndims,
        lambda: insert_chain_dims(tensor),
        lambda: tensor)

  observed_time_series = pad_if_has_sample_shape(observed_time_series)
  if is_missing is not None:
    is_missing = pad_if_has_sample_shape(is_missing)
  return missing_values_util.MaskedTimeSeries(
      observed_time_series, is_missing=is_missing)
Ejemplo n.º 3
0
def canonicalize_observed_time_series_with_mask(
    maybe_masked_observed_time_series):
  """Convert the input into a canonically shaped series plus optional mask.

  When the input carries no explicit mask, `NaN` entries of the series are
  treated as missing. A mask whose static value is known to contain no `True`
  entries is replaced by `None`.

  Args:
    maybe_masked_observed_time_series: a `Tensor`-like object with shape
      `[..., num_timesteps]` or `[..., num_timesteps, 1]`, or a
      `tfp.sts.MaskedTimeSeries` containing such an object, or a Pandas
      Series or DataFrame instance with set frequency
      (i.e., `.index.freq is not None`).

  Returns:
    masked_time_series: a `tfp.sts.MaskedTimeSeries` namedtuple, in which
      the `observed_time_series` is converted to `Tensor` with canonical shape
      `[..., num_timesteps, 1]`, and `is_missing` is either `None` or a boolean
      `Tensor`.

  Raises:
    ValueError: if a Pandas input has an index with a `freq` attribute that
      is `None`.
  """
  series_input = maybe_masked_observed_time_series

  with tf.name_scope('canonicalize_observed_time_series_with_mask'):
    has_explicit_mask = hasattr(series_input, 'is_missing')

    if has_explicit_mask:
      # A MaskedTimeSeries: take both fields as given.
      raw_series = series_input.time_series
      is_missing = series_input.is_missing
    elif (hasattr(series_input, 'index') and
          hasattr(series_input, 'to_numpy')):
      # Duck-typed Pandas Series or DataFrame.
      pandas_index = series_input.index
      if hasattr(pandas_index, 'freq') and pandas_index.freq is None:
        raise ValueError('Pandas DataFrame or Series has a DatetimeIndex with '
                         'no set frequency, but STS requires regularly spaced '
                         'observations. Consider using '
                         '`tfp.sts.regularize_series` to infer a frequency and '
                         'build a regularly spaced series (by marking '
                         'unobserved steps as missing observations).')
      # Columns of a DataFrame represent a batch of series, so transpose to
      # get shape `[batch_size, num_steps]` rather than vice versa.
      values = series_input.to_numpy()
      raw_series = np.squeeze(np.transpose(values))
    else:
      raw_series = series_input

    canonical_series = _maybe_expand_trailing_dim(
        tf.convert_to_tensor(value=raw_series, name='observed_time_series'))

    if not has_explicit_mask:
      # Without a user-supplied mask, `NaN` values mark missing steps.
      is_missing = tf.math.is_nan(canonical_series[..., 0])

    # Drop a mask that is statically known to flag nothing as missing.
    static_mask = tf.get_static_value(is_missing)
    mask_is_statically_empty = (
        static_mask is not None and not np.any(static_mask))
    if mask_is_statically_empty or is_missing is None:
      is_missing = None
    else:
      is_missing = tf.convert_to_tensor(
          value=is_missing, name='is_missing', dtype_hint=tf.bool)

    return missing_values_util.MaskedTimeSeries(
        canonical_series, is_missing=is_missing)
Ejemplo n.º 4
0
def canonicalize_observed_time_series_with_mask(
    maybe_masked_observed_time_series):
  """Convert the input into a canonically shaped series plus optional mask.

  Args:
    maybe_masked_observed_time_series: a `Tensor`-like object with shape
      `[..., num_timesteps]` or `[..., num_timesteps, 1]`, or a
      `tfp.sts.MaskedTimeSeries` containing such an object.

  Returns:
    masked_time_series: a `tfp.sts.MaskedTimeSeries` namedtuple, in which
      the `observed_time_series` is converted to `Tensor` with canonical shape
      `[..., num_timesteps, 1]`, and `is_missing` is either `None` or a boolean
      `Tensor`.
  """
  series_input = maybe_masked_observed_time_series

  with tf.name_scope('canonicalize_observed_time_series_with_mask'):
    # Anything exposing `is_missing` is treated as a MaskedTimeSeries;
    # everything else is taken to be the bare series with no mask.
    carries_mask = hasattr(series_input, 'is_missing')
    raw_series = series_input.time_series if carries_mask else series_input
    raw_mask = series_input.is_missing if carries_mask else None

    canonical_series = _maybe_expand_trailing_dim(
        tf.convert_to_tensor(value=raw_series, name='observed_time_series'))

    if raw_mask is None:
      canonical_mask = None
    else:
      canonical_mask = tf.convert_to_tensor(
          value=raw_mask, name='is_missing', dtype_hint=tf.bool)

    return missing_values_util.MaskedTimeSeries(
        canonical_series, is_missing=canonical_mask)
Ejemplo n.º 5
0
    def test_empirical_statistics_accepts_masked_values(self):
        """Checks `empirical_statistics` on partly and fully masked series.

        A fully masked series must yield the default statistics (mean 0,
        stddev 1, initial value 0). A partly masked series must yield the
        statistics of its unmasked timesteps, both with and without a batch
        shape.
        """

        def compute_statistics(series):
            # Evaluate the (mean, stddev, initial value) triple for `series`.
            return self.evaluate(sts_util.empirical_statistics(series))

        # The rank-2 mask must broadcast over the leading batch dimension:
        # row 0 hides two steps, row 1 hides every step.
        batched_series = np.random.randn(3, 2, 5)
        batched_mask = np.array([[True, False, False, True, False],
                                 [True, True, True, True, True]])

        mean, stddev, initial = compute_statistics(
            missing_values_util.MaskedTimeSeries(
                time_series=batched_series, is_missing=batched_mask))

        # Should return default values when the series is completely masked.
        self.assertAllClose(mean[:, 1], tf.zeros_like(mean[:, 1]))
        self.assertAllClose(stddev[:, 1], tf.ones_like(stddev[:, 1]))
        self.assertAllClose(initial[:, 1], tf.zeros_like(initial[:, 1]))

        # Otherwise, should return the actual mean/stddev/initial values,
        # i.e. those of the series with the masked steps removed.
        partly_masked_series = batched_series[:, 0, :]
        partial_mask = batched_mask[0, :]
        observed = ~np.broadcast_to(partial_mask, partly_masked_series.shape)
        expected_mean, expected_stddev, expected_initial = compute_statistics(
            partly_masked_series[observed].reshape([3, 3]))
        self.assertAllClose(mean[:, 0], expected_mean)
        self.assertAllClose(stddev[:, 0], expected_stddev)
        self.assertAllClose(initial[:, 0], expected_initial)

        # Repeat the comparison for a single series with no batch shape.
        single_series = partly_masked_series[0, :]
        mean, stddev, initial = compute_statistics(
            missing_values_util.MaskedTimeSeries(
                time_series=single_series, is_missing=partial_mask))
        expected_mean, expected_stddev, expected_initial = compute_statistics(
            single_series[~partial_mask])
        self.assertAllClose(mean, expected_mean)
        self.assertAllClose(stddev, expected_stddev)
        self.assertAllClose(initial, expected_initial)