Example No. 1
def semilocal_linear_trend_transition_noise(level_scale, slope_mean,
                                            slope_scale, autoregressive_coef):
    """Build the transition noise model for a semi-local linear trend model."""

    # At each timestep, the stochasticity of `level` and `slope` is given
    # by `level_scale` and `slope_scale`, respectively.
    broadcast_batch_shape = dist_util.get_broadcast_shape(
        level_scale, slope_mean, slope_scale, autoregressive_coef)
    broadcast_ones = tf.ones(broadcast_batch_shape, dtype=level_scale.dtype)
    scale_diag = tf.stack(
        [level_scale * broadcast_ones, slope_scale * broadcast_ones], axis=-1)

    # We additionally fold in a bias term implementing the nonzero `slope_mean`.
    # The overall `slope` update is (from `SemiLocalLinearTrend` docstring)
    #   slope[t] = (slope_mean +
    #               autoregressive_coef * (slope[t-1] - slope_mean) +
    #               Normal(0., slope_scale))
    # which we rewrite as
    #   slope[t] = (
    #    autoregressive_coef * slope[t-1] +                  # linear transition
    #    Normal(loc=slope_mean - autoregressive_coef * slope_mean,  # noise bias
    #           scale=slope_scale))                                 # noise scale
    bias = tf.stack(
        [tf.zeros_like(broadcast_ones),
         slope_mean * (1 - autoregressive_coef) * broadcast_ones],
        axis=-1)
    return tfd.MultivariateNormalDiag(loc=bias, scale_diag=scale_diag)
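The algebraic rewrite in the comment above is easy to check numerically; the following is a minimal sketch with arbitrary values, leaving out the noise term.

import tensorflow as tf

slope_mean = tf.constant(0.5)
autoregressive_coef = tf.constant(0.9)
slope_prev = tf.constant(2.0)

# Original form of the deterministic part of the slope update.
original = slope_mean + autoregressive_coef * (slope_prev - slope_mean)
# Rewritten form: linear transition plus the bias folded into the noise mean.
rewritten = (autoregressive_coef * slope_prev +
             slope_mean * (1. - autoregressive_coef))
tf.debugging.assert_near(original, rewritten)  # both equal 1.85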
Example No. 2
    def _matrix(self, x1, x2):
        locs = util.pad_shape_with_ones(self.locs, ndims=1, start=-2)
        slopes = util.pad_shape_with_ones(self.slopes, ndims=1, start=-2)

        weights_x1 = tf.math.sigmoid(
            slopes *
            (self.weight_fn(x1, self.feature_ndims)[..., tf.newaxis] - locs))
        weights_x1 = weights_x1[..., tf.newaxis, :]
        weights_x2 = tf.math.sigmoid(
            slopes *
            (self.weight_fn(x2, self.feature_ndims)[..., tf.newaxis] - locs))
        weights_x2 = weights_x2[..., tf.newaxis, :, :]

        initial_weights = (1. - weights_x1) * (1. - weights_x2)
        initial_weights = tf.concat(
            [initial_weights,
             tf.ones_like(initial_weights[..., 0])[..., tf.newaxis]],
            axis=-1)
        end_weights = weights_x1 * weights_x2
        end_weights = tf.concat(
            [tf.ones_like(end_weights[..., 0])[..., tf.newaxis], end_weights],
            axis=-1)

        results = [k.matrix(x1, x2)[..., tf.newaxis] for k in self.kernels]
        broadcasted_shape = distribution_util.get_broadcast_shape(*results)
        results = tf.concat(
            [ps.broadcast_to(r, broadcasted_shape) for r in results], axis=-1)
        return tf.math.reduce_sum(initial_weights * results * end_weights,
                                  axis=-1)
Example No. 3
  def __init__(self,
               loc,
               scale,
               validate_args=False,
               allow_nan_stats=True,
               name="Gumbel"):
    """Construct Gumbel distributions with location and scale `loc` and `scale`.

    The parameters `loc` and `scale` must be shaped in a way that supports
    broadcasting (e.g. `loc + scale` is a valid operation).

    Args:
      loc: Floating point tensor, the means of the distribution(s).
      scale: Floating point tensor, the scales of the distribution(s).
        scale must contain only positive values.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
        Default value: `True`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: `'Gumbel'`.

    Raises:
      TypeError: if loc and scale are different dtypes.
    """
    parameters = dict(locals())
    with tf.name_scope(name) as name:
      dtype = dtype_util.common_dtype([loc, scale], dtype_hint=tf.float32)
      loc = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
      scale = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
      with tf.control_dependencies(
          [assert_util.assert_positive(scale)] if validate_args else []):
        loc = tf.identity(loc, name="loc")
        scale = tf.identity(scale, name="scale")
        dtype_util.assert_same_float_dtype([loc, scale])
        self._gumbel_bijector = gumbel_bijector.Gumbel(
            loc=loc, scale=scale, validate_args=validate_args)

      # Because the uniform sampler generates samples in `[0, 1)`, samples
      # would lie in `[-inf, inf)` instead of `(-inf, inf)`. To fix this, we
      # use `np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny` as the lower
      # bound, because it is the smallest, positive, "normal" number.
      super(Gumbel, self).__init__(
          distribution=uniform.Uniform(
              low=np.finfo(dtype_util.as_numpy_dtype(dtype)).tiny,
              high=tf.ones([], dtype=loc.dtype),
              allow_nan_stats=allow_nan_stats),
          # The Gumbel bijector encodes the quantile
          # function as the forward, and hence needs to
          # be inverted.
          bijector=invert_bijector.Invert(self._gumbel_bijector),
          batch_shape=distribution_util.get_broadcast_shape(loc, scale),
          parameters=parameters,
          name=name)
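For orientation, roughly the same construction can be sketched with the public TFP API; the internal classes above may differ between versions, and the `1e-30` lower bound below is only a stand-in for `np.finfo(...).tiny`.

import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors

# Uniform samples pushed through the inverse CDF bijector give Gumbel samples.
gumbel_like = tfd.TransformedDistribution(
    distribution=tfd.Uniform(low=1e-30, high=1.),
    bijector=tfb.Invert(tfb.GumbelCDF(loc=1., scale=2.)))
samples = gumbel_like.sample(3, seed=42)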
Example No. 4
def _kl_pareto_pareto(a, b, name=None):
    """Calculate the batched KL divergence KL(a || b) with a and b Pareto.

  Args:
    a: instance of a Pareto distribution object.
    b: instance of a Pareto distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_pareto_pareto".

  Returns:
    Batchwise KL(a || b)
  """
    with tf.compat.v2.name_scope(name or "kl_pareto_pareto"):
        # Consistent with
        # http://www.mast.queensu.ca/~communications/Papers/gil-msc11.pdf, page 55
        # Terminology is different from source to source for Pareto distributions.
        # The 'concentration' parameter corresponds to 'a' in that source, and the
        # 'scale' parameter corresponds to 'm'.
        final_batch_shape = distribution_util.get_broadcast_shape(
            a.concentration, b.concentration, a.scale, b.scale)
        common_type = dtype_util.common_dtype(
            [a.concentration, b.concentration, a.scale, b.scale], tf.float32)
        return tf.where(
            a.scale >= b.scale,
            b.concentration * (tf.math.log(a.scale) - tf.math.log(b.scale)) +
            tf.math.log(a.concentration) - tf.math.log(b.concentration) +
            b.concentration / a.concentration - 1.0,
            tf.broadcast_to(tf.cast(np.inf, common_type), final_batch_shape))
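The closed form above can be sanity-checked against TFP's registered KL; a sketch with arbitrary parameter values, assuming `tensorflow_probability` is installed:

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

a = tfd.Pareto(concentration=3., scale=2.)
b = tfd.Pareto(concentration=1.5, scale=1.)  # a.scale >= b.scale, so the KL is finite
manual = (b.concentration * (tf.math.log(a.scale) - tf.math.log(b.scale)) +
          tf.math.log(a.concentration) - tf.math.log(b.concentration) +
          b.concentration / a.concentration - 1.)
tf.debugging.assert_near(tfd.kl_divergence(a, b), manual)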
Example No. 5
  def _compute_flattened_covariance(self, index_points=None):
    # This is of shape KN x KN, where K is the number of outputs.
    # Compute this explicitly via the Schur complement of the vector kernel.
    # The reason this is written explicitly, as opposed to using a GPRM
    # internally for reshaping, is that there is potential for efficiency gains
    # when `observation_noise_variance = 0.`.
    index_points = self._get_index_points(index_points)
    kxx = self.kernel.matrix_over_all_tasks(index_points, index_points)

    kxz = self.kernel.matrix_over_all_tasks(
        index_points, self.observation_index_points).to_dense()
    if self._observations_is_missing is not None:
      kxz = tf.where(_vec(tf.math.logical_not(
          self._observations_is_missing))[..., tf.newaxis, :],
                     kxz,
                     tf.zeros([], dtype=kxz.dtype))
    cholinv_kzx = self.observation_cholesky.solve(kxz, adjoint_arg=True)
    kxz_kzzinv_kzx = tf.linalg.matmul(
        cholinv_kzx, cholinv_kzx, transpose_a=True)

    flattened_covariance = kxx.to_dense() - kxz_kzzinv_kzx
    if self.predictive_noise_variance is None:
      return flattened_covariance
    broadcast_shape = distribution_util.get_broadcast_shape(
        flattened_covariance, self.predictive_noise_variance[..., tf.newaxis,
                                                             tf.newaxis])
    flattened_covariance = tf.broadcast_to(flattened_covariance,
                                           broadcast_shape)
    return _add_diagonal_shift(flattened_covariance,
                               self.predictive_noise_variance)
Example No. 6
def _kl_uniform_uniform(a, b, name=None):
    """Calculate the batched KL divergence KL(a || b) with a and b Uniform.

  Note that the KL divergence is infinite if the support of `a` is not a subset
  of the support of `b`.

  Args:
    a: instance of a Uniform distribution object.
    b: instance of a Uniform distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_uniform_uniform".

  Returns:
    Batchwise KL(a || b)
  """
    with tf.name_scope(name or "kl_uniform_uniform"):
        # Consistent with
        # http://www.mast.queensu.ca/~communications/Papers/gil-msc11.pdf, page 60
        # Watch out for the change in conventions--they use 'a' and 'b' to refer to
        # lower and upper bounds respectively there.
        final_batch_shape = distribution_util.get_broadcast_shape(
            a.low, b.low, a.high, b.high)
        dtype = dtype_util.common_dtype([a.low, a.high, b.low, b.high],
                                        tf.float32)
        return tf1.where(
            (b.low <= a.low) & (a.high <= b.high),
            tf.math.log(b.high - b.low) - tf.math.log(a.high - a.low),
            tf.broadcast_to(
                dtype_util.as_numpy_dtype(dtype)(np.inf), final_batch_shape))
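A quick illustration of both branches with the public API (illustrative values, assuming `tensorflow_probability` is installed):

import tensorflow_probability as tfp
tfd = tfp.distributions

a = tfd.Uniform(low=0.25, high=0.75)
b = tfd.Uniform(low=0., high=1.)
tfd.kl_divergence(a, b)  # log(1.0) - log(0.5) ~= 0.693; the support of `a` lies inside `b`
tfd.kl_divergence(b, a)  # inf, since the support of `b` is not a subset of that of `a`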
Example No. 7
    def __init__(self,
                 concentration,
                 scale,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='Weibull'):
        """Construct Weibull distributions.

    The parameters `concentration` and `scale` must be shaped in a way that
    supports broadcasting (e.g. `concentration + scale` is a valid operation).

    Args:
     concentration: Positive Float-type `Tensor`, the concentration param of the
       distribution. Must contain only positive values.
     scale: Positive Float-type `Tensor`, the scale param of the distribution.
       Must contain only positive values.
     validate_args: Python `bool` indicating whether arguments should be checked
       for correctness.
     allow_nan_stats: Python `bool` indicating whether nan values should be
       allowed.
     name: Python `str` name given to ops managed by this class.
       Default value: `'Weibull'`.

    Raises:
      TypeError: if concentration and scale are different dtypes.

    """
        parameters = dict(locals())
        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([concentration, scale],
                                            dtype_hint=tf.float32)
            concentration = tensor_util.convert_nonref_to_tensor(
                concentration, name='concentration', dtype=dtype)
            scale = tensor_util.convert_nonref_to_tensor(scale,
                                                         name='scale',
                                                         dtype=dtype)
            # Positive scale and concentration is asserted by the incorporated
            # Weibull bijector.
            self._weibull_bijector = weibull_cdf_bijector.WeibullCDF(
                scale=scale,
                concentration=concentration,
                validate_args=validate_args)

            batch_shape = distribution_util.get_broadcast_shape(
                concentration, scale)
            super(Weibull, self).__init__(
                distribution=uniform.Uniform(
                    # TODO(b/137665504): Use batch-adding meta-distribution to set the
                    # batch shape instead of tf.ones.
                    low=tf.zeros(batch_shape, dtype=dtype),
                    high=tf.ones(batch_shape, dtype=dtype),
                    allow_nan_stats=allow_nan_stats),
                # The Weibull bijector encodes the CDF function as the forward,
                # and hence needs to be inverted.
                bijector=invert_bijector.Invert(self._weibull_bijector,
                                                validate_args=validate_args),
                parameters=parameters,
                name=name)
Example No. 8
 def test_with_some_dynamic_shapes_works(self):
     if tf.executing_eagerly(): return
     x = tf.ones([2, 1, 3])
     y = tf1.placeholder_with_default(np.ones([1, 5, 3], dtype=np.float32),
                                      shape=None)
     z = tf.ones([])
     bcast_shape = self.evaluate(
         distribution_util.get_broadcast_shape(x, y, z))
     self.assertAllEqual([2, 5, 3], bcast_shape)
Example No. 9
 def test_with_some_dynamic_shapes_works(self):
     x = tf.ones((2, 1, 3))
     y = tf.placeholder(x.dtype)
     z = tf.ones(())
     with self.test_session() as sess:
         bcast_shape = sess.run(
             distribution_util.get_broadcast_shape(x, y, z),
             feed_dict={y: np.ones((1, 5, 3)).astype(np.float32)})
         self.assertAllEqual([2, 5, 3], bcast_shape)
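The two tests above exercise partially dynamic shapes; for context, a minimal sketch of the fully static case (the internal module path is taken from the calls in these examples):

import tensorflow as tf
from tensorflow_probability.python.internal import distribution_util

x = tf.ones([2, 1, 3])
y = tf.ones([1, 5, 3])
z = tf.ones([])
distribution_util.get_broadcast_shape(x, y, z)  # -> [2, 5, 3], a static Python list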
Example No. 10
def _compute_divisor_matrix(base_kernel, diag_shift, fixed_inputs):
    """Compute the the modified kernel with respect to the fixed inputs."""
    divisor_matrix = base_kernel.matrix(fixed_inputs, fixed_inputs)
    if diag_shift is not None:
        diag_shift = tf.convert_to_tensor(diag_shift)
        broadcast_shape = distribution_util.get_broadcast_shape(
            divisor_matrix, diag_shift[..., tf.newaxis, tf.newaxis])
        divisor_matrix = tf.broadcast_to(divisor_matrix, broadcast_shape)
        divisor_matrix = _add_diagonal_shift(divisor_matrix,
                                             diag_shift[..., tf.newaxis])
    return divisor_matrix
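The broadcast-then-shift step can be mirrored with plain TF ops; in this sketch, `tf.linalg.set_diag` is assumed to do the equivalent of the private `_add_diagonal_shift` helper above.

import tensorflow as tf

matrix = tf.eye(3, batch_shape=[2])   # a batch of 3x3 kernel matrices
diag_shift = tf.constant([0.1, 0.2])  # one shift per batch member
shifted = tf.linalg.set_diag(
    matrix, tf.linalg.diag_part(matrix) + diag_shift[..., tf.newaxis])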
Example No. 11
  def __init__(self,
               loc,
               scale,
               validate_args=False,
               allow_nan_stats=True,
               name="Gumbel"):
    """Construct Gumbel distributions with location and scale `loc` and `scale`.

    The parameters `loc` and `scale` must be shaped in a way that supports
    broadcasting (e.g. `loc + scale` is a valid operation).

    Args:
      loc: Floating point tensor, the means of the distribution(s).
      scale: Floating point tensor, the scales of the distribution(s).
        scale must contain only positive values.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
        Default value: `True`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: `'Gumbel'`.

    Raises:
      TypeError: if loc and scale are different dtypes.
    """
    with tf.name_scope(name, values=[loc, scale]) as name:
      dtype = dtype_util.common_dtype([loc, scale], preferred_dtype=tf.float32)
      loc = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
      scale = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
      with tf.control_dependencies([tf.assert_positive(scale)]
                                   if validate_args else []):
        loc = tf.identity(loc, name="loc")
        scale = tf.identity(scale, name="scale")
        tf.assert_same_float_dtype([loc, scale])
        self._gumbel_bijector = gumbel_bijector.Gumbel(
            loc=loc, scale=scale, validate_args=validate_args)

      super(Gumbel, self).__init__(
          distribution=uniform.Uniform(
              low=tf.zeros([], dtype=loc.dtype),
              high=tf.ones([], dtype=loc.dtype),
              allow_nan_stats=allow_nan_stats),
          # The Gumbel bijector encodes the quantile
          # function as the forward, and hence needs to
          # be inverted.
          bijector=invert_bijector.Invert(self._gumbel_bijector),
          batch_shape=distribution_util.get_broadcast_shape(loc, scale),
          name=name)
Example No. 12
 def _divisor_matrix(self, fixed_inputs=None):
     fixed_inputs = tf.convert_to_tensor(
         self._fixed_inputs if fixed_inputs is None else fixed_inputs)
     divisor_matrix = self._base_kernel.matrix(fixed_inputs, fixed_inputs)
     if self._diag_shift is not None:
         diag_shift = tf.convert_to_tensor(self._diag_shift)
         broadcast_shape = distribution_util.get_broadcast_shape(
             divisor_matrix, diag_shift[..., tf.newaxis, tf.newaxis])
         divisor_matrix = tf.broadcast_to(divisor_matrix, broadcast_shape)
         divisor_matrix = _add_diagonal_shift(divisor_matrix,
                                              diag_shift[..., tf.newaxis])
     return divisor_matrix
Example No. 13
  def __init__(self,
               concentration1=1.,
               concentration0=1.,
               validate_args=False,
               allow_nan_stats=True,
               name='Kumaraswamy'):
    """Initialize a batch of Kumaraswamy distributions.

    Args:
      concentration1: Positive floating-point `Tensor` indicating mean
        number of successes; aka 'alpha'. Implies `self.dtype` and
        `self.batch_shape`, i.e.,
        `concentration1.shape = [N1, N2, ..., Nm] = self.batch_shape`.
      concentration0: Positive floating-point `Tensor` indicating mean
        number of failures; aka 'beta'. Otherwise has same semantics as
        `concentration1`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value '`NaN`' to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
    parameters = dict(locals())
    with tf.name_scope(name) as name:
      dtype = dtype_util.common_dtype([concentration1, concentration0],
                                      dtype_hint=tf.float32)
      concentration1 = tensor_util.convert_nonref_to_tensor(
          concentration1, name='concentration1', dtype=dtype)
      concentration0 = tensor_util.convert_nonref_to_tensor(
          concentration0, name='concentration0', dtype=dtype)
      self._kumaraswamy_cdf = kumaraswamy_cdf.KumaraswamyCDF(
          concentration1=concentration1,
          concentration0=concentration0,
          validate_args=validate_args)
      batch_shape = distribution_util.get_broadcast_shape(
          concentration1, concentration0)
      super(Kumaraswamy, self).__init__(
          # TODO(b/137665504): Use batch-adding meta-distribution to set the
          # batch shape instead of tf.zeros.
          distribution=uniform.Uniform(
              low=tf.zeros(batch_shape, dtype=dtype),
              high=tf.ones([], dtype=dtype),
              allow_nan_stats=allow_nan_stats),
          bijector=invert.Invert(
              self._kumaraswamy_cdf, validate_args=validate_args),
          parameters=parameters,
          name=name)
Example No. 14
    def __init__(self,
                 loc,
                 scale,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="Gumbel"):
        """Construct Gumbel distributions with location and scale `loc` and `scale`.

    The parameters `loc` and `scale` must be shaped in a way that supports
    broadcasting (e.g. `loc + scale` is a valid operation).

    Args:
      loc: Floating point tensor, the means of the distribution(s).
      scale: Floating point tensor, the scales of the distribution(s).
        scale must contain only positive values.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
        Default value: `True`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: `'Gumbel'`.

    Raises:
      TypeError: if loc and scale are different dtypes.
    """
        with tf.name_scope(name, values=[loc, scale]) as name:
            with tf.control_dependencies(
                [tf.assert_positive(scale)] if validate_args else []):
                loc = tf.identity(loc, name="loc")
                scale = tf.identity(scale, name="scale")
                tf.assert_same_float_dtype([loc, scale])
                self._gumbel_bijector = bijectors.Gumbel(
                    loc=loc, scale=scale, validate_args=validate_args)

            super(Gumbel, self).__init__(
                distribution=tf.distributions.Uniform(
                    low=tf.zeros([], dtype=loc.dtype),
                    high=tf.ones([], dtype=loc.dtype),
                    allow_nan_stats=allow_nan_stats),
                # The Gumbel bijector encodes the quantile
                # function as the forward, and hence needs to
                # be inverted.
                bijector=bijectors.Invert(self._gumbel_bijector),
                batch_shape=distribution_util.get_broadcast_shape(loc, scale),
                name=name)
Example No. 15
    def __init__(self,
                 concentration1=None,
                 concentration0=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="Kumaraswamy"):
        """Initialize a batch of Kumaraswamy distributions.

    Args:
      concentration1: Positive floating-point `Tensor` indicating mean
        number of successes; aka "alpha". Implies `self.dtype` and
        `self.batch_shape`, i.e.,
        `concentration1.shape = [N1, N2, ..., Nm] = self.batch_shape`.
      concentration0: Positive floating-point `Tensor` indicating mean
        number of failures; aka "beta". Otherwise has same semantics as
        `concentration1`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
        parameters = dict(locals())
        with tf.compat.v1.name_scope(name,
                                     values=[concentration1,
                                             concentration0]) as name:
            dtype = dtype_util.common_dtype([concentration1, concentration0],
                                            tf.float32)
            concentration1 = tf.convert_to_tensor(value=concentration1,
                                                  name="concentration1",
                                                  dtype=dtype)
            concentration0 = tf.convert_to_tensor(value=concentration0,
                                                  name="concentration0",
                                                  dtype=dtype)
        super(Kumaraswamy, self).__init__(
            distribution=uniform.Uniform(
                low=tf.zeros([], dtype=concentration1.dtype),
                high=tf.ones([], dtype=concentration1.dtype),
                allow_nan_stats=allow_nan_stats),
            bijector=kumaraswamy_bijector.Kumaraswamy(
                concentration1=concentration1,
                concentration0=concentration0,
                validate_args=validate_args),
            batch_shape=distribution_util.get_broadcast_shape(
                concentration1, concentration0),
            parameters=parameters,
            name=name)
Example No. 16
  def __init__(self,
               concentration1=None,
               concentration0=None,
               validate_args=False,
               allow_nan_stats=True,
               name="Kumaraswamy"):
    """Initialize a batch of Kumaraswamy distributions.

    Args:
      concentration1: Positive floating-point `Tensor` indicating mean
        number of successes; aka "alpha". Implies `self.dtype` and
        `self.batch_shape`, i.e.,
        `concentration1.shape = [N1, N2, ..., Nm] = self.batch_shape`.
      concentration0: Positive floating-point `Tensor` indicating mean
        number of failures; aka "beta". Otherwise has same semantics as
        `concentration1`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
    with tf.name_scope(name, values=[concentration1, concentration0]) as name:
      concentration1 = tf.convert_to_tensor(
          concentration1, name="concentration1")
      concentration0 = tf.convert_to_tensor(
          concentration0, name="concentration0")
    super(Kumaraswamy, self).__init__(
        distribution=tf.distributions.Uniform(
            low=tf.zeros([], dtype=concentration1.dtype),
            high=tf.ones([], dtype=concentration1.dtype),
            allow_nan_stats=allow_nan_stats),
        bijector=bijectors.Kumaraswamy(
            concentration1=concentration1,
            concentration0=concentration0,
            validate_args=validate_args),
        batch_shape=distribution_util.get_broadcast_shape(
            concentration1, concentration0),
        name=name)
    self._reparameterization_type = tf.distributions.FULLY_REPARAMETERIZED
Example No. 17
 def _compute_flattened_covariance(self, index_points=None):
     # This is of shape KN x KN, where K is the number of outputs
     index_points = self._get_index_points(index_points)
     kernel_matrix = self.kernel.matrix_over_all_tasks(
         index_points, index_points)
     if self.observation_noise_variance is None:
         return kernel_matrix
     kernel_matrix = kernel_matrix.to_dense()
     broadcast_shape = distribution_util.get_broadcast_shape(
         kernel_matrix, self.observation_noise_variance[..., tf.newaxis,
                                                        tf.newaxis])
     kernel_matrix = tf.broadcast_to(kernel_matrix, broadcast_shape)
     kernel_matrix = tf.linalg.set_diag(
         kernel_matrix,
         tf.linalg.diag_part(kernel_matrix) +
         self.observation_noise_variance[..., tf.newaxis])
     kernel_matrix = tf.linalg.LinearOperatorFullMatrix(
         kernel_matrix, is_non_singular=True, is_positive_definite=True)
     return kernel_matrix
Example No. 18
  def _compute_covariance(self, index_points):
    kernel_matrix = self.kernel.matrix(index_points, index_points)
    if self._is_univariate_marginal(index_points):
      # kernel_matrix thus has shape [..., 1, 1]; squeeze off the last dims and
      # tack on the observation noise variance.
      return (tf.squeeze(kernel_matrix, axis=[-2, -1]) +
              self.observation_noise_variance)
    else:
      # We are computing K + obs_noise_variance * I. The shape of this matrix
      # is the broadcast of the shapes of K and obs_noise_variance * I.
      broadcast_shape = distribution_util.get_broadcast_shape(
          kernel_matrix,
          # We pad with two singleton dimensions since this represents a batch
          # of scaled identity matrices.
          self.observation_noise_variance[..., tf.newaxis, tf.newaxis])

      kernel_matrix = tf.broadcast_to(kernel_matrix, broadcast_shape)
      return _add_diagonal_shift(
          kernel_matrix, self.observation_noise_variance[..., tf.newaxis])
Example No. 19
def _compute_observation_scale(kernel,
                               observation_index_points,
                               cholesky_fn,
                               observation_noise_variance=None,
                               observations_is_missing=None):
    """Compute matrix square root of the kernel on observation index points."""
    if observations_is_missing is not None:
        observations_is_missing = tf.convert_to_tensor(observations_is_missing)
        # If observations are missing, there's nothing we can do to preserve the
        # operator structure, so densify.

        observation_covariance = kernel.matrix_over_all_tasks(
            observation_index_points, observation_index_points).to_dense()

        if observation_noise_variance is not None:
            broadcast_shape = distribution_util.get_broadcast_shape(
                observation_covariance,
                observation_noise_variance[..., tf.newaxis, tf.newaxis])
            observation_covariance = tf.broadcast_to(observation_covariance,
                                                     broadcast_shape)
            observation_covariance = _add_diagonal_shift(
                observation_covariance, observation_noise_variance)
        vec_observations_is_missing = _vec(observations_is_missing)
        observation_covariance = tf.linalg.LinearOperatorFullMatrix(
            psd_kernels_util.mask_matrix(
                observation_covariance,
                is_missing=vec_observations_is_missing),
            is_non_singular=True,
            is_positive_definite=True)
        observation_scale = cholesky_util.cholesky_from_fn(
            observation_covariance, cholesky_fn)
    else:
        observation_scale = mtgp._compute_flattened_scale(  # pylint:disable=protected-access
            kernel=kernel,
            index_points=observation_index_points,
            cholesky_fn=cholesky_fn,
            observation_noise_variance=observation_noise_variance)

    return observation_scale
Example No. 20
  def __init__(self,
               loc,
               scale,
               low,
               high,
               validate_args=False,
               allow_nan_stats=True,
               name="TruncatedNormal"):
    """Construct TruncatedNormal.

    All parameters of the distribution will be broadcast to the same shape,
    so the resulting distribution will have a batch_shape of the broadcast
    shape of all parameters.

    Args:
      loc: Floating point tensor; the mean of the normal distribution(s) (
        note that the mean of the resulting distribution will be different
        since it is modified by the bounds).
      scale: Floating point tensor; the std deviation of the normal
        distribution(s).
      low: `float` `Tensor` representing lower bound of the distribution's
        support. Must be such that `low < high`.
      high: `float` `Tensor` representing upper bound of the distribution's
        support. Must be such that `low < high`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked at run-time.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
    parameters = dict(locals())
    with tf.name_scope(name, values=[scale]) as name:
      loc = tf.convert_to_tensor(loc, name="loc")
      dtype = loc.dtype
      scale = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
      low = tf.convert_to_tensor(low, name="low", dtype=dtype)
      high = tf.convert_to_tensor(high, name="high", dtype=dtype)
      tf.assert_same_float_dtype([loc, scale, low, high])

      self._broadcast_batch_shape = distribution_util.get_broadcast_shape(
          loc, scale, low, high)

      # Broadcast all parameters to the same shape
      broadcast_ones = tf.ones(shape=self._broadcast_batch_shape,
                               dtype=scale.dtype)
      self._scale = scale * broadcast_ones
      self._loc = loc * broadcast_ones
      self._low = low * broadcast_ones
      self._high = high * broadcast_ones

      with tf.control_dependencies([self._validate()] if validate_args else []):
        self._loc = tf.identity(self._loc)

    super(TruncatedNormal, self).__init__(
        dtype=dtype,
        # This distribution is partially reparameterized: loc and scale have
        # straight-through gradients, but the bounds do not.
        # TODO(mfigurnov): This could be extended to use implicit gradients to
        # compute derivatives for the bounds.
        # https://arxiv.org/pdf/1806.01851.pdf
        reparameterization_type=tf.distributions.NOT_REPARAMETERIZED,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        parameters=parameters,
        graph_parents=[loc, scale, low, high],
        name=name)
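The `broadcast_ones` multiplication above is the usual trick for giving every parameter the same explicit batch shape; a minimal sketch with illustrative shapes, using the internal module path seen in these examples:

import tensorflow as tf
from tensorflow_probability.python.internal import distribution_util

loc = tf.zeros([3, 1])
scale = tf.ones([1, 4])
batch_shape = distribution_util.get_broadcast_shape(loc, scale)  # [3, 4]
broadcast_ones = tf.ones(batch_shape, dtype=scale.dtype)
loc_b = loc * broadcast_ones      # shape [3, 4]
scale_b = scale * broadcast_ones  # shape [3, 4]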
Example No. 21
 def _batch_shape_tensor(self):
     return distribution_util.get_broadcast_shape(self.skewness,
                                                  self.tailweight, self.loc,
                                                  self.scale)
Example No. 22
    def precompute_regression_model(
            kernel,
            observation_index_points,
            observations,
            observations_is_missing=None,
            index_points=None,
            observation_noise_variance=None,
            predictive_noise_variance=None,
            mean_fn=None,
            cholesky_fn=None,
            validate_args=False,
            allow_nan_stats=False,
            name='PrecomputedMultiTaskGaussianProcessRegressionModel'):
        """Returns a MTGaussianProcessRegressionModel with precomputed quantities.

    This differs from the constructor by precomputing quantities associated with
    observations in a non-tape safe way. `index_points` is the only parameter
    that is allowed to vary (i.e. is a `Variable` / changes after
    initialization).

    Specifically:

    * We make `observation_index_points` and `observations` mandatory
      parameters.
    * We precompute `kernel(observation_index_points, observation_index_points)`
      along with any other associated quantities relating to the `kernel`,
      `observations` and `observation_index_points`.

    A typical usecase would be optimizing kernel hyperparameters for a
    `MultiTaskGaussianProcess`, and computing the posterior predictive with
    respect to those optimized hyperparameters and observation / index-points
    pairs.

    WARNING: This method assumes `index_points` is the only varying parameter
    (i.e. is a `Variable` / changes after initialization) and hence is not
    tape-safe.

    Args:
      kernel: `PositiveSemidefiniteKernel`-like instance representing the
        GP's covariance function.
      observation_index_points: `float` `Tensor` representing finite collection,
        or batch of collections, of points in the index set for which some data
        has been observed. Shape has the form `[b1, ..., bB, e, f1, ..., fF]`
        where `F` is the number of feature dimensions and must equal
        `kernel.feature_ndims`, and `e` is the number (size) of index points in
        each batch. `[b1, ..., bB, e]` must be broadcastable with the shape of
        `observations`, and `[b1, ..., bB]` must be broadcastable with the
        shapes of all other batched parameters (`kernel.batch_shape`,
        `index_points`, etc). The default value is `None`, which corresponds to
        the empty set of observations, and simply results in the prior
        predictive model (a GP with noise of variance
        `predictive_noise_variance`).
      observations: `float` `Tensor` representing collection, or batch of
        collections, of observations corresponding to
        `observation_index_points`. Shape has the form `[b1, ..., bB, e, t]`
        The batch shape `[b1, ..., bB]` must be
        broadcastable with the shapes of all other batched parameters
        (`kernel.batch_shape`, `index_points`, etc.). The default value is
        `None`, which corresponds to the empty set of observations, and simply
        results in the prior predictive model (a GP with noise of variance
        `predictive_noise_variance`).
      observations_is_missing:  `bool` `Tensor` of shape `[..., e]`,
        representing a batch of boolean masks.  When `observations_is_missing`
        is not `None`, the returned distribution is conditioned only on the
        observations for which the corresponding elements of
        `observations_is_missing` are `True`.
      index_points: `float` `Tensor` representing finite collection, or batch of
        collections, of points in the index set over which the GP is defined.
        Shape has the form `[b1, ..., bB, e, f1, ..., fF]` where `F` is the
        number of feature dimensions and must equal `kernel.feature_ndims` and
        `e` is the number (size) of index points in each batch. Ultimately this
        distribution corresponds to an `e`-dimensional multivariate normal. The
        batch shape must be broadcastable with `kernel.batch_shape` and any
        batch dims yielded by `mean_fn`.
      observation_noise_variance: `float` `Tensor` representing the variance
        of the noise in the Normal likelihood distribution of the model. May be
        batched, in which case the batch shape must be broadcastable with the
        shapes of all other batched parameters (`kernel.batch_shape`,
        `index_points`, etc.).
        Default value: `None`
      predictive_noise_variance: `float` `Tensor` representing the variance in
        the posterior predictive model. If `None`, we simply re-use
        `observation_noise_variance` for the posterior predictive noise. If set
        explicitly, however, we use this value. This allows us, for example, to
        omit predictive noise variance (by setting this to zero) to obtain
        noiseless posterior predictions of function values, conditioned on noisy
        observations.
      mean_fn: Python `callable` that acts on `index_points` to produce a
        collection, or batch of collections, of mean values at `index_points`.
        Takes a `Tensor` of shape `[b1, ..., bB, f1, ..., fF]` and returns a
        `Tensor` whose shape is broadcastable with `[b1, ..., bB, t]`.
        Default value: `None` implies the constant zero function.
      cholesky_fn: Callable which takes a single (batch) matrix argument and
        returns a Cholesky-like lower triangular factor.  Default value: `None`,
        in which case `make_cholesky_with_jitter_fn` is used with the `jitter`
        parameter.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `False`. When `True`,
        statistics (e.g., mean, mode, variance) use the value `NaN` to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
        Default value: `False`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: 'PrecomputedMultiTaskGaussianProcessRegressionModel'.

    Returns:
      An instance of `MultiTaskGaussianProcessRegressionModel` with precomputed
      quantities associated with observations.
    """

        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([
                index_points,
                observation_index_points,
                observations,
                observation_noise_variance,
                predictive_noise_variance,
            ], tf.float32)

            # Convert-to-tensor arguments that are expected to not be Variables / not
            # going to change.
            observation_index_points = tf.convert_to_tensor(
                observation_index_points, dtype=dtype)
            if observation_noise_variance is not None:
                observation_noise_variance = tf.convert_to_tensor(
                    observation_noise_variance, dtype=dtype)
            observations = tf.convert_to_tensor(observations, dtype=dtype)

            if observations_is_missing is not None:
                observations_is_missing = tf.convert_to_tensor(
                    observations_is_missing)

            if cholesky_fn is None:
                cholesky_fn = cholesky_util.make_cholesky_with_jitter_fn()
            else:
                if not callable(cholesky_fn):
                    raise ValueError('`cholesky_fn` must be a Python callable')

            if mean_fn is None:
                mean_fn = lambda x: tf.zeros([1], dtype=dtype)
            else:
                if not callable(mean_fn):
                    raise ValueError('`mean_fn` must be a Python callable')

            if observations_is_missing is not None:
                # If observations are missing, there's nothing we can do to preserve the
                # operator structure, so densify.

                observation_covariance = kernel.matrix_over_all_tasks(
                    observation_index_points,
                    observation_index_points).to_dense()

                if observation_noise_variance is not None:
                    broadcast_shape = distribution_util.get_broadcast_shape(
                        observation_covariance,
                        observation_noise_variance[..., tf.newaxis,
                                                   tf.newaxis])
                    observation_covariance = tf.broadcast_to(
                        observation_covariance, broadcast_shape)
                    observation_covariance = _add_diagonal_shift(
                        observation_covariance, observation_noise_variance)
                vec_observations_is_missing = _vec(observations_is_missing)
                observation_covariance = tf.linalg.LinearOperatorFullMatrix(
                    psd_kernels_util.mask_matrix(
                        observation_covariance,
                        is_missing=vec_observations_is_missing),
                    is_non_singular=True,
                    is_positive_definite=True)
                observation_scale = cholesky_util.cholesky_from_fn(
                    observation_covariance, cholesky_fn)
            else:
                observation_scale = mtgp._compute_flattened_scale(  # pylint:disable=protected-access
                    kernel=kernel,
                    index_points=observation_index_points,
                    cholesky_fn=cholesky_fn,
                    observation_noise_variance=observation_noise_variance)

            # Note that the conditional mean is
            # k(x, o) @ (k(o, o) + sigma**2)^-1 obs. We can precompute the latter
            # term since it won't change per iteration.
            vec_diff = _vec(observations - mean_fn(observation_index_points))

            if observations_is_missing is not None:
                vec_diff = tf.where(vec_observations_is_missing,
                                    tf.zeros([], dtype=vec_diff.dtype),
                                    vec_diff)
            solve_on_observations = observation_scale.solvevec(
                observation_scale.solvevec(vec_diff), adjoint=True)

            def flattened_conditional_mean_fn(x):

                return _flattened_conditional_mean_fn_helper(
                    x,
                    kernel,
                    observations,
                    observation_index_points,
                    observations_is_missing,
                    observation_scale,
                    mean_fn,
                    solve_on_observations=solve_on_observations)

            mtgprm = MultiTaskGaussianProcessRegressionModel(
                kernel=kernel,
                observation_index_points=observation_index_points,
                observations=observations,
                index_points=index_points,
                observation_noise_variance=observation_noise_variance,
                predictive_noise_variance=predictive_noise_variance,
                cholesky_fn=cholesky_fn,
                _flattened_conditional_mean_fn=flattened_conditional_mean_fn,
                _observation_scale=observation_scale,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                name=name)

        return mtgprm
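The nested `solvevec` calls above implement the usual two-triangular-solve pattern; a minimal dense sketch (illustrative 2x2 values) of the quantity being precomputed:

import tensorflow as tf

K = tf.constant([[2., 0.5], [0.5, 1.]])  # stands in for k(o, o) + sigma**2 * I
diff = tf.constant([1., -1.])            # stands in for observations - mean_fn(o)
L = tf.linalg.cholesky(K)
solve_on_observations = tf.linalg.cholesky_solve(L, diff[:, tf.newaxis])  # K^{-1} diff
tf.linalg.matvec(K, tf.squeeze(solve_on_observations, -1))  # recovers `diff`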
Example No. 23
    def __init__(self,
                 loc,
                 scale,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='Moyal'):
        """Construct Moyal distributions with location and scale `loc` and `scale`.

    The parameters `loc` and `scale` must be shaped in a way that supports
    broadcasting (e.g. `loc + scale` is a valid operation).

    Args:
      loc: Floating point tensor, the means of the distribution(s).
      scale: Floating point tensor, the scales of the distribution(s).
        scale must contain only positive values.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value `NaN` to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
        Default value: `True`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: `'Moyal'`.

    Raises:
      TypeError: if loc and scale are different dtypes.


    #### References

    [1] J.E. Moyal, "XXX. Theory of ionization fluctuations",
       The London, Edinburgh, and Dublin Philosophical Magazine
       and Journal of Science.
       https://www.tandfonline.com/doi/abs/10.1080/14786440308521076
    [2] G. Cordeiro, J. Nobre, R. Pescim, E. Ortega,
        "The beta Moyal: a useful skew distribution",
        https://www.arpapress.com/Volumes/Vol10Issue2/IJRRAS_10_2_02.pdf
    """
        parameters = dict(locals())
        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([loc, scale],
                                            dtype_hint=tf.float32)
            loc = tensor_util.convert_nonref_to_tensor(loc,
                                                       name='loc',
                                                       dtype=dtype)
            scale = tensor_util.convert_nonref_to_tensor(scale,
                                                         name='scale',
                                                         dtype=dtype)
            dtype_util.assert_same_float_dtype([loc, scale])
            # Positive scale is asserted by the incorporated Moyal bijector.
            self._moyal_bijector = moyal_cdf_bijector.MoyalCDF(
                loc=loc, scale=scale, validate_args=validate_args)

            # Because the uniform sampler generates samples in `[0, 1)`, samples
            # would lie in `[-inf, inf)` instead of `(-inf, inf)`. To fix this, we
            # use `np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny` as the
            # lower bound, because it is the smallest, positive, 'normal' number.
            batch_shape = distribution_util.get_broadcast_shape(loc, scale)
            super(Moyal, self).__init__(
                # TODO(b/137665504): Use batch-adding meta-distribution to set the
                # batch shape instead of tf.ones.
                distribution=uniform.Uniform(
                    low=np.finfo(dtype_util.as_numpy_dtype(dtype)).tiny,
                    high=tf.ones(batch_shape, dtype=dtype),
                    allow_nan_stats=allow_nan_stats),
                # The Moyal bijector encodes the CDF function as the forward,
                # and hence needs to be inverted.
                bijector=invert_bijector.Invert(self._moyal_bijector,
                                                validate_args=validate_args),
                parameters=parameters,
                name=name)
Example No. 24
    def __init__(self,
                 loc,
                 scale,
                 skewness=None,
                 tailweight=None,
                 distribution=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="SinhArcsinh"):
        """Construct SinhArcsinh distribution on `(-inf, inf)`.

    Arguments `(loc, scale, skewness, tailweight)` must have broadcastable shape
    (indexing batch dimensions).  They must all have the same `dtype`.

    Args:
      loc: Floating-point `Tensor`.
      scale:  `Tensor` of same `dtype` as `loc`.
      skewness:  Skewness parameter.  Default is `0.0` (no skew).
      tailweight:  Tailweight parameter. Default is `1.0` (unchanged tailweight)
      distribution: `tf.Distribution`-like instance. Distribution that is
        transformed to produce this distribution.
        Default is `tfd.Normal(0., 1.)`.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a `SinhArcsinh` sample and `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
        parameters = dict(locals())

        with tf.compat.v2.name_scope(name) as name:
            dtype = dtype_util.common_dtype([loc, scale, skewness, tailweight],
                                            tf.float32)
            loc = tf.convert_to_tensor(value=loc, name="loc", dtype=dtype)
            scale = tf.convert_to_tensor(value=scale,
                                         name="scale",
                                         dtype=dtype)
            tailweight = 1. if tailweight is None else tailweight
            has_default_skewness = skewness is None
            skewness = 0. if skewness is None else skewness
            tailweight = tf.convert_to_tensor(value=tailweight,
                                              name="tailweight",
                                              dtype=dtype)
            skewness = tf.convert_to_tensor(value=skewness,
                                            name="skewness",
                                            dtype=dtype)

            batch_shape = distribution_util.get_broadcast_shape(
                loc, scale, tailweight, skewness)

            # Recall, with Z a random variable,
            #   Y := loc + C * F(Z),
            #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight )
            #   F_0(Z) := Sinh( Arcsinh(Z) * tailweight )
            #   C := 2 * scale / F_0(2)
            if distribution is None:
                distribution = normal.Normal(loc=tf.zeros([], dtype=dtype),
                                             scale=tf.ones([], dtype=dtype),
                                             allow_nan_stats=allow_nan_stats)
            else:
                asserts = distribution_util.maybe_check_scalar_distribution(
                    distribution, dtype, validate_args)
                if asserts:
                    loc = distribution_util.with_dependencies(asserts, loc)

            # Make the SAS bijector, 'F'.
            f = sinh_arcsinh_bijector.SinhArcsinh(skewness=skewness,
                                                  tailweight=tailweight)
            if has_default_skewness:
                f_noskew = f
            else:
                f_noskew = sinh_arcsinh_bijector.SinhArcsinh(
                    skewness=skewness.dtype.as_numpy_dtype(0.),
                    tailweight=tailweight)

            # Make the AffineScalar bijector, Z --> loc + c * Z,
            # where c = 2 * scale / F_0(2).
            c = 2 * scale / f_noskew.forward(
                tf.convert_to_tensor(value=2, dtype=dtype))
            affine = affine_scalar_bijector.AffineScalar(
                shift=loc, scale=c, validate_args=validate_args)

            bijector = chain_bijector.Chain([affine, f])

            super(SinhArcsinh, self).__init__(distribution=distribution,
                                              bijector=bijector,
                                              batch_shape=batch_shape,
                                              validate_args=validate_args,
                                              name=name)
        self._parameters = parameters
        self._loc = loc
        self._scale = scale
        self._tailweight = tailweight
        self._skewness = skewness
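As a sanity check on the comment block above: with zero skewness and unit tailweight, `F` is the identity and `C = scale`, so the result should reduce to `Normal(loc, scale)`. A sketch with the public API, assuming `tensorflow_probability` is available:

import tensorflow_probability as tfp
tfd = tfp.distributions

sas = tfd.SinhArcsinh(loc=1., scale=2., skewness=0., tailweight=1.)
normal = tfd.Normal(loc=1., scale=2.)
sas.prob(3.), normal.prob(3.)  # both ~0.121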
Example No. 25
  def __init__(self,
               num_timesteps,
               level_scale,
               slope_scale,
               initial_state_prior,
               observation_noise_scale=0.,
               initial_step=0,
               validate_args=False,
               allow_nan_stats=True,
               name=None):
    """Build a state space model implementing a local linear trend.

    Args:
      num_timesteps: Scalar `int` `Tensor` number of timesteps to model
        with this distribution.
      level_scale: Scalar (any additional dimensions are treated as batch
        dimensions) `float` `Tensor` indicating the standard deviation of the
        level transitions.
      slope_scale: Scalar (any additional dimensions are treated as batch
        dimensions) `float` `Tensor` indicating the standard deviation of the
        slope transitions.
      initial_state_prior: instance of `tfd.MultivariateNormal`
        representing the prior distribution on latent states; must
        have event shape `[2]`.
      observation_noise_scale: Scalar (any additional dimensions are
        treated as batch dimensions) `float` `Tensor` indicating the standard
        deviation of the observation noise.
      initial_step: Optional scalar `int` `Tensor` specifying the starting
        timestep.
        Default value: 0.
      validate_args: Python `bool`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
        Default value: `False`.
      allow_nan_stats: Python `bool`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
        Default value: `True`.
      name: Python `str` name prefixed to ops created by this class.
        Default value: "LocalLinearTrendStateSpaceModel".
    """

    with tf.name_scope(name, 'LocalLinearTrendStateSpaceModel',
                       [level_scale, slope_scale]) as name:

      # The initial state prior determines the dtype of sampled values.
      # Other model parameters must have the same dtype.
      dtype = initial_state_prior.dtype

      level_scale = tf.convert_to_tensor(
          level_scale, name='level_scale', dtype=dtype)
      slope_scale = tf.convert_to_tensor(
          slope_scale, name='slope_scale', dtype=dtype)
      observation_noise_scale = tf.convert_to_tensor(
          observation_noise_scale, name='observation_noise_scale', dtype=dtype)

      # Explicitly broadcast all parameters to the same batch shape. This
      # allows us to use `tf.stack` for a compact model specification.
      broadcast_batch_shape = dist_util.get_broadcast_shape(
          level_scale, slope_scale)
      broadcast_ones = tf.ones(broadcast_batch_shape, dtype=dtype)

      self._level_scale = level_scale
      self._slope_scale = slope_scale
      self._observation_noise_scale = observation_noise_scale

      # Construct a linear Gaussian state space model implementing the
      # local linear trend model. See "Mathematical Details" in the
      # class docstring for further explanation.
      super(LocalLinearTrendStateSpaceModel, self).__init__(
          num_timesteps=num_timesteps,
          transition_matrix=tf.constant(
              [[1., 1.], [0., 1.]], dtype=dtype, name='transition_matrix'),
          transition_noise=tfd.MultivariateNormalDiag(
              scale_diag=tf.stack(
                  [level_scale * broadcast_ones, slope_scale * broadcast_ones],
                  axis=-1),
              name='transition_noise'),
          observation_matrix=tf.constant(
              [[1., 0.]], dtype=dtype, name='observation_matrix'),
          observation_noise=tfd.MultivariateNormalDiag(
              scale_diag=observation_noise_scale[..., tf.newaxis],
              name='observation_noise'),
          initial_state_prior=initial_state_prior,
          initial_step=initial_step,
          allow_nan_stats=allow_nan_stats,
          validate_args=validate_args,
          name=name)
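
A short usage sketch (made-up scales; it assumes the public `tfp.sts.LocalLinearTrendStateSpaceModel` class shown above): a batched `level_scale` and a scalar `slope_scale` broadcast together, giving a batch of two independent local-linear-trend models.

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

ssm = tfp.sts.LocalLinearTrendStateSpaceModel(
    num_timesteps=50,
    level_scale=tf.constant([0.1, 0.5]),   # batch of 2
    slope_scale=0.05,                      # scalar, broadcast against the batch
    initial_state_prior=tfd.MultivariateNormalDiag(scale_diag=tf.ones([2])),
    observation_noise_scale=0.3)
series = ssm.sample()                      # shape [2, 50, 1]
log_prob = ssm.log_prob(series)            # shape [2]
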
Example No. 26
def _compute_flattened_scale(kernel,
                             index_points,
                             cholesky_fn,
                             observation_noise_variance=None):
    """Computes a matrix square root of the flattened covariance matrix.

  Given a multi-task kernel `k`, computes a matrix square root of the kernel
  matrix over all tasks at `index_points`. That is, compute `S` such that
  `S @ S^T = k.matrix_over_all_tasks(index_points, index_points)`.

  In the case of a `Separable` or `Independent` kernel, this function tries to
  do this efficiently in O(N^3 + T^3) time where `N` is the number of
  `index_points` and `T` is the number of tasks.

  Args:
    kernel: `MultiTaskKernel`-like instance representing the GP's covariance
      function.
    index_points: `float` `Tensor` representing finite collection, or batch of
      collections, of points in the index set over which the GP is defined.
      Shape has the form `[b1, ..., bB, e, f1, ..., fF]` where `F` is the
      number of feature dimensions and must equal `kernel.feature_ndims` and
      `e` is the number (size) of index points in each batch. Ultimately this
      distribution corresponds to an `e`-dimensional multivariate normal. The
      batch shape must be broadcastable with `kernel.batch_shape`.
    cholesky_fn: Callable which takes a single (batch) matrix argument and
      returns a Cholesky-like lower triangular factor.
    observation_noise_variance: `float` `Tensor` representing the variance
      of the noise in the Normal likelihood distribution of the model. May be
      batched, in which case the batch shape must be broadcastable with the
      shapes of all other batched parameters (`kernel.batch_shape`,
      `index_points`, etc.).
      Default value: `None`
  Returns:
    scale_operator: `LinearOperator` representing a matrix square root of
    the flattened kernel matrix over all tasks.

  """
    # This is of shape KN x KN, where K is the number of outputs
    kernel_matrix = kernel.matrix_over_all_tasks(index_points, index_points)
    if observation_noise_variance is None:
        return cholesky_util.cholesky_from_fn(kernel_matrix, cholesky_fn)

    observation_noise_variance = tf.convert_to_tensor(
        observation_noise_variance)

    # We can add the observation noise to each block.
    if isinstance(kernel, multitask_kernel.Independent):
        # The Independent kernel matrix is realized as a kronecker product of the
        # kernel over inputs, and an identity matrix per task (representing
        # independent tasks). Update the diagonal of the first matrix and take the
        # cholesky of it (since the cholesky of the second matrix will remain the
        # identity matrix.)
        base_kernel_matrix = kernel_matrix.operators[0].to_dense()

        broadcast_shape = distribution_util.get_broadcast_shape(
            base_kernel_matrix, observation_noise_variance[..., tf.newaxis,
                                                           tf.newaxis])
        base_kernel_matrix = tf.broadcast_to(base_kernel_matrix,
                                             broadcast_shape)
        base_kernel_matrix = tf.linalg.set_diag(
            base_kernel_matrix,
            tf.linalg.diag_part(base_kernel_matrix) +
            observation_noise_variance[..., tf.newaxis])
        base_kernel_matrix = tf.linalg.LinearOperatorFullMatrix(
            base_kernel_matrix)
        kernel_matrix = tf.linalg.LinearOperatorKronecker(
            operators=[base_kernel_matrix] + kernel_matrix.operators[1:])
        return cholesky_util.cholesky_from_fn(kernel_matrix, cholesky_fn)

    if isinstance(kernel, multitask_kernel.Separable):
        # When `kernel_matrix` is a kronecker product, we can compute
        # an eigenvalue decomposition to get a matrix square-root, which will
        # be faster than densifying the kronecker product.

        # Let K = A X B. Let A (and B) have an eigenvalue decomposition of
        # U @ D @ U^T, where U is an orthogonal matrix. Then,
        # K = (U_A @ D_A @ U_A^T) X (U_B @ D_B @ U_B^T) =
        # (U_A X U_B) @ (D_A X D_B) @ (U_A X U_B)^T
        # Thus, a matrix square root of K would be
        # (U_A X U_B) @ (sqrt(D_A) X sqrt(D_B)) which offers
        # efficient matmul and solves.

        # Now, if we update the diagonal by `v * I`, we have
        # K + vI = (U_A X U_B) @ (D_A X D_B + vI) @ (U_A X U_B)^T,
        # so a matrix square root is (U_A X U_B) @ sqrt(D_A X D_B + vI),
        # which still admits an efficient matmul and solve.

        kronecker_diags = []
        kronecker_orths = []
        for block in kernel_matrix.operators:
            diag, orth = tf.linalg.eigh(block.to_dense())
            kronecker_diags.append(tf.linalg.LinearOperatorDiag(diag))
            kronecker_orths.append(
                linear_operator_unitary.LinearOperatorUnitary(orth))

        full_diag = tf.linalg.LinearOperatorKronecker(
            kronecker_diags).diag_part()
        full_diag = full_diag + observation_noise_variance[..., tf.newaxis]
        scale_diag = tf.math.sqrt(full_diag)
        diag_operator = tf.linalg.LinearOperatorDiag(scale_diag,
                                                     is_square=True,
                                                     is_non_singular=True,
                                                     is_positive_definite=True)

        orthogonal_operator = tf.linalg.LinearOperatorKronecker(
            kronecker_orths, is_square=True, is_non_singular=True)
        # This is efficient as a scale matrix. When used for matmuls, we take
        # advantage of the kronecker product and diagonal operator. When used for
        # solves, we take advantage of the orthogonal and diagonal structure,
        # which essentially reduces to the matmul case.
        return orthogonal_operator.matmul(diag_operator)

    # By default densify the kernel matrix and add noise.

    kernel_matrix = kernel_matrix.to_dense()
    broadcast_shape = distribution_util.get_broadcast_shape(
        kernel_matrix, observation_noise_variance[..., tf.newaxis, tf.newaxis])
    kernel_matrix = tf.broadcast_to(kernel_matrix, broadcast_shape)
    kernel_matrix = tf.linalg.set_diag(
        kernel_matrix,
        tf.linalg.diag_part(kernel_matrix) +
        observation_noise_variance[..., tf.newaxis])
    kernel_matrix = tf.linalg.LinearOperatorFullMatrix(kernel_matrix)
    kernel_cholesky = cholesky_util.cholesky_from_fn(kernel_matrix,
                                                     cholesky_fn)
    return kernel_cholesky
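
A small NumPy check (illustrative only; matrix sizes and the noise value are made up) of the identity the `Separable` branch above relies on: if K = A X B with eigendecompositions A = U_A @ D_A @ U_A^T and B = U_B @ D_B @ U_B^T, then (U_A X U_B) @ sqrt(D_A X D_B + v*I) is a matrix square root of K + v*I.

import numpy as np

rng = np.random.default_rng(0)

def random_spd(n):
  # Random symmetric positive-definite matrix.
  m = rng.normal(size=(n, n))
  return m @ m.T + n * np.eye(n)

a, b, v = random_spd(3), random_spd(2), 0.25
d_a, u_a = np.linalg.eigh(a)
d_b, u_b = np.linalg.eigh(b)

orth = np.kron(u_a, u_b)                                 # U_A X U_B (orthogonal)
scale = orth @ np.diag(np.sqrt(np.kron(d_a, d_b) + v))   # matrix square root
np.testing.assert_allclose(
    scale @ scale.T, np.kron(a, b) + v * np.eye(6), atol=1e-8)
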
Example No. 27
  def __init__(self,
               loc,
               scale,
               skewness=None,
               tailweight=None,
               distribution=None,
               validate_args=False,
               allow_nan_stats=True,
               name="SinhArcsinh"):
    """Construct SinhArcsinh distribution on `(-inf, inf)`.

    Arguments `(loc, scale, skewness, tailweight)` must have broadcastable shape
    (indexing batch dimensions).  They must all have the same `dtype`.

    Args:
      loc: Floating-point `Tensor`.
      scale:  `Tensor` of same `dtype` as `loc`.
      skewness:  Skewness parameter.  Default is `0.0` (no skew).
      tailweight:  Tailweight parameter. Default is `1.0` (unchanged tailweight).
      distribution: `tf.Distribution`-like instance. Distribution that is
        transformed to produce this distribution.
        Default is `tf.distributions.Normal(0., 1.)`.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a `SinhArcsinh` sample and `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
    parameters = dict(locals())

    with tf.name_scope(name, values=[loc, scale, skewness, tailweight]) as name:
      loc = tf.convert_to_tensor(loc, name="loc")
      dtype = loc.dtype
      scale = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
      tailweight = 1. if tailweight is None else tailweight
      has_default_skewness = skewness is None
      skewness = 0. if skewness is None else skewness
      tailweight = tf.convert_to_tensor(
          tailweight, name="tailweight", dtype=dtype)
      skewness = tf.convert_to_tensor(skewness, name="skewness", dtype=dtype)

      batch_shape = distribution_util.get_broadcast_shape(
          loc, scale, tailweight, skewness)

      # Recall, with Z a random variable,
      #   Y := loc + C * F(Z),
      #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight )
      #   F_0(Z) := Sinh( Arcsinh(Z) * tailweight )
      #   C := 2 * scale / F_0(2)
      if distribution is None:
        distribution = tf.distributions.Normal(
            loc=tf.zeros([], dtype=dtype),
            scale=tf.ones([], dtype=dtype),
            allow_nan_stats=allow_nan_stats)
      else:
        asserts = distribution_util.maybe_check_scalar_distribution(
            distribution, dtype, validate_args)
        if asserts:
          loc = control_flow_ops.with_dependencies(asserts, loc)

      # Make the SAS bijector, 'F'.
      f = bijectors.SinhArcsinh(
          skewness=skewness, tailweight=tailweight)
      if has_default_skewness:
        f_noskew = f
      else:
        f_noskew = bijectors.SinhArcsinh(
            skewness=skewness.dtype.as_numpy_dtype(0.),
            tailweight=tailweight)

      # Make the AffineScalar bijector, Z --> loc + (2 * scale / F_0(2)) * Z
      c = 2 * scale / f_noskew.forward(tf.convert_to_tensor(2, dtype=dtype))
      affine = bijectors.AffineScalar(
          shift=loc,
          scale=c,
          validate_args=validate_args)

      bijector = bijectors.Chain([affine, f])

      super(SinhArcsinh, self).__init__(
          distribution=distribution,
          bijector=bijector,
          batch_shape=batch_shape,
          validate_args=validate_args,
          name=name)
    self._parameters = parameters
    self._loc = loc
    self._scale = scale
    self._tailweight = tailweight
    self._skewness = skewness
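Example No. 28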
  def __init__(self,
               kernel,
               observation_index_points,
               observations,
               observations_is_missing=None,
               index_points=None,
               mean_fn=None,
               observation_noise_variance=None,
               predictive_noise_variance=None,
               cholesky_fn=None,
               validate_args=False,
               allow_nan_stats=False,
               name='MultiTaskGaussianProcessRegressionModelWithCholesky'):
    """Construct a MultiTaskGaussianProcessRegressionModelWithCholesky instance.

    WARNING: This method assumes `index_points` is the only varying parameter
    (i.e. is a `Variable` / changes after initialization) and hence is not
    tape-safe.

    Args:
      kernel: `MultiTaskKernel`-like instance representing the GP's covariance
        function.
      observation_index_points: `float` `Tensor` representing finite collection,
        or batch of collections, of points in the index set for which some data
        has been observed. Shape has the form `[b1, ..., bB, e, f1, ..., fF]`
        where `F` is the number of feature dimensions and must equal
        `kernel.feature_ndims`, and `e` is the number (size) of index points in
        each batch. `[b1, ..., bB, e]` must be broadcastable with the shape of
        `observations`, and `[b1, ..., bB]` must be broadcastable with the
        shapes of all other batched parameters (`kernel.batch_shape`,
        `index_points`, etc).
      observations: `float` `Tensor` representing collection, or batch of
        collections, of observations corresponding to
        `observation_index_points`. Shape has the form `[b1, ..., bB, e, t]`,
        which must be broadcastable with the batch and example shapes of
        `observation_index_points`. The batch shape `[b1, ..., bB]` must be
        broadcastable with the shapes of all other batched parameters
        (`kernel.batch_shape`, `index_points`, etc.).
      observations_is_missing:  `bool` `Tensor` of shape `[..., e, t]`,
        representing a batch of boolean masks.  When
        `observations_is_missing` is not `None`, this distribution is
        conditioned only on the observations for which the
        corresponding elements of `observations_is_missing` are `False`.
      index_points: `float` `Tensor` representing finite collection, or batch of
        collections, of points in the index set over which the GP is defined.
        Shape has the form `[b1, ..., bB, e, f1, ..., fF]` where `F` is the
        number of feature dimensions and must equal `kernel.feature_ndims` and
        `e` is the number (size) of index points in each batch. Ultimately this
        distribution corresponds to an `e`-dimensional multivariate normal. The
        batch shape must be broadcastable with `kernel.batch_shape`.
      mean_fn: Python `callable` that acts on `index_points` to produce a (batch
        of) collection of mean values at `index_points`. Takes a `Tensor` of
        shape `[b1, ..., bB, e, f1, ..., fF]` and returns a `Tensor` whose shape
        is broadcastable with `[b1, ..., bB, e, t]`, where `t` is the number of
        tasks.
      observation_noise_variance: `float` `Tensor` representing the variance of
        the noise in the Normal likelihood distribution of the model. May be
        batched, in which case the batch shape must be broadcastable with the
        shapes of all other batched parameters (`kernel.batch_shape`,
        `index_points`, etc.).
        Default value: `None`
      predictive_noise_variance: `float` `Tensor` representing the variance in
        the posterior predictive model. If `None`, we simply re-use
        `observation_noise_variance` for the posterior predictive noise. If set
        explicitly, however, we use this value. This allows us, for example, to
        omit predictive noise variance (by setting this to zero) to obtain
        noiseless posterior predictions of function values, conditioned on noisy
        observations.
      cholesky_fn: Callable which takes a single (batch) matrix argument and
        returns a Cholesky-like lower triangular factor.  Default value: `None`,
          in which case `make_cholesky_with_jitter_fn(1e-6)` is used.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `False`. When `True`, statistics
        (e.g., mean, mode, variance) use the value `NaN` to indicate the result
        is undefined. When `False`, an exception is raised if one or more of the
        statistic's batch members are undefined.
        Default value: `False`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: 'MultiTaskGaussianProcessRegressionModelWithCholesky'.
    """
    parameters = dict(locals())
    with tf.name_scope(name) as name:

      if not isinstance(kernel, multitask_kernel.MultiTaskKernel):
        raise ValueError('`kernel` must be a `MultiTaskKernel`.')

      dtype = dtype_util.common_dtype([
          index_points, observation_index_points, observations,
          observation_noise_variance, predictive_noise_variance
      ], tf.float32)
      index_points = tensor_util.convert_nonref_to_tensor(
          index_points, dtype=dtype, name='index_points')
      observation_index_points = tf.convert_to_tensor(
          observation_index_points,
          dtype=dtype,
          name='observation_index_points')
      observations = tf.convert_to_tensor(
          observations, dtype=dtype, name='observations')
      if observations_is_missing is not None:
        observations_is_missing = tf.convert_to_tensor(
            observations_is_missing, dtype=tf.bool)
      if observation_noise_variance is not None:
        observation_noise_variance = tf.convert_to_tensor(
            observation_noise_variance,
            dtype=dtype,
            name='observation_noise_variance')
      predictive_noise_variance = tensor_util.convert_nonref_to_tensor(
          predictive_noise_variance,
          dtype=dtype,
          name='predictive_noise_variance')
      if predictive_noise_variance is None:
        predictive_noise_variance = observation_noise_variance
      if cholesky_fn is None:
        self._cholesky_fn = cholesky_util.make_cholesky_with_jitter_fn()
      else:
        if not callable(cholesky_fn):
          raise ValueError('`cholesky_fn` must be a Python callable')
        self._cholesky_fn = cholesky_fn

      self._kernel = kernel
      self._index_points = index_points

      # Scalar or vector the size of the number of tasks.
      if mean_fn is not None:
        if not callable(mean_fn):
          raise ValueError('`mean_fn` must be a Python callable')
      self._mean_fn = mean_fn
      self._observation_noise_variance = observation_noise_variance
      self._predictive_noise_variance = predictive_noise_variance
      self._index_points = index_points
      self._observation_index_points = observation_index_points
      self._observations = observations
      self._observations_is_missing = observations_is_missing

      observation_covariance = self.kernel.matrix_over_all_tasks(
          observation_index_points, observation_index_points)

      if observation_noise_variance is not None:
        observation_covariance = observation_covariance.to_dense()
        broadcast_shape = distribution_util.get_broadcast_shape(
            observation_covariance, observation_noise_variance[..., tf.newaxis,
                                                               tf.newaxis])
        observation_covariance = tf.broadcast_to(observation_covariance,
                                                 broadcast_shape)
        observation_covariance = _add_diagonal_shift(observation_covariance,
                                                     observation_noise_variance)
        observation_covariance = tf.linalg.LinearOperatorFullMatrix(
            observation_covariance,
            is_non_singular=True,
            is_positive_definite=True)

      if observations_is_missing is not None:
        vec_observations_is_missing = _vec(observations_is_missing)
        observation_covariance = tf.linalg.LinearOperatorFullMatrix(
            psd_kernels_util.mask_matrix(
                observation_covariance.to_dense(),
                mask=~vec_observations_is_missing),
            is_non_singular=True,
            is_positive_definite=True)

      self._observation_cholesky = cholesky_util.cholesky_from_fn(
          observation_covariance, self._cholesky_fn)

      # Note that the conditional mean is
      # k(x, o) @ (k(o, o) + sigma**2)^-1 obs. We can precompute the latter
      # term since it won't change per iteration.
      if mean_fn:
        vec_observations = _vec(observations -
                                mean_fn(observation_index_points))
      else:
        vec_observations = _vec(observations)
      if observations_is_missing is not None:
        vec_observations = tf.where(~vec_observations_is_missing,
                                    vec_observations,
                                    tf.zeros([], dtype=vec_observations.dtype))
      self._solve_on_obs = self._observation_cholesky.solvevec(
          self._observation_cholesky.solvevec(vec_observations), adjoint=True)
      super(MultiTaskGaussianProcessRegressionModel, self).__init__(
          dtype=dtype,
          reparameterization_type=(reparameterization.FULLY_REPARAMETERIZED),
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          parameters=parameters,
          name=name)
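
The comment above ("the conditional mean is k(x, o) @ (k(o, o) + sigma**2)^-1 obs") can be checked with a tiny NumPy sketch; the kernel and data below are made up for illustration and are not part of the source.

import numpy as np

rng = np.random.default_rng(1)

def rbf(x1, x2):
  # Squared-exponential kernel on scalar inputs.
  return np.exp(-0.5 * (x1[:, None] - x2[None, :]) ** 2)

obs_x = rng.uniform(0., 5., size=10)
obs_y = np.sin(obs_x)
sigma2 = 0.1

k_oo = rbf(obs_x, obs_x) + sigma2 * np.eye(10)
chol = np.linalg.cholesky(k_oo)
# Two triangular solves give (k(o, o) + sigma^2 I)^{-1} obs once; the result
# is then reused for every new query point.
solve_on_obs = np.linalg.solve(chol.T, np.linalg.solve(chol, obs_y))

new_x = np.linspace(0., 5., 4)
cond_mean = rbf(new_x, obs_x) @ solve_on_obs
np.testing.assert_allclose(
    cond_mean, rbf(new_x, obs_x) @ np.linalg.solve(k_oo, obs_y), atol=1e-10)
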
Example No. 29
  def __init__(self,
               num_timesteps,
               level_scale,
               slope_scale,
               initial_state_prior,
               observation_noise_scale=0.,
               initial_step=0,
               validate_args=False,
               allow_nan_stats=True,
               name=None):
    """Build a state space model implementing a local linear trend.

    Args:
      num_timesteps: Scalar `int` `Tensor` number of timesteps to model
        with this distribution.
      level_scale: Scalar (any additional dimensions are treated as batch
        dimensions) `float` `Tensor` indicating the standard deviation of the
        level transitions.
      slope_scale: Scalar (any additional dimensions are treated as batch
        dimensions) `float` `Tensor` indicating the standard deviation of the
        slope transitions.
      initial_state_prior: instance of `tfd.MultivariateNormal`
        representing the prior distribution on latent states; must
        have event shape `[2]`.
      observation_noise_scale: Scalar (any additional dimensions are
        treated as batch dimensions) `float` `Tensor` indicating the standard
        deviation of the observation noise.
      initial_step: Optional scalar `int` `Tensor` specifying the starting
        timestep.
        Default value: 0.
      validate_args: Python `bool`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
        Default value: `False`.
      allow_nan_stats: Python `bool`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
        Default value: `True`.
      name: Python `str` name prefixed to ops created by this class.
        Default value: "LocalLinearTrendStateSpaceModel".
    """

    with tf.name_scope(name, 'LocalLinearTrendStateSpaceModel',
                       [level_scale, slope_scale]) as name:

      # The initial state prior determines the dtype of sampled values.
      # Other model parameters must have the same dtype.
      dtype = initial_state_prior.dtype

      level_scale = tf.convert_to_tensor(
          level_scale, name='level_scale', dtype=dtype)
      slope_scale = tf.convert_to_tensor(
          slope_scale, name='slope_scale', dtype=dtype)
      observation_noise_scale = tf.convert_to_tensor(
          observation_noise_scale, name='observation_noise_scale', dtype=dtype)

      # Explicitly broadcast all parameters to the same batch shape. This
      # allows us to use `tf.stack` for a compact model specification.
      broadcast_batch_shape = dist_util.get_broadcast_shape(
          level_scale, slope_scale)
      broadcast_ones = tf.ones(broadcast_batch_shape, dtype=dtype)

      self._level_scale = level_scale
      self._slope_scale = slope_scale
      self._observation_noise_scale = observation_noise_scale

      # Construct a linear Gaussian state space model implementing the
      # local linear trend model. See "Mathematical Details" in the
      # class docstring for further explanation.
      super(LocalLinearTrendStateSpaceModel, self).__init__(
          num_timesteps=num_timesteps,
          transition_matrix=tf.constant(
              [[1., 1.], [0., 1.]], dtype=dtype, name='transition_matrix'),
          transition_noise=tfd.MultivariateNormalDiag(
              scale_diag=tf.stack(
                  [level_scale * broadcast_ones, slope_scale * broadcast_ones],
                  axis=-1),
              name='transition_noise'),
          observation_matrix=tf.constant(
              [[1., 0.]], dtype=dtype, name='observation_matrix'),
          observation_noise=tfd.MultivariateNormalDiag(
              scale_diag=observation_noise_scale[..., tf.newaxis],
              name='observation_noise'),
          initial_state_prior=initial_state_prior,
          initial_step=initial_step,
          allow_nan_stats=allow_nan_stats,
          validate_args=validate_args,
          name=name)
Example No. 30
    def __init__(self,
                 num_timesteps,
                 level_scale,
                 slope_scale,
                 initial_state_prior,
                 observation_noise_scale=0.,
                 name=None,
                 **linear_gaussian_ssm_kwargs):
        """Build a state space model implementing a local linear trend.

    Args:
      num_timesteps: Scalar `int` `Tensor` number of timesteps to model
        with this distribution.
      level_scale: Scalar (any additional dimensions are treated as batch
        dimensions) `float` `Tensor` indicating the standard deviation of the
        level transitions.
      slope_scale: Scalar (any additional dimensions are treated as batch
        dimensions) `float` `Tensor` indicating the standard deviation of the
        slope transitions.
      initial_state_prior: instance of `tfd.MultivariateNormal`
        representing the prior distribution on latent states; must
        have event shape `[2]`.
      observation_noise_scale: Scalar (any additional dimensions are
        treated as batch dimensions) `float` `Tensor` indicating the standard
        deviation of the observation noise.
      name: Python `str` name prefixed to ops created by this class.
        Default value: "LocalLinearTrendStateSpaceModel".
      **linear_gaussian_ssm_kwargs: Optional additional keyword arguments to
        the base `tfd.LinearGaussianStateSpaceModel` constructor.
    """
        parameters = dict(locals())
        parameters.update(linear_gaussian_ssm_kwargs)
        del parameters['linear_gaussian_ssm_kwargs']
        with tf.name_scope(name or 'LocalLinearTrendStateSpaceModel') as name:
            # The initial state prior determines the dtype of sampled values.
            # Other model parameters must have the same dtype.
            dtype = initial_state_prior.dtype

            level_scale = tf.convert_to_tensor(value=level_scale,
                                               name='level_scale',
                                               dtype=dtype)
            slope_scale = tf.convert_to_tensor(value=slope_scale,
                                               name='slope_scale',
                                               dtype=dtype)
            observation_noise_scale = tf.convert_to_tensor(
                value=observation_noise_scale,
                name='observation_noise_scale',
                dtype=dtype)

            # Explicitly broadcast all parameters to the same batch shape. This
            # allows us to use `tf.stack` for a compact model specification.
            broadcast_batch_shape = dist_util.get_broadcast_shape(
                level_scale, slope_scale)
            broadcast_ones = tf.ones(broadcast_batch_shape, dtype=dtype)

            self._level_scale = level_scale
            self._slope_scale = slope_scale
            self._observation_noise_scale = observation_noise_scale

            # Construct a linear Gaussian state space model implementing the
            # local linear trend model. See "Mathematical Details" in the
            # class docstring for further explanation.
            super(LocalLinearTrendStateSpaceModel, self).__init__(
                num_timesteps=num_timesteps,
                transition_matrix=tf.constant([[1., 1.], [0., 1.]],
                                              dtype=dtype,
                                              name='transition_matrix'),
                transition_noise=tfd.MultivariateNormalDiag(
                    scale_diag=tf.stack([
                        level_scale * broadcast_ones,
                        slope_scale * broadcast_ones
                    ],
                                        axis=-1),
                    name='transition_noise'),
                observation_matrix=tf.constant([[1., 0.]],
                                               dtype=dtype,
                                               name='observation_matrix'),
                observation_noise=tfd.MultivariateNormalDiag(
                    scale_diag=observation_noise_scale[..., tf.newaxis],
                    name='observation_noise'),
                initial_state_prior=initial_state_prior,
                name=name,
                **linear_gaussian_ssm_kwargs)
            self._parameters = parameters
Example No. 31
    def __init__(self,
                 base_kernel,
                 fixed_inputs,
                 diag_shift=None,
                 validate_args=False,
                 name='SchurComplement'):
        """Construct a SchurComplement kernel instance.

    Args:
      base_kernel: A `PositiveSemidefiniteKernel` instance, the kernel used to
        build the block matrices of which this kernel computes the  Schur
        complement.
      fixed_inputs: A Tensor, representing a collection of inputs. The Schur
        complement that this kernel computes comes from a block matrix, whose
        bottom-right corner is derived from `base_kernel.matrix(fixed_inputs,
        fixed_inputs)`, and whose top-right and bottom-left pieces are
        constructed by computing the base_kernel at pairs of input locations
        together with these `fixed_inputs`. `fixed_inputs` is allowed to be an
        empty collection (either `None` or having a zero shape entry), in which
        case the kernel falls back to the trivial application of `base_kernel`
        to inputs. See class-level docstring for more details on the exact
        computation this does; `fixed_inputs` correspond to the `Z` structure
        discussed there. `fixed_inputs` is assumed to have shape `[b1, ..., bB,
        N, f1, ..., fF]` where the `b`'s are batch shape entries, the `f`'s are
        feature_shape entries, and `N` is the number of fixed inputs. Use of
        this kernel entails a 1-time O(N^3) cost of computing the Cholesky
        decomposition of the k(Z, Z) matrix. The batch shape elements of
        `fixed_inputs` must be broadcast compatible with
        `base_kernel.batch_shape`.
      diag_shift: A floating point scalar to be added to the diagonal of the
        divisor_matrix before computing its Cholesky.
      validate_args: If `True`, parameters are checked for validity despite
        possibly degrading runtime performance.
        Default value: `False`
      name: Python `str` name prefixed to Ops created by this class.
        Default value: `"SchurComplement"`
    """
        with tf.compat.v1.name_scope(name, values=[base_kernel,
                                                   fixed_inputs]) as name:
            # If the base_kernel doesn't have a specified dtype, we can't pass it off
            # to common_dtype, which always expects `tf.as_dtype(dtype)` to work (and
            # it doesn't if the given `dtype` is None).
            # TODO(b/130421035): Consider changing common_dtype to allow Nones, and
            # clean this up after.
            #
            # Thus, we spell out the logic
            # here: use the dtype of `fixed_inputs` if possible. If base_kernel.dtype
            # is not None, use the usual logic.
            if base_kernel.dtype is None:
                dtype = None if fixed_inputs is None else fixed_inputs.dtype
            else:
                dtype = dtype_util.common_dtype([base_kernel, fixed_inputs],
                                                tf.float32)
            self._base_kernel = base_kernel
            self._fixed_inputs = (None if fixed_inputs is None else
                                  tf.convert_to_tensor(value=fixed_inputs,
                                                       dtype=dtype))
            if not self._is_fixed_inputs_empty():
                # We create and store this matrix here, so that we get the caching
                # benefit when we later access its cholesky. If we computed the matrix
                # every time we needed the cholesky, the bijector cache wouldn't be hit.
                self._divisor_matrix = base_kernel.matrix(
                    fixed_inputs, fixed_inputs)
                if diag_shift is not None:
                    broadcast_shape = distribution_util.get_broadcast_shape(
                        self._divisor_matrix, diag_shift[..., tf.newaxis])
                    self._divisor_matrix = tf.broadcast_to(
                        self._divisor_matrix, broadcast_shape)
                    self._divisor_matrix = _add_diagonal_shift(
                        self._divisor_matrix, diag_shift)

            self._cholesky_bijector = invert.Invert(
                cholesky_outer_product.CholeskyOuterProduct())
        super(SchurComplement, self).__init__(base_kernel.feature_ndims,
                                              dtype=dtype,
                                              name=name)
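
A hedged usage sketch (made-up inputs; it assumes the public `tfp.math.psd_kernels.SchurComplement` kernel): evaluated at new points, the Schur-complement kernel gives the covariance of a GP conditioned on observations at `fixed_inputs`, with `diag_shift` playing the role of observation noise.

import numpy as np
import tensorflow_probability as tfp

psd_kernels = tfp.math.psd_kernels

base = psd_kernels.ExponentiatedQuadratic(
    amplitude=np.float32(1.), length_scale=np.float32(0.5))
fixed = np.random.uniform(-1., 1., size=[20, 1]).astype(np.float32)
schur = psd_kernels.SchurComplement(
    base_kernel=base, fixed_inputs=fixed, diag_shift=np.float32(1e-4))

x = np.linspace(-1., 1., 5, dtype=np.float32)[..., np.newaxis]
cov = schur.matrix(x, x)   # [5, 5] conditional covariance at `x`
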
Example No. 32
  def __init__(self,
               loc,
               scale,
               concentration,
               validate_args=False,
               allow_nan_stats=True,
               name='GeneralizedExtremeValue'):
    """Construct generalized extreme value distribution.

    The parameters `loc`, `scale`, and `concentration` must be shaped in a way
    that supports broadcasting (e.g. `loc + scale + concentration` is valid).

    Args:
      loc: Floating point tensor, the location parameter of the distribution(s).
      scale: Floating point tensor, the scales of the distribution(s).
        scale must contain only positive values.
      concentration: Floating point tensor, the concentration of
        the distribution(s).
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value `NaN` to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
        Default value: `True`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: `'GeneralizedExtremeValue'`.

    Raises:
      TypeError: if loc and scale are different dtypes.
    """
    parameters = dict(locals())
    with tf.name_scope(name) as name:
      dtype = dtype_util.common_dtype([loc, scale, concentration],
                                      dtype_hint=tf.float32)
      loc = tensor_util.convert_nonref_to_tensor(
          loc, name='loc', dtype=dtype)
      scale = tensor_util.convert_nonref_to_tensor(
          scale, name='scale', dtype=dtype)
      concentration = tensor_util.convert_nonref_to_tensor(
          concentration, name='concentration', dtype=dtype)
      dtype_util.assert_same_float_dtype([loc, scale, concentration])
      # Positive scale is asserted by the incorporated GEV bijector.
      self._gev_bijector = gev_cdf_bijector.GeneralizedExtremeValueCDF(
          loc=loc, scale=scale, concentration=concentration,
          validate_args=validate_args)

      batch_shape = distribution_util.get_broadcast_shape(loc, scale,
                                                          concentration)
      # Because the uniform sampler generates samples in `[0, 1)` this would
      # cause samples to lie in `[-inf, inf)` instead of `(-inf, inf)`. To fix
      # this, we use `np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny`
      # because it is the smallest, positive, 'normal' number.
      super(GeneralizedExtremeValue, self).__init__(
          # TODO(b/137665504): Use batch-adding meta-distribution to set the
          # batch shape instead of tf.ones.
          distribution=uniform.Uniform(
              low=np.finfo(dtype_util.as_numpy_dtype(dtype)).tiny,
              high=tf.ones(batch_shape, dtype=dtype),
              allow_nan_stats=allow_nan_stats),
          # The GEV bijector encodes the CDF function as the forward,
          # and hence needs to be inverted.
          bijector=invert_bijector.Invert(
              self._gev_bijector, validate_args=validate_args),
          parameters=parameters,
          name=name)
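
A quick usage sketch (made-up parameters; it assumes the public `tfd.GeneralizedExtremeValue` distribution built from this constructor): broadcasting `loc`, `scale` and `concentration` yields the batch shape computed above with `get_broadcast_shape`.

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

gev = tfd.GeneralizedExtremeValue(
    loc=tf.constant([0., 1., 2.]),               # shape [3]
    scale=1.5,                                   # scalar
    concentration=tf.constant([[0.1], [-0.1]]))  # shape [2, 1]
print(gev.batch_shape)    # [2, 3]
samples = gev.sample(4)   # shape [4, 2, 3]
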
Example No. 33
    def __init__(self,
                 loc,
                 scale,
                 low,
                 high,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="TruncatedNormal"):
        """Construct TruncatedNormal.

    All parameters of the distribution will be broadcast to the same shape,
    so the resulting distribution will have a batch_shape of the broadcast
    shape of all parameters.

    Args:
      loc: Floating point tensor; the mean of the normal distribution(s) (
        note that the mean of the resulting distribution will be different
        since it is modified by the bounds).
      scale: Floating point tensor; the std deviation of the normal
        distribution(s).
      low: `float` `Tensor` representing lower bound of the distribution's
        support. Must be such that `low < high`.
      high: `float` `Tensor` representing upper bound of the distribution's
        support. Must be such that `low < high`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked at run-time.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
        parameters = dict(locals())
        with tf.name_scope(name, values=[loc, scale, low, high]) as name:
            dtype = dtype_util.common_dtype([loc, scale, low, high],
                                            tf.float32)
            loc = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
            scale = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
            low = tf.convert_to_tensor(low, name="low", dtype=dtype)
            high = tf.convert_to_tensor(high, name="high", dtype=dtype)
            tf.assert_same_float_dtype([loc, scale, low, high])

            self._broadcast_batch_shape = distribution_util.get_broadcast_shape(
                loc, scale, low, high)

            # Broadcast all parameters to the same shape
            broadcast_ones = tf.ones(shape=self._broadcast_batch_shape,
                                     dtype=scale.dtype)
            self._scale = scale * broadcast_ones
            self._loc = loc * broadcast_ones
            self._low = low * broadcast_ones
            self._high = high * broadcast_ones

            with tf.control_dependencies(
                [self._validate()] if validate_args else []):
                self._loc = tf.identity(self._loc)

        super(TruncatedNormal, self).__init__(
            dtype=dtype,
            # This distribution is fully reparameterized. loc, scale have straight
            # through gradients. The gradients for the bounds are implemented using
            # custom derived expressions based on implicit gradients.
            # For the special case of lower bound zero and a positive upper bound
            # an equivalent expression can also be found in Sec 9.1.1.
            # of https://arxiv.org/pdf/1806.01851.pdf. The implementation here
            # handles arbitrary bounds.
            reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            parameters=parameters,
            graph_parents=[loc, scale, low, high],
            name=name)
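
A minimal usage sketch (made-up bounds; it assumes the public `tfd.TruncatedNormal` distribution): all four parameters broadcast to the batch shape computed via `get_broadcast_shape`, and samples respect the bounds.

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

tn = tfd.TruncatedNormal(
    loc=tf.constant([0.3, 0.7]), scale=0.2, low=0., high=1.)
print(tn.batch_shape)     # [2]
samples = tn.sample(100)  # all values lie in [0., 1.]
mean = tn.mean()          # differs from `loc` because of the truncation
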
Example No. 34
    def precompute_regression_model(
            kernel,
            observation_index_points,
            observations,
            index_points=None,
            observation_noise_variance=0.,
            predictive_noise_variance=None,
            mean_fn=None,
            jitter=1e-6,
            validate_args=False,
            allow_nan_stats=False,
            name='PrecomputedGaussianProcessRegressionModel'):
        """Returns a GaussianProcessRegressionModel with precomputed quantities.

    This differs from the constructor by precomputing quantities associated with
    observations in a non-tape safe way. `index_points` is the only parameter
    that is allowed to vary (i.e. is a `Variable` / changes after
    initialization).

    Specifically:

    * We make `observation_index_points` and `observations` mandatory
      parameters.
    * We precompute `kernel(observation_index_points, observation_index_points)`
      along with any other associated quantities relating to the `kernel`,
      `observations` and `observation_index_points`.

    A typical use case would be optimizing kernel hyperparameters for a
    `GaussianProcess`, and computing the posterior predictive with respect to
    those optimized hyperparameters and observation / index-points pairs.

    WARNING: This method assumes `index_points` is the only varying parameter
    (i.e. is a `Variable` / changes after initialization) and hence is not
    tape-safe.

    Args:
      kernel: `PositiveSemidefiniteKernel`-like instance representing the
        GP's covariance function.
      observation_index_points: `float` `Tensor` representing finite collection,
        or batch of collections, of points in the index set for which some data
        has been observed. Shape has the form `[b1, ..., bB, e, f1, ..., fF]`
        where `F` is the number of feature dimensions and must equal
        `kernel.feature_ndims`, and `e` is the number (size) of index points in
        each batch. `[b1, ..., bB, e]` must be broadcastable with the shape of
        `observations`, and `[b1, ..., bB]` must be broadcastable with the
        shapes of all other batched parameters (`kernel.batch_shape`,
        `index_points`, etc). The default value is `None`, which corresponds to
        the empty set of observations, and simply results in the prior
        predictive model (a GP with noise of variance
        `predictive_noise_variance`).
      observations: `float` `Tensor` representing collection, or batch of
        collections, of observations corresponding to
        `observation_index_points`. Shape has the form `[b1, ..., bB, e]`, which
        must be broadcastable with the batch and example shapes of
        `observation_index_points`. The batch shape `[b1, ..., bB]` must be
        broadcastable with the shapes of all other batched parameters
        (`kernel.batch_shape`, `index_points`, etc.). The default value is
        `None`, which corresponds to the empty set of observations, and simply
        results in the prior predictive model (a GP with noise of variance
        `predictive_noise_variance`).
      index_points: `float` `Tensor` representing finite collection, or batch of
        collections, of points in the index set over which the GP is defined.
        Shape has the form `[b1, ..., bB, e, f1, ..., fF]` where `F` is the
        number of feature dimensions and must equal `kernel.feature_ndims` and
        `e` is the number (size) of index points in each batch. Ultimately this
        distribution corresponds to an `e`-dimensional multivariate normal. The
        batch shape must be broadcastable with `kernel.batch_shape` and any
        batch dims yielded by `mean_fn`.
      observation_noise_variance: `float` `Tensor` representing the variance
        of the noise in the Normal likelihood distribution of the model. May be
        batched, in which case the batch shape must be broadcastable with the
        shapes of all other batched parameters (`kernel.batch_shape`,
        `index_points`, etc.).
        Default value: `0.`
      predictive_noise_variance: `float` `Tensor` representing the variance in
        the posterior predictive model. If `None`, we simply re-use
        `observation_noise_variance` for the posterior predictive noise. If set
        explicitly, however, we use this value. This allows us, for example, to
        omit predictive noise variance (by setting this to zero) to obtain
        noiseless posterior predictions of function values, conditioned on noisy
        observations.
      mean_fn: Python `callable` that acts on `index_points` to produce a
        collection, or batch of collections, of mean values at `index_points`.
        Takes a `Tensor` of shape `[b1, ..., bB, f1, ..., fF]` and returns a
        `Tensor` whose shape is broadcastable with `[b1, ..., bB]`.
        Default value: `None` implies the constant zero function.
      jitter: `float` scalar `Tensor` added to the diagonal of the covariance
        matrix to ensure positive definiteness of the covariance matrix.
        Default value: `1e-6`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `False`. When `True`,
        statistics (e.g., mean, mode, variance) use the value `NaN` to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
        Default value: `False`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: 'PrecomputedGaussianProcessRegressionModel'.
    Returns:
      An instance of `GaussianProcessRegressionModel` with precomputed
      quantities associated with observations.
    """

        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([
                index_points, observation_index_points, observations,
                observation_noise_variance, predictive_noise_variance, jitter
            ], tf.float32)

            # Convert to tensor arguments that are expected to not be Variables / not
            # going to change.
            jitter = tf.convert_to_tensor(jitter, dtype=dtype)

            observation_index_points = tf.convert_to_tensor(
                observation_index_points, dtype=dtype)
            observation_noise_variance = tf.convert_to_tensor(
                observation_noise_variance, dtype=dtype)
            observations = tf.convert_to_tensor(observations, dtype=dtype)

            observation_cholesky = kernel.matrix(observation_index_points,
                                                 observation_index_points)

            broadcast_shape = distribution_util.get_broadcast_shape(
                observation_cholesky,
                observation_noise_variance[..., tf.newaxis, tf.newaxis])

            observation_cholesky = tf.broadcast_to(observation_cholesky,
                                                   broadcast_shape)

            observation_cholesky = tf.linalg.set_diag(
                observation_cholesky,
                tf.linalg.diag_part(observation_cholesky) + jitter +
                observation_noise_variance[..., tf.newaxis])
            observation_cholesky = tf.linalg.cholesky(observation_cholesky)
            observation_cholesky_operator = tf.linalg.LinearOperatorLowerTriangular(
                observation_cholesky)

            conditional_kernel = tfpk.SchurComplement.with_precomputed_divisor(
                base_kernel=kernel,
                fixed_inputs=observation_index_points,
                diag_shift=observation_noise_variance + jitter)

            if mean_fn is None:
                mean_fn = lambda x: tf.zeros([1], dtype=dtype)
            else:
                if not callable(mean_fn):
                    raise ValueError('`mean_fn` must be a Python callable')

            diff = observations - mean_fn(observation_index_points)
            solve_on_observation = observation_cholesky_operator.solvevec(
                observation_cholesky_operator.solvevec(diff), adjoint=True)

            def conditional_mean_fn(x):
                k_x_obs = kernel.matrix(x, observation_index_points)
                return mean_fn(x) + tf.linalg.matvec(k_x_obs,
                                                     solve_on_observation)

            gprm = GaussianProcessRegressionModel(
                kernel=kernel,
                observation_index_points=observation_index_points,
                observations=observations,
                index_points=index_points,
                observation_noise_variance=observation_noise_variance,
                predictive_noise_variance=predictive_noise_variance,
                jitter=jitter,
                _conditional_kernel=conditional_kernel,
                _conditional_mean_fn=conditional_mean_fn,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                name=name)

        return gprm
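
An illustrative usage sketch (toy data; it assumes `precompute_regression_model` is exposed as a static constructor on `tfd.GaussianProcessRegressionModel` in a recent TensorFlow Probability release): observation-dependent quantities are computed once up front, and only `index_points` is expected to vary afterwards.

import numpy as np
import tensorflow_probability as tfp

tfd = tfp.distributions
psd_kernels = tfp.math.psd_kernels

obs_x = np.linspace(-1., 1., 25, dtype=np.float64)[..., np.newaxis]
obs_y = np.sin(3. * obs_x[..., 0])

gprm = tfd.GaussianProcessRegressionModel.precompute_regression_model(
    kernel=psd_kernels.ExponentiatedQuadratic(np.float64(1.), np.float64(0.5)),
    observation_index_points=obs_x,
    observations=obs_y,
    index_points=np.linspace(-1., 1., 100, dtype=np.float64)[..., np.newaxis],
    observation_noise_variance=np.float64(1e-3))
posterior_mean = gprm.mean()   # predictive mean at the 100 index points
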
Example No. 35
  def test_all_static_shapes_work(self):
    x = tf.ones((2, 1, 3))
    y = tf.ones((1, 5, 3))
    z = tf.ones(())
    self.assertAllEqual([2, 5, 3],
                        distribution_util.get_broadcast_shape(x, y, z))
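
For comparison (illustrative only; `np.broadcast_shapes` requires NumPy 1.20+), standard NumPy broadcasting rules give the same answer the test above expects from `get_broadcast_shape`.

import numpy as np

assert np.broadcast_shapes((2, 1, 3), (1, 5, 3), ()) == (2, 5, 3)
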