  def testVerifyTensorAllFiniteSucceeds(self):
    x_shape = [5, 4]
    x = np.random.random_sample(x_shape).astype(np.float32)
    with test_util.use_gpu():
      t = constant_op.constant(x, shape=x_shape, dtype=dtypes.float32)
      t_verified = numerics.verify_tensor_all_finite(t,
                                                     "Input is not a number.")
      self.assertAllClose(x, self.evaluate(t_verified))
  def testVerifyTensorAllFiniteFails(self):
    x_shape = [5, 4]
    x = np.random.random_sample(x_shape).astype(np.float32)
    my_msg = "Input is not a number."

    # Test NaN.
    x[0] = np.nan
    with test_util.use_gpu():
      with self.assertRaisesOpError(my_msg):
        t = constant_op.constant(x, shape=x_shape, dtype=dtypes.float32)
        t_verified = numerics.verify_tensor_all_finite(t, my_msg)
        self.evaluate(t_verified)

    # Test Inf.
    x[0] = np.inf
    with test_util.use_gpu():
      with self.assertRaisesOpError(my_msg):
        t = constant_op.constant(x, shape=x_shape, dtype=dtypes.float32)
        t_verified = numerics.verify_tensor_all_finite(t, my_msg)
        self.evaluate(t_verified)
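
The same check can be exercised eagerly; a minimal sketch, assuming TensorFlow 2.x and using the public tf.debugging.check_numerics (the eager-friendly counterpart of the internal numerics.verify_tensor_all_finite used in the tests above): a finite tensor passes through unchanged, while a NaN or Inf raises InvalidArgumentError.

import numpy as np
import tensorflow as tf

x = np.random.random_sample([5, 4]).astype(np.float32)
# Finite input: the verified tensor is returned unchanged.
checked = tf.debugging.check_numerics(tf.constant(x), "Input is not a number.")
assert np.allclose(x, checked.numpy())

# Non-finite input: the check fails with InvalidArgumentError.
x[0] = np.nan
try:
  tf.debugging.check_numerics(tf.constant(x), "Input is not a number.")
except tf.errors.InvalidArgumentError as e:
  print("caught:", e.message)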
Example #3
def verify_tensor_all_finite(labeled_tensor, message, name=None):
  """Asserts a tensor doesn't contain NaNs or Infs.

  See tf.verify_tensor_all_finite.

  Args:
    labeled_tensor: The input tensor.
    message: Message to log on failure.
    name: Optional op name.

  Returns:
    The input tensor.
  """
  with ops.name_scope(name, 'lt_verify_tensor_all_finite',
                      [labeled_tensor]) as scope:
    labeled_tensor = core.convert_to_labeled_tensor(labeled_tensor)
    op = numerics.verify_tensor_all_finite(
        labeled_tensor.tensor, msg=message, name=scope)
    return core.LabeledTensor(op, labeled_tensor.axes)
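
A usage sketch for the wrapper above, assuming TensorFlow 1.x with tf.contrib.labeled_tensor importable as lt and re-exporting this wrapper; the axis names 'row' and 'col' are purely illustrative.

import tensorflow as tf
import tensorflow.contrib.labeled_tensor as lt

raw = tf.constant([[1.0, 2.0], [3.0, 4.0]])
labeled = lt.LabeledTensor(raw, ['row', 'col'])          # attach axis names
checked = lt.verify_tensor_all_finite(labeled, 'found NaN or Inf')
# The result wraps the verified op and keeps the original axes.
print(checked.axes)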
Example #4
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
  """Clips values of multiple tensors by the ratio of the sum of their norms.

  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. Optionally,
  if you've already computed the global norm for `t_list`, you can specify
  the global norm with `use_norm`.

  To perform the clipping, the values `t_list[i]` are set to:

      t_list[i] * clip_norm / max(global_norm, clip_norm)

  where:

      global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))

  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.

  Any entries of `t_list` that are `None` are ignored.

  This is the correct way to perform gradient clipping (for example, see
  [Pascanu et al., 2012](http://arxiv.org/abs/1211.5063)
  ([pdf](http://arxiv.org/pdf/1211.5063.pdf))).

  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).

  Returns:
    list_clipped: A list of `Tensors` of the same type as `t_list`.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.

  Raises:
    TypeError: If `t_list` is not a sequence.
    InvalidArgumentError: If global norm is not finite.
  """
  if (not isinstance(t_list, collections.Sequence)
      or isinstance(t_list, six.string_types)):
    raise TypeError("t_list should be a sequence")
  t_list = list(t_list)
  if use_norm is None:
    use_norm = global_norm(t_list, name)
  use_norm = numerics.verify_tensor_all_finite(use_norm,
                                               "Found Inf or NaN global norm.")

  with ops.name_scope(name, "clip_by_global_norm",
                      t_list + [clip_norm]) as name:
    # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
    scale = clip_norm * math_ops.minimum(
        1.0 / use_norm,
        constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)

    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, ops.IndexedSlices) else t,
            name="t_%d" % i)
        if t is not None else t
        for i, t in enumerate(t_list)]

    values_clipped = []
    for i, v in enumerate(values):
      if v is None:
        values_clipped.append(None)
      else:
        with ops.colocate_with(v):
          values_clipped.append(
              array_ops.identity(v * scale, name="%s_%d" % (name, i)))

    list_clipped = [
        ops.IndexedSlices(c_v, t.indices, t.dense_shape)
        if isinstance(t, ops.IndexedSlices)
        else c_v
        for (c_v, t) in zip(values_clipped, t_list)]

  return list_clipped, use_norm
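
A sketch of how clip_by_global_norm is typically used for gradient clipping in a TF 1.x training step, followed by a NumPy check of the clipping formula from the docstring; the optimizer, the toy loss, and the clip value of 5.0 are assumptions for illustration.

import numpy as np
import tensorflow as tf

# Toy TF 1.x graph so the snippet is self-contained.
w = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(w))

# Gradient clipping pattern: clip all gradients by their joint global norm.
optimizer = tf.train.AdamOptimizer(1e-3)
grads_and_vars = optimizer.compute_gradients(loss)
grads, variables = zip(*grads_and_vars)
clipped_grads, global_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = optimizer.apply_gradients(zip(clipped_grads, variables))

# NumPy check of t_list[i] * clip_norm / max(global_norm, clip_norm).
t_list = [np.array([3.0, 4.0]), np.array([0.0, 12.0])]      # norms 5 and 12
global_norm = np.sqrt(sum(np.sum(t ** 2) for t in t_list))  # sqrt(25 + 144) = 13
clip_norm = 5.0
clipped = [t * clip_norm / max(global_norm, clip_norm) for t in t_list]
# After clipping, the global norm of the list equals clip_norm.
assert np.isclose(np.sqrt(sum(np.sum(t ** 2) for t in clipped)), clip_norm)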
Example #5
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
    """Clips values of multiple tensors by the ratio of the sum of their norms.

  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. Optionally,
  if you've already computed the global norm for `t_list`, you can specify
  the global norm with `use_norm`.

  To perform the clipping, the values `t_list[i]` are set to:

      t_list[i] * clip_norm / max(global_norm, clip_norm)

  where:

      global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))

  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.

  Any entries of `t_list` that are `None` are ignored.

  This is the correct way to perform gradient clipping (for example, see
  [Pascanu et al., 2012](http://arxiv.org/abs/1211.5063)
  ([pdf](http://arxiv.org/pdf/1211.5063.pdf))).

  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).

  Returns:
    list_clipped: A list of `Tensors` of the same type as `t_list`.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.

  Raises:
    TypeError: If `t_list` is not a sequence.
    InvalidArgumentError: If global norm is not finite.
  """
    if (not isinstance(t_list, collections.Sequence)
            or isinstance(t_list, six.string_types)):
        raise TypeError("t_list should be a sequence")
    t_list = list(t_list)
    if use_norm is None:
        use_norm = global_norm(t_list, name)
    use_norm = numerics.verify_tensor_all_finite(
        use_norm, "Found Inf or NaN global norm.")

    with ops.name_scope(name, "clip_by_global_norm",
                        t_list + [clip_norm]) as name:
        # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
        scale = clip_norm * math_ops.minimum(
            1.0 / use_norm,
            constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)

        values = [
            ops.convert_to_tensor(
                t.values if isinstance(t, ops.IndexedSlices) else t,
                name="t_%d" % i) if t is not None else t
            for i, t in enumerate(t_list)
        ]

        values_clipped = []
        for i, v in enumerate(values):
            if v is None:
                values_clipped.append(None)
            else:
                with ops.colocate_with(v):
                    values_clipped.append(
                        array_ops.identity(v * scale,
                                           name="%s_%d" % (name, i)))

        list_clipped = [
            ops.IndexedSlices(c_v, t.indices, t.dense_shape) if isinstance(
                t, ops.IndexedSlices) else c_v
            for (c_v, t) in zip(values_clipped, t_list)
        ]

    return list_clipped, use_norm
Example #6
    def do_filter(self, estimated_state, estimated_state_covariance,
                  predicted_observation, predicted_observation_covariance,
                  observation, observation_model, observation_noise):
        """Convenience function for scoring predictions.

    Scores a prediction against an observation, and computes the updated
    posterior over states.

    Shapes given below for arguments are for single-model Kalman filtering
    (e.g. KalmanFilter). For ensembles, prior_state and prior_state_var are
    same-length tuples of values corresponding to each model.

    Args:
      estimated_state: A prior mean over states [batch size x state dimension]
      estimated_state_covariance: Covariance of state prior [batch size x D x
          D], with D depending on the Kalman filter implementation (typically
          the state dimension).
      predicted_observation: A prediction for the observed value, such as that
          returned by observed_from_state. A [batch size x num features] Tensor.
      predicted_observation_covariance: A covariance matrix corresponding to
          `predicted_observation`, a [batch size x num features x num features]
          Tensor.
      observation: The observed value corresponding to the predictions
          given [batch size x observation dimension]
      observation_model: The [batch size x observation dimension x model state
          dimension] Tensor indicating how a particular state is mapped to
          (pre-noise) observations for each part of the batch.
      observation_noise: A [batch size x observation dimension x observation
          dimension] Tensor or [observation dimension x observation dimension]
          Tensor with covariance matrices to use for each part of the batch (a
          two-dimensional input will be broadcast).
    Returns:
      posterior_state, posterior_state_var: Posterior mean and
          covariance, updated versions of prior_state and
          prior_state_var.
      log_prediction_prob: Log probability of the observations under
          the priors, suitable for optimization (should be maximized).

    """
        symmetrized_observation_covariance = 0.5 * (
            predicted_observation_covariance +
            array_ops.matrix_transpose(predicted_observation_covariance))
        instability_message = (
            "This may occur due to numerically unstable filtering when there is "
            "a large difference in posterior variances, or when inferences are "
            "near-deterministic. Considering tuning the "
            "'filtering_maximum_posterior_variance_ratio' or "
            "'filtering_minimum_posterior_variance' parameters in your "
            "StateSpaceModelConfiguration, or tuning the transition matrix.")
        symmetrized_observation_covariance = numerics.verify_tensor_all_finite(
            symmetrized_observation_covariance,
            "Predicted observation covariance was not finite. {}".format(
                instability_message))
        diag = array_ops.matrix_diag_part(symmetrized_observation_covariance)
        min_diag = math_ops.reduce_min(diag)
        non_negative_assert = control_flow_ops.Assert(
            min_diag >= 0.,
            [("The predicted observation covariance "
              "has a negative diagonal entry. {}").format(instability_message),
             min_diag])
        with ops.control_dependencies([non_negative_assert]):
            observation_covariance_cholesky = linalg_ops.cholesky(
                symmetrized_observation_covariance)
        log_prediction_prob = distributions.MultivariateNormalTriL(
            predicted_observation,
            observation_covariance_cholesky).log_prob(observation)
        (posterior_state,
         posterior_state_var) = self.posterior_from_prior_state(
             prior_state=estimated_state,
             prior_state_var=estimated_state_covariance,
             observation=observation,
             observation_model=observation_model,
             predicted_observations=(predicted_observation,
                                     predicted_observation_covariance),
             observation_noise=observation_noise)
        return (posterior_state, posterior_state_var, log_prediction_prob)
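
A NumPy sketch of the numerical guards do_filter applies before the posterior update: symmetrize the predicted covariance, verify it is finite with a non-negative diagonal, then use its Cholesky factor to score the observation under a multivariate normal. All values are illustrative.

import numpy as np

predicted_observation = np.array([0.5, -1.0])
predicted_observation_covariance = np.array([[1.0, 0.2],
                                             [0.3, 2.0]])   # slightly asymmetric
observation = np.array([0.7, -0.8])

# Symmetrize, then apply the same finiteness / non-negativity checks.
sym = 0.5 * (predicted_observation_covariance +
             predicted_observation_covariance.T)
assert np.all(np.isfinite(sym)), "Predicted observation covariance was not finite."
assert np.min(np.diag(sym)) >= 0.0, "Negative diagonal entry."

# Cholesky factor and the multivariate-normal log density it induces.
chol = np.linalg.cholesky(sym)                        # sym == chol @ chol.T
resid = np.linalg.solve(chol, observation - predicted_observation)
k = observation.shape[0]
log_prediction_prob = -0.5 * (k * np.log(2.0 * np.pi)
                              + 2.0 * np.sum(np.log(np.diag(chol)))
                              + resid @ resid)
print(log_prediction_prob)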
Example #7
  def do_filter(
      self, estimated_state, estimated_state_covariance,
      predicted_observation, predicted_observation_covariance,
      observation, observation_model, observation_noise):
    """Convenience function for scoring predictions.

    Scores a prediction against an observation, and computes the updated
    posterior over states.

    Shapes given below for arguments are for single-model Kalman filtering
    (e.g. KalmanFilter). For ensembles, prior_state and prior_state_var are
    same-length tuples of values corresponding to each model.

    Args:
      estimated_state: A prior mean over states [batch size x state dimension]
      estimated_state_covariance: Covariance of state prior [batch size x D x
          D], with D depending on the Kalman filter implementation (typically
          the state dimension).
      predicted_observation: A prediction for the observed value, such as that
          returned by observed_from_state. A [batch size x num features] Tensor.
      predicted_observation_covariance: A covariance matrix corresponding to
          `predicted_observation`, a [batch size x num features x num features]
          Tensor.
      observation: The observed value corresponding to the predictions
          given [batch size x observation dimension]
      observation_model: The [batch size x observation dimension x model state
          dimension] Tensor indicating how a particular state is mapped to
          (pre-noise) observations for each part of the batch.
      observation_noise: A [batch size x observation dimension x observation
          dimension] Tensor or [observation dimension x observation dimension]
          Tensor with covariance matrices to use for each part of the batch (a
          two-dimensional input will be broadcast).
    Returns:
      posterior_state, posterior_state_var: Posterior mean and
          covariance, updated versions of prior_state and
          prior_state_var.
      log_prediction_prob: Log probability of the observations under
          the priors, suitable for optimization (should be maximized).

    """
    symmetrized_observation_covariance = 0.5 * (
        predicted_observation_covariance + array_ops.matrix_transpose(
            predicted_observation_covariance))
    instability_message = (
        "This may occur due to numerically unstable filtering when there is "
        "a large difference in posterior variances, or when inferences are "
        "near-deterministic. Considering tuning the "
        "'filtering_maximum_posterior_variance_ratio' or "
        "'filtering_minimum_posterior_variance' parameters in your "
        "StateSpaceModelConfiguration, or tuning the transition matrix.")
    symmetrized_observation_covariance = numerics.verify_tensor_all_finite(
        symmetrized_observation_covariance,
        "Predicted observation covariance was not finite. {}".format(
            instability_message))
    diag = array_ops.matrix_diag_part(symmetrized_observation_covariance)
    min_diag = math_ops.reduce_min(diag)
    non_negative_assert = control_flow_ops.Assert(
        min_diag >= 0.,
        [("The predicted observation covariance "
          "has a negative diagonal entry. {}").format(instability_message),
         min_diag])
    with ops.control_dependencies([non_negative_assert]):
      observation_covariance_cholesky = linalg_ops.cholesky(
          symmetrized_observation_covariance)
    log_prediction_prob = distributions.MultivariateNormalTriL(
        predicted_observation, observation_covariance_cholesky).log_prob(
            observation)
    (posterior_state,
     posterior_state_var) = self.posterior_from_prior_state(
         prior_state=estimated_state,
         prior_state_var=estimated_state_covariance,
         observation=observation,
         observation_model=observation_model,
         predicted_observations=(predicted_observation,
                                 predicted_observation_covariance),
         observation_noise=observation_noise)
    return (posterior_state, posterior_state_var, log_prediction_prob)
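
For context, a NumPy sketch of the textbook Kalman measurement update that posterior_from_prior_state is expected to compute (the standard formula, not necessarily the exact implementation in this codebase); shapes are for a single model without batching and all values are illustrative.

import numpy as np

estimated_state = np.array([1.0, 0.0])           # prior mean over the state
estimated_state_covariance = np.eye(2)           # prior covariance P
observation_model = np.array([[1.0, 0.0]])       # H: state -> observation
observation_noise = np.array([[0.5]])            # R
observation = np.array([1.4])                    # y

# Standard update: K = P H^T (H P H^T + R)^-1,
# posterior mean = x + K (y - H x), posterior covariance = (I - K H) P.
innovation = observation - observation_model @ estimated_state
innovation_covariance = (observation_model @ estimated_state_covariance
                         @ observation_model.T + observation_noise)
kalman_gain = (estimated_state_covariance @ observation_model.T
               @ np.linalg.inv(innovation_covariance))
posterior_state = estimated_state + kalman_gain @ innovation
posterior_state_var = ((np.eye(2) - kalman_gain @ observation_model)
                       @ estimated_state_covariance)
print(posterior_state)
print(posterior_state_var)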