def _check_shapes_dynamic(self, operator, v, diag):
    """Return (v, diag) with Assert dependencies, which check shape."""
    checks = []
    with ops.op_scope([operator, v, diag], 'check_shapes'):
      s_v = array_ops.shape(v)
      r_op = operator.rank()
      r_v = array_ops.rank(v)
      if diag is not None:
        s_d = array_ops.shape(diag)
        r_d = array_ops.rank(diag)

      # Check tensor rank.
      checks.append(check_ops.assert_rank(v, r_op))
      if diag is not None:
        checks.append(check_ops.assert_rank(diag, r_op - 1))

      # Check batch shape
      checks.append(check_ops.assert_equal(
          operator.batch_shape(), array_ops.slice(s_v, [0], [r_v - 2])))
      if diag is not None:
        checks.append(check_ops.assert_equal(
            operator.batch_shape(), array_ops.slice(s_d, [0], [r_d - 1])))

      # Check event shape
      checks.append(check_ops.assert_equal(
          operator.vector_space_dimension(), array_ops.gather(s_v, r_v - 2)))
      if diag is not None:
        checks.append(check_ops.assert_equal(
            array_ops.gather(s_v, r_v - 1), array_ops.gather(s_d, r_d - 1)))

      v = control_flow_ops.with_dependencies(checks, v)
      if diag is not None:
        diag = control_flow_ops.with_dependencies(checks, diag)
      return v, diag
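The pattern above — collect assertion ops, then thread them into the returned tensors with `with_dependencies` — is worth seeing end to end. Below is a minimal sketch using the public `tf.compat.v1` API rather than the internal `check_ops`/`control_flow_ops` modules; the placeholder name and feed values are hypothetical.

```python
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

v_in = tf.placeholder(tf.float32, name="v")  # hypothetical input
check = tf.debugging.assert_rank(v_in, 2, message="v must be a matrix")
with tf.control_dependencies([check]):
  # Evaluating `v` now also runs the rank check.
  v = tf.identity(v_in)

with tf.Session() as sess:
  print(sess.run(v, feed_dict={v_in: [[1., 2.]]}))  # passes
  # sess.run(v, feed_dict={v_in: [1., 2.]})  # would raise InvalidArgumentError
```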
Example #2
  def _check_mu(self, mu):
    """Return `mu` after validity checks and possibly with assertations."""
    mu = ops.convert_to_tensor(mu)
    cov = self._cov

    if mu.dtype != cov.dtype:
      raise TypeError(
          "mu and cov must have the same dtype.  Found mu.dtype = %s, "
          "cov.dtype = %s"
          % (mu.dtype, cov.dtype))
    if not self.strict:
      return mu
    else:
      assert_compatible_shapes = control_flow_ops.group(
          check_ops.assert_equal(
              array_ops.rank(mu) + 1,
              cov.rank(),
              data=["mu should have rank 1 less than cov.  Found: rank(mu) = ",
                    array_ops.rank(mu), " rank(cov) = ", cov.rank()],
          ),
          check_ops.assert_equal(
              array_ops.shape(mu),
              cov.vector_shape(),
              data=["mu.shape and cov.shape[:-1] should match.  "
                    "Found: shape(mu) = "
                    , array_ops.shape(mu), " shape(cov) = ", cov.shape()],
          ),
      )
      return control_flow_ops.with_dependencies([assert_compatible_shapes], mu)
Example #3
  def call(self, labels, predictions, weights=None):
    """Accumulate accuracy statistics.

    For example, if labels is [1, 2, 3, 4] and predictions is [0, 2, 3, 4]
    then the accuracy is 3/4 or .75.  If the weights were specified as
    [1, 1, 0, 0] then the accuracy would be 1/2 or .5.

    `labels` and `predictions` should have the same shape and type.

    Args:
      labels: Tensor with the true labels for each example.  One example
        per element of the Tensor.
      predictions: Tensor with the predicted label for each example.
      weights: Optional weighting of each example. Defaults to 1.

    Returns:
      The arguments, for easy chaining.
    """
    check_ops.assert_equal(
        array_ops.shape(labels), array_ops.shape(predictions),
        message="Shapes of labels and predictions are unequal")
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(Accuracy, self).call(matches, weights=weights)
    if weights is None:
      return labels, predictions
    return labels, predictions, weights
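A quick numpy check of the arithmetic described in the docstring (the values are the docstring's own example, independent of the `Accuracy` class itself):

```python
import numpy as np

labels = np.array([1, 2, 3, 4])
predictions = np.array([0, 2, 3, 4])
matches = (labels == predictions).astype(np.float64)  # [0., 1., 1., 1.]
print(matches.mean())  # 0.75

weights = np.array([1., 1., 0., 0.])
print((matches * weights).sum() / weights.sum())  # 0.5
```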
Example #4
  def call(self, labels, predictions, weights=None):
    """Accumulate accuracy statistics.

    `labels` and `predictions` should have the same shape.
    Since argmax is applied here, the types of `labels` and `predictions`
    can differ.

    Args:
      labels: One-hot Tensor.
      predictions: Tensor with the logits or probabilities for each example.
      weights: Optional weighting of each example. Defaults to 1.

    Returns:
      The arguments, for easy chaining.
    """
    check_ops.assert_equal(
        array_ops.shape(labels), array_ops.shape(predictions),
        message="Shapes of labels and predictions are unequal")
    labels = math_ops.argmax(labels, axis=-1)
    predictions = math_ops.argmax(predictions, axis=-1)
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(CategoricalAccuracy, self).call(matches, weights=weights)
    if weights is None:
      return labels, predictions
    return labels, predictions, weights
Example #5
  def call(self, labels, predictions, weights=None):
    """Accumulate accuracy statistics.

    `labels` and `predictions` should have the same shape and type.

    Args:
      labels: Binary `Tensor` (containing 0 or 1).
      predictions: Tensor with probabilities or logits.
      weights: Optional weighting of each example. Defaults to 1.

    Returns:
      The arguments, for easy chaining.
    """
    check_ops.assert_equal(
        array_ops.shape(labels), array_ops.shape(predictions),
        message="Shapes of labels and predictions are unequal")
    predictions = ops.convert_to_tensor(predictions)
    predictions = predictions > self.threshold
    # Convert labels to bool to match predictions.
    labels = math_ops.cast(labels, dtypes.bool)
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(BinaryAccuracy, self).call(matches, weights=weights)
    if weights is None:
      return labels, predictions
    return labels, predictions, weights
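To make the thresholding step concrete, here is the same comparison in plain numpy with hypothetical labels and probabilities, assuming a threshold of 0.5 (standing in for `self.threshold`):

```python
import numpy as np

threshold = 0.5  # hypothetical value for the class's threshold
labels = np.array([0, 1, 1, 0], dtype=bool)
predictions = np.array([0.2, 0.9, 0.4, 0.1]) > threshold  # [F, T, F, F]
matches = labels == predictions  # [T, T, F, T]
print(matches.mean())  # 0.75
```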
Example #6
  def call(self, labels, predictions, weights=None):
    """Accumulate accuracy statistics.

    `labels` and `predictions` should have the same shape, except that
    `predictions` must have one additional trailing dimension equal to the
    number of classes (that you want to predict).

    Type of labels and predictions can be different.

    Args:
      labels: Tensor of shape (batch_size,) containing integer class indices.
      predictions: Tensor with the logits or probabilities for each example.
      weights: Optional weighting of each example. Defaults to 1.

    Returns:
      The arguments, for easy chaining.
    """
    check_ops.assert_equal(
        array_ops.shape(labels), array_ops.shape(predictions)[0],
        message="First axis of labels and predictions is unequal")
    predictions = math_ops.argmax(predictions, axis=-1)
    labels = math_ops.cast(labels, dtypes.int64)
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(SparseAccuracy, self).call(matches, weights=weights)
    if weights is None:
      return labels, predictions
    return labels, predictions, weights
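The sparse variant compares integer class labels against the argmax over the trailing class dimension; a small numpy sketch with hypothetical values:

```python
import numpy as np

labels = np.array([2, 0, 1])
logits = np.array([[0.1, 0.2, 0.7],    # argmax 2 -> match
                   [0.9, 0.05, 0.05],  # argmax 0 -> match
                   [0.3, 0.3, 0.4]])   # argmax 2 -> miss
print((labels == logits.argmax(axis=-1)).mean())  # 0.666...
```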
Example #7
 def _get_sparse_tensors(self, inputs, weight_collections=None,
                         trainable=None):
   sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)
   id_tensor = sparse_tensors.id_tensor
   weight_tensor = sparse_tensors.weight_tensor
   # Expands final dimension, so that embeddings are not combined during
   # embedding lookup.
   check_id_rank = check_ops.assert_equal(
       array_ops.rank(id_tensor), 2,
       data=[
           'Column {} expected ID tensor of rank 2. '.format(self.name),
           'id_tensor shape: ', array_ops.shape(id_tensor)])
   with ops.control_dependencies([check_id_rank]):
     id_tensor = sparse_ops.sparse_reshape(
         id_tensor,
         shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0))
   if weight_tensor is not None:
     check_weight_rank = check_ops.assert_equal(
         array_ops.rank(weight_tensor), 2,
         data=[
             'Column {} expected weight tensor of rank 2.'.format(self.name),
             'weight_tensor shape:', array_ops.shape(weight_tensor)])
     with ops.control_dependencies([check_weight_rank]):
       weight_tensor = sparse_ops.sparse_reshape(
           weight_tensor,
           shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0))
   return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
Example #8
def _kl_independent(a, b, name="kl_independent"):
  """Batched KL divergence `KL(a || b)` for Independent distributions.

  We can leverage the fact that
  ```
  KL(Independent(a) || Independent(b)) = sum(KL(a || b))
  ```
  where the sum is over the `reinterpreted_batch_ndims`.

  Args:
    a: Instance of `Independent`.
    b: Instance of `Independent`.
    name: (optional) name to use for created ops. Default "kl_independent".

  Returns:
    Batchwise `KL(a || b)`.

  Raises:
    ValueError: If the event space for `a` and `b`, or their underlying
      distributions don't match.
  """
  p = a.distribution
  q = b.distribution

  # The KL between any two (non)-batched distributions is a scalar.
  # Given that the KL between two factored distributions is the sum, i.e.
  # KL(p1(x)p2(y) || q1(x)q2(y)) = KL(p1 || q1) + KL(p2 || q2), we compute
  # KL(p || q) and do a `reduce_sum` on the reinterpreted batch dimensions.
  if a.event_shape.is_fully_defined() and b.event_shape.is_fully_defined():
    if a.event_shape == b.event_shape:
      if p.event_shape == q.event_shape:
        num_reduce_dims = a.event_shape.ndims - p.event_shape.ndims
        reduce_dims = [-i - 1 for i in range(0, num_reduce_dims)]

        return math_ops.reduce_sum(
            kullback_leibler.kl_divergence(p, q, name=name), axis=reduce_dims)
      else:
        raise NotImplementedError("KL between Independents with different "
                                  "event shapes not supported.")
    else:
      raise ValueError("Event shapes do not match.")
  else:
    with ops.control_dependencies([
        check_ops.assert_equal(a.event_shape_tensor(), b.event_shape_tensor()),
        check_ops.assert_equal(p.event_shape_tensor(), q.event_shape_tensor())
    ]):
      # Number of reinterpreted batch dims: len(a.event_shape) - len(p.event_shape).
      num_reduce_dims = (
          array_ops.shape(a.event_shape_tensor())[0] -
          array_ops.shape(p.event_shape_tensor())[0])
      reduce_dims = math_ops.range(-num_reduce_dims, 0, 1)  # [-n, ..., -1]
      return math_ops.reduce_sum(
          kullback_leibler.kl_divergence(p, q, name=name), axis=reduce_dims)
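The identity `KL(Independent(a) || Independent(b)) = sum(KL(a || b))` can be sanity-checked without TensorFlow. A numpy sketch for diagonal Gaussians with hypothetical parameters — the joint KL is exactly the sum of the per-coordinate KLs:

```python
import numpy as np

def kl_normal(m1, s1, m2, s2):
  # Closed-form KL(N(m1, s1^2) || N(m2, s2^2)), elementwise.
  return np.log(s2 / s1) + (s1**2 + (m1 - m2)**2) / (2 * s2**2) - 0.5

m1, s1 = np.array([0.0, 1.0, -1.0]), np.array([1.0, 2.0, 0.5])
m2, s2 = np.array([0.5, 0.0, 0.0]), np.array([1.5, 1.0, 1.0])

per_dim = kl_normal(m1, s1, m2, s2)  # per-factor KLs
print(per_dim.sum())  # KL between the two factored (Independent) joints
```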
Example #9
 def test_raises_when_less(self):
   with self.test_session():
     # Static check
     static_small = constant_op.constant([3, 1], name="small")
     static_big = constant_op.constant([4, 2], name="big")
     with self.assertRaisesRegexp(ValueError, "fail"):
       check_ops.assert_equal(static_big, static_small, message="fail")
     # Dynamic check
     small = array_ops.placeholder(dtypes.int32, name="small")
     big = array_ops.placeholder(dtypes.int32, name="big")
     with ops.control_dependencies([check_ops.assert_equal(small, big)]):
       out = array_ops.identity(small)
     with self.assertRaisesOpError("small.*big"):
       out.eval(feed_dict={small: [3, 1], big: [4, 2]})
Example #10
    def _check_mu(self, mu):
        """Return `mu` after validity checks and possibly with assertations."""
        mu = ops.convert_to_tensor(mu)
        cov = self._cov

        if mu.dtype != cov.dtype:
            raise TypeError(
                "mu and cov must have the same dtype.  Found mu.dtype = %s, " "cov.dtype = %s" % (mu.dtype, cov.dtype)
            )

        # Try to validate with static checks.
        mu_shape = mu.get_shape()
        cov_shape = cov.get_shape()
        if mu_shape.is_fully_defined() and cov_shape.is_fully_defined():
            if mu_shape != cov_shape[:-1]:
                raise ValueError(
                    "mu.shape and cov.shape[:-1] should match.  Found: mu.shape=%s, "
                    "cov.shape=%s" % (mu_shape, cov_shape)
                )
            else:
                return mu

        # Static checks could not be run, so possibly do dynamic checks.
        if not self.validate_args:
            return mu
        else:
            assert_same_rank = check_ops.assert_equal(
                array_ops.rank(mu) + 1,
                cov.rank(),
                data=[
                    "mu should have rank 1 less than cov.  Found: rank(mu) = ",
                    array_ops.rank(mu),
                    " rank(cov) = ",
                    cov.rank(),
                ],
            )
            with ops.control_dependencies([assert_same_rank]):
                assert_same_shape = check_ops.assert_equal(
                    array_ops.shape(mu),
                    cov.vector_shape(),
                    data=[
                        "mu.shape and cov.shape[:-1] should match.  " "Found: shape(mu) = ",
                        array_ops.shape(mu),
                        " shape(cov) = ",
                        cov.shape(),
                    ],
                )
                return control_flow_ops.with_dependencies([assert_same_shape], mu)
Example #11
def _model_fn_ops(
    expected_features, expected_labels, actual_features, actual_labels, mode):
  assert_ops = tuple([
      check_ops.assert_equal(
          expected_features[k], actual_features[k], name='assert_%s' % k)
      for k in expected_features
  ] + [
      check_ops.assert_equal(
          expected_labels, actual_labels, name='assert_labels')
  ])
  with ops.control_dependencies(assert_ops):
    return model_fn.ModelFnOps(
        mode=mode,
        predictions=constant_op.constant(0.),
        loss=constant_op.constant(0.),
        train_op=constant_op.constant(0.))
Example #12
 def test_doesnt_raise_when_both_empty(self):
   with self.test_session():
     larry = constant_op.constant([])
     curly = constant_op.constant([])
     with ops.control_dependencies([check_ops.assert_equal(larry, curly)]):
       out = array_ops.identity(larry)
     out.eval()
Example #13
def calculate_reshape(original_shape, new_shape, validate=False, name=None):
  """Calculates the reshaped dimensions (replacing up to one -1 in reshape)."""
  batch_shape_static = tensor_util.constant_value_as_shape(new_shape)
  if batch_shape_static.is_fully_defined():
    return np.int32(batch_shape_static.as_list()), batch_shape_static, []
  with ops.name_scope(name, "calculate_reshape", [original_shape, new_shape]):
    original_size = math_ops.reduce_prod(original_shape)
    implicit_dim = math_ops.equal(new_shape, -1)
    size_implicit_dim = (
        original_size // math_ops.maximum(1, -math_ops.reduce_prod(new_shape)))
    new_ndims = array_ops.shape(new_shape)
    expanded_new_shape = array_ops.where(  # Assumes exactly one `-1`.
        implicit_dim, array_ops.fill(new_ndims, size_implicit_dim), new_shape)
    validations = [] if not validate else [
        check_ops.assert_rank(
            original_shape, 1, message="Original shape must be a vector."),
        check_ops.assert_rank(
            new_shape, 1, message="New shape must be a vector."),
        check_ops.assert_less_equal(
            math_ops.count_nonzero(implicit_dim, dtype=dtypes.int32),
            1,
            message="At most one dimension can be unknown."),
        check_ops.assert_positive(
            expanded_new_shape, message="Shape elements must be >=-1."),
        check_ops.assert_equal(
            math_ops.reduce_prod(expanded_new_shape),
            original_size,
            message="Shape sizes do not match."),
    ]
    return expanded_new_shape, batch_shape_static, validations
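The core arithmetic above — resolving a single `-1` against the known total size — mirrors what `tf.reshape` does. A plain-numpy rendering with hypothetical shapes:

```python
import numpy as np

original_shape = np.array([6, 4])
new_shape = np.array([3, -1, 2])

original_size = original_shape.prod()  # 24
# reduce_prod(new_shape) is -6; negate it and guard with max(1, .) as above.
size_implicit_dim = original_size // max(1, -new_shape.prod())  # 24 // 6 = 4
expanded = np.where(new_shape == -1, size_implicit_dim, new_shape)
print(expanded)  # [3 4 2]
```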
Example #14
def assert_close(
    x, y, data=None, summarize=None, message=None, name="assert_close"):
  """Assert that that x and y are within machine epsilon of each other.

  Args:
    x: Numeric `Tensor`
    y: Numeric `Tensor`
    data: The tensors to print out if the condition is `False`. Defaults to
      error message and first few entries of `x` and `y`.
    summarize: Print this many entries of each tensor.
    message: A string to prefix to the default message.
    name: A name for this operation (optional).

  Returns:
    Op raising `InvalidArgumentError` if |x - y| > machine epsilon.
  """
  message = message or ""
  x = ops.convert_to_tensor(x, name="x")
  y = ops.convert_to_tensor(y, name="y")

  if x.dtype.is_integer:
    return check_ops.assert_equal(
        x, y, data=data, summarize=summarize, message=message, name=name)

  with ops.name_scope(name, "assert_close", [x, y, data]):
    tol = np.finfo(x.dtype.as_numpy_dtype).resolution
    if data is None:
      data = [
          message,
          "Condition x ~= y did not hold element-wise: x = ", x.name, x, "y = ",
          y.name, y
      ]
    condition = math_ops.reduce_all(math_ops.less_equal(math_ops.abs(x-y), tol))
    return control_flow_ops.Assert(
        condition, data, summarize=summarize)
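The tolerance is taken from numpy's per-dtype `resolution`; for reference, the values numpy reports:

```python
import numpy as np

for dtype in (np.float16, np.float32, np.float64):
  print(dtype.__name__, np.finfo(dtype).resolution)
# float16 0.001
# float32 1e-06
# float64 1e-15
```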
Example #15
def assert_splits_match(nested_splits_lists):
  """Checks that the given splits lists are identical.

  Performs static tests to ensure that the given splits lists are identical,
  and returns a list of control dependency op tensors that check that they are
  fully identical.

  Args:
    nested_splits_lists: A list of nested_splits_lists, where each split_list is
      a list of `splits` tensors from a `RaggedTensor`, ordered from outermost
      ragged dimension to innermost ragged dimension.

  Returns:
    A list of control dependency op tensors.
  Raises:
    ValueError: If the splits are not identical.
  """
  error_msg = "Inputs must have identical ragged splits"
  for splits_list in nested_splits_lists:
    if len(splits_list) != len(nested_splits_lists[0]):
      raise ValueError(error_msg)
  return [
      check_ops.assert_equal(s1, s2, message=error_msg)
      for splits_list in nested_splits_lists[1:]
      for (s1, s2) in zip(nested_splits_lists[0], splits_list)
  ]
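For context, two ragged tensors have identical `splits` exactly when their rows have the same lengths. A small illustration (TF 2.x eager mode, hypothetical values):

```python
import tensorflow as tf

a = tf.ragged.constant([[1, 2], [3], [4, 5, 6]])
b = tf.ragged.constant([[9, 8], [7], [6, 5, 4]])
print(a.row_splits.numpy())  # [0 2 3 6]
print(b.row_splits.numpy())  # [0 2 3 6] -> splits match, rows align
```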
Example #16
 def zero_state(self, batch_size, dtype):
   with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
     if self._initial_cell_state is not None:
       cell_state = self._initial_cell_state
     else:
       cell_state = self._cell.zero_state(batch_size, dtype)
     error_message = (
         "When calling zero_state of AttentionWrapper %s: " % self._base_name +
         "Non-matching batch sizes between the memory "
         "(encoder output) and the requested batch size.  Are you using "
         "the BeamSearchDecoder?  If so, make sure your encoder output has "
         "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
         "the batch_size= argument passed to zero_state is "
         "batch_size * beam_width.")
     with ops.control_dependencies(
         [check_ops.assert_equal(batch_size,
                                 self._attention_mechanism.batch_size,
                                 message=error_message)]):
       cell_state = nest.map_structure(
           lambda s: array_ops.identity(s, name="checked_cell_state"),
           cell_state)
     if self._alignment_history:
       alignment_history = tensor_array_ops.TensorArray(
           dtype=dtype, size=0, dynamic_size=True)
     else:
       alignment_history = ()
     return AttentionWrapperState(
         cell_state=cell_state,
         time=array_ops.zeros([], dtype=dtypes.int32),
         attention=_zero_state_tensors(self._attention_layer_size, batch_size,
                                       dtype),
         alignments=self._attention_mechanism.initial_alignments(
             batch_size, dtype),
         alignment_history=alignment_history)
Example #17
def _check_labels(labels, expected_labels_dimension):
  """Check labels type and shape."""
  with ops.name_scope(None, 'labels', (labels,)) as scope:
    labels = sparse_tensor.convert_to_tensor_or_sparse_tensor(labels)
    if isinstance(labels, sparse_tensor.SparseTensor):
      raise ValueError('SparseTensor labels are not supported.')
    labels_shape = array_ops.shape(labels)
    err_msg = 'labels shape must be [batch_size, {}]'.format(
        expected_labels_dimension)
    assert_rank = check_ops.assert_rank(labels, 2, message=err_msg)
    with ops.control_dependencies([assert_rank]):
      static_shape = labels.shape
      if static_shape is not None:
        dim1 = static_shape[1]
        if (dim1 is not None) and (dim1 != expected_labels_dimension):
          raise ValueError(
              'Mismatched label shape. '
              'Classifier configured with n_classes=%s.  Received %s. '
              'Suggested Fix: check your n_classes argument to the estimator '
              'and/or the shape of your label.' %
              (expected_labels_dimension, dim1))
      assert_dimension = check_ops.assert_equal(
          expected_labels_dimension, labels_shape[1], message=err_msg)
      with ops.control_dependencies([assert_dimension]):
        return array_ops.identity(labels, name=scope)
Example #18
def assert_integer_form(
    x, data=None, summarize=None, message=None,
    int_dtype=None, name="assert_integer_form"):
  """Assert that x has integer components (or floats equal to integers).

  Args:
    x: Floating-point `Tensor`
    data: The tensors to print out if the condition is `False`. Defaults to
      error message and first few entries of `x` and `y`.
    summarize: Print this many entries of each tensor.
    message: A string to prefix to the default message.
    int_dtype: A `tf.dtype` used to cast the float to. The default (`None`)
      implies the smallest possible signed int will be used for casting.
    name: A name for this operation (optional).

  Returns:
    Op raising `InvalidArgumentError` if `cast(x, int_dtype) != x`.
  """
  with ops.name_scope(name, values=[x, data]):
    x = ops.convert_to_tensor(x, name="x")
    if x.dtype.is_integer:
      return control_flow_ops.no_op()
    message = message or "{} has non-integer components".format(x.op.name)
    if int_dtype is None:
      try:
        int_dtype = {
            dtypes.float16: dtypes.int16,
            dtypes.float32: dtypes.int32,
            dtypes.float64: dtypes.int64,
        }[x.dtype.base_dtype]
      except KeyError:
        raise TypeError("Unrecognized type {}".format(x.dtype.name))
    return check_ops.assert_equal(
        x, math_ops.cast(math_ops.cast(x, int_dtype), x.dtype),
        data=data, summarize=summarize, message=message, name=name)
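The check itself is just a cast round-trip: a float has "integer form" iff casting to int and back is the identity. In numpy terms:

```python
import numpy as np

x = np.array([3.0, -2.0, 3.5])
roundtrip = x.astype(np.int64).astype(np.float64)
print(roundtrip == x)  # [ True  True False] -> 3.5 would trip the assert
```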
Example #19
  def __init__(self, event_ndims=0, validate_args=False, name="absolute_value"):
    """Instantiates the `AbsoluteValue` bijector.

    Args:
      event_ndims: Python scalar indicating the number of dimensions associated
        with a particular draw from the distribution.  Currently only zero is
        supported.
      validate_args: Python `bool` indicating whether arguments should be
        checked for correctness.
      name: Python `str` name given to ops managed by this object.

    Raises:
      ValueError:  If `event_ndims` is not zero.
    """
    self._graph_parents = []
    self._name = name

    event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
    event_ndims_const = tensor_util.constant_value(event_ndims)
    if event_ndims_const is not None and event_ndims_const not in (0,):
      raise ValueError("event_ndims(%s) was not 0" % event_ndims_const)
    else:
      if validate_args:
        event_ndims = control_flow_ops.with_dependencies(
            [check_ops.assert_equal(
                event_ndims, 0, message="event_ndims was not 0")],
            event_ndims)

    with self._name_scope("init"):
      super(AbsoluteValue, self).__init__(
          event_ndims=event_ndims,
          validate_args=validate_args,
          name=name)
Example #20
  def _maybe_check_matching_sizes(self, event_shape_in, event_shape_out,
                                  validate_args=False):
    """Check that prod(event_shape_in)==prod(event_shape_out)."""

    def _get_size_from_shape(shape):
      """Computes size from a shape `Tensor`, statically if possible."""
      s = tensor_util.constant_value(shape)
      if s is not None:
        return [np.int32(np.prod(s))]*2
      return None, math_ops.reduce_prod(shape, name="size")

    # Ensure `event_shape_in` is compatible with `event_shape_out`.
    event_size_in_, event_size_in = _get_size_from_shape(  # pylint: disable=unbalanced-tuple-unpacking
        event_shape_in)
    event_size_out_, event_size_out = _get_size_from_shape(  # pylint: disable=unbalanced-tuple-unpacking
        event_shape_out)

    assertions = []
    if event_size_in_ is not None and event_size_out_ is not None:
      if event_size_in_ != event_size_out_:
        raise ValueError(
            "Input `event_size` ({}) does not match output `event_size` ({}).".
            format(event_size_in, event_size_out_))
    elif validate_args:
      assertions.append(check_ops.assert_equal(
          event_size_in, event_size_out,
          message="Input/output `event_size`s do not match."))

    return assertions
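The static branch of this check reduces to comparing flattened sizes; with hypothetical event shapes:

```python
import numpy as np

event_shape_in = np.array([2, 3])
print(np.prod(event_shape_in) == np.prod(np.array([6])))  # True: 6 == 6
print(np.prod(event_shape_in) == np.prod(np.array([4])))  # False -> would raise
```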
Example #21
def maybe_check_quadrature_param(param, name, validate_args):
  """Helper which checks validity of `loc` and `scale` init args."""
  with ops.name_scope(name="check_" + name, values=[param]):
    assertions = []
    if param.shape.ndims is not None:
      if param.shape.ndims == 0:
        raise ValueError("Mixing params must be a (batch of) vector; "
                         "{}.rank={} is not at least one.".format(
                             name, param.shape.ndims))
    elif validate_args:
      assertions.append(check_ops.assert_rank_at_least(
          param, 1,
          message=("Mixing params must be a (batch of) vector; "
                   "{}.rank is not at least one.".format(
                       name))))

    # TODO(jvdillon): Remove once we support k-mixtures.
    if param.shape.with_rank_at_least(1)[-1] is not None:
      if param.shape[-1].value != 1:
        raise NotImplementedError("Currently only bimixtures are supported; "
                                  "{}.shape[-1]={} is not 1.".format(
                                      name, param.shape[-1].value))
    elif validate_args:
      assertions.append(check_ops.assert_equal(
          array_ops.shape(param)[-1], 1,
          message=("Currently only bimixtures are supported; "
                   "{}.shape[-1] is not 1.".format(name))))

    if assertions:
      return control_flow_ops.with_dependencies(assertions, param)
    return param
Example #22
 def test_doesnt_raise_when_equal_and_broadcastable_shapes(self):
   with self.test_session():
     small = constant_op.constant([1, 2], name="small")
     small_2 = constant_op.constant([1, 2], name="small_2")
     with ops.control_dependencies([check_ops.assert_equal(small, small_2)]):
       out = array_ops.identity(small)
     out.eval()
Example #23
def validate_init_args(
    distribution,
    batch_shape,
    validate_args,
    batch_shape_static):
  """Helper to __init__ which makes or raises assertions."""
  with ops.name_scope(name="validate_init_args",
                      values=[batch_shape] + distribution._graph_parents):  # pylint: disable=protected-access
    runtime_assertions = []

    if batch_shape.shape.ndims is not None:
      if batch_shape.shape.ndims != 1:
        raise ValueError("`batch_shape` must be a vector "
                         "(saw rank: {}).".format(
                             batch_shape.shape.ndims))
    elif validate_args:
      runtime_assertions += [
          check_ops.assert_rank(
              batch_shape,
              1,
              message="`batch_shape` must be a vector.",
              name="assert_batch_shape_is_vector"),
      ]

    batch_size_static = np.prod(batch_shape_static)
    dist_batch_size_static = (
        None if not distribution.batch_shape.is_fully_defined()
        else np.prod(distribution.batch_shape).value)

    if batch_size_static is not None and dist_batch_size_static is not None:
      if batch_size_static != dist_batch_size_static:
        raise ValueError("`batch_shape` size ({}) must match "
                         "`distribution.batch_shape` size ({}).".format(
                             batch_size_static,
                             dist_batch_size_static))
    elif validate_args:
      runtime_assertions += [
          check_ops.assert_equal(
              math_ops.reduce_prod(batch_shape),
              math_ops.reduce_prod(distribution.batch_shape_tensor()),
              message=("`batch_shape` size must match "
                       "`distributions.batch_shape` size."),
              name="assert_batch_size"),
      ]

    if batch_shape_static is not None:
      if np.any(batch_shape_static < 1):
        raise ValueError("`batch_shape` elements must be positive "
                         "(i.e., larger than zero).")
    elif validate_args:
      runtime_assertions += [
          check_ops.assert_positive(
              batch_shape,
              message=("`batch_shape` elements must be positive "
                       "(i.e., larger than zero)."),
              name="assert_batch_shape_positive")
      ]

    return runtime_assertions
Example #24
def _verify_input(tensor_list, labels, probs_list):
  """Verify that batched inputs are well-formed."""
  checked_probs_list = []
  for probs in probs_list:
    # Since the number of classes shouldn't change at runtime, the
    # probabilities' shape should be fully defined.
    probs.get_shape().assert_is_fully_defined()

    # Probabilities must be 1D.
    probs.get_shape().assert_has_rank(1)

    # Probabilities must be nonnegative and sum to one.
    tol = 1e-6
    prob_sum = math_ops.reduce_sum(probs)
    checked_probs = control_flow_ops.with_dependencies([
        check_ops.assert_non_negative(probs),
        check_ops.assert_less(prob_sum, 1.0 + tol),
        check_ops.assert_less(1.0 - tol, prob_sum)
    ], probs)
    checked_probs_list.append(checked_probs)

  # All probabilities should be the same length.
  prob_length = checked_probs_list[0].get_shape().num_elements()
  for checked_prob in checked_probs_list:
    if checked_prob.get_shape().num_elements() != prob_length:
      raise ValueError('Probability parameters must have the same length.')

  # Labels tensor should only have batch dimension.
  labels.get_shape().assert_has_rank(1)

  for tensor in tensor_list:
    # Data tensor should have a batch dimension.
    shape = tensor.get_shape().with_rank_at_least(1)

    # Data and label batch dimensions must be compatible.
    tensor_shape.dimension_at_index(shape, 0).assert_is_compatible_with(
        labels.get_shape()[0])

  # Data and labels must have the same, strictly positive batch size. Since we
  # can't assume we know the batch size at graph creation, add runtime checks.
  labels_batch_size = array_ops.shape(labels)[0]
  lbl_assert = check_ops.assert_positive(labels_batch_size)

  # Make each tensor depend on its own checks.
  labels = control_flow_ops.with_dependencies([lbl_assert], labels)
  tensor_list = [
      control_flow_ops.with_dependencies([
          lbl_assert,
          check_ops.assert_equal(array_ops.shape(x)[0], labels_batch_size)
      ], x) for x in tensor_list
  ]

  # Label's classes must be integers 0 <= x < num_classes.
  labels = control_flow_ops.with_dependencies([
      check_ops.assert_integer(labels), check_ops.assert_non_negative(labels),
      check_ops.assert_less(labels, math_ops.cast(prob_length, labels.dtype))
  ], labels)

  return tensor_list, labels, checked_probs_list
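The two `assert_less` calls above bracket the probability sum inside `1 ± tol`; the same predicate in plain numpy, using the function's tolerance:

```python
import numpy as np

tol = 1e-6
probs = np.array([0.2, 0.3, 0.5])
prob_sum = probs.sum()
print((probs >= 0).all() and (1.0 - tol < prob_sum < 1.0 + tol))  # True
```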
Example #25
 def check(t):
   target = array_ops.shape(tensor)[1:]
   result = array_ops.broadcast_dynamic_shape(target, array_ops.shape(t))
   # This rank check ensures that I don't get a wrong answer from the
   # _shapes_ broadcasting against each other.
   gt = check_ops.assert_greater(array_ops.rank(target), array_ops.rank(t))
   eq = check_ops.assert_equal(target, result)
   return gt, eq
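The helper relies on a property of `broadcast_dynamic_shape`: the broadcast of `target` with `shape(t)` equals `target` exactly when `t` broadcasts into `target` without enlarging it. An eager-mode illustration with hypothetical shapes:

```python
import tensorflow as tf

target = tf.constant([2, 3, 4])   # shape vector of `tensor[1:]`
t_shape = tf.constant([3, 1])     # shape vector of a candidate `t`
result = tf.broadcast_dynamic_shape(target, t_shape)
print(result.numpy())             # [2 3 4] == target -> `t` is compatible
```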
Example #26
 def test_raises_when_less(self):
   with self.test_session():
     small = constant_op.constant([3, 1], name="small")
     big = constant_op.constant([4, 2], name="big")
     with ops.control_dependencies([check_ops.assert_equal(small, big)]):
       out = array_ops.identity(small)
     with self.assertRaisesOpError("small.*big"):
       out.eval()
Example #27
 def test_raises_when_equal_but_non_broadcastable_shapes(self):
   with self.test_session():
     small = constant_op.constant([1, 1, 1], name="small")
     small_2 = constant_op.constant([1, 1], name="small_2")
     with self.assertRaisesRegexp(ValueError, "must be"):
       with ops.control_dependencies([check_ops.assert_equal(small, small_2)]):
         out = array_ops.identity(small)
       out.eval()
Example #28
 def _assert_self_adjoint(self):
   dense = self._get_cached_dense_matrix()
   logging.warn(
       "Using (possibly slow) default implementation of assert_self_adjoint."
       "  Requires conversion to a dense matrix.")
   return check_ops.assert_equal(
       dense,
       linear_operator_util.matrix_adjoint(dense),
       message="Matrix was not equal to its adjoint.")
Example #29
 def _forward(self, x):
   if self.validate_args:
     is_matrix = check_ops.assert_rank_at_least(x, 2)
     shape = array_ops.shape(x)
     is_square = check_ops.assert_equal(shape[-2], shape[-1])
     x = control_flow_ops.with_dependencies([is_matrix, is_square], x)
   # For safety, explicitly zero-out the upper triangular part.
   x = array_ops.matrix_band_part(x, -1, 0)
   return math_ops.matmul(x, x, adjoint_b=True)
Example #30
 def update():
   is_finite = itr.get_next()
   grad = self._get_tensor(is_finite)
   update_op, should_apply_gradients = loss_scale.update([grad])
   assert_op = check_ops.assert_equal(should_apply_gradients, is_finite)
   if context.executing_eagerly():
     return
   with ops.control_dependencies([assert_op]):
     return array_ops.identity(update_op)
Example #31
 def _batch_size_checks(self, batch_size, error_message):
   del batch_size  # Unused.
   # Attention batch size must always be 1.
   return [check_ops.assert_equal(1, attention_mechanism.batch_size,
                                  message=error_message)
           for attention_mechanism in self._attention_mechanisms]
Example #32
 def map_fn(x):
     with ops.control_dependencies(
         [check_ops.assert_equal(x, np.int64(0))]):
         return array_ops.identity(x)
Example #33
 def shape_reduce_fn(state, value):
   check_ops.assert_equal(state, array_ops.shape(value))
   return state
Example #34
def _assert_integer_form(x):
  """Check x for integer components (or floats that are equal to integers)."""
  x = ops.convert_to_tensor(x, name='x')
  casted_x = math_ops.to_int64(x)
  return check_ops.assert_equal(x, math_ops.cast(
      math_ops.round(casted_x), x.dtype))
Example #35
def span_overlaps(source_start,
                  source_limit,
                  target_start,
                  target_limit,
                  contains=False,
                  contained_by=False,
                  partial_overlap=False,
                  name=None):
    """Returns a boolean tensor indicating which source and target spans overlap.

  The source and target spans are specified using B+1 dimensional tensors,
  with `B>=0` batch dimensions followed by a final dimension that lists the
  span offsets for each span in the batch:

  * The `i`th source span in batch `b1...bB` starts at
    `source_start[b1...bB, i]` (inclusive), and extends to just before
    `source_limit[b1...bB, i]` (exclusive).
  * The `j`th target span in batch `b1...bB` starts at
    `target_start[b1...bB, j]` (inclusive), and extends to just before
    `target_limit[b1...bB, j]` (exclusive).

  `result[b1...bB, i, j]` is true if the `i`th source span overlaps with the
  `j`th target span in batch `b1...bB`, where a source span overlaps a target
  span if any of the following are true:

    * The spans are identical.
    * `contains` is true, and the source span contains the target span.
    * `contained_by` is true, and the source span is contained by the target
      span.
    * `partial_overlap` is true, and there is a non-zero overlap between the
      source span and the target span.

  Args:
    source_start: A B+1 dimensional potentially ragged tensor with shape
      `[D1...DB, source_size]`: the start offset of each source span.
    source_limit: A B+1 dimensional potentially ragged tensor with shape
      `[D1...DB, source_size]`: the limit offset of each source span.
    target_start: A B+1 dimensional potentially ragged tensor with shape
      `[D1...DB, target_size]`: the start offset of each target span.
    target_limit: A B+1 dimensional potentially ragged tensor with shape
      `[D1...DB, target_size]`: the limit offset of each target span.
    contains: If true, then a source span is considered to overlap a target span
      when the source span contains the target span.
    contained_by: If true, then a source span is considered to overlap a target
      span when the source span is contained by the target span.
    partial_overlap: If true, then a source span is considered to overlap a
      target span when the source span partially overlaps the target span.
    name: A name for the operation (optional).

  Returns:
    A B+2 dimensional potentially ragged boolean tensor with shape
    `[D1...DB, source_size, target_size]`.

  Raises:
    ValueError: If the span tensors are incompatible.

  #### Example:
    Given the following source and target spans (with no batch dimensions):

    ```python
    #         0    5    10   15   20   25   30   35   40
    #         |====|====|====|====|====|====|====|====|
    # Source: [-0-]     [-1-] [2] [-3-][-4-][-5-]
    # Target: [-0-][-1-]     [-2-] [3]   [-4-][-5-]
    #         |====|====|====|====|====|====|====|====|
    >>> source_start = [0, 10, 16, 20, 25, 30]
    >>> source_limit = [5, 15, 19, 25, 30, 35]
    >>> target_start = [0,  5, 15, 21, 27, 31]
    >>> target_limit = [5, 10, 20, 24, 32, 37]
    ```

    `result[i, j]` will be true at the following locations:

      * `[0, 0]` (always)
      * `[2, 2]` (if contained_by=True or partial_overlap=True)
      * `[3, 3]` (if contains=True or partial_overlap=True)
      * `[4, 4]` (if partial_overlap=True)
      * `[5, 5]` (if partial_overlap=True)

  """
    _check_type(contains, 'contains', bool)
    _check_type(contained_by, 'contained_by', bool)
    _check_type(partial_overlap, 'partial_overlap', bool)

    scope_tensors = [source_start, source_limit, target_start, target_limit]
    with ops.name_scope(name, 'SpanOverlaps', scope_tensors):
        # Convert input tensors.
        source_start = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            source_start, name='source_start')
        source_limit = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            source_limit, name='source_limit')
        target_start = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            target_start, name='target_start')
        target_limit = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            target_limit, name='target_limit')
        span_tensors = [source_start, source_limit, target_start, target_limit]

        # Verify input tensor shapes and types.
        source_start.shape.assert_is_compatible_with(source_limit.shape)
        target_start.shape.assert_is_compatible_with(target_limit.shape)
        source_start.shape.assert_same_rank(target_start.shape)
        source_start.shape.assert_same_rank(target_limit.shape)
        source_limit.shape.assert_same_rank(target_start.shape)
        source_limit.shape.assert_same_rank(target_limit.shape)
        if not (source_start.dtype == target_start.dtype == source_limit.dtype
                == target_limit.dtype):
            raise TypeError('source_start, source_limit, target_start, and '
                            'target_limit must all have the same dtype')
        ndims = set(
            [t.shape.ndims for t in span_tensors if t.shape.ndims is not None])
        assert len(ndims) <= 1  # because of assert_same_rank statements above.

        if all(not isinstance(t, ragged_tensor.RaggedTensor)
               for t in span_tensors):
            return _span_overlaps(source_start, source_limit, target_start,
                                  target_limit, contains, contained_by,
                                  partial_overlap)

        elif all(
                isinstance(t, ragged_tensor.RaggedTensor)
                for t in span_tensors):
            if not ndims:
                raise ValueError(
                    'For ragged inputs, the shape.ndims of at least one '
                    'span tensor must be statically known.')
            if list(ndims)[0] == 2:
                return _span_overlaps(source_start, source_limit, target_start,
                                      target_limit, contains, contained_by,
                                      partial_overlap)
            else:
                # Handle ragged batch dimension by recursion on values.
                row_splits = span_tensors[0].row_splits
                shape_checks = [
                    check_ops.assert_equal(
                        t.row_splits,
                        row_splits,
                        message='Mismatched ragged shapes for batch dimensions'
                    ) for t in span_tensors[1:]
                ]
                with ops.control_dependencies(shape_checks):
                    return ragged_tensor.RaggedTensor.from_row_splits(
                        span_overlaps(source_start.values, source_limit.values,
                                      target_start.values, target_limit.values,
                                      contains, contained_by, partial_overlap),
                        row_splits)

        else:
            # Mix of dense and ragged tensors.
            raise ValueError('Span tensors must all have the same ragged_rank')
Example #36
 def map_fn(x):
     with ops.control_dependencies([check_ops.assert_equal(x, 0)]):
         return x
Example #37
    def _forward_log_det_jacobian(self, x):
        # Let Y be a symmetric, positive definite matrix and write:
        #   Y = X X.T
        # where X is lower-triangular.
        #
        # Observe that,
        #   dY[i,j]/dX[a,b]
        #   = d/dX[a,b] { X[i,:] X[j,:] }
        #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
        #
        # To compute the Jacobian dY/dX we must represent X,Y as vectors. Since Y is
        # symmetric and X is lower-triangular, we need vectors of dimension:
        #   d = p (p + 1) / 2
        # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
        #   k = { i (i + 1) / 2 + j   i>=j
        #       { undef               i<j
        # and assume zero-based indexes. When k is undef, the element is dropped.
        # Example:
        #           j      k
        #        0 1 2 3  /
        #    0 [ 0 . . . ]
        # i  1 [ 1 2 . . ]
        #    2 [ 3 4 5 . ]
        #    3 [ 6 7 8 9 ]
        # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
        # slight abuse: k(i,j)=undef means the element is dropped.)
        #
        # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
        # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
        # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
        # (1) j<=i<a thus i,j!=a.
        # (2) i=a>j  thus i,j!=a.
        #
        # Since the Jacobian is lower-triangular, we need only compute the product
        # of diagonal elements:
        #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
        #   = X[j,j] + I[i=j] X[i,j],
        # i.e., 2 X[j,j] on the diagonal (i=j) and X[j,j] strictly below it (i>j).
        # Column j therefore contributes one factor of 2 X[j,j] and (p-j-1)
        # factors of X[j,j], so:
        #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
        diag = array_ops.matrix_diag_part(x)

        # We now ensure diag is columnar. E.g., if `diag = [1, 2, 3]` then the output
        # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
        # output is unchanged.
        diag = self._make_columnar(diag)

        if self.validate_args:
            is_matrix = check_ops.assert_rank_at_least(
                x, 2, message="Input must be a (batch of) matrix.")
            shape = array_ops.shape(x)
            is_square = check_ops.assert_equal(
                shape[-2],
                shape[-1],
                message="Input must be a (batch of) square matrix.")
            # Assuming lower-triangular means we only need check diag>0.
            is_positive_definite = check_ops.assert_positive(
                diag, message="Input must be positive definite.")
            x = control_flow_ops.with_dependencies(
                [is_matrix, is_square, is_positive_definite], x)

        # Create a vector equal to: [p, p-1, ..., 2, 1].
        if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
            p_int = array_ops.shape(x)[-1]
            p_float = math_ops.cast(p_int, dtype=x.dtype)
        else:
            p_int = x.get_shape()[-1].value
            p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
        exponents = math_ops.linspace(p_float, 1., p_int)

        sum_weighted_log_diag = array_ops.squeeze(math_ops.matmul(
            math_ops.log(diag), exponents[..., array_ops.newaxis]),
                                                  axis=-1)
        fldj = p_float * np.log(2.) + sum_weighted_log_diag

        return fldj
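The closing formula, `log|Jac| = p*log(2) + sum_j (p-j)*log(X[j,j])` for `j = 0..p-1`, is easy to evaluate directly. A numpy sketch for one hypothetical lower-triangular matrix, mirroring the `linspace` exponents used above:

```python
import numpy as np

x = np.array([[1.5, 0.0, 0.0],
              [0.3, 2.0, 0.0],
              [0.1, 0.4, 0.5]])
p = x.shape[-1]
exponents = np.linspace(p, 1.0, p)  # [3., 2., 1.] == p - j for j = 0..p-1
fldj = p * np.log(2.0) + (exponents * np.log(np.diag(x))).sum()
print(fldj)
```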
Example #38
 def shape_reduce_fn(state, value):
   check_ops.assert_equal(state, value.dense_shape)
   return state
Example #39
  def _assert_self_adjoint(self):
     imag_multiplier = math_ops.imag(self.multiplier)
     return check_ops.assert_equal(
         array_ops.zeros_like(imag_multiplier),
         imag_multiplier,
         message="LinearOperator was not self-adjoint")
Example #40
    def __init__(self,
                 cell,
                 attention_mechanism,
                 attention_layer_size=None,
                 alignment_history=False,
                 cell_input_fn=None,
                 output_attention=True,
                 initial_cell_state=None,
                 name=None):
        super(GatedAttentionWrapper, self).__init__(name=name)
        if not rnn_cell_impl._like_rnncell(cell):  # pylint: disable=protected-access
            raise TypeError("cell must be an RNNCell, saw type: %s" %
                            type(cell).__name__)
        if not isinstance(attention_mechanism, AttentionMechanism):
            raise TypeError(
                "attention_mechanism must be a AttentionMechanism, saw type: %s"
                % type(attention_mechanism).__name__)
        if cell_input_fn is None:
            cell_input_fn = (lambda inputs, attention: array_ops.concat(
                [inputs, attention], -1))
        else:
            if not callable(cell_input_fn):
                raise TypeError(
                    "cell_input_fn must be callable, saw type: %s" %
                    type(cell_input_fn).__name__)

        if attention_layer_size is not None:
            self._attention_layer = layers_core.Dense(attention_layer_size,
                                                      name="attention_layer",
                                                      use_bias=False)
            self._attention_layer_size = attention_layer_size
        else:
            self._attention_layer = None
            self._attention_layer_size = attention_mechanism.values.get_shape(
            )[-1].value

        self._cell = cell
        self._attention_mechanism = attention_mechanism
        self._cell_input_fn = cell_input_fn
        self._output_attention = output_attention
        self._alignment_history = alignment_history
        with ops.name_scope(name, "AttentionWrapperInit"):
            if initial_cell_state is None:
                self._initial_cell_state = None
            else:
                final_state_tensor = nest.flatten(initial_cell_state)[-1]
                state_batch_size = (final_state_tensor.shape[0].value
                                    or array_ops.shape(final_state_tensor)[0])
                error_message = (
                    "When constructing AttentionWrapper %s: " % self._base_name
                    + "Non-matching batch sizes between the memory "
                    "(encoder output) and initial_cell_state.  Are you using "
                    "the BeamSearchDecoder?  You may need to tile your initial state "
                    "via the tf.contrib.seq2seq.tile_batch function with argument "
                    "multiple=beam_width.")
                with ops.control_dependencies([
                        check_ops.assert_equal(
                            state_batch_size,
                            self._attention_mechanism.batch_size,
                            message=error_message)
                ]):
                    self._initial_cell_state = nest.map_structure(
                        lambda s: array_ops.identity(
                            s, name="check_initial_cell_state"),
                        initial_cell_state)
Example #41
def stack_dynamic_partitions(data, partitions, num_partitions, name=None):
    """Stacks dynamic partitions of a Tensor or RaggedTensor.

  Returns a RaggedTensor `output` with `num_partitions` rows, where the row
  `output[i]` is formed by stacking all slices `data[j1...jN]` such that
  `partitions[j1...jN] = i`.  Slices of `data` are stacked in row-major
  order.

  If `num_partitions` is an `int` (not a `Tensor`), then this is equivalent to
  `tf.ragged.stack(tf.dynamic_partition(data, partitions, num_partitions))`.

  #### Example:

  >>> data           = ['a', 'b', 'c', 'd', 'e']
  >>> partitions     = [  3,   0,   2,   2,   3]
  >>> num_partitions = 5
  >>> tf.ragged.stack_dynamic_partitions(data, partitions, num_partitions)
  <tf.RaggedTensor [[b'b'], [], [b'c', b'd'], [b'a', b'e'], []]>

  Args:
    data: A `Tensor` or `RaggedTensor` containing the values to stack.
    partitions: An `int32` or `int64` `Tensor` or `RaggedTensor` specifying the
      partition that each slice of `data` should be added to. `partitions.shape`
      must be a prefix of `data.shape`.  Values must be greater than or equal to
      zero, and less than `num_partitions`. `partitions` is not required to be
      sorted.
    num_partitions: An `int32` or `int64` scalar specifying the number of
      partitions to output.  This determines the number of rows in `output`.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the stacked partitions.  The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_partitions, (D)] + data.shape[partitions.rank:]`, where `(D)` is a
    ragged dimension whose length is the number of data slices stacked for
    each `partition`.
  """
    with ops.name_scope(name, 'SegmentStack',
                        [data, partitions, num_partitions]):
        # Convert inputs to tensors.
        data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data,
                                                                name='data')
        row_splits_dtype = (data.row_splits.dtype if isinstance(
            data, ragged_tensor.RaggedTensor) else None)
        partitions = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            partitions, name='partitions', preferred_dtype=row_splits_dtype)
        num_partitions = ops.convert_to_tensor(
            num_partitions,
            name='num_partitions',
            preferred_dtype=partitions.dtype)
        if row_splits_dtype is not None:
            partitions = math_ops.cast(partitions, row_splits_dtype)
        num_partitions = math_ops.cast(num_partitions, partitions.dtype)

        # Sanity-checks for shapes.
        partitions_rank = partitions.shape.ndims
        if partitions_rank is None:
            raise ValueError('partitions must have known rank.')
        num_partitions.shape.assert_has_rank(0)
        partitions.shape.assert_is_compatible_with(
            data.shape[:partitions_rank])

        if partitions_rank == 0:
            # If partitions is a scalar, then just create a RaggedTensor
            # containing the complete `data` value in the specified row.
            return ragged_tensor.RaggedTensor.from_value_rowids(
                values=array_ops.stack([data]),
                value_rowids=array_ops.stack([partitions]),
                nrows=num_partitions,
                validate=False)

        elif partitions_rank == 1:
            # If partitions is a vector (the typical case): we can just use data and
            # partitions as the `values` and `value_rowids` for `from_value_rowids`,
            # as long as we sort them first.
            permutation = sort_ops.argsort(partitions, stable=True)
            value_rowids = array_ops.gather(partitions, permutation)
            values = array_ops.gather(data, permutation)
            check = check_ops.assert_less(
                value_rowids[-1:],
                num_partitions,
                message='partitions must be less than num_partitions')
            with ops.control_dependencies([check]):
                return ragged_tensor.RaggedTensor.from_value_rowids(
                    values, value_rowids, nrows=num_partitions, validate=False)

        else:
            # Handle higher-dimensional partitions via recursion.
            if not isinstance(data, ragged_tensor.RaggedTensor):
                data = ragged_tensor.RaggedTensor.from_tensor(
                    data, row_splits_dtype=partitions.dtype, ragged_rank=1)
            if not isinstance(partitions, ragged_tensor.RaggedTensor):
                partitions = ragged_tensor.RaggedTensor.from_tensor(
                    partitions,
                    row_splits_dtype=partitions.dtype,
                    ragged_rank=max(data.ragged_rank, partitions_rank - 1))
            check = check_ops.assert_equal(
                data.row_splits,
                partitions.row_splits,
                message='data and partitions have incompatible ragged shapes')
            with ops.control_dependencies([check]):
                return stack_dynamic_partitions(data.values, partitions.values,
                                                num_partitions)
Example #42
    def call(self, inputs, state):
        """Perform a step of attention-wrapped RNN.

    - Step 1: Mix the `inputs` and previous step's `attention` output via
      `cell_input_fn`.
    - Step 2: Call the wrapped `cell` with this input and its previous state.
    - Step 3: Score the cell's output with `attention_mechanism`.
    - Step 4: Calculate the alignments by passing the score through the
      `normalizer`.
    - Step 5: Calculate the context vector as the inner product between the
      alignments and the attention_mechanism's values (memory).
    - Step 6: Calculate the attention output by concatenating the cell output
      and context through the attention layer (a linear layer with
      `attention_size` outputs).

    Args:
      inputs: (Possibly nested tuple of) Tensor, the input at this time step.
      state: An instance of `AttentionWrapperState` containing
        tensors from the previous time step.

    Returns:
      A tuple `(attention_or_cell_output, next_state)`, where:

      - `attention_or_cell_output` depending on `output_attention`.
      - `next_state` is an instance of `AttentionWrapperState`
         containing the state calculated at this time step.
    """
        # Step 1: Calculate the true inputs to the cell based on the
        # previous attention value.
        cell_inputs = self._cell_input_fn(inputs, state.attention)
        cell_state = state.cell_state
        cell_output, next_cell_state = self._cell(cell_inputs, cell_state)

        cell_batch_size = (cell_output.shape[0].value
                           or array_ops.shape(cell_output)[0])
        error_message = (
            "When applying AttentionWrapper %s: " % self.name +
            "Non-matching batch sizes between the memory "
            "(encoder output) and the query (decoder output).  Are you using "
            "the BeamSearchDecoder?  You may need to tile your memory input via "
            "the tf.contrib.seq2seq.tile_batch function with argument "
            "multiple=beam_width.")
        with ops.control_dependencies([
                check_ops.assert_equal(cell_batch_size,
                                       self._attention_mechanism.batch_size,
                                       message=error_message)
        ]):
            cell_output = array_ops.identity(cell_output,
                                             name="checked_cell_output")

        score = self._attention_mechanism(cell_output)
        alignments = self._probability_fn(score)

        # Reshape from [batch_size, memory_time] to [batch_size, 1, memory_time]
        expanded_alignments = array_ops.expand_dims(alignments, 1)
        # Context is the inner product of alignments and values along the
        # memory time dimension.
        # alignments shape is
        #   [batch_size, 1, memory_time]
        # attention_mechanism.values shape is
        #   [batch_size, memory_time, attention_mechanism.num_units]
        # the batched matmul is over memory_time, so the output shape is
        #   [batch_size, 1, attention_mechanism.num_units].
        # we then squeeze out the singleton dim.
        attention_mechanism_values = self._attention_mechanism.values
        context = math_ops.matmul(expanded_alignments,
                                  attention_mechanism_values)
        context = array_ops.squeeze(context, [1])

        if self._attention_layer is not None:
            attention = self._attention_layer(
                array_ops.concat([cell_output, context], 1))
        else:
            attention = context

        if self._alignment_history:
            alignment_history = state.alignment_history.write(
                state.time, alignments)
        else:
            alignment_history = ()

        next_state = AttentionWrapperState(time=state.time + 1,
                                           cell_state=next_cell_state,
                                           attention=attention,
                                           alignment_history=alignment_history)

        if self._output_attention:
            return attention, next_state
        else:
            return cell_output, next_state
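
The assert-then-identity idiom used above is the standard way to make a runtime shape check fire in graph mode: the `identity` op carries the control dependency, so any consumer of the checked tensor also triggers the assertion. A minimal standalone sketch (names are illustrative):

```python
import tensorflow as tf

def check_batch_size(cell_output, expected_batch_size):
    # The assert op only runs if something depends on it; routing the tensor
    # through identity() under control_dependencies creates that dependency.
    check = tf.debugging.assert_equal(
        tf.shape(cell_output)[0], expected_batch_size,
        message="Non-matching batch sizes between memory and query")
    with tf.control_dependencies([check]):
        return tf.identity(cell_output, name="checked_cell_output")
```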
Example #43
    def batch_jacobian(self,
                       target,
                       source,
                       unconnected_gradients=UnconnectedGradients.NONE,
                       parallel_iterations=None,
                       experimental_use_pfor=True):
        """Computes and stacks per-example jacobians.

    See [wikipedia article](http://en.wikipedia.org/wiki/jacobian_matrix_and_determinant) for the
    definition of a Jacobian. This function is essentially an efficient
    implementation of the following:
    
    `tf.stack([self.jacobian(y[i], x[i]) for i in range(x.shape[0])])`.

    Note that compared to `GradientTape.jacobian` which computes gradient of
    each output value w.r.t each input value, this function is useful when
    `target[i,...]` is independent of `source[j,...]` for `j != i`. This
    assumption allows more efficient computation as compared to
    `GradientTape.jacobian`. The output, as well as intermediate activations,
    are lower dimensional and avoid a bunch of redundant zeros which would
    result in the jacobian computation given the independence assumption.

    Example usage:

    ```python
    with tf.GradientTape() as g:
      x = tf.constant([[1., 2.], [3., 4.]], dtype=tf.float32)
      g.watch(x)
      y = x * x
    batch_jacobian = g.batch_jacobian(y, x) 
    # batch_jacobian is [[[2,  0], [0,  4]], [[6,  0], [0,  8]]]
    ```

    Args:
      target: A tensor with rank 2 or higher and with shape [b, y1, ..., y_n].
        `target[i,...]` should only depend on `source[i,...]`.
      source: A tensor with rank 2 or higher and with shape [b, x1, ..., x_m].
      unconnected_gradients: a value which can either hold 'none' or 'zero' and
        alters the value which will be returned if the target and sources are
        unconnected. The possible values and effects are detailed in
        'UnconnectedGradients' and it defaults to 'none'.
      parallel_iterations: A knob to control how many iterations are dispatched
        in parallel. This knob can be used to control the total memory usage.
      experimental_use_pfor: If true, uses pfor for computing the Jacobian. Else
        uses a tf.while_loop.

    Returns:
      A tensor `t` with shape [b, y_1, ..., y_n, x1, ..., x_m] where `t[i, ...]`
      is the jacobian of `target[i, ...]` w.r.t. `source[i, ...]`, i.e. stacked
      per-example jacobians.

    Raises:
      RuntimeError: If called on a non-persistent tape with eager execution
        enabled and without enabling experimental_use_pfor.
      ValueError: If vectorization of jacobian computation fails or if first
        dimension of `target` and `source` do not match.
    """
        target_shape = target.shape
        if target_shape.rank is None:
            dim = tensor_shape.Dimension(None)
        else:
            dim = target_shape.dims[0]
        if not (target_shape.with_rank_at_least(2)
                and source.shape.with_rank_at_least(2)
                and dim.is_compatible_with(source.shape[0])):
            raise ValueError("Need first dimension of target shape (%s) and "
                             "source shape (%s) to match." %
                             (target.shape, source.shape))
        if target_shape.is_fully_defined():
            batch_size = int(target_shape[0])
            target_row_size = target_shape.num_elements() // batch_size
        else:
            target_shape = array_ops.shape(target)
            batch_size = target_shape[0]
            target_row_size = array_ops.size(target) // batch_size
        source_shape = array_ops.shape(source)
        # Flatten target to 2-D.
        # Note that we push and pop the tape here and below. This is needed since we
        # need gradients through the enclosed operations.
        self._push_tape()
        with ops.control_dependencies(
            [check_ops.assert_equal(batch_size, source_shape[0])]):
            target = array_ops.reshape(target, [batch_size, target_row_size])
        self._pop_tape()

        def loop_fn(i):
            self._push_tape()
            y = array_ops.gather(target, i, axis=1)
            self._pop_tape()
            return self.gradient(y,
                                 source,
                                 unconnected_gradients=unconnected_gradients)

        if experimental_use_pfor:
            try:
                output = pfor_ops.pfor(loop_fn,
                                       target_row_size,
                                       parallel_iterations=parallel_iterations)
            except ValueError as err:
                six.reraise(
                    ValueError,
                    ValueError(
                        str(err) +
                        "\nEncountered an exception while vectorizing the "
                        "batch_jacobian computation. Vectorization can be disabled by "
                        "setting experimental_use_pfor to False."),
                    sys.exc_info()[2])
        else:
            if context.executing_eagerly() and not self._persistent:
                raise RuntimeError(
                    "GradientTape must be created with persistent=True"
                    " to compute the batch_jacobian with eager execution enabled and "
                    " with experimental_use_pfor set to False.")
            output = pfor_ops.for_loop(loop_fn,
                                       target.dtype,
                                       target_row_size,
                                       parallel_iterations=parallel_iterations)
        if output is None:
            return None
        output = array_ops.reshape(output, [target_row_size, batch_size, -1])
        output = array_ops.transpose(output, [1, 0, 2])
        new_shape = array_ops.concat([target_shape, source_shape[1:]], axis=0)
        return array_ops.reshape(output, new_shape)
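
A short usage sketch of the non-pfor path, which (as documented above) requires a persistent tape under eager execution:

```python
import tensorflow as tf

x = tf.random.normal([8, 3])
with tf.GradientTape(persistent=True) as g:  # persistent=True is required here
    g.watch(x)
    y = tf.tanh(x)
jac = g.batch_jacobian(y, x, experimental_use_pfor=False)
print(jac.shape)  # (8, 3, 3): one 3x3 Jacobian per example, stacked
```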
Example #44
def assert_true_mean_equal_by_dkwm_two_sample(
    samples1, low1, high1, samples2, low2, high2,
    false_fail_rate=1e-6, name=None):
  """Asserts the means of the given distributions are equal.

  More precisely, fails if there is enough evidence (using the
  [Dvoretzky-Kiefer-Wolfowitz-Massart inequality]
  (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval))
  that the means of the distributions from which the given samples are
  drawn are _not_ equal with statistical significance `false_fail_rate`
  or stronger, otherwise passes.  If you also want to check that you
  are gathering enough evidence that a pass is not spurious, see
  `min_num_samples_for_dkwm_mean_two_sample_test` and
  `min_discrepancy_of_true_means_detectable_by_dkwm_two_sample`.

  Note that `false_fail_rate` is a total false failure rate for all
  the assertions in the batch.  As such, if the batch is nontrivial,
  the assertion will insist on stronger evidence to fail any one member.

  Args:
    samples1: Floating-point `Tensor` of samples from the
      distribution(s) A.  Entries are assumed IID across the 0th
      dimension.  The other dimensions must broadcast with `low1`,
      `high1`, `low2`, and `high2`.
      The support is bounded: `low1 <= samples1 <= high1`.
    low1: Floating-point `Tensor` of lower bounds on the supports of the
      distributions A.
    high1: Floating-point `Tensor` of upper bounds on the supports of
      the distributions A.
    samples2: Floating-point `Tensor` of samples from the
      distribution(s) B.  Entries are assumed IID across the 0th
      dimension.  The other dimensions must broadcast with `low1`,
      `high1`, `low2`, and `high2`.
      The support is bounded: `low2 <= samples2 <= high2`.
    low2: Floating-point `Tensor` of lower bounds on the supports of the
      distributions B.
    high2: Floating-point `Tensor` of upper bounds on the supports of
      the distributions B.
    false_fail_rate: *Scalar* floating-point `Tensor` admissible total
      rate of mistakes.
    name: A name for this operation (optional).

  Returns:
    check: Op that raises `InvalidArgumentError` if any pair of confidence
      intervals true for corresponding true means do not overlap.
  """
  with ops.name_scope(
      name, "assert_true_mean_equal_by_dkwm_two_sample",
      [samples1, low1, high1, samples2, low2, high2, false_fail_rate]):
    samples1 = ops.convert_to_tensor(samples1, name="samples1")
    low1 = ops.convert_to_tensor(low1, name="low1")
    high1 = ops.convert_to_tensor(high1, name="high1")
    samples2 = ops.convert_to_tensor(samples2, name="samples2")
    low2 = ops.convert_to_tensor(low2, name="low2")
    high2 = ops.convert_to_tensor(high2, name="high2")
    false_fail_rate = ops.convert_to_tensor(
        false_fail_rate, name="false_fail_rate")
    samples1 = _check_shape_dominates(samples1, [low1, high1])
    samples2 = _check_shape_dominates(samples2, [low2, high2])
    compatible_samples = check_ops.assert_equal(
        array_ops.shape(samples1)[1:], array_ops.shape(samples2)[1:])
    with ops.control_dependencies([compatible_samples]):
      # Could in principle play games with cleverly allocating
      # significance instead of the even split below.  It may be possible
      # to get tighter intervals, in order to obtain a higher power test.
      # Any allocation strategy that depends only on the support bounds
      # and sample counts should be valid; however, because the intervals
      # scale as O(-log(false_fail_rate)), there doesn't seem to be much
      # room to win.
      min_mean_2, max_mean_2 = true_mean_confidence_interval_by_dkwm(
          samples2, low2, high2, false_fail_rate / 2.)
      return assert_true_mean_in_interval_by_dkwm(
          samples1, low1, high1, min_mean_2, max_mean_2, false_fail_rate / 2.)
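
A hedged usage sketch, assuming the enclosing statistical-testing module is imported as `st` (the import path is not shown in this excerpt):

```python
import tensorflow as tf

samples1 = tf.random.uniform([1000], minval=0., maxval=1.)
samples2 = tf.random.uniform([1000], minval=0., maxval=1.)
check = st.assert_true_mean_equal_by_dkwm_two_sample(
    samples1, 0., 1., samples2, 0., 1., false_fail_rate=1e-6)
with tf.control_dependencies([check]):
    samples1 = tf.identity(samples1)  # consumers of samples1 now run the check
```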
Example #45
def _ragged_segment_aggregate(unsorted_segment_op,
                              data,
                              segment_ids,
                              num_segments,
                              name=None):
    """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`.

  Returns a RaggedTensor `output` with `num_segments` rows, where the row
  `output[i]` is formed by combining all rows of `data` whose corresponding
  `segment_id` is `i`.  The values in each row are combined using
  `unsorted_segment_op`.

  The length of the row `output[i]` will be the maximum of the lengths of
  all rows of `data` whose corresponding `segment_id` is `i`.  If no `data`
  rows correspond to a given segment ID, then the output row for that segment
  ID will be empty.

  Args:
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in each row.  Must have the same signature and basic behavior as
      `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    data: A `RaggedTensor` containing the values to be combined.
    segment_ids: A `Tensor` or `RaggedTensor`.  Must have type `int64` or
      `int32`.  `segment_ids.shape` must be a prefix of `data.shape`.
      `segment_ids` is not required to be sorted.
    num_segments: An `int32` or `int64` scalar.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the aggregated values.  The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_segments] + data.shape[segment_ids.rank:]`.
  Raises:
    ValueError: If segment_ids.shape is not a prefix of data.shape.
  """
    if not (ragged_tensor.is_ragged(data)
            or ragged_tensor.is_ragged(segment_ids)):
        return unsorted_segment_op(data, segment_ids, num_segments, name)

    with ops.name_scope(name, 'RaggedSegment',
                        [data, segment_ids, num_segments]) as name:
        data = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
            data, name='data')
        segment_ids = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
            segment_ids, name='segment_ids')

        if ragged_tensor.is_ragged(segment_ids):
            if not ragged_tensor.is_ragged(data):
                raise ValueError(
                    'segment_ids.shape must be a prefix of data.shape, '
                    'but segment_ids is ragged and data is not.')
            check_splits = check_ops.assert_equal(
                segment_ids.row_splits,
                data.row_splits,
                message='segment_ids.shape must be a prefix of data.shape')
            with ops.control_dependencies([check_splits]):
                return _ragged_segment_aggregate(unsorted_segment_op,
                                                 data.values,
                                                 segment_ids.values,
                                                 num_segments, name)

        segment_ids = math_ops.cast(segment_ids, dtypes.int64)

        # Find the length of each row in data.  (dtype=int64, shape=[data_nrows])
        data_row_lengths = data.row_splits[1:] - data.row_splits[:-1]

        # Find the length that each output row will have.  The length of the row
        # corresponding to segment `id` is `max(data_row_lengths[i])` where
        # `segment_ids[i]=id`.  (dtype=int64, shape=[output_nrows])
        output_row_lengths = math_ops.maximum(
            math_ops.unsorted_segment_max(data_row_lengths, segment_ids,
                                          num_segments), 0)
        assert output_row_lengths.dtype == dtypes.int64

        # Build the splits tensor for the output RaggedTensor.
        output_splits = array_ops.concat([
            array_ops.zeros([1], dtypes.int64),
            math_ops.cumsum(output_row_lengths)
        ],
                                         axis=0)

        # For each row in `data`, find the start & limit position where that row's
        # values will be aggregated in output.values.
        data_row_to_out_row_start = array_ops.gather(output_splits,
                                                     segment_ids)
        data_row_to_out_row_limit = data_row_to_out_row_start + data_row_lengths

        # For each value in `data.values`, find the position where it will
        # be aggregated in `output.values`.
        # Get the target output values index for each data values index.
        data_val_to_out_val_index = range(data_row_to_out_row_start,
                                          data_row_to_out_row_limit).values

        # Recursively aggregate the values.
        output_values = _ragged_segment_aggregate(unsorted_segment_op,
                                                  data.values,
                                                  data_val_to_out_val_index,
                                                  output_splits[-1])
        return ragged_factory_ops.from_row_splits(output_values, output_splits)
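
A small worked example of the helper above, combining ragged rows per segment with `tf.math.unsorted_segment_sum` (expected output computed by hand):

```python
import tensorflow as tf

data = tf.ragged.constant([[1, 2], [3], [4, 5, 6], [7]])
segment_ids = tf.constant([0, 0, 1, 1])
# Rows sharing a segment id are combined elementwise; shorter rows simply
# contribute nothing at the trailing positions.
out = _ragged_segment_aggregate(tf.math.unsorted_segment_sum,
                                data, segment_ids, num_segments=2)
# -> [[1+3, 2], [4+7, 5, 6]] == [[4, 2], [11, 5, 6]]
```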
Example #46
    def __init__(self,
                 cat,
                 components,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="Mixture"):
        """Initialize a Mixture distribution.

    A `Mixture` is defined by a `Categorical` (`cat`, representing the
    mixture probabilities) and a list of `Distribution` objects
    all having matching dtype, batch shape, event shape, and continuity
    properties (the components).

    The `num_classes` of `cat` must be possible to infer at graph construction
    time and match `len(components)`.

    Args:
      cat: A `Categorical` distribution instance, representing the probabilities
          of `distributions`.
      components: A list or tuple of `Distribution` instances.
        Each instance must have the same type, be defined on the same domain,
        and have matching `event_shape` and `batch_shape`.
      validate_args: Python `bool`, default `False`. If `True`, raise a runtime
        error if batch or event ranks are inconsistent between cat and any of
        the distributions. This is only checked if the ranks cannot be
        determined statically at graph construction time.
      allow_nan_stats: Boolean, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: A name for this distribution (optional).

    Raises:
      TypeError: If cat is not a `Categorical`, or `components` is not
        a list or tuple, or the elements of `components` are not
        instances of `Distribution`, or do not have matching `dtype`.
      ValueError: If `components` is an empty list or tuple, or its
        elements do not have a statically known event rank.
        If `cat.num_classes` cannot be inferred at graph creation time,
        or the constant value of `cat.num_classes` is not equal to
        `len(components)`, or all `components` and `cat` do not have
        matching static batch shapes, or all components do not
        have matching static event shapes.
    """
        parameters = locals()
        if not isinstance(cat, categorical.Categorical):
            raise TypeError(
                "cat must be a Categorical distribution, but saw: %s" % cat)
        if not components:
            raise ValueError("components must be a non-empty list or tuple")
        if not isinstance(components, (list, tuple)):
            raise TypeError("components must be a list or tuple, but saw: %s" %
                            components)
        if not all(
                isinstance(c, distribution.Distribution) for c in components):
            raise TypeError(
                "all entries in components must be Distribution instances"
                " but saw: %s" % components)

        dtype = components[0].dtype
        if not all(d.dtype == dtype for d in components):
            raise TypeError("All components must have the same dtype, but saw "
                            "dtypes: %s" % [(d.name, d.dtype)
                                            for d in components])
        static_event_shape = components[0].event_shape
        static_batch_shape = cat.batch_shape
        for d in components:
            static_event_shape = static_event_shape.merge_with(d.event_shape)
            static_batch_shape = static_batch_shape.merge_with(d.batch_shape)
        if static_event_shape.ndims is None:
            raise ValueError(
                "Expected to know rank(event_shape) from components, but "
                "none of the components provide a static number of ndims")

        # Ensure that all batch and event ndims are consistent.
        with ops.name_scope(name, values=[cat.logits]):
            num_components = cat.event_size
            static_num_components = tensor_util.constant_value(num_components)
            if static_num_components is None:
                raise ValueError(
                    "Could not infer number of classes from cat and unable "
                    "to compare this value to the number of components passed in."
                )
            # Possibly convert from numpy 0-D array.
            static_num_components = int(static_num_components)
            if static_num_components != len(components):
                raise ValueError(
                    "cat.num_classes != len(components): %d vs. %d" %
                    (static_num_components, len(components)))

            cat_batch_shape = cat.batch_shape_tensor()
            cat_batch_rank = array_ops.size(cat_batch_shape)
            if validate_args:
                batch_shapes = [d.batch_shape_tensor() for d in components]
                batch_ranks = [array_ops.size(bs) for bs in batch_shapes]
                check_message = ("components[%d] batch shape must match cat "
                                 "batch shape")
                self._assertions = [
                    check_ops.assert_equal(cat_batch_rank,
                                           batch_ranks[di],
                                           message=check_message % di)
                    for di in range(len(components))
                ]
                self._assertions += [
                    check_ops.assert_equal(cat_batch_shape,
                                           batch_shapes[di],
                                           message=check_message % di)
                    for di in range(len(components))
                ]
            else:
                self._assertions = []

            self._cat = cat
            self._components = list(components)
            self._num_components = static_num_components
            self._static_event_shape = static_event_shape
            self._static_batch_shape = static_batch_shape

        # We let the Mixture distribution access _graph_parents since its arguably
        # more like a baseclass.
        graph_parents = self._cat._graph_parents  # pylint: disable=protected-access
        for c in self._components:
            graph_parents += c._graph_parents  # pylint: disable=protected-access

        super(Mixture, self).__init__(
            dtype=dtype,
            reparameterization_type=distribution.NOT_REPARAMETERIZED,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            parameters=parameters,
            graph_parents=graph_parents,
            name=name)
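
A hedged construction sketch using the contrib-era distributions module this class comes from (the two `Normal` components and the `Categorical` weights are illustrative):

```python
ds = tf.contrib.distributions
gm = ds.Mixture(
    cat=ds.Categorical(probs=[0.3, 0.7]),
    components=[
        ds.Normal(loc=-1., scale=0.1),
        ds.Normal(loc=1., scale=0.5),
    ],
    validate_args=True)  # enables the runtime batch-shape asserts built above
samples = gm.sample(5)
```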
Example #47
def batch_jacobian(output, inp, use_pfor=True, parallel_iterations=None):
  """Computes and stacks jacobians of `output[i,...]` w.r.t. `input[i,...]`.

  e.g.
  x = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
  y = x * x
  jacobian = batch_jacobian(y, x)
  # => [[[2,  0], [0,  4]], [[6,  0], [0,  8]]]

  Args:
    output: A tensor with shape [b, y1, ..., y_n]. `output[i,...]` should
      only depend on `inp[i,...]`.
    inp: A tensor with shape [b, x1, ..., x_m]
    use_pfor: If true, uses pfor for computing the Jacobian. Else uses a
      tf.while_loop.
    parallel_iterations: A knob to control how many iterations are dispatched in
      parallel. This knob can be used to control the total memory usage.

  Returns:
    A tensor `t` with shape [b, y_1, ..., y_n, x1, ..., x_m] where `t[i, ...]`
    is the jacobian of `output[i, ...]` w.r.t. `inp[i, ...]`, i.e. stacked
    per-example jacobians.

  Raises:
    ValueError: if first dimension of `output` and `inp` do not match.
  """
  output_shape = output.shape
  if not output_shape[0].is_compatible_with(inp.shape[0]):
    raise ValueError("Need first dimension of output shape (%s) and inp shape "
                     "(%s) to match." % (output.shape, inp.shape))
  if output_shape.is_fully_defined():
    batch_size = int(output_shape[0])
    output_row_size = output_shape.num_elements() // batch_size
  else:
    output_shape = array_ops.shape(output)
    batch_size = output_shape[0]
    output_row_size = array_ops.size(output) // batch_size
  inp_shape = array_ops.shape(inp)
  # Flatten output to 2-D.
  with ops.control_dependencies(
      [check_ops.assert_equal(batch_size, inp_shape[0])]):
    output = array_ops.reshape(output, [batch_size, output_row_size])

  def loop_fn(i):
    y = array_ops.gather(output, i, axis=1)
    return gradient_ops.gradients(y, inp)[0]

  if use_pfor:
    pfor_output = control_flow_ops.pfor(loop_fn, output_row_size,
                                        parallel_iterations=parallel_iterations)
  else:
    pfor_output = control_flow_ops.for_loop(
        loop_fn, output.dtype,
        output_row_size,
        parallel_iterations=parallel_iterations)
  if pfor_output is None:
    return None
  pfor_output = array_ops.reshape(pfor_output,
                                  [output_row_size, batch_size, -1])
  output = array_ops.transpose(pfor_output, [1, 0, 2])
  new_shape = array_ops.concat([output_shape, inp_shape[1:]], axis=0)
  return array_ops.reshape(output, new_shape)
Example #48
 def _assert_self_adjoint(self):
     return check_ops.assert_equal(
         self.row,
         self.col,
         message=("row and col are not the same, and "
                  "so this operator is not self-adjoint."))
Example #49
    def predict(self, features):
        """Computes predictions multiple steps into the future.

    Args:
      features: A dictionary with the following key/value pairs:
        PredictionFeatures.TIMES: A [batch size, predict window size]
          integer Tensor of times, after the window of data indicated by
          `STATE_TUPLE`, to make predictions for.
        PredictionFeatures.STATE_TUPLE: A tuple of (times, values), times with
          shape [batch size, self.input_window_size], values with shape [batch
          size, self.input_window_size, self.num_features] representing a
          segment of the time series before `TIMES`. This data is used
          to seed the autoregressive computation, and should contain data for
          at least self.input_window_size timesteps.
    Returns:
      A dictionary with keys, "mean", "covariance". The
      values are Tensors of shape [batch_size, predict window size,
      num_features] and correspond to the values passed in `TIMES`.
    """
        predict_times = math_ops.cast(
            ops.convert_to_tensor(features[PredictionFeatures.TIMES]),
            dtypes.int32)
        batch_size = array_ops.shape(predict_times)[0]
        num_predict_values = array_ops.shape(predict_times)[1]
        prediction_iterations = (
            (num_predict_values + self.output_window_size - 1) //
            self.output_window_size)
        # Pad predict_times so as to have exact multiple of self.output_window_size
        # values per example.
        padding_size = (prediction_iterations * self.output_window_size -
                        num_predict_values)
        padding = array_ops.zeros([batch_size, padding_size],
                                  predict_times.dtype)
        predict_times = control_flow_ops.cond(
            padding_size > 0,
            lambda: array_ops.concat([predict_times, padding], 1),
            lambda: predict_times)
        state = features[PredictionFeatures.STATE_TUPLE]
        (state_times, state_values) = state
        state_times = math_ops.cast(ops.convert_to_tensor(state_times),
                                    dtypes.int32)
        state_values = ops.convert_to_tensor(state_values, dtype=self.dtype)

        initial_input_times = predict_times[:, :self.output_window_size]
        if self.input_window_size > 0:
            initial_input_times = array_ops.concat([
                state_times[:, -self.input_window_size:], initial_input_times
            ], 1)
            values_size = array_ops.shape(state_values)[1]
            times_size = array_ops.shape(state_times)[1]
            with ops.control_dependencies([
                    check_ops.assert_greater_equal(values_size,
                                                   self.input_window_size),
                    check_ops.assert_equal(values_size, times_size)
            ]):
                initial_input_values = state_values[:, -self.
                                                    input_window_size:, :]
        else:
            initial_input_values = 0

        # Iterate over the predict_times, predicting self.output_window_size values
        # in each iteration.
        def _while_condition(iteration_number, *unused_args):
            return math_ops.less(iteration_number, prediction_iterations)

        def _while_body(iteration_number, input_times, input_values, mean_ta,
                        covariance_ta):
            """Predict self.output_window_size values."""
            prediction_ops = self.prediction_ops(input_times, input_values)
            predicted_mean = prediction_ops["mean"]
            predicted_covariance = prediction_ops["covariance"]
            offset = self.output_window_size * gen_math_ops.minimum(
                iteration_number + 1, prediction_iterations - 1)
            if self.input_window_size > 0:
                if self.output_window_size < self.input_window_size:
                    new_input_values = array_ops.concat([
                        input_values[:, self.output_window_size:, :],
                        predicted_mean
                    ], 1)
                    new_input_times = array_ops.concat([
                        input_times[:, self.output_window_size:],
                        predict_times[:,
                                      offset:offset + self.output_window_size]
                    ], 1)
                else:
                    new_input_values = predicted_mean[:, -self.
                                                      input_window_size:, :]
                    new_input_times = predict_times[:, offset - self.
                                                    input_window_size:offset +
                                                    self.output_window_size]
            else:
                new_input_values = input_values
                new_input_times = predict_times[:, offset:offset +
                                                self.output_window_size]
            new_input_times.set_shape(initial_input_times.get_shape())
            new_mean_ta = mean_ta.write(iteration_number, predicted_mean)
            if isinstance(covariance_ta, tensor_array_ops.TensorArray):
                new_covariance_ta = covariance_ta.write(
                    iteration_number, predicted_covariance)
            else:
                new_covariance_ta = covariance_ta
            return (iteration_number + 1, new_input_times, new_input_values,
                    new_mean_ta, new_covariance_ta)

        # Note that control_flow_ops.while_loop doesn't seem happy with None. Hence
        # using 0 for cases where we don't want to predict covariance.
        covariance_ta_init = (tensor_array_ops.TensorArray(
            dtype=self.dtype, size=prediction_iterations)
                              if self.loss != ARModel.SQUARED_LOSS else 0.)
        mean_ta_init = tensor_array_ops.TensorArray(dtype=self.dtype,
                                                    size=prediction_iterations)
        _, _, _, mean_ta, covariance_ta = control_flow_ops.while_loop(
            _while_condition, _while_body, [
                0, initial_input_times, initial_input_values, mean_ta_init,
                covariance_ta_init
            ])

        def _parse_ta(values_ta):
            """Helper function to parse the returned TensorArrays."""

            if not isinstance(values_ta, tensor_array_ops.TensorArray):
                return None
            predictions_length = prediction_iterations * self.output_window_size
            # Shape [prediction_iterations, batch_size, self.output_window_size,
            #        self.num_features]
            values_packed = values_ta.stack()
            # Transpose to move batch dimension outside.
            output_values = array_ops.reshape(
                array_ops.transpose(values_packed, [1, 0, 2, 3]),
                array_ops.stack([batch_size, predictions_length, -1]))
            # Clip to desired size
            return output_values[:, :num_predict_values, :]

        predicted_mean = _parse_ta(mean_ta)
        predicted_covariance = _parse_ta(covariance_ta)
        if predicted_covariance is None:
            predicted_covariance = array_ops.ones_like(predicted_mean)

        # Transform and scale the mean and covariance appropriately.
        predicted_mean = self._scale_back_data(predicted_mean)
        predicted_covariance = self._scale_back_variance(predicted_covariance)

        return {"mean": predicted_mean, "covariance": predicted_covariance}
Example #50
def matmul(a: ragged_tensor.RaggedOrDense,
           b: ragged_tensor.RaggedOrDense,
           transpose_a=False,
           transpose_b=False,
           adjoint_a=False,
           adjoint_b=False,
           a_is_sparse=False,
           b_is_sparse=False,
           output_type=None,
           name=None):
  """Multiplies matrix `a` by matrix `b`.

  If all transpose or adjoint attributes are `False` then:

  ```
  output[..., i, j] = sum_k (a[..., i, k] * b[..., k, j]), for all indices i, j.
  ```

  The inputs `a` and `b` must have `rank >= 2`, where the outermost `rank - 2`
  dimensions are batch dimensions.  The inputs must have the same dtype.  See
  `tf.matmul` for more information.

  Args:
    a: `tf.Tensor` or `RaggedTensor` with `rank > 1`.
    b: `tf.Tensor` or `RaggedTensor` with same type and rank as `a`.
    transpose_a: If `True`, `a` is transposed before multiplication.
    transpose_b: If `True`, `b` is transposed before multiplication.
    adjoint_a: If `True`, `a` is conjugated & transposed before multiplication.
    adjoint_b: If `True`, `b` is conjugated & transposed before multiplication.
    a_is_sparse: If `True`, optimize assuming `a` is mostly zero.
    b_is_sparse: If `True`, optimize assuming `b` is mostly zero.
    output_type: The output datatype (optional).
    name: Name for the operation (optional).

  Returns:
    A `Tensor` or `RaggedTensor` with the same rank and shape as `a`, where
    each inner-most matrix is the product of the corresponding matrices in `a`
    and `b`.
  """
  if transpose_a and adjoint_a:
    raise ValueError('Only one of transpose_a and adjoint_a can be True.')
  if transpose_b and adjoint_b:
    raise ValueError('Only one of transpose_b and adjoint_b can be True.')

  kwargs = dict(
      transpose_a=transpose_a,
      transpose_b=transpose_b,
      adjoint_a=adjoint_a,
      adjoint_b=adjoint_b,
      a_is_sparse=a_is_sparse,
      b_is_sparse=b_is_sparse,
      output_type=output_type)

  with ops.name_scope(name, 'RaggedMatMul', [a, b]) as name:
    a = ragged_tensor.convert_to_tensor_or_ragged_tensor(a, name='a')
    b = ragged_tensor.convert_to_tensor_or_ragged_tensor(b, name='b')

    a_is_ragged = isinstance(a, ragged_tensor.RaggedTensor)
    b_is_ragged = isinstance(b, ragged_tensor.RaggedTensor)
    if not (a_is_ragged or b_is_ragged):
      return math_ops.matmul(a, b, **kwargs)

    if a.dtype != b.dtype:
      raise ValueError('`a` and `b` must have the same dtype.')

    # TODO(edloper): Support broadcasting inputs.  (Broadcast support is not
    # documented by https://www.tensorflow.org/api_docs/python/tf/linalg/matmul,
    # but it is supported by the op.)

    # Find the rank of the input tensors.
    if a.shape.rank is None:
      if b.shape.rank is None:
        raise ValueError('matmul requires at least one input to have known '
                         'rank if either input is ragged.')
      rank = b.shape.rank
    else:
      if b.shape.rank is not None and a.shape.rank != b.shape.rank:
        raise ValueError('`a` and `b` must have the same rank.')
      rank = a.shape.rank

    # At least one of `a` and `b` is ragged; and ragged tensors always have
    # rank>=2.
    if rank < 2:
      # This can happen if e.g. `a` is a 1D dense tensor and `b` is a
      # ragged tensor with unknown rank.  Since ragged tensors always have
      # `rank>=2`, this implies that `a` and `b` have different ranks.
      raise ValueError('`a` and `b` must have the same rank.')

    # Rank>3: We have multiple batch dimensions.  Merge them into a single
    # batch dimension, recursively call `matmul`, and then restore the original
    # batch dimension (using a.row_splits).
    if rank > 3:
      shape_err = 'Batch dimensions of `a` and `b` do not have the same size.'
      if not a_is_ragged:
        a = ragged_tensor.RaggedTensor.from_tensor(a, ragged_rank=1)
      if not b_is_ragged:
        b = ragged_tensor.RaggedTensor.from_tensor(b, ragged_rank=1)
      with ops.control_dependencies([
          check_ops.assert_equal(a.row_splits, b.row_splits, message=shape_err)
      ]):
        flat_result = matmul(a.values, b.values, **kwargs)
        return a.with_values(flat_result)

    if rank == 2:
      return _matmul_2d(a, b, **kwargs)

    assert rank == 3  # I.e., we have a single batch dimension.

    a_ragged_rank = a.ragged_rank if a_is_ragged else 0
    if a_ragged_rank == 1 and not (b_is_ragged or transpose_a or adjoint_a):
      # If `a.shape=[B, (I), J]` and `b.shape=[B, J, K]`, then we can compute
      # the result with a single dense `matmul`.
      return _matmul_3d_with_batch_dim_folding(a, b, **kwargs)
    else:
      # Otherwise, fall back on using `map_fn`.
      return _matmul_3d_with_map_fn(a, b, **kwargs)
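
In recent TF releases this function is registered as the ragged dispatch target for `tf.linalg.matmul`, so `tf.matmul` accepts ragged operands directly; a minimal sketch of the rank-3, batch-folding case:

```python
import tensorflow as tf

a = tf.ragged.constant([[[1., 2.], [3., 4.], [5., 6.]],
                        [[7., 8.]]], ragged_rank=1)  # shape [2, (rows), 2]
b = tf.constant([[[1., 0.], [0., 1.]],
                 [[2., 0.], [0., 2.]]])              # shape [2, 2, 2]
c = tf.matmul(a, b)  # ragged row structure preserved: shape [2, (rows), 2]
```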
Example #51
def _add_batched_ragged_partition(rt,
                                  partition,
                                  tensor_dict,
                                  feature_key,
                                  validate,
                                  outer_splits=None):
    """Adds a batched ragged partition tensor to a batched ragged tensor.

  Args:
    rt: A RaggedTensor with shape [batch_size, ...].
    partition: The partition configuration object.  Specifies the key that
      should be used to look up the partition tensor (unless partition is a
      RaggedFeature.UniformRowLength, in which case there is no partition
      tensor).  The specified tensor must have shape [batch_size, ...].
    tensor_dict: The dictionary mapping keys to tensors.
    feature_key: The name of the feature being parsed (for error messages).
    validate: Whether to validate that the values form a valid RaggedTensor.
    outer_splits: If not None, then we have two batch dimensions, and this
      is the row-splits for the collapsed batch dimension.  Every partition
      tensor must have an outer row_splits that matches this value.

  Returns:
    A new RaggedTensor where each batch item `rt[i]` has been partitioned
    using the `partition_t[i]`.
  """
    if isinstance(partition, RaggedFeature.UniformRowLength):
        if rt.ragged_rank > 1:
            length = ops.convert_to_tensor(partition.length,
                                           rt.row_splits.dtype)
            return ragged_tensor.RaggedTensor.from_row_splits(
                ragged_tensor.RaggedTensor.from_uniform_row_length(
                    rt.values, length, validate=validate),
                rt.row_splits // length,
                validate=validate)
        else:
            reshaped_vals = array_ops.reshape(
                rt.values,
                array_ops.concat([[-1, partition.length],
                                  array_ops.shape(rt.values)[1:]],
                                 axis=0))
            return ragged_tensor.RaggedTensor.from_row_splits(
                reshaped_vals,
                rt.row_splits // partition.length,
                validate=validate)

    partition_t = tensor_dict[partition.key]
    if partition_t.values.dtype != rt.row_splits.dtype:
        partition_t = math_ops.cast(partition_t, rt.row_splits.dtype)

    checks = []
    if outer_splits is not None:
        if validate:
            checks.append(
                check_ops.assert_equal(
                    outer_splits,
                    partition_t.row_splits,
                    message="Feature %s: values and partitions are not aligned"
                    % feature_key))
        partition_t = partition_t.values

    with ops.control_dependencies(checks):
        if isinstance(partition,
                      (RaggedFeature.RowSplits, RaggedFeature.RowLimits)):
            if isinstance(partition, RaggedFeature.RowSplits):
                partition_t = partition_t[:, 1:]
            adjusted_limits = partition_t.values + array_ops.repeat(
                rt.row_starts(), partition_t.row_lengths())
            return partition_t.with_values(
                ragged_tensor.RaggedTensor.from_row_limits(rt.values,
                                                           adjusted_limits,
                                                           validate=validate))
        elif isinstance(partition, RaggedFeature.RowStarts):
            adjusted_starts = partition_t.values + array_ops.repeat(
                rt.row_starts(), partition_t.row_lengths())
            return partition_t.with_values(
                ragged_tensor.RaggedTensor.from_row_starts(rt.values,
                                                           adjusted_starts,
                                                           validate=validate))
        elif isinstance(partition, RaggedFeature.RowLengths):
            return partition_t.with_values(
                ragged_tensor.RaggedTensor.from_row_lengths(rt.values,
                                                            partition_t.values,
                                                            validate=validate))
        elif isinstance(partition, RaggedFeature.ValueRowIds):
            nrows = math_ops.maximum(  # number of rows in each batch item
                ragged_math_ops.reduce_max(partition_t + 1, axis=1), 0)
            adjusted_rowids = partition_t.values + array_ops.repeat(
                math_ops.cumsum(nrows, exclusive=True),
                partition_t.row_lengths())
            return ragged_tensor.RaggedTensor.from_row_lengths(
                ragged_tensor.RaggedTensor.from_value_rowids(
                    rt.values, adjusted_rowids, validate=validate),
                nrows,
                validate=validate)

        raise ValueError("Unhandled partition type %r" % partition)
Example #52
def fill_lower_triangular(x,
                          validate_args=False,
                          name="fill_lower_triangular"):
    """Creates a (batch of) lower triangular matrix from a vector of inputs.

  If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1,
  b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
  `n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`.

  Although the non-batch complexity is O(n**2), large constants and sub-optimal
  vectorization mean this function is roughly 5x slower than zeroing out the
  upper triangular directly, i.e., `tf.matrix_band_part(X, -1, 0)`. This
  function becomes competitive only when several matmul/cholesky/etc. ops can
  be elided in constructing the input. Example: wiring a fully connected layer
  as a covariance matrix; this function reduces the final layer by 2x and
  possibly reduces the network architecture complexity considerably. In most
  cases it is better
  to simply build a full matrix and zero out the upper triangular elements,
  e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather than directly
  construct a lower triangular.

  Warning: This Op is intended for convenience, not efficiency.

  Example:

  ```python
  fill_lower_triangular([1, 2, 3, 4, 5, 6])
  # Returns: [[1, 0, 0],
  #           [2, 3, 0],
  #           [4, 5, 6]]
  ```

  For comparison, a pure numpy version of this function can be found in
  `distribution_util_test.py`, function `_fill_lower_triangular`.

  Args:
    x: `Tensor` representing lower triangular elements.
    validate_args: Python `bool`, default `False`. Whether to ensure the shape
      of `x` can be mapped to a lower triangular matrix (controls non-static
      checks only).
    name: Python `str`. The name to give this op.

  Returns:
    tril: `Tensor` with lower triangular elements filled from `x`.

  Raises:
    ValueError: if `x` has a static shape which cannot be mapped to a
      lower triangular matrix.
  """
    # TODO(jvdillon): Replace this code with dedicated op when it exists.
    with ops.name_scope(name, values=[x]):
        x = ops.convert_to_tensor(x, name="x")
        if (x.get_shape().ndims is not None
                and x.get_shape()[-1].value is not None):
            d = x.get_shape()[-1].value
            # d = n(n+1)/2 implies n is:
            n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))
            d_inferred = n * (n + 1) / 2
            if d != d_inferred:
                raise ValueError(
                    "Input cannot be mapped to a lower triangular; "
                    "n*(n+1)/2 = %d != %d" % (d_inferred, d))
            final_shape = x.get_shape()[:-1].concatenate(
                tensor_shape.TensorShape([n, n]))
        else:
            d = math_ops.cast(array_ops.shape(x)[-1], dtype=dtypes.float32)
            # d = n(n+1)/2 implies n is:
            n = math_ops.cast(0.5 * (math_ops.sqrt(1. + 8. * d) - 1.),
                              dtype=dtypes.int32)
            if validate_args:
                is_valid_input_shape = check_ops.assert_equal(
                    n * (n + 1) / 2,
                    d,
                    message="Input cannot be mapped to a lower triangular.")
                n = control_flow_ops.with_dependencies([is_valid_input_shape],
                                                       n)
            final_shape = x.get_shape()[:-1].concatenate(
                tensor_shape.TensorShape([None, None]))

        def tril_ids(n):
            """Internal helper to create vector of linear indices into y."""
            # Build the ids statically; chose 512 because it implies 1MiB.
            if not tensor_util.is_tensor(n) and n <= 512:
                ids = np.arange(n**2, dtype=np.int32)
                rows = (ids / n).astype(np.int32)  # Implicit floor.
                # We need to stop incrementing the index when we encounter
                # upper-triangular elements. The idea here is to compute the
                # lower-right number of zeros then by "symmetry" subtract this from the
                # total number of zeros, n(n-1)/2.
                # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
                offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
                # We could also zero out when (rows < cols) == (rows < ids-n*rows).
                # mask = (ids <= (n + 1) * rows).astype(np.int32)
            else:
                ids = math_ops.range(n**2)
                rows = math_ops.cast(ids / n, dtype=dtypes.int32)
                offset = math_ops.cast(rows * (2 * n - rows - 1) / 2,
                                       dtype=dtypes.int32)
            return ids - offset

        # Special-case non-batch case.
        if x.get_shape().ndims == 1:
            y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n]))
            y = array_ops.matrix_band_part(y, -1, 0)
            y.set_shape(y.get_shape().merge_with(final_shape))
            return y

        # Make ids for each batch dim.
        if (x.get_shape().ndims is not None
                and x.get_shape()[:-1].is_fully_defined()):
            batch_shape = np.asarray(x.get_shape()[:-1].as_list(),
                                     dtype=np.int32)
            m = np.prod(batch_shape).astype(np.int32)
        else:
            batch_shape = array_ops.shape(x)[:-1]
            m = math_ops.reduce_prod(array_ops.shape(x)[:-1])
        batch_ids = math_ops.range(m)

        # Assemble the tril_ids into batch,tril_id pairs.
        idx = array_ops.stack([
            array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]),
            array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1])
        ])
        idx = array_ops.transpose(idx, [1, 2, 0])

        # Gather up, reshape, and return.
        y = array_ops.reshape(x, [-1, d])
        y = array_ops.gather_nd(y, idx)
        y = array_ops.reshape(y, array_ops.concat([batch_shape, [n, n]], 0))
        y = array_ops.matrix_band_part(y, -1, 0)
        y.set_shape(y.get_shape().merge_with(final_shape))
        return y
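
A quick check of the `d = n(n+1)/2` inversion used above, in plain Python:

```python
import math

d = 6                                         # lower-triangular element count
n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))  # solves d = n(n+1)/2  ->  n == 3
assert n * (n + 1) // 2 == d                  # d must be a triangular number
```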
Example #53
  def __init__(self,
               loc=None,
               covariance_matrix=None,
               validate_args=False,
               allow_nan_stats=True,
               name="MultivariateNormalFullCovariance"):
    """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and
    `covariance_matrix` arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `covariance_matrix`. The last dimension of `loc` (if provided) must
    broadcast with this.

    A non-batch `covariance_matrix` matrix is a `k x k` symmetric positive
    definite matrix.  In other words it is (real) symmetric with all eigenvalues
    strictly positive.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      covariance_matrix: Floating-point, symmetric positive definite `Tensor` of
        same `dtype` as `loc`.  The strict upper triangle of `covariance_matrix`
        is ignored, so if `covariance_matrix` is not symmetric no error will be
        raised (unless `validate_args is True`).  `covariance_matrix` has shape
        `[B1, ..., Bb, k, k]` where `b >= 0` and `k` is the event size.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if neither `loc` nor `covariance_matrix` are specified.
    """
    parameters = locals()

    # Convert the covariance_matrix up to a scale_tril and call MVNTriL.
    with ops.name_scope(name):
      with ops.name_scope("init", values=[loc, covariance_matrix]):
        if covariance_matrix is None:
          scale_tril = None
        else:
          covariance_matrix = ops.convert_to_tensor(
              covariance_matrix, name="covariance_matrix")
          if validate_args:
            assert_symmetric = check_ops.assert_equal(
                covariance_matrix,
                array_ops.matrix_transpose(covariance_matrix),
                message="Matrix was not symmetric.")
            covariance_matrix = control_flow_ops.with_dependencies(
                [assert_symmetric], covariance_matrix)
          # No need to validate that covariance_matrix is non-singular.
          # LinearOperatorLowerTriangular has an assert_non_singular method that
          # is called by the Bijector.
          # However, cholesky() ignores the upper triangular part, so we do need
          # to separately assert symmetric.
          scale_tril = linalg_ops.cholesky(covariance_matrix)
        super(MultivariateNormalFullCovariance, self).__init__(
            loc=loc,
            scale_tril=scale_tril,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            name=name)
    self._parameters = parameters
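
A hedged construction sketch using the class as defined above (parameter values are illustrative):

```python
mvn = MultivariateNormalFullCovariance(
    loc=[1., -1.],
    covariance_matrix=[[4., 0.5], [0.5, 1.]],
    validate_args=True)  # triggers the symmetry assert built above
samples = mvn.sample(3)            # shape [3, 2]
log_prob = mvn.log_prob([0., 0.])  # scalar log-density
```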
Example #54
 def op(self):
   """Returns the cluster initializer op."""
   return control_flow_ops.cond(
       math_ops.equal(self._num_remaining, 0),
       lambda: check_ops.assert_equal(self._cluster_centers_initialized, True),
       self._initialize)
Example #55
 def fn(x):
     with ops.control_dependencies([check_ops.assert_equal(x, 0)]):
         return array_ops.identity(x)
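
Under eager execution the assertion fires immediately; in graph mode it runs only when the returned identity is evaluated. For instance, assuming TF 2.x eager mode:

```python
fn(tf.constant(0))  # passes and returns the input
fn(tf.constant(1))  # raises InvalidArgumentError
```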
Example #56
  def _reshape_helper(self, x, event_shape_in, event_shape_out):
    """Reshape only the event_shape of an input `Tensor`."""

    event_ndims_in_ = _static_ndims_from_shape(event_shape_in)
    event_ndims_in = _ndims_from_shape(event_shape_in)
    x_ndims_, x_ndims = x.shape.ndims, array_ops.rank(x)

    assertions = []

    # Ensure x.event_shape is compatible with event_shape_in.
    if (event_ndims_in_ is not None
        and x_ndims_ is not None
        and x.shape.with_rank_at_least(event_ndims_in_)[
            x_ndims_-event_ndims_in_:].is_fully_defined()):
      x_event_shape_, x_event_shape = [  # pylint: disable=unbalanced-tuple-unpacking
          np.int32(x.shape[x_ndims_-event_ndims_in_:])]*2
    else:
      x_event_shape_, x_event_shape = (
          None, array_ops.shape(x)[x_ndims-event_ndims_in:])

    event_shape_in_ = tensor_util.constant_value(event_shape_in)

    if x_event_shape_ is not None and event_shape_in_ is not None:
      # Compare the shape dimensions that are fully specified in the
      # input (i.e., for which event_shape_in is not -1). If x_event_shape
      # matches along all of these dimensions, it is compatible with
      # the desired input shape and any further mismatches (i.e.,
      # incompatibility with the desired *output* shape) will be
      # caught inside of array_ops.reshape() below.
      x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0]
      event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0]
      if not np.equal(x_event_shape_specified_,
                      event_shape_in_specified_).all():
        raise ValueError(
            "Input `event_shape` does not match `event_shape_in` ({} vs {}).".
            format(x_event_shape_, event_shape_in_))
    elif self.validate_args:
      # Similarly to the static case, we compare the shape dimensions
      # that are fully specified in the input. We extract these
      # dimensions using boolean_mask(), which requires that the mask
      # have known ndims. We can assume that shape Tensors always have
      # ndims==1 (this assumption is verified inside of
      # _maybe_check_valid_shape), so the reshape operation is just a
      # no-op that formally encodes this fact to make boolean_mask()
      # happy.
      event_shape_mask = array_ops.reshape(event_shape_in >= 0, [-1])
      x_event_shape_specified = array_ops.boolean_mask(x_event_shape,
                                                       event_shape_mask)
      event_shape_in_specified = array_ops.boolean_mask(event_shape_in,
                                                        event_shape_mask)
      assertions.append(check_ops.assert_equal(
          x_event_shape_specified, event_shape_in_specified,
          message="Input `event_shape` does not match `event_shape_in`."))

    if assertions:
      x = control_flow_ops.with_dependencies(assertions, x)

    # get the parts of shape(x) that will not change
    sample_and_batch_shape = array_ops.shape(x)

    ndims = (x.shape.ndims if x.shape.ndims is not None
             else array_ops.rank(x))
    sample_and_batch_shape = sample_and_batch_shape[
        :(ndims - math_ops.abs(event_ndims_in))]

    if (event_ndims_in_ is not None
        and x_ndims_ is not None
        and event_ndims_in_ == x_ndims_):
      # Hack to allow forward/inverse_event_shape to do shape
      # inference by calling this helper method with a dummy Tensor of
      # shape event_shape_in. In this special case,
      # sample_and_batch_shape will be empty so we can preserve static
      # shape information by avoiding the concat operation below
      # (which would be a no-op).
      new_shape = event_shape_out
    else:
      new_shape = array_ops.concat(
          [sample_and_batch_shape, event_shape_out], axis=0)

    return array_ops.reshape(x, new_shape)
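
This helper appears to back a `Reshape` bijector; a hedged sketch of its intended effect, reshaping only the event dimensions while leaving sample/batch dimensions alone (constructor signature assumed from the contrib/tfp-style API):

```python
bij = Reshape(event_shape_out=[2, 3], event_shape_in=[6], validate_args=True)
y = bij.forward(tf.zeros([4, 6]))  # sample dim 4 kept; event [6] -> [2, 3]
print(y.shape)                     # (4, 2, 3)
```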
Example #57
 def _batch_size_checks(self, batch_size, error_message):
     return [check_ops.assert_equal(batch_size,
                                    self._attention_mechanism.batch_size,
                                    message=error_message)]
def assert_symmetric(matrix):
  """Return `matrix` with a runtime assertion that it equals its transpose."""
  matrix_t = array_ops.matrix_transpose(matrix)
  return control_flow_ops.with_dependencies(
      [check_ops.assert_equal(matrix, matrix_t)], matrix)
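
A quick usage sketch, assuming TF 1.x graph mode: `assert_symmetric` returns an identity of its input gated on the equality check, so the assert only fires when the result is actually evaluated.

import tensorflow as tf

matrix = tf.constant([[1., 2.], [2., 1.]])
checked = assert_symmetric(matrix)
with tf.Session() as sess:
  sess.run(checked)  # Passes; a non-symmetric input would raise
                     # InvalidArgumentError here instead.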
Example #59
0
    def __init__(self,
                 cell,
                 attention_mechanism,
                 attention_layer_size=None,
                 alignment_history=False,
                 cell_input_fn=None,
                 probability_fn=None,
                 output_attention=True,
                 initial_cell_state=None,
                 name=None):
        """Construct the `AttentionWrapper`.

    Args:
      cell: An instance of `RNNCell`.
      attention_mechanism: An instance of `AttentionMechanism`.
      attention_layer_size: Python integer, the depth of the attention (output)
        layer. If None (default), use the context as attention at each time
        step. Otherwise, feed the context and cell output into the attention
        layer to generate attention at each time step.
      alignment_history: Python boolean, whether to store alignment history
        from all time steps in the final output state (currently stored as a
        time major `TensorArray` on which you must call `stack()`).
      cell_input_fn: (optional) A `callable`.  The default is:
        `lambda inputs, attention: array_ops.concat([inputs, attention], -1)`.
      probability_fn: (optional) A `callable`.  Converts the score to
        probabilities.  The default is @{tf.nn.softmax}. Other options include
        @{tf.contrib.seq2seq.hardmax} and @{tf.contrib.sparsemax.sparsemax}.
      output_attention: Python bool.  If `True` (default), the output at each
        time step is the attention value.  This is the behavior of Luong-style
        attention mechanisms.  If `False`, the output at each time step is
        the output of `cell`.  This is the behavior of Bahdanau-style
        attention mechanisms.  In both cases, the `attention` tensor is
        propagated to the next time step via the state and is used there.
        This flag only controls whether the attention mechanism is propagated
        up to the next cell in an RNN stack or to the top RNN output.
      initial_cell_state: The initial state value to use for the cell when
        the user calls `zero_state()`.  Note that if this value is provided
        now, and the user uses a `batch_size` argument of `zero_state` which
        does not match the batch size of `initial_cell_state`, proper
        behavior is not guaranteed.
      name: Name to use when creating ops.
    """
        super(AttentionWrapper, self).__init__(name=name)
        if not isinstance(cell, core_rnn_cell.RNNCell):
            raise TypeError("cell must be an RNNCell, saw type: %s" %
                            type(cell).__name__)
        if not isinstance(attention_mechanism, AttentionMechanism):
            raise TypeError(
                "attention_mechanism must be a AttentionMechanism, saw type: %s"
                % type(attention_mechanism).__name__)
        if cell_input_fn is None:
            cell_input_fn = (lambda inputs, attention: array_ops.concat(
                [inputs, attention], -1))
        else:
            if not callable(cell_input_fn):
                raise TypeError(
                    "cell_input_fn must be callable, saw type: %s" %
                    type(cell_input_fn).__name__)
        if probability_fn is None:
            probability_fn = nn_ops.softmax
        else:
            if not callable(probability_fn):
                raise TypeError(
                    "probability_fn must be callable, saw type: %s" %
                    type(probability_fn).__name__)

        if attention_layer_size is not None:
            self._attention_layer = layers_core.Dense(attention_layer_size,
                                                      name="attention_layer",
                                                      use_bias=False)
            self._attention_size = attention_layer_size
        else:
            self._attention_layer = None
            self._attention_size = attention_mechanism.values.get_shape(
            )[-1].value

        self._cell = cell
        self._attention_mechanism = attention_mechanism
        self._cell_input_fn = cell_input_fn
        self._probability_fn = probability_fn
        self._output_attention = output_attention
        self._alignment_history = alignment_history
        with ops.name_scope(name, "AttentionWrapperInit"):
            if initial_cell_state is None:
                self._initial_cell_state = None
            else:
                final_state_tensor = nest.flatten(initial_cell_state)[-1]
                state_batch_size = (final_state_tensor.shape[0].value
                                    or array_ops.shape(final_state_tensor)[0])
                error_message = (
                    "When constructing AttentionWrapper %s: " % self._base_name
                    + "Non-matching batch sizes between the memory "
                    "(encoder output) and initial_cell_state.  Are you using "
                    "the BeamSearchDecoder?  You may need to tile your initial state "
                    "via the tf.contrib.seq2seq.tile_batch function with argument "
                    "multiple=beam_width.")
                with ops.control_dependencies([
                        check_ops.assert_equal(
                            state_batch_size,
                            self._attention_mechanism.batch_size,
                            message=error_message)
                ]):
                    self._initial_cell_state = nest.map_structure(
                        lambda s: array_ops.identity(
                            s, name="check_initial_cell_state"),
                        initial_cell_state)
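
A hedged construction sketch for the constructor above; the contrib-era `GRUCell` and `LuongAttention` classes and the `encoder_outputs` tensor are assumptions chosen for illustration.

import tensorflow as tf

# encoder_outputs: assumed [batch_size, max_time, depth] memory tensor.
cell = tf.contrib.rnn.GRUCell(num_units=128)
mechanism = tf.contrib.seq2seq.LuongAttention(
    num_units=128, memory=encoder_outputs)
attn_cell = AttentionWrapper(cell, mechanism, attention_layer_size=64)
initial_state = attn_cell.zero_state(batch_size=32, dtype=tf.float32)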
Example #60
0
  def _validate_sample_arg(self, x):
    """Helper which validates sample arg, e.g., input to `log_prob`."""
    with ops.name_scope(name="validate_sample_arg", values=[x]):
      x_ndims = (array_ops.rank(x) if x.shape.ndims is None else x.shape.ndims)
      event_ndims = (array_ops.size(self.event_shape_tensor())
                     if self.event_shape.ndims is None
                     else self.event_shape.ndims)
      batch_ndims = (array_ops.size(self.batch_shape_tensor())
                     if self.batch_shape.ndims is None
                     else self.batch_shape.ndims)
      expected_batch_event_ndims = batch_ndims + event_ndims

      if (isinstance(x_ndims, int) and
          isinstance(expected_batch_event_ndims, int)):
        if x_ndims < expected_batch_event_ndims:
          raise NotImplementedError(
              "Broadcasting is not supported; too few event dims "
              "(expected at least {}, saw {}).".format(
                  expected_batch_event_ndims, x_ndims))
        ndims_assertion = []
      elif self.validate_args:
        ndims_assertion = [
            check_ops.assert_greater_equal(
                x_ndims,
                expected_batch_event_ndims,
                message="Broadcasting is not supported; too few event dims.",
                name="assert_batch_and_event_ndims_large_enough"),
        ]

      if (self.batch_shape.is_fully_defined() and
          self.event_shape.is_fully_defined()):
        expected_batch_event_shape = np.int32(self.batch_shape.concatenate(
            self.event_shape).as_list())
      else:
        expected_batch_event_shape = array_ops.concat([
            self.batch_shape_tensor(),
            self.event_shape_tensor(),
        ], axis=0)

      sample_ndims = x_ndims - expected_batch_event_ndims
      if isinstance(sample_ndims, int):
        sample_ndims = max(sample_ndims, 0)
      if (isinstance(sample_ndims, int) and
          x.shape[sample_ndims:].is_fully_defined()):
        actual_batch_event_shape = np.int32(x.shape[sample_ndims:].as_list())
      else:
        sample_ndims = math_ops.maximum(sample_ndims, 0)
        actual_batch_event_shape = array_ops.shape(x)[sample_ndims:]

      if (isinstance(expected_batch_event_shape, np.ndarray) and
          isinstance(actual_batch_event_shape, np.ndarray)):
        if any(expected_batch_event_shape != actual_batch_event_shape):
          raise NotImplementedError("Broadcasting is not supported; "
                                    "unexpected batch and event shape "
                                    "(expected {}, saw {}).".format(
                                        expected_batch_event_shape,
                                        actual_batch_event_shape))
        # We need to set the final runtime assertions to `ndims_assertion`,
        # since it's possible that assertion was created above. We could add a
        # condition to only do so if `self.validate_args == True`; however,
        # that would be redundant, as `ndims_assertion` already encodes this
        # information.
        runtime_assertions = ndims_assertion
      elif self.validate_args:
        # We must make `ndims_assertion` a control dependency because
        # otherwise TF itself might raise an exception: the shape comparison
        # is ill-defined, i.e., one cannot even compare Tensors of different
        # rank.
        with ops.control_dependencies(ndims_assertion):
          shape_assertion = check_ops.assert_equal(
              expected_batch_event_shape,
              actual_batch_event_shape,
              message=("Broadcasting is not supported; "
                       "unexpected batch and event shape."),
              name="assert_batch_and_event_shape_same")
        runtime_assertions = [shape_assertion]
      else:
        runtime_assertions = []

      return runtime_assertions
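
To round out the example, a hedged sketch of how the returned assertion list is typically consumed; `_call_log_prob` is a hypothetical name standing in for whatever computation should be gated.

  def log_prob(self, x):
    # Gate the real computation on the runtime shape assertions; when
    # everything was checkable statically, the list is empty and this
    # is a no-op.
    with ops.control_dependencies(self._validate_sample_arg(x)):
      return self._call_log_prob(x)  # Hypothetical underlying computation.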