Example #1
  def BuildLoop(self, pred, body, loop_vars):
    """Add the loop termination condition and body to the graph."""

    loop_vars = ops.convert_n_to_tensor_or_indexed_slices(loop_vars)
    # Let the context know the loop variables so that the _Enter nodes below
    # are added into the context correctly.
    self._values = set([x.name for x in loop_vars])
    if self._outer_context is not None:
      real_vars = [self._outer_context.AddValue(x) for x in loop_vars]
    else:
      real_vars = loop_vars
    enter_vars = [_Enter(x, self._name, is_constant=False,
                         parallel_iterations=self._parallel_iterations)
                  for x in real_vars]
    self._values = set([x.name for x in enter_vars])

    merge_vars = [merge([x, x])[0] for x in enter_vars]
    self._pivot_for_pred = merge_vars[0]

    # Build the graph for pred.
    c = ops.convert_to_tensor(pred(*merge_vars))
    self._pivot = loop_cond(c, name="LoopCond")
    switch_vars = [_SwitchRefOrTensor(x, self._pivot) for x in merge_vars]

    # Build the graph for body.
    vars_for_body = [_Identity(x[1]) for x in switch_vars]
    self._pivot_for_body = vars_for_body[0]

    body_result = body(*vars_for_body)
    if not isinstance(body_result, (list, _basetuple)):
      body_result = [body_result]
    result = ops.convert_n_to_tensor_or_indexed_slices(body_result)
    next_vars = [next_iteration(x) for x in result]

    # Add the back edges to complete the loop.
    assert len(merge_vars) == len(next_vars)
    for x in zip(merge_vars, next_vars):
      x[0].op._update_input(1, x[1])

    # Add the exit ops.
    exit_vars = [exit(x[0]) for x in switch_vars]

    for m_var, n_var, e_var in zip(merge_vars, next_vars, exit_vars):
      if m_var.get_shape().is_compatible_with(n_var.get_shape()):
        e_var.set_shape(m_var.get_shape().merge_with(n_var.get_shape()))

    # Exit the loop.
    self.ExitResult(exit_vars)
    self.Exit()
    return exit_vars[0] if len(exit_vars) == 1 else exit_vars
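The wiring above is what the public `tf.while_loop` API builds for you. A minimal sketch, assuming TF1-style graph mode; the values and lambdas are illustrative:

```python
import tensorflow as tf

i = tf.constant(0)
# pred plays the role of loop_cond above; body produces the NextIteration value.
result = tf.while_loop(lambda i: i < 10,   # termination condition (LoopCond)
                       lambda i: i + 1,    # one loop iteration (body)
                       [i])                # loop_vars -> Enter/Merge/Switch nodes
# Each loop variable flows Enter -> Merge -> Switch; the body output feeds
# NextIteration back into Merge, and Exit yields `result` once pred is False.
```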
Example #2
def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops):
  """Fill in default values for grad_ys.

  Args:
    grad_ys: List of gradients, can contain None.
    ys: List of tensors.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.

  Returns:
    A list of gradients to use, without None.

  Raises:
    ValueError: If one of the grad_ys is invalid.
  """
  if len(grad_ys) != len(ys):
    raise ValueError("Passed %d grad_ys for %d ys" % (len(grad_ys), len(ys)))
  grad_ys = ops.convert_n_to_tensor_or_indexed_slices(grad_ys, name="grad_y")
  for i in xrange(len(grad_ys)):
    grad_y = grad_ys[i]
    y = ys[i]
    if grad_y is None:
      with _maybe_colocate_with(y.op, colocate_gradients_with_ops):
        grad_ys[i] = array_ops.fill(
            array_ops.shape(y), constant_op.constant(
                1, dtype=y.dtype))
    else:
      if grad_y.dtype != y.dtype:
        raise ValueError("Y and ys_grad must be of the same type, "
                         "not y: %s, ys_grad: %s " %
                         (dtypes.as_dtype(y.dtype).name,
                          dtypes.as_dtype(grad_y.dtype).name))
  return grad_ys
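For a single `y`, the default gradient filled in above is just a ones tensor with `y`'s shape and dtype. A minimal sketch (the helper name is hypothetical):

```python
import tensorflow as tf

def default_grad_y(y):
    # Equivalent of the fill above: tf.gradients with grad_ys=None behaves as if
    # each y received an all-ones incoming gradient of y's shape and dtype.
    return tf.fill(tf.shape(y), tf.constant(1, dtype=y.dtype))
```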
Example #3
def slice_input_producer(tensor_list, num_epochs=None, shuffle=True, seed=None, capacity=32, name=None):
    """Produces a slice of each `Tensor` in `tensor_list`.

  Implemented using a Queue -- a `QueueRunner` for the Queue
  is added to the current `Graph`'s `QUEUE_RUNNER` collection.

  Args:
    tensor_list: A list of `Tensor` objects. Every `Tensor` in
      `tensor_list` must have the same size in the first dimension.
    num_epochs: An integer (optional). If specified, `slice_input_producer`
      produces each slice `num_epochs` times before generating
      an `OutOfRange` error. If not specified, `slice_input_producer` can cycle
      through the slices an unlimited number of times.
    shuffle: Boolean. If true, the integers are randomly shuffled within each
      epoch.
    seed: An integer (optional). Seed used if shuffle == True.
    capacity: An integer. Sets the queue capacity.
    name: A name for the operations (optional).

  Returns:
    A list of tensors, one for each element of `tensor_list`.  If the tensor
    in `tensor_list` has shape `[N, a, b, .., z]`, then the corresponding output
    tensor will have shape `[a, b, ..., z]`.
  """
    with ops.op_scope(tensor_list, name, "input_producer"):
        tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensor_list)
        if not tensor_list:
            raise ValueError("Expected at least one tensor in slice_input_producer().")
        range_size = array_ops.shape(tensor_list[0])[0]
        # TODO(josh11b): Add an assertion that the first dimension of
        # everything in TensorList matches. Maybe just check the inferred shapes?
        queue = range_input_producer(range_size, num_epochs=num_epochs, shuffle=shuffle, seed=seed, capacity=capacity)
        index = queue.dequeue()
        output = [array_ops.gather(t, index) for t in tensor_list]
        return output
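A usage sketch of the producer above, assuming TF1 queue runners; the tensors and shapes are illustrative:

```python
import tensorflow as tf

images = tf.random_uniform([100, 28, 28])
labels = tf.range(100)
# Each dequeue yields one [28, 28] image slice and its matching scalar label.
image, label = tf.train.slice_input_producer([images, labels], shuffle=True)
```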
Example #4
def _validate_join(tensor_list_list):
    tensor_list_list = [
        ops.convert_n_to_tensor_or_indexed_slices(tl)
        for tl in tensor_list_list
    ]
    if not tensor_list_list:
        raise ValueError("Expected at least one input in batch_join().")
    return tensor_list_list
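For context, `_validate_join` expects one tensor list per producer, as `batch_join` does; a small illustrative sketch (values are placeholders):

```python
import tensorflow as tf

tensor_list_list = [
    [tf.constant([1, 2]), tf.constant(["a", "b"])],   # tensors from reader 1
    [tf.constant([3, 4]), tf.constant(["c", "d"])],   # tensors from reader 2
]
# Each inner list is converted to Tensors/IndexedSlices; an empty outer list raises.
validated = _validate_join(tensor_list_list)
```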
def embedding_lookup(params,
                     ids,
                     partition_strategy="mod",
                     name=None,
                     validate_indices=True,
                     max_norm=None):
    """Looks up `ids` in a list of embedding tensors.
  Returns:
    A `Tensor` with the same type as the tensors in `params`.

  Raises:
    ValueError: If `params` is empty.
  """
    # Pre-check that the inputs are valid.
    if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
        raise ValueError("Need at least one param")
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]

    # Helper: optionally clip each embedding's L2 norm to max_norm.
    def maybe_normalize(x):
        if max_norm is not None:
            if x.get_shape().ndims is not None:
                ndims = x.get_shape().ndims
            else:
                ndims = array_ops.size(array_ops.shape(x))
            return clip_ops.clip_by_norm(x,
                                         max_norm,
                                         axes=list(range(1, ndims)))
        return x

    with ops.name_scope(name, "embedding_lookup", params + [ids]) as name:
        np = len(params)  # Number of partitions
        # Convert params to Tensors (or IndexedSlices).
        params = ops.convert_n_to_tensor_or_indexed_slices(params,
                                                           name="params")
        # Single-partition case: params contains only one tensor.
        if np == 1:
            # Colocate the gather with params[0] so the lookup runs where the data lives.
            with ops.colocate_with(params[0]):
                # ResourceVariable is an experimental replacement for Variable;
                # its sparse_read path is left commented out in this excerpt.
                # Fetch the rows of params[0] that correspond to ids via gather.
                # if isinstance(params[0], resource_variable_ops.ResourceVariable):
                #   ret = params[0].sparse_read(ids, name=name)
                # else:
                ret = array_ops.gather(params[0],
                                       ids,
                                       name=name,
                                       validate_indices=validate_indices)
            # maybe_normalize clips the result by L2 norm when max_norm is set.
            return maybe_normalize(ret)
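A minimal single-partition sketch of the path handled above (`np == 1`); the variable and id values are illustrative:

```python
import tensorflow as tf

embeddings = tf.Variable(tf.random_normal([1000, 64]))
ids = tf.constant([7, 42, 7])
# With one params tensor, embedding_lookup reduces to a gather
# followed by the optional max_norm clipping.
vecs = tf.nn.embedding_lookup([embeddings], ids)   # shape [3, 64]
```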
Example #6
def slice_input_producer(tensor_list,
                         num_epochs=None,
                         shuffle=True,
                         seed=None,
                         capacity=32,
                         shared_name=None,
                         name=None):
    """Produces a slice of each `Tensor` in `tensor_list`.

  Implemented using a Queue -- a `QueueRunner` for the Queue
  is added to the current `Graph`'s `QUEUE_RUNNER` collection.

  Args:
    tensor_list: A list of `Tensor` objects. Every `Tensor` in
      `tensor_list` must have the same size in the first dimension.
    num_epochs: An integer (optional). If specified, `slice_input_producer`
      produces each slice `num_epochs` times before generating
      an `OutOfRange` error. If not specified, `slice_input_producer` can cycle
      through the slices an unlimited number of times.
    shuffle: Boolean. If true, the integers are randomly shuffled within each
      epoch.
    seed: An integer (optional). Seed used if shuffle == True.
    capacity: An integer. Sets the queue capacity.
    shared_name: (optional). If set, this queue will be shared under the given
      name across multiple sessions.
    name: A name for the operations (optional).

  Returns:
    A list of tensors, one for each element of `tensor_list`.  If the tensor
    in `tensor_list` has shape `[N, a, b, .., z]`, then the corresponding output
    tensor will have shape `[a, b, ..., z]`.

  Raises:
    ValueError: if `slice_input_producer` produces nothing from `tensor_list`.
  """
    with ops.op_scope(tensor_list, name, "input_producer"):
        tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensor_list)
        if not tensor_list:
            raise ValueError(
                "Expected at least one tensor in slice_input_producer().")
        range_size = array_ops.shape(tensor_list[0])[0]
        # TODO(josh11b): Add an assertion that the first dimension of
        # everything in TensorList matches. Maybe just check the inferred shapes?
        queue = range_input_producer(range_size,
                                     num_epochs=num_epochs,
                                     shuffle=shuffle,
                                     seed=seed,
                                     capacity=capacity,
                                     shared_name=shared_name)
        index = queue.dequeue()
        output = [array_ops.gather(t, index) for t in tensor_list]
        return output
def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops):
  """Fill in default values for grad_ys.

  Args:
    grad_ys: List of gradients, can contain None.
    ys: List of tensors.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.

  Returns:
    A list of gradients to use, without None.

  Raises:
    ValueError: If sizes of gradients and inputs don't match
    TypeError: If type of any gradient is not valid for its input.
  """
  if len(grad_ys) != len(ys):
    raise ValueError("Passed %d grad_ys for %d ys" % (len(grad_ys), len(ys)))
  grad_ys = ops.convert_n_to_tensor_or_indexed_slices(grad_ys, name="grad_y")
  for i in xrange(len(grad_ys)):
    grad_y = grad_ys[i]
    y = ys[i]
    if grad_y is None:
      if y.dtype.is_complex:
        raise TypeError(
            "Gradients of complex tensors must set grad_ys (y.dtype = %r)" %
            y.dtype)
      with _maybe_colocate_with(y.op, colocate_gradients_with_ops):
        grad_ys[i] = array_ops.fill(
            array_ops.shape(y), constant_op.constant(
                1, dtype=y.dtype))
      continue
    if y.dtype.is_floating or y.dtype.is_integer:
      if not grad_y.dtype.is_floating and not grad_y.dtype.is_integer:
        raise TypeError("Gradient type %s generated for real or "
                         "integer-valued tensor %s with type %s must be "
                         "real or integer" %
                         (dtypes.as_dtype(grad_y.dtype).name, y,
                          dtypes.as_dtype(y.dtype).name))
    elif y.dtype.is_complex:
      if not grad_y.dtype.is_complex:
        raise TypeError("Gradient type %s generated for complex-valued "
                         "tensor %s with type %s must be real" %
                         (dtypes.as_dtype(grad_y.dtype).name, y,
                          dtypes.as_dtype(y.dtype).name))
    else:
      raise TypeError("Tensor %s with type %s must be numeric "
                      "to obtain a default gradient" %
                      (y, dtypes.as_dtype(y.dtype).name))
  return grad_ys
def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops):
    """Fill in default values for grad_ys.

  Args:
    grad_ys: List of gradients, can contain None.
    ys: List of tensors.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.

  Returns:
    A list of gradients to use, without None.

  Raises:
    ValueError: If sizes of gradients and inputs don't match
    TypeError: If type of any gradient is not valid for its input.
  """
    if len(grad_ys) != len(ys):
        raise ValueError("Passed %d grad_ys for %d ys" %
                         (len(grad_ys), len(ys)))
    grad_ys = ops.convert_n_to_tensor_or_indexed_slices(grad_ys, name="grad_y")
    for i in xrange(len(grad_ys)):
        grad_y = grad_ys[i]
        y = ys[i]
        if grad_y is None:
            if y.dtype.is_complex:
                raise TypeError(
                    "Gradients of complex tensors must set grad_ys (y.dtype = %r)"
                    % y.dtype)
            with _maybe_colocate_with(y.op, colocate_gradients_with_ops):
                grad_ys[i] = array_ops.fill(
                    array_ops.shape(y), constant_op.constant(1, dtype=y.dtype))
            continue
        if y.dtype.is_floating or y.dtype.is_integer:
            if not grad_y.dtype.is_floating and not grad_y.dtype.is_integer:
                raise TypeError(
                    "Gradient type %s generated for real or "
                    "integer-valued tensor %s with type %s must be "
                    "real or integer" % (dtypes.as_dtype(
                        grad_y.dtype).name, y, dtypes.as_dtype(y.dtype).name))
        elif y.dtype.is_complex:
            if not grad_y.dtype.is_complex:
                raise TypeError(
                    "Gradient type %s generated for complex-valued "
                    "tensor %s with type %s must be real" % (dtypes.as_dtype(
                        grad_y.dtype).name, y, dtypes.as_dtype(y.dtype).name))
        else:
            raise TypeError("Tensor %s with type %s must be numeric "
                            "to obtain a default gradient" %
                            (y, dtypes.as_dtype(y.dtype).name))
    return grad_ys
Example #9
def slice_input_producer(tensor_list,
                         num_epochs=None,
                         shuffle=True,
                         seed=None,
                         capacity=32,
                         name=None):
    """Produces a slice of each Tensor in tensor_list.

  Implemented using a Queue -- a QueueRunner for the Queue
  is added to the current Graph's QUEUE_RUNNER collection.

  Args:
    tensor_list: A list of Tensors. Every Tensor in tensor_list must
      have the same size in the first dimension.
    num_epochs: An integer (optional). If specified, `slice_input_producer`
      produces each slice `num_epochs` times before generating
      an OutOfRange error. If not specified, `slice_input_producer` can cycle
      through the slices an unlimited number of times.
    shuffle: Boolean. If true, the integers are randomly shuffled within each
      epoch.
    seed: An integer (optional). Seed used if shuffle == True.
    capacity: An integer. Sets the queue capacity.
    name: A name for the operations (optional).

  Returns:
    A list of tensors, one for each element of tensor_list.  If the tensor
    in tensor_list has shape [N, a, b, .., z], then the corresponding output
    tensor will have shape [a, b, ..., z].
  """
    with ops.op_scope(tensor_list, name, "input_producer"):
        tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensor_list)
        if not tensor_list:
            raise ValueError(
                "Expected at least one tensor in slice_input_producer().")
        range_size = array_ops.shape(tensor_list[0])[0]
        # TODO(josh11b): Add an assertion that the first dimension of
        # everything in TensorList matches. Maybe just check the inferred shapes?
        queue = range_input_producer(range_size,
                                     num_epochs=num_epochs,
                                     shuffle=shuffle,
                                     seed=seed,
                                     capacity=capacity)
        index = queue.dequeue()
        output = [array_ops.gather(t, index) for t in tensor_list]
        return output
def SecureAddN(inputs, name=None):
    if not inputs or not isinstance(inputs, (list, tuple)):
        raise ValueError("inputs must be a list of at least one "
                         "Tensor/IndexedSlices with the same dtype and shape")
    inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs)
    if not all(isinstance(x, (ops.Tensor, ops.IndexedSlices)) for x in inputs):
        raise ValueError("inputs must be a list of at least one "
                         "Tensor/IndexedSlices with the same dtype and shape")

    if len(inputs) == 1:
        if isinstance(inputs[0], ops.IndexedSlices):
            values = ops.convert_to_tensor(inputs[0])
        else:
            values = inputs[0]

        if name:
            return array_ops.identity(values, name=name)
        return values

    return _secure_ops.secure_add_n(inputs, name=name)
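A brief usage sketch of `SecureAddN` as defined above; with one input it reduces to an identity, otherwise it dispatches to the secure op (inputs here are illustrative):

```python
import tensorflow as tf

a = tf.constant([1.0, 2.0])
b = tf.constant([3.0, 4.0])
total = SecureAddN([a, b], name="sum")   # secure_add_n over two tensors
single = SecureAddN([a])                 # returns `a` via the identity path
```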
Example #11
def adaptive_embedding_lookup(params,
                              ids,
                              transforms,
                              max_norm=None,
                              name=None):
    """Looks up `ids` in a list of embedding tensors.
    This function is used to perform parallel lookups on the list of tensors in `params`.  It is a generalization of
    `tf.gather`, where `params` is interpreted as a partitioning of a large embedding tensor with different number
    of rows and columns. `params` may be a `PartitionedVariable`s as returned by using `tf.compat.v1.get_variable()`
    with a partitioner.
    Each element `id` of `ids` is partitioned between the elements of `params` according to the `div` partition
    strategy.
    The id space must match the total number of rows across all `params`.
    The results of the lookup are transformed with the corresponding functions from `transforms` and concatenated
    into a dense tensor. The shape of the returned tensor is determined by `transforms`.
    Args:
        params: A list of P tensors of different shape, representing sharded embedding tensors.  Alternatively, a
            `PartitionedVariable`, created by partitioning along dimension 0.
        ids: A `Tensor` with type `int32` or `int64` containing the ids to be looked up in `params`.
        transforms: Functions applied to each retrieved embedding before concatenation. Required due to possible
            different embedding sizes of `params`.
        max_norm: If not `None`, each embedding is clipped if its l2-norm is larger than this value.
        name: A name for the operation (optional).
    Returns:
        A `Tensor` with the same type as the tensors in `params`.
    """

    if isinstance(ids, tf.RaggedTensor):
        return tf.ragged.map_flat_values(adaptive_embedding_lookup, params,
                                         ids, transforms, max_norm, name)

    if not isinstance(params, (list, tuple)) or len(params) < 2:
        raise ValueError('At least 2 variables required in params')

    if not isinstance(transforms,
                      (list, tuple)) or len(transforms) != len(params):
        raise ValueError('Each param should have corresponding transform')

    if not all([callable(t) for t in transforms]):
        raise ValueError('Each transform should be callable')

    with tf.name_scope(name or 'adaptive_embedding_lookup') as name:
        np = len(params)  # Number of partitions

        # Preserve the resource variable status to avoid accidental dense reads.
        if not any(
                isinstance(p, resource_variable_ops.ResourceVariable)
                for p in params):
            params = ops.convert_n_to_tensor_or_indexed_slices(params,
                                                               name='params')
        ids = tf.convert_to_tensor(ids, name='ids')

        # Flatten the ids. There is more than one params tensor.
        flat_ids = tf.reshape(ids, [-1])
        original_indices = tf.range(tf.size(flat_ids))

        # Create p_assignments and set new_ids for adaptive strategy.
        # Compute total_ids_capacity as the sum of dim-0 of params, then assign to
        # partitions based on a variable number of ids per partition. Optimize
        # if we already know the full shape statically.
        dim_0_sizes = []
        for p in range(np):
            param_p_dim = tensor_shape.Dimension(
                tensor_shape.dimension_value(params[p].get_shape()[0]))
            dim_0_sizes.append(param_p_dim)

        dim_0_size_value = sum(dim_0_sizes).value
        if dim_0_size_value:
            dim_0_sizes = tf.TensorShape(dim_0_sizes).as_list()
            total_ids_capacity = tf.constant(dim_0_size_value,
                                             dtype=flat_ids.dtype)
        else:
            dim_0_sizes = []
            for p in range(np):
                param_p_dim = tensor_shape.dimension_value(
                    params[p].get_shape()[0])
                if param_p_dim is not None:
                    dim_0_sizes.append(param_p_dim)
                else:
                    with ops.colocate_with(params[p]):
                        dim_0_sizes.append(tf.shape(params[p])[0])
            dim_0_sizes = tf.stack(dim_0_sizes)
            total_ids_capacity = tf.reduce_sum(dim_0_sizes)

        p_cumsum = tf.cumsum(tf.cast(dim_0_sizes, dtype=flat_ids.dtype))
        assert_max_id = tf.debugging.assert_less(
            tf.math.reduce_max(flat_ids), total_ids_capacity,
            'Invalid id. Maximum id should be less than the total number of params rows'
        )
        with tf.control_dependencies([assert_max_id]):
            p_assignments = tf.searchsorted(
                p_cumsum,
                flat_ids,
                side='right',
            )

        # Cast partition assignments to int32 for use in dynamic_partition.
        # There really should not be more than 2^32 partitions.
        p_assignments = tf.cast(p_assignments, tf.int32)

        # Partition list of ids based on assignments into np separate lists
        p_intervals = tf.concat(([0], p_cumsum), 0)
        new_ids = flat_ids - tf.gather(p_intervals, p_assignments)
        gather_ids = tf.dynamic_partition(new_ids, p_assignments, np)

        # Similarly, partition the original indices.
        p_indices = tf.dynamic_partition(original_indices, p_assignments, np)

        # Do np separate lookups, finding embeddings for plist[p] in params[p]
        partitioned_result = []
        for p in range(np):
            pids = gather_ids[p]
            transform_fn = transforms[p]
            with ops.colocate_with(params[p]):
                result = tf.gather(params[p], pids)
                result = embedding_ops._clip(transform_fn(result), pids,
                                             max_norm)
            partitioned_result.append(result)

        # Stitch these back together
        ret = data_flow_ops.parallel_dynamic_stitch(p_indices,
                                                    partitioned_result,
                                                    name=name)

        # Determine the static element shape.
        element_shape_s = ret.get_shape()[1:]

        # Compute the dynamic element shape.
        if element_shape_s.is_fully_defined():
            element_shape_d = element_shape_s
        else:
            element_shape_d = tf.shape(ret)[1:]

        # Reshape to reverse the flattening of ids.
        ret = tf.reshape(ret, tf.concat([tf.shape(ids), element_shape_d], 0))

        # Normally the reshape is sufficient, but setting shape explicitly
        # teaches shape inference that params[1:].get_shape() matters.
        ret.set_shape(ids.get_shape().concatenate(element_shape_s))

        return ret
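An illustrative call, assuming two partitions with different row counts and embedding widths, plus per-partition transforms that project everything to a common width (all names and sizes here are assumptions):

```python
import tensorflow as tf

p0 = tf.Variable(tf.random.normal([100, 64]))   # frequent ids, wide embeddings
p1 = tf.Variable(tf.random.normal([900, 16]))   # rare ids, narrow embeddings
project = tf.keras.layers.Dense(64)             # maps 16 -> 64 so outputs stitch cleanly

ids = tf.constant([3, 250, 7])                  # id space covers 100 + 900 rows
out = adaptive_embedding_lookup([p0, p1], ids,
                                transforms=[lambda x: x, project])
# out has shape [3, 64]: ids < 100 hit p0, the rest hit p1 and get projected.
```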
Example #12
def stratified_sample(tensors,
                      labels,
                      target_probs,
                      batch_size,
                      init_probs=None,
                      enqueue_many=False,
                      queue_capacity=16,
                      threads_per_queue=1,
                      name=None):
    """Stochastically creates batches based on per-class probabilities.

  This method discards examples. Internally, it creates one queue to amortize
  the cost of disk reads, and one queue to hold the properly-proportioned
  batch.

  Args:
    tensors: List of tensors for data. All tensors are either one item or a
        batch, according to enqueue_many.
    labels: Tensor for label of data. Label is a single integer or a batch,
        depending on `enqueue_many`. It is not a one-hot vector.
    target_probs: Target class proportions in batch. An object whose type has a
        registered Tensor conversion function.
    batch_size: Size of batch to be returned.
    init_probs: Class proportions in the data. An object whose type has a
        registered Tensor conversion function, or `None` for estimating the
        initial distribution.
    enqueue_many: Bool. If true, interpret input tensors as having a batch
        dimension.
    queue_capacity: Capacity of the large queue that holds input examples.
    threads_per_queue: Number of threads for the large queue that holds input
        examples and for the final queue with the proper class proportions.
    name: Optional prefix for ops created by this function.
  Raises:
    ValueError: If `tensors` isn't iterable.
    ValueError: `enqueue_many` is True and labels doesn't have a batch
        dimension, or if `enqueue_many` is False and labels isn't a scalar.
    ValueError: `enqueue_many` is True, and batch dimension on data and labels
        don't match.
    ValueError: if probs don't sum to one.
    ValueError: if a zero initial probability class has a nonzero target
        probability.
    TFAssertion: if labels aren't integers in [0, num classes).
  Returns:
    (data_batch, label_batch), where data_batch is a list of tensors of the same
        length as `tensors`

  Example:
    # Get tensor for a single data and label example.
    data, label = data_provider.Get(['data', 'label'])

    # Get stratified batch according to per-class probabilities.
    target_probs = [...distribution you want...]
    [data_batch], labels = tf.contrib.training.stratified_sample(
        [data], label, target_probs)

    # Run batch through network.
    ...
  """
    with ops.name_scope(name, 'stratified_sample', list(tensors) + [labels]):
        tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors)
        labels = ops.convert_to_tensor(labels)
        target_probs = ops.convert_to_tensor(target_probs,
                                             dtype=dtypes.float32)
        # Reduce the case of a single example to that of a batch of size 1.
        if not enqueue_many:
            tensor_list = [
                array_ops.expand_dims(tensor, 0) for tensor in tensor_list
            ]
            labels = array_ops.expand_dims(labels, 0)

        # If `init_probs` is `None`, set up online estimation of data distribution.
        if init_probs is None:
            # We use `target_probs` to get the number of classes, so its shape must be
            # fully defined at graph construction time.
            target_probs.get_shape().assert_is_fully_defined()
            init_probs = _estimate_data_distribution(
                labels,
                target_probs.get_shape().num_elements())
        else:
            init_probs = ops.convert_to_tensor(init_probs,
                                               dtype=dtypes.float32)

        # Validate that input is consistent.
        tensor_list, labels, [init_probs, target_probs
                              ] = _verify_input(tensor_list, labels,
                                                [init_probs, target_probs])

        # Check that all zero initial probabilities also have zero target
        # probabilities.
        assert_op = control_flow_ops.Assert(
            math_ops.reduce_all(
                math_ops.logical_or(math_ops.not_equal(init_probs, 0),
                                    math_ops.equal(target_probs, 0))),
            [
                'All classes with zero initial probability must also have zero target '
                'probability: ', init_probs, target_probs
            ])
        init_probs = control_flow_ops.with_dependencies([assert_op],
                                                        init_probs)

        # Calculate acceptance sampling probabilities.
        accept_probs = _calculate_acceptance_probabilities(
            init_probs, target_probs)
        proportion_rejected = math_ops.reduce_sum(
            (1 - accept_probs) * init_probs)
        accept_probs = control_flow_ops.cond(
            math_ops.less(proportion_rejected, .5),
            lambda: accept_probs,
            lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
                accept_probs, [accept_probs],
                message='Proportion of examples rejected by sampler is high.',
                first_n=10))

        # Make a single queue to hold input examples. Reshape output so examples
        # don't have singleton batch dimension.
        batched = input_ops.batch(tensor_list + [labels],
                                  batch_size=1,
                                  num_threads=threads_per_queue,
                                  capacity=queue_capacity,
                                  enqueue_many=True)
        val_list = [array_ops.squeeze(x, [0]) for x in batched[:-1]]
        label = array_ops.squeeze(batched[-1], [0])

        # Set up second queue containing batches that have the desired class
        # proportions.
        cur_prob = array_ops.gather(accept_probs, label)
        batched = input_ops.maybe_batch(
            val_list + [label],
            keep_input=random_ops.random_uniform([]) < cur_prob,
            batch_size=batch_size,
            num_threads=threads_per_queue)
        return batched[:-1], batched[-1]
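The acceptance probabilities above are chosen so that keeping each class with probability proportional to `target/init` (scaled so the largest ratio is 1) yields the target proportions. A small NumPy sketch of that idea, not the library helper itself:

```python
import numpy as np

init_probs = np.array([0.7, 0.2, 0.1])      # class mix observed in the data
target_probs = np.array([1/3, 1/3, 1/3])    # class mix wanted in each batch

ratio = target_probs / init_probs
accept_probs = ratio / ratio.max()           # the relatively rarest class is always kept
# Expected accepted mix is proportional to init * accept, which is proportional to target.
```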
Example #13
def embedding_lookup(params,
                     ids,
                     partition_strategy="mod",
                     name=None,
                     validate_indices=True,
                     max_norm=None):
    """Looks up `ids` in a list of embedding tensors.

  This function is used to perform parallel lookups on the list of
  tensors in `params`.  It is a generalization of
  [`tf.gather()`](../../api_docs/python/array_ops.md#gather), where `params` is
  interpreted as a partitioning of a large embedding tensor.  `params` may be
  a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  If `len(params) > 1`, each element `id` of `ids` is partitioned between
  the elements of `params` according to the `partition_strategy`.
  In all strategies, if the id space does not evenly divide the number of
  partitions, each of the first `(max_id + 1) % len(params)` partitions will
  be assigned one more id.

  If `partition_strategy` is `"mod"`, we assign each id to partition
  `p = id % len(params)`. For instance,
  13 ids are split across 5 partitions as:
  `[[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]]`

  If `partition_strategy` is `"div"`, we assign ids to partitions in a
  contiguous manner. In this case, 13 ids are split across 5 partitions as:
  `[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]`

  The results of the lookup are concatenated into a dense
  tensor. The returned tensor has shape `shape(ids) + shape(params)[1:]`.

  Args:
    params: A single tensor representing the complete embedding tensor,
      or a list of P tensors all of same shape except for the first dimension,
      representing sharded embedding tensors.  Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    ids: A `Tensor` with type `int32` or `int64` containing the ids to be looked
      up in `params`.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`.
    name: A name for the operation (optional).
    validate_indices: Whether or not to validate gather indices.
    max_norm: If not None, embedding values are l2-normalized to the value of
     max_norm.

  Returns:
    A `Tensor` with the same type as the tensors in `params`.

  Raises:
    ValueError: If `params` is empty.
  """
    if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
        raise ValueError("Need at least one param")
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]

    def maybe_normalize(x):
        if max_norm is not None:
            if x.get_shape().ndims is not None:
                ndims = x.get_shape().ndims
            else:
                ndims = array_ops.size(array_ops.shape(x))
            return clip_ops.clip_by_norm(x,
                                         max_norm,
                                         axes=list(range(1, ndims)))
        return x

    with ops.name_scope(name, "embedding_lookup", params + [ids]) as name:
        np = len(params)  # Number of partitions
        params = ops.convert_n_to_tensor_or_indexed_slices(params,
                                                           name="params")
        if np == 1:
            with ops.colocate_with(params[0]):
                # TODO(apassos): implement the sharded version as well.
                if isinstance(params[0],
                              resource_variable_ops.ResourceVariable):
                    ret = params[0].sparse_read(ids, name=name)
                else:
                    ret = array_ops.gather(params[0],
                                           ids,
                                           name=name,
                                           validate_indices=validate_indices)
            return maybe_normalize(ret)
        else:
            ids = ops.convert_to_tensor(ids, name="ids")
            flat_ids = array_ops.reshape(ids, [-1])
            original_indices = math_ops.range(array_ops.size(flat_ids))

            # Create p_assignments and set new_ids depending on the strategy.
            if partition_strategy == "mod":
                p_assignments = flat_ids % np
                new_ids = flat_ids // np
            elif partition_strategy == "div":
                # Compute num_total_ids as the sum of dim-0 of params, then assign to
                # partitions based on a constant number of ids per partition. Optimize
                # if we already know the full shape statically.
                dim_0_size = params[0].get_shape()[0]
                for p in xrange(1, np):
                    dim_0_size += params[p].get_shape()[0]
                if dim_0_size.value:
                    num_total_ids = constant_op.constant(
                        dim_0_size.value, flat_ids.dtype)
                else:
                    dim_0_sizes = []
                    for p in xrange(np):
                        if params[p].get_shape()[0].value is not None:
                            dim_0_sizes.append(params[p].get_shape()[0].value)
                        else:
                            with ops.colocate_with(params[p]):
                                dim_0_sizes.append(
                                    array_ops.shape(params[p])[0])
                    num_total_ids = math_ops.reduce_sum(
                        math_ops.cast(array_ops.stack(dim_0_sizes),
                                      flat_ids.dtype))
                ids_per_partition = num_total_ids // np
                extras = num_total_ids % np

                p_assignments = math_ops.maximum(
                    flat_ids // (ids_per_partition + 1),
                    (flat_ids - extras) // ids_per_partition)

                # Emulate a conditional using a boolean indicator tensor
                is_in_first_extras_partitions = math_ops.cast(
                    p_assignments < extras, flat_ids.dtype)
                new_ids = (is_in_first_extras_partitions *
                           (flat_ids % (ids_per_partition + 1)) +
                           (1 - is_in_first_extras_partitions) *
                           ((flat_ids - extras) % ids_per_partition))
            else:
                raise ValueError("Unrecognized partition strategy: " +
                                 partition_strategy)

            # Cast partition assignments to int32 for use in dynamic_partition.
            # There really should not be more than 2^32 partitions.
            p_assignments = math_ops.cast(p_assignments, dtypes.int32)
            # Partition list of ids based on assignments into np separate lists
            gather_ids = data_flow_ops.dynamic_partition(
                new_ids, p_assignments, np)
            # Similarly, partition the original indices.
            pindices = data_flow_ops.dynamic_partition(original_indices,
                                                       p_assignments, np)
            # Do np separate lookups, finding embeddings for plist[p] in params[p]
            partitioned_result = []
            for p in xrange(np):
                with ops.colocate_with(params[p]):
                    partitioned_result.append(
                        array_ops.gather(params[p],
                                         gather_ids[p],
                                         validate_indices=validate_indices))
            # Stitch these back together
            ret = data_flow_ops.dynamic_stitch(pindices,
                                               partitioned_result,
                                               name=name)
            # Reshape to reverse the flattening of ids.
            element_shape = params[0].get_shape()[1:]
            for p in params[1:]:
                element_shape = element_shape.merge_with(p.get_shape()[1:])
            if element_shape.is_fully_defined():
                ret = array_ops.reshape(
                    ret,
                    array_ops.concat_v2([array_ops.shape(ids), element_shape],
                                        0))
            else:
                # It's important that we compute params[0].shape on the right device
                # to avoid data motion.
                with ops.colocate_with(params[0]):
                    params_shape = array_ops.shape(params[0])
                ret = array_ops.reshape(
                    ret,
                    array_ops.concat_v2([
                        array_ops.shape(ids),
                        array_ops.slice(params_shape, [1], [-1])
                    ], 0))
            # output shape = ids.shape + params[*].shape[1:]
            # Normally the reshape is sufficient, but setting shape explicitly
            # teaches shape inference that params[1:].get_shape() matters.
            ret.set_shape(ids.get_shape().concatenate(element_shape))
            return maybe_normalize(ret)
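A quick worked check, in plain Python, of the `"mod"` and `"div"` partitionings described in the docstring (13 ids across 5 partitions):

```python
ids, num_parts = list(range(13)), 5

mod = [[i for i in ids if i % num_parts == p] for p in range(num_parts)]
# mod == [[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]]

per, extras = len(ids) // num_parts, len(ids) % num_parts
div, start = [], 0
for p in range(num_parts):
    size = per + (1 if p < extras else 0)   # first `extras` partitions get one more id
    div.append(ids[start:start + size])
    start += size
# div == [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]
```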
Example #14
def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None):
    """Returns the element-wise sum of a list of tensors.

  Optionally, pass `shape` and `tensor_dtype` for shape and type checking,
  otherwise, these are inferred.

  `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
  wait for all of its inputs to be ready before beginning to sum. This can
  save memory if inputs are ready at different times, since minimum temporary
  storage is proportional to the output size rather than the inputs size.

  Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.

  For example:

  ```python
  a = tf.constant([[1, 2], [3, 4]])
  b = tf.constant([[5, 0], [0, 6]])
  tf.accumulate_n_v2([a, b, a])  # [[7, 4], [6, 14]]

  # Explicitly pass shape and type
  tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32)
                                                                   # [[7,  4],
                                                                   #  [6, 14]]
  ```

  Args:
    inputs: A list of `Tensor` objects, each with same shape and type.
    shape: Shape of elements of `inputs`.
    tensor_dtype: The type of `inputs`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of same shape and type as the elements of `inputs`.

  Raises:
    ValueError: If `inputs` don't all have same shape and dtype or the shape
    cannot be inferred.
  """
    _INPUTS_ERR_MSG = ValueError("inputs must be a list of at least one "
                                 "Tensor with the same dtype and shape")
    if not inputs or not isinstance(inputs, (list, tuple)):
        raise _INPUTS_ERR_MSG
    inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs)
    if not all(isinstance(x, ops.Tensor) for x in inputs):
        raise _INPUTS_ERR_MSG
    if not all(x.dtype == inputs[0].dtype for x in inputs):
        raise _INPUTS_ERR_MSG
    if shape is not None:
        shape = tensor_shape.as_shape(shape)
    else:
        shape = tensor_shape.unknown_shape()
    for input_tensor in inputs:
        if isinstance(input_tensor, ops.Tensor):
            shape = shape.merge_with(input_tensor.get_shape())

    # tensor_dtype is for safety only; operator's output type computed in C++
    if tensor_dtype is not None and tensor_dtype != inputs[0].dtype:
        raise TypeError("tensor_dtype is {}, but input is of type {}".format(
            tensor_dtype, inputs[0].dtype))

    if len(inputs) == 1 and name is None:
        return inputs[0]
    elif len(inputs) == 1 and name is not None:
        return array_ops.identity(inputs[0], name=name)
    elif context.in_eager_mode():
        # TemporaryVariable not currently supported in eager mode; fall back
        # onto AddN for now.
        # TODO(frreiss) remove this once the lifetime of eager variables gets
        # addressed
        return math_ops.add_n(inputs, name=name)
    else:
        return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape)
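For inputs that are all available, the result matches `tf.add_n`; the difference is the memory and scheduling behavior described above. A short sketch reusing the docstring's values:

```python
import tensorflow as tf

a = tf.constant([[1, 2], [3, 4]])
b = tf.constant([[5, 0], [0, 6]])
s1 = accumulate_n_v2([a, b, a])   # [[7, 4], [6, 14]], summed as inputs become ready
s2 = tf.add_n([a, b, a])          # same values, but waits for all inputs first
```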
Example #15
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None,
              stop_gradients=None):
    """Constructs symbolic derivatives of sum of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors.  `grad_ys`
  is a list of `Tensor`, holding the gradients received by the
  `ys`. The list must be the same length as `ys`.

  `gradients()` adds ops to the graph to output the derivatives of `ys` with
  respect to `xs`.  It returns a list of `Tensor` of length `len(xs)` where
  each tensor is the `sum(dy/dx)` for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds
  the initial gradients for each y in `ys`.  When `grad_ys` is None,
  we fill in a tensor of '1's of the shape of y for each y in `ys`.  A
  user can provide their own initial `grad_ys` to compute the
  derivatives using a different initial gradient for each y (e.g., if
  one wanted to weight the gradient differently for each value in
  each y).

  `stop_gradients` is a `Tensor` or a list of tensors to be considered constant
  with respect to all `xs`. These tensors will not be backpropagated through,
  as though they had been explicitly disconnected using `stop_gradient`.  Among
  other things, this allows computation of partial derivatives as opposed to
  total derivatives. For example:

  ```python
  a = tf.constant(0.)
  b = 2 * a
  g = tf.gradients(a + b, [a, b], stop_gradients=[a, b])
  ```

  Here the partial derivatives `g` evaluate to `[1.0, 1.0]`, compared to the
  total derivatives `tf.gradients(a + b, [a, b])`, which take into account the
  influence of `a` on `b` and evaluate to `[3.0, 1.0]`.  Note that the above is
  equivalent to:

  ```python
  a = tf.stop_gradient(tf.constant(0.))
  b = tf.stop_gradient(2 * a)
  g = tf.gradients(a + b, [a, b])
  ```

  `stop_gradients` provides a way of stopping gradient after the graph has
  already been constructed, as compared to `tf.stop_gradient` which is used
  during graph construction.  When the two approaches are combined,
  backpropagation stops at both `tf.stop_gradient` nodes and nodes in
  `stop_gradients`, whichever is encountered first.

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as
      `ys` and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned
      for an operation.  This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.
    stop_gradients: Optional. A `Tensor` or list of tensors not to differentiate
      through.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid.
    RuntimeError: if called in Eager mode.

  """
    if context.in_eager_mode():
        raise RuntimeError("tf.gradients not supported in EAGER mode. Use "
                           "functions in tf.contrib.eager.backprop instead.")
    ys = _AsList(ys)
    xs = _AsList(xs)
    stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients)
    if grad_ys is None:
        grad_ys = [None] * len(ys)
    else:
        grad_ys = _AsList(grad_ys)

    with ops.name_scope(
            name, "gradients",
            list(ys) + list(xs) + list(stop_gradients) +
            list(grad_ys)) as grad_scope:
        ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
        xs = [
            x.handle
            if isinstance(x, resource_variable_ops.ResourceVariable) else x
            for x in xs
        ]
        xs = ops.internal_convert_n_to_tensor_or_indexed_slices(xs,
                                                                name="x",
                                                                as_ref=True)
        grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

        # The approach we take here is as follows: Create a list of all ops in the
        # subgraph between the ys and xs.  Visit these ops in reverse order of ids
        # to ensure that when we visit an op the gradients w.r.t its outputs have
        # been collected.  Then aggregate these gradients if needed, call the op's
        # gradient function, and add the generated gradients to the gradients for
        # its input.

        # Initialize the pending count for ops in the connected subgraph from ys
        # to the xs.
        if len(ys) > 1:
            ys = [array_ops.identity(y) if y.consumers() else y for y in ys]
        to_ops = [t.op for t in ys]
        from_ops = [t.op for t in xs]
        stop_gradient_ops = [t.op for t in stop_gradients]
        pending_count, loop_state = _PendingCount(ops.get_default_graph(),
                                                  to_ops, from_ops,
                                                  colocate_gradients_with_ops)

        # Iterate over the collected ops.
        #
        # grads: op => list of gradients received on each output endpoint of the
        # op.  The gradients for each endpoint are initially collected as a list.
        # When it is time to call the op's gradient function, for each endpoint we
        # aggregate the list of received gradients into a Add() Operation if there
        # is more than one.
        grads = {}

        # Add the initial gradients for the ys.
        for y, grad_y in zip(ys, grad_ys):
            _SetGrad(grads, y, grad_y)

        # Initialize queue with to_ops.
        queue = collections.deque()
        # Add the ops in 'to_ops' into the queue.
        to_ops_set = set()
        for op in to_ops:
            # 'ready' handles the case where one output gradient relies on
            # another output's gradient.
            # pylint: disable=protected-access
            ready = (pending_count[op._id] == 0)
            if ready and op._id not in to_ops_set:
                to_ops_set.add(op._id)
                queue.append(op)
            # pylint: enable=protected-access

        if loop_state:
            loop_exits = loop_state.ProcessUnusedLoopExits(
                pending_count, to_ops_set)
            for y in loop_exits:
                if _IsTrainable(y):
                    _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
                    queue.append(y.op)

        stop_ops = _StopOps(from_ops, stop_gradient_ops, pending_count)
        while queue:
            # generate gradient subgraph for op.
            op = queue.popleft()
            with _maybe_colocate_with(op, colocate_gradients_with_ops):
                if loop_state:
                    loop_state.EnterGradWhileContext(op, before=True)
                out_grads = _AggregatedGrads(grads, op, loop_state,
                                             aggregation_method)
                if loop_state:
                    loop_state.ExitGradWhileContext(op, before=True)

                grad_fn = None
                # pylint: disable=protected-access
                func_call = None
                is_func_call = ops.get_default_graph()._is_function(op.type)
                has_out_grads = any(
                    isinstance(g, ops.Tensor) or g for g in out_grads)
                if has_out_grads and (op._id not in stop_ops):
                    if is_func_call:
                        func_call = ops.get_default_graph()._get_function(
                            op.type)
                        grad_fn = func_call.python_grad_func
                        # pylint: enable=protected-access
                    else:
                        # A grad_fn must be defined, either as a function or as None
                        # for ops that do not have gradients.
                        try:
                            grad_fn = ops.get_gradient_function(op)
                        except LookupError:
                            raise LookupError(
                                "No gradient defined for operation '%s' (op type: %s)"
                                % (op.name, op.type))
                if loop_state:
                    loop_state.EnterGradWhileContext(op, before=False)
                if (grad_fn or is_func_call) and has_out_grads:
                    # NOTE: If _AggregatedGrads didn't compute a value for the i'th
                    # output, it means that the cost does not depend on output[i],
                    # therefore dC/doutput[i] is 0.
                    for i, out_grad in enumerate(out_grads):
                        if (not isinstance(out_grad, ops.Tensor)
                                and not out_grad) and (
                                    (not grad_fn and is_func_call)
                                    or _IsTrainable(op.outputs[i])):
                            # Only trainable outputs or outputs for a function call that
                            # will use SymbolicGradient get a zero gradient. Gradient
                            # functions should ignore the gradient for other outputs.
                            # TODO (apassos) gradients of resource handles might be an
                            # issue here because of zeros.
                            if loop_state:
                                out_grads[i] = loop_state.ZerosLike(op, i)
                            else:
                                out_grads[
                                    i] = control_flow_ops.ZerosLikeOutsideLoop(
                                        op, i)
                    with ops.name_scope(op.name + "_grad"):
                        # pylint: disable=protected-access
                        with ops.get_default_graph()._original_op(op):
                            # pylint: enable=protected-access
                            if grad_fn:
                                # If grad_fn was found, do not use SymbolicGradient even for
                                # functions.
                                in_grads = _MaybeCompile(
                                    grad_scope, op, func_call,
                                    lambda: grad_fn(op, *out_grads))
                            else:
                                # For function call ops, we add a 'SymbolicGradient'
                                # node to the graph to compute gradients.
                                in_grads = _MaybeCompile(
                                    grad_scope, op, func_call,
                                    lambda: _SymGrad(op, out_grads))
                            in_grads = _AsList(in_grads)
                            _VerifyGeneratedGradients(in_grads, op)
                            if gate_gradients and len(
                                [x for x in in_grads if x is not None]) > 1:
                                with ops.device(None):
                                    with ops.colocate_with(
                                            None, ignore_existing=True):
                                        in_grads = control_flow_ops.tuple(
                                            in_grads)
                    _LogOpGradients(op, out_grads, in_grads)
                else:
                    # If no grad_fn is defined or none of out_grads is available,
                    # just propagate a list of None backwards.
                    in_grads = [None] * len(op.inputs)
                for i, (t_in, in_grad) in enumerate(zip(op.inputs, in_grads)):
                    if in_grad is not None:
                        if (isinstance(in_grad, ops.Tensor)
                                and t_in.dtype != dtypes.resource):
                            try:
                                in_grad.set_shape(t_in.get_shape())
                            except ValueError:
                                raise ValueError(
                                    "Incompatible shapes between op input and calculated "
                                    "input gradient.  Forward operation: %s.  Input index: %d. "
                                    "Original input shape: %s.  "
                                    "Calculated input gradient shape: %s" %
                                    (op.name, i, t_in.shape, in_grad.shape))
                        _SetGrad(grads, t_in, in_grad)
                if loop_state:
                    loop_state.ExitGradWhileContext(op, before=False)

            # Update pending count for the inputs of op and enqueue ready ops.
            _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count,
                                          loop_state)

    if loop_state:
        loop_state.PostProcessing()
    return [_GetGrad(grads, x) for x in xs]
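A small sketch of the `grad_ys` argument described in the docstring: it seeds backpropagation with custom weights instead of the implicit all-ones tensors (values are illustrative, TF1 graph mode assumed):

```python
import tensorflow as tf

x = tf.constant([1.0, 2.0])
y1, y2 = 2.0 * x, 3.0 * x
g = tf.gradients([y1, y2], x,
                 grad_ys=[tf.constant([1.0, 1.0]),    # weight for y1
                          tf.constant([0.5, 0.5])])   # weight for y2
# Per element: dy1/dx * 1.0 + dy2/dx * 0.5 = 2 * 1.0 + 3 * 0.5 = 3.5, so g[0] == [3.5, 3.5]
```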
Example #16
def _validate_bucket(tensor_list):
    tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensor_list)
    if not tensor_list:
        raise ValueError("Expected at least one tensor in bucket().")
    return tensor_list
Example #17
def stratified_sample(tensors, labels, init_probs, target_probs, batch_size,
                      enqueue_many=False, queue_capacity=16,
                      threads_per_queue=1, name=None):
  """Stochastically creates batches based on per-class probabilities.

  This method discards examples. Internally, it creates one queue to amortize
  the cost of disk reads, and one queue to hold the properly-proportioned
  batch. See `stratified_sample_unknown_dist` for a function that performs
  stratified sampling with one queue per class and doesn't require knowing the
  class data-distribution ahead of time.

  Args:
    tensors: List of tensors for data. All tensors are either one item or a
        batch, according to enqueue_many.
    labels: Tensor for label of data. Label is a single integer or a batch,
        depending on enqueue_many. It is not a one-hot vector.
    init_probs: Class proportions in the data. An object whose type has a
        registered Tensor conversion function.
    target_probs: Target class proportions in batch. An object whose type has a
        registered Tensor conversion function.
    batch_size: Size of batch to be returned.
    enqueue_many: Bool. If true, interpret input tensors as having a batch
        dimension.
    queue_capacity: Capacity of the large queue that holds input examples.
    threads_per_queue: Number of threads for the large queue that holds input
        examples and for the final queue with the proper class proportions.
    name: Optional prefix for ops created by this function.
  Raises:
    ValueError: enqueue_many is True and labels doesn't have a batch
        dimension, or if enqueue_many is False and labels isn't a scalar.
    ValueError: enqueue_many is True, and batch dimension on data and labels
        don't match.
    ValueError: if probs don't sum to one.
    ValueError: if a zero initial probability class has a nonzero target
        probability.
    TFAssertion: if labels aren't integers in [0, num classes).
  Returns:
    (data_batch, label_batch), where data_batch is a list of tensors of the same
        length as `tensors`.

  Example:
    # Get tensor for a single data and label example.
    data, label = data_provider.Get(['data', 'label'])

    # Get stratified batch according to per-class probabilities.
    init_probs = [1.0/NUM_CLASSES for _ in range(NUM_CLASSES)]
    target_probs = [...distribution you want...]
    [data_batch], labels = tf.contrib.framework.sampling_ops.stratified_sample(
        [data], label, init_probs, target_probs)

    # Run batch through network.
    ...
  """
  with ops.op_scope(tensors + [labels], name, 'stratified_sample'):
    tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors)
    labels = ops.convert_to_tensor(labels)
    init_probs = ops.convert_to_tensor(init_probs, dtype=dtypes.float32)
    target_probs = ops.convert_to_tensor(target_probs, dtype=dtypes.float32)
    # Reduce the case of a single example to that of a batch of size 1.
    if not enqueue_many:
      tensor_list = [array_ops.expand_dims(tensor, 0) for tensor in tensor_list]
      labels = array_ops.expand_dims(labels, 0)

    # Validate that input is consistent.
    tensor_list, labels, [init_probs, target_probs] = _verify_input(
        tensor_list, labels, [init_probs, target_probs])

    # Check that all zero initial probabilities also have zero target
    # probabilities.
    assert_op = logging_ops.Assert(math_ops.reduce_all(math_ops.logical_or(
        math_ops.not_equal(init_probs, 0),
        math_ops.equal(target_probs, 0))), [init_probs, target_probs])
    init_probs = control_flow_ops.with_dependencies([assert_op], init_probs)

    # Calculate acceptance sampling probabilities.
    accept_probs = _calculate_acceptance_probabilities(init_probs, target_probs)
    proportion_rejected = math_ops.reduce_sum((1 - accept_probs) * init_probs)
    accept_probs = control_flow_ops.cond(
        math_ops.less(proportion_rejected, .5),
        lambda: accept_probs,
        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
            accept_probs, [accept_probs],
            message='Proportion of examples rejected by sampler is high.',
            first_n=10))

    # Make a single queue to hold input examples. Reshape output so examples
    # don't have singleton batch dimension.
    batched = input_ops.batch(tensor_list + [labels],
                              batch_size=1,
                              num_threads=threads_per_queue,
                              capacity=queue_capacity,
                              enqueue_many=True)
    val_list = [array_ops.squeeze(x, [0]) for x in batched[:-1]]
    label = array_ops.squeeze(batched[-1], [0])

    # Set up second queue containing batches that have the desired class
    # proportions.
    batched = _get_stratified_batch_from_tensors(
        val_list, label, accept_probs, batch_size, threads_per_queue)
    return batched[:-1], batched[-1]
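_calculate_acceptance_probabilities is not shown in this listing, so the formula below is an assumption about one standard way to derive per-class acceptance probabilities for rejection sampling; the function and variable names are illustrative only. It also reproduces the "proportion rejected" quantity that triggers the Print warning above.

import numpy as np

def acceptance_probabilities(init_probs, target_probs):
    # Rejection-sampling view: if class i arrives with probability p_i and is
    # kept with probability a_i, accepted examples follow p_i * a_i (up to
    # normalization). Taking a_i = (t_i / p_i) / max_j (t_j / p_j) makes that
    # proportional to t_i while keeping every a_i in [0, 1].
    p = np.asarray(init_probs, dtype=np.float64)
    t = np.asarray(target_probs, dtype=np.float64)
    ratio = np.where(p > 0, t / np.maximum(p, 1e-12), 0.0)
    return ratio / ratio.max()

# Data arrives 90% class 0 / 10% class 1, but we want a 50/50 batch.
accept = acceptance_probabilities([0.9, 0.1], [0.5, 0.5])
print(accept)                               # ~[0.111, 1.0]
# Fraction of arriving examples rejected; the code above warns when this
# exceeds 0.5 because the sampler is then discarding most of the input.
print(np.sum((1.0 - accept) * [0.9, 0.1]))  # ~0.8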
Example #18
0
    def apply_op(self, op_type_name, g=None, name=None, **keywords):
        # pylint: disable=g-doc-args
        """Add a node invoking a registered Op to a graph.

    Config proto extensions must be provided via the 'ext' keyword argument.
    Example usage:
       # input1 and input2 can be Tensors or anything ops.convert_to_tensor()
       # will convert to a Tensor.
       op_def_library.apply_op("op", input1=input1, input2=input2)
       # If none of the inputs are Tensors and your session doesn't have a
       # default graph, you will have to specify the graph.
       op_def_library.apply_op("op", input1=input1, g=g)
       # Can specify a node name.
       op_def_library.apply_op("op", input1=input1, name="node_name")
       # Must use keyword arguments, with the names specified in the OpDef.
       op_def_library.apply_op("op", input_name=input, attr_name=attr)

    All attrs must either be inferred from an input or specified.
    (If inferred, the attr must not be specified.)  If an attr has a default
    value specified in the Op's OpDef, then you may pass None as the value
    of that attr to get the default.

    Args:
      op_type_name: string. Must match the name field of a registered Op.
      g: The graph context (optional)
      name: string. Optional name of the created op.
      **keywords: input Tensor and attr arguments specified by name,
        and optional parameters to pass when constructing the Operation.

    Returns:
      The Tensor(s) representing the output of the operation, or the Operation
      itself if there are no outputs.

    Raises:
      RuntimeError: On some errors.
      TypeError: On some errors.
      ValueError: On some errors.
    """
        op_info = self._ops.get(op_type_name, None)
        if op_info is None:
            raise RuntimeError("Unrecognized Op name " + op_type_name)
        op_def = op_info.op_def

        # Determine the graph context.
        try:
            # Need to flatten all the arguments into a list.
            # pylint: disable=protected-access
            g = ops._get_graph_from_inputs(_Flatten(keywords.values()),
                                           graph=g)
            # pylint: enable=protected-access
        except AssertionError as e:
            raise RuntimeError(
                "Need to specify g=graph to Op '%s' (could not determine graph due "
                "to: %s)" % (op_type_name, e.message))

        # Default name if not specified.
        if name is None:
            name = op_type_name

        # Requires that op_def has passed validation (using the C++
        # ValidateOpDef() from ../framework/op_def_util.h).
        attrs = {}
        inputs = []
        input_types = []
        with g.as_default(), ops.name_scope(name) as scope:

            # Perform input type inference
            inferred_from = {}
            for input_arg in op_def.input_arg:
                input_name = input_arg.name
                if input_name in keywords:
                    values = keywords.pop(input_name)
                elif input_name + "_" in keywords:
                    # Handle the case where the name is a keyword or built-in
                    # for Python so we use the name + _ instead.
                    input_name += "_"
                    values = keywords.pop(input_name)
                else:
                    raise TypeError("No argument for input " + input_name)

                # Goals:
                # * Convert values to Tensors if it contains constants.
                # * Verify that values is a list if that matches the input_arg's
                #   type.
                # * If the input_arg's type is determined by attrs, either set
                #   those attrs and validate those attr values are legal (if
                #   they have not yet been set) or validate the input matches
                #   the type indicated by the attrs (if they have already been
                #   inferred via an earlier input).
                # * If the input_arg has an explicit type, make sure the input
                #   conforms.

                if _IsListParameter(input_arg):
                    if not _IsListValue(values):
                        raise TypeError(
                            "Expected list for '%s' argument to '%s' Op, not %s."
                            % (input_name, op_type_name, values))
                    # In cases where we expect all elements of the list to have the
                    # same dtype, try to cast non-Tensor elements to that type.
                    dtype = None
                    if input_arg.type != types_pb2.DT_INVALID:
                        dtype = input_arg.type
                    elif input_arg.number_attr:
                        if input_arg.type_attr in attrs:
                            dtype = attrs[input_arg.type_attr]
                        else:
                            for t in values:
                                if isinstance(t, ops.Tensor):
                                    dtype = t.dtype
                                    break

                    try:
                        values = ops.convert_n_to_tensor_or_indexed_slices(
                            values,
                            name=input_arg.name,
                            dtype=types_lib.as_dtype(dtype).base_dtype
                            if dtype else None)
                    except (TypeError, ValueError):
                        assert dtype is not None, "Should not fail if dtype is None"
                        assert input_arg.number_attr, "Should be number_attr case"
                        # What types does the conversion function think values have?
                        values = ops.convert_n_to_tensor_or_indexed_slices(
                            values)
                        observed = ", ".join(v.dtype.base_dtype.name
                                             for v in values)

                        prefix = (
                            "Tensors in list passed to '%s' of '%s' Op have types [%s]"
                            % (input_name, op_type_name, observed))
                        if input_arg.type != types_pb2.DT_INVALID:
                            raise TypeError(
                                "%s that do not match expected type %s." %
                                (prefix, types_lib.as_dtype(dtype).name))
                        elif input_arg.type_attr in attrs:
                            raise TypeError(
                                "%s that do not match type %s inferred from "
                                "earlier arguments." %
                                (prefix, types_lib.as_dtype(dtype).name))
                        else:
                            raise TypeError("%s that don't all match." %
                                            prefix)

                    types = [x.dtype for x in values]
                    inputs.extend(values)
                else:
                    # In cases where we have an expected type, try to convert non-Tensor
                    # arguments to that type.
                    dtype = None
                    if input_arg.type != types_pb2.DT_INVALID:
                        dtype = input_arg.type
                    elif input_arg.type_attr in attrs:
                        dtype = attrs[input_arg.type_attr]

                    try:
                        values = ops.convert_to_tensor(values,
                                                       name=input_arg.name,
                                                       dtype=dtype)
                    except ValueError:
                        # What type does convert_to_tensor think it has?
                        observed = ops.convert_to_tensor(values).dtype.name
                        prefix = (
                            "Input '%s' of '%s' Op has type %s that does not match"
                            % (input_name, op_type_name, observed))
                        if input_arg.type != types_pb2.DT_INVALID:
                            raise TypeError(
                                "%s expected type of %s." %
                                (prefix, types_lib.as_dtype(
                                    input_arg.type).name))
                        else:
                            raise TypeError(
                                "%s type %s of argument '%s'." %
                                (prefix,
                                 types_lib.as_dtype(
                                     attrs[input_arg.type_attr]).name,
                                 inferred_from[input_arg.type_attr]))

                    types = [values.dtype]
                    inputs.append(values)
                base_types = [x.base_dtype for x in types]

                if input_arg.number_attr:
                    # <number-attr> * <type> or <number-attr> * <type-attr>
                    if input_arg.number_attr in attrs:
                        if len(values) != attrs[input_arg.number_attr]:
                            raise ValueError(
                                "List argument '%s' to '%s' Op with length %d must match "
                                "length %d of argument '%s'." %
                                (input_name, op_type_name, len(values),
                                 attrs[input_arg.number_attr],
                                 inferred_from[input_arg.number_attr]))
                    else:
                        attrs[input_arg.number_attr] = len(values)
                        inferred_from[input_arg.number_attr] = input_name
                        num_attr = _Attr(op_def, input_arg.number_attr)
                        if num_attr.has_minimum and len(
                                values) < num_attr.minimum:
                            raise ValueError(
                                "List argument '%s' to '%s' Op with length %d shorter "
                                "than minimum length %d." %
                                (input_name, op_type_name, len(values),
                                 num_attr.minimum))
                    # All tensors must have the same base type.
                    if any([bt != base_types[0] for bt in base_types]):
                        raise TypeError(
                            "All tensors passed to '%s' of '%s' Op "
                            "must have the same type." %
                            (input_name, op_type_name))
                    if input_arg.type != types_pb2.DT_INVALID:
                        # <number-attr> * <type> case
                        if base_types and base_types[0] != input_arg.type:
                            assert False, "Unreachable"
                    elif input_arg.type_attr in attrs:
                        # <number-attr> * <type-attr> case, where <type-attr> already
                        # has an inferred value.
                        if base_types and base_types[0] != attrs[
                                input_arg.type_attr]:
                            assert False, "Unreachable"
                    else:
                        # <number-attr> * <type-attr> case, where we are now setting
                        # the <type-attr> based on this input
                        if not base_types:
                            raise TypeError(
                                "Don't know how to infer type variable from empty input "
                                "list passed to input '%s' of '%s' Op." %
                                (input_name, op_type_name))
                        attrs[input_arg.type_attr] = base_types[0]
                        inferred_from[input_arg.type_attr] = input_name
                        type_attr = _Attr(op_def, input_arg.type_attr)
                        _SatisfiesTypeConstraint(base_types[0], type_attr)
                elif input_arg.type_attr:
                    # <type-attr>
                    attr_value = base_types[0]
                    if input_arg.type_attr in attrs:
                        if attrs[input_arg.type_attr] != attr_value:
                            assert False, "Unreachable"
                    else:
                        for base_type in base_types:
                            _SatisfiesTypeConstraint(
                                base_type, _Attr(op_def, input_arg.type_attr))
                        attrs[input_arg.type_attr] = attr_value
                        inferred_from[input_arg.type_attr] = input_name
                elif input_arg.type_list_attr:
                    # <type-list-attr>
                    attr_value = base_types
                    if input_arg.type_list_attr in attrs:
                        if attrs[input_arg.type_list_attr] != attr_value:
                            raise TypeError(
                                "Input '%s' of '%s' Op has type list of %s that does not "
                                "match type list %s of argument '%s'." %
                                (input_name, op_type_name, ", ".join(
                                    types_lib.as_dtype(x).name
                                    for x in attr_value),
                                 ", ".join(
                                     types_lib.as_dtype(x).name
                                     for x in attrs[input_arg.type_list_attr]),
                                 inferred_from[input_arg.type_list_attr]))
                    else:
                        for base_type in base_types:
                            _SatisfiesTypeConstraint(
                                base_type,
                                _Attr(op_def, input_arg.type_list_attr))
                        attrs[input_arg.type_list_attr] = attr_value
                        inferred_from[input_arg.type_list_attr] = input_name
                else:
                    # single Tensor with specified type
                    if base_types[0] != input_arg.type:
                        assert False, "Unreachable"

                if input_arg.is_ref:
                    if not all(x.is_ref_dtype for x in types):
                        raise TypeError(
                            "Input '%s' of '%s' Op requires l-value input" %
                            (input_name, op_type_name))
                    input_types.extend(types)
                else:
                    input_types.extend(base_types)

            # Process remaining attrs
            for attr in op_def.attr:
                # Skip attrs that have already had their values inferred
                if attr.name in attrs:
                    if attr.name in keywords:
                        raise TypeError(
                            "Should not specify value for inferred attr '%s'."
                            % attr.name)
                    continue
                if attr.name in keywords:
                    attrs[attr.name] = keywords.pop(attr.name)
                elif attr.name + "_" in keywords:
                    # Attrs whose names match Python keywords have an extra '_'
                    # appended, so we must check for that as well.
                    attrs[attr.name] = keywords.pop(attr.name + "_")
                else:
                    raise TypeError("No argument for attr " + attr.name)

            # Convert attr values to AttrValue protos.
            attr_protos = {}
            for attr_def in op_def.attr:
                key = attr_def.name
                value = attrs[key]
                attr_value = attr_value_pb2.AttrValue()
                if attr_def.HasField("default_value") and value is None:
                    attr_value.CopyFrom(attr_def.default_value)
                    attr_protos[key] = attr_value
                    continue
                if attr_def.type.startswith("list("):
                    if not _IsListValue(value):
                        raise TypeError("Expected list for attr " + key)
                    if attr_def.has_minimum:
                        if len(value) < attr_def.minimum:
                            raise ValueError(
                                "Attr '%s' of '%s' Op passed list of length %d "
                                "less than minimum %d." %
                                (key, op_type_name, len(value),
                                 attr_def.minimum))
                if attr_def.type == "string":
                    attr_value.s = _MakeStr(value, key)
                    if attr_def.HasField("allowed_values"):
                        if attr_value.s not in attr_def.allowed_values.list.s:
                            raise ValueError(
                                "Attr '%s' of '%s' Op passed string '%s' not in: \"%s\"."
                                %
                                (key, op_type_name, attr_value.s, '", "'.join(
                                    attr_def.allowed_values.list.s)))
                elif attr_def.type == "list(string)":
                    attr_value.list.s.extend([_MakeStr(x, key) for x in value])
                    if attr_def.HasField("allowed_values"):
                        for x in attr_value.list.s:
                            if x not in attr_def.allowed_values.list.s:
                                raise ValueError(
                                    "Attr '%s' of '%s' Op passed string '%s' not in: \"%s\"."
                                    % (key, op_type_name, x, '", "'.join(
                                        attr_def.allowed_values.list.s)))
                elif attr_def.type == "int":
                    attr_value.i = _MakeInt(value, key)
                    if attr_def.has_minimum:
                        if attr_value.i < attr_def.minimum:
                            raise ValueError(
                                "Attr '%s' of '%s' Op passed %d less than minimum %d."
                                % (key, op_type_name, attr_value.i,
                                   attr_def.minimum))
                elif attr_def.type == "list(int)":
                    attr_value.list.i.extend([_MakeInt(x, key) for x in value])
                elif attr_def.type == "float":
                    attr_value.f = _MakeFloat(value, key)
                elif attr_def.type == "list(float)":
                    attr_value.list.f.extend(
                        [_MakeFloat(x, key) for x in value])
                elif attr_def.type == "bool":
                    attr_value.b = _MakeBool(value, key)
                elif attr_def.type == "list(bool)":
                    attr_value.list.b.extend(
                        [_MakeBool(x, key) for x in value])
                elif attr_def.type == "type":
                    attr_value.type = _MakeType(value, attr_def)
                elif attr_def.type == "list(type)":
                    attr_value.list.type.extend(
                        [_MakeType(x, attr_def) for x in value])
                elif attr_def.type == "shape":
                    attr_value.shape.CopyFrom(_MakeShape(value, key))
                elif attr_def.type == "list(shape)":
                    attr_value.list.shape.extend(
                        [_MakeShape(x, key) for x in value])
                elif attr_def.type == "tensor":
                    attr_value.tensor.CopyFrom(_MakeTensor(value, key))
                elif attr_def.type == "list(tensor)":
                    attr_value.list.tensor.extend(
                        [_MakeTensor(x, key) for x in value])
                else:
                    raise TypeError("Unrecognized Attr type " + attr_def.type)

                attr_protos[key] = attr_value
            del attrs  # attrs is no longer authoritative, use attr_protos instead

            # Determine output types (possibly using attrs)
            output_types = []
            output_structure = []
            for arg in op_def.output_arg:
                types = []
                if arg.number_attr:
                    n = _AttrValue(attr_protos, arg.number_attr).i
                    if arg.type_attr:
                        types = [_AttrValue(attr_protos, arg.type_attr).type
                                 ] * n
                    else:
                        types = [arg.type] * n
                    output_structure.append(n)
                elif arg.type_attr:
                    t = _AttrValue(attr_protos, arg.type_attr)
                    types = [t.type]
                    output_structure.append(None)
                elif arg.type_list_attr:
                    t = _AttrValue(attr_protos, arg.type_list_attr)
                    types = t.list.type
                    output_structure.append(len(t.list.type))
                else:
                    types = [arg.type]
                    output_structure.append(None)
                if arg.is_ref:
                    types = [types_lib.as_dtype(x).as_ref for x in types]
                output_types.extend(types)

            if keywords:
                raise TypeError(
                    "apply_op() got unexpected keyword arguments: " +
                    ", ".join(sorted(keywords.keys())))

            # Add Op to graph
            if output_structure:
                op = g.create_op(op_type_name,
                                 inputs,
                                 output_types,
                                 name=scope,
                                 input_types=input_types,
                                 attrs=attr_protos,
                                 op_def=op_def)
                outputs = op.outputs
                return _Restructure(
                    ops.convert_n_to_tensor_or_indexed_slices(outputs),
                    output_structure)
            else:
                return g.create_op(op_type_name,
                                   inputs,
                                   output_types,
                                   name=scope,
                                   input_types=input_types,
                                   attrs=attr_protos,
                                   op_def=op_def)
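_Restructure itself is not shown above; the sketch below illustrates the contract implied by how output_structure is built (None marks a single output, an integer marks a list of that many outputs). Treat it as an assumption about the helper's behavior, not the library's actual implementation.

def restructure(flat_outputs, output_structure):
    """Regroup a flat output list according to output_structure.

    None  -> take the next element as-is (single output arg).
    int n -> take the next n elements as a list (number_attr / type_list_attr).
    """
    grouped = []
    pos = 0
    for spec in output_structure:
        if spec is None:
            grouped.append(flat_outputs[pos])
            pos += 1
        else:
            grouped.append(flat_outputs[pos:pos + spec])
            pos += spec
    assert pos == len(flat_outputs), "structure does not cover all outputs"
    # Mirror the convenience of returning a single value when there is
    # exactly one output argument.
    return grouped[0] if len(grouped) == 1 else grouped

print(restructure(["a", "b", "c", "d"], [None, 3]))  # ['a', ['b', 'c', 'd']]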
Example #19
0
def _embedding_lookup_and_transform(params,
                                    ids,
                                    partition_strategy="mod",
                                    name=None,
                                    max_norm=None,
                                    transform_fn=None):
    """Helper function for embedding_lookup and _compute_sampled_logits.

  This function is a generalization of embedding_lookup that optionally
  applies a caller-specified transformation to each embedding. This is
  done through the `transform_fn` argument. If provided, the function is
  applied to each partitioned tensor of retrieved embeddings, colocated
  with the embeddings. This function will be called with a single `Tensor`
  argument of the same type as the `params` tensor and should return a
  `Tensor`. The shape of the argument will be the same as `params` except
  for the size of the first dimension. The first dimension of the result's
  shape must be the same size as the argument's.

  Args:
    params: See embedding_lookup.
    ids: See embedding_lookup.
    partition_strategy: See embedding_lookup.
    name: See embedding_lookup.
    max_norm: See embedding_lookup.
    transform_fn: An optional function to apply to each retrieved embedding. If
      max_norm is provided, transform_fn is applied to the norm-limited
      embeddings.

  Returns:
    See embedding_lookup for details.
  Raises:
    ValueError: If `params` is empty.
  """
    if params is None:
        raise ValueError("params must be specified")
    if isinstance(params, (list, tuple)) and not params:
        raise ValueError("Need at least one param")
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]

    with ops.name_scope(name, "embedding_lookup", params + [ids]) as name:
        np = len(params)  # Number of partitions
        # Preserve the resource variable status to avoid accidental dense reads.
        if not any(
                isinstance(p, resource_variable_ops.ResourceVariable)
                for p in params):
            params = ops.convert_n_to_tensor_or_indexed_slices(params,
                                                               name="params")
        ids = ops.convert_to_tensor(ids, name="ids")
        if np == 1 and (not transform_fn or ids.get_shape().ndims == 1):
            with ops.colocate_with(params[0]):
                result = _clip(array_ops.gather(params[0], ids, name=name),
                               ids, max_norm)
                if transform_fn:
                    result = transform_fn(result)
            # Make sure the final result does not have colocation constraints on the
            # params. Similar to the case np > 1 where parallel_dynamic_stitch is
            # outside the scope of all with ops.colocate_with(params[p]).
            return array_ops.identity(result)
        else:
            # Flatten the ids. There are two cases where we need to do this.
            # - There is more than one params tensor.
            # - There is a transform_fn and ids is not statically known to be 1-D.
            #   We must flatten in this case because transform_fn expects a flat
            #   tensor of embeddings.
            flat_ids = array_ops.reshape(ids, [-1])
            original_indices = math_ops.range(array_ops.size(flat_ids))

            # Create p_assignments and set new_ids depending on the strategy.
            if partition_strategy == "mod":
                p_assignments = flat_ids % np
                new_ids = flat_ids // np
            elif partition_strategy == "div":
                # Compute num_total_ids as the sum of dim-0 of params, then assign to
                # partitions based on a constant number of ids per partition. Optimize
                # if we already know the full shape statically.
                dim_0_size = tensor_shape.Dimension(
                    tensor_shape.dimension_value(params[0].get_shape()[0]))
                for p in xrange(1, np):
                    dim_0_size += tensor_shape.Dimension(
                        tensor_shape.dimension_value(params[p].get_shape()[0]))
                if dim_0_size.value:
                    num_total_ids = constant_op.constant(
                        dim_0_size.value, flat_ids.dtype)
                else:
                    dim_0_sizes = []
                    for p in xrange(np):
                        param_p_dim = tensor_shape.dimension_value(
                            params[p].get_shape()[0])
                        if param_p_dim is not None:
                            dim_0_sizes.append(param_p_dim)
                        else:
                            with ops.colocate_with(params[p]):
                                dim_0_sizes.append(
                                    array_ops.shape(params[p])[0])
                    num_total_ids = math_ops.reduce_sum(
                        math_ops.cast(array_ops.stack(dim_0_sizes),
                                      flat_ids.dtype))
                ids_per_partition = num_total_ids // np
                extras = num_total_ids % np

                p_assignments = math_ops.maximum(
                    flat_ids // (ids_per_partition + 1),
                    (flat_ids - extras) // ids_per_partition)

                # Emulate a conditional using a boolean indicator tensor
                new_ids = array_ops.where(
                    p_assignments < extras, flat_ids % (ids_per_partition + 1),
                    (flat_ids - extras) % ids_per_partition)
            else:
                raise ValueError("Unrecognized partition strategy: " +
                                 partition_strategy)

            # Cast partition assignments to int32 for use in dynamic_partition.
            # There really should not be more than 2^32 partitions.
            p_assignments = math_ops.cast(p_assignments, dtypes.int32)
            # Partition list of ids based on assignments into np separate lists
            gather_ids = data_flow_ops.dynamic_partition(
                new_ids, p_assignments, np)
            # Similarly, partition the original indices.
            pindices = data_flow_ops.dynamic_partition(original_indices,
                                                       p_assignments, np)
            # Do np separate lookups, finding embeddings for gather_ids[p] in params[p].
            partitioned_result = []
            for p in xrange(np):
                pids = gather_ids[p]
                with ops.colocate_with(params[p]):
                    result = array_ops.gather(params[p], pids)
                    if transform_fn:
                        # If transform_fn is provided, the clip_by_norm precedes
                        # the transform and hence must be co-located. See below
                        # for the counterpart if transform_fn is not provided.
                        result = transform_fn(_clip(result, pids, max_norm))
                partitioned_result.append(result)
            # Stitch these back together
            ret = data_flow_ops.parallel_dynamic_stitch(pindices,
                                                        partitioned_result,
                                                        name=name)

            # Determine the static element shape.
            if transform_fn is None:
                element_shape_s = params[0].get_shape()[1:]
                for p in params[1:]:
                    element_shape_s = element_shape_s.merge_with(
                        p.get_shape()[1:])
            else:
                element_shape_s = ret.get_shape()[1:]

            # Compute the dynamic element shape.
            if element_shape_s.is_fully_defined():
                element_shape_d = element_shape_s
            elif transform_fn is None:
                # It's important that we compute params[0].shape on the right device
                # to avoid data motion.
                with ops.colocate_with(params[0]):
                    params_shape = array_ops.shape(params[0])
                element_shape_d = params_shape[1:]
            else:
                element_shape_d = array_ops.shape(ret)[1:]

            # Reshape to reverse the flattening of ids.
            ret = array_ops.reshape(
                ret,
                array_ops.concat([array_ops.shape(ids), element_shape_d], 0))

            # Normally the reshape is sufficient, but setting shape explicitly
            # teaches shape inference that params[1:].get_shape() matters
            # (in the case that transform_fn is None).
            ret.set_shape(ids.get_shape().concatenate(element_shape_s))
            if not transform_fn:
                # If transform_fn was provided, the clip_by_norm was done above.
                ret = _clip(ret, ids, max_norm)
            return ret
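The "mod" and "div" branches above decide, for each id, which partition holds its row and what the within-partition row index is. Below is a NumPy sketch of both assignments under the simplifying assumption that the total number of ids is known statically; partition_ids is an illustrative name, not a TensorFlow API.

import numpy as np

def partition_ids(flat_ids, num_partitions, total_ids=None, strategy="mod"):
    flat_ids = np.asarray(flat_ids)
    if strategy == "mod":
        # Round-robin: id i lives in partition i % np at row i // np.
        p_assignments = flat_ids % num_partitions
        new_ids = flat_ids // num_partitions
    elif strategy == "div":
        # Contiguous ranges of ids per partition; the first `extras`
        # partitions get one extra id each, matching the arithmetic above.
        ids_per_partition = total_ids // num_partitions
        extras = total_ids % num_partitions
        p_assignments = np.maximum(flat_ids // (ids_per_partition + 1),
                                   (flat_ids - extras) // ids_per_partition)
        new_ids = np.where(p_assignments < extras,
                           flat_ids % (ids_per_partition + 1),
                           (flat_ids - extras) % ids_per_partition)
    else:
        raise ValueError("Unrecognized partition strategy: " + strategy)
    return p_assignments, new_ids

# 10 ids over 3 partitions: "mod" interleaves them round-robin, while "div"
# keeps ids 0-3, 4-6, and 7-9 together.
print(partition_ids(np.arange(10), 3, strategy="mod"))
print(partition_ids(np.arange(10), 3, total_ids=10, strategy="div"))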
Example #20
0
def _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops,
                     gate_gradients, aggregation_method, stop_gradients):
    """Implementation of gradients()."""
    if context.executing_eagerly():
        raise RuntimeError("tf.gradients not supported when eager execution "
                           "is enabled. Use tf.contrib.eager.GradientTape "
                           "instead.")
    ys = _AsList(ys)
    xs = _AsList(xs)
    stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients)
    if grad_ys is None:
        grad_ys = [None] * len(ys)
    else:
        grad_ys = _AsList(grad_ys)

    with ops.name_scope(
            name, "gradients",
            list(ys) + list(xs) + list(stop_gradients) +
            list(grad_ys)) as grad_scope:
        # Get a uid for this call to gradients that can be used to help
        # cluster ops for compilation.
        gradient_uid = ops.get_default_graph().unique_name("uid")
        ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
        xs = [
            x.handle if resource_variable_ops.is_resource_variable(x) else x
            for x in xs
        ]
        xs = ops.internal_convert_n_to_tensor_or_indexed_slices(xs,
                                                                name="x",
                                                                as_ref=True)
        grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops,
                                 gradient_uid)

        # The approach we take here is as follows: Create a list of all ops in the
        # subgraph between the ys and xs.  Visit these ops in reverse order of ids
        # to ensure that when we visit an op the gradients w.r.t its outputs have
        # been collected.  Then aggregate these gradients if needed, call the op's
        # gradient function, and add the generated gradients to the gradients for
        # its input.

        # Initialize the pending count for ops in the connected subgraph from ys
        # to the xs.
        if len(ys) > 1:
            ys = [array_ops.identity(y) if y.consumers() else y for y in ys]
        to_ops = [t.op for t in ys]
        from_ops = [t.op for t in xs]
        stop_gradient_ops = [t.op for t in stop_gradients]
        reachable_to_ops, pending_count, loop_state = _PendingCount(
            ops.get_default_graph(), to_ops, from_ops,
            colocate_gradients_with_ops)

        # Iterate over the collected ops.
        #
        # grads: op => list of gradients received on each output endpoint of the
        # op.  The gradients for each endpoint are initially collected as a list.
        # When it is time to call the op's gradient function, for each endpoint we
        # aggregate the list of received gradients into an Add() Operation if there
        # is more than one.
        grads = {}

        # Add the initial gradients for the ys.
        for y, grad_y in zip(ys, grad_ys):
            _SetGrad(grads, y, grad_y)

        # Initialize queue with to_ops.
        queue = collections.deque()
        # Add the ops in 'to_ops' into the queue.
        to_ops_set = set()
        for op in to_ops:
            # 'ready' handles the case where one output gradient relies on
            # another output's gradient.
            # pylint: disable=protected-access
            ready = (pending_count[op._id] == 0)
            if ready and op._id not in to_ops_set and op._id in reachable_to_ops:
                to_ops_set.add(op._id)
                queue.append(op)
            # pylint: enable=protected-access

        if loop_state:
            loop_exits = loop_state.ProcessUnusedLoopExits(
                pending_count, to_ops_set)
            for y in loop_exits:
                if _IsTrainable(y):
                    _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
                    queue.append(y.op)

        stop_ops = _StopOps(from_ops, stop_gradient_ops, pending_count)
        while queue:
            # generate gradient subgraph for op.
            op = queue.popleft()
            with _maybe_colocate_with(op, gradient_uid,
                                      colocate_gradients_with_ops):
                if loop_state:
                    loop_state.EnterGradWhileContext(op, before=True)
                out_grads = _AggregatedGrads(grads, op, gradient_uid,
                                             loop_state, aggregation_method)
                if loop_state:
                    loop_state.ExitGradWhileContext(op, before=True)

                grad_fn = None
                func_call = None
                # pylint: disable=protected-access
                is_func_call = ops.get_default_graph()._is_function(op.type)
                # pylint: enable=protected-access
                has_out_grads = any(
                    isinstance(g, ops.Tensor) or g for g in out_grads)
                if has_out_grads and (op._id not in stop_ops):
                    if is_func_call:
                        func_call = ops.get_default_graph()._get_function(
                            op.type)
                        # Note that __defun is not set if the graph is
                        # imported. If it's set, we prefer to access the original
                        # defun.
                        func_call = getattr(op, "__defun", func_call)
                        grad_fn = func_call.python_grad_func
                    else:
                        # A grad_fn must be defined, either as a function or as None
                        # for ops that do not have gradients.
                        try:
                            grad_fn = ops.get_gradient_function(op)
                        except LookupError:
                            raise LookupError(
                                "No gradient defined for operation '%s' (op type: %s)"
                                % (op.name, op.type))
                if loop_state:
                    loop_state.EnterGradWhileContext(op, before=False)
                if (grad_fn or is_func_call) and has_out_grads:
                    # NOTE: If _AggregatedGrads didn't compute a value for the i'th
                    # output, it means that the cost does not depend on output[i],
                    # therefore dC/doutput[i] is 0.
                    for i, out_grad in enumerate(out_grads):
                        if (not isinstance(out_grad, ops.Tensor)
                                and not out_grad) and (
                                    (not grad_fn and is_func_call)
                                    or _IsTrainable(op.outputs[i])):
                            # Only trainable outputs or outputs for a function call that
                            # will use SymbolicGradient get a zero gradient. Gradient
                            # functions should ignore the gradient for other outputs.
                            # TODO(apassos) gradients of resource handles might be an
                            # issue here because of zeros.
                            if loop_state:
                                out_grads[i] = loop_state.ZerosLike(op, i)
                            else:
                                out_grads[
                                    i] = control_flow_ops.ZerosLikeOutsideLoop(
                                        op, i)
                    with ops.name_scope(op.name + "_grad"):
                        # pylint: disable=protected-access
                        with ops.get_default_graph()._original_op(op):
                            # pylint: enable=protected-access
                            if grad_fn:
                                # If grad_fn was found, do not use SymbolicGradient even for
                                # functions.
                                in_grads = _MaybeCompile(
                                    grad_scope, op, func_call,
                                    lambda: grad_fn(op, *out_grads))
                            else:
                                # For function call ops, we add a 'SymbolicGradient'
                                # node to the graph to compute gradients.
                                in_grads = _MaybeCompile(
                                    grad_scope, op, func_call,
                                    lambda: _SymGrad(op, out_grads))
                            in_grads = _AsList(in_grads)
                            _VerifyGeneratedGradients(in_grads, op)
                            if gate_gradients and len(
                                [x for x in in_grads if x is not None]) > 1:
                                with ops.device(None):
                                    with ops._colocate_with_for_gradient(  # pylint: disable=protected-access
                                            None,
                                            gradient_uid,
                                            ignore_existing=True):
                                        in_grads = control_flow_ops.tuple(
                                            in_grads)
                    _LogOpGradients(op, out_grads, in_grads)
                else:
                    # If no grad_fn is defined or none of out_grads is available,
                    # just propagate a list of None backwards.
                    in_grads = [None] * len(op.inputs)
                for i, (t_in, in_grad) in enumerate(zip(op.inputs, in_grads)):
                    if in_grad is not None:
                        if (isinstance(in_grad, ops.Tensor)
                                and t_in.dtype != dtypes.resource):
                            try:
                                in_grad.set_shape(t_in.get_shape())
                            except ValueError:
                                raise ValueError(
                                    "Incompatible shapes between op input and calculated "
                                    "input gradient.  Forward operation: %s.  Input index: %d. "
                                    "Original input shape: %s.  "
                                    "Calculated input gradient shape: %s" %
                                    (op.name, i, t_in.shape, in_grad.shape))
                        _SetGrad(grads, t_in, in_grad)
                if loop_state:
                    loop_state.ExitGradWhileContext(op, before=False)

            # Update pending count for the inputs of op and enqueue ready ops.
            _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count,
                                          loop_state)

    if loop_state:
        loop_state.PostProcessing()
    return [_GetGrad(grads, x) for x in xs]
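The backward pass above is essentially a reverse topological walk: an op is dequeued only once the gradients for all of its relevant outputs have arrived, which the pending counts track. A plain-Python sketch of that queue discipline on a toy DAG (reverse_topo_visit and the consumers map are illustrative, with no TensorFlow types involved):

import collections

def reverse_topo_visit(consumers, start_ops):
    """consumers: op -> list of ops that consume its outputs (forward edges).

    Visits ops so that every consumer is processed before its producer,
    mirroring how the gradient loop only pops an op once all contributions
    flowing into it from downstream have been accounted for.
    """
    # pending[op] = number of downstream consumers whose gradient
    # contribution has not been processed yet.
    pending = collections.Counter()
    for op, outs in consumers.items():
        for _ in outs:
            pending[op] += 1
    queue = collections.deque(op for op in start_ops if pending[op] == 0)
    order = []
    while queue:
        op = queue.popleft()
        order.append(op)                 # "compute" this op's input gradients
        for producer, outs in consumers.items():
            if op in outs:
                pending[producer] -= 1   # one more consumer handled
                if pending[producer] == 0:
                    queue.append(producer)
    return order

# y = f(g(x), h(x)): gradients must flow through f before g and h, and
# through both g and h before reaching x.
consumers = {"x": ["g", "h"], "g": ["f"], "h": ["f"], "f": []}
print(reverse_topo_visit(consumers, ["f"]))  # ['f', 'g', 'h', 'x']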
Example #21
0
    def apply_op(self, op_type_name, g=None, name=None, **keywords):
        # pylint: disable=g-doc-args
        """Add a node invoking a registered Op to a graph.

    Config proto extensions must be provided via the 'ext' keyword argument.
    Example usage:
       # input1 and input2 can be Tensors or anything ops.convert_to_tensor()
       # will convert to a Tensor.
       op_def_library.apply_op("op", input1=input1, input2=input2)
       # If none of the inputs are Tensors and your session doesn't have a
       # default graph, you will have to specify the graph.
       op_def_library.apply_op("op", input1=input1, g=g)
       # Can specify a node name.
       op_def_library.apply_op("op", input1=input1, name="node_name")
       # Must use keyword arguments, with the names specified in the OpDef.
       op_def_library.apply_op("op", input_name=input, attr_name=attr)

    All attrs must either be inferred from an input or specified.
    (If inferred, the attr must not be specified.)  If an attr has a default
    value specified in the Op's OpDef, then you may pass None as the value
    of that attr to get the default.

    Args:
      op_type_name: string. Must match the name field of a registered Op.
      g: The graph context (optional)
      name: string. Optional name of the created op.
      **keywords: input Tensor and attr arguments specified by name,
        and optional parameters to pass when constructing the Operation.

    Returns:
      The Tensor(s) representing the output of the operation, or the Operation
      itself if there are no outputs.

    Raises:
      RuntimeError: On some errors.
      TypeError: On some errors.
      ValueError: On some errors.
    """
        op_info = self._ops.get(op_type_name, None)
        if op_info is None:
            raise RuntimeError("Unrecognized Op name " + op_type_name)
        op_def = op_info.op_def

        # Determine the graph context.
        try:
            # Need to flatten all the arguments into a list.
            # pylint: disable=protected-access
            g = ops._get_graph_from_inputs(_Flatten(keywords.values()), graph=g)
            # pylint: enable=protected-access
        except AssertionError as e:
            raise RuntimeError(
                "Need to specify g=graph to Op '%s' (could not determine graph due "
                "to: %s)" % (op_type_name, e.message)
            )

        # Default name if not specified.
        if name is None:
            name = op_type_name

        # Requires that op_def has passed validation (using the C++
        # ValidateOpDef() from ../framework/op_def_util.h).
        attrs = {}
        inputs = []
        input_types = []
        with g.as_default(), ops.name_scope(name) as scope:

            # Perform input type inference
            inferred_from = {}
            for input_arg in op_def.input_arg:
                input_name = input_arg.name
                if input_name in keywords:
                    values = keywords.pop(input_name)
                elif input_name + "_" in keywords:
                    # Handle the case where the name is a keyword or built-in
                    # for Python so we use the name + _ instead.
                    input_name += "_"
                    values = keywords.pop(input_name)
                else:
                    raise TypeError("No argument for input " + input_name)

                # Goals:
                # * Convert values to Tensors if it contains constants.
                # * Verify that values is a list if that matches the input_arg's
                #   type.
                # * If the input_arg's type is determined by attrs, either set
                #   those attrs and validate those attr values are legal (if
                #   they have not yet been set) or validate the input matches
                #   the type indicated by the attrs (if they have already been
                #   inferred via an earlier input).
                # * If the input_arg has an explicit type, make sure the input
                #   conforms.

                if _IsListParameter(input_arg):
                    if not _IsListValue(values):
                        raise TypeError(
                            "Expected list for '%s' argument to '%s' Op, not %s." % (input_name, op_type_name, values)
                        )
                    # In cases where we expect all elements of the list to have the
                    # same dtype, try to cast non-Tensor elements to that type.
                    dtype = None
                    if input_arg.type != types_pb2.DT_INVALID:
                        dtype = input_arg.type
                    elif input_arg.number_attr:
                        if input_arg.type_attr in attrs:
                            dtype = attrs[input_arg.type_attr]
                        else:
                            for t in values:
                                if isinstance(t, ops.Tensor):
                                    dtype = t.dtype
                                    break

                    try:
                        if not input_arg.is_ref and dtype:
                            dtype = dtypes.as_dtype(dtype).base_dtype
                        values = ops.convert_n_to_tensor_or_indexed_slices(
                            values, name=input_arg.name, dtype=dtype if dtype else None, as_ref=input_arg.is_ref
                        )
                    except (TypeError, ValueError):
                        assert dtype is not None, "Should not fail if dtype is None"
                        assert input_arg.number_attr, "Should be number_attr case"
                        # What types does the conversion function think values have?
                        values = ops.convert_n_to_tensor_or_indexed_slices(values, as_ref=input_arg.is_ref)
                        observed = ", ".join(v.dtype.base_dtype.name for v in values)

                        prefix = "Tensors in list passed to '%s' of '%s' Op have types [%s]" % (
                            input_name,
                            op_type_name,
                            observed,
                        )
                        if input_arg.type != types_pb2.DT_INVALID:
                            raise TypeError("%s that do not match expected type %s." % (prefix, dtype.name))
                        elif input_arg.type_attr in attrs:
                            raise TypeError(
                                "%s that do not match type %s inferred from "
                                "earlier arguments." % (prefix, dtype.name)
                            )
                        else:
                            raise TypeError("%s that don't all match." % prefix)

                    types = [x.dtype for x in values]
                    inputs.extend(values)
                else:
                    # In cases where we have an expected type, try to convert non-Tensor
                    # arguments to that type.
                    dtype = None
                    if input_arg.type != types_pb2.DT_INVALID:
                        dtype = input_arg.type
                    elif input_arg.type_attr in attrs:
                        dtype = attrs[input_arg.type_attr]
                    try:
                        values = ops.convert_to_tensor(
                            values, name=input_arg.name, dtype=dtype, as_ref=input_arg.is_ref
                        )
                    except ValueError:
                        # What type does convert_to_tensor think it has?
                        observed = ops.convert_to_tensor(values, as_ref=input_arg.is_ref).dtype.name
                        prefix = "Input '%s' of '%s' Op has type %s that does not match" % (
                            input_name,
                            op_type_name,
                            observed,
                        )
                        if input_arg.type != types_pb2.DT_INVALID:
                            raise TypeError("%s expected type of %s." % (prefix, dtypes.as_dtype(input_arg.type).name))
                        else:
                            raise TypeError(
                                "%s type %s of argument '%s'."
                                % (
                                    prefix,
                                    dtypes.as_dtype(attrs[input_arg.type_attr]).name,
                                    inferred_from[input_arg.type_attr],
                                )
                            )

                    types = [values.dtype]
                    inputs.append(values)
                base_types = [x.base_dtype for x in types]

                if input_arg.number_attr:
                    # <number-attr> * <type> or <number-attr> * <type-attr>
                    if input_arg.number_attr in attrs:
                        if len(values) != attrs[input_arg.number_attr]:
                            raise ValueError(
                                "List argument '%s' to '%s' Op with length %d must match "
                                "length %d of argument '%s'."
                                % (
                                    input_name,
                                    op_type_name,
                                    len(values),
                                    attrs[input_arg.number_attr],
                                    inferred_from[input_arg.number_attr],
                                )
                            )
                    else:
                        attrs[input_arg.number_attr] = len(values)
                        inferred_from[input_arg.number_attr] = input_name
                        num_attr = _Attr(op_def, input_arg.number_attr)
                        if num_attr.has_minimum and len(values) < num_attr.minimum:
                            raise ValueError(
                                "List argument '%s' to '%s' Op with length %d shorter "
                                "than minimum length %d." % (input_name, op_type_name, len(values), num_attr.minimum)
                            )
                    # All tensors must have the same base type.
                    if any([bt != base_types[0] for bt in base_types]):
                        raise TypeError(
                            "All tensors passed to '%s' of '%s' Op "
                            "must have the same type." % (input_name, op_type_name)
                        )
                    if input_arg.type != types_pb2.DT_INVALID:
                        # <number-attr> * <type> case
                        if base_types and base_types[0] != input_arg.type:
                            assert False, "Unreachable"
                    elif input_arg.type_attr in attrs:
                        # <number-attr> * <type-attr> case, where <type-attr> already
                        # has an inferred value.
                        if base_types and base_types[0] != attrs[input_arg.type_attr]:
                            assert False, "Unreachable"
                    else:
                        # <number-attr> * <type-attr> case, where we are now setting
                        # the <type-attr> based on this input
                        if not base_types:
                            raise TypeError(
                                "Don't know how to infer type variable from empty input "
                                "list passed to input '%s' of '%s' Op." % (input_name, op_type_name)
                            )
                        attrs[input_arg.type_attr] = base_types[0]
                        inferred_from[input_arg.type_attr] = input_name
                        type_attr = _Attr(op_def, input_arg.type_attr)
                        _SatisfiesTypeConstraint(base_types[0], type_attr)
                elif input_arg.type_attr:
                    # <type-attr>
                    attr_value = base_types[0]
                    if input_arg.type_attr in attrs:
                        if attrs[input_arg.type_attr] != attr_value:
                            assert False, "Unreachable"
                    else:
                        for base_type in base_types:
                            _SatisfiesTypeConstraint(base_type, _Attr(op_def, input_arg.type_attr))
                        attrs[input_arg.type_attr] = attr_value
                        inferred_from[input_arg.type_attr] = input_name
                elif input_arg.type_list_attr:
                    # <type-list-attr>
                    attr_value = base_types
                    if input_arg.type_list_attr in attrs:
                        if attrs[input_arg.type_list_attr] != attr_value:
                            raise TypeError(
                                "Input '%s' of '%s' Op has type list of %s that does not "
                                "match type list %s of argument '%s'."
                                % (
                                    input_name,
                                    op_type_name,
                                    ", ".join(dtypes.as_dtype(x).name for x in attr_value),
                                    ", ".join(dtypes.as_dtype(x).name for x in attrs[input_arg.type_list_attr]),
                                    inferred_from[input_arg.type_list_attr],
                                )
                            )
                    else:
                        for base_type in base_types:
                            _SatisfiesTypeConstraint(base_type, _Attr(op_def, input_arg.type_list_attr))
                        attrs[input_arg.type_list_attr] = attr_value
                        inferred_from[input_arg.type_list_attr] = input_name
                else:
                    # single Tensor with specified type
                    if base_types[0] != input_arg.type:
                        assert False, "Unreachable"

                if input_arg.is_ref:
                    if not all(x.is_ref_dtype for x in types):
                        raise TypeError("Input '%s' of '%s' Op requires l-value input" % (input_name, op_type_name))
                    input_types.extend(types)
                else:
                    input_types.extend(base_types)

            # Process remaining attrs
            for attr in op_def.attr:
                # Skip attrs that have already had their values inferred
                if attr.name in attrs:
                    if attr.name in keywords:
                        raise TypeError("Should not specify value for inferred attr '%s'." % attr.name)
                    continue
                if attr.name in keywords:
                    attrs[attr.name] = keywords.pop(attr.name)
                elif attr.name + "_" in keywords:
                    # Attrs whose names match Python keywords have an extra '_'
                    # appended, so we must check for that as well.
                    attrs[attr.name] = keywords.pop(attr.name + "_")
                else:
                    raise TypeError("No argument for attr " + attr.name)

            # Convert attr values to AttrValue protos.
            attr_protos = {}
            for attr_def in op_def.attr:
                key = attr_def.name
                value = attrs[key]
                attr_value = attr_value_pb2.AttrValue()
                if attr_def.HasField("default_value") and value is None:
                    attr_value.CopyFrom(attr_def.default_value)
                    attr_protos[key] = attr_value
                    continue
                if attr_def.type.startswith("list("):
                    if not _IsListValue(value):
                        raise TypeError("Expected list for attr " + key)
                    if attr_def.has_minimum:
                        if len(value) < attr_def.minimum:
                            raise ValueError(
                                "Attr '%s' of '%s' Op passed list of length %d "
                                "less than minimum %d." % (key, op_type_name, len(value), attr_def.minimum)
                            )
                if attr_def.type == "string":
                    attr_value.s = _MakeStr(value, key)
                    if attr_def.HasField("allowed_values"):
                        if attr_value.s not in attr_def.allowed_values.list.s:
                            raise ValueError(
                                "Attr '%s' of '%s' Op passed string '%s' not in: \"%s\"."
                                % (
                                    key,
                                    op_type_name,
                                    compat.as_text(attr_value.s),
                                    '", "'.join(map(compat.as_text, attr_def.allowed_values.list.s)),
                                )
                            )
                elif attr_def.type == "list(string)":
                    attr_value.list.s.extend([_MakeStr(x, key) for x in value])
                    if attr_def.HasField("allowed_values"):
                        for x in attr_value.list.s:
                            if x not in attr_def.allowed_values.list.s:
                                raise ValueError(
                                    "Attr '%s' of '%s' Op passed string '%s' not in: \"%s\"."
                                    % (
                                        key,
                                        op_type_name,
                                        compat.as_text(x),
                                        '", "'.join(map(compat.as_text, attr_def.allowed_values.list.s)),
                                    )
                                )
                elif attr_def.type == "int":
                    attr_value.i = _MakeInt(value, key)
                    if attr_def.has_minimum:
                        if attr_value.i < attr_def.minimum:
                            raise ValueError(
                                "Attr '%s' of '%s' Op passed %d less than minimum %d."
                                % (key, op_type_name, attr_value.i, attr_def.minimum)
                            )
                elif attr_def.type == "list(int)":
                    attr_value.list.i.extend([_MakeInt(x, key) for x in value])
                elif attr_def.type == "float":
                    attr_value.f = _MakeFloat(value, key)
                elif attr_def.type == "list(float)":
                    attr_value.list.f.extend([_MakeFloat(x, key) for x in value])
                elif attr_def.type == "bool":
                    attr_value.b = _MakeBool(value, key)
                elif attr_def.type == "list(bool)":
                    attr_value.list.b.extend([_MakeBool(x, key) for x in value])
                elif attr_def.type == "type":
                    attr_value.type = _MakeType(value, attr_def)
                elif attr_def.type == "list(type)":
                    attr_value.list.type.extend([_MakeType(x, attr_def) for x in value])
                elif attr_def.type == "shape":
                    attr_value.shape.CopyFrom(_MakeShape(value, key))
                elif attr_def.type == "list(shape)":
                    attr_value.list.shape.extend([_MakeShape(x, key) for x in value])
                elif attr_def.type == "tensor":
                    attr_value.tensor.CopyFrom(_MakeTensor(value, key))
                elif attr_def.type == "list(tensor)":
                    attr_value.list.tensor.extend([_MakeTensor(x, key) for x in value])
                elif attr_def.type == "func":
                    if not isinstance(value, compat.bytes_or_text_types):
                        raise TypeError("Expects a string for the func name")
                    attr_value.func.name = value
                else:
                    raise TypeError("Unrecognized Attr type " + attr_def.type)

                attr_protos[key] = attr_value
            del attrs  # attrs is no longer authoritative, use attr_protos instead

            # Determine output types (possibly using attrs)
            output_types = []
            output_structure = []
            for arg in op_def.output_arg:
                types = []
                if arg.number_attr:
                    n = _AttrValue(attr_protos, arg.number_attr).i
                    if arg.type_attr:
                        types = [_AttrValue(attr_protos, arg.type_attr).type] * n
                    else:
                        types = [arg.type] * n
                    output_structure.append(n)
                elif arg.type_attr:
                    t = _AttrValue(attr_protos, arg.type_attr)
                    types = [t.type]
                    output_structure.append(None)
                elif arg.type_list_attr:
                    t = _AttrValue(attr_protos, arg.type_list_attr)
                    types = t.list.type
                    output_structure.append(len(t.list.type))
                else:
                    types = [arg.type]
                    output_structure.append(None)
                if arg.is_ref:
                    types = [dtypes.as_dtype(x).as_ref for x in types]
                output_types.extend(types)

            if keywords:
                raise TypeError("apply_op() got unexpected keyword arguments: " + ", ".join(sorted(keywords.keys())))

            # Add Op to graph
            if output_structure:
                op = g.create_op(
                    op_type_name,
                    inputs,
                    output_types,
                    name=scope,
                    input_types=input_types,
                    attrs=attr_protos,
                    op_def=op_def,
                )
                outputs = op.outputs
                return _Restructure(ops.convert_n_to_tensor_or_indexed_slices(outputs), output_structure)
            else:
                return g.create_op(
                    op_type_name,
                    inputs,
                    output_types,
                    name=scope,
                    input_types=input_types,
                    attrs=attr_protos,
                    op_def=op_def,
                )
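
The `output_structure` bookkeeping built above is what lets `apply_op()` hand back either a single tensor, a list of tensors, or a mix of both, one entry per output arg (`None` for a single tensor, an integer for a list of that length). Below is a minimal sketch of the regrouping step, assuming `_Restructure` simply walks the flat `op.outputs` list against `output_structure`; it illustrates the presumed behavior rather than reproducing the library's code.

```python
def restructure_sketch(flat_outputs, output_structure):
    # Regroup a flat list of output tensors according to output_structure:
    # None -> take one tensor, integer n -> take a sub-list of n tensors.
    result, pos = [], 0
    for spec in output_structure:
        if spec is None:
            result.append(flat_outputs[pos])
            pos += 1
        else:
            result.append(flat_outputs[pos:pos + spec])
            pos += spec
    # A single output arg is returned unwrapped rather than as a 1-element list.
    return result[0] if len(result) == 1 else result
```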
Example #22
0
def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops):
  """Fill in default values for grad_ys.

  Args:
    grad_ys: List of gradients, can contain None.
    ys: List of tensors.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.

  Returns:
    A list of gradients to use, without None.

  Raises:
    ValueError: If sizes of gradients and inputs don't match.
    TypeError: If type of any gradient is not valid for its input.
  """
  if len(grad_ys) != len(ys):
    raise ValueError("Passed %d grad_ys for %d ys" % (len(grad_ys), len(ys)))
  grad_ys = ops.convert_n_to_tensor_or_indexed_slices(grad_ys, name="grad_y")
  new_grad_ys = []
  for i in xrange(len(grad_ys)):
    grad_y = grad_ys[i]
    y = ys[i]
    if grad_y is None:
      if y.dtype.is_complex:
        raise TypeError(
            "Gradients of complex tensors must set grad_ys (y.dtype = %r)" %
            y.dtype)
      with _maybe_colocate_with(y.op, colocate_gradients_with_ops):
        new_grad_ys.append(array_ops.fill(
            array_ops.shape(y), constant_op.constant(
                1, dtype=y.dtype, name="grad_ys_%d" % i)))
      continue
    if y.dtype.is_floating or y.dtype.is_integer:
      if not grad_y.dtype.is_floating and not grad_y.dtype.is_integer:
        raise TypeError("Gradient type %s generated for real or "
                        "integer-valued tensor %s with type %s must be "
                        "real or integer" %
                        (dtypes.as_dtype(grad_y.dtype).name, y,
                         dtypes.as_dtype(y.dtype).name))
    elif y.dtype.is_complex:
      if not grad_y.dtype.is_complex:
        raise TypeError("Gradient type %s generated for complex-valued "
                        "tensor %s with type %s must be real" %
                        (dtypes.as_dtype(grad_y.dtype).name, y,
                         dtypes.as_dtype(y.dtype).name))
    else:
      raise TypeError("Tensor %s with type %s must be numeric "
                      "to obtain a default gradient" %
                      (y, dtypes.as_dtype(y.dtype).name))
    # Create a grad_y tensor in the name scope of the gradient.
    # Required for TensorArrays to identify which gradient call a
    # grad_y value is coming from.
    if isinstance(grad_y, ops.IndexedSlices):
      new_grad_ys.append(
          ops.IndexedSlices(
              indices=(array_ops.identity(grad_y.indices,
                                          name="grad_ys_%d_indices" % i)
                       if isinstance(grad_y.indices, ops.Tensor)
                       else grad_y.indices),
              values=(array_ops.identity(grad_y.values,
                                         name="grad_ys_%d_values" % i)
                      if isinstance(grad_y.values, ops.Tensor)
                      else grad_y.values),
              dense_shape=(array_ops.identity(grad_y.dense_shape,
                                              name="grad_ys_%d_shape" % i)
                           if isinstance(grad_y.dense_shape, ops.Tensor)
                           else grad_y.dense_shape)))
    else:
      new_grad_ys.append(array_ops.identity(grad_y, name="grad_ys_%d" % i))

  return new_grad_ys
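
A short usage sketch of the default-seeding behavior above, assuming a TF 1.x graph-mode environment: with `grad_ys=None`, `_DefaultGradYs` fills in a ones tensor shaped like each `y`, so the two calls below build equivalent gradient graphs.

```python
import tensorflow as tf  # assumes TF 1.x graph mode

x = tf.constant([1.0, 2.0, 3.0])
y = x * x

g_default = tf.gradients(y, x)                            # grad_ys filled with ones
g_explicit = tf.gradients(y, x, grad_ys=tf.ones_like(y))  # same seed, made explicit
# Both gradient lists evaluate to [array([2., 4., 6.])] in a session, i.e. dy/dx seeded with 1.
```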
Example #23
0
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None,
              stop_gradients=None):
  """Constructs symbolic derivatives of sum of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors.  `grad_ys`
  is a list of `Tensor`, holding the gradients received by the
  `ys`. The list must be the same length as `ys`.

  `gradients()` adds ops to the graph to output the derivatives of `ys` with
  respect to `xs`.  It returns a list of `Tensor` of length `len(xs)` where
  each tensor is the `sum(dy/dx)` for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds
  the initial gradients for each y in `ys`.  When `grad_ys` is None,
  we fill in a tensor of '1's of the shape of y for each y in `ys`.  A
  user can provide their own initial `grad_ys` to compute the
  derivatives using a different initial gradient for each y (e.g., if
  one wanted to weight the gradient differently for each value in
  each y).

  `stop_gradients` is a `Tensor` or a list of tensors to be considered constant
  with respect to all `xs`. These tensors will not be backpropagated through,
  as though they had been explicitly disconnected using `stop_gradient`.  Among
  other things, this allows computation of partial derivatives as opposed to
  total derivatives. For example:

  ```python
  a = tf.constant(0.)
  b = 2 * a
  g = tf.gradients(a + b, [a, b], stop_gradients=[a, b])
  ```

  Here the partial derivatives `g` evaluate to `[1.0, 1.0]`, compared to the
  total derivatives `tf.gradients(a + b, [a, b])`, which take into account the
  influence of `a` on `b` and evaluate to `[3.0, 1.0]`.  Note that the above is
  equivalent to:

  ```python
  a = tf.stop_gradient(tf.constant(0.))
  b = tf.stop_gradient(2 * a)
  g = tf.gradients(a + b, [a, b])
  ```

  `stop_gradients` provides a way of stopping gradient after the graph has
  already been constructed, as compared to `tf.stop_gradient` which is used
  during graph construction.  When the two approaches are combined,
  backpropagation stops at both `tf.stop_gradient` nodes and nodes in
  `stop_gradients`, whichever is encountered first.

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as
      `ys` and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned
      for an operation.  This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.
    stop_gradients: Optional. A `Tensor` or list of tensors not to differentiate
      through.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid.
    RuntimeError: if called in Eager mode.

  """
  if context.in_eager_mode():
    raise RuntimeError("tf.gradients not supported in EAGER mode. Use "
                       "functions in tf.contrib.eager.backprop instead.")
  ys = _AsList(ys)
  xs = _AsList(xs)
  stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)

  with ops.name_scope(
      name, "gradients",
      list(ys) + list(xs) + list(stop_gradients) + list(grad_ys)) as grad_scope:
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = [
        x.handle if isinstance(x, resource_variable_ops.ResourceVariable) else x
        for x in xs
    ]
    xs = ops.internal_convert_n_to_tensor_or_indexed_slices(
        xs, name="x", as_ref=True)
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs.  Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected.  Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    if len(ys) > 1:
      ys = [array_ops.identity(y) if y.consumers() else y for y in ys]
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    stop_gradient_ops = [t.op for t in stop_gradients]
    pending_count, loop_state = _PendingCount(
        ops.get_default_graph(), to_ops, from_ops, colocate_gradients_with_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op.  The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)
      # pylint: enable=protected-access

    if loop_state:
      loop_exits = loop_state.ProcessUnusedLoopExits(pending_count, to_ops_set)
      for y in loop_exits:
        if _IsTrainable(y):
          _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    stop_ops = _StopOps(from_ops, stop_gradient_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with _maybe_colocate_with(op, colocate_gradients_with_ops):
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=True)
        out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=True)

        grad_fn = None
        # pylint: disable=protected-access
        func_call = None
        is_func_call = ops.get_default_graph()._is_function(op.type)
        has_out_grads = any(isinstance(g, ops.Tensor) or g for g in out_grads)
        if has_out_grads and (op._id not in stop_ops):
          if is_func_call:
            func_call = ops.get_default_graph()._get_function(op.type)
            grad_fn = func_call.python_grad_func
            # pylint: enable=protected-access
          else:
            # A grad_fn must be defined, either as a function or as None
            # for ops that do not have gradients.
            try:
              grad_fn = ops.get_gradient_function(op)
            except LookupError:
              raise LookupError(
                  "No gradient defined for operation '%s' (op type: %s)" %
                  (op.name, op.type))
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=False)
        if (grad_fn or is_func_call) and has_out_grads:
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not isinstance(out_grad, ops.Tensor) and not out_grad) and (
                (not grad_fn and is_func_call) or _IsTrainable(op.outputs[i])):
              # Only trainable outputs or outputs for a function call that
              # will use SymbolicGradient get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              # TODO(apassos) gradients of resource handles might be an
              # issue here because of zeros.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i)
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              if grad_fn:
                # If grad_fn was found, do not use SymbolicGradient even for
                # functions.
                in_grads = _MaybeCompile(grad_scope, op, func_call,
                                         lambda: grad_fn(op, *out_grads))
              else:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                in_grads = _MaybeCompile(grad_scope, op, func_call,
                                         lambda: _SymGrad(op, out_grads))
              in_grads = _AsList(in_grads)
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len([x for x in in_grads
                                         if x is not None]) > 1:
                with ops.device(None):
                  with ops.colocate_with(None, ignore_existing=True):
                    in_grads = control_flow_ops.tuple(in_grads)
          _LogOpGradients(op, out_grads, in_grads)
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for i, (t_in, in_grad) in enumerate(zip(op.inputs, in_grads)):
          if in_grad is not None:
            if (isinstance(in_grad, ops.Tensor) and
                t_in.dtype != dtypes.resource):
              try:
                in_grad.set_shape(t_in.get_shape())
              except ValueError:
                raise ValueError(
                    "Incompatible shapes between op input and calculated "
                    "input gradient.  Forward operation: %s.  Input index: %d. "
                    "Original input shape: %s.  "
                    "Calculated input gradient shape: %s" %
                    (op.name, i, t_in.shape, in_grad.shape))
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=False)

      # Update pending count for the inputs of op and enqueue ready ops.
      _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state)

  if loop_state:
    loop_state.PostProcessing()
  return [_GetGrad(grads, x) for x in xs]
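
A usage sketch of the two docstring features above, custom `grad_ys` seeds and `stop_gradients`, again assuming a TF 1.x graph-mode environment:

```python
import tensorflow as tf  # assumes TF 1.x graph mode

a = tf.constant(2.0)
b = 3.0 * a
c = a + b

# Weight the seed gradient for c by 10: the total derivative dc/da = 1 + 3 = 4,
# so this evaluates to [40.0].
weighted = tf.gradients(c, a, grad_ys=tf.constant(10.0))

# Treat b as a constant: only the direct path a -> c contributes, so dc/da = 1.0.
partial = tf.gradients(c, a, stop_gradients=[b])
```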
Example #24
0
def embedding_lookup(params, ids, name=None):
  """Looks up `ids` in a list of embedding tensors.

  This function is used to perform parallel lookups on the list of
  tensors in `params`.  It is a generalization of
  [`tf.gather()`](../../api_docs/python/array_ops.md#gather), where `params` is
  interpreted as a partition of a larger embedding tensor.

  If `len(params) > 1`, each element `id` of `ids` is partitioned between
  the elements of `params` by computing `p = id % len(params)`, and is
  then used to look up the slice `params[p][id // len(params), ...]`.

  The results of the lookup are then concatenated into a dense
  tensor. The returned tensor has shape `shape(ids) + shape(params)[1:]`.

  Args:
    params: A list of tensors with the same shape and type.
    ids: A `Tensor` with type `int32` containing the ids to be looked
      up in `params`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` with the same type as the tensors in `params`.

  Raises:
    ValueError: If `params` is empty.
  """
  if not isinstance(params, list):
    params = [params]
  with ops.op_scope(params + [ids], name, "embedding_lookup") as name:
    if not params:
      raise ValueError("Need at least one param")
    np = len(params)  # Number of partitions
    params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params")
    if np == 1:
      with ops.device(params[0].device):
        return array_ops.gather(params[0], ids, name=name)
    else:
      ids = ops.convert_to_tensor(ids, name="ids")
      flat_ids = array_ops.reshape(ids, [-1])
      original_indices = math_ops.range(0, array_ops.size(flat_ids))
      # Compute flat_ids % partitions for each id
      ids_mod_p = flat_ids % np
      if ids_mod_p.dtype != types.int32:
        ids_mod_p = math_ops.cast(ids_mod_p, types.int32)
      # Partition single list of ids based on ids % np into np separate lists
      plist = data_flow_ops.dynamic_partition(flat_ids, ids_mod_p, np)
      # Similarly, partition the original indices.
      pindices = data_flow_ops.dynamic_partition(original_indices, ids_mod_p,
                                                 np)
      # Do np separate lookups, finding embeddings for plist[p] in params[p]
      partitioned_result = []
      for p in xrange(np):
        # TODO(agarwal): handle device allocations here and later in the
        # colocate code.
        gather_ids = plist[p] // np
        with ops.device(params[p].device):
          partitioned_result.append(array_ops.gather(params[p], gather_ids))
      # Stitch these back together
      ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result,
                                         name=name)
      # Reshape to reverse the flattening of ids.
      # It's important that we compute params[0].shape on the right device
      # to avoid data motion.
      with ops.device(params[0].device):
        params_shape = array_ops.shape(params[0])
      ret = array_ops.reshape(ret, array_ops.concat(0, [
          array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])]))
      # output shape = ids.shape + params[*].shape[1:]
      # Normally the reshape is sufficient, but setting shape explicitly
      # teaches shape inference that params[1:].get_shape() matters.
      element_shape = params[0].get_shape()[1:]
      for p in params[1:]:
        element_shape = element_shape.merge_with(p.get_shape()[1:])
      ret.set_shape(ids.get_shape().concatenate(element_shape))
      return ret
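
A tiny worked example of the mod-partition rule described in the docstring above (`p = id % len(params)`, row `id // len(params)`); plain Python, no TensorFlow needed:

```python
params_len = 2  # pretend params is a list of two embedding shards
for lookup_id in range(6):
    p = lookup_id % params_len       # which shard holds this id
    row = lookup_id // params_len    # row inside that shard
    print("id %d -> params[%d][%d]" % (lookup_id, p, row))
# Even ids map to rows 0, 1, 2 of params[0]; odd ids to rows 0, 1, 2 of params[1].
```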
Example #25
0
def RttDefaultGradYs(grad_ys,
                   ys,
                   colocate_gradients_with_ops,
                   gradient_uid="__unsupported__"):
    if len(grad_ys) != len(ys):
        raise ValueError("Passed %d grad_ys for %d ys" % (len(grad_ys), len(ys)))
    grad_ys = convert_n_to_tensor_or_indexed_slices(grad_ys, name="grad_y")
    new_grad_ys = []
    for i in gradients_util.xrange(len(grad_ys)):
        grad_y = grad_ys[i]
        y = ys[i]
        with gradients_util._maybe_colocate_with(y.op, gradient_uid, colocate_gradients_with_ops):
            if grad_y is None:
                if y.dtype.is_complex:
                    raise TypeError(
                        "Gradients of complex tensors must set grad_ys (y.dtype = %r)" %
                        y.dtype)
                # NOTE(George): We set the default value as string "1.0".
                new_grad_ys.append(
                    array_ops.fill(
                        array_ops.shape(y),
                        constant_op.constant("1.0", dtype=y.dtype, name="grad_ys_%d" % i)))
                continue
            if y.dtype.is_floating or y.dtype.is_integer:
                if not grad_y.dtype.is_floating and not grad_y.dtype.is_integer:
                    raise TypeError(
                        "Gradient type %s generated for real or "
                        "integer-valued tensor %s with type %s must be "
                        "real or integer" % (dtypes.as_dtype(grad_y.dtype).name, y,
                                             dtypes.as_dtype(y.dtype).name))
            elif y.dtype.is_complex:
                if not grad_y.dtype.is_complex:
                    raise TypeError(
                        "Gradient type %s generated for complex-valued "
                        "tensor %s with type %s must be real" % (dtypes.as_dtype(
                            grad_y.dtype).name, y, dtypes.as_dtype(y.dtype).name))
            elif y.dtype == dtypes.variant:
                if grad_y.dtype != dtypes.variant:
                    raise TypeError(
                        "Gradient type %s generated for variant "
                        "tensor %s with type %s must be variant" % (dtypes.as_dtype(
                            grad_y.dtype).name, y, dtypes.as_dtype(y.dtype).name))
            elif y.dtype == dtypes.resource:
                # We assume y is the handle of a ResourceVariable. The gradient of a
                # ResourceVariable should be a numeric value, not another resource.
                if grad_y.dtype == dtypes.resource:
                    raise TypeError("Input gradient %s for resource tensor %s should not "
                                    "be a resource" % (grad_y, y))
            else:
                raise TypeError(
                    "Tensor %s with type %s must be numeric "
                    "to obtain a default gradient" % (y, dtypes.as_dtype(y.dtype).name))
            # Create a grad_y tensor in the name scope of the gradient.
            # Required for TensorArrays to identify which gradient call a
            # grad_y value is coming from.
            if isinstance(grad_y, IndexedSlices):
                new_grad_ys.append(
                    IndexedSlices(
                        indices=(array_ops.identity(
                            grad_y.indices, name="grad_ys_%d_indices" % i)
                                 if isinstance(grad_y.indices, Tensor) else
                                 grad_y.indices),
                        values=(array_ops.identity(
                            grad_y.values, name="grad_ys_%d_values" % i) if isinstance(
                            grad_y.values, Tensor) else grad_y.values),
                        dense_shape=(array_ops.identity(
                            grad_y.dense_shape, name="grad_ys_%d_shape" % i)
                                     if isinstance(grad_y.dense_shape, Tensor) else
                                     grad_y.dense_shape)))
            else:
                new_grad_ys.append(array_ops.identity(grad_y, name="grad_ys_%d" % i))

    return new_grad_ys
Example #26
0
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
    """Constructs symbolic partial derivatives of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors.  `grad_ys`
  is a list of `Tensor`, holding the gradients received by the
  `ys`. The list must be the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial
  derivatives of `ys` with respect to `xs`.  It returns a list of
  `Tensor` of length `len(xs)` where each tensor is the `sum(dy/dx)`
  for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds
  the initial gradients for each y in `ys`.  When `grad_ys` is None,
  we fill in a tensor of '1's of the shape of y for each y in `ys`.  A
  user can provide their own initial `grad_ys` to compute the
  derivatives using a different initial gradient for each y (e.g., if
  one wanted to weight the gradient differently for each value in
  each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as
      `ys` and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned
      for an operation.  This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid.

  """
    ys = _AsList(ys)
    xs = _AsList(xs)
    if grad_ys is None:
        grad_ys = [None] * len(ys)
    else:
        grad_ys = _AsList(grad_ys)
    with ops.op_scope(ys + xs + grad_ys, name, "gradients"):
        ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
        xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
        grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

        # The approach we take here is as follows: Create a list of all ops in the
        # subgraph between the ys and xs.  Visit these ops in reverse order of ids
        # to ensure that when we visit an op the gradients w.r.t its outputs have
        # been collected.  Then aggregate these gradients if needed, call the op's
        # gradient function, and add the generated gradients to the gradients for
        # its input.

        # Initialize the pending count for ops in the connected subgraph from ys
        # to the xs.
        to_ops = [t.op for t in ys]
        from_ops = [t.op for t in xs]
        pending_count, loop_state = _PendingCount(ops.get_default_graph(),
                                                  to_ops, from_ops)

        # Iterate over the collected ops.
        #
        # grads: op => list of gradients received on each output endpoint of the
        # op.  The gradients for each endpoint are initially collected as a list.
        # When it is time to call the op's gradient function, for each endpoint we
        # aggregate the list of received gradients into a Add() Operation if there
        # is more than one.
        grads = {}

        # Add the initial gradients for the ys.
        for y, grad_y in zip(ys, grad_ys):
            _SetGrad(grads, y, grad_y)

        # Initialize queue with to_ops.
        queue = collections.deque()
        # Add the ops in 'to_ops' into the queue.
        to_ops_set = set()
        for op in to_ops:
            # 'ready' handles the case where one output gradient relies on
            # another output's gradient.
            # pylint: disable=protected-access
            ready = (pending_count[op._id] == 0)
            if ready and op._id not in to_ops_set:
                to_ops_set.add(op._id)
                queue.append(op)

        if loop_state:
            # The "unused" exits of the loops are added to ys. As an example,
            # people often write:
            #         v1, _ = While(p, b, [x1, x2])
            #         result = gradients(v1, x1)
            # The exit node of x2 is not included by the betweenness analysis.
            # But we need it if x2 is involved in computing v1. So we add it
            # back in backprop with a zeros_like gradient.
            loop_exits = loop_state.GetAllLoopExits()
            for y in loop_exits:
                if pending_count[y.op._id] == 0 and y.op._id not in to_ops_set:
                    if _IsFloat(y):
                        # Floating-point outputs get a zero gradient.
                        _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
                    queue.append(y.op)

        # The set of 'from_ops'.
        stop_ops = _StopOps(from_ops, pending_count)
        while queue:
            # generate gradient subgraph for op.
            op = queue.popleft()
            with ops.device(_GetGradsDevice(op, colocate_gradients_with_ops)):
                if loop_state:
                    loop_state.EnterGradWhileContext(op)
                out_grads = _AggregatedGrads(grads, op, loop_state,
                                             aggregation_method)
                grad_fn = None

                # pylint: disable=protected-access
                is_func_call = ops.get_default_graph()._is_function(op.type)
                # pylint: enable=protected-access

                if not is_func_call and any(
                        out_grads) and op._id not in stop_ops:
                    # pylint: enable=protected-access
                    # A grad_fn must be defined, either as a function or as None
                    # for ops that do not have gradients.
                    try:
                        grad_fn = ops.get_gradient_function(op)
                    except LookupError:
                        raise LookupError(
                            "No gradient defined for operation '%s' (op type: %s)"
                            % (op.name, op.type))
                if (grad_fn or is_func_call) and any(out_grads):
                    # NOTE: If _AggregatedGrads didn't compute a value for the i'th
                    # output, it means that the cost does not depend on output[i],
                    # therefore dC/doutput[i] is 0.
                    for i, out_grad in enumerate(out_grads):
                        if not out_grad and _IsFloat(op.outputs[i]):
                            # Only floating-point outputs get a zero gradient. Gradient
                            # functions should ignore the gradient for other outputs.
                            if loop_state:
                                out_grads[i] = loop_state.ZerosLike(op, i)
                            else:
                                out_grads[i] = array_ops.zeros_like(
                                    op.outputs[i])
                    with ops.name_scope(op.name + "_grad"):
                        # pylint: disable=protected-access
                        with ops.get_default_graph()._original_op(op):
                            # pylint: enable=protected-access
                            wrapped_op = op
                            if loop_state:
                                wrapped_op = loop_state.MakeWrapper(op)
                            if is_func_call:
                                # For function call ops, we add a 'SymbolicGradient'
                                # node to the graph to compute gradients.
                                f_in = [x for x in op.inputs] + out_grads
                                f_types = [x.dtype for x in op.inputs]
                                # pylint: disable=protected-access
                                in_grads = _AsList(
                                    functional_ops._symbolic_gradient(
                                        f_in, f_types, op.type))
                                # pylint: enable=protected-access
                            else:
                                in_grads = _AsList(
                                    grad_fn(wrapped_op, *out_grads))
                            _VerifyGeneratedGradients(in_grads, op)
                            if gate_gradients and len(
                                    tuple(filter(None, in_grads))) > 1:
                                in_grads = control_flow_ops.tuple(in_grads)
                    logging.vlog(1, "Gradient for '" + op.name + "'")
                    logging.vlog(1, "  in  --> %s",
                                 ", ".join([x.name for x in out_grads if x]))
                    logging.vlog(1, "  out --> %s",
                                 ", ".join([x.name for x in in_grads if x]))
                else:
                    # If no grad_fn is defined or none of out_grads is available,
                    # just propagate a list of None backwards.
                    in_grads = [None] * len(op.inputs)
                for t_in, in_grad in zip(op.inputs, in_grads):
                    if in_grad:
                        _SetGrad(grads, t_in, in_grad)
                if loop_state:
                    loop_state.ExitGradWhileContext(op)

            # update pending count for the inputs of op.
            # pylint: disable=protected-access
            for x in op.inputs:
                pending_count[x.op._id] -= 1
                ready = (pending_count[x.op._id] == 0)
                if loop_state and not ready:
                    ready = (pending_count[x.op._id] > 0
                             and control_flow_ops.IsLoopSwitch(x.op))
                if ready:
                    queue.append(x.op)
            for x in op.control_inputs:
                pending_count[x._id] -= 1
                if pending_count[x._id] == 0:
                    queue.append(x)
            # pylint: enable=protected-access
    return [_GetGrad(grads, x) for x in xs]
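
The queue and pending-count bookkeeping above is the core of the reverse traversal described in the comments. Here is a framework-free sketch of the same scheduling idea (a hypothetical helper, not TensorFlow API): an op is processed only once the gradients from all of its consumers inside the y-to-x subgraph have arrived.

```python
import collections

def backprop_order(inputs_of):
    # inputs_of: dict mapping each op name to the ops producing its inputs,
    # restricted to the subgraph between the ys and the xs (leaves map to []).
    pending = {op: 0 for op in inputs_of}
    for op in inputs_of:
        for producer in inputs_of[op]:
            pending[producer] += 1        # one more consumer to wait for
    queue = collections.deque(op for op in inputs_of if pending[op] == 0)
    order = []
    while queue:
        op = queue.popleft()              # all of op's consumers are done
        order.append(op)
        for producer in inputs_of[op]:
            pending[producer] -= 1
            if pending[producer] == 0:
                queue.append(producer)
    return order

# y = add(mul(x, x), x): backprop visits 'add', then 'mul', then 'x'.
print(backprop_order({"add": ["mul", "x"], "mul": ["x"], "x": []}))
```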
Example #27
0
def _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops,
                     gate_gradients, aggregation_method, stop_gradients):
  """Implementation of gradients()."""
  if context.executing_eagerly():
    raise RuntimeError("tf.gradients not supported when eager execution "
                       "is enabled. Use tf.contrib.eager.GradientTape "
                       "instead.")
  ys = _AsList(ys)
  xs = _AsList(xs)
  stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)

  with ops.name_scope(
      name, "gradients",
      list(ys) + list(xs) + list(stop_gradients) + list(grad_ys)) as grad_scope:
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = [
        x.handle if resource_variable_ops.is_resource_variable(x) else x
        for x in xs
    ]
    xs = ops.internal_convert_n_to_tensor_or_indexed_slices(
        xs, name="x", as_ref=True)
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs.  Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected.  Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    if len(ys) > 1:
      ys = [array_ops.identity(y) if y.consumers() else y for y in ys]
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    stop_gradient_ops = [t.op for t in stop_gradients]
    pending_count, loop_state = _PendingCount(
        ops.get_default_graph(), to_ops, from_ops, colocate_gradients_with_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op.  The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)
      # pylint: enable=protected-access

    if loop_state:
      loop_exits = loop_state.ProcessUnusedLoopExits(pending_count, to_ops_set)
      for y in loop_exits:
        if _IsTrainable(y):
          _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    stop_ops = _StopOps(from_ops, stop_gradient_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with _maybe_colocate_with(op, colocate_gradients_with_ops):
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=True)
        out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=True)

        grad_fn = None
        # pylint: disable=protected-access
        func_call = None
        is_func_call = ops.get_default_graph()._is_function(op.type)
        has_out_grads = any(isinstance(g, ops.Tensor) or g for g in out_grads)
        if has_out_grads and (op._id not in stop_ops):
          if is_func_call:
            func_call = ops.get_default_graph()._get_function(op.type)
            grad_fn = func_call.python_grad_func
            # pylint: enable=protected-access
          else:
            # A grad_fn must be defined, either as a function or as None
            # for ops that do not have gradients.
            try:
              grad_fn = ops.get_gradient_function(op)
            except LookupError:
              raise LookupError(
                  "No gradient defined for operation '%s' (op type: %s)" %
                  (op.name, op.type))
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=False)
        if (grad_fn or is_func_call) and has_out_grads:
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not isinstance(out_grad, ops.Tensor) and not out_grad) and (
                (not grad_fn and is_func_call) or _IsTrainable(op.outputs[i])):
              # Only trainable outputs or outputs for a function call that
              # will use SymbolicGradient get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              # TODO(apassos) gradients of resource handles might be an
              # issue here because of zeros.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i)
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              if grad_fn:
                # If grad_fn was found, do not use SymbolicGradient even for
                # functions.
                in_grads = _MaybeCompile(grad_scope, op, func_call,
                                         lambda: grad_fn(op, *out_grads))
              else:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                in_grads = _MaybeCompile(grad_scope, op, func_call,
                                         lambda: _SymGrad(op, out_grads))
              in_grads = _AsList(in_grads)
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len([x for x in in_grads
                                         if x is not None]) > 1:
                with ops.device(None):
                  with ops.colocate_with(None, ignore_existing=True):
                    in_grads = control_flow_ops.tuple(in_grads)
          _LogOpGradients(op, out_grads, in_grads)
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for i, (t_in, in_grad) in enumerate(zip(op.inputs, in_grads)):
          if in_grad is not None:
            if (isinstance(in_grad, ops.Tensor) and
                t_in.dtype != dtypes.resource):
              try:
                in_grad.set_shape(t_in.get_shape())
              except ValueError:
                raise ValueError(
                    "Incompatible shapes between op input and calculated "
                    "input gradient.  Forward operation: %s.  Input index: %d. "
                    "Original input shape: %s.  "
                    "Calculated input gradient shape: %s" %
                    (op.name, i, t_in.shape, in_grad.shape))
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=False)

      # Update pending count for the inputs of op and enqueue ready ops.
      _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state)

  if loop_state:
    loop_state.PostProcessing()
  return [_GetGrad(grads, x) for x in xs]
Example #28
0
def _GradientsHelper(ys,
                     xs,
                     grad_ys=None,
                     name="gradients",
                     colocate_gradients_with_ops=False,
                     gate_gradients=False,
                     aggregation_method=None,
                     stop_gradients=None,
                     unconnected_gradients=UnconnectedGradients.NONE,
                     src_graph=None):
    """Implementation of gradients()."""
    if context.executing_eagerly():
        raise RuntimeError(
            "tf.gradients is not supported when eager execution "
            "is enabled. Use tf.GradientTape instead.")
    if src_graph is None:
        src_graph = ops.get_default_graph()
    try:
        unconnected_gradients = UnconnectedGradients(unconnected_gradients)
    except ValueError:
        raise ValueError("Unknown value for unconnected_gradients: %r" %
                         unconnected_gradients)

    # If src_graph is a _FuncGraph (i.e. a function body), gather it and all
    # ancestor graphs. This is necessary for correctly handling captured values.
    func_graphs = []
    curr_graph = src_graph
    while _IsFunction(curr_graph):
        func_graphs.append(curr_graph)
        if isinstance(curr_graph, FuncGraph):
            curr_graph = curr_graph.outer_graph
        else:
            assert isinstance(curr_graph, framework_function._FuncGraph)  # pylint: disable=protected-access
            curr_graph = curr_graph._outer_graph  # pylint: disable=protected-access

    ys = _AsList(ys)
    xs = _AsList(xs)
    stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients)
    if grad_ys is None:
        grad_ys = [None] * len(ys)
    else:
        grad_ys = _AsList(grad_ys)

    with ops.name_scope(
            name, "gradients",
            list(ys) + list(xs) + list(stop_gradients) +
            list(grad_ys)) as grad_scope:
        # Get a uid for this call to gradients that can be used to help
        # cluster ops for compilation.
        gradient_uid = ops.get_default_graph().unique_name("uid")
        ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
        xs = [
            x.handle if resource_variable_ops.is_resource_variable(x) else x
            for x in xs
        ]
        xs = ops.internal_convert_n_to_tensor_or_indexed_slices(xs,
                                                                name="x",
                                                                as_ref=True)
        xs_set = object_identity.ObjectIdentitySet(xs)
        grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops,
                                 gradient_uid)

        # The approach we take here is as follows: Create a list of all ops in the
        # subgraph between the ys and xs.  Visit these ops in reverse order of ids
        # to ensure that when we visit an op the gradients w.r.t its outputs have
        # been collected.  Then aggregate these gradients if needed, call the op's
        # gradient function, and add the generated gradients to the gradients for
        # its input.

        # Initialize the pending count for ops in the connected subgraph from ys
        # to the xs.
        to_ops = [t.op for t in ys]
        from_ops = [t.op for t in xs]
        stop_gradient_ops = [t.op for t in stop_gradients]
        reachable_to_ops, pending_count, loop_state = _PendingCount(
            to_ops, from_ops, colocate_gradients_with_ops, func_graphs, xs_set)

        # Iterate over the collected ops.
        #
        # grads: op => list of gradients received on each output endpoint of the
        # op.  The gradients for each endpoint are initially collected as a list.
        # When it is time to call the op's gradient function, for each endpoint we
        # aggregate the list of received gradients into a Add() Operation if there
        # is more than one.
        grads = {}

        # Add the initial gradients for the ys.
        for y, grad_y in zip(ys, grad_ys):
            _SetGrad(grads, y, grad_y)

        # Initialize queue with to_ops.
        queue = collections.deque()
        # Add the ops in 'to_ops' into the queue.
        to_ops_set = set()
        for op in to_ops:
            # 'ready' handles the case where one output gradient relies on
            # another output's gradient.
            ready = (pending_count[op] == 0)
            if ready and op not in to_ops_set and op in reachable_to_ops:
                to_ops_set.add(op)
                queue.append(op)

        if loop_state:
            loop_exits = loop_state.ProcessUnusedLoopExits(
                pending_count, to_ops_set)
            for y in loop_exits:
                if backprop_util.IsTrainable(y):
                    _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
                    queue.append(y.op)

        stop_ops = _StopOps(from_ops, stop_gradient_ops, pending_count, xs_set)
        while queue:
            # generate gradient subgraph for op.
            op = queue.popleft()
            with _maybe_colocate_with(op, gradient_uid,
                                      colocate_gradients_with_ops):
                if loop_state:
                    loop_state.EnterGradWhileContext(op, before=True)
                out_grads = _AggregatedGrads(grads, op, gradient_uid,
                                             loop_state, aggregation_method)
                if loop_state:
                    loop_state.ExitGradWhileContext(op, before=True)

                grad_fn = None
                func_call = None
                is_partitioned_call = _IsPartitionedCall(op)
                # pylint: disable=protected-access
                is_func_call = (src_graph._is_function(op.type)
                                or is_partitioned_call)
                # pylint: enable=protected-access
                has_out_grads = any(
                    isinstance(g, ops.Tensor) or g for g in out_grads)
                if has_out_grads and (op not in stop_ops):
                    try:
                        grad_fn = ops.get_gradient_function(op)
                    except LookupError:
                        if is_func_call:
                            if is_partitioned_call:
                                func_call = src_graph._get_function(  # pylint: disable=protected-access
                                    compat.as_bytes(op.get_attr("f").name))
                            else:
                                func_call = src_graph._get_function(op.type)  # pylint: disable=protected-access
                            # Note that __defun is not set if the graph is
                            # imported. If it's set, we prefer to access the original
                            # defun.
                            func_call = getattr(op, "__defun", func_call)
                            grad_fn = func_call.python_grad_func
                        else:
                            raise LookupError(
                                "No gradient defined for operation '%s' (op type: %s)"
                                % (op.name, op.type))
                if loop_state:
                    loop_state.EnterGradWhileContext(op, before=False)

                # NOTE(skyewm): We don't support computing gradients wrt a loop variable
                # unless it's within the context of a single iteration (i.e. the
                # gradient is wrt to the loop parameter in the body function, not wrt or
                # through the initial value). This means if we're in a while loop
                # context, we should never see a switch node from this context.
                # pylint: disable=protected-access
                if (control_flow_util.IsSwitch(op)
                        and op._control_flow_context is not None
                        and op._control_flow_context.IsWhileContext()
                        and op._control_flow_context ==
                        ops.get_default_graph()._get_control_flow_context()):
                    _RaiseNoGradWrtInitialLoopValError(op, from_ops, xs_set)
                # pylint: enable=protected-access

                if (grad_fn or is_func_call) and has_out_grads:
                    # NOTE: If _AggregatedGrads didn't compute a value for the i'th
                    # output, it means that the cost does not depend on output[i],
                    # therefore dC/doutput[i] is 0.
                    for i, out_grad in enumerate(out_grads):
                        if (not isinstance(out_grad, ops.Tensor)
                                and not out_grad) and (
                                    (not grad_fn and is_func_call) or
                                    backprop_util.IsTrainable(op.outputs[i])):
                            # Only trainable outputs or outputs for a function call that
                            # will use SymbolicGradient get a zero gradient. Gradient
                            # functions should ignore the gradient for other outputs.
                            # TODO(apassos) gradients of resource handles might be an
                            # issue here because of zeros.
                            if loop_state:
                                out_grads[i] = loop_state.ZerosLike(op, i)
                            elif default_gradient.supports_default_grad(
                                    op.outputs[i]):
                                # TODO(b/143286622): The supports_default_grad check is needed
                                # because While op emits non-differentiable resource tensors
                                # as outputs. Remove this check when that is not the case.
                                out_grads[i] = control_flow_state.ZerosLikeOutsideLoop(
                                    op, i)
                    with ops.name_scope(op.name + "_grad"):
                        # pylint: disable=protected-access
                        with src_graph._original_op(op):
                            # pylint: enable=protected-access
                            if grad_fn:
                                # If grad_fn was found, do not use SymbolicGradient even for
                                # functions.
                                in_grads = _MaybeCompile(
                                    grad_scope, op, func_call,
                                    lambda: grad_fn(op, *out_grads))
                            else:
                                # For function call ops, we add a 'SymbolicGradient'
                                # node to the graph to compute gradients.
                                in_grads = _MaybeCompile(
                                    grad_scope, op, func_call,
                                    lambda: _SymGrad(op, out_grads))
                            in_grads = _AsList(in_grads)
                            _VerifyGeneratedGradients(in_grads, op)
                            if gate_gradients and len(
                                [x for x in in_grads if x is not None]) > 1:
                                with ops.device(None):
                                    with ops._colocate_with_for_gradient(  # pylint: disable=protected-access
                                            None,
                                            gradient_uid,
                                            ignore_existing=True):
                                        in_grads = control_flow_ops.tuple(
                                            in_grads)
                    _LogOpGradients(op, out_grads, in_grads)
                else:
                    # If no grad_fn is defined or none of out_grads is available,
                    # just propagate a list of None backwards.
                    in_grads = [None] * len(_Inputs(op, xs_set))
                # Note: we don't filter out eager inputs here because the inputs need to
                # line up with in_grads.
                for i, (t_in, in_grad) in enumerate(
                        zip(_Inputs(op, xs_set), in_grads)):
                    if in_grad is not None:
                        if (isinstance(in_grad, ops.Tensor)
                                and t_in.dtype != dtypes.resource):
                            try:
                                in_grad.set_shape(t_in.get_shape())
                            except ValueError:
                                raise ValueError(
                                    "Incompatible shapes between op input and calculated "
                                    "input gradient.  Forward operation: %s.  Input index: %d. "
                                    "Original input shape: %s.  "
                                    "Calculated input gradient shape: %s" %
                                    (op.name, i, t_in.shape, in_grad.shape))
                        if not isinstance(t_in, ops.EagerTensor):
                            _SetGrad(grads, t_in, in_grad)
                if loop_state:
                    loop_state.ExitGradWhileContext(op, before=False)

            # Update pending count for the inputs of op and enqueue ready ops.
            _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count,
                                          loop_state, xs_set)

    if loop_state:
        loop_state.PostProcessing()
    return [_GetGrad(grads, x, unconnected_gradients) for x in xs]
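
# A minimal usage sketch of the public entry point that the helper above
# implements, assuming graph mode via tf.compat.v1. It mirrors the documented
# `stop_gradients` behaviour of tf.gradients and is illustrative only, not
# part of the implementation.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

a = tf.constant(0.0)
b = 2.0 * a
# Total derivatives: d(a + b)/da = 1 + 2 = 3, d(a + b)/db = 1.
total = tf.gradients(a + b, [a, b])
# With both a and b treated as constants during backprop, only the direct
# contributions remain, so these partials evaluate to [1.0, 1.0].
partial = tf.gradients(a + b, [a, b], stop_gradients=[a, b])
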
def _DefaultGradYs(grad_ys,
                   ys,
                   colocate_gradients_with_ops,
                   gradient_uid="__unsupported__"):
  """Fill in default values for grad_ys.

  Args:
    grad_ys: List of gradients, can contain None.
    ys: List of tensors.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    gradient_uid: A unique identifier within the graph indicating
      which invocation of gradients is being executed. Used to cluster
      ops for compilation.

  Returns:
    A list of gradients to use, without None.

  Raises:
    ValueError: If sizes of gradients and inputs don't match
    TypeError: If type of any gradient is not valid for its input.
  """
  if len(grad_ys) != len(ys):
    raise ValueError("Passed %d grad_ys for %d ys" % (len(grad_ys), len(ys)))
  grad_ys = ops.convert_n_to_tensor_or_indexed_slices(grad_ys, name="grad_y")
  new_grad_ys = []
  for i in xrange(len(grad_ys)):
    grad_y = grad_ys[i]
    y = ys[i]
    with _maybe_colocate_with(y.op, gradient_uid, colocate_gradients_with_ops):
      if grad_y is None:
        if y.dtype.is_complex:
          raise TypeError(
              "Gradients of complex tensors must set grad_ys (y.dtype = %r)" %
              y.dtype)
        new_grad_ys.append(
            array_ops.fill(
                array_ops.shape(y),
                constant_op.constant(1, dtype=y.dtype, name="grad_ys_%d" % i)))
        continue
      if y.dtype.is_floating or y.dtype.is_integer:
        if not grad_y.dtype.is_floating and not grad_y.dtype.is_integer:
          raise TypeError(
              "Gradient type %s generated for real or "
              "integer-valued tensor %s with type %s must be "
              "real or integer" % (dtypes.as_dtype(grad_y.dtype).name, y,
                                   dtypes.as_dtype(y.dtype).name))
      elif y.dtype.is_complex:
        if not grad_y.dtype.is_complex:
          raise TypeError(
              "Gradient type %s generated for complex-valued "
              "tensor %s with type %s must be real" % (dtypes.as_dtype(
                  grad_y.dtype).name, y, dtypes.as_dtype(y.dtype).name))
      elif y.dtype == dtypes.variant:
        if grad_y.dtype != dtypes.variant:
          raise TypeError(
              "Gradient type %s generated for variant "
              "tensor %s with type %s must be variant" % (dtypes.as_dtype(
                  grad_y.dtype).name, y, dtypes.as_dtype(y.dtype).name))
      elif y.dtype == dtypes.resource:
        # We assume y is the handle of a ResourceVariable. The gradient of a
        # ResourceVariable should be a numeric value, not another resource.
        if grad_y.dtype == dtypes.resource:
          raise TypeError("Input gradient %s for resource tensor %s should not "
                          "be a resource" % (grad_y, y))
      else:
        raise TypeError(
            "Tensor %s with type %s must be numeric "
            "to obtain a default gradient" % (y, dtypes.as_dtype(y.dtype).name))
      # Create a grad_y tensor in the name scope of the gradient.
      # Required for TensorArrays to identify which gradient call a
      # grad_y value is coming from.
      if isinstance(grad_y, ops.IndexedSlices):
        new_grad_ys.append(
            ops.IndexedSlices(
                indices=(array_ops.identity(
                    grad_y.indices, name="grad_ys_%d_indices" % i)
                         if isinstance(grad_y.indices, ops.Tensor) else
                         grad_y.indices),
                values=(array_ops.identity(
                    grad_y.values, name="grad_ys_%d_values" % i) if isinstance(
                        grad_y.values, ops.Tensor) else grad_y.values),
                dense_shape=(array_ops.identity(
                    grad_y.dense_shape, name="grad_ys_%d_shape" % i)
                             if isinstance(grad_y.dense_shape, ops.Tensor) else
                             grad_y.dense_shape)))
      else:
        new_grad_ys.append(array_ops.identity(grad_y, name="grad_ys_%d" % i))

  return new_grad_ys
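
# A small sketch, assuming tf.compat.v1 in graph mode, of the default this
# helper fills in: omitting grad_ys behaves like seeding the backward pass
# with a tensor of ones of each y's shape and dtype. Tensors are illustrative.
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

x = tf.constant([1.0, 2.0, 3.0])
y = x * x

g_default, = tf.gradients(y, [x])                              # grad_ys=None
g_explicit, = tf.gradients(y, [x], grad_ys=[tf.ones_like(y)])  # same seed

with tf.Session() as sess:
  d, e = sess.run([g_default, g_explicit])
  np.testing.assert_allclose(d, e)  # both are [2., 4., 6.]
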
def _embedding_lookup_and_transform(params,
                                    ids,
                                    partition_strategy="mod",
                                    name=None,
                                    max_norm=None,
                                    transform_fn=None):
  """Helper function for embedding_lookup and _compute_sampled_logits.

  This function is a generalization of embedding_lookup that optionally
  applies a caller-specified transformation to each embedding. This is
  done through the `transform_fn` argument. If provided, the function is
  applied to each partitioned tensor of retrieved embeddings, colocated
  with the embeddings. This function will be called with a single `Tensor`
  argument of the same type as the `params` tensor and should return a
  `Tensor`. The shape of the argument will be the same as `params` except
  for the size of the first dimension. The first dimension of the result's
  shape must be the same size as the argument's.

  Args:
    params: See embedding_lookup.
    ids: See embedding_lookup.
    partition_strategy: See embedding_lookup.
    name: See embedding_lookup.
    max_norm: See embedding_lookup.
    transform_fn: An optional function to apply to each retrieved embedding.
      If max_norm is provided, transform_fn is applied to the norm-limited
      embeddings.

  Returns:
    See embedding_lookup for details.
  Raises:
    ValueError: If `params` is empty.
  """
  if params is None or params in ((), []):
    raise ValueError("Need at least one param")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]

  with ops.name_scope(name, "embedding_lookup", params + [ids]) as name:
    np = len(params)  # Number of partitions
    # Preserve the resource variable status to avoid accidental dense reads.
    if not any(
        isinstance(p, resource_variable_ops.ResourceVariable) for p in params):
      params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params")
    ids = ops.convert_to_tensor(ids, name="ids")
    if np == 1 and (not transform_fn or ids.get_shape().ndims == 1):
      with ops.colocate_with(params[0]):
        result = _clip(array_ops.gather(params[0], ids, name=name),
                       ids, max_norm)
        if transform_fn:
          result = transform_fn(result)
      # Make sure the final result does not have colocation constraints on the
      # params. Similar to the case np > 1, where parallel_dynamic_stitch is
      # outside the scope of all the ops.colocate_with(params[p]) blocks.
      return array_ops.identity(result)
    else:
      # Flatten the ids. There are two cases where we need to do this.
      # - There is more than one params tensor.
      # - There is a transform_fn and ids is not statically known to be 1-D.
      #   We must flatten in this case because transform_fn expects a flat
      #   tensor of embeddings.
      flat_ids = array_ops.reshape(ids, [-1])
      original_indices = math_ops.range(array_ops.size(flat_ids))

      # Create p_assignments and set new_ids depending on the strategy.
      if partition_strategy == "mod":
        p_assignments = flat_ids % np
        new_ids = flat_ids // np
      elif partition_strategy == "div":
        # Compute num_total_ids as the sum of dim-0 of params, then assign to
        # partitions based on a constant number of ids per partition. Optimize
        # if we already know the full shape statically.
        dim_0_size = tensor_shape.Dimension(tensor_shape.dimension_value(
            params[0].get_shape()[0]))
        for p in xrange(1, np):
          dim_0_size += tensor_shape.Dimension(tensor_shape.dimension_value(
              params[p].get_shape()[0]))
        if dim_0_size.value:
          num_total_ids = constant_op.constant(dim_0_size.value, flat_ids.dtype)
        else:
          dim_0_sizes = []
          for p in xrange(np):
            param_p_dim = tensor_shape.dimension_value(params[p].get_shape()[0])
            if param_p_dim is not None:
              dim_0_sizes.append(param_p_dim)
            else:
              with ops.colocate_with(params[p]):
                dim_0_sizes.append(array_ops.shape(params[p])[0])
          num_total_ids = math_ops.reduce_sum(
              math_ops.cast(array_ops.stack(dim_0_sizes), flat_ids.dtype))
        ids_per_partition = num_total_ids // np
        extras = num_total_ids % np

        p_assignments = math_ops.maximum(
            flat_ids // (ids_per_partition + 1),
            (flat_ids - extras) // ids_per_partition)

        # Emulate a conditional using a boolean indicator tensor
        new_ids = array_ops.where(p_assignments < extras,
                                  flat_ids % (ids_per_partition + 1),
                                  (flat_ids - extras) % ids_per_partition)
      else:
        raise ValueError("Unrecognized partition strategy: " +
                         partition_strategy)

      # Cast partition assignments to int32 for use in dynamic_partition.
      # There really should not be more than 2^32 partitions.
      p_assignments = math_ops.cast(p_assignments, dtypes.int32)
      # Partition list of ids based on assignments into np separate lists
      gather_ids = data_flow_ops.dynamic_partition(new_ids, p_assignments, np)
      # Similarly, partition the original indices.
      pindices = data_flow_ops.dynamic_partition(original_indices,
                                                 p_assignments, np)
      # Do np separate lookups, finding embeddings for plist[p] in params[p]
      partitioned_result = []
      for p in xrange(np):
        pids = gather_ids[p]
        with ops.colocate_with(params[p]):
          result = array_ops.gather(params[p], pids)
          if transform_fn:
            # If transform_fn is provided, the clip_by_norm precedes
            # the transform and hence must be co-located. See below
            # for the counterpart if transform_fn is not provided.
            result = transform_fn(_clip(result, pids, max_norm))
        partitioned_result.append(result)
      # Stitch these back together
      ret = data_flow_ops.parallel_dynamic_stitch(
          pindices, partitioned_result, name=name)

      # Determine the static element shape.
      if transform_fn is None:
        element_shape_s = params[0].get_shape()[1:]
        for p in params[1:]:
          element_shape_s = element_shape_s.merge_with(p.get_shape()[1:])
      else:
        element_shape_s = ret.get_shape()[1:]

      # Compute the dynamic element shape.
      if element_shape_s.is_fully_defined():
        element_shape_d = element_shape_s
      elif transform_fn is None:
        # It's important that we compute params[0].shape on the right device
        # to avoid data motion.
        with ops.colocate_with(params[0]):
          params_shape = array_ops.shape(params[0])
        element_shape_d = params_shape[1:]
      else:
        element_shape_d = array_ops.shape(ret)[1:]

      # Reshape to reverse the flattening of ids.
      ret = array_ops.reshape(ret,
                              array_ops.concat(
                                  [array_ops.shape(ids), element_shape_d], 0))

      # Normally the reshape is sufficient, but setting shape explicitly
      # teaches shape inference that params[1:].get_shape() matters
      # (in the case that transform_fn is None).
      ret.set_shape(ids.get_shape().concatenate(element_shape_s))
      if not transform_fn:
        # If transform_fn was provided, the clip_by_norm was done above.
        ret = _clip(ret, ids, max_norm)
      return ret
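
# Pure-Python sketch (no TensorFlow required) of the "div" partition
# arithmetic used above, checked against the contiguous 13-ids / 5-partitions
# split described in the embedding_lookup docstrings. Numbers are illustrative.
num_total_ids, num_partitions = 13, 5
ids_per_partition, extras = divmod(num_total_ids, num_partitions)  # 2, 3

def div_assignment(flat_id):
  # Mirrors: maximum(flat_ids // (ids_per_partition + 1),
  #                  (flat_ids - extras) // ids_per_partition)
  p = max(flat_id // (ids_per_partition + 1),
          (flat_id - extras) // ids_per_partition)
  # Mirrors the where(p_assignments < extras, ...) computation of new_ids.
  if p < extras:
    new_id = flat_id % (ids_per_partition + 1)
  else:
    new_id = (flat_id - extras) % ids_per_partition
  return p, new_id

buckets = [[] for _ in range(num_partitions)]
for i in range(num_total_ids):
  p, _ = div_assignment(i)
  buckets[p].append(i)
print(buckets)  # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]
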
Example #31
0
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors.  `grad_ys`
  is a list of `Tensor`, holding the gradients received by the
  `ys`. The list must be the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial
  derivatives of `ys` with respect to `xs`.  It returns a list of
  `Tensor` of length `len(xs)` where each tensor is the `sum(dy/dx)`
  for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds
  the initial gradients for each y in `ys`.  When `grad_ys` is None,
  we fill in a tensor of '1's of the shape of y for each y in `ys`.  A
  user can provide their own initial `grad_ys` to compute the
  derivatives using a different initial gradient for each y (e.g., if
  one wanted to weight the gradient differently for each value in
  each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as
      `ys` and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned
      for an operation.  This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid.

  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)
  with ops.op_scope(ys + xs + grad_ys, name, "gradients"):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs.  Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected.  Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, has_control_flow = _PendingCount(ops.get_default_graph(),
                                                    to_ops, from_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op.  The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:  # pylint: disable=protected-access
        to_ops_set.add(op._id)
        queue.append(op)
    # The set of 'from_ops'.
    stop_ops = _StopOps(from_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with ops.device(_GetGradsDevice(op, colocate_gradients_with_ops)):
        if has_control_flow:
          control_flow_ops.EnterGradWhileContext(op)
        out_grads = _AggregatedGrads(grads, op, has_control_flow,
                                     aggregation_method)
        grad_fn = None
        if any(out_grads) and op._id not in stop_ops:
          # A grad_fn must be defined, either as a function or as None
          # for ops that do not have gradients.
          try:
            grad_fn = ops.get_gradient_function(op)
          except LookupError:
            raise LookupError(
                "No gradient defined for operation '%s' (op type: %s)" %
                (op.name, op.type))
        if grad_fn and any(out_grads):
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not out_grad and
                dtypes.as_dtype(op.outputs[i].dtype).base_dtype in
                (dtypes.float32, dtypes.float64)):
              # Only floating-point outputs get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              out_grads[i] = array_ops.zeros_like(op.outputs[i])
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              op_wrapper = op
              if has_control_flow:
                op_wrapper = control_flow_ops.MakeWrapper(op)
              in_grads = _AsList(grad_fn(op_wrapper, *out_grads))
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len(in_grads) > 1:
                in_grads = control_flow_ops.tuple(in_grads)
          logging.vlog(1, "Gradient for '" + op.name + "'")
          logging.vlog(1, "  in  --> %s",
                       ", ".join([x.name for x in out_grads if x]))
          logging.vlog(1, "  out --> %s",
                       ", ".join([x.name for x in in_grads if x]))
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for t_in, in_grad in zip(op.inputs, in_grads):
          if in_grad:
            _SetGrad(grads, t_in, in_grad)
        if has_control_flow:
          control_flow_ops.ExitGradWhileContext(op)

      # update pending count for the inputs of op.
      for x in op.inputs:
        pending_count[x.op._id] -= 1
        ready = (pending_count[x.op._id] == 0)
        if has_control_flow and not ready:
          ready = (pending_count[x.op._id] > 0 and
                   control_flow_ops.IsLoopSwitch(x.op))
        if ready:
          queue.append(x.op)
      for x in op.control_inputs:
        pending_count[x._id] -= 1
        if pending_count[x._id] == 0:
          queue.append(x)
  return [_GetGrad(grads, x) for x in xs]
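
# A minimal sketch, assuming tf.compat.v1 in graph mode, of the documented
# "sum over ys" behaviour: for each x, tf.gradients returns the sum of dy/dx
# over every y in ys. Values are illustrative only.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

x = tf.constant(2.0)
y1 = 3.0 * x   # dy1/dx = 3
y2 = x * x     # dy2/dx = 2 * x = 4

g, = tf.gradients([y1, y2], [x])
with tf.Session() as sess:
  print(sess.run(g))  # 7.0 == 3 + 4
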
def _GradientsHelper(ys,
                     xs,
                     grad_ys=None,
                     name="gradients",
                     colocate_gradients_with_ops=False,
                     gate_gradients=False,
                     aggregation_method=None,
                     stop_gradients=None,
                     unconnected_gradients=UnconnectedGradients.NONE,
                     src_graph=None):
  """Implementation of gradients()."""
  if context.executing_eagerly():
    raise RuntimeError("tf.gradients is not supported when eager execution "
                       "is enabled. Use tf.GradientTape instead.")
  if src_graph is None:
    src_graph = ops.get_default_graph()
  try:
    unconnected_gradients = UnconnectedGradients(unconnected_gradients)
  except ValueError:
    raise ValueError(
        "Unknown value for unconnected_gradients: %r" % unconnected_gradients)

  # If src_graph is a _FuncGraph (i.e. a function body), gather it and all
  # ancestor graphs. This is necessary for correctly handling captured values.
  func_graphs = []
  curr_graph = src_graph
  while _IsFunction(curr_graph):
    func_graphs.append(curr_graph)
    if isinstance(curr_graph, FuncGraph):
      curr_graph = curr_graph.outer_graph
    else:
      assert isinstance(curr_graph, framework_function._FuncGraph)  # pylint: disable=protected-access
      curr_graph = curr_graph._outer_graph  # pylint: disable=protected-access

  ys = _AsList(ys)
  xs = _AsList(xs)
  stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)

  with ops.name_scope(
      name, "gradients",
      list(ys) + list(xs) + list(stop_gradients) + list(grad_ys)) as grad_scope:
    # Get a uid for this call to gradients that can be used to help
    # cluster ops for compilation.
    gradient_uid = ops.get_default_graph().unique_name("uid")
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = [
        x.handle if resource_variable_ops.is_resource_variable(x) else x
        for x in xs
    ]
    xs = ops.internal_convert_n_to_tensor_or_indexed_slices(
        xs, name="x", as_ref=True)
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops,
                             gradient_uid)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs.  Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected.  Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    stop_gradient_ops = [t.op for t in stop_gradients]
    reachable_to_ops, pending_count, loop_state = _PendingCount(
        to_ops, from_ops, colocate_gradients_with_ops, func_graphs, xs)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op.  The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      ready = (pending_count[op] == 0)
      if ready and op not in to_ops_set and op in reachable_to_ops:
        to_ops_set.add(op)
        queue.append(op)

    if loop_state:
      loop_exits = loop_state.ProcessUnusedLoopExits(pending_count, to_ops_set)
      for y in loop_exits:
        if IsTrainable(y):
          _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    stop_ops = _StopOps(from_ops, stop_gradient_ops, pending_count, xs)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with _maybe_colocate_with(op, gradient_uid, colocate_gradients_with_ops):
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=True)
        out_grads = _AggregatedGrads(grads, op, gradient_uid, loop_state,
                                     aggregation_method)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=True)

        grad_fn = None
        func_call = None
        is_partitioned_call = _IsPartitionedCall(op)
        # pylint: disable=protected-access
        is_func_call = (
            src_graph._is_function(op.type) or is_partitioned_call)
        # pylint: enable=protected-access
        has_out_grads = any(isinstance(g, ops.Tensor) or g for g in out_grads)
        if has_out_grads and (op not in stop_ops):
          try:
            grad_fn = ops.get_gradient_function(op)
          except LookupError:
            if is_func_call:
              if is_partitioned_call:
                func_call = src_graph._get_function(  # pylint: disable=protected-access
                    compat.as_bytes(op.get_attr("f").name))
              else:
                func_call = src_graph._get_function(op.type)  # pylint: disable=protected-access
              # Note that __defun is not set if the graph is
              # imported. If it's set, we prefer to access the original
              # defun.
              func_call = getattr(op, "__defun", func_call)
              grad_fn = func_call.python_grad_func
            else:
              raise LookupError(
                  "No gradient defined for operation '%s' (op type: %s)" %
                  (op.name, op.type))
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=False)

        # NOTE(skyewm): We don't support computing gradients wrt a loop variable
        # unless it's within the context of a single iteration (i.e. the
        # gradient is wrt to the loop parameter in the body function, not wrt or
        # through the initial value). This means if we're in a while loop
        # context, we should never see a switch node from this context.
        # pylint: disable=protected-access
        if (control_flow_util.IsSwitch(op) and
            op._control_flow_context is not None and
            op._control_flow_context.IsWhileContext() and
            op._control_flow_context ==
            ops.get_default_graph()._get_control_flow_context()):
          _RaiseNoGradWrtInitialLoopValError(op, from_ops, xs)
        # pylint: enable=protected-access

        if (grad_fn or is_func_call) and has_out_grads:
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not isinstance(out_grad, ops.Tensor) and not out_grad) and (
                (not grad_fn and is_func_call) or IsTrainable(op.outputs[i])):
              # Only trainable outputs or outputs for a function call that
              # will use SymbolicGradient get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              # TODO(apassos) gradients of resource handles might be an
              # issue here because of zeros.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i)
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with src_graph._original_op(op):
              # pylint: enable=protected-access
              if grad_fn:
                # If grad_fn was found, do not use SymbolicGradient even for
                # functions.
                in_grads = _MaybeCompile(grad_scope, op, func_call,
                                         lambda: grad_fn(op, *out_grads))
              else:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                in_grads = _MaybeCompile(grad_scope, op, func_call,
                                         lambda: _SymGrad(op, out_grads))
              in_grads = _AsList(in_grads)
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len([x for x in in_grads
                                         if x is not None]) > 1:
                with ops.device(None):
                  with ops._colocate_with_for_gradient(  # pylint: disable=protected-access
                      None,
                      gradient_uid,
                      ignore_existing=True):
                    in_grads = control_flow_ops.tuple(in_grads)
          _LogOpGradients(op, out_grads, in_grads)
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(_NonEagerInputs(op, xs))
        for i, (t_in, in_grad) in enumerate(zip(_NonEagerInputs(op, xs),
                                                in_grads)):
          if in_grad is not None:
            if (isinstance(in_grad, ops.Tensor) and
                t_in.dtype != dtypes.resource):
              try:
                in_grad.set_shape(t_in.get_shape())
              except ValueError:
                raise ValueError(
                    "Incompatible shapes between op input and calculated "
                    "input gradient.  Forward operation: %s.  Input index: %d. "
                    "Original input shape: %s.  "
                    "Calculated input gradient shape: %s" %
                    (op.name, i, t_in.shape, in_grad.shape))
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=False)

      # Update pending count for the inputs of op and enqueue ready ops.
      _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state,
                                    xs)

  if loop_state:
    loop_state.PostProcessing()
  return [_GetGrad(grads, x, unconnected_gradients) for x in xs]
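
# Toy, TensorFlow-free sketch of the pending-count / queue bookkeeping that
# the comments above describe: an op is processed only once gradients for all
# of its consumers have been seen. The node names and `consumers` map are made
# up for illustration; this is not the real _PendingCount machinery.
import collections

# Forward graph y = mul(add(x, x), x); each op lists the ops consuming it.
consumers = {"x": ["add", "mul"], "add": ["mul"], "mul": []}

# pending[op] = number of consumers whose gradients are still outstanding.
pending = {op: len(c) for op, c in consumers.items()}

queue = collections.deque(op for op, n in pending.items() if n == 0)  # ["mul"]
order = []
while queue:
  op = queue.popleft()
  order.append(op)  # gradients w.r.t. op's outputs are fully aggregated here
  for producer, c in consumers.items():
    if op in c:
      pending[producer] -= 1
      if pending[producer] == 0:
        queue.append(producer)

print(order)  # ['mul', 'add', 'x'] -- reverse of the forward order
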
Example #33
0
def _DefaultGradYs(grad_ys,
                   ys,
                   colocate_gradients_with_ops,
                   gradient_uid="__unsupported__"):
    """Fill in default values for grad_ys.

  Args:
    grad_ys: List of gradients, can contain None.
    ys: List of tensors.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    gradient_uid: A unique identifier within the graph indicating
      which invocation of gradients is being executed. Used to cluster
      ops for compilation.

  Returns:
    A list of gradients to use, without None.

  Raises:
    ValueError: If sizes of gradients and inputs don't match
    TypeError: If type of any gradient is not valid for its input.
  """
    if len(grad_ys) != len(ys):
        raise ValueError("Passed %d grad_ys for %d ys" %
                         (len(grad_ys), len(ys)))
    grad_ys = ops.convert_n_to_tensor_or_indexed_slices(grad_ys, name="grad_y")
    new_grad_ys = []
    for i, (y, grad_y) in enumerate(zip(ys, grad_ys)):
        with _maybe_colocate_with(y.op, gradient_uid,
                                  colocate_gradients_with_ops):
            if grad_y is None:
                if y.dtype.is_complex:
                    raise TypeError(
                        "Gradients of complex tensors must set grad_ys (y.dtype = %r)"
                        % y.dtype)
                new_grad_ys.append(
                    array_ops.fill(
                        array_ops.shape(y),
                        constant_op.constant(1,
                                             dtype=y.dtype,
                                             name="grad_ys_%d" % i)))
                continue
            if y.dtype.is_floating or y.dtype.is_integer:
                if not grad_y.dtype.is_floating and not grad_y.dtype.is_integer:
                    raise TypeError(
                        "Gradient type %s generated for real or "
                        "integer-valued tensor %s with type %s must be "
                        "real or integer" %
                        (dtypes.as_dtype(grad_y.dtype).name, y,
                         dtypes.as_dtype(y.dtype).name))
            elif y.dtype.is_complex:
                if not grad_y.dtype.is_complex:
                    raise TypeError(
                        "Gradient type %s generated for complex-valued "
                        "tensor %s with type %s must be real" %
                        (dtypes.as_dtype(grad_y.dtype).name, y,
                         dtypes.as_dtype(y.dtype).name))
            elif y.dtype == dtypes.variant:
                if grad_y.dtype != dtypes.variant:
                    raise TypeError("Gradient type %s generated for variant "
                                    "tensor %s with type %s must be variant" %
                                    (dtypes.as_dtype(grad_y.dtype).name, y,
                                     dtypes.as_dtype(y.dtype).name))
            elif y.dtype == dtypes.resource:
                # We assume y is the handle of a ResourceVariable. The gradient of a
                # ResourceVariable should be a numeric value, not another resource.
                if grad_y.dtype == dtypes.resource:
                    raise TypeError(
                        "Input gradient %s for resource tensor %s should not "
                        "be a resource" % (grad_y, y))
            else:
                raise TypeError("Tensor %s with type %s must be numeric "
                                "to obtain a default gradient" %
                                (y, dtypes.as_dtype(y.dtype).name))
            # Create a grad_y tensor in the name scope of the gradient.
            # Required for TensorArrays to identify which gradient call a
            # grad_y value is coming from.
            if isinstance(grad_y, ops.IndexedSlices):
                new_grad_ys.append(
                    ops.IndexedSlices(
                        indices=(array_ops.identity(
                            grad_y.indices, name="grad_ys_%d_indices" %
                            i) if isinstance(grad_y.indices, ops.Tensor) else
                                 grad_y.indices),
                        values=(array_ops.identity(
                            grad_y.values, name="grad_ys_%d_values" %
                            i) if isinstance(grad_y.values, ops.Tensor) else
                                grad_y.values),
                        dense_shape=(array_ops.identity(
                            grad_y.dense_shape, name="grad_ys_%d_shape" %
                            i) if isinstance(grad_y.dense_shape, ops.Tensor)
                                     else grad_y.dense_shape)))
            else:
                new_grad_ys.append(
                    array_ops.identity(grad_y, name="grad_ys_%d" % i))

    return new_grad_ys
Example #34
0
def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None):
  """Returns the element-wise sum of a list of tensors.

  Optionally, pass `shape` and `tensor_dtype` for shape and type checking,
  otherwise, these are inferred.

  `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
  wait for all of its inputs to be ready before beginning to sum. This can
  save memory if inputs are ready at different times, since minimum temporary
  storage is proportional to the output size rather than the inputs size.

  Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.

  For example:

  ```python
  a = tf.constant([[1, 2], [3, 4]])
  b = tf.constant([[5, 0], [0, 6]])
  tf.accumulate_n_v2([a, b, a])  # [[7, 4], [6, 14]]

  # Explicitly pass shape and type
  tf.accumulate_n_v2([a, b, a], shape=[2, 2],
                     tensor_dtype=tf.int32)  # [[7, 4], [6, 14]]
  ```

  Args:
    inputs: A list of `Tensor` objects, each with same shape and type.
    shape: Shape of elements of `inputs`.
    tensor_dtype: The type of `inputs`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of same shape and type as the elements of `inputs`.

  Raises:
    ValueError: If `inputs` don't all have same shape and dtype or the shape
    cannot be inferred.
  """
  _INPUTS_ERR_MSG = ValueError("inputs must be a list of at least one Tensor "
                               "with the same dtype and shape")
  if not inputs or not isinstance(inputs, (list, tuple)):
    raise _INPUTS_ERR_MSG
  inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs)
  if not all(isinstance(x, ops.Tensor) for x in inputs):
    raise _INPUTS_ERR_MSG
  if not all(x.dtype == inputs[0].dtype for x in inputs):
    raise _INPUTS_ERR_MSG
  if shape is not None:
    shape = tensor_shape.as_shape(shape)
  else:
    shape = tensor_shape.unknown_shape()
  for input_tensor in inputs:
    if isinstance(input_tensor, ops.Tensor):
      shape = shape.merge_with(input_tensor.get_shape())

  # tensor_dtype is for safety only; operator's output type computed in C++
  if tensor_dtype is not None and tensor_dtype != inputs[0].dtype:
    raise TypeError("tensor_dtype is {}, but input is of type {}"
                    .format(tensor_dtype, inputs[0].dtype))

  if len(inputs) == 1 and name is None:
    return inputs[0]
  elif len(inputs) == 1 and name is not None:
    return array_ops.identity(inputs[0], name=name)
  elif context.in_eager_mode():
    # TemporaryVariable not currently supported in eager mode; fall back
    # to AddN for now.
    # TODO(frreiss) remove this once the lifetime of eager variables gets
    # addressed
    return math_ops.add_n(inputs, name=name)
  else:
    return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape)
Example #35
0
def embedding_lookup(params, ids, partition_strategy="mod", name=None,
                     validate_indices=True, max_norm=None):
  """Looks up `ids` in a list of embedding tensors.

  This function is used to perform parallel lookups on the list of
  tensors in `params`.  It is a generalization of
  [`tf.gather()`](../../api_docs/python/array_ops.md#gather), where `params` is
  interpreted as a partitioning of a large embedding tensor.  `params` may be
  a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  If `len(params) > 1`, each element `id` of `ids` is partitioned between
  the elements of `params` according to the `partition_strategy`.
  In all strategies, if the id space does not evenly divide the number of
  partitions, each of the first `(max_id + 1) % len(params)` partitions will
  be assigned one more id.

  If `partition_strategy` is `"mod"`, we assign each id to partition
  `p = id % len(params)`. For instance,
  13 ids are split across 5 partitions as:
  `[[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]]`

  If `partition_strategy` is `"div"`, we assign ids to partitions in a
  contiguous manner. In this case, 13 ids are split across 5 partitions as:
  `[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]`

  The results of the lookup are concatenated into a dense
  tensor. The returned tensor has shape `shape(ids) + shape(params)[1:]`.

  Args:
    params: A list of tensors with the same type and which can be concatenated
      along dimension 0. Alternatively, a `PartitionedVariable`, created by
      partitioning along dimension 0.  Each element must be appropriately sized
      for the given `partition_strategy`.
    ids: A `Tensor` with type `int32` or `int64` containing the ids to be looked
      up in `params`.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`.
    name: A name for the operation (optional).
    validate_indices: Whether or not to validate gather indices.
    max_norm: If not None, embedding values are l2-normalized to the value of
     max_norm.

  Returns:
    A `Tensor` with the same type as the tensors in `params`.

  Raises:
    ValueError: If `params` is empty.
  """
  if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
    raise ValueError("Need at least one param")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]
  def maybe_normalize(x):
    if max_norm is not None:
      if x.get_shape().ndims is not None:
        ndims = x.get_shape().ndims
      else:
        ndims = array_ops.size(array_ops.shape(x))
      return clip_ops.clip_by_norm(x, max_norm, axes=list(range(1, ndims)))
    return x
  with ops.name_scope(name, "embedding_lookup", params + [ids]) as name:
    np = len(params)  # Number of partitions
    params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params")
    if np == 1:
      with ops.colocate_with(params[0]):
        # TODO(apassos): implement the sharded version as well.
        if isinstance(params[0], resource_variable_ops.ResourceVariable):
          ret = params[0].sparse_read(ids, name=name)
        else:
          ret = array_ops.gather(params[0], ids, name=name,
                                 validate_indices=validate_indices)
      return maybe_normalize(ret)
    else:
      ids = ops.convert_to_tensor(ids, name="ids")
      flat_ids = array_ops.reshape(ids, [-1])
      original_indices = math_ops.range(array_ops.size(flat_ids))

      # Create p_assignments and set new_ids depending on the strategy.
      if partition_strategy == "mod":
        p_assignments = flat_ids % np
        new_ids = flat_ids // np
      elif partition_strategy == "div":
        # Compute num_total_ids as the sum of dim-0 of params, then assign to
        # partitions based on a constant number of ids per partition. Optimize
        # if we already know the full shape statically.
        dim_0_size = params[0].get_shape()[0]
        for p in xrange(1, np):
          dim_0_size += params[p].get_shape()[0]
        if dim_0_size.value:
          num_total_ids = constant_op.constant(dim_0_size.value, flat_ids.dtype)
        else:
          dim_0_sizes = []
          for p in xrange(np):
            if params[p].get_shape()[0].value is not None:
              dim_0_sizes.append(params[p].get_shape()[0].value)
            else:
              with ops.colocate_with(params[p]):
                dim_0_sizes.append(array_ops.shape(params[p])[0])
          num_total_ids = math_ops.reduce_sum(
              math_ops.cast(array_ops.pack(dim_0_sizes), flat_ids.dtype))
        ids_per_partition = num_total_ids // np
        extras = num_total_ids % np

        p_assignments = math_ops.maximum(
            flat_ids // (ids_per_partition + 1),
            (flat_ids - extras) // ids_per_partition)

        # Emulate a conditional using a boolean indicator tensor
        is_in_first_extras_partitions = math_ops.cast(
            p_assignments < extras, flat_ids.dtype)
        new_ids = (
            is_in_first_extras_partitions * (
                flat_ids % (ids_per_partition + 1)) +
            (1 - is_in_first_extras_partitions) * (
                (flat_ids - extras) % ids_per_partition))
      else:
        raise ValueError("Unrecognized partition strategy: " +
                         partition_strategy)

      # Cast partition assignments to int32 for use in dynamic_partition.
      # There really should not be more than 2^32 partitions.
      p_assignments = math_ops.cast(p_assignments, dtypes.int32)
      # Partition list of ids based on assignments into np separate lists
      gather_ids = data_flow_ops.dynamic_partition(new_ids, p_assignments, np)
      # Similarly, partition the original indices.
      pindices = data_flow_ops.dynamic_partition(original_indices,
                                                 p_assignments, np)
      # Do np separate lookups, finding embeddings for plist[p] in params[p]
      partitioned_result = []
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result.append(array_ops.gather(
              params[p], gather_ids[p],
              validate_indices=validate_indices))
      # Stitch these back together
      ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result,
                                         name=name)
      # Reshape to reverse the flattening of ids.
      element_shape = params[0].get_shape()[1:]
      for p in params[1:]:
        element_shape = element_shape.merge_with(p.get_shape()[1:])
      if element_shape.is_fully_defined():
        ret = array_ops.reshape(ret, array_ops.concat(0, [
            array_ops.shape(ids), element_shape]))
      else:
        # It's important that we compute params[0].shape on the right device
        # to avoid data motion.
        with ops.colocate_with(params[0]):
          params_shape = array_ops.shape(params[0])
        ret = array_ops.reshape(ret, array_ops.concat(0, [
            array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])]))
      # output shape = ids.shape + params[*].shape[1:]
      # Normally the reshape is sufficient, but setting shape explicitly
      # teaches shape inference that params[1:].get_shape() matters.
      ret.set_shape(ids.get_shape().concatenate(element_shape))
      return maybe_normalize(ret)
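
# A minimal usage sketch of the public lookup over two row-partitioned shards
# with the default "mod" strategy described in the docstring above. Assumes
# tf.compat.v1 in graph mode; the shard contents are illustrative only.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

# "mod" places even ids (0, 2) in shard 0 and odd ids (1, 3) in shard 1,
# with local row index id // 2 inside each shard.
shard0 = tf.constant([[0.0, 0.0], [2.0, 2.0]])  # rows for ids 0 and 2
shard1 = tf.constant([[1.0, 1.0], [3.0, 3.0]])  # rows for ids 1 and 3

ids = tf.constant([3, 0, 2])
emb = tf.nn.embedding_lookup([shard0, shard1], ids,
                             partition_strategy="mod")

with tf.Session() as sess:
  print(sess.run(emb))  # [[3. 3.] [0. 0.] [2. 2.]]
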
Example #36
0
def embedding_lookup(params, ids, name=None):
    """Return a tensor of embedding values by looking up "ids" in "params".

  Args:
    params: List of tensors of the same shape.  A single tensor is
            treated as a singleton list.
    ids: Tensor of integers containing the ids to be looked up in
         'params'.  Let P be len(params).  If P > 1, then the ids are
         partitioned by id % P, and we do separate lookups in params[p]
         for 0 <= p < P, and then stitch the results back together into
         a single result tensor.
    name: Optional name for the op.

  Returns:
    A tensor of shape ids.shape + params[0].shape[1:] containing the
    values params[i % P][i] for each i in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
    if not isinstance(params, list):
        params = [params]
    with ops.op_scope(params + [ids], name, "embedding_lookup") as name:
        if not params:
            raise ValueError("Need at least one param")
        np = len(params)  # Number of partitions
        params = ops.convert_n_to_tensor_or_indexed_slices(params,
                                                           name="params")
        if np == 1:
            with ops.device(params[0].device):
                return array_ops.gather(params[0], ids, name=name)
        else:
            ids = ops.convert_to_tensor(ids, name="ids")
            flat_ids = array_ops.reshape(ids, [-1])
            original_indices = math_ops.range(0, array_ops.size(flat_ids))
            # Compute flat_ids % partitions for each id
            ids_mod_p = flat_ids % np
            if ids_mod_p.dtype != types.int32:
                ids_mod_p = math_ops.cast(ids_mod_p, types.int32)
            # Partition single list of ids based on ids % np into np separate lists
            plist = data_flow_ops.dynamic_partition(flat_ids, ids_mod_p, np)
            # Similarly, partition the original indices.
            pindices = data_flow_ops.dynamic_partition(original_indices,
                                                       ids_mod_p, np)
            # Do np separate lookups, finding embeddings for plist[p] in params[p]
            partitioned_result = []
            for p in range(np):
                # TODO(agarwal): handle device allocations here and later in the
                # colocate code.
                gather_ids = plist[p] / np
                with ops.device(params[p].device):
                    partitioned_result.append(
                        array_ops.gather(params[p], gather_ids))
            # Stitch these back together
            ret = data_flow_ops.dynamic_stitch(pindices,
                                               partitioned_result,
                                               name=name)
            # Reshape to reverse the flattening of ids.
            # It's important that we compute params[0].shape on the right device
            # to avoid data motion.
            with ops.device(params[0].device):
                params_shape = array_ops.shape(params[0])
            ret = array_ops.reshape(
                ret,
                array_ops.concat(0, [
                    array_ops.shape(ids),
                    array_ops.slice(params_shape, [1], [-1])
                ]))
            # output shape = ids.shape + params[*].shape[1:]
            # Normally the reshape is sufficient, but setting shape explicitly
            # teaches shape inference that params[1:].get_shape() matters.
            element_shape = params[0].get_shape()[1:]
            for p in params[1:]:
                element_shape = element_shape.merge_with(p.get_shape()[1:])
            ret.set_shape(ids.get_shape().concatenate(element_shape))
            return ret
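The mod-partitioning described in the docstring is easy to check with plain Python before any graph ops are involved. The sketch below is illustrative only; `P` and `ids` stand in for `len(params)` and the lookup ids, and the arithmetic mirrors the `ids % np` / `plist[p] / np` split performed above.

```python
# Plain-Python illustration of the mod-partitioning used by embedding_lookup.
# With P = len(params) partitions, id i is stored in params[i % P] at row i // P.
P = 3
ids = [0, 1, 2, 3, 4, 5, 7]

for i in ids:
    partition = i % P   # which params[p] holds this id
    row = i // P        # gather index inside that partition
    print("id %d -> params[%d][%d]" % (i, partition, row))
# id 0 -> params[0][0], id 3 -> params[0][1], id 7 -> params[1][2], ...
```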
Example #37
0
def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops):
    """Fill in default values for grad_ys.

  Args:
    grad_ys: List of gradients, can contain None.
    ys: List of tensors.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.

  Returns:
    A list of gradients to use, without None.

  Raises:
    ValueError: If sizes of gradients and inputs don't match.
    TypeError: If type of any gradient is not valid for its input.
  """
    if len(grad_ys) != len(ys):
        raise ValueError("Passed %d grad_ys for %d ys" %
                         (len(grad_ys), len(ys)))
    grad_ys = ops.convert_n_to_tensor_or_indexed_slices(grad_ys, name="grad_y")
    new_grad_ys = []
    for i in xrange(len(grad_ys)):
        grad_y = grad_ys[i]
        y = ys[i]
        with _maybe_colocate_with(y.op, colocate_gradients_with_ops):
            if grad_y is None:
                if y.dtype.is_complex:
                    raise TypeError(
                        "Gradients of complex tensors must set grad_ys (y.dtype = %r)"
                        % y.dtype)
                new_grad_ys.append(
                    array_ops.fill(
                        array_ops.shape(y),
                        constant_op.constant(1,
                                             dtype=y.dtype,
                                             name="grad_ys_%d" % i)))
                continue
            if y.dtype.is_floating or y.dtype.is_integer:
                if not grad_y.dtype.is_floating and not grad_y.dtype.is_integer:
                    raise TypeError(
                        "Gradient type %s generated for real or "
                        "integer-valued tensor %s with type %s must be "
                        "real or integer" %
                        (dtypes.as_dtype(grad_y.dtype).name, y,
                         dtypes.as_dtype(y.dtype).name))
            elif y.dtype.is_complex:
                if not grad_y.dtype.is_complex:
                    raise TypeError(
                        "Gradient type %s generated for complex-valued "
                        "tensor %s with type %s must be real" %
                        (dtypes.as_dtype(grad_y.dtype).name, y,
                         dtypes.as_dtype(y.dtype).name))
            else:
                raise TypeError("Tensor %s with type %s must be numeric "
                                "to obtain a default gradient" %
                                (y, dtypes.as_dtype(y.dtype).name))
            # Create a grad_y tensor in the name scope of the gradient.
            # Required for TensorArrays to identify which gradient call a
            # grad_y value is coming from.
            if isinstance(grad_y, ops.IndexedSlices):
                new_grad_ys.append(
                    ops.IndexedSlices(
                        indices=(array_ops.identity(
                            grad_y.indices, name="grad_ys_%d_indices" %
                            i) if isinstance(grad_y.indices, ops.Tensor) else
                                 grad_y.indices),
                        values=(array_ops.identity(
                            grad_y.values, name="grad_ys_%d_values" %
                            i) if isinstance(grad_y.values, ops.Tensor) else
                                grad_y.values),
                        dense_shape=(array_ops.identity(
                            grad_y.dense_shape, name="grad_ys_%d_shape" %
                            i) if isinstance(grad_y.dense_shape, ops.Tensor)
                                     else grad_y.dense_shape)))
            else:
                new_grad_ys.append(
                    array_ops.identity(grad_y, name="grad_ys_%d" % i))

    return new_grad_ys
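When `grad_y` is `None`, the loop above seeds the gradient with a tensor of ones matching `y`'s shape and dtype. A minimal sketch of that default using the public TF 1.x API (the internal module aliases used above are not assumed; `y` is a hypothetical tensor):

```python
import tensorflow as tf  # assumes a TF 1.x-style graph API

y = tf.constant([[1.0, 2.0], [3.0, 4.0]])
# Mirrors the fill(shape(y), 1) default constructed above for a missing grad_y.
default_grad_y = tf.fill(tf.shape(y), tf.constant(1, dtype=y.dtype))
# tf.ones_like(y) yields the same values and is the usual shorthand.
also_ones = tf.ones_like(y)
```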
Example #38
0
def _validate(tensor_list):
  tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensor_list)
  if not tensor_list:
    raise ValueError("Expected at least one tensor in batch().")
  return tensor_list
Example #39
0
def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
  """Returns the element-wise sum of a list of tensors.

  Optionally, pass `shape` and `tensor_dtype` for shape and type checking;
  otherwise, these are inferred.

  For example:

  ```python
  # tensor 'a' is [[1, 2], [3, 4]]
  # tensor `b` is [[5, 0], [0, 6]]
  tf.accumulate_n([a, b, a]) ==> [[7, 4], [6, 14]]

  # Explicitly pass shape and type
  tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32)
    ==> [[7, 4], [6, 14]]
  ```

  Args:
    inputs: A list of `Tensor` objects, each with same shape and type.
    shape: Shape of elements of `inputs`.
    tensor_dtype: The type of `inputs`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of same shape and type as the elements of `inputs`.

  Raises:
    ValueError: If `inputs` don't all have the same shape and dtype or the
      shape cannot be inferred.
  """
  if tensor_dtype is None:
    if not inputs or not isinstance(inputs, (list, tuple)):
      raise ValueError("inputs must be a list of at least one Tensor with the "
                       "same dtype and shape")
    inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs)
    if not all(isinstance(x, ops.Tensor) for x in inputs):
      raise ValueError("inputs must be a list of at least one Tensor with the "
                       "same dtype and shape")
    if not all(x.dtype == inputs[0].dtype for x in inputs):
      raise ValueError("inputs must be a list of at least one Tensor with the "
                       "same dtype and shape")
    tensor_dtype = inputs[0].dtype
  if shape is not None:
    shape = tensor_shape.as_shape(shape)
  else:
    shape = tensor_shape.unknown_shape()
    for input_tensor in inputs:
      if isinstance(input_tensor, ops.Tensor):
        shape = shape.merge_with(input_tensor.get_shape())
  if not shape.is_fully_defined():
    # TODO(pbar): Make a version of assign_add that accepts an uninitialized
    # lvalue, and takes its shape from that? This would allow accumulate_n to
    # work in all situations that add_n currently works.
    raise ValueError("Cannot infer the shape of the accumulator for "
                     "accumulate_n. Pass the shape argument, or set the shape "
                     "of at least one of the inputs.")
  with ops.op_scope(inputs, name, "AccumulateN") as name:
    var = gen_state_ops._temporary_variable(shape=shape, dtype=tensor_dtype)
    var_name = var.op.name
    var = state_ops.assign(var, array_ops.zeros_like(inputs[0]))
    update_ops = []
    for input_tensor in inputs:
      op = state_ops.assign_add(var, input_tensor, use_locking=True)
      update_ops.append(op)
    with ops.control_dependencies(update_ops):
      return gen_state_ops._destroy_temporary_variable(var,
                                                       var_name=var_name,
                                                       name=name)
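Semantically, the temporary-variable accumulation above produces the same values as `tf.add_n`; the difference is that it sums incrementally rather than keeping every input alive until a single add executes. A short usage sketch under the public TF 1.x API, reusing the shapes from the docstring example:

```python
import tensorflow as tf  # TF 1.x-style graph and session assumed

a = tf.constant([[1, 2], [3, 4]])
b = tf.constant([[5, 0], [0, 6]])

acc = tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32)
ref = tf.add_n([a, b, a])  # same result; add_n holds all inputs at once

with tf.Session() as sess:
    print(sess.run(acc))  # [[ 7  4] [ 6 14]]
    print(sess.run(ref))  # [[ 7  4] [ 6 14]]
```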
Example #40
0
def hash_embedding_lookup(hash_table, params, keys, poisson=0.1, partition_strategy="mod", name=None):
  """Looks up raw woodblocks features in a list of embedding tensors.
  """
  if params in (None, (), []):
    raise ValueError("Need at least one param")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]

  if isinstance(hash_table, variables.PartitionedVariable):
    hash_table = list(hash_table)
  if not isinstance(hash_table, list):
    hash_table = [hash_table]

  with ops.name_scope(name, "hash_embedding_lookup", hash_table + params + [keys]) as name:
    #print("name scope name:", name)
    np = len(params)  # Number of partitions
    # Preserve the resource variable status to avoid accidental dense reads.
    if not any(isinstance(p, resource_variable_ops.ResourceVariable)
               for p in params):
      params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params")
    if np == 1:
      with ops.colocate_with(params[0]):
        values, idx = _do_hash_gather(hash_table[0], params[0], keys, poisson=poisson, name=name)
        # Pad zero rows for keys that are not present in the hash table.
        return tf.scatter_nd(tf.reshape(idx, [-1, 1]), values, raw_key_ops.modify_first_dim(tf.shape(keys), tf.shape(values)))
    else:
      original_indices = math_ops.range(array_ops.shape(keys, out_type=tf.int32)[0])
      # murmur hash32 for partition
      key_hashes = raw_key_ops.murmur32(keys)
      if partition_strategy == "mod":
        p_assignments = key_hashes % np
      else:
        raise ValueError("Unrecognized partition strategy: " +
                         partition_strategy)

      # Cast partition assignments to int32 for use in dynamic_partition.
      # There really should not be more than 2^32 partitions.
      p_assignments = math_ops.cast(p_assignments, dtypes.int32)

      #gather_keys = data_flow_ops.dynamic_partition(keys, p_assignments, np)
      ## Similarly, partition the original indices.
      #pindices = data_flow_ops.dynamic_partition(original_indices,
      #                                           p_assignments, np)
      gather_keys = raw_key_ops.fast_dynamic_partition(keys, p_assignments, np)
      pindices = raw_key_ops.fast_dynamic_partition(original_indices, p_assignments, np)

      # Do np separate lookups, finding embeddings for plist[p] in params[p]
      partitioned_result_partial = []
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result_partial.append(_do_hash_gather(hash_table[p], params[p], gather_keys[p], poisson=poisson))

      # Pad zero rows for keys that were not found in each partition's result
      partitioned_result = []
      for p in xrange(np):
        partitioned_result.append(tf.scatter_nd(tf.reshape(partitioned_result_partial[p][1], [-1, 1]),
                                                partitioned_result_partial[p][0],
                                                raw_key_ops.modify_first_dim(tf.shape(gather_keys[p]), tf.shape(partitioned_result_partial[p][0]))
                                                ))

      # Stitch the padded partition results back into the original key order
      ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result,
                                         name=name)
      element_shape = params[0].get_shape()[1:]
      return array_ops.reshape(ret, array_ops.concat([array_ops.shape(key_hashes), element_shape], 0))
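The zero-padding step above (scattering the rows that were actually found back to their original positions, leaving zero rows for missing keys) can be shown in isolation with a public op. In this sketch, `values`, `idx`, and `num_keys` are hypothetical stand-ins for the outputs of `_do_hash_gather` and the queried key count:

```python
import tensorflow as tf  # illustrative sketch, TF 1.x-style API

# Pretend 2 of 4 queried keys were found in the hash table.
values = tf.constant([[1.0, 1.0], [2.0, 2.0]])  # embeddings of the found keys
idx = tf.constant([0, 3])                       # their positions in the original key order
num_keys = 4                                    # total number of queried keys

# scatter_nd writes each found row at its position and leaves zeros elsewhere,
# which is the "pad non-existing keys with zeros" step used above.
padded = tf.scatter_nd(tf.reshape(idx, [-1, 1]), values, [num_keys, 2])
# padded == [[1, 1], [0, 0], [0, 0], [2, 2]]
```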
Example #41
0
def rejection_sample(tensors,
                     accept_prob_fn,
                     batch_size,
                     queue_threads=1,
                     enqueue_many=False,
                     prebatch_capacity=16,
                     prebatch_threads=1,
                     runtime_checks=False,
                     name=None):
    """Stochastically creates batches by rejection sampling.

  Each list of non-batched tensors is evaluated by `accept_prob_fn`, to produce
  a scalar tensor between 0 and 1. This tensor corresponds to the probability of
  being accepted. When `batch_size` tensor groups have been accepted, the batch
  queue will return a mini-batch.

  Args:
    tensors: List of tensors for data. All tensors are either one item or a
        batch, according to enqueue_many.
    accept_prob_fn: A python lambda that takes a non-batch tensor from each
        item in `tensors`, and produces a scalar tensor.
    batch_size: Size of batch to be returned.
    queue_threads: The number of threads for the queue that will hold the final
      batch.
    enqueue_many: Bool. If true, interpret input tensors as having a batch
        dimension.
    prebatch_capacity: Capacity for the large queue that is used to convert
      batched tensors to single examples.
    prebatch_threads: Number of threads for the large queue that is used to
      convert batched tensors to single examples.
    runtime_checks: Bool. If true, insert runtime checks on the output of
        `accept_prob_fn`. Using `True` might have a performance impact.
    name: Optional prefix for ops created by this function.
  Raises:
    ValueError: enqueue_many is True and labels doesn't have a batch
        dimension, or if enqueue_many is False and labels isn't a scalar.
    ValueError: enqueue_many is True, and batch dimension on data and labels
        don't match.
    ValueError: if a zero initial probability class has a nonzero target
        probability.
  Returns:
    A list of tensors of the same length as `tensors`, with batch dimension
    `batch_size`.

  Example:
    # Get tensor for a single data and label example.
    data, label = data_provider.Get(['data', 'label'])

    # Get stratified batch according to data tensor.
    accept_prob_fn = lambda x: (tf.tanh(x[0]) + 1) / 2
    data_batch = tf.contrib.training.rejection_sample(
        [data, label], accept_prob_fn, 16)

    # Run batch through network.
    ...
  """
    with variable_scope.variable_scope(name, 'rejection_sample', tensors):
        tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors)
        # Reduce the case of a batched example to that of a batch of a single
        # example by taking a batch of size one.
        if enqueue_many:
            # Validate that batch dimension of the input is consistent.
            tensor_list = _verify_data_inputs(tensor_list)

            # Make a single queue to hold input examples. Reshape output so examples
            # don't have singleton batch dimension.
            batched = input_ops.batch(tensor_list,
                                      batch_size=1,
                                      num_threads=prebatch_threads,
                                      capacity=prebatch_capacity,
                                      enqueue_many=True)
            tensor_list = [array_ops.squeeze(x, [0]) for x in batched]

        # Set up a queue containing batches that have the distribution.
        cur_prob = accept_prob_fn(tensor_list)
        if runtime_checks:
            cur_prob = array_ops.identity(control_flow_ops.with_dependencies([
                check_ops.assert_less_equal(0.0, cur_prob),
                check_ops.assert_less_equal(cur_prob, 1.0)
            ], cur_prob),
                                          name='prob_with_checks')
        minibatch = input_ops.maybe_batch(
            tensor_list,
            keep_input=random_ops.random_uniform([]) < cur_prob,
            batch_size=batch_size,
            num_threads=queue_threads)

        # Queues return a single tensor when only one tensor was enqueued. Since
        # we always want the same return type, always return a list.
        if isinstance(minibatch, ops.Tensor):
            minibatch = [minibatch]

        return minibatch
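The acceptance test at the heart of the function is a single Bernoulli draw per example: a uniform sample is compared against the output of `accept_prob_fn`, and the resulting boolean gates whether the example is enqueued. A stripped-down sketch of just that gate (public TF 1.x API; the lambda is the one from the docstring example, `data` is a hypothetical scalar feature):

```python
import tensorflow as tf  # minimal sketch of the acceptance gate, TF 1.x style

accept_prob_fn = lambda x: (tf.tanh(x[0]) + 1) / 2  # from the docstring example

data = tf.random_normal([])                    # hypothetical non-batched example
cur_prob = accept_prob_fn([data])              # scalar in [0, 1]
keep_input = tf.random_uniform([]) < cur_prob  # True with probability cur_prob
# rejection_sample feeds this boolean to the batching queue; examples whose
# draw fails are simply dropped.
```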
Example #42
0
def rejection_sample(tensors, accept_prob_fn, batch_size, queue_threads=1,
                     enqueue_many=False, prebatch_capacity=16,
                     prebatch_threads=1, runtime_checks=False, name=None):
  """Stochastically creates batches by rejection sampling.

  Each list of non-batched tensors is evaluated by `accept_prob_fn`, to produce
  a scalar tensor between 0 and 1. This tensor corresponds to the probability of
  being accepted. When `batch_size` tensor groups have been accepted, the batch
  queue will return a mini-batch.

  Args:
    tensors: List of tensors for data. All tensors are either one item or a
        batch, according to enqueue_many.
    accept_prob_fn: A python lambda that takes a non-batch tensor from each
        item in `tensors`, and produces a scalar tensor.
    batch_size: Size of batch to be returned.
    queue_threads: The number of threads for the queue that will hold the final
      batch.
    enqueue_many: Bool. If true, interpret input tensors as having a batch
        dimension.
    prebatch_capacity: Capacity for the large queue that is used to convert
      batched tensors to single examples.
    prebatch_threads: Number of threads for the large queue that is used to
      convert batched tensors to single examples.
    runtime_checks: Bool. If true, insert runtime checks on the output of
        `accept_prob_fn`. Using `True` might have a performance impact.
    name: Optional prefix for ops created by this function.
  Raises:
    ValueError: enqueue_many is True and labels doesn't have a batch
        dimension, or if enqueue_many is False and labels isn't a scalar.
    ValueError: enqueue_many is True, and batch dimension on data and labels
        don't match.
    ValueError: if a zero initial probability class has a nonzero target
        probability.
  Returns:
    A list of tensors of the same length as `tensors`, with batch dimension
    `batch_size`.

  Example:
    # Get tensor for a single data and label example.
    data, label = data_provider.Get(['data', 'label'])

    # Get stratified batch according to data tensor.
    accept_prob_fn = lambda x: (tf.tanh(x[0]) + 1) / 2
    data_batch = tf.contrib.training.rejection_sample(
        [data, label], accept_prob_fn, 16)

    # Run batch through network.
    ...
  """
  with variable_scope.variable_scope(name, 'rejection_sample', tensors):
    tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors)
    # Reduce the case of a batched example to that of a batch of a single
    # example by taking a batch of size one.
    if enqueue_many:
      # Validate that batch dimension of the input is consistent.
      tensor_list = _verify_data_inputs(tensor_list)

      # Make a single queue to hold input examples. Reshape output so examples
      # don't have singleton batch dimension.
      batched = input_ops.batch(tensor_list,
                                batch_size=1,
                                num_threads=prebatch_threads,
                                capacity=prebatch_capacity,
                                enqueue_many=True)
      tensor_list = [array_ops.squeeze(x, [0]) for x in batched]

    # Set up a queue containing batches that have the distribution.
    cur_prob = accept_prob_fn(tensor_list)
    if runtime_checks:
      cur_prob = array_ops.identity(control_flow_ops.with_dependencies(
          [check_ops.assert_less_equal(0.0, cur_prob),
           check_ops.assert_less_equal(cur_prob, 1.0)],
          cur_prob), name='prob_with_checks')
    keep_input = random_ops.random_uniform([]) < cur_prob
    return _conditional_batch(
        tensor_list, keep_input, batch_size, num_threads=queue_threads)
Example #43
0
def _embedding_lookup_with_distributed_aggregation(params,
                                                   ids,
                                                   partition_strategy="mod",
                                                   name=None,
                                                   max_norm=None,
                                                   weights=None,
                                                   idx=None,
                                                   segment_ids=None):
  """Lookup helper for embedding_lookup_sparse_with_distributed_aggregation."""
  if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
    raise ValueError("Need at least one param")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]

  def maybe_normalize(x):
    if max_norm is not None:
      if x.get_shape().ndims is not None:
        ndims = x.get_shape().ndims
      else:
        ndims = array_ops.size(array_ops.shape(x))
      return clip_ops.clip_by_norm(x, max_norm, axes=list(range(1, ndims)))
    return x

  with ops.name_scope(name, "embedding_lookup_with_distributed_aggregation",
                      params + [ids]) as name:
    np = len(params)  # Number of partitions
    # Preserve the resource variable status to avoid accidental dense reads.
    if not any(
        isinstance(p, resource_variable_ops.ResourceVariable) for p in params):
      params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params")
    if np == 1:
      with ops.colocate_with(params[0]):
        ret = maybe_normalize(_do_gather(params[0], ids))
        ignore_weights = weights is None
        if not ignore_weights:
          if weights.dtype != ret.dtype:
            weights = math_ops.cast(weights, ret.dtype)
          # Reshape to allow broadcast
          ones = array_ops.fill(
              array_ops.expand_dims(array_ops.rank(ret) - 1, 0), 1)
          bcast_weights_shape = array_ops.concat(
              [array_ops.shape(weights), ones], 0)
          orig_weights_shape = weights.get_shape()
          weights = array_ops.reshape(weights, bcast_weights_shape)
          # Set weights shape after reshape
          if ret.get_shape().ndims is not None:
            weights.set_shape(
                orig_weights_shape.concatenate(
                    [1 for _ in range(ret.get_shape().ndims - 1)]))
          ret *= weights
          return math_ops.segment_sum(ret, segment_ids, name=name)
        else:
          return math_ops.sparse_segment_sum(ret, idx, segment_ids, name=name)
    else:
      ids = ops.convert_to_tensor(ids, name="ids")
      flat_ids = array_ops.reshape(ids, [-1])
      original_indices = math_ops.range(array_ops.size(flat_ids))

      # Create p_assignments and set new_ids depending on the strategy.
      if partition_strategy == "mod":
        p_assignments = flat_ids % np
        new_ids = flat_ids // np
      elif partition_strategy == "div":
        # Compute num_total_ids as the sum of dim-0 of params, then assign to
        # partitions based on a constant number of ids per partition. Optimize
        # if we already know the full shape statically.
        dim_0_size = params[0].get_shape()[0]
        for p in xrange(1, np):
          dim_0_size += params[p].get_shape()[0]
        if dim_0_size.value:
          num_total_ids = constant_op.constant(dim_0_size.value, flat_ids.dtype)
        else:
          dim_0_sizes = []
          for p in xrange(np):
            if params[p].get_shape()[0].value is not None:
              dim_0_sizes.append(params[p].get_shape()[0].value)
            else:
              with ops.colocate_with(params[p]):
                dim_0_sizes.append(array_ops.shape(params[p])[0])
          num_total_ids = math_ops.reduce_sum(
              math_ops.cast(array_ops.stack(dim_0_sizes), flat_ids.dtype))
        ids_per_partition = num_total_ids // np
        extras = num_total_ids % np

        p_assignments = math_ops.maximum(flat_ids // (ids_per_partition + 1), (
            flat_ids - extras) // ids_per_partition)

        # Emulate a conditional using a boolean indicator tensor
        is_in_first_extras_partitions = math_ops.cast(p_assignments < extras,
                                                      flat_ids.dtype)
        new_ids = (is_in_first_extras_partitions * (flat_ids %
                                                    (ids_per_partition + 1)) +
                   (1 - is_in_first_extras_partitions) * (
                       (flat_ids - extras) % ids_per_partition))
      else:
        raise ValueError("Unrecognized partition strategy: " +
                         partition_strategy)

      # Cast partition assignments to int32 for use in dynamic_partition.
      # There really should not be more than 2^32 partitions.
      p_assignments = math_ops.cast(p_assignments, dtypes.int32)
      # Partition list of ids based on assignments into np separate lists
      gather_ids = data_flow_ops.dynamic_partition(new_ids, p_assignments, np)
      # Similarly, partition the original indices.
      pindices = data_flow_ops.dynamic_partition(original_indices,
                                                 p_assignments, np)
      # Do np separate lookups, finding embeddings for plist[p] in params[p]
      partitioned_result = []
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result.append(_do_gather(params[p], gather_ids[p]))

      ignore_weights = weights is None
      if not ignore_weights:
        # Partition weights according to pindices.
        partitioned_weight = []
        for p in xrange(np):
          partitioned_weight.append(array_ops.gather(weights, pindices[p]))
      # Reshape each partition result.
      element_shape = params[0].get_shape()[1:]
      for p in params[1:]:
        element_shape = element_shape.merge_with(p.get_shape()[1:])
      if element_shape.is_fully_defined():
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            partitioned_result[p] = array_ops.reshape(
                partitioned_result[p],
                array_ops.concat([array_ops.shape(pindices[p]), element_shape],
                                 0))
      else:
        with ops.colocate_with(params[0]):
          params_shape = array_ops.shape(params[0])
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            partitioned_result[p] = array_ops.reshape(
                partitioned_result[p],
                array_ops.concat([
                    array_ops.shape(pindices[p]), array_ops.slice(
                        params_shape, [1], [-1])
                ], 0))
      # Normalize each partition result.
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result[p] = maybe_normalize(partitioned_result[p])
      if not ignore_weights:
        # Multiply each partition result with partition weights.
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            if partitioned_weight[p].dtype != partitioned_result[p].dtype:
              partitioned_weight[p] = math_ops.cast(partitioned_weight[p],
                                                    partitioned_result[p].dtype)
            # Reshape partition weights.
            ones = array_ops.fill(
                array_ops.expand_dims(
                    array_ops.rank(partitioned_result[p]) - 1, 0), 1)
            bcast_weights_shape = array_ops.concat(
                [array_ops.shape(partitioned_weight[p]), ones], 0)
            orig_weights_shape = partitioned_weight[p].get_shape()
            partitioned_weight[p] = array_ops.reshape(partitioned_weight[p],
                                                      bcast_weights_shape)
            if partitioned_result[p].get_shape().ndims is not None:
              partitioned_weight[p].set_shape(
                  orig_weights_shape.concatenate([
                      1
                      for _ in range(partitioned_result[p].get_shape().ndims -
                                     1)
                  ]))
            partitioned_result[p] *= partitioned_weight[p]
      partitioned_segment_ids = []
      for p in xrange(np):
        if not ignore_weights:
          # Partition segment_ids according to pindices.
          p_segment_ids = array_ops.gather(segment_ids, pindices[p])
          # Number the p_segment_ids to meet segment_sum's requirements. Note
          # that unique_p_segment_ids contains unique segment ids of this
          # partition and these ids' order is unchanged.
          unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
              p_segment_ids)
          partitioned_segment_ids.append(unique_p_segment_ids)
          # segment_sum this partition's result.
          with ops.colocate_with(params[p]):
            partitioned_result[p] = math_ops.segment_sum(
                partitioned_result[p], unique_p_segment_idx)
        else:
          # When ignoring weights, we need to get the indices of elements in
          # idx and segment_ids.
          _, exclude_idx = array_ops.setdiff1d(idx, pindices[p])
          all_idx = math_ops.range(array_ops.shape(idx)[0])
          _, include_idx = array_ops.setdiff1d(all_idx, exclude_idx)
          # Gather segment_ids and idx according to these indices.
          p_segment_ids = array_ops.gather(segment_ids, include_idx)
          p_idx = array_ops.gather(idx, include_idx)
          # Number the p_segment_ids, same as in the weighted case above.
          unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
              p_segment_ids)
          _, unique_p_idx_idx = array_ops.unique(p_idx)
          partitioned_segment_ids.append(unique_p_segment_ids)
          with ops.colocate_with(params[p]):
            partitioned_result[p] = math_ops.sparse_segment_sum(
                partitioned_result[p], unique_p_idx_idx, unique_p_segment_idx)
      # Concat each partition's segment_ids and result for final segment_sum.
      concat_segment_ids = array_ops.concat(partitioned_segment_ids, 0)
      concat_partitioned_result = array_ops.concat(partitioned_result, 0)
      return math_ops.unsorted_segment_sum(
          concat_partitioned_result,
          concat_segment_ids,
          math_ops.reduce_max(concat_segment_ids) + 1,
          name=name)
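The "div" assignment arithmetic above is easiest to verify with small numbers: the first `extras` partitions each hold one extra row. A plain-Python walk-through (no TF involved) for 10 ids split across 3 partitions:

```python
# Plain-Python check of the "div" partition arithmetic used above.
num_total_ids, num_partitions = 10, 3
ids_per_partition = num_total_ids // num_partitions  # 3
extras = num_total_ids % num_partitions              # 1: partition 0 holds 4 rows

for flat_id in range(num_total_ids):
    p = max(flat_id // (ids_per_partition + 1),
            (flat_id - extras) // ids_per_partition)
    if p < extras:
        new_id = flat_id % (ids_per_partition + 1)
    else:
        new_id = (flat_id - extras) % ids_per_partition
    print("id %d -> partition %d, row %d" % (flat_id, p, new_id))
# ids 0-3 land in partition 0 (rows 0-3), 4-6 in partition 1, 7-9 in partition 2.
```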
Example #44
0
def gradients(ys, xs, grad_ys=None, name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors.  `grad_ys`
  is a list of `Tensor`, holding the gradients received by the
  `ys`. The list must be the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial
  derivatives of `ys` with respect to `xs`.  It returns a list of
  `Tensor` of length `len(xs)` where each tensor is the `sum(dy/dx)`
  for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds
  the initial gradients for each y in `ys`.  When `grad_ys` is None,
  we fill in a tensor of '1's of the shape of y for each y in `ys`.  A
  user can provide their own initial `grad_ys` to compute the
  derivatives using a different initial gradient for each y (e.g., if
  one wanted to weight the gradient differently for each value in
  each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as
      `ys` and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned
      for an operation.  This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid.

  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)
  with ops.op_scope(ys + xs + grad_ys, name, "gradients"):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs.  Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected.  Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, has_control_flow = _PendingCount(
        ops.get_default_graph(), to_ops, from_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op.  The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      if op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)
    # The set of 'from_ops'.
    stop_ops = _StopOps(from_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with ops.device(_GetGradsDevice(op, colocate_gradients_with_ops)):
        if has_control_flow:
          control_flow_ops.EnterGradWhileContext(op)
        out_grads = _AggregatedGrads(grads, op, has_control_flow,
                                     aggregation_method)
        grad_fn = None
        if any(out_grads) and op._id not in stop_ops:
          # A grad_fn must be defined, either as a function or as None
          # for ops that do not have gradients.
          try:
            grad_fn = ops.get_gradient_function(op)
          except LookupError:
            raise LookupError(
                "No gradient defined for operation '%s' (op type: %s)" %
                (op.name, op.type))
        if grad_fn and any(out_grads):
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not out_grad
                and types.as_dtype(op.outputs[i].dtype).base_dtype in 
Example #45
0
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors.  `grad_ys`
  is a list of `Tensor`, holding the gradients received by the
  `ys`. The list must be the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial
  derivatives of `ys` with respect to `xs`.  It returns a list of
  `Tensor` of length `len(xs)` where each tensor is the `sum(dy/dx)`
  for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds
  the initial gradients for each y in `ys`.  When `grad_ys` is None,
  we fill in a tensor of '1's of the shape of y for each y in `ys`.  A
  user can provide their own initial `grad_ys` to compute the
  derivatives using a different initial gradient for each y (e.g., if
  one wanted to weight the gradient differently for each value in
  each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as
      `ys` and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned
      for an operation.  This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid.

  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)

  with ops.name_scope(name, "gradients", ys + xs + grad_ys):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs.  Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected.  Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
                                              from_ops,
                                              colocate_gradients_with_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op.  The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)
      # pylint: enable=protected-access

    if loop_state:
      loop_exits = loop_state.ProcessUnusedLoopExits(pending_count, to_ops_set)
      for y in loop_exits:
        if _IsTrainable(y):
          _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    # The set of 'from_ops'.
    stop_ops = _StopOps(from_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with _maybe_colocate_with(op, colocate_gradients_with_ops):
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=True)
        out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=True)

        grad_fn = None
        # pylint: disable=protected-access
        is_func_call = ops.get_default_graph()._is_function(op.type)
        has_out_grads = any(isinstance(g, ops.Tensor) or g for g in out_grads)
        if has_out_grads and (op._id not in stop_ops):
          if is_func_call:
            grad_fn = ops.get_default_graph()._get_function(
                op.type).python_grad_func
            # pylint: enable=protected-access
          else:
            # A grad_fn must be defined, either as a function or as None
            # for ops that do not have gradients.
            try:
              grad_fn = ops.get_gradient_function(op)
            except LookupError:
              raise LookupError(
                  "No gradient defined for operation '%s' (op type: %s)" %
                  (op.name, op.type))
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=False)
        if (grad_fn or is_func_call) and has_out_grads:
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not isinstance(out_grad, ops.Tensor) and
                not out_grad) and _IsTrainable(op.outputs[i]):
              # Only floating-point outputs get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i)
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              if grad_fn:
                # If grad_fn was found, do not use SymbolicGradient even for
                # functions.
                in_grads = grad_fn(op, *out_grads)
              else:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                in_grads = _SymGrad(op, out_grads)
              in_grads = _AsList(in_grads)
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len(
                  [x for x in in_grads if x is not None]) > 1:
                in_grads = control_flow_ops.tuple(in_grads)
          _LogOpGradients(op, out_grads, in_grads)
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for t_in, in_grad in zip(op.inputs, in_grads):
          if in_grad is not None:
            if isinstance(in_grad, ops.Tensor):
              in_grad.set_shape(t_in.get_shape())
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=False)

      # Update pending count for the inputs of op and enqueue ready ops.
      _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state)

  if loop_state:
    loop_state.PostProcessing()
  return [_GetGrad(grads, x) for x in xs]
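A short usage sketch of the `grad_ys` semantics described in the docstring (public TF 1.x API): supplying `grad_ys` scales the seed gradient of each `y`, which is the same as differentiating a weighted sum of the `ys`.

```python
import tensorflow as tf  # TF 1.x-style usage sketch of gradients()

x = tf.constant(3.0)
y1 = x * x       # dy1/dx = 2x = 6
y2 = 5.0 * x     # dy2/dx = 5

# Default grad_ys (ones): returns sum(dy/dx) = 6 + 5 = 11.
g_default = tf.gradients([y1, y2], [x])

# Custom grad_ys weights each y's contribution: 2*6 + 0.5*5 = 14.5.
g_weighted = tf.gradients([y1, y2], [x],
                          grad_ys=[tf.constant(2.0), tf.constant(0.5)])

with tf.Session() as sess:
    print(sess.run(g_default))   # [11.0]
    print(sess.run(g_weighted))  # [14.5]
```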
Example #46
0
def stratified_sample(tensors, labels, target_probs, batch_size,
                      init_probs=None, enqueue_many=False, queue_capacity=16,
                      threads_per_queue=1, name=None):
  """Stochastically creates batches based on per-class probabilities.

  This method discards examples. Internally, it creates one queue to amortize
  the cost of disk reads, and one queue to hold the properly-proportioned
  batch. See `stratified_sample_unknown_dist` for a function that performs
  stratified sampling with one queue per class and doesn't require knowing the
  class data-distribution ahead of time.

  Args:
    tensors: List of tensors for data. All tensors are either one item or a
        batch, according to enqueue_many.
    labels: Tensor for label of data. Label is a single integer or a batch,
        depending on enqueue_many. It is not a one-hot vector.
    target_probs: Target class proportions in batch. An object whose type has a
        registered Tensor conversion function.
    batch_size: Size of batch to be returned.
    init_probs: Class proportions in the data. An object whose type has a
        registered Tensor conversion function, or `None` for estimating the
        initial distribution.
    enqueue_many: Bool. If true, interpret input tensors as having a batch
        dimension.
    queue_capacity: Capacity of the large queue that holds input examples.
    threads_per_queue: Number of threads for the large queue that holds input
        examples and for the final queue with the proper class proportions.
    name: Optional prefix for ops created by this function.
  Raises:
    ValueError: enqueue_many is True and labels doesn't have a batch
        dimension, or if enqueue_many is False and labels isn't a scalar.
    ValueError: enqueue_many is True, and batch dimension on data and labels
        don't match.
    ValueError: if probs don't sum to one.
    ValueError: if a zero initial probability class has a nonzero target
        probability.
    TFAssertion: if labels aren't integers in [0, num classes).
  Returns:
    (data_batch, label_batch), where data_batch is a list of tensors of the same
        length as `tensors`.

  Example:
    # Get tensor for a single data and label example.
    data, label = data_provider.Get(['data', 'label'])

    # Get stratified batch according to per-class probabilities.
    target_probs = [...distribution you want...]
    [data_batch], labels = tf.contrib.training.stratified_sample(
        [data], label, target_probs)

    # Run batch through network.
    ...
  """
  with ops.name_scope(name, 'stratified_sample', tensors + [labels]):
    tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors)
    labels = ops.convert_to_tensor(labels)
    target_probs = ops.convert_to_tensor(target_probs, dtype=dtypes.float32)
    # Reduce the case of a single example to that of a batch of size 1.
    if not enqueue_many:
      tensor_list = [array_ops.expand_dims(tensor, 0) for tensor in tensor_list]
      labels = array_ops.expand_dims(labels, 0)

    # If `init_probs` is `None`, set up online estimation of data distribution.
    if init_probs is None:
      # We use `target_probs` to get the number of classes, so its shape must be
      # fully defined at graph construction time.
      target_probs.get_shape().assert_is_fully_defined()
      init_probs = _estimate_data_distribution(
          labels, target_probs.get_shape().num_elements())
    else:
      init_probs = ops.convert_to_tensor(init_probs, dtype=dtypes.float32)

    # Validate that input is consistent.
    tensor_list, labels, [init_probs, target_probs] = _verify_input(
        tensor_list, labels, [init_probs, target_probs])

    # Check that all zero initial probabilities also have zero target
    # probabilities.
    assert_op = control_flow_ops.Assert(
        math_ops.reduce_all(math_ops.logical_or(
            math_ops.not_equal(init_probs, 0),
            math_ops.equal(target_probs, 0))),
        ['All classes with zero initial probability must also have zero target '
         'probability: ', init_probs, target_probs])
    init_probs = control_flow_ops.with_dependencies([assert_op], init_probs)

    # Calculate acceptance sampling probabilities.
    accept_probs = _calculate_acceptance_probabilities(init_probs, target_probs)
    proportion_rejected = math_ops.reduce_sum((1 - accept_probs) * init_probs)
    accept_probs = control_flow_ops.cond(
        math_ops.less(proportion_rejected, .5),
        lambda: accept_probs,
        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
            accept_probs, [accept_probs],
            message='Proportion of examples rejected by sampler is high.',
            first_n=10))

    # Make a single queue to hold input examples. Reshape output so examples
    # don't have singleton batch dimension.
    batched = input_ops.batch(tensor_list + [labels],
                              batch_size=1,
                              num_threads=threads_per_queue,
                              capacity=queue_capacity,
                              enqueue_many=True)
    val_list = [array_ops.squeeze(x, [0]) for x in batched[:-1]]
    label = array_ops.squeeze(batched[-1], [0])

    # Set up second queue containing batches that have the desired class
    # proportions.
    cur_prob = array_ops.gather(accept_probs, label)
    keep_input = random_ops.random_uniform([]) < cur_prob
    batched = _conditional_batch(
        val_list + [label],
        keep_input,
        batch_size,
        num_threads=threads_per_queue)
    return batched[:-1], batched[-1]
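`_calculate_acceptance_probabilities` is not shown in this snippet. One standard way to derive per-class acceptance probabilities is to take the target/initial ratio for each class and rescale so the largest ratio is accepted with probability 1; the plain-Python sketch below illustrates that idea and is an assumption about the helper, not its actual code:

```python
# Illustrative only: one common way to turn (init_probs, target_probs) into
# per-class acceptance probabilities. The real helper may differ in details.
init_probs = [0.7, 0.2, 0.1]
target_probs = [0.4, 0.4, 0.2]

ratios = [t / i for t, i in zip(target_probs, init_probs)]
scale = max(ratios)
accept_probs = [r / scale for r in ratios]
print(accept_probs)  # classes under-represented relative to the target are always kept
```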
Example #47
0
def _validate_join(tensor_list_list):
  tensor_list_list = [ops.convert_n_to_tensor_or_indexed_slices(tl)
                      for tl in tensor_list_list]
  if not tensor_list_list:
    raise ValueError("Expected at least one input in batch_join().")
  return tensor_list_list
Example #48
0
def stratified_sample_unknown_dist(tensors, labels, probs, batch_size,
                                   enqueue_many=False, queue_capacity=16,
                                   threads_per_queue=1, name=None):
  """Stochastically creates batches based on per-class probabilities.

  **NOTICE** This sampler can be significantly slower than `stratified_sample`
  due to each thread discarding all examples not in its assigned class.

  This uses a number of threads proportional to the number of classes. See
  `stratified_sample` for an implementation that discards fewer examples and
  uses a fixed number of threads. This function's only advantage over
  `stratified_sample` is that the class data-distribution doesn't need to be
  known ahead of time.

  Args:
    tensors: List of tensors for data. All tensors are either one item or a
        batch, according to enqueue_many.
    labels: Tensor for label of data. Label is a single integer or a batch,
        depending on enqueue_many. It is not a one-hot vector.
    probs: Target class probabilities. An object whose type has a registered
        Tensor conversion function.
    batch_size: Size of batch to be returned.
    enqueue_many: Bool. If true, interpret input tensors as having a batch
        dimension.
    queue_capacity: Capacity of each per-class queue.
    threads_per_queue: Number of threads for each per-class queue.
    name: Optional prefix for ops created by this function.
  Raises:
    ValueError: enqueue_many is True and labels doesn't have a batch
        dimension, or if enqueue_many is False and labels isn't a scalar.
    ValueError: enqueue_many is True, and batch dimension of data and labels
        don't match.
    ValueError: if probs don't sum to one.
    TFAssertion: if labels aren't integers in [0, num classes).
  Returns:
    (data_batch, label_batch), where data_batch is a list of tensors of the same
        length as `tensors`.

  Example:
    # Get tensor for a single data and label example.
    data, label = data_provider.Get(['data', 'label'])

    # Get stratified batch according to per-class probabilities.
    init_probs = [1.0/NUM_CLASSES for _ in range(NUM_CLASSES)]
    [data_batch], labels = (
        tf.contrib.training.stratified_sample_unknown_dist(
            [data], label, init_probs, 16))

    # Run batch through network.
    ...
  """
  with ops.name_scope(name, 'stratified_sample_unknown_dist',
                      tensors + [labels]):
    tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors)
    labels = ops.convert_to_tensor(labels)
    probs = ops.convert_to_tensor(probs, dtype=dtypes.float32)
    # Reduce the case of a single example to that of a batch of size 1.
    if not enqueue_many:
      tensor_list = [array_ops.expand_dims(tensor, 0) for tensor in tensor_list]
      labels = array_ops.expand_dims(labels, 0)

    # Validate that input is consistent.
    tensor_list, labels, [probs] = _verify_input(tensor_list, labels, [probs])

    # Make per-class queues.
    per_class_queues = _make_per_class_queues(
        tensor_list, labels, probs.get_shape().num_elements(), queue_capacity,
        threads_per_queue)

    # Use the per-class queues to generate stratified batches.
    return _get_batch_from_per_class_queues(
        per_class_queues, probs, batch_size)