def _testSerializeDeserializeNestedBatchHelper(self,
                                                 serialize_fn,
                                                 deserialize_fn,
                                                 out_type=dtypes.string):
    with self.cached_session(use_gpu=False) as sess:
      sp_input = self._SparseTensorValue_5x6(np.arange(6))
      serialized = serialize_fn(sp_input, out_type=out_type)
      serialized = array_ops.stack([serialized, serialized])
      serialized = array_ops.stack([serialized, serialized])

      sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)

      combined_indices, combined_values, combined_shape = sess.run(
          sp_deserialized)

      # minibatch 0
      self.assertAllEqual(combined_indices[:6, :2], [[0, 0]] * 6)
      self.assertAllEqual(combined_indices[:6, 2:], sp_input[0])
      self.assertAllEqual(combined_values[:6], sp_input[1])
      # minibatch 1
      self.assertAllEqual(combined_indices[6:12, :2], [[0, 1]] * 6)
      self.assertAllEqual(combined_indices[6:12, 2:], sp_input[0])
      self.assertAllEqual(combined_values[6:12], sp_input[1])
      # minibatch 2
      self.assertAllEqual(combined_indices[12:18, :2], [[1, 0]] * 6)
      self.assertAllEqual(combined_indices[12:18, 2:], sp_input[0])
      self.assertAllEqual(combined_values[12:18], sp_input[1])
      # minibatch 3
      self.assertAllEqual(combined_indices[18:, :2], [[1, 1]] * 6)
      self.assertAllEqual(combined_indices[18:, 2:], sp_input[0])
      self.assertAllEqual(combined_values[18:], sp_input[1])

      self.assertAllEqual(combined_shape, [2, 2, 5, 6])
  def __call__(self,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
    is_list = isinstance(inputs, list)
    if self._use_dynamic_rnn:
      if is_list:
        inputs = array_ops.stack(inputs)
      outputs, state = rnn.dynamic_rnn(
          self._cell,
          inputs,
          sequence_length=sequence_length,
          initial_state=initial_state,
          dtype=dtype,
          time_major=True,
          scope=scope)
      if is_list:
        # Convert outputs back to list
        outputs = array_ops.unstack(outputs)
    else:  # non-dynamic rnn
      if not is_list:
        inputs = array_ops.unstack(inputs)
      outputs, state = contrib_rnn.static_rnn(self._cell,
                                              inputs,
                                              initial_state=initial_state,
                                              dtype=dtype,
                                              sequence_length=sequence_length,
                                              scope=scope)
      if not is_list:
        # Convert outputs back to tensor
        outputs = array_ops.stack(outputs)

    return outputs, state
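
A minimal sketch, assuming the public tf.stack / tf.unstack API (the wrapper above uses the internal array_ops aliases), of the list/tensor conversion this __call__ performs around dynamic_rnn and static_rnn:

import tensorflow as tf

# Three time steps of a [batch=1, feature=1] input, kept as a Python list.
steps = [tf.constant([[float(t)]]) for t in range(3)]

# List -> single time-major tensor, as dynamic_rnn expects.
as_tensor = tf.stack(steps)      # shape [3, 1, 1]

# Tensor -> list of per-step tensors, as static_rnn expects.
as_list = tf.unstack(as_tensor)  # three tensors of shape [1, 1]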
def _prepare_inputs_for_rnn(sequence_features, context_features, num_unroll):
  """Prepares features batched by the SQSS for input to a state-saving RNN.

  Args:
    sequence_features: A dict of sequence feature name to `Tensor`, with
      tensors of shape `[batch_size, num_unroll, ...]` and type float32.
    context_features: A dict of context feature name to `Tensor`, with
      tensors of shape `[batch_size, 1, ...]` and type float32.
    num_unroll: Python integer, how many time steps to unroll at a time.
      The input sequences of length `k` are then split into `k / num_unroll`
      many segments.

  Returns:
    features_by_time: A list of length `num_unroll` with `Tensor` entries of
      shape `[batch_size, len(sequence_features) + len(context_features)]` of
      type float32. Features are stored in lexicographic order by their
      corresponding feature dict keys, first in the `sequence_features` and
      then in the `context_features` dicts. Context features are copied into
      each time step.
  """

  def _tile(feature):
    return array_ops.squeeze(
        array_ops.tile(array_ops.expand_dims(feature, 1), [1, num_unroll, 1]),
        axis=2)

  sequence_features = [sequence_features[k] for k in sorted(sequence_features)]
  if not context_features:
    return array_ops.unstack(array_ops.stack(sequence_features, 2), axis=1)
  context_features = [
      _tile(context_features[k]) for k in sorted(context_features)
  ]
  return array_ops.unstack(
      array_ops.stack(sequence_features + context_features, 2), axis=1)
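
A small NumPy sketch (illustration only, not library code) of the reshaping documented above: features are stacked on a new trailing axis and then split along the time axis.

import numpy as np

batch_size, num_unroll = 2, 3
sequence_features = {
    "a": np.arange(6, dtype=np.float32).reshape(batch_size, num_unroll),
    "b": np.arange(6, 12, dtype=np.float32).reshape(batch_size, num_unroll),
}

# Stack features on a new trailing axis, then split along the time axis.
stacked = np.stack(
    [sequence_features[k] for k in sorted(sequence_features)], axis=2)
features_by_time = [stacked[:, t, :] for t in range(num_unroll)]
# len(features_by_time) == num_unroll and each entry has shape
# [batch_size, len(sequence_features)].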
Example #4

  def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      If `state_size` is an int or TensorShape, then the return value is a
      `2-D` tensor of shape `[batch_size x state_size]` filled with zeros.

      If `state_size` is a nested list or tuple, then the return value is
      a nested list or tuple (of the same structure) of `2-D` tensors with
      the shapes `[batch_size x s]` for each s in `state_size`.
    """
    state_size = self.state_size
    if nest.is_sequence(state_size):
      state_size_flat = nest.flatten(state_size)
      zeros_flat = [
          array_ops.zeros(
              array_ops.stack(_state_size_with_prefix(
                  s, prefix=[batch_size])),
              dtype=dtype) for s in state_size_flat
      ]
      for s, z in zip(state_size_flat, zeros_flat):
        z.set_shape(_state_size_with_prefix(s, prefix=[None]))
      zeros = nest.pack_sequence_as(structure=state_size,
                                    flat_sequence=zeros_flat)
    else:
      zeros_size = _state_size_with_prefix(state_size, prefix=[batch_size])
      zeros = array_ops.zeros(array_ops.stack(zeros_size), dtype=dtype)
      zeros.set_shape(_state_size_with_prefix(state_size, prefix=[None]))

    return zeros
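
A hedged sketch of the nested case using the public tf.nest and tf.zeros APIs; the nested state_size below is made up for illustration.

import tensorflow as tf

batch_size = 5
state_size = (4, (3, 2))  # hypothetical nested state_size

zeros = tf.nest.map_structure(
    lambda s: tf.zeros([batch_size, s]), state_size)
# zeros mirrors the nesting of state_size:
# zeros[0].shape == (5, 4), zeros[1][0].shape == (5, 3), zeros[1][1].shape == (5, 2)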
  def testBatch(self):
    # Build an arbitrary RGB image
    np.random.seed(7)
    batch_size = 5
    shape = (batch_size, 2, 7, 3)

    for nptype in self.float_types:
      inp = GenerateNumpyRandomRGB(shape).astype(nptype)

      # Convert to HSV and back, as a batch and individually
      with self.test_session() as sess:
        batch0 = array_ops.placeholder(nptype, shape=shape)
        with self.test_scope():
          batch1 = image_ops.rgb_to_hsv(batch0)
          batch2 = image_ops.hsv_to_rgb(batch1)
        split0 = array_ops.unstack(batch0)
        with self.test_scope():
          split1 = list(map(image_ops.rgb_to_hsv, split0))
          split2 = list(map(image_ops.hsv_to_rgb, split1))
        join1 = array_ops.stack(split1)
        join2 = array_ops.stack(split2)
        batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2],
                                                {batch0: inp})

      # Verify that processing batch elements together is the same as separate
      self.assertAllClose(batch1, join1)
      self.assertAllClose(batch2, join2)
      self.assertAllCloseAccordingToType(
          batch2, inp, bfloat16_atol=0.03, half_rtol=0.02)
Example #6
  def testConst(self):
    np.random.seed(7)
    with self.test_session(use_gpu=True):
      for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
        data = np.random.randn(*shape).astype(np.float32)
        # Pack back into a single tensorflow tensor directly using np array
        c = array_ops.stack(data)
        # This is implemented via a Const:
        self.assertEqual(c.op.type, "Const")
        self.assertAllEqual(c.eval(), data)

        c = array_ops.parallel_stack(data)
        self.assertAllEqual(c.eval(), data)

        # Python lists also work for 1-D case:
        if len(shape) == 1:
          data_list = list(data)
          cl = array_ops.stack(data_list)
          self.assertEqual(cl.op.type, "Const")
          self.assertAllEqual(cl.eval(), data)

          cl = array_ops.parallel_stack(data_list)
          self.assertAllEqual(cl.eval(), data)

      # Verify that shape induction works with shapes produced via const stack
      a = constant_op.constant([1, 2, 3, 4, 5, 6])
      b = array_ops.reshape(a, array_ops.stack([2, 3]))
      self.assertAllEqual(b.get_shape(), [2, 3])
Example #7
  def testConcat(self):
    c = constant_op.constant([1.0, 2.0], dtype=dtypes.float32)
    l0 = list_ops.tensor_list_from_tensor(c, element_shape=scalar_shape())
    l1 = list_ops.tensor_list_from_tensor([-1.0], element_shape=scalar_shape())
    l_batch_0 = array_ops.stack([l0, l1])
    l_batch_1 = array_ops.stack([l1, l0])

    l_concat_01 = list_ops.tensor_list_concat_lists(
        l_batch_0, l_batch_1, element_dtype=dtypes.float32)
    l_concat_10 = list_ops.tensor_list_concat_lists(
        l_batch_1, l_batch_0, element_dtype=dtypes.float32)
    l_concat_00 = list_ops.tensor_list_concat_lists(
        l_batch_0, l_batch_0, element_dtype=dtypes.float32)
    l_concat_11 = list_ops.tensor_list_concat_lists(
        l_batch_1, l_batch_1, element_dtype=dtypes.float32)

    expected_00 = [[1.0, 2.0, 1.0, 2.0], [-1.0, -1.0]]
    expected_01 = [[1.0, 2.0, -1.0], [-1.0, 1.0, 2.0]]
    expected_10 = [[-1.0, 1.0, 2.0], [1.0, 2.0, -1.0]]
    expected_11 = [[-1.0, -1.0], [1.0, 2.0, 1.0, 2.0]]

    for i, (concat, expected) in enumerate(zip(
        [l_concat_00, l_concat_01, l_concat_10, l_concat_11],
        [expected_00, expected_01, expected_10, expected_11])):
      splitted = array_ops.unstack(concat)
      splitted_stacked_ret = self.evaluate(
          (list_ops.tensor_list_stack(splitted[0], dtypes.float32),
           list_ops.tensor_list_stack(splitted[1], dtypes.float32)))
      print("Test concat %d: %s, %s, %s, %s"
            % (i, expected[0], splitted_stacked_ret[0],
               expected[1], splitted_stacked_ret[1]))
      self.assertAllClose(expected[0], splitted_stacked_ret[0])
      self.assertAllClose(expected[1], splitted_stacked_ret[1])

    # Concatenating mismatched shapes fails.
    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
      self.evaluate(
          list_ops.tensor_list_concat_lists(
              l_batch_0,
              list_ops.empty_tensor_list(scalar_shape(), dtypes.float32),
              element_dtype=dtypes.float32))

    with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                 "element shapes are not identical at index 0"):
      l_batch_of_vec_tls = array_ops.stack(
          [list_ops.tensor_list_from_tensor([[1.0]], element_shape=[1])] * 2)
      self.evaluate(
          list_ops.tensor_list_concat_lists(l_batch_0, l_batch_of_vec_tls,
                                            element_dtype=dtypes.float32))

    with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                 r"input_b\[0\].dtype != element_dtype."):
      l_batch_of_int_tls = array_ops.stack(
          [list_ops.tensor_list_from_tensor([1], element_shape=scalar_shape())]
          * 2)
      self.evaluate(
          list_ops.tensor_list_concat_lists(l_batch_0, l_batch_of_int_tls,
                                            element_dtype=dtypes.float32))
Example #8
  def testWithExtensionAndAttr(self):
    with ops.Graph().as_default() as g:
      c = constant_op.constant(5.0, dtype=dtypes.float32, name="c")
      array_ops.stack([c, c], name="pack")
    gdef = g.as_graph_def()

    with self.test_session():
      pack, = importer.import_graph_def(gdef, return_elements=["pack"])
      self.assertAllEqual(pack.outputs[0].eval(), [5.0, 5.0])
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Crops an image to a specified bounding box.

  This op cuts a rectangular part out of `image`. The top-left corner of the
  returned image is at `offset_height, offset_width` in `image`, and its
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Args:
    image: 3-D tensor with shape `[height, width, channels]`
    offset_height: Vertical coordinate of the top-left corner of the result in
                   the input.
    offset_width: Horizontal coordinate of the top-left corner of the result in
                  the input.
    target_height: Height of the result.
    target_width: Width of the result.

  Returns:
    3-D tensor of image with shape `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative, or either `target_height` or `target_width` is not positive.
  """
  image = ops.convert_to_tensor(image, name='image')

  assert_ops = []
  assert_ops += _Check3DImage(image, require_static=False)

  height, width, depth = _ImageDimensions(image)

  assert_ops += _assert(offset_width >= 0, ValueError,
                        'offset_width must be >= 0.')
  assert_ops += _assert(offset_height >= 0, ValueError,
                        'offset_height must be >= 0.')
  assert_ops += _assert(target_width > 0, ValueError,
                        'target_width must be > 0.')
  assert_ops += _assert(target_height > 0, ValueError,
                        'target_height must be > 0.')
  assert_ops += _assert(width >= (target_width + offset_width), ValueError,
                        'width must be >= target + offset.')
  assert_ops += _assert(height >= (target_height + offset_height), ValueError,
                        'height must be >= target + offset.')
  image = control_flow_ops.with_dependencies(assert_ops, image)

  cropped = array_ops.slice(image,
                            array_ops.stack([offset_height, offset_width, 0]),
                            array_ops.stack([target_height, target_width, -1]))

  cropped_shape = [None if _is_tensor(i) else i
                   for i in [target_height, target_width, depth]]
  cropped.set_shape(cropped_shape)

  return cropped
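
A usage sketch with the public tf.image.crop_to_bounding_box wrapper, assuming it exposes the same arguments as the implementation above:

import tensorflow as tf

image = tf.zeros([10, 8, 3])  # [height, width, channels]
patch = tf.image.crop_to_bounding_box(
    image, offset_height=2, offset_width=1, target_height=5, target_width=4)
# patch has shape [5, 4, 3]; offsets outside the image or non-positive
# target sizes raise the errors listed in the docstring.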
Example #10
  def testOpsBetweenUnreachable(self):
    with ops.Graph().as_default() as g:
      t1 = constant(1.0)
      t2 = constant(2.0)
      _ = array_ops.stack([t1, t2])
      t4 = constant(1.0)
      t5 = constant(2.0)
      t6 = array_ops.stack([t4, t5])
    # Elements of to_ops are always listed.
    self._assertOpListEqual([t6.op], _OpsBetween(g, [t6.op], [t1.op]))
Example #11
  def testPack_Axis1(self):
    inputs = [np.random.rand(4, 7) for _ in range(3)]
    tf_val = array_ops.stack(inputs, axis=1)
    c_val = tensor_util.constant_value(tf_val)
    self.assertIsNone(c_val)

    tf_val = array_ops.stack(
        [inputs[0], array_ops.placeholder(dtypes.float32), inputs[2]], axis=1)
    c_val = tensor_util.constant_value(tf_val)
    self.assertIs(None, c_val)
Example #12
  def testSequenceLoss(self):
    with self.session(use_gpu=True) as sess:
      with variable_scope.variable_scope(
          'root', initializer=init_ops.constant_initializer(0.5)):
        batch_size = 2
        sequence_length = 3
        number_of_classes = 5
        logits = [
            constant_op.constant(
                i + 0.5, shape=[batch_size, number_of_classes])
            for i in range(sequence_length)
        ]
        logits = array_ops.stack(logits, axis=1)
        targets = [
            constant_op.constant(
                i, dtypes.int32, shape=[batch_size])
            for i in range(sequence_length)
        ]
        targets = array_ops.stack(targets, axis=1)
        weights = [
            constant_op.constant(
                1.0, shape=[batch_size]) for i in range(sequence_length)
        ]
        weights = array_ops.stack(weights, axis=1)

        average_loss_per_example = loss.sequence_loss(
            logits, targets, weights,
            average_across_timesteps=True,
            average_across_batch=True)
        res = sess.run(average_loss_per_example)
        self.assertAllClose(1.60944, res)

        average_loss_per_sequence = loss.sequence_loss(
            logits, targets, weights,
            average_across_timesteps=False,
            average_across_batch=True)
        res = sess.run(average_loss_per_sequence)
        compare_per_sequence = np.ones((sequence_length)) * 1.60944
        self.assertAllClose(compare_per_sequence, res)

        average_loss_per_batch = loss.sequence_loss(
            logits, targets, weights,
            average_across_timesteps=True,
            average_across_batch=False)
        res = sess.run(average_loss_per_batch)
        compare_per_batch = np.ones((batch_size)) * 1.60944
        self.assertAllClose(compare_per_batch, res)

        total_loss = loss.sequence_loss(
            logits, targets, weights,
            average_across_timesteps=False,
            average_across_batch=False)
        res = sess.run(total_loss)
        compare_total = np.ones((batch_size, sequence_length)) * 1.60944
        self.assertAllClose(compare_total, res)
Example #13
  def inference_graph(self, input_data, **inference_args):
    """Constructs a TF graph for evaluating a random forest.

    Args:
      input_data: A tensor or dict of string->Tensor for the input data.
                  This input_data must generate the same spec as the
                  input_data used in training_graph:  the dict must have
                  the same keys, for example, and all tensors must have
                  the same size in their first dimension.
      **inference_args: Keyword arguments to pass through to each tree.

    Returns:
      A tuple of (probabilities, tree_paths, variance), where variance
      is the variance over all the trees for regression problems only.

    Raises:
      NotImplementedError: If trying to use feature bagging with sparse
        features.
    """
    processed_dense_features, processed_sparse_features, data_spec = (
        data_ops.ParseDataTensorOrDict(input_data))

    probabilities = []
    paths = []
    for i in range(self.params.num_trees):
      with ops.device(self.variables.device_dummies[i].device):
        tree_data = processed_dense_features
        if self.params.bagged_features:
          if processed_sparse_features is not None:
            raise NotImplementedError(
                'Feature bagging not supported with sparse features.')
          tree_data = self._bag_features(i, tree_data)
        probs, path = self.trees[i].inference_graph(
            tree_data,
            data_spec,
            sparse_features=processed_sparse_features,
            **inference_args)
        probabilities.append(probs)
        paths.append(path)
    with ops.device(self.variables.device_dummies[0].device):
      # shape of all_predict should be [batch_size, num_trees, num_outputs]
      all_predict = array_ops.stack(probabilities, axis=1)
      average_values = math_ops.div(
          math_ops.reduce_sum(all_predict, 1),
          self.params.num_trees,
          name='probabilities')
      tree_paths = array_ops.stack(paths, axis=1)
      regression_variance = None
      if self.params.regression:
        expected_squares = math_ops.div(
            math_ops.reduce_sum(all_predict * all_predict, 1),
            self.params.num_trees)
        regression_variance = math_ops.maximum(
            0., expected_squares - average_values * average_values)
      return average_values, tree_paths, regression_variance
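
A NumPy sketch (illustration only) of the averaging and variance computation performed on the stacked per-tree probabilities:

import numpy as np

num_trees, batch_size, num_outputs = 3, 2, 4
probabilities = [np.random.rand(batch_size, num_outputs)
                 for _ in range(num_trees)]

all_predict = np.stack(probabilities, axis=1)   # [batch, trees, outputs]
average_values = all_predict.sum(axis=1) / num_trees
expected_squares = (all_predict ** 2).sum(axis=1) / num_trees
regression_variance = np.maximum(0., expected_squares - average_values ** 2)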
Example #14
def _ctc_state_trans(label_seq):
  """Compute CTC alignment model transition matrix.

  Args:
    label_seq: tensor of shape [batch_size, max_seq_length]

  Returns:
    tensor of shape [batch_size, states, states] with a state transition matrix
    computed for each sequence of the batch.
  """

  with ops.name_scope("ctc_state_trans"):
    label_seq = ops.convert_to_tensor(label_seq, name="label_seq")
    batch_size = _get_dim(label_seq, 0)
    num_labels = _get_dim(label_seq, 1)

    num_label_states = num_labels + 1
    num_states = 2 * num_label_states

    label_states = math_ops.range(num_label_states)
    blank_states = label_states + num_label_states

    # Start state to first label.
    start_to_label = [[1, 0]]

    # Blank to label transitions.
    blank_to_label = array_ops.stack([label_states[1:], blank_states[:-1]], 1)

    # Label to blank transitions.
    label_to_blank = array_ops.stack([blank_states, label_states], 1)

    # Scatter transitions that don't depend on sequence.
    indices = array_ops.concat(
        [start_to_label, blank_to_label, label_to_blank], 0)
    values = array_ops.ones([_get_dim(indices, 0)])
    trans = array_ops.scatter_nd(
        indices, values, shape=[num_states, num_states])
    trans += linalg_ops.eye(num_states)  # Self-loops.

    # Label to label transitions. Disallow transitions between repeated labels
    # with no blank state in between.
    batch_idx = array_ops.zeros_like(label_states[2:])
    indices = array_ops.stack(
        [batch_idx, label_states[2:], label_states[1:-1]], 1)
    indices = array_ops.tile(
        array_ops.expand_dims(indices, 0), [batch_size, 1, 1])
    batch_idx = array_ops.expand_dims(math_ops.range(batch_size), 1) * [1, 0, 0]
    indices += array_ops.expand_dims(batch_idx, 1)
    repeats = math_ops.equal(label_seq[:, :-1], label_seq[:, 1:])
    values = 1.0 - math_ops.cast(repeats, dtypes.float32)
    batched_shape = [batch_size, num_states, num_states]
    label_to_label = array_ops.scatter_nd(indices, values, batched_shape)

    return array_ops.expand_dims(trans, 0) + label_to_label
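
A small NumPy illustration (not library code) of the bookkeeping above for a single two-label sequence, showing how repeated labels mask the label-to-label skip transition:

import numpy as np

label_seq = np.array([[7, 7]])       # batch of one sequence with a repeated label
batch_size, num_labels = label_seq.shape
num_label_states = num_labels + 1    # start state plus one state per label
num_states = 2 * num_label_states    # label states 0..2, blank states 3..5

repeats = label_seq[:, :-1] == label_seq[:, 1:]
skip_allowed = 1.0 - repeats.astype(np.float32)
# skip_allowed == [[0.]] here: the direct label-to-label transition between the
# repeated labels gets weight 0, forcing a blank state in between.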
Example #15
  def testPack_Axis0(self):
    inputs = [np.random.rand(4, 7) for _ in range(3)]
    np_val = np.array(inputs)
    tf_val = array_ops.stack(inputs)
    c_val = tensor_util.constant_value(tf_val)
    self.assertAllClose(np_val, c_val)

    tf_val = array_ops.stack(
        [inputs[0], array_ops.placeholder(dtypes.float32), inputs[2]])
    c_val = tensor_util.constant_value(tf_val)
    self.assertIs(None, c_val)
  def _testAllFormats(self,
                      superdiag,
                      maindiag,
                      subdiag,
                      rhs,
                      expected,
                      dtype=dtypes.float64):
    superdiag_extended = np.pad(superdiag, [0, 1], 'constant')
    subdiag_extended = np.pad(subdiag, [1, 0], 'constant')
    diags_compact = np.stack([superdiag_extended, maindiag, subdiag_extended])
    diags_matrix = np.diag(superdiag, 1) + np.diag(maindiag, 0) + np.diag(
        subdiag, -1)

    diags_sequence = (constant_op.constant(superdiag_extended, dtype),
                      constant_op.constant(maindiag, dtype),
                      constant_op.constant(subdiag_extended, dtype))
    diags_compact = constant_op.constant(diags_compact, dtype)
    diags_matrix = constant_op.constant(diags_matrix, dtype)
    rhs = constant_op.constant(rhs, dtype)

    rhs_batch = array_ops.stack([rhs, 2 * rhs])
    diags_compact_batch = array_ops.stack([diags_compact, 2 * diags_compact])
    diags_matrix_batch = array_ops.stack([diags_matrix, 2 * diags_matrix])
    diags_sequence_batch = [array_ops.stack([x, 2 * x]) for x in diags_sequence]

    results = [
        linalg_impl.tridiagonal_matmul(
            diags_sequence, rhs, diagonals_format='sequence'),
        linalg_impl.tridiagonal_matmul(
            diags_compact, rhs, diagonals_format='compact'),
        linalg_impl.tridiagonal_matmul(
            diags_matrix, rhs, diagonals_format='matrix')
    ]
    results_batch = [
        linalg_impl.tridiagonal_matmul(
            diags_sequence_batch, rhs_batch, diagonals_format='sequence'),
        linalg_impl.tridiagonal_matmul(
            diags_compact_batch, rhs_batch, diagonals_format='compact'),
        linalg_impl.tridiagonal_matmul(
            diags_matrix_batch, rhs_batch, diagonals_format='matrix')
    ]

    with self.cached_session(use_gpu=True):
      results = self.evaluate(results)
      results_batch = self.evaluate(results_batch)

    expected = np.array(expected)
    expected_batch = np.stack([expected, 4 * expected])
    for result in results:
      self.assertAllClose(result, expected)
    for result in results_batch:
      self.assertAllClose(result, expected_batch)
  def _log_prob(self, x):
    # By convention, we always put the grid points right-most.
    y = array_ops.stack(
        [aff.inverse(x) for aff in self.interpolated_affine],
        axis=-1)
    log_prob = math_ops.reduce_sum(self.distribution.log_prob(y), axis=-2)
    # Because the affine transformation has a constant Jacobian, it is the case
    # that `affine.fldj(x) = -affine.ildj(x)`. This is not true in general.
    fldj = array_ops.stack(
        [aff.forward_log_det_jacobian(x) for aff in self.interpolated_affine],
        axis=-1)
    return math_ops.reduce_logsumexp(
        self.mixture_distribution.logits - fldj + log_prob, axis=-1)
Example #18
  def _dict_to_tensor(self, x, k1, k2):
    """Convert a dictionary to a tensor.

    Args:
      x: a k1 * k2 dictionary.
      k1: first dimension of x.
      k2: second dimension of x.
    Returns:
      a k1 * k2 tensor.
    """

    return array_ops.stack([array_ops.stack([x[i, j] for j in range(k2)])
                            for i in range(k1)])
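
A tiny sketch of the same row-by-row stacking with the public tf.stack; the dictionary contents below are made up:

import tensorflow as tf

k1, k2 = 2, 3
x = {(i, j): tf.constant(float(10 * i + j))
     for i in range(k1) for j in range(k2)}

t = tf.stack([tf.stack([x[i, j] for j in range(k2)]) for i in range(k1)])
# t has shape [k1, k2] with t[i, j] == 10 * i + j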
Example #19
def _compute_energy_change(current_target_log_prob,
                           current_momentums,
                           proposed_target_log_prob,
                           proposed_momentums,
                           independent_chain_ndims,
                           name=None):
  """Helper to `kernel` which computes the energy change."""
  with ops.name_scope(
      name, "compute_energy_change",
      ([current_target_log_prob, proposed_target_log_prob,
        independent_chain_ndims] +
       current_momentums + proposed_momentums)):
    # Abbreviate lk0=log_kinetic_energy and lk1=proposed_log_kinetic_energy;
    # the short names are less of a mouthful and let us inline more.
    lk0, lk1 = [], []
    for current_momentum, proposed_momentum in zip(current_momentums,
                                                   proposed_momentums):
      axis = math_ops.range(independent_chain_ndims,
                            array_ops.rank(current_momentum))
      lk0.append(_log_sum_sq(current_momentum, axis))
      lk1.append(_log_sum_sq(proposed_momentum, axis))

    lk0 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk0, axis=-1),
                                                  axis=-1)
    lk1 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk1, axis=-1),
                                                  axis=-1)
    lp0 = -current_target_log_prob   # log_potential
    lp1 = -proposed_target_log_prob  # proposed_log_potential
    x = array_ops.stack([lp1, math_ops.exp(lk1), -lp0, -math_ops.exp(lk0)],
                        axis=-1)

    # The sum is NaN if any element is NaN or we see both +Inf and -Inf.
    # Thus we will replace such rows with infinite energy change which implies
    # rejection. Recall that float-comparisons with NaN are always False.
    is_sum_determinate = (
        math_ops.reduce_all(math_ops.is_finite(x) | (x >= 0.), axis=-1) &
        math_ops.reduce_all(math_ops.is_finite(x) | (x <= 0.), axis=-1))
    is_sum_determinate = array_ops.tile(
        is_sum_determinate[..., array_ops.newaxis],
        multiples=array_ops.concat([
            array_ops.ones(array_ops.rank(is_sum_determinate),
                           dtype=dtypes.int32),
            [4],
        ], axis=0))
    x = array_ops.where(is_sum_determinate,
                        x,
                        array_ops.fill(array_ops.shape(x),
                                       value=x.dtype.as_numpy_dtype(np.inf)))

    return math_ops.reduce_sum(x, axis=-1)
Example #20
  def _dict_to_tensor(self, x, k1, k2, k3):
    """Convert a dictionary to a tensor.

    Args:
      x: A k1 * k2 dictionary.
      k1: First dimension of x.
      k2: Second dimension of x.
      k3: Third dimension of x.
    Returns:
      A k1 * k2 * k3 tensor.
    """

    return array_ops.stack([array_ops.stack(
        [array_ops.stack([x[i, j, k] for k in range(k3)])
         for j in range(k2)]) for i in range(k1)])
Example #21
  def testIndexedSlicesToTensorList(self):
    with self.test_session():
      numpy_list = []
      dense_list = []
      sparse_list = []
      for _ in range(3):
        np_val = np.random.rand(4, 4, 4, 4).astype(np.float32)
        c = constant_op.constant(np_val)
        c_sparse = math_ops._as_indexed_slices(c)
        numpy_list.append(np_val)
        dense_list.append(c)
        sparse_list.append(c_sparse)
      packed_dense = array_ops.stack(dense_list)
      packed_sparse = array_ops.stack(sparse_list)
      self.assertAllClose(packed_dense.eval(), packed_sparse.eval())
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width, dynamic_shape=False):
  """Crops an image to a specified bounding box.

  This op cuts a rectangular part out of `image`. The top-left corner of the
  returned image is at `offset_height, offset_width` in `image`, and its
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Args:
    image: 3-D tensor with shape `[height, width, channels]`
    offset_height: Vertical coordinate of the top-left corner of the result in
                   the input.
    offset_width: Horizontal coordinate of the top-left corner of the result in
                  the input.
    target_height: Height of the result.
    target_width: Width of the result.
    dynamic_shape: Whether the input image has an undetermined shape. If set
      to `True`, shape information will be retrieved at run time. Defaults to
      `False`.

  Returns:
    3-D tensor of image with shape `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
    `target_*` arguments, and `dynamic_shape` is set to `False`.
  """
  image = tf.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=(not dynamic_shape))
  height, width, _ = _ImageDimensions(image, dynamic_shape=dynamic_shape)

  if not dynamic_shape:
    if offset_width < 0:
      raise ValueError('offset_width must be >= 0.')
    if offset_height < 0:
      raise ValueError('offset_height must be >= 0.')

    if width < (target_width + offset_width):
      raise ValueError('width must be >= target + offset.')
    if height < (target_height + offset_height):
      raise ValueError('height must be >= target + offset.')

  cropped = array_ops.slice(image,
                            array_ops.stack([offset_height, offset_width, 0]),
                            array_ops.stack([target_height, target_width, -1]))

  return cropped
Example #23
  def grow_tree(self, stats_summaries_list, feature_ids_list,
                last_layer_nodes_range):
    # When the data does not fit in memory, we need to accumulate enough
    # batches before proceeding with building a tree layer.
    max_splits = _get_max_splits(self._tree_hparams)

    # Prepare accumulators.
    accumulators = []
    dependencies = []
    for i, feature_ids in enumerate(feature_ids_list):
      stats_summaries = stats_summaries_list[i]
      accumulator = data_flow_ops.ConditionalAccumulator(
          dtype=dtypes.float32,
          # The stats consist of grads and hessians (the last dimension).
          shape=[len(feature_ids), max_splits, self._bucket_size_list[i], 2],
          shared_name='numeric_stats_summary_accumulator_' + str(i))
      accumulators.append(accumulator)

      apply_grad = accumulator.apply_grad(
          array_ops.stack(stats_summaries, axis=0), self._stamp_token)
      dependencies.append(apply_grad)

    # Grow the tree if enough batches are accumulated.
    with ops.control_dependencies(dependencies):
      if not self._is_chief:
        return control_flow_ops.no_op()

      min_accumulated = math_ops.reduce_min(
          array_ops.stack([acc.num_accumulated() for acc in accumulators]))

      def grow_tree_from_accumulated_summaries_fn():
        """Updates tree with the best layer from accumulated summaries."""
        # Take out the accumulated summaries from the accumulator and grow.
        stats_summaries_list = [
            array_ops.unstack(accumulator.take_grad(1), axis=0)
            for accumulator in accumulators
        ]
        grow_op = self._grow_tree_from_stats_summaries(
            stats_summaries_list, feature_ids_list, last_layer_nodes_range)
        return grow_op

      grow_model = control_flow_ops.cond(
          math_ops.greater_equal(min_accumulated, self._n_batches_per_layer),
          grow_tree_from_accumulated_summaries_fn,
          control_flow_ops.no_op,
          name='wait_until_n_batches_accumulated')
      return grow_model
  def _testFeedSerializeDeserializeBatchHelper(self,
                                               serialize_fn,
                                               deserialize_fn,
                                               out_type=dtypes.string):
    with self.cached_session(use_gpu=False) as sess:
      sp_input0 = self._SparseTensorPlaceholder()
      sp_input1 = self._SparseTensorPlaceholder()
      input0_val = self._SparseTensorValue_5x6(np.arange(6))
      input1_val = self._SparseTensorValue_3x4(np.arange(6))
      serialized0 = serialize_fn(sp_input0, out_type=out_type)
      serialized1 = serialize_fn(sp_input1, out_type=out_type)
      serialized_concat = array_ops.stack([serialized0, serialized1])

      sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int32)

      combined_indices, combined_values, combined_shape = sess.run(
          sp_deserialized, {sp_input0: input0_val,
                            sp_input1: input1_val})

      self.assertAllEqual(combined_indices[:6, 0], [0] * 6)  # minibatch 0
      self.assertAllEqual(combined_indices[:6, 1:], input0_val[0])
      self.assertAllEqual(combined_indices[6:, 0], [1] * 6)  # minibatch 1
      self.assertAllEqual(combined_indices[6:, 1:], input1_val[0])
      self.assertAllEqual(combined_values[:6], input0_val[1])
      self.assertAllEqual(combined_values[6:], input1_val[1])
      self.assertAllEqual(combined_shape, [2, 5, 6])
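
A hedged round-trip sketch of the batched serialize/deserialize path using the public tf.io API; the serialize/deserialize functions exercised by the helper are parameters, so the specific calls below are an assumption:

import tensorflow as tf

sp0 = tf.sparse.SparseTensor(
    indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[5, 6])
sp1 = tf.sparse.SparseTensor(
    indices=[[0, 1]], values=[3], dense_shape=[3, 4])

serialized = tf.stack(
    [tf.io.serialize_sparse(sp0), tf.io.serialize_sparse(sp1)])
combined = tf.io.deserialize_many_sparse(serialized, dtype=tf.int32)
# combined.dense_shape == [2, 5, 6]: one leading minibatch dimension, with the
# remaining dimensions expanded to the elementwise max of the inputs.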
  def _broadcast_uniform_partitioned_dimension(self, axis, lengths):
    """Broadcasts the partitioned dimension `axis` to match `lengths`."""
    axis_dim_size = self.dimension_size(axis)
    partitioned_sizes = list(self._partitioned_dim_sizes[:axis])

    if lengths.shape.ndims == 0:
      lengths = array_ops.where(
          math_ops.equal(axis_dim_size, 1), lengths, axis_dim_size)
      repeats = array_ops.where(math_ops.equal(axis_dim_size, 1), lengths, 1)
      splits = array_ops.stack([0, self.num_slices_in_dimension(axis)])
    else:
      splits = math_ops.range(
          array_ops.size(lengths, out_type=self.dim_size_dtype) + 1)
      repeats = lengths

    partitioned_sizes.append(lengths)

    for dim_size in self._partitioned_dim_sizes[axis + 1:]:
      if dim_size.shape.ndims == 0:
        partitioned_sizes.append(dim_size)
        splits *= dim_size
      else:
        partitioned_sizes.append(
            ragged_util.repeat_ranges(dim_size, splits, repeats))
        splits = array_ops.gather(
            ragged_util.lengths_to_splits(dim_size), splits)
    inner_sizes = self._inner_dim_sizes
    return RaggedTensorDynamicShape(partitioned_sizes, inner_sizes)
  def testNonSequenceNestedStructure(self):
    components = np.array([1, 2, 3], dtype=np.int64)

    dataset = dataset_ops.Dataset.from_tensors(components)
    self.assertEquals(dtypes.int64, dataset.output_types)
    self.assertEquals([3], dataset.output_shapes)

    dataset = dataset.filter(
        lambda x: math_ops.reduce_all(math_ops.equal(x, components)))
    self.assertEquals(dtypes.int64, dataset.output_types)
    self.assertEquals([3], dataset.output_shapes)

    dataset = dataset.map(lambda x: array_ops.stack([x, x]))
    self.assertEquals(dtypes.int64, dataset.output_types)
    self.assertEquals([2, 3], dataset.output_shapes)

    dataset = dataset.flat_map(
        lambda x: dataset_ops.Dataset.from_tensor_slices(x))
    self.assertEquals(dtypes.int64, dataset.output_types)
    self.assertEquals([3], dataset.output_shapes)

    iterator = dataset.make_one_shot_iterator()
    get_next = iterator.get_next()
    self.assertEquals(dtypes.int64, get_next.dtype)
    self.assertEquals([3], get_next.shape)
Example #27
  def testAggregate(self):
    a = array_ops.constant([3., 4.])
    b = array_ops.constant([5., 6.])
    hint = op_hint.OpHint("agg")
    a0, a1 = array_ops.unstack(a)
    b0, b1 = array_ops.unstack(b)

    a0 = hint.add_input(a0, tag="c", aggregate=op_hint.OpHint.AGGREGATE_STACK)
    b0 = hint.add_input(b0, tag="n", aggregate=op_hint.OpHint.AGGREGATE_STACK)
    a1 = hint.add_input(a1, tag="c", aggregate=op_hint.OpHint.AGGREGATE_STACK)
    b1 = hint.add_input(b1, tag="n", aggregate=op_hint.OpHint.AGGREGATE_STACK)

    c0 = math_ops.add(a0, b0, name="addleft")
    c1 = math_ops.add(a1, b1, name="addright")
    c0 = hint.add_output(
        c0, tag="out", aggregate=op_hint.OpHint.AGGREGATE_STACK)
    c1 = hint.add_output(
        c1, tag="out", aggregate=op_hint.OpHint.AGGREGATE_STACK)

    curr = array_ops.stack([c0, c1])
    output = array_ops.identity(curr, name="FINAL_OUTPUT")
    with self.cached_session() as sess:
      stubbed_graphdef = op_hint.convert_op_hints_to_stubs(
          graph_def=sess.graph_def)
      self.assertEqual(
          self._getGraphOpTypes(
              stubbed_graphdef,
              output_nodes=[op_hint._tensor_name_base(output.name)]),
          set(["agg", "Const", "Identity"]))
Example #28
  def feature_importances(self):
    tree_counts = [
        self.trees[i].feature_usage_counts()
        for i in range(self.params.num_trees)
    ]
    total_counts = math_ops.reduce_sum(array_ops.stack(tree_counts, 0), 0)
    return total_counts / math_ops.reduce_sum(total_counts)
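
A NumPy sketch (illustration only) of the normalization above: per-tree usage counts are summed across trees and divided by the grand total.

import numpy as np

# Usage counts for 3 trees over 4 features (made-up numbers).
tree_counts = np.array([[2, 0, 1, 1],
                        [1, 1, 0, 2],
                        [0, 2, 2, 0]], dtype=np.float32)

total_counts = tree_counts.sum(axis=0)
feature_importances = total_counts / total_counts.sum()
# feature_importances sums to 1.0; here every feature gets 0.25.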
Example #29
def _TopKGrad(op, grad, _):
  """Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t to the input and
    TopK, and the second being the gradient w.r.t. to the indices (all zero).
  """
  in_shape = array_ops.shape(op.inputs[0])
  ind_shape = array_ops.shape(op.outputs[1])

  ind_lastdim = array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1)
  # Flatten indices to 2D.
  ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack([-1, ind_lastdim]))

  in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1)
  outerdim = array_ops.shape(ind_2d)[0]
  # Compute linear indices (flattened to 1D).
  ind = array_ops.reshape(ind_2d + array_ops.expand_dims(
      math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1), [-1])

  # Substitute grad to appropriate locations and fill the rest with zeros,
  # finally reshaping it to the original input shape.
  return [array_ops.reshape(
      sparse_ops.sparse_to_dense(ind,
                                 array_ops.reshape(
                                     math_ops.reduce_prod(in_shape), [1]),
                                 array_ops.reshape(grad, [-1]),
                                 validate_indices=False),
      in_shape), array_ops.zeros(
          [], dtype=dtypes.int32)]
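
A NumPy sketch (illustration only) of the index flattening step above, which turns per-row top-k column indices into indices of the flattened input:

import numpy as np

in_lastdim = 4                      # last dimension of the TopK input, shape [2, 4]
ind_2d = np.array([[3, 1],          # per-row column indices returned by TopK, k = 2
                   [0, 2]])
outerdim = ind_2d.shape[0]

row_offsets = np.arange(0, outerdim * in_lastdim, in_lastdim)[:, None]
ind = np.reshape(ind_2d + row_offsets, [-1])
# ind == [3, 1, 4, 6]: positions in the flattened input where the incoming
# gradient is scattered; every other position receives zero.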
  def _shape_dynamic(self):
    matrix_shape = array_ops.stack(
        (self._num_rows, self._num_rows), axis=0)
    if self._batch_shape_arg is None:
      return matrix_shape

    return array_ops.concat((self._batch_shape_arg, matrix_shape), 0)
Example #31
def _embedding_lookup_with_distributed_aggregation(params,
                                                   ids,
                                                   partition_strategy="mod",
                                                   name=None,
                                                   max_norm=None,
                                                   weights=None,
                                                   idx=None,
                                                   segment_ids=None):
    """Lookup helper for embedding_lookup_sparse_with_distributed_aggregation."""
    if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
        raise ValueError("Need at least one param")
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]

    def maybe_normalize(x):
        if max_norm is not None:
            if x.get_shape().ndims is not None:
                ndims = x.get_shape().ndims
            else:
                ndims = array_ops.size(array_ops.shape(x))
            return clip_ops.clip_by_norm(x,
                                         max_norm,
                                         axes=list(range(1, ndims)))
        return x

    with ops.name_scope(name, "embedding_lookup_with_distributed_aggregation",
                        params + [ids]) as name:
        np = len(params)  # Number of partitions
        # Preserve the resource variable status to avoid accidental dense reads.
        if not any(
                isinstance(p, resource_variable_ops.ResourceVariable)
                for p in params):
            params = ops.convert_n_to_tensor_or_indexed_slices(params,
                                                               name="params")
        if np == 1:
            with ops.colocate_with(params[0]):
                ret = maybe_normalize(_do_gather(params[0], ids))
                ignore_weights = weights is None
                if not ignore_weights:
                    if weights.dtype != ret.dtype:
                        weights = math_ops.cast(weights, ret.dtype)
                    # Reshape to allow broadcast
                    ones = array_ops.fill(
                        array_ops.expand_dims(array_ops.rank(ret) - 1, 0), 1)
                    bcast_weights_shape = array_ops.concat(
                        [array_ops.shape(weights), ones], 0)
                    orig_weights_shape = weights.get_shape()
                    weights = array_ops.reshape(weights, bcast_weights_shape)
                    # Set weights shape after reshape
                    if ret.get_shape().ndims is not None:
                        weights.set_shape(
                            orig_weights_shape.concatenate(
                                [1 for _ in range(ret.get_shape().ndims - 1)]))
                    ret *= weights
                    return math_ops.segment_sum(ret, segment_ids, name=name)
                else:
                    return math_ops.sparse_segment_sum(ret,
                                                       idx,
                                                       segment_ids,
                                                       name=name)
        else:
            ids = ops.convert_to_tensor(ids, name="ids")
            flat_ids = array_ops.reshape(ids, [-1])
            original_indices = math_ops.range(array_ops.size(flat_ids))

            # Create p_assignments and set new_ids depending on the strategy.
            if partition_strategy == "mod":
                p_assignments = flat_ids % np
                new_ids = flat_ids // np
            elif partition_strategy == "div":
                # Compute num_total_ids as the sum of dim-0 of params, then assign to
                # partitions based on a constant number of ids per partition. Optimize
                # if we already know the full shape statically.
                dim_0_size = params[0].get_shape()[0]
                for p in xrange(1, np):
                    dim_0_size += params[p].get_shape()[0]
                if dim_0_size.value:
                    num_total_ids = constant_op.constant(
                        dim_0_size.value, flat_ids.dtype)
                else:
                    dim_0_sizes = []
                    for p in xrange(np):
                        if params[p].get_shape()[0].value is not None:
                            dim_0_sizes.append(params[p].get_shape()[0].value)
                        else:
                            with ops.colocate_with(params[p]):
                                dim_0_sizes.append(
                                    array_ops.shape(params[p])[0])
                    num_total_ids = math_ops.reduce_sum(
                        math_ops.cast(array_ops.stack(dim_0_sizes),
                                      flat_ids.dtype))
                ids_per_partition = num_total_ids // np
                extras = num_total_ids % np

                p_assignments = math_ops.maximum(
                    flat_ids // (ids_per_partition + 1),
                    (flat_ids - extras) // ids_per_partition)

                # Emulate a conditional using a boolean indicator tensor
                is_in_first_extras_partitions = math_ops.cast(
                    p_assignments < extras, flat_ids.dtype)
                new_ids = (is_in_first_extras_partitions *
                           (flat_ids % (ids_per_partition + 1)) +
                           (1 - is_in_first_extras_partitions) *
                           ((flat_ids - extras) % ids_per_partition))
            else:
                raise ValueError("Unrecognized partition strategy: " +
                                 partition_strategy)

            # Cast partition assignments to int32 for use in dynamic_partition.
            # There really should not be more than 2^32 partitions.
            p_assignments = math_ops.cast(p_assignments, dtypes.int32)
            # Partition list of ids based on assignments into np separate lists
            gather_ids = data_flow_ops.dynamic_partition(
                new_ids, p_assignments, np)
            # Similarly, partition the original indices.
            pindices = data_flow_ops.dynamic_partition(original_indices,
                                                       p_assignments, np)
            # Do np separate lookups, finding embeddings for plist[p] in params[p]
            partitioned_result = []
            for p in xrange(np):
                with ops.colocate_with(params[p]):
                    partitioned_result.append(
                        _do_gather(params[p], gather_ids[p]))

            ignore_weights = weights is None
            if not ignore_weights:
                # Partition weights according to pindices.
                partitioned_weight = []
                for p in xrange(np):
                    partitioned_weight.append(
                        array_ops.gather(weights, pindices[p]))
            # Reshape each partition result.
            element_shape = params[0].get_shape()[1:]
            for p in params[1:]:
                element_shape = element_shape.merge_with(p.get_shape()[1:])
            if element_shape.is_fully_defined():
                for p in xrange(np):
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = array_ops.reshape(
                            partitioned_result[p],
                            array_ops.concat(
                                [array_ops.shape(pindices[p]), element_shape],
                                0))
            else:
                with ops.colocate_with(params[0]):
                    params_shape = array_ops.shape(params[0])
                for p in xrange(np):
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = array_ops.reshape(
                            partitioned_result[p],
                            array_ops.concat([
                                array_ops.shape(pindices[p]),
                                array_ops.slice(params_shape, [1], [-1])
                            ], 0))
            # Normalize each partition result.
            for p in xrange(np):
                with ops.colocate_with(params[p]):
                    partitioned_result[p] = maybe_normalize(
                        partitioned_result[p])
            if not ignore_weights:
                # Multiply each partition result with partition weights.
                for p in xrange(np):
                    with ops.colocate_with(params[p]):
                        if partitioned_weight[p].dtype != partitioned_result[
                                p].dtype:
                            partitioned_weight[p] = math_ops.cast(
                                partitioned_weight[p],
                                partitioned_result[p].dtype)
                        # Reshape partition weights.
                        ones = array_ops.fill(
                            array_ops.expand_dims(
                                array_ops.rank(partitioned_result[p]) - 1, 0),
                            1)
                        bcast_weights_shape = array_ops.concat(
                            [array_ops.shape(partitioned_weight[p]), ones], 0)
                        orig_weights_shape = partitioned_weight[p].get_shape()
                        partitioned_weight[p] = array_ops.reshape(
                            partitioned_weight[p], bcast_weights_shape)
                        if partitioned_result[p].get_shape().ndims is not None:
                            partitioned_weight[p].set_shape(
                                orig_weights_shape.concatenate([
                                    1 for _ in range(partitioned_result[p].
                                                     get_shape().ndims - 1)
                                ]))
                        partitioned_result[p] *= partitioned_weight[p]
            partitioned_segment_ids = []
            for p in xrange(np):
                if not ignore_weights:
                    # Partition segment_ids according to pindices.
                    p_segment_ids = array_ops.gather(segment_ids, pindices[p])
                    # Number the p_segment_ids to meet segment_sum's requirements. Note
                    # that unique_p_segment_ids contains unique segment ids of this
                    # partition and these ids' order is unchanged.
                    unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
                        p_segment_ids)
                    partitioned_segment_ids.append(unique_p_segment_ids)
                    # segment_sum this partition's result.
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = math_ops.segment_sum(
                            partitioned_result[p], unique_p_segment_idx)
                else:
                    # When ignoring weights, we need to get the indices of
                    # elements in idx and segment_ids.
                    _, exclude_idx = array_ops.setdiff1d(idx, pindices[p])
                    all_idx = math_ops.range(array_ops.shape(idx)[0])
                    _, include_idx = array_ops.setdiff1d(all_idx, exclude_idx)
                    # Gather segment_ids and idx according to those indices.
                    p_segment_ids = array_ops.gather(segment_ids, include_idx)
                    p_idx = array_ops.gather(idx, include_idx)
                    # Number the p_segment_ids, same as in the weighted case above.
                    unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
                        p_segment_ids)
                    _, unique_p_idx_idx = array_ops.unique(p_idx)
                    partitioned_segment_ids.append(unique_p_segment_ids)
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = math_ops.sparse_segment_sum(
                            partitioned_result[p], unique_p_idx_idx,
                            unique_p_segment_idx)
            # Concat each partition's segment_ids and result for final segment_sum.
            concat_segment_ids = array_ops.concat(partitioned_segment_ids, 0)
            concat_partitioned_result = array_ops.concat(partitioned_result, 0)
            return math_ops.unsorted_segment_sum(
                concat_partitioned_result,
                concat_segment_ids,
                math_ops.reduce_max(concat_segment_ids) + 1,
                name=name)
Example #32
def beam_attention_decoder(decoder_inputs,
                           initial_state,
                           attention_states,
                           cell,
                           embedding,
                           output_size=None,
                           num_heads=1,
                           loop_function=None,
                           dtype=None,
                           scope=None,
                           initial_state_attention=False,
                           output_projection=None,
                           beam_size=10):
    if not decoder_inputs:
        raise ValueError("Must provide at least 1 input to attention decoder.")
    if num_heads < 1:
        raise ValueError("With less than 1 heads, use a non-attention decoder.")
    if not attention_states.get_shape()[1:2].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                         % attention_states.get_shape())
    if output_size is None:
        output_size = cell.output_size

    with variable_scope.variable_scope(scope or "attention_decoder", dtype=dtype) as scope:
        dtype = scope.dtype
        # batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
        attn_length = attention_states.get_shape()[1].value
        if attn_length is None:
            attn_length = array_ops.shape(attention_states)[1]
        attn_size = attention_states.get_shape()[2].value

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
        hidden = array_ops.reshape(attention_states, [-1, attn_length, 1, attn_size])
        hidden_features = []
        v = []
        attention_vec_size = attn_size  # Size of query vectors for attention.
        for a in xrange(num_heads):
            k = variable_scope.get_variable("AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
            hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
            v.append(variable_scope.get_variable("AttnV_%d" % a, [attention_vec_size]))

        state = []
        # Tile the encoder's final hidden state beam_size times, because the
        # decoder-side batch_size is beam_size.
        # initial_state is a list with one element per RNN layer; each element
        # is an LSTMStateTuple holding the c and h hidden states, so we tile c
        # and h separately and then repack them into an LSTMStateTuple.
        for layers in initial_state:
            c = [layers.c] * beam_size
            h = [layers.h] * beam_size
            c = tf.concat(c, 0)
            h = tf.concat(h, 0)
            state.append(rnn_cell_impl.LSTMStateTuple(c, h))
        state = tuple(state)
        # state_size = int(initial_state.get_shape().with_rank(2)[1])
        # states = []
        # for kk in range(beam_size):
        #     states.append(initial_state)
        # state = tf.concat(states, 0)
        # state = initial_state

        def attention(query):
            ds = []  # Results of attention reads will be stored here.
            if nest.is_sequence(query):  # If the query is a tuple, flatten it.
                query_list = nest.flatten(query)
                for q in query_list:  # Check that ndims == 2 if specified.
                    ndims = q.get_shape().ndims
                    if ndims:
                        assert ndims == 2
                query = array_ops.concat(query_list, 1)
            for a in xrange(num_heads):
                with variable_scope.variable_scope("Attention_%d" % a):
                    y = Linear(query, attention_vec_size, True)#(query)
                    y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                    # Attention mask is a softmax of v^T * tanh(...).
                    s = math_ops.reduce_sum(v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
                    a = nn_ops.softmax(s)
                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2])
                    ds.append(array_ops.reshape(d, [-1, attn_size]))
            return ds

        outputs = []
        prev = None
        # The attention vectors must also be created with a beam_size batch dimension.
        batch_attn_size = array_ops.stack([beam_size, attn_size])
        attns = [array_ops.zeros(batch_attn_size, dtype=dtype) for _ in xrange(num_heads)]
        for a in attns:  # Ensure the second shape of attention vectors is set.
            a.set_shape([None, attn_size])
        if initial_state_attention:
            attns = attention(initial_state)

        log_beam_probs, beam_path, beam_symbols = [], [], []
        for i, inp in enumerate(decoder_inputs):
            if i > 0:
                variable_scope.get_variable_scope().reuse_variables()
            # If loop_function is set, we use it instead of decoder_inputs.
            if i == 0:
                # At i == 0 the input is a tensor with batch_size = beam_size whose elements all hold the same <GO> token.
                inp = tf.nn.embedding_lookup(embedding, tf.constant(1, dtype=tf.int32, shape=[beam_size]))

            if loop_function is not None and prev is not None:
                with variable_scope.variable_scope("loop_function", reuse=True):
                    inp = loop_function(prev, i, log_beam_probs, beam_path, beam_symbols)
            # Merge input and previous attentions into one vector of the right size.
            input_size = inp.get_shape().with_rank(2)[1]
            if input_size.value is None:
                raise ValueError("Could not infer input size from input: %s" % inp.name)
            inputs = [inp] + attns
            x = Linear(inputs, input_size, True)#(inputs)

            # Run the RNN.
            cell_output, state = cell(x, state)
            # Run the attention mechanism.
            if i == 0 and initial_state_attention:
                with variable_scope.variable_scope(variable_scope.get_variable_scope(), reuse=True):
                    attns = attention(state)
            else:
                attns = attention(state)

            with variable_scope.variable_scope("AttnOutputProjection"):
                inputs = [cell_output] + attns
                output = Linear(inputs, output_size, True)#(inputs)
            if loop_function is not None:
                prev = output
            outputs.append(tf.argmax(nn_ops.xw_plus_b(output, output_projection[0], output_projection[1]), axis=1))

    return outputs, state, tf.reshape(tf.concat(beam_path, 0), [-1, beam_size]), tf.reshape(tf.concat(beam_symbols, 0),
                                                                                            [-1, beam_size])
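A minimal sketch (not part of the example above) of the state-tiling step its comments describe: each layer's LSTMStateTuple is repeated beam_size times along the batch axis so the decoder can run with batch_size = beam_size. The toy shapes and the beam_size value are assumptions.

import tensorflow as tf  # sketch assumes TF 1.x APIs

beam_size = 3
# A toy two-layer encoder final state: one LSTMStateTuple per layer.
initial_state = tuple(
    tf.nn.rnn_cell.LSTMStateTuple(c=tf.zeros([1, 4]), h=tf.ones([1, 4]))
    for _ in range(2))

tiled_state = []
for layer_state in initial_state:
    # Repeat c and h beam_size times along the batch dimension ...
    c = tf.concat([layer_state.c] * beam_size, axis=0)  # [beam_size, 4]
    h = tf.concat([layer_state.h] * beam_size, axis=0)  # [beam_size, 4]
    # ... and reassemble them into an LSTMStateTuple, as the decoder above does.
    tiled_state.append(tf.nn.rnn_cell.LSTMStateTuple(c, h))
tiled_state = tuple(tiled_state)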
Example #33
 def call(self, inputs):
   if not isinstance(inputs, (list, tuple)):
     raise ValueError('A merge layer should be called on a list of inputs.')
   if self._reshape_required:
     reshaped_inputs = []
     input_ndims = list(map(backend.ndim, inputs))
     if None not in input_ndims:
       # If ranks of all inputs are available,
       # we simply expand each of them at axis=1
       # until all of them have the same rank.
       max_ndim = max(input_ndims)
       for x in inputs:
         x_ndim = backend.ndim(x)
         for _ in range(max_ndim - x_ndim):
           x = array_ops.expand_dims(x, axis=1)
         reshaped_inputs.append(x)
       return self._merge_function(reshaped_inputs)
     else:
       # Transpose all inputs so that batch size is the last dimension.
       # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size)
       transposed = False
       for x in inputs:
         x_ndim = backend.ndim(x)
         if x_ndim is None:
           x_shape = array_ops.shape(x)
           batch_size = x_shape[0]
           new_shape = backend.concatenate(
               [x_shape[1:],
                array_ops.expand_dims(batch_size, axis=-1)])
           x_transposed = array_ops.reshape(
               x,
               array_ops.stack(
                   [batch_size, math_ops.reduce_prod(x_shape[1:])], axis=0))
           x_transposed = array_ops.transpose(x_transposed, perm=(1, 0))
           x_transposed = array_ops.reshape(x_transposed, new_shape)
           reshaped_inputs.append(x_transposed)
           transposed = True
         elif x_ndim > 1:
           dims = list(range(1, x_ndim)) + [0]
           reshaped_inputs.append(array_ops.transpose(x, perm=dims))
           transposed = True
         else:
           # We don't transpose inputs if they are 1D vectors or scalars.
           reshaped_inputs.append(x)
       y = self._merge_function(reshaped_inputs)
       y_ndim = backend.ndim(y)
       if transposed:
         # If inputs have been transposed, we have to transpose the output too.
         if y_ndim is None:
           y_shape = array_ops.shape(y)
           y_ndim = array_ops.shape(y_shape)[0]
           batch_size = y_shape[y_ndim - 1]
           new_shape = backend.concatenate([
               array_ops.expand_dims(batch_size, axis=-1), y_shape[:y_ndim - 1]
           ])
           y = array_ops.reshape(y, (-1, batch_size))
           y = array_ops.transpose(y, perm=(1, 0))
           y = array_ops.reshape(y, new_shape)
         elif y_ndim > 1:
           dims = [y_ndim - 1] + list(range(y_ndim - 1))
           y = array_ops.transpose(y, perm=dims)
       return y
   else:
     return self._merge_function(inputs)
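A minimal sketch (not from the snippet) of the rank-expansion branch of `call` above: when every input's rank is known, each input is expanded at axis=1 until all inputs share the maximum rank, so an element-wise merge can broadcast. The shapes are illustrative assumptions.

import tensorflow as tf  # illustrative sketch

a = tf.zeros([8, 16])       # rank 2
b = tf.zeros([8, 4, 16])    # rank 3
inputs = [a, b]

max_ndim = max(len(x.shape) for x in inputs)
reshaped_inputs = []
for x in inputs:
    # Expand at axis=1 until every tensor has rank max_ndim, mirroring the
    # `_reshape_required` branch above.
    while len(x.shape) < max_ndim:
        x = tf.expand_dims(x, axis=1)
    reshaped_inputs.append(x)
merged = reshaped_inputs[0] + reshaped_inputs[1]  # broadcasts to [8, 4, 16]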
Example #34
def rnn(step_fn,
        inputs,
        initial_states,
        go_backwards=False,
        unroll=False,
        input_length=None,
        name='rnn_block'):
    with ops.name_scope(name):
        dim = ndim(inputs)
        if dim < 3:
            raise ValueError("Input should be at least 3D")
        perm = [1, 0] + list(range(2, dim))
        inputs = array_ops.transpose(inputs, perm=perm, name='to_time_major')
        if unroll:
            assert int_shape(inputs)[0] is not None,\
                "Unrolling requires a fixed number of time steps"
            states = initial_states
            successive_states = []
            successive_outputs = []
            input_list = array_ops.unstack(inputs)
            if go_backwards:
                input_list.reverse()
            for x in input_list:
                outputs, states = step_fn(x, states)
                successive_outputs.append(outputs)
                successive_states.append(states)
            last_output = successive_outputs[-1]
            new_states = successive_states[-1]
            outputs = array_ops.stack(successive_outputs)
        else:
            if go_backwards:
                inputs = array_ops.reverse(inputs, axis=0)
            states = tuple(initial_states)
            time_steps = array_ops.shape(inputs)[0]
            outputs, _ = step_fn(inputs[0], initial_states)
            output_ta = tensor_array_ops.TensorArray(
                dtype=outputs.dtype,
                size=time_steps,
                tensor_array_name='output_ta')
            input_ta = tensor_array_ops.TensorArray(
                dtype=inputs.dtype,
                size=time_steps,
                tensor_array_name='input_ta')
            # unstack inputs and write into input array
            input_ta = input_ta.unstack(inputs)
            time = array_ops.constant(0, dtype='int32', name='time')

            def _step(_time, _output_ta, *_states):
                current_input = input_ta.read(_time)
                output, _new_states = step_fn(current_input, tuple(_states))
                for state, new_state in zip(_states, _new_states):
                    new_state.set_shape(state.get_shape())
                _output_ta = _output_ta.write(_time, output)
                return (_time + 1, _output_ta) + tuple(_new_states)

            final_outputs = control_flow_ops.while_loop(
                cond=lambda _time, *_: _time < time_steps,
                body=_step,
                loop_vars=(time, output_ta) + states,
                parallel_iterations=32,
                swap_memory=True,
                maximum_iterations=input_length)
            last_time = final_outputs[0]
            output_ta = final_outputs[1]
            new_states = final_outputs[2:]
            outputs = output_ta.stack()
            last_output = output_ta.read(last_time - 1)
        perm = [1, 0] + list(range(2, ndim(outputs)))
        outputs = array_ops.transpose(outputs, perm=perm)
    return last_output, outputs, new_states
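A hedged usage sketch of the `rnn` helper above with a trivial step function; `rnn` (and the `ndim`/`int_shape` helpers it relies on) are assumed to come from the same module, and the shapes below are made up.

import tensorflow as tf  # sketch only

def step_fn(x, states):
    # Toy "RNN": the single state is a running sum of the inputs, and the
    # per-step output equals that running sum.
    (prev_sum,) = states
    new_sum = prev_sum + x
    return new_sum, (new_sum,)

inputs = tf.ones([4, 2, 3])          # [batch, time, features]
initial_states = (tf.zeros([4, 3]),)
last_output, outputs, new_states = rnn(step_fn, inputs, initial_states,
                                       unroll=True)
# outputs has shape [4, 2, 3] (batch-major); last_output is the final step.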
Example #35
def calibration_layer(uncalibrated_tensor,
                      num_keypoints,
                      keypoints_initializers=None,
                      keypoints_initializer_fns=None,
                      bound=False,
                      monotonic=None,
                      missing_input_values=None,
                      missing_output_values=None,
                      name=None,
                      **regularizer_amounts):
  """Creates a calibration layer for uncalibrated values.

  Returns a calibrated tensor of the same shape as the uncalibrated continuous
  signals passed in, and a list of projection ops, that must be applied at
  each step (or every so many steps) to project the model to a feasible space:
  used for bounding the outputs or for imposing monotonicity -- the list will be
  empty if bound and monotonic are not set.

  Args:
    uncalibrated_tensor: Tensor of shape [batch_size, ...] with uncalibrated
      values.
    num_keypoints: Number of keypoints to use. Either a scalar value that
      will be used for every uncalibrated signal, or a list of n values, one
      per uncalibrated signal -- the uncalibrated tensor is first flattened
      (see tf.contrib.layers.flatten) to [batch_size, n], and the list should
      contain one value per signal. If a value of the list is 0 or None the
      corresponding signal won't be calibrated.
    keypoints_initializers: For evaluation or inference (or when resuming
      training from a checkpoint) the values will be loaded from disk, so they
      don't need to be given (leave it as None).
      Otherwise provide either a tuple of two tensors of shape [num_keypoints],
      or a list of n pairs of tensors, each of shape [num_keypoints]. In this
      list there should be one pair per uncalibrated signal, just like
      num_keypoints above. Notice that num_keypoints can be different per
      signal.
    keypoints_initializer_fns: Like keypoints_initializers but using lambda
      initializers. They should be compatible with tf.get_variable. If this is
      set, then keypoints_initializers must be None.
    bound: boolean whether output of calibration must be bound. Alternatively
      a list of n booleans, one per uncalibrated value, like num_keypoints
      above.
    monotonic: whether calibration is monotonic: None or 0 means no
      monotonicity. Positive or negative values mean increasing or decreasing
      monotonicity respectively. Alternatively a list of n monotonic values,
      one per uncalibrated value, like num_keypoints above.
    missing_input_values: If set, and if the input has this value it is assumed
      to be missing and the output will either be calibrated to some value
      between `[calibration_output_min, calibration_output_max]` or set to a
      fixed value set by missing_output_value. Limitation: it only works for
      scalars. Either one value for all inputs, or a list with one value per
      uncalibrated value.
    missing_output_values: Requires missing_input_value also to be set. If set
      it will convert missing input to this value. Either one value for all
      outputs, or a list with one value per uncalibrated value.
    name: Name scope for operations.
    **regularizer_amounts: Keyword args of regularization amounts passed to
      regularizers.calibrator_regularization(). Keyword names should be among
      supported regularizers.CALIBRATOR_REGULARIZERS and values should be
      either float or list of floats. If float, then same value is applied to
      all input signals.

  Returns:
    A tuple of:
    * calibrated tensor of shape [batch_size, ...], the same shape as
      uncalibrated.
    * list of projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity. Empty if none are requested.
    * None or tensor with regularization loss.

  Raises:
    ValueError: If dimensions don't match.
  """
  with ops.name_scope(name or 'calibration_layer'):
    # Flattening uncalibrated tensor [batch_Size, k1, k2, ..., kn] to
    # [batch_size, k1 * k2 * ... * kn].
    uncalibrated_shape = uncalibrated_tensor.get_shape().as_list()
    n = 1
    for non_batch_dim in uncalibrated_shape[1:]:
      n *= non_batch_dim
    flat_uncalibrated = array_ops.reshape(
        uncalibrated_tensor, shape=[-1, n], name='flat_uncalibrated')

    num_keypoints = tools.cast_to_list(num_keypoints, n, 'num_keypoints')
    keypoints_initializers = tools.cast_to_list(keypoints_initializers, n,
                                                'keypoints_initializers')
    keypoints_initializer_fns = tools.cast_to_list(keypoints_initializer_fns, n,
                                                   'keypoints_initializer_fns')
    bound = tools.cast_to_list(bound, n, 'bound')
    monotonic = tools.cast_to_list(monotonic, n, 'monotonic')
    missing_input_values = tools.cast_to_list(missing_input_values, n,
                                              'missing_input_values')
    missing_output_values = tools.cast_to_list(missing_output_values, n,
                                               'missing_output_values')
    regularizer_amounts = {
        regularizer_name: tools.cast_to_list(
            regularizer_amounts[regularizer_name], n, regularizer_name)
        for regularizer_name in regularizer_amounts
    }

    signal_names = ['signal_%d' % ii for ii in range(n)]

    uncalibrated_splits = array_ops.unstack(flat_uncalibrated, axis=1)
    calibrated_splits = []
    projection_ops = []
    total_regularization = None
    for ii in range(n):
      if not num_keypoints[ii]:
        # No calibration for this signal.
        calibrated_splits += [uncalibrated_splits[ii]]
      else:
        signal_regularizer_amounts = {
            regularizer_name: regularizer_amounts[regularizer_name][ii]
            for regularizer_name in regularizer_amounts
        }
        calibrated, projection, reg = one_dimensional_calibration_layer(
            uncalibrated_splits[ii],
            num_keypoints[ii],
            signal_name=signal_names[ii],
            keypoints_initializers=keypoints_initializers[ii],
            keypoints_initializer_fns=keypoints_initializer_fns[ii],
            bound=bound[ii],
            monotonic=monotonic[ii],
            missing_input_value=missing_input_values[ii],
            missing_output_value=missing_output_values[ii],
            **signal_regularizer_amounts)
        calibrated_splits += [calibrated]
        if projection is not None:
          projection_ops += [projection]
        total_regularization = tools.add_if_not_none(total_regularization, reg)
    flat_calibrated = array_ops.stack(
        calibrated_splits, axis=1, name='stack_calibrated')
    reshaped_calibrated = array_ops.reshape(
        flat_calibrated,
        shape=array_ops.shape(uncalibrated_tensor),
        name='reshape_calibrated')
    return reshaped_calibrated, projection_ops, total_regularization
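A hedged usage sketch of `calibration_layer` above, assuming a single pair of keypoint tensors shared by both signals; the keypoint values, shapes, and monotonicity choice are made up for illustration.

import tensorflow as tf  # sketch; `calibration_layer` is defined above

x = tf.placeholder(tf.float32, shape=[None, 2])   # two uncalibrated signals
kp_in = tf.constant([0.0, 0.5, 1.0])              # toy input keypoints
kp_out = tf.constant([-1.0, 0.0, 1.0])            # toy output keypoints

calibrated, projection_ops, reg = calibration_layer(
    x,
    num_keypoints=3,
    keypoints_initializers=(kp_in, kp_out),
    bound=True,
    monotonic=1)
# `calibrated` has the same shape as `x`; run `projection_ops` after each
# training step to keep the outputs bounded and monotonic.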
Example #36
def expand_dims(input, axis, name=None):  # pylint: disable=redefined-builtin
    """Inserts a dimension with shape 1 into a potentially ragged tensor's shape.

  Given a potentially ragged tensor `input`, this operation inserts a
  dimension with size 1 at the dimension `axis` of `input`'s shape.

  * If `input` is a `Tensor`, then this is equivalent to
    `tf.expand_dims`.
  * If `input` is ragged, and `axis=0`, then the new dimension will be
    uniform; but the previously outermost dimension will become ragged.
  * If `input` is ragged, and `0 < axis < input.ragged_rank`, then the
    new dimension will be ragged.
  * If `input` is ragged, and `axis >= input.ragged_rank`, then the new
    dimension will be uniform.

  The following table gives some examples showing how `ragged.expand_dims`
  impacts the shapes of different input tensors.  Ragged dimensions are
  indicated by enclosing them in parentheses.

  input.shape             | axis | result.shape
  ----------------------- | ---- | -----------------------------
  `[D1, D2]`              |  `0` | `[1, D1, D2]`
  `[D1, D2]`              |  `1` | `[D1, 1, D2]`
  `[D1, D2]`              |  `2` | `[D1, D2, 1]`
  `[D1, (D2), (D3), D4]`  |  `0` | `[1, (D1), (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `1` | `[D1, (1), (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `2` | `[D1, (D2), (1), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `3` | `[D1, (D2), (D3), 1, D4]`
  `[D1, (D2), (D3), D4]`  |  `4` | `[D1, (D2), (D3), D4, 1]`

  Args:
    input: The potentially ragged tensor that should be expanded with a new
      dimension.
    axis: An integer constant indicating where the new dimension should be
      inserted.
    name: A name for the operation (optional).

  Returns:
    A tensor with the same values as `input`, with an added dimension of
    size 1 at `axis`.

  #### Examples:

  >>> rt = tf.ragged.constant([[1, 2], [3]])
  >>> print(rt.shape)
  (2, None)

  >>> expanded = tf.expand_dims(rt, axis=0)
  >>> print(expanded.shape, expanded)
  (1, None, None) <tf.RaggedTensor [[[1, 2], [3]]]>

  >>> expanded = tf.expand_dims(rt, axis=1)
  >>> print(expanded.shape, expanded)
  (2, None, None) <tf.RaggedTensor [[[1, 2]], [[3]]]>

  >>> expanded = tf.expand_dims(rt, axis=2)
  >>> print(expanded.shape, expanded)
  (2, None, 1) <tf.RaggedTensor [[[1], [2]], [[3]]]>
  """
    with ops.name_scope(name, 'RaggedExpandDims', [input]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input,
                                                                 name='input')

        if not ragged_tensor.is_ragged(input):
            return array_ops.expand_dims(input, axis)

        ndims = None if input.shape.ndims is None else input.shape.ndims + 1
        axis = ragged_util.get_positive_axis(axis, ndims)
        if axis == 0:
            values = input
            splits = array_ops.stack([0, input.nrows()])
        elif axis == 1:
            values = input
            splits = math_ops.range(input.nrows() + 1)
        else:
            values = expand_dims(input.values, axis - 1)
            splits = input.row_splits

        return ragged_tensor.RaggedTensor.from_row_splits(values,
                                                          splits,
                                                          validate=False)
Example #37
def _compute_random_ri_sampled_logits(ri_tensors,
                                      k_dim,
                                      s_active,
                                      weights,
                                      labels,
                                      inputs,
                                      num_sampled,
                                      num_true=1,
                                      subtract_log_q=True,
                                      partition_strategy="mod",
                                      name=None,
                                      seed=None):
    """ Random Random Index Sampled Logits with negative sampling

    https://arxiv.org/pdf/1410.8251.pdf

    Computes the sampled logits from the space of all possible random indexes.
    Since any random index is possible, we sample, not from the existing random indexes
    but from the space of possible random indexes so that the model learns which combinations
    of bases are NOT the ones used to predict a given feature.

    Args:
        ri_tensors:
        k_dim:
        s_active:
        weights:
        labels:
        inputs:
        num_sampled:
        sampled_values:
        num_true:
        subtract_log_q:
        remove_accidental_hits:
        partition_strategy:
        name:
        seed:

    Returns:

    """
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "random_ri_sampled_logits",
                        weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)
        sampled_ris, expected_true_ris, expected_sampled_ris = sample_ri(k_dim, s_active, num_sampled, true_ris)

        all_ris = sparse_ops.sparse_concat(axis=0, sp_inputs=[true_ris, sampled_ris])

        sp_values = all_ris
        sp_indices = tx.sparse_indices(sp_values)

        # Retrieve the weights

        # weights shape is [num_classes, dim]
        all_w = embedding_lookup_sparse(
            weights, sp_indices, sp_values, combiner="sum", partition_strategy=partition_strategy)

        # true_w shape is [batch_size * num_true, dim]
        true_w = array_ops.slice(all_w, [0, 0],
                                 array_ops.stack(
                                     [array_ops.shape(labels_flat)[0], -1]))

        sampled_w = array_ops.slice(
            all_w, array_ops.stack([array_ops.shape(labels_flat)[0], 0]), [-1, -1])
        # inputs has shape [batch_size, dim]
        # sampled_w has shape [num_sampled, dim]
        # Apply X*W', which yields [batch_size, num_sampled]
        sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True)

        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        row_wise_dots = math_ops.multiply(
            array_ops.expand_dims(inputs, 1),
            array_ops.reshape(true_w, new_true_w_shape))
        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that label appears in sampled.
            true_logits -= math_ops.log(expected_true_ris)
            sampled_logits -= math_ops.log(expected_sampled_ris)

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, sampled_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(sampled_logits)
        ], 1)

        return out_logits, out_labels
Example #38
def _block_lstm(seq_len_max,
                x,
                w,
                b,
                cs_prev=None,
                h_prev=None,
                wci=None,
                wcf=None,
                wco=None,
                forget_bias=None,
                cell_clip=None,
                use_peephole=None,
                name=None):
    r"""TODO(williamchan): add doc.

  Args:
    seq_len_max: A `Tensor` of type `int64`.
    x: A list of at least 1 `Tensor` objects of the same type in: `float32`.
    w: A `Tensor`. Must have the same type as `x`.
    b: A `Tensor`. Must have the same type as `x`.
    cs_prev: A `Tensor`. Must have the same type as `x`.
    h_prev: A `Tensor`. Must have the same type as `x`.
    wci: A `Tensor`. Must have the same type as `x`.
    wcf: A `Tensor`. Must have the same type as `x`.
    wco: A `Tensor`. Must have the same type as `x`.
    forget_bias: An optional `float`. Defaults to `1`.
    cell_clip: An optional `float`. Defaults to `3`.
    use_peephole: An optional `bool`. Defaults to `False`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (i, cs, f, o, ci, co, h).
    i: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    cs: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    f: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    o: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    ci: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    co: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    h: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.

  Raises:
    ValueError: If `b` does not have a valid shape.
  """
    batch_size = x[0].get_shape().with_rank(2)[0].value
    cell_size4 = b.get_shape().with_rank(1)[0].value
    if cell_size4 is None:
        raise ValueError("`b` shape must not be None.")
    cell_size = cell_size4 // 4  # integer division keeps the shape an int
    zero_state = None
    if cs_prev is None or h_prev is None:
        zero_state = array_ops.constant(0,
                                        dtype=dtypes.float32,
                                        shape=[batch_size, cell_size])
    if cs_prev is None:
        cs_prev = zero_state
    if h_prev is None:
        h_prev = zero_state
    if wci is None:
        wci = array_ops.constant(0, dtype=dtypes.float32, shape=[cell_size])
        wco = wci
        wcf = wci

    # pylint: disable=protected-access
    i, cs, f, o, ci, co, h = _lstm_ops_so.block_lstm(seq_len_max=seq_len_max,
                                                     x=array_ops.stack(x),
                                                     cs_prev=cs_prev,
                                                     h_prev=h_prev,
                                                     w=w,
                                                     wci=wci,
                                                     wco=wco,
                                                     wcf=wcf,
                                                     b=b,
                                                     forget_bias=forget_bias,
                                                     cell_clip=cell_clip,
                                                     name=name,
                                                     use_peephole=use_peephole)

    return array_ops.unstack(i), array_ops.unstack(cs), array_ops.unstack(
        f), array_ops.unstack(o), array_ops.unstack(ci), array_ops.unstack(
            co), array_ops.unstack(h)
Example #39
    def __call__(self,
                 inputs,
                 initial_state=None,
                 dtype=None,
                 sequence_length=None,
                 scope=None):
        """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
        or a list of `time_len` tensors of shape `[batch_size, input_size]`.
      initial_state: a tuple `(initial_cell_state, initial_output)` with tensors
        of shape `[batch_size, self._num_units]`. If this is not provided, the
        cell is expected to create a zero initial state of type `dtype`.
      dtype: The data type for the initial state and expected output. Required
        if `initial_state` is not provided or RNN state has a heterogeneous
        dtype.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
        time_len).`
        Defaults to `time_len` for each element.
      scope: `VariableScope` for the created subgraph; defaults to class name.

    Returns:
      A pair containing:

      - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
        or a list of time_len tensors of shape `[batch_size, output_size]`,
        to match the type of the `inputs`.
      - Final state: a tuple `(cell_state, output)` matching `initial_state`.

    Raises:
      ValueError: in case of shape mismatches
    """
        with vs.variable_scope(scope or "lstm_block_wrapper"):
            is_list = isinstance(inputs, list)
            if is_list:
                inputs = array_ops.stack(inputs)
            inputs_shape = inputs.get_shape().with_rank(3)
            if not inputs_shape[2]:
                raise ValueError("Expecting inputs_shape[2] to be set: %s" %
                                 inputs_shape)
            batch_size = inputs_shape[1].value
            if batch_size is None:
                batch_size = array_ops.shape(inputs)[1]
            time_len = inputs_shape[0].value
            if time_len is None:
                time_len = array_ops.shape(inputs)[0]

            # Provide default values for initial_state and dtype
            if initial_state is None:
                if dtype is None:
                    raise ValueError(
                        "Either initial_state or dtype needs to be specified")
                z = array_ops.zeros(array_ops.stack(
                    [batch_size, self.num_units]),
                                    dtype=dtype)
                initial_state = z, z
            else:
                if len(initial_state) != 2:
                    raise ValueError(
                        "Expecting initial_state to be a tuple with length 2 or None"
                    )
                if dtype is None:
                    dtype = initial_state[0].dtype

            # create the actual cell
            if sequence_length is not None:
                sequence_length = ops.convert_to_tensor(sequence_length)
            initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
            cell_states, outputs = self._call_cell(inputs, initial_cell_state,
                                                   initial_output, dtype,
                                                   sequence_length)

            if sequence_length is not None:
                # Mask out the part beyond sequence_length
                mask = array_ops.transpose(
                    array_ops.sequence_mask(sequence_length,
                                            time_len,
                                            dtype=dtype), [1, 0])
                mask = array_ops.tile(array_ops.expand_dims(mask, [-1]),
                                      [1, 1, self.num_units])
                outputs *= mask
                # Prepend initial states to cell_states and outputs for indexing to work
                # correctly, since we want to access the last valid state at
                # sequence_length - 1, which can even be -1, corresponding to the
                # initial state.
                mod_cell_states = array_ops.concat_v2([
                    array_ops.expand_dims(initial_cell_state, [0]), cell_states
                ], 0)
                mod_outputs = array_ops.concat_v2(
                    [array_ops.expand_dims(initial_output, [0]), outputs], 0)
                final_cell_state = self._gather_states(mod_cell_states,
                                                       sequence_length,
                                                       batch_size)
                final_output = self._gather_states(mod_outputs,
                                                   sequence_length, batch_size)
            else:
                # No sequence_lengths used: final state is the last state
                final_cell_state = cell_states[-1]
                final_output = outputs[-1]

            if is_list:
                # Input was a list, so return a list
                outputs = array_ops.unstack(outputs)

            return outputs, (final_cell_state, final_output)
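A hedged usage sketch of the `__call__` above; the snippet does not name its class, so the fused-LSTM cell below (tf.contrib.rnn.LSTMBlockFusedCell, TF 1.x) is an assumption, and the shapes follow the time-major layout the docstring describes.

import tensorflow as tf  # sketch assumes TF 1.x with tf.contrib available

cell = tf.contrib.rnn.LSTMBlockFusedCell(num_units=32)
inputs = tf.zeros([10, 8, 16])      # [time_len, batch_size, input_size]
seq_len = tf.fill([8], 10)          # every sequence uses all 10 steps
outputs, (final_c, final_h) = cell(inputs, dtype=tf.float32,
                                   sequence_length=seq_len)
# outputs: [time_len, batch_size, 32]; the final state is the
# (cell_state, output) pair described in the docstring.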
Example #40
    def encode(self, x):
        """Encoder using LSTM.

    Args:
      x: tensor of size [num_children, num_groups, embedding_size]

    Returns:
      last_c, last_h: tensors of size [num_children, hidden_size], the final
        LSTM states
      attn_mem: tensor of size [num_children, num_groups, hidden_size], the
      attention
        memory, i.e. concatenation of all hidden states, linearly transformed by
        an attention matrix attn_w_1
    """
        if self.hparams.bi_lstm:
            with variable_scope.variable_scope(self.hparams.name, reuse=True):
                w_lstm_forward = variable_scope.get_variable(
                    "encoder_lstm_forward")
                w_lstm_backward = variable_scope.get_variable(
                    "encoder_lstm_backward")
                forget_bias = variable_scope.get_variable(
                    "encoder_forget_bias")
                attn_w_1 = variable_scope.get_variable("attn_w_1")
        else:
            with variable_scope.variable_scope(self.hparams.name, reuse=True):
                w_lstm = variable_scope.get_variable("encoder_lstm")
                forget_bias = variable_scope.get_variable(
                    "encoder_forget_bias")
                attn_w_1 = variable_scope.get_variable("attn_w_1")

        embedding_size = array_ops.shape(x)[2]

        signals = array_ops.split(x, self.num_groups, axis=1)
        for i in range(len(signals)):
            signals[i] = array_ops.reshape(
                signals[i], [self.hparams.num_children, embedding_size])

        if self.hparams.bi_lstm:

            def body(i, prev_c_forward, prev_h_forward, prev_c_backward,
                     prev_h_backward):
                """while loop for LSTM."""
                signal_forward = signals[i]
                next_c_forward, next_h_forward = lstm(signal_forward,
                                                      prev_c_forward,
                                                      prev_h_forward,
                                                      w_lstm_forward,
                                                      forget_bias)

                signal_backward = signals[self.num_groups - 1 - i]
                next_c_backward, next_h_backward = lstm(
                    signal_backward, prev_c_backward, prev_h_backward,
                    w_lstm_backward, forget_bias)

                next_h = array_ops.concat([next_h_forward, next_h_backward],
                                          axis=1)
                all_h.append(next_h)

                return (next_c_forward, next_h_forward, next_c_backward,
                        next_h_backward)

            # Use integer division so the state shapes stay ints under Python 3.
            c_forward = array_ops.zeros(
                [self.hparams.num_children, self.hparams.hidden_size // 2],
                dtype=dtypes.float32)
            h_forward = array_ops.zeros(
                [self.hparams.num_children, self.hparams.hidden_size // 2],
                dtype=dtypes.float32)

            c_backward = array_ops.zeros(
                [self.hparams.num_children, self.hparams.hidden_size // 2],
                dtype=dtypes.float32)
            h_backward = array_ops.zeros(
                [self.hparams.num_children, self.hparams.hidden_size // 2],
                dtype=dtypes.float32)
            all_h = []

            for i in range(0, self.num_groups):
                c_forward, h_forward, c_backward, h_backward = body(
                    i, c_forward, h_forward, c_backward, h_backward)

            last_c = array_ops.concat([c_forward, c_backward], axis=1)
            last_h = array_ops.concat([h_forward, h_backward], axis=1)
            attn_mem = array_ops.stack(all_h)

        else:

            def body(i, prev_c, prev_h):
                signal = signals[i]
                next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm,
                                      forget_bias)
                all_h.append(next_h)
                return next_c, next_h

            c = array_ops.zeros(
                [self.hparams.num_children, self.hparams.hidden_size],
                dtype=dtypes.float32)
            h = array_ops.zeros(
                [self.hparams.num_children, self.hparams.hidden_size],
                dtype=dtypes.float32)
            all_h = []

            for i in range(0, self.num_groups):
                c, h = body(i, c, h)

            last_c = c
            last_h = h
            attn_mem = array_ops.stack(all_h)

        attn_mem = array_ops.transpose(attn_mem, [1, 0, 2])
        attn_mem = array_ops.reshape(attn_mem, [
            self.hparams.num_children * self.num_groups,
            self.hparams.hidden_size
        ])
        attn_mem = math_ops.matmul(attn_mem, attn_w_1)
        attn_mem = array_ops.reshape(attn_mem, [
            self.hparams.num_children, self.num_groups,
            self.hparams.hidden_size
        ])

        return last_c, last_h, attn_mem
Example #41
 def testDimOutOfRange(self):
     t = [constant_op.constant([1, 2, 3]), constant_op.constant([4, 5, 6])]
     with self.assertRaisesRegexp(ValueError, r"axis = 2 not in \[-2, 2\)"):
         array_ops.stack(t, axis=2)
Example #42
def _UnpackGrad(op, *grads):
    """Gradient for unpack op."""
    return array_ops.stack(grads, axis=op.get_attr("axis"))
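A small illustration (not from the source) of what `_UnpackGrad` does: the gradient of `tf.unstack` stacks the per-slice upstream gradients back along the unpacked axis. TF 1.x graph mode is assumed so `tf.gradients` is available.

import tensorflow as tf  # illustrative sketch

x = tf.ones([2, 3])
a, b = tf.unstack(x, axis=0)             # forward op: two [3] tensors
loss = tf.reduce_sum(a) + 2.0 * tf.reduce_sum(b)
grad_x, = tf.gradients(loss, x)          # _UnpackGrad stacks d(loss)/da and
                                         # d(loss)/db into a [2, 3] tensor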
Example #43
def shard(computation,
          inputs=None,
          num_shards=1,
          input_shard_axes=None,
          outputs_from_all_shards=True,
          output_shard_axes=None,
          infeed_queue=None,
          device_assignment=None,
          name=None):
    """Shards `computation` for parallel execution.

  `inputs` must be a list of Tensors or None (equivalent to an empty
  list), each of which has a corresponding split axis (from
  `input_shard_axes`). Each input is split into `num_shards` pieces
  along the corresponding axis, and computation is applied to each
  shard in parallel.

  Tensors are broadcast to all shards if they are lexically captured by
  `computation`. e.g.,

  x = tf.constant(7)
  def computation():
    return x + 3
  ... = shard(computation, ...)

  TODO(phawkins): consider adding support for broadcasting Tensors passed
  as inputs.

  If `outputs_from_all_shards` is true, the outputs from all shards of
  `computation` are concatenated back together along their `output_shard_axes`.
  Otherwise, each output is taken from an arbitrary shard.

  Inputs and outputs of the computation must be at least rank-1 Tensors.

  Args:
    computation: A Python function that builds a computation to apply to each
      shard of the input.
    inputs: A list of input tensors or None (equivalent to an empty
      list). Each input tensor has a corresponding shard axes, given
      by `input_shard_axes`, which must have size divisible by
      `num_shards`.
    num_shards: The number of shards.
    input_shard_axes: A list of dimensions along which to shard `inputs`, or
      `None`. `None` means "shard all inputs along dimension 0". If not `None`,
      there must be one dimension per input.
    outputs_from_all_shards: Boolean or list of boolean. For each output, if
      `True`, outputs from all shards are concatenated along the corresponding
      `output_shard_axes` entry. Otherwise, each output is taken
      from an arbitrary shard. If the argument is a boolean, the argument's
      value is used for each output.
    output_shard_axes: A list of dimensions along which to concatenate the
      outputs of `computation`, or `None`. `None` means "concatenate all outputs
      along dimension 0". If not `None`, there must be one dimension per output.
      Ignored if `outputs_from_all_shards` is False.
    infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs
      of `computation`.
    device_assignment: If not `None`, a `DeviceAssignment` describing the
      mapping between logical cores in the computation with physical cores in
      the TPU topology. Uses a default device assignment if `None`. The
      `DeviceAssignment` may be omitted if each shard of the computation uses
      only one core, and there is either only one shard, or the number of shards
      is equal to the number of cores in the TPU system.
    name: (Deprecated) Does nothing.
  Returns:
    A list of output tensors.
  Raises:
    ValueError: If num_shards <= 0
    ValueError: If len(input_shard_axes) != len(inputs)
    ValueError: If len(output_shard_axes) != len(outputs from `computation`)
  """

    if num_shards <= 0:
        raise ValueError("num_shards must be a positive integer.")

    # Converts inputs to Tensors.
    inputs = [] if inputs is None else [
        ops.convert_to_tensor(x) for x in inputs
    ]

    if input_shard_axes is None:
        input_shard_axes = [0] * len(inputs)
    if len(inputs) != len(input_shard_axes):
        raise ValueError(
            "Length of input_shard_axes must be equal to the number "
            "of inputs.")

    if inputs:
        # Splits the `inputs` along the corresponding `input_shard_axes`, giving
        # lists with layout [input][shard]
        split_inputs = [
            array_ops.split(x, num_shards, axis=axis)
            for (axis, x) in zip(input_shard_axes, inputs)
        ]

        # Transposes the input lists to have layout [shard][input]
        transposed_inputs = [list(i) for i in zip(*split_inputs)]
    else:
        transposed_inputs = [[]] * num_shards

    outputs = replicate(computation,
                        transposed_inputs,
                        infeed_queue=infeed_queue,
                        device_assignment=device_assignment,
                        name=name)

    # There must be at least one shard since num_shards > 0.
    # TODO(b/36647078) remove disable when pylint bug is fixed.
    # pylint: disable=indexing-exception
    if isinstance(outputs[0], ops.Operation):
        # pylint: enable=indexing-exception
        # There were no outputs from the computation and replicate returned a list
        # of NoOps with control dependencies on the computation. Return the first
        # one so it can be used as a control dependency or fetch node.
        # TODO(b/36647078) remove disable when pylint bug is fixed.
        # pylint: disable=indexing-exception
        return [outputs[0]]
        # pylint: enable=indexing-exception

    # TODO(b/36647078) remove disable when pylint bug is fixed.
    # pylint: disable=indexing-exception
    num_outputs = len(outputs[0])
    # pylint: enable=indexing-exception

    if output_shard_axes is None:
        output_shard_axes = [0] * num_outputs
    if num_outputs != len(output_shard_axes):
        raise ValueError(
            "Length of output_shard_axes must be equal to the number "
            "of outputs.")

    if isinstance(outputs_from_all_shards, bool):
        outputs_from_all_shards = [outputs_from_all_shards] * num_outputs

    if num_outputs != len(outputs_from_all_shards):
        raise ValueError(
            "Length of outputs_from_all_shards must be equal to the "
            "number of outputs.")

    results = []
    for (axis, all_shards, x) in zip(output_shard_axes,
                                     outputs_from_all_shards, zip(*outputs)):
        if all_shards:
            # Concatenate all of the outputs together (use stack for scalars).
            shape = x[0].shape
            is_scalar = shape is not None and (shape.ndims == 0)
            results.append((array_ops.stack(list(x)) if is_scalar else
                            array_ops.concat(list(x), axis=axis)))
        else:
            # TODO(phawkins): use a smarter policy, e.g., round-robin across shards.
            results.append(x[0])

    return results
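A hedged usage sketch of `shard` above, to be run on a TPU-enabled system; the computation, shapes, and shard count are illustrative assumptions.

import tensorflow as tf  # sketch; `shard` and `replicate` come from this module

def computation(x):
    # Each shard receives a [16 / num_shards, 8] slice of the input and doubles it.
    return x * 2.0

inputs = [tf.zeros([16, 8])]
outputs = shard(computation, inputs=inputs, num_shards=4)
# outputs[0] is the concatenation of the four doubled shards along axis 0: [16, 8].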
Example #44
def _compute_ri_sampled_logits(ri_tensors,
                               weights,
                               labels,
                               inputs,
                               num_sampled,
                               num_classes,
                               sampled_values,
                               num_true=1,
                               subtract_log_q=True,
                               remove_accidental_hits=False,
                               partition_strategy="mod",
                               name=None,
                               seed=None):
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "ri_sampled_logits",
                        weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # Sample the negative labels.
        #   sampled shape: [num_sampled] tensor
        #   true_expected_count shape = [batch_size, 1] tensor
        #   sampled_expected_count shape = [num_sampled] tensor
        if sampled_values is None:
            sampled_values = candidate_sampling_ops.uniform_candidate_sampler(
                true_classes=labels,
                num_true=num_true,
                num_sampled=num_sampled,
                unique=True,
                range_max=num_classes,
                seed=seed)
        # NOTE: pylint cannot tell that 'sampled_values' is a sequence
        # pylint: disable=unpacking-non-sequence
        sampled, true_expected_count, sampled_expected_count = (
            array_ops.stop_gradient(s) for s in sampled_values)
        # pylint: enable=unpacking-non-sequence
        sampled = math_ops.cast(sampled, dtypes.int64)

        all_ids = array_ops.concat([labels_flat, sampled], 0)

        # true_ris = tx.gather_sparse(ri_tensors, labels_flat)
        # another way is to sample from ri_tensor
        # sampled_ris = generate_ri(k, s, num_sampled)
        # all_ris = sparse_ops.sparse_concat(0, [true_ris, sampled_ris])

        all_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=all_ids)
        sp_values = all_ris
        sp_indices = tx.sparse_indices(sp_values)

        # Retrieve the true weights and the logits of the sampled weights.

        # weights shape is [num_classes, dim]
        all_w = embedding_lookup_sparse(
            weights, sp_indices, sp_values, combiner="sum", partition_strategy=partition_strategy)

        # true_w shape is [batch_size * num_true, dim]
        true_w = array_ops.slice(all_w, [0, 0],
                                 array_ops.stack(
                                     [array_ops.shape(labels_flat)[0], -1]))

        sampled_w = array_ops.slice(
            all_w, array_ops.stack([array_ops.shape(labels_flat)[0], 0]), [-1, -1])
        # inputs has shape [batch_size, dim]
        # sampled_w has shape [num_sampled, dim]
        # Apply X*W', which yields [batch_size, num_sampled]
        # for energy based models the inputs are the predicted feature vectors
        sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True)

        # inputs shape is [batch_size, dim]
        # true_w shape is [batch_size * num_true, dim]
        # row_wise_dots is [batch_size, num_true, dim]
        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        row_wise_dots = math_ops.multiply(
            array_ops.expand_dims(inputs, 1),
            array_ops.reshape(true_w, new_true_w_shape))
        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        if remove_accidental_hits:
            acc_hits = candidate_sampling_ops.compute_accidental_hits(
                labels, sampled, num_true=num_true)
            acc_indices, acc_ids, acc_weights = acc_hits

            # This is how SparseToDense expects the indices.
            acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
            acc_ids_2d_int32 = array_ops.reshape(
                math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
            sparse_indices = array_ops.concat([acc_indices_2d, acc_ids_2d_int32], 1,
                                              "sparse_indices")
            # Create sampled_logits_shape = [batch_size, num_sampled]
            sampled_logits_shape = array_ops.concat(
                [array_ops.shape(labels)[:1],
                 array_ops.expand_dims(num_sampled, 0)], 0)
            if sampled_logits.dtype != acc_weights.dtype:
                acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
            sampled_logits += sparse_ops.sparse_to_dense(
                sparse_indices,
                sampled_logits_shape,
                acc_weights,
                default_value=0.0,
                validate_indices=False)

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that label appears in sampled.
            true_logits -= math_ops.log(true_expected_count)
            sampled_logits -= math_ops.log(sampled_expected_count)

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, sampled_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(sampled_logits)
        ], 1)

        return out_logits, out_labels
Example #45
def input_calibration_layer(columns_to_tensors,
                            num_keypoints,
                            feature_columns=None,
                            keypoints_initializers=None,
                            keypoints_initializer_fns=None,
                            bound=False,
                            monotonic=None,
                            missing_input_values=None,
                            missing_output_values=None,
                            dtype=dtypes.float32,
                            **regularizer_amounts):
  """Creates a calibration layer for the given input and feature_columns.

  Returns a tensor with the calibrated values of the given features, a list
  of the names of the features in the order they appear in the returned
  tensor, and a list of projection ops that must be applied at each step (or
  every so many steps) to project the model to a feasible space: used for
  bounding the outputs or for imposing monotonicity -- the list will be empty
  if bound and monotonic are not set.

  Args:
    columns_to_tensors: A mapping from feature name to tensors. 'string' key
      means a base feature (not-transformed). If feature_columns is not set
      these are the features calibrated. Otherwise the transformed
      feature_columns are the ones calibrated.
    num_keypoints: Number of keypoints to use. Either a single int, or a dict
      mapping feature names to num_keypoints. If a value of the dict is 0 or
      None the corresponding feature won't be calibrated.
    feature_columns: Optional. If set to a set of FeatureColumns, these will
      be the features used and calibrated.
    keypoints_initializers: For evaluation or inference (or when resuming
      training from a checkpoint) the values will be loaded from disk, so they
      don't need to be given (leave it as None).
      Either a tuple of two tensors of shape [num_keypoints], or a dict mapping
      feature names to pair of tensors of shape [num_keypoints[feature_name]].
      See load_keypoints_from_quantiles or uniform_keypoints_for_signal on how
      to generate these (module keypoints_initialization).
    keypoints_initializer_fns: Like keypoints_initializers but using lambda
      initializers. They should be compatible with tf.get_variable. If this is
      set, then keypoints_initializers must be None.
    bound: boolean whether output of calibration must be bound. Alternatively
      a dict mapping feature name to boundness.
    monotonic: whether calibration has to be kept monotonic: None or 0 means
      no monotonicity. Positive or negative values mean increasing or
      decreasing monotonicity respectively. Alternatively a dict mapping
      feature name to a monotonic value.
    missing_input_values: If set, and if the input has this value it is assumed
      to be missing and the output will either be calibrated to some value
      between `[calibration_output_min, calibration_output_max]` or set to a
      fixed value set by missing_output_value. Limitation: it only works for
      scalars. Either one value for all inputs, or a dict mapping feature name
      to missing_input_value for the respective feature.
    missing_output_values: Requires missing_input_value also to be set. If set
      it will convert missing input to this value. Either one value for all
      inputs, or a dict mapping feature name to missing_input_value for the
      respective feature.
    dtype: If any of the scalars are not given as tensors, they are converted
      to tensors with this dtype.
    **regularizer_amounts: Keyword args of regularization amounts passed to
      regularizers.calibrator_regularization(). Keyword names should be among
      supported regularizers.CALIBRATOR_REGULARIZERS and values should be
      either float or {feature_name: float}. If float, then same value is
      applied to all features.

  Returns:
    A tuple of:
    * calibrated tensor of shape [batch_size, sum(features dimensions)].
    * list of the feature names in the order they appear in the calibrated
      tensor. A name may appear more than once if the feature is
      multi-dimensional (for instance a multi-dimensional embedding).
    * list of projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity. Empty if none are requested.
    * None or tensor with regularization loss.

  Raises:
    ValueError: if dtypes are incompatible.


  """
  with ops.name_scope('input_calibration_layer'):
    feature_names = tools.get_sorted_feature_names(columns_to_tensors,
                                                   feature_columns)
    num_keypoints = tools.cast_to_dict(num_keypoints, feature_names,
                                       'num_keypoints')
    bound = tools.cast_to_dict(bound, feature_names, 'bound')
    monotonic = tools.cast_to_dict(monotonic, feature_names, 'monotonic')
    keypoints_initializers = tools.cast_to_dict(
        keypoints_initializers, feature_names, 'keypoints_initializers')
    keypoints_initializer_fns = tools.cast_to_dict(
        keypoints_initializer_fns, feature_names, 'keypoints_initializer_fns')
    missing_input_values = tools.cast_to_dict(
        missing_input_values, feature_names, 'missing_input_values')
    missing_output_values = tools.cast_to_dict(
        missing_output_values, feature_names, 'missing_output_values')
    regularizer_amounts = {
        regularizer_name: tools.cast_to_dict(
            regularizer_amounts[regularizer_name], feature_names,
            regularizer_name) for regularizer_name in regularizer_amounts
    }

    per_dimension_feature_names = []

    # Get uncalibrated tensors, either from columns_to_tensors, or using
    # feature_columns.
    if feature_columns is None:
      uncalibrated_features = [
          columns_to_tensors[name] for name in feature_names
      ]
    else:
      transformed_columns_to_tensors = columns_to_tensors.copy()
      dict_feature_columns = {f_col.name: f_col for f_col in feature_columns}
      uncalibrated_features = [
          tools.input_from_feature_column(transformed_columns_to_tensors,
                                          dict_feature_columns[name], dtype)
          for name in feature_names
      ]

    projection_ops = []
    calibrated_splits = []
    total_regularization = None
    for feature_idx in range(len(feature_names)):
      name = feature_names[feature_idx]
      uncalibrated_feature = uncalibrated_features[feature_idx]
      if uncalibrated_feature.shape.ndims == 1:
        feature_dim = 1
        uncalibrated_splits = [uncalibrated_feature]
      elif uncalibrated_feature.shape.ndims == 2:
        feature_dim = uncalibrated_feature.shape.dims[1].value
        uncalibrated_splits = array_ops.unstack(uncalibrated_feature, axis=1)
      else:
        raise ValueError(
            'feature {}: it has rank {}, but only ranks 1 or 2 are '
            'supported; feature shape={}'.format(
                name, uncalibrated_feature.shape.ndims,
                uncalibrated_feature.shape))
      missing_input_value = missing_input_values[name]
      missing_output_value = missing_output_values[name]
      feature_regularizer_amounts = {
          regularizer_name: regularizer_amounts[regularizer_name][name]
          for regularizer_name in regularizer_amounts
      }

      # FutureWork: make the interpolation ops handle multi-dimension values,
      #   so this step is not needed.
      for dim_idx in range(feature_dim):
        per_dimension_feature_names += [name]
        split_name = name
        if feature_dim > 1:
          split_name = '{}_dim_{}'.format(name, dim_idx)
        uncalibrated = uncalibrated_splits[dim_idx]
        if not num_keypoints[name]:
          # No calibration for this feature:
          calibrated_splits += [uncalibrated]
          if (missing_input_value is not None or
              missing_output_value is not None):
            raise ValueError(
                'feature %s: cannot handle missing values if feature is not '
                'calibrated, missing_input_value=%s, missing_output_value=%s' %
                (name, missing_input_value, missing_output_value))
        else:
          calibrated, projection, reg = one_dimensional_calibration_layer(
              uncalibrated,
              num_keypoints[name],
              signal_name=split_name,
              keypoints_initializers=keypoints_initializers[name],
              keypoints_initializer_fns=keypoints_initializer_fns[name],
              bound=bound[name],
              monotonic=monotonic[name],
              missing_input_value=missing_input_value,
              missing_output_value=missing_output_value,
              **feature_regularizer_amounts)
          calibrated_splits += [calibrated]
          if projection is not None:
            projection_ops += [projection]
          total_regularization = tools.add_if_not_none(total_regularization,
                                                       reg)

    all_calibrated = array_ops.stack(
        calibrated_splits, axis=1, name='stack_calibrated')
    return (all_calibrated, per_dimension_feature_names, projection_ops,
            total_regularization)
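The projection ops returned above are meant to be run after each optimizer step. A minimal, self-contained sketch of that training pattern (TF1 graph mode; the variable, loss, and clip projection below are illustrative stand-ins, not the lattice calibration ops):

import tensorflow as tf

w = tf.Variable([2.0, -3.0])
loss = tf.reduce_sum(tf.square(w - 1.0))
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
# Stand-in projection: bound the parameters to [0, 1], analogous to the
# bound/monotonicity projections returned by the calibration layer.
projection_ops = [w.assign(tf.clip_by_value(w, 0.0, 1.0))]

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for _ in range(100):
    sess.run(train_op)
    sess.run(projection_ops)  # project back to the feasible space each step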
Example #46
def attn_decoder(decoder_inputs,
                 attention_states,
                 encoder_state,
                 cells,
                 model_size,
                 lstm_size,
                 batch_size,
                 embedding_size,
                 num_symbols,
                 loop_function=None,
                 num_heads=1,
                 initial_state_attention=False,
                 output_size=None,
                 attention=True,
                 scope=None):

    # encoder size
    num_encoder_word = model_size['encoder']['h1']
    num_encoder_sen = model_size['encoder']['h2']
    # decoder size
    num_decoder_word = model_size['decoder']['h1']
    num_decoder_sen = model_size['decoder']['h2']

    outputs, attn_outputs = [], []

    if output_size is None:
        output_size = cells["decoder_h1"].output_size

    with variable_scope.variable_scope(scope or "attention_decoder"):
        batch_size = array_ops.shape(
            decoder_inputs[0])[0]  # Needed for reshaping.
        # print(attention_shapes.get_shape())
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value
        word_attn_size = num_encoder_word * num_encoder_sen

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
        hidden = array_ops.reshape(attention_states,
                                   [-1, attn_length, 1, attn_size])
        hidden_features = []
        v = []
        attention_vec_size = attn_size  # Size of query vectors for attention.
        for a in range(num_heads):
            k = variable_scope.get_variable(
                "AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
            # print(k.get_shape())
            hidden_features.append(
                nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
            v.append(
                variable_scope.get_variable("AttnV_%d" % a,
                                            [attention_vec_size]))

        def attention(state):
            """Put attention masks on hidden using hidden_features and query."""
            if np.array(state).ndim > 1:
                concat_layers = [tf.concat([c, h], 1) for c, h in state]
                query = tf.concat(concat_layers, 1)
            else:
                query = tf.concat([state[0], state[1]], 1)
            ds, ass = [], []  # Results of attention reads will be stored here.
            for a in range(num_heads):
                with variable_scope.variable_scope("Attention_%d" % a):
                    y = linear(query, attention_vec_size, True)
                    y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                    # Attention mask is a softmax of v^T * tanh(...).
                    s = math_ops.reduce_sum(
                        v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
                    a = nn_ops.softmax(s)
                    ass.append(a)
                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(
                        array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                        [1, 2])
                    ds.append(array_ops.reshape(d, [-1, attn_size]))
            return ds, ass

        batch_attn_size = array_ops.stack([batch_size, attn_size])
        batch_word_attn_size = array_ops.stack([batch_size, word_attn_size])
        attns = [
            array_ops.zeros(batch_attn_size, dtype=dtypes.float32)
            for _ in range(num_heads)
        ]
        word_attns = [
            array_ops.zeros(batch_word_attn_size, dtype=dtypes.float32)
            for _ in range(num_heads)
        ]
        for a in attns:  # Ensure the second shape of attention vectors is set.
            a.set_shape([None, attn_size])
        if initial_state_attention:
            attns, word_attns = attention(encoder_state)

        prev = None
        sen_state = encoder_state
        decoder_word_idx = 0
        for i in range(num_decoder_sen):
            if i > 0: variable_scope.get_variable_scope().reuse_variables()

            with tf.variable_scope(scope or "decode_words"):
                word_input, word_output = None, None
                word_state = cells["decoder_h1"].zero_state(
                    batch_size, tf.float32)

                for t in range(num_decoder_word):
                    if t > 0:
                        variable_scope.get_variable_scope().reuse_variables()
                    word_state = word_state if t else sen_state

                    # If loop_function is set, we use it instead of decoder_inputs.
                    if loop_function is not None and prev is not None:
                        with variable_scope.variable_scope("loop_function",
                                                           reuse=True):
                            word_input = loop_function(prev, i)
                    else:
                        word_input = decoder_inputs[decoder_word_idx]
                        decoder_word_idx += 1

                    x = linear([word_input] + attns, output_size, True)
                    word_output, word_state = cells["decoder_h1"](x,
                                                                  word_state)

                    if not i and initial_state_attention:
                        with variable_scope.variable_scope(
                                variable_scope.get_variable_scope(),
                                reuse=True):
                            attns, word_attns = attention(word_state)
                    else:
                        attns, word_attns = attention(word_state)

                    with variable_scope.variable_scope("AttnOutputProjection"):
                        output = linear([word_output] + attns, output_size,
                                        True)
                        outputs.append(output)
                        attn_outputs.append(word_attns)

                    if loop_function is not None:
                        prev = word_output

            _, sen_state = cells["decoder_h2"](word_output, sen_state)

    return outputs, sen_state, attn_outputs
Example #47
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None,
                                      multi_label=False,
                                      label_weights=None):
  """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positives: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positives: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value, float tensor, python list, or tuple of float
      thresholds in `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    label_weights: (optional) tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).

  Returns:
    Update op.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
  if multi_label and label_weights is not None:
    raise ValueError('`label_weights` for multilabel data should be handled '
                     'outside of `update_confusion_matrix_variables` when '
                     '`multi_label` is True.')
  if variables_to_update is None:
    return
  if not any(
      key for key in variables_to_update if key in list(ConfusionMatrix)):
    raise ValueError(
        'Please provide at least one valid confusion matrix '
        'variable to update. Valid variable key options are: "{}". '
        'Received: "{}"'.format(
            list(ConfusionMatrix), variables_to_update.keys()))

  variable_dtype = list(variables_to_update.values())[0].dtype

  y_true = math_ops.cast(y_true, dtype=variable_dtype)
  y_pred = math_ops.cast(y_pred, dtype=variable_dtype)
  thresholds = ops.convert_to_tensor_v2(thresholds, dtype=variable_dtype)
  num_thresholds = thresholds.shape[0]
  if multi_label:
    one_thresh = math_ops.equal(
        math_ops.cast(1, dtype=dtypes.int32),
        array_ops.rank(thresholds),
        name='one_set_of_thresholds_cond')
  else:
    [y_pred,
     y_true], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
                                                               sample_weight)
    one_thresh = math_ops.cast(True, dtype=dtypes.bool)

  invalid_keys = [
      key for key in variables_to_update if key not in list(ConfusionMatrix)
  ]
  if invalid_keys:
    raise ValueError(
        'Invalid keys: {}. Valid variable key options are: "{}"'.format(
            invalid_keys, list(ConfusionMatrix)))

  with ops.control_dependencies([
      check_ops.assert_greater_equal(
          y_pred,
          math_ops.cast(0.0, dtype=y_pred.dtype),
          message='predictions must be >= 0'),
      check_ops.assert_less_equal(
          y_pred,
          math_ops.cast(1.0, dtype=y_pred.dtype),
          message='predictions must be <= 1')
  ]):
    if sample_weight is None:
      y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
          y_pred, y_true)
    else:
      sample_weight = math_ops.cast(sample_weight, dtype=variable_dtype)
      y_pred, y_true, sample_weight = (
          losses_utils.squeeze_or_expand_dimensions(
              y_pred, y_true, sample_weight=sample_weight))
  y_pred.shape.assert_is_compatible_with(y_true.shape)

  if top_k is not None:
    y_pred = _filter_top_k(y_pred, top_k)
  if class_id is not None:
    y_true = y_true[..., class_id]
    y_pred = y_pred[..., class_id]

  pred_shape = array_ops.shape(y_pred)
  num_predictions = pred_shape[0]
  if y_pred.shape.ndims == 1:
    num_labels = 1
  else:
    num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0)
  thresh_label_tile = control_flow_ops.cond(
      one_thresh, lambda: num_labels,
      lambda: math_ops.cast(1, dtype=dtypes.int32))

  # Reshape predictions and labels, adding a dim for thresholding.
  if multi_label:
    predictions_extra_dim = array_ops.expand_dims(y_pred, 0)
    labels_extra_dim = array_ops.expand_dims(
        math_ops.cast(y_true, dtype=dtypes.bool), 0)
  else:
    # Flatten predictions and labels when not multilabel.
    predictions_extra_dim = array_ops.reshape(y_pred, [1, -1])
    labels_extra_dim = array_ops.reshape(
        math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

  # Tile the thresholds for every prediction.
  if multi_label:
    thresh_pretile_shape = [num_thresholds, 1, -1]
    thresh_tiles = [1, num_predictions, thresh_label_tile]
    data_tiles = [num_thresholds, 1, 1]
  else:
    thresh_pretile_shape = [num_thresholds, -1]
    thresh_tiles = [1, num_predictions * num_labels]
    data_tiles = [num_thresholds, 1]

  thresh_tiled = array_ops.tile(
      array_ops.reshape(thresholds, thresh_pretile_shape),
      array_ops.stack(thresh_tiles))

  # Tile the predictions for every threshold.
  preds_tiled = array_ops.tile(predictions_extra_dim, data_tiles)

  # Compare predictions and threshold.
  pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

  # Tile labels by number of thresholds
  label_is_pos = array_ops.tile(labels_extra_dim, data_tiles)

  if sample_weight is not None:
    sample_weight = weights_broadcast_ops.broadcast_weights(
        math_ops.cast(sample_weight, dtype=variable_dtype), y_pred)
    weights_tiled = array_ops.tile(
        array_ops.reshape(sample_weight, thresh_tiles), data_tiles)
  else:
    weights_tiled = None

  if label_weights is not None and not multi_label:
    label_weights = array_ops.expand_dims(label_weights, 0)
    label_weights = weights_broadcast_ops.broadcast_weights(label_weights,
                                                            y_pred)
    label_weights_tiled = array_ops.tile(
        array_ops.reshape(label_weights, thresh_tiles), data_tiles)
    if weights_tiled is None:
      weights_tiled = label_weights_tiled
    else:
      weights_tiled = math_ops.multiply(weights_tiled, label_weights_tiled)

  update_ops = []

  def weighted_assign_add(label, pred, weights, var):
    label_and_pred = math_ops.cast(
        math_ops.logical_and(label, pred), dtype=var.dtype)
    if weights is not None:
      label_and_pred *= math_ops.cast(weights, dtype=var.dtype)
    return var.assign_add(math_ops.reduce_sum(label_and_pred, 1))

  loop_vars = {
      ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
  }
  update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
  update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
  update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

  if update_fn or update_tn:
    pred_is_neg = math_ops.logical_not(pred_is_pos)
    loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg)

  if update_fp or update_tn:
    label_is_neg = math_ops.logical_not(label_is_pos)
    loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos)
    if update_tn:
      loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg)

  for matrix_cond, (label, pred) in loop_vars.items():

    if matrix_cond in variables_to_update:
      update_ops.append(
          weighted_assign_add(label, pred, weights_tiled,
                              variables_to_update[matrix_cond]))

  return control_flow_ops.group(update_ops)
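As a quick illustration of the per-pair rules above (plain NumPy, not this function's API), the counts produced for a small batch and several thresholds would be:

import numpy as np

y_true = np.array([1, 1, 0, 0], dtype=bool)
y_pred = np.array([0.9, 0.3, 0.8, 0.1])
thresholds = np.array([0.25, 0.5, 0.75])

pred_is_pos = y_pred[None, :] > thresholds[:, None]  # [num_thresholds, n]
tp = np.sum(y_true & pred_is_pos, axis=1)   # [2, 1, 1]
fn = np.sum(y_true & ~pred_is_pos, axis=1)  # [0, 1, 1]
fp = np.sum(~y_true & pred_is_pos, axis=1)  # [1, 1, 1]
tn = np.sum(~y_true & ~pred_is_pos, axis=1) # [1, 1, 1]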
Example #48
 def Bar(x):
   return array_ops.stack([x])
Example #49
 def _get_pixel(image, _y, _x):
     b, _h, _w = image.get_shape().as_list()[0:-1]
     batch_idx = array_ops.reshape(math_ops.range(b), shape=(b, 1, 1))
     batch_idx = array_ops.tile(batch_idx, multiples=(1, _h - 1, _w - 1))
     indices = array_ops.stack([batch_idx, _y, _x], axis=3)
     return array_ops.gather_nd(image, indices)
Example #50
def _broadcast_to_ragged_shape(rt_input, dst_shape,
                               broadcast_inner_dimensions):
    """Broadcasts rt_input to the ragged shape `dst_shape`."""
    # Check that rt_input and dst_shape have the same row_splits dtype.
    if (isinstance(rt_input, ragged_tensor.RaggedTensor)
            and rt_input.row_splits.dtype != dst_shape.dim_size_dtype):
        if not ragged_config.auto_cast_partition_dtype():
            raise ValueError(
                'rt_input and dst_shape have different row_split '
                'dtypes; use RaggedTensor.with_row_splits_dtype() or '
                'RaggedTensorDynamicShape.with_dim_size_dtype() to '
                'convert to a compatible dtype.')
        rt_input = rt_input.with_row_splits_dtype(dtypes.int64)
        dst_shape = dst_shape.with_dim_size_dtype(dtypes.int64)

    # dst_shape's rank and ragged_rank must be greater than or equal to rt_input's
    if rt_input.shape.ndims is None or dst_shape.rank is None:
        raise ValueError('Unable to broadcast: unknown rank')
    if rt_input.shape.ndims > dst_shape.rank:
        raise ValueError('Incompatible with shape: rank mismatch')
    if (isinstance(rt_input, ragged_tensor.RaggedTensor)
            and rt_input.ragged_rank >= dst_shape.num_partitioned_dimensions):
        raise ValueError('Incompatible with shape: ragged rank mismatch')

    src_shape = RaggedTensorDynamicShape.from_tensor(rt_input)
    src_shape = src_shape.broadcast_to_rank(dst_shape.rank)

    # Add dimensions to rt_input so its rank and ragged_rank matches dst_shape.
    if dst_shape.rank > rt_input.shape.ndims:
        if rt_input.shape.ndims < dst_shape.num_inner_dimensions + 1:
            rt_input = array_ops.reshape(
                rt_input,
                array_ops.concat([[-1], dst_shape.inner_dim_sizes], axis=0))
        for _ in range(dst_shape.rank - rt_input.shape.ndims):
            if ragged_tensor.is_ragged(rt_input):
                nrows = rt_input.nrows()
            else:
                nrows = array_ops.shape(rt_input,
                                        out_type=dst_shape.dim_size_dtype)[0]
            rt_input = ragged_tensor.RaggedTensor.from_row_lengths(
                rt_input, [nrows], validate=False)

    # Add ragged dimensions to match dst_shape.
    if ragged_tensor.is_ragged(rt_input):
        inner_rank_diff = (rt_input.flat_values.shape.ndims - 1 -
                           dst_shape.num_inner_dimensions)
        if inner_rank_diff > 0:
            rt_input = rt_input.with_flat_values(
                ragged_tensor.RaggedTensor.from_tensor(
                    rt_input.flat_values,
                    ragged_rank=inner_rank_diff,
                    row_splits_dtype=dst_shape.dim_size_dtype))
    else:
        rt_input = ragged_tensor.RaggedTensor.from_tensor(
            rt_input,
            ragged_rank=dst_shape.num_partitioned_dimensions - 1,
            row_splits_dtype=dst_shape.dim_size_dtype)

    # Do broadcasting for any dimensions that will remain uniform.  We can do
    # these all at once, since they're independent of one another.
    multiples = [1] * dst_shape.rank
    for axis in range(dst_shape.num_partitioned_dimensions):
        if not src_shape.is_ragged(axis) and not dst_shape.is_ragged(axis):
            src_size = src_shape.dimension_size(axis)
            dst_size = dst_shape.dimension_size(axis)
            if ((tensor_util.constant_value(src_size) in (1, None))
                    and (tensor_util.constant_value(dst_size) != 1)):
                multiples[axis] = array_ops.where(math_ops.equal(src_size, 1),
                                                  dst_size, 1)
    if not all(isinstance(v, int) and v == 1 for v in multiples):
        multiples = array_ops.stack(multiples, axis=0)
        rt_input = ragged_array_ops.tile(rt_input, multiples)

    if broadcast_inner_dimensions:
        new_shape = array_ops.broadcast_dynamic_shape(
            array_ops.shape(rt_input.flat_values,
                            out_type=dst_shape.dim_size_dtype),
            array_ops.concat([[1], dst_shape.inner_dim_sizes], axis=0))
        rt_input = rt_input.with_flat_values(
            array_ops.broadcast_to(rt_input.flat_values, new_shape))

    # Do broadcasting for dimensions that become ragged.  We must do these from
    # outermost to innermost.
    for axis in range(dst_shape.num_partitioned_dimensions):
        if not src_shape.is_ragged(axis) and dst_shape.is_ragged(axis):
            dst_size = dst_shape.dimension_size(axis)
            rt_input = _ragged_tile_axis(rt_input, axis, dst_size,
                                         dst_shape.dim_size_dtype)

    return rt_input
Example #51
 def loop_fn(i):
     x1 = array_ops.gather(x, i)
     return array_ops.stack([x1, y], axis=-1)
Example #52
 def stack(self, name=None):
   """See TensorArray."""
   if self._tensor_array:
     for ix in range(len(self._tensor_array)):
       self._maybe_zero(ix)
   return array_ops.stack(self._tensor_array, name=name)
Example #53
def hessians(ys, xs, name="hessians", colocate_gradients_with_ops=False,
            gate_gradients=False, aggregation_method=None):
  """Constructs the Hessian of sum of `ys` with respect to `x` in `xs`.

  `hessians()` adds ops to the graph to output the Hessian matrix of `ys`
  with respect to `xs`.  It returns a list of `Tensor` of length `len(xs)`
  where each tensor is the Hessian of `sum(ys)`. This function currently
  only supports evaluating the Hessian with respect to (a list of) one-
  dimensional tensors.

  The Hessian is a matrix of second-order partial derivatives of a scalar
  tensor (see https://en.wikipedia.org/wiki/Hessian_matrix for more details).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'hessians'.
    colocate_gradients_with_ops: See `gradients()` documentation for details.
    gate_gradients: See `gradients()` documentation for details.
    aggregation_method: See `gradients()` documentation for details.

  Returns:
    A list of Hessian matrices of `sum(ys)` for each `x` in `xs`.

  Raises:
    LookupError: if one of the operations between `xs` and `ys` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid or not supported. Currently,
      this function only supports one-dimensional `x` in `xs`.
  """
  xs = _AsList(xs)
  kwargs = {
      'colocate_gradients_with_ops': colocate_gradients_with_ops,
      'gate_gradients': gate_gradients,
      'aggregation_method': aggregation_method
    }
  # Compute a hessian matrix for each x in xs
  hessians = []
  for i, x in enumerate(xs):
    # Check dimensions
    ndims = x.get_shape().ndims
    if ndims is None:
      raise ValueError('Cannot compute Hessian because the dimensionality of '
                       'element number %d of `xs` cannot be determined' % i)
    elif ndims != 1:
      raise ValueError('Computing hessians is currently only supported for '
                       'one-dimensional tensors. Element number %d of `xs` has '
                       '%d dimensions.' % (i, ndims))
    with ops.name_scope(name + '_first_derivative'):
      # Compute the partial derivatives of the input with respect to all
      # elements of `x`
      _gradients = gradients(ys, x, **kwargs)[0]
      # Unpack the gradients into a list so we can take derivatives with
      # respect to each element
      _gradients = array_ops.unstack(_gradients)
    with ops.name_scope(name + '_second_derivative'):
      # Compute the partial derivatives with respect to each element of the list
      _hess = [gradients(_gradient, x, **kwargs)[0] for _gradient in _gradients]
      # Pack the list into a matrix and add to the list of hessians
      hessians.append(array_ops.stack(_hess, name=name))
  return hessians
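A quick sanity check of the behavior documented above, using the public tf.hessians wrapper in TF1 graph mode: for f(x) = x^T A x with a one-dimensional x, the Hessian is A + A^T.

import tensorflow as tf

A = tf.constant([[2.0, 1.0], [0.0, 3.0]])
x = tf.constant([1.0, 2.0])
y = tf.tensordot(x, tf.tensordot(A, x, axes=1), axes=1)  # x^T A x, a scalar

with tf.Session() as sess:
  hess = sess.run(tf.hessians(y, x))[0]
print(hess)  # [[4. 1.] [1. 6.]] == A + A^T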
Example #54
 def gather(self, indices, name=None):
   """See TensorArray."""
   del name  # not meaningful in Eager mode
   return array_ops.stack([self._maybe_zero(i) for i in indices.numpy()])
Example #55
  def testLSTMFusedSequenceLengths(self):
    """Verify proper support for sequence lengths in LSTMBlockFusedCell."""
    with self.session(use_gpu=True) as sess:
      batch_size = 3
      input_size = 4
      cell_size = 5
      max_sequence_length = 6

      inputs = []
      for _ in range(max_sequence_length):
        inp = ops.convert_to_tensor(
            np.random.randn(batch_size, input_size), dtype=dtypes.float32)
        inputs.append(inp)
      seq_lengths = constant_op.constant([3, 4, 5])
      cell_inputs = array_ops.stack(inputs)

      initializer = init_ops.random_uniform_initializer(
          -0.01, 0.01, seed=19890213)

      with variable_scope.variable_scope("lstm_cell", initializer=initializer):
        # magic naming so that the cells pick up these variables and reuse them
        variable_scope.get_variable(
            "kernel",
            shape=[input_size + cell_size, cell_size * 4],
            dtype=dtypes.float32)

        variable_scope.get_variable(
            "bias",
            shape=[cell_size * 4],
            dtype=dtypes.float32,
            initializer=init_ops.zeros_initializer())

      cell = lstm_ops.LSTMBlockFusedCell(
          cell_size, cell_clip=0, use_peephole=False, reuse=True,
          name="lstm_cell")

      fused_outputs_op, fused_state_op = cell(
          cell_inputs, dtype=dtypes.float32, sequence_length=seq_lengths)

      cell_vars = [
          v for v in variables.trainable_variables()
          if v.name.endswith("kernel") or v.name.endswith("bias")
      ]

      # Verify that state propagation works if we turn our sequence into
      # tiny (single-time) subsequences, i.e. unfuse the cell
      unfused_outputs_op = []
      state = None
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=True):
        for i, inp in enumerate(inputs):
          lengths = [int(i < l) for l in seq_lengths.eval()]
          output, state = cell(
              array_ops.expand_dims(inp, 0),
              initial_state=state,
              dtype=dtypes.float32,
              sequence_length=lengths)
          unfused_outputs_op.append(output[0])
      unfused_outputs_op = array_ops.stack(unfused_outputs_op)

      sess.run([variables.global_variables_initializer()])
      unfused_outputs, unfused_state = sess.run([unfused_outputs_op, state[0]])
      unfused_grads = sess.run(
          gradients_impl.gradients(unfused_outputs_op, inputs))
      unfused_wgrads = sess.run(
          gradients_impl.gradients(unfused_outputs_op, cell_vars))

      fused_outputs, fused_state = sess.run(
          [fused_outputs_op, fused_state_op[0]])
      fused_grads = sess.run(gradients_impl.gradients(fused_outputs_op, inputs))
      fused_wgrads = sess.run(
          gradients_impl.gradients(fused_outputs_op, cell_vars))

      self.assertAllClose(fused_outputs, unfused_outputs)
      self.assertAllClose(fused_state, unfused_state)
      self.assertAllClose(fused_grads, unfused_grads)
      for fused, unfused in zip(fused_wgrads, unfused_wgrads):
        self.assertAllClose(fused, unfused, rtol=1e-6, atol=1e-6)
Example #56
def stack_dynamic_partitions(data, partitions, num_partitions, name=None):
    """Stacks dynamic partitions of a Tensor or RaggedTensor.

  Returns a RaggedTensor `output` with `num_partitions` rows, where the row
  `output[i]` is formed by stacking all slices `data[j1...jN]` such that
  `partitions[j1...jN] = i`.  Slices of `data` are stacked in row-major
  order.

  If `num_partitions` is an `int` (not a `Tensor`), then this is equivalent to
  `tf.ragged.stack(tf.dynamic_partition(data, partitions, num_partitions))`.

  #### Example:

  >>> data           = ['a', 'b', 'c', 'd', 'e']
  >>> partitions     = [  3,   0,   2,   2,   3]
  >>> num_partitions = 5
  >>> tf.ragged.stack_dynamic_partitions(data, partitions, num_partitions)
  <tf.RaggedTensor [[b'b'], [], [b'c', b'd'], [b'a', b'e'], []]>

  Args:
    data: A `Tensor` or `RaggedTensor` containing the values to stack.
    partitions: An `int32` or `int64` `Tensor` or `RaggedTensor` specifying the
      partition that each slice of `data` should be added to.
      `partitions.shape` must be a prefix of `data.shape`.  Values must be
      greater than or equal to zero, and less than `num_partitions`.
      `partitions` is not required to be sorted.
    num_partitions: An `int32` or `int64` scalar specifying the number of
      partitions to output.  This determines the number of rows in `output`.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the stacked partitions.  The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_partitions, (D)] + data.shape[partitions.rank:]`, where `(D)` is a
    ragged dimension whose length is the number of data slices stacked for
    each `partition`.
  """
    with ops.name_scope(name, 'SegmentStack',
                        [data, partitions, num_partitions]):
        # Convert inputs to tensors.
        data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data,
                                                                name='data')
        row_splits_dtype = (data.row_splits.dtype if isinstance(
            data, ragged_tensor.RaggedTensor) else None)
        partitions = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            partitions, name='partitions', preferred_dtype=row_splits_dtype)
        num_partitions = ops.convert_to_tensor(
            num_partitions,
            name='num_partitions',
            preferred_dtype=partitions.dtype)
        if row_splits_dtype is not None:
            partitions = math_ops.cast(partitions, row_splits_dtype)
        num_partitions = math_ops.cast(num_partitions, partitions.dtype)

        # Sanity-checks for shapes.
        partitions_rank = partitions.shape.ndims
        if partitions_rank is None:
            raise ValueError('partitions must have known rank.')
        num_partitions.shape.assert_has_rank(0)
        partitions.shape.assert_is_compatible_with(
            data.shape[:partitions_rank])

        if partitions_rank == 0:
            # If partitions is a scalar, then just create a RaggedTensor
            # containing the complete `data` value in the specified row.
            return ragged_tensor.RaggedTensor.from_value_rowids(
                values=array_ops.stack([data]),
                value_rowids=array_ops.stack([partitions]),
                nrows=num_partitions,
                validate=False)

        elif partitions_rank == 1:
            # If partitions is a vector (the typical case): we can just use data and
            # partitions as the `values` and `value_rowids` for `from_value_rowids`,
            # as long as we sort them first.
            permutation = sort_ops.argsort(partitions, stable=True)
            value_rowids = array_ops.gather(partitions, permutation)
            values = array_ops.gather(data, permutation)
            check = check_ops.assert_less(
                value_rowids[-1:],
                num_partitions,
                message='partitions must be less than num_partitions')
            with ops.control_dependencies([check]):
                return ragged_tensor.RaggedTensor.from_value_rowids(
                    values, value_rowids, nrows=num_partitions, validate=False)

        else:
            # Handle higher-dimensional partitions via recursion.
            if not isinstance(data, ragged_tensor.RaggedTensor):
                data = ragged_tensor.RaggedTensor.from_tensor(
                    data, row_splits_dtype=partitions.dtype, ragged_rank=1)
            if not isinstance(partitions, ragged_tensor.RaggedTensor):
                partitions = ragged_tensor.RaggedTensor.from_tensor(
                    partitions,
                    row_splits_dtype=partitions.dtype,
                    ragged_rank=max(data.ragged_rank, partitions_rank - 1))
            check = check_ops.assert_equal(
                data.row_splits,
                partitions.row_splits,
                message='data and partitions have incompatible ragged shapes')
            with ops.control_dependencies([check]):
                return stack_dynamic_partitions(data.values, partitions.values,
                                                num_partitions)
Example #57
def blocks_match(sess, use_peephole, dtype=dtypes.float32, cell_clip=None):
  batch_size = 2
  input_size = 3
  cell_size = 4
  sequence_length = 4

  inputs = []
  for _ in range(sequence_length):
    inp = ops.convert_to_tensor(
        np.random.randn(batch_size, input_size), dtype=dtype)
    inputs.append(inp)
  stacked_inputs = array_ops.stack(inputs)

  init_bound = 1e-1 if dtype == dtypes.float16 else 1e-2
  initializer = _get_initializer(init_bound, dtype=dtype, seed=19890212)

  with variable_scope.variable_scope("test", initializer=initializer):
    # magic naming so that the cells pick up these variables and reuse them
    if use_peephole:
      wci = variable_scope.get_variable(
          "rnn/lstm_cell/w_i_diag", shape=[cell_size], dtype=dtype)
      wcf = variable_scope.get_variable(
          "rnn/lstm_cell/w_f_diag", shape=[cell_size], dtype=dtype)
      wco = variable_scope.get_variable(
          "rnn/lstm_cell/w_o_diag", shape=[cell_size], dtype=dtype)

    w = variable_scope.get_variable(
        "rnn/lstm_cell/kernel",
        shape=[input_size + cell_size, cell_size * 4],
        dtype=dtype)
    b = variable_scope.get_variable(
        "rnn/lstm_cell/bias",
        shape=[cell_size * 4],
        dtype=dtype,
        initializer=init_ops.zeros_initializer())

    basic_cell = rnn_cell.LSTMCell(
        cell_size,
        use_peepholes=use_peephole,
        cell_clip=cell_clip,
        dtype=dtype,
        state_is_tuple=True,
        reuse=True)
    basic_outputs_op, basic_state_op = rnn.static_rnn(
        basic_cell, inputs, dtype=dtype)

    if use_peephole:
      _, _, _, _, _, _, block_outputs_op = block_lstm(
          ops.convert_to_tensor(sequence_length, dtype=dtypes.int64),
          inputs,
          w,
          b,
          wci=wci,
          wcf=wcf,
          wco=wco,
          cell_clip=cell_clip,
          use_peephole=True)
    else:
      _, _, _, _, _, _, block_outputs_op = block_lstm(
          ops.convert_to_tensor(sequence_length, dtype=dtypes.int64),
          inputs,
          w,
          b,
          cell_clip=cell_clip)

    fused_cell = lstm_ops.LSTMBlockFusedCell(
        cell_size,
        cell_clip=cell_clip,
        use_peephole=use_peephole,
        reuse=True,
        name="rnn/lstm_cell")
    fused_outputs_op, fused_state_op = fused_cell(stacked_inputs, dtype=dtype)

    sess.run([variables.global_variables_initializer()])
    basic_outputs, basic_state = sess.run([basic_outputs_op, basic_state_op[0]])
    basic_grads = sess.run(gradients_impl.gradients(basic_outputs_op, inputs))
    xs = [w, b]
    if use_peephole:
      xs += [wci, wcf, wco]
    basic_wgrads = sess.run(gradients_impl.gradients(basic_outputs_op, xs))

    block_outputs = sess.run(block_outputs_op)
    block_grads = sess.run(gradients_impl.gradients(block_outputs_op, inputs))
    block_wgrads = sess.run(gradients_impl.gradients(block_outputs_op, xs))

    xs = [w, b]
    if use_peephole:
      xs += [wci, wcf, wco]
    fused_outputs, fused_state = sess.run([fused_outputs_op, fused_state_op[0]])
    fused_grads = sess.run(gradients_impl.gradients(fused_outputs_op, inputs))
    fused_wgrads = sess.run(gradients_impl.gradients(fused_outputs_op, xs))

    return (basic_state, fused_state, basic_outputs, block_outputs,
            fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads,
            block_wgrads, fused_wgrads)
Example #58
def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
    """Gradient for concat op.

  Args:
    op: An operation.
    grad: `Tensor` or `IndexedSlices` representing the gradients with respect
      to each output of the op.
    start_value_index: An integer index of the first value in the op.inputs.
    end_value_index: An integer index of the last value in the op.inputs.
    dim_index: An integer index of the concat_dim or axis parameter in op.inputs.

  Returns:
    Tensors representing the partial gradients with respect to each input
    of the op.

  Raises:
    ValueError: if concat_dim/axis is not statically known.
  """
    def _CreateDenseMaskAndBegin(sizes, concat_dim):
        """Create variables for iteratively slicing a dense gradients tensor."""
        # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
        shape_of_shape = array_ops.shape(sizes[0])
        # Make a vector of length equal to the input's dimensions,
        # with 0's everywhere and 1 in the concat dim position.
        # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
        mask = array_ops.concat([
            array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
            array_ops.fill(shape_of_shape - concat_dim - 1, 0)
        ], 0)
        begin = array_ops.fill(shape_of_shape, 0)
        return mask, begin

    def _ExtractInputShapes(inputs):
        """Extract the shapes of a set of input tensors."""
        sizes = []
        fully_known = True
        for x in inputs:
            input_shape = array_ops.shape(x)
            if not isinstance(input_shape,
                              ops.Tensor) or input_shape.op.type != "Const":
                fully_known = False
                break
            else:
                sizes.append(input_shape)

        if fully_known:
            return sizes
        else:
            return array_ops.shape_n(inputs)

    # Degenerate concatenation, just return grad.
    if len(op.inputs) == 2:
        return grad + [None] if end_value_index <= dim_index else [None] + grad

    concat_dim = op.inputs[dim_index]
    input_values = op.inputs[start_value_index:end_value_index]
    # Using mod here for convenience since concat_dim is already verified
    # in concat implementation to be within the allowed [-rank, rank) range.
    non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0])

    out_grads = []
    if isinstance(grad, ops.Tensor):
        # Get the inputs' tensor shapes
        sizes = _ExtractInputShapes(input_values)
        # The magic number of 16 was found through benchmarking a range of sizes
        # on CPUs and a Maxwell TitanX.  A speedup was seen in a large majority of
        # cases when switching implementations at N=16, but it is possible that
        # there will be a small number of performance regressions.
        # pylint: disable=protected-access
        if len(sizes) > 16:
            # extract the size of each input along the concat dimension
            sizes = array_ops.squeeze(
                array_ops.slice(array_ops.stack(sizes, axis=1),
                                [non_neg_concat_dim, 0], [1, -1]))
            out_grads = array_ops.split(grad, sizes, non_neg_concat_dim)
        else:
            offset = gen_array_ops._concat_offset(non_neg_concat_dim, sizes)
            for (begin, size) in zip(offset, sizes):
                out_grads.append(array_ops.slice(grad, begin, size))
        # pylint: enable=protected-access
    elif isinstance(grad, ops.IndexedSlices):
        concat_dim_static = tensor_util.constant_value(concat_dim)
        if concat_dim_static is None:
            raise ValueError("Can only compute IndexedSlices gradient with "
                             "statically-known concat_dim")
        if concat_dim_static < 0:
            rank = tensor_util.constant_value(array_ops.rank(input_values[0]))
            if rank is None:
                raise ValueError(
                    "Can only compute IndexedSlices gradient with "
                    "negative concat_dim when first value rank is "
                    "statically-known.")
            concat_dim_static %= rank
        # Get the inputs' tensor shapes
        sizes = [array_ops.shape(x) for x in input_values]
        if concat_dim_static > 0:
            # IndexedSlices, non_neg_concat_dim > 0. Each input gets IndexedSlices
            # gradients with all the indices, but with grad.values sliced accordingly.
            # This is like the Tensor case, except shape(grad.values)[0] is not equal
            # to shape(sizes[i])[0], since only a subset of the dim-0 values are
            # stored.
            mask, begin = _CreateDenseMaskAndBegin(sizes, non_neg_concat_dim)
            for size in sizes:
                new_values = array_ops.slice(
                    grad.values, begin,
                    array_ops.concat(
                        [[-1], array_ops.slice(size, [1], [-1])], 0))
                out_grads.append(
                    ops.IndexedSlices(new_values, grad.indices, size))
                # Lint complains begin = begin + ...
                begin = math_ops.add(begin, size * mask)
        else:
            # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
            # only for the relevant indices.
            start = constant_op.constant(0, dtype=grad.indices.dtype)
            for size in sizes:
                size_concat_dim = array_ops.gather(size, non_neg_concat_dim)
                if size_concat_dim.dtype != grad.indices.dtype:
                    size_concat_dim = math_ops.cast(size_concat_dim,
                                                    dtype=grad.indices.dtype)
                end = start + size_concat_dim
                # Compute the 1-D Tensor of indices relevant for this input.
                indices_to_select = array_ops.squeeze(array_ops.where(
                    math_ops.logical_and(grad.indices >= start,
                                         grad.indices < end)),
                                                      squeeze_dims=[1])
                new_indices = array_ops.gather(grad.indices,
                                               indices_to_select) - start
                new_values = array_ops.gather(grad.values, indices_to_select)
                out_grads.append(
                    ops.IndexedSlices(new_values, new_indices, size))
                start = end
    else:
        raise TypeError("Expected Tensor or IndexedSlices, got %s" %
                        type(grad))

    return (out_grads + [None] if end_value_index <= dim_index else [None] +
            out_grads)
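The net effect of the helper above can be checked with the public API: the gradient of a concat is just the upstream gradient split back along the concat axis (TF1 graph-mode sketch).

import tensorflow as tf

a = tf.constant([[1.0, 2.0]])          # shape [1, 2]
b = tf.constant([[3.0, 4.0, 5.0]])     # shape [1, 3]
c = tf.concat([a, b], axis=1)          # shape [1, 5]
upstream = tf.constant([[10., 20., 30., 40., 50.]])
grads = tf.gradients(c, [a, b], grad_ys=upstream)

with tf.Session() as sess:
  ga, gb = sess.run(grads)
print(ga)  # [[10. 20.]]      -- first two columns of the upstream gradient
print(gb)  # [[30. 40. 50.]]  -- remaining three columns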
Example #59
def frechet_classifier_distance(real_images,
                                generated_images,
                                classifier_fn,
                                num_batches=1):
  """Classifier distance for evaluating a generative model.

  This is based on the Frechet Inception distance, but for an arbitrary
  classifier.

  This technique is described in detail in https://arxiv.org/abs/1706.08500.
  Given two Gaussian distributions with means m and m_w and covariance matrices
  C and C_w, this function calculates

              |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Note that unlike the
  Inception score, this is a true distance and utilizes information about real
  world images.

  Note that when computed using sample means and sample covariance matrices,
  the Frechet distance is biased; the bias is larger for small sample sizes
  (e.g. even if the two distributions are the same, the expected Frechet
  distance is large for a small sample size). It is therefore important to
  use the same sample size when comparing the Frechet classifier distance of
  two generative models.

  NOTE: This function consumes images, computes their activations, and then
  computes the classifier score. If you would like to precompute many
  activations for real and generated images for large batches, please use
  frechet_classifier_distance_from_activations(), which this method also uses.

  Args:
    real_images: Real images to use to compute Frechet Inception distance.
    generated_images: Generated images to use to compute Frechet Inception
      distance.
    classifier_fn: A function that takes images and produces activations
      based on a classifier.
    num_batches: Number of batches to split images in to in order to
      efficiently run them through the classifier network.

  Returns:
    The Frechet Inception distance. A floating-point scalar of the same type
    as the output of `classifier_fn`.
  """
  real_images_list = array_ops.split(
      real_images, num_or_size_splits=num_batches)
  generated_images_list = array_ops.split(
      generated_images, num_or_size_splits=num_batches)

  real_imgs = array_ops.stack(real_images_list)
  generated_imgs = array_ops.stack(generated_images_list)

  # Compute the activations using the memory-efficient `map_fn`.
  def compute_activations(elems):
    return functional_ops.map_fn(fn=classifier_fn,
                                 elems=elems,
                                 parallel_iterations=1,
                                 back_prop=False,
                                 swap_memory=True,
                                 name='RunClassifier')

  real_a = compute_activations(real_imgs)
  gen_a = compute_activations(generated_imgs)

  # Ensure the activations have the right shapes.
  real_a = array_ops.concat(array_ops.unstack(real_a), 0)
  gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)

  return frechet_classifier_distance_from_activations(real_a, gen_a)
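The formula in the docstring can be reproduced directly from two sets of activations with plain NumPy/SciPy (illustration only; `frechet_distance_np` is a hypothetical helper, not the TF implementation used above):

import numpy as np
from scipy import linalg

def frechet_distance_np(act_real, act_gen):
  m, m_w = act_real.mean(axis=0), act_gen.mean(axis=0)
  c = np.cov(act_real, rowvar=False)
  c_w = np.cov(act_gen, rowvar=False)
  covmean = linalg.sqrtm(c.dot(c_w)).real  # (C * C_w)^(1/2)
  return np.sum((m - m_w) ** 2) + np.trace(c + c_w - 2.0 * covmean)

rng = np.random.RandomState(0)
real_acts = rng.normal(size=(512, 64))
gen_acts = rng.normal(loc=0.5, size=(512, 64))
print(frechet_distance_np(real_acts, gen_acts))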
Example #60
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
    """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.  Alternatively, a `PartitionedVariable`,
        created by partitioning along dimension 0.  The total unpartitioned
        shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
        vocab size and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
        ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not None, all embeddings are l2-normalized to max_norm before
        combining.


  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
    if combiner is None:
        logging.warn("The default value of combiner will change from \"mean\" "
                     "to \"sqrtn\" after 2016/11/01.")
        combiner = "mean"
    if embedding_weights is None:
        raise ValueError("Missing embedding_weights %s." % embedding_weights)
    if isinstance(embedding_weights, variables.PartitionedVariable):
        embedding_weights = list(
            embedding_weights)  # get underlying Variables.
    if not isinstance(embedding_weights, list):
        embedding_weights = [embedding_weights]
    if len(embedding_weights) < 1:
        raise ValueError("Missing embedding_weights %s." % embedding_weights)

    dtype = sparse_weights.dtype if sparse_weights is not None else None
    if isinstance(embedding_weights, variables.PartitionedVariable):
        embedding_weights = list(embedding_weights)
    embedding_weights = [
        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
    ]

    contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                                [sparse_weights])

    with ops.name_scope(name, "embedding_lookup", embedding_weights +
                        [sparse_ids, sparse_weights]) as scope:
        # Reshape higher-rank sparse ids and weights to linear segment ids.
        original_shape = sparse_ids.dense_shape
        original_rank_dim = sparse_ids.dense_shape.get_shape()[0]
        original_rank = (array_ops.size(original_shape)
                         if original_rank_dim.value is None else
                         original_rank_dim.value)
        sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
            math_ops.reduce_prod(
                array_ops.slice(original_shape, [0], [original_rank - 1])),
            array_ops.gather(original_shape, original_rank - 1)
        ])
        if sparse_weights is not None:
            sparse_weights = sparse_tensor.SparseTensor(
                sparse_ids.indices, sparse_weights.values,
                sparse_ids.dense_shape)

        # Prune invalid ids and weights.
        sparse_ids, sparse_weights = _prune_invalid_ids(
            sparse_ids, sparse_weights)

        # Fill in dummy values for empty features, if necessary.
        sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
            sparse_ids, default_id or 0)
        if sparse_weights is not None:
            sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
                sparse_weights, 1.0)

        result = embedding_ops.embedding_lookup_sparse(
            embedding_weights,
            sparse_ids,
            sparse_weights,
            combiner=combiner,
            partition_strategy=partition_strategy,
            name=None if default_id is None else scope,
            max_norm=max_norm)

        if default_id is None:
            # Broadcast is_row_empty to the same shape as embedding_lookup_result,
            # for use in Select.
            is_row_empty = array_ops.tile(
                array_ops.reshape(is_row_empty, [-1, 1]),
                array_ops.stack([1, array_ops.shape(result)[1]]))

            result = array_ops.where(is_row_empty,
                                     array_ops.zeros_like(result),
                                     result,
                                     name=scope)

        # Reshape back from linear ids back into higher-dimensional dense result.
        final_result = array_ops.reshape(
            result,
            array_ops.concat([
                array_ops.slice(math_ops.cast(original_shape, dtypes.int32),
                                [0], [original_rank - 1]),
                array_ops.slice(array_ops.shape(result), [1], [-1])
            ], 0))
        final_result.set_shape(
            tensor_shape.unknown_shape(
                (original_rank_dim - 1).value).concatenate(
                    result.get_shape()[1:]))
        return final_result
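The pruning and default-row behavior described in the docstring, sketched in plain NumPy (not this function's API; `lookup_mean` is a hypothetical helper): ids < 0 are dropped, and a row left with no ids gets the zero vector when no default_id is supplied.

import numpy as np

embeddings = np.arange(12, dtype=np.float32).reshape(4, 3)  # vocab of 4, dim 3
rows = [[0, -1, 2],  # -1 is invalid and gets pruned
        [-1],        # becomes empty -> zero vector (no default_id)
        [3]]

def lookup_mean(row):
  valid = [i for i in row if i >= 0]
  if not valid:
    return np.zeros(3, dtype=np.float32)
  return embeddings[valid].mean(axis=0)  # combiner="mean"

print(np.stack([lookup_mean(r) for r in rows]))
# [[ 3.  4.  5.]
#  [ 0.  0.  0.]
#  [ 9. 10. 11.]]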