Example #1
  def doTestIndexedSlicesGradientInCondInWhileLoop(self, use_resource=False):
    with ops.Graph().as_default():
      embedding_matrix = variable_scope.get_variable(
          "embedding_matrix", [5, 5],
          initializer=init_ops.random_normal_initializer(),
          use_resource=use_resource)

      def cond(it, _):
        return it < 5

      def body(it, cost):
        embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
        cost = control_flow_ops.cond(
            math_ops.equal(it, 3), lambda: math_ops.square(cost),
            lambda: cost + math_ops.reduce_sum(embedding))
        return it + 1, cost

      _, cost = control_flow_ops.while_loop(
          cond, body, [constant_op.constant(0),
                       constant_op.constant(0.0)])

      dynamic_grads = gradients_impl.gradients(cost, [embedding_matrix])[0]
      dynamic_grads = math_ops.segment_sum(dynamic_grads.values,
                                           dynamic_grads.indices)

      embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
      static = math_ops.square(
          math_ops.reduce_sum(embedding) + math_ops.reduce_sum(embedding) +
          math_ops.reduce_sum(embedding)) + math_ops.reduce_sum(embedding)
      static_grads = gradients_impl.gradients(static, [embedding_matrix])[0]
      static_grads = math_ops.segment_sum(static_grads.values,
                                          static_grads.indices)

      with self.cached_session():
        self.evaluate(variables.global_variables_initializer())
        self.assertAllEqual(*self.evaluate([static_grads, dynamic_grads]))

  def doTestIndexedSlicesGradientInCondInWhileLoop(self, use_resource=False):
    with ops.Graph().as_default():
      embedding_matrix = variable_scope.get_variable(
          "embedding_matrix", [5, 5],
          initializer=init_ops.random_normal_initializer(),
          use_resource=use_resource)

      def Cond(it, _):
        return it < 5

      def Body(it, cost):
        embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
        cost = control_flow_ops.cond(
            math_ops.equal(it, 3), lambda: math_ops.square(cost),
            lambda: cost + math_ops.reduce_sum(embedding))
        return it + 1, cost

      _, cost = control_flow_ops.while_loop(
          Cond, Body, [constant_op.constant(0), constant_op.constant(0.0)])

      dynamic_grads = gradients_impl.gradients(cost, [embedding_matrix])[0]
      dynamic_grads = math_ops.segment_sum(dynamic_grads.values,
                                           dynamic_grads.indices)

      embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
      static = math_ops.square(
          math_ops.reduce_sum(embedding) + math_ops.reduce_sum(embedding) +
          math_ops.reduce_sum(embedding)) + math_ops.reduce_sum(embedding)
      static_grads = gradients_impl.gradients(static, [embedding_matrix])[0]
      static_grads = math_ops.segment_sum(static_grads.values,
                                          static_grads.indices)

      with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        self.assertAllEqual(*sess.run([static_grads, dynamic_grads]))
  def testGradientMatchesSegmentSum(self):
    # Strategy: compute the gradient for UnsortedSegmentSum and SegmentSum
    # and compare the outputs, which should be identical.
    # NB: for this test to work, indices must be valid for SegmentSum, namely
    # they must be sorted, the indices must be contiguous, and num_segments
    # must be max(indices) + 1.
    indices = [0, 0, 1, 1, 1, 2, 3, 4, 5]
    n = len(indices)
    num_cols = 2
    shape = [n, num_cols]
    num_segments = max(indices) + 1
    for dtype in self.differentiable_dtypes:
      with self.cached_session(use_gpu=True):
        tf_x, np_x = self._input(shape, dtype=dtype)
        # Results from UnsortedSegmentSum
        unsorted_s = math_ops.unsorted_segment_sum(
            data=tf_x, segment_ids=indices, num_segments=num_segments)
        unsorted_jacob_t, unsorted_jacob_n = (
            gradient_checker.compute_gradient(tf_x, shape, unsorted_s,
                                              [num_segments, num_cols],
                                              x_init_value=np_x, delta=1))

        # Results from SegmentSum
        sorted_s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
        sorted_jacob_t, sorted_jacob_n = gradient_checker.compute_gradient(
            tf_x,
            shape,
            sorted_s, [num_segments, num_cols],
            x_init_value=np_x,
            delta=1)
      self.assertAllClose(unsorted_jacob_t, sorted_jacob_t)
      self.assertAllClose(unsorted_jacob_n, sorted_jacob_n)
 def testSegmentIdsValid(self):
   # This is a baseline for the following SegmentIdsInvalid* tests.
   shape = [4, 4]
   with self.test_session():
     tf_x, _ = self._input(shape)
     indices = [0, 0, 0, 1]
     result = math_ops.segment_sum(data=tf_x, segment_ids=indices).eval()
     self.assertAllEqual([[15, 18, 21, 24], [13, 14, 15, 16]], result)
 def testSegmentIdsInvalid5(self):
   shape = [4, 4]
   with self.test_session():
     tf_x, _ = self._input(shape)
     indices = [0, 0, 0, -2]
     s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
     with self.assertRaisesOpError("segment ids must be >= 0"):
       s.eval()
 def testSegmentIdsInvalid2(self):
   shape = [4, 4]
   with self.cached_session():
     tf_x, _ = self._input(shape)
     indices = [0, 1, 0, 1]
     s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
     with self.assertRaisesOpError("segment ids are not increasing"):
       s.eval()
 def testSegmentIdsInvalid2(self):
   shape = [4, 4]
   with self.test_session():
     tf_x, _ = self._input(shape)
     indices = [1, 1, 2, 2]
     s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
     with self.assertRaisesOpError("segment ids do not start at 0"):
       s.eval()
 def testSegmentIdsSize(self):
   shape = [4, 4]
   with self.test_session():
     tf_x, _ = self._input(shape)
     indices = [0, 1]
     s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
     with self.assertRaisesOpError("segment_ids should be the same size"):
       s.eval()
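
A minimal standalone sketch of the property that the strategy comment in testGradientMatchesSegmentSum above relies on. It uses the public tf.math API rather than the internal math_ops module used in the snippets, so the names and TF 2.x eager style here are assumptions, not part of the original test: when segment ids are sorted, contiguous, and num_segments equals max(ids) + 1, SegmentSum and UnsortedSegmentSum agree.

import tensorflow as tf

data = tf.constant([[1., 2.], [3., 4.], [5., 6.]])
segment_ids = tf.constant([0, 0, 1])  # sorted, contiguous, max(ids) + 1 == 2

sorted_sum = tf.math.segment_sum(data, segment_ids)
unsorted_sum = tf.math.unsorted_segment_sum(data, segment_ids, num_segments=2)
# Both evaluate to [[4., 6.], [5., 6.]].
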
Example #9
def _SegmentMeanGrad(op, grad):
    """Gradient for SegmentMean."""
    input_rank = array_ops.rank(op.inputs[0])
    ones_shape = array_ops.concat(
        0, [array_ops.shape(op.inputs[1]), array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1)]
    )
    ones = array_ops.fill(ones_shape, constant_op.constant(1, dtype=grad.dtype))
    scaled_grad = grad * math_ops.inv(math_ops.segment_sum(ones, op.inputs[1]))
    return array_ops.gather(scaled_grad, op.inputs[1]), None
 def testSegmentIdsInvalid5(self):
   shape = [4, 4]
   for use_gpu in [True, False]:
     with self.cached_session(use_gpu=use_gpu):
       tf_x, _ = self._input(shape, dtype=dtypes_lib.float32)
       indices = [0, 0, 0, -2]
       s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
       with self.assertRaisesOpError("segment ids must be >= 0"):
         s.eval()
 def testSegmentIdsHole(self):
   shape = [4, 4]
   with self.test_session():
     tf_x, np_x = self._input(shape)
     indices = [0, 0, 3, 3]
     np_ans = self._segmentReduce(indices, np_x, np.add)
     s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
     tf_ans = s.eval()
     self.assertAllClose(np_ans, tf_ans)
 def testSegmentIdsSize(self):
   shape = [4, 4]
   for use_gpu in [True, False]:
     with self.cached_session(use_gpu=use_gpu):
       tf_x, _ = self._input(shape)
       indices = [0, 1]
       s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
       with self.assertRaisesOpError("segment_ids should be the same size"):
         s.eval()
 def testSegmentIdsValid(self):
   # This is a baseline for the following SegmentIdsInvalid* tests.
   shape = [4, 4]
   for use_gpu in [True, False]:
     with self.cached_session(use_gpu=use_gpu):
       tf_x, _ = self._input(shape, dtype=dtypes_lib.float32)
       indices = [0, 0, 0, 1]
       result = math_ops.segment_sum(data=tf_x, segment_ids=indices).eval()
       self.assertAllEqual([[15, 18, 21, 24], [13, 14, 15, 16]], result)
Example #14
 def testSegmentIdsInvalid1(self):
     shape = [4, 4]
     with self.cached_session():
         tf_x, _ = self._input(shape)
         indices = [-1, -1, 0, 0]
         s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
         with self.assertRaisesOpError(
                 r"Segment id -1 out of range \[0, 1\), possibly because "
                 "'segment_ids' input is not sorted."):
             self.evaluate(s)
Example #15
File: math_grad.py  Project: c0g/tomserflow
def _SegmentMeanGrad(op, grad):
  """Gradient for SegmentMean."""
  input_rank = array_ops.rank(op.inputs[0])
  ones_shape = array_ops.concat(
      0, [array_ops.shape(op.inputs[1]),
          array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1)])
  ones = array_ops.fill(ones_shape,
                        constant_op.constant(1, dtype=grad.dtype))
  scaled_grad = grad * math_ops.inv(math_ops.segment_sum(ones, op.inputs[1]))
  return array_ops.gather(scaled_grad, op.inputs[1]), None
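
For intuition about the scaling in _SegmentMeanGrad above, here is a small sketch (TF 2.x eager and the public tf.math API are assumptions relative to the snippet) checking that the gradient of a segment mean is the incoming gradient divided by each segment's size, gathered back to the input rows:

import tensorflow as tf

x = tf.constant([[1., 1.], [2., 2.], [10., 10.]])
ids = tf.constant([0, 0, 1])  # segment 0 has 2 rows, segment 1 has 1 row

with tf.GradientTape() as tape:
    tape.watch(x)
    loss = tf.reduce_sum(tf.math.segment_mean(x, ids))

# Rows in segment 0 each receive 1/2, the row in segment 1 receives 1 -- the
# same inv(segment_sum(ones)) factor that is gathered by op.inputs[1] above.
print(tape.gradient(loss, x))
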
Example #16
 def testSegmentIdsInvalid5(self):
     shape = [4, 4]
     with self.test_session():
         tf_x, _ = self._input(shape)
         indices = [0, 1, 2, 0]
         s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
         with self.assertRaisesOpError(
                 r"Segment id 1 out of range \[0, 1\), probably "
                 "because 'segment_ids' input is not sorted."):
             s.eval()
 def testSegmentIdsSize(self):
     shape = [4, 4]
     for use_gpu in [True, False]:
         with self.test_session(use_gpu=use_gpu):
             tf_x, _ = self._input(shape)
             indices = [0, 1]
             s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
             with self.assertRaisesOpError(
                     "segment_ids should be the same size"):
                 s.eval()
 def testSegmentIdsHole(self):
   shape = [4, 4]
   for use_gpu in [True, False]:
     with self.cached_session(use_gpu=use_gpu):
       tf_x, np_x = self._input(shape, dtype=dtypes_lib.float32)
       indices = [0, 0, 3, 3]
       np_ans = self._segmentReduce(indices, np_x, np.add)
       s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
       tf_ans = s.eval()
       self.assertAllClose(np_ans, tf_ans)
Example #19
 def testSegmentIdsHole(self):
     shape = [4, 4]
     for use_gpu in [True, False]:
         with self.cached_session(use_gpu=use_gpu):
             tf_x, np_x = self._input(shape, dtype=dtypes_lib.float32)
             indices = [0, 0, 3, 3]
             np_ans = self._segmentReduce(indices, np_x, np.add)
             s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
             tf_ans = self.evaluate(s)
             self.assertAllClose(np_ans, tf_ans)
 def testSegmentIdsInvalid3(self):
   shape = [4, 4]
   with self.cached_session():
     tf_x, _ = self._input(shape)
     indices = [0, 1, 2, 0]
     s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
     with self.assertRaisesOpError(
         r"Segment id 1 out of range \[0, 1\), possibly "
         "because 'segment_ids' input is not sorted."):
       s.eval()
 def testSegmentIdsGreaterThanZero(self):
     shape = [4, 4]
     for use_gpu in [True, False]:
         with self.test_session(use_gpu=use_gpu):
             tf_x, np_x = self._input(shape, dtype=dtypes_lib.float32)
             indices = [1, 1, 2, 2]
             np_ans = self._segmentReduce(indices, np_x, np.add)
             s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
             tf_ans = s.eval()
             self.assertAllClose(np_ans, tf_ans)
Example #22
    def doTestIndexedSlicesGradientInCondInWhileLoop(self, use_resource=False):
        with ops.Graph().as_default():
            embedding_matrix = variable_scope.get_variable(
                "embedding_matrix", [5, 5],
                initializer=init_ops.random_normal_initializer(),
                use_resource=use_resource)

            def cond(it, _):
                return it < 5

            def body(it, cost):
                embedding = embedding_ops.embedding_lookup(
                    embedding_matrix, [0])
                cost = control_flow_ops.cond(
                    math_ops.equal(it, 3), lambda: math_ops.square(cost),
                    lambda: cost + math_ops.reduce_sum(embedding))
                return it + 1, cost

            _, cost = control_flow_ops.while_loop(
                cond, body,
                [constant_op.constant(0),
                 constant_op.constant(0.0)])

            dynamic_grads = gradients_impl.gradients(cost,
                                                     [embedding_matrix])[0]
            dynamic_grads = math_ops.segment_sum(dynamic_grads.values,
                                                 dynamic_grads.indices)

            embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
            static = math_ops.square(
                math_ops.reduce_sum(embedding) +
                math_ops.reduce_sum(embedding) + math_ops.reduce_sum(embedding)
            ) + math_ops.reduce_sum(embedding)
            static_grads = gradients_impl.gradients(static,
                                                    [embedding_matrix])[0]
            static_grads = math_ops.segment_sum(static_grads.values,
                                                static_grads.indices)

            with self.cached_session() as sess:
                self.evaluate(variables.global_variables_initializer())
                self.assertAllEqual(
                    *self.evaluate([static_grads, dynamic_grads]))
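
The test above compares a while_loop gradient, delivered as an IndexedSlices, against a statically unrolled version, using segment_sum to fold repeated slice rows together. A small eager-mode sketch of that aggregation step (TF 2.x assumed; the gradient of a gather-style lookup usually, but not always, arrives as tf.IndexedSlices):

import tensorflow as tf

embedding_matrix = tf.Variable(tf.random.normal([5, 5]))
with tf.GradientTape() as tape:
    embedding = tf.nn.embedding_lookup(embedding_matrix, [0, 0])
    loss = tf.reduce_sum(embedding)

grad = tape.gradient(loss, embedding_matrix)
if isinstance(grad, tf.IndexedSlices):
    # Fold duplicate slice indices together, as the test above does.
    grad = tf.math.segment_sum(grad.values, grad.indices)
print(grad)
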
Example #23
 def testSegmentIdsValid(self):
     # This is a baseline for the following SegmentIdsInvalid* tests.
     shape = [4, 4]
     for use_gpu in [True, False]:
         with self.test_session(use_gpu=use_gpu):
             tf_x, _ = self._input(shape, dtype=dtypes_lib.float32)
             indices = [0, 0, 0, 1]
             result = math_ops.segment_sum(data=tf_x,
                                           segment_ids=indices).eval()
             self.assertAllEqual([[15, 18, 21, 24], [13, 14, 15, 16]],
                                 result)
Example #24
def _SegmentMinOrMaxGrad(op, grad):
  """ Gradient for SegmentMin and SegmentMax. """
  zeros = array_ops.zeros_like(op.inputs[0], dtype=op.inputs[0].dtype)
  # Get the number of selected (minimum or maximum) elements in each segment.
  gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1])
  is_selected = math_ops.equal(op.inputs[0], gathered_outputs)
  num_selected = math_ops.segment_sum(math_ops.cast(is_selected, grad.dtype),
                                      op.inputs[1])
  # Compute the gradient for each segment. The gradient for the ith segment is
  # divided evenly among the selected elements in that segment.
  weighted_grads = math_ops.div(grad, num_selected)
  gathered_grads = array_ops.gather(weighted_grads, op.inputs[1])
  return array_ops.where(is_selected, gathered_grads, zeros), None
Example #25
def _SegmentMinOrMaxGrad(op, grad):
    """ Gradient for SegmentMin and SegmentMax. """
    zeros = array_ops.zeros_like(op.inputs[0], dtype=op.inputs[0].dtype)
    # Get the number of selected (minimum or maximum) elements in each segment.
    gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1])
    is_selected = math_ops.equal(op.inputs[0], gathered_outputs)
    num_selected = math_ops.segment_sum(math_ops.cast(is_selected, grad.dtype),
                                        op.inputs[1])
    # Compute the gradient for each segment. The gradient for the ith segment is
    # divided evenly among the selected elements in that segment.
    weighted_grads = math_ops.div(grad, num_selected)
    gathered_grads = array_ops.gather(weighted_grads, op.inputs[1])
    return array_ops.where(is_selected, gathered_grads, zeros), None
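
A quick numeric check of the even split described in the comments above (public tf.math API and TF 2.x eager execution assumed): when a segment has tied maxima, the upstream gradient is divided evenly among the selected elements and non-selected elements receive zero.

import tensorflow as tf

x = tf.constant([3., 3., 1., 5.])
ids = tf.constant([0, 0, 0, 1])

with tf.GradientTape() as tape:
    tape.watch(x)
    loss = tf.reduce_sum(tf.math.segment_max(x, ids))

# Expected [0.5, 0.5, 0., 1.] given the div-by-num_selected rule above.
print(tape.gradient(loss, x))
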
Example #26
    def _linear_predictions(self, examples):
        """Returns predictions of the form w*x."""
        with name_scope("sdca/prediction"):
            sparse_variables = self._convert_n_to_tensor(self._variables["sparse_features_weights"])
            result = 0.0
            for sfc, sv in zip(examples["sparse_features"], sparse_variables):
                # TODO(sibyl-Aix6ihai): following does not take care of missing features.
                result += math_ops.segment_sum(
                    math_ops.mul(array_ops.gather(sv, sfc.feature_indices), sfc.feature_values), sfc.example_indices
                )
            dense_features = self._convert_n_to_tensor(examples["dense_features"])
            dense_variables = self._convert_n_to_tensor(self._variables["dense_features_weights"])

            for i in range(len(dense_variables)):
                result += math_ops.matmul(dense_features[i], array_ops.expand_dims(dense_variables[i], -1))

        # Reshaping to allow shape inference at graph construction time.
        return array_ops.reshape(result, [-1])
Example #27
 def _logits(self, examples):
   """Compute logits for each example."""
   with name_scope('logits'):
     sparse_variables = self._convert_n_to_tensor(self._variables[
         'sparse_features_weights'])
     logits = 0
     for st_i, sv in zip(examples['sparse_features'], sparse_variables):
       ei, fi = array_ops.split(1, 2, st_i.indices)
       ei = array_ops.reshape(ei, [-1])
       fi = array_ops.reshape(fi, [-1])
       fv = array_ops.reshape(st_i.values, [-1])
       # TODO(rohananil): This does not work if examples have empty features.
       logits += math_ops.segment_sum(
           math_ops.mul(
               array_ops.gather(sv, fi), fv), array_ops.reshape(ei, [-1]))
     dense_features = self._convert_n_to_tensor(examples['dense_features'])
     dense_variables = self._convert_n_to_tensor(self._variables[
         'dense_features_weights'])
     for i in xrange(len(dense_variables)):
       logits += dense_features[i] * dense_variables[i]
     return logits
Example #28
 def _linear_predictions(self, examples):
   """Returns predictions of the form w*x."""
   with name_scope('sdca/prediction'):
     sparse_variables = self._convert_n_to_tensor(self._variables[
         'sparse_features_weights'])
     predictions = 0
     for st_i, sv in zip(examples['sparse_features'], sparse_variables):
       ei, fi = array_ops.split(1, 2, st_i.indices)
       ei = array_ops.reshape(ei, [-1])
       fi = array_ops.reshape(fi, [-1])
       fv = array_ops.reshape(st_i.values, [-1])
       # TODO(rohananil): This does not work if examples have empty features.
       predictions += math_ops.segment_sum(
           math_ops.mul(
               array_ops.gather(sv, fi), fv), array_ops.reshape(ei, [-1]))
     dense_features = self._convert_n_to_tensor(examples['dense_features'])
     dense_variables = self._convert_n_to_tensor(self._variables[
         'dense_features_weights'])
     for i in range(len(dense_variables)):
       predictions += dense_features[i] * dense_variables[i]
   return predictions
Example #29
  def _linear_predictions(self, examples):
    """Returns predictions of the form w*x."""
    with name_scope('sdca/prediction'):
      sparse_variables = self._convert_n_to_tensor(self._variables[
          'sparse_features_weights'])
      result = 0.0
      for sfc, sv in zip(examples['sparse_features'], sparse_variables):
        # TODO(sibyl-Aix6ihai): following does not take care of missing features.
        result += math_ops.segment_sum(
            math_ops.mul(
                array_ops.gather(sv, sfc.feature_indices), sfc.feature_values),
            sfc.example_indices)
      dense_features = self._convert_n_to_tensor(examples['dense_features'])
      dense_variables = self._convert_n_to_tensor(self._variables[
          'dense_features_weights'])

      for i in range(len(dense_variables)):
        result += dense_features[i] * dense_variables[i]

    # Reshaping to allow shape inference at graph construction time.
    return array_ops.reshape(result, [-1])
 def testGradientMatchesSegmentSum(self):
     # Strategy: compute the gradient for UnsortedSegmentSum and SegmentSum
     # and compare the outputs, which should be identical.
     # NB: for this test to work, indices must be valid for SegmentSum, namely
      # they must be sorted, the indices must be contiguous, and num_segments
     # must be max(indices) + 1.
     indices = [0, 0, 1, 1, 1, 2, 3, 4, 5]
     n = len(indices)
     num_cols = 2
     shape = [n, num_cols]
     num_segments = max(indices) + 1
     with self.test_session(use_gpu=self.use_gpu):
         tf_x, np_x = self._input(shape, dtype=dtypes_lib.float64)
         # Results from UnsortedSegmentSum
         unsorted_s = math_ops.unsorted_segment_sum(
             data=tf_x, segment_ids=indices, num_segments=num_segments)
         (unsorted_jacob_t,
          unsorted_jacob_n) = gradient_checker.compute_gradient(
              tf_x,
              shape,
              unsorted_s, [num_segments, num_cols],
              x_init_value=np_x.astype(np.double),
              delta=1)
         # Results from SegmentSum
         sorted_s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
         sorted_jacob_t, sorted_jacob_n = gradient_checker.compute_gradient(
             tf_x,
             shape,
             sorted_s, [num_segments, num_cols],
             x_init_value=np_x.astype(np.double),
             delta=1)
     self.assertAllClose(unsorted_jacob_t,
                         sorted_jacob_t,
                         rtol=1e-3,
                         atol=1e-3)
     self.assertAllClose(unsorted_jacob_n,
                         sorted_jacob_n,
                         rtol=1e-3,
                         atol=1e-3)
Example #31
 def _logits(self, examples):
   """Compute logits for each example."""
   with name_scope('logits'):
     sparse_variables = self._convert_n_to_tensor(self._variables[
         'sparse_features_weights'])
     logits = 0
     for st_i, sv in zip(examples['sparse_features'], sparse_variables):
       ei, fi = array_ops.split(1, 2, st_i.indices)
       ei = array_ops.reshape(ei, [-1])
       fi = array_ops.reshape(fi, [-1])
       fv = array_ops.reshape(st_i.values, [-1])
       # TODO(rohananil): This does not work if examples have empty
       # features.
       logits += math_ops.segment_sum(
           math_ops.mul(
               array_ops.gather(sv, fi), fv), array_ops.reshape(ei, [-1]))
     dense_features = self._convert_n_to_tensor(examples['dense_features'])
     dense_variables = self._convert_n_to_tensor(self._variables[
         'dense_features_weights'])
     for i in xrange(len(dense_variables)):
       logits += dense_features[i] * dense_variables[i]
     return logits
Example #32
  def _linear_predictions(self, examples):
    """Returns predictions of the form w*x."""
    with name_scope('sdca/prediction'):
      sparse_variables = self._convert_n_to_tensor(self._variables[
          'sparse_features_weights'])
      result = 0.0
      for st_i, sv in zip(examples['sparse_features'], sparse_variables):
        ei, fi = array_ops.split(1, 2, st_i.indices)
        ei = array_ops.reshape(ei, [-1])
        fi = array_ops.reshape(fi, [-1])
        fv = array_ops.reshape(st_i.values, [-1])
        # TODO(sibyl-Aix6ihai): This does not work if examples have empty features.
        result += math_ops.segment_sum(
            math_ops.mul(array_ops.gather(sv, fi), fv), ei)
      dense_features = self._convert_n_to_tensor(examples['dense_features'])
      dense_variables = self._convert_n_to_tensor(self._variables[
          'dense_features_weights'])

      for i in range(len(dense_variables)):
        result += dense_features[i] * dense_variables[i]

    # Reshaping to allow shape inference at graph construction time.
    return array_ops.reshape(result, [-1])
Example #33
  def _linear_predictions(self, examples):
    """Returns predictions of the form w*x."""
    with name_scope('sdca/prediction'):
      sparse_variables = self._convert_n_to_tensor(self._variables[
          'sparse_features_weights'])
      result = 0.0
      for st_i, sv in zip(examples['sparse_features'], sparse_variables):
        ei, fi = array_ops.split(1, 2, st_i.indices)
        ei = array_ops.reshape(ei, [-1])
        fi = array_ops.reshape(fi, [-1])
        fv = array_ops.reshape(st_i.values, [-1])
        # TODO(sibyl-Aix6ihai): This does not work if examples have empty features.
        result += math_ops.segment_sum(
            math_ops.mul(array_ops.gather(sv, fi), fv), ei)
      dense_features = self._convert_n_to_tensor(examples['dense_features'])
      dense_variables = self._convert_n_to_tensor(self._variables[
          'dense_features_weights'])

      for i in range(len(dense_variables)):
        result += dense_features[i] * dense_variables[i]

    # Reshaping to allow shape inference at graph construction time.
    return array_ops.reshape(result, [-1])
Example #34
def _SegmentMinOrMaxGrad(op, grad, is_sorted):
    """Gradient for SegmentMin and (unsorted) SegmentMax. They share similar code."""
    zeros = array_ops.zeros(array_ops.shape(op.inputs[0]),
                            dtype=op.inputs[0].dtype)

    # Get the number of selected (minimum or maximum) elements in each segment.
    gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1])
    is_selected = math_ops.equal(op.inputs[0], gathered_outputs)
    if is_sorted:
        num_selected = math_ops.segment_sum(
            math_ops.cast(is_selected, grad.dtype), op.inputs[1])
    else:
        num_selected = math_ops.unsorted_segment_sum(
            math_ops.cast(is_selected, grad.dtype), op.inputs[1], op.inputs[2])

    # Compute the gradient for each segment. The gradient for the ith segment is
    # divided evenly among the selected elements in that segment.
    weighted_grads = math_ops.div(grad, num_selected)
    gathered_grads = array_ops.gather(weighted_grads, op.inputs[1])

    if is_sorted:
        return array_ops.where(is_selected, gathered_grads, zeros), None
    else:
        return array_ops.where(is_selected, gathered_grads, zeros), None, None
Example #35
def _SegmentMinOrMaxGrad(op, grad, is_sorted):
  """Gradient for SegmentMin and (unsorted) SegmentMax. They share similar code."""
  zeros = array_ops.zeros(array_ops.shape(op.inputs[0]),
                          dtype=op.inputs[0].dtype)

  # Get the number of selected (minimum or maximum) elements in each segment.
  gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1])
  is_selected = math_ops.equal(op.inputs[0], gathered_outputs)
  if is_sorted:
    num_selected = math_ops.segment_sum(math_ops.cast(is_selected, grad.dtype),
                                        op.inputs[1])
  else:
    num_selected = math_ops.unsorted_segment_sum(
        math_ops.cast(is_selected, grad.dtype), op.inputs[1], op.inputs[2])

  # Compute the gradient for each segment. The gradient for the ith segment is
  # divided evenly among the selected elements in that segment.
  weighted_grads = math_ops.div(grad, num_selected)
  gathered_grads = array_ops.gather(weighted_grads, op.inputs[1])

  if is_sorted:
    return array_ops.where(is_selected, gathered_grads, zeros), None
  else:
    return array_ops.where(is_selected, gathered_grads, zeros), None, None
Example #36
def _embedding_lookup_with_distributed_aggregation(params,
                                                   ids,
                                                   partition_strategy="mod",
                                                   name=None,
                                                   max_norm=None,
                                                   weights=None,
                                                   idx=None,
                                                   segment_ids=None):
  """Lookup helper for embedding_lookup_sparse_with_distributed_aggregation."""
  if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
    raise ValueError("Need at least one param")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]

  def maybe_normalize(x):
    if max_norm is not None:
      if x.get_shape().ndims is not None:
        ndims = x.get_shape().ndims
      else:
        ndims = array_ops.size(array_ops.shape(x))
      return clip_ops.clip_by_norm(x, max_norm, axes=list(range(1, ndims)))
    return x

  with ops.name_scope(name, "embedding_lookup_with_distributed_aggregation",
                      params + [ids]) as name:
    np = len(params)  # Number of partitions
    # Preserve the resource variable status to avoid accidental dense reads.
    if not any(
        isinstance(p, resource_variable_ops.ResourceVariable) for p in params):
      params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params")
    if np == 1:
      with ops.colocate_with(params[0]):
        ret = maybe_normalize(_do_gather(params[0], ids))
        ignore_weights = weights is None
        if not ignore_weights:
          if weights.dtype != ret.dtype:
            weights = math_ops.cast(weights, ret.dtype)
          # Reshape to allow broadcast
          ones = array_ops.fill(
              array_ops.expand_dims(array_ops.rank(ret) - 1, 0), 1)
          bcast_weights_shape = array_ops.concat(
              [array_ops.shape(weights), ones], 0)
          orig_weights_shape = weights.get_shape()
          weights = array_ops.reshape(weights, bcast_weights_shape)
          # Set weights shape after reshape
          if ret.get_shape().ndims is not None:
            weights.set_shape(
                orig_weights_shape.concatenate(
                    [1 for _ in range(ret.get_shape().ndims - 1)]))
          ret *= weights
          return math_ops.segment_sum(ret, segment_ids, name=name)
        else:
          return math_ops.sparse_segment_sum(ret, idx, segment_ids, name=name)
    else:
      ids = ops.convert_to_tensor(ids, name="ids")
      flat_ids = array_ops.reshape(ids, [-1])
      original_indices = math_ops.range(array_ops.size(flat_ids))

      # Create p_assignments and set new_ids depending on the strategy.
      if partition_strategy == "mod":
        p_assignments = flat_ids % np
        new_ids = flat_ids // np
      elif partition_strategy == "div":
        # Compute num_total_ids as the sum of dim-0 of params, then assign to
        # partitions based on a constant number of ids per partition. Optimize
        # if we already know the full shape statically.
        dim_0_size = params[0].get_shape()[0]
        for p in xrange(1, np):
          dim_0_size += params[p].get_shape()[0]
        if dim_0_size.value:
          num_total_ids = constant_op.constant(dim_0_size.value, flat_ids.dtype)
        else:
          dim_0_sizes = []
          for p in xrange(np):
            if params[p].get_shape()[0].value is not None:
              dim_0_sizes.append(params[p].get_shape()[0].value)
            else:
              with ops.colocate_with(params[p]):
                dim_0_sizes.append(array_ops.shape(params[p])[0])
          num_total_ids = math_ops.reduce_sum(
              math_ops.cast(array_ops.stack(dim_0_sizes), flat_ids.dtype))
        ids_per_partition = num_total_ids // np
        extras = num_total_ids % np

        p_assignments = math_ops.maximum(flat_ids // (ids_per_partition + 1), (
            flat_ids - extras) // ids_per_partition)

        # Emulate a conditional using a boolean indicator tensor
        is_in_first_extras_partitions = math_ops.cast(p_assignments < extras,
                                                      flat_ids.dtype)
        new_ids = (is_in_first_extras_partitions * (flat_ids %
                                                    (ids_per_partition + 1)) +
                   (1 - is_in_first_extras_partitions) * (
                       (flat_ids - extras) % ids_per_partition))
      else:
        raise ValueError("Unrecognized partition strategy: " +
                         partition_strategy)

      # Cast partition assignments to int32 for use in dynamic_partition.
      # There really should not be more than 2^32 partitions.
      p_assignments = math_ops.cast(p_assignments, dtypes.int32)
      # Partition list of ids based on assignments into np separate lists
      gather_ids = data_flow_ops.dynamic_partition(new_ids, p_assignments, np)
      # Similarly, partition the original indices.
      pindices = data_flow_ops.dynamic_partition(original_indices,
                                                 p_assignments, np)
      # Do np separate lookups, finding embeddings for plist[p] in params[p]
      partitioned_result = []
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result.append(_do_gather(params[p], gather_ids[p]))

      ignore_weights = weights is None
      if not ignore_weights:
        # Partition weights according to pindices.
        partitioned_weight = []
        for p in xrange(np):
          partitioned_weight.append(array_ops.gather(weights, pindices[p]))
      # Reshape each partition result.
      element_shape = params[0].get_shape()[1:]
      for p in params[1:]:
        element_shape = element_shape.merge_with(p.get_shape()[1:])
      if element_shape.is_fully_defined():
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            partitioned_result[p] = array_ops.reshape(
                partitioned_result[p],
                array_ops.concat([array_ops.shape(pindices[p]), element_shape],
                                 0))
      else:
        with ops.colocate_with(params[0]):
          params_shape = array_ops.shape(params[0])
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            partitioned_result[p] = array_ops.reshape(
                partitioned_result[p],
                array_ops.concat([
                    array_ops.shape(pindices[p]), array_ops.slice(
                        params_shape, [1], [-1])
                ], 0))
      # Normalize each partition result.
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result[p] = maybe_normalize(partitioned_result[p])
      if not ignore_weights:
        # Multiply each partition result with partition weights.
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            if partitioned_weight[p].dtype != partitioned_result[p].dtype:
              partitioned_weight[p] = math_ops.cast(partitioned_weight[p],
                                                    partitioned_result[p].dtype)
            # Reshape partition weights.
            ones = array_ops.fill(
                array_ops.expand_dims(
                    array_ops.rank(partitioned_result[p]) - 1, 0), 1)
            bcast_weights_shape = array_ops.concat(
                [array_ops.shape(partitioned_weight[p]), ones], 0)
            orig_weights_shape = partitioned_weight[p].get_shape()
            partitioned_weight[p] = array_ops.reshape(partitioned_weight[p],
                                                      bcast_weights_shape)
            if partitioned_result[p].get_shape().ndims is not None:
              partitioned_weight[p].set_shape(
                  orig_weights_shape.concatenate([
                      1
                      for _ in range(partitioned_result[p].get_shape().ndims -
                                     1)
                  ]))
            partitioned_result[p] *= partitioned_weight[p]
      partitioned_segment_ids = []
      for p in xrange(np):
        if not ignore_weights:
          # Partition segment_ids according to pindices.
          p_segment_ids = array_ops.gather(segment_ids, pindices[p])
          # Number the p_segment_ids to meet segment_sum's requirements. Note
          # that unique_p_segment_ids contains unique segment ids of this
          # partition and these ids' order is unchanged.
          unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
              p_segment_ids)
          partitioned_segment_ids.append(unique_p_segment_ids)
          # segment_sum this partition's result.
          with ops.colocate_with(params[p]):
            partitioned_result[p] = math_ops.segment_sum(
                partitioned_result[p], unique_p_segment_idx)
        else:
          # When ignoring weights, we need the indices of the elements in idx
          # and segment_ids that belong to this partition.
          _, exclude_idx = array_ops.setdiff1d(idx, pindices[p])
          all_idx = math_ops.range(array_ops.shape(idx)[0])
          _, include_idx = array_ops.setdiff1d(all_idx, exclude_idx)
          # Gather segment_ids and idx according to these indices.
          p_segment_ids = array_ops.gather(segment_ids, include_idx)
          p_idx = array_ops.gather(idx, include_idx)
          # Number the p_segment_ids, same as in the weighted case above.
          unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
              p_segment_ids)
          _, unique_p_idx_idx = array_ops.unique(p_idx)
          partitioned_segment_ids.append(unique_p_segment_ids)
          with ops.colocate_with(params[p]):
            partitioned_result[p] = math_ops.sparse_segment_sum(
                partitioned_result[p], unique_p_idx_idx, unique_p_segment_idx)
      # Concat each partition's segment_ids and result for final segment_sum.
      concat_segment_ids = array_ops.concat(partitioned_segment_ids, 0)
      concat_partitioned_result = array_ops.concat(partitioned_result, 0)
      return math_ops.unsorted_segment_sum(
          concat_partitioned_result,
          concat_segment_ids,
          math_ops.reduce_max(concat_segment_ids) + 1,
          name=name)
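
The "div" branch above gives the first `extras` partitions one extra id each; a tiny pure-Python check of that assignment formula (made-up sizes, no TensorFlow needed):

num_total_ids, num_partitions = 10, 3
ids_per_partition = num_total_ids // num_partitions   # 3
extras = num_total_ids % num_partitions               # 1

for flat_id in range(num_total_ids):
    p = max(flat_id // (ids_per_partition + 1),
            (flat_id - extras) // ids_per_partition)
    print(flat_id, "->", p)   # ids 0-3 -> 0, ids 4-6 -> 1, ids 7-9 -> 2
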
Example #37
def embedding_lookup_sparse_with_distributed_aggregation(
    params,
    sp_ids,
    sp_weights,
    partition_strategy="mod",
    name=None,
    combiner=None,
    max_norm=None):
  """Computes embeddings for the given ids and weights.

  Embeddings belonging to the same param are aggregated on that device first. This
  op is intended to decrease data transmission and improve parallelism. See
  `tf.nn.embedding_lookup_sparse` for the functionality and example of this op.

  Args:
    params: A single tensor representing the complete embedding tensor,
      or a list of P tensors all of same shape except for the first dimension,
      representing sharded embedding tensors.  Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId),
      where N is typically batch size and M is arbitrary.
    sp_weights: either a SparseTensor of float / double weights, or None to
      indicate all weights should be taken to be 1. If specified, sp_weights
      must have exactly the same shape and indices as sp_ids.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
      and "sum" are supported.
      "sum" computes the weighted sum of the embedding results for each row.
      "mean" is the weighted sum divided by the total weight.
      "sqrtn" is the weighted sum divided by the square root of the sum of the
      squares of the weights.
    max_norm: If not None, each embedding is normalized to have l2 norm equal
      to max_norm before combining.

  Returns:
    A dense tensor representing the combined embeddings for the
    sparse ids. For each row in the dense tensor represented by sp_ids, the op
    looks up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

  Raises:
    TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither
      None nor SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if combiner not in ("mean", "sqrtn", "sum"):
    raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]
  if not isinstance(sp_ids, sparse_tensor.SparseTensor):
    raise TypeError("sp_ids must be SparseTensor")
  ignore_weights = sp_weights is None
  if not ignore_weights:
    if not isinstance(sp_weights, sparse_tensor.SparseTensor):
      raise TypeError("sp_weights must be either None or SparseTensor")
    sp_ids.values.get_shape().assert_is_compatible_with(
        sp_weights.values.get_shape())
    sp_ids.indices.get_shape().assert_is_compatible_with(
        sp_weights.indices.get_shape())
    sp_ids.dense_shape.get_shape().assert_is_compatible_with(
        sp_weights.dense_shape.get_shape())
    # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
    # sp_weights have equal indices and shapes.

  with ops.name_scope(name, "embedding_lookup_sparse",
                      params + [sp_ids]) as name:
    segment_ids = sp_ids.indices[:, 0]
    if segment_ids.dtype != dtypes.int32:
      segment_ids = math_ops.cast(segment_ids, dtypes.int32)

    ids = sp_ids.values
    if ignore_weights:
      ids, idx = array_ops.unique(ids)
    else:
      idx = None

    weights = None if ignore_weights else sp_weights.values
    embeddings = _embedding_lookup_with_distributed_aggregation(
        params,
        ids,
        partition_strategy=partition_strategy,
        max_norm=max_norm,
        weights=weights,
        idx=idx,
        segment_ids=segment_ids)
    # Set weights to all ones if weights were not provided.
    if ignore_weights:
      weights = array_ops.fill([array_ops.shape(segment_ids)[0]], 1)
    if weights.dtype != embeddings.dtype:
      weights = math_ops.cast(weights, embeddings.dtype)
    # Reshape weights.
    ones = array_ops.fill(
        array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
    bcast_weights_shape = array_ops.concat([array_ops.shape(weights), ones], 0)
    orig_weights_shape = weights.get_shape()
    weights = array_ops.reshape(weights, bcast_weights_shape)
    if embeddings.get_shape().ndims is not None:
      weights.set_shape(
          orig_weights_shape.concatenate(
              [1 for _ in range(embeddings.get_shape().ndims - 1)]))

    if combiner == "mean":
      weight_sum = math_ops.segment_sum(weights, segment_ids)
      embeddings = math_ops.div(embeddings, weight_sum)
    elif combiner == "sqrtn":
      weights_squared = math_ops.pow(weights, 2)
      weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
      weight_sum_sqrt = math_ops.sqrt(weight_sum)
      embeddings = math_ops.div(embeddings, weight_sum_sqrt)
    elif combiner != "sum":
      assert False, "Unrecognized combiner"
    return embeddings
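
For the "sqrtn" branch just above, a small check of the normalization it applies (plain tensors standing in for the weights and the already-summed embeddings; these names are assumptions for illustration):

import tensorflow as tf

summed = tf.constant([[5., 5.]])        # segment_sum of the weighted embeddings
weights = tf.constant([[2.], [1.]])     # weights of the two ids in the row
segment_ids = tf.constant([0, 0])

norm = tf.sqrt(tf.math.segment_sum(tf.pow(weights, 2), segment_ids))
sqrtn_combined = summed / norm          # [[5., 5.]] / sqrt(2**2 + 1**2)
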
Example #38
    def get_dense_tensor(self, transformation_cache, state_manager):
        if isinstance(self.categorical_column,
                      fc_lib.SequenceCategoricalColumn):
            raise ValueError(
                "In embedding_column: {}. "
                "categorical_column must not be of "
                "type SequenceCategoricalColumn. "
                "Suggested fix A: If you wish to use DenseFeatures, use a "
                "non-sequence categorical_column_with_*. "
                "Suggested fix B: If you wish to create sequence input, use "
                "SequenceFeatures instead of DenseFeatures. "
                "Given (type {}): {}".format(
                    self.name,
                    type(self.categorical_column),
                    self.categorical_column,
                ))
        # Get sparse IDs and weights.
        sparse_tensors = self.categorical_column.get_sparse_tensors(
            transformation_cache, state_manager)

        # Look up the embedding from the sparse input
        sparse_ids = sparse_tensors.id_tensor
        sparse_weights = sparse_tensors.weight_tensor

        unique_ids, idx = tf.unique(sparse_ids.values)
        batch_embedding = tf.py_function(self.lookup_embedding,
                                         inp=[unique_ids],
                                         Tout=tf.float32)

        segment_ids = sparse_ids.indices[:, 0]
        if segment_ids.dtype != tf.int32:
            segment_ids = tf.cast(segment_ids, tf.int32)

        if sparse_weights is not None:
            weights = sparse_weights.values
            if weights.dtype != batch_embedding.dtype:
                weights = math_ops.cast(weights, batch_embedding.dtype)

            batch_embedding = array_ops.gather(batch_embedding, idx)

            # Reshape weights to allow broadcast
            ones = array_ops.fill(
                array_ops.expand_dims(array_ops.rank(batch_embedding) - 1, 0),
                1,
            )
            bcast_weights_shape = array_ops.concat(
                [array_ops.shape(weights), ones], 0)
            weights = array_ops.reshape(weights, bcast_weights_shape)

            batch_embedding *= weights

            if self.combiner == "sum":
                batch_embedding = math_ops.segment_sum(batch_embedding,
                                                       segment_ids)
            elif self.combiner == "mean":
                batch_embedding = math_ops.segment_sum(batch_embedding,
                                                       segment_ids)
                weight_sum = math_ops.segment_sum(weights, segment_ids)
                batch_embedding = math_ops.div(batch_embedding, weight_sum)
            elif self.combiner == "sqrtn":
                batch_embedding = math_ops.segment_sum(batch_embedding,
                                                       segment_ids)
                weights_squared = math_ops.pow(weights, 2)
                weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
                weight_sum_sqrt = math_ops.sqrt(weight_sum)
                batch_embedding = math_ops.div(batch_embedding,
                                               weight_sum_sqrt)
            else:
                assert False, "Unrecognized combiner"
        else:
            assert idx is not None
            if self.combiner == "sum":
                batch_embedding = tf.sparse.segment_sum(
                    batch_embedding, idx, segment_ids)
            elif self.combiner == "mean":
                batch_embedding = tf.sparse.segment_mean(
                    batch_embedding, idx, segment_ids)
            elif self.combiner == "sqrtn":
                batch_embedding = tf.sparse.segment_sqrt_n(
                    batch_embedding, idx, segment_ids)
            else:
                assert False, "Unrecognized combiner"

        return batch_embedding
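
The weighted "mean" branch above computes segment_sum of the weighted embeddings divided by segment_sum of the weights; a minimal numeric sketch of that arithmetic (the tensors below stand in for batch_embedding, weights, and segment_ids and are assumptions, not values from the snippet):

import tensorflow as tf

emb = tf.constant([[1., 1.], [3., 3.], [5., 5.]])   # per-id embeddings
weights = tf.constant([[2.], [1.], [4.]])           # per-id weights
segment_ids = tf.constant([0, 0, 1])                # two examples in the batch

weighted = emb * weights
mean = tf.math.segment_sum(weighted, segment_ids) / tf.math.segment_sum(
    weights, segment_ids)
# Example 0: (2*[1,1] + 1*[3,3]) / 3 = [1.667, 1.667]; example 1: [5., 5.].
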
Example #39
def embedding_lookup_sparse(params,
                            sp_ids,
                            sp_weights,
                            partition_strategy="mod",
                            name=None,
                            combiner=None,
                            max_norm=None):
    """Computes embeddings for the given ids and weights.

  This op assumes that there is at least one id for each row in the dense tensor
  represented by sp_ids (i.e. there are no rows with empty features), and that
  all the indices of sp_ids are in canonical row-major order.

  It also assumes that all id values lie in the range [0, p0), where p0
  is the sum of the size of params along dimension 0.

  Args:
    params: A single tensor representing the complete embedding tensor,
      or a list of P tensors all of same shape except for the first dimension,
      representing sharded embedding tensors.  Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId),
      where N is typically batch size and M is arbitrary.
    sp_weights: either a SparseTensor of float / double weights, or None to
      indicate all weights should be taken to be 1. If specified, sp_weights
      must have exactly the same shape and indices as sp_ids.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
      and "sum" are supported.
      "sum" computes the weighted sum of the embedding results for each row.
      "mean" is the weighted sum divided by the total weight.
      "sqrtn" is the weighted sum divided by the square root of the sum of the
      squares of the weights.
    max_norm: If not None, each embedding is normalized to have l2 norm equal
      to max_norm before combining.

  Returns:
    A dense tensor representing the combined embeddings for the
    sparse ids. For each row in the dense tensor represented by sp_ids, the op
    looks up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

    In other words, if

      shape(combined params) = [p0, p1, ..., pm]

    and

      shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]

    then

      shape(output) = [d0, d1, ..., dn-1, p1, ..., pm].

    For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

      [0, 0]: id 1, weight 2.0
      [0, 1]: id 3, weight 0.5
      [1, 0]: id 0, weight 1.0
      [2, 3]: id 1, weight 3.0

    with `combiner`="mean", then the output will be a 3x20 matrix where

      output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
      output[1, :] = params[0, :] * 1.0
      output[2, :] = params[1, :] * 3.0

  Raises:
    TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither
      None nor SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
    if combiner is None:
        logging.warn("The default value of combiner will change from \"mean\" "
                     "to \"sqrtn\" after 2016/11/01.")
        combiner = "mean"
    if combiner not in ("mean", "sqrtn", "sum"):
        raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]
    if not isinstance(sp_ids, sparse_tensor.SparseTensor):
        raise TypeError("sp_ids must be SparseTensor")
    ignore_weights = sp_weights is None
    if not ignore_weights:
        if not isinstance(sp_weights, sparse_tensor.SparseTensor):
            raise TypeError("sp_weights must be either None or SparseTensor")
        sp_ids.values.get_shape().assert_is_compatible_with(
            sp_weights.values.get_shape())
        sp_ids.indices.get_shape().assert_is_compatible_with(
            sp_weights.indices.get_shape())
        sp_ids.dense_shape.get_shape().assert_is_compatible_with(
            sp_weights.dense_shape.get_shape())
        # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
        # sp_weights have equal indices and shapes.

    with ops.name_scope(name, "embedding_lookup_sparse",
                        params + [sp_ids]) as name:
        segment_ids = sp_ids.indices[:, 0]
        if segment_ids.dtype != dtypes.int32:
            segment_ids = math_ops.cast(segment_ids, dtypes.int32)

        ids = sp_ids.values
        if ignore_weights:
            ids, idx = array_ops.unique(ids)
        else:
            idx = None

        embeddings = embedding_lookup(params,
                                      ids,
                                      partition_strategy=partition_strategy,
                                      max_norm=max_norm)
        if not ignore_weights:
            weights = sp_weights.values
            if weights.dtype != embeddings.dtype:
                weights = math_ops.cast(weights, embeddings.dtype)

            # Reshape weights to allow broadcast
            ones = array_ops.fill(
                array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
            bcast_weights_shape = array_ops.concat_v2(
                [array_ops.shape(weights), ones], 0)

            orig_weights_shape = weights.get_shape()
            weights = array_ops.reshape(weights, bcast_weights_shape)

            # Set the weight shape, since after reshaping to bcast_weights_shape,
            # the shape becomes None.
            if embeddings.get_shape().ndims is not None:
                weights.set_shape(
                    orig_weights_shape.concatenate(
                        [1 for _ in range(embeddings.get_shape().ndims - 1)]))

            embeddings *= weights

            if combiner == "sum":
                embeddings = math_ops.segment_sum(embeddings,
                                                  segment_ids,
                                                  name=name)
            elif combiner == "mean":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weight_sum = math_ops.segment_sum(weights, segment_ids)
                embeddings = math_ops.div(embeddings, weight_sum, name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weights_squared = math_ops.pow(weights, 2)
                weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
                weight_sum_sqrt = math_ops.sqrt(weight_sum)
                embeddings = math_ops.div(embeddings,
                                          weight_sum_sqrt,
                                          name=name)
            else:
                assert False, "Unrecognized combiner"
        else:
            assert idx is not None
            if combiner == "sum":
                embeddings = math_ops.sparse_segment_sum(embeddings,
                                                         idx,
                                                         segment_ids,
                                                         name=name)
            elif combiner == "mean":
                embeddings = math_ops.sparse_segment_mean(embeddings,
                                                          idx,
                                                          segment_ids,
                                                          name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.sparse_segment_sqrt_n(embeddings,
                                                            idx,
                                                            segment_ids,
                                                            name=name)
            else:
                assert False, "Unrecognized combiner"

        return embeddings
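
To connect the docstring's worked example with actual usage, here is a hedged sketch using the public tf.nn.embedding_lookup_sparse (TF 2.x assumed; the snippet itself targets an older internal API), reproducing the combiner="mean" case described above:

import tensorflow as tf

params = tf.random.normal([10, 20])
sp_ids = tf.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0], [2, 3]],
    values=tf.constant([1, 3, 0, 1], dtype=tf.int64),
    dense_shape=[3, 4])
sp_weights = tf.SparseTensor(
    indices=sp_ids.indices,
    values=[2.0, 0.5, 1.0, 3.0],
    dense_shape=sp_ids.dense_shape)

output = tf.nn.embedding_lookup_sparse(params, sp_ids, sp_weights,
                                       combiner="mean")
# output[0] == (2.0 * params[1] + 0.5 * params[3]) / 2.5, as in the docstring.
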
Example #40
def embedding_lookup_sparse(params, sp_ids, sp_weights,
                            partition_strategy="mod",
                            name=None,
                            combiner=None,
                            max_norm=None):
  """Computes embeddings for the given ids and weights.

  This op assumes that there is at least one id for each row in the dense tensor
  represented by sp_ids (i.e. there are no rows with empty features), and that
  all the indices of sp_ids are in canonical row-major order.

  It also assumes that all id values lie in the range [0, p0), where p0
  is the sum of the size of params along dimension 0.

  Args:
    params: A single tensor representing the complete embedding tensor,
      or a list of P tensors all of same shape except for the first dimension,
      representing sharded embedding tensors.  Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0.
    sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId),
      where N is typically batch size and M is arbitrary.
    sp_weights: either a SparseTensor of float / double weights, or None to
      indicate all weights should be taken to be 1. If specified, sp_weights
      must have exactly the same shape and indices as sp_ids.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
      and "sum" are supported.
      "sum" computes the weighted sum of the embedding results for each row.
      "mean" is the weighted sum divided by the total weight.
      "sqrtn" is the weighted sum divided by the square root of the sum of the
      squares of the weights.
    max_norm: If not None, each embedding is normalized to have l2 norm equal
      to max_norm before combining.

  Returns:
    A dense tensor representing the combined embeddings for the
    sparse ids. For each row in the dense tensor represented by sp_ids, the op
    looks up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

    In other words, if

      shape(combined params) = [p0, p1, ..., pm]

    and

      shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]

    then

      shape(output) = [d0, d1, ..., dn-1, p1, ..., pm].

    For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

      [0, 0]: id 1, weight 2.0
      [0, 1]: id 3, weight 0.5
      [1, 0]: id 0, weight 1.0
      [2, 3]: id 1, weight 3.0

    with `combiner`="mean", then the output will be a 3x20 matrix where

      output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
      output[1, :] = params[0, :] * 1.0
      output[2, :] = params[1, :] * 3.0

  Raises:
    TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither
      None nor SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if combiner not in ("mean", "sqrtn", "sum"):
    raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]
  if not isinstance(sp_ids, sparse_tensor.SparseTensor):
    raise TypeError("sp_ids must be SparseTensor")
  ignore_weights = sp_weights is None
  if not ignore_weights:
    if not isinstance(sp_weights, sparse_tensor.SparseTensor):
      raise TypeError("sp_weights must be either None or SparseTensor")
    sp_ids.values.get_shape().assert_is_compatible_with(
        sp_weights.values.get_shape())
    sp_ids.indices.get_shape().assert_is_compatible_with(
        sp_weights.indices.get_shape())
    sp_ids.shape.get_shape().assert_is_compatible_with(
        sp_weights.shape.get_shape())
    # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
    # sp_weights have equal indices and shapes.

  with ops.name_scope(name, "embedding_lookup_sparse",
                      params + [sp_ids]) as name:
    segment_ids = sp_ids.indices[:, 0]
    if segment_ids.dtype != dtypes.int32:
      segment_ids = math_ops.cast(segment_ids, dtypes.int32)

    ids = sp_ids.values
    if ignore_weights:
      ids, idx = array_ops.unique(ids)
    else:
      idx = None

    embeddings = embedding_lookup(
        params, ids, partition_strategy=partition_strategy, max_norm=max_norm)
    if not ignore_weights:
      weights = sp_weights.values
      if weights.dtype != embeddings.dtype:
        weights = math_ops.cast(weights, embeddings.dtype)

      # Reshape weights to allow broadcast
      ones = array_ops.fill(
          array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
      bcast_weights_shape = array_ops.concat(0, [
          array_ops.shape(weights), ones])

      orig_weights_shape = weights.get_shape()
      weights = array_ops.reshape(weights, bcast_weights_shape)

      # Set the weight shape, since after reshaping to bcast_weights_shape,
      # the shape becomes None.
      if embeddings.get_shape().ndims is not None:
        weights.set_shape(orig_weights_shape.concatenate(
            [1 for _ in range(embeddings.get_shape().ndims - 1)]))

      embeddings *= weights

      if combiner == "sum":
        embeddings = math_ops.segment_sum(embeddings, segment_ids, name=name)
      elif combiner == "mean":
        embeddings = math_ops.segment_sum(embeddings, segment_ids)
        weight_sum = math_ops.segment_sum(weights, segment_ids)
        embeddings = math_ops.div(embeddings, weight_sum, name=name)
      elif combiner == "sqrtn":
        embeddings = math_ops.segment_sum(embeddings, segment_ids)
        weights_squared = math_ops.pow(weights, 2)
        weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
        weight_sum_sqrt = math_ops.sqrt(weight_sum)
        embeddings = math_ops.div(embeddings, weight_sum_sqrt, name=name)
      else:
        assert False, "Unrecognized combiner"
    else:
      assert idx is not None
      if combiner == "sum":
        embeddings = math_ops.sparse_segment_sum(embeddings, idx, segment_ids,
                                                 name=name)
      elif combiner == "mean":
        embeddings = math_ops.sparse_segment_mean(embeddings, idx, segment_ids,
                                                  name=name)
      elif combiner == "sqrtn":
        embeddings = math_ops.sparse_segment_sqrt_n(embeddings, idx,
                                                    segment_ids, name=name)
      else:
        assert False, "Unrecognized combiner"

    return embeddings
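A hedged usage sketch of the function above. The listing targets an older graph-mode TensorFlow, so this is written against `tf.compat.v1`; the data mirrors the 10x20 example in the docstring:

```python
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

params = tf.constant(np.random.rand(10, 20).astype(np.float32))
indices = [[0, 0], [0, 1], [1, 0], [2, 3]]
sp_ids = tf.SparseTensor(indices, tf.constant([1, 3, 0, 1], dtype=tf.int64),
                         dense_shape=[3, 4])
sp_weights = tf.SparseTensor(indices, tf.constant([2.0, 0.5, 1.0, 3.0]),
                             dense_shape=[3, 4])

combined = tf.nn.embedding_lookup_sparse(params, sp_ids, sp_weights,
                                         combiner="mean")
with tf.Session() as sess:
    print(sess.run(combined).shape)  # (3, 20)
```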
Example #41
0
class SegmentReductionOpBenchmark(test.Benchmark):
    outer_dim_options = [2**x for x in range(9, 14, 2)]
    ratio_options = [2**x for x in range(1, 6, 2)]
    inner_dim_options = [2**x for x in range(9, 14, 2)]
    # randomly generated sizes with less alignment
    inner_dim_options += [
        1120, 1215, 1856, 1302, 1329, 1531, 1313, 1672, 1851, 1584
    ]
    dtype_options = [np.float32, np.float64]
    options = (outer_dim_options, ratio_options, inner_dim_options,
               dtype_options)
    op_functors = [
        lambda vc, vs, seg_ids: ("sorted", math_ops.segment_sum(vc, vs)),
        lambda vc, vs, seg_ids:
        ("unsorted", math_ops.unsorted_segment_sum(vc, vs, seg_ids[-1] + 1))
    ]
    repeat = 10

    def _npTypeToStr(self, t):
        if t == np.float32:
            return "fp32"
        if t == np.float64:
            return "fp64"

    def _runGraph(self, op_functor, outer_dim, ratio, inner_dim, dtype):
        output_outer_dim = int(outer_dim / ratio)
        const = np.random.randint(5, size=(outer_dim, inner_dim))
        seg_ids = np.sort(np.random.randint(output_outer_dim, size=outer_dim))
        vs = variables.Variable(seg_ids.astype(np.int32))
        with ops.device("/gpu:0"):
            vc = variables.Variable(const.astype(dtype))
        name, op = op_functor(vc, vs, seg_ids)
        with session.Session() as sess:
            variables.global_variables_initializer().run()
            r = self.run_op_benchmark(
                sess,
                op,
                min_iters=self.repeat,
                name="_".join(
                    map(str, [
                        name, outer_dim, ratio, inner_dim,
                        self._npTypeToStr(dtype)
                    ])))
        return name, r["wall_time"]

    def benchmarkSegmentSumGPU(self):
        if not test.is_gpu_available(cuda_only=True):
            return
        for outer_dim, ratio, inner_dim, dtype in itertools.product(
                *self.options):
            output_outer_dim = int(outer_dim / ratio)
            op_functor = self.op_functors[0]
            with ops.Graph().as_default():
                self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype)

    def benchmarkUnsortedSegmentSumGPU(self):
        if not test.is_gpu_available(cuda_only=True):
            return
        for outer_dim, ratio, inner_dim, dtype in itertools.product(
                *self.options):
            output_outer_dim = int(outer_dim / ratio)
            op_functor = self.op_functors[1]
            with ops.Graph().as_default():
                self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype)
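The benchmark class above depends on TensorFlow's internal test modules. A hedged sketch of the imports it assumes and the usual entry point (benchmarks are selected with the `--benchmarks` flag when the file is run through `test.main()`):

```python
import itertools

import numpy as np

from tensorflow.python.client import session
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test

if __name__ == "__main__":
    # e.g. python segment_reduction_ops_benchmark.py \
    #        --benchmarks=SegmentReductionOpBenchmark
    test.main()
```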
Example #42
0
def embedding_lookup_sparse(params,
                            sp_ids,
                            sp_weights,
                            partition_strategy="mod",
                            name=None,
                            combiner=None,
                            max_norm=None):
    """Looks up embeddings for the given ids and weights from a list of tensors.

  This op assumes that there is at least one id for each row in the dense tensor
  represented by sp_ids (i.e. there are no rows with empty features), and that
  all the indices of sp_ids are in canonical row-major order.

  `sp_ids` and `sp_weights` (if not None) are `SparseTensor`s with rank of 2.
  Embeddings are always aggregated along the last dimension.

  It also assumes that all id values lie in the range [0, p0), where p0
  is the sum of the size of params along dimension 0.

  Args:
    params: A single tensor representing the complete embedding tensor, or a
      list of tensors all of the same shape except for the first dimension,
      representing sharded embedding tensors. Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    sp_ids: N x M `SparseTensor` of int64 ids where N is typically batch size
      and M is arbitrary.
    sp_weights: either a `SparseTensor` of float / double weights, or `None` to
      indicate all weights should be taken to be 1. If specified, `sp_weights`
      must have exactly the same shape and indices as `sp_ids`.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
      and "sum" are supported. "sum" computes the weighted sum of the embedding
      results for each row. "mean" is the weighted sum divided by the total
      weight. "sqrtn" is the weighted sum divided by the square root of the sum
      of the squares of the weights. Defaults to `mean`.
    max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
      than this value, before combining.

  Returns:
    A dense tensor representing the combined embeddings for the
    sparse ids. For each row in the dense tensor represented by `sp_ids`, the op
    looks up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

    In other words, if

      `shape(combined params) = [p0, p1, ..., pm]`

    and

      `shape(sp_ids) = shape(sp_weights) = [d0, d1]`

    then

      `shape(output) = [d0, p1, ..., pm]`.

    For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

      ```python
      [0, 0]: id 1, weight 2.0
      [0, 1]: id 3, weight 0.5
      [1, 0]: id 0, weight 1.0
      [2, 3]: id 1, weight 3.0
      ```

    with `combiner`="mean", then the output will be a 3x20 matrix where

      ```python
      output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
      output[1, :] = (params[0, :] * 1.0) / 1.0
      output[2, :] = (params[1, :] * 3.0) / 3.0
      ```

  Raises:
    TypeError: If `sp_ids` is not a `SparseTensor`, or if `sp_weights` is
      neither `None` nor `SparseTensor`.
    ValueError: If `combiner` is not one of {"mean", "sqrtn", "sum"}.
  """
    if combiner is None:
        combiner = "mean"
    if combiner not in ("mean", "sqrtn", "sum"):
        raise ValueError(
            f"combiner must be one of 'mean', 'sqrtn' or 'sum', got {combiner}"
        )
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]
    if not isinstance(sp_ids, sparse_tensor.SparseTensor):
        raise TypeError(f"sp_ids must be SparseTensor, got {type(sp_ids)}")
    ignore_weights = sp_weights is None
    if not ignore_weights:
        if not isinstance(sp_weights, sparse_tensor.SparseTensor):
            raise TypeError(f"sp_weights must be either None or SparseTensor, "
                            f"got {type(sp_weights)}")
        sp_ids.values.get_shape().assert_is_compatible_with(
            sp_weights.values.get_shape())
        sp_ids.indices.get_shape().assert_is_compatible_with(
            sp_weights.indices.get_shape())
        sp_ids.dense_shape.get_shape().assert_is_compatible_with(
            sp_weights.dense_shape.get_shape())
        # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
        # sp_weights have equal indices and shapes.

    with ops.name_scope(name, "embedding_lookup_sparse",
                        params + [sp_ids]) as name:
        segment_ids = sp_ids.indices[:, 0]

        ids = sp_ids.values
        ids, idx = array_ops.unique(ids)

        embeddings = embedding_lookup(params,
                                      ids,
                                      partition_strategy=partition_strategy,
                                      max_norm=max_norm)
        if not ignore_weights:
            if segment_ids.dtype != dtypes.int32:
                segment_ids = math_ops.cast(segment_ids, dtypes.int32)

            weights = sp_weights.values
            embeddings = array_ops.gather(embeddings, idx)

            original_dtype = embeddings.dtype
            if embeddings.dtype in (dtypes.float16, dtypes.bfloat16):
                # Cast low-precision embeddings to float32 during the computation to
                # avoid numerical issues.
                embeddings = math_ops.cast(embeddings, dtypes.float32)
            if weights.dtype != embeddings.dtype:
                weights = math_ops.cast(weights, embeddings.dtype)

            # Reshape weights to allow broadcast
            ones_shape = array_ops.expand_dims(
                array_ops.rank(embeddings) - 1, 0)
            ones = array_ops.ones(ones_shape, dtype=dtypes.int32)
            bcast_weights_shape = array_ops.concat(
                [array_ops.shape(weights), ones], 0)

            orig_weights_shape = weights.get_shape()
            weights = array_ops.reshape(weights, bcast_weights_shape)

            # Set the weight shape, since after reshaping to bcast_weights_shape,
            # the shape becomes None.
            if embeddings.get_shape().ndims is not None:
                weights.set_shape(
                    orig_weights_shape.concatenate(
                        [1 for _ in range(embeddings.get_shape().ndims - 1)]))

            embeddings *= weights

            if combiner == "sum":
                embeddings = math_ops.segment_sum(embeddings,
                                                  segment_ids,
                                                  name=name)
            elif combiner == "mean":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weight_sum = math_ops.segment_sum(weights, segment_ids)
                embeddings = math_ops.div_no_nan(embeddings,
                                                 weight_sum,
                                                 name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weights_squared = math_ops.pow(weights, 2)
                weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
                weight_sum_sqrt = math_ops.sqrt(weight_sum)
                embeddings = math_ops.div_no_nan(embeddings,
                                                 weight_sum_sqrt,
                                                 name=name)
            else:
                assert False, "Unrecognized combiner"
            if embeddings.dtype != original_dtype:
                embeddings = math_ops.cast(embeddings, original_dtype)
        else:
            if segment_ids.dtype not in (dtypes.int32, dtypes.int64):
                segment_ids = math_ops.cast(segment_ids, dtypes.int32)
            assert idx is not None
            if combiner == "sum":
                embeddings = math_ops.sparse_segment_sum(embeddings,
                                                         idx,
                                                         segment_ids,
                                                         name=name)
            elif combiner == "mean":
                embeddings = math_ops.sparse_segment_mean(embeddings,
                                                          idx,
                                                          segment_ids,
                                                          name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.sparse_segment_sqrt_n(embeddings,
                                                            idx,
                                                            segment_ids,
                                                            name=name)
            else:
                assert False, "Unrecognized combiner"

        return embeddings
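Unlike the earlier variants, this version divides with `math_ops.div_no_nan`, so a row whose weights sum to zero yields zeros instead of NaN. A tiny eager-mode illustration of that behavior:

```python
import tensorflow as tf

# If every weight in a row is 0, both the weighted sum and the weight sum
# are 0; divide_no_nan maps the resulting 0/0 to 0 rather than NaN.
weighted_sum = tf.constant([0.0, 0.0])
weight_sum = tf.constant([0.0, 0.0])
print(tf.math.divide_no_nan(weighted_sum, weight_sum).numpy())  # [0. 0.]
```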
Example #43
0
 def testSegmentIdsShape(self):
     shape = [4, 4]
     tf_x, _ = self._input(shape)
     indices = constant_op.constant([0, 1, 2, 2], shape=[2, 2])
     with self.assertRaises(ValueError):
         math_ops.segment_sum(data=tf_x, segment_ids=indices)
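The test above only checks that `segment_sum` rejects 2-D `segment_ids` at graph-construction time. For contrast, a minimal sketch of the valid contract: `segment_ids` is 1-D, sorted, with one entry per row of `data`:

```python
import tensorflow as tf

data = tf.constant([[1., 2.], [3., 4.], [5., 6.], [7., 8.]])
segment_ids = tf.constant([0, 0, 1, 2])  # 1-D, sorted, len == data.shape[0]
print(tf.math.segment_sum(data, segment_ids).numpy())
# [[4. 6.]
#  [5. 6.]
#  [7. 8.]]
```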
def embedding_lookup_sparse(
        params,
        sp_ids,
        sp_weights,
        partition_strategy=None,  # not used
        name="embedding_lookup_sparse",
        combiner="mean",
        max_norm=None,
        return_trainable=False):
    """Provides a dynamic version of embedding_lookup_sparse
    similar to tf.nn.embedding_lookup_sparse.

  This op assumes that there is at least one id for each row in the dense tensor
  represented by sp_ids (i.e. there are no rows with empty features), and that
  all the indices of sp_ids are in canonical row-major order.

  It also assumes that all id values lie in the range [0, p0), where p0
  is the sum of the size of params along dimension 0.

  Args:
    params: A single `dynamic_embedding.Variable` instance representing
      the complete embedding tensor.
    sp_ids: N x M `SparseTensor` of int64 ids where N is typically batch size
      and M is arbitrary.
    sp_weights: either a `SparseTensor` of float / double weights, or `None` to
      indicate all weights should be taken to be 1. If specified, `sp_weights`
      must have exactly the same shape and indices as `sp_ids`.
    partition_strategy: Not used.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
      and "sum" are supported. "sum" computes the weighted sum of the embedding
      results for each row. "mean" is the weighted sum divided by the total
      weight. "sqrtn" is the weighted sum divided by the square root of the sum
      of the squares of the weights.
    max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
      than this value, before combining.
    return_trainable: optional. If True, also return the TrainableWrapper created
      by `dynamic_embedding.embedding_lookup`.

  Returns:
    combined_embeddings: A dense tensor representing the combined embeddings
      for the sparse ids. For each row in the dense tensor represented by
      `sp_ids`, the op looks up the embeddings for all ids in that row,
      multiplies them by the corresponding weight, and combines these embeddings
      as specified.

      In other words, if

        `shape(combined params) = [+infinity, dim]`

      and

        `shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]`

      then

        `shape(output) = [d0, dim]`.

      For instance, if params dim=20, and sp_ids / sp_weights are

        ```python
        [0, 0]: id 1, weight 2.0
        [0, 1]: id 3, weight 0.5
        [1, 0]: id 0, weight 1.0
        [2, 3]: id 1, weight 3.0
        ```

      with `combiner`="mean", then the output will be a 3x20 matrix where

        ```python
        output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
        output[1, :] = (params[0, :] * 1.0) / 1.0
        output[2, :] = (params[1, :] * 3.0) / 3.0
        ```
    trainable_wrap:
      A TrainableWrapper object used to fill the Optimizer's `var_list`.
      Only provided if `return_trainable` is True.
  Raises:
    TypeError: If `sp_ids` is not a `SparseTensor`, or if `sp_weights` is
      neither `None` nor `SparseTensor`.
    ValueError: If `combiner` is not one of {"mean", "sqrtn", "sum"}.
  """
    if combiner not in ("mean", "sqrtn", "sum"):
        raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")

    if not isinstance(sp_ids, sparse_tensor.SparseTensor):
        raise TypeError("sp_ids must be SparseTensor")

    ignore_weights = sp_weights is None
    if not ignore_weights:
        if not isinstance(sp_weights, sparse_tensor.SparseTensor):
            raise TypeError("sp_weights must be either None or SparseTensor")

    scope = variable_scope.get_variable_scope()
    full_name = scope.name + "/" + name if scope.name else name
    with ops.name_scope(full_name + "/"):
        segment_ids = sp_ids.indices[:, 0]
        if segment_ids.dtype != dtypes.int32:
            segment_ids = math_ops.cast(segment_ids, dtypes.int32)

        ids = sp_ids.values
        ids, idx = array_ops.unique(ids)

        embeddings, trainable_ = embedding_lookup(
            params,
            ids,
            name=name + '/embedding_lookup',
            partition_strategy=partition_strategy,
            max_norm=max_norm,
            return_trainable=True)
        if embeddings.dtype in (dtypes.float16, dtypes.bfloat16):
            embeddings = math_ops.cast(embeddings, dtypes.float32)
        if not ignore_weights:
            weights = sp_weights.values
            if weights.dtype != embeddings.dtype:
                weights = math_ops.cast(weights, embeddings.dtype)

            embeddings = array_ops.gather(embeddings, idx)

            # Reshape weights to allow broadcast
            ones = array_ops.fill(
                array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
            bcast_weights_shape = array_ops.concat(
                [array_ops.shape(weights), ones], 0)

            orig_weights_shape = weights.get_shape()
            weights = array_ops.reshape(weights, bcast_weights_shape)

            # Set the weight shape, since after reshaping to bcast_weights_shape,
            # the shape becomes None.
            if embeddings.get_shape().ndims is not None:
                weights.set_shape(
                    orig_weights_shape.concatenate(
                        [1 for _ in range(embeddings.get_shape().ndims - 1)]))

            embeddings *= weights

            if combiner == "sum":
                embeddings = math_ops.segment_sum(embeddings,
                                                  segment_ids,
                                                  name=name)
            elif combiner == "mean":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weight_sum = math_ops.segment_sum(weights, segment_ids)
                embeddings = math_ops.div(embeddings, weight_sum, name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weights_squared = math_ops.pow(weights, 2)
                weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
                weight_sum_sqrt = math_ops.sqrt(weight_sum)
                embeddings = math_ops.div(embeddings,
                                          weight_sum_sqrt,
                                          name=name)
            else:
                assert False, "Unrecognized combiner"
        else:
            assert idx is not None
            if combiner == "sum":
                embeddings = math_ops.sparse_segment_sum(embeddings,
                                                         idx,
                                                         segment_ids,
                                                         name=name)
            elif combiner == "mean":
                embeddings = math_ops.sparse_segment_mean(embeddings,
                                                          idx,
                                                          segment_ids,
                                                          name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.sparse_segment_sqrt_n(embeddings,
                                                            idx,
                                                            segment_ids,
                                                            name=name)
            else:
                assert False, "Unrecognized combiner"

        return (embeddings, trainable_) if return_trainable else embeddings
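Like the other modern variants, the weighted branch above deduplicates ids with `unique` before the lookup and then restores one row per original id with `gather(embeddings, idx)`. A short sketch of why that round-trip is equivalent to looking up every id directly:

```python
import tensorflow as tf

ids = tf.constant([7, 3, 7, 9], dtype=tf.int64)
unique_ids, idx = tf.unique(ids)            # unique_ids=[7, 3, 9], idx=[0, 1, 0, 2]
table = tf.random.normal([10, 4])           # stand-in for the embedding table

unique_emb = tf.gather(table, unique_ids)   # one lookup per distinct id
per_id_emb = tf.gather(unique_emb, idx)     # back to one row per original id
tf.debugging.assert_near(per_id_emb, tf.gather(table, ids))
```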
 def testSegmentIdsShape(self):
   shape = [4, 4]
   tf_x, _ = self._input(shape)
   indices = constant_op.constant([0, 1, 2, 2], shape=[2, 2])
   with self.assertRaises(ValueError):
     math_ops.segment_sum(data=tf_x, segment_ids=indices)
Example #46
0
    def safe_embedding_lookup_sparse(
        self, sparse_ids, sparse_weights=None, combiner="mean", default_id=None
    ):
        """Lookup embedding results, accounting for invalid IDs and empty
        features. The result of this function is the same as
        `tf.nn.safe_embedding_lookup_sparse`, but this function is implemented
        to support embedding lookup with the ParameterServer distribution
        strategy.
        """
        self._init_for_graph_mode_if_necessary()

        sparse_ids = _prune_invalid_ids(sparse_ids)
        # Fill in dummy values for empty features, if necessary.
        sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
            sparse_ids, 0
        )

        segment_ids = sparse_ids.indices[:, 0]
        if segment_ids.dtype != tf.int32:
            segment_ids = tf.cast(segment_ids, tf.int32)

        ids = sparse_ids.values
        unique_ids, idx = tf.unique(ids)
        batch_embedding = self._get_embeddings_by_id(unique_ids)

        if sparse_weights is not None:
            if self.tape:
                batch_embedding = self._record_gradients(
                    batch_embedding=batch_embedding, ids=ids
                )

            weights = sparse_weights.values
            if weights.dtype != batch_embedding.dtype:
                weights = math_ops.cast(weights, batch_embedding.dtype)

            batch_embedding = array_ops.gather(batch_embedding, idx)
            # Reshape weights to allow broadcast
            ones = array_ops.fill(
                array_ops.expand_dims(array_ops.rank(batch_embedding) - 1, 0),
                1,
            )
            bcast_weights_shape = array_ops.concat(
                [array_ops.shape(weights), ones], 0
            )

            orig_weights_shape = weights.get_shape()
            weights = array_ops.reshape(weights, bcast_weights_shape)

            # Set the weight shape, since after reshaping to
            # bcast_weights_shape, the shape becomes None.
            if batch_embedding.get_shape().ndims is not None:
                weights.set_shape(
                    orig_weights_shape.concatenate(
                        [
                            1
                            for _ in range(
                                batch_embedding.get_shape().ndims - 1
                            )
                        ]
                    )
                )

            batch_embedding *= weights

            if combiner == "sum":
                batch_embedding = math_ops.segment_sum(
                    batch_embedding, segment_ids
                )
            elif combiner == "mean":
                batch_embedding = math_ops.segment_sum(
                    batch_embedding, segment_ids
                )
                weight_sum = math_ops.segment_sum(weights, segment_ids)
                batch_embedding = math_ops.div(batch_embedding, weight_sum)
            elif combiner == "sqrtn":
                batch_embedding = math_ops.segment_sum(
                    batch_embedding, segment_ids
                )
                weights_squared = math_ops.pow(weights, 2)
                weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
                weight_sum_sqrt = math_ops.sqrt(weight_sum)
                batch_embedding = math_ops.div(
                    batch_embedding, weight_sum_sqrt
                )
            else:
                assert False, "Unrecognized combiner"
        else:
            if self.tape:
                batch_embedding = self._record_gradients(
                    batch_embedding=batch_embedding, ids=unique_ids,
                )

            assert idx is not None
            if combiner == "sum":
                batch_embedding = math_ops.sparse_segment_sum(
                    batch_embedding, idx, segment_ids
                )
            elif combiner == "mean":
                batch_embedding = math_ops.sparse_segment_mean(
                    batch_embedding, idx, segment_ids
                )
            elif combiner == "sqrtn":
                batch_embedding = math_ops.sparse_segment_sqrt_n(
                    batch_embedding, idx, segment_ids
                )
            else:
                assert False, "Unrecognized combiner"

        # Broadcast is_row_empty to the same shape as embedding_lookup_result,
        # for use in Select.
        is_row_empty = array_ops.tile(
            array_ops.reshape(is_row_empty, [-1, 1]),
            array_ops.stack([1, array_ops.shape(batch_embedding)[1]]),
        )

        batch_embedding = array_ops.where(
            is_row_empty,
            array_ops.zeros_like(batch_embedding),
            batch_embedding,
            name=self.name,
        )
        batch_embedding.set_shape((None, self.output_dim))
        return batch_embedding
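The "safe" variant above additionally fills empty rows before the lookup and zeroes them out again afterwards. A hedged sketch of just that empty-row handling using the public TF API (tensor values are illustrative):

```python
import tensorflow as tf

sp = tf.SparseTensor(indices=[[0, 0], [2, 0]], values=[5, 7], dense_shape=[3, 2])
filled, is_row_empty = tf.sparse.fill_empty_rows(sp, 0)  # row 1 gets a dummy id 0
print(is_row_empty.numpy())                              # [False  True False]

emb = tf.random.normal([3, 4])   # stand-in for the combined embeddings
emb = tf.where(is_row_empty[:, None], tf.zeros_like(emb), emb)
```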
Example #47
0
def embedding_lookup_sparse(params, sp_ids, sp_weights,
                            name=None,
                            combiner="mean"):
  """Computes embeddings for the given ids and weights.

  This op assumes that there is at least one id for each row in the dense tensor
  represented by sp_ids (i.e. there are no rows with empty features), and that
  all the indices of sp_ids are in canonical row-major order.

  It also assumes that all id values lie in the range [0, p0), where p0
  is the sum of the size of params along dimension 0.

  Args:
    params: A single tensor representing the complete embedding tensor,
      or a list of P tensors all of same shape except for the first dimension,
      representing sharded embedding tensors. In the latter case, the ids are
      partitioned by id % P, and we do separate lookups in params[p] for
      0 <= p < P, and then stitch the results back together into a single
      result tensor. The first dimension is allowed to vary as the vocab
      size is not necessarily a multiple of P.
    sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId),
      where N is typically batch size and M is arbitrary.
    sp_weights: either a SparseTensor of float / double weights, or None to
      indicate all weights should be taken to be 1. If specified, sp_weights
      must have exactly the same shape and indices as sp_ids.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean" and "sum"
      are supported.
      "sum" computes the weighted sum of the embedding results for each row.
      "mean" is the weighted sum divided by the total weight.

  Returns:
    A dense tensor representing the combined embeddings for the
    sparse ids. For each row in the dense tensor represented by sp_ids, the op
    looks up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

    In other words, if
      shape(combined params) = [p0, p1, ..., pm]
    and
      shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]
    then
      shape(output) = [d0, d1, ..., dn-1, p1, ..., pm].

    For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

      [0, 0]: id 1, weight 2.0
      [0, 1]: id 3, weight 0.5
      [1, 0]: id 0, weight 1.0
      [2, 3]: id 1, weight 3.0

    with combiner="mean", then the output will be a 3x20 matrix where
      output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
      output[1, :] = params[0, :] * 1.0
      output[2, :] = params[1, :] * 3.0

  Raises:
    TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither
      None nor SparseTensor.
    ValueError: If combiner is not one of {"mean", "sum"}.
  """
  if combiner not in ("mean", "sum"):
    raise ValueError("combiner must be one of 'mean' or 'sum'")
  if not isinstance(params, list):
    params = [params]
  if not isinstance(sp_ids, ops.SparseTensor):
    raise TypeError("sp_ids must be SparseTensor")
  ignore_weights = sp_weights is None
  if not ignore_weights and not isinstance(sp_weights, ops.SparseTensor):
    raise TypeError("sp_weights must be either None or SparseTensor")

  with ops.op_scope(params + [sp_ids], name, "embedding_lookup_sparse") as name:
    segment_ids = sp_ids.indices[:, 0]
    if segment_ids.dtype != types.int32:
      segment_ids = math_ops.cast(segment_ids, types.int32)

    ids = sp_ids.values
    if ignore_weights:
      ids, idx = array_ops.unique(ids)
    else:
      idx = None

    embeddings = embedding_lookup(params, ids)
    if not ignore_weights:
      weights = sp_weights.values
      if weights.dtype != embeddings.dtype:
        weights = math_ops.cast(weights, embeddings.dtype)

      # Reshape weights to allow broadcast
      ones = array_ops.fill(
          array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
      bcast_weights_shape = array_ops.concat(0, [
          array_ops.shape(weights), ones])
      weights = array_ops.reshape(weights, bcast_weights_shape)
      embeddings *= weights

      if combiner == "sum":
        embeddings = math_ops.segment_sum(embeddings, segment_ids, name=name)
      elif combiner == "mean":
        embeddings = math_ops.segment_sum(embeddings, segment_ids)
        weight_sum = math_ops.segment_sum(weights, segment_ids)
        embeddings = math_ops.div(embeddings, weight_sum, name=name)
      else:
        assert False, "Unrecognized combiner"
    else:
      assert idx is not None
      if combiner == "sum":
        embeddings = math_ops.sparse_segment_sum(embeddings, idx, segment_ids,
                                                 name=name)
      elif combiner == "mean":
        embeddings = math_ops.sparse_segment_mean(embeddings, idx, segment_ids,
                                                  name=name)
      else:
        assert False, "Unrecognized combiner"

    return embeddings
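This earliest variant only supports the implicit `id % P` sharding described under `params`. A small NumPy sketch (the shard layout is illustrative, not the library's code path) of how mod partitioning routes ids and stitches the rows back together:

```python
import numpy as np

P = 2
full = np.arange(10 * 4, dtype=np.float32).reshape(10, 4)   # the "combined" params
shards = [full[p::P] for p in range(P)]                     # shard p holds ids with id % P == p

ids = np.array([1, 3, 0, 1])
rows = np.stack([shards[i % P][i // P] for i in ids])       # lookup in shard, then stitch
np.testing.assert_allclose(rows, full[ids])
```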