def quantiles_ready():
    """The subgraph for when the quantiles are ready."""
    quantized_feature = quantile_ops.quantiles([sparse_column_values], [],
                                               [quantile_buckets], [])
    quantized_feature = math_ops.cast(quantized_feature[0], dtypes.int64)
    quantized_feature = array_ops.reshape(quantized_feature, [-1])
    example_indices, _ = array_ops.split(
        sparse_column_indices, num_or_size_splits=2, axis=1)
    example_indices = array_ops.squeeze(example_indices, [1])
    filtered_gradients = array_ops.gather(gradients, example_indices)
    filtered_hessians = array_ops.gather(hessians, example_indices)
    filtered_partition_ids = array_ops.gather(example_partition_ids,
                                              example_indices)
    unique_partitions, mapped_partitions = array_ops.unique(
        example_partition_ids)

    # Compute aggregate stats for each partition.
    per_partition_gradients = math_ops.unsorted_segment_sum(
        gradients, mapped_partitions, array_ops.size(unique_partitions))
    per_partition_hessians = math_ops.unsorted_segment_sum(
        hessians, mapped_partitions, array_ops.size(unique_partitions))

    # Prepend a bias feature per partition that accumulates the stats for all
    # examples in that partition.
    bias_feature_ids = array_ops.fill(
        array_ops.shape(unique_partitions), _BIAS_FEATURE_ID)
    bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64)
    partition_ids = array_ops.concat(
        [unique_partitions, filtered_partition_ids], 0)
    filtered_gradients = array_ops.concat(
        [per_partition_gradients, filtered_gradients], 0)
    filtered_hessians = array_ops.concat(
        [per_partition_hessians, filtered_hessians], 0)
    bucket_ids = array_ops.concat([bias_feature_ids, quantized_feature], 0)
    return partition_ids, bucket_ids, filtered_gradients, filtered_hessians
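
# The bias-feature construction above is a recurring pattern in this handler:
# map each example's partition id to a dense index with array_ops.unique, then
# sum the per-example stats into one row per partition with
# unsorted_segment_sum. A minimal sketch of that pattern using the public
# TF 2.x API (eager mode assumed; the tensors below are illustrative, not
# taken from the handler):
import tensorflow as tf

example_partition_ids = tf.constant([3, 3, 7, 3, 7])
gradients = tf.constant([0.5, 1.0, 2.0, 0.25, 4.0])

unique_partitions, mapped_partitions = tf.unique(example_partition_ids)
per_partition_gradients = tf.math.unsorted_segment_sum(
    gradients, mapped_partitions, tf.size(unique_partitions))
print(unique_partitions.numpy())        # [3 7]
print(per_partition_gradients.numpy())  # [1.75 6.  ]
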
  def _full_batch_training_op(self, inputs, cluster_idx_list, cluster_centers):
    """Creates an op for training for full batch case.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inputs' and specifies the
        cluster id corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.

    Returns:
      An op for doing an update of full-batch k-means.
    """
    cluster_sums = []
    cluster_counts = []
    epsilon = constant_op.constant(1e-6, dtype=inputs[0].dtype)
    for inp, cluster_idx in zip(inputs, cluster_idx_list):
      with ops.colocate_with(inp):
        cluster_sums.append(
            math_ops.unsorted_segment_sum(inp, cluster_idx, self._num_clusters))
        cluster_counts.append(
            math_ops.unsorted_segment_sum(
                array_ops.reshape(
                    array_ops.ones(
                        array_ops.reshape(array_ops.shape(inp)[0], [-1])),
                    [-1, 1]), cluster_idx, self._num_clusters))
    with ops.colocate_with(cluster_centers):
      new_clusters_centers = math_ops.add_n(cluster_sums) / (math_ops.cast(
          math_ops.add_n(cluster_counts), cluster_sums[0].dtype) + epsilon)
      if self._clusters_l2_normalized():
        new_clusters_centers = nn_impl.l2_normalize(new_clusters_centers, dim=1)
    return state_ops.assign(cluster_centers, new_clusters_centers)
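
# The full-batch update above is a per-cluster mean: sum the points assigned
# to each cluster, count them, divide. A standalone sketch of the same idea
# with the public TF 2.x API (eager mode assumed; this is not the KMeans
# estimator itself, just the reduction it relies on):
import tensorflow as tf

points = tf.constant([[0., 0.], [2., 2.], [4., 4.], [10., 10.]])
cluster_idx = tf.constant([0, 0, 1, 1])
num_clusters = 2

sums = tf.math.unsorted_segment_sum(points, cluster_idx, num_clusters)
counts = tf.math.unsorted_segment_sum(
    tf.ones_like(points[:, :1]), cluster_idx, num_clusters)
new_centers = sums / (counts + 1e-6)   # epsilon guards against empty clusters
print(new_centers.numpy())             # ~[[1. 1.] [7. 7.]]
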
    def active_inputs():
      """The normal flow when the handler is active."""
      # Remove the second column of example indices matrix since it is not
      # useful.
      example_indices, _ = array_ops.split(
          self._sparse_int_column.indices, num_or_size_splits=2, axis=1)
      example_indices = array_ops.squeeze(example_indices, [1])

      filtered_gradients = array_ops.gather(gradients, example_indices)
      filtered_hessians = array_ops.gather(hessians, example_indices)
      filtered_partition_ids = array_ops.gather(example_partition_ids,
                                                example_indices)
      unique_partitions, mapped_partitions = array_ops.unique(
          example_partition_ids)

      # Compute aggregate stats for each partition.
      # The bias is computed on gradients and hessians (and not
      # filtered_gradients) which have exactly one value per example, so we
      # don't double count a gradient in multivalent columns.
      # Since unsorted_segment_sum can be numerically unstable, use 64bit
      # operation.
      gradients64 = math_ops.cast(gradients, dtypes.float64)
      hessians64 = math_ops.cast(hessians, dtypes.float64)
      per_partition_gradients = math_ops.unsorted_segment_sum(
          gradients64, mapped_partitions, array_ops.size(unique_partitions))
      per_partition_hessians = math_ops.unsorted_segment_sum(
          hessians64, mapped_partitions, array_ops.size(unique_partitions))
      per_partition_gradients = math_ops.cast(per_partition_gradients,
                                              dtypes.float32)
      per_partition_hessians = math_ops.cast(per_partition_hessians,
                                             dtypes.float32)
      # Prepend a bias feature per partition that accumulates the stats for all
      # examples in that partition.
      # Bias is added to the stats even if there are no examples with values in
      # the current sparse column. The reason is that the other example batches
      # might have values in these partitions so we have to keep the bias
      # updated.
      bias_feature_ids = array_ops.fill(
          array_ops.shape(unique_partitions), _BIAS_FEATURE_ID)
      bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64)
      partition_ids = array_ops.concat(
          [unique_partitions, filtered_partition_ids], 0)
      filtered_gradients = array_ops.concat(
          [per_partition_gradients, filtered_gradients], 0)
      filtered_hessians = array_ops.concat(
          [per_partition_hessians, filtered_hessians], 0)
      feature_ids = array_ops.concat(
          [bias_feature_ids, self._sparse_int_column.values], 0)
      # Dimension is always zero for sparse int features.
      dimension_ids = array_ops.zeros_like(feature_ids, dtype=dtypes.int64)
      feature_ids_and_dimensions = array_ops.stack(
          [feature_ids, dimension_ids], axis=1)
      return (partition_ids, feature_ids_and_dimensions, filtered_gradients,
              filtered_hessians)
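
# Note the cast-sum-cast pattern above: gradients and hessians are promoted to
# float64 before unsorted_segment_sum and cast back to float32 afterwards to
# reduce accumulation error over large partitions. A minimal sketch of that
# pattern in isolation (TF 2.x eager assumed; the tensors are made up):
import tensorflow as tf

gradients = tf.random.uniform([100000], dtype=tf.float32)
partitions = tf.zeros([100000], dtype=tf.int32)   # everything in partition 0

gradients64 = tf.cast(gradients, tf.float64)
summed64 = tf.math.unsorted_segment_sum(gradients64, partitions, 1)
summed = tf.cast(summed64, tf.float32)            # one total per partition
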
  def quantiles_ready():
    """The subgraph for when the quantiles are ready."""
    quantized_feature = quantile_ops.quantiles([], [sparse_column_values], [],
                                               [quantile_buckets],
                                               [sparse_column_indices])

    quantized_feature = math_ops.cast(quantized_feature[1], dtypes.int64)
    quantized_feature = array_ops.squeeze(quantized_feature, axis=0)

    example_indices, _ = array_ops.split(
        sparse_column_indices, num_or_size_splits=2, axis=1)
    example_indices = array_ops.squeeze(example_indices, [1])
    filtered_gradients = array_ops.gather(gradients, example_indices)
    filtered_hessians = array_ops.gather(hessians, example_indices)
    filtered_partition_ids = array_ops.gather(example_partition_ids,
                                              example_indices)
    unique_partitions, mapped_partitions = array_ops.unique(
        example_partition_ids)

    # Compute aggregate stats for each partition.
    # Since unsorted_segment_sum can be numerically unstable, use 64bit
    # operation.
    gradients64 = math_ops.cast(gradients, dtypes.float64)
    hessians64 = math_ops.cast(hessians, dtypes.float64)
    per_partition_gradients = math_ops.unsorted_segment_sum(
        gradients64, mapped_partitions, array_ops.size(unique_partitions))
    per_partition_hessians = math_ops.unsorted_segment_sum(
        hessians64, mapped_partitions, array_ops.size(unique_partitions))
    per_partition_gradients = math_ops.cast(per_partition_gradients,
                                            dtypes.float32)
    per_partition_hessians = math_ops.cast(per_partition_hessians,
                                           dtypes.float32)
    # Prepend a bias feature per partition that accumulates the stats for all
    # examples in that partition.
    bias_feature_ids = array_ops.fill(
        array_ops.shape(unique_partitions), _BIAS_FEATURE_ID)
    bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64)
    zeros = array_ops.zeros_like(bias_feature_ids)
    bias_feature_ids = array_ops.stack([bias_feature_ids, zeros], axis=1)

    partition_ids = array_ops.concat(
        [unique_partitions, filtered_partition_ids], 0)
    filtered_gradients = array_ops.concat(
        [per_partition_gradients, filtered_gradients], 0)
    filtered_hessians = array_ops.concat(
        [per_partition_hessians, filtered_hessians], 0)

    bucket_ids = array_ops.concat([bias_feature_ids, quantized_feature], 0)

    return partition_ids, bucket_ids, filtered_gradients, filtered_hessians
Example #5
 def approximate_hessian(self, grads_and_vars, name=None):
   """
   I haven't tested this yet so I have no idea if it works, but even if it
   does it's probably super slow, and either way nothing else has been modified
   to deal with it.
   """
   
   gv = 0
   var_refs = []
   for g_t, x_tm1 in grads_and_vars:
     var_refs.append(x_tm1.ref())
     if g_t is None:
       continue
     with ops.name_scope('update_' + x_tm1.op.name), ops.device(x_tm1.device):
       if isinstance(g_t, ops.Tensor):
         gv += math_ops.reduce_sum(g_t * random_ops.random_normal(g_t.get_shape()))
       else:
         idxs, idxs_ = array_ops.unique(g_t.indices)
         g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs))
         gv += math_ops.reduce_sum(g_t_ * random_ops.random_normal(g_t_.get_shape()))
   # These three names were undefined in the original snippet; use the
   # defaults from tf.train.Optimizer.compute_gradients.
   gate_gradients = Optimizer.GATE_OP
   aggregation_method = None
   colocate_gradients_with_ops = False
   hesses = gradients.gradients(gv, var_refs,
                                gate_gradients=(gate_gradients == Optimizer.GATE_OP),
                                aggregation_method=aggregation_method,
                                colocate_gradients_with_ops=colocate_gradients_with_ops)
   return zip([g_t for g_t, _ in grads_and_vars], [x_tm1 for _, x_tm1 in grads_and_vars], hesses)
 def testDropNegatives(self):
   # Note: the test works by replacing segment id 8 with -1 in the indices
   # and zeroing out the corresponding values in the numpy reference result.
   dtypes = [
       dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64,
       dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128
   ]
   indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
   num_segments = 12
   for indices in indices_flat, indices_flat.reshape(5, 2):
     shape = indices.shape + (2,)
     for dtype in dtypes:
       with self.test_session(use_gpu=True):
         tf_x, np_x = self._input(shape, dtype=dtype)
         np_ans = self._segmentReduce(
             indices, np_x, np.add, op2=None, num_segments=num_segments)
          # Zero out the reference values for segment 8 and above.
         np_ans[8:] = 0
         # Replace 8 with -1 in indices
         np.place(indices, indices == 8, [-1])
         s = math_ops.unsorted_segment_sum(
             data=tf_x, segment_ids=indices, num_segments=num_segments)
         tf_ans = s.eval()
       self.assertAllClose(np_ans, tf_ans)
       self.assertShapeEqual(np_ans, s)
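
# The test above relies on unsorted_segment_sum dropping negative segment ids:
# rows whose segment id is -1 simply do not contribute to any output segment.
# A minimal sketch of that behaviour (TF 2.x eager assumed):
import tensorflow as tf

data = tf.constant([[1., 1.], [2., 2.], [3., 3.]])
segment_ids = tf.constant([0, -1, 0])   # the middle row is ignored

out = tf.math.unsorted_segment_sum(data, segment_ids, num_segments=2)
print(out.numpy())  # [[4. 4.] [0. 0.]]
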
    def DynamicStitchGrads(op, grad):
        num_values = len(op.inputs) // 2
        indices_grad = [None] * num_values

        def AsInt32(x):
            return (x if op.inputs[0].dtype == dtypes.int32 else
                    math_ops.cast(x, dtypes.int32))

        idxs = [AsInt32(array_ops.reshape(op.inputs[i], (-1,)))
                for i in range(num_values)]
        if isinstance(grad, ops.IndexedSlices):
            output_shape = array_ops.shape(op.outputs[0])
            output_rows = output_shape[0]
            grad = math_ops.unsorted_segment_sum(grad.values, grad.indices,
                                                 output_rows)

        values_grad = []
        zeros = array_ops.zeros_like(grad)
        idx_zeros = [zeros[:array_ops.shape(x)[0]] for x in idxs]
        grad_range = math_ops.range(array_ops.shape(grad)[0])
        for i in range(num_values):
            if i == num_values - 1:
                v_grad = grad
            else:
                v_grad = data_flow_ops.dynamic_stitch(
                    [grad_range] + idxs[i + 1:], [grad] + idx_zeros[i + 1:])
            v_grad = array_ops.gather(v_grad, AsInt32(op.inputs[i]))
            values_grad += [v_grad]

        return indices_grad + values_grad
  def testGradientMatchesSegmentSum(self):
    # Strategy: compute the gradient for UnsortedSegmentSum and SegmentSum
    # and compare the outputs, which should be identical.
    # NB: for this test to work, indices must be valid for SegmentSum, namely
    # it must be sorted, the indices must be contiguous, and num_segments
    # must be max(indices) + 1.
    indices = [0, 0, 1, 1, 1, 2, 3, 4, 5]
    n = len(indices)
    num_cols = 2
    shape = [n, num_cols]
    num_segments = max(indices) + 1
    for dtype in self.differentiable_dtypes:
      with self.cached_session(use_gpu=True):
        tf_x, np_x = self._input(shape, dtype=dtype)
        # Results from UnsortedSegmentSum
        unsorted_s = math_ops.unsorted_segment_sum(
            data=tf_x, segment_ids=indices, num_segments=num_segments)
        unsorted_jacob_t, unsorted_jacob_n = (
            gradient_checker.compute_gradient(tf_x, shape, unsorted_s,
                                              [num_segments, num_cols],
                                              x_init_value=np_x, delta=1))

        # Results from SegmentSum
        sorted_s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
        sorted_jacob_t, sorted_jacob_n = gradient_checker.compute_gradient(
            tf_x,
            shape,
            sorted_s, [num_segments, num_cols],
            x_init_value=np_x,
            delta=1)
      self.assertAllClose(unsorted_jacob_t, sorted_jacob_t)
      self.assertAllClose(unsorted_jacob_n, sorted_jacob_n)
Example #9
def _TileGrad(op, grad):
  """Sum reduces grad along the tiled dimensions."""
  input_shape = array_ops.shape(op.inputs[0])
  # We interleave multiples and input_shape to get split_shape,
  # reshape grad to split_shape, and reduce along all even
  # dimensions (the tiled dimensions) to get the result
  # with shape input_shape.  For example
  #   input_shape = [20, 30, 40]
  #   multiples = [2, 3, 4]
  #   split_shape = [2, 20, 3, 30, 4, 40]
  #   axes = [0, 2, 4]
  split_shape = array_ops.reshape(
      array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1])
  axes = math_ops.range(0, array_ops.size(split_shape), 2)
  # Sum reduces grad along the first dimension for IndexedSlices
  if isinstance(grad, ops.IndexedSlices):
    grad = math_ops.unsorted_segment_sum(
        grad.values,
        math_ops.mod(grad.indices, input_shape[0]),
        input_shape[0])
    split_shape = array_ops.concat([[1], split_shape[1:]], axis=0)
  input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes)
  # Fix shape inference
  if not context.executing_eagerly():
    input_grad.set_shape(op.inputs[0].get_shape())
  return [input_grad, None]
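
# The dense branch of _TileGrad sums the upstream gradient over every tiled
# copy by interleaving multiples with the input shape and reducing over the
# even axes. A quick numeric sketch of that reduction, re-implemented by hand
# (TF 2.x eager assumed; this is not the registered gradient itself):
import tensorflow as tf

input_shape = tf.constant([2, 3])
multiples = tf.constant([2, 2])
grad = tf.ones([4, 6])   # upstream gradient of the tiled tensor

split_shape = tf.reshape(
    tf.transpose(tf.stack([multiples, input_shape])), [-1])   # [2, 2, 2, 3]
axes = tf.range(0, tf.size(split_shape), 2)                   # [0, 2]
input_grad = tf.reduce_sum(tf.reshape(grad, split_shape), axes)
print(input_grad.numpy())  # 2*2 = 4 tiled copies of each element -> all 4s
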
Example #10
 def _apply_sparse(self, g_t, x_tm1, prepare):
   """"""
   
   idxs, idxs_ = array_ops.unique(g_t.indices)
   g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs))
   updates = []
   
   if self._mu > 0:
     m_and_t = self._sparse_moving_average(x_tm1, idxs, g_t_, 'm', self._mu)
     m_t_ = array_ops.gather(m_and_t[0], idxs)
     gamma_t = ops.convert_to_tensor(self._gamma)
     m_bar_t_ = (1-gamma_t)*m_t_ + gamma_t*g_t_
     updates.extend(m_and_t)
   else:
     m_bar_t_ = g_t_
   
   if self._ups > 0:
     v_and_t = self._sparse_moving_average(x_tm1, idxs, g_t_**2, 'v', self._ups)
     v_t_ = array_ops.gather(v_and_t[0], idxs)
     eps_t = ops.convert_to_tensor(self._eps)
     v_bar_t_ = math_ops.sqrt(v_t_ + eps_t)
     updates.extend(v_and_t)
   else:
     v_bar_t_ = 1.
   
   lr_t = ops.convert_to_tensor(self._lr)
   s_t_ = lr_t * m_bar_t_ / v_bar_t_
   return [[s_t_, x_tm1, idxs, g_t]] + updates
 def _apply_sparse(self, grad, var):
   if len(grad.indices.get_shape()) == 1:
     grad_indices = grad.indices
     grad_values = grad.values
   else:
     grad_indices = array_ops.reshape(grad.indices, [-1])
     grad_values = array_ops.reshape(grad.values, [-1, grad.values.get_shape()[-1].value])
   gidxs, metagidxs = array_ops.unique(grad_indices)
   sizegidxs = array_ops.size(gidxs)
   gvals = math_ops.unsorted_segment_sum(grad_values, metagidxs, sizegidxs)
   # m_t = mu * m + (1 - mu) * g_t
   m = self.get_slot(var, "m")
   m_scaled_g_values = gvals * (1 - self._mu_t)
   m_t = state_ops.scatter_update(m, gidxs,
                                  array_ops.gather(m, gidxs) * self._mu_t,
                                  use_locking=self._use_locking)
   m_t = state_ops.scatter_add(m_t, gidxs, m_scaled_g_values,
                               use_locking=self._use_locking)
   m_t_ = array_ops.gather(m_t, gidxs) / (1 - self._mu2_t * self._mu_power)
   # m_bar = mu * m_t + (1 - mu) * g_t
   m_bar = self._mu2_t * m_t_ + m_scaled_g_values / (1 - self._mu_power)
   var_update = state_ops.scatter_sub(var, gidxs,
                                    self._lr_t * m_bar,
                                    use_locking=self._use_locking)
   return control_flow_ops.group(*[var_update, m_t])
Example #12
  def testAggregateGradients(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      ind2 = constant_op.constant(np.array([2, 3]))
      ind3 = constant_op.constant(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * constant_op.constant(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad, self.evaluate(tf_dense_grad))
Example #13
 def _prepare(self, grads_and_vars):
   """"""
   
   if self._lr is None:
     sTy = 0
     sTs = 0
     yTy = 0
     for g_t, x_tm1 in grads_and_vars:
       if g_t is None:
         continue
       with ops.name_scope('update_' + x_tm1.op.name), ops.device(x_tm1.device):
         if isinstance(g_t, ops.Tensor):
           g_tm1 = self.get_slot(x_tm1, 'g')
           s_tm1 = self.get_slot(x_tm1, 's')
           y_t = (g_t-g_tm1)
           sTy += math_ops.reduce_sum(s_tm1*y_t)
           sTs += math_ops.reduce_sum(s_tm1**2)
           yTy += math_ops.reduce_sum(y_t**2)
         else:
           idxs, idxs_ = array_ops.unique(g_t.indices)
           g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs))
           g_tm1 = self.get_slot(x_tm1, 'g')
           g_tm1_ = array_ops.gather(g_tm1, idxs)
           s_tm1 = self.get_slot(x_tm1, 's')
           s_tm1_ = array_ops.gather(s_tm1, idxs)
           y_t_ = (g_t_-g_tm1_)
           sTy += math_ops.reduce_sum(s_tm1_*y_t_)
           sTs += math_ops.reduce_sum(s_tm1_**2)
           yTy += math_ops.reduce_sum(y_t_**2)
     sTy = math_ops.abs(sTy)
     self._lr = sTs / (sTy + self._eps)
Example #14
  def testAggregateGradients(self):

    def fn(x):
      ind1 = tensor.Tensor(np.array([0, 1]))
      ind2 = tensor.Tensor(np.array([2, 3]))
      ind3 = tensor.Tensor(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * tensor.Tensor(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = tensor.Tensor(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]

    with context.graph_mode(), self.test_session():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, reduction_indices=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
Example #15
 def _apply_sparse(self, g_t, x_tm1, prepare):
   """"""
   
   idxs, idxs_ = array_ops.unique(g_t.indices)
   g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs))
   
   s_t_ = self._lr * g_t_
   return [[s_t_, x_tm1, idxs, g_t_]]
Example #16
def _GatherV2Grad(op, grad):
  """Gradient for GatherV2 op."""
  # params can be large, so colocate the shape calculation with it.
  #
  # params can be very large for sparse model, array_ops.shape raises
  # exception on the Windows platform when any dimension is larger than
  # int32. params_shape is not used in optimizer apply_sparse gradients,
  # so it's fine to convert it back to int32 regardless of truncation.
  params = op.inputs[0]
  with ops.colocate_with(params):
    params_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
    params_shape = math_ops.to_int32(params_shape)

  indices = op.inputs[1]
  indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
  axis = op.inputs[2]
  axis_static = tensor_util.constant_value(axis)

  # For axis 0 gathers, build an appropriately shaped IndexedSlices.
  if axis_static == 0:
    values_shape = array_ops.concat([indices_size, params_shape[1:]], 0)
    values = array_ops.reshape(grad, values_shape)
    indices = array_ops.reshape(indices, indices_size)
    return [ops.IndexedSlices(values, indices, params_shape), None, None]

  outer_shape = params_shape[:axis]
  outer_dims = array_ops.size(outer_shape)
  inner_shape = params_shape[axis:][1:]
  inner_dims = array_ops.size(inner_shape)

  outer_axes_indices = math_ops.range(outer_dims)
  inner_axes_indices = math_ops.range(outer_dims + 1,
                                      outer_dims + 1 + inner_dims)

  values_shape = array_ops.concat([outer_shape, indices_size, inner_shape], 0)
  values = array_ops.reshape(grad, values_shape)
  indices = array_ops.reshape(indices, indices_size)

  # We need to sum up every slice `values[..., i, ....]` corresponding to
  # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
  # support an axis parameter, we transpose the gather dimension to the front,
  # then use `unsorted_segment_sum` to build a
  # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
  # affecting each index in `gather_axis` summed up.
  transpose_dims = array_ops.concat(
      [[outer_dims], outer_axes_indices, inner_axes_indices], 0)
  values_transpose = array_ops.transpose(values, transpose_dims)
  num_segments = params_shape[axis]

  params_grad = math_ops.unsorted_segment_sum(
      values_transpose, indices, num_segments)

  # Inverts the above transpose by moving dimension 0 back to its original
  # position.
  invert_transpose_dims = array_ops.concat(
      [outer_axes_indices + 1, [0], inner_axes_indices], 0)
  params_grad = array_ops.transpose(params_grad, invert_transpose_dims)
  return [params_grad, None, None]
 def testBadIndices(self):
   # Note: GPU kernel does not return the out-of-range error needed for this
   # test, so this test is marked as cpu-only.
   with self.test_session(use_gpu=False):
     for bad in [[-1]], [[7]]:
       unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2)
       with self.assertRaisesOpError(
           r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]):
         unsorted.eval()
 def testBadIndices(self):
   # Note: GPU kernel does not return the out-of-range error needed for this
   # test, so this test is marked as cpu-only.
   # Note: With PR #13055 a negative index will be ignored silently.
   with self.session(use_gpu=False):
     for bad in [[2]], [[7]]:
       unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2)
       with self.assertRaisesOpError(
           r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]):
         self.evaluate(unsorted)
 def testEmptySecondDimension(self):
   dtypes = [np.float16, np.float32, np.float64, np.int64, np.int32,
             np.complex64, np.complex128]
   with self.session(use_gpu=True):
     for dtype in dtypes:
       for itype in (np.int32, np.int64):
         data = np.zeros((2, 0), dtype=dtype)
         segment_ids = np.array([0, 1], dtype=itype)
         unsorted = math_ops.unsorted_segment_sum(data, segment_ids, 2)
         self.assertAllEqual(unsorted.eval(), np.zeros((2, 0), dtype=dtype))
 def UnsortedSegmentSum(self, data, indices, num_segments):
   with self.test_session() as sess, self.test_scope():
     d = array_ops.placeholder(data.dtype, shape=data.shape)
     if isinstance(indices, int):
       i = array_ops.placeholder(np.int32, shape=[])
     else:
       i = array_ops.placeholder(indices.dtype, shape=indices.shape)
     return sess.run(
         math_ops.unsorted_segment_sum(d, i, num_segments),
         {d: data,
          i: indices})
  def _aggregate_sparse_grad(self, grad, var, train_ops):
    """Aggregate sparse gradients.

    Args:
      grad: The sparse gradient to aggregate.
      var: The variable to apply this gradient to.
      train_ops: The train_ops for the worker to run.

    Returns:
      aggregated_grad: Aggregated grad.
    """
    # Sparse gradients have to be inserted as individual (value, index) pairs
    # rather than as a whole IndexedSlices, because their shapes are not
    # deterministic.
    sparse_grad_queue = (data_flow_ops.FIFOQueue(
        -1,
        (grad.values.dtype, grad.indices.dtype),
        shapes=(var.get_shape().as_list()[1:], ()),
        shared_name="sparse_grad_q_%s" % var.name))
    self._sparse_grad_queues_and_devs.append((sparse_grad_queue, var.device))

    # Sparse token is inserted after the "enqueue_many" finishes. This
    # is needed to make sure enough sparse gradients have been enqueued
    # before applying them to the variables.
    sparse_token_queue = (data_flow_ops.FIFOQueue(
        self._replicas_to_aggregate * 2,
        types_pb2.DT_INT32,
        shapes=(),
        shared_name="sparse_token_q_%s" % var.name))
    self._one_element_queue_list.append((sparse_token_queue, var.device))

    enqueue_sparse_op = sparse_grad_queue.enqueue_many([grad.values,
                                                        grad.indices])
    with ops.control_dependencies([enqueue_sparse_op]):
      train_ops.append(sparse_token_queue.enqueue((1,)))

    with ops.control_dependencies([sparse_token_queue.dequeue_many(
        self._replicas_to_aggregate)]):
      values, indices = sparse_grad_queue.dequeue_many(sparse_grad_queue.size())
      concat_grad = ops.IndexedSlices(values, indices, grad.dense_shape)

      # Sum the gradients of the same variables in the sparse layers so
      # that each variable is only updated once. Note that with 2
      # gradients g1 and g2 from 2 replicas for the same variable,
      # apply(g1+g2) is different from apply(g1) and then apply(g2) when
      # the optimizer is complex like Momentum or Adagrad.
      values = concat_grad.values
      indices = concat_grad.indices
      new_indices, indx = array_ops.unique(indices)
      num_indices = array_ops.shape(new_indices)[0]
      sum_values = math_ops.unsorted_segment_sum(values, indx, num_indices)
      return ops.IndexedSlices(sum_values, new_indices, concat_grad.dense_shape)
Example #22
def _DynamicStitchGrads(op, grad):
  """Gradients for DynamicStitch."""

  num_values = len(op.inputs) // 2
  indices_grad = [None] * num_values

  def AsInt32(x):
    return (x if op.inputs[0].dtype == dtypes.int32 else
            math_ops.cast(x, dtypes.int32))
  inputs = [AsInt32(op.inputs[i]) for i in xrange(num_values)]
  if isinstance(grad, ops.IndexedSlices):
    output_shape = array_ops.shape(op.outputs[0])
    output_rows = output_shape[0]
    grad = math_ops.unsorted_segment_sum(grad.values, grad.indices, output_rows)
  values_grad = [array_ops.gather(grad, inp) for inp in inputs]
  return indices_grad + values_grad
Example #23
def _BroadcastToGrad(op, grad):
  input_value = op.inputs[0]
  broadcast_shape = op.inputs[1]
  # Assign ids for each position in input_value.
  input_value_shape = array_ops.shape(input_value)
  input_value_size = array_ops.size(input_value)
  ids = array_ops.reshape(math_ops.range(input_value_size), input_value_shape)
  broadcast_ids = array_ops.broadcast_to(ids, broadcast_shape)
  # Group by ids and sum its gradients.
  grad_flatten = array_ops.reshape(grad, [-1])
  broadcast_ids_flatten = array_ops.reshape(broadcast_ids, [-1])
  updates_grad_flatten = math_ops.unsorted_segment_sum(grad_flatten,
                                                       broadcast_ids_flatten,
                                                       input_value_size)
  updates_grad = array_ops.reshape(updates_grad_flatten, input_value_shape)
  return [updates_grad, None]
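
# _BroadcastToGrad assigns every element of the original input a unique id,
# broadcasts those ids the same way the value was broadcast, and then sums the
# upstream gradient per id. A minimal sketch of the same idea (TF 2.x eager
# assumed; standalone, not the registered gradient):
import tensorflow as tf

input_value = tf.constant([[1.], [2.]])   # shape [2, 1]
broadcast_shape = tf.constant([2, 3])
grad = tf.ones(broadcast_shape)           # upstream gradient, shape [2, 3]

ids = tf.reshape(tf.range(tf.size(input_value)), tf.shape(input_value))
broadcast_ids = tf.broadcast_to(ids, broadcast_shape)
updates_grad = tf.math.unsorted_segment_sum(
    tf.reshape(grad, [-1]), tf.reshape(broadcast_ids, [-1]),
    tf.size(input_value))
print(tf.reshape(updates_grad, tf.shape(input_value)).numpy())  # [[3.] [3.]]
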
def _UnsortedSegmentMinOrMaxGrad(op, grad):
  """ Gradient for UnsortedSegmentMin and UnsortedSegmentMax. """
  # Get the number of selected (minimum or maximum) elements in each segment.
  gathered_outputs, zero_clipped_indices, is_positive = \
      _GatherDropNegatives(op.outputs[0], op.inputs[1])
  is_selected = math_ops.equal(op.inputs[0], gathered_outputs)
  is_selected = math_ops.logical_and(is_selected, is_positive)
  num_selected = math_ops.unsorted_segment_sum(
      math_ops.cast(is_selected, grad.dtype), op.inputs[1], op.inputs[2])
  # Compute the gradient for each segment. The gradient for the ith segment is
  # divided evenly among the selected elements in that segment.
  weighted_grads = math_ops.div(grad, num_selected)
  gathered_grads, _, _ = _GatherDropNegatives(weighted_grads, None,
                                              zero_clipped_indices,
                                              is_positive)
  zeros = array_ops.zeros_like(gathered_grads)
  return array_ops.where(is_selected, gathered_grads, zeros), None, None
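
# The min/max gradient above splits each segment's upstream gradient evenly
# among the elements that attain the segment's min/max. A quick check of that
# behaviour through autodiff (TF 2.x eager assumed):
import tensorflow as tf

x = tf.constant([1., 3., 3., 2.])
seg = tf.constant([0, 0, 0, 1])
with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.math.unsorted_segment_max(x, seg, 2)
print(tape.gradient(y, x).numpy())  # [0.  0.5 0.5 1. ] -- the tie is split
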
 def testGradient(self):
   num_cols = 2
   indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
   num_segments = max(indices_flat) + 3
   for indices in indices_flat, indices_flat.reshape(5, 2):
     shape = indices.shape + (num_cols,)
     with self.test_session(use_gpu=self.use_gpu):
       tf_x, np_x = self._input(shape, dtype=dtypes_lib.float64)
       s = math_ops.unsorted_segment_sum(
           data=tf_x, segment_ids=indices, num_segments=num_segments)
       jacob_t, jacob_n = gradient_checker.compute_gradient(
           tf_x,
           shape,
           s, [num_segments, num_cols],
           x_init_value=np_x.astype(np.double),
           delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
Example #26
def _deduplicate_indexed_slices(values, indices):
  """Sums `values` associated with any non-unique `indices`.

  Args:
    values: A `Tensor` with rank >= 1.
    indices: A one-dimensional integer `Tensor`, indexing into the first
      dimension of `values` (as in an IndexedSlices object).
  Returns:
    A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
    de-duplicated version of `indices` and `summed_values` contains the sum of
    `values` slices associated with each unique index.
  """
  unique_indices, new_index_positions = array_ops.unique(indices)
  summed_values = math_ops.unsorted_segment_sum(
      values, new_index_positions,
      array_ops.shape(unique_indices)[0])
  return (summed_values, unique_indices)
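
# _deduplicate_indexed_slices is the canonical unique + unsorted_segment_sum
# pairing: duplicate indices get their values summed so each row is applied
# exactly once. A minimal usage sketch with the public TF 2.x API (eager mode
# assumed):
import tensorflow as tf

values = tf.constant([[1., 1.], [2., 2.], [3., 3.]])
indices = tf.constant([0, 5, 0])   # index 0 appears twice

unique_indices, new_index_positions = tf.unique(indices)
summed_values = tf.math.unsorted_segment_sum(
    values, new_index_positions, tf.shape(unique_indices)[0])
print(unique_indices.numpy())  # [0 5]
print(summed_values.numpy())   # [[4. 4.] [2. 2.]]
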
Example #27
def _indexed_slices_to_tensor(value):
  """Converts an IndexedSlices object `value` to a Tensor.

  Args:
    value: An ops.IndexedSlices object.

  Returns:
    A dense Tensor representing the values in the given IndexedSlices.

  Raises:
    ValueError: If the IndexedSlices does not have a known dense_shape.
  """
  if value.dense_shape is None:
    raise ValueError(
        "Tensor conversion requested for IndexedSlices without dense_shape: %s"
        % str(value))
  return math_ops.unsorted_segment_sum(value.values, value.indices,
                                       value.dense_shape[0])
Example #28
def _IndexedSlicesToTensor(value, dtype=None, name=None, as_ref=False):
  """Converts an IndexedSlices object `value` to a Tensor.

  NOTE(mrry): This function is potentially expensive.

  Args:
    value: An ops.IndexedSlices object.
    dtype: The dtype of the Tensor to be returned.
    name: Optional name to use for the returned Tensor.
    as_ref: True if a ref is requested.

  Returns:
    A dense Tensor representing the values in the given IndexedSlices.

  Raises:
    ValueError: If the requested dtype is incompatible with the IndexedSlices
      dtype, or if the IndexedSlices does not have a known dense_shape.
  """
  _ = as_ref
  if dtype and not dtype.is_compatible_with(value.dtype):
    raise ValueError(
        "Tensor conversion requested dtype %s for IndexedSlices with dtype %s" %
        (dtype.name, value.dtype.name))
  if value.dense_shape is None:
    raise ValueError(
        "Tensor conversion requested for IndexedSlices without dense_shape: %s"
        % str(value))
  # TODO(mrry): Consider adding static shape information to
  # IndexedSlices, to avoid using numpy here.
  dense_shape_value = tensor_util.ConstantValue(value.dense_shape)
  if dense_shape_value is not None:
    num_elements = np.prod(dense_shape_value)
    if num_elements >= _LARGE_SPARSE_NUM_ELEMENTS:
      warnings.warn(
          "Converting sparse IndexedSlices to a dense Tensor with %d elements. "
          "This may consume a large amount of memory." % num_elements)
  else:
    warnings.warn(
        "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
        "This may consume a large amount of memory.")
  return math_ops.unsorted_segment_sum(value.values,
                                       value.indices,
                                       value.dense_shape[0],
                                       name=name)
 def testValues(self):
   dtypes = [
       dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64,
       dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128
   ]
   indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
   num_segments = 12
   for indices in indices_flat, indices_flat.reshape(5, 2):
     shape = indices.shape + (2,)
     for dtype in dtypes:
       with self.test_session(use_gpu=True):
         tf_x, np_x = self._input(shape, dtype=dtype)
         np_ans = self._segmentReduce(
             indices, np_x, np.add, op2=None, num_segments=num_segments)
         s = math_ops.unsorted_segment_sum(
             data=tf_x, segment_ids=indices, num_segments=num_segments)
         tf_ans = s.eval()
       self.assertAllClose(np_ans, tf_ans)
       self.assertShapeEqual(np_ans, s)
Example #30
def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None):
  """Return histogram of values.

  Given the tensor `values`, this operation returns a rank 1 histogram counting
  the number of entries in `values` that fell into every bin.  The bins are
  equal width and determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
      values <= value_range[0] will be mapped to hist[0],
      values >= value_range[1] will be mapped to hist[-1].
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram').

  Returns:
    A 1-D `Tensor` holding histogram of values.

  """
  with ops.name_scope(name, 'histogram', [values, value_range, nbins]) as scope:
    values = ops.convert_to_tensor(values, name='values')
    values = gen_array_ops.reshape(values, [-1])
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    nbins = ops.convert_to_tensor(nbins, dtype=np.int32, name='nbins')
    nbins_float = math_ops.cast(nbins, values.dtype)

    # Map tensor values that fall within value_range to [0, 1].
    scaled_values = math_ops.truediv(
        values - value_range[0],
        value_range[1] - value_range[0],
        name='scaled_values')

    # map tensor values within the open interval value_range to {0,.., nbins-1},
    # values outside the open interval will be zero or less, or nbins or more.
    indices = math_ops.floor(nbins_float * scaled_values, name='indices')

    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
    indices = math_ops.cast(
        clip_ops.clip_by_value(indices, 0, nbins_float - 1), np.int32)

    return math_ops.unsorted_segment_sum(
        array_ops.ones_like(indices, dtype=dtype), indices, nbins, name=scope)
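
# The histogram above boils down to: scale values into [0, nbins), floor,
# clip, and count with unsorted_segment_sum over a tensor of ones. A compact
# sketch of the same binning with the public TF 2.x API (eager mode assumed);
# it reproduces the example given for histogram_fixed_width further below:
import tensorflow as tf

values = tf.constant([-1.0, 0.0, 1.5, 2.0, 5.0, 15.0])
value_range = tf.constant([0.0, 5.0])
nbins = 5

scaled = (values - value_range[0]) / (value_range[1] - value_range[0])
indices = tf.cast(
    tf.clip_by_value(tf.floor(tf.cast(nbins, tf.float32) * scaled),
                     0, nbins - 1), tf.int32)
hist = tf.math.unsorted_segment_sum(tf.ones_like(indices), indices, nbins)
print(hist.numpy())  # [2 1 1 0 2]
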
Example #31
def histogram_fixed_width(values,
                          value_range,
                          nbins=100,
                          dtype=dtypes.int32,
                          name=None):
    """Return histogram of values.

  Given the tensor `values`, this operation returns a rank 1 histogram counting
  the number of entries in `values` that fell into every bin.  The bins are
  equal width and determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor`.  values <= value_range[0] will be
      mapped to hist[0], values >= value_range[1] will be mapped to hist[-1].
      Must be same dtype as `values`.
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram_fixed_width').

  Returns:
    A 1-D `Tensor` holding histogram of values.

  Examples:

  ```python
  # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  nbins = 5
  value_range = [0.0, 5.0]
  new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

  with tf.default_session() as sess:
    hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
    variables.initialize_all_variables().run()
    sess.run(hist) => [2, 1, 1, 0, 2]
  ```
  """
    with ops.op_scope([values, value_range, nbins], name,
                      'histogram_fixed_width') as scope:
        values = ops.convert_to_tensor(values, name='values')
        values = array_ops.reshape(values, [-1])
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
        nbins_float = math_ops.to_float(nbins)

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(values - value_range[0],
                                         value_range[1] - value_range[0],
                                         name='scaled_values')

        # map tensor values within the open interval value_range to {0,.., nbins-1},
        # values outside the open interval will be zero or less, or nbins or more.
        indices = math_ops.floor(nbins_float * scaled_values, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        indices = math_ops.cast(
            clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)

        # TODO(langmore) This creates an array of ones to add up and place in the
        # bins.  This is inefficient, so replace when a better Op is available.
        return math_ops.unsorted_segment_sum(array_ops.ones_like(indices,
                                                                 dtype=dtype),
                                             indices,
                                             nbins,
                                             name=scope)
Example #32
 def tpu_function(sparse):
     # Assumes dense_shape is (2, *)
     looked_up = array_ops.gather(table, sparse.values)
     segment_sum = math_ops.unsorted_segment_sum(
         looked_up, sparse.indices[:, 0], 2)
     return {"sparse": sparse, "segment_sum": segment_sum}
Example #33
def _GatherV2Grad(op, grad):
    """Gradient for GatherV2 op."""
    # params can be large, so colocate the shape calculation with it.
    #
    # params can be very large for sparse model, array_ops.shape raises
    # exception on the Windows platform when any dimension is larger than
    # int32. params_shape is not used in optimizer apply_sparse gradients,
    # so it's fine to convert it back to int32 regardless of truncation.
    params = op.inputs[0]
    with ops.colocate_with(params):
        params_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
        params_shape = math_ops.to_int32(params_shape)

    indices = op.inputs[1]
    indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
    axis = op.inputs[2]
    axis_static = tensor_util.constant_value(axis)

    # For axis 0 gathers, build an appropriately shaped IndexedSlices.
    if axis_static == 0:
        if context.in_eager_mode():
            params_tail_shape = params_shape.cpu()[1:]
        else:
            params_tail_shape = params_shape[1:]
        values_shape = array_ops.concat([indices_size, params_tail_shape], 0)
        values = array_ops.reshape(grad, values_shape)
        indices = array_ops.reshape(indices, indices_size)
        return [ops.IndexedSlices(values, indices, params_shape), None, None]

    outer_shape = params_shape[:axis]
    outer_dims = array_ops.size(outer_shape)
    inner_shape = params_shape[axis:][1:]
    inner_dims = array_ops.size(inner_shape)

    outer_axes_indices = math_ops.range(outer_dims)
    inner_axes_indices = math_ops.range(outer_dims + 1,
                                        outer_dims + 1 + inner_dims)

    values_shape = array_ops.concat([outer_shape, indices_size, inner_shape],
                                    0)
    values = array_ops.reshape(grad, values_shape)
    indices = array_ops.reshape(indices, indices_size)

    # We need to sum up every slice `values[..., i, ....]` corresponding to
    # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
    # support an axis parameter, we transpose the gather dimension to the front,
    # then use `unsorted_segment_sum` to build a
    # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
    # affecting each index in `gather_axis` summed up.
    transpose_dims = array_ops.concat(
        [[outer_dims], outer_axes_indices, inner_axes_indices], 0)
    values_transpose = array_ops.transpose(values, transpose_dims)
    num_segments = params_shape[axis]

    params_grad = math_ops.unsorted_segment_sum(values_transpose, indices,
                                                num_segments)

    # Inverts the above transpose by moving dimension 0 back to its original
    # position.
    invert_transpose_dims = array_ops.concat(
        [outer_axes_indices + 1, [0], inner_axes_indices], 0)
    params_grad = array_ops.transpose(params_grad, invert_transpose_dims)
    return [params_grad, None, None]
Example #34
 def loop_fn(i):
   data = array_ops.gather(t, i)
   data_0 = array_ops.gather(t, 0)
   seg_ids = array_ops.gather(segment_ids, i)
   return (math_ops.unsorted_segment_sum(data, seg_ids, num_segments),
           math_ops.unsorted_segment_sum(data_0, seg_ids, num_segments))
def _SparseSegmentSumGrad(op, grad):
    """Gradient for SparseSegmentSum."""
    input_rows = array_ops.shape(op.inputs[0])[0]
    return (math_ops.unsorted_segment_sum(array_ops.gather(grad, op.inputs[2]),
                                          op.inputs[1],
                                          input_rows), None, None)
Example #36
    def _mini_batch_training_op(self, inputs, cluster_idx_list,
                                cluster_centers, total_counts):
        """Creates an op for training for mini batch case.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inputs' and specifies the
        cluster id corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.
      total_counts: Tensor Ref of cluster counts.

    Returns:
      An op for doing an update of mini-batch k-means.
    """
        update_ops = []
        for inp, cluster_idx in zip(inputs, cluster_idx_list):
            with ops.colocate_with(inp, ignore_existing=True):
                assert total_counts is not None
                cluster_idx = array_ops.reshape(cluster_idx, [-1])
                # Dedupe the unique ids of cluster_centers being updated so that updates
                # can be locally aggregated.
                unique_ids, unique_idx = array_ops.unique(cluster_idx)
                num_unique_cluster_idx = array_ops.size(unique_ids)
                # Fetch the old values of counts and cluster_centers.
                with ops.colocate_with(total_counts, ignore_existing=True):
                    old_counts = array_ops.gather(total_counts, unique_ids)
                # TODO(agarwal): This colocation seems to run into problems. Fix it.
                with ops.colocate_with(cluster_centers, ignore_existing=True):
                    old_cluster_centers = array_ops.gather(
                        cluster_centers, unique_ids)
                # Locally aggregate the increment to counts.
                count_updates = math_ops.unsorted_segment_sum(
                    array_ops.ones_like(unique_idx, dtype=total_counts.dtype),
                    unique_idx, num_unique_cluster_idx)
                # Locally compute the sum of inputs mapped to each id.
                # For a cluster with old cluster value x, old count n, and with data
                # d_1,...d_k newly assigned to it, we recompute the new value as
                # x += (sum_i(d_i) - k * x) / (n + k).
                # Compute sum_i(d_i), see comment above.
                cluster_center_updates = math_ops.unsorted_segment_sum(
                    inp, unique_idx, num_unique_cluster_idx)
                # Shape to enable broadcasting count_updates and learning_rate to inp.
                # It extends the shape with 1's to match the rank of inp.
                broadcast_shape = array_ops.concat([
                    array_ops.reshape(num_unique_cluster_idx, [1]),
                    array_ops.ones(array_ops.reshape(
                        array_ops.rank(inp) - 1, [1]),
                                   dtype=dtypes.int32)
                ], 0)
                # Subtract k * x, see comment above.
                cluster_center_updates -= math_ops.cast(
                    array_ops.reshape(count_updates, broadcast_shape),
                    inp.dtype) * old_cluster_centers
                learning_rate = math_ops.reciprocal(
                    math_ops.cast(old_counts + count_updates, inp.dtype))
                learning_rate = array_ops.reshape(learning_rate,
                                                  broadcast_shape)
                # scale by 1 / (n + k), see comment above.
                cluster_center_updates *= learning_rate
                # Apply the updates.
            update_counts = state_ops.scatter_add(total_counts, unique_ids,
                                                  count_updates)
            update_cluster_centers = state_ops.scatter_add(
                cluster_centers, unique_ids, cluster_center_updates)
            update_ops.extend([update_counts, update_cluster_centers])
        return control_flow_ops.group(*update_ops)
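
# The mini-batch update above implements x += (sum_i(d_i) - k * x) / (n + k)
# for every cluster touched by the batch, which is just an incremental running
# mean. A tiny numeric sketch of that formula for one cluster (plain Python,
# values made up):
old_center, old_count = 4.0, 3              # x and n
new_points = [6.0, 8.0]                     # k = 2 points newly assigned
k = len(new_points)
new_center = old_center + (sum(new_points) - k * old_center) / (old_count + k)
print(new_center)  # 5.2 == (3 * 4.0 + 6.0 + 8.0) / 5, i.e. the running mean
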
Example #37
        def _training_examples_and_variables():
            """Returns dictionaries for training examples and variables."""
            batch_size = targets.get_shape()[0]

            # Iterate over all feature columns and create appropriate lists for dense
            # and sparse features as well as dense and sparse weights (variables) for
            # SDCA.
            # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
            # dict as 1-dimensional tensors.
            dense_features, sparse_features, sparse_feature_with_values = [], [], []
            dense_feature_weights = []
            sparse_feature_weights, sparse_feature_with_values_weights = [], []
            for column in sorted(columns_to_variables.keys(),
                                 key=lambda x: x.key):
                transformed_tensor = features[column]
                if isinstance(column, layers.feature_column._RealValuedColumn):  # pylint: disable=protected-access
                    # A real-valued column corresponds to a dense feature in SDCA. A
                    # transformed tensor corresponding to a RealValuedColumn should have
                    # rank at most 2. In order to be passed to SDCA, its rank needs to be
                    # exactly 2 (i.e., its shape should be [batch_size, column.dim]).
                    check_rank_op = control_flow_ops.Assert(
                        math_ops.less_equal(array_ops.rank(transformed_tensor),
                                            2),
                        ['transformed_tensor should have rank at most 2.'])
                    # Reshape to [batch_size, dense_column_dimension].
                    with ops.control_dependencies([check_rank_op]):
                        transformed_tensor = array_ops.reshape(
                            transformed_tensor,
                            [array_ops.shape(transformed_tensor)[0], -1])

                    dense_features.append(transformed_tensor)
                    # For real valued columns, the variables list contains exactly one
                    # element.
                    dense_feature_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(column,
                                layers.feature_column._BucketizedColumn):  # pylint: disable=protected-access
                    # A bucketized column corresponds to a sparse feature in SDCA. The
                    # bucketized feature is "sparsified" for SDCA by converting it to a
                    # SparseFeatureColumn representing the one-hot encoding of the
                    # bucketized feature.
                    #
                    # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
                    # bucketized feature column to a dense feature in SDCA. This will
                    # likely depend on the number of buckets.
                    dense_bucket_tensor = column._to_dnn_input_layer(
                        transformed_tensor)  # pylint: disable=protected-access
                    sparse_feature_column = _dense_tensor_to_sparse_feature_column(
                        dense_bucket_tensor)
                    sparse_feature_with_values.append(sparse_feature_column)
                    # If a partitioner was used during variable creation, we will have a
                    # list of Variables here larger than 1.
                    vars_to_append = columns_to_variables[column][0]
                    if len(columns_to_variables[column]) > 1:
                        vars_to_append = columns_to_variables[column]
                    sparse_feature_with_values_weights.append(vars_to_append)
                elif isinstance(
                        column,
                    (
                        layers.feature_column._WeightedSparseColumn,  # pylint: disable=protected-access
                        layers.feature_column._CrossedColumn,  # pylint: disable=protected-access
                        layers.feature_column._SparseColumn)):  # pylint: disable=protected-access

                    if isinstance(column,
                                  layers.feature_column._WeightedSparseColumn):  # pylint: disable=protected-access
                        id_tensor = column.id_tensor(transformed_tensor)
                        weight_tensor = array_ops.reshape(
                            column.weight_tensor(transformed_tensor).values,
                            [-1])
                    else:
                        id_tensor = transformed_tensor
                        weight_tensor = array_ops.ones(
                            [array_ops.shape(id_tensor.indices)[0]],
                            dtypes.float32)

                    example_ids = array_ops.reshape(id_tensor.indices[:, 0],
                                                    [-1])

                    flat_ids = array_ops.reshape(id_tensor.values, [-1])
                    # Prune invalid IDs (< 0) from the flat_ids, example_ids, and
                    # weight_tensor.  These can come from looking up an OOV entry in the
                    # vocabulary (default value being -1).
                    is_id_valid = math_ops.greater_equal(flat_ids, 0)
                    flat_ids = array_ops.boolean_mask(flat_ids, is_id_valid)
                    example_ids = array_ops.boolean_mask(
                        example_ids, is_id_valid)
                    weight_tensor = array_ops.boolean_mask(
                        weight_tensor, is_id_valid)

                    projection_length = math_ops.reduce_max(flat_ids) + 1
                    # project ids based on example ids so that we can dedup ids that
                    # occur multiple times for a single example.
                    projected_ids = projection_length * example_ids + flat_ids

                    # Remove any redundant ids.
                    ids, idx = array_ops.unique(projected_ids)
                    # Keep only one example id per duplicated ids.
                    example_ids_filtered = math_ops.unsorted_segment_min(
                        example_ids, idx,
                        array_ops.shape(ids)[0])

                    # Reproject ids back into the feature id space.
                    reproject_ids = (ids -
                                     projection_length * example_ids_filtered)

                    weights = array_ops.reshape(
                        math_ops.unsorted_segment_sum(weight_tensor, idx,
                                                      array_ops.shape(ids)[0]),
                        [-1])
                    sparse_feature_with_values.append(
                        SparseFeatureColumn(example_ids_filtered,
                                            reproject_ids, weights))
                    # If a partitioner was used during variable creation, we will have a
                    # list of Variables here larger than 1.
                    vars_to_append = columns_to_variables[column][0]
                    if len(columns_to_variables[column]) > 1:
                        vars_to_append = columns_to_variables[column]
                    sparse_feature_with_values_weights.append(vars_to_append)
                else:
                    raise ValueError(
                        'SDCAOptimizer does not support column type %s.' %
                        type(column).__name__)

            example_weights = array_ops.reshape(
                features[weight_column_name], shape=[
                    -1
                ]) if weight_column_name else array_ops.ones([batch_size])
            example_ids = features[self._example_id_column]
            sparse_feature_with_values.extend(sparse_features)
            sparse_feature_with_values_weights.extend(sparse_feature_weights)
            examples = dict(sparse_features=sparse_feature_with_values,
                            dense_features=dense_features,
                            example_labels=math_ops.cast(
                                array_ops.reshape(targets, shape=[-1]),
                                dtypes.float32),
                            example_weights=example_weights,
                            example_ids=example_ids)
            sdca_variables = dict(
                sparse_features_weights=sparse_feature_with_values_weights,
                dense_features_weights=dense_feature_weights)
            return examples, sdca_variables
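# A minimal, self-contained sketch (not part of the original code) of the
# projection trick used above: each (example_id, feature_id) pair is folded
# into a single integer key, de-duplicated with unique(), and the weights of
# duplicates are summed per key with unsorted_segment_sum. The helper name is
# hypothetical.
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops


def _dedup_sparse_ids_sketch(example_ids, flat_ids, weight_tensor):
    """Sums the weights of duplicate (example_id, feature_id) pairs."""
    projection_length = math_ops.reduce_max(flat_ids) + 1
    # Fold each (example_id, feature_id) pair into one integer key.
    projected_ids = projection_length * example_ids + flat_ids
    ids, idx = array_ops.unique(projected_ids)
    # Keep one example id per unique key.
    example_ids_filtered = math_ops.unsorted_segment_min(
        example_ids, idx, array_ops.shape(ids)[0])
    # Recover the feature id and sum the weights of duplicates.
    reproject_ids = ids - projection_length * example_ids_filtered
    weights = math_ops.unsorted_segment_sum(weight_tensor, idx,
                                            array_ops.shape(ids)[0])
    return example_ids_filtered, reproject_ids, weights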
def _sampled_scattered_embedding_lookup_sparse(params,
                                               sp_values,
                                               dimension=None,
                                               sampled_candidates=None,
                                               hash_key=None,
                                               with_sign_hash=False,
                                               name=None):
    """Looks up embeddings using parameter hashing for sparse values.

  This method looks up selected embedding dimensions if `sampled_candidates` is
  given, otherwise looks up all dimensions.

  The i-th embedding component of a value v in `values` is found by retrieving
  the weight whose index is a fingerprint of the pair (v,i).
  The concept is explored as "feature hashing" for model compression in this
  paper: http://arxiv.org/pdf/1504.04788.pdf

  This is logically equivalent to:
  * Transforming `sp_values` (which has shape `[d0, d1]`) into a one-hot
    `Tensor` of shape `[d0, N]`.
  * Multiplying with a `Tensor` `h` of shape `[N, dimension]`, where
    `h(i, j) = params[hash(i, j)]`.

  Args:
    params: A float `Tensor` with rank 1 and fully-defined shape.
    sp_values: A 2D `SparseTensor` to be embedded with shape `[d0, d1]`.
    dimension: An int `Tensor` of the final dimension. The user needs to provide
      either `dimension` or `sampled_candidates`.
    sampled_candidates: An optional `Tensor` of column indices to keep along the
      final dimension with shape `[d0, N]`. If given, `dimension` is ignored. If
      `None`, looks up all candidates.
    hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
      function to combine the cross fingerprints in SparseFeatureCrossOp
      (optional).
    with_sign_hash:  A `bool` indicating whether `h(i, j)` should be multiplied
      by `+1` or `-1`, where the value selected is determined by hashing `(i,
      j)`. This is often necessary to remove bias resulting from hash
      collisions.
    name: An optional name for this op.

  Returns:
    A `Tensor` of shape `[d0, dimension]`.
    If `sampled_candidates` is given, the output shape is `[d0, N]`.

  Raises:
    TypeError: If sp_values is not `SparseTensor`.
    ValueError: If both `dimension` and `sampled_candidates` are `None`.
  """
    if not isinstance(sp_values, sparse_tensor.SparseTensor):
        raise TypeError("sp_values must be SparseTensor")

    with ops.name_scope(
            name=name,
            default_name="sampled_scattered_embedding_lookup_sparse",
            values=[sp_values, params, dimension,
                    sampled_candidates]) as name_scope:
        segment_ids = sp_values.indices[:, 0]
        if sampled_candidates is not None:
            # Tile sampled_candidates so there is one row corresponding to each
            # element in sp_values.values.
            sampled_candidates = array_ops.gather(sampled_candidates,
                                                  segment_ids)

        embeddings = _sampled_scattered_embedding_lookup(
            params,
            sp_values.values,
            dimension=dimension,
            sampled_candidates=sampled_candidates,
            hash_key=hash_key,
            name="values_lookup")
        if with_sign_hash:
            signs = _sampled_scattered_embedding_lookup(
                array_ops.constant([-1., 1.]),
                sp_values.values,
                dimension=dimension,
                sampled_candidates=sampled_candidates,
                hash_key=hash_key,
                name="signs_lookup")
            embeddings = math_ops.multiply(signs,
                                           embeddings,
                                           name="signs_hash")

        if segment_ids.dtype != dtypes.int32:
            segment_ids = math_ops.cast(segment_ids, dtypes.int32)
        num_segments = array_ops.shape(sp_values)[0]

        return math_ops.unsorted_segment_sum(embeddings,
                                             segment_ids,
                                             num_segments=num_segments,
                                             name=name_scope)
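# A hedged usage sketch (not from the original source): embeds a tiny 2 x 2
# SparseTensor into 3 hashed dimensions, assuming int64 feature values. The
# literal values below are made up; the result has one row per row of
# `sp_values`, with the two entries of row 1 summed together by the
# unsorted_segment_sum above.
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import sparse_tensor

_sketch_params = constant_op.constant([0.1, -0.3, 0.7, 0.2, -0.5],
                                      dtypes.float32)
_sketch_sp_values = sparse_tensor.SparseTensor(
    indices=[[0, 0], [1, 0], [1, 1]],
    values=constant_op.constant([3, 7, 5], dtypes.int64),
    dense_shape=[2, 2])
_sketch_embedded = _sampled_scattered_embedding_lookup_sparse(
    _sketch_params, _sketch_sp_values, dimension=3)  # shape [2, 3]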
def _embedding_lookup_with_distributed_aggregation(params,
                                                   ids,
                                                   partition_strategy="mod",
                                                   name=None,
                                                   max_norm=None,
                                                   weights=None,
                                                   idx=None,
                                                   segment_ids=None):
    """Lookup helper for embedding_lookup_sparse_with_distributed_aggregation."""
    if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
        raise ValueError("Need at least one param")
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]

    def maybe_normalize(x):
        if max_norm is not None:
            if x.get_shape().ndims is not None:
                ndims = x.get_shape().ndims
            else:
                ndims = array_ops.size(array_ops.shape(x))
            return clip_ops.clip_by_norm(x,
                                         max_norm,
                                         axes=list(range(1, ndims)))
        return x

    with ops.name_scope(name, "embedding_lookup_with_distributed_aggregation",
                        params + [ids]) as name:
        np = len(params)  # Number of partitions
        # Preserve the resource variable status to avoid accidental dense reads.
        if not any(
                isinstance(p, resource_variable_ops.ResourceVariable)
                for p in params):
            params = ops.convert_n_to_tensor_or_indexed_slices(params,
                                                               name="params")
        if np == 1:
            with ops.colocate_with(params[0]):
                ret = maybe_normalize(_do_gather(params[0], ids))
                ignore_weights = weights is None
                if not ignore_weights:
                    if weights.dtype != ret.dtype:
                        weights = math_ops.cast(weights, ret.dtype)
                    # Reshape to allow broadcast
                    ones = array_ops.fill(
                        array_ops.expand_dims(array_ops.rank(ret) - 1, 0), 1)
                    bcast_weights_shape = array_ops.concat(
                        [array_ops.shape(weights), ones], 0)
                    orig_weights_shape = weights.get_shape()
                    weights = array_ops.reshape(weights, bcast_weights_shape)
                    # Set weights shape after reshape
                    if ret.get_shape().ndims is not None:
                        weights.set_shape(
                            orig_weights_shape.concatenate(
                                [1 for _ in range(ret.get_shape().ndims - 1)]))
                    ret *= weights
                    return math_ops.segment_sum(ret, segment_ids, name=name)
                else:
                    return math_ops.sparse_segment_sum(ret,
                                                       idx,
                                                       segment_ids,
                                                       name=name)
        else:
            ids = ops.convert_to_tensor(ids, name="ids")
            flat_ids = array_ops.reshape(ids, [-1])
            original_indices = math_ops.range(array_ops.size(flat_ids))

            # Create p_assignments and set new_ids depending on the strategy.
            if partition_strategy == "mod":
                p_assignments = flat_ids % np
                new_ids = flat_ids // np
            elif partition_strategy == "div":
                # Compute num_total_ids as the sum of dim-0 of params, then assign to
                # partitions based on a constant number of ids per partition. Optimize
                # if we already know the full shape statically.
                dim_0_size = params[0].get_shape().dims[0]
                for p in xrange(1, np):
                    dim_0_size += params[p].get_shape().dims[0]
                if dim_0_size.value:
                    num_total_ids = constant_op.constant(
                        dim_0_size.value, flat_ids.dtype)
                else:
                    dim_0_sizes = []
                    for p in xrange(np):
                        if params[p].get_shape().dims[0].value is not None:
                            dim_0_sizes.append(
                                params[p].get_shape().dims[0].value)
                        else:
                            with ops.colocate_with(params[p]):
                                dim_0_sizes.append(
                                    array_ops.shape(params[p])[0])
                    num_total_ids = math_ops.reduce_sum(
                        math_ops.cast(array_ops.stack(dim_0_sizes),
                                      flat_ids.dtype))
                ids_per_partition = num_total_ids // np
                extras = num_total_ids % np

                p_assignments = math_ops.maximum(
                    flat_ids // (ids_per_partition + 1),
                    (flat_ids - extras) // ids_per_partition)

                # Emulate a conditional using a boolean indicator tensor
                is_in_first_extras_partitions = math_ops.cast(
                    p_assignments < extras, flat_ids.dtype)
                new_ids = (is_in_first_extras_partitions *
                           (flat_ids % (ids_per_partition + 1)) +
                           (1 - is_in_first_extras_partitions) *
                           ((flat_ids - extras) % ids_per_partition))
            else:
                raise ValueError("Unrecognized partition strategy: " +
                                 partition_strategy)

            # Cast partition assignments to int32 for use in dynamic_partition.
            # There really should not be more than 2^32 partitions.
            p_assignments = math_ops.cast(p_assignments, dtypes.int32)
            # Partition list of ids based on assignments into np separate lists
            gather_ids = data_flow_ops.dynamic_partition(
                new_ids, p_assignments, np)
            # Similarly, partition the original indices.
            pindices = data_flow_ops.dynamic_partition(original_indices,
                                                       p_assignments, np)
            # Do np separate lookups, finding embeddings for gather_ids[p] in
            # params[p].
            partitioned_result = []
            for p in xrange(np):
                with ops.colocate_with(params[p]):
                    partitioned_result.append(
                        _do_gather(params[p], gather_ids[p]))

            ignore_weights = weights is None
            if not ignore_weights:
                # Partition weights according to pindices.
                partitioned_weight = []
                for p in xrange(np):
                    partitioned_weight.append(
                        array_ops.gather(weights, pindices[p]))
            # Reshape each partition result.
            element_shape = params[0].get_shape()[1:]
            for p in params[1:]:
                element_shape = element_shape.merge_with(p.get_shape()[1:])
            if element_shape.is_fully_defined():
                for p in xrange(np):
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = array_ops.reshape(
                            partitioned_result[p],
                            array_ops.concat(
                                [array_ops.shape(pindices[p]), element_shape],
                                0))
            else:
                with ops.colocate_with(params[0]):
                    params_shape = array_ops.shape(params[0])
                for p in xrange(np):
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = array_ops.reshape(
                            partitioned_result[p],
                            array_ops.concat([
                                array_ops.shape(pindices[p]),
                                array_ops.slice(params_shape, [1], [-1])
                            ], 0))
            # Normalize each partition result.
            for p in xrange(np):
                with ops.colocate_with(params[p]):
                    partitioned_result[p] = maybe_normalize(
                        partitioned_result[p])
            if not ignore_weights:
                # Multiply each partition result with partition weights.
                for p in xrange(np):
                    with ops.colocate_with(params[p]):
                        if partitioned_weight[p].dtype != partitioned_result[
                                p].dtype:
                            partitioned_weight[p] = math_ops.cast(
                                partitioned_weight[p],
                                partitioned_result[p].dtype)
                        # Reshape partition weights.
                        ones = array_ops.fill(
                            array_ops.expand_dims(
                                array_ops.rank(partitioned_result[p]) - 1, 0),
                            1)
                        bcast_weights_shape = array_ops.concat(
                            [array_ops.shape(partitioned_weight[p]), ones], 0)
                        orig_weights_shape = partitioned_weight[p].get_shape()
                        partitioned_weight[p] = array_ops.reshape(
                            partitioned_weight[p], bcast_weights_shape)
                        if partitioned_result[p].get_shape().ndims is not None:
                            partitioned_weight[p].set_shape(
                                orig_weights_shape.concatenate([
                                    1 for _ in range(partitioned_result[p].
                                                     get_shape().ndims - 1)
                                ]))
                        partitioned_result[p] *= partitioned_weight[p]
            partitioned_segment_ids = []
            for p in xrange(np):
                if not ignore_weights:
                    # Partition segment_ids according to pindices.
                    p_segment_ids = array_ops.gather(segment_ids, pindices[p])
                    # Number the p_segment_ids to meet segment_sum's
                    # requirements. Note that unique_p_segment_ids contains the
                    # unique segment ids of this partition, and their order is
                    # unchanged.
                    unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
                        p_segment_ids)
                    partitioned_segment_ids.append(unique_p_segment_ids)
                    # segment_sum this partition's result.
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = math_ops.segment_sum(
                            partitioned_result[p], unique_p_segment_idx)
                else:
                    # When ignoring weights, we need to get the indices of
                    # elements in idx and segment_ids.
                    _, exclude_idx = array_ops.setdiff1d(idx, pindices[p])
                    all_idx = math_ops.range(array_ops.shape(idx)[0])
                    _, include_idx = array_ops.setdiff1d(all_idx, exclude_idx)
                    # Gather segment_ids and idx according to these indices.
                    p_segment_ids = array_ops.gather(segment_ids, include_idx)
                    p_idx = array_ops.gather(idx, include_idx)
                    # Number the p_segment_ids, same as in the weighted case
                    # above.
                    unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
                        p_segment_ids)
                    _, unique_p_idx_idx = array_ops.unique(p_idx)
                    partitioned_segment_ids.append(unique_p_segment_ids)
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = math_ops.sparse_segment_sum(
                            partitioned_result[p], unique_p_idx_idx,
                            unique_p_segment_idx)
            # Concat each partition's segment_ids and result for final segment_sum.
            concat_segment_ids = array_ops.concat(partitioned_segment_ids, 0)
            concat_partitioned_result = array_ops.concat(partitioned_result, 0)
            return math_ops.unsorted_segment_sum(
                concat_partitioned_result,
                concat_segment_ids,
                math_ops.reduce_max(concat_segment_ids) + 1,
                name=name)
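# A small worked example (not from the original source) of the partition
# arithmetic above, in plain Python: with 3 partitions and 8 total ids,
# id 7 lands in partition 1 with local id 2 under "mod", and in partition 2
# with local id 1 under "div". The helper name is hypothetical.
def _partition_sketch(flat_id, num_partitions=3, num_total_ids=8):
    """Returns (partition, local_id) under the "mod" and "div" strategies."""
    mod_assignment = (flat_id % num_partitions, flat_id // num_partitions)
    # "div": the first `extras` partitions hold one extra id each.
    ids_per_partition = num_total_ids // num_partitions
    extras = num_total_ids % num_partitions
    p = max(flat_id // (ids_per_partition + 1),
            (flat_id - extras) // ids_per_partition)
    local_id = (flat_id % (ids_per_partition + 1) if p < extras else
                (flat_id - extras) % ids_per_partition)
    return mod_assignment, (p, local_id)

# _partition_sketch(7) == ((1, 2), (2, 1))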
class SegmentReductionOpBenchmark(test.Benchmark):
    outer_dim_options = [2**x for x in range(9, 14, 2)]
    ratio_options = [2**x for x in range(1, 6, 2)]
    inner_dim_options = [2**x for x in range(9, 14, 2)]
    # Randomly generated sizes with less alignment.
    inner_dim_options += [
        1120, 1215, 1856, 1302, 1329, 1531, 1313, 1672, 1851, 1584
    ]
    dtype_options = [np.float32, np.float64]
    options = (outer_dim_options, ratio_options, inner_dim_options,
               dtype_options)
    # pylint: disable=g-long-lambda
    op_functors = [
        lambda vc, vs, seg_ids: ("sorted", math_ops.segment_sum(vc, vs)),
        lambda vc, vs, seg_ids:
        ("unsorted", math_ops.unsorted_segment_sum(vc, vs, seg_ids[-1] + 1))
    ]
    # pylint: enable=g-long-lambda
    repeat = 10

    def _npTypeToStr(self, t):
        if t == np.float32:
            return "fp32"
        if t == np.float64:
            return "fp64"

    def _runGraph(self, op_functor, outer_dim, ratio, inner_dim, dtype):
        output_outer_dim = int(outer_dim / ratio)
        const = np.random.randint(5, size=(outer_dim, inner_dim))
        seg_ids = np.sort(np.random.randint(output_outer_dim, size=outer_dim))
        vs = variables.Variable(seg_ids.astype(np.int32))
        with ops.device("/gpu:0"):
            vc = variables.Variable(const.astype(dtype))
        name, op = op_functor(vc, vs, seg_ids)
        with session.Session() as sess:
            self.evaluate(variables.global_variables_initializer())
            r = self.run_op_benchmark(
                sess,
                op,
                min_iters=self.repeat,
                name="_".join(
                    map(str, [
                        name, outer_dim, ratio, inner_dim,
                        self._npTypeToStr(dtype)
                    ])))
        return name, r["wall_time"]

    def benchmarkSegmentSumGPU(self):
        if not test.is_gpu_available(cuda_only=True):
            return
        for outer_dim, ratio, inner_dim, dtype in itertools.product(
                *self.options):
            op_functor = self.op_functors[0]
            with ops.Graph().as_default():
                self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype)

    def benchmarkUnsortedSegmentSumGPU(self):
        if not test.is_gpu_available(cuda_only=True):
            return
        for outer_dim, ratio, inner_dim, dtype in itertools.product(
                *self.options):
            op_functor = self.op_functors[1]
            with ops.Graph().as_default():
                self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype)
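# Hedged usage note (not part of the original snippet): benchmarks derived from
# test.Benchmark are usually launched through test.main() and selected with the
# --benchmarks regex flag, e.g.
#   python <this_test_file>.py --benchmarks=SegmentReductionOpBenchmark
if __name__ == "__main__":
    test.main()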
def overlap_and_add(signal, frame_step, name=None):
    """Reconstructs a signal from a framed representation.

  Adds potentially overlapping frames of a signal with shape
  `[..., frames, frame_length]`, offsetting subsequent frames by `frame_step`.
  The resulting tensor has shape `[..., output_size]` where

      output_size = (frames - 1) * frame_step + frame_length

  Args:
    signal: A [..., frames, frame_length] `Tensor`. All dimensions may be
      unknown, and rank must be at least 2.
    frame_step: An integer or scalar `Tensor` denoting overlap offsets. Must be
      less than or equal to `frame_length`.
    name: An optional name for the operation.

  Returns:
    A `Tensor` with shape `[..., output_size]` containing the overlap-added
    frames of `signal`'s inner-most two dimensions.

  Raises:
    ValueError: If `signal`'s rank is less than 2, `frame_step` is not a scalar
      integer or `frame_step` is greater than `frame_length`.
  """
    with ops.name_scope(name, "overlap_and_add", [signal, frame_step]):
        signal = ops.convert_to_tensor(signal, name="signal")
        signal.shape.with_rank_at_least(2)
        frame_step = ops.convert_to_tensor(frame_step, name="frame_step")
        frame_step.shape.assert_has_rank(0)
        if not frame_step.dtype.is_integer:
            raise ValueError("frame_step must be an integer. Got %s" %
                             frame_step.dtype)

        # If frame_length and frame_step are known at graph construction time, check
        # frame_step is less than or equal to frame_length.
        frame_step_static = tensor_util.constant_value(frame_step)
        if (frame_step_static is not None and signal.shape.ndims is not None
                and signal.shape[-1].value is not None
                and frame_step_static > signal.shape[-1].value):
            raise ValueError(
                "frame_step (%d) must be less than or equal to frame_length (%d)"
                % (frame_step_static, signal.shape[-1].value))

        signal_shape = array_ops.shape(signal)

        # All dimensions that are not part of the overlap-and-add. Can be empty for
        # rank 2 inputs.
        outer_dimensions = signal_shape[:-2]

        signal_rank = array_ops.rank(signal)
        frames = signal_shape[-2]
        frame_length = signal_shape[-1]

        subframe_length = util_ops.gcd(frame_length, frame_step)
        subframe_step = frame_step // subframe_length
        subframes_per_frame = frame_length // subframe_length
        output_size = frame_step * (frames - 1) + frame_length
        output_subframes = output_size // subframe_length

        # To avoid overlap-adding sample-by-sample, we overlap-add at the "subframe"
        # level, where a subframe is gcd(frame_length, frame_step). Reshape signal
        # from [..., frames, frame_length] into [..., subframes, subframe_length].
        subframe_shape = array_ops.concat(
            [outer_dimensions, [-1, subframe_length]], 0)
        subframe_signal = array_ops.reshape(signal, subframe_shape)

        # Now we shuffle the last [subframes, subframe_length] dimensions to the
        # front.
        # TODO(rjryan): Add an axis argument to unsorted_segment_sum so we can
        # avoid this pair of transposes.
        subframe_signal = _shuffle_to_front(subframe_signal, 2)

        # Use unsorted_segment_sum to add overlapping subframes together.
        segment_ids = array_ops.reshape(
            shape_ops.frame(math_ops.range(output_subframes),
                            subframes_per_frame,
                            subframe_step,
                            pad_end=False), [-1])
        result = math_ops.unsorted_segment_sum(subframe_signal,
                                               segment_ids,
                                               num_segments=output_subframes)

        # result is a [subframes, subframe_length, ...outer_dimensions] tensor. We
        # return a [...outer_dimensions, output_size] tensor with a transpose and
        # reshape.
        result_shape = array_ops.concat([outer_dimensions, [output_size]], 0)
        return array_ops.reshape(_shuffle_to_front(result, signal_rank - 2),
                                 result_shape)
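# A hedged usage sketch (not part of the original source): overlap-adding three
# frames of length 4 with a hop of 2 yields (3 - 1) * 2 + 4 = 8 samples; with
# all-ones frames, the interior samples where two frames overlap sum to 2.0.
from tensorflow.python.ops import array_ops

_sketch_frames = array_ops.ones([3, 4])  # [frames, frame_length]
_sketch_signal = overlap_and_add(_sketch_frames, frame_step=2)  # shape [8]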
def step_fn(example):
    segment_ids = array_ops.zeros_like_v2(example)
    num_segment = array_ops.shape(example)[0]
    # If the number of segments is dynamic, the output should have a dynamic
    # shape.
    return math_ops.unsorted_segment_sum(example, segment_ids, num_segment)
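# A brief, hedged illustration of what step_fn computes: with all-zero segment
# ids and num_segment equal to the leading dimension, every element is summed
# into segment 0 and the remaining output entries are zero.
from tensorflow.python.framework import constant_op

_sketch_example = constant_op.constant([1, 2, 3])
_sketch_summed = step_fn(_sketch_example)  # -> [6, 0, 0]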