def quantiles_ready(): """The subgraph for when the quantiles are ready.""" quantized_feature = quantile_ops.quantiles([sparse_column_values], [], [quantile_buckets], []) quantized_feature = math_ops.cast(quantized_feature[0], dtypes.int64) quantized_feature = array_ops.reshape(quantized_feature, [-1]) example_indices, _ = array_ops.split( sparse_column_indices, num_or_size_splits=2, axis=1) example_indices = array_ops.squeeze(example_indices, [1]) filtered_gradients = array_ops.gather(gradients, example_indices) filtered_hessians = array_ops.gather(hessians, example_indices) filtered_partition_ids = array_ops.gather(example_partition_ids, example_indices) unique_partitions, mapped_partitions = array_ops.unique( example_partition_ids) # Compute aggregate stats for each partition. per_partition_gradients = math_ops.unsorted_segment_sum( gradients, mapped_partitions, array_ops.size(unique_partitions)) per_partition_hessians = math_ops.unsorted_segment_sum( hessians, mapped_partitions, array_ops.size(unique_partitions)) # Prepend a bias feature per partition that accumulates the stats for all # examples in that partition. bias_feature_ids = array_ops.fill( array_ops.shape(unique_partitions), _BIAS_FEATURE_ID) bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64) partition_ids = array_ops.concat( [unique_partitions, filtered_partition_ids], 0) filtered_gradients = array_ops.concat( [per_partition_gradients, filtered_gradients], 0) filtered_hessians = array_ops.concat( [per_partition_hessians, filtered_hessians], 0) bucket_ids = array_ops.concat([bias_feature_ids, quantized_feature], 0) return partition_ids, bucket_ids, filtered_gradients, filtered_hessians
def _full_batch_training_op(self, inputs, cluster_idx_list, cluster_centers): """Creates an op for training for full batch case. Args: inputs: list of input Tensors. cluster_idx_list: A vector (or list of vectors). Each element in the vector corresponds to an input row in 'inp' and specifies the cluster id corresponding to the input. cluster_centers: Tensor Ref of cluster centers. Returns: An op for doing an update of mini-batch k-means. """ cluster_sums = [] cluster_counts = [] epsilon = constant_op.constant(1e-6, dtype=inputs[0].dtype) for inp, cluster_idx in zip(inputs, cluster_idx_list): with ops.colocate_with(inp): cluster_sums.append( math_ops.unsorted_segment_sum(inp, cluster_idx, self._num_clusters)) cluster_counts.append( math_ops.unsorted_segment_sum( array_ops.reshape( array_ops.ones( array_ops.reshape(array_ops.shape(inp)[0], [-1])), [-1, 1]), cluster_idx, self._num_clusters)) with ops.colocate_with(cluster_centers): new_clusters_centers = math_ops.add_n(cluster_sums) / (math_ops.cast( math_ops.add_n(cluster_counts), cluster_sums[0].dtype) + epsilon) if self._clusters_l2_normalized(): new_clusters_centers = nn_impl.l2_normalize(new_clusters_centers, dim=1) return state_ops.assign(cluster_centers, new_clusters_centers)
def active_inputs(): """The normal flow when the handler is active.""" # Remove the second column of example indices matrix since it is not # useful. example_indices, _ = array_ops.split( self._sparse_int_column.indices, num_or_size_splits=2, axis=1) example_indices = array_ops.squeeze(example_indices, [1]) filtered_gradients = array_ops.gather(gradients, example_indices) filtered_hessians = array_ops.gather(hessians, example_indices) filtered_partition_ids = array_ops.gather(example_partition_ids, example_indices) unique_partitions, mapped_partitions = array_ops.unique( example_partition_ids) # Compute aggregate stats for each partition. # The bias is computed on gradients and hessians (and not # filtered_gradients) which have exactly one value per example, so we # don't double count a gradient in multivalent columns. # Since unsorted_segment_sum can be numerically unstable, use 64bit # operation. gradients64 = math_ops.cast(gradients, dtypes.float64) hessians64 = math_ops.cast(hessians, dtypes.float64) per_partition_gradients = math_ops.unsorted_segment_sum( gradients64, mapped_partitions, array_ops.size(unique_partitions)) per_partition_hessians = math_ops.unsorted_segment_sum( hessians64, mapped_partitions, array_ops.size(unique_partitions)) per_partition_gradients = math_ops.cast(per_partition_gradients, dtypes.float32) per_partition_hessians = math_ops.cast(per_partition_hessians, dtypes.float32) # Prepend a bias feature per partition that accumulates the stats for all # examples in that partition. # Bias is added to the stats even if there are no examples with values in # the current sparse column. The reason is that the other example batches # might have values in these partitions so we have to keep the bias # updated. bias_feature_ids = array_ops.fill( array_ops.shape(unique_partitions), _BIAS_FEATURE_ID) bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64) partition_ids = array_ops.concat( [unique_partitions, filtered_partition_ids], 0) filtered_gradients = array_ops.concat( [per_partition_gradients, filtered_gradients], 0) filtered_hessians = array_ops.concat( [per_partition_hessians, filtered_hessians], 0) feature_ids = array_ops.concat( [bias_feature_ids, self._sparse_int_column.values], 0) # Dimension is always zero for sparse int features. dimension_ids = array_ops.zeros_like(feature_ids, dtype=dtypes.int64) feature_ids_and_dimensions = array_ops.stack( [feature_ids, dimension_ids], axis=1) return (partition_ids, feature_ids_and_dimensions, filtered_gradients, filtered_hessians)
def quantiles_ready(): """The subgraph for when the quantiles are ready.""" quantized_feature = quantile_ops.quantiles([], [sparse_column_values], [], [quantile_buckets], [sparse_column_indices]) quantized_feature = math_ops.cast(quantized_feature[1], dtypes.int64) quantized_feature = array_ops.squeeze(quantized_feature, axis=0) example_indices, _ = array_ops.split( sparse_column_indices, num_or_size_splits=2, axis=1) example_indices = array_ops.squeeze(example_indices, [1]) filtered_gradients = array_ops.gather(gradients, example_indices) filtered_hessians = array_ops.gather(hessians, example_indices) filtered_partition_ids = array_ops.gather(example_partition_ids, example_indices) unique_partitions, mapped_partitions = array_ops.unique( example_partition_ids) # Compute aggregate stats for each partition. # Since unsorted_segment_sum can be numerically unstable, use 64bit # operation. gradients64 = math_ops.cast(gradients, dtypes.float64) hessians64 = math_ops.cast(hessians, dtypes.float64) per_partition_gradients = math_ops.unsorted_segment_sum( gradients64, mapped_partitions, array_ops.size(unique_partitions)) per_partition_hessians = math_ops.unsorted_segment_sum( hessians64, mapped_partitions, array_ops.size(unique_partitions)) per_partition_gradients = math_ops.cast(per_partition_gradients, dtypes.float32) per_partition_hessians = math_ops.cast(per_partition_hessians, dtypes.float32) # Prepend a bias feature per partition that accumulates the stats for all # examples in that partition. bias_feature_ids = array_ops.fill( array_ops.shape(unique_partitions), _BIAS_FEATURE_ID) bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64) zeros = array_ops.zeros_like(bias_feature_ids) bias_feature_ids = array_ops.stack([bias_feature_ids, zeros], axis=1) partition_ids = array_ops.concat( [unique_partitions, filtered_partition_ids], 0) filtered_gradients = array_ops.concat( [per_partition_gradients, filtered_gradients], 0) filtered_hessians = array_ops.concat( [per_partition_hessians, filtered_hessians], 0) bucket_ids = array_ops.concat([bias_feature_ids, quantized_feature], 0) return partition_ids, bucket_ids, filtered_gradients, filtered_hessians
def approximate_hessian(self, grads_and_vars, name=None): """ I haven't tested this yet so I have no idea if it works, but even if it does it's probably super slow, and either way nothing else has been modified to deal with it. """ gv = 0 var_refs = [] for g_t, x_tm1 in grads_and_vars: var_refs.append(x_tm1.ref()) if g_t is None: continue with ops.name_scope('update_' + x_tm1.op.name), ops.device(x_tm1.device): if isinstance(g_t, ops.Tensor): gv += math_ops.reduce_sum(g_t * random_ops.random_normal(g_t.get_shape())) else: idxs, idxs_ = array_ops.unique(g_t.indices) g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs)) gv += math_ops.reduce_sum(g_t_ * random_ops.random_normal(g_t_.get_shape())) hesses = gradients.gradients(gv, var_refs, gate_gradients=(gate_gradients == Optimizer.GATE_OP), aggregation_method=aggregation_method, colocate_gradients_with_ops=colocate_gradients_with_ops) return zip([g_t for g_t, _ in grads_and_vars], [x_tm1 for _, x_tm1 in grads_and_vars], hesses)
def testDropNegatives(self): # Note: the test is done by replacing segment_ids with 8 to -1 # for index and replace values generated by numpy with 0. dtypes = [ dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64, dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128 ] indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3]) num_segments = 12 for indices in indices_flat, indices_flat.reshape(5, 2): shape = indices.shape + (2,) for dtype in dtypes: with self.test_session(use_gpu=True): tf_x, np_x = self._input(shape, dtype=dtype) np_ans = self._segmentReduce( indices, np_x, np.add, op2=None, num_segments=num_segments) # Replace np_ans[8] with 0 for the value np_ans[8:] = 0 # Replace 8 with -1 in indices np.place(indices, indices == 8, [-1]) s = math_ops.unsorted_segment_sum( data=tf_x, segment_ids=indices, num_segments=num_segments) tf_ans = s.eval() self.assertAllClose(np_ans, tf_ans) self.assertShapeEqual(np_ans, s)
def DynamicStitchGrads(op, grad): num_values = len(op.inputs) // 2 indices_grad = [None] * num_values def AsInt32(x): return (x if op.inputs[0].dtype == dtypes.int32 else math_ops.cast(x, dtypes.int32)) idxs = [AsInt32(array_ops.reshape(op.inputs[i], (-1,))) for i in range(num_values)] if isinstance(grad, ops.IndexedSlices): output_shape = array_ops.shape(op.outputs[0]) output_rows = output_shape[0] grad = math_ops.unsorted_segment_sum(grad.values, grad.indices, output_rows) values_grad = [] zeros = array_ops.zeros_like(grad) idx_zeros = [zeros[:array_ops.shape(x)[0]] for x in idxs] grad_range = math_ops.range(array_ops.shape(grad)[0]) for i in range(num_values): if i == num_values - 1: v_grad = grad else: v_grad = data_flow_ops.dynamic_stitch( [grad_range] + idxs[i + 1:], [grad] + idx_zeros[i + 1:]) v_grad = array_ops.gather(v_grad, AsInt32(op.inputs[i])) values_grad += [v_grad] return indices_grad + values_grad
def testGradientMatchesSegmentSum(self): # Strategy: compute the gradient for UnsortedSegmentSum and SegmentSum # and compare the outputs, which should be identical. # NB: for this test to work, indices must be valid for SegmentSum, namely # it must be sorted, the indices must be contiguous, and num_segments # must be max(indices) + 1. indices = [0, 0, 1, 1, 1, 2, 3, 4, 5] n = len(indices) num_cols = 2 shape = [n, num_cols] num_segments = max(indices) + 1 for dtype in self.differentiable_dtypes: with self.cached_session(use_gpu=True): tf_x, np_x = self._input(shape, dtype=dtype) # Results from UnsortedSegmentSum unsorted_s = math_ops.unsorted_segment_sum( data=tf_x, segment_ids=indices, num_segments=num_segments) unsorted_jacob_t, unsorted_jacob_n = ( gradient_checker.compute_gradient(tf_x, shape, unsorted_s, [num_segments, num_cols], x_init_value=np_x, delta=1)) # Results from SegmentSum sorted_s = math_ops.segment_sum(data=tf_x, segment_ids=indices) sorted_jacob_t, sorted_jacob_n = gradient_checker.compute_gradient( tf_x, shape, sorted_s, [num_segments, num_cols], x_init_value=np_x, delta=1) self.assertAllClose(unsorted_jacob_t, sorted_jacob_t) self.assertAllClose(unsorted_jacob_n, sorted_jacob_n)
def _TileGrad(op, grad): """Sum reduces grad along the tiled dimensions.""" input_shape = array_ops.shape(op.inputs[0]) # We interleave multiples and input_shape to get split_shape, # reshape grad to split_shape, and reduce along all even # dimensions (the tiled dimensions) to get the result # with shape input_shape. For example # input_shape = [20, 30, 40] # multiples = [2, 3, 4] # split_shape = [2, 20, 3, 30, 4, 40] # axes = [0, 2, 4] split_shape = array_ops.reshape( array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1]) axes = math_ops.range(0, array_ops.size(split_shape), 2) # Sum reduces grad along the first dimension for IndexedSlices if isinstance(grad, ops.IndexedSlices): grad = math_ops.unsorted_segment_sum( grad.values, math_ops.mod(grad.indices, input_shape[0]), input_shape[0]) split_shape = array_ops.concat([[1], split_shape[1:]], axis=0) input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes) # Fix shape inference if not context.executing_eagerly(): input_grad.set_shape(op.inputs[0].get_shape()) return [input_grad, None]
def _apply_sparse(self, g_t, x_tm1, prepare): """""" idxs, idxs_ = array_ops.unique(g_t.indices) g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs)) updates = [] if self._mu > 0: m_and_t = self._sparse_moving_average(x_tm1, idxs, g_t_, 'm', self._mu) m_t_ = array_ops.gather(m_and_t[0], idxs) gamma_t = ops.convert_to_tensor(self._gamma) m_bar_t_ = (1-gamma_t)*m_t_ + gamma_t*g_t_ updates.extend(m_and_t) else: m_bar_t_ = g_t_ if self._ups > 0: v_and_t = self._sparse_moving_average(x_tm1, idxs, g_t_**2, 'v', self._ups) v_t_ = array_ops.gather(v_and_t[0], idxs) eps_t = ops.convert_to_tensor(self._eps) v_bar_t_ = math_ops.sqrt(v_t_ + eps_t) updates.extend(v_and_t) else: v_bar_t_ = 1. lr_t = ops.convert_to_tensor(self._lr) s_t_ = lr_t * m_bar_t_ / v_bar_t_ return [[s_t_, x_tm1, idxs, g_t]] + updates
def _apply_sparse(self, grad, var): if len(grad.indices.get_shape()) == 1: grad_indices = grad.indices grad_values = grad.values else: grad_indices = array_ops.reshape(grad.indices, [-1]) grad_values = array_ops.reshape(grad.values, [-1, grad.values.get_shape()[-1].value]) gidxs, metagidxs = array_ops.unique(grad_indices) sizegidxs = array_ops.size(gidxs) gvals = math_ops.unsorted_segment_sum(grad_values, metagidxs, sizegidxs) # m_t = mu * m + (1 - mu) * g_t m = self.get_slot(var, "m") m_scaled_g_values = gvals * (1 - self._mu_t) m_t = state_ops.scatter_update(m, gidxs, array_ops.gather(m, gidxs) * self._mu_t, use_locking=self._use_locking) m_t = state_ops.scatter_add(m_t, gidxs, m_scaled_g_values, use_locking=self._use_locking) m_t_ = array_ops.gather(m_t, gidxs) / (1 - self._mu2_t * self._mu_power) # m_bar = mu * m_t + (1 - mu) * g_t m_bar = self._mu2_t * m_t_ + m_scaled_g_values / (1 - self._mu_power) var_update = state_ops.scatter_sub(var, gidxs, self._lr_t * m_bar, use_locking=self._use_locking) return control_flow_ops.group(*[var_update, m_t])
def testAggregateGradients(self): def fn(x): ind1 = constant_op.constant(np.array([0, 1])) ind2 = constant_op.constant(np.array([2, 3])) ind3 = constant_op.constant(np.array([1, 3])) # A mixture of IndexedSlices and dense tensor to aggregate. g1 = embedding_ops.embedding_lookup(x, ind1) g2 = embedding_ops.embedding_lookup(x, ind2) g3 = embedding_ops.embedding_lookup(x, ind3) g4 = math_ops.reduce_sum(x * constant_op.constant(2.0)) return g1 * g2 * g3 * g4 var_np = np.random.rand(4, 2).astype(np.float32) var = constant_op.constant(var_np) grad = backprop.gradients_function(fn, [0])(var)[0] grad = self.evaluate(ops.convert_to_tensor(grad)) if not context.executing_eagerly(): tf_var = array_ops.constant(var_np, dtypes.float32) tf_ind1 = array_ops.constant([0, 1]) tf_ind2 = array_ops.constant([2, 3]) tf_ind3 = array_ops.constant([1, 3]) tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1) tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2) tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3) tf_g4 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1)) tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4 tf_grad = gradients.gradients(tf_y, [tf_var])[0] tf_dense_grad = math_ops.unsorted_segment_sum( tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0]) self.assertAllClose(grad, self.evaluate(tf_dense_grad))
def _prepare(self, grads_and_vars): """""" if self._lr is None: sTy = 0 sTs = 0 yTy = 0 for g_t, x_tm1 in grads_and_vars: if g_t is None: continue with ops.name_scope('update_' + x_tm1.op.name), ops.device(x_tm1.device): if isinstance(g_t, ops.Tensor): g_tm1 = self.get_slot(x_tm1, 'g') s_tm1 = self.get_slot(x_tm1, 's') y_t = (g_t-g_tm1) sTy += math_ops.reduce_sum(s_tm1*y_t) sTs += math_ops.reduce_sum(s_tm1**2) yTy += math_ops.reduce_sum(y_t**2) else: idxs, idxs_ = array_ops.unique(g_t.indices) g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs)) g_tm1 = self.get_slot(x_tm1, 'g') g_tm1_ = array_ops.gather(g_tm1, idxs) s_tm1 = self.get_slot(x_tm1, 's') s_tm1_ = array_ops.gather(s_tm1, idxs) y_t_ = (g_t_-g_tm1_) sTy += math_ops.reduce_sum(s_tm1_*y_t_) sTs += math_ops.reduce_sum(s_tm1_**2) yTy += math_ops.reduce_sum(y_t_**2) sTy = math_ops.abs(sTy) self._lr = sTs / (sTy + self._eps)
def testAggregateGradients(self): def fn(x): ind1 = tensor.Tensor(np.array([0, 1])) ind2 = tensor.Tensor(np.array([2, 3])) ind3 = tensor.Tensor(np.array([1, 3])) # A mixture of IndexedSlices and dense tensor to aggregate. g1 = embedding_ops.embedding_lookup(x, ind1) g2 = embedding_ops.embedding_lookup(x, ind2) g3 = embedding_ops.embedding_lookup(x, ind3) g4 = math_ops.reduce_sum(x * tensor.Tensor(2.0)) return g1 * g2 * g3 * g4 var_np = np.random.rand(4, 2).astype(np.float32) var = tensor.Tensor(var_np) grad = backprop.gradients_function(fn, [0])(var)[0] with context.graph_mode(), self.test_session(): tf_var = array_ops.constant(var_np, dtypes.float32) tf_ind1 = array_ops.constant([0, 1]) tf_ind2 = array_ops.constant([2, 3]) tf_ind3 = array_ops.constant([1, 3]) tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1) tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2) tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3) tf_g4 = math_ops.reduce_sum(tf_var * 2.0, reduction_indices=(0, 1)) tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4 tf_grad = gradients.gradients(tf_y, [tf_var])[0] tf_dense_grad = math_ops.unsorted_segment_sum( tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0]) self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
def _apply_sparse(self, g_t, x_tm1, prepare): """""" idxs, idxs_ = array_ops.unique(g_t.indices) g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs)) s_t_ = self._lr * g_t_ return [[s_t_, x_tm1, idxs, g_t_]]
def _GatherV2Grad(op, grad): """Gradient for GatherV2 op.""" # params can be large, so colocate the shape calculation with it. # # params can be very large for sparse model, array_ops.shape raises # exception on the Windows platform when any dimension is larger than # int32. params_shape is not used in optimizer apply_sparse gradients, # so it's fine to convert it back to int32 regardless of truncation. params = op.inputs[0] with ops.colocate_with(params): params_shape = array_ops.shape(params, out_type=ops.dtypes.int64) params_shape = math_ops.to_int32(params_shape) indices = op.inputs[1] indices_size = array_ops.expand_dims(array_ops.size(indices), 0) axis = op.inputs[2] axis_static = tensor_util.constant_value(axis) # For axis 0 gathers, build an appropriately shaped IndexedSlices. if axis_static == 0: values_shape = array_ops.concat([indices_size, params_shape[1:]], 0) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(indices, indices_size) return [ops.IndexedSlices(values, indices, params_shape), None, None] outer_shape = params_shape[:axis] outer_dims = array_ops.size(outer_shape) inner_shape = params_shape[axis:][1:] inner_dims = array_ops.size(inner_shape) outer_axes_indices = math_ops.range(outer_dims) inner_axes_indices = math_ops.range(outer_dims + 1, outer_dims + 1 + inner_dims) values_shape = array_ops.concat([outer_shape, indices_size, inner_shape], 0) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(indices, indices_size) # We need to sum up every slice `values[..., i, ....]` corresponding to # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not # support an axis parameter, we transpose the gather dimension to the front, # then use `unsorted_segment_sum` to build a # [gather_axis, outer_axes, inner_axes] tensor with all the gradients # affecting each index in `gather_axis` summed up. transpose_dims = array_ops.concat( [[outer_dims], outer_axes_indices, inner_axes_indices], 0) values_transpose = array_ops.transpose(values, transpose_dims) num_segments = params_shape[axis] params_grad = math_ops.unsorted_segment_sum( values_transpose, indices, num_segments) # Inverts the above transpose by moving dimension 0 back to its original # position. invert_transpose_dims = array_ops.concat( [outer_axes_indices + 1, [0], inner_axes_indices], 0) params_grad = array_ops.transpose(params_grad, invert_transpose_dims) return [params_grad, None, None]
def testBadIndices(self): # Note: GPU kernel does not return the out-of-range error needed for this # test, so this test is marked as cpu-only. with self.test_session(use_gpu=False): for bad in [[-1]], [[7]]: unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2) with self.assertRaisesOpError( r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]): unsorted.eval()
def testBadIndices(self): # Note: GPU kernel does not return the out-of-range error needed for this # test, so this test is marked as cpu-only. # Note: With PR #13055 a negative index will be ignored silently. with self.session(use_gpu=False): for bad in [[2]], [[7]]: unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2) with self.assertRaisesOpError( r"segment_ids\[0,0\] = %d is out of range \[0, 2\)" % bad[0][0]): self.evaluate(unsorted)
def testEmptySecondDimension(self): dtypes = [np.float16, np.float32, np.float64, np.int64, np.int32, np.complex64, np.complex128] with self.session(use_gpu=True): for dtype in dtypes: for itype in (np.int32, np.int64): data = np.zeros((2, 0), dtype=dtype) segment_ids = np.array([0, 1], dtype=itype) unsorted = math_ops.unsorted_segment_sum(data, segment_ids, 2) self.assertAllEqual(unsorted.eval(), np.zeros((2, 0), dtype=dtype))
def UnsortedSegmentSum(self, data, indices, num_segments): with self.test_session() as sess, self.test_scope(): d = array_ops.placeholder(data.dtype, shape=data.shape) if isinstance(indices, int): i = array_ops.placeholder(np.int32, shape=[]) else: i = array_ops.placeholder(indices.dtype, shape=indices.shape) return sess.run( math_ops.unsorted_segment_sum(d, i, num_segments), {d: data, i: indices})
def _aggregate_sparse_grad(self, grad, var, train_ops): """Aggregate sparse gradients. Args: grad: The sparse gradient to aggregate. var: The variable to apply this gradient to. train_ops: The train_ops for the worker to run. Returns: aggregated_grad: Aggregated grad. """ # Sparse gradients have to be inserted as one pair of (value, # indice) as an element instead of the whole "indexedslice" because # their shapes are not deterministic. sparse_grad_queue = (data_flow_ops.FIFOQueue( -1, (grad.values.dtype, grad.indices.dtype), shapes=(var.get_shape().as_list()[1:], ()), shared_name="sparse_grad_q_%s" % var.name)) self._sparse_grad_queues_and_devs.append((sparse_grad_queue, var.device)) # Sparse token is inserted after the "enqueue_many" finishes. This # is needed to make sure enough sparse gradients have been enqueued # before applying them to the variables. sparse_token_queue = (data_flow_ops.FIFOQueue( self._replicas_to_aggregate * 2, types_pb2.DT_INT32, shapes=(), shared_name="sparse_token_q_%s" % var.name)) self._one_element_queue_list.append((sparse_token_queue, var.device)) enqueue_spares_op = sparse_grad_queue.enqueue_many([grad.values, grad.indices]) with ops.control_dependencies([enqueue_spares_op]): train_ops.append(sparse_token_queue.enqueue((1,))) with ops.control_dependencies([sparse_token_queue.dequeue_many( self._replicas_to_aggregate)]): values, indices = sparse_grad_queue.dequeue_many(sparse_grad_queue.size()) concat_grad = ops.IndexedSlices(values, indices, grad.dense_shape) # Sum the gradients of the same variables in the sparse layers so # that each variable is only updated once. Note that with 2 # gradients g1 and g2 from 2 replicas for the same variable, # apply(g1+g2) is different from apply(g1) and then apply(g2) when # the optimizer is complex like Momentum or Adagrad. values = concat_grad.values indices = concat_grad.indices new_indices, indx = array_ops.unique(indices) num_indices = array_ops.shape(new_indices)[0] sum_values = math_ops.unsorted_segment_sum(values, indx, num_indices) return ops.IndexedSlices(sum_values, new_indices, concat_grad.dense_shape)
def _DynamicStitchGrads(op, grad): """Gradients for DynamicStitch.""" num_values = len(op.inputs) // 2 indices_grad = [None] * num_values def AsInt32(x): return (x if op.inputs[0].dtype == dtypes.int32 else math_ops.cast(x, dtypes.int32)) inputs = [AsInt32(op.inputs[i]) for i in xrange(num_values)] if isinstance(grad, ops.IndexedSlices): output_shape = array_ops.shape(op.outputs[0]) output_rows = output_shape[0] grad = math_ops.unsorted_segment_sum(grad.values, grad.indices, output_rows) values_grad = [array_ops.gather(grad, inp) for inp in inputs] return indices_grad + values_grad
def _BroadcastToGrad(op, grad): input_value = op.inputs[0] broadcast_shape = op.inputs[1] # Assign ids for each position in input_value. input_value_shape = array_ops.shape(input_value) input_value_size = array_ops.size(input_value) ids = array_ops.reshape(math_ops.range(input_value_size), input_value_shape) broadcast_ids = array_ops.broadcast_to(ids, broadcast_shape) # Group by ids and sum its gradients. grad_flatten = array_ops.reshape(grad, [-1]) broadcast_ids_flatten = array_ops.reshape(broadcast_ids, [-1]) updates_grad_flatten = math_ops.unsorted_segment_sum(grad_flatten, broadcast_ids_flatten, input_value_size) updates_grad = array_ops.reshape(updates_grad_flatten, input_value_shape) return [updates_grad, None]
def _UnsortedSegmentMinOrMaxGrad(op, grad): """ Gradient for UnsortedSegmentMin and UnsortedSegmentMax. """ # Get the number of selected (minimum or maximum) elements in each segment. gathered_outputs, zero_clipped_indices, is_positive = \ _GatherDropNegatives(op.outputs[0], op.inputs[1]) is_selected = math_ops.equal(op.inputs[0], gathered_outputs) is_selected = math_ops.logical_and(is_selected, is_positive) num_selected = math_ops.unsorted_segment_sum( math_ops.cast(is_selected, grad.dtype), op.inputs[1], op.inputs[2]) # Compute the gradient for each segment. The gradient for the ith segment is # divided evenly among the selected elements in that segment. weighted_grads = math_ops.div(grad, num_selected) gathered_grads, _, _ = _GatherDropNegatives(weighted_grads, None, zero_clipped_indices, is_positive) zeros = array_ops.zeros_like(gathered_grads) return array_ops.where(is_selected, gathered_grads, zeros), None, None
def testGradient(self): num_cols = 2 indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3]) num_segments = max(indices_flat) + 3 for indices in indices_flat, indices_flat.reshape(5, 2): shape = indices.shape + (num_cols,) with self.test_session(use_gpu=self.use_gpu): tf_x, np_x = self._input(shape, dtype=dtypes_lib.float64) s = math_ops.unsorted_segment_sum( data=tf_x, segment_ids=indices, num_segments=num_segments) jacob_t, jacob_n = gradient_checker.compute_gradient( tf_x, shape, s, [num_segments, num_cols], x_init_value=np_x.astype(np.double), delta=1) self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
def _deduplicate_indexed_slices(values, indices): """Sums `values` associated with any non-unique `indices`. Args: values: A `Tensor` with rank >= 1. indices: A one-dimensional integer `Tensor`, indexing into the first dimension of `values` (as in an IndexedSlices object). Returns: A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a de-duplicated version of `indices` and `summed_values` contains the sum of `values` slices associated with each unique index. """ unique_indices, new_index_positions = array_ops.unique(indices) summed_values = math_ops.unsorted_segment_sum( values, new_index_positions, array_ops.shape(unique_indices)[0]) return (summed_values, unique_indices)
def _indexed_slices_to_tensor(value): """Converts an IndexedSlices object `value` to a Tensor. Args: value: An ops.IndexedSlices object. Returns: A dense Tensor representing the values in the given IndexedSlices. Raises: ValueError: If the IndexedSlices does not have the same dtype. """ if value.dense_shape is None: raise ValueError( "Tensor conversion requested for IndexedSlices without dense_shape: %s" % str(value)) return math_ops.unsorted_segment_sum(value.values, value.indices, value.dense_shape[0])
def _IndexedSlicesToTensor(value, dtype=None, name=None, as_ref=False): """Converts an IndexedSlices object `value` to a Tensor. NOTE(mrry): This function is potentially expensive. Args: value: An ops.IndexedSlices object. dtype: The dtype of the Tensor to be returned. name: Optional name to use for the returned Tensor. as_ref: True if a ref is requested. Returns: A dense Tensor representing the values in the given IndexedSlices. Raises: ValueError: If the IndexedSlices does not have the same dtype. """ _ = as_ref if dtype and not dtype.is_compatible_with(value.dtype): raise ValueError( "Tensor conversion requested dtype %s for IndexedSlices with dtype %s" % (dtype.name, value.dtype.name)) if value.dense_shape is None: raise ValueError( "Tensor conversion requested for IndexedSlices without dense_shape: %s" % str(value)) # TODO(mrry): Consider adding static shape information to # IndexedSlices, to avoid using numpy here. dense_shape_value = tensor_util.ConstantValue(value.dense_shape) if dense_shape_value is not None: num_elements = np.prod(dense_shape_value) if num_elements >= _LARGE_SPARSE_NUM_ELEMENTS: warnings.warn( "Converting sparse IndexedSlices to a dense Tensor with %d elements. " "This may consume a large amount of memory." % num_elements) else: warnings.warn( "Converting sparse IndexedSlices to a dense Tensor of unknown shape. " "This may consume a large amount of memory.") return math_ops.unsorted_segment_sum(value.values, value.indices, value.dense_shape[0], name=name)
def testValues(self): dtypes = [ dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int64, dtypes_lib.int32, dtypes_lib.complex64, dtypes_lib.complex128 ] indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3]) num_segments = 12 for indices in indices_flat, indices_flat.reshape(5, 2): shape = indices.shape + (2,) for dtype in dtypes: with self.test_session(use_gpu=True): tf_x, np_x = self._input(shape, dtype=dtype) np_ans = self._segmentReduce( indices, np_x, np.add, op2=None, num_segments=num_segments) s = math_ops.unsorted_segment_sum( data=tf_x, segment_ids=indices, num_segments=num_segments) tf_ans = s.eval() self.assertAllClose(np_ans, tf_ans) self.assertShapeEqual(np_ans, s)
def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None): """Return histogram of values. Given the tensor `values`, this operation returns a rank 1 histogram counting the number of entries in `values` that fell into every bin. The bins are equal width and determined by the arguments `value_range` and `nbins`. Args: values: Numeric `Tensor`. value_range: Shape [2] `Tensor` of same `dtype` as `values`. values <= value_range[0] will be mapped to hist[0], values >= value_range[1] will be mapped to hist[-1]. nbins: Scalar `int32 Tensor`. Number of histogram bins. dtype: dtype for returned histogram. name: A name for this operation (defaults to 'histogram'). Returns: A 1-D `Tensor` holding histogram of values. """ with ops.name_scope(name, 'histogram', [values, value_range, nbins]) as scope: values = ops.convert_to_tensor(values, name='values') values = gen_array_ops.reshape(values, [-1]) value_range = ops.convert_to_tensor(value_range, name='value_range') nbins = ops.convert_to_tensor(nbins, dtype=np.int32, name='nbins') nbins_float = math_ops.cast(nbins, values.dtype) # Map tensor values that fall within value_range to [0, 1]. scaled_values = math_ops.truediv( values - value_range[0], value_range[1] - value_range[0], name='scaled_values') # map tensor values within the open interval value_range to {0,.., nbins-1}, # values outside the open interval will be zero or less, or nbins or more. indices = math_ops.floor(nbins_float * scaled_values, name='indices') # Clip edge cases (e.g. value = value_range[1]) or "outliers." indices = math_ops.cast( clip_ops.clip_by_value(indices, 0, nbins_float - 1), np.int32) return math_ops.unsorted_segment_sum( array_ops.ones_like(indices, dtype=dtype), indices, nbins, name=scope)
def histogram_fixed_width(values, value_range, nbins=100, dtype=dtypes.int32, name=None): """Return histogram of values. Given the tensor `values`, this operation returns a rank 1 histogram counting the number of entries in `values` that fell into every bin. The bins are equal width and determined by the arguments `value_range` and `nbins`. Args: values: Numeric `Tensor`. value_range: Shape [2] `Tensor`. new_values <= value_range[0] will be mapped to hist[0], values >= value_range[1] will be mapped to hist[-1]. Must be same dtype as new_values. nbins: Scalar `int32 Tensor`. Number of histogram bins. dtype: dtype for returned histogram. name: A name for this operation (defaults to 'histogram_fixed_width'). Returns: A 1-D `Tensor` holding histogram of values. Examples: ```python # Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf) nbins = 5 value_range = [0.0, 5.0] new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15] with tf.default_session() as sess: hist = tf.histogram_fixed_width(new_values, value_range, nbins=5) variables.initialize_all_variables().run() sess.run(hist) => [2, 1, 1, 0, 2] ``` """ with ops.op_scope([values, value_range, nbins], name, 'histogram_fixed_width') as scope: values = ops.convert_to_tensor(values, name='values') values = array_ops.reshape(values, [-1]) value_range = ops.convert_to_tensor(value_range, name='value_range') nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins') nbins_float = math_ops.to_float(nbins) # Map tensor values that fall within value_range to [0, 1]. scaled_values = math_ops.truediv(values - value_range[0], value_range[1] - value_range[0], name='scaled_values') # map tensor values within the open interval value_range to {0,.., nbins-1}, # values outside the open interval will be zero or less, or nbins or more. indices = math_ops.floor(nbins_float * scaled_values, name='indices') # Clip edge cases (e.g. value = value_range[1]) or "outliers." indices = math_ops.cast( clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32) # TODO(langmore) This creates an array of ones to add up and place in the # bins. This is inefficient, so replace when a better Op is available. return math_ops.unsorted_segment_sum(array_ops.ones_like(indices, dtype=dtype), indices, nbins, name=scope)
def tpu_function(sparse): # Assumes dense_shape is (2, *) looked_up = array_ops.gather(table, sparse.values) segment_sum = math_ops.unsorted_segment_sum( looked_up, sparse.indices[:, 0], 2) return {"sparse": sparse, "segment_sum": segment_sum}
def _GatherV2Grad(op, grad): """Gradient for GatherV2 op.""" # params can be large, so colocate the shape calculation with it. # # params can be very large for sparse model, array_ops.shape raises # exception on the Windows platform when any dimension is larger than # int32. params_shape is not used in optimizer apply_sparse gradients, # so it's fine to convert it back to int32 regardless of truncation. params = op.inputs[0] with ops.colocate_with(params): params_shape = array_ops.shape(params, out_type=ops.dtypes.int64) params_shape = math_ops.to_int32(params_shape) indices = op.inputs[1] indices_size = array_ops.expand_dims(array_ops.size(indices), 0) axis = op.inputs[2] axis_static = tensor_util.constant_value(axis) # For axis 0 gathers, build an appropriately shaped IndexedSlices. if axis_static == 0: if context.in_eager_mode(): params_tail_shape = params_shape.cpu()[1:] else: params_tail_shape = params_shape[1:] values_shape = array_ops.concat([indices_size, params_tail_shape], 0) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(indices, indices_size) return [ops.IndexedSlices(values, indices, params_shape), None, None] outer_shape = params_shape[:axis] outer_dims = array_ops.size(outer_shape) inner_shape = params_shape[axis:][1:] inner_dims = array_ops.size(inner_shape) outer_axes_indices = math_ops.range(outer_dims) inner_axes_indices = math_ops.range(outer_dims + 1, outer_dims + 1 + inner_dims) values_shape = array_ops.concat([outer_shape, indices_size, inner_shape], 0) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(indices, indices_size) # We need to sum up every slice `values[..., i, ....]` corresponding to # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not # support an axis parameter, we transpose the gather dimension to the front, # then use `unsorted_segment_sum` to build a # [gather_axis, outer_axes, inner_axes] tensor with all the gradients # affecting each index in `gather_axis` summed up. transpose_dims = array_ops.concat( [[outer_dims], outer_axes_indices, inner_axes_indices], 0) values_transpose = array_ops.transpose(values, transpose_dims) num_segments = params_shape[axis] params_grad = math_ops.unsorted_segment_sum(values_transpose, indices, num_segments) # Inverts the above transpose by moving dimension 0 back to its original # position. invert_transpose_dims = array_ops.concat( [outer_axes_indices + 1, [0], inner_axes_indices], 0) params_grad = array_ops.transpose(params_grad, invert_transpose_dims) return [params_grad, None, None]
def loop_fn(i): data = array_ops.gather(t, i) data_0 = array_ops.gather(t, 0) seg_ids = array_ops.gather(segment_ids, i) return (math_ops.unsorted_segment_sum(data, seg_ids, num_segments), math_ops.unsorted_segment_sum(data_0, seg_ids, num_segments))
def _SparseSegmentSumGrad(op, grad): """Gradient for SparseSegmentSum.""" input_rows = array_ops.shape(op.inputs[0])[0] return (math_ops.unsorted_segment_sum(array_ops.gather(grad, op.inputs[2]), op.inputs[1], input_rows), None, None)
def _mini_batch_training_op(self, inputs, cluster_idx_list, cluster_centers, total_counts): """Creates an op for training for mini batch case. Args: inputs: list of input Tensors. cluster_idx_list: A vector (or list of vectors). Each element in the vector corresponds to an input row in 'inp' and specifies the cluster id corresponding to the input. cluster_centers: Tensor Ref of cluster centers. total_counts: Tensor Ref of cluster counts. Returns: An op for doing an update of mini-batch k-means. """ update_ops = [] for inp, cluster_idx in zip(inputs, cluster_idx_list): with ops.colocate_with(inp, ignore_existing=True): assert total_counts is not None cluster_idx = array_ops.reshape(cluster_idx, [-1]) # Dedupe the unique ids of cluster_centers being updated so that updates # can be locally aggregated. unique_ids, unique_idx = array_ops.unique(cluster_idx) num_unique_cluster_idx = array_ops.size(unique_ids) # Fetch the old values of counts and cluster_centers. with ops.colocate_with(total_counts, ignore_existing=True): old_counts = array_ops.gather(total_counts, unique_ids) # TODO(agarwal): This colocation seems to run into problems. Fix it. with ops.colocate_with(cluster_centers, ignore_existing=True): old_cluster_centers = array_ops.gather( cluster_centers, unique_ids) # Locally aggregate the increment to counts. count_updates = math_ops.unsorted_segment_sum( array_ops.ones_like(unique_idx, dtype=total_counts.dtype), unique_idx, num_unique_cluster_idx) # Locally compute the sum of inputs mapped to each id. # For a cluster with old cluster value x, old count n, and with data # d_1,...d_k newly assigned to it, we recompute the new value as # x += (sum_i(d_i) - k * x) / (n + k). # Compute sum_i(d_i), see comment above. cluster_center_updates = math_ops.unsorted_segment_sum( inp, unique_idx, num_unique_cluster_idx) # Shape to enable broadcasting count_updates and learning_rate to inp. # It extends the shape with 1's to match the rank of inp. broadcast_shape = array_ops.concat([ array_ops.reshape(num_unique_cluster_idx, [1]), array_ops.ones(array_ops.reshape( array_ops.rank(inp) - 1, [1]), dtype=dtypes.int32) ], 0) # Subtract k * x, see comment above. cluster_center_updates -= math_ops.cast( array_ops.reshape(count_updates, broadcast_shape), inp.dtype) * old_cluster_centers learning_rate = math_ops.reciprocal( math_ops.cast(old_counts + count_updates, inp.dtype)) learning_rate = array_ops.reshape(learning_rate, broadcast_shape) # scale by 1 / (n + k), see comment above. cluster_center_updates *= learning_rate # Apply the updates. update_counts = state_ops.scatter_add(total_counts, unique_ids, count_updates) update_cluster_centers = state_ops.scatter_add( cluster_centers, unique_ids, cluster_center_updates) update_ops.extend([update_counts, update_cluster_centers]) return control_flow_ops.group(*update_ops)
def _training_examples_and_variables(): """Returns dictionaries for training examples and variables.""" batch_size = targets.get_shape()[0] # Iterate over all feature columns and create appropriate lists for dense # and sparse features as well as dense and sparse weights (variables) for # SDCA. # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables # dict as 1-dimensional tensors. dense_features, sparse_features, sparse_feature_with_values = [], [], [] dense_feature_weights = [] sparse_feature_weights, sparse_feature_with_values_weights = [], [] for column in sorted(columns_to_variables.keys(), key=lambda x: x.key): transformed_tensor = features[column] if isinstance(column, layers.feature_column._RealValuedColumn): # pylint: disable=protected-access # A real-valued column corresponds to a dense feature in SDCA. A # transformed tensor corresponding to a RealValuedColumn should have # rank at most 2. In order to be passed to SDCA, its rank needs to be # exactly 2 (i.e., its shape should be [batch_size, column.dim]). check_rank_op = control_flow_ops.Assert( math_ops.less_equal(array_ops.rank(transformed_tensor), 2), ['transformed_tensor should have rank at most 2.']) # Reshape to [batch_size, dense_column_dimension]. with ops.control_dependencies([check_rank_op]): transformed_tensor = array_ops.reshape( transformed_tensor, [array_ops.shape(transformed_tensor)[0], -1]) dense_features.append(transformed_tensor) # For real valued columns, the variables list contains exactly one # element. dense_feature_weights.append( columns_to_variables[column][0]) elif isinstance(column, layers.feature_column._BucketizedColumn): # pylint: disable=protected-access # A bucketized column corresponds to a sparse feature in SDCA. The # bucketized feature is "sparsified" for SDCA by converting it to a # SparseFeatureColumn representing the one-hot encoding of the # bucketized feature. # # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a # bucketized feature column to a dense feature in SDCA. This will # likely depend on the number of buckets. dense_bucket_tensor = column._to_dnn_input_layer( transformed_tensor) # pylint: disable=protected-access sparse_feature_column = _dense_tensor_to_sparse_feature_column( dense_bucket_tensor) sparse_feature_with_values.append(sparse_feature_column) # If a partitioner was used during variable creation, we will have a # list of Variables here larger than 1. vars_to_append = columns_to_variables[column][0] if len(columns_to_variables[column]) > 1: vars_to_append = columns_to_variables[column] sparse_feature_with_values_weights.append(vars_to_append) elif isinstance( column, ( layers.feature_column._WeightedSparseColumn, # pylint: disable=protected-access layers.feature_column._CrossedColumn, # pylint: disable=protected-access layers.feature_column._SparseColumn)): # pylint: disable=protected-access if isinstance(column, layers.feature_column._WeightedSparseColumn): # pylint: disable=protected-access id_tensor = column.id_tensor(transformed_tensor) weight_tensor = array_ops.reshape( column.weight_tensor(transformed_tensor).values, [-1]) else: id_tensor = transformed_tensor weight_tensor = array_ops.ones( [array_ops.shape(id_tensor.indices)[0]], dtypes.float32) example_ids = array_ops.reshape(id_tensor.indices[:, 0], [-1]) flat_ids = array_ops.reshape(id_tensor.values, [-1]) # Prune invalid IDs (< 0) from the flat_ids, example_ids, and # weight_tensor. These can come from looking up an OOV entry in the # vocabulary (default value being -1). is_id_valid = math_ops.greater_equal(flat_ids, 0) flat_ids = array_ops.boolean_mask(flat_ids, is_id_valid) example_ids = array_ops.boolean_mask( example_ids, is_id_valid) weight_tensor = array_ops.boolean_mask( weight_tensor, is_id_valid) projection_length = math_ops.reduce_max(flat_ids) + 1 # project ids based on example ids so that we can dedup ids that # occur multiple times for a single example. projected_ids = projection_length * example_ids + flat_ids # Remove any redundant ids. ids, idx = array_ops.unique(projected_ids) # Keep only one example id per duplicated ids. example_ids_filtered = math_ops.unsorted_segment_min( example_ids, idx, array_ops.shape(ids)[0]) # reproject ids back feature id space. reproject_ids = (ids - projection_length * example_ids_filtered) weights = array_ops.reshape( math_ops.unsorted_segment_sum(weight_tensor, idx, array_ops.shape(ids)[0]), [-1]) sparse_feature_with_values.append( SparseFeatureColumn(example_ids_filtered, reproject_ids, weights)) # If a partitioner was used during variable creation, we will have a # list of Variables here larger than 1. vars_to_append = columns_to_variables[column][0] if len(columns_to_variables[column]) > 1: vars_to_append = columns_to_variables[column] sparse_feature_with_values_weights.append(vars_to_append) else: raise ValueError( 'SDCAOptimizer does not support column type %s.' % type(column).__name__) example_weights = array_ops.reshape( features[weight_column_name], shape=[ -1 ]) if weight_column_name else array_ops.ones([batch_size]) example_ids = features[self._example_id_column] sparse_feature_with_values.extend(sparse_features) sparse_feature_with_values_weights.extend(sparse_feature_weights) examples = dict(sparse_features=sparse_feature_with_values, dense_features=dense_features, example_labels=math_ops.cast( array_ops.reshape(targets, shape=[-1]), dtypes.float32), example_weights=example_weights, example_ids=example_ids) sdca_variables = dict( sparse_features_weights=sparse_feature_with_values_weights, dense_features_weights=dense_feature_weights) return examples, sdca_variables
def _sampled_scattered_embedding_lookup_sparse(params, sp_values, dimension=None, sampled_candidates=None, hash_key=None, with_sign_hash=False, name=None): """Looks up embeddings using parameter hashing for sparse values. This method looks up selected embedding dimensions if `sampled_candidates` is given, otherwise looks up all dimensions. The i-th embedding component of a value v in `values` is found by retrieving the weight whose index is a fingerprint of the pair (v,i). The concept is explored as "feature hashing" for model compression in this paper: http://arxiv.org/pdf/1504.04788.pdf This is logically equivalent to: * Transforming `sp_values` (which has shape `[d0, d1]`) into a one-hot `Tensor` of shape `[d0, N]`. * Multiplying with a `Tensor` `h` of shape `[N, dimension]`, where `h(i, j) = params[hash(i, j)]`. Args: params: A float `Tensor` with rank 1 and fully-defined shape. sp_values: A 2D `SparseTensor` to be embedded with shape `[d0, d1]`. dimension: An int `Tensor` of the final dimension. The user needs to provide either `dimension` or `sampled_candidates`. sampled_candidates: An optional `Tensor` of column indices to keep along the final dimension with shape `[d0, N]`. If given, `dimension` is ignored. If `None`, looks up all candidates. hash_key: Specify the hash_key that will be used by the `FingerprintCat64` function to combine the crosses fingerprints on SparseFeatureCrossOp (optional). with_sign_hash: A `bool` indicating whether `h(i, j)` should be multiplied by `+1` or `-1`, where the value selected is determined by hashing `(i, j)`. This is often necessary to remove bias resulting from hash collisions. name: An optional name for this op. Returns: A `Tensor` of shape `[d0, dimension]`. If `sampled_candidates` is given, the output shape is `[d0, N]`. Raises: TypeError: If sp_values is not `SparseTensor`. ValueError: If both `dimension` and `sampled_candidates` are `None`. """ if not isinstance(sp_values, sparse_tensor.SparseTensor): raise TypeError("sp_values must be SparseTensor") with ops.name_scope( name=name, default_name="sampled_scattered_embedding_lookup_sparse", values=[sp_values, params, dimension, sampled_candidates]) as name_scope: segment_ids = sp_values.indices[:, 0] if sampled_candidates is not None: # Tile sampled_candidates so there is one line corresponding to each # element in sp_values.values sampled_candidates = array_ops.gather(sampled_candidates, segment_ids) embeddings = _sampled_scattered_embedding_lookup( params, sp_values.values, dimension=dimension, sampled_candidates=sampled_candidates, hash_key=hash_key, name="values_lookup") if with_sign_hash: signs = _sampled_scattered_embedding_lookup( array_ops.constant([-1., 1.]), sp_values.values, dimension=dimension, sampled_candidates=sampled_candidates, hash_key=hash_key, name="signs_lookup") embeddings = math_ops.multiply(signs, embeddings, name="signs_hash") if segment_ids.dtype != dtypes.int32: segment_ids = math_ops.cast(segment_ids, dtypes.int32) num_segments = array_ops.shape(sp_values)[0] return math_ops.unsorted_segment_sum(embeddings, segment_ids, num_segments=num_segments, name=name_scope)
def _embedding_lookup_with_distributed_aggregation(params, ids, partition_strategy="mod", name=None, max_norm=None, weights=None, idx=None, segment_ids=None): """Lookup helper for embedding_lookup_sparse_with_distributed_aggregation.""" if params is None or params == []: # pylint: disable=g-explicit-bool-comparison raise ValueError("Need at least one param") if isinstance(params, variables.PartitionedVariable): params = list(params) # Iterate to get the underlying Variables. if not isinstance(params, list): params = [params] def maybe_normalize(x): if max_norm is not None: if x.get_shape().ndims is not None: ndims = x.get_shape().ndims else: ndims = array_ops.size(array_ops.shape(x)) return clip_ops.clip_by_norm(x, max_norm, axes=list(range(1, ndims))) return x with ops.name_scope(name, "embedding_lookup_with_distributed_aggregation", params + [ids]) as name: np = len(params) # Number of partitions # Preserve the resource variable status to avoid accidental dense reads. if not any( isinstance(p, resource_variable_ops.ResourceVariable) for p in params): params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params") if np == 1: with ops.colocate_with(params[0]): ret = maybe_normalize(_do_gather(params[0], ids)) ignore_weights = weights is None if not ignore_weights: if weights.dtype != ret.dtype: weights = math_ops.cast(weights, ret.dtype) # Reshape to allow broadcast ones = array_ops.fill( array_ops.expand_dims(array_ops.rank(ret) - 1, 0), 1) bcast_weights_shape = array_ops.concat( [array_ops.shape(weights), ones], 0) orig_weights_shape = weights.get_shape() weights = array_ops.reshape(weights, bcast_weights_shape) # Set weights shape after reshape if ret.get_shape().ndims is not None: weights.set_shape( orig_weights_shape.concatenate( [1 for _ in range(ret.get_shape().ndims - 1)])) ret *= weights return math_ops.segment_sum(ret, segment_ids, name=name) else: return math_ops.sparse_segment_sum(ret, idx, segment_ids, name=name) else: ids = ops.convert_to_tensor(ids, name="ids") flat_ids = array_ops.reshape(ids, [-1]) original_indices = math_ops.range(array_ops.size(flat_ids)) # Create p_assignments and set new_ids depending on the strategy. if partition_strategy == "mod": p_assignments = flat_ids % np new_ids = flat_ids // np elif partition_strategy == "div": # Compute num_total_ids as the sum of dim-0 of params, then assign to # partitions based on a constant number of ids per partition. Optimize # if we already know the full shape statically. dim_0_size = params[0].get_shape().dims[0] for p in xrange(1, np): dim_0_size += params[p].get_shape().dims[0] if dim_0_size.value: num_total_ids = constant_op.constant( dim_0_size, flat_ids.dtype) else: dim_0_sizes = [] for p in xrange(np): if params[p].get_shape().dims[0].value is not None: dim_0_sizes.append( params[p].get_shape().dims[0].value) else: with ops.colocate_with(params[p]): dim_0_sizes.append( array_ops.shape(params[p])[0]) num_total_ids = math_ops.reduce_sum( math_ops.cast(array_ops.stack(dim_0_sizes), flat_ids.dtype)) ids_per_partition = num_total_ids // np extras = num_total_ids % np p_assignments = math_ops.maximum( flat_ids // (ids_per_partition + 1), (flat_ids - extras) // ids_per_partition) # Emulate a conditional using a boolean indicator tensor is_in_first_extras_partitions = math_ops.cast( p_assignments < extras, flat_ids.dtype) new_ids = (is_in_first_extras_partitions * (flat_ids % (ids_per_partition + 1)) + (1 - is_in_first_extras_partitions) * ((flat_ids - extras) % ids_per_partition)) else: raise ValueError("Unrecognized partition strategy: " + partition_strategy) # Cast partition assignments to int32 for use in dynamic_partition. # There really should not be more than 2^32 partitions. p_assignments = math_ops.cast(p_assignments, dtypes.int32) # Partition list of ids based on assignments into np separate lists gather_ids = data_flow_ops.dynamic_partition( new_ids, p_assignments, np) # Similarly, partition the original indices. pindices = data_flow_ops.dynamic_partition(original_indices, p_assignments, np) # Do np separate lookups, finding embeddings for plist[p] in params[p] partitioned_result = [] for p in xrange(np): with ops.colocate_with(params[p]): partitioned_result.append( _do_gather(params[p], gather_ids[p])) ignore_weights = weights is None if not ignore_weights: # Partition weights according to pindices. partitioned_weight = [] for p in xrange(np): partitioned_weight.append( array_ops.gather(weights, pindices[p])) # Reshape each partition result. element_shape = params[0].get_shape()[1:] for p in params[1:]: element_shape = element_shape.merge_with(p.get_shape()[1:]) if element_shape.is_fully_defined(): for p in xrange(np): with ops.colocate_with(params[p]): partitioned_result[p] = array_ops.reshape( partitioned_result[p], array_ops.concat( [array_ops.shape(pindices[p]), element_shape], 0)) else: with ops.colocate_with(params[0]): params_shape = array_ops.shape(params[0]) for p in xrange(np): with ops.colocate_with(params[p]): partitioned_result[p] = array_ops.reshape( partitioned_result[p], array_ops.concat([ array_ops.shape(pindices[p]), array_ops.slice(params_shape, [1], [-1]) ], 0)) # Normalize each partition result. for p in xrange(np): with ops.colocate_with(params[p]): partitioned_result[p] = maybe_normalize( partitioned_result[p]) if not ignore_weights: # Multiply each partition result with partition weights. for p in xrange(np): with ops.colocate_with(params[p]): if partitioned_weight[p].dtype != partitioned_result[ p].dtype: partitioned_weight[p] = math_ops.cast( partitioned_weight[p], partitioned_result[p].dtype) # Reshape partition weights. ones = array_ops.fill( array_ops.expand_dims( array_ops.rank(partitioned_result[p]) - 1, 0), 1) bcast_weights_shape = array_ops.concat( [array_ops.shape(partitioned_weight[p]), ones], 0) orig_weights_shape = partitioned_weight[p].get_shape() partitioned_weight[p] = array_ops.reshape( partitioned_weight[p], bcast_weights_shape) if partitioned_result[p].get_shape().ndims is not None: partitioned_weight[p].set_shape( orig_weights_shape.concatenate([ 1 for _ in range(partitioned_result[p]. get_shape().ndims - 1) ])) partitioned_result[p] *= partitioned_weight[p] partitioned_segment_ids = [] for p in xrange(np): if not ignore_weights: # Partition segment_ids according to pindices. p_segment_ids = array_ops.gather(segment_ids, pindices[p]) # Number the p_segment_ids to meet segment_sum's requirements. Note # that unique_p_segment_ids contains unique segment ids of this # partition and these ids' order is unchanged. unique_p_segment_ids, unique_p_segment_idx = array_ops.unique( p_segment_ids) partitioned_segment_ids.append(unique_p_segment_ids) # segment_sum this partition's result. with ops.colocate_with(params[p]): partitioned_result[p] = math_ops.segment_sum( partitioned_result[p], unique_p_segment_idx) else: # When ignore weights, we need to get indexs of elements in idx and # segment_ids. _, exclude_idx = array_ops.setdiff1d(idx, pindices[p]) all_idx = math_ops.range(array_ops.shape(idx)[0]) _, include_idx = array_ops.setdiff1d(all_idx, exclude_idx) # Gather segment_ids and idx according to indexs. p_segment_ids = array_ops.gather(segment_ids, include_idx) p_idx = array_ops.gather(idx, include_idx) # Number the p_segment_ids, same as ignore_weights case above. unique_p_segment_ids, unique_p_segment_idx = array_ops.unique( p_segment_ids) _, unique_p_idx_idx = array_ops.unique(p_idx) partitioned_segment_ids.append(unique_p_segment_ids) with ops.colocate_with(params[p]): partitioned_result[p] = math_ops.sparse_segment_sum( partitioned_result[p], unique_p_idx_idx, unique_p_segment_idx) # Concat each partition's segment_ids and result for final segment_sum. concat_segment_ids = array_ops.concat(partitioned_segment_ids, 0) concat_partitioned_result = array_ops.concat(partitioned_result, 0) return math_ops.unsorted_segment_sum( concat_partitioned_result, concat_segment_ids, math_ops.reduce_max(concat_segment_ids) + 1, name=name)
class SegmentReductionOpBenchmark(test.Benchmark): outer_dim_options = [2**x for x in range(9, 14, 2)] ratio_options = [2**x for x in range(1, 6, 2)] inner_dim_options = [2**x for x in range(9, 14, 2)] # randomly generated sizes with less alignments inner_dim_options += [ 1120, 1215, 1856, 1302, 1329, 1531, 1313, 1672, 1851, 1584 ] dtype_options = [np.float32, np.float64] options = (outer_dim_options, ratio_options, inner_dim_options, dtype_options) # pylint: disable=g-long-lambda op_functors = [ lambda vc, vs, seg_ids: ("sorted", math_ops.segment_sum(vc, vs)), lambda vc, vs, seg_ids: ("unsorted", math_ops.unsorted_segment_sum(vc, vs, seg_ids[-1] + 1)) ] # pylint: enable=g-long-lambda repeat = 10 def _npTypeToStr(self, t): if t == np.float32: return "fp32" if t == np.float64: return "fp64" def _runGraph(self, op_functor, outer_dim, ratio, inner_dim, dtype): output_outer_dim = int(outer_dim / ratio) const = np.random.randint(5, size=(outer_dim, inner_dim)) seg_ids = np.sort(np.random.randint(output_outer_dim, size=outer_dim)) vs = variables.Variable(seg_ids.astype(np.int32)) with ops.device("/gpu:0"): vc = variables.Variable(const.astype(dtype)) name, op = op_functor(vc, vs, seg_ids) with session.Session() as sess: self.evaluate(variables.global_variables_initializer()) r = self.run_op_benchmark( sess, op, min_iters=self.repeat, name="_".join( map(str, [ name, outer_dim, ratio, inner_dim, self._npTypeToStr(dtype) ]))) return name, r["wall_time"] def benchmarkSegmentSumGPU(self): if not test.is_gpu_available(cuda_only=True): return for outer_dim, ratio, inner_dim, dtype in itertools.product( *self.options): op_functor = self.op_functors[0] with ops.Graph().as_default(): self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype) def benchmarkUnsortedSegmentSumGPU(self): if not test.is_gpu_available(cuda_only=True): return for outer_dim, ratio, inner_dim, dtype in itertools.product( *self.options): op_functor = self.op_functors[1] with ops.Graph().as_default(): self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype)
def overlap_and_add(signal, frame_step, name=None): """Reconstructs a signal from a framed representation. Adds potentially overlapping frames of a signal with shape `[..., frames, frame_length]`, offsetting subsequent frames by `frame_step`. The resulting tensor has shape `[..., output_size]` where output_size = (frames - 1) * frame_step + frame_length Args: signal: A [..., frames, frame_length] `Tensor`. All dimensions may be unknown, and rank must be at least 2. frame_step: An integer or scalar `Tensor` denoting overlap offsets. Must be less than or equal to `frame_length`. name: An optional name for the operation. Returns: A `Tensor` with shape `[..., output_size]` containing the overlap-added frames of `signal`'s inner-most two dimensions. Raises: ValueError: If `signal`'s rank is less than 2, `frame_step` is not a scalar integer or `frame_step` is greater than `frame_length`. """ with ops.name_scope(name, "overlap_and_add", [signal, frame_step]): signal = ops.convert_to_tensor(signal, name="signal") signal.shape.with_rank_at_least(2) frame_step = ops.convert_to_tensor(frame_step, name="frame_step") frame_step.shape.assert_has_rank(0) if not frame_step.dtype.is_integer: raise ValueError("frame_step must be an integer. Got %s" % frame_step.dtype) # If frame_length and frame_step are known at graph construction time, check # frame_step is less than or equal to frame_length. frame_step_static = tensor_util.constant_value(frame_step) if (frame_step_static is not None and signal.shape.ndims is not None and signal.shape[-1].value is not None and frame_step_static > signal.shape[-1].value): raise ValueError( "frame_step (%d) must be less than or equal to frame_length (%d)" % (frame_step_static, signal.shape[-1].value)) signal_shape = array_ops.shape(signal) # All dimensions that are not part of the overlap-and-add. Can be empty for # rank 2 inputs. outer_dimensions = signal_shape[:-2] signal_rank = array_ops.rank(signal) frames = signal_shape[-2] frame_length = signal_shape[-1] subframe_length = util_ops.gcd(frame_length, frame_step) subframe_step = frame_step // subframe_length subframes_per_frame = frame_length // subframe_length output_size = frame_step * (frames - 1) + frame_length output_subframes = output_size // subframe_length # To avoid overlap-adding sample-by-sample, we overlap-add at the "subframe" # level, where a subframe is gcd(frame_length, frame_step). Reshape signal # from [..., frames, frame_length] into [..., subframes, subframe_length]. subframe_shape = array_ops.concat( [outer_dimensions, [-1, subframe_length]], 0) subframe_signal = array_ops.reshape(signal, subframe_shape) # Now we shuffle the last [subframes, subframe_length] dimensions to the # front. # TODO(rjryan): Add an axis argument to unsorted_segment_sum so we can # avoid this pair of transposes. subframe_signal = _shuffle_to_front(subframe_signal, 2) # Use unsorted_segment_sum to add overlapping subframes together. segment_ids = array_ops.reshape( shape_ops.frame(math_ops.range(output_subframes), subframes_per_frame, subframe_step, pad_end=False), [-1]) result = math_ops.unsorted_segment_sum(subframe_signal, segment_ids, num_segments=output_subframes) # result is a [subframes, subframe_length, ...outer_dimensions] tensor. We # return a [...outer_dimensions, output_size] tensor with a transpose and # reshape. result_shape = array_ops.concat([outer_dimensions, [output_size]], 0) return array_ops.reshape(_shuffle_to_front(result, signal_rank - 2), result_shape)
def step_fn(example): segment_ids = array_ops.zeros_like_v2(example) num_segment = array_ops.shape(example)[0] # If number of segments is dynamic, output should be a dynamic shape. return math_ops.unsorted_segment_sum(example, segment_ids, num_segment)