def _lower_triangular_mask(shape): """Creates a lower-triangular boolean mask over the last 2 dimensions.""" row_index = math_ops.cumsum( array_ops.ones(shape=shape, dtype=dtypes.int32), axis=-2) col_index = math_ops.cumsum( array_ops.ones(shape=shape, dtype=dtypes.int32), axis=-1) return math_ops.greater_equal(row_index, col_index)
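# Illustrative NumPy sketch (an assumption, not part of the original sources; the helper name
# lower_triangular_mask_np is hypothetical). It shows why cumsum over ones works here:
# cumsum along axis=-2 yields 1-based row indices, cumsum along axis=-1 yields 1-based
# column indices, and the mask is True wherever row_index >= col_index.
import numpy as np

def lower_triangular_mask_np(shape):
    ones = np.ones(shape, dtype=np.int32)
    row_index = np.cumsum(ones, axis=-2)  # 1, 2, 3, ... down each column
    col_index = np.cumsum(ones, axis=-1)  # 1, 2, 3, ... across each row
    return row_index >= col_index

print(lower_triangular_mask_np([3, 3]))
# [[ True False False]
#  [ True  True False]
#  [ True  True  True]]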
def testAxisType(self): for dtype in self.valid_dtypes: x = np.arange(1, 6).reshape([5]).astype(dtype) for axis_dtype in self.axis_dtypes(): with self.cached_session(), self.test_scope(): p = array_ops.placeholder(x.dtype) axis = constant_op.constant(0, axis_dtype) math_ops.cumsum(p, axis).eval(feed_dict={p: x})
def testAxisType(self): for dtype in self.valid_dtypes: x = np.arange(1, 6).reshape([5]).astype(dtype) for axis_dtype in [dtypes.int64, dtypes.int32]: with self.cached_session(use_gpu=True): axis = constant_op.constant(0, axis_dtype) tf_out = math_ops.cumsum(x, axis).eval()
def power_sums_tensor(array_size, power_matrix, multiplier): r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..(array_size + 1). Args: array_size: The number of non-trivial sums to pre-compute. power_matrix: The "A" matrix above. multiplier: The "B" matrix above Returns: A Tensor with S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T S[0] is the zero matrix S[1] is B S[2] is A B A^T + B ...and so on """ array_size = math_ops.cast(array_size, dtypes.int32) power_matrix = ops.convert_to_tensor(power_matrix) identity_like_power_matrix = linalg_ops.eye( array_ops.shape(power_matrix)[0], dtype=power_matrix.dtype) identity_like_power_matrix.set_shape( ops.convert_to_tensor(power_matrix).get_shape()) transition_powers = functional_ops.scan( lambda previous_power, _: math_ops.matmul(previous_power, power_matrix), math_ops.range(array_size - 1), initializer=identity_like_power_matrix) summed = math_ops.cumsum( array_ops.concat([ array_ops.expand_dims(multiplier, 0), math_ops.matmul( batch_times_matrix(transition_powers, multiplier), transition_powers, adjoint_b=True) ], 0)) return array_ops.concat( [array_ops.expand_dims(array_ops.zeros_like(multiplier), 0), summed], 0)
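# Illustrative NumPy sketch (an assumption, not the original implementation; the helper name
# power_sums_np is hypothetical): a brute-force check of what the cumsum above accumulates,
# S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T, with S[0] = 0, S[1] = B, S[2] = A B A^T + B, etc.
import numpy as np

def power_sums_np(array_size, A, B):
    sums = [np.zeros_like(B)]
    power = np.eye(A.shape[0])
    for _ in range(array_size):
        sums.append(sums[-1] + power @ B @ power.T)
        power = power @ A
    return np.stack(sums)  # shape [array_size + 1, d, d]

A = np.array([[0.9, 0.1], [0.0, 0.8]])
B = np.eye(2)
S = power_sums_np(3, A, B)
print(S[1])  # == B
print(S[2])  # == A B A^T + B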
def _compareGradient(self, shape, axis, exclusive, reverse): x = np.arange(0, 50).reshape(shape).astype(np.float64) with self.cached_session(use_gpu=True): t = ops.convert_to_tensor(x) result = math_ops.cumsum(t, axis, exclusive, reverse) jacob_t, jacob_n = gradient_checker.compute_gradient( t, shape, result, shape, x_init_value=x, delta=1) self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
def _compare(self, x, axis, exclusive, reverse): np_out = handle_options(np.cumsum, x, axis, exclusive, reverse) with self.cached_session(), self.test_scope(): p = array_ops.placeholder(x.dtype) tf_out = math_ops.cumsum(p, axis, exclusive, reverse).eval( feed_dict={p: x}) self.assertAllClose(np_out, tf_out)
def _CumsumGrad(op, grad): axis = op.inputs[1] exclusive = op.get_attr("exclusive") reverse = op.get_attr("reverse") return [ math_ops.cumsum(grad, axis, exclusive=exclusive, reverse=not reverse), None ]
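# Illustrative NumPy sketch (an assumption, not part of the original sources) of why the
# gradient above is a cumsum in the opposite direction: in y = cumsum(x), x[i] contributes to
# every y[j] with j >= i, so dL/dx[i] = sum_{j >= i} dL/dy[j], i.e. a reversed cumulative sum.
import numpy as np

grad_y = np.array([0.1, 0.2, 0.3, 0.4])   # upstream gradient dL/dy
grad_x = np.cumsum(grad_y[::-1])[::-1]    # reversed cumsum
print(grad_x)                             # [1.  0.9 0.7 0.4]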
def segment_ids_to_row_splits(segment_ids, num_segments=None, out_type=None, name=None): """Generates the RaggedTensor `row_splits` corresponding to a segmentation. Returns an integer vector `splits`, where `splits[0] = 0` and `splits[i] = splits[i-1] + count(segment_ids==i-1)`. Example: ```python >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval() [ 0 3 3 5 6 9 ] ``` Args: segment_ids: A 1-D integer Tensor. num_segments: A scalar integer indicating the number of segments. Defaults to `max(segment_ids) + 1` (or zero if `segment_ids` is empty). out_type: The dtype for the return value. Defaults to `segment_ids.dtype`, or `tf.int64` if `segment_ids` does not have a dtype. name: A name prefix for the returned tensor (optional). Returns: A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`. """ if out_type is None: if isinstance(segment_ids, ops.Tensor): out_type = segment_ids.dtype elif isinstance(num_segments, ops.Tensor): out_type = num_segments.dtype else: out_type = dtypes.int64 else: out_type = dtypes.as_dtype(out_type) with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name: # Note: we cast int64 tensors to int32, since bincount currently only # supports int32 inputs. segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids", dtype=dtypes.int32) segment_ids.shape.assert_has_rank(1) if num_segments is not None: num_segments = ragged_util.convert_to_int_tensor(num_segments, "num_segments", dtype=dtypes.int32) num_segments.shape.assert_has_rank(0) row_lengths = math_ops.bincount( segment_ids, minlength=num_segments, maxlength=num_segments, dtype=out_type) splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0) # Update shape information, if possible. if num_segments is not None: const_num_segments = tensor_util.constant_value(num_segments) if const_num_segments is not None: splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1])) return splits
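# Illustrative NumPy sketch (an assumption, not the original source): the docstring example
# reduces to a bincount of the segment ids followed by a cumulative sum with a leading zero.
import numpy as np

segment_ids = np.array([0, 0, 0, 2, 2, 3, 4, 4, 4])
row_lengths = np.bincount(segment_ids, minlength=segment_ids.max() + 1)  # [3 0 2 1 3]
splits = np.concatenate([[0], np.cumsum(row_lengths)])
print(splits)  # [0 3 3 5 6 9]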
def _CumprodGrad(op, grad): x = op.inputs[0] axis = op.inputs[1] reverse = op.get_attr("reverse") # TODO This fails when x contains 0 and should be fixed prod = math_ops.cumprod(x, axis=axis, reverse=reverse) out = math_ops.cumsum(prod * grad, axis=axis, reverse=(not reverse)) return [out / x, None]
def _effective_sample_size_single_state(states, filter_beyond_lag, filter_threshold): """ESS computation for one single Tensor argument.""" with ops.name_scope( "effective_sample_size_single_state", values=[states, filter_beyond_lag, filter_threshold]): states = ops.convert_to_tensor(states, name="states") dt = states.dtype # filter_beyond_lag == None ==> auto_corr is the full sequence. auto_corr = sample_stats.auto_correlation( states, axis=0, max_lags=filter_beyond_lag) if filter_threshold is not None: filter_threshold = ops.convert_to_tensor( filter_threshold, dtype=dt, name="filter_threshold") # Get a binary mask to zero out values of auto_corr below the threshold. # mask[i, ...] = 1 if auto_corr[j, ...] > threshold for all j <= i, # mask[i, ...] = 0, otherwise. # So, along dimension zero, the mask will look like [1, 1, ..., 0, 0,...] # Building step by step, # Assume auto_corr = [1, 0.5, 0.0, 0.3], and filter_threshold = 0.2. # Step 1: mask = [False, False, True, False] mask = auto_corr < filter_threshold # Step 2: mask = [0, 0, 1, 0] mask = math_ops.cast(mask, dtype=dt) # Step 3: mask = [0, 0, 1, 1] mask = math_ops.cumsum(mask, axis=0) # Step 4: mask = [1, 1, 0, 0] mask = math_ops.maximum(1. - mask, 0.) auto_corr *= mask # With R[k] := auto_corr[k, ...], # ESS = N / {1 + 2 * Sum_{k=1}^N (N - k) / N * R[k]} # = N / {-1 + 2 * Sum_{k=0}^N (N - k) / N * R[k]} (since R[0] = 1) # approx N / {-1 + 2 * Sum_{k=0}^M (N - k) / N * R[k]} # where M is the filter_beyond_lag truncation point chosen above. # Get the factor (N - k) / N, and give it shape [M, 1,...,1], having total # ndims the same as auto_corr n = _axis_size(states, axis=0) k = math_ops.range(0., _axis_size(auto_corr, axis=0)) nk_factor = (n - k) / n if auto_corr.shape.ndims is not None: new_shape = [-1] + [1] * (auto_corr.shape.ndims - 1) else: new_shape = array_ops.concat( ([-1], array_ops.ones([array_ops.rank(auto_corr) - 1], dtype=dtypes.int32)), axis=0) nk_factor = array_ops.reshape(nk_factor, new_shape) return n / (-1 + 2 * math_ops.reduce_sum(nk_factor * auto_corr, axis=0))
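# Illustrative NumPy sketch (an assumption, not the original source) of the masking trick
# described in the comments above: once the autocorrelation drops below the threshold, the
# cumsum makes the mask "sticky", so every later lag is zeroed out as well.
import numpy as np

auto_corr = np.array([1.0, 0.5, 0.0, 0.3])
filter_threshold = 0.2

mask = (auto_corr < filter_threshold).astype(float)  # [0. 0. 1. 0.]
mask = np.cumsum(mask)                               # [0. 0. 1. 1.]
mask = np.maximum(1.0 - mask, 0.0)                   # [1. 1. 0. 0.]
print(auto_corr * mask)  # [1.  0.5 0.  0. ] -- the 0.3 "echo" after the cutoff is removed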
def make_tril_ids(n): """Internal helper to create vector of linear indices into y.""" cols = array_ops.reshape(array_ops.tile(math_ops.range(n), [n]), [n, n]) rows = array_ops.tile( array_ops.expand_dims(math_ops.range(n), -1), [1, n]) pred = math_ops.greater(cols, rows) tril_ids = array_ops.tile(array_ops.reshape( math_ops.cumsum(math_ops.range(n)), [n, 1]), [1, n]) + cols tril_ids = math_ops.select(pred, array_ops.zeros([n, n], dtype=dtypes.int32), tril_ids + 1) tril_ids = array_ops.reshape(tril_ids, [-1]) return tril_ids
def test_searchsorted(self): sorted_inputs = math_ops.cumsum(random_ops.random_uniform([3, 2, 4]), axis=-1) values = random_ops.random_uniform([2, 3], minval=-1, maxval=4.5) def loop_fn(i): inputs_i = array_ops.gather(sorted_inputs, i) return [array_ops.searchsorted(inputs_i, values, out_type=dtypes.int32, side="left"), # creates LowerBound op. array_ops.searchsorted(inputs_i, values, out_type=dtypes.int64, side="right")] # creates UpperBound op. self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.int32, dtypes.int64])
def testReadmeExample(self): data = random_ops.random_uniform((128, 128), 0, 10, dtype=dtypes.int32) histogram = math_ops.bincount(data, minlength=10, maxlength=10) cdf = math_ops.cumsum(histogram, exclusive=False) cdf = array_ops.pad(cdf, [[1, 0]]) cdf = array_ops.reshape(cdf, [1, 1, -1]) data = math_ops.cast(data, dtypes.int16) encoded = coder_ops.range_encode(data, cdf, precision=14) decoded = coder_ops.range_decode( encoded, array_ops.shape(data), cdf, precision=14) with self.test_session() as sess: self.assertAllEqual(*sess.run((data, decoded)))
def _update_mask(self, weights, threshold): """Updates the mask for a given weight tensor. This function first computes the cdf of the weight tensor, and estimates the threshold value such that 'desired_sparsity' fraction of weights have magnitude less than the threshold. Args: weights: The weight tensor that needs to be masked. threshold: The current threshold value. The function will compute a new threshold and return the exponential moving average using the current value of threshold. Returns: new_threshold: The new value of the threshold based on weights, and sparsity at the current global_step. new_mask: A tensor of the same size and shape as weights containing 0 or 1 to indicate which of the values in weights falls below the threshold. Raises: ValueError: if sparsity is not defined """ if self._sparsity is None: raise ValueError('Sparsity variable undefined') with ops.name_scope(weights.op.name + '_pruning_ops'): abs_weights = math_ops.abs(weights) max_value = math_ops.reduce_max(abs_weights) histogram = _histogram( abs_weights, [0.0, max_value], nbins=self._spec.nbins, dtype=np.float32) cdf = math_ops.cumsum(histogram) norm_cdf = math_ops.div(cdf, math_ops.reduce_sum(histogram)) current_threshold = math_ops.multiply( math_ops.div( math_ops.reduce_sum( math_ops.cast( math_ops.less(norm_cdf, self._sparsity), np.float32)), float(self._spec.nbins)), max_value) smoothed_threshold = math_ops.add_n([ math_ops.multiply(current_threshold, 1 - self._spec.threshold_decay), math_ops.multiply(threshold, self._spec.threshold_decay) ]) new_mask = math_ops.cast( math_ops.greater(abs_weights, smoothed_threshold), np.float32) return smoothed_threshold, new_mask
def _randomize(coeffs, radixes, seed=None): """Applies the Owen randomization to the coefficients.""" given_dtype = coeffs.dtype coeffs = math_ops.to_int32(coeffs) num_coeffs = array_ops.shape(coeffs)[-1] radixes = array_ops.reshape(math_ops.to_int32(radixes), [-1]) perms = _get_permutations(num_coeffs, radixes, seed=seed) perms = array_ops.reshape(perms, [-1]) radix_sum = math_ops.reduce_sum(radixes) radix_offsets = array_ops.reshape(math_ops.cumsum(radixes, exclusive=True), [-1, 1]) offsets = radix_offsets + math_ops.range(num_coeffs) * radix_sum permuted_coeffs = array_ops.gather(perms, coeffs + offsets) return math_ops.cast(permuted_coeffs, dtype=given_dtype)
def sparsemax(logits, name=None): """Computes sparsemax activations [1]. For each batch `i` and class `j` we have sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0) [1]: https://arxiv.org/abs/1602.02068 Args: logits: A `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. name: A name for the operation (optional). Returns: A `Tensor`. Has the same type as `logits`. """ with ops.name_scope(name, "sparsemax", [logits]) as name: logits = ops.convert_to_tensor(logits, name="logits") obs = array_ops.shape(logits)[0] dims = array_ops.shape(logits)[1] z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis] # sort z z_sorted, _ = nn.top_k(z, k=dims) # calculate k(z) z_cumsum = math_ops.cumsum(z_sorted, axis=1) k = math_ops.range( 1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype ) z_check = 1 + k * z_sorted > z_cumsum # because the z_check vector is always [1,1,...1,0,0,...0] finding the # (index + 1) of the last `1` is the same as just summing the number of 1. k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1) # calculate tau(z) indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1) tau_sum = array_ops.gather_nd(z_cumsum, indices) tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype) # calculate p return math_ops.maximum( math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis] )
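# Illustrative NumPy sketch (an assumption, not the original implementation; sparsemax_np is
# a hypothetical name) of the projection above: sort descending, take a cumulative sum, find
# the support size k(z), compute tau(z), and clip. Each output row sums to 1 and small
# logits are mapped to exactly 0.
import numpy as np

def sparsemax_np(logits):
    z = logits - logits.mean(axis=1, keepdims=True)
    z_sorted = -np.sort(-z, axis=1)                  # descending sort
    z_cumsum = np.cumsum(z_sorted, axis=1)
    k = np.arange(1, z.shape[1] + 1)
    z_check = 1 + k * z_sorted > z_cumsum            # support indicator per sorted position
    k_z = z_check.sum(axis=1)                        # support size per row
    tau_sum = z_cumsum[np.arange(z.shape[0]), k_z - 1]
    tau_z = (tau_sum - 1) / k_z
    return np.maximum(z - tau_z[:, np.newaxis], 0.0)

print(sparsemax_np(np.array([[0.1, 1.1, 0.2], [3.0, 0.0, 0.0]])))
# [[0.   0.95 0.05]
#  [1.   0.   0.  ]]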
def test_searchsorted(self): sorted_inputs = math_ops.cumsum(random_ops.random_uniform([3, 2, 4]), axis=-1) values = random_ops.random_uniform([2, 3], minval=-1, maxval=4.5) def loop_fn(i): inputs_i = array_ops.gather(sorted_inputs, i) return [ array_ops.searchsorted(inputs_i, values, out_type=dtypes.int32, side="left"), # creates LowerBound op. array_ops.searchsorted(inputs_i, values, out_type=dtypes.int64, side="right") ] # creates UpperBound op. self._test_loop_fn(loop_fn, 3)
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None): """Generates the RaggedTensor `splits` vector corresponding to a segmentation. Returns an integer vector `splits`, where `splits[0] = 0` and `splits[i] = splits[i-1] + count(segment_ids==i-1)`. Example: ```python >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval() [ 0 3 3 5 6 9 ] ``` Args: segment_ids: A 1-D integer Tensor. num_segments: A scalar integer indicating the number of segments. Defaults to `max(segment_ids) + 1` (or zero if `segment_ids` is empty). name: A name prefix for the returned tensor (optional). Returns: A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`. """ with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name: segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids") segment_ids.shape.assert_has_rank(1) if num_segments is not None: num_segments = ragged_util.convert_to_int_tensor(num_segments, "num_segments") num_segments.shape.assert_has_rank(0) row_lengths = math_ops.bincount( segment_ids, minlength=num_segments, maxlength=num_segments, dtype=dtypes.int64) splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0) # Update shape information, if possible. if num_segments is not None: const_num_segments = tensor_util.constant_value(num_segments) if const_num_segments is not None: splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1])) return splits
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None): """Generates the RaggedTensor `row_splits` corresponding to a segmentation. Returns an integer vector `splits`, where `splits[0] = 0` and `splits[i] = splits[i-1] + count(segment_ids==i-1)`. Example: ```python >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval() [ 0 3 3 5 6 9 ] ``` Args: segment_ids: A 1-D integer Tensor. num_segments: A scalar integer indicating the number of segments. Defaults to `max(segment_ids) + 1` (or zero if `segment_ids` is empty). name: A name prefix for the returned tensor (optional). Returns: A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`. """ with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name: segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids") segment_ids.shape.assert_has_rank(1) if num_segments is not None: num_segments = ragged_util.convert_to_int_tensor(num_segments, "num_segments") num_segments.shape.assert_has_rank(0) row_lengths = math_ops.bincount( segment_ids, minlength=num_segments, maxlength=num_segments, dtype=dtypes.int64) splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0) # Update shape information, if possible. if num_segments is not None: const_num_segments = tensor_util.constant_value(num_segments) if const_num_segments is not None: splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1])) return splits
def safe_cumprod(x, *args, **kwargs): """Computes cumprod of x in logspace using cumsum to avoid underflow. The cumprod function and its gradient can result in numerical instabilities when its argument has very small and/or zero values. As long as the argument is all positive, we can instead compute the cumulative product as exp(cumsum(log(x))). This function can be called identically to tf.cumprod. Args: x: Tensor to take the cumulative product of. *args: Passed on to cumsum; these are identical to those in cumprod. **kwargs: Passed on to cumsum; these are identical to those in cumprod. Returns: Cumulative product of x. """ with ops.name_scope(None, "SafeCumprod", [x]): x = ops.convert_to_tensor(x, name="x") tiny = np.finfo(x.dtype.as_numpy_dtype).tiny return math_ops.exp(math_ops.cumsum( math_ops.log(clip_ops.clip_by_value(x, tiny, 1)), *args, **kwargs))
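# Illustrative NumPy sketch (an assumption, not the original source): for strictly positive
# inputs, a cumulative product equals exp(cumsum(log(x))), which is the identity safe_cumprod
# relies on to avoid underflow in long products of small probabilities.
import numpy as np

x = np.array([0.9, 0.5, 1e-3, 0.7])
tiny = np.finfo(x.dtype).tiny
direct = np.cumprod(x)
via_logspace = np.exp(np.cumsum(np.log(np.clip(x, tiny, 1.0))))
print(np.allclose(direct, via_logspace))  # True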
def from_row_lengths(values, row_lengths, name=None): """Creates a `RaggedTensor` with rows partitioned by `row_lengths`. The returned `RaggedTensor` corresponds with the python list defined by: ```python result = [[values.pop(0) for i in range(length)] for length in row_lengths] ``` Args: values: A potentially ragged tensor with shape `[nvals, ...]`. row_lengths: A 1-D int64 tensor with shape `[nrows]`. Must be nonnegative. `sum(row_lengths)` must be `nvals`. name: A name prefix for the RaggedTensor (optional). Returns: A `RaggedTensor`. `result.rank = values.rank + 1`. `result.ragged_rank = values.ragged_rank + 1`. #### Example: ```python >>> rt = ragged.from_row_lengths( ... values=[3, 1, 4, 1, 5, 9, 2, 6], ... row_lengths=[4, 0, 3, 1, 0]) >>> rt.eval().tolist() [[3, 1, 4, 1], [], [5, 9, 2], [6], []] ``` """ with ops.name_scope(name, 'RaggedFromRowLengths', [values, row_lengths]): values = convert_to_tensor_or_ragged_tensor(values, name='values') row_lengths = ops.convert_to_tensor(row_lengths, dtypes.int64, 'row_lengths') row_lengths.shape.assert_has_rank(1) row_limits = math_ops.cumsum(row_lengths) row_splits = array_ops.concat([[0], row_limits], axis=0) return ragged_tensor.RaggedTensor( values=values, row_splits=row_splits, cached_row_lengths=row_lengths, internal=True)
def _sparsemax(logits, name=None): """Computes sparsemax activations [1]. For each batch `i` and class `j` we have sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0) [1]: https://arxiv.org/abs/1602.02068 Args: logits: A 2-D `Tensor` of logits. Returns: A `Tensor`. Has the same type as `logits`. """ with ops.name_scope(name, "sparsemax", [logits]) as name: logits = ops.convert_to_tensor(logits, name="logits") obs = logits.shape[0] dims = logits.shape[1] z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis] # sort z z_sorted, _ = nn.top_k(z, k=dims) # calculate k(z) z_cumsum = math_ops.cumsum(z_sorted, axis=1) k = math_ops.range( 1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype) z_check = 1 + k * z_sorted > z_cumsum # because the z_check vector is always [1,1,...1,0,0,...0] finding the # (index + 1) of the last `1` is the same as just summing the number of 1. k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1) # calculate tau(z) indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1) tau_sum = array_ops.gather_nd(z_cumsum, indices) tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype) # calculate p return math_ops.maximum( math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis])
def compute_cdf_from_histogram(values, value_range, **kwargs): """Returns the normalized cumulative distribution of the given values tensor. Computes the histogram and uses tf.cumsum to arrive at cdf Args: values: Numeric `Tensor`. value_range: Shape [2] `Tensor` of same `dtype` as `values`. **kwargs: keyword arguments: nbins, name Returns: A 1-D `Tensor` holding normalized cdf of values. """ nbins = kwargs.get('nbins', _NBINS) name = kwargs.get('name', None) with ops.name_scope(name, 'cdf', [values, value_range, nbins]): histogram = _histogram( values, value_range, dtype=dtypes.float32, nbins=nbins) cdf = math_ops.cumsum(histogram) return math_ops.div(cdf, math_ops.reduce_max(cdf))
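# Illustrative NumPy sketch (an assumption, not the original source): a normalized CDF from a
# histogram is simply the cumulative sum of the bin counts divided by its last (largest) entry.
import numpy as np

values = np.random.RandomState(0).uniform(0.0, 1.0, size=1000)
histogram, _ = np.histogram(values, bins=256, range=(0.0, 1.0))
cdf = np.cumsum(histogram)
cdf = cdf / cdf[-1]               # normalized so that cdf[-1] == 1.0
print(cdf[0], cdf[-1])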
def testInvalidAxis(self): x = np.arange(0, 10).reshape([2, 5]).astype(np.float32) input_tensor = ops.convert_to_tensor(x) with self.session(use_gpu=True): with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, lambda e: "Expected scan axis in the range [-2, 2)" in str(e)): math_ops.cumsum(input_tensor, -3).eval() with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, lambda e: "Expected scan axis in the range [-2, 2)" in str(e)): math_ops.cumsum(input_tensor, 2).eval() with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, lambda e: "axis must be a scalar" in str(e)): math_ops.cumsum(input_tensor, [0]).eval()
def from_row_lengths(values, row_lengths, name=None): """Creates a `RaggedTensor` with rows partitioned by `row_lengths`. The returned `RaggedTensor` corresponds with the python list defined by: ```python result = [[values.pop(0) for i in range(length)] for length in row_lengths] ``` Args: values: A potentially ragged tensor with shape `[nvals, ...]`. row_lengths: A 1-D int64 tensor with shape `[nrows]`. Must be nonnegative. `sum(row_lengths)` must be `nvals`. name: A name prefix for the RaggedTensor (optional). Returns: A `RaggedTensor`. `result.rank = values.rank + 1`. `result.ragged_rank = values.ragged_rank + 1`. #### Example: ```python >>> rt = ragged.from_row_lengths( ... values=[3, 1, 4, 1, 5, 9, 2, 6], ... row_lengths=[4, 0, 3, 1, 0]) >>> rt.eval().tolist() [[3, 1, 4, 1], [], [5, 9, 2], [6], []] ``` """ with ops.name_scope(name, 'RaggedFromRowLengths', [values, row_lengths]): values = convert_to_tensor_or_ragged_tensor(values, name='values') row_lengths = ops.convert_to_tensor(row_lengths, dtypes.int64, 'row_lengths') row_lengths.shape.assert_has_rank(1) row_limits = math_ops.cumsum(row_lengths) row_splits = array_ops.concat([[0], row_limits], axis=0) return ragged_tensor.RaggedTensor(values=values, row_splits=row_splits, cached_row_lengths=row_lengths, internal=True)
def testInvalidAxis(self): x = np.arange(0, 10).reshape([2, 5]).astype(np.float32) input_tensor = ops.convert_to_tensor(x) with self.session(): with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, lambda e: "Expected scan axis in the range [-2, 2)" in str(e)): math_ops.cumsum(input_tensor, -3).eval() with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, lambda e: "Expected scan axis in the range [-2, 2)" in str(e)): math_ops.cumsum(input_tensor, 2).eval() with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, lambda e: "axis must be a scalar" in str(e)): math_ops.cumsum(input_tensor, [0]).eval()
def kaiser_bessel_derived_window(window_length, beta=12., dtype=dtypes.float32, name=None): """Generate a [Kaiser Bessel derived window][kbd]. Args: window_length: A scalar `Tensor` indicating the window length to generate. beta: Beta parameter for Kaiser window. dtype: The data type to produce. Must be a floating point type. name: An optional name for the operation. Returns: A `Tensor` of shape `[window_length]` of type `dtype`. [kbd]: https://en.wikipedia.org/wiki/Kaiser_window#Kaiser%E2%80%93Bessel-derived_(KBD)_window """ with ops.name_scope(name, 'kaiser_bessel_derived_window'): window_length = _check_params(window_length, dtype) halflen = window_length // 2 kaiserw = kaiser_window(halflen + 1, beta, dtype=dtype) kaiserw_csum = math_ops.cumsum(kaiserw) halfw = math_ops.sqrt(kaiserw_csum[:-1] / kaiserw_csum[-1]) window = array_ops.concat((halfw, halfw[::-1]), axis=0) return window
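# Illustrative NumPy sketch (an assumption, not the original implementation; kbd_window_np is
# a hypothetical name): NumPy ships a Kaiser window (np.kaiser), so the Kaiser-Bessel-derived
# construction above can be mirrored as a cumulative sum of a half-length-plus-one Kaiser
# window, normalized, square-rooted, and reflected.
import numpy as np

def kbd_window_np(window_length, beta=12.0):
    halflen = window_length // 2
    kaiserw = np.kaiser(halflen + 1, beta)
    kaiserw_csum = np.cumsum(kaiserw)
    halfw = np.sqrt(kaiserw_csum[:-1] / kaiserw_csum[-1])
    return np.concatenate([halfw, halfw[::-1]])

print(kbd_window_np(64).shape)  # (64,)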
def sample(self, time, outputs, state, name=None): """sample for SampleEmbeddingHelper.""" del time, state # unused by sample_fn # Outputs are logits, we sample instead of argmax (greedy). if not isinstance(outputs, ops.Tensor): raise TypeError("Expected outputs to be a single Tensor, got: %s" % type(outputs)) probs = nn_ops.softmax(outputs, -1) sorted_args = sort_ops.argsort(probs, -1, direction='DESCENDING') sorted_nucleus_probs = math_ops.cumsum( sort_ops.sort(probs, -1, direction='DESCENDING'), -1) < self._nucleus nucleus_probs = array_ops.gather(sorted_nucleus_probs, sort_ops.argsort( sorted_args, -1, direction='ASCENDING'), batch_dims=1) argmax_probs = array_ops.one_hot(math_ops.argmax(outputs, -1), depth=array_ops.shape(outputs)[-1], on_value=True, off_value=False, dtype=dtypes.bool) outputs = array_ops.where( (nucleus_probs | argmax_probs), outputs, -np.inf * array_ops.ones_like(outputs, dtype=dtypes.float32)) if self._softmax_temperature is None: logits = outputs else: logits = outputs / self._softmax_temperature sample_ids = categorical_sample(logits=logits, seed=self._seed) return sample_ids
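# Illustrative NumPy sketch (an assumption, not the original source) of the nucleus (top-p)
# filtering step above: keep the smallest set of tokens whose descending-sorted probabilities
# have a cumulative sum below the nucleus threshold, always retaining at least the argmax.
import numpy as np

probs = np.array([0.05, 0.40, 0.10, 0.30, 0.15])
nucleus = 0.8

order = np.argsort(-probs)                        # token indices in descending probability
sorted_keep = np.cumsum(probs[order]) < nucleus   # keep-mask in sorted order
keep = np.zeros_like(sorted_keep)
keep[order] = sorted_keep                         # scatter the mask back to original order
keep[np.argmax(probs)] = True                     # the argmax token is always allowed
print(keep)  # [False  True False  True False]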
def prop_raw(x): obs = array_ops.shape(x)[0] dim = array_ops.shape(x)[1] z = x - math_ops.reduce_mean(x, axis=1)[:, array_ops.newaxis] z_sorted, _ = nn.top_k(z, k=dim) z_cumsum = math_ops.cumsum(z_sorted, axis=1) k = math_ops.range(1, math_ops.cast(dim, x.dtype) + 1, dtype=x.dtype) z_check = 1 + k * z_sorted > z_cumsum k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1) indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1) tau_sum = array_ops.gather_nd(z_cumsum, indices) tau_z = (tau_sum - 1) / math_ops.cast(k_z, x.dtype) return math_ops.maximum(math_ops.cast(0, x.dtype), z - tau_z[:, array_ops.newaxis])
def monotonic_attention(p_choose_i, previous_attention, mode): """Compute monotonic attention distribution from choosing probabilities. Monotonic attention implies that the input sequence is processed in an explicitly left-to-right manner when generating the output sequence. In addition, once an input sequence element is attended to at a given output timestep, elements occurring before it cannot be attended to at subsequent output timesteps. This function generates attention distributions according to these assumptions. For more information, see ``Online and Linear-Time Attention by Enforcing Monotonic Alignments''. Args: p_choose_i: Probability of choosing input sequence/memory element i. Should be of shape (batch_size, input_sequence_length), and should all be in the range [0, 1]. previous_attention: The attention distribution from the previous output timestep. Should be of shape (batch_size, input_sequence_length). For the first output timestep, previous_attention[n] should be [1, 0, 0, ..., 0] for all n in [0, ... batch_size - 1]. mode: How to compute the attention distribution. Must be one of 'recursive', 'parallel', or 'hard'. * 'recursive' uses tf.scan to recursively compute the distribution. This is slowest but is exact, general, and does not suffer from numerical instabilities. * 'parallel' uses parallelized cumulative-sum and cumulative-product operations to compute a closed-form solution to the recurrence relation defining the attention distribution. This makes it more efficient than 'recursive', but it requires numerical checks which make the distribution non-exact. This can be a problem in particular when input_sequence_length is long and/or p_choose_i has entries very close to 0 or 1. * 'hard' requires that the probabilities in p_choose_i are all either 0 or 1, and subsequently uses a more efficient and exact solution. Returns: A tensor of shape (batch_size, input_sequence_length) representing the attention distributions for each sequence in the batch. Raises: ValueError: mode is not one of 'recursive', 'parallel', 'hard'.
""" # Force things to be tensors p_choose_i = ops.convert_to_tensor(p_choose_i, name="p_choose_i") previous_attention = ops.convert_to_tensor( previous_attention, name="previous_attention") if mode == "recursive": # Use .shape[0].value when it's not None, or fall back on symbolic shape batch_size = p_choose_i.shape[0].value or array_ops.shape(p_choose_i)[0] # Compute [1, 1 - p_choose_i[0], 1 - p_choose_i[1], ..., 1 - p_choose_i[-2]] shifted_1mp_choose_i = array_ops.concat( [array_ops.ones((batch_size, 1)), 1 - p_choose_i[:, :-1]], 1) # Compute attention distribution recursively as # q[i] = (1 - p_choose_i[i])*q[i - 1] + previous_attention[i] # attention[i] = p_choose_i[i]*q[i] attention = p_choose_i*array_ops.transpose(functional_ops.scan( # Need to use reshape to remind TF of the shape between loop iterations lambda x, yz: array_ops.reshape(yz[0]*x + yz[1], (batch_size,)), # Loop variables yz[0] and yz[1] [array_ops.transpose(shifted_1mp_choose_i), array_ops.transpose(previous_attention)], # Initial value of x is just zeros array_ops.zeros((batch_size,)))) elif mode == "parallel": # safe_cumprod computes cumprod in logspace with numeric checks cumprod_1mp_choose_i = safe_cumprod(1 - p_choose_i, axis=1, exclusive=True) # Compute recurrence relation solution attention = p_choose_i*cumprod_1mp_choose_i*math_ops.cumsum( previous_attention / # Clip cumprod_1mp to avoid divide-by-zero clip_ops.clip_by_value(cumprod_1mp_choose_i, 1e-10, 1.), axis=1) elif mode == "hard": # Remove any probabilities before the index chosen last time step p_choose_i *= math_ops.cumsum(previous_attention, axis=1) # Now, use exclusive cumprod to remove probabilities after the first # chosen index, like so: # p_choose_i = [0, 0, 0, 1, 1, 0, 1, 1] # cumprod(1 - p_choose_i, exclusive=True) = [1, 1, 1, 1, 0, 0, 0, 0] # Product of above: [0, 0, 0, 1, 0, 0, 0, 0] attention = p_choose_i*math_ops.cumprod( 1 - p_choose_i, axis=1, exclusive=True) else: raise ValueError("mode must be 'recursive', 'parallel', or 'hard'.") return attention
def lengths_to_splits(lengths): """Returns splits corresponding to the given lengths.""" return array_ops.concat([[0], math_ops.cumsum(lengths)], axis=-1)
def split(value: ragged_tensor.Ragged, num_or_size_splits, axis=0, num=None, name=None): """Splits a RaggedTensor `value` into a list of sub RaggedTensors. If `num_or_size_splits` is an `int`, then it splits `value` along the dimension `axis` into `num_or_size_splits` smaller RaggedTensors. This requires that `value.shape[axis]` is divisible by `num_or_size_splits`. If `num_or_size_splits` is a 1-D Tensor (or list), then `value` is split into `len(num_or_size_splits)` elements. The shape of the `i`-th element has the same size as the `value` except along dimension `axis` where the size is `num_or_size_splits[i]`. Splitting along a ragged dimension is not allowed. For example: >>> rt = tf.RaggedTensor.from_row_lengths( ... np.arange(6 * 3).reshape(6, 3), row_lengths=[1, 2, 2, 1]) >>> rt.shape TensorShape([4, None, 3]) >>> >>> rt1, rt2 = tf.split(rt, 2) # uniform splits >>> rt1.shape TensorShape([2, None, 3]) >>> rt2.shape TensorShape([2, None, 3]) >>> >>> rt3, rt4, rt5 = tf.split(rt, [1, 2, 1]) # ragged splits >>> rt3.shape TensorShape([1, None, 3]) >>> rt4.shape TensorShape([2, None, 3]) >>> rt5.shape TensorShape([1, None, 3]) >>> >>> rt6, rt7 = tf.split(rt, [1, 2], axis=2) # splits along axis 2 >>> rt6.shape TensorShape([4, None, 1]) >>> rt7.shape TensorShape([4, None, 2]) Args: value: The `RaggedTensor` to split. num_or_size_splits: Either an `int` indicating the number of splits along `axis` or a 1-D integer `Tensor` or Python list containing the sizes of each output tensor along `axis`. If a Python int, then it must evenly divide `value.shape[axis]`; otherwise the sum of sizes along the split axis must match that of the `value`. axis: An `int` or scalar `int32` `Tensor`. The dimension along which to split. Must be in the range `[-rank(value), rank(value))`. Defaults to 0. num: An `int` used to specify the number of outputs when `num_or_size_splits` is a 1-D list or `Tensor` and its length is statically unknown, e.g., specifying `tf.TensorSpec(None)` with the `input_signature` argument of `tf.function` (optional). name: A name for the operation (optional). Returns: if `num_or_size_splits` is an `int` returns a list of `num_or_size_splits` `RaggedTensor` objects; if `num_or_size_splits` is a 1-D Tensor returns `num_or_size_splits.get_shape[0]` `RaggedTensor` objects resulting from splitting `value`. Raises: ValueError: If the dimension `axis` of `value` is a ragged dimension. ValueError: If `num` is unspecified and cannot be inferred. ValueError: If `num` is specified but doesn't match the length of `num_or_size_splits`. ValueError: If `num_or_size_splits` is an `int` and less than 1. TypeError: If `num_or_size_splits` is not an `int` or 1-D list or 1-D `Tensor`. InvalidArgumentError: If the `axis` of `value` cannot be exactly split by `num_or_size_splits`. InvalidArgumentError: If `num_or_size_splits` contains negative integers. InvalidArgumentError: If `num_or_size_splits`'s static shape is unknown and its dynamic shape is inconsistent with `num`. InvalidArgumentError: If `num_or_size_splits`'s static rank is unknown and `axis` is a negative integer.
""" with ops.name_scope(name, 'RaggedSplit'): value = ragged_tensor.convert_to_tensor_or_ragged_tensor( value, name='value') if isinstance(num_or_size_splits, int) and num_or_size_splits == 1: return [value] # static assert check_ops.assert_integer_v2( num_or_size_splits, message=('`num_or_size_splits` must be an `int` or 1-D list or ' '`Tensor` of integers.')) value_shape = ragged_shape.RaggedShape.from_tensor(value) axis = array_ops.get_positive_axis(axis, value_shape.rank) try: dim_size = value_shape[axis] except ValueError: raise ValueError('Cannot split a ragged dimension. Got `value` with ' f'shape {value_shape} and `axis` {axis}.') if isinstance(num_or_size_splits, int): # Uniform split num_splits = num_or_size_splits if num_splits < 1: raise ValueError('`num_or_size_splits` must be >=1 if it is an `int`.' f'Received {num_or_size_splits}.') split_length = math_ops.floordiv(dim_size, num_splits) split_lengths = array_ops.repeat(split_length, num_splits) else: # Ragged split num_splits = None split_lengths = ops.convert_to_tensor(num_or_size_splits) if split_lengths.shape.ndims is not None: if split_lengths.shape.ndims != 1: raise TypeError('`num_or_size_splits` must be an `int` or 1-D list ' f'or `Tensor`. Received {num_or_size_splits}.') num_splits = tensor_shape.dimension_value(split_lengths.shape[0]) if num_splits is None: if num is None: raise ValueError('`num` must be specified as an `int` when the ' 'size of `num_or_size_split` is statically ' f'unknown. Received `num`: {num} and ' f'`num_or_size_split`: {num_or_size_splits}.') num_splits = num else: if num is not None and num != num_splits: raise ValueError('`num` does not match the size of ' f'`num_or_size_split`. Received `num`: {num} and ' f'size of `num_or_size_split`: {num_splits}.') splits = array_ops.concat([[0], math_ops.cumsum(split_lengths)], axis=0) checks = [] checks.append( check_ops.assert_non_negative_v2( num_or_size_splits, message='`num_or_size_splits` must be non-negative.')) checks.append( check_ops.assert_equal_v2( num_splits, array_ops.shape(split_lengths)[0], message='`num` is inconsistent with `num_or_size_split.shape[0]`.')) checks.append( check_ops.assert_equal_v2( math_ops.cast(dim_size, splits.dtype), splits[-1], message=('Cannot exactly split the `axis` dimension of `value` ' 'with the given `num_or_size_split`.'))) splits = control_flow_ops.with_dependencies(checks, splits) splited_rts = [] slices = [slice(None)] * (axis + 1) for i in range(num_splits): slices[-1] = slice(splits[i], splits[i + 1]) splited_rts.append(value[tuple(slices)]) return splited_rts
def _update_confusion_matrix_variables_optimized( variables_to_update, y_true, y_pred, thresholds, multi_label=False, sample_weights=None, label_weights=None, thresholds_with_epsilon=False): """Update confusion matrix variables with memory efficient alternative. Note that the thresholds need to be evenly distributed within the list, e.g., the diff between consecutive elements is the same. To compute TP/FP/TN/FN, we are measuring a binary classifier C(t) = (predictions >= t) at each threshold 't'. So we have TP(t) = sum( C(t) * true_labels ) FP(t) = sum( C(t) * false_labels ) But, computing C(t) requires computation for each t. To make it fast, observe that C(t) is a cumulative integral, and so if we have thresholds = [t_0, ..., t_{n-1}]; t_0 < ... < t_{n-1} where n = num_thresholds, and if we can compute the bucket function B(i) = Sum( (predictions == t), t_i <= t < t{i+1} ) then we get C(t_i) = sum( B(j), j >= i ) which is the reversed cumulative sum in tf.cumsum(). We can compute B(i) efficiently by taking advantage of the fact that our thresholds are evenly distributed, in that width = 1.0 / (num_thresholds - 1) thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0] Given a prediction value p, we can map it to its bucket by bucket_index(p) = floor( p * (num_thresholds - 1) ) so we can use tf.math.unsorted_segment_sum() to update the buckets in one pass. Consider the following example: y_true = [0, 0, 1, 1] y_pred = [0.1, 0.5, 0.3, 0.9] thresholds = [0.0, 0.5, 1.0] num_buckets = 2 # [0.0, 1.0], (1.0, 2.0] bucket_index(y_pred) = tf.math.ceil(y_pred * num_buckets) - 1 = tf.math.ceil([0.2, 1.0, 0.6, 1.8]) - 1 = [1, 1, 1, 2] - 1 = [0, 0, 0, 1] # The meaning of this bucket is that if any of the labels is true, # then 1 will be added to the corresponding bucket with the index. # Eg, if the label for 0.2 is true, then 1 will be added to bucket 0. If the # label for 1.8 is true, then 1 will be added to bucket 1. # # Note the second item "1.0" is mapped to bucket 0, since the value needs to be # strictly larger than the bucket lower bound. # In the implementation, we use tf.math.ceil() - 1 to achieve this. tp_bucket_value = tf.math.unsorted_segment_sum(true_labels, bucket_indices, num_segments=num_thresholds) = [1, 1, 0] # For [1, 1, 0] here, it means there is 1 true value contributed by bucket 0, # and 1 value contributed by bucket 1. When we aggregate them together, # the result becomes [a + b + c, b + c, c], since large thresholds will always # contribute to the value for smaller thresholds. true_positive = tf.math.cumsum(tp_bucket_value, reverse=True) = [2, 1, 0] This implementation exhibits a run time and space complexity of O(T + N), where T is the number of thresholds and N is the size of predictions. Metrics that rely on the standard implementation instead exhibit a complexity of O(T * N). Args: variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys and corresponding variables to update as values. y_true: A floating point `Tensor` whose shape matches `y_pred`. Will be cast to `bool`. y_pred: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. thresholds: A sorted floating point `Tensor` with values in `[0, 1]`. It needs to be evenly distributed (the diff between consecutive elements needs to be the same). multi_label: Optional boolean indicating whether multidimensional prediction/labels should be treated as multilabel responses, or flattened into a single label.
When True, the values of `variables_to_update` must have a second dimension equal to the number of labels in y_true and y_pred, and those tensors must not be RaggedTensors. sample_weights: Optional `Tensor` whose rank is either 0, or the same rank as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must be either `1`, or the same as the corresponding `y_true` dimension). label_weights: Optional tensor of non-negative weights for multilabel data. The weights are applied when calculating TP, FP, FN, and TN without explicit multilabel handling (i.e. when the data is to be flattened). thresholds_with_epsilon: Optional boolean indicating whether the leading and trailing thresholds have any epsilon added for floating point imprecisions. It will change how we handle the leading and trailing buckets. Returns: Update op. """ num_thresholds = thresholds.shape.as_list()[0] if sample_weights is None: sample_weights = 1.0 else: sample_weights = weights_broadcast_ops.broadcast_weights( math_ops.cast(sample_weights, dtype=y_pred.dtype), y_pred) if not multi_label: sample_weights = array_ops.reshape(sample_weights, [-1]) if label_weights is None: label_weights = 1.0 else: label_weights = array_ops.expand_dims(label_weights, 0) label_weights = weights_broadcast_ops.broadcast_weights(label_weights, y_pred) if not multi_label: label_weights = array_ops.reshape(label_weights, [-1]) weights = math_ops.multiply(sample_weights, label_weights) # We shouldn't need this, but in case there are prediction values that are out # of the range [0.0, 1.0] y_pred = clip_ops.clip_by_value(y_pred, clip_value_min=0.0, clip_value_max=1.0) y_true = math_ops.cast(math_ops.cast(y_true, dtypes.bool), y_true.dtype) if not multi_label: y_true = array_ops.reshape(y_true, [-1]) y_pred = array_ops.reshape(y_pred, [-1]) true_labels = math_ops.multiply(y_true, weights) false_labels = math_ops.multiply((1.0 - y_true), weights) # Compute the bucket indices for each prediction value. # Since the prediction value has to be strictly greater than the thresholds, # e.g., with buckets like [0, 0.5], (0.5, 1], the value 0.5 belongs to the first bucket. # We have to use math.ceil(val) - 1 for the bucket. bucket_indices = math_ops.ceil(y_pred * (num_thresholds - 1)) - 1 if thresholds_with_epsilon: # In this case, the first bucket should actually be taken into account, since # any prediction in [0.0, 1.0] should be larger than the first # threshold. We change the bucket value from -1 to 0. bucket_indices = nn_ops.relu(bucket_indices) bucket_indices = math_ops.cast(bucket_indices, dtypes.int32) if multi_label: # We need to run bucket segment sum for each of the label classes. In the # multi_label case, the rank of the label is 2. We first transpose it so # that the label dim becomes the first and we can run through them in parallel.
true_labels = array_ops.transpose_v2(true_labels) false_labels = array_ops.transpose_v2(false_labels) bucket_indices = array_ops.transpose_v2(bucket_indices) def gather_bucket(label_and_bucket_index): label, bucket_index = label_and_bucket_index[0], label_and_bucket_index[1] return math_ops.unsorted_segment_sum( data=label, segment_ids=bucket_index, num_segments=num_thresholds) tp_bucket_v = vectorized_map( gather_bucket, (true_labels, bucket_indices)) fp_bucket_v = vectorized_map( gather_bucket, (false_labels, bucket_indices)) tp = array_ops.transpose_v2( math_ops.cumsum(tp_bucket_v, reverse=True, axis=1)) fp = array_ops.transpose_v2( math_ops.cumsum(fp_bucket_v, reverse=True, axis=1)) else: tp_bucket_v = math_ops.unsorted_segment_sum( data=true_labels, segment_ids=bucket_indices, num_segments=num_thresholds) fp_bucket_v = math_ops.unsorted_segment_sum( data=false_labels, segment_ids=bucket_indices, num_segments=num_thresholds) tp = math_ops.cumsum(tp_bucket_v, reverse=True) fp = math_ops.cumsum(fp_bucket_v, reverse=True) # fn = sum(true_labels) - tp # tn = sum(false_labels) - fp if (ConfusionMatrix.TRUE_NEGATIVES in variables_to_update or ConfusionMatrix.FALSE_NEGATIVES in variables_to_update): if multi_label: total_true_labels = math_ops.reduce_sum(true_labels, axis=1) total_false_labels = math_ops.reduce_sum(false_labels, axis=1) else: total_true_labels = math_ops.reduce_sum(true_labels) total_false_labels = math_ops.reduce_sum(false_labels) update_ops = [] if ConfusionMatrix.TRUE_POSITIVES in variables_to_update: variable = variables_to_update[ConfusionMatrix.TRUE_POSITIVES] update_ops.append(variable.assign_add(tp)) if ConfusionMatrix.FALSE_POSITIVES in variables_to_update: variable = variables_to_update[ConfusionMatrix.FALSE_POSITIVES] update_ops.append(variable.assign_add(fp)) if ConfusionMatrix.TRUE_NEGATIVES in variables_to_update: variable = variables_to_update[ConfusionMatrix.TRUE_NEGATIVES] tn = total_false_labels - fp update_ops.append(variable.assign_add(tn)) if ConfusionMatrix.FALSE_NEGATIVES in variables_to_update: variable = variables_to_update[ConfusionMatrix.FALSE_NEGATIVES] fn = total_true_labels - tp update_ops.append(variable.assign_add(fn)) return control_flow_ops.group(update_ops)
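# Illustrative NumPy sketch (an assumption, not the original source) reproducing the docstring
# example above: bucket each prediction with ceil(p * (num_thresholds - 1)) - 1, accumulate
# the true labels per bucket, then take a reversed cumulative sum so that each threshold
# counts every bucket at or above it.
import numpy as np

y_true = np.array([0.0, 0.0, 1.0, 1.0])
y_pred = np.array([0.1, 0.5, 0.3, 0.9])
num_thresholds = 3                                               # thresholds = [0.0, 0.5, 1.0]

bucket = np.ceil(y_pred * (num_thresholds - 1)).astype(int) - 1  # [0 0 0 1]
bucket = np.maximum(bucket, 0)                                   # guard the p == 0.0 case
tp_bucket = np.bincount(bucket, weights=y_true, minlength=num_thresholds)  # [1. 1. 0.]
tp = np.cumsum(tp_bucket[::-1])[::-1]                            # reversed cumsum
print(tp)  # [2. 1. 0.] -- true positives at thresholds 0.0, 0.5, 1.0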
def segment_ids_to_row_splits(segment_ids, num_segments=None, out_type=None, name=None): """Generates the RaggedTensor `row_splits` corresponding to a segmentation. Returns an integer vector `splits`, where `splits[0] = 0` and `splits[i] = splits[i-1] + count(segment_ids==i-1)`. Example: >>> print(tf.ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4])) tf.Tensor([0 3 3 5 6 9], shape=(6,), dtype=int64) Args: segment_ids: A 1-D integer Tensor. num_segments: A scalar integer indicating the number of segments. Defaults to `max(segment_ids) + 1` (or zero if `segment_ids` is empty). out_type: The dtype for the return value. Defaults to `segment_ids.dtype`, or `tf.int64` if `segment_ids` does not have a dtype. name: A name prefix for the returned tensor (optional). Returns: A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`. """ # Local import bincount_ops to avoid import-cycle. from tensorflow.python.ops import bincount_ops # pylint: disable=g-import-not-at-top if out_type is None: if isinstance(segment_ids, ops.Tensor): out_type = segment_ids.dtype elif isinstance(num_segments, ops.Tensor): out_type = num_segments.dtype else: out_type = dtypes.int64 else: out_type = dtypes.as_dtype(out_type) with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name: # Note: we cast int64 tensors to int32, since bincount currently only # supports int32 inputs. segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids", dtype=dtypes.int32) segment_ids.shape.assert_has_rank(1) if num_segments is not None: num_segments = ragged_util.convert_to_int_tensor( num_segments, "num_segments", dtype=dtypes.int32) num_segments.shape.assert_has_rank(0) row_lengths = bincount_ops.bincount(segment_ids, minlength=num_segments, maxlength=num_segments, dtype=out_type) splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0) # Update shape information, if possible. if num_segments is not None: const_num_segments = tensor_util.constant_value(num_segments) if const_num_segments is not None: splits.set_shape( tensor_shape.TensorShape([const_num_segments + 1])) return splits
def _strict_1d_cumsum(tensor, len_tensor): """Cumsum of a 1D tensor with defined shape, truncated to `len_tensor` entries.""" # Assumes tensor shape is fully defined. return math_ops.cumsum(tensor)[:len_tensor]
def _CumsumGrad(op, grad): axis = op.inputs[1] exclusive = op.get_attr("exclusive") reverse = op.get_attr("reverse") return [math_ops.cumsum(grad, axis, exclusive=exclusive, reverse=not reverse), None]
def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None): """Converts a `Tensor` into a `RaggedTensor`. The set of absent/default values may be specified using a vector of lengths or a padding value (but not both). If `lengths` is specified, then the output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`. If `padding` is specified, then any row *suffix* consisting entirely of `padding` will be excluded from the returned `RaggedTensor`. If neither `lengths` nor `padding` is specified, then the returned `RaggedTensor` will have no absent/default values. Examples: ```python >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]]) >>> ragged.from_tensor(dt).eval().tolist() [[5, 7, 0], [0, 3, 0], [6, 0, 0]] >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist() [[5, 7], [], [6, 0, 0]] >>> ragged.from_tensor(dt, padding=0).eval().tolist() [[5, 7], [0, 3], [6]] ``` Args: tensor: The `Tensor` to convert. Must have rank `ragged_rank + 1` or higher. lengths: An optional set of row lengths, specified using a 1-D integer `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows in `tensor`). If specified, then `output[row]` will contain `tensor[row][:lengths[row]]`. Negative lengths are treated as zero. padding: An optional padding value. If specified, then any row suffix consisting entirely of `padding` will be excluded from the returned RaggedTensor. `padding` is a `Tensor` with the same dtype as `tensor` and with `shape=tensor.shape[ragged_rank + 1:]`. ragged_rank: Integer specifying the ragged rank for the returned `RaggedTensor`. Must be greater than zero. name: A name prefix for the returned tensors (optional). Returns: A `RaggedTensor` with the specified `ragged_rank`. The shape of the returned ragged tensor is compatible with the shape of `tensor`. Raises: ValueError: If both `lengths` and `padding` are specified. """ if lengths is not None and padding is not None: raise ValueError('Specify lengths or padding, but not both') if not isinstance(ragged_rank, int): raise TypeError('ragged_rank expected int, got %r' % ragged_rank) if ragged_rank <= 0: raise ValueError('ragged_rank must be greater than 0; got %s' % ragged_rank) with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]): tensor = ops.convert_to_tensor(tensor, name='tensor') tensor.shape.with_rank_at_least(ragged_rank + 1) input_shape = array_ops.shape(tensor, out_type=dtypes.int64) ncols = input_shape[1] # Handle ragged_rank>1 via recursion: # If the output should have multiple ragged dimensions, then first # flatten the tensor to eliminate all but the last ragged dimension, # and recursively convert that flattened tensor. Then add on the splits # for the dimensions that we flattened out. if ragged_rank > 1: # Flatten `tensor` to eliminate all but the last ragged dimension. new_shape = array_ops.concat([ constant_op.constant([-1], dtypes.int64), input_shape[ragged_rank:] ], axis=0) flattened = array_ops.reshape(tensor, new_shape) # Recursively convert the flattened tensor. values = from_tensor(flattened, lengths, padding) # The total number of elements in each dimension. E.g., if # input_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total. dim_size = math_ops.cumprod(input_shape) # Construct splits tensors for the dimensions that were flattened. 
new_splits = [ math_ops.range(0, dim_size[dim - 1] + 1) * input_shape[dim] for dim in range(1, ragged_rank) ] return ragged_factory_ops.from_nested_row_splits( values, new_splits) # If padding was specified, then use it to find row lengths. if padding is not None: padding = ops.convert_to_tensor(padding, name='padding', dtype=tensor.dtype) padding.shape.assert_is_compatible_with(tensor.shape[2:]) # Find places where the padding is equal to the tensor. (This will # broadcast `padding` across the outermost 2 dimensions of `tensor`, # so `has_default_value.shape = tensor.shape`.) has_default_value = math_ops.equal(padding, tensor) # If the padding isn't a scalar, then require that all values in the # padding match each item in the tensor. After this block of code, # `has_default.shape = tensor.shape[:2]`. (Unfortunately, we can't just # use reduce_all for both cases, because when you pass an empty `axis` # list to reduce_all, it reduces all axes; but we want it to reduce no # axes -- i.e., to be a no-op.) tensor_rank = array_ops.rank(tensor) reduce_axis = math_ops.range(2, tensor_rank) has_default = control_flow_ops.cond( tensor_rank > 2, lambda: math_ops.reduce_all(has_default_value, axis=reduce_axis), lambda: has_default_value) has_default.set_shape(tensor_shape.TensorShape([None, None])) has_default.set_shape(tensor.shape[:2]) # Use has_default to find the length of each row: for each non-default # item in a row, calculate the length that the row needs to have to # include that item; and then take the max of those values (across each # row). has_nondefault = math_ops.logical_not(has_default) has_nondefault = math_ops.cast(has_nondefault, dtypes.int64) length_for_nondefault_value = ( has_nondefault * array_ops.expand_dims(math_ops.range(1, ncols + 1), 0)) lengths = math_ops.reduce_max(length_for_nondefault_value, axis=1) # If we have lengths (either directly supplied, or computed from padding), # then use those to construct splits; and then use masking to get the # corresponding values. if lengths is not None: lengths = ragged_util.convert_to_int_tensor( lengths, 'lengths', dtypes.int64) lengths.shape.assert_has_rank(1) lengths = math_ops.minimum(lengths, ncols) lengths = math_ops.maximum(lengths, 0) limits = math_ops.cumsum(lengths) splits = array_ops.concat( [array_ops.zeros([1], dtypes.int64), limits], axis=0) mask = array_ops.sequence_mask(lengths, maxlen=ncols) values = array_ops.boolean_mask(tensor, mask) return ragged_factory_ops.from_row_splits(values, splits) # If neither padding nor lengths were specified, then create a splits # vector that contains no default values, and reshape the input tensor # to form the values for the RaggedTensor. nrows = input_shape[0] nvals = nrows * ncols splits = math_ops.range(nrows + 1) * ncols values_shape = array_ops.concat([[nvals], input_shape[2:]], axis=0) values = array_ops.reshape(tensor, values_shape) return ragged_factory_ops.from_row_splits(values, splits)
def from_value_rowids(values, value_rowids, nrows=None, name=None): """Creates a `RaggedTensor` with rows partitioned by `value_rowids`. The returned `RaggedTensor` corresponds with the python list defined by: ```python result = [[values[i] for i in range(len(values)) if value_rowids[i] == row] for row in range(nrows)] ``` Warning: currently, this needs to cast value_rowids to int32 before converting, since `tf.bincount` only supports `int32`. Args: values: A potentially ragged tensor with shape `[nvals, ...]`. value_rowids: A 1-D int64 tensor with shape `[nvals]`, which corresponds one-to-one with `values`, and specifies each value's row index. Must be nonnegative, and must be sorted in ascending order. nrows: An int64 scalar specifying the number of rows. This should be specified if the `RaggedTensor` may contain empty trailing rows. Must be greater than `value_rowids[-1]` (or zero if `value_rowids` is empty). Defaults to `value_rowids[-1] + 1` (or zero if `value_rowids` is empty). name: A name prefix for the RaggedTensor (optional). Returns: A `RaggedTensor`. `result.rank = values.rank + 1`. `result.ragged_rank = values.ragged_rank + 1`. Raises: ValueError: If `nrows` is incompatible with `value_rowids`. #### Example: ```python >>> rt = ragged.from_value_rowids( ... values=[3, 1, 4, 1, 5, 9, 2, 6], ... value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], ... nrows=5) >>> rt.eval().tolist() [[3, 1, 4, 1], [], [5, 9, 2], [6], []] ``` """ with ops.name_scope(name, 'RaggedFromValueRowIds', [values, value_rowids, nrows]): values = convert_to_tensor_or_ragged_tensor(values, name='values') value_rowids = ops.convert_to_tensor( value_rowids, dtypes.int64, name='value_rowids') if nrows is None: const_rowids = tensor_util.constant_value(value_rowids) if const_rowids is None: nrows = array_ops.concat([value_rowids[-1:], [-1]], axis=0)[0] + 1 const_nrows = None else: const_nrows = const_rowids[-1] + 1 if const_rowids.size > 0 else 0 nrows = ops.convert_to_tensor(const_nrows, dtypes.int64, name='nrows') else: nrows = ops.convert_to_tensor(nrows, dtypes.int64, 'nrows') const_nrows = tensor_util.constant_value(nrows) if const_nrows is not None: if const_nrows < 0: raise ValueError('Expected nrows >= 0; got %d' % const_nrows) const_rowids = tensor_util.constant_value(value_rowids) if const_rowids is not None and const_rowids.size > 0: if not const_nrows >= const_rowids[-1] + 1: raise ValueError( 'Expected nrows >= value_rowids[-1] + 1; got nrows=%d, ' 'value_rowids[-1]=%d' % (const_nrows, const_rowids[-1])) value_rowids.shape.assert_has_rank(1) nrows.shape.assert_has_rank(0) values.shape[:1].assert_is_compatible_with(value_rowids.shape) # Convert value_rowids & nrows to row_splits. # Note: we don't use segment_ids_to_row_splits() here because we want # to save the intermediate value `row_lengths`, so we can cache it. # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the cast # (Remove the warning in the docstring when we do.) value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32) nrows_int32 = math_ops.cast(nrows, dtypes.int32) row_lengths = math_ops.bincount( value_rowids_int32, minlength=nrows_int32, maxlength=nrows_int32, dtype=dtypes.int64) row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0) if const_nrows is not None: row_lengths.set_shape([const_nrows]) row_splits.set_shape([const_nrows + 1]) return ragged_tensor.RaggedTensor( values, row_splits, cached_row_lengths=row_lengths, cached_value_rowids=value_rowids, cached_nrows=nrows, internal=True)
def kernel_classifier_distance_and_std_from_activations( real_activations, generated_activations, max_block_size=10, dtype=None): """Kernel "classifier" distance for evaluating a generative model. This methods computes the kernel classifier distance from activations of real images and generated images. This can be used independently of the kernel_classifier_distance() method, especially in the case of using large batches during evaluation where we would like to precompute all of the activations before computing the classifier distance, or if we want to compute multiple metrics based on the same images. It also returns a rough estimate of the standard error of the estimator. This technique is described in detail in https://arxiv.org/abs/1801.01401. Given two distributions P and Q of activations, this function calculates E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')] - 2 E_{X ~ P, Y ~ Q}[k(X, Y)] where k is the polynomial kernel k(x, y) = ( x^T y / dimension + 1 )^3. This captures how different the distributions of real and generated images' visual features are. Like the Frechet distance (and unlike the Inception score), this is a true distance and incorporates information about the target images. Unlike the Frechet score, this function computes an *unbiased* and asymptotically normal estimator, which makes comparing estimates across models much more intuitive. The estimator used takes time quadratic in max_block_size. Larger values of max_block_size will decrease the variance of the estimator but increase the computational cost. This differs slightly from the estimator used by the original paper; it is the block estimator of https://arxiv.org/abs/1307.1954. The estimate of the standard error will also be more reliable when there are more blocks, i.e. when max_block_size is smaller. NOTE: the blocking code assumes that real_activations and generated_activations are both in random order. If either is sorted in a meaningful order, the estimator will behave poorly. Args: real_activations: 2D Tensor containing activations of real data. Shape is [batch_size, activation_size]. generated_activations: 2D Tensor containing activations of generated data. Shape is [batch_size, activation_size]. max_block_size: integer, default 1024. The distance estimator splits samples into blocks for computational efficiency. Larger values are more computationally expensive but decrease the variance of the distance estimate. Having a smaller block size also gives a better estimate of the standard error. dtype: if not None, coerce activations to this dtype before computations. Returns: The Kernel Inception Distance. A floating-point scalar of the same type as the output of the activations. An estimate of the standard error of the distance estimator (a scalar of the same type). """ real_activations.shape.assert_has_rank(2) generated_activations.shape.assert_has_rank(2) real_activations.shape[1].assert_is_compatible_with( generated_activations.shape[1]) if dtype is None: dtype = real_activations.dtype assert generated_activations.dtype == dtype else: real_activations = math_ops.cast(real_activations, dtype) generated_activations = math_ops.cast(generated_activations, dtype) # Figure out how to split the activations into blocks of approximately # equal size, with none larger than max_block_size. 
n_r = array_ops.shape(real_activations)[0] n_g = array_ops.shape(generated_activations)[0] n_bigger = math_ops.maximum(n_r, n_g) n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size)) v_r = n_r // n_blocks v_g = n_g // n_blocks n_plusone_r = n_r - v_r * n_blocks n_plusone_g = n_g - v_g * n_blocks sizes_r = array_ops.concat([ array_ops.fill([n_blocks - n_plusone_r], v_r), array_ops.fill([n_plusone_r], v_r + 1), ], 0) sizes_g = array_ops.concat([ array_ops.fill([n_blocks - n_plusone_g], v_g), array_ops.fill([n_plusone_g], v_g + 1), ], 0) zero = array_ops.zeros([1], dtype=dtypes.int32) inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0) inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0) dim = math_ops.cast(array_ops.shape(real_activations)[1], dtype) def compute_kid_block(i): """Computes the ith block of the KID estimate.""" r_s = inds_r[i] r_e = inds_r[i + 1] r = real_activations[r_s:r_e] m = math_ops.cast(r_e - r_s, dtype) g_s = inds_g[i] g_e = inds_g[i + 1] g = generated_activations[g_s:g_e] n = math_ops.cast(g_e - g_s, dtype) k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3 k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3 k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3 return (-2 * math_ops.reduce_mean(k_rg) + (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) / (m * (m - 1)) + (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n * (n - 1))) ests = functional_ops.map_fn(compute_kid_block, math_ops.range(n_blocks), dtype=dtype, back_prop=False) mn = math_ops.reduce_mean(ests) # nn_impl.moments doesn't use the Bessel correction, which we want here n_blocks_ = math_ops.cast(n_blocks, dtype) var = control_flow_ops.cond( math_ops.less_equal(n_blocks, 1), lambda: array_ops.constant(float('nan'), dtype=dtype), lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) / (n_blocks_ - 1)) return mn, math_ops.sqrt(var / n_blocks_)
def sparsemax(logits, name=None): """Computes sparsemax activations [1]. For each batch `i` and class `j` we have $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$ [1]: https://arxiv.org/abs/1602.02068 Args: logits: A `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. name: A name for the operation (optional). Returns: A `Tensor`. Has the same type as `logits`. """ with ops.name_scope(name, "sparsemax", [logits]) as name: logits = ops.convert_to_tensor(logits, name="logits") obs = array_ops.shape(logits)[0] dims = array_ops.shape(logits)[1] # In the paper, they call the logits z. # The mean(logits) can be subtracted from logits to make the algorithm # more numerically stable. The instability in this algorithm comes mostly # from the z_cumsum. Subtracting the mean will cause z_cumsum to be close # to zero. However, in practice the numerical instability issues are very # minor and subtracting the mean causes extra issues with inf and nan # input. z = logits # sort z z_sorted, _ = nn.top_k(z, k=dims) # calculate k(z) z_cumsum = math_ops.cumsum(z_sorted, axis=1) k = math_ops.range( 1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype) z_check = 1 + k * z_sorted > z_cumsum # because the z_check vector is always [1,1,...1,0,0,...0], finding the # (index + 1) of the last `1` is the same as just summing the number of 1s. k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1) # calculate tau(z) # If there are inf values or all values are -inf, the k_z will be zero, # this is mathematically invalid and will also cause the gather_nd to fail. # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then # fixed later (see p_safe) by returning p = nan. This results in the same # behavior as softmax. k_z_safe = math_ops.maximum(k_z, 1) indices = array_ops.stack([math_ops.range(0, obs), k_z_safe - 1], axis=1) tau_sum = array_ops.gather_nd(z_cumsum, indices) tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype) # calculate p p = math_ops.maximum( math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis]) # If k_z = 0 or if z = nan, then the input is invalid p_safe = array_ops.where( math_ops.logical_or( math_ops.equal(k_z, 0), math_ops.is_nan(z_cumsum[:, -1])), array_ops.fill([obs, dims], math_ops.cast(float("nan"), logits.dtype)), p) return p_safe
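# Hedged illustration (not library code): a minimal NumPy sketch of the same
# sort / cumsum / threshold steps for a single row of hypothetical logits.
import numpy as np

z = np.array([1.0, 2.5, 0.1])
z_sorted = np.sort(z)[::-1]             # sort descending: [2.5, 1.0, 0.1]
z_cumsum = np.cumsum(z_sorted)          # running sums of the sorted logits
k = np.arange(1, len(z) + 1)
z_check = 1 + k * z_sorted > z_cumsum   # which sorted entries stay in the support
k_z = z_check.sum()                     # support size k(z)
tau_z = (z_cumsum[k_z - 1] - 1) / k_z   # threshold tau(z)
p = np.maximum(z - tau_z, 0.0)          # sparsemax output: [0.0, 1.0, 0.0], sums to 1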
def _inverse(self, y): x0 = y[..., 0, array_ops.newaxis] xk = math_ops.exp(y[..., 1:]) x = array_ops.concat([x0, xk], axis=-1) return math_ops.cumsum(x, axis=-1)
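# Hedged illustration (not library code): a minimal NumPy sketch of this inverse
# mapping, together with a hypothetical forward direction consistent with it
# (keep the first element, take the log of the successive differences).
import numpy as np

y = np.array([0.5, np.log(1.0), np.log(2.0)])
x = np.cumsum(np.concatenate([y[:1], np.exp(y[1:])]))  # [0.5, 1.5, 3.5], strictly increasing
y_back = np.concatenate([x[:1], np.log(np.diff(x))])   # recovers y
assert np.allclose(y_back, y)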
def _power_sum_array(self, max_remaining_steps): r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..max_remaining_steps. A is the transition matrix and B is the noise covariance. This is more efficient in practice than math_utils.power_sums_tensor, since each A^i B (A^i)^T term has a closed-form expression not depending on i - 1. Thus vectorization can replace explicit looping. Uses a cumulative sum on the following expression: (transition^p * transition_covariance * (transition^p)^T)_{i, j} = (-1)^(i + j) * sin^2(pi * p) / num_latent_values^2 * (1/sin(pi / num_latent_values * (p - i)) + 1/sin(pi / num_latent_values * (p - i - 1))) * (1/sin(pi / num_latent_values * (p - j)) + 1/sin(pi / num_latent_values * (p - j - 1))) The expression being derived from the eigenvectors and eigenvalues given in the class docstring (and as with CycleStateSpaceModel taking advantage of the sparsity of the transition covariance). Args: max_remaining_steps: A scalar integer Tensor indicating the number of non-trivial values to compute. Returns: A [max_remaining_steps + 1, self._num_latent_values - 1, self._num_latent_values - 1] floating point Tensor S with cumulative power sums. S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T S[0] is the zero matrix S[1] is B S[2] is A B A^T + B """ num_latent_values_float = math_ops.cast(self._num_latent_values, self.dtype) latent_values_per_period = ( num_latent_values_float / math_ops.cast(self._true_periodicity, dtype=self.dtype)) original_matrix_powers = ( math_ops.cast(math_ops.range(max_remaining_steps), self.dtype) * latent_values_per_period) matrix_dimension_range = math_ops.range(self._num_latent_values - 1)[None, ...] matrix_dimension_range_float = math_ops.cast(matrix_dimension_range, self.dtype) def _cosecant_with_freq(coefficient): return 1. / math_ops.sin( numpy.pi / num_latent_values_float * coefficient) power_minus_index = (original_matrix_powers[..., None] - matrix_dimension_range_float) mesh_values = (_cosecant_with_freq(power_minus_index) + _cosecant_with_freq(power_minus_index - 1.)) meshed = mesh_values[..., None, :] * mesh_values[..., None] full_matrix_alternating = math_ops.cast( 1 - 2 * ((matrix_dimension_range[..., None, :] + matrix_dimension_range[..., None]) % 2), self.dtype) def _sine_discontinuity(value): """A special case for dealing with discontinuities. Decides whether `value` is close to an integer, and if so computes: lim x->n |sin(x * pi)| / sin(x * pi) = sign(sin(n * pi)) = cos(n * pi) Args: value: The floating point Tensor value which may lead to a discontinuity. Returns: A tuple of (is_discontinuous, sign): is_discontinuous: A boolean Tensor of the same shape as `value`, indicating whether it is near an integer. sign: A floating point Tensor indicating the sign of the discontinuity (being near 1 or -1 when `is_discontinuous` is True), of the same shape and type as `value`. 
""" normalized = value / num_latent_values_float is_discontinuous = self._close_to_integer(normalized) sign = math_ops.cos(normalized * numpy.pi) return is_discontinuous, sign index_discontinuous, index_sign = _sine_discontinuity( original_matrix_powers[..., None] - matrix_dimension_range_float) index_minus_discontinuous, index_minus_sign = _sine_discontinuity( original_matrix_powers[..., None] - matrix_dimension_range_float - 1) ones_mask_vector = math_ops.logical_or(index_discontinuous, index_minus_discontinuous) ones_sign_vector = array_ops.where(index_discontinuous, index_sign, index_minus_sign) ones_mask = math_ops.logical_and(ones_mask_vector[..., None], ones_mask_vector[..., None, :]) zeros_mask = self._close_to_integer(original_matrix_powers) zeroed = array_ops.where(zeros_mask, array_ops.zeros_like(meshed), meshed) global_coefficient = (math_ops.sin(numpy.pi * original_matrix_powers) / num_latent_values_float) masked_meshed = array_ops.where( ones_mask, ones_sign_vector[..., None] * ones_sign_vector[..., None, :], zeroed * global_coefficient[..., None, None]**2) powers_above_zero = full_matrix_alternating * masked_meshed return array_ops.pad(math_ops.cumsum(powers_above_zero), [(1, 0), (0, 0), (0, 0)])
def _compare(self, x, axis, exclusive, reverse): np_out = handle_options(np.cumsum, x, axis, exclusive, reverse) with self.cached_session(): tf_out = math_ops.cumsum(x, axis, exclusive, reverse).eval() self.assertAllClose(np_out, tf_out)
def _compare(self, x, axis, exclusive, reverse): np_out = handle_options(np.cumsum, x, axis, exclusive, reverse) with self.cached_session(use_gpu=True): tf_out = math_ops.cumsum(x, axis, exclusive, reverse).eval() self.assertAllClose(np_out, tf_out)
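# Hedged sketch (not the tests' actual handle_options helper, which is defined
# elsewhere): how the `exclusive` and `reverse` cumsum options map onto plain
# NumPy, for reference when reading the comparisons above.
import numpy as np

def cumsum_with_options(x, axis=0, exclusive=False, reverse=False):
  if reverse:
    x = np.flip(x, axis=axis)
  out = np.cumsum(x, axis=axis)
  if exclusive:
    # Shift right along `axis` and insert a leading zero, so out[i] = sum of x[:i].
    out = np.concatenate(
        [np.zeros_like(np.take(out, [0], axis=axis)),
         np.delete(out, -1, axis=axis)], axis=axis)
  if reverse:
    out = np.flip(out, axis=axis)
  return out

cumsum_with_options(np.array([1, 2, 3, 4]), exclusive=True)   # [0, 1, 3, 6]
cumsum_with_options(np.array([1, 2, 3, 4]), reverse=True)     # [10, 9, 7, 4]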
def _ragged_segment_aggregate(unsorted_segment_op, data, segment_ids, num_segments, separator=None, name=None): """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`. Returns a RaggedTensor `output` with `num_segments` rows, where the row `output[i]` is formed by combining all rows of `data` whose corresponding `segment_id` is `i`. The values in each row are combined using `unsorted_segment_op`. The length of the row `output[i]` will be the maximum of the lengths of all rows of `data` whose corresponding `segment_id` is `i`. If no `data` rows correspond to a given segment ID, then the output row for that segment ID will be empty. Args: unsorted_segment_op: The tensorflow `op` that should be used to combine values in each row. Must have the same signature and basic behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc. data: A `RaggedTensor` containing the values to be combined. segment_ids: A `Tensor` or `RaggedTensor`. Must have type `int64` or `int32`. `segment_ids.shape` must be a prefix of `data.shape`. `segment_ids` is not required to be sorted. num_segments: An `int32` or `int64` scalar. separator: An optional string. Defaults to None. The separator to use when joining. Only used for string types. name: A name prefix for the returned tensor (optional). Returns: A `RaggedTensor` containing the aggregated values. The returned tensor has the same dtype as `data`, and its shape is `[num_segments] + data.shape[segment_ids.rank:]`. Raises: ValueError: If segment_ids.shape is not a prefix of data.shape. """ if not (ragged_tensor.is_ragged(data) or ragged_tensor.is_ragged(segment_ids)): if separator is not None: # It uses unsorted_segment_join. return unsorted_segment_op(data, segment_ids, num_segments, separator, name) else: return unsorted_segment_op(data, segment_ids, num_segments, name) with ops.name_scope(name, 'RaggedSegment', [data, segment_ids, num_segments]) as name: data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data') segment_ids = ragged_tensor.convert_to_tensor_or_ragged_tensor( segment_ids, name='segment_ids') data, segment_ids = ragged_tensor.match_row_splits_dtypes(data, segment_ids) if segment_ids.dtype not in (dtypes.int32, dtypes.int64): raise ValueError('segment_ids must have dtype int32 or int64.') if ragged_tensor.is_ragged(segment_ids): if not ragged_tensor.is_ragged(data): raise ValueError('segment_ids.shape must be a prefix of data.shape, ' 'but segment_ids is ragged and data is not.') check_splits = check_ops.assert_equal( segment_ids.row_splits, data.row_splits, message='segment_ids.shape must be a prefix of data.shape') with ops.control_dependencies([check_splits]): return _ragged_segment_aggregate(unsorted_segment_op, data.values, segment_ids.values, num_segments, separator) # Find the length of each row in data. (shape=[data_nrows]) data_row_lengths = data.row_splits[1:] - data.row_splits[:-1] # Find the length that each output row will have. The length of the row # corresponding to segment `id` is `max(data_row_lengths[i])` where # `segment_ids[i]=id`. (shape=[output_nrows]) output_row_lengths = math_ops.maximum( math_ops.unsorted_segment_max(data_row_lengths, segment_ids, num_segments), 0) # Build the splits tensor for the output RaggedTensor. output_splits = array_ops.concat([ array_ops.zeros([1], output_row_lengths.dtype), math_ops.cumsum(output_row_lengths) ], axis=0) # For each row in `data`, find the start & limit position where that row's # values will be aggregated in output.values. 
data_row_to_out_row_start = array_ops.gather(output_splits, segment_ids) data_row_to_out_row_limit = data_row_to_out_row_start + data_row_lengths # For each value in `data.values`, find the position where it will # aggregated in `output.values`. # Get the target output values index for each data values index. data_val_to_out_val_index = range(data_row_to_out_row_start, data_row_to_out_row_limit).values # Recursively aggregate the values. output_values = _ragged_segment_aggregate(unsorted_segment_op, data.values, data_val_to_out_val_index, output_splits[-1], separator) return ragged_tensor.RaggedTensor.from_row_splits( output_values, output_splits, validate=False)
def kernel_classifier_distance_and_std_from_activations(real_activations, generated_activations, max_block_size=1024, dtype=None): """Kernel "classifier" distance for evaluating a generative model. This method computes the kernel classifier distance from activations of real images and generated images. This can be used independently of the kernel_classifier_distance() method, especially in the case of using large batches during evaluation where we would like to precompute all of the activations before computing the classifier distance, or if we want to compute multiple metrics based on the same images. It also returns a rough estimate of the standard error of the estimator. This technique is described in detail in https://arxiv.org/abs/1801.01401. Given two distributions P and Q of activations, this function calculates E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')] - 2 E_{X ~ P, Y ~ Q}[k(X, Y)] where k is the polynomial kernel k(x, y) = ( x^T y / dimension + 1 )^3. This captures how different the distributions of real and generated images' visual features are. Like the Frechet distance (and unlike the Inception score), this is a true distance and incorporates information about the target images. Unlike the Frechet score, this function computes an *unbiased* and asymptotically normal estimator, which makes comparing estimates across models much more intuitive. The estimator used takes time quadratic in max_block_size. Larger values of max_block_size will decrease the variance of the estimator but increase the computational cost. This differs slightly from the estimator used by the original paper; it is the block estimator of https://arxiv.org/abs/1307.1954. The estimate of the standard error will also be more reliable when there are more blocks, i.e. when max_block_size is smaller. NOTE: the blocking code assumes that real_activations and generated_activations are both in random order. If either is sorted in a meaningful order, the estimator will behave poorly. Args: real_activations: 2D Tensor containing activations of real data. Shape is [batch_size, activation_size]. generated_activations: 2D Tensor containing activations of generated data. Shape is [batch_size, activation_size]. max_block_size: integer, default 1024. The distance estimator splits samples into blocks for computational efficiency. Larger values are more computationally expensive but decrease the variance of the distance estimate. Having a smaller block size also gives a better estimate of the standard error. dtype: If not None, coerce activations to this dtype before computations. Returns: The Kernel Inception Distance. A floating-point scalar of the same type as the output of the activations. An estimate of the standard error of the distance estimator (a scalar of the same type). """ real_activations.shape.assert_has_rank(2) generated_activations.shape.assert_has_rank(2) real_activations.shape[1].assert_is_compatible_with( generated_activations.shape[1]) if dtype is None: dtype = real_activations.dtype assert generated_activations.dtype == dtype else: real_activations = math_ops.cast(real_activations, dtype) generated_activations = math_ops.cast(generated_activations, dtype) # Figure out how to split the activations into blocks of approximately # equal size, with none larger than max_block_size.
n_r = array_ops.shape(real_activations)[0] n_g = array_ops.shape(generated_activations)[0] n_bigger = math_ops.maximum(n_r, n_g) n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size)) v_r = n_r // n_blocks v_g = n_g // n_blocks n_plusone_r = n_r - v_r * n_blocks n_plusone_g = n_g - v_g * n_blocks sizes_r = array_ops.concat([ array_ops.fill([n_blocks - n_plusone_r], v_r), array_ops.fill([n_plusone_r], v_r + 1), ], 0) sizes_g = array_ops.concat([ array_ops.fill([n_blocks - n_plusone_g], v_g), array_ops.fill([n_plusone_g], v_g + 1), ], 0) zero = array_ops.zeros([1], dtype=dtypes.int32) inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0) inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0) dim = math_ops.cast(real_activations.shape[1], dtype) def compute_kid_block(i): """Computes the ith block of the KID estimate.""" r_s = inds_r[i] r_e = inds_r[i + 1] r = real_activations[r_s:r_e] m = math_ops.cast(r_e - r_s, dtype) g_s = inds_g[i] g_e = inds_g[i + 1] g = generated_activations[g_s:g_e] n = math_ops.cast(g_e - g_s, dtype) k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3 k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3 k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3 return (-2 * math_ops.reduce_mean(k_rg) + (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) / (m * (m - 1)) + (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n * (n - 1))) ests = map_fn.map_fn( compute_kid_block, math_ops.range(n_blocks), dtype=dtype, back_prop=False) mn = math_ops.reduce_mean(ests) # nn_impl.moments doesn't use the Bessel correction, which we want here n_blocks_ = math_ops.cast(n_blocks, dtype) var = control_flow_ops.cond( math_ops.less_equal(n_blocks, 1), lambda: array_ops.constant(float('nan'), dtype=dtype), lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) / (n_blocks_ - 1)) return mn, math_ops.sqrt(var / n_blocks_)
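# Hedged illustration (not library code): a minimal NumPy sketch of the
# block-splitting arithmetic used above. n samples are divided into n_blocks
# whose sizes differ by at most one, and the cumulative sum of the sizes gives
# the slice boundaries; the sample numbers are hypothetical.
import numpy as np

n, max_block_size = 10, 4
n_blocks = int(np.ceil(n / max_block_size))    # 3 blocks
v = n // n_blocks                              # base block size: 3
n_plusone = n - v * n_blocks                   # blocks that get one extra element: 1
sizes = np.concatenate([np.full(n_blocks - n_plusone, v),
                        np.full(n_plusone, v + 1)])       # [3, 3, 4]
inds = np.concatenate([[0], np.cumsum(sizes)])            # [0, 3, 6, 10]
# Block i is the slice [inds[i], inds[i + 1]).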
def _ragged_segment_aggregate(unsorted_segment_op, data, segment_ids, num_segments, name=None): """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`. Returns a RaggedTensor `output` with `num_segments` rows, where the row `output[i]` is formed by combining all rows of `data` whose corresponding `segment_id` is `i`. The values in each row are combined using `unsorted_segment_op`. The length of the row `output[i]` will be the maximum of the lengths of all rows of `data` whose corresponding `segment_id` is `i`. If no `data` rows correspond to a given segment ID, then the output row for that segment ID will be empty. Args: unsorted_segment_op: The tensorflow `op` that should be used to combine values in each row. Must have the same signature and basic behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc. data: A `RaggedTensor` containing the values to be combined. segment_ids: A `Tensor` or `RaggedTensor`. Must have type `int64` or `int32`. `segment_ids.shape` must be a prefix of `data.shape`. `segment_ids` is not required to be sorted. num_segments: An `int32` or `int64` scalar. name: A name prefix for the returned tensor (optional). Returns: A `RaggedTensor` containing the aggregated values. The returned tensor has the same dtype as `data`, and its shape is `[num_segments] + data.shape[segment_ids.rank:]`. Raises: ValueError: If segment_ids.shape is not a prefix of data.shape. """ if not (ragged_tensor.is_ragged(data) or ragged_tensor.is_ragged(segment_ids)): return unsorted_segment_op(data, segment_ids, num_segments, name) with ops.name_scope(name, 'RaggedSegment', [data, segment_ids, num_segments]) as name: data = ragged_factory_ops.convert_to_tensor_or_ragged_tensor( data, name='data') segment_ids = ragged_factory_ops.convert_to_tensor_or_ragged_tensor( segment_ids, name='segment_ids') if ragged_tensor.is_ragged(segment_ids): if not ragged_tensor.is_ragged(data): raise ValueError('segment_ids.shape must be a prefix of data.shape, ' 'but segment_ids is ragged and data is not.') check_splits = check_ops.assert_equal( segment_ids.row_splits, data.row_splits, message='segment_ids.shape must be a prefix of data.shape') with ops.control_dependencies([check_splits]): return _ragged_segment_aggregate(unsorted_segment_op, data.values, segment_ids.values, num_segments, name) segment_ids = math_ops.cast(segment_ids, dtypes.int64) # Find the length of each row in data. (dtype=int64, shape=[data_nrows]) data_row_lengths = data.row_splits[1:] - data.row_splits[:-1] # Find the length that each output row will have. The length of the row # corresponding to segment `id` is `max(data_row_lengths[i])` where # `segment_ids[i]=id`. (dtype=int64, shape=[output_nrows]) output_row_lengths = math_ops.maximum( math_ops.unsorted_segment_max(data_row_lengths, segment_ids, num_segments), 0) assert output_row_lengths.dtype == dtypes.int64 # Build the splits tensor for the output RaggedTensor. output_splits = array_ops.concat( [ array_ops.zeros([1], dtypes.int64), math_ops.cumsum(output_row_lengths) ], axis=0) # For each row in `data`, find the start & limit position where that row's # values will be aggregated in output.values. data_row_to_out_row_start = array_ops.gather(output_splits, segment_ids) data_row_to_out_row_limit = data_row_to_out_row_start + data_row_lengths # For each value in `data.values`, find the position where it will # aggregated in `output.values`. # Get the target output values index for each data values index. 
data_val_to_out_val_index = range(data_row_to_out_row_start, data_row_to_out_row_limit).values # Recursively aggregate the values. output_values = _ragged_segment_aggregate(unsorted_segment_op, data.values, data_val_to_out_val_index, output_splits[-1]) return ragged_factory_ops.from_row_splits(output_values, output_splits)
def _computeLogSumExp(self, x, **kwargs): result_naive = math_ops.cumsum(math_ops.exp(x), **kwargs) result_fused = math_ops.exp(math_ops.cumulative_logsumexp(x, **kwargs)) return result_naive, result_fused
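# Hedged illustration (not library code): the identity checked above is
# exp(cumulative_logsumexp(x)) == cumsum(exp(x)). A minimal NumPy reference shows
# why the fused log-space form is preferred: it stays finite where the naive
# cumsum of exp(x) overflows.
import numpy as np

def cumulative_logsumexp_ref(x):
  out = np.empty_like(x, dtype=float)
  running = -np.inf
  for i, xi in enumerate(x):
    running = np.logaddexp(running, xi)  # stable log(exp(running) + exp(xi))
    out[i] = running
  return out

x = np.array([1000.0, 1000.5, 1001.0])
np.cumsum(np.exp(x))           # naive path overflows: [inf, inf, inf]
cumulative_logsumexp_ref(x)    # finite: approximately [1000.0, 1000.97, 1001.68]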
def from_value_rowids(cls, value_rowids, nrows=None, validate=True, preferred_dtype=None): """Creates a `RowPartition` with rows partitioned by `value_rowids`. This `RowPartition` divides a sequence `values` into rows by specifying which row each value should be added to: ```python rows = [[] for _ in range(nrows)] for (value, rowid) in zip(values, value_rowids): rows[rowid].append(value) ``` Args: value_rowids: A 1-D integer tensor with shape `[nvals]`, which corresponds one-to-one with `values`, and specifies each value's row index. Must be nonnegative, and must be sorted in ascending order. nrows: An integer scalar specifying the number of rows. This should be specified if the `RowPartition` may contain empty trailing rows. Must be greater than `value_rowids[-1]` (or greater than or equal to zero if `value_rowids` is empty). Defaults to `value_rowids[-1] + 1` (or zero if `value_rowids` is empty). validate: If true, then use assertions to check that the arguments form a valid `RowPartition`. preferred_dtype: The dtype to encode value_rowids if it doesn't already have one. The default is tf.int64. Returns: A `RowPartition`. Raises: ValueError: If `nrows` is incompatible with `value_rowids`. #### Example: >>> print(RowPartition.from_value_rowids( ... value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], ... nrows=4)) tf.RowPartition(row_splits=tf.Tensor([0 4 4 7 8], shape=(5,), dtype=int64)) """ if not isinstance(validate, bool): raise TypeError("validate must have type bool") with ops.name_scope(None, "RowPartitionFromValueRowIds", [value_rowids, nrows]): value_rowids = cls._convert_row_partition(value_rowids, "value_rowids", preferred_dtype) if nrows is None: const_rowids = tensor_util.constant_value(value_rowids) if const_rowids is None: nrows = array_ops.concat([value_rowids[-1:], [-1]], axis=0)[0] + 1 const_nrows = None else: const_nrows = const_rowids[-1] + 1 if const_rowids.size > 0 else 0 nrows = ops.convert_to_tensor( const_nrows, value_rowids.dtype, name="nrows") else: nrows = ops.convert_to_tensor(nrows, value_rowids.dtype, "nrows") const_nrows = tensor_util.constant_value(nrows) if const_nrows is not None: if const_nrows < 0: raise ValueError("Expected nrows >= 0; got %d" % const_nrows) const_rowids = tensor_util.constant_value(value_rowids) if const_rowids is not None and const_rowids.size > 0: if not const_nrows >= const_rowids[-1] + 1: raise ValueError( "Expected nrows >= value_rowids[-1] + 1; got nrows=%d, " "value_rowids[-1]=%d" % (const_nrows, const_rowids[-1])) value_rowids.shape.assert_has_rank(1) nrows.shape.assert_has_rank(0) if validate: msg = ("Arguments to from_value_rowids do not form a valid " "RowPartition") checks = [ check_ops.assert_rank(value_rowids, 1, message=msg), check_ops.assert_rank(nrows, 0, message=msg), check_ops.assert_non_negative(value_rowids[:1], message=msg), _assert_monotonic_increasing(value_rowids, message=msg), check_ops.assert_less(value_rowids[-1:], nrows, message=msg), ] value_rowids = control_flow_ops.with_dependencies(checks, value_rowids) # Convert value_rowids & nrows to row_splits. # Note: we don't use segment_ids_to_row_splits() here because we want # to save the intermediate value `row_lengths`, so we can cache it. # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the # cast.
value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32) nrows_int32 = math_ops.cast(nrows, dtypes.int32) row_lengths = math_ops.bincount( value_rowids_int32, minlength=nrows_int32, maxlength=nrows_int32, dtype=value_rowids.dtype) row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0) if const_nrows is not None: row_lengths.set_shape([const_nrows]) row_splits.set_shape([const_nrows + 1]) return cls( row_splits=row_splits, row_lengths=row_lengths, value_rowids=value_rowids, nrows=nrows, internal=_row_partition_factory_key)
def _list_mle_loss(labels, logits, weights=None, lambda_weight=None, reduction=core_losses.Reduction.SUM_BY_NONZERO_WEIGHTS, name=None, seed=None): """Computes the ListMLE loss [Xia et al. 2008] for a list. Given the labels of graded relevance l_i and the logits s_i, we calculate the ListMLE loss for the given list. The `lambda_weight` re-weights examples based on l_i and r_i. The recommended weighting scheme is the formulation presented in the "Position-Aware ListMLE" paper (Lan et. al) and available using create_p_list_mle_lambda_weight() factory function above. Args: labels: A `Tensor` of the same shape as `logits` representing graded relevance. logits: A `Tensor` with shape [batch_size, list_size]. Each value is the ranking score of the corresponding item. weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise weights, or a `Tensor` with shape [batch_size, list_size] for item-wise weights. lambda_weight: A `DCGLambdaWeight` instance. reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to reduce training loss over batch. name: A string used as the name for this loss. seed: A randomization seed used when shuffling ground truth permutations. Returns: An op for the ListMLE loss. """ with ops.name_scope(name, 'list_mle_loss', (labels, logits, weights)): is_label_valid = utils.is_label_valid(labels) # Reset the invalid labels to 0 and reset the invalid logits to a logit with # ~= 0 contribution. labels = array_ops.where(is_label_valid, labels, array_ops.zeros_like(labels)) logits = array_ops.where( is_label_valid, logits, math_ops.log(_EPSILON) * array_ops.ones_like(logits)) weights = 1.0 if weights is None else ops.convert_to_tensor(weights) weights = array_ops.squeeze(weights) # Shuffle labels and logits to add randomness to sort. shuffled_indices = utils.shuffle_valid_indices(is_label_valid, seed) shuffled_labels = array_ops.gather_nd(labels, shuffled_indices) shuffled_logits = array_ops.gather_nd(logits, shuffled_indices) sorted_labels, sorted_logits = utils.sort_by_scores( shuffled_labels, [shuffled_labels, shuffled_logits]) raw_max = math_ops.reduce_max(sorted_logits, axis=1, keepdims=True) sorted_logits = sorted_logits - raw_max sums = math_ops.cumsum(math_ops.exp(sorted_logits), axis=1, reverse=True) sums = math_ops.log(sums) - sorted_logits if lambda_weight is not None and isinstance(lambda_weight, ListMLELambdaWeight): sums *= lambda_weight.individual_weights(sorted_labels) negative_log_likelihood = math_ops.reduce_sum(sums, 1) return core_losses.compute_weighted_loss(negative_log_likelihood, weights=weights, reduction=reduction)
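# Hedged illustration (not library code): a minimal NumPy sketch of the
# reverse-cumsum step above. The ListMLE negative log-likelihood sums
# log(sum_{j >= i} exp(s_j)) - s_i over positions i of the label-sorted scores;
# the scores below are hypothetical. (The real code also subtracts the row max
# from the logits first for numerical stability.)
import numpy as np

sorted_logits = np.array([2.0, 0.5, -1.0])                  # already sorted by label
rev_cumsum = np.cumsum(np.exp(sorted_logits)[::-1])[::-1]   # sum_{j >= i} exp(s_j)
neg_log_likelihood = np.sum(np.log(rev_cumsum) - sorted_logits)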
def _add_batched_ragged_partition(rt, partition, tensor_dict, feature_key, validate, outer_splits=None): """Adds a batched ragged partition tensor to a batched ragged tensor. Args: rt: A RaggedTensor with shape [batch_size, ...]. partition: The partition configuration object. Specifies the key that should be used to look up the partition tensor (unless partition is a RaggedFeature.UniformRowLength, in which case there is no partition tensor). The specified tensor must have shape [batch_size, ...]. tensor_dict: The dictionary mapping keys to tensors. feature_key: The name of the feature being parsed (for error messages). validate: Whether to validate that the values form a valid RaggedTensor. outer_splits: If not None, then we have two batch dimensions, and this is the row-splits for the collapsed batch dimension. Every partition tensor must have an outer row_splits that matches this value. Returns: A new RaggedTensor where each batch item `rt[i]` has been partitioned using the `partition_t[i]`. """ if isinstance(partition, RaggedFeature.UniformRowLength): if rt.ragged_rank > 1: length = ops.convert_to_tensor(partition.length, rt.row_splits.dtype) return ragged_tensor.RaggedTensor.from_row_splits( ragged_tensor.RaggedTensor.from_uniform_row_length( rt.values, length, validate=validate), rt.row_splits // length, validate=validate) else: reshaped_vals = array_ops.reshape( rt.values, array_ops.concat([[-1, partition.length], array_ops.shape(rt.values)[1:]], axis=0)) return ragged_tensor.RaggedTensor.from_row_splits( reshaped_vals, rt.row_splits // partition.length, validate=validate) partition_t = tensor_dict[partition.key] if partition_t.values.dtype != rt.row_splits.dtype: partition_t = math_ops.cast(partition_t, rt.row_splits.dtype) checks = [] if outer_splits is not None: if validate: checks.append( check_ops.assert_equal( outer_splits, partition_t.row_splits, message="Feature %s: values and partitions are not aligned" % feature_key)) partition_t = partition_t.values with ops.control_dependencies(checks): if isinstance(partition, (RaggedFeature.RowSplits, RaggedFeature.RowLimits)): if isinstance(partition, RaggedFeature.RowSplits): partition_t = partition_t[:, 1:] adjusted_limits = partition_t.values + array_ops.repeat( rt.row_starts(), partition_t.row_lengths()) return partition_t.with_values( ragged_tensor.RaggedTensor.from_row_limits(rt.values, adjusted_limits, validate=validate)) elif isinstance(partition, RaggedFeature.RowStarts): adjusted_starts = partition_t.values + array_ops.repeat( rt.row_starts(), partition_t.row_lengths()) return partition_t.with_values( ragged_tensor.RaggedTensor.from_row_starts(rt.values, adjusted_starts, validate=validate)) elif isinstance(partition, RaggedFeature.RowLengths): return partition_t.with_values( ragged_tensor.RaggedTensor.from_row_lengths(rt.values, partition_t.values, validate=validate)) elif isinstance(partition, RaggedFeature.ValueRowIds): nrows = math_ops.maximum( # number of rows in each batch item ragged_math_ops.reduce_max(partition_t + 1, axis=1), 0) adjusted_rowids = partition_t.values + array_ops.repeat( math_ops.cumsum(nrows, exclusive=True), partition_t.row_lengths()) return ragged_tensor.RaggedTensor.from_row_lengths( ragged_tensor.RaggedTensor.from_value_rowids( rt.values, adjusted_rowids, validate=validate), nrows, validate=validate) raise ValueError(f"Unhandled partition type {partition!r}")
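# Hedged illustration (not library code): a minimal NumPy sketch of the
# ValueRowIds adjustment above. Per-batch-item row ids are shifted by the
# exclusive cumulative sum of nrows so that they index into one concatenated
# set of rows; the sample values are hypothetical.
import numpy as np

nrows = np.array([2, 3])                                # rows per batch item
rowids = [np.array([0, 0, 1]), np.array([0, 2, 2])]     # per-item value_rowids
offsets = np.concatenate([[0], np.cumsum(nrows)[:-1]])  # exclusive cumsum: [0, 2]
adjusted = np.concatenate([ids + off for ids, off in zip(rowids, offsets)])
# adjusted == [0, 0, 1, 2, 4, 4]: row ids into the flattened batch of 5 rows.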
def indicators_to_sparse_ids(indicators, ignore_value=None, dtype=dtypes.int64): """Convert a dense indicator tensor to sparse IDs. This is commonly used for converting a dense classification label to sparse. In the following example, we have an input of shape (2, 2, num_classes), where num_classes=4. ```python indicators = [ [ [0, 0, 1, 0], [0, 0, 0, 0] ], [ [1, 0, 1, 1], [0, 0, 1, 0] ] ] sparse_ids = indicator_to_sparse_ids(indicators) ``` `sparse_ids` in "jagged" format: [ [ [2], [] ], [ [0, 2, 3], [2] ] ] `sparse_ids` in `SparseTensor` format: ```python { indices: [[0, 0, 1], [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 1, 0]], values: [2, 0, 2, 3, 2], dense_shape: [2, 2, 3] } ``` Args: indicators: Dense `Tensor` of shape `(d0, ..., dn, num_classes)`. `ignore_value` values are ignored. For other values (typically, ones), the index along the last dimension is returned. ignore_value: Entries in `indicators` equal to this value will be absent from the returned `SparseTensor`. If `None`, default value of `indicators` dtype will be used (e.g. '' for `str`, 0 for `int`). dtype: Type of result, must be integer type. Returns: `SparseTensor` of type `dtype` and shape `(d0, ..., dn, max_num_labels)`, where `max_num_labels` is the maximum number of non-zero values in any row (in the example above, row (1, 1) has 3 non-zero values, so the result shape is (2, 2, 3)). The values of this `SparseTensor` are in the range `[0, num_classes)` and correspond to the index of non-ignore values along the last dimension of `indicators`. Raises: ValueError: if `dtype` is not integer. """ if not dtype.is_integer: raise ValueError("Invalid dtype {} not integer.".format(dtype)) with ops.name_scope(None, "indicators_to_sparse_ids", (indicators, ignore_value)): # Convert indicators to binary ones and zeros. We use int64 since # SparseTensor requires int64 indices. indicators = ops.convert_to_tensor(indicators, name="indicators") missing_indicators = math_ops.equal(indicators, _ignore_value_tensor( indicators.dtype, ignore_value), name="missing") zeros_like_indicators = array_ops.zeros_like(indicators, dtype=dtypes.int64, name="zeros") binary_indicators = array_ops.where(missing_indicators, zeros_like_indicators, array_ops.ones_like( indicators, dtype=dtypes.int64, name="ones"), name="binary_indicators") # Use cumsum along the last dimension to generate per-row indexes. # Note that these are 1-based (since 0 indicates missing values), so they're # off-by-1 from the actual indices. We'll subtract 1 below. Since they're # off-by-one, the max value is the size of the last dimension (i.e., # last_index + 1). row_index_indicators = array_ops.where( missing_indicators, zeros_like_indicators, math_ops.cumsum(binary_indicators, axis=-1), "row_index_indicators") result_last_dim = array_ops.reshape( math_ops.reduce_max(row_index_indicators), shape=(1, ), name="result_last_dim") # Convert to a SparseTensor. The values of this SparseTensor are the last # indices of our result, and the last indices of this SparseTensor (i.e., # the class IDs indicated by `indicators`) are the values of our result, so # we use tensor slicing and concat to swap them. 
sparse_row_index_indicators = dense_to_sparse_tensor( row_index_indicators, ignore_value=0) return sparse_tensor.SparseTensor( indices=array_ops.concat( (sparse_row_index_indicators.indices[:, :-1], array_ops.reshape(sparse_row_index_indicators.values - 1, (-1, 1))), axis=1, name="indices"), values=math_ops.cast(sparse_row_index_indicators.indices[:, -1], dtype=dtype, name="values"), dense_shape=array_ops.concat( (sparse_row_index_indicators.dense_shape[0:-1], result_last_dim), axis=0, name="dense_shape"))
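# Hedged illustration (not library code): a minimal NumPy sketch of the cumsum
# trick above. A cumulative sum of the binary indicators along the last axis
# numbers the non-ignored entries 1..k within each row, and masked entries stay
# zero; the indicator values are hypothetical.
import numpy as np

indicators = np.array([[0, 0, 1, 0],
                       [1, 0, 1, 1]])
binary = (indicators != 0).astype(np.int64)
row_index = np.where(binary == 0, 0, np.cumsum(binary, axis=-1))
# row_index == [[0, 0, 1, 0],
#               [1, 0, 2, 3]]  -> 1-based position of each kept class id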
def f(x): return math_ops.cumsum(x)
def kernel_classifier_distance_and_std_from_activations( real_activations, generated_activations, max_block_size=10, dtype=None): real_activations.shape.assert_has_rank(2) generated_activations.shape.assert_has_rank(2) real_activations.shape[1].assert_is_compatible_with( generated_activations.shape[1]) if dtype is None: dtype = real_activations.dtype assert generated_activations.dtype == dtype else: real_activations = math_ops.cast(real_activations, dtype) generated_activations = math_ops.cast(generated_activations, dtype) n_r = array_ops.shape(real_activations)[0] n_g = array_ops.shape(generated_activations)[0] n_bigger = math_ops.maximum(n_r, n_g) n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size)) v_r = n_r // n_blocks v_g = n_g // n_blocks n_plusone_r = n_r - v_r * n_blocks n_plusone_g = n_g - v_g * n_blocks sizes_r = array_ops.concat([ array_ops.fill([n_blocks - n_plusone_r], v_r), array_ops.fill([n_plusone_r], v_r + 1), ], 0) sizes_g = array_ops.concat([ array_ops.fill([n_blocks - n_plusone_g], v_g), array_ops.fill([n_plusone_g], v_g + 1), ], 0) zero = array_ops.zeros([1], dtype=dtypes.int32) inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0) inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0) dim = math_ops.cast(array_ops.shape(real_activations)[1], dtype) def compute_kid_block(i): r_s = inds_r[i] r_e = inds_r[i + 1] r = real_activations[r_s:r_e] m = math_ops.cast(r_e - r_s, dtype) g_s = inds_g[i] g_e = inds_g[i + 1] g = generated_activations[g_s:g_e] n = math_ops.cast(g_e - g_s, dtype) k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3 k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3 k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3 return (-2 * math_ops.reduce_mean(k_rg) + (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) / (m * (m - 1)) + (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n * (n - 1))) ests = functional_ops.map_fn(compute_kid_block, math_ops.range(n_blocks), dtype=dtype, back_prop=False) mn = math_ops.reduce_mean(ests) n_blocks_ = math_ops.cast(n_blocks, dtype) var = control_flow_ops.cond( math_ops.less_equal(n_blocks, 1), lambda: array_ops.constant(float('nan'), dtype=dtype), lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) / (n_blocks_ - 1)) return mn, math_ops.sqrt(var / n_blocks_)
def loop_fn(i): a = array_ops.gather(x, i) return math_ops.cumsum( a, axis=axis, exclusive=exclusive, reverse=reverse)
def indicators_to_sparse_ids(indicators, ignore_value=None, dtype=dtypes.int64): """Convert a dense indicator tensor to sparse IDs. This is commonly used for converting a dense classification label to sparse. In the following example, we have an input of shape (2, 2, num_classes), where num_classes=4. indicators = [ [[0, 0, 1, 0], [0, 0, 0, 0]], [[1, 0, 1, 1], [0, 0, 1, 0]], ] indicator_to_sparse_ids(indicators) => [ [[2], []], [[0, 2, 3], [2]], ] Args: indicators: Dense `Tensor` of shape `(d0, ..., dn, num_classes)`. This must have a statically defined rank. `ignore_value` values are ignored. For other values (typically, ones), the index along the last dimension is returned. ignore_value: Entries in `indicators` equal to this value will be absent from the returned `SparseTensor`. If `None`, default value of `indicators` dtype will be used (e.g. '' for `str`, 0 for `int`). dtype: Type of result, must be integer type. Returns: `tf.int64` `SparseTensor` of shape `(d0, ..., dn, max_num_labels)`, where `max_num_labels` is the maximum number of non-zero values in any row (in the example above, row (1, 1) has 3 non-zero values, so the result shape is (2, 2, 3)). The values of this `SparseTensor` are in the range `[0, num_classes)` and correspond to the index of non-empty values along the last dimension of `indicators`. Raises: ValueError: if `dtype` is not integer. """ if not dtype.is_integer: raise ValueError("Invalid dtype {} not integer.".format(dtype)) with ops.name_scope( None, "indicators_to_sparse_ids", (indicators, ignore_value)): # Convert indicators to binary ones and zeros. We use int64 since # SparseTensor requires int64 indices. indicators = ops.convert_to_tensor(indicators, name="indicators") if ignore_value is None: ignore_value = _ignore_value(indicators.dtype) missing_indicators = math_ops.equal( indicators, ignore_value, name="missing") zeros_like_indicators = array_ops.zeros_like( indicators, dtype=dtypes.int64, name="zeros") binary_indicators = array_ops.where( missing_indicators, zeros_like_indicators, array_ops.ones_like(indicators, dtype=dtypes.int64, name="ones"), name="binary_indicators") # Use cumsum along the last dimension to generate per-row indexes. # Note that these are 1-based (since 0 indicates missing values), so they're # off-by-1 from the actual indices. We'll subtract 1 below. Since they're # off-by-one, the max value is the size of last dimension (i.e., # last_index + 1). row_index_indicators = array_ops.where( missing_indicators, zeros_like_indicators, math_ops.cumsum(binary_indicators, axis=-1), "row_index_indicators") result_last_dim = array_ops.reshape( math_ops.reduce_max(row_index_indicators), shape=(1,), name="result_last_dim") # Convert to a SparseTensor. The values of this SparseTensor are the last # indices of our result, and the last indices of this SparseTensor (i.e., # the class IDs indicated by `indicators`) are the values of our result, so # we use unstack/stack to swap them. sparse_row_index_indicators = dense_to_sparse_tensor( row_index_indicators, ignore_value=0) index_columns = array_ops.unstack( sparse_row_index_indicators.indices, axis=1) return sparse_tensor.SparseTensor( indices=array_ops.stack( index_columns[0:-1] + [sparse_row_index_indicators.values - 1], axis=1, name="indices"), values=math_ops.cast(index_columns[-1], dtype=dtype, name="values"), dense_shape=array_ops.concat( (sparse_row_index_indicators.dense_shape[0:-1], result_last_dim), axis=0, name="dense_shape"))
def loop_fn(i): a = array_ops.gather(x, i) return math_ops.cumsum(a, axis=axis, exclusive=exclusive, reverse=reverse)