def testRandomShuffleScalarError(self):
  original = StructuredTensor.from_pyval(
      {"x0": 2, "y": {"z": [[3, 5], [4]]}})  # pyformat: disable
  with self.assertRaisesRegex(ValueError, "scalar"):
    random_ops.random_shuffle(original)
def call(self, inputs, training=None, **kwargs):
  inputs, memory = inputs
  batch_size = K.shape(inputs)[0]
  seq_len = K.shape(inputs)[1]
  mem_mask = K.tile(K.ones_like(memory[:, :, :1], dtype=K.floatx()),
                    [1, 1, seq_len])
  ranges = K.tile(K.expand_dims(K.arange(0, seq_len), axis=-1),
                  [1, batch_size])
  if self.enabled:
    shuffle = random_shuffle(ranges)
  else:
    shuffle = ranges
  if self.directional:
    shuffled = K.in_train_phase(shuffle, ranges, training)
  else:
    if self.enabled:
      shuffled = K.in_train_phase(shuffle, ranges + seq_len, training)
    else:
      shuffled = ranges + seq_len
  ranges = K.expand_dims(K.permute_dimensions(ranges, [1, 0]), axis=-1)
  shuffled = K.expand_dims(K.permute_dimensions(shuffled, [1, 0]), axis=1)
  content_mask = K.cast(ranges <= shuffled, dtype=K.floatx())
  ranges = K.arange(0, seq_len)
  eye = K.equal(K.expand_dims(ranges, axis=0), K.expand_dims(ranges, axis=-1))
  eye = K.expand_dims(K.cast(eye, dtype=K.floatx()), axis=0)
  query_mask = content_mask * (1.0 - eye)
  content_mask = K.concatenate([mem_mask, content_mask], axis=1)
  query_mask = K.concatenate([mem_mask, query_mask], axis=1)
  return [
      K.permute_dimensions(content_mask, [0, 2, 1]),
      K.permute_dimensions(query_mask, [0, 2, 1]),
  ]
def rotate90(images, k=1, is_random=False, seed=None, name=None):
  """Rotate (randomly) images counter-clockwise by 90 degrees.

  (A mirror to tf.image.rot90)

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or
      3-D Tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by
      90 degrees.
    is_random: `bool`, If True, adjust randomly.
    seed: A Python integer. Used to create a random seed.
      See @{tf.set_random_seed}.
    name: A name for this operation (optional).

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`.

  Raises:
    ValueError: if the shape of `images` is not supported.
  """
  if is_random:
    k = random_ops.random_shuffle([0, 1, 2, 3], seed=seed)[0]
  images_shape = get_shape(images)
  if len(images_shape) > 4:
    raise ValueError("'image' must have either 3 or 4 dimensions, "
                     "received `{}`.".format(images_shape))
  if len(images_shape) == 4:
    return tf.map_fn(lambda img: tf.image.rot90(img, k, name), images)
  return tf.image.rot90(images, k, name)
def rotate90(images, k=1, is_random=False, seed=None, name=None):
  """Rotate (randomly) images counter-clockwise by 90 degrees.

  (A mirror to tf.image.rot90)

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or
      3-D Tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by
      90 degrees.
    is_random: `bool`, If True, adjust randomly.
    seed: A Python integer. Used to create a random seed.
      See @{tf.set_random_seed}.
    name: A name for this operation (optional).

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`.

  Raises:
    ValueError: if the shape of `images` is not supported.
  """
  if is_random:
    k = random_ops.random_shuffle([0, 1, 2, 3], seed=seed)[0]
  images_shape = get_shape(images)
  if len(images_shape) > 4:
    raise ValueError("'image' must have either 3 or 4 dimensions, "
                     "received `{}`.".format(images_shape))
  if len(images_shape) == 4:
    return tf.map_fn(lambda img: tf.image.rot90(img, k, name), images)
  return tf.image.rot90(images, k, name)
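# Hypothetical usage sketch for the rotate90 helper above (not part of the
# original snippets); assumes TensorFlow is imported as `tf` and that the
# helper's own dependencies (get_shape, random_ops) are available.
images = tf.random.uniform([8, 64, 64, 3])               # a made-up 4-D batch
rotated_180 = rotate90(images, k=2)                       # deterministic rotation
rotated_rand = rotate90(images, is_random=True, seed=42)  # k sampled from {0, 1, 2, 3}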
def train_validation_split(arrays, validation_split, shuffle=True):
  """Split arrays into random train and validation subsets.

  Arguments:
    arrays: Tensors to split. Allowed inputs are arbitrarily nested structures
      of Tensors and NumPy arrays.
    validation_split: Float between 0 and 1. The proportion of the dataset to
      include in the validation split. The rest of the dataset will be
      included in the training split.
    shuffle: Bool. Whether to shuffle the data before performing a split. If
      `False`, the last `validation_split` fraction of the training data will
      become the validation split.

  Returns:
    `(train_arrays, validation_arrays)`
  """

  def _can_split(t):
    tensor_types = (ops.Tensor, np.ndarray)
    if pd:
      tensor_types = (ops.Tensor, np.ndarray, pd.Series, pd.DataFrame)
    return isinstance(t, tensor_types) or t is None

  flat_arrays = nest.flatten(arrays)
  if not all(_can_split(t) for t in flat_arrays):
    raise ValueError(
        "`validation_split` is only supported for Tensors or NumPy "
        "arrays, found: {}".format(arrays))

  if all(t is None for t in flat_arrays):
    return arrays, arrays

  first_non_none = None
  for t in flat_arrays:
    if t is not None:
      first_non_none = t
      break

  # Assumes all arrays have the same batch shape or are `None`.
  batch_dim = int(first_non_none.shape[0])
  indices = ops.convert_to_tensor_v2(range(batch_dim))
  if shuffle:
    indices = random_ops.random_shuffle(indices)
  split_at = int(math.floor(batch_dim * (1. - validation_split)))
  train_indices = indices[:split_at]
  val_indices = indices[split_at:]

  def _split(t, indices):
    if t is None:
      return t
    t = ops.convert_to_tensor_v2(t)
    return array_ops.gather_v2(t, indices)

  train_arrays = nest.map_structure(
      functools.partial(_split, indices=train_indices), arrays)
  val_arrays = nest.map_structure(
      functools.partial(_split, indices=val_indices), arrays)

  return train_arrays, val_arrays
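# Hypothetical usage sketch for train_validation_split above (not part of the
# original snippets); the NumPy arrays are made-up placeholders.
features = np.arange(100, dtype=np.float32).reshape(50, 2)
labels = np.arange(50, dtype=np.int32)
(train_x, train_y), (val_x, val_y) = train_validation_split(
    (features, labels), validation_split=0.2, shuffle=True)
# 40 shuffled rows end up in the training split and 10 in the validation
# split, with the same row permutation applied to every array in the structure.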
def permutation(_):
  # It turns out to be more performant to make a new set of indices rather
  # than reusing the same range Tensor. (presumably because of buffer
  # forwarding.)
  indices = math_ops.range(num_samples, dtype=dtypes.int64)
  if shuffle and shuffle != "batch":
    indices = random_ops.random_shuffle(indices)
  return indices
def input_producer(input_tensor,
                   element_shape=None,
                   num_epochs=None,
                   shuffle=True,
                   seed=None,
                   capacity=32,
                   shared_name=None,
                   summary_name=None,
                   name=None):
  """Output the rows of `input_tensor` to a queue for an input pipeline.

  Args:
    input_tensor: A tensor with the rows to produce. Must be at least
      one-dimensional. Must either have a fully-defined shape, or
      `element_shape` must be defined.
    element_shape: (Optional.) A `TensorShape` representing the shape of a
      row of `input_tensor`, if it cannot be inferred.
    num_epochs: (Optional.) An integer. If specified `input_producer` produces
      each row of `input_tensor` `num_epochs` times before generating an
      `OutOfRange` error. If not specified, `input_producer` can cycle through
      the rows of `input_tensor` an unlimited number of times.
    shuffle: (Optional.) A boolean. If true, the rows are randomly shuffled
      within each epoch.
    seed: (Optional.) An integer. The seed to use if `shuffle` is true.
    capacity: (Optional.) The capacity of the queue to be used for buffering
      the input.
    shared_name: (Optional.) If set, this queue will be shared under the given
      name across multiple sessions.
    summary_name: (Optional.) If set, a scalar summary for the current queue
      size will be generated, using this name as part of the tag.
    name: (Optional.) A name for queue.

  Returns:
    A queue with the output rows. A `QueueRunner` for the queue is added to
    the current `QUEUE_RUNNER` collection of the current graph.

  Raises:
    ValueError: If the shape of the input cannot be inferred from the
      arguments.
  """
  with ops.name_scope(name, "input_producer", [input_tensor]):
    input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
    element_shape = input_tensor.get_shape()[1:].merge_with(element_shape)
    if not element_shape.is_fully_defined():
      raise ValueError("Either `input_tensor` must have a fully defined shape "
                       "or `element_shape` must be specified")

    if shuffle:
      input_tensor = random_ops.random_shuffle(input_tensor, seed=seed)

    input_tensor = limit_epochs(input_tensor, num_epochs)

    q = data_flow_ops.FIFOQueue(capacity=capacity,
                                dtypes=[input_tensor.dtype.base_dtype],
                                shapes=[element_shape],
                                shared_name=shared_name,
                                name=name)
    enq = q.enqueue_many([input_tensor])
    queue_runner.add_queue_runner(queue_runner.QueueRunner(q, [enq]))
    if summary_name is not None:
      logging_ops.scalar_summary("queue/%s/%s" % (q.name, summary_name),
                                 math_ops.cast(q.size(), dtypes.float32) *
                                 (1. / capacity))
    return q
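# Hypothetical usage sketch for input_producer above (not part of the original
# snippets); follows the TF1 queue-runner pattern the function is written for.
rows = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]  # made-up data with a fully defined row shape
queue = input_producer(rows, num_epochs=2, shuffle=True, seed=1)
next_row = queue.dequeue()  # yields one shuffled row per step once the
                            # graph's queue runners have been started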
def testWarnOnSeedFromOuterGraph(self):
  with ops.Graph().as_default() as g:
    g.seed = 10
    warnings.simplefilter("always")

    # map_fun doesn't use seed, so no warning is generated.
    with warnings.catch_warnings(record=True) as w:
      _ = dataset_ops.Dataset.range(10).map(math_ops.square)
    found_warning = False
    for warning in w:
      if ("Explicitly set the seed in the function if this is not the "
          "intended behavior" in str(warning)):
        found_warning = True
        break
    self.assertFalse(found_warning)

    def random_func(x):
      x = math_ops.add(x, 1)
      random_ops.random_shuffle([x, math_ops.square(x)])
      return x

    with warnings.catch_warnings(record=True) as w:
      _ = dataset_ops.Dataset.range(10).map(random_func)
    self.assertGreaterEqual(len(w), 1)
    found_warning = False
    for warning in w:
      if ("Explicitly set the seed in the function if this is not the "
          "intended behavior" in str(warning)):
        found_warning = True
        break
    self.assertTrue(found_warning)

    def random_func_seeded(x):
      ops.get_default_graph().seed = None
      random_ops.random_shuffle(x)
      return x

    with warnings.catch_warnings(record=True) as w:
      _ = dataset_ops.Dataset.range(10).batch(2).map(random_func_seeded)
    found_warning = False
    for warning in w:
      if ("Explicitly set the seed in the function if this is not the "
          "intended behavior" in str(warning)):
        found_warning = True
        break
    self.assertFalse(found_warning)

    with warnings.catch_warnings(record=True) as w:
      _ = dataset_ops.Dataset.range(10).batch(
          2).map(lambda x: random_ops.random_shuffle(x, seed=37))
    found_warning = False
    for warning in w:
      if ("Explicitly set the seed in the function if this is not the "
          "intended behavior" in str(warning)):
        found_warning = True
        break
    self.assertFalse(found_warning)
def input_producer(input_tensor,
                   element_shape=None,
                   num_epochs=None,
                   shuffle=True,
                   seed=None,
                   capacity=32,
                   shared_name=None,
                   summary_name=None,
                   name=None):
  """Output the rows of `input_tensor` to a queue for an input pipeline.

  Args:
    input_tensor: A tensor with the rows to produce. Must be at least
      one-dimensional. Must either have a fully-defined shape, or
      `element_shape` must be defined.
    element_shape: (Optional.) A `TensorShape` representing the shape of a
      row of `input_tensor`, if it cannot be inferred.
    num_epochs: (Optional.) An integer. If specified `input_producer` produces
      each row of `input_tensor` `num_epochs` times before generating an
      `OutOfRange` error. If not specified, `input_producer` can cycle through
      the rows of `input_tensor` an unlimited number of times.
    shuffle: (Optional.) A boolean. If true, the rows are randomly shuffled
      within each epoch.
    seed: (Optional.) An integer. The seed to use if `shuffle` is true.
    capacity: (Optional.) The capacity of the queue to be used for buffering
      the input.
    shared_name: (Optional.) If set, this queue will be shared under the given
      name across multiple sessions.
    summary_name: (Optional.) If set, a scalar summary for the current queue
      size will be generated, using this name as part of the tag.
    name: (Optional.) A name for queue.

  Returns:
    A queue with the output rows. A `QueueRunner` for the queue is added to
    the current `QUEUE_RUNNER` collection of the current graph.

  Raises:
    ValueError: If the shape of the input cannot be inferred from the
      arguments.
  """
  with ops.op_scope([input_tensor], name, "input_producer"):
    input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
    element_shape = input_tensor.get_shape()[1:].merge_with(element_shape)
    if not element_shape.is_fully_defined():
      raise ValueError("Either `input_tensor` must have a fully defined shape "
                       "or `element_shape` must be specified")

    if shuffle:
      input_tensor = random_ops.random_shuffle(input_tensor, seed=seed)

    input_tensor = limit_epochs(input_tensor, num_epochs)

    q = data_flow_ops.FIFOQueue(capacity=capacity,
                                dtypes=[input_tensor.dtype.base_dtype],
                                shapes=[element_shape],
                                shared_name=shared_name,
                                name=name)
    enq = q.enqueue_many([input_tensor])
    queue_runner.add_queue_runner(queue_runner.QueueRunner(q, [enq]))
    if summary_name is not None:
      logging_ops.scalar_summary("queue/%s/%s" % (q.name, summary_name),
                                 math_ops.cast(q.size(), dtypes.float32) *
                                 (1. / capacity))
    return q
def testShuffle1d(self):
  with self.cached_session() as sess:
    with self.test_scope():
      x = math_ops.range(1 << 16)
      shuffle = random_ops.random_shuffle(x)
    result = sess.run(shuffle)
    expected = range(1 << 16)
    # Compare sets to avoid randomness behavior changes but make sure still
    # have all the values.
    self.assertAllEqual(set(result), set(expected))
def testShuffle1d(self):
  with self.cached_session() as sess:
    with self.test_scope():
      x = math_ops.range(1 << 16)
      shuffle = random_ops.random_shuffle(x)
    result = self.evaluate(shuffle)
    expected = range(1 << 16)
    # Compare sets to avoid randomness behavior changes but make sure still
    # have all the values.
    self.assertAllEqual(set(result), set(expected))
def testShuffle2d(self):
  with self.cached_session() as sess:
    with self.test_scope():
      x = array_ops.diag(math_ops.range(20))
      shuffle = random_ops.random_shuffle(x)
    result = sess.run(shuffle)
    expected = np.diag(range(20)).flatten()
    # Compare sets to avoid randomness behavior changes but make sure still
    # have all the values.
    self.assertAllEqual(len(result.flatten()), len(expected))
    self.assertAllEqual(set(result.flatten()), set(expected))
def _input_producer(input_tensor, dtype, num_epochs, shuffle, seed, capacity,
                    name, summary_name):
  if shuffle:
    input_tensor = random_ops.random_shuffle(input_tensor, seed=seed)
  input_tensor = limit_epochs(input_tensor, num_epochs)

  q = data_flow_ops.FIFOQueue(capacity=capacity, dtypes=[dtype], shapes=[[]],
                              name=name)
  enq = q.enqueue_many([input_tensor])
  queue_runner.add_queue_runner(queue_runner.QueueRunner(q, [enq]))
  summary_ops.scalar_summary(
      "queue/%s/%s" % (q.name, summary_name),
      math_ops.cast(q.size(), dtypes.float32) * (1.0 / capacity))
  return q
def testShuffle1d(self):
  # TODO(b/26783907): this test requires the CPU backend to implement sort.
  if self.device in ["XLA_CPU"]:
    return
  with self.cached_session() as sess:
    with self.test_scope():
      x = math_ops.range(1 << 16)
      shuffle = random_ops.random_shuffle(x)
    result = sess.run(shuffle)
    expected = range(1 << 16)
    # Compare sets to avoid randomness behavior changes but make sure still
    # have all the values.
    self.assertAllEqual(set(result), set(expected))
def _input_producer(input_tensor, dtype, num_epochs, shuffle, seed, capacity, name, summary_name): if shuffle: input_tensor = random_ops.random_shuffle(input_tensor, seed=seed) input_tensor = limit_epochs(input_tensor, num_epochs) q = data_flow_ops.FIFOQueue(capacity=capacity, dtypes=[dtype], shapes=[[]], name=name) enq = q.enqueue_many([input_tensor]) queue_runner.add_queue_runner(queue_runner.QueueRunner(q, [enq])) summary_ops.scalar_summary("queue/%s/%s" % (q.name, summary_name), math_ops.cast(q.size(), dtypes.float32) * (1. / capacity)) return q
def string_input_queue(self, string_tensor, shuffle=True, name=None,
                       seed=None, capacity=16384):
  with ops.name_scope(name, "input_producer", [string_tensor]) as name:
    input_tensor = ops.convert_to_tensor(string_tensor, dtype=dtypes.string)
    if shuffle:
      input_tensor = random_ops.random_shuffle(input_tensor, seed=seed)
    q = data_flow_ops.FIFOQueue(
        capacity=capacity, dtypes=[input_tensor.dtype.base_dtype])
    enq = tf.cond(tf.less(q.size(), 2),
                  lambda: q.enqueue_many([input_tensor]),
                  lambda: tf.no_op())
    return q, enq
def testRandomShuffle2021(self):
  original = StructuredTensor.from_pyval([
      {"x0": 0, "y": {"z": [[3, 13]]}},
      {"x0": 1, "y": {"z": [[3], [4, 13]]}},
      {"x0": 2, "y": {"z": [[3, 5], [4]]}},
      {"x0": 3, "y": {"z": [[3, 7, 1], [4]]}},
      {"x0": 4, "y": {"z": [[3], [4]]}}])  # pyformat: disable
  random_seed.set_seed(1066)
  result = random_ops.random_shuffle(original, seed=2021)
  expected = StructuredTensor.from_pyval([
      {"x0": 0, "y": {"z": [[3, 13]]}},
      {"x0": 1, "y": {"z": [[3], [4, 13]]}},
      {"x0": 4, "y": {"z": [[3], [4]]}},
      {"x0": 2, "y": {"z": [[3, 5], [4]]}},
      {"x0": 3, "y": {"z": [[3, 7, 1], [4]]}}])  # pyformat: disable
  self.assertAllEqual(result, expected)
def testShuffleFiles(self):
  cluster = data_service_test_base.TestCluster(num_workers=3)
  shuffled_filenames = random_ops.random_shuffle(self._filenames)
  dataset = dataset_ops.Dataset.from_tensor_slices(shuffled_filenames)
  dataset = dataset.interleave(readers.TFRecordDataset)
  dataset = self.make_distributed_dataset(
      dataset,
      cluster=cluster,
      processing_mode=data_service_ops.ShardingPolicy.DYNAMIC)
  # pylint:disable=g-complex-comprehension
  expected = [
      b"Record %d of file %d" % (record, file)
      for file in range(0, 5)
      for record in range(0, 5)
  ]
  result = self.getIteratorOutput(
      self.getNext(dataset, requires_initialization=True))
  self.assertCountEqual(result, expected)
def get_grow_tensor(self, weights, method):
  """Different ways to initialize new connections.

  Args:
    weights: tf.Tensor or Variable.
    method: str, available options: 'zeros', 'random_normal', 'random_uniform'
      and 'initial_value'
  Returns:
    tf.Tensor same shape and type as weights.
  Raises:
    ValueError, when the method is not valid.
  """
  if not isinstance(method, six.string_types):
    raise ValueError('Grow-Init: %s is not a string' % method)

  if method == 'zeros':
    grow_tensor = array_ops.zeros_like(weights, dtype=weights.dtype)
  elif method.startswith('initial_dist'):
    original_shape = weights.initial_value.shape
    divisor = extract_number(method)
    grow_tensor = array_ops.reshape(
        random_ops.random_shuffle(
            array_ops.reshape(weights.initial_value, [-1])),
        original_shape) / divisor
  elif method.startswith('random_normal'):
    stddev = math_ops.reduce_std(weights)
    divisor = extract_number(method)
    grow_tensor = self._random_normal(
        weights.shape,
        stddev=stddev,
        dtype=weights.dtype,
        seed=hash(weights.name + 'grow_init_n')) / divisor
  elif method.startswith('random_uniform'):
    mean = math_ops.reduce_mean(math_ops.abs(weights))
    divisor = extract_number(method)
    grow_tensor = self._random_uniform(
        weights.shape,
        minval=-mean,
        maxval=mean,
        dtype=weights.dtype,
        seed=hash(weights.name + 'grow_init_u')) / divisor
  else:
    raise ValueError('Grow-Init: %s is not a valid option.' % method)
  return grow_tensor
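# Hypothetical usage sketch for get_grow_tensor above (not part of the
# original snippets); `optimizer` stands for whatever object defines the
# method and `kernel` for an existing weight Variable.
new_connections = optimizer.get_grow_tensor(kernel, 'zeros')
# 'zeros' needs no suffix; the other options are parsed with extract_number()
# to obtain a divisor applied to the freshly drawn values.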
def random_shuffle(value, seed=None, name=None):
  """Shuffle a structured tensor on the zeroth axis.

  Args:
    value: a structured tensor of rank at least one.
    seed: the seed for shuffling.
    name: the name for shuffle.

  Returns:
    The shuffled structured tensor.
  """
  with ops.name_scope(name, 'shuffle', [value, seed]):
    if value.rank == 0:
      raise ValueError('Cannot shuffle a scalar StructuredTensor')
    first_dimension = value.nrows()
    index = random_ops.random_shuffle(math_ops.range(first_dimension),
                                      seed=seed)
    return gather(value, index, axis=0)
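# Hypothetical usage sketch for the StructuredTensor random_shuffle above
# (not part of the original snippets); the pyval data mirrors the test cases.
st = StructuredTensor.from_pyval([
    {"x0": 0, "y": {"z": [[3, 13]]}},
    {"x0": 1, "y": {"z": [[3], [4, 13]]}},
    {"x0": 2, "y": {"z": [[3, 5], [4]]}}])
shuffled = random_shuffle(st, seed=2021)  # rows permuted along axis 0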
def operator_and_matrix(self, build_info, dtype, use_placeholder,
                        ensure_self_adjoint_and_pd=False):
  shape = list(build_info.shape)
  perm = math_ops.range(0, shape[-1])
  perm = array_ops.broadcast_to(perm, shape[:-1])
  perm = random_ops.random_shuffle(perm)

  if use_placeholder:
    perm = array_ops.placeholder_with_default(perm, shape=None)

  operator = permutation.LinearOperatorPermutation(perm, dtype=dtype)
  matrix = math_ops.cast(
      math_ops.equal(math_ops.range(0, shape[-1]),
                     perm[..., array_ops.newaxis]),
      dtype)
  return operator, matrix
def random_func(x):
  x = math_ops.add(x, 1)
  random_ops.random_shuffle([x, math_ops.square(x)])
  return x
def random_func_seeded(x):
  ops.get_default_graph().seed = None
  random_ops.random_shuffle(x)
  return x
def generate_one(d):
  fn = lambda _: random_ops.random_shuffle(math_ops.range(d), seed=seed)
  return functional_ops.map_fn(fn, sample_range)
def test_consistent_random_seed_in_assert_all_equal(self):
  random_seed.set_seed(1066)
  index = random_ops.random_shuffle([0, 1, 2, 3, 4], seed=2021)
  # This failed when `a` and `b` were evaluated in separate sessions.
  self.assertAllEqual(index, index)
def sparse_random_mask(dense_shape,
                       density=0.5,
                       mask_values=[1],
                       symmetrical=True,
                       dtype=dtypes.float32,
                       seed=None):
  """Uses values to create a sparse random mask according to a given density.

  A density of 0 returns an empty sparse tensor.

  Note: if symmetrical, the mask always has the same number of mask_values per
  row, which means that if ``density * dense_shape[1] < len(mask_values)``,
  the mask will be an empty ``SparseTensor``. It also means that if
  ``dense_shape[1] % len(mask_values) != 0`` and ``density = 1.0``, not all
  values will be corrupted, because we can't fill every entry with a
  symmetrical mask. There are other ways to fill a dense tensor with random
  values, though, so a density of 1 defeats the purpose of this operation.

  If not symmetrical, the number of mask_values will not be the same per row.
  If we need to fill 2 extra entries with values, 2 mask values are picked at
  random to fill the excess.

  Example:
    if **not** symmetrical and ``shape = [1, 10]``, ``density = 0.5``,
    ``mask_values = [1, 2, 3]``, the result could be something like::

      [[1. 1. 2. 3. 0. 0. 0. 2. 0. 0.]]

  Args:
    dense_shape: a 1-D tensor with shape [2].
    density: desired density.
    mask_values: the values to be used to generate the random mask.
    symmetrical: if True, each row gets the same number of masked entries.
    dtype: the tensor value type.
    seed: int32 to be used as seed.

  Returns:
    A sparse random mask with the given density over the original shape,
    corrupted using the mask values.
  """
  # total number of corrupted indices
  num_values = len(mask_values)
  num_corrupted = int(density * dense_shape[1])
  num_mask_values = num_corrupted // num_values * num_values

  if num_mask_values == 0:
    return empty_sparse_tensor(dense_shape)
  else:
    # num corrupted indices per value
    if not symmetrical:
      mask_values = random_ops.random_shuffle(mask_values, seed)
      extra_corrupted = num_corrupted - num_mask_values

    if not symmetrical:
      num_mask_values = num_corrupted

    samples = sample(dense_shape[1], num_mask_values, dense_shape[0],
                     unique=True, seed=seed)
    indices = batch_to_matrix_indices(samples, dtype=dtypes.int64)

    value_tensors = []
    for i in range(num_values):
      num_vi = num_mask_values // num_values
      # spread the extra to be corrupted by n mask_values
      if not symmetrical and i < extra_corrupted:
        num_vi = num_vi + 1
      vi_shape = math_ops.cast([dense_shape[0], num_vi], dtypes.int32)
      vi_tensor = array_ops.fill(vi_shape, mask_values[i])
      value_tensors.append(vi_tensor)

    values = array_ops.concat(value_tensors, axis=-1)
    values = array_ops.reshape(values, [-1])
    if values.dtype != dtype:
      values = math_ops.cast(values, dtype)

    dense_shape = math_ops.cast([dense_shape[0], dense_shape[1]],
                                dtypes.int64)
    sp_tensor = SparseTensor(indices, values, dense_shape)
    # the indices were generated at random, so reorder the sparse tensor
    sp_tensor = sparse_ops.sparse_reorder(sp_tensor)
    return sp_tensor
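# Hypothetical usage sketch for sparse_random_mask above (not part of the
# original snippets).
mask = sparse_random_mask(dense_shape=[2, 10],
                          density=0.5,
                          mask_values=[1, 2, 3],
                          symmetrical=True,
                          seed=7)
# With density=0.5 and 3 mask values, each of the 2 rows gets
# (5 // 3) * 3 == 3 corrupted columns, one per mask value.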