def test_dataset_creator_input_options_with_cluster_coordinator(self):
    """Verify `InputOptions` survive a `DatasetCreator` + coordinator setup.

    A `DatasetCreator` carrying explicit input options is handed to the data
    handler of a model that has a `ClusterCoordinator` attached. The iterator
    obtained after rebuilding on the first worker must report the same
    option values.
    """

    def dataset_fn(_):
        return dataset_ops.DatasetV2.from_tensor_slices([1, 1])

    creator = dataset_creator.DatasetCreator(
        dataset_fn,
        input_options=distribute_lib.InputOptions(
            experimental_fetch_to_device=True,
            experimental_per_replica_buffer_size=2))

    strategy = self._get_parameter_server_strategy()
    with strategy.scope():
        model = sequential.Sequential([core_layers.Dense(10)])
        model._cluster_coordinator = cluster_coordinator.ClusterCoordinator(
            strategy)
        handler = data_adapter.get_data_handler(
            creator, steps_per_epoch=2, model=model)

    # Take the first per-worker value, rebuild it on worker 0 and read back
    # the iterator it holds.
    remote_iterator = iter(handler._dataset)._values[0]
    remote_iterator._rebuild_on(
        model._cluster_coordinator._cluster.workers[0])
    resulting_options = remote_iterator._get_values()._options

    # The resulting `DistributedIterator` must carry the requested options.
    self.assertTrue(resulting_options.experimental_fetch_to_device)
    self.assertEqual(
        resulting_options.experimental_per_replica_buffer_size, 2)
def test_enqueue_incorrect_shape_feature(self):
    """Enqueueing data that contradicts the preset output shapes must fail."""
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    sparse_dataset = self._create_high_dimensional_sparse_dataset(strategy)
    host_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=False)
    sparse_iter = iter(
        strategy.experimental_distribute_dataset(
            sparse_dataset, options=host_options))

    num_features = 3
    mid_level_api._output_shapes = [
        TensorShape((1, 1)) for _ in range(num_features)
    ]
    # The shapes handed to `build` agree with the output shapes set above.
    mid_level_api.build(
        [TensorShape([1, 1, 1]) for _ in range(num_features)])

    @def_function.function
    def test_fn():
        def dequeue_step():
            return mid_level_api.dequeue()

        mid_level_api.enqueue(next(sparse_iter), training=False)
        return strategy.run(dequeue_step)

    # The enqueued tensor's shape disagrees with the output shape setting.
    expected_error = 'Inconsistent shape founded for input feature'
    with self.assertRaisesRegex(ValueError, expected_error):
        test_fn()
def testTypeSpecComponents(self, distribution, enable_get_next_as_optional,
                           experimental_place_dataset_on_device,
                           experimental_fetch_to_device):
    """Round-trip a distributed dataset through its type-spec components.

    The dataset rebuilt via `_to_components` / `_from_components` must match
    the original in input workers, cloned datasets, element spec,
    get-next-as-optional flag and options.
    """
    source = dataset_ops.DatasetV2.range(10).batch(2)
    distribution.extended.experimental_enable_get_next_as_optional = (
        enable_get_next_as_optional)

    input_options = distribute_lib.InputOptions(
        experimental_place_dataset_on_device=(
            experimental_place_dataset_on_device),
        experimental_fetch_to_device=experimental_fetch_to_device)
    original = distribution.experimental_distribute_dataset(
        source, input_options)

    spec = original._type_spec
    self.assertEqual(spec._input_workers, original._input_workers)

    expected_value_specs = (
        tensor_spec.TensorSpec(shape=(None,), dtype=dtypes.int64, name=None),
        tensor_spec.TensorSpec(shape=(None,), dtype=dtypes.int64, name=None),
    )
    self.assertEqual(spec._element_spec._value_specs, expected_value_specs)

    rebuilt = spec._from_components(spec._to_components(original))
    self.assertEqual(original._input_workers, rebuilt._input_workers)
    self.assertAllEqual(original._cloned_datasets, rebuilt._cloned_datasets)
    self.assertEqual(original._element_spec, rebuilt._element_spec)
    self.assertEqual(original._enable_get_next_as_optional,
                     rebuilt._enable_get_next_as_optional)
    self.assertEqual(original._options, rebuilt._options)
def test_enqueue_with_outside_compilation_in_control_flow(self, use_mlir):
    """Expect a `RuntimeError` when enqueue sits inside nested control flow.

    The enqueue is wrapped in its own `tf.function` and invoked from the
    function passed to `strategy.run`; the call is expected to be rejected.

    Args:
        use_mlir: If truthy, the MLIR bridge is enabled before the strategy
            is created.
    """
    if use_mlir:
        config.enable_mlir_bridge()

    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
    dataset = self._create_sparse_dataset(strategy)
    # `InputOptions` names this field `experimental_fetch_to_device`, which
    # is the spelling used by the other `InputOptions(...)` calls in this
    # file.
    dataset_iter = iter(strategy.experimental_distribute_dataset(
        dataset,
        options=distribute_lib.InputOptions(
            experimental_fetch_to_device=False)))

    # This is one way to force the enqueue in some control flow. @tf.functions
    # aren't inlined in the calling tf.function. An alternative would be to
    # place the enqueue in a switch_v2 or something similar.
    @def_function.function
    def enqueue_fn(features):
        mid_level_api.enqueue(features, training=False)

    @def_function.function
    def enqueue_with_outside_compilation():
        def get_activations(features):
            enqueue_fn(features)
            return mid_level_api.dequeue()

        return strategy.run(get_activations, args=(next(dataset_iter),))

    with self.assertRaisesRegex(
            RuntimeError,
            'does not match graph which contains TPUReplicateContext'):
        enqueue_with_outside_compilation()
def test_enqueue_with_outside_compilation_auto_mode(self):
    """Check enqueue mode detection when `training` is left unspecified.

    Two functions enqueue the same batch without passing `training`: one
    also applies gradients, the other does not. The activations returned by
    the second call must equal the first call's activations minus the
    expected SGD update.
    """
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
    mid_level_api.build(self.batch_size)
    dataset = self._create_sparse_dataset(strategy)
    # `InputOptions` names this field `experimental_fetch_to_device`, which
    # is the spelling used by the other `InputOptions(...)` calls in this
    # file.
    dataset_iter = iter(strategy.experimental_distribute_dataset(
        dataset,
        options=distribute_lib.InputOptions(
            experimental_fetch_to_device=False)))

    @def_function.function
    def enqueue_with_no_gradient_apply(data):
        def get_activations(features):
            # Note the lack of setting training=False, so training defaults
            # to true here even though we don't have apply gradients.
            # We detect the correct mode based on which ops exist that share
            # the same 'name'.
            mid_level_api.enqueue(features, name='call1')
            return mid_level_api.dequeue(name='call1')

        return strategy.run(get_activations, args=(data,))

    @def_function.function
    def enqueue_with_gradient_apply(data):
        def get_activations(features):
            mid_level_api.enqueue(features, name='call2')
            activations = mid_level_api.dequeue(name='call2')
            # Apply an all ones gradient
            gradients = nest.map_structure(array_ops.ones_like, activations)
            mid_level_api.apply_gradients(gradients, name='call2')
            return activations

        return strategy.run(get_activations, args=(data,))

    data = next(dataset_iter)
    before_gradient_apply = enqueue_with_gradient_apply(data)
    after_gradient_apply = enqueue_with_no_gradient_apply(data)
    before_gradient_apply0 = self._get_replica_numpy(
        before_gradient_apply, strategy, 0)
    after_gradient_apply0 = self._get_replica_numpy(
        after_gradient_apply, strategy, 0)

    num_replicas = strategy.num_replicas_in_sync
    # We are passing a gradient of 1 for all lookups, optimizer is SGD with a
    # learning rate of 0.1. Feature 0 and 1 are looked up with a sum combiner
    # with the following ids:
    # Feature 0: [0, 0, 1], [0, 1, 1], ... repeated over num_replicas
    # Feature 1: [0, 1, 1], [0, 0, 1], ... repeated over num_replicas
    # i.e. Row 0 and 1 were looked up 3*num_replicas times over all cores and
    # as the gradient is 1, the accumulated gradient is 3*num_replicas for
    # each position in row 0 and 1 in table.
    #
    # See comments in test_pass_none_to_apply_gradients for the update to
    # Feature 2 and its table.
    # The *2 in the next tests are because those rows have 2 lookups vs
    # the 1 lookup in the other row.
    update = ([[0.3 * num_replicas], [0.3 * num_replicas * 2]],
              [[0.3 * num_replicas * 2], [0.3 * num_replicas]],
              [[0.1 * num_replicas], [0.1 / 3 * num_replicas]])
    golden = tuple(
        before - np.array(up)
        for before, up in zip(before_gradient_apply0, update))

    self.assertAllClose(golden, after_gradient_apply0)
def test_prefetch_to_device_dataset(self, prefetch_to_device):
    """Elements of the distributed dataset are expected on GPU devices.

    Args:
        prefetch_to_device: Value for `experimental_fetch_to_device`; `None`
            means no `InputOptions` are passed at all.
    """
    distribution, _, _ = create_test_objects(
        cluster_spec=self._cluster_spec,
        task_type='worker',
        task_id=0,
        num_gpus=2)

    input_options = (
        None if prefetch_to_device is None else distribute_lib.InputOptions(
            experimental_fetch_to_device=prefetch_to_device))

    batched = dataset_ops.Dataset.range(100).batch(
        distribution.num_replicas_in_sync)
    dist_dataset = distribution.experimental_distribute_dataset(  # pylint: disable=assignment-from-no-return
        batched, options=input_options)

    # Only the V1 dataset exposes `make_initializable_iterator`.
    if not isinstance(dist_dataset, input_lib.DistributedDatasetV1):
        self.skipTest('unsupported test combination')
    item = dist_dataset.make_initializable_iterator().get_next()

    device_types = set()
    for tensor in item.values:
        spec = tf_device.DeviceSpec.from_string(tensor.device)
        device_types.add(spec.device_type)
    self.assertAllEqual(list(device_types), ['GPU'])
def test_enqueue_with_outside_compilation(self, use_mlir):
    """Enqueue inside and outside `strategy.run` must give equal activations.

    Args:
        use_mlir: If truthy, the MLIR bridge is enabled before the strategy
            is created.
    """
    if use_mlir:
        config.enable_mlir_bridge()

    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
    dataset = self._create_sparse_dataset(strategy)
    # `InputOptions` names this field `experimental_fetch_to_device`, which
    # is the spelling used by the other `InputOptions(...)` calls in this
    # file.
    dataset_iter = iter(strategy.experimental_distribute_dataset(
        dataset,
        options=distribute_lib.InputOptions(
            experimental_fetch_to_device=False)))

    @def_function.function
    def enqueue_with_outside_compilation(data):
        def get_activations(features):
            # Enqueue happens inside the function given to `strategy.run`.
            mid_level_api.enqueue(features, training=False)
            return mid_level_api.dequeue()

        return strategy.run(get_activations, args=(data,))

    @def_function.function
    def enqueue_without_outside_compilation(data):
        def get_activations():
            return mid_level_api.dequeue()

        # Enqueue happens before `strategy.run` is entered.
        mid_level_api.enqueue(data, training=False)
        return strategy.run(get_activations)

    features = next(dataset_iter)

    activations_oc = enqueue_with_outside_compilation(features)
    activations = enqueue_without_outside_compilation(features)

    # Extract per core numpy arrays.
    activations_oc0 = self._get_replica_numpy(activations_oc, strategy, 0)
    activations0 = self._get_replica_numpy(activations, strategy, 0)

    self.assertAllClose(activations_oc0, activations0)
def test_enqueue_with_outside_compilation_non_direct_input(self):
    """Enqueueing values that were first multiplied on the TPU must raise."""
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    output_shapes = [
        TensorShape((self.batch_size, 2)),
        TensorShape((self.batch_size, 2)),
        TensorShape((self.batch_size, 3)),
    ]
    mid_level_api.build(output_shapes)

    sparse_dataset = self._create_sparse_dataset(strategy)
    host_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=False)
    dataset_iter = iter(
        strategy.experimental_distribute_dataset(
            sparse_dataset, options=host_options))

    @def_function.function
    def enqueue_with_outside_compilation():
        def get_activations(features):
            # The multiplication places an op on the TPU between the input
            # and the enqueue, which is what triggers the direct input error.
            doubled = (features[0] * 2, features[1] * 2, features[2] * 2)
            mid_level_api.enqueue(doubled, training=False)
            return mid_level_api.dequeue()

        return strategy.run(get_activations, args=(next(dataset_iter),))

    expected_error = 'which does not have the `_tpu_input_identity` attr'
    with self.assertRaisesRegex(ValueError, expected_error):
        enqueue_with_outside_compilation()
def test_sequence_feature(self, is_sparse):
    """Dequeued activations have the expected shapes for sequence features.

    Args:
        is_sparse: Use the sparse dataset when truthy, otherwise the ragged
            dataset.
    """
    seq_length = 3
    # Every feature config gets the same maximum sequence length.
    for feature in self.feature_config:
        feature.max_sequence_length = seq_length

    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
    make_dataset = (
        self._create_sparse_dataset if is_sparse
        else self._create_ragged_dataset)
    dataset = make_dataset(strategy)

    host_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=False)
    feature_iter = iter(
        strategy.experimental_distribute_dataset(
            dataset, options=host_options))

    @def_function.function
    def test_fn():
        def dequeue_step():
            return mid_level_api.dequeue()

        mid_level_api.enqueue(next(feature_iter), training=False)
        return strategy.run(dequeue_step)

    output = test_fn()
    expected_shapes = ((2, 3, 4), (2, 3, 4), (2, 3, 2))
    for index, expected in enumerate(expected_shapes):
        self.assertEqual(
            self._get_replica_numpy(output[index], strategy, 0).shape,
            expected)
def test_dense_lookup(self):
    """Dense-input lookups on replica 0 must match a NumPy table lookup."""
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    input_fn = self._create_dense_input_fn(strategy)
    # `InputOptions` names this field `experimental_fetch_to_device`, which
    # is the spelling used by the other `InputOptions(...)` calls in this
    # file.
    dist = strategy.distribute_datasets_from_function(
        input_fn,
        options=distribute_lib.InputOptions(
            experimental_fetch_to_device=False))
    dist_iter = iter(dist)

    @def_function.function
    def test_fn():
        def step():
            return mid_level_api.dequeue()

        mid_level_api.enqueue(next(dist_iter), training=False)
        return strategy.run(step)

    # Run model.
    shard0 = self._get_replica_numpy(test_fn(), strategy, 0)

    # embedding_values is a linear list, so we reshape to match the correct
    # shape of the corresponding table before performing the lookup.
    numpy_videos = np.reshape(self.embedding_values, (8, 4))
    numpy_users = np.reshape(self.embedding_values, (16, 2))
    # Only the last two entries of each feature's values are used as ids.
    golden = (numpy_videos[self.feature_watched_values[-2:]],
              numpy_videos[self.feature_favorited_values[-2:]],
              numpy_users[self.feature_friends_values[-2:]])
    self.assertAllClose(shard0, golden)
def test_enqueue_wrong_weight_type_for_sparse_and_ragged_tensor(self):
    """Mixing sparse features with ragged weights (and vice versa) raises."""
    self.skip_if_oss()

    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    sparse_dataset = self._create_sparse_dataset(
        strategy, include_weights=True)
    ragged_dataset = self._create_ragged_dataset(
        strategy, include_weights=True)

    def host_iterator(dataset):
        # Distribute `dataset` with fetching to device turned off.
        return iter(
            strategy.experimental_distribute_dataset(
                dataset,
                options=distribute_lib.InputOptions(
                    experimental_fetch_to_device=False)))

    sparse_iter = host_iterator(sparse_dataset)
    ragged_iter = host_iterator(ragged_dataset)

    @def_function.function
    def test_sparse_fn():
        def step():
            return mid_level_api.dequeue()

        # Sparse features paired with weights from the ragged dataset.
        features, _ = next(sparse_iter)
        _, weights = next(ragged_iter)
        mid_level_api.enqueue(features, weights=weights, training=False)
        return strategy.run(step)

    with self.assertRaisesRegex(
            ValueError,
            'which does not match type input which is SparseTensor.'):
        test_sparse_fn()

    @def_function.function
    def test_ragged_fn():
        def step():
            return mid_level_api.dequeue()

        # Ragged features paired with weights from the sparse dataset.
        _, weights = next(sparse_iter)
        features, _ = next(ragged_iter)
        mid_level_api.enqueue(features, weights=weights, training=False)
        return strategy.run(step)

    with self.assertRaisesRegex(
            ValueError,
            'which does not match type input which is RaggedTensor.'):
        test_ragged_fn()
def testDoesNotTriggerFunctionTracing(self, distribution,
                                      enable_get_next_as_optional):
    """Fresh iterators over one dataset must reuse a single trace of `f`.

    Args:
        distribution: Strategy under test.
        enable_get_next_as_optional: Value assigned to
            `distribution.extended.experimental_enable_get_next_as_optional`.
    """
    if not tf2.enabled():
        self.skipTest(
            "DistributedIterator CompositeTensor support is only "
            "present in TF 2.0 only.")

    # A one-element list lets the traced function mutate the counter.
    trace_count = [0]

    @def_function.function
    def f(iterator):
        trace_count[0] += 1
        counter = np.int64(0)
        for _ in range(5):
            next(iterator)
            counter += 1
        return counter

    ctx = distribute_lib.InputContext()
    batch_size = ctx.get_per_replica_batch_size(8)
    # Use 50 elements, which isn't divisible by 8, to test partial batch
    # behavior.
    row_lengths = np.mod(np.arange(50), 4).astype(np.int64)
    ragged_tensor = ragged_tensor_lib.RaggedTensor.from_row_lengths(
        np.repeat(np.arange(50, dtype=np.float32), row_lengths), row_lengths)
    dataset = dataset_ops.DatasetV2.from_tensor_slices({
        "dense": ragged_tensor.to_tensor(),
        "ragged": ragged_tensor,
        "sparse": ragged_tensor.to_sparse(),
    })
    dataset = dataset.shard(ctx.num_input_pipelines, ctx.input_pipeline_id)
    dataset = dataset.batch(batch_size)

    distribution.extended.experimental_enable_get_next_as_optional = (
        enable_get_next_as_optional)

    if isinstance(distribution,
                  (tpu_strategy.TPUStrategyV2, tpu_strategy.TPUStrategy)):
        # TPUStrategy does not support distributed datasets with device
        # prefetch when using sparse or ragged tensors.
        # `InputOptions` names this field `experimental_fetch_to_device`,
        # which is the spelling used by the other `InputOptions(...)` calls
        # in this file.
        options = distribute_lib.InputOptions(
            experimental_fetch_to_device=False)
    else:
        options = None

    dist_dataset = distribution.experimental_distribute_dataset(
        dataset, options)
    with distribution.scope():
        for _ in range(3):
            iterator = iter(dist_dataset)
            _check_type_spec_structure(iterator)
            counter = f(iterator)

            # `f` must have been traced exactly once, however many iterators
            # have been passed to it so far.
            self.assertEqual(trace_count[0], 1)
            self.assertEqual(counter, 5)
def test_pass_none_to_apply_gradients(self):
    """`None` gradients leave a feature's activations unchanged.

    Gradients are applied only to the third feature; a second lookup must
    equal the first lookup minus the expected update.
    """
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    output_shapes = [
        TensorShape((self.batch_size, 2)),
        TensorShape((self.batch_size, 2)),
        TensorShape((self.batch_size, 3)),
    ]
    mid_level_api.build(output_shapes)

    sparse_dataset = self._create_sparse_dataset(strategy)
    distributed = strategy.experimental_distribute_dataset(
        sparse_dataset,
        options=distribute_lib.InputOptions(
            experimental_fetch_to_device=False))
    data = next(iter(distributed))

    @def_function.function
    def embedding_and_set_gradients(data):
        mid_level_api.enqueue(data)

        def tpu_fn():
            results = mid_level_api.dequeue()
            # Only the third feature receives a (all ones) gradient.
            third_gradient = array_ops.ones_like(results[2])
            mid_level_api.apply_gradients((None, None, third_gradient))
            return results

        return strategy.run(tpu_fn)

    @def_function.function
    def embedding_only(data):
        mid_level_api.enqueue(data, training=False)

        def tpu_fn():
            return mid_level_api.dequeue()

        return strategy.run(tpu_fn)

    first = self._get_replica_numpy(
        embedding_and_set_gradients(data), strategy, 0)
    second = self._get_replica_numpy(embedding_only(data), strategy, 0)

    # First two features should be the same as None gradient was applied.
    # Third feature had gradient of 1 passed in from each core.
    # Each core received the same ids per core and returned the following
    # batch:
    #   [ row 3, row 0 + row 1 + row 2 ]
    # so gradient update was (learning rate = 0.1):
    #   row 0: -1/3*0.1
    #   row 1: -1/3*0.1
    #   row 2: -1/3*0.1
    #   row 3: -1*0.1
    # There is a factor of num_replicas because each replica gave an update.
    num_replicas = strategy.num_replicas_in_sync
    update = ([[0.0]],
              [[0.0]],
              [[0.1 * num_replicas], [0.1 / 3 * num_replicas]])
    golden = tuple(
        feature - np.array(up) for feature, up in zip(first, update))

    self.assertAllClose(golden, second)
def testFromFunctionInputSignatureForPerReplicaValuesWithOptions(
        self, distribution, enable_get_next_as_optional,
        experimental_place_dataset_on_device, experimental_fetch_to_device):
    """Per-replica datasets from a function work with an `input_signature`.

    Builds a distributed dataset in `PER_REPLICA` replication mode, checks
    the iterator's type spec round trip, and feeds every element to a
    `tf.function` whose input signature is the dataset's element spec.
    """
    if experimental_place_dataset_on_device and experimental_fetch_to_device:
        self.skipTest("Setting experimental_place_dataset_on_device and "
                      "experimental_fetch_to_device to `True` is not "
                      "allowed when using "
                      "distribute_lib.InputReplicationMode.PER_REPLICA.")

    # Two text files with 5 and 9 lines respectively.
    temp_dir_one = self.get_temp_dir()
    fname1 = os.path.join(temp_dir_one, "1.txt")
    _create_text_file(fname1, 5)
    temp_dir_two = self.get_temp_dir()
    fname2 = os.path.join(temp_dir_two, "2.txt")
    _create_text_file(fname2, 9)

    def dataset_fn(input_context):
        file_names = dataset_ops.DatasetV2.from_tensor_slices(
            [fname1, fname2])
        file_names = file_names.shard(input_context.num_input_pipelines,
                                      input_context.input_pipeline_id)
        numbers = readers.TextLineDatasetV2(file_names).map(
            string_ops.string_to_number)
        return numbers.batch(input_context.get_per_replica_batch_size(4))

    options = distribute_lib.InputOptions(
        experimental_place_dataset_on_device=(
            experimental_place_dataset_on_device),
        experimental_fetch_to_device=experimental_fetch_to_device,
        experimental_replication_mode=(
            distribute_lib.InputReplicationMode.PER_REPLICA))

    distribution.extended.experimental_enable_get_next_as_optional = (
        enable_get_next_as_optional)
    ds = distribution.experimental_distribute_datasets_from_function(
        dataset_fn, options)

    iterator = iter(ds)
    _check_type_spec_structure(iterator)
    spec = iterator._type_spec
    re_iterator = spec._from_components(spec._to_components(iterator))

    _check_type_spec_structure(iter(ds))
    element_spec = ds.element_spec
    iter_element_spec = iter(ds).element_spec
    nest.assert_same_structure(element_spec, iter_element_spec)
    self.assertAllEqual(nest.flatten(element_spec),
                        nest.flatten(iter_element_spec))
    self.assertEqual(iterator._input_workers, re_iterator._input_workers)
    self.assertAllEqual(iterator._iterators, re_iterator._iterators)

    @def_function.function(input_signature=[element_spec])
    def process_inputs(inputs):
        distribution.run(lambda inputs: inputs, args=(inputs,))

    for element in ds:
        process_inputs(element)
def test_enqueue_dense_sparse_ragged(self):
    """A dense, a sparse and a ragged feature can be enqueued together."""
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    def host_iterator(dataset):
        # Distribute `dataset` with fetching to device turned off.
        return iter(
            strategy.experimental_distribute_dataset(
                dataset,
                options=distribute_lib.InputOptions(
                    experimental_fetch_to_device=False)))

    dense_iter = host_iterator(
        self._create_high_dimensional_dense_dataset(strategy))
    sparse_iter = host_iterator(
        self._create_high_dimensional_sparse_dataset(strategy))
    ragged_iter = host_iterator(
        self._create_high_dimensional_ragged_dataset(strategy))

    output_shapes = [
        TensorShape([self.batch_size, self.data_batch_size, 1]),
        TensorShape([self.batch_size, self.data_batch_size, 2]),
        TensorShape([self.batch_size, self.data_batch_size, 3]),
    ]
    mid_level_api.build(output_shapes)

    @def_function.function
    def test_fn():
        def dequeue_step():
            return mid_level_api.dequeue()

        # Feature 0 comes from the dense data, 1 from sparse, 2 from ragged.
        features = (next(dense_iter)[0], next(sparse_iter)[1],
                    next(ragged_iter)[2])
        mid_level_api.enqueue(features, training=False)
        return strategy.run(dequeue_step)

    test_fn()
def test_enqueue_with_weights(self, ragged):
    """Weighted enqueue scales activations as the combiners dictate.

    Args:
        ragged: Use the ragged dataset when truthy, otherwise the sparse
            dataset.
    """
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    input_weight = 0.5
    make_dataset = (
        self._create_ragged_dataset if ragged
        else self._create_sparse_dataset)
    dataset = make_dataset(
        strategy, include_weights=True, weight=input_weight)

    output_shapes = [
        TensorShape((self.batch_size, 2)),
        TensorShape((self.batch_size, 2)),
        TensorShape((self.batch_size, 3)),
    ]
    mid_level_api.build(output_shapes)

    host_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=False)
    dataset_iter = iter(
        strategy.experimental_distribute_dataset(
            dataset, options=host_options))

    @def_function.function
    def enqueue_and_get(features, weights):
        def get_activations():
            return mid_level_api.dequeue()

        mid_level_api.enqueue(features, weights=weights, training=False)
        return strategy.run(get_activations)

    features, weights = next(dataset_iter)
    # Drop the weight of the second feature so that `None` is exercised.
    weights = (weights[0], None, weights[2])

    no_weights_activations = enqueue_and_get(features, weights=None)
    weights_activations = enqueue_and_get(features, weights=weights)

    # Extract per core numpy arrays.
    no_weights0 = self._get_replica_numpy(
        no_weights_activations, strategy, 0)
    weights0 = self._get_replica_numpy(weights_activations, strategy, 0)

    # videos table has sum combiner and users table has mean combiner.
    # i.e. users table lookups isn't affected by the weights as all the
    # weights are the same.
    # Tuple entry 0 and 1 are the watched and favorited features from the
    # videos table and entry 2 is the friends feature from the users table.
    # Note that None was passed as a weight for entry 1 so weight should have
    # no effect.
    expected_scale = (0.5, 1.0, 1.0)
    golden = tuple(
        unweighted * scale
        for unweighted, scale in zip(no_weights0, expected_scale))

    self.assertAllClose(golden, weights0)
def test_dataset_creator_model_fit_without_strategy(self, use_input_options):
    """`model.fit` accepts a `DatasetCreator` outside any strategy scope.

    Args:
        use_input_options: Pass a default `InputOptions` when truthy,
            otherwise pass `None`.
    """
    model = sequential.Sequential([core_layers.Dense(10)])
    model.compile(gradient_descent.SGD(), loss="mse")

    if use_input_options:
        input_options = distribute_lib.InputOptions()
    else:
        input_options = None

    creator = dataset_creator.DatasetCreator(
        self._get_dataset_fn(), input_options)
    history = model.fit(creator, epochs=10, steps_per_epoch=10, verbose=0)

    # One loss value is recorded per epoch.
    self.assertLen(history.history["loss"], 10)
def test_composite_input_non_flat_output(self, enable_packed_var):
    """A TPU function may return a dict holding a `SparseTensor`.

    Args:
        enable_packed_var: Forwarded to `get_tpu_strategy`.
    """
    strategy = get_tpu_strategy(enable_packed_var)
    if strategy.num_replicas_in_sync != 2:
        self.skipTest("Test assumes two replicas.")

    with strategy.scope():
        table = variables.Variable(
            initial_value=[[0.0, 1.0], [3.0, 7.0]], dtype=dtypes.float32)

    @def_function.function
    def sparse_lookup(iterator):

        def tpu_function(sparse):
            # Assumes dense_shape is (2, *)
            looked_up = array_ops.gather(table, sparse.values)
            segment_sum = math_ops.unsorted_segment_sum(
                looked_up, sparse.indices[:, 0], 2)
            return {"sparse": sparse, "segment_sum": segment_sum}

        return nest.map_structure(
            strategy.experimental_local_results,
            strategy.run(tpu_function, args=(next(iterator),)))

    def dataset_fn(_):
        dataset = dataset_ops.Dataset.range(2)

        def make_sparse(_):
            return sparse_tensor.SparseTensor(
                indices=array_ops.constant(
                    [[0, 0], [1, 0], [1, 1]], dtype=dtypes.int64),
                values=array_ops.constant([0, 0, 1], dtype=dtypes.int32),
                dense_shape=array_ops.constant([2, 2], dtype=dtypes.int64))

        return dataset.map(make_sparse)

    # `InputOptions` names this field `experimental_fetch_to_device`, which
    # is the spelling used by the other `InputOptions(...)` calls in this
    # file.
    dataset = iter(
        strategy.experimental_distribute_datasets_from_function(
            dataset_fn,
            distribute_lib.InputOptions(
                experimental_fetch_to_device=False)))

    output = sparse_lookup(dataset)

    # All replicas return identical results.
    for replica in range(strategy.num_replicas_in_sync):
        self.assertIsInstance(output["sparse"][replica],
                              sparse_tensor.SparseTensor)
        self.assertAllEqual(output["sparse"][replica].indices,
                            [[0, 0], [1, 0], [1, 1]])
        self.assertAllEqual(output["sparse"][replica].values, [0, 0, 1])
        self.assertAllEqual(output["sparse"][replica].dense_shape, [2, 2])
        self.assertAllEqual(output["segment_sum"][replica],
                            [[0.0, 1.0], [3.0, 8.0]])
def test_embedding(self, optimizer_name, training, sparse):
    """Run one enqueue/dequeue step and validate it with `_check_results`.

    Args:
        optimizer_name: Forwarded to `_create_strategy_and_mid_level`.
        training: Whether gradients are computed and applied in the step.
        sparse: Use the sparse dataset when truthy, otherwise the ragged
            dataset.
    """
    strategy, mid_level_api, optimizer = (
        self._create_strategy_and_mid_level(optimizer_name))

    make_dataset = (
        self._create_sparse_dataset if sparse
        else self._create_ragged_dataset)
    dataset = make_dataset(strategy)

    host_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=False)
    dist = strategy.experimental_distribute_dataset(
        dataset, options=host_options)
    dist_iter = iter(dist)

    @def_function.function
    def test_fn():

        def step():
            """Create and run computation that returns the embedding activations."""
            if training:
                with backprop.GradientTape() as tape:
                    activations = mid_level_api.dequeue()
                    tape.watch(activations)
                    total_loss = _get_total_loss_tensor(activations)
                    loss_per_replica = (
                        total_loss / strategy.num_replicas_in_sync)
                gradients = tape.gradient(loss_per_replica, activations)
                mid_level_api.apply_gradients(gradients)
            else:
                activations = mid_level_api.dequeue()
                total_loss = _get_total_loss_tensor(activations)
            # Both modes return the loss followed by the activations.
            return [total_loss] + list(activations)

        mid_level_api.enqueue(next(dist_iter), training=training)
        return strategy.run(step)

    # Run model.
    shard_out_val = test_fn()

    # Retrieve TPU weights to CPU.
    mid_level_api._retrieve_variables()

    # Compute sparse tensors for global batch.
    input_data = next(iter(self._create_sparse_dataset(strategy)))

    # Check results.
    self._check_results(strategy, shard_out_val, training, input_data,
                        mid_level_api._variables, optimizer)
def test_prefetch_to_device_tpu(self):
    """With fetch-to-device enabled, the first element lives on a TPU."""
    strategy = get_tpu_strategy()
    replica_count = strategy.num_replicas_in_sync

    # Two global batches, one value per replica in each batch.
    dataset = dataset_ops.Dataset.range(
        replica_count * 2,
        output_type=dtypes.float32).batch(strategy.num_replicas_in_sync)

    input_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=True)
    distributed = strategy.experimental_distribute_dataset(
        dataset, options=input_options)
    dataset_item = next(iter(distributed))

    first_value_device = dataset_item.values[0].device
    dataset_location = tf_device.DeviceSpec.from_string(first_value_device)
    self.assertEqual(dataset_location.device_type, "TPU")
def testDistributeDatasetFunctionHostPrefetch(self, distribution):
    """With fetch-to-device disabled, every local result is backed by CPU."""
    data = [5., 6., 7., 8.]

    def dataset_fn(_):
        return get_dataset_from_tensor_slices(data)

    host_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=False)
    input_iterator = iter(
        distribution.distribute_datasets_from_function(
            dataset_fn, host_options))

    local_results = distribution.experimental_local_results(
        input_iterator.get_next())

    for result in local_results:
        self.assertEqual(result.backing_device,
                         device_util.resolve("/device:CPU:0"))
def test_different_input_shapes(self):
    """Output shapes are recorded per feature when input shapes differ."""
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    def host_iterator(dataset):
        # Distribute `dataset` with fetching to device turned off.
        return iter(
            strategy.experimental_distribute_dataset(
                dataset,
                options=distribute_lib.InputOptions(
                    experimental_fetch_to_device=False)))

    sparse_iter = host_iterator(
        self._create_high_dimensional_sparse_dataset(strategy))

    # Create a feature with shape (1, 3, 1)
    dense_feature = constant_op.constant(
        np.zeros(3), shape=(1, 3, 1), dtype=dtypes.int32)
    unbatched = dataset_ops.DatasetV2.from_tensors(dense_feature).unbatch()
    dense_dataset = unbatched.repeat().batch(
        1 * strategy.num_replicas_in_sync, drop_remainder=True)
    dense_iter = host_iterator(dense_dataset)

    @def_function.function
    def test_fn():
        def dequeue_step():
            return mid_level_api.dequeue()

        # Note that the sparse iterator is advanced twice here.
        features = (next(dense_iter), next(sparse_iter)[1],
                    next(sparse_iter)[2])
        mid_level_api.enqueue(features, training=False)
        return strategy.run(dequeue_step)

    test_fn()

    expected_output_shapes = [
        TensorShape((1, 3)),
        TensorShape((self.batch_size, self.data_batch_size)),
        TensorShape((self.batch_size, self.data_batch_size)),
    ]
    self.assertEqual(mid_level_api._output_shapes, expected_output_shapes)
def test_enqueue_with_weights(self, ragged):
    """Weighted lookups through the callable API scale as expected.

    Args:
        ragged: Use the ragged dataset when truthy, otherwise the sparse
            dataset.
    """
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    input_weight = 0.5
    make_dataset = (
        self._create_ragged_dataset if ragged
        else self._create_sparse_dataset)
    dataset = make_dataset(
        strategy, include_weights=True, weight=input_weight)

    host_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=False)
    dataset_iter = iter(
        strategy.experimental_distribute_dataset(
            dataset, options=host_options))

    @def_function.function
    def embedding_lookup(features, weights):
        def step(features, weights):
            return mid_level_api(features, weights)

        return strategy.run(step, args=(features, weights))

    features, weights = next(dataset_iter)
    # Drop the weight of the second feature so that `None` is exercised.
    weights = (weights[0], None, weights[2])

    no_weights_activations = embedding_lookup(features, weights=None)
    weights_activations = embedding_lookup(features, weights=weights)

    no_weights0 = tuple(
        self._unpack(strategy, no_weights_activations[index])
        for index in range(3))
    weights0 = tuple(
        self._unpack(strategy, weights_activations[index])
        for index in range(3))

    # videos table has sum combiner and users table has mean combiner.
    # i.e. users table lookups isn't affected by the weights as all the
    # weights are the same.
    # Tuple entry 0 and 1 are the watched and favorited features from the
    # videos table and entry 2 is the friends feature from the users table.
    # Note that None was passed as a weight for entry 1 so weight should have
    # no effect.
    expected_scale = (0.5, 1.0, 1.0)
    golden = tuple(
        unweighted * scale
        for unweighted, scale in zip(no_weights0, expected_scale))

    self.assertAllClose(golden, weights0)
def testTypeSpecRoundTrip(self, distribution, enable_get_next_as_optional):
    """An iterator rebuilt from its type-spec components matches the original.

    Args:
        distribution: Strategy under test.
        enable_get_next_as_optional: Value assigned to
            `distribution.extended.experimental_enable_get_next_as_optional`.
    """
    if not tf2.enabled():
        self.skipTest(
            "DistributedIterator CompositeTensor support is only "
            "present in TF 2.0 only.")

    ctx = distribute_lib.InputContext()
    batch_size = ctx.get_per_replica_batch_size(8)
    # Use 20 which isn't divisible by 8 to test partial batch behavior.
    row_lengths = np.mod(np.arange(20), 4).astype(np.int64)
    ragged_tensor = ragged_tensor_lib.RaggedTensor.from_row_lengths(
        np.repeat(np.arange(20, dtype=np.float32), row_lengths), row_lengths)
    dataset = dataset_ops.DatasetV2.from_tensor_slices({
        "dense": ragged_tensor.to_tensor(),
        "ragged": ragged_tensor,
        "sparse": ragged_tensor.to_sparse(),
    })
    dataset = dataset.shard(ctx.num_input_pipelines, ctx.input_pipeline_id)
    dataset = dataset.batch(batch_size)

    distribution.extended.experimental_enable_get_next_as_optional = (
        enable_get_next_as_optional)

    if isinstance(distribution,
                  (tpu_strategy.TPUStrategyV2, tpu_strategy.TPUStrategy)):
        # TPUStrategy does not support distributed datasets with device
        # prefetch when using sparse or ragged tensors.
        # `InputOptions` names this field `experimental_fetch_to_device`,
        # which is the spelling used by the other `InputOptions(...)` calls
        # in this file.
        options = distribute_lib.InputOptions(
            experimental_fetch_to_device=False)
    else:
        options = None

    dist_dataset = distribution.experimental_distribute_dataset(
        dataset, options)
    with distribution.scope():
        iterator = iter(dist_dataset)
        _check_type_spec_structure(iterator)

    # Decompose the iterator and rebuild it from the components.
    spec = iterator._type_spec
    tensor_list = spec._to_components(iterator)
    re_iterator = spec._from_components(tensor_list)

    self.assertEqual(iterator._input_workers, re_iterator._input_workers)
    self.assertAllEqual(iterator._iterators, re_iterator._iterators)
def test_prefetch_to_host_dataset(self, distribution):
    """With fetch-to-device disabled, the fetched item reports a CPU device."""
    host_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=False)

    batched = dataset_ops.Dataset.range(100).batch(
        distribution.num_replicas_in_sync)
    dist_dataset = distribution.experimental_distribute_dataset(
        batched, options=host_options)

    if context.executing_eagerly():
        item = next(iter(dist_dataset))
    elif isinstance(dist_dataset, input_lib_v1.DistributedDatasetV1):
        # Outside eager mode only the V1 dataset offers an initializable
        # iterator.
        item = dist_dataset.make_initializable_iterator().get_next()
    else:
        self.skipTest("unsupported test combination")

    item_device = tf_device.DeviceSpec.from_string(item.device)
    self.assertAllEqual(item_device.device_type, "CPU")
def test_enqueue_per_device(self):
    """Per-device enqueue yields the same activations as a plain enqueue."""
    self.skip_if_oss()

    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    sparse_dataset = self._create_sparse_dataset(strategy)
    host_options = distribute_lib.InputOptions(
        experimental_fetch_to_device=False)
    sparse_iter = iter(
        strategy.experimental_distribute_dataset(
            sparse_dataset, options=host_options))

    @def_function.function
    def test_fn():
        def get_activations(dense_value):
            return mid_level_api.dequeue(), dense_value

        # First pass: a single enqueue of the whole distributed batch.
        sparse_features = next(sparse_iter)
        mid_level_api.enqueue(sparse_features, training=False)
        activations, dense_value1 = strategy.run(
            get_activations, args=(0.0,))

        def enqueue_fn(ctx):
            # Second pass: enqueue this replica's slice on its own device.
            core_id = ctx.replica_id_in_sync_group
            device = strategy.extended.worker_devices[core_id]
            sparse_features_local = nest.map_structure(
                lambda x: strategy.experimental_local_results(x)[core_id],
                sparse_features)
            mid_level_api.enqueue(
                sparse_features_local, training=False, device=device)
            return 0.0

        data = strategy.experimental_distribute_values_from_function(
            enqueue_fn)
        per_device_activations, dense_value2 = strategy.run(
            get_activations, args=(data,))
        return (activations, per_device_activations, dense_value1,
                dense_value2)

    activations, per_device_activations, _, _ = test_fn()

    # Extract per core numpy arrays and check that both enqueue paths give
    # the same results.
    activations0 = self._get_replica_numpy(activations, strategy, 0)
    per_device_activations0 = self._get_replica_numpy(
        per_device_activations, strategy, 0)
    self.assertAllClose(activations0, per_device_activations0)

    # The function is invoked a second time after the comparison.
    test_fn()
def test_dataset_creator_input_options(self):
    """Checks that `DatasetCreator` input options reach the data handler.

    A `DatasetCreator` is built with explicit `InputOptions`, a data handler
    is obtained for it under a `CollectiveAllReduceStrategy` scope, and the
    options recorded on the handler's dataset are compared with the ones
    passed in.
    """

    def dataset_fn(_):
        return dataset_ops.DatasetV2.from_tensor_slices([1, 1])

    creator = dataset_creator.DatasetCreator(
        dataset_fn,
        input_options=distribute_lib.InputOptions(
            experimental_fetch_to_device=True,
            experimental_per_replica_buffer_size=2))

    with collective_all_reduce_strategy.CollectiveAllReduceStrategy().scope():
        model = sequential.Sequential([core_layers.Dense(10)])
        data_handler = data_adapter.get_data_handler(
            creator, steps_per_epoch=2, model=model)

    # Ensuring the resulting `DistributedDatasetsFromFunction` has the right
    # options.
    applied_options = data_handler._dataset._options
    self.assertTrue(applied_options.experimental_fetch_to_device)
    self.assertEqual(applied_options.experimental_per_replica_buffer_size, 2)
def test_composite_input_dynamic_shapes_outside_compilation(
        self, enable_packed_var):
    """Runs a sparse embedding lookup on dynamically shaped sparse inputs.

    Each dataset element is a `SparseTensor` whose number of entries and
    dense shape depend on the element index. The lookup itself is wrapped in
    `tpu.outside_compilation`, and the per-replica sums are compared with
    fixed expected values.

    Args:
      enable_packed_var: forwarded unchanged to `get_tpu_strategy`.
    """
    strategy = get_tpu_strategy(enable_packed_var)
    if strategy.num_replicas_in_sync != 2:
        self.skipTest("Test assumes two replicas.")

    table = variables.Variable(
        initial_value=[[0.0, 1.0], [3.0, 7.0]], dtype=dtypes.float32)

    @def_function.function
    def sparse_lookup(iterator):

        def tpu_function(sparse):
            # The sparse lookup is routed through outside compilation; only
            # the reduction over axis 0 stays in the replica function.
            lookup = tpu.outside_compilation(
                embedding_ops.safe_embedding_lookup_sparse, table, sparse)
            return math_ops.reduce_sum(lookup, axis=0)

        return strategy.experimental_local_results(
            strategy.run(tpu_function, args=(next(iterator),)))

    def dataset_fn(_):
        dataset = dataset_ops.Dataset.range(2)

        def make_sparse(i):
            # Element i keeps the first 2 + i indices/values and declares a
            # dense shape of [2, 1 + i], so the two elements differ in shape.
            indices = array_ops.constant(
                [[0, 0], [1, 0], [1, 1]], dtype=dtypes.int64)[0:2 + i]
            values = array_ops.constant(
                [0, 0, 1], dtype=dtypes.int32)[0:2 + i]
            shape = [
                array_ops.constant([2], dtype=dtypes.int64),
                array_ops.expand_dims(1 + i, axis=0)
            ]
            dense_shape = array_ops.concat(shape, axis=0)
            return sparse_tensor.SparseTensor(
                indices=indices, values=values, dense_shape=dense_shape)

        return dataset.map(make_sparse)

    # `experimental_fetch_to_device` is the keyword every other
    # `InputOptions` call in this file uses; the previous
    # `experimental_prefetch_to_device` spelling is not accepted by that API.
    dataset = iter(
        strategy.experimental_distribute_datasets_from_function(
            dataset_fn,
            options=distribute_lib.InputOptions(
                experimental_fetch_to_device=False)))

    result = sparse_lookup(dataset)
    self.assertAllEqual(result, [[0.0, 2.0], [1.5, 5.0]])
def test_prefetch_to_host_dataset(self):
    """Checks that every value of a distributed element sits on a CPU device.

    The test object is requested for worker task 0 with `num_gpus=2`. A
    batched range dataset is distributed with
    `experimental_fetch_to_device=False`, and the device types of all values
    in the first element must collapse to exactly `['CPU']`.
    """
    distribution, _, _ = self._get_test_object(
        task_type='worker', task_id=0, num_gpus=2)
    host_only = distribute_lib.InputOptions(experimental_fetch_to_device=False)
    batched = dataset_ops.Dataset.range(100).batch(
        distribution.num_replicas_in_sync)
    dist_dataset = distribution.experimental_distribute_dataset(
        batched, options=host_only)

    # Only the V1 distributed dataset offers an initializable iterator.
    if not isinstance(dist_dataset, input_lib.DistributedDatasetV1):
        self.skipTest('unsupported test combination')
    item = dist_dataset.make_initializable_iterator().get_next()

    # Collect the distinct device types across all values of the element.
    device_types = set()
    for tensor in item.values:
        spec = tf_device.DeviceSpec.from_string(tensor.device)
        device_types.add(spec.device_type)
    self.assertAllEqual(list(device_types), ['CPU'])
def test_enqueue_weight_for_dense_tensor(self):
    """Checks that enqueueing weights with dense features raises an error.

    A dense input function created with `include_weights=True` is
    distributed, and its `(features, weights)` pair is enqueued inside a
    traced function. Calling that function must raise a `ValueError` whose
    message matches 'Weight specified for dense input'.
    """
    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')

    input_fn = self._create_dense_input_fn(strategy, include_weights=True)
    # `experimental_fetch_to_device` is the keyword every other
    # `InputOptions` call in this file uses; the previous
    # `experimental_prefetch_to_device` spelling is not accepted by that API.
    dist = strategy.distribute_datasets_from_function(
        input_fn,
        options=distribute_lib.InputOptions(
            experimental_fetch_to_device=False))
    dist_iter = iter(dist)

    @def_function.function
    def test_fn():

        def step():
            return mid_level_api.dequeue()

        features, weights = next(dist_iter)
        mid_level_api.enqueue(features, weights=weights, training=False)
        return strategy.run(step)

    with self.assertRaisesRegex(ValueError,
                                'Weight specified for dense input'):
        test_fn()