def test_previously_unexpected_cluster_spec(self):
  with test.mock.patch.dict(
      "os.environ", {"TF_CONFIG": json.dumps(TF_CONFIG_WITHOUT_TASK)}):
    run_config_lib.RunConfig(
        experimental_distribute=DistributeConfig(
            train_distribute=mirrored_strategy.MirroredStrategy(
                ["/device:GPU:0", "/device:GPU:1"])))
def _get_strategy(num_gpus):
  if num_gpus > 1:
    return mirrored_strategy.MirroredStrategy(
        ['/GPU:%d' % i for i in range(num_gpus)])
  else:
    return distribution_strategy_context.get_strategy()  # The default strategy
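# A minimal usage sketch for the helper above (hypothetical, not part of the
# original source): with more than one GPU it builds a MirroredStrategy over
# the requested devices, otherwise it hands back the ambient default strategy.
multi_gpu_strategy = _get_strategy(num_gpus=2)  # MirroredStrategy on /GPU:0, /GPU:1
single_device_strategy = _get_strategy(num_gpus=1)  # the default strategy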
def testDistStratInterop(self):
  strategy = mirrored_strategy.MirroredStrategy(
      devices=['CPU:0', 'CPU:1', 'CPU:2'])

  multiplier = np_array_ops.asarray(5.)

  with strategy.scope():

    @def_function.function
    def run():
      ctx = distribution_strategy_context.get_replica_context()
      val = np_array_ops.asarray(ctx.replica_id_in_sync_group)
      return val * multiplier

    distributed_values = strategy.run(run)
    reduced = strategy.reduce(
        reduce_util.ReduceOp.SUM, distributed_values, axis=None)

  values = distributed_values.values

  # Note that this should match the number of virtual CPUs.
  self.assertLen(values, 3)
  self.assertIsInstance(values[0], np_arrays.ndarray)
  self.assertIsInstance(values[1], np_arrays.ndarray)
  self.assertIsInstance(values[2], np_arrays.ndarray)
  self.assertAllClose(values[0], 0)
  self.assertAllClose(values[1], 5)
  self.assertAllClose(values[2], 10)

  # "strategy.reduce" doesn't rewrap in ndarray.
  # self.assertIsInstance(reduced, np_arrays.ndarray)
  self.assertAllClose(reduced, 15)
def testOneDevicePerWorker(self, input_type, api_type, iteration_type,
                           enable_get_next_as_optional):
  if tf2.enabled():
    dataset_fn = lambda _: dataset_ops.DatasetV2.range(4)
  else:
    dataset_fn = lambda _: dataset_ops.Dataset.range(4)
  dataset_or_input_fn = self._create_dataset_or_input_fn(
      input_type, dataset_fn)
  strategy = mirrored_strategy.MirroredStrategy(
      devices=(self._cpu_devices()[0][1] + self._cpu_devices()[1][1]),
      cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
          ["/job:worker/task:0", "/job:worker/task:1"], 1))
  worker_devices = self._cpu_devices()
  with context.graph_mode(), strategy.scope(), self.cached_session() as sess:
    if input_type == "dataset":
      # Autosharded
      expected_values = [[0, 1], [2, 3]]
    else:
      expected_values = [[0, 0], [1, 1], [2, 2], [3, 3]]
    strategy.extended.experimental_enable_get_next_as_optional = (
        enable_get_next_as_optional)
    self._test_input_iteration(
        input_type,
        api_type,
        iteration_type,
        dataset_or_input_fn,
        worker_devices,
        expected_values,
        strategy,
        sess=sess)
def testAutoshardingOption(self, input_type, api_type, iteration_type,
                           autoshard):
  ds_option = dataset_ops.Options()
  ds_option.experimental_distribute.auto_shard = autoshard
  if tf2.enabled():
    dataset_fn = (
        lambda _: dataset_ops.DatasetV2.range(4).with_options(ds_option))
  else:
    dataset_fn = (
        lambda _: dataset_ops.Dataset.range(4).with_options(ds_option))
  dataset_or_input_fn = self._create_dataset_or_input_fn(
      input_type, dataset_fn)
  strategy = mirrored_strategy.MirroredStrategy(
      devices=(self._cpu_devices()[0][1] + self._cpu_devices()[1][1]),
      cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
          ["/job:worker/task:0", "/job:worker/task:1"], 1))
  worker_devices = self._cpu_devices()
  with context.graph_mode(), self.cached_session() as sess:
    if autoshard:
      expected_values = [[0, 1], [2, 3]]
    else:
      expected_values = [[0, 0], [1, 1], [2, 2], [3, 3]]
    self._test_input_iteration(input_type, api_type, iteration_type,
                               dataset_or_input_fn, worker_devices,
                               expected_values, strategy, sess)
def testUnevenDatasetBatches(self, input_type, api_type, iteration_type):
  strategy = mirrored_strategy.MirroredStrategy(
      devices=(self._cpu_and_one_gpu_devices()[0][1] +
               self._cpu_and_one_gpu_devices()[1][1]),
      cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
          ["/job:worker/task:0", "/job:worker/task:1"], 2))
  if tf2.enabled():
    dataset_fn = lambda _: dataset_ops.DatasetV2.range(9).batch(2)
  else:
    dataset_fn = lambda _: dataset_ops.Dataset.range(9).batch(2)
  dataset_or_input_fn = self._create_dataset_or_input_fn(
      input_type, dataset_fn)
  worker_devices = self._cpu_and_one_gpu_devices()
  with context.graph_mode(), strategy.scope(), self.cached_session() as sess:
    if input_type == "dataset":
      # Autosharded
      expected_values = [[[0, 1], [4, 5], [2, 3], [6, 7]],
                         [[8], [], [], []]]
    else:
      expected_values = [[[0, 1], [2, 3], [0, 1], [2, 3]],
                         [[4, 5], [6, 7], [4, 5], [6, 7]],
                         [[8], [], [8], []]]
    strategy.extended.experimental_enable_get_next_as_optional = True
    self._test_input_iteration(
        input_type,
        api_type,
        iteration_type,
        dataset_or_input_fn,
        worker_devices,
        expected_values,
        strategy,
        sess=sess)
def testCompiledAllReduce(self):
  self._setup_context()

  def all_reduce_sum(v):
    return collective_ops.all_reduce_v2(
        t=v,
        group_size=2,
        group_key=1,
        instance_key=1,
        merge_op='Add',
        final_op='Id')

  strategy = mirrored_strategy.MirroredStrategy(['GPU:0', 'GPU:1'])

  @def_function.function(jit_compile=True)
  def f():
    return control_flow_ops.while_loop(
        lambda i, _: i < 5,
        lambda i, t: (i + 1, all_reduce_sum(t)),
        (array_ops.zeros([]), constant_op.constant(1.0)))

  @def_function.function
  def run():
    return strategy.run(f)

  _, reduce = strategy.experimental_local_results(run())[0]
  self.assertEqual(reduce.numpy(), 32.0)
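# Worked check of the expected value in the test above (a sketch, not part of
# the original source): each of the five while-loop iterations all-reduce-sums
# the running value across the two replicas, and since both replicas hold the
# same value every step doubles it: 1 -> 2 -> 4 -> 8 -> 16 -> 32.
expected = 1.0
for _ in range(5):
  expected *= 2  # all_reduce_sum over two replicas holding equal values
assert expected == 32.0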
def testDifferentDatasets(self, input_type, api_type, iteration_type):

  def dataset_fn(ctx):
    if ctx.input_pipeline_id == 0:
      return dataset_ops.Dataset.range(8).batch(2)
    else:
      return dataset_ops.Dataset.range(9).batch(2)

  dataset_or_input_fn = self._create_dataset_or_input_fn(
      input_type, dataset_fn)
  strategy = mirrored_strategy.MirroredStrategy(
      devices=(self._cpu_and_one_gpu_devices()[0][1] +
               self._cpu_and_one_gpu_devices()[1][1]),
      cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
          ["/job:worker/task:0", "/job:worker/task:1"], 2))
  worker_devices = self._cpu_and_one_gpu_devices()
  with context.graph_mode(), strategy.scope(), self.cached_session() as sess:
    expected_values = [[[0, 1], [2, 3], [0, 1], [2, 3]],
                       [[4, 5], [6, 7], [4, 5], [6, 7]],
                       [[], [], [8], []]]
    strategy.extended.experimental_enable_get_next_as_optional = True
    self._test_input_iteration(
        input_type,
        api_type,
        iteration_type,
        dataset_or_input_fn,
        worker_devices,
        expected_values,
        strategy,
        sess=sess)
def testEagerDistributionStrategy(self):
  num_training_steps = 10
  checkpoint_directory = self.get_temp_dir()
  checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")

  def _train_fn(optimizer, model):
    input_value = constant_op.constant([[3.]])
    optimizer.minimize(
        functools.partial(model, input_value),
        global_step=root.optimizer_step)

  strategy = mirrored_strategy.MirroredStrategy()
  with strategy.scope():
    for training_continuation in range(3):
      model = MyModel()
      optimizer = adam.AdamOptimizer(0.001)
      root = trackable_utils.Checkpoint(
          optimizer=optimizer,
          model=model,
          optimizer_step=training_util.get_or_create_global_step())
      root.restore(
          checkpoint_management.latest_checkpoint(checkpoint_directory))

      for _ in range(num_training_steps):
        strategy.extended.call_for_each_replica(
            functools.partial(_train_fn, optimizer, model))
      root.save(file_prefix=checkpoint_prefix)
      self.assertEqual((training_continuation + 1) * num_training_steps,
                       root.optimizer_step.numpy())
def main(_):
  # Build the train and eval datasets from the MNIST data. Also return the
  # input shape, which is constructed based on the `image_data_format`,
  # i.e. channels_first or channels_last.
  tf.enable_eager_execution()

  train_ds, eval_ds, input_shape = get_input_datasets()
  model = get_model(input_shape)

  # Instantiate the MirroredStrategy object. If we don't specify `num_gpus` or
  # the `devices` argument then all the GPUs available on the machine are used.
  # TODO(priyag): Use `tf.distribute.MirroredStrategy` once available.
  strategy = mirrored_strategy.MirroredStrategy(['/gpu:0', '/cpu:0'])
  optimizer = rmsprop.RMSProp(learning_rate=0.001)

  # Compile the model by passing the distribution strategy object to the
  # `distribute` argument. `fit`, `evaluate` and `predict` will be distributed
  # based on the strategy instantiated.
  model.compile(loss=tf.keras.losses.categorical_crossentropy,
                optimizer=optimizer,
                metrics=['accuracy'],
                distribute=strategy)

  # Train the model with the train dataset.
  model.fit(x=train_ds, epochs=20, steps_per_epoch=468)

  # Evaluate the model with the eval dataset.
  score = model.evaluate(eval_ds, steps=10, verbose=0)
  print('Test loss:', score[0])
  print('Test accuracy:', score[1])
def testGraphDistributionStrategy(self):
  self.skipTest("b/121381184")
  num_training_steps = 10
  checkpoint_directory = self.get_temp_dir()
  checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")

  def _train_fn(optimizer, model):
    input_value = constant_op.constant([[3.]])
    return optimizer.minimize(
        functools.partial(model, input_value),
        global_step=root.optimizer_step)

  for training_continuation in range(3):
    with ops.Graph().as_default():
      strategy = mirrored_strategy.MirroredStrategy()
      with strategy.scope():
        model = MyModel()
        optimizer = adam.AdamOptimizer(0.001)
        root = trackable_utils.Checkpoint(
            optimizer=optimizer,
            model=model,
            optimizer_step=training_util.get_or_create_global_step())
        status = root.restore(
            checkpoint_management.latest_checkpoint(checkpoint_directory))
        train_op = strategy.extended.call_for_each_replica(
            functools.partial(_train_fn, optimizer, model))
        with self.session() as session:
          if training_continuation > 0:
            status.assert_consumed()
          status.initialize_or_restore()
          for _ in range(num_training_steps):
            session.run(train_op)
          root.save(file_prefix=checkpoint_prefix)
      self.assertEqual((training_continuation + 1) * num_training_steps,
                       root.optimizer_step.numpy())
def test_expand_distributed_variables(self, expand_strategy, policy):
  # 1. Create a context with both CPU:0 and CPU:1.
  context._reset_context()
  cpus = context.context().list_physical_devices("CPU")
  if len(cpus) == 1:
    context.context().set_logical_device_configuration(
        cpus[0], [
            context.LogicalDeviceConfiguration(),
            context.LogicalDeviceConfiguration()
        ])
  context.ensure_initialized()

  # 2. Create and save a model under a mirrored strategy.
  file_name = os.path.join(self.get_temp_dir(), "saved_model.pb")
  strategy = mirrored_strategy.MirroredStrategy(["CPU:0", "CPU:1"])
  strategy.extended._use_var_policy = policy
  with strategy.scope():
    root = tracking.AutoTrackable()
    root.v = variables.Variable([1., 1.], name="v")

    @def_function.function(input_signature=[])
    def f():
      root.v.assign([2., 2.])

    root.f = f

    save.export_meta_graph(
        obj=root,
        filename=file_name,
        options=save_options.SaveOptions(
            experimental_variable_policy=expand_strategy))

  # 3. Read the output file and test behavior.
  meta_graph_def = meta_graph.read_meta_graph_file(file_name)
  object_graph = meta_graph_def.object_graph_def
  graph_def = meta_graph_def.graph_def
  v = next((n.variable
            for n in object_graph.nodes
            if n.HasField("variable") and n.variable.name == "v"), None)
  saved_function = next((f for f in graph_def.library.function
                         if "inference_f_" in f.signature.name), None)
  self.assertIsNotNone(saved_function)
  if (expand_strategy ==
      save_options.VariablePolicy.EXPAND_DISTRIBUTED_VARIABLES):
    # experimental_save_variable_devices should have been automatically set.
    self.assertIn("CPU:0", v.device)
    components = v.experimental_distributed_variable_components
    self.assertLen(components, 2)
    v0 = next((x for x in components if x.name == "v"), None)
    v1 = next((x for x in components if x.name == "v/replica_1"), None)
    self.assertIsNotNone(v0)
    self.assertIsNotNone(v1)
    self.assertIn("CPU:0", v0.device)
    self.assertIn("CPU:1", v1.device)
    self.assertLen(saved_function.signature.input_arg, 2)
  else:
    self.assertEmpty(v.device)
    self.assertEmpty(v.experimental_distributed_variable_components)
    self.assertLen(saved_function.signature.input_arg, 1)
def testInitializeFromTFConfig(self):
  tf_config = {"cluster": self._cluster_spec}
  with test.mock.patch.dict("os.environ",
                            {"TF_CONFIG": json.dumps(tf_config)}):
    strategy = mirrored_strategy.MirroredStrategy(
        cross_device_ops=self._make_cross_device_ops())
    self.assertEqual(
        max(context.num_gpus(), 1) * 3, strategy.num_replicas_in_sync)
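# Sketch of the replica-count arithmetic asserted above (an assumption: the
# test's _cluster_spec, not shown here, describes three workers). Each worker
# contributes one replica per local GPU, or a single CPU replica when no GPU
# is available.
num_workers = 3  # assumed from the surrounding test fixture
expected_replicas = max(context.num_gpus(), 1) * num_workers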
def _distribution_strategies():
  return [
      collective_all_reduce_strategy.CollectiveAllReduceStrategy(),
      mirrored_strategy.MirroredStrategy(),
      # TODO(pulkitb): Add parameter_server
      # parameter_server_strategy.ParameterServerStrategy(),
      one_device_strategy.OneDeviceStrategy('/cpu:0'),
  ]
def test_repr_distributed(self):
  with mirrored_strategy.MirroredStrategy(['/cpu:1', '/cpu:2']).scope():
    x = get_var(1., dtypes.float32)
    x = autocast_variable.create_autocast_variable(x)
    self.assertRegexpMatches(
        repr(x).replace('\n', ' '),
        '<AutoCastDistributedVariable dtype=float32 true_dtype=float32 '
        'inner_variable=MirroredVariable.*>')
def test_init_run_config_independent_worker(self):
  # When `train_distribute` is specified and TF_CONFIG is detected, use
  # distribute coordinator with INDEPENDENT_WORKER mode.
  with test.mock.patch.dict("os.environ",
                            {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_CHIEF)}):
    config = run_config_lib.RunConfig(
        train_distribute=mirrored_strategy.MirroredStrategy())
  self.assertEqual(config._distribute_coordinator_mode,
                   dc.CoordinatorMode.INDEPENDENT_WORKER)
def distributed_dataset_producer(t):
  strategy = mirrored_strategy.MirroredStrategy(['GPU:0', 'GPU:1'])
  sparse_ds = dataset_ops.Dataset.from_tensor_slices(t).batch(2)
  dist_dataset = strategy.experimental_distribute_dataset(sparse_ds)
  ds = iter(dist_dataset)
  result = strategy.experimental_local_results(next(ds))[0]
  # Reach the end of the iterator.
  for ignore in ds:  # pylint: disable=unused-variable
    pass
  return result
def test_init_run_config_standalone_client(self):
  # When `train_distribute` is specified, TF_CONFIG is detected, and
  # `experimental.remote_cluster` is set, use distribute coordinator with
  # STANDALONE_CLIENT mode.
  config = run_config_lib.RunConfig(
      train_distribute=mirrored_strategy.MirroredStrategy(),
      experimental_distribute=DistributeConfig(
          remote_cluster={"chief": ["fake_worker"]}))
  self.assertEqual(config._distribute_coordinator_mode,
                   dc.CoordinatorMode.STANDALONE_CLIENT)
def test_should_run_distribute_coordinator(self):
  """Tests that should_run_distribute_coordinator returns a correct value."""
  # We don't use distribute coordinator for local training.
  self.assertFalse(
      dc_training.should_run_distribute_coordinator(
          run_config_lib.RunConfig()))

  # When `train_distribute` is not specified, don't use distribute
  # coordinator.
  with test.mock.patch.dict("os.environ",
                            {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_CHIEF)}):
    self.assertFalse(
        dc_training.should_run_distribute_coordinator(
            run_config_lib.RunConfig()))

  # When `train_distribute` is specified and TF_CONFIG is detected, use
  # distribute coordinator.
  with test.mock.patch.dict("os.environ",
                            {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_CHIEF)}):
    config_with_train_distribute = run_config_lib.RunConfig(
        experimental_distribute=DistributeConfig(
            train_distribute=mirrored_strategy.MirroredStrategy(
                ["/device:GPU:0", "/device:GPU:1"])))
    config_with_eval_distribute = run_config_lib.RunConfig(
        experimental_distribute=DistributeConfig(
            eval_distribute=mirrored_strategy.MirroredStrategy(
                ["/device:GPU:0", "/device:GPU:1"])))
  self.assertTrue(
      dc_training.should_run_distribute_coordinator(
          config_with_train_distribute))
  self.assertFalse(
      dc_training.should_run_distribute_coordinator(
          config_with_eval_distribute))

  # With a master in the cluster, don't run distribute coordinator.
  with test.mock.patch.dict("os.environ",
                            {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_MASTER)}):
    config = run_config_lib.RunConfig(
        experimental_distribute=DistributeConfig(
            train_distribute=mirrored_strategy.MirroredStrategy(
                ["/device:GPU:0", "/device:GPU:1"])))
    self.assertFalse(dc_training.should_run_distribute_coordinator(config))
def testMinimizeLossGraphMirroredStrategyWithOneNode(self):
  cluster_spec = {}
  cluster_spec["chief"] = self._cluster_spec["chief"]
  tf_config = {"cluster": cluster_spec}
  with test.mock.patch.dict("os.environ",
                            {"TF_CONFIG": json.dumps(tf_config)}):
    strategy = mirrored_strategy.MirroredStrategy()
    self.assertIsInstance(strategy.extended._inferred_cross_device_ops,
                          cross_device_ops_lib.NcclAllReduce)
  self.skipTest('b/130551176, run the following once fixed.')
  self._test_minimize_loss_graph(strategy, learning_rate=0.05)
def test_init_run_config_none_distribute_coordinator_mode(self):
  # We don't use distribute coordinator for local training.
  config = run_config_lib.RunConfig(
      train_distribute=mirrored_strategy.MirroredStrategy())
  dc_training.init_run_config(config, {})
  self.assertIsNone(config._distribute_coordinator_mode)

  # With a master in the cluster, don't run distribute coordinator.
  with test.mock.patch.dict("os.environ",
                            {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_MASTER)}):
    config = run_config_lib.RunConfig(
        train_distribute=mirrored_strategy.MirroredStrategy())
    self.assertIsNone(config._distribute_coordinator_mode)

  # When `train_distribute` is not specified, don't use distribute
  # coordinator.
  with test.mock.patch.dict("os.environ",
                            {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_CHIEF)}):
    config = run_config_lib.RunConfig()
    self.assertFalse(hasattr(config, "_distribute_coordinator_mode"))
def get_distribute_scope(distribute):

  class DummyContextManager(object):

    def __enter__(self):
      pass

    def __exit__(self, *args):
      pass

  if distribute:
    return mirrored_strategy.MirroredStrategy(['cpu:0']).scope()
  else:
    return DummyContextManager()
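# Hypothetical usage of the helper above (not in the original source): callers
# can write the same `with` block whether or not distribution is enabled, since
# the no-op context manager stands in for the strategy scope.
with get_distribute_scope(distribute=True):
  pass  # variables created here would be mirrored onto cpu:0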
def distributed_dataset_producer(t):
  strategy = mirrored_strategy.MirroredStrategy(['GPU:0', 'GPU:1'])
  ragged_ds = dataset_ops.Dataset.from_tensor_slices(t).batch(2)
  dist_dataset = strategy.experimental_distribute_dataset(ragged_ds)

  @def_function.function
  def replica_fn(elem):
    return elem

  result = []
  for x in dist_dataset:
    result.append(strategy.run(replica_fn, args=(x,)))
  return result
def distributed_dataset_producer(t):
  strategy = mirrored_strategy.MirroredStrategy(['GPU:0', 'GPU:1'])
  ragged_ds = dataset_ops.Dataset.from_tensor_slices(t).batch(
      2, drop_remainder)
  dist_dataset = strategy.experimental_distribute_dataset(ragged_ds)
  ds = iter(dist_dataset)
  result0 = strategy.experimental_local_results(next(ds))
  result1 = strategy.experimental_local_results(next(ds))
  result2 = strategy.experimental_local_results(next(ds))
  result3 = strategy.experimental_local_results(next(ds))
  # Reach the end of the iterator.
  for ignore in ds:  # pylint: disable=unused-variable
    pass
  return result0, result1, result2, result3
def testMirroredVariableWatched(self):

  def _replicated(input_tangent):
    with forwardprop.ForwardAccumulator(v, input_tangent) as acc:
      self.assertAllClose([.1, -.2, .3], acc.jvp(v))
      x = v * 2.
      self.assertAllClose([.2, -.4, .6], acc.jvp(x))
      x2 = v + .1
      self.assertAllClose([.1, -.2, .3], acc.jvp(x2))

  strategy = mirrored_strategy.MirroredStrategy()
  with strategy.scope():
    v = variables.Variable([1., 2., 3.])
    strategy.run(_replicated, args=(constant_op.constant([.1, -.2, .3]),))
def distributed_dataset_producer(t):
  strategy = mirrored_strategy.MirroredStrategy(['GPU:0', 'GPU:1'])
  ragged_ds = dataset_ops.Dataset.from_tensor_slices(t).batch(2)
  dist_dataset = strategy.experimental_distribute_dataset(ragged_ds)

  @def_function.function
  def replica_fn(elem):
    # Example of typical preprocessing of a string into a numeric feature.
    hashed = string_to_hash_bucket(elem['str'], 10)
    return 1000 * hashed

  result = []
  for x in dist_dataset:
    result.append(strategy.run(replica_fn, args=(x,)))
  return result
def test_repr_distributed(self):
  strategy = mirrored_strategy.MirroredStrategy(['/cpu:1', '/cpu:2'])
  with strategy.scope():
    x = get_var(1., dtypes.float32)
    x = autocast_variable.create_autocast_variable(x)
    use_policy = getattr(strategy.extended, '_use_policy', False)
    if use_policy:
      self.assertRegex(
          repr(x).replace('\n', ' '),
          '<AutoCastDistributedVariable dtype=float32 true_dtype=float32 '
          'inner_variable=DistributedVariable.*>')
    else:
      self.assertRegex(
          repr(x).replace('\n', ' '),
          '<AutoCastDistributedVariable dtype=float32 true_dtype=float32 '
          'inner_variable=MirroredVariable.*>')
def test_expand_distributed_variables(self, expand_strategy):
  context._reset_context()
  cpus = context.context().list_physical_devices("CPU")
  if len(cpus) == 1:
    context.context().set_logical_device_configuration(
        cpus[0], [
            context.LogicalDeviceConfiguration(),
            context.LogicalDeviceConfiguration()
        ])
  context.ensure_initialized()
  file_name = os.path.join(self.get_temp_dir(), "saved_model.pb")
  with mirrored_strategy.MirroredStrategy(["CPU:0", "CPU:1"]).scope():
    root = tracking.AutoTrackable()
    root.v = variables.Variable([1., 1.], name="v")

    @def_function.function(input_signature=[])
    def f():
      root.v.assign([2., 2.])

    root.f = f

    save.export_meta_graph(
        obj=root,
        filename=file_name,
        options=save_options.SaveOptions(
            experimental_variable_policy=expand_strategy))
  graph_def = meta_graph.read_meta_graph_file(file_name).graph_def
  v0 = next((n for n in graph_def.node if n.name == "v"), None)
  v1 = next((n for n in graph_def.node if n.name == "v/replica_1"), None)
  self.assertIsNotNone(v0)
  saved_function = next((f for f in graph_def.library.function
                         if "inference_f_" in f.signature.name), None)
  self.assertIsNotNone(saved_function)
  if (expand_strategy ==
      save_options.VariablePolicy.EXPAND_DISTRIBUTED_VARIABLES):
    self.assertIsNotNone(v1)
    # experimental_save_variable_devices should have been automatically set.
    self.assertIn("CPU:0", v0.device)
    self.assertIn("CPU:1", v1.device)
    self.assertLen(saved_function.signature.input_arg, 2)
  else:
    self.assertIsNone(v1)
    self.assertEmpty(v0.device)
    # TODO(b/159752793): There should be only one input here.
    self.assertLen(saved_function.signature.input_arg, 2)
def distributed_dataset_producer(t):
  strategy = mirrored_strategy.MirroredStrategy(['GPU:0', 'GPU:1'])
  ragged_ds = dataset_ops.Dataset.from_tensor_slices(t).batch(2)
  dist_dataset = strategy.experimental_distribute_dataset(ragged_ds)

  @def_function.function
  def replica_fn(elem):
    # Example of typical preprocessing of a string into a numeric feature.
    hashed = string_to_hash_bucket(elem['str'], 10)
    # The string feature is dense, so slice it down to the size of the ragged
    # int feature.
    hashed_sliced = hashed[:, :elem['size'][0]]
    # Combine the feature derived from the string with the numeric feature
    # from the dataset.
    return elem['int'] * 10 + hashed_sliced

  result = []
  for x in dist_dataset:
    result.append(strategy.run(replica_fn, args=(x,)))
  return result
def test_merge_call(self):

  def fn():
    var1 = variable_scope.get_variable(
        "var1", shape=[], initializer=init_ops.constant_initializer(21.))
    ds_context.get_replica_context().merge_call(lambda _: ())
    var2 = variable_scope.get_variable(
        "var2", shape=[], initializer=init_ops.constant_initializer(2.))
    return var1 * var2

  temp = template.make_template("my_template", fn)
  strategy = mirrored_strategy.MirroredStrategy(["/cpu:0", "/gpu:0"])
  out = strategy.unwrap(strategy.experimental_run_v2(temp))
  self.evaluate(variables.global_variables_initializer())
  self.assertAllEqual([42., 42.], self.evaluate(out))