def testMultiDeviceFunctionWithPackedVariable(self):
  with ops.device('/job:worker/replica:0/task:0/device:CPU:0'):
    var0 = resource_variable_ops.ResourceVariable(1.0)
  with ops.device('/job:worker/replica:0/task:1/device:CPU:0'):
    var1 = resource_variable_ops.ResourceVariable(2.0)

  packed_var = ops.pack_eager_tensors([var0.handle, var1.handle])
  self.assertEqual(packed_var.device,
                   '/job:localhost/replica:0/task:0/device:COMPOSITE:0')
  self.assertEqual(packed_var.backing_device,
                   '/job:localhost/replica:0/task:0/device:COMPOSITE:0')

  @def_function.function
  def add_variables():
    with ops.device('/job:worker/replica:0/task:0/device:CPU:0'):
      read0 = resource_variable_ops.read_variable_op(
          packed_var, dtype=dtypes.float32)
    with ops.device('/job:worker/replica:0/task:1/device:CPU:0'):
      read1 = resource_variable_ops.read_variable_op(
          packed_var, dtype=dtypes.float32)
    return read0 + read1

  # Run the function on a remote device
  with ops.device('/job:worker/replica:0/task:0'):
    self.assertAllEqual(add_variables().numpy(), 3.0)

  # Run the function on a local worker
  self.assertAllEqual(add_variables().numpy(), 3.0)
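# A minimal local sketch of the packing primitive exercised above, assuming
# two logical CPU devices (e.g. set up via
# tf.config.set_logical_device_configuration) rather than the remote workers
# used in the test; `t0`, `t1`, and `packed` are illustrative names. The
# components share dtype and shape, the packed result lives on the special
# COMPOSITE device, and it is resolved into its per-device components when
# consumed under a device scope inside a tf.function.
with ops.device('/device:CPU:0'):
  t0 = constant_op.constant([1.0, 2.0])
with ops.device('/device:CPU:1'):
  t1 = constant_op.constant([3.0, 4.0])
packed = ops.pack_eager_tensors([t0, t1])
assert 'COMPOSITE' in packed.device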
def __init__(self, variables, *, enable_packed_handle=False):
  if enable_packed_handle and not ops.executing_eagerly_outside_functions():
    raise ValueError(
        "Argument `enable_packed_handle` is true, but packed handle is only "
        "supported in eager mode. Please make sure eager execution is "
        "enabled.")
  self._variables = variables
  if enable_packed_handle:
    self._packed_handle = ops.pack_eager_tensors(
        [v.handle for v in variables])
  else:
    self._packed_handle = None
  for v in variables:
    v._distributed_container = weakref.ref(self)  # pylint: disable=protected-access
  self._device_to_handle = {v.device: v.handle for v in variables}
  self._primary_handle = variables[0].handle
  with ops.init_scope(), \
       ops.name_scope("DistributedVariable", skip_on_eager=False) as name:
    handle_name = ops.name_from_scope_name(name)
    self._unique_id = "%s_%d" % (handle_name, ops.uid())
    if context.executing_eagerly():
      initial_value = None
      initializer = None
    else:
      initial_value = variables[0].initial_value
      initializer = control_flow_ops.group(
          [v.initializer for v in variables])
    super().__init__(
        trainable=variables[0].trainable,
        shape=variables[0].shape,
        dtype=variables[0].dtype,
        handle=None,
        synchronization=variables[0].synchronization,
        constraint=variables[0].constraint,
        aggregation=variables[0].aggregation,
        distribute_strategy=variables[0]._distribute_strategy,
        name=variables[0].name,
        unique_id=self._unique_id,
        handle_name=handle_name,
        graph_element=variables[0]._graph_element,
        initial_value=initial_value,
        initializer_op=initializer,
        is_initialized_op=None,
        cached_value=None,
        handle_deleter=_DummyResourceDeleter(),
        caching_device=None,
        is_variables=True)
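# Usage sketch for the constructor above (assumptions: eager mode, two logical
# CPU devices, and that this __init__ belongs to a DistributedVariable-style
# wrapper class; `DistributedVariable` is used here as a placeholder name).
# With enable_packed_handle=True the per-device handles are additionally
# packed, so a tf.function can capture one COMPOSITE handle rather than one
# handle per device.
with ops.device('/device:CPU:0'):
  v0 = resource_variable_ops.ResourceVariable(3.0)
with ops.device('/device:CPU:1'):
  v1 = resource_variable_ops.ResourceVariable(4.0)
dv = DistributedVariable([v0, v1], enable_packed_handle=True)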
def __init__(self, distributed_variables=None, name=None, **unused_kwargs):
  """Packs a list of variables which are distributed across devices.

  Args:
    distributed_variables: A list of distributed Variables to pack.
    name: Optional name for the variable. Defaults to `'Variable'` and gets
      uniquified automatically.
  """
  if not ops.executing_eagerly_outside_functions():
    raise ValueError(
        "PackedDistributedVariable should be created in eager mode.")
  if not distributed_variables:
    raise ValueError("Expect a non-empty list of variables to pack.")
  for i, var in enumerate(distributed_variables):
    if not resource_variable_ops.is_resource_variable(var):
      raise ValueError("Expect a list of ResourceVariables to pack, "
                       "but the %d-th variable is %s" % (i, type(var)))

  self._distributed_variables = distributed_variables
  self._devices = [v.device for v in distributed_variables]
  with ops.init_scope():
    with ops.name_scope(name, "Variable", skip_on_eager=False) as name:
      handle = ops.pack_eager_tensors(
          [var.handle for var in distributed_variables])
      handle_name = ops.name_from_scope_name(name)
      unique_id = "%s_%d" % (handle_name, ops.uid())
      super(PackedDistributedVariable, self).__init__(
          trainable=distributed_variables[0].trainable,
          shape=distributed_variables[0].shape,
          dtype=distributed_variables[0].dtype,
          handle=handle,
          synchronization=distributed_variables[0].synchronization,
          constraint=distributed_variables[0].constraint,
          aggregation=distributed_variables[0].aggregation,
          distribute_strategy=distributed_variables[0]._distribute_strategy,  # pylint: disable=protected-access
          name=name,
          unique_id=unique_id,
          handle_name=handle_name,
          graph_element=None,
          initial_value=None,
          initializer_op=None,
          is_initialized_op=None,
          cached_value=None,
          caching_device=None,
          is_distributed_variables=True)
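# A minimal construction sketch for PackedDistributedVariable, assuming eager
# execution and two logical CPU devices; `v0`, `v1`, and `packed_var` are
# illustrative names. It mirrors the contract enforced above: a non-empty
# list of ResourceVariables whose handles get packed into a single COMPOSITE
# handle.
with ops.device('/cpu:0'):
  v0 = resource_variable_ops.ResourceVariable(1.0, name='var0')
with ops.device('/cpu:1'):
  v1 = resource_variable_ops.ResourceVariable(2.0, name='var1')
packed_var = PackedDistributedVariable([v0, v1])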
def testMultiDeviceFunctionExecutionOrderingWithPackedInput(self):
  shape = [2]
  with ops.device('/job:worker/replica:0/task:2/device:CPU:0'):
    # Send 20 remote requests to simulate heavy load on worker:2.
    unused_values = []
    for _ in range(20):
      unused_values.append(array_ops.zeros(shape))
    func_input = array_ops.zeros(shape)

  packed_input = ops.pack_eager_tensors([func_input])

  @def_function.function
  def func(packed_input):
    # When worker:2 receives the component function request, packed_input
    # should be ready on worker:2.
    with ops.device('/job:worker/replica:0/task:2/device:CPU:0'):
      ret = packed_input + constant_op.constant(1.0)
    return ret + constant_op.constant(1.0)

  # Run the function on worker:1.
  with ops.device('/job:worker/replica:0/task:1/device:CPU:0'):
    self.assertAllEqual(func(packed_input).numpy(),
                        array_ops.ones(shape).numpy() * 2)