def assertDeviceEqual(self, device1, device2):
    """Asserts that two devices have the same canonical name.

    Both arguments are normalized with `pydev.canonical_name` before they
    are compared, and the normalized names are the ones shown in the failure
    message.

    Args:
        device1: A string device name or TensorFlow `DeviceSpec` object.
        device2: A string device name or TensorFlow `DeviceSpec` object.
    """
    canonical1 = pydev.canonical_name(device1)
    canonical2 = pydev.canonical_name(device2)
    failure_msg = "Devices %s and %s are not equal" % (canonical1, canonical2)
    self.assertEqual(canonical1, canonical2, failure_msg)
def all_gather(t, group_size, group_key, instance_key):
    """Gathers tensors collectively, across devices, along the first dimension.

    The output shape handed to the op is the shape of `t` with its first
    dimension multiplied by `group_size`.

    Args:
        t: the tensor to participate in the accumulation.
        group_size: the total number of tensors to be collectively
            accumulated. Each must reside on a different device.
        group_key: an integer identifying the group of devices.
        instance_key: an integer identifying the participating group of Ops.

    Returns:
        An Op implementing the distributed operation.

    Raises:
        ValueError: if `t` has no device assignment or `group_size` is not
            at least 2.
    """
    assigned_device = device.canonical_name(t.device)
    if not assigned_device:
        raise ValueError('Device assignment required for collective ops')
    if group_size <= 1:
        raise ValueError(
            'Parameter group_size to all_gather must be at least 2.')

    # Only the leading dimension grows; trailing dimensions are unchanged.
    input_dims = t.shape.as_list()
    gathered_shape = [input_dims[0] * group_size]
    gathered_shape.extend(input_dims[1:])

    return gen_collective_ops.collective_gather(
        t,
        shape=gathered_shape,
        group_size=group_size,
        group_key=group_key,
        instance_key=instance_key)
def all_reduce(t, group_size, group_key, instance_key, merge_op, final_op,
               subdiv_offsets=(0,)):
    """Reduces tensors collectively, across devices.

    Args:
        t: the tensor to be reduced.
        group_size: the total number of tensors to be collectively reduced.
            Each must reside on a different device.
        group_key: an integer identifying the group of devices.
        instance_key: an integer identifying the participating group of Ops.
        merge_op: string naming the binary Op to be applied to compute each
            partial reduction.
        final_op: string naming the unary Op to be applied to each fully
            reduced value. Can be 'Id' for no operation.
        subdiv_offsets: a list of integer offsets into the tensor at which
            each independent subdivision should begin. Use [0] if no
            subdivision should be done.

    Returns:
        An Op implementing the distributed reduction.

    Raises:
        ValueError: if `t` has no device assignment or `group_size` is not
            at least 2.
    """
    assigned_device = device.canonical_name(t.device)
    if not assigned_device:
        raise ValueError('Device assignment required for collective ops')
    if group_size <= 1:
        raise ValueError(
            'Parameter group_size to all_reduce must be at least 2.')

    reduce_attrs = dict(
        group_size=group_size,
        group_key=group_key,
        instance_key=instance_key,
        merge_op=merge_op,
        final_op=final_op,
        subdiv_offsets=subdiv_offsets,
    )
    return gen_collective_ops.collective_reduce(t, **reduce_attrs)
def _initialize_handle_and_devices(self):
    """Creates the context handle and records its devices, at most once.

    Runs entirely under `self._initialize_lock`. If a context handle already
    exists the call is a no-op. Otherwise context options are built from the
    config, device placement policy and execution mode, a new context is
    created, and the canonical names of its devices are stored in
    `self._context_devices`. Devices whose type is "GPU" are counted in
    `self._num_gpus`.
    """
    with self._initialize_lock:
        already_initialized = self._context_handle is not None
        if already_initialized:
            return
        assert self._context_devices is None

        context_options = pywrap_tensorflow.TFE_NewContextOptions()
        try:
            if self._config is not None:
                serialized_config = self._config.SerializeToString()
                pywrap_tensorflow.TFE_ContextOptionsSetConfig(
                    context_options, serialized_config)
            if self._device_policy is not None:
                pywrap_tensorflow.TFE_ContextOptionsSetDevicePlacementPolicy(
                    context_options, self._device_policy)
            if self._execution_mode == ASYNC:
                pywrap_tensorflow.TFE_ContextOptionsSetAsync(
                    context_options, True)
            self._context_handle = pywrap_tensorflow.TFE_NewContext(
                context_options)
        finally:
            # The options object is released whether or not creation worked.
            pywrap_tensorflow.TFE_DeleteContextOptions(context_options)

        # Store list of devices
        self._context_devices = []
        listed_devices = pywrap_tensorflow.TFE_ContextListDevices(
            self._context_handle)
        try:
            self._num_gpus = 0
            device_count = pywrap_tensorflow.TF_DeviceListCount(listed_devices)
            for index in range(device_count):
                raw_name = pywrap_tensorflow.TF_DeviceListName(
                    listed_devices, index)
                self._context_devices.append(pydev.canonical_name(raw_name))
                kind = pywrap_tensorflow.TF_DeviceListType(
                    listed_devices, index)
                if kind == "GPU":
                    self._num_gpus += 1
        finally:
            pywrap_tensorflow.TF_DeleteDeviceList(listed_devices)
def _initialize_handle_and_devices(self):
    """Creates the context handle and records its devices, at most once.

    Runs entirely under `self._initialize_lock`. If a context handle already
    exists the call is a no-op. Otherwise a context is created from session
    options built with `self._config`, and the canonical names of its
    devices are stored in `self._context_devices`. Devices whose type is
    "GPU" are counted in `self._num_gpus`.

    Raises:
        Whatever `errors.raise_exception_on_not_ok_status` raises when one
        of the wrapped calls reports a non-OK status.
    """
    with self._initialize_lock:
        if self._context_handle is not None:
            return
        assert self._context_devices is None
        opts = pywrap_tensorflow.TF_NewSessionOptions(
            target=compat.as_bytes(""), config=self._config)
        try:
            with errors.raise_exception_on_not_ok_status() as status:
                self._context_handle = pywrap_tensorflow.TFE_NewContext(
                    opts, status)
        finally:
            # Release the options even when context creation fails;
            # otherwise the raised exception would skip the delete call.
            pywrap_tensorflow.TF_DeleteSessionOptions(opts)
        # Store list of devices
        self._context_devices = []
        with errors.raise_exception_on_not_ok_status() as status:
            device_list = pywrap_tensorflow.TFE_ContextListDevices(
                self._context_handle, status)
        try:
            self._num_gpus = 0
            for i in range(pywrap_tensorflow.TF_DeviceListCount(device_list)):
                with errors.raise_exception_on_not_ok_status() as status:
                    dev_name = pywrap_tensorflow.TF_DeviceListName(
                        device_list, i, status)
                self._context_devices.append(pydev.canonical_name(dev_name))
                with errors.raise_exception_on_not_ok_status() as status:
                    dev_type = pywrap_tensorflow.TF_DeviceListType(
                        device_list, i, status)
                if dev_type == "GPU":
                    self._num_gpus += 1
        finally:
            pywrap_tensorflow.TF_DeleteDeviceList(device_list)
def _apply_all_reduce(reduction_op, tensors):
    """Builds one NCCL all-reduce op per input tensor.

    Every op is created under the device of its input tensor and all of
    them share the name obtained from `_get_shared_name()`.

    Args:
        reduction_op: value forwarded as the `reduction` argument of
            `nccl_all_reduce`.
        tensors: non-empty sequence of tensors, each with a device
            assignment.

    Returns:
        A list with one all-reduce result per input tensor, in input order.

    Raises:
        ValueError: if `tensors` is empty or a tensor has no device
            assignment.
    """
    if not tensors:
        raise ValueError('Must pass >0 tensors to all reduce operations')

    shared_name = _get_shared_name()
    reduced = []
    for tensor in tensors:
        assigned_device = device.canonical_name(tensor.device)
        if not assigned_device:
            raise ValueError(
                'Device assignment required for nccl collective ops')
        with ops.device(tensor.device):
            reduce_op = gen_nccl_ops.nccl_all_reduce(
                tensor,
                reduction=reduction_op,
                num_devices=len(tensors),
                shared_name=shared_name)
            reduced.append(reduce_op)
    return reduced
def all_reduce(t, group_size, group_key, instance_key, merge_op, final_op,
               subdiv_offsets=(0, ), communication_hint='auto'):
    """Reduces tensors collectively, across devices.

    Args:
        t: the tensor to be reduced.
        group_size: the total number of tensors to be collectively reduced.
            Each must reside on a different device.
        group_key: an integer identifying the group of devices.
        instance_key: an integer identifying the participating group of Ops.
        merge_op: string naming the binary Op to be applied to compute each
            partial reduction.
        final_op: string naming the unary Op to be applied to each fully
            reduced value. Can be 'Id' for no operation.
        subdiv_offsets: a list of integer offsets into the tensor at which
            each independent subdivision should begin. Use [0] if no
            subdivision should be done.
        communication_hint: preferred collective communication. The
            implementation may fall back to another mechanism. Options
            include `auto`, `ring`, and `nccl`. The value is lower-cased
            before it is passed to the op.

    Returns:
        An Op implementing the distributed reduction.

    Raises:
        ValueError: if `t` has no device assignment or `group_size` is not
            at least 2.
    """
    assigned_device = device.canonical_name(t.device)
    if not assigned_device:
        raise ValueError('Device assignment required for collective ops')
    if group_size <= 1:
        raise ValueError(
            'Parameter group_size to all_reduce must be at least 2.')

    reduce_attrs = dict(
        group_size=group_size,
        group_key=group_key,
        instance_key=instance_key,
        merge_op=merge_op,
        final_op=final_op,
        subdiv_offsets=subdiv_offsets,
        communication_hint=communication_hint.lower(),
    )
    return gen_collective_ops.collective_reduce(t, **reduce_attrs)
def _initialize_devices(self):
    """Records the devices of the current context handle.

    Resets `self._context_devices` to the canonical names of the devices
    listed for `self._context_handle`, and sets `self._num_gpus` to the
    number of those devices whose type is "GPU". The device list is deleted
    when the scan finishes, whether or not it succeeded.
    """
    # Store list of devices
    self._context_devices = []
    listed_devices = pywrap_tensorflow.TFE_ContextListDevices(
        self._context_handle)
    try:
        self._num_gpus = 0
        device_count = pywrap_tensorflow.TF_DeviceListCount(listed_devices)
        for index in range(device_count):
            raw_name = pywrap_tensorflow.TF_DeviceListName(
                listed_devices, index)
            self._context_devices.append(pydev.canonical_name(raw_name))
            kind = pywrap_tensorflow.TF_DeviceListType(listed_devices, index)
            if kind == "GPU":
                self._num_gpus += 1
    finally:
        pywrap_tensorflow.TF_DeleteDeviceList(listed_devices)
def _initialize_devices(self):
    """Populates the cached device names and the GPU count.

    Lists the devices of `self._context_handle`, stores each canonical
    device name in `self._context_devices`, and increments `self._num_gpus`
    for each device whose type is "GPU". The device list is always deleted
    afterwards.
    """
    # Store list of devices
    self._context_devices = []
    dev_list = pywrap_tensorflow.TFE_ContextListDevices(self._context_handle)
    try:
        self._num_gpus = 0
        total = pywrap_tensorflow.TF_DeviceListCount(dev_list)
        for position in range(total):
            name = pywrap_tensorflow.TF_DeviceListName(dev_list, position)
            self._context_devices.append(pydev.canonical_name(name))
            is_gpu = (
                pywrap_tensorflow.TF_DeviceListType(dev_list, position)
                == "GPU")
            if is_gpu:
                self._num_gpus += 1
    finally:
        pywrap_tensorflow.TF_DeleteDeviceList(dev_list)
def device(name):
    """Context-manager to force placement of operations and Tensors on a device.

    For example:
    ```python
    with tfe.device('gpu:0'):
      with tfe.device('cpu:0'):
        shape = tfe.Tensor([], dtype=tf.int32)
      x = ops.truncated_normal(shape, tf.float32)
    ```
    will ensure that the `shape` Tensor is on CPU but the `truncated_normal`
    operation runs on GPU 0.

    The default context's device index is set for the duration of the block
    and restored to its previous value afterwards.

    Args:
        name: Name of the device (see get_default_context().devices()), or
            None to enable automatic placement.

    Yields:
        Nothing.

    Raises:
        ValueError: If name does not correspond to a valid device.
    """
    ctx = get_default_context()
    device_index = -1  # Used as-is when `name` is None.
    if name is not None:
        name = pydev.canonical_name(name)
        all_devices = ctx.devices()
        # TODO(ashankar): This will change when we have distributed support.
        # At that point, should not look for a string suffix but be able to
        # do a full string comparison.
        device_index = next(
            (position for position, candidate in enumerate(all_devices)
             if candidate.endswith(name)),
            -1)
        if device_index < 0:
            raise ValueError(
                "device {} does not match the available devices ({})".format(
                    name, all_devices))

    previous_index = ctx._device_index  # pylint: disable=protected-access
    try:
        ctx._device_index = device_index  # pylint: disable=protected-access
        yield
    finally:
        ctx._device_index = previous_index  # pylint: disable=protected-access
def broadcast_send(t, shape, dtype, group_size, group_key, instance_key):
    """Broadcasts one tensor to a group of others, across devices.

    Args:
        t: the tensor to be sent.
        shape: the shape of the tensor being sent, which must agree with t.
        dtype: the type of the tensor being sent, which must agree with t.
        group_size: one plus the number of receiving tensors, i.e. the total
            number of devices participating. Each tensor must reside on a
            different device.
        group_key: an integer identifying the group of devices.
        instance_key: an integer identifying the participating group of Ops.

    Returns:
        An Op implementing the distributed broadcast send.

    Raises:
        ValueError: if any of the input parameter constraints are not met.

    Note that the shape and dtype arguments appear redundant since they
    should be obtainable from t. There are two reasons for including them.
    First, the shape and type of tensors passed via broadcast must be known
    ahead of time in their most specific form so that the receive side can
    allocate memory for the operation and shape/type inference can carry
    forward from there. Including the same declarations on the send side
    clarifies a commitment already made. Secondly, having nearly identical
    use syntax for send and receive sides may simplify tool-driven
    generation of broadcast.
    """
    if not device.canonical_name(t.device):
        raise ValueError('Device assignment required for collective ops')
    if group_size <= 1:
        raise ValueError(
            'Parameter group_size to broadcast_send must be at least 2.')
    if t.shape != shape:
        raise ValueError(
            'Shape of broadcast_send tensor not equal to declared shape')
    if t.dtype != dtype:
        raise ValueError(
            'Type of broadcast_send tensor not equal to declared type')
    # `dtype` is only validated above; it is not forwarded to the op.
    return gen_collective_ops.collective_bcast_send(
        t,
        shape=shape,
        group_size=group_size,
        group_key=group_key,
        instance_key=instance_key)
def broadcast_send(t, shape, dtype, group_size, group_key, instance_key):
    """Broadcasts one tensor to a group of others, across devices.

    Args:
        t: the tensor to be sent.
        shape: the shape of the tensor being sent, which must agree with t.
        dtype: the type of the tensor being sent, which must agree with t.
        group_size: one plus the number of receiving tensors, i.e. the total
            number of devices participating. Each tensor must reside on a
            different device.
        group_key: an integer identifying the group of devices.
        instance_key: an integer identifying the participating group of Ops.

    Returns:
        An Op implementing the distributed broadcast send.

    Raises:
        ValueError: if any of the input parameter constraints are not met.

    Note that the shape and dtype arguments appear redundant since they
    should be obtainable from t. There are two reasons for including them.
    First, the shape and type of tensors passed via broadcast must be known
    ahead of time in their most specific form so that the receive side can
    allocate memory for the operation and shape/type inference can carry
    forward from there. Including the same declarations on the send side
    clarifies a commitment already made. Secondly, having nearly identical
    use syntax for send and receive sides may simplify tool-driven
    generation of broadcast.
    """
    if not device.canonical_name(t.device):
        raise ValueError('Device assignment required for collective ops')
    if group_size <= 1:
        raise ValueError(
            'Parameter group_size to broadcast_send must be at least 2.')
    if t.shape != shape:
        raise ValueError(
            'Shape of broadcast_send tensor not equal to declared shape')
    if t.dtype != dtype:
        raise ValueError(
            'Type of broadcast_send tensor not equal to declared type')
    # `dtype` is only validated above; it is not forwarded to the op.
    return gen_collective_ops.collective_bcast_send(
        t,
        shape=shape,
        group_size=group_size,
        group_key=group_key,
        instance_key=instance_key)
def _initialize_handle_and_devices(self):
    """Creates the context handle and records its devices, at most once.

    Runs entirely under `self._initialize_lock`. If a context handle already
    exists the call is a no-op. Otherwise context options are built from the
    config, device placement policy and execution mode, a new context is
    created, and the canonical names of its devices are stored in
    `self._context_devices`. Devices whose type is "GPU" are counted in
    `self._num_gpus`.

    Raises:
        Whatever `errors.raise_exception_on_not_ok_status` raises when one
        of the wrapped calls reports a non-OK status.
    """
    with self._initialize_lock:
        if self._context_handle is not None:
            return
        assert self._context_devices is None
        opts = pywrap_tensorflow.TFE_NewContextOptions()
        try:
            with errors.raise_exception_on_not_ok_status() as status:
                if self._config is not None:
                    config_str = self._config.SerializeToString()
                    pywrap_tensorflow.TFE_ContextOptionsSetConfig(
                        opts, config_str, len(config_str), status)
                if self._device_policy is not None:
                    pywrap_tensorflow.TFE_ContextOptionsSetDevicePlacementPolicy(
                        opts, self._device_policy)
                if self._execution_mode == ASYNC:
                    # The setter takes the options object as its first
                    # argument, like the other option setters here.
                    pywrap_tensorflow.TFE_ContextOptionsSetAsync(opts, True)
                self._context_handle = pywrap_tensorflow.TFE_NewContext(
                    opts, status)
        finally:
            pywrap_tensorflow.TFE_DeleteContextOptions(opts)
        # Store list of devices
        self._context_devices = []
        with errors.raise_exception_on_not_ok_status() as status:
            device_list = pywrap_tensorflow.TFE_ContextListDevices(
                self._context_handle, status)
        try:
            self._num_gpus = 0
            for i in range(pywrap_tensorflow.TF_DeviceListCount(device_list)):
                with errors.raise_exception_on_not_ok_status() as status:
                    dev_name = pywrap_tensorflow.TF_DeviceListName(
                        device_list, i, status)
                self._context_devices.append(pydev.canonical_name(dev_name))
                with errors.raise_exception_on_not_ok_status() as status:
                    dev_type = pywrap_tensorflow.TF_DeviceListType(
                        device_list, i, status)
                if dev_type == "GPU":
                    self._num_gpus += 1
        finally:
            pywrap_tensorflow.TF_DeleteDeviceList(device_list)
def testCanonicalName(self):
    """Checks `device.canonical_name` on ordered and reordered names."""
    # Each entry is (expected canonical name, name given to canonical_name).
    cases = (
        ("/job:foo/replica:0", "/job:foo/replica:0"),
        ("/job:foo/replica:0", "/replica:0/job:foo"),
        ("/job:foo/replica:0/task:0", "/job:foo/replica:0/task:0"),
        ("/job:foo/replica:0/task:0", "/job:foo/task:0/replica:0"),
        ("/device:CPU:0", "/device:CPU:0"),
        ("/device:GPU:2", "/device:GPU:2"),
        ("/job:foo/replica:0/task:0/device:GPU:0",
         "/job:foo/replica:0/task:0/device:GPU:0"),
        ("/job:foo/replica:0/task:0/device:GPU:0",
         "/device:GPU:0/task:0/replica:0/job:foo"),
    )
    for expected, given in cases:
        self.assertEqual(expected, device.canonical_name(given))
def _GroupByDevices(self, vars_to_save):
    """Group Variable tensor slices per device.

    TODO(touts): Make sure that all the devices found are on different
    job/replica/task/cpu|gpu. It would be bad if 2 were on the same device.
    It can happen if the devices are unspecified.

    Args:
        vars_to_save: A list of BaseSaverBuilder.VarToSave objects.

    Returns:
        A list of (device_name, list of BaseSaverBuilder.VarToSave) tuples,
        where device_name is the canonical device name of the grouped
        variables. The list is sorted by ascending device_name.
    """
    grouped = collections.defaultdict(list)
    for entry in vars_to_save:
        device_name = pydev.canonical_name(entry.var.device)
        grouped[device_name].append(entry)
    return sorted(grouped.items(), key=lambda pair: pair[0])
def _GroupByDevices(self, vars_to_save):
    """Group Variable tensor slices per device.

    TODO(touts): Make sure that all the devices found are on different
    job/replica/task/cpu|gpu. It would be bad if 2 were on the same device.
    It can happen if the devices are unspecified.

    Args:
        vars_to_save: A list of BaseSaverBuilder.VarToSave objects.

    Returns:
        A list of (device_name, list of BaseSaverBuilder.VarToSave) tuples
        keyed by canonical device name, sorted by ascending device_name.
    """
    buckets = collections.defaultdict(list)
    for saveable in vars_to_save:
        buckets[pydev.canonical_name(saveable.var.device)].append(saveable)
    by_device = list(buckets.items())
    by_device.sort(key=lambda item: item[0])
    return by_device
def testCanonicalName(self):
    """Checks `device.canonical_name`, including short `/gpu:N` names."""
    full_gpu_name = "/job:foo/replica:0/task:0/device:GPU:0"
    # Each entry is (expected canonical name, name given to canonical_name).
    cases = (
        ("/job:foo/replica:0", "/job:foo/replica:0"),
        ("/job:foo/replica:0", "/replica:0/job:foo"),
        ("/job:foo/replica:0/task:0", "/job:foo/replica:0/task:0"),
        ("/job:foo/replica:0/task:0", "/job:foo/task:0/replica:0"),
        ("/device:CPU:0", "/device:CPU:0"),
        ("/device:GPU:2", "/device:GPU:2"),
        (full_gpu_name, "/job:foo/replica:0/task:0/gpu:0"),
        (full_gpu_name, "/gpu:0/task:0/replica:0/job:foo"),
    )
    for expected, given in cases:
        self.assertEqual(expected, device.canonical_name(given))
def _initialize_handle_and_devices(self):
    """Creates the context handle and records its devices, at most once.

    Runs entirely under `self._initialize_lock`. If a context handle already
    exists the call is a no-op. Otherwise context options are built from the
    config and device placement policy, a new context is created, and the
    canonical names of its devices are stored in `self._context_devices`.
    Devices whose type is "GPU" are counted in `self._num_gpus`.
    """
    with self._initialize_lock:
        already_initialized = self._context_handle is not None
        if already_initialized:
            return
        assert self._context_devices is None

        context_options = pywrap_tensorflow.TFE_NewContextOptions()
        try:
            with errors.raise_exception_on_not_ok_status() as status:
                if self._config is not None:
                    serialized = self._config.SerializeToString()
                    pywrap_tensorflow.TFE_ContextOptionsSetConfig(
                        context_options, serialized, len(serialized), status)
                if self._device_policy is not None:
                    pywrap_tensorflow.TFE_ContextOptionsSetDevicePlacementPolicy(
                        context_options, self._device_policy)
                self._context_handle = pywrap_tensorflow.TFE_NewContext(
                    context_options, status)
        finally:
            # The options object is released whether or not creation worked.
            pywrap_tensorflow.TFE_DeleteContextOptions(context_options)

        # Store list of devices
        self._context_devices = []
        with errors.raise_exception_on_not_ok_status() as status:
            listed_devices = pywrap_tensorflow.TFE_ContextListDevices(
                self._context_handle, status)
        try:
            self._num_gpus = 0
            device_count = pywrap_tensorflow.TF_DeviceListCount(listed_devices)
            for index in range(device_count):
                with errors.raise_exception_on_not_ok_status() as status:
                    raw_name = pywrap_tensorflow.TF_DeviceListName(
                        listed_devices, index, status)
                self._context_devices.append(pydev.canonical_name(raw_name))
                with errors.raise_exception_on_not_ok_status() as status:
                    kind = pywrap_tensorflow.TF_DeviceListType(
                        listed_devices, index, status)
                if kind == "GPU":
                    self._num_gpus += 1
        finally:
            pywrap_tensorflow.TF_DeleteDeviceList(listed_devices)
def __init__(self):
    """Creates the context handle and caches its canonical device names.

    Sets up `self._eager_context`, creates a context from session options
    with an empty target and no config, stores the canonical name of every
    listed device in `self._devices`, and initializes
    `self._summary_writer_resource` to None.

    Raises:
        Whatever `errors.raise_exception_on_not_ok_status` raises when one
        of the wrapped calls reports a non-OK status.
    """
    self._eager_context = _EagerContext()
    # Create a handle
    opts = pywrap_tensorflow.TF_NewSessionOptions(
        target=compat.as_bytes(""), config=None)
    try:
        with errors.raise_exception_on_not_ok_status() as status:
            self._handle = pywrap_tensorflow.TFE_NewContext(opts, status)
    finally:
        # Release the options even when context creation fails; otherwise
        # the raised exception would skip the delete call.
        pywrap_tensorflow.TF_DeleteSessionOptions(opts)
    # Store list of devices
    self._devices = []
    with errors.raise_exception_on_not_ok_status() as status:
        device_list = pywrap_tensorflow.TFE_ContextListDevices(
            self._handle, status)
    try:
        for i in range(pywrap_tensorflow.TF_DeviceListCount(device_list)):
            with errors.raise_exception_on_not_ok_status() as status:
                dev_name = pywrap_tensorflow.TF_DeviceListName(
                    device_list, i, status)
            self._devices.append(pydev.canonical_name(dev_name))
    finally:
        pywrap_tensorflow.TF_DeleteDeviceList(device_list)
    self._summary_writer_resource = None
def _initialize_handle_and_devices(self):
    """Creates the context handle and records its devices, at most once.

    Runs entirely under `self._initialize_lock`. If a context handle already
    exists the call is a no-op. Otherwise a context is created from session
    options built with `self._config`, and the canonical names of its
    devices are stored in `self._context_devices`.

    Raises:
        Whatever `errors.raise_exception_on_not_ok_status` raises when one
        of the wrapped calls reports a non-OK status.
    """
    with self._initialize_lock:
        if self._context_handle is not None:
            return
        assert self._context_devices is None
        opts = pywrap_tensorflow.TF_NewSessionOptions(
            target=compat.as_bytes(""), config=self._config)
        try:
            with errors.raise_exception_on_not_ok_status() as status:
                self._context_handle = pywrap_tensorflow.TFE_NewContext(
                    opts, status)
        finally:
            # Release the options even when context creation fails;
            # otherwise the raised exception would skip the delete call.
            pywrap_tensorflow.TF_DeleteSessionOptions(opts)
        # Store list of devices
        self._context_devices = []
        with errors.raise_exception_on_not_ok_status() as status:
            device_list = pywrap_tensorflow.TFE_ContextListDevices(
                self._context_handle, status)
        try:
            for i in range(pywrap_tensorflow.TF_DeviceListCount(device_list)):
                with errors.raise_exception_on_not_ok_status() as status:
                    dev_name = pywrap_tensorflow.TF_DeviceListName(
                        device_list, i, status)
                self._context_devices.append(pydev.canonical_name(dev_name))
        finally:
            pywrap_tensorflow.TF_DeleteDeviceList(device_list)
def _check_device(tensor, expected=None):
    """Validates the device assignment of `tensor`.

    Args:
        tensor: object with a `device` attribute to validate.
        expected: optional device string. When truthy it is compared with
            `tensor.device` exactly as given (no canonicalization).

    Raises:
        ValueError: if the canonical name of `tensor.device` is empty, or if
            `expected` is given and differs from `tensor.device`.
    """
    assigned_device = device.canonical_name(tensor.device)
    if not assigned_device:
        raise ValueError('Device assignment required for nccl collective ops')
    if not expected:
        return
    if expected != tensor.device:
        raise ValueError(
            'Expected device %s, got %s' % (expected, tensor.device))
def _get_device_name(handle):
    """Returns the canonical device name encoded in the handle.

    The handle is converted to a string and the text after its last ";" is
    canonicalized. If there is no ";", the whole string is used.
    """
    handle_text = compat.as_str_any(handle)
    # rpartition yields the full string as the tail when ";" is absent.
    _, _, device_part = handle_text.rpartition(";")
    return pydev.canonical_name(device_part)
def _check_device(tensor, expected=None):
    """Checks that `tensor` has a device and, optionally, which one.

    Args:
        tensor: object with a `device` attribute to validate.
        expected: optional device string. When truthy it must equal
            `tensor.device` as a raw string.

    Raises:
        ValueError: if the canonical name of `tensor.device` is empty, or if
            `expected` is given and differs from `tensor.device`.
    """
    has_device = bool(device.canonical_name(tensor.device))
    if not has_device:
        raise ValueError('Device assignment required for nccl collective ops')
    mismatch = bool(expected) and expected != tensor.device
    if mismatch:
        raise ValueError(
            'Expected device %s, got %s' % (expected, tensor.device))
def _get_device_name(handle):
    """Extracts the device name encoded in the handle.

    The handle is converted to a string, split on ";", and the last piece
    is returned in canonical form.
    """
    pieces = compat.as_str_any(handle).rsplit(";", 1)
    raw_device = pieces[-1]
    return pydev.canonical_name(raw_device)
def _check_device_assignment(tensor):
    """Raises ValueError when `tensor` has no device assignment.

    A tensor counts as unassigned when the canonical name of `tensor.device`
    is empty.
    """
    assigned_device = device.canonical_name(tensor.device)
    if assigned_device:
        return
    raise ValueError('Device assignment required for nccl collective ops')
def _check_device_assignment(tensor):
    """Ensures that `tensor` carries a device assignment.

    Raises:
        ValueError: if the canonical name of `tensor.device` is empty.
    """
    is_unassigned = not device.canonical_name(tensor.device)
    if is_unassigned:
        raise ValueError('Device assignment required for nccl collective ops')