def execute(op_name, num_outputs, inputs, attrs=None, name=None):
    """Execute a TensorFlow operation.

    Args:
      op_name: Name of the TensorFlow operation (see REGISTER_OP in C++ code)
        to execute. It is passed through `str()` before being handed to the
        C layer.
      num_outputs: The number of outputs of the operation to fetch.
        (Explicitly provided instead of being inferred for performance
        reasons).
      inputs: A list of inputs to the operation. Each entry is unwrapped with
        `ag_core.getval` and must then expose a `_handle` attribute; no other
        conversion is performed here (see the TODO below).
      attrs: A tuple with alternating string attr names and attr values for
        this operation.
      name: Customized name for the operation. When given, it is appended to
        error messages and used as the trace name.

    Returns:
      A list of output Tensor objects, one per handle returned by
      `TFE_Py_Execute`. The result is always a list; it is never unwrapped
      into a single Tensor or replaced by None.

    Raises:
      The exception built by `core._status_to_exception` when the C layer
      reports a non-OK status.
    """
    ctx = context.get_default_context()
    # TODO(apassos) move this to convert_to_tensor
    inputs = [ag_core.getval(x) for x in inputs]
    # pylint: disable=protected-access
    input_handles = [c._handle for c in inputs]
    device_name = ctx.device_name
    try:
        outh = pywrap_tensorflow.TFE_Py_Execute(ctx._handle, device_name,
                                                str(op_name), input_handles,
                                                attrs, num_outputs)
    except core._NotOkStatusException as e:
        # Surface the user-supplied op name so the failure is easier to locate.
        if name is not None:
            message = e.message + " name: " + name
        else:
            message = e.message
        raise core._status_to_exception(e.code, message)

    tensors = [tensor._tensor_from_handle(x) for x in outh]

    # TODO(alive, cais): Use the execution callback mechanism.
    # Look the trace up once instead of once per output tensor.
    trace = core.active_trace()
    if trace is not None:
        trace_name = name if name else op_name
        for t in tensors:
            trace.record_tensor(trace_name,
                                ops.tensor_id(t),
                                t._device_name(),
                                t.shape.num_elements())
    # pylint: enable=protected-access

    # TODO(cais): Optimize this, perhaps by replacing this execute function
    # with a different one when there are execution callback(s).
    for callback in ctx.post_execution_callbacks:
        callback(op_name, name, attrs, inputs, tensors)

    return tensors
def _copy(self, ctx, device_name):
    """Return a new tensor holding this tensor's data on `device_name`.

    Args:
      ctx: Context object whose `_handle` is passed to the C copy routine.
      device_name: Name of the destination device.

    Returns:
      The tensor built from the handle produced by
      `TFE_TensorHandleCopyToDevice`.

    The copy runs under `errors.raise_exception_on_not_ok_status()`; the
    resulting tensor is only constructed after that context has exited.
    If a trace is active, the new tensor is recorded under the "COPY" label.
    """
    # pylint: disable=protected-access
    with errors.raise_exception_on_not_ok_status() as status:
        copied_handle = pywrap_tensorflow.TFE_TensorHandleCopyToDevice(
            self._handle, ctx._handle, device_name, status)
    result = _tensor_from_handle(copied_handle)

    # Nothing to record when tracing is off.
    if core.active_trace() is None:
        return result

    core.active_trace().record_tensor(
        "COPY",
        tape.tensor_id(result),
        result.device,
        result.shape.num_elements())
    return result
def execute(op_name, num_outputs, inputs, attrs, ctx, name=None):
    """Run a single TensorFlow operation eagerly.

    Args:
      op_name: Name of the TensorFlow operation (see REGISTER_OP in C++ code)
        to execute.
      num_outputs: Number of outputs to fetch from the operation. Supplied
        explicitly rather than inferred, for performance reasons.
      inputs: List of inputs to the operation; forwarded unchanged to
        `TFE_Py_Execute` and to the post-execution callbacks.
      attrs: Tuple of alternating string attr names and attr values for this
        operation.
      ctx: The value of context.context().
      name: Customized name for the operation. Only used to annotate error
        messages and as an argument to the post-execution callbacks.

    Returns:
      List of output Tensor objects. The list is empty if there are no
      outputs.

    Raises:
      The exception built by `core._status_to_exception` when the C layer
      reports a non-OK status.
    """
    # pylint: disable=protected-access
    target_device = ctx.device_name
    try:
        results = pywrap_tensorflow.TFE_Py_Execute(
            ctx._handle, target_device, op_name, inputs, attrs, num_outputs)
    except core._NotOkStatusException as err:
        # Append the user-visible op name, when there is one, to the message.
        detail = (err.message if name is None
                  else err.message + " name: " + name)
        six.raise_from(core._status_to_exception(err.code, detail), None)

    # TODO(alive, cais): Use the execution callback mechanism.
    if core.active_trace() is not None:
        for out in results:
            core.active_trace().record_tensor(
                op_name,
                ops.tensor_id(out),
                out.device,
                out.shape.num_elements())
    # pylint: enable=protected-access

    # TODO(cais): Optimize this, perhaps by replacing this execute function
    # with a different one when there are execution callback(s).
    for hook in ctx.post_execution_callbacks:
        hook(op_name, name, attrs, inputs, results)

    return results
def __init__(self, value, dtype=None): """Creates a Tensor object from a Python object or numpy array. May share storage with the numpy array, in which case changes to the numpy object will reflect in the Tensor. Arguments: value: A numpy.array or a Python object to create a Tensor for. dtype: TensorFlow dtype for the returned Tensor. If None, one will be automatically selected. """ # TODO(ashankar): Evaluate if we can and perhaps share code with # tf.constant defined in # https://www.tensorflow.org/code/tensorflow/python/framework/constant_op.py self._id = tf_ops.uid() if not isinstance(value, np.ndarray): npt = None if dtype is None else dtype.as_numpy_dtype value = np.array(value, dtype=npt) if dtype is None: value = _maybe_modify_numpy_dtype_determination(value) elif dtype is not None: npt = dtype.as_numpy_dtype if npt != value.dtype: value = value.astype(npt) try: value = np.asarray(value, order="C") self._handle = pywrap_tensorflow.TFE_Py_NumpyToTensorHandle(value) except core._NotOkStatusException as e: # pylint: disable=protected-access raise core._status_to_exception(e.code, e.message) # pylint: disable=protected-access # Almost all TensorFlow kernels for GPU devices keep int32 tensors in host # memory. This change approximates the same behavior for eager execution - # keeping int32 tensors in host memory. # # We do so to preclude the need for callers into such kernels from having to # explicitly place the int32 tensors in host memory. For example, prior to # this change one needed: # # with tfe.device('/gpu:0'): # ... # code here # with tfe.device('/cpu:0'): # shape = tfe.Tensor(...) # y = tfe.ops.random_uniform(.., shape) # # Without the CPU device block tfe.ops.random_uniform would fail since the # kernel expects the shape in host memory. 
# # After this change, we simplify the code: # # with tfe.device('/gpu:0'): # y = tfe.ops.random_uniform(, tfe.Tensor(...)) # # The approximation is not exact since if there are GPU kernels which do not # require host memory for int32 tensors, there will be a discrepancy between # eager execution and TensorFlow graphs. However, as of July 2017, there # were no known GPU kernels that kept int32 tensors in device memory. if _in_gpu_device() and value.dtype != np.int32: ctx = context.get_default_context() # pylint: disable=protected-access device_name = ctx.device_name with errors.raise_exception_on_not_ok_status() as status: self._handle = pywrap_tensorflow.TFE_TensorHandleCopyToDevice( self._handle, ctx._handle, device_name, status) # pylint: enable=protected-access self._dtype = dtypes.as_dtype( pywrap_tensorflow.TFE_TensorHandleDataType(self._handle)) # This mirrors tensorflow.core.framework.ops.Tensor._handle_data Which will # be None for tensors of type other than DT_REOSURCE. For DT_RESOURCE # tensors, this will contain a serialized HandleData proto with shape # inference metadata about shapes and dtypes of resources accessible from # this handle. self._handle_data = None if core.active_trace() is not None: core.active_trace().record_tensor("MANUAL", tape.tensor_id(self), self.device, self.shape.num_elements())
def __del__(self):
    """Release the underlying tensor handle and drop it from the active trace.

    Finalizers can run on partially constructed objects and during
    interpreter shutdown, so every dependency is checked before use:

    * If the object has no `_handle` (for example the constructor raised
      before the handle was created), there is nothing to release and
      nothing was recorded in a trace, so the method returns immediately
      instead of raising AttributeError.
    * Module globals may already have been reset to None at shutdown; the
      handle deletion and the trace bookkeeping are each skipped when the
      module they need is gone.
    """
    handle = getattr(self, "_handle", None)
    if handle is None:
        return

    if (pywrap_tensorflow is not None and
            pywrap_tensorflow.TFE_DeleteTensorHandle is not None):
        pywrap_tensorflow.TFE_DeleteTensorHandle(handle)

    # `core` and `tape` can be torn down before this object at shutdown.
    if core is None or tape is None:
        return

    # Look the trace up once so the check and the call see the same object.
    trace = core.active_trace()
    if trace is not None:
        trace.delete_tensor(tape.tensor_id(self))